def connectUpdateCreateStatisticsDB(directory):
    '''Open the statistics database used for updating the statistics of all
    channels and bring it up to the current schema version, creating the
    tables first if the database is new. If the setup/upgrade fails, the
    error is printed and the program exits with status 1

    :param directory: The directory containing the "statistics.db" file
    :type directory: string

    :raises: :class:``sqlite3.Error``: Unable to connect to database

    :returns: Connection to the database
    :rtype: sqlite3.Connection
    '''
    connection = yta.connectDB(os.path.join(directory, "statistics.db"))
    cursor = connection.cursor()

    #Read the schema version, a failing query means the database is new
    try:
        row = cursor.execute(
            "SELECT dbversion FROM setup ORDER BY id DESC LIMIT 1;").fetchone()
        version = row[0]
    except sqlite3.Error:
        version = 0

    #Schema already current: nothing to set up
    if not version < __statisticsdbversion__:
        return connection

    createSetup = """
        CREATE TABLE setup (
            id INTEGER PRIMARY KEY UNIQUE NOT NULL,
            autoupdate BOOLEAN NOT NULL,
            lastupdate INTEGER NOT NULL,
            maxcount INTEGER NOT NULL,
            dbversion INTEGER NOT NULL
        );
    """
    createChannels = """
        CREATE TABLE channels (
            id INTEGER PRIMARY KEY UNIQUE NOT NULL,
            name STRING UNIQUE NOT NULL,
            lastupdate INTEGER NOT NULL,
            complete BOOLEAN NOT NULL
        );
    """
    try:
        if version < 1:
            #Initial setup: encoding, both tables and the first setup row
            connection.execute("pragma encoding=UTF8")
            connection.execute(createSetup)
            connection.execute(createChannels)
            version = 1
            cursor.execute(
                "INSERT INTO setup(autoupdate,lastupdate,maxcount,dbversion) VALUES(?,?,?,?)",
                (False, 0, 100000, version))
        connection.commit()
    except sqlite3.Error as e:
        print("ERROR: Unable to upgrade database (\"{}\")".format(e))
        connection.rollback()
        yta.closeDB(connection)
        sys.exit(1)

    return connection
def writeDownloadedFile(dbPath, filePath, replace, videoID):
    '''Write file containing Youtube IDs of all videos already archived,
    one "youtube <ID>" line per video. Does nothing if the database file
    does not exist; database errors are swallowed

    :param dbPath: Path of the archive database
    :type dbPath: string
    :param filePath: Path where the file containing all existing IDs should be written to
    :type filePath: string
    :param replace: Whether to replace the existing video in the archive database
    :type replace: boolean
    :param videoID: The new video id
    :type videoID: string
    '''
    #Check if db exists
    if not os.path.isfile(dbPath):
        return

    try:
        with open(filePath, 'w+') as f:
            #Connect to database
            db = yta.connectDB(dbPath)
            try:
                #Read IDs of all videos already in archive
                for item in db.execute("SELECT youtubeID FROM videos;"):
                    #The video to replace is left out so it gets downloaded again
                    if not (replace and videoID == item[0]):
                        f.write("youtube {}\n".format(item[0]))
            finally:
                #Close the connection even if reading the IDs failed
                yta.closeDB(db)
    except sqlite3.Error:
        return
def createEmpty(dbPath):
    '''Create the archive database and store a blank channel entry in it,
    without asking for any channel information

    :param dbPath: The path of the archive database
    :type dbPath: string
    '''
    connection = createOrConnectDB(dbPath)
    #Blank channel row: empty text fields, zero counters, current db version
    connection.execute(
        "INSERT INTO channel(name, url, playlist, language, videos, lastupdate, dbversion, maxresolution, totalsize) VALUES(?,?,?,?,?,?,?,?,?)",
        ('', '', '', '', 0, 0, yta.__dbversion__, "default", 0))
    yta.closeDB(connection)
def readInfoFromDB(dbPath):
    '''Read playlist and language from database

    :param dbPath: Path of the archive database
    :type dbPath: string

    :raises: :class:``sqlite3.Error``: Unable to read from database
    :raises: :class:``TypeError``: The channel table contains no entry

    :returns: List with language code at index 0 and playlist at index 1
    :rtype: list of string
    '''
    db = yta.connectDB(dbPath)
    try:
        item = db.execute(
            "SELECT language,playlist FROM channel ORDER BY id DESC LIMIT 1;"
        ).fetchone()
    finally:
        #Close the connection even if the query failed
        yta.closeDB(db)
    #item is None for an empty table, the resulting TypeError is part of
    #the contract: archive() catches it to fall back to the legacy files
    return [item[0], item[1]]
def _updateSubdirStatistics(db, path, name, captions, amendCaptions, maxcount, lastupdate, complete, apiKey):
    '''Update the statistics for one subdir and record the result in the
    channels table of the statistics database

    :param db: Cursor or connection of the statistics database, used to execute the bookkeeping update
    :type db: sqlite3.Connection
    :param path: The path of the subdir
    :type path: string
    :param name: The channel/subdir name
    :type name: string
    :param captions: Whether to check if captions were added since archiving the video
    :type captions: boolean
    :param amendCaptions: Whether to download the captions that were added since the video was archived
    :type amendCaptions: boolean
    :param maxcount: The max number of videos allowed to update
    :type maxcount: integer
    :param lastupdate: Timestamp of the last update
    :type lastupdate: integer
    :param complete: Whether the last update was complete
    :type complete: boolean
    :param apiKey: The API-Key for the Youtube-API
    :type apiKey: string

    :raises: :class:``requests.exceptions.RequestException``: Unable to connect to API endpoint

    :returns: Number of update counts left
    :rtype: integer
    '''
    #Print status
    print("Updating \"{}\"".format(name))
    #Connect to channel database
    channelDB = yta.connectDB(os.path.join(path, "archive.db"))
    #Timestamp is taken before the update starts
    updateTimestamp = int(time.time())
    try:
        #Perform update
        maxcount, complete = updateStatistics(channelDB, lastupdate, captions, maxcount, apiKey, amendCaptions)
    finally:
        #Close channel db, also when the update was aborted by an error
        yta.closeDB(channelDB)
    #Write new info to database
    db.execute(
        "UPDATE channels SET lastupdate = ?, complete = ? WHERE name = ?;",
        (updateTimestamp, complete, name))
    return maxcount
def createNewTestDB(path):
    '''Build a test db at the given path using the two ytacommon table
    creation methods, filled with one sample video and one blank channel
    entry'''
    connection = ytacommon.connectDB(path)

    #Video table with a single sample video
    ytacommon.createVideoTable(connection)
    sampleVideo = (
        "Test", "Test", "2020-01-01", 1577836800, "test", "test.mp4",
        "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
        "en", 1920, 1080, "Full HD", 1577836800, 1000000)
    connection.execute(
        "INSERT INTO videos(title,creator,date,timestamp,youtubeID,filename,checksum,language,width,height,resolution,statisticsupdated,filesize) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)",
        sampleVideo)

    #Channel table with a blank channel entry
    ytacommon.createChannelTable(connection)
    blankChannel = ('', '', '', '', 0, 0, ytacommon.__dbversion__, "default", 0)
    connection.execute(
        "INSERT INTO channel(name, url, playlist, language, videos, lastupdate, dbversion, maxresolution, totalsize) VALUES(?,?,?,?,?,?,?,?,?)",
        blankChannel)

    ytacommon.closeDB(connection)
def upgradeDB(request):
    '''Prepare and upgrade database with versions given via the
    "internal_dbversion" marker in the form of (oldversion, newversion),
    verify upgraded database version number, yield connection to database,
    and close and delete it afterwards. The cleanup also runs when the
    version verification fails
    '''
    #Get database versions from the marker
    marker = request.node.get_closest_marker("internal_dbversion")
    oldVersion = marker.args[0]
    newVersion = marker.args[1]
    #Prepare test database
    dbPath = prepareAndUpgradeDatabase(oldVersion)
    _dbCon = None
    try:
        #Connect to database
        _dbCon = ytacommon.connectDB(dbPath)
        #Verify version
        r = _dbCon.execute(
            "SELECT dbversion FROM channel ORDER BY id DESC LIMIT 1;")
        assert r.fetchone()[0] >= newVersion
        yield _dbCon
    finally:
        #Without the finally a failing setup would leave the database file behind
        if _dbCon is not None:
            ytacommon.closeDB(_dbCon)
        utils.deleteIfExists(dbPath)
def addMetadata(args):
    '''Add additional metadata (timestamp, duration and tags) to every
    video of an archive database

    :param args: The command line arguments given by the user
    :type args: list
    '''
    #Parse the command line and locate the database
    cli = argparse.ArgumentParser(
        prog="ytameta",
        description="Add additional metadata to existing archive databases")
    cli.add_argument(
        "DIR",
        help="The directory containing the archive database to work on")
    options = cli.parse_args(args)
    archiveDir = os.path.normpath(os.path.abspath(options.DIR))
    dbPath = os.path.join(archiveDir, "archive.db")
    if not (os.path.isdir(archiveDir) and os.path.isfile(dbPath)):
        cli.error("DIR must be a directory containing an archive database")

    #Check if database needs upgrade
    yta.upgradeDatabase(dbPath)

    #Connect to database
    dbCon = yta.connectDB(dbPath)
    cursor = dbCon.cursor()

    #The IDs are fetched completely before looping, as the same cursor
    #is reused for the updates below
    videoIDs = [row[0] for row in cursor.execute("SELECT youtubeID FROM videos;").fetchall()]
    for youtubeID in videoIDs:
        try:
            #Only the first three of the eight metadata fields are stored
            timestamp, duration, tags, _, _, _, _, _ = getMetadata(youtubeID)
            cursor.execute(
                "UPDATE videos SET timestamp = ?, duration = ?, tags = ? WHERE youtubeID = ?",
                (timestamp, duration, tags, youtubeID))
        except yta.NoAPIKeyError:
            #Without an API key no further video can be processed
            break
        except requests.exceptions.RequestException:
            print("ERROR: Unable to load metadata for {}".format(youtubeID))
            continue

    #Close database
    yta.closeDB(dbCon)
def updateAllStatistics(path, automatic=False, captions=False, amendCaptions=False):
    '''Update the video statistics from all subdirs. Subdirs whose last
    update was not complete are processed first, the completed ones
    afterwards in random order, until the update count is used up. A
    network error during a channel update is printed and ends the run

    :param path: The path of the parent directory
    :type path: string
    :param automatic: Whether the update was started automatically or from user input (Default: False)
    :type automatic: boolean, optional
    :param captions: Whether to check if captions were added since archiving the video (Default: False)
    :type captions: boolean, optional
    :param amendCaptions: Whether to download the captions that were added since the video was archived (Default: False)
    :type amendCaptions: boolean, optional

    :raises: :class:``ytacommon.NoAPIKeyError``: Unable to read API key from file
    '''
    updateStarted = int(time.time())

    #Print message
    if automatic:
        print("\nUPDATING VIDEO STATISTICS DUE TO DATABASE OPTION")
    else:
        print("\nUPDATING VIDEO STATISTICS")

    #Get subdirs in path that contain an archive database
    subdirs = [os.path.join(path, name) for name in os.listdir(path)]
    subdirs = [
        sub for sub in subdirs
        if os.path.isdir(sub) and os.path.isfile(os.path.join(sub, "archive.db"))
    ]
    if not subdirs:
        print("ERROR: No subdirs with archive databases at '{}'".format(path))
        return

    #Connect to database
    dbCon = connectUpdateCreateStatisticsDB(path)
    try:
        db = dbCon.cursor()

        #Check if quota was reset since last update
        lastupdate = db.execute(
            "SELECT lastupdate FROM setup WHERE id = 1 LIMIT 1;").fetchone()[0]
        if lastupdate > getResetTimestamp():
            print(
                "WARNING: Statistics update skipped because no quota reset since the last update"
            )
            return
        print('')

        #Get channels
        r = db.execute("SELECT name,lastupdate,complete FROM channels;")
        channels = {item[0]: [item[1], item[2]] for item in r.fetchall()}

        #Get maxcount
        maxcount = db.execute(
            "SELECT maxcount FROM setup WHERE id = 1 LIMIT 1;").fetchone()[0]

        #Get API key
        apiKey = yta.getAPIKey()
        if not apiKey:
            raise yta.NoAPIKeyError

        #Loop through subdirs, skip completed ones
        skippedSubdirs = []
        for subdir in subdirs:
            #Check if maxcount was reached
            if maxcount == 0:
                break
            #Get last update info
            name = os.path.basename(os.path.normpath(subdir))
            try:
                lastupdate, complete = channels[name]
            except KeyError:
                #Channel not known yet, register it
                lastupdate = sys.maxsize
                complete = False
                db.execute(
                    "INSERT INTO channels(name,lastupdate,complete) VALUES(?,?,?);",
                    (name, lastupdate, complete))
            #If completed, skip for now
            if complete:
                skippedSubdirs.append(subdir)
                continue
            #Update statistics
            try:
                maxcount = _updateSubdirStatistics(db, subdir, name, captions, amendCaptions, maxcount, lastupdate, complete, apiKey)
            except requests.exceptions.RequestException as e:
                print(
                    "ERROR: Network error while trying to update the statistics (\"{}\")"
                    .format(e))
                return

        #Loop through skipped subdirs
        i = 0
        count = len(skippedSubdirs)
        random.shuffle(skippedSubdirs)
        for subdir in skippedSubdirs:
            #Check if maxcount was reached
            if maxcount == 0:
                break
            i += 1
            #Get last update info
            name = os.path.basename(os.path.normpath(subdir))
            lastupdate, complete = channels[name]
            #Update statistics
            print("({}/{}) ".format(i, count), end='')
            try:
                maxcount = _updateSubdirStatistics(db, subdir, name, captions, amendCaptions, maxcount, lastupdate, complete, apiKey)
            except requests.exceptions.RequestException as e:
                print(
                    "ERROR: Network error while trying to update the statistics (\"{}\")"
                    .format(e))
                return

        #Write lastupdate to statistics database
        db.execute("UPDATE setup SET lastupdate = ? WHERE id = 1",
                   (updateStarted, ))
    finally:
        #Close database on every exit path, the early returns and the
        #NoAPIKeyError above would otherwise leave the connection open
        yta.closeDB(dbCon)
def findMissing(args):
    '''Find discrepancies between files and database: videos listed in
    the archive database without a file carrying their ID, and files
    carrying an ID that is not in the database. The IDs of the files are
    read from their "Comment" metadata tag using exiftool

    :param args: The command line arguments given by the user
    :type args: list
    '''
    parser = argparse.ArgumentParser(prog="ytamissing", description="Find discrepancies between files and archive database in directory")
    parser.add_argument("DIR", help="The directory to work on")
    args = parser.parse_args(args)

    path = os.path.normpath(os.path.abspath(args.DIR))
    if not os.path.isdir(path):
        parser.error("DIR must be a directory")

    #Read IDs from files
    fIDs = []
    #Maps each ID to the name of the first file carrying it
    fNames = {}
    currentName = ""
    cmd = ["exiftool", "-api", "largefilesupport=1", "-m", "-Comment", path]
    #The context manager waits for exiftool and closes the pipe
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
        for raw in p.stdout:
            line = raw.decode("utf-8", errors="replace").strip()
            if line.startswith("=="):
                #Header line announcing the next file
                currentName = os.path.basename(line.split(' ', 1)[1].strip())
            elif line.startswith("Comment"):
                parts = line.split(':', 2)
                #Skip comments that are not in the expected format
                if len(parts) < 3:
                    continue
                vid = parts[2].strip()
                fIDs.append(vid)
                fNames.setdefault(vid, currentName)

    if not fIDs:
        print("No videos found in directory")
        return

    #Read IDs from database
    dbPath = os.path.join(path, "archive.db")
    try:
        #Check if database needs upgrade
        yta.upgradeDatabase(dbPath)
        db = yta.connectDB(dbPath)
        try:
            r = db.execute("SELECT youtubeID,title FROM videos;")
            aFiles = [{"name": item[1], "id": item[0]} for item in r.fetchall()]
        finally:
            #The database is only needed for this query
            yta.closeDB(db)
    except sqlite3.Error as e:
        print(e)
        return

    if not aFiles:
        print("No videos found in archive db")
        return

    #Compare IDs
    found = False
    for aFile in aFiles:
        try:
            fIDs.remove(aFile["id"])
        except ValueError:
            found = True
            print("Video file \"{}\" missing (ID: {})".format(aFile["name"], aFile["id"]))
    #Whatever is left was found on disk but not in the database
    for fID in fIDs:
        found = True
        print("Video \"{}\" not in database (ID: {})".format(fNames[fID], fID))
    if not found:
        print("No discrepancies between files and database")
def archiveAll(args):
    '''Call archive script for all subdirs containing an archive database.
    An interrupted run (Ctrl-C) writes a "progress.json" file to the
    directory, which is picked up by the next run if it is started within
    an hour. The errors of all channels are collected in the "log" file
    of the directory. Afterwards the statistics are updated if requested
    by the user or by the autoupdate option of the statistics database

    :param args: The command line arguments given by the user
    :type args: argparse.Namespace
    '''
    #Set all to false for subsequent calls
    args.all = False
    #Set statistics to false for subsequent calls
    updateStatistics = args.statistics
    updateCaptions = args.captions
    amendCaptions = args.amendcaptions
    args.statistics = False
    args.captions = False
    args.amendcaptions = False

    t1 = time.time()

    #Get path
    path = os.path.normpath(os.path.abspath(args.DIR))

    #Check for progress file in directory
    progressPath = os.path.join(path, "progress.json")
    try:
        with open(progressPath, 'r') as f:
            progress = json.load(f)
        #Progress older than an hour is discarded
        if (t1 - progress["abortTime"]) > 3600:
            progress = {"elapsed": 0}
            os.remove(progressPath)
    except (OSError, ValueError, KeyError, TypeError):
        #No progress file, or one that is unreadable/corrupt: start over
        progress = {"elapsed": 0}

    logFile = os.path.join(path, "log")
    if "subdirs" in progress:
        subdirs = progress["subdirs"]
        #The counter was saved for the interrupted channel, which is redone
        counter = progress["counter"] - 1
        channels = progress["channels"]
        #Print message
        if channels > 1:
            print("CONTINUING ARCHIVING ALL {} CHANNELS IN '{}'\n".format(
                channels, path))
    else:
        #Get subdirs in path
        subdirs = [
            os.path.join(path, name)
            for name in sorted(os.listdir(path), key=str.casefold)
            if os.path.isdir(os.path.join(path, name))
        ]
        subdirs = [
            sub for sub in subdirs
            if os.path.isfile(os.path.join(sub, "archive.db"))
        ]
        random.shuffle(subdirs)
        if not subdirs:
            print("ERROR: No subdirs with archive databases at '{}'".format(
                path))
            return
        #Prepare
        channels = len(subdirs)
        progress["channels"] = channels
        counter = 0
        with open(logFile, 'w') as f:
            f.truncate()
        #Print message
        if channels > 1:
            print("ARCHIVING ALL {} CHANNELS IN '{}'\n".format(
                channels, path))

    #Initiate error log
    errorLog = ""
    #Loop through all subdirs
    try:
        t2 = time.time()
        leftover = subdirs.copy()
        for subdir in subdirs:
            counter += 1
            name = os.path.basename(os.path.normpath(subdir))
            args.DIR = subdir
            args.LANG = None
            args.VIDEO = None
            print("\nARCHIVING '{}' ({}/{})".format(name, counter, channels))
            archive(args, True)
            #Read errors from log
            error = ""
            with open(os.path.join(subdir, "log"), 'r') as f:
                lines = f.readlines()
            for i, line in enumerate(lines):
                if line.startswith("ERROR"):
                    #Include the preceding line for context, the first
                    #line has none (index -1 would wrap to the last line)
                    previous = lines[i - 1] if i > 0 else ""
                    error += "\n" + previous + line
            if error:
                errorLog += '\n\n' + name + '\n' + error
            leftover.remove(subdir)
    except KeyboardInterrupt:
        #Aborting, write progress file and log
        t = time.time()
        progress["counter"] = counter
        progress["subdirs"] = leftover
        progress["elapsed"] += t - t2
        progress["abortTime"] = t
        with open(progressPath, 'w') as f:
            json.dump(progress, f)
        if errorLog:
            with open(logFile, 'a') as f:
                f.write(errorLog)
        #Rethrow exception
        raise

    #Progress file no longer relevant, removing it
    try:
        os.remove(progressPath)
    except OSError:
        pass

    #Write error log
    if not errorLog:
        errorLog = "No errors\n"
    with open(logFile, 'a') as f:
        f.write(errorLog)

    t3 = time.time()

    #Check if statistics is set to autoupdate
    #NOTE(review): the condition reads "(not updateStatistics) or updateCaptions",
    #possibly "not (updateStatistics or updateCaptions)" was intended - confirm
    autoUpdateStatistics = False
    if not updateStatistics or updateCaptions:
        statsDB = None
        try:
            statsDB = yta.connectDB(os.path.join(path, "statistics.db"))
            r = statsDB.execute(
                "SELECT autoupdate FROM setup ORDER BY id DESC LIMIT 1;")
            autoUpdateStatistics = bool(r.fetchone()[0])
        except (sqlite3.Error, TypeError):
            #No (usable) statistics database: no automatic update
            pass
        finally:
            #statsDB is still None if connecting failed
            if statsDB is not None:
                try:
                    yta.closeDB(statsDB)
                except sqlite3.Error:
                    pass

    #Update statistics
    if updateStatistics or autoUpdateStatistics or updateCaptions or amendCaptions:
        statTime = True
        try:
            ytameta.updateAllStatistics(path, autoUpdateStatistics,
                                        updateCaptions, amendCaptions)
        except yta.NoAPIKeyError:
            print(
                "ERROR: Unable to update video statistics as no API key is available"
            )
        except RequestException as e:
            print(
                "ERROR: Unable to update video statistics due to connection error: \"{}\""
                .format(e))
    else:
        statTime = False

    #Print time
    t4 = time.time()
    print("\nTotal runtime: {}\nArchive runtime: {}".format(
        yta.intervalToStr(progress["elapsed"] + (t4 - t1)),
        yta.intervalToStr(progress["elapsed"] + (t3 - t2))))
    if statTime:
        print("Statistic runtime: " + yta.intervalToStr(t4 - t3))
    print("\nDONE!")
def _buildArchiveParser():
    '''Build the command line parser of the archive script

    :returns: The parser
    :rtype: argparse.ArgumentParser
    '''
    parser = argparse.ArgumentParser(
        prog="ytarchiver",
        description="Download and archive Youtube videos or playlists")
    parser.add_argument(
        "-a", "--all", action="store_const", dest="all", const=True, default=False,
        help="Run archiver for all subdirectories with archive databases. In this mode, LANG and VIDEO will always be read from the databases")
    parser.add_argument(
        "-c", "--check", action="store_const", dest="check", const="-c", default="",
        help="Check each file after download")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-s", "--statistics", action="store_const", dest="statistics", const=True, default=False,
        help="Update the video statistics")
    group.add_argument(
        "-u", "--captions", action="store_const", dest="captions", const=True, default=False,
        help="List videos where captions were added since archiving (forces -s)")
    group.add_argument(
        "-x", "--amendcaptions", action="store_const", dest="amendcaptions", const=True, default=False,
        help="Download captions were they were added since archiving (forces -u and consequently -s)")
    parser.add_argument(
        "-r", "--replace", action="store_const", dest="replace", const="-r", default="",
        help="Replace an existing video (a video ID has to be provided)")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-8k", "--8K", action="store_const", dest="quality", const="8k",
        help="Limit download resolution to 8K")
    group.add_argument(
        "-4k", "--4K", action="store_const", dest="quality", const="4k",
        help="Limit download resolution to 4K (default)")
    group.add_argument(
        "-hd", "--HD", action="store_const", dest="quality", const="hd",
        help="Limit download resolution to full HD")
    parser.add_argument(
        "-V", "--version", action="version",
        version='%(prog)s {}'.format(yta.__version__))
    parser.add_argument("DIR", help="The directory to work in")
    parser.add_argument(
        "LANG", nargs='?',
        help="The video language (read from the database if not given)")
    parser.add_argument(
        "-f", "--file", action="store", dest="file",
        help="Read IDs to archive from a batch file with one ID per line")
    #NOTE(review): --filter is part of the resolution group and therefore
    #mutually exclusive with -8k/-4k/-hd, confirm that this is intended
    group.add_argument(
        "--filter", action="store", dest="filter", default=None,
        help="Filter videos to download using Youtube-dl's match filter option")
    parser.add_argument(
        "VIDEO", nargs='?',
        help="The Youtube video or playlist ID (read from the database if not given)")
    return parser

def archive(args, parsed=False):
    '''Archive youtube videos or playlists

    :param args: The command line arguments given by the user
    :type args: list
    :param parsed: Whether args was already parsed into a namespace by an earlier call (Default: False)
    :type parsed: boolean, optional
    '''
    #The parser is always built as it is also used for reporting errors
    #when the arguments were parsed by an earlier call
    parser = _buildArchiveParser()

    #Parse arguments
    if not parsed:
        args = parser.parse_args(args)

        if args.all and args.file:
            parser.error("-a cannot be used in combination with batch file")
        if args.all and args.replace:
            parser.error("-a cannot be used in combination with replace")

        #Check if API key provided
        yta.getAPIKey(True)

    #Archive all subdirectories
    if args.all:
        archiveAll(args)
        return

    #Validate path
    path = os.path.normpath(os.path.abspath(args.DIR))
    if not os.path.isdir(path):
        parser.error("An existing directory must be specified")

    #Check if database exists
    dbPath = os.path.join(path, "archive.db")
    if not os.path.isfile(dbPath):
        #No database found, ask to create one
        while True:
            q = input("New archive. Populate with channel info? [Y|n] ")
            if not q:
                q = 'y'
            a = q[0].lower()
            if a in ['y', 'n']:
                break
        if a == 'y':
            ytainfo.add(dbPath)
        else:
            ytainfo.createEmpty(dbPath)

    t1 = time.time()

    #Check if database needs upgrade
    yta.upgradeDatabase(dbPath)

    #Check if ID and language are specified
    if not args.LANG or (not args.VIDEO and not args.file):
        #Try reading playlist and language from database
        try:
            (args.LANG, args.VIDEO) = readInfoFromDB(dbPath)
        except (sqlite3.Error, TypeError):
            #Try reading playlist and language from files
            try:
                with open(os.path.join(path, "language"), 'r') as f:
                    args.LANG = f.readline().strip()
                with open(os.path.join(path, "playlist"), 'r') as f:
                    args.VIDEO = f.readline().strip()
            except (IndexError, OSError):
                parser.error(
                    "LANG and VIDEO must be specified if no database exists.")

    #Update lastupdate field
    updateTimestamp = int(time.time())
    db = yta.connectDB(dbPath)
    db.execute("UPDATE channel SET lastupdate = ? WHERE id = 1",
               (updateTimestamp, ))

    #Replace existing video
    if args.replace:
        #Explicit check instead of an assert, which is stripped under -O
        try:
            row = db.execute(
                "SELECT youtubeID FROM videos WHERE youtubeID = ?;",
                (args.VIDEO, )).fetchone()
            known = bool(row and row[0])
        except sqlite3.Error:
            known = False
        if not known:
            print(
                "ERROR: Unable to replace video with ID \"{}\" as it is not in the archive database"
                .format(args.VIDEO))
            #Do not leave the connection open on this early exit
            yta.closeDB(db)
            return

    #Get format string
    if args.quality:
        q = args.quality
    else:
        q = db.execute(
            "SELECT maxresolution FROM channel WHERE id=1;").fetchone()[0]
    dlformat = yta.getFormatString(q)

    #Prepare download
    dlfilePath = os.path.join(path, "downloaded")
    writeDownloadedFile(dbPath, dlfilePath, args.replace, args.VIDEO)
    dlpath = os.path.join(path, "ID%(id)s&%(title)s.%(ext)s")
    postHook = PostHook(args.LANG, db, args.check, args.replace)

    #Set options
    ytdlOpts = {
        "call_home": False,
        "quiet": False,
        "format": dlformat,
        "ignoreerrors": True,
        "download_archive": dlfilePath,
        "writesubtitles": True,
        "subtitleslangs": [args.LANG],
        "writedescription": True,
        "writethumbnail": True,
        "outtmpl": dlpath,
        "cachedir": False,
        "youtube_include_dash_manifest": True,
        "retries": 10,
        "fragment_retries": 25,
        "skip_unavailable_fragments": False,
        "continuedl": True,
        "extractor_args": {
            "youtube": {
                "player_client": ["android"]
            }
        },
        "throttledratelimit": 100000,
        "allow_playlist_files": False,
        "post_hooks": [postHook.finished]
    }
    ytdlOpts["postprocessors"] = [{
        "key": "FFmpegVideoConvertor",
        "preferedformat": "mp4"
    }, {
        "key": "FFmpegMetadata"
    }, {
        "key": "EmbedThumbnail",
        "already_have_thumbnail": False
    }]
    if args.filter:
        ytdlOpts["match_filter"] = matchFilterFunc(args.filter)

    #Check if archiving one video/playlist or using a batch file
    if args.file:
        with open(args.file, 'r', encoding="utf-8") as f:
            url = readBatchURLs(f)
    else:
        url = [args.VIDEO]

    #Prepare log
    logFile = os.path.join(path, "log")

    #Download
    with DoubleLogger(logFile):
        with yt_dlp.YoutubeDL(ytdlOpts) as ytdl:
            ytdl.download(url)

    #Print status
    print("Download complete, updating database...")

    #Update video number and totalsize (best effort)
    try:
        db.execute(
            "UPDATE channel SET videos = (SELECT count(id) FROM videos), totalsize = (SELECT sum(filesize) FROM videos) WHERE id = 1;"
        )
    except sqlite3.Error:
        pass

    #Update statistics
    if args.statistics or args.captions or args.amendcaptions:
        print("Updating video statistics...")
        try:
            ytameta.updateStatistics(db, updateTimestamp, args.captions,
                                     amendCaptions=args.amendcaptions)
        except yta.NoAPIKeyError:
            print(
                "ERROR: Unable to update video statistics as no API key is available"
            )
        except RequestException as e:
            print(
                "ERROR: Unable to update video statistics due to connection error: \"{}\""
                .format(e))

    #Close database
    yta.closeDB(db)

    #Print time
    t2 = time.time()
    print("DONE! Duration: " + yta.intervalToStr(t2 - t1))

    #Remove download archive file
    try:
        os.remove(dlfilePath)
    except OSError:
        pass
def fix(args, parsed=False):
    '''Update artist in database and metadata for all videos of an
    archive whose stored artist differs from the correct one

    :param args: The command line arguments given by the user
    :type args: list
    :param parsed: Whether args was already parsed into a namespace by an earlier call (Default: False)
    :type parsed: boolean, optional
    '''
    #The parser is always built as it is also used for reporting errors
    #when the arguments were parsed by an earlier call
    parser = argparse.ArgumentParser(
        prog="ytafix", description="Fix wrong artist information")
    parser.add_argument(
        "-a", "--all", action="store_const", dest="all", const=True, default=False,
        help="Run fixer for all subdirectories with archive databases. In this mode, the ARTIST will always be read from the database")
    parser.add_argument("DIR", help="The directory to work in")
    parser.add_argument(
        "ARTIST", nargs='?',
        help="The correct artist name (read from the database if not given)")

    #Parse arguments
    if not parsed:
        args = parser.parse_args(args)

    #Run fixer for all subdirectories
    if args.all:
        fixAll(args)
        return

    #Validate path
    path = os.path.normpath(os.path.abspath(args.DIR))
    dbPath = os.path.join(path, "archive.db")
    if not os.path.isdir(path) or not os.path.isfile(dbPath):
        parser.error("DIR must be a directory containing an archive database")

    #Check if database needs upgrade
    yta.upgradeDatabase(dbPath)

    #Connect to database
    db = yta.connectDB(dbPath)
    try:
        #Get correct artist
        if args.ARTIST:
            artist = args.ARTIST
        else:
            try:
                row = db.execute("SELECT name FROM channel LIMIT 1;").fetchone()
                #An empty channel table is treated like a missing name
                artist = row[0] if row else None
                if not artist:
                    raise sqlite3.Error
            except sqlite3.Error:
                parser.error(
                    "No correct artist specified and unable to read it from the database"
                )

        #Read artists, filenames and IDs from database
        try:
            r = db.execute("SELECT creator,filename,youtubeID FROM videos;")
            files = [{"artist": f[0], "name": f[1], "id": f[2]} for f in r.fetchall()]
        except sqlite3.Error as e:
            print(e)
            return

        found = False
        for f in files:
            #Compare artist
            if f["artist"] != artist:
                found = True
                filepath = os.path.join(path, f["name"])
                try:
                    #Change meta data
                    #NOTE(review): the artist returned by fixVideo replaces the one
                    #the following files are compared against - confirm intended
                    artist, title = fixVideo(filepath, f["id"], artist)
                    #Calculate checksums
                    checksum = yta.calcSHA(filepath)
                    #Update database
                    db.execute(
                        "UPDATE videos SET checksum = ?, creator = ? , title = ? WHERE youtubeID = ?",
                        (checksum, artist, title, f["id"]))
                except requests.exceptions.HTTPError:
                    print("ERROR: Unable to fix \"{}\"".format(f["name"]))
                    continue
                print("File \"{}\" fixed".format(f["name"]))

        if not found:
            print("No files to fix")
    finally:
        #Close database on every exit path
        yta.closeDB(db)
def check(args, parsed=False):
    '''Perform integrity checks on files: every file listed in the
    archive database must exist and match its stored checksum. Files
    without a stored checksum get their current one written to the
    database. Optionally the files are additionally checked using ffmpeg

    :param args: The command line arguments given by the user
    :type args: list
    :param parsed: Whether args was already parsed into a namespace by an earlier call (Default: False)
    :type parsed: boolean, optional

    :returns: The error messages of all failed checks (always empty when running for all subdirectories)
    :rtype: list of string
    '''
    #The parser is always built as it is also used for reporting errors
    #when the arguments were parsed by an earlier call
    parser = argparse.ArgumentParser(
        prog="ytacheck", description="Verify integrity of archived files")
    parser.add_argument("DIR", help="The directory to work in")
    parser.add_argument(
        "-a", "--all", action="store_const", dest="all", const=True, default=False,
        help="Run checker for all subdirectories with archive databases")
    parser.add_argument(
        "-c", "--check", action="store_const", dest="check", const=True, default=False,
        help="Perform additional integrity check using ffmpeg")
    if not parsed:
        args = parser.parse_args(args)

    #Run checker for all subdirectories
    if args.all:
        checkAll(args)
        return []

    #Validate path
    path = os.path.normpath(os.path.abspath(args.DIR))
    dbPath = os.path.join(path, "archive.db")
    if not os.path.isdir(path) or not os.path.isfile(dbPath):
        parser.error("DIR must be a directory containing an archive database")

    errors = []
    try:
        #Check if database needs upgrade
        yta.upgradeDatabase(dbPath)
        db = yta.connectDB(dbPath)
    except sqlite3.Error as e:
        sys.exit(
            "ERROR: Unable to read from database (Error: \"{}\")".format(e))

    try:
        #Read filenames and checksums from database
        try:
            r = db.execute("SELECT id,filename,checksum FROM videos;")
            files = [{"checksum": f[2], "name": f[1], "id": f[0]} for f in r.fetchall()]
        except sqlite3.Error as e:
            sys.exit(
                "ERROR: Unable to read from database (Error: \"{}\")".format(e))

        for f in files:
            filepath = os.path.join(path, f["name"])
            #Check if file exists
            if not os.path.isfile(filepath):
                msg = "ERROR: File \"{}\" missing".format(f["name"])
                print(msg)
                errors.append(msg)
                continue
            #Check movie file, any output of ffmpeg counts as corruption
            if args.check:
                cmd = ["ffmpeg", "-v", "error", "-i", filepath, "-f", "null", "-"]
                out, _ = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
                if out:
                    msg = "ERROR: File \"{}\" corrupt!".format(f["name"])
                    print(msg)
                    errors.append(msg)
                else:
                    print("File \"{}\" check passed".format(f["name"]))
            #Calculate checksums
            checksum = yta.calcSHA(filepath)

            if not f["checksum"]:
                db.execute("UPDATE videos SET checksum = ? WHERE id = ?", (checksum, f["id"]))
                print("WARNING: File \"{}\" no checksum in database, adding {}".format(f["name"], checksum))
            elif f["checksum"] == checksum:
                print("File \"{}\" checksums match".format(f["name"]))
            else:
                msg = "ERROR: Checksum mismatch for file \"{}\" (New checksum: {})".format(
                    f["name"], checksum)
                print(msg)
                errors.append(msg)
    finally:
        #Close database on every exit path
        yta.closeDB(db)

    #Print status
    if errors:
        print("\nDONE, {} CORRUPTED FILE(S)".format(len(errors)))
    else:
        print("\nDONE, NO CORRUPTED FILE")

    #Return errors
    return errors
def postprocess(args):
    '''Postprocess a video file or a directory of video files. Only
    .mp4 and .m4v files are processed, the archive database is created
    in the directory of the file(s) if it does not exist

    :param args: The command line arguments given by the user
    :type args: list
    '''
    parser = argparse.ArgumentParser(
        prog="ytapost",
        description="Perform the postprocessing steps on a downloaded video file")
    parser.add_argument("-c", "--check", action="store_const", dest="check", const=True, default=False, help="Check file integrity")
    parser.add_argument("-r", "--replace", action="store_const", dest="replace", const=True, default=False, help="Replace existing file")
    parser.add_argument("PATH", help="The file or the directory to work with")
    parser.add_argument("LANG", nargs='?', default="", help="The video language")
    args = parser.parse_args(args)

    #Get files
    supported = (".m4v", ".mp4")
    files = []
    path = os.path.normpath(os.path.abspath(args.PATH))
    if os.path.isfile(path):
        dirPath = os.path.dirname(path)
        if path.lower().endswith(supported):
            files.append(path)
        else:
            parser.error(
                "Unsupported file format, only .mp4 and .m4v are supported")
    elif os.path.isdir(path):
        dirPath = path
        files = [
            os.path.join(path, f) for f in os.listdir(path)
            if os.path.isfile(os.path.join(path, f)) and f.lower().endswith(supported)
        ]
        if not files:
            parser.error(
                "No supported files in directory, only .mp4 and .m4v are supported"
            )
    else:
        #Without this branch dirPath would be undefined below
        parser.error("PATH must be an existing file or directory")

    #Connect to database
    try:
        dbFile = os.path.join(dirPath, "archive.db")
        dbCon = createOrConnectDB(dbFile)
        db = dbCon.cursor()
    except sqlite3.Error as e:
        print(e)
        return

    try:
        for f in files:
            processFile(f, args.LANG, db, args.check, args.replace)
    finally:
        #Close database also when processing a file failed
        yta.closeDB(dbCon)
def _promptRequired(prompt):
    '''Ask the user until a non-blank answer is given

    :param prompt: The prompt to show
    :type prompt: string

    :returns: The answer without surrounding whitespace
    :rtype: string
    '''
    while True:
        answer = input(prompt).strip()
        if answer:
            return answer

def _promptOptional(prompt):
    '''Ask the user once for an optional value

    :param prompt: The prompt to show
    :type prompt: string

    :returns: The answer without surrounding whitespace, None if it is blank
    :rtype: string
    '''
    return input(prompt).strip() or None

def _promptImage(prompt):
    '''Ask the user for an image URL until the image could be loaded or
    the question is skipped with a blank answer

    :param prompt: The prompt to show
    :type prompt: string

    :returns: The two values returned by yta.loadImage (image and image format), both None if skipped
    :rtype: tuple
    '''
    while True:
        url = input(prompt).strip()
        if not url:
            return None, None
        try:
            image, imageFormat = yta.loadImage(url)
            return image, imageFormat
        except requests.exceptions.RequestException:
            #Covers HTTP errors as well as malformed/unreachable URLs
            print("ERROR: Invalid URL")

def add(dbPath):
    '''Add channel info to the archive database by asking the user for
    the channel details on the command line

    :param dbPath: The path of the archive database
    :type dbPath: string
    '''
    #Create/connect database
    db = createOrConnectDB(dbPath)
    try:
        print("ADDING CHANNEL INFO")
        #Get the required channel info, blank answers are asked again
        name = _promptRequired("Channel name: ")
        url = _promptRequired("Channel url: ")
        playlist = _promptRequired("Video playlist: ")
        language = _promptRequired("Channel language code: ")

        #Get channel description, read line by line until EOF
        print(
            "Channel description (Press Ctrl-D [Ctrl-Z on WIN] directly to skip or after input to save):"
        )
        desc = []
        while True:
            try:
                desc.append(input())
            except EOFError:
                break
        desc = '\n'.join(desc) if desc else None

        #Get join date
        joined = _promptOptional("Join date (YYYY-MM-DD) or enter to skip: ")
        #Get location
        location = _promptOptional("Location (or enter to skip): ")

        #Get links, stored as one "prettyname<TAB>url" line per link
        print("Add links: Prettyname first, then the url. Enter to continue")
        linkLines = []
        while True:
            #The number only advances once a complete link was entered
            i = len(linkLines) + 1
            q1 = input("Prettyname no. {} (or enter to continue): ".format(i)).strip()
            if not q1:
                break
            q2 = input("URL no. {}: ".format(i)).strip()
            if not q2:
                print("ERROR: URL must be specified")
                continue
            linkLines.append(q1 + '\t' + q2 + '\n')
        links = ''.join(linkLines) or None

        #Get profile picture
        profile, profileformat = _promptImage("Profile picture URL (or enter to skip): ")
        #Get banner image
        banner, bannerformat = _promptImage("Banner image URL (or enter to skip): ")

        insert = "INSERT INTO channel(name, url, playlist, language, description, location, joined, links, profile, profileformat, banner, bannerformat, videos, lastupdate, dbversion, maxresolution, totalsize) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
        db.execute(insert, (name, url, playlist, language, desc, location, joined, links, profile, profileformat, banner, bannerformat, 0, 0, yta.__dbversion__, "default", 0))
        print("FINISHED ADDING CHANNEL INFO")
    finally:
        #Close database also when the input was aborted
        yta.closeDB(db)