Example #1
0
def connectUpdateCreateStatisticsDB(directory):
    '''Open the statistics database kept in a directory and make sure its
    schema is in place, creating the tables when the database is new

    If setting up the schema fails, the error is printed, the transaction is
    rolled back, the connection is closed and the process exits with status 1.

    :param directory: The directory containing the "statistics.db" file
    :type directory: string

    :raises: :class:``sqlite3.Error: Unable to connect to database

    :returns: Connection to the database
    :rtype: sqlite3.Connection
    '''
    connection = yta.connectDB(os.path.join(directory, "statistics.db"))
    cursor = connection.cursor()
    #Look up the stored schema version; a failing query is treated as a
    #database that has never been set up
    try:
        row = cursor.execute(
            "SELECT dbversion FROM setup ORDER BY id DESC LIMIT 1;").fetchone()
        version = row[0]
        del row
    except sqlite3.Error:
        version = 0

    if version < __statisticsdbversion__:
        try:
            #Version 0 means nothing exists yet: run the initial setup
            if version < 1:
                #The encoding has to be chosen before the first table is created
                connection.execute("pragma encoding=UTF8")
                setupTable = """ CREATE TABLE setup (
                              id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                              autoupdate BOOLEAN NOT NULL,
                              lastupdate INTEGER NOT NULL,
                              maxcount INTEGER NOT NULL,
                              dbversion INTEGER NOT NULL
                          ); """
                connection.execute(setupTable)
                channelsTable = """ CREATE TABLE channels (
                              id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                              name STRING UNIQUE NOT NULL,
                              lastupdate INTEGER NOT NULL,
                              complete BOOLEAN NOT NULL
                          ); """
                connection.execute(channelsTable)
                #Store the default settings together with the new version
                version = 1
                defaults = (False, 0, 100000, version)
                cursor.execute(
                    "INSERT INTO setup(autoupdate,lastupdate,maxcount,dbversion) VALUES(?,?,?,?)",
                    defaults)
                connection.commit()
        except sqlite3.Error as e:
            print('ERROR: Unable to upgrade database ("{}")'.format(e))
            connection.rollback()
            yta.closeDB(connection)
            sys.exit(1)

    #Hand the open connection to the caller
    return connection
Example #2
0
def writeDownloadedFile(dbPath, filePath, replace, videoID):
    '''Write file containing Youtube IDs of all videos already archived

    One line of the form "youtube <ID>" is written for every entry of the
    "videos" table. Nothing is written if the database file does not exist.
    Database errors are swallowed; the output file may then be empty or
    incomplete.

    :param dbPath: Path of the archive database
    :type dbPath: string
    :param filePath: Path where the file containing all existing IDs should be written to
    :type filePath: string
    :param replace: Whether to replace the existing video in the archive database
    :type replace: boolean
    :param videoID: The new video id
    :type videoID: string
    '''
    #Check if db exists
    if not os.path.isfile(dbPath):
        return
    try:
        with open(filePath, 'w+') as f:
            #Connect to database
            db = yta.connectDB(dbPath)
            try:
                #Read IDs of all videos already in archive
                r = db.execute("SELECT youtubeID FROM videos;")
                for item in r.fetchall():
                    #When replacing, leave out the video that is to be
                    #replaced so that it is not listed as already archived
                    if not (replace and videoID == item[0]):
                        f.write("youtube {}\n".format(item[0]))
            finally:
                #Release the connection even if the query failed
                yta.closeDB(db)
    except sqlite3.Error:
        return
Example #3
0
def createEmpty(dbPath):
    '''Set up an archive database whose channel entry holds only placeholder
    values (empty strings, zero counters, "default" resolution)

    :param dbPath: The path of the archive database
    :type dbPath: string
    '''
    connection = createOrConnectDB(dbPath)
    #Placeholder row, only the database version carries real information
    placeholder = ('', '', '', '', 0, 0, yta.__dbversion__, "default", 0)
    connection.execute(
        "INSERT INTO channel(name, url, playlist, language, videos, "
        "lastupdate, dbversion, maxresolution, totalsize) "
        "VALUES(?,?,?,?,?,?,?,?,?)", placeholder)
    yta.closeDB(connection)
Example #4
0
def readInfoFromDB(dbPath):
    '''Read playlist and language from database

    The values are taken from the row of the "channel" table with the
    highest id.

    :param dbPath: Path of the archive database
    :type dbPath: string

    :raises: :class:``sqlite3.Error: Unable to read from database
    :raises: :class:``TypeError: The channel table contains no row

    :returns: List with language code at index 0 and playlist at index 1
    :rtype: list of string
    '''
    db = yta.connectDB(dbPath)
    try:
        item = db.execute(
            "SELECT language,playlist FROM channel ORDER BY id DESC LIMIT 1;"
        ).fetchone()
    finally:
        #Close the connection even if the query raised
        yta.closeDB(db)
    #item is None for an empty table, subscripting it raises the TypeError
    #that callers rely on to detect missing channel info
    return [item[0], item[1]]
Example #5
0
def _updateSubdirStatistics(db, path, name, captions, amendCaptions, maxcount,
                            lastupdate, complete, apiKey):
    '''Run the statistics update for a single subdir and record the outcome
    in the statistics database

    :param db: Connection to the statistics database
    :type db: sqlite3.Connection
    :param path: The path of the subdir
    :type path: string
    :param name: The channel/subdir name
    :type name: string
    :param captions: Whether to check if captions were added since archiving the video
    :type captions: boolean
    :param amendCaptions: Whether to download the captions that were added since the video was archived
    :type amendCaptions: boolean
    :param maxcount: The max number of videos allowed to update
    :type maxcount: integer
    :param lastupdate: Timestamp of the last update
    :type lastupdate: integer
    :param complete: Whether the last update was complete
    :type complete: boolean
    :param apiKey: The API-Key for the Youtube-API
    :type apiKey: string

    :raises: :class:``requests.exceptions.RequestException: Unable to connect to API endpoint

    :returns: Number of update counts left
    :rtype: integer
    '''
    print('Updating "{}"'.format(name))
    #Open the archive database of this channel
    archiveCon = yta.connectDB(os.path.join(path, "archive.db"))
    #The stored timestamp is the moment the update started, not when it ended
    startedAt = int(time.time())
    remaining, finished = updateStatistics(archiveCon, lastupdate, captions,
                                           maxcount, apiKey, amendCaptions)
    yta.closeDB(archiveCon)
    #Remember when this channel was updated and whether the update completed
    channelState = (startedAt, finished, name)
    db.execute(
        "UPDATE channels SET lastupdate = ?, complete = ? WHERE name = ?;",
        channelState)

    return remaining
Example #6
0
def createNewTestDB(path):
    '''Build a test database with one video row and one channel row, using
    the two table creation methods of ytacommon'''
    connection = ytacommon.connectDB(path)

    #Video table with a single sample entry
    ytacommon.createVideoTable(connection)
    videoRow = (
        "Test", "Test", "2020-01-01", 1577836800, "test", "test.mp4",
        "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
        "en", 1920, 1080, "Full HD", 1577836800, 1000000)
    connection.execute(
        "INSERT INTO videos(title,creator,date,timestamp,youtubeID,filename,"
        "checksum,language,width,height,resolution,statisticsupdated,filesize)"
        " VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)", videoRow)

    #Channel table with placeholder values and the current database version
    ytacommon.createChannelTable(connection)
    channelRow = ('', '', '', '', 0, 0, ytacommon.__dbversion__, "default", 0)
    connection.execute(
        "INSERT INTO channel(name, url, playlist, language, videos, "
        "lastupdate, dbversion, maxresolution, totalsize) "
        "VALUES(?,?,?,?,?,?,?,?,?)", channelRow)

    ytacommon.closeDB(connection)
Example #7
0
def upgradeDB(request):
    '''Prepare and upgrade a database using the versions given via the
    "internal_dbversion" marker in the form of (oldversion, newversion),
    verify that the stored version is at least the new version, yield the
    connection to the database, and close and delete it afterwards
    '''
    #Both versions come from the arguments of the same marker
    marker = request.node.get_closest_marker("internal_dbversion")
    oldVersion, newVersion = marker.args[0], marker.args[1]
    #Create the database in its old state and upgrade it
    dbPath = prepareAndUpgradeDatabase(oldVersion)
    _dbCon = ytacommon.connectDB(dbPath)
    #Check the version recorded in the most recent channel row
    storedVersion = _dbCon.execute(
        "SELECT dbversion FROM channel ORDER BY id DESC LIMIT 1;"
    ).fetchone()[0]
    assert storedVersion >= newVersion
    yield _dbCon
    #Clean up once the consumer is done with the connection
    ytacommon.closeDB(_dbCon)
    utils.deleteIfExists(dbPath)
Example #8
0
def addMetadata(args):
    '''Fill in timestamp, duration and tags for every video in an archive
    database, using the values returned by getMetadata

    Processing stops silently at the first yta.NoAPIKeyError; a video whose
    metadata request fails is reported and skipped.

    :param args: The command line arguments given by the user
    :type args: list
    '''
    #Parse the command line
    parser = argparse.ArgumentParser(
        prog="ytameta",
        description="Add additional metadata to existing archive databases")
    parser.add_argument(
        "DIR", help="The directory containing the archive database to work on")
    args = parser.parse_args(args)

    #Both the directory and the database inside it have to exist
    archiveDir = os.path.normpath(os.path.abspath(args.DIR))
    dbPath = os.path.join(archiveDir, "archive.db")
    if not (os.path.isdir(archiveDir) and os.path.isfile(dbPath)):
        parser.error("DIR must be a directory containing an archive database")

    #Bring the database schema up to date if required
    yta.upgradeDatabase(dbPath)

    connection = yta.connectDB(dbPath)
    cursor = connection.cursor()
    #All IDs are fetched up front because the cursor is reused for the updates
    knownIDs = cursor.execute("SELECT youtubeID FROM videos;").fetchall()
    for (youtubeID, ) in knownIDs:
        try:
            #Only the first three of the eight returned values are stored
            timestamp, duration, tags, _, _, _, _, _ = getMetadata(youtubeID)
            cursor.execute(
                "UPDATE videos SET timestamp = ?, duration = ?, tags = ? WHERE youtubeID = ?",
                (timestamp, duration, tags, youtubeID))
        except yta.NoAPIKeyError:
            #Without an API key no further video can be processed either
            break
        except requests.exceptions.RequestException:
            print("ERROR: Unable to load metadata for {}".format(youtubeID))
    yta.closeDB(connection)
Example #9
0
def updateAllStatistics(path,
                        automatic=False,
                        captions=False,
                        amendCaptions=False):
    '''Update the video statistics from all subdirs

    Subdirs whose last update was not complete are processed first, the
    remaining ones afterwards in random order, until the update budget
    ("maxcount" of the statistics database) reaches zero. A network error
    while updating a subdir is printed and ends the run early. The connection
    to the statistics database is closed on every exit path.

    :param path: The path of the parent directory
    :type path: string
    :param automatic: Whether the update was started automatically or from user input (Default: False)
    :type automatic: boolean, optional
    :param captions: Whether to check if captions were added since archiving the video (Default: False)
    :type captions: boolean, optional
    :param amendCaptions: Whether to download the captions that were added since the video was archived (Default: False)
    :type amendCaptions: boolean, optional

    :raises: :class:``ytacommon.NoAPIKeyError: Unable to read API key from file
    '''
    updateStarted = int(time.time())
    #Print message
    if automatic:
        print("\nUPDATING VIDEO STATISTICS DUE TO DATABASE OPTION")
    else:
        print("\nUPDATING VIDEO STATISTICS")
    #Get subdirs in path
    subdirs = [
        os.path.join(path, name) for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    ]
    #Keep only those containing an archive database
    subdirs = [
        sub for sub in subdirs
        if os.path.isfile(os.path.join(sub, "archive.db"))
    ]
    if not subdirs:
        print(
            "ERROR: No subdirs with archive databases at \'{}\'".format(path))
        return
    #Connect to database
    dbCon = connectUpdateCreateStatisticsDB(path)
    try:
        db = dbCon.cursor()
        #Check if quota was reset since last update
        lastupdate = db.execute(
            "SELECT lastupdate FROM setup WHERE id = 1 LIMIT 1;").fetchone()[0]
        if lastupdate > getResetTimestamp():
            print(
                "WARNING: Statistics update skipped because no quota reset since the last update"
            )
            return
        print('')
        #Get channels: name -> [lastupdate, complete]
        r = db.execute("SELECT name,lastupdate,complete FROM channels;")
        channels = {}
        for item in r.fetchall():
            channels[item[0]] = [item[1], item[2]]
        #Get maxcount
        maxcount = db.execute(
            "SELECT maxcount FROM setup WHERE id = 1 LIMIT 1;").fetchone()[0]
        #Get API key
        apiKey = yta.getAPIKey()
        if not apiKey:
            raise yta.NoAPIKeyError
        #Loop through subdirs, skip completed ones
        skippedSubdirs = []
        for subdir in subdirs:
            #Check if maxcount was reached
            if maxcount == 0:
                break
            #Get last update info
            name = os.path.basename(os.path.normpath(subdir))
            try:
                lastupdate, complete = channels[name]
            except KeyError:
                #Channel not known yet: register it as never completed
                lastupdate = sys.maxsize
                complete = False
                db.execute(
                    "INSERT INTO channels(name,lastupdate,complete) VALUES(?,?,?);",
                    (name, lastupdate, complete))
            #If completed, skip for now
            if complete:
                skippedSubdirs.append(subdir)
                continue
            #Update statistics
            try:
                maxcount = _updateSubdirStatistics(db, subdir, name, captions,
                                                   amendCaptions, maxcount,
                                                   lastupdate, complete,
                                                   apiKey)
            except requests.exceptions.RequestException as e:
                print(
                    "ERROR: Network error while trying to update the statistics (\"{}\")"
                    .format(e))
                return

        #Loop through skipped subdirs in random order
        count = len(skippedSubdirs)
        random.shuffle(skippedSubdirs)
        for i, subdir in enumerate(skippedSubdirs, start=1):
            #Check if maxcount was reached
            if maxcount == 0:
                break
            #Get last update info, every skipped subdir is a known channel
            name = os.path.basename(os.path.normpath(subdir))
            lastupdate, complete = channels[name]
            #Update statistics
            print("({}/{}) ".format(i, count), end='')
            try:
                maxcount = _updateSubdirStatistics(db, subdir, name, captions,
                                                   amendCaptions, maxcount,
                                                   lastupdate, complete,
                                                   apiKey)
            except requests.exceptions.RequestException as e:
                print(
                    "ERROR: Network error while trying to update the statistics (\"{}\")"
                    .format(e))
                return

        #Write lastupdate to statistics database, only reached if the run
        #was not aborted
        db.execute("UPDATE setup SET lastupdate = ? WHERE id = 1",
                   (updateStarted, ))
    finally:
        #Close database, also on the early returns and on errors
        yta.closeDB(dbCon)
Example #10
0
def findMissing(args):
    '''Find discrepancies between files and database

    The IDs of the files are read from their "Comment" tag using exiftool
    (the ID is the text after the second colon of the output line) and
    compared with the entries of the "videos" table of the archive database
    in the same directory. Every database entry without a file and every file
    without a database entry is printed.

    :param args: The command line arguments given by the user
    :type args: list
    '''
    parser = argparse.ArgumentParser(prog="ytamissing", description="Find discrepancies between files and archive database in directory")
    parser.add_argument("DIR", help="The directory to work on")
    args = parser.parse_args(args)
    path = os.path.normpath(os.path.abspath(args.DIR))
    if not os.path.isdir(path):
        parser.error("DIR must be a directory")

    #Read IDs from files
    fIDs = []
    #Maps an ID to the name of the first file it was found in
    fNames = {}
    currentName = None
    cmd = ["exiftool", "-api", "largefilesupport=1", "-m", "-Comment", path]
    #The context manager closes the pipe and waits for exiftool to finish
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
        for line in p.stdout:
            line = line.decode("utf-8").strip()
            if line.startswith("=="):
                #Header line announcing the file the following tags belong to
                currentName = os.path.basename(line.split(' ', 1)[1].strip())
            elif line.startswith("Comment"):
                parts = line.split(':', 2)
                #Ignore comments without an ID part or without a known file
                if currentName is None or len(parts) < 3:
                    continue
                vid = parts[2].strip()
                fIDs.append(vid)
                fNames.setdefault(vid, currentName)
    if not fIDs:
        print("No videos found in directory")
        return
    #Read IDs from database
    dbPath = os.path.join(path, "archive.db")
    aFiles = []
    db = None
    try:
        #Check if database needs upgrade
        yta.upgradeDatabase(dbPath)

        db = yta.connectDB(dbPath)
        r = db.execute("SELECT youtubeID,title FROM videos;")
        for item in r.fetchall():
            #Write ids to list
            aFiles.append({"name" : item[1], "id" : item[0]})
    except sqlite3.Error as e:
        print(e)
        return
    finally:
        #The database is not needed after reading, close it on every path
        if db is not None:
            yta.closeDB(db)
    if not aFiles:
        print("No videos found in archive db")
        return
    #Compare IDs: every database entry consumes one matching file ID
    found = False
    for aFile in aFiles:
        try:
            fIDs.remove(aFile["id"])
        except ValueError:
            found = True
            print("Video file \"{}\" missing (ID: {})".format(aFile["name"], aFile["id"]))
    #Whatever is left belongs to files unknown to the database
    for fID in fIDs:
        found = True
        print("Video \"{}\" not in database (ID: {})".format(fNames[fID], fID))
    if not found:
        print("No discrepancies between files and database")
Example #11
0
def archiveAll(args):
    '''Call archive script for all subdirs

    Every subdir of args.DIR that contains an "archive.db" is archived in
    random order. If the run is interrupted with Ctrl+C, the remaining
    subdirs are stored in "progress.json" so that a run started within an
    hour continues where the previous one stopped. Errors found in the logs
    of the subdirs are collected in the file "log" of the parent directory.
    Afterwards the video statistics are updated if requested on the command
    line or enabled in the statistics database.

    :param args: The parsed command line arguments given by the user
    :type args: argparse.Namespace
    '''
    #Set all to false for subsequent calls
    args.all = False

    #Remember the statistics options and disable them for the subsequent
    #calls, the statistics are updated once for all subdirs at the end
    updateStatistics = args.statistics
    updateCaptions = args.captions
    amendCaptions = args.amendcaptions
    args.statistics = False
    args.captions = False
    args.amendcaptions = False

    t1 = time.time()

    #Get path
    path = os.path.normpath(os.path.abspath(args.DIR))

    #Check for progress file in directory
    progressPath = os.path.join(path, "progress.json")
    try:
        with open(progressPath, 'r') as f:
            progress = json.load(f)
        #Progress older than one hour is discarded
        if (t1 - progress["abortTime"]) > 3600:
            progress = {"elapsed": 0}
            os.remove(progressPath)
    except (OSError, ValueError, KeyError):
        #No progress file, or an unreadable/incomplete one: start from scratch
        progress = {"elapsed": 0}
    logFile = os.path.join(path, "log")

    if "subdirs" in progress:
        subdirs = progress["subdirs"]
        counter = progress["counter"] - 1
        channels = progress["channels"]
        #Print message
        if channels > 1:
            print("CONTINUING ARCHIVING ALL {} CHANNELS IN \'{}\'\n".format(
                channels, path))
    else:
        #Get subdirs in path
        subdirs = [
            os.path.join(path, name)
            for name in sorted(os.listdir(path), key=str.casefold)
            if os.path.isdir(os.path.join(path, name))
        ]
        subdirs = [
            sub for sub in subdirs
            if os.path.isfile(os.path.join(sub, "archive.db"))
        ]
        random.shuffle(subdirs)
        if not subdirs:
            print("ERROR: No subdirs with archive databases at \'{}\'".format(
                path))
            return
        #Prepare
        channels = len(subdirs)
        progress["channels"] = channels
        counter = 0
        #Start with an empty log file
        with open(logFile, 'w') as f:
            f.truncate()
        #Print message
        if channels > 1:
            print("ARCHIVING ALL {} CHANNELS IN \'{}\'\n".format(
                channels, path))
    #Initiate error log
    errorLog = ""
    #Loop through all subdirs
    try:
        t2 = time.time()
        leftover = subdirs.copy()
        for subdir in subdirs:
            counter += 1
            name = os.path.basename(os.path.normpath(subdir))
            args.DIR = subdir
            #Language and video are always read from the database
            args.LANG = None
            args.VIDEO = None
            print("\nARCHIVING \'{}\' ({}/{})".format(name, counter, channels))
            archive(args, True)
            #Read errors from log, each together with the line preceding it
            error = ""
            with open(os.path.join(subdir, "log"), 'r') as f:
                lines = f.readlines()
            for i, line in enumerate(lines):
                if line.startswith("ERROR"):
                    #The first line has no predecessor
                    previous = lines[i - 1] if i > 0 else ""
                    error += "\n" + previous + line
            if error:
                errorLog += '\n\n' + name + '\n' + error
            leftover.remove(subdir)
    except KeyboardInterrupt:
        #Aborting, write progress file and log
        t = time.time()
        progress["counter"] = counter
        progress["subdirs"] = leftover
        progress["elapsed"] += t - t2
        progress["abortTime"] = t
        with open(progressPath, 'w') as f:
            json.dump(progress, f)
        if errorLog:
            with open(logFile, 'a') as f:
                f.writelines(errorLog)
        #Rethrow exception
        raise
    #Progress file no longer relevant, removing it
    try:
        os.remove(progressPath)
    except OSError:
        pass
    #Write error log
    if not errorLog:
        errorLog = "No errors\n"
    with open(logFile, 'a') as f:
        f.writelines(errorLog)

    t3 = time.time()

    #Check if statistics is set to autoupdate
    #NOTE(review): the condition reads "(not updateStatistics) or
    #updateCaptions"; possibly "not (updateStatistics or updateCaptions)" was
    #intended - confirm before changing
    autoUpdateStatistics = False
    if not updateStatistics or updateCaptions:
        #statsDB stays None if connecting fails so that the cleanup below
        #does not touch an unbound name
        statsDB = None
        try:
            statsDB = yta.connectDB(os.path.join(path, "statistics.db"))
            r = statsDB.execute(
                "SELECT autoupdate FROM setup ORDER BY id DESC LIMIT 1;")
            autoUpdateStatistics = bool(r.fetchone()[0])
            del r
        except (sqlite3.Error, TypeError):
            #No usable setup entry (missing table or no row): no autoupdate
            pass
        finally:
            if statsDB is not None:
                try:
                    yta.closeDB(statsDB)
                except sqlite3.Error:
                    pass

    #Update statistics
    if updateStatistics or autoUpdateStatistics or updateCaptions or amendCaptions:
        statTime = True
        try:
            ytameta.updateAllStatistics(path, autoUpdateStatistics,
                                        updateCaptions, amendCaptions)
        except yta.NoAPIKeyError:
            print(
                "ERROR: Unable to update video statistics as no API key is available"
            )
        except RequestException as e:
            print(
                "ERROR: Unable to update video statistics due to connection error: \"{}\""
                .format(e))
    else:
        statTime = False

    #Print time, including the time spent in runs that were interrupted
    t4 = time.time()
    print("\nTotal runtime: {}\nArchive runtime: {}".format(
        yta.intervalToStr(progress["elapsed"] + (t4 - t1)),
        yta.intervalToStr(progress["elapsed"] + (t3 - t2))))
    if statTime:
        print("Statistic runtime: " + yta.intervalToStr(t4 - t3))

    print("\nDONE!")
Example #12
0
def _buildArchiveParser():
    '''Create the command line parser of the archive script

    :returns: The configured parser
    :rtype: argparse.ArgumentParser
    '''
    parser = argparse.ArgumentParser(
        prog="ytarchiver",
        description="Download and archive Youtube videos or playlists")
    parser.add_argument(
        "-a",
        "--all",
        action="store_const",
        dest="all",
        const=True,
        default=False,
        help=
        "Run archiver for all subdirectories with archive databases. In this mode, LANG and VIDEO will always be read from the databases"
    )
    parser.add_argument("-c",
                        "--check",
                        action="store_const",
                        dest="check",
                        const="-c",
                        default="",
                        help="Check each file after download")
    #Only one of the statistics options may be given
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s",
                       "--statistics",
                       action="store_const",
                       dest="statistics",
                       const=True,
                       default=False,
                       help="Update the video statistics")
    group.add_argument(
        "-u",
        "--captions",
        action="store_const",
        dest="captions",
        const=True,
        default=False,
        help=
        "List videos where captions were added since archiving (forces -s)"
    )
    group.add_argument(
        "-x",
        "--amendcaptions",
        action="store_const",
        dest="amendcaptions",
        const=True,
        default=False,
        help=
        "Download captions were they were added since archiving (forces -u and consequently -s)"
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_const",
        dest="replace",
        const="-r",
        default="",
        help="Replace an existing video (a video ID has to be provided)")
    #Only one of the resolution limits may be given
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-8k",
                       "--8K",
                       action="store_const",
                       dest="quality",
                       const="8k",
                       help="Limit download resolution to 8K")
    group.add_argument("-4k",
                       "--4K",
                       action="store_const",
                       dest="quality",
                       const="4k",
                       help="Limit download resolution to 4K (default)")
    group.add_argument("-hd",
                       "--HD",
                       action="store_const",
                       dest="quality",
                       const="hd",
                       help="Limit download resolution to full HD")
    parser.add_argument("-V",
                        "--version",
                        action="version",
                        version='%(prog)s {}'.format(yta.__version__))
    parser.add_argument("DIR", help="The directory to work in")
    parser.add_argument(
        "LANG",
        nargs='?',
        help="The video language (read from the database if not given)")
    parser.add_argument(
        "-f",
        "--file",
        action="store",
        dest="file",
        help="Read IDs to archive from a batch file with one ID per line")
    #NOTE(review): --filter is added to the resolution group and is therefore
    #mutually exclusive with -8k/-4k/-hd - confirm this is intended
    group.add_argument(
        "--filter",
        action="store",
        dest="filter",
        default=None,
        help=
        "Filter videos to download using Youtube-dl's match filter option")
    parser.add_argument(
        "VIDEO",
        nargs='?',
        help=
        "The Youtube video or playlist ID (read from the database if not given)"
    )
    return parser


def archive(args, parsed=False):
    '''Archive youtube videos or playlists

    Downloads the given video/playlist (or the IDs of a batch file) into the
    directory, skipping videos already listed in the archive database, and
    updates the channel entry of the database afterwards. With the "all"
    option the work is delegated to archiveAll.

    :param args: The command line arguments given by the user, or the already parsed arguments if parsed is True
    :type args: list or argparse.Namespace
    :param parsed: Whether args was already parsed (Default: False)
    :type parsed: boolean, optional
    '''

    #The parser is always created so that parser.error() can also be used
    #when already parsed arguments were passed in
    parser = _buildArchiveParser()
    #Parse arguments
    if not parsed:
        args = parser.parse_args(args)

        if args.all and args.file:
            parser.error("-a cannot be used in combination with batch file")
        if args.all and args.replace:
            parser.error("-a cannot be used in combination with replace")

    #Check if API key provided
    yta.getAPIKey(True)

    #Archive all subdirectories
    if args.all:
        archiveAll(args)
        return

    #Validate path
    path = os.path.normpath(os.path.abspath(args.DIR))
    if not os.path.isdir(path):
        parser.error("An existing directory must be specified")

    #Check if database exists
    dbPath = os.path.join(path, "archive.db")
    if not os.path.isfile(dbPath):
        #No database found, ask to create one (empty answer means yes)
        while True:
            q = input("New archive. Populate with channel info? [Y|n] ")
            if not q:
                q = 'y'
            a = q[0].lower()
            if a in ['y', 'n']:
                break
        if a == 'y':
            ytainfo.add(dbPath)
        else:
            ytainfo.createEmpty(dbPath)

    t1 = time.time()

    #Check if database needs upgrade
    yta.upgradeDatabase(dbPath)

    #Check if ID and language are specified
    if not args.LANG or (not args.VIDEO and not args.file):
        #Try reading playlist and language from database
        try:
            (args.LANG, args.VIDEO) = readInfoFromDB(dbPath)
        except (sqlite3.Error, TypeError):
            #Try reading playlist and language from files
            try:
                with open(os.path.join(path, "language"), 'r') as f:
                    args.LANG = f.readline().strip()
                with open(os.path.join(path, "playlist"), 'r') as f:
                    args.VIDEO = f.readline().strip()
            except (IndexError, OSError):
                parser.error(
                    "LANG and VIDEO must be specified if no database exists.")

    #Update lastupdate field
    updateTimestamp = int(time.time())
    db = yta.connectDB(dbPath)
    db.execute("UPDATE channel SET lastupdate = ? WHERE id = 1",
               (updateTimestamp, ))

    #Replace existing video
    if args.replace:
        try:
            youtubeID = db.execute(
                "SELECT youtubeID FROM videos WHERE youtubeID = ?;",
                (args.VIDEO, )).fetchone()[0]
            assert youtubeID
        except (sqlite3.Error, TypeError, AssertionError):
            print(
                "ERROR: Unable to replace video with ID \"{}\" as it is not in the archive database"
                .format(args.VIDEO))
            #Release the connection instead of leaking it. The pending
            #lastupdate change is rolled back first, as nothing was archived
            #(presumably closeDB would commit it otherwise - TODO confirm)
            db.rollback()
            yta.closeDB(db)
            return

    #Get format string, fall back to the resolution stored in the database
    if args.quality:
        q = args.quality
    else:
        q = db.execute(
            "SELECT maxresolution FROM channel WHERE id=1;").fetchone()[0]
    dlformat = yta.getFormatString(q)

    #Prepare download
    dlfilePath = os.path.join(path, "downloaded")
    writeDownloadedFile(dbPath, dlfilePath, args.replace, args.VIDEO)
    dlpath = os.path.join(path, "ID%(id)s&%(title)s.%(ext)s")
    postHook = PostHook(args.LANG, db, args.check, args.replace)

    #Set options
    ytdlOpts = {
        "call_home": False,
        "quiet": False,
        "format": dlformat,
        "ignoreerrors": True,
        "download_archive": dlfilePath,
        "writesubtitles": True,
        "subtitleslangs": [args.LANG],
        "writedescription": True,
        "writethumbnail": True,
        "outtmpl": dlpath,
        "cachedir": False,
        "youtube_include_dash_manifest": True,
        "retries": 10,
        "fragment_retries": 25,
        "skip_unavailable_fragments": False,
        "continuedl": True,
        "extractor_args": {
            "youtube": {
                "player_client": ["android"]
            }
        },
        "throttledratelimit": 100000,
        "allow_playlist_files": False,
        "post_hooks": [postHook.finished]
    }
    ytdlOpts["postprocessors"] = [{
        "key": "FFmpegVideoConvertor",
        "preferedformat": "mp4"
    }, {
        "key": "FFmpegMetadata"
    }, {
        "key": "EmbedThumbnail",
        "already_have_thumbnail": False
    }]
    if args.filter:
        ytdlOpts["match_filter"] = matchFilterFunc(args.filter)

    #Check if archiving one video/playlist or using a batch file
    if args.file:
        with open(args.file, 'r', encoding="utf-8") as f:
            url = readBatchURLs(f)
    else:
        url = [args.VIDEO]

    #Prepare log
    logFile = os.path.join(path, "log")
    #Download
    with DoubleLogger(logFile):
        with yt_dlp.YoutubeDL(ytdlOpts) as ytdl:
            ytdl.download(url)

    #Print status
    print("Download complete, updating database...")

    #Update video number and totalsize, failures are deliberately ignored
    try:
        db.execute(
            "UPDATE channel SET videos = (SELECT count(id) FROM videos), totalsize = (SELECT sum(filesize) FROM videos) WHERE id = 1;"
        )
    except sqlite3.Error:
        pass

    #Update statistics
    if args.statistics or args.captions or args.amendcaptions:
        print("Updating video statistics...")
        try:
            ytameta.updateStatistics(db,
                                     updateTimestamp,
                                     args.captions,
                                     amendCaptions=args.amendcaptions)
        except yta.NoAPIKeyError:
            print(
                "ERROR: Unable to update video statistics as no API key is available"
            )
        except RequestException as e:
            print(
                "ERROR: Unable to update video statistics due to connection error: \"{}\""
                .format(e))

    #Close database
    yta.closeDB(db)

    #Print time
    t2 = time.time()
    print("DONE! Duration: " + yta.intervalToStr(t2 - t1))

    #Remove download archive file
    try:
        os.remove(dlfilePath)
    except OSError:
        pass
Example #13
0
def fix(args, parsed=False):
    '''Update artist in database and metadata

    Every video whose stored creator differs from the correct artist is
    passed to fixVideo, its checksum is recalculated and the database entry
    is updated.

    :param args: The command line arguments given by the user, or an already parsed namespace if ``parsed`` is set
    :type args: list
    :param parsed: Whether ``args`` was already parsed; it must then provide the attributes ``all``, ``DIR`` and ``ARTIST``
    :type parsed: boolean, optional
    '''
    #Always build the parser: parser.error() is used for error reporting
    #below, also when the arguments were parsed by the caller
    parser = argparse.ArgumentParser(
        prog="ytafix", description="Fix wrong artist information")
    parser.add_argument(
        "-a",
        "--all",
        action="store_const",
        dest="all",
        const=True,
        default=False,
        help=
        "Run fixer for all subdirectories with archive databases. In this mode, the ARTIST will always be read from the database"
    )
    parser.add_argument("DIR", help="The directory to work in")
    parser.add_argument(
        "ARTIST",
        nargs='?',
        help="The correct artist name (read from the database if not given)"
    )
    if not parsed:
        args = parser.parse_args(args)

    #Run fixer for all subdirectories
    if args.all:
        fixAll(args)
        return

    #Validate path
    path = os.path.normpath(os.path.abspath(args.DIR))
    dbPath = os.path.join(path, "archive.db")
    if not os.path.isdir(path) or not os.path.isfile(dbPath):
        parser.error("DIR must be a directory containing an archive database")

    #Check if database needs upgrade
    yta.upgradeDatabase(dbPath)

    #Connect to database
    db = yta.connectDB(dbPath)

    #Get correct artist
    artist = args.ARTIST
    if not artist:
        try:
            row = db.execute("SELECT name FROM channel LIMIT 1;").fetchone()
        except sqlite3.Error:
            row = None
        #fetchone() returns None if the channel table is empty
        artist = row[0] if row else None
        if not artist:
            #parser.error() exits, so release the database first
            yta.closeDB(db)
            parser.error(
                "No correct artist specified and unable to read it from the database"
            )

    #Read artists, filenames and video IDs from database
    try:
        rows = db.execute(
            "SELECT creator,filename,youtubeID FROM videos;").fetchall()
    except sqlite3.Error as e:
        print(e)
        yta.closeDB(db)
        return
    files = [{"artist": r[0], "name": r[1], "id": r[2]} for r in rows]

    found = False
    for f in files:
        #Skip videos that already carry the correct artist
        if f["artist"] == artist:
            continue
        found = True
        filepath = os.path.join(path, f["name"])
        try:
            #Change meta data; fixVideo returns an (artist, title) pair which
            #is kept in separate names so that the reference artist used for
            #the comparison above is not overwritten
            newArtist, title = fixVideo(filepath, f["id"], artist)
            #Calculate checksums
            checksum = yta.calcSHA(filepath)
            #Update database
            db.execute(
                "UPDATE videos SET checksum = ?, creator = ? , title = ? WHERE youtubeID = ?",
                (checksum, newArtist, title, f["id"]))
        except requests.exceptions.HTTPError:
            print("ERROR: Unable to fix \"{}\"".format(f["name"]))
            continue
        print("File \"{}\" fixed".format(f["name"]))
    if not found:
        print("No files to fix")
    #Close database
    yta.closeDB(db)
Example #14
0
def check(args, parsed=False):
    '''Perform integrity checks on files

    Every file listed in the archive database is checked for existence and
    its checksum is compared to the stored one. Files without a stored
    checksum get the newly calculated one written to the database.

    :param args: The command line arguments given by the user, or an already parsed namespace if ``parsed`` is set
    :type args: list
    :param parsed: Whether ``args`` was already parsed; it must then provide the attributes ``all``, ``DIR`` and ``check``
    :type parsed: boolean, optional

    :returns: The error messages of all failed checks (always empty when run for all subdirectories)
    :rtype: list of string
    '''
    #Always build the parser: parser.error() is used for error reporting
    #below, also when the arguments were parsed by the caller
    parser = argparse.ArgumentParser(
        prog="ytacheck", description="Verify integrity of archived files")
    parser.add_argument("DIR", help="The directory to work in")
    parser.add_argument(
        "-a",
        "--all",
        action="store_const",
        dest="all",
        const=True,
        default=False,
        help="Run checker for all subdirectories with archive databases")
    parser.add_argument(
        "-c",
        "--check",
        action="store_const",
        dest="check",
        const=True,
        default=False,
        help="Perform additional integrity check using ffmpeg")
    if not parsed:
        args = parser.parse_args(args)

    #Run checker for all subdirectories
    if args.all:
        checkAll(args)
        return []

    #Validate path
    path = os.path.normpath(os.path.abspath(args.DIR))
    dbPath = os.path.join(path, "archive.db")
    if not os.path.isdir(path) or not os.path.isfile(dbPath):
        parser.error("DIR must be a directory containing an archive database")

    #Read filenames and checksums from database
    errors = []
    try:
        #Check if database needs upgrade
        yta.upgradeDatabase(dbPath)

        db = yta.connectDB(dbPath)
        rows = db.execute(
            "SELECT id,filename,checksum FROM videos;").fetchall()
    except sqlite3.Error as e:
        sys.exit(
            "ERROR: Unable to read from database (Error: \"{}\")".format(e))
    files = [{"checksum": r[2], "name": r[1], "id": r[0]} for r in rows]

    for f in files:
        filepath = os.path.join(path, f["name"])
        #Check if file exists
        if not os.path.isfile(filepath):
            msg = "ERROR: File \"{}\" missing".format(f["name"])
            print(msg)
            errors.append(msg)
            continue
        #Check movie file
        if args.check:
            #Decode the whole file without writing output; ffmpeg is asked to
            #log errors only, so any output is treated as corruption
            cmd = ["ffmpeg", "-v", "error", "-i", filepath, "-f", "null", "-"]
            out = subprocess.run(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT).stdout
            if out:
                msg = "ERROR: File \"{}\" corrupt!".format(f["name"])
                print(msg)
                errors.append(msg)
            else:
                print("File \"{}\" check passed".format(f["name"]))
        #Calculate checksums
        checksum = yta.calcSHA(filepath)

        if not f["checksum"]:
            #Nothing to compare against: store the calculated checksum
            db.execute("UPDATE videos SET checksum = ? WHERE id = ?",
                       (checksum, f["id"]))
            print("WARNING: File \"{}\" no checksum in database, adding {}".
                  format(f["name"], checksum))
        elif f["checksum"] == checksum:
            print("File \"{}\" checksums match".format(f["name"]))
        else:
            msg = "ERROR: Checksum mismatch for file \"{}\" (New checksum: {})".format(
                f["name"], checksum)
            print(msg)
            errors.append(msg)
    #Close database
    yta.closeDB(db)

    #Print status
    if errors:
        print("\nDONE, {} CORRUPTED FILE(S)".format(len(errors)))
    else:
        print("\nDONE, NO CORRUPTED FILE")
    #Return errors
    return errors
Example #15
0
def postprocess(args):
    '''Postprocess a video file or a directory of video files

    The archive database is expected (or created) in the directory of the
    given file or in the given directory itself.

    :param args: The command line arguments given by the user
    :type args: list
    '''
    parser = argparse.ArgumentParser(
        prog="ytapost",
        description=
        "Perform the postprocessing steps on a downloaded video file")
    parser.add_argument("-c",
                        "--check",
                        action="store_const",
                        dest="check",
                        const=True,
                        default=False,
                        help="Check file integrity")
    parser.add_argument("-r",
                        "--replace",
                        action="store_const",
                        dest="replace",
                        const=True,
                        default=False,
                        help="Replace existing file")
    parser.add_argument("PATH", help="The file or the directory to work with")
    parser.add_argument("LANG",
                        nargs='?',
                        default="",
                        help="The video language")
    args = parser.parse_args(args)

    #Get files
    supported = (".m4v", ".mp4")
    path = os.path.normpath(os.path.abspath(args.PATH))
    if os.path.isfile(path):
        dirPath = os.path.dirname(path)
        if not path.lower().endswith(supported):
            parser.error(
                "Unsupported file format, only .mp4 and .m4v are supported")
        files = [path]
    elif os.path.isdir(path):
        dirPath = path
        candidates = (os.path.join(path, f) for f in os.listdir(path)
                      if f.lower().endswith(supported))
        files = [c for c in candidates if os.path.isfile(c)]
        if not files:
            parser.error(
                "No supported files in directory, only .mp4 and .m4v are supported"
            )
    else:
        #Without this branch dirPath would be unbound for a missing path
        parser.error("PATH must be an existing file or directory")

    #Connect to database
    try:
        dbFile = os.path.join(dirPath, "archive.db")
        dbCon = createOrConnectDB(dbFile)
        db = dbCon.cursor()
    except sqlite3.Error as e:
        print(e)
        return

    for f in files:
        processFile(f, args.LANG, db, args.check, args.replace)

    yta.closeDB(dbCon)
Example #16
0
def _promptRequired(prompt):
    '''Ask the user repeatedly until a non-blank answer is given

    :param prompt: The text shown to the user
    :type prompt: string

    :returns: The answer without surrounding whitespace
    :rtype: string
    '''
    while True:
        answer = input(prompt).strip()
        if answer:
            return answer


def _promptOptional(prompt):
    '''Ask the user once for a value that may be skipped

    :param prompt: The text shown to the user
    :type prompt: string

    :returns: The answer without surrounding whitespace or None if it was blank
    :rtype: string
    '''
    answer = input(prompt).strip()
    return answer if answer else None


def _promptImage(prompt):
    '''Ask the user for an image URL until it can be loaded or is skipped

    :param prompt: The text shown to the user
    :type prompt: string

    :returns: The two values returned by yta.loadImage, or (None, None) if skipped
    :rtype: tuple
    '''
    while True:
        url = input(prompt).strip()
        if not url:
            return None, None
        try:
            image, imageFormat = yta.loadImage(url)
        except requests.exceptions.RequestException:
            #Also covers malformed URLs and connection problems, not only
            #HTTP error status codes
            print("ERROR: Invalid URL")
            continue
        return image, imageFormat


def add(dbPath):
    '''Add channel info to the archive database

    All values are requested interactively from the user and stored as a new
    row in the channel table.

    :param dbPath: The path of the archive database
    :type dbPath: string
    '''
    #Create/connect database
    db = createOrConnectDB(dbPath)
    print("ADDING CHANNEL INFO")
    #Get required values; blank answers are asked again
    name = _promptRequired("Channel name: ")
    url = _promptRequired("Channel url: ")
    playlist = _promptRequired("Video playlist: ")
    language = _promptRequired("Channel language code: ")
    #Get channel Description
    print(
        "Channel description (Press Ctrl-D [Ctrl-Z on WIN] directly to skip or after input to save):"
    )
    descLines = []
    while True:
        try:
            descLines.append(input())
        except EOFError:
            break
    desc = '\n'.join(descLines) if descLines else None
    #Get join date
    joined = _promptOptional("Join date (YYYY-MM-DD) or enter to skip: ")
    #Get location
    location = _promptOptional("Location (or enter to skip): ")
    #Get links
    print("Add links: Prettyname first, then the url. Enter to continue")
    linkEntries = []
    while True:
        #Number by accepted links so that a rejected entry keeps its number
        no = len(linkEntries) + 1
        prettyName = input(
            "Prettyname no. {} (or enter to continue): ".format(no)).strip()
        if not prettyName:
            break
        linkURL = input("URL no. {}: ".format(no)).strip()
        if not linkURL:
            print("ERROR: URL must be specified")
            continue
        #One link per line: prettyname and url separated by a tab
        linkEntries.append(prettyName + '\t' + linkURL + '\n')
    links = ''.join(linkEntries) if linkEntries else None
    #Get profile picture
    profile, profileformat = _promptImage(
        "Profile picture URL (or enter to skip): ")
    #Get banner image
    banner, bannerformat = _promptImage(
        "Banner image URL (or enter to skip): ")

    insert = "INSERT INTO channel(name, url, playlist, language, description, location, joined, links, profile, profileformat, banner, bannerformat, videos, lastupdate, dbversion, maxresolution, totalsize) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
    db.execute(insert, (name, url, playlist, language, desc, location, joined,
                        links, profile, profileformat, banner, bannerformat, 0,
                        0, yta.__dbversion__, "default", 0))
    print("FINISHED ADDING CHANNEL INFO")

    yta.closeDB(db)