Example no. 1
0
# Note: `app` (presumably the Flask application created elsewhere in this module) and
# `path` are module-level names whose imports are not shown in this snippet.
def upload_listens(force):
    """ Invoke script to upload listens to HDFS.
    """
    from listenbrainz_spark.ftp.download import ListenbrainzDataDownloader
    from listenbrainz_spark.hdfs.upload import ListenbrainzDataUploader
    with app.app_context():
        downloader_obj = ListenbrainzDataDownloader()
        src = downloader_obj.download_listens(path.FTP_FILES_PATH)
        uploader_obj = ListenbrainzDataUploader()
        uploader_obj.upload_listens(src, force=force)
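
# Usage sketch (not part of the original source): with the signature above, a caller only
# passes the `force` flag through to the uploader. The wrapper name below is a
# hypothetical example, shown purely for illustration.
def reimport_listens():
    # Pass force=True straight through to ListenbrainzDataUploader.upload_listens.
    upload_listens(force=True)
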
def upload_listens(force, incremental, id):
    """ Invoke script to upload listens to HDFS.
    """
    from listenbrainz_spark.ftp.download import ListenbrainzDataDownloader
    from listenbrainz_spark.hdfs.upload import ListenbrainzDataUploader
    with app.app_context():
        downloader_obj = ListenbrainzDataDownloader()
        dump_type = 'incremental' if incremental else 'full'
        # Only the download path is needed here; the second value returned by
        # download_listens is ignored.
        src, _ = downloader_obj.download_listens(directory=path.FTP_FILES_PATH,
                                                 listens_dump_id=id, dump_type=dump_type)
        uploader_obj = ListenbrainzDataUploader()
        uploader_obj.upload_listens(src, force=force)
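
# CLI sketch (not part of the original source): the signature above is the kind of
# function that is typically wired to a command-line entry point. A minimal sketch using
# the `click` library is shown below; the command and option names are illustrative
# assumptions, not the project's actual CLI.
import click

@click.command(name='upload_listens')
@click.option('--force', is_flag=True, default=False, help='Force the upload (passed through to the uploader).')
@click.option('--incremental', is_flag=True, default=False, help='Import an incremental dump instead of a full dump.')
@click.option('--id', 'dump_id', type=int, default=None, help='Import the dump with this id; latest dump if omitted.')
def upload_listens_command(force, incremental, dump_id):
    """Download a listens dump over FTP and upload it to HDFS."""
    upload_listens(force, incremental, dump_id)
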
import tempfile
from datetime import datetime
# DumpType, utils and the downloader/uploader classes come from other project modules
# (their imports are omitted in this snippet).


def import_full_dump_to_hdfs(dump_id: int = None) -> str:
    """ Import the full dump with the given dump_id if specified, otherwise the
    latest full dump.

    Notes:
        Deletes all the existing listens and uploads listens from the new dump.

    Args:
        dump_id: id of the full dump to be imported

    Returns:
        the name of the imported dump
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        downloader = ListenbrainzDataDownloader()
        src, dump_name, dump_id = downloader.download_listens(
            directory=temp_dir,
            dump_type=DumpType.FULL,
            listens_dump_id=dump_id)
        # Close the FTP connection as soon as the download has finished.
        downloader.connection.close()
        # Upload the downloaded archive, replacing any listens already in HDFS.
        ListenbrainzDataUploader().upload_new_listens_full_dump(src)
    # Record the id, type and import time of the dump that was just imported.
    utils.insert_dump_data(dump_id, DumpType.FULL, datetime.utcnow())
    return dump_name
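
# Handler sketch (not part of the original source): in a job-queue / request-consumer
# style setup, import_full_dump_to_hdfs would typically be called from a small handler
# that reports which dump was imported and how long the import took. The handler name
# and the message fields below are illustrative assumptions, not the project's actual API.
import time

def import_full_dump_handler(dump_id: int = None):
    start = time.monotonic()
    dump_name = import_full_dump_to_hdfs(dump_id=dump_id)
    # Return a single message describing the completed import.
    return [{
        'type': 'import_full_dump',
        'imported_dump': dump_name,
        'time_taken_seconds': round(time.monotonic() - start, 2),
    }]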