def upload_listens(force):
    """ Invoke script to upload listens to HDFS. """
    from listenbrainz_spark.ftp.download import ListenbrainzDataDownloader
    from listenbrainz_spark.hdfs.upload import ListenbrainzDataUploader

    with app.app_context():
        # Fetch the listens archive from the FTP server, then push it to HDFS.
        downloader = ListenbrainzDataDownloader()
        archive_path = downloader.download_listens(path.FTP_FILES_PATH)
        ListenbrainzDataUploader().upload_listens(archive_path, force=force)
def upload_listens(force, incremental, id):
    """ Invoke script to upload listens to HDFS. """
    # NOTE(review): parameter `id` shadows the builtin, but it is part of the
    # public signature (callers may pass it by keyword), so it is kept as-is.
    from listenbrainz_spark.ftp.download import ListenbrainzDataDownloader
    from listenbrainz_spark.hdfs.upload import ListenbrainzDataUploader

    with app.app_context():
        # Pick which dump series to download from.
        if incremental:
            dump_type = 'incremental'
        else:
            dump_type = 'full'
        downloader = ListenbrainzDataDownloader()
        # download_listens returns (path, ...); only the path is needed here.
        archive_path, _ = downloader.download_listens(
            directory=path.FTP_FILES_PATH, listens_dump_id=id, dump_type=dump_type)
        ListenbrainzDataUploader().upload_listens(archive_path, force=force)
def import_full_dump_to_hdfs(dump_id: int = None) -> str:
    """ Import the full dump with the given dump_id if specified otherwise the
    latest full dump.

    Notes:
        Deletes all the existing listens and uploads listens from new dump.

    Args:
        dump_id: id of the full dump to be imported; None means the latest
            full dump. (The annotation should really be Optional[int] — left
            unchanged because the file's import block is not visible here.)

    Returns:
        the name of the imported dump
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        downloader = ListenbrainzDataDownloader()
        try:
            src, dump_name, dump_id = downloader.download_listens(
                directory=temp_dir, dump_type=DumpType.FULL,
                listens_dump_id=dump_id)
        finally:
            # Close the FTP connection even when the download raises; the
            # previous code only closed it on success, leaking the connection
            # on any download error.
            downloader.connection.close()
        ListenbrainzDataUploader().upload_new_listens_full_dump(src)
        # NOTE(review): datetime.utcnow() is naive and deprecated since
        # Python 3.12; consider datetime.now(timezone.utc) if downstream
        # consumers of insert_dump_data accept an aware datetime — confirm.
        utils.insert_dump_data(dump_id, DumpType.FULL, datetime.utcnow())
        return dump_name