import hashlib
from datetime import datetime
from os import environ, remove

# Project-internal names used below (setup_db, Logger, Workflow, Serializer,
# S3Uploader, JobStatus, DownloadStatus, run_downloader, and the `job` and
# `granule` models) are assumed to be imported elsewhere in this module.


def start_workflow(shared_state, start_date, review_number=0):
    db_connection = setup_db().connect()
    logger = Logger(db_connection)
    shared_state.job_id = None
    shared_state.completed = False

    # MAX_DOWNLOADS is optional; None means no download limit is passed on.
    max_downloads = environ.get('MAX_DOWNLOADS')
    if max_downloads is not None:
        max_downloads = int(max_downloads)
    max_upload_workers = int(environ.get('MAX_UPLOADERS', 20))

    try:
        workflow = Workflow(
            db_connection,
            logger,
            start_date,
            max_downloads,
            max_upload_workers,
            environ.get('ALLOW_REPEAT', 'FALSE') == 'TRUE',
        )
        workflow.start(shared_state)
    except Exception:
        logger.exception()
        # If the workflow got far enough to create a job, mark it as failed.
        if shared_state.job_id is not None:
            job_serializer = Serializer(db_connection, job)
            job_serializer.put(shared_state.job_id, {
                'status': JobStatus.FAILED,
            })
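
# A minimal usage sketch (an assumption, not part of the original module):
# `shared_state` looks like a multiprocessing.Manager().Namespace(), so that
# `job_id` and `completed` stay visible across processes. The start date
# below is illustrative.
#
#     from datetime import date
#     from multiprocessing import Manager, Process
#
#     shared_state = Manager().Namespace()
#     runner = Process(target=start_workflow,
#                      args=(shared_state, date.today()))
#     runner.start()
#     runner.join()
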
def main():
    db_connection = setup_db().connect()
    logger = Logger(db_connection)
    logger.info('Initializing Downloader')
    try:
        run_downloader(db_connection, logger)
    except Exception:
        logger.exception()
    logger.info('Finishing Downloader')
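
# Assumed entrypoint wiring (a sketch; if present, it would sit at the bottom
# of the module so every function is defined before main() runs):
#
#     if __name__ == '__main__':
#         main()
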
def upload_worker(queue, job_id, worker_id):
    """Upload downloaded files from the work queue to the S3 bucket."""
    db_connection = setup_db().connect()
    logger = Logger(db_connection, job_id)
    bucket_name = environ.get('UPLOAD_BUCKET')
    try:
        logger.info(f'Creating S3 uploader #{worker_id}')
        uploader = S3Uploader(bucket=bucket_name)
        granule_serializer = Serializer(db_connection, granule)
        while True:
            message = queue.get()
            if message == 'DONE':
                # Put the sentinel back so the other workers also stop.
                queue.put('DONE')
                break
            product_id, filename = message
            try:
                with open(filename, 'rb') as downloaded_file:
                    checksum = hashlib.md5(
                        downloaded_file.read()).hexdigest().upper()
                # Record the download as SUCCESS before uploading.
                granule_serializer.put(
                    product_id, {
                        'download_status': DownloadStatus.SUCCESS,
                        'downloaded_at': datetime.now(),
                        'validated': False,
                        'checksum': checksum,
                        's3_location': f'{bucket_name}/{filename}',
                    })
                logger.info(f'Uploading {product_id} at #{worker_id}',
                            f'Filename: {filename}')
                uploader.upload_file(filename)
                remove(filename)
                logger.info(f'Uploaded {product_id} at #{worker_id}',
                            f'Filename: {filename}')
            except Exception:
                logger.exception()
    except Exception:
        logger.exception()
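
# A minimal sketch (an assumption, not part of the original module) of how a
# pool of upload workers could be fed: each queued message is a
# (product_id, filename) tuple, and a single 'DONE' sentinel drains the whole
# pool because every worker re-queues it before exiting. The product id and
# path are hypothetical, and `job_id` is assumed to come from the workflow.
#
#     from multiprocessing import Process, Queue
#
#     queue = Queue()
#     workers = [
#         Process(target=upload_worker, args=(queue, job_id, worker_id))
#         for worker_id in range(int(environ.get('MAX_UPLOADERS', 20)))
#     ]
#     for worker in workers:
#         worker.start()
#     queue.put(('example-product-id', '/tmp/example_granule.zip'))
#     queue.put('DONE')
#     for worker in workers:
#         worker.join()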