def work_for_pool(file, s3, scope):
    """Process a single file record inside a worker-pool task.

    Depending on ``scope``:
      - 'insert': upload the file to S3 and insert a new tracking row.
      - 'update': re-upload the file and update its modified time/checksum row.
      - 'path':   rewrite the S3 key metadata and update the stored path.

    Any other ``scope`` value is silently a no-op (only the DB manager is created).

    Args:
        file: mapping with keys 'file', 'path', 'size', 'modified', 'checksum'
              (assumed shape based on usage here -- TODO confirm against walking()).
        s3: S3 client/resource handle passed through to S3Uploader.
        scope: one of 'insert', 'update', 'path'.

    All exceptions are caught and logged so one bad file cannot kill the pool.
    """
    try:
        # Each worker gets its own DB connection; sharing one across
        # processes would not be safe.
        db_manager = DBManager(script_path)
        if scope == 'insert':
            S3Uploader.upload_file(file, s3)
            db_manager.insert_files_uploaded((file['file'], file['path'], int(file['size']),
                                              file['modified'], file['checksum']))
            # BUGFIX: template previously contained literal '(unknown)' while the
            # filename= kwarg went unused, so logs never named the file.
            logger.info('Uploaded {filename} of size {size}'.format(filename=file['path'],
                                                                    size=file['size']))
        elif scope == 'update':
            S3Uploader.upload_file(file, s3)
            db_manager.update_file_modified((file['modified'], file['checksum']))
            logger.info('Modified time for {filename}.'.format(filename=file['path']))
        elif scope == 'path':
            # Path change: no re-upload needed, only the key metadata and the
            # stored path are rewritten.
            S3Uploader.change_metadata(file, s3)
            db_manager.update_file_path((file['path'], file['checksum']))
            logger.info('Modified path for {filename}.'.format(filename=file['path']))
    except Exception as e:
        # Deliberate broad catch at the worker boundary: log and keep the pool alive.
        logger.error('For file: {file}'.format(file=file))
        logger.error(e)
def main():
    """Synchronize local folders with S3.

    I: Create tables if they do not exist:
       1. Table holding snapshots of current files.
       2. Table to verify which file has been uploaded to S3.
    II: Scan the paths from the config file and query the files
        already recorded as uploaded.
    III: For each local file:
       1. Size and modified date are carried in the S3 key metadata.
       2. The checksum (md5 of name/size) identifies the file in the DB.
       3. Checks:
          a. Already uploaded but modified -> re-upload (S3 versioning disabled,
             so the existing key is overwritten).
          b. Already uploaded but path changed -> copy/metadata operation plus
             a DB path update.
          c. Not uploaded at all -> upload and create a DB record.
       Uploads are dispatched by passing each list of files to the pool.
    """
    # NOTE: the original placed this text *after* `try:`, where it was a dead
    # string statement rather than a docstring; moved to the canonical position.
    try:
        db_manager = DBManager(script_path)
        db_manager.create_table()

        # Walk every configured folder; keyed by root path.
        all_files = {}
        for path in config.FOLDER_PATHS.split(';'):
            # Check truthiness first so an empty segment from a trailing ';'
            # skips the filesystem call (same outcome, clearer intent).
            if path and os.path.exists(path):
                logger.info('Checking files in {path}'.format(path=path))
                result, _ = walking(path)
                all_files[path] = result
            else:
                logger.error('Path: {path} does not exist'.format(path=path))

        # III: classify every local file against the tracking DB.
        new_files_upload = []       # never seen before
        modified_files_upload = []  # known, but mtime differs
        path_changed = []           # known, but stored path differs
        for path, files in all_files.items():
            logger.info('Found {files} local files in {path}'.format(files=len(files), path=path))
            for file in files.values():
                # Row layout assumed: [1] = path, [3] = modified -- TODO confirm
                # against DBManager.is_uploaded.
                check_file_db = db_manager.is_uploaded((file['checksum'],))
                if not check_file_db:
                    # Not in the table -> brand new file.
                    new_files_upload.append(file)
                elif file['modified'] != check_file_db[0][3]:
                    # Exists but the modified date no longer matches.
                    modified_files_upload.append(file)
                    logger.info('{file}: old mtime: {old}, new mtime:{new}'.format(
                        file=file['path'], old=check_file_db[0][3], new=file['modified']))
                elif os.path.normpath(file['path']) != os.path.normpath(check_file_db[0][1]):
                    # Same content, different location -> metadata-only change.
                    path_changed.append(file)
                    logger.info('{file}: old path: {old}, new path:{new}'.format(
                        file=file['path'], old=check_file_db[0][1], new=file['path']))

        if not new_files_upload and not modified_files_upload and not path_changed:
            # Nothing to do; return instead of exit() so main() stays a
            # well-behaved function (net effect is identical under the usual
            # `if __name__ == '__main__'` guard).
            logger.info('No new files. No new modified files. Exiting.')
            return

        logger.info('{keys}: new files to be uploaded.'.format(keys=len(new_files_upload)))
        logger.info('{keys}: modified files to be uploaded.'.format(keys=len(modified_files_upload)))
        logger.info('{keys}: need to have the metadata path changed.'.format(keys=len(path_changed)))
        if new_files_upload:
            s3_upload(new_files_upload, scope='insert')
        if modified_files_upload:
            s3_upload(modified_files_upload, scope='update')
        if path_changed:
            s3_upload(path_changed, scope='path')
    except Exception as e:
        # Top-level boundary: log and fall through rather than crash the script.
        logger.error("error: {}".format(e))