Example #1
def work_for_pool(file, s3, scope):
    try:
        db_manager = DBManager(script_path)

        if scope == 'insert':
            # New file: upload to S3 and record it in the local database.
            S3Uploader.upload_file(file, s3)
            db_manager.insert_files_uploaded((file['file'],
                                              file['path'],
                                              int(file['size']),
                                              file['modified'],
                                              file['checksum']))
            logger.info('Uploaded {filename} of size {size}'.format(filename=file['path'], size=file['size']))

        elif scope == 'update':
            # Modified file: re-upload and refresh the stored modified time.
            S3Uploader.upload_file(file, s3)
            db_manager.update_file_modified((file['modified'], file['checksum']))
            logger.info('Updated modified time for {filename}.'.format(filename=file['path']))

        elif scope == 'path':
            # Moved file: update the S3 key metadata and the stored path.
            S3Uploader.change_metadata(file, s3)
            db_manager.update_file_path((file['path'], file['checksum']))
            logger.info('Updated path for {filename}.'.format(filename=file['path']))

    except Exception as e:
        logger.error('For file: {file}'.format(file=file))
        logger.error(e)
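
The s3_upload helper that example #2 calls is not shown on this page. Below is a minimal sketch of how it might fan files out to work_for_pool; the pool type, pool size, and the boto3 client creation are assumptions, not part of the original. A thread pool is used because boto3 clients cannot be pickled for a process pool, and boto3 clients are documented as thread-safe.

from functools import partial
from multiprocessing.pool import ThreadPool

import boto3

def s3_upload(files, scope):
    # Hypothetical sketch: one shared S3 client; the real code may build
    # and pass its S3 handle differently.
    s3 = boto3.client('s3')
    with ThreadPool(processes=4) as pool:
        # Bind the fixed arguments so the pool only iterates over files.
        pool.map(partial(work_for_pool, s3=s3, scope=scope), files)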
Example #2
def main():
    """
    I. Create the tables if they do not exist:
        1. A table holding snapshots of the current files.
        2. A table recording which files have been uploaded to S3.

    II.
        1. Scan the paths from the config file.
        2. Query the files already found on S3.

    III.
        1. For each file, calculate the size and modified date to be appended to the S3 key metadata.
        2. Calculate an md5 checksum from the name/size.
        3. Checks:
            a. Check whether the file was already uploaded or has been modified.
               If the path has changed, issue a copy operation and update the DynamoDB record.
            b. If not uploaded, upload to S3 and create a DynamoDB record.

    Uploads happen by passing lists of files to the pool. Modified files are
    re-uploaded and overwrite the current keys (S3 versioning is disabled).
    """
    try:
        # I: ensure the tables exist
        db_manager = DBManager(script_path)
        db_manager.create_table()

        all_files = {}

        # II: scan the configured paths
        for path in config.FOLDER_PATHS.split(';'):

            if path and os.path.exists(path):
                logger.info('Checking files in {path}'.format(path=path))
                result, _ = walking(path)
                all_files[path] = result

            else:
                logger.error('Path: {path} does not exist'.format(path=path))

        # III: sort files into three lists (new, modified, path changed)
        new_files_upload = []
        modified_files_upload = []
        path_changed = []

        for path, files in all_files.items():
            logger.info('Found {files} local files in {path}'.format(files=len(files), path=path))

            for i, file in files.items():
                check_file_db = db_manager.is_uploaded((file['checksum'],))  # look the file up in the local db

                if not check_file_db:  # not in the table yet, so the file is new
                    new_files_upload.append(file)

                elif file['modified'] != check_file_db[0][3]:  # it exists but the modified date does not match
                    modified_files_upload.append(file)
                    logger.info('{file}: old mtime: {old}, new mtime: {new}'.format(file=file['path'],
                                                                                    old=check_file_db[0][3],
                                                                                    new=file['modified']))

                elif os.path.normpath(file['path']) != os.path.normpath(check_file_db[0][1]):  # same file, new path
                    path_changed.append(file)
                    logger.info('{file}: old path: {old}, new path: {new}'.format(file=file['path'],
                                                                                  old=check_file_db[0][1],
                                                                                  new=file['path']))

        if not new_files_upload and not modified_files_upload and not path_changed:  # nothing to do
            logger.info('No new files. No new modified files. Exiting.')
            exit()
        else:
            logger.info('{keys}: new files to be uploaded.'.format(keys=len(new_files_upload)))
            logger.info('{keys}: modified files to be uploaded.'.format(keys=len(modified_files_upload)))
            logger.info('{keys}: need to have the metadata path changed.'.format(keys=len(path_changed)))

            if new_files_upload:
                s3_upload(new_files_upload, scope='insert')

            if modified_files_upload:
                s3_upload(modified_files_upload, scope='update')

            if path_changed:
                s3_upload(path_changed, scope='path')

    except Exception as e:
        logger.error("error: {}".format(e))