예제 #1
0
def process_zip_archive(file, id):
    """Save an uploaded file and process it as a record submission.

    The upload is stored beneath ``CFG_DATADIR/<id>/<timestamp>``.  A
    ``.oldhepdata`` upload is placed in an ``oldhepdata`` sub-directory and
    handed to the old-format converter.  Any other upload is either split
    (a single ``.yaml`` file) or extracted (an archive) into a submission
    directory, which is then searched for ``submission.yaml``; if none is
    found the old-format converter is tried on that directory instead.

    :param file: uploaded file object exposing ``filename`` and ``save(path)``.
    :param id: record identifier, used as a path component and passed on.
    :return: a dict of errors if splitting or conversion failed, otherwise
        whatever ``process_submission_directory`` returns.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    data_root = current_app.config['CFG_DATADIR']
    file_save_directory = os.path.join(data_root, str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    submission_found = False

    if filename.endswith('.oldhepdata'):
        # Old-format uploads live in their own sub-directory, which doubles
        # as the directory handed to the converter below.
        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(submission_path):
            os.makedirs(submission_path)

        destination = os.path.join(submission_path, filename)
        print('Saving file to {}'.format(destination))
        file.save(destination)
    else:
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory,
                                       remove_file_extension(filename))

        if filename.endswith('.yaml'):
            # A single YAML file is split into a submission directory.
            error, _ = split_files(file_path, submission_path)
            if error:
                splitter_error = {"level": "error", "message": str(error)}
                return {"Single YAML file splitter": [splitter_error]}
        else:
            # Anything else is treated as an archive (zip, tar, ...).
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")

    if not submission_found:
        # No submission.yaml available: attempt the old-format conversion.
        result = check_and_convert_from_oldhepdata(submission_path, id,
                                                   time_stamp)
        if type(result) is dict:
            # The converter reports its errors as a dict.
            return result
        submission_found = result

    basepath, submission_file_path = submission_found
    return process_submission_directory(basepath, submission_file_path, id)
예제 #2
0
파일: api.py 프로젝트: HEPData/hepdata3
def process_zip_archive(file, id):
    """Save an uploaded file and process it as a record submission.

    The upload is stored beneath ``CFG_DATADIR/<id>/<timestamp>``.  Files
    named ``*.oldhepdata`` or ``*.oldhepdata.txt`` are saved (without the
    trailing ``.txt``) into an ``oldhepdata`` sub-directory and passed to the
    old-format converter.  Any other upload is split (single ``.yaml`` file)
    or extracted (archive) into a submission directory, which is searched for
    ``submission.yaml``; when none is found the converter is tried instead.

    File types are recognised by suffix rather than by substring, so that an
    archive such as ``yaml_tables.zip`` is not mistaken for a YAML file and
    only a trailing ``.txt`` is stripped from old-format file names.

    :param file: uploaded file object exposing ``filename`` and ``save(path)``.
    :param id: record identifier, used as a path component and passed on.
    :return: a dict of errors if conversion failed, otherwise whatever
        ``process_submission_directory`` returns.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'], str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith(('.oldhepdata', '.oldhepdata.txt')):
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            split_files(file_path, submission_path)
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(submission_path,
                                                  lambda x: x == "submission.yaml")
    else:
        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(submission_path):
            os.makedirs(submission_path)

        if filename.endswith('.txt'):
            # Drop only the trailing ".txt"; str.replace would also remove
            # any ".txt" occurring earlier in the name.
            filename = filename[:-len('.txt')]

        destination = os.path.join(submission_path, filename)
        print('Saving file to {}'.format(destination))
        file.save(destination)

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id,
                                                   time_stamp)

        # The converter reports its errors as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
예제 #3
0
def process_zip_archive(file, id):
    """Save an uploaded file and process it as a record submission.

    The upload is stored beneath ``CFG_DATADIR/<id>/<timestamp>``.  Unless it
    is a ``.oldhepdata`` file, it is split (single ``.yaml`` file) or
    extracted (archive) into a scratch directory under ``CFG_TMPDIR``, whose
    contents are then copied into the submission directory with ``xrdcp``
    (when ``PRODUCTION_MODE`` is set) or ``cp``.  The scratch directory is
    always removed afterwards, including on error returns and when the copy
    command fails.

    :param file: uploaded file object exposing ``filename`` and ``save(path)``.
    :param id: record identifier, used as a path component and passed on.
    :return: a dict of errors if splitting, extraction or old-format
        conversion failed, otherwise whatever
        ``process_submission_directory`` returns.
    :raises subprocess.CalledProcessError: if the copy command exits with a
        non-zero status.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'],
                                       str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        print('Saving file to {}'.format(file_path))
        file.save(file_path)

        submission_path = os.path.join(file_save_directory,
                                       remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(
            dir=current_app.config["CFG_TMPDIR"])

        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission
                # directory
                error, _ = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            elif not extract(file_path, submission_temp_path):
                # we are dealing with a zip, tar, etc. whose contents could
                # not be extracted
                return {
                    "Archive file extractor": [{
                        "level":
                        "error",
                        "message":
                        "{} is not a valid zip or tar archive file.".format(
                            file_path)
                    }]
                }

            if not os.path.exists(submission_path):
                os.makedirs(submission_path)

            # Move files from submission_temp_path to submission_path (try to
            # avoid problems with EOS disk).
            if current_app.config.get('PRODUCTION_MODE', False):
                # production instance at CERN
                copy_command = ['xrdcp', '-N', '-f']
                copy_submission_path = submission_path.replace(
                    current_app.config['CFG_DATADIR'],
                    current_app.config['EOS_DATADIR'])
            else:
                # local instance
                copy_command = ['cp']
                copy_submission_path = submission_path

            copy_source = submission_temp_path + '/.'
            print('Copying with: {} -r {} {}'.format(' '.join(copy_command),
                                                     copy_source,
                                                     copy_submission_path))
            subprocess.check_output(
                copy_command + ['-r', copy_source, copy_submission_path])
        finally:
            # The scratch directory must not outlive this call, whichever
            # way the block above is left.
            rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")

    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(file_path, id, time_stamp)

        # The converter reports its errors as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
예제 #4
0
def process_zip_archive(file_path, id, old_submission_schema=False,
                        old_data_schema=False):
    """Process an already-saved upload as a record submission.

    A ``.yaml.gz`` file is decompressed next to itself and the function
    recurses on the result.  A ``.yaml`` file is split, and any other
    non-``.oldhepdata`` file is extracted as an archive, into a scratch
    directory under ``CFG_TMPDIR``; the contents are then moved into the
    submission directory, which must contain ``submission.yaml``.  A
    ``.oldhepdata`` file is passed to the old-format converter.

    The scratch directory is only created when it is needed and is removed
    on every exit path.

    :param file_path: path of the saved upload.
    :param id: record identifier, passed on to the helpers.
    :param old_submission_schema: forwarded to
        ``process_submission_directory``.
    :param old_data_schema: forwarded to ``process_submission_directory``;
        forced to ``True`` after a successful old-format conversion.
    :return: a dict of errors on failure, otherwise whatever
        ``process_submission_directory`` returns.
    """
    # Local import: this block is the only place here that needs shutil.
    import shutil

    (file_save_directory, filename) = os.path.split(file_path)

    if not filename.endswith('.oldhepdata'):
        if filename.endswith('.yaml.gz'):
            # Handled before the scratch directory is created: this branch
            # never uses it and always returns.
            unzipped_file_path = file_path[:-3]
            print('Extracting: {} to {}'.format(file_path, unzipped_file_path))
            if not extract(file_path, unzipped_file_path):
                return {
                    "Archive file extractor": [{
                        "level": "error", "message": "{} is not a valid .gz file.".format(file_path)
                    }]
                }
            # NOTE(review): old_data_schema is hard-coded to False here
            # rather than forwarded from the caller - confirm this is
            # intended.
            return process_zip_archive(unzipped_file_path, id,
                                       old_submission_schema=old_submission_schema,
                                       old_data_schema=False)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission directory
                error, _ = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            else:
                # we are dealing with a zip, tar, etc. so we extract the contents
                try:
                    unzipped_path = extract(file_path, submission_temp_path)
                except Exception:
                    # Any extraction failure is reported as an invalid archive.
                    unzipped_path = None

                if not unzipped_path:
                    return {
                        "Archive file extractor": [{
                            "level": "error", "message": "{} is not a valid zip or tar archive file.".format(file_path)
                        }]
                    }

            copy_errors = move_files(submission_temp_path, submission_path)
            if copy_errors:
                return copy_errors
        finally:
            # Harmless if the directory has already been removed.
            shutil.rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(submission_path, lambda x: x == "submission.yaml")

        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error", "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found

    else:
        # The converter is given the parent of the directory holding the
        # file; that parent's name is used as the timestamp.
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(file_dir, id, time_stamp)

        # The converter reports its errors as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result
        old_data_schema = True

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_data_schema=old_data_schema,
                                        old_submission_schema=old_submission_schema)
예제 #5
0
파일: api.py 프로젝트: islahudinees/hepdata
def _import_record(inspire_id,
                   update_existing=False,
                   base_url='https://hepdata.net',
                   send_email=False):
    """Import a single record, identified by its INSPIRE id, from ``base_url``.

    Publication information is fetched first.  A new record is created when
    no submission exists for the id; an existing one is only re-imported
    when ``update_existing`` is true.  The record's file is downloaded,
    copied into the record's data directory and processed; if processing
    reports errors it is retried once with the old submission and data
    schemas.  On success the record is finalised.

    Any exception raised after the record id is known causes the submission
    to be unloaded and ``False`` to be returned.

    :param inspire_id: INSPIRE identifier of the record to import.
    :param update_existing: re-import the record if it already exists.
    :param base_url: site to download the record's file from.
    :param send_email: forwarded to ``do_finalise``.
    :return: ``True`` if the record was imported and finalised, otherwise
        ``False``.
    """
    publication_information, status = get_inspire_record_information(
        inspire_id)
    if status != "success":
        # Formatted rather than concatenated so a non-string id cannot raise.
        log.error("Failed to retrieve publication information for {0}".format(
            inspire_id))
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if not current_submission:
        log.info(
            "The record with id {0} does not exist in the database, so we're loading it."
            .format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        record_information = create_record(publication_information)
        recid = record_information['recid']
    else:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if update_existing:
            log.info("Updating instead")
            recid = current_submission.publication_recid
        else:
            log.info("Not updating as update_existing is False")
            return False

    try:
        download_path = _download_file(base_url, inspire_id)

        filename = os.path.basename(download_path)

        time_stamp = str(int(round(time.time())))
        file_save_directory = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(file_save_directory):
            os.makedirs(file_save_directory)

        file_path = os.path.join(file_save_directory, filename)
        log.info("Moving file to %s" % file_path)
        shutil.copy(download_path, file_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(file_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with old schema
            # Need to clean up first to avoid errors
            # First delete tables
            cleanup_submission(recid, 1, [])
            # Next remove remaining files.  The first attempt may have
            # failed before the submission directory was created, in which
            # case there is nothing to remove and the retry must still run.
            file_save_directory = os.path.dirname(file_path)
            submission_path = os.path.join(file_save_directory,
                                           remove_file_extension(filename))
            if os.path.isdir(submission_path):
                shutil.rmtree(submission_path)

            errors = process_zip_archive(file_path,
                                         recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)

            if errors:
                log.error("Could not process zip archive: ")
                for error_source, source_errors in errors.items():
                    log.error("    %s:" % error_source)
                    for error in source_errors:
                        log.error("        %s" % error['message'])

                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)

        result_json = do_finalise(recid,
                                  force_finalise=True,
                                  update=(current_submission is not None),
                                  convert=False,
                                  send_email=send_email)
        result = json.loads(result_json)

        if result and result['success']:
            log.info("Imported record %s with %s submissions" %
                     (recid, result['data_count']))
            return True
        else:
            raise ValueError("Failed to finalise record.")
    except Exception as e:
        # Unload record
        unload_submission(recid)
        log.error(e)
        return False