Exemple #1
0
def process_zip_archive(file, id):
    """Save an uploaded submission file and pass it on for processing.

    The upload is stored below ``CFG_DATADIR/<id>/<timestamp>/``. A file
    ending in ``.yaml`` is split into a submission directory; any other
    upload is handed to ``extract``. Uploads whose name contains
    ``.oldhepdata`` are saved into an ``oldhepdata`` sub-directory and always
    go through ``check_and_convert_from_oldhepdata``.

    :param file: uploaded file object; only its ``filename`` attribute and
        its ``save(path)`` method are used here.
    :param id: record identifier; used as a directory name and passed
        through to the conversion and processing helpers.
    :return: the dict returned by ``check_and_convert_from_oldhepdata``
        (treated as an error report), otherwise whatever
        ``process_submission_directory`` returns.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'], str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if '.oldhepdata' not in filename:
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        # Test the extension rather than a substring so that an archive whose
        # name merely contains "yaml" is still extracted.
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            split_files(file_path, submission_path)
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(submission_path,
                                                  lambda x: x == "submission.yaml")
    else:
        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(submission_path):
            os.makedirs(submission_path)

        if filename.endswith('.txt'):
            # Strip only the trailing extension; str.replace would also remove
            # any ".txt" occurring earlier in the name.
            filename = filename[:-len('.txt')]

        target_path = os.path.join(submission_path, filename)
        print('Saving file to {}'.format(target_path))
        file.save(target_path)

        # Old-format uploads never contain a submission.yaml; force conversion.
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id,
                                                   time_stamp)

        # A dict result is an error report and is returned to the caller as is.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
Exemple #2
0
def test_file_download_and_split(app, migrator, identifiers):
    """Each identifier's file can be downloaded and split below CFG_TMPDIR."""
    with app.app_context():
        for entry in identifiers:
            hepdata_id = entry["hepdata_id"]

            downloaded = migrator.download_file(hepdata_id)
            assert downloaded is not None

            # Output directory and zip archive both live in the temp dir and
            # are named after the record identifier.
            tmp_dir = app.config['CFG_TMPDIR']
            output_dir = os.path.join(tmp_dir, hepdata_id)
            archive_path = os.path.join(tmp_dir, hepdata_id + ".zip")

            split_files(downloaded, output_dir, archive_path)

            assert os.path.exists(output_dir)
Exemple #3
0
def process_zip_archive(file, id):
    """Save an uploaded submission file and pass it on for processing.

    The upload is stored below ``CFG_DATADIR/<id>/<timestamp>/``. A
    ``.yaml`` file is split into a submission directory, any other upload is
    handed to ``extract``, and a ``.oldhepdata`` file is saved into an
    ``oldhepdata`` sub-directory for conversion.

    :param file: uploaded file object; only its ``filename`` attribute and
        its ``save(path)`` method are used here.
    :param id: record identifier; used as a directory name and passed
        through to the conversion and processing helpers.
    :return: an error dict (from the YAML splitter or from
        ``check_and_convert_from_oldhepdata``), otherwise whatever
        ``process_submission_directory`` returns.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'],
                                       str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory,
                                       remove_file_extension(filename))
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            # (the second return value is not needed here)
            error, _ = split_files(file_path, submission_path)
            if error:
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(submission_path):
            os.makedirs(submission_path)

        target_path = os.path.join(submission_path, filename)
        print('Saving file to {}'.format(target_path))
        file.save(target_path)

        # Old-format uploads never contain a submission.yaml; force conversion.
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id,
                                                   time_stamp)

        # A dict result is an error report and is returned to the caller as
        # is; isinstance also recognises dict subclasses.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
Exemple #4
0
    def prepare_files_for_submission(self, inspire_id, force_retrieval=False):
        """
        Either returns the existing output location, or downloads the file
        and splits it.

        :param inspire_id: record identifier; also the directory name used
            below ``CFG_DATADIR``.
        :param force_retrieval: when true, download and split again even if
            the output location already exists.
        :return: output location if successful, None if the download
            returned nothing
        """
        output_location = os.path.join(current_app.config['CFG_DATADIR'], inspire_id)

        if os.path.exists(output_location) and not force_retrieval:
            print('File for {0} already in system...no download required.'.format(inspire_id))
            return output_location

        print('Downloading file for {0}'.format(inspire_id))
        file_location = self.download_file(inspire_id)
        if not file_location:
            return None

        # The zip archive path is the output directory path plus ".zip".
        split_files(file_location, output_location, '{0}.zip'.format(output_location))
        return output_location
Exemple #5
0
    def prepare_files_for_submission(self, inspire_id, force_retrieval=False):
        """
        Either returns the existing output location, or downloads the file
        and splits it.

        A download happens when the output location does not exist, when
        ``get_file_in_directory`` finds no 'yaml' file in it, or when
        ``force_retrieval`` is set.

        :param inspire_id: record identifier; also the directory name used
            below ``CFG_DATADIR``.
        :param force_retrieval: when true, always download and split again.
        :return: tuple ``(output_location, last_updated)``;
            ``output_location`` is None if the download returned nothing,
            and ``last_updated`` is the value reported by ``split_files``
            or, when nothing was split, the current time.
        """
        output_location = os.path.join(current_app.config["CFG_DATADIR"],
                                       inspire_id)
        last_updated = datetime.now()

        download = not os.path.exists(output_location) or (
            get_file_in_directory(output_location, 'yaml') is None)

        if not (download or force_retrieval):
            print("File for {0} already in system...no download required.".
                  format(inspire_id))
            return output_location, last_updated

        print("Downloading file for {0}".format(inspire_id))
        file_location = self.download_file(inspire_id)
        if not file_location:
            return None, last_updated

        error, last_updated = split_files(
            file_location, output_location,
            "{0}.zip".format(output_location))
        if error:
            # Previously dropped silently; the return value is unchanged, the
            # failure is only made visible in the log.
            log.error('Error splitting file for {0}: {1}'.format(
                inspire_id, error))

        # remove temporary download file after processing (best effort)
        try:
            os.remove(file_location)
        except OSError:
            log.info('Unable to remove {0}'.format(file_location))

        return output_location, last_updated
Exemple #6
0
def process_zip_archive(file, id):
    """Save an uploaded submission file and pass it on for processing.

    The upload is stored below ``CFG_DATADIR/<id>/<timestamp>/``. A
    ``.yaml`` file is split, and any other non-``.oldhepdata`` upload is
    extracted, into a temporary directory under ``CFG_TMPDIR``; the result
    is then copied to the submission directory (with ``xrdcp`` when
    ``PRODUCTION_MODE`` is set, plain ``cp`` otherwise). The temporary
    directory is removed on every exit path.

    :param file: uploaded file object; only its ``filename`` attribute and
        its ``save(path)`` method are used here.
    :param id: record identifier; used as a directory name and passed
        through to the conversion and processing helpers.
    :return: an error dict (from the splitter, the extractor or
        ``check_and_convert_from_oldhepdata``), otherwise whatever
        ``process_submission_directory`` returns.
    :raises subprocess.CalledProcessError: if the copy command fails.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'],
                                       str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        print('Saving file to {}'.format(file_path))
        file.save(file_path)

        submission_path = os.path.join(file_save_directory,
                                       remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(
            dir=current_app.config["CFG_TMPDIR"])

        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission
                # directory (the second return value is not needed here)
                error, _ = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            elif not extract(file_path, submission_temp_path):
                # we are dealing with a zip, tar, etc. that failed to extract
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message":
                        "{} is not a valid zip or tar archive file.".format(
                            file_path)
                    }]
                }

            if not os.path.exists(submission_path):
                os.makedirs(submission_path)

            # Move files from submission_temp_path to submission_path (try to
            # avoid problems with EOS disk).
            if current_app.config.get('PRODUCTION_MODE', False):
                # production instance at CERN
                copy_command = ['xrdcp', '-N', '-f']
                copy_submission_path = submission_path.replace(
                    current_app.config['CFG_DATADIR'],
                    current_app.config['EOS_DATADIR'])
            else:
                # local instance
                copy_command = ['cp']
                copy_submission_path = submission_path

            copy_source = submission_temp_path + '/.'
            print('Copying with: {} -r {} {}'.format(' '.join(copy_command),
                                                     copy_source,
                                                     copy_submission_path))
            subprocess.check_output(
                copy_command + ['-r', copy_source, copy_submission_path])
        finally:
            # Always clean up, including on the early error returns above and
            # when the copy command raises.
            rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")

    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        target_path = os.path.join(file_path, filename)
        print('Saving file to {}'.format(target_path))
        file.save(target_path)

        # Old-format uploads never contain a submission.yaml; force conversion.
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(file_path, id, time_stamp)

        # A dict result is an error report and is returned to the caller as is.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
Exemple #7
0
def process_zip_archive(file_path, id, old_submission_schema=False,
                        old_data_schema=False):
    """Unpack an already-saved submission file and process its contents.

    Handling depends on the file name:

    * ``*.oldhepdata`` is converted with ``check_and_convert_from_oldhepdata``
      and then processed with ``old_data_schema=True``.
    * ``*.yaml.gz`` is extracted next to the original (the name minus
      ``.gz``) and the function recurses on the resulting ``.yaml`` file.
    * ``*.yaml`` is split into a temporary directory.
    * anything else is treated as an archive and extracted into a temporary
      directory.

    For the last two cases the temporary directory is moved to the
    submission directory with ``move_files`` and must contain a
    ``submission.yaml``.

    :param file_path: path of the saved upload.
    :param id: record identifier, passed through to the helpers.
    :param old_submission_schema: passed through to
        ``process_submission_directory``.
    :param old_data_schema: passed through to
        ``process_submission_directory``; forced to True for ``.oldhepdata``
        input.
    :return: an error dict if any step fails, otherwise whatever
        ``process_submission_directory`` returns.
    """
    # Local import: only needed to clean up the temp directory on failure.
    import shutil

    (file_save_directory, filename) = os.path.split(file_path)

    if filename.endswith('.oldhepdata'):
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(file_dir, id, time_stamp)

        # A dict result is an error report and is returned to the caller as is.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result
        old_data_schema = True

    elif filename.endswith('.yaml.gz'):
        # No temp directory is needed on this path; the recursive call makes
        # its own for the extracted .yaml file.
        unzipped_file_path = file_path[:-3]
        print('Extracting: {} to {}'.format(file_path, unzipped_file_path))
        if not extract(file_path, unzipped_file_path):
            return {
                "Archive file extractor": [{
                    "level": "error", "message": "{} is not a valid .gz file.".format(file_path)
                }]
            }
        # Forward the caller's schema flags unchanged so a gzipped YAML file
        # is treated exactly like the same file uploaded uncompressed.
        return process_zip_archive(unzipped_file_path, id,
                                   old_submission_schema=old_submission_schema,
                                   old_data_schema=old_data_schema)

    else:
        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            # (the second return value is not needed here)
            error, _ = split_files(file_path, submission_temp_path)
            if error:
                shutil.rmtree(submission_temp_path, ignore_errors=True)
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents;
            # any extraction failure is reported as an invalid archive.
            try:
                unzipped_path = extract(file_path, submission_temp_path)
            except Exception:
                unzipped_path = None

            if not unzipped_path:
                shutil.rmtree(submission_temp_path, ignore_errors=True)
                return {
                    "Archive file extractor": [{
                        "level": "error", "message": "{} is not a valid zip or tar archive file.".format(file_path)
                    }]
                }

        copy_errors = move_files(submission_temp_path, submission_path)
        if copy_errors:
            return copy_errors

        submission_found = find_file_in_directory(submission_path, lambda x: x == "submission.yaml")

        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error", "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_data_schema=old_data_schema,
                                        old_submission_schema=old_submission_schema)