Example #1
0
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """Convert a legacy ``.oldhepdata`` submission found in a directory to YAML.

    :param input_directory: directory searched for a ``.oldhepdata`` file.
    :param id: record identifier; stringified to build the output path.
    :param timestamp: path component identifying this upload.
    :return: a ``{"Converter": [...]}`` error dict when no ``.oldhepdata``
        file is found or the converter reports failure; otherwise whatever
        ``find_file_in_directory`` returns for ``submission.yaml`` inside
        ``<CFG_DATADIR>/<id>/<timestamp>/yaml``.
    """
    def converter_error(message):
        # All failures share the same single-entry error structure.
        return {"Converter": [{"level": "error", "message": message}]}

    def is_oldhepdata(name):
        return name.endswith('.oldhepdata')

    def is_submission_file(name):
        return name == "submission.yaml"

    data_root = current_app.config['CFG_DATADIR']
    converted_path = os.path.join(data_root, str(id), timestamp, 'yaml')

    legacy_match = find_file_in_directory(input_directory, is_oldhepdata)
    if not legacy_match:
        return converter_error(
            "No file with .oldhepdata extension or a submission.yaml"
            " file has been found in the archive."
        )

    # The second element of the match is handed to the converter as its input.
    legacy_file = legacy_match[1]
    if not convert_oldhepdata_to_yaml(legacy_file, converted_path):
        return converter_error(
            "The conversion from oldhepdata "
            "to the YAML format has not succeeded. "
            "Please submit archives in the new format."
        )

    return find_file_in_directory(converted_path, is_submission_file)
Example #2
0
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file
    and convert it to YAML if it happens.

    The conversion is written to a temporary directory under ``CFG_TMPDIR``
    and only moved to the record's data path once it has succeeded.

    :param input_directory: directory searched for a ``.oldhepdata`` file.
    :param id: record identifier; stringified to build the output path.
    :param timestamp: path component identifying this upload.
    :return: a ``{"Converter": [...]}`` error dict when no input file is found
        or the conversion fails; the result of ``move_files`` if it is truthy;
        otherwise whatever ``find_file_in_directory`` returns for
        ``submission.yaml`` in the converted path.
    """
    converted_path = get_data_path_for_record(str(id), timestamp, 'yaml')

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level":
                "error",
                "message":
                "No file with .oldhepdata extension has been found."
            }]
        }

    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')

    try:
        successful = convert_oldhepdata_to_yaml(oldhepdata_found[1],
                                                converted_temp_path)
        if not successful:
            # Parse error message from title of HTML file, removing part of
            # string after final "//". The file is opened in a context manager
            # so the handle is closed before the temp directory is removed.
            with open(converted_temp_path) as error_page:
                soup = BeautifulSoup(error_page, "lxml")
            errormsg = soup.title.string.rsplit("//", 1)[0]

    except Error as error:  # hepdata_converter_ws_client.Error
        successful = False
        errormsg = str(error)

    if not successful:
        # Nothing usable was produced; discard the temporary output.
        shutil.rmtree(converted_temp_dir, ignore_errors=True)

        return {
            "Converter": [{
                "level":
                "error",
                "message":
                "The conversion from oldhepdata "
                "to the YAML format has not succeeded. "
                "Error message from converter follows:<br/><br/>" + errormsg
            }]
        }

    copy_errors = move_files(converted_temp_path, converted_path)
    if copy_errors:
        return copy_errors

    return find_file_in_directory(converted_path,
                                  lambda x: x == "submission.yaml")
Example #3
0
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file
    and convert it to YAML if it happens.

    The conversion is written to a temporary directory under ``CFG_TMPDIR``
    and then copied into ``<CFG_DATADIR>/<id>/<timestamp>/yaml``. The
    temporary directory is removed on every exit path, including when the
    converter or the copy command raises.

    :param input_directory: directory searched for a ``.oldhepdata`` file.
    :param id: record identifier; stringified to build the output path.
    :param timestamp: path component identifying this upload.
    :return: a ``{"Converter": [...]}`` error dict when no input file is found
        or the converter reports failure; otherwise whatever
        ``find_file_in_directory`` returns for ``submission.yaml`` in the
        converted path.
    :raises subprocess.CalledProcessError: if the copy command exits non-zero.
    """
    converted_path = os.path.join(current_app.config['CFG_DATADIR'], str(id), timestamp, 'yaml')

    if not os.path.exists(converted_path):
        os.makedirs(converted_path)

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level": "error",
                "message": "No file with .oldhepdata extension or a submission.yaml"
                           " file has been found in the archive."
            }]
        }

    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')

    try:
        successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_temp_path)
        if not successful:
            # Parse error message from title of HTML file, removing part of
            # string after final "//". Use a context manager so the handle is
            # closed before the temporary directory is deleted.
            with open(converted_temp_path) as error_page:
                soup = BeautifulSoup(error_page, "lxml")
            errormsg = soup.title.string.rsplit("//", 1)[0]

            return {
                "Converter": [{
                    "level": "error",
                    "message": "The conversion from oldhepdata "
                               "to the YAML format has not succeeded. "
                               "Error message from converter follows.\n" + errormsg
                }]
            }

        # Move files from converted_temp_path to converted_path (try to avoid problems on EOS disk).
        if current_app.config.get('PRODUCTION_MODE', False):  # production instance at CERN
            copy_command = ['xrdcp', '-N', '-f']
            copy_converted_path = converted_path.replace(current_app.config['CFG_DATADIR'], current_app.config['EOS_DATADIR'])
        else:  # local instance
            copy_command = ['cp']
            copy_converted_path = converted_path
        print('Copying with: {} -r {} {}'.format(' '.join(copy_command), converted_temp_path + '/.', copy_converted_path))
        subprocess.check_output(copy_command + ['-r', converted_temp_path + '/.', copy_converted_path])
    finally:
        # Runs on success, on the error return above and on exceptions, so the
        # temporary directory never outlives this call.
        rmtree(converted_temp_dir, ignore_errors=True)

    return find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
Example #4
0
def convert_zip_archive(input_archive, output_archive, options):
    """Convert a zip archive into a targz path with given options.

    The archive is extracted into a temporary directory which is removed
    again on every exit path (no matching input found, conversion finished,
    or an exception raised along the way).

    :param input_archive: path or file object accepted by ``zipfile.ZipFile``.
    :param output_archive: path the converter writes its output to.
    :param options: dict of converter options; ``input_format`` (default
        ``'yaml'``) selects whether ``submission.yaml`` or a ``.oldhepdata``
        file is looked up in the extracted archive.
    :return: ``None`` if the expected input is not in the archive, otherwise
        the path of the output file. When the conversion is unsuccessful the
        output is renamed to end in ``.html``.
    """
    input_format = options.get('input_format', 'yaml')

    input_root_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        validation = find_file_in_directory(
            input_root_dir,
            lambda x: x == 'submission.yaml' if input_format == 'yaml' else x.endswith('.oldhepdata')
        )
        if not validation:
            return None

        input_directory, input_file = validation

        successful = convert(
            CFG_CONVERTER_URL,
            input_directory if input_format == 'yaml' else input_file,
            output=output_archive,
            options=options,
            extract=False,
        )
    finally:
        # Previously skipped on the early return and on exceptions, which
        # left the extracted archive behind in the temp area.
        rmtree(input_root_dir)

    # Error occurred, the output is a HTML file
    if not successful:
        # Drops the last 7 characters — presumably a '.tar.gz' suffix;
        # NOTE(review): confirm callers always pass such a name.
        output_file = output_archive[:-7] + '.html'
    else:
        output_file = output_archive
    move(output_archive, output_file)

    return output_file
Example #5
0
def convert_zip_archive(input_archive, output_archive, options):
    """Convert a zip archive into a targz path with given options.

    The archive is extracted into a temporary directory which is removed
    again on every exit path (no matching input found, conversion finished,
    or an exception raised along the way).

    :param input_archive: path or file object accepted by ``zipfile.ZipFile``.
    :param output_archive: path the converter writes its output to.
    :param options: dict of converter options; ``input_format`` (default
        ``'yaml'``) selects whether ``submission.yaml`` or a ``.oldhepdata``
        file is looked up in the extracted archive.
    :return: ``None`` if the expected input is not in the archive, otherwise
        the path of the output file. When the conversion is unsuccessful the
        output is renamed to end in ``.html``.
    """
    input_format = options.get('input_format', 'yaml')

    input_root_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        validation = find_file_in_directory(
            input_root_dir,
            lambda x: x == 'submission.yaml' if input_format == 'yaml' else x.endswith('.oldhepdata')
        )
        if not validation:
            return None

        input_directory, input_file = validation

        successful = convert(
            CFG_CONVERTER_URL,
            input_directory if input_format == 'yaml' else input_file,
            output=output_archive,
            options=options,
            extract=False,
        )
    finally:
        # Previously skipped on the early return and on exceptions, which
        # left the extracted archive behind in the temp area.
        rmtree(input_root_dir)

    # Error occurred, the output is a HTML file
    if not successful:
        # Drops the last 7 characters — presumably a '.tar.gz' suffix;
        # NOTE(review): confirm callers always pass such a name.
        output_file = output_archive[:-7] + '.html'
    else:
        output_file = output_archive
    move(output_archive, output_file)

    return output_file
Example #6
0
def process_zip_archive(file, id):
    """Store an uploaded submission file and process its contents.

    The upload is saved under ``<CFG_DATADIR>/<id>/<unix timestamp>``. A
    single ``.yaml`` file is split into a submission directory, any other
    non-``.oldhepdata`` upload is extracted as an archive, and a
    ``.oldhepdata`` file is saved as-is and converted to YAML.

    :param file: uploaded file object exposing ``filename`` and ``save()``.
    :param id: record identifier used in the storage path and passed on to
        the processing step.
    :return: an error dict (from the YAML splitter or the converter) on
        failure, otherwise the result of ``process_submission_directory``.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'],
                                       str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory,
                                       remove_file_extension(filename))
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            error, _last_updated = split_files(file_path, submission_path)
            if error:
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }

        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        # Legacy uploads are stored in a dedicated 'oldhepdata' directory,
        # which is also where the converter later looks for them.
        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(submission_path):
            os.makedirs(submission_path)

        print('Saving file to {}'.format(os.path.join(submission_path, filename)))
        file.save(os.path.join(submission_path, filename))

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        # No submission.yaml available: fall back to the oldhepdata converter.
        result = check_and_convert_from_oldhepdata(submission_path, id,
                                                   time_stamp)

        # Check for errors: the converter reports them as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
Example #7
0
def prepare_data_folder(input_archive, input_format):
    """Extract a zip archive to a temp directory and yield the located input.

    This is a generator that yields exactly once (presumably wrapped with
    ``contextlib.contextmanager`` outside this view — TODO confirm). The
    extraction directory is created under ``CFG_TMPDIR`` and removed when the
    generator is finalised, whether or not an error occurred.

    :param input_archive: zip archive accepted by ``zipfile.ZipFile``.
    :param input_format: ``'yaml'`` to look for ``submission.yaml``; any other
        value looks for a file ending in ``.oldhepdata``.
    """
    def is_wanted(name):
        # Find the appropriate file/directory in the input archive
        if input_format == 'yaml':
            return name == 'submission.yaml'
        return name.endswith('.oldhepdata')

    extraction_dir = tempfile.mkdtemp(dir=current_app.config['CFG_TMPDIR'])
    try:
        with zipfile.ZipFile(input_archive, 'r') as archive:
            archive.extractall(path=extraction_dir)

        yield find_file_in_directory(extraction_dir, is_wanted)
    finally:
        rmtree(extraction_dir)
Example #8
0
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """ Check if the input directory contains a .oldhepdata file
    and convert it to YAML if it happens.

    :param input_directory: directory searched for a ``.oldhepdata`` file.
    :param id: record identifier; stringified to build the output path.
    :param timestamp: path component identifying this upload.
    :return: a ``{"Converter": [...]}`` error dict when no input file is found
        or the converter reports failure; otherwise whatever
        ``find_file_in_directory`` returns for ``submission.yaml`` inside
        ``<CFG_DATADIR>/<id>/<timestamp>/yaml``.
    """
    converted_path = os.path.join(current_app.config['CFG_DATADIR'], str(id),
                                  timestamp, 'yaml')
    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level":
                "error",
                "message":
                "No file with .oldhepdata extension or a submission.yaml"
                " file has been found in the archive."
            }]
        }
    successful = convert_oldhepdata_to_yaml(oldhepdata_found[1],
                                            converted_path)
    if not successful:
        # Parse error message from title of HTML file, removing part of string
        # after final "//". On failure the output path is read as that HTML
        # file; the context manager ensures the handle is closed afterwards.
        with open(converted_path) as error_page:
            soup = BeautifulSoup(error_page, "lxml")
        errormsg = soup.title.string.rsplit("//", 1)[0]

        return {
            "Converter": [{
                "level":
                "error",
                "message":
                "The conversion from oldhepdata "
                "to the YAML format has not succeeded. "
                "Error message from converter follows.\n" + errormsg
            }]
        }

    return find_file_in_directory(converted_path,
                                  lambda x: x == "submission.yaml")
Example #9
0
def process_zip_archive(file, id):
    """Store an uploaded submission file and process its contents.

    The upload is saved under ``<CFG_DATADIR>/<id>/<unix timestamp>``. Uploads
    whose name contains ``.oldhepdata`` are saved (minus a trailing ``.txt``)
    and converted to YAML; names containing ``yaml`` are split into a
    submission directory; anything else is extracted as an archive.

    :param file: uploaded file object exposing ``filename`` and ``save()``.
    :param id: record identifier used in the storage path and passed on to
        the processing step.
    :return: the converter's error dict on failure, otherwise the result of
        ``process_submission_directory``.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'], str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if '.oldhepdata' not in filename:
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        # NOTE(review): substring test — any name containing "yaml" (e.g.
        # "myyaml.zip") is treated as a single YAML file; confirm intended.
        if 'yaml' in filename:
            # we split the singular yaml file and create a submission directory
            split_files(file_path, submission_path)
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(submission_path,
                                                  lambda x: x == "submission.yaml")
    else:
        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(submission_path):
            os.makedirs(submission_path)

        if filename.endswith('.txt'):
            # Strip only the trailing ".txt"; str.replace would also remove
            # any ".txt" occurring earlier in the name.
            filename = filename[:-len('.txt')]
        print('Saving file to {}'.format(os.path.join(submission_path, filename)))
        file.save(os.path.join(submission_path, filename))

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        # No submission.yaml available: fall back to the oldhepdata converter.
        result = check_and_convert_from_oldhepdata(submission_path, id,
                                                   time_stamp)

        # Check for errors: the converter reports them as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
Example #10
0
def create_original_with_resources(submission, data_filepath, output_path):
    """Copy or create 'original' zip file, i.e. yaml files with resources. If
    resources were imported from hepdata.cedar.ac.uk we create a new zip
    in a format that could be re-uploaded as a submission.

    When the record has no ``resources`` directory the original file is
    simply copied to ``output_path``.

    :param submission: HEPSubmission object (its ``publication_recid`` is
        used to locate the record's data directory)
    :param data_filepath: Path to original file
    :param output_path: Path to output file (in converted dir). A ``.zip``
        extension produces a zip archive; anything else a gzipped tarball.
    :return: None
    """
    resource_location = os.path.join(
        get_data_path_for_record(str(submission.publication_recid)),
        'resources'
    )
    if not os.path.isdir(resource_location):
        shutil.copy2(data_filepath, output_path)
        return

    # There is a resources directory from when this record was imported
    # from the old hepdata site. We need to create a new zip with the
    # contents of data_filepath and resources
    with tempfile.TemporaryDirectory(dir=current_app.config['CFG_TMPDIR']) as tmpdir:
        # Copy resources directory into 'contents' dir in temp directory
        contents_path = os.path.join(tmpdir, 'contents')
        shutil.copytree(resource_location, contents_path)

        # Unzip data_filepath into contents path
        shutil.unpack_archive(data_filepath, contents_path)

        # Need to go through the submission file and update the paths so
        # that all resources are at the top level. This should allow the
        # zip to be re-uploaded or imported
        submission_found = find_file_in_directory(
            contents_path,
            lambda x: x == "submission.yaml"
        )
        if submission_found:
            p = re.compile(r'(\s+location: )\/resource\/.*\/([^\/]+)')
            with fileinput.FileInput(submission_found[1], inplace=True) as file:
                for line in file:
                    # With inplace=True, print() writes back into the file.
                    print(p.sub(r'\g<1>\g<2>', line), end='')

        # Zip up contents dir into a new file. make_archive appends the
        # format's own extension to the base name, so the complete
        # '.tar.gz' suffix has to be stripped; os.path.splitext alone would
        # leave '.tar' behind and produce '<name>.tar.tar.gz'.
        if output_path.endswith('.tar.gz'):
            base = output_path[:-len('.tar.gz')]
            zip_type = 'gztar'
        else:
            base, ext = os.path.splitext(output_path)
            zip_type = 'zip' if ext == '.zip' else 'gztar'
        print("Creating archive at %s" % output_path)
        shutil.make_archive(base, zip_type, contents_path)
Example #11
0
def process_zip_archive(file, id):
    """Store an uploaded submission file and process its contents.

    The upload is saved under ``<CFG_DATADIR>/<id>/<unix timestamp>``. YAML
    files and archives are first unpacked into a temporary directory under
    ``CFG_TMPDIR`` and then copied next to the upload; the temporary
    directory is removed on every exit path. ``.oldhepdata`` files are saved
    as-is and converted to YAML.

    :param file: uploaded file object exposing ``filename`` and ``save()``.
    :param id: record identifier used in the storage path and passed on to
        the processing step.
    :return: an error dict (from the splitter, extractor or converter) on
        failure, otherwise the result of ``process_submission_directory``.
    :raises subprocess.CalledProcessError: if the copy command exits non-zero.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'],
                                       str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        print('Saving file to {}'.format(file_path))
        file.save(file_path)

        submission_path = os.path.join(file_save_directory,
                                       remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(
            dir=current_app.config["CFG_TMPDIR"])

        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission directory
                error, _last_updated = split_files(file_path,
                                                   submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }

            else:
                # we are dealing with a zip, tar, etc. so we extract the contents
                if not extract(file_path, submission_temp_path):
                    return {
                        "Archive file extractor": [{
                            "level":
                            "error",
                            "message":
                            "{} is not a valid zip or tar archive file.".format(
                                file_path)
                        }]
                    }

            if not os.path.exists(submission_path):
                os.makedirs(submission_path)

            # Move files from submission_temp_path to submission_path (try to avoid problems with EOS disk).
            if current_app.config.get('PRODUCTION_MODE',
                                      False):  # production instance at CERN
                copy_command = ['xrdcp', '-N', '-f']
                copy_submission_path = submission_path.replace(
                    current_app.config['CFG_DATADIR'],
                    current_app.config['EOS_DATADIR'])
            else:  # local instance
                copy_command = ['cp']
                copy_submission_path = submission_path
            print('Copying with: {} -r {} {}'.format(' '.join(copy_command),
                                                     submission_temp_path + '/.',
                                                     copy_submission_path))
            subprocess.check_output(
                copy_command +
                ['-r', submission_temp_path + '/.', copy_submission_path])
        finally:
            # Also covers the early error returns and a failing copy command,
            # which previously left the temporary directory behind.
            rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")

    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        # No submission.yaml available: try the oldhepdata converter on
        # file_path (the uploaded file or the 'oldhepdata' directory).
        result = check_and_convert_from_oldhepdata(file_path, id, time_stamp)

        # Check for errors: the converter reports them as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
Example #12
0
def get_file_in_directory(path, extension):
    """Return the first file under ``path`` whose name ends with ``extension``.

    :param path: directory handed to ``find_file_in_directory``.
    :param extension: suffix the file name must end with.
    :return: the second element of the ``find_file_in_directory`` match, or
        ``None`` when the search finds nothing (instead of failing on tuple
        unpacking).
    """
    file_info = find_file_in_directory(path, lambda x: x.endswith(extension))
    if not file_info:
        return None
    _directory, file_path = file_info
    return file_path
Example #13
0
def process_zip_archive(file_path, id, old_submission_schema=False,
                        old_data_schema=False):
    """Unpack an already-saved submission file and process its contents.

    Dispatches on the file name: ``.yaml.gz`` is decompressed and processed
    again as ``.yaml``; ``.yaml`` is split into a submission directory;
    ``.oldhepdata`` is converted to YAML; anything else is extracted as an
    archive. YAML/archive contents are staged in a temporary directory under
    ``CFG_TMPDIR`` and then moved next to the upload.

    :param file_path: path of the saved upload.
    :param id: record identifier passed on to the processing step.
    :param old_submission_schema: forwarded to
        ``process_submission_directory``.
    :param old_data_schema: forwarded to ``process_submission_directory``;
        forced to ``True`` after an oldhepdata conversion.
    :return: an error dict on failure, otherwise the result of
        ``process_submission_directory``.
    """
    # Local import: only needed here to clean up the staging directory, and
    # the file's import block is not visible from this block.
    import shutil

    (file_save_directory, filename) = os.path.split(file_path)

    if not filename.endswith('.oldhepdata'):
        if filename.endswith('.yaml.gz'):
            # Handled before the staging directory is created: this branch
            # never uses it, so creating it first would leave it orphaned.
            print('Extracting: {} to {}'.format(file_path, file_path[:-3]))
            if not extract(file_path, file_path[:-3]):
                return {
                    "Archive file extractor": [{
                        "level": "error", "message": "{} is not a valid .gz file.".format(file_path)
                    }]
                }
            # NOTE(review): old_data_schema is reset to False here rather
            # than forwarded — confirm this is intentional.
            return process_zip_archive(file_path[:-3], id,
                                       old_submission_schema=old_submission_schema,
                                       old_data_schema=False)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            error, _last_updated = split_files(file_path, submission_temp_path)
            if error:
                shutil.rmtree(submission_temp_path, ignore_errors=True)
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            try:
                unzipped_path = extract(file_path, submission_temp_path)
            except Exception:
                # Deliberately broad: any extraction failure is reported to
                # the user as an invalid archive.
                unzipped_path = None

            if not unzipped_path:
                shutil.rmtree(submission_temp_path, ignore_errors=True)
                return {
                    "Archive file extractor": [{
                        "level": "error", "message": "{} is not a valid zip or tar archive file.".format(file_path)
                    }]
                }

        copy_errors = move_files(submission_temp_path, submission_path)
        if copy_errors:
            return copy_errors

        submission_found = find_file_in_directory(submission_path, lambda x: x == "submission.yaml")

        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error", "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found

    else:
        # The parent of the file's directory is searched by the converter,
        # and its last path component is used as the timestamp.
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(file_dir, id, time_stamp)

        # Check for errors: the converter reports them as a dict.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result
        old_data_schema = True

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_data_schema=old_data_schema,
                                        old_submission_schema=old_submission_schema)
Example #14
0
def get_file_in_directory(path, extension):
    """Return the first file under ``path`` whose name ends with ``extension``.

    :param path: directory handed to ``find_file_in_directory``.
    :param extension: suffix the file name must end with.
    :return: the second element of the ``find_file_in_directory`` match, or
        ``None`` when the search finds nothing (instead of failing on tuple
        unpacking).
    """
    file_info = find_file_in_directory(path, lambda x: x.endswith(extension))
    if not file_info:
        return None
    _directory, file_path = file_info

    return file_path
Example #15
0
def get_file_in_directory(path, extension):
    """Return the file under ``path`` whose name ends with ``extension``.

    :param path: directory handed to ``find_file_in_directory``.
    :param extension: suffix the file name must end with.
    :return: the second element of the match, or ``None`` when the search
        result is falsy.
    """
    def has_extension(name):
        return name.endswith(extension)

    match = find_file_in_directory(path, has_extension)
    if not match:
        return None
    return match[1]