def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Locate a ``.oldhepdata`` file under ``input_directory`` and convert it to YAML.

    The conversion output is written to ``<CFG_DATADIR>/<id>/<timestamp>/yaml``.

    :param input_directory: directory searched for a file ending in ``.oldhepdata``
    :param id: record identifier, used as a path component of the output location
    :param timestamp: timestamp string, used as a path component of the output location
    :return: a ``{"Converter": [...]}`` error dictionary when no input file is
        found or the conversion reports failure; otherwise whatever
        ``find_file_in_directory`` returns when searching the converted path
        for ``submission.yaml``
    """
    def converter_error(message):
        # Every failure of this function is reported in the same envelope.
        return {"Converter": [{"level": "error", "message": message}]}

    data_root = current_app.config['CFG_DATADIR']
    converted_path = os.path.join(data_root, str(id), timestamp, 'yaml')

    source_match = find_file_in_directory(
        input_directory,
        lambda name: name.endswith('.oldhepdata'),
    )
    if not source_match:
        return converter_error(
            "No file with .oldhepdata extension or a submission.yaml"
            " file has been found in the archive."
        )

    # The second element of the match is the file handed to the converter.
    if convert_oldhepdata_to_yaml(source_match[1], converted_path):
        return find_file_in_directory(
            converted_path,
            lambda name: name == "submission.yaml",
        )

    return converter_error(
        "The conversion from oldhepdata "
        "to the YAML format has not succeeded. "
        "Please submit archives in the new format."
    )
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file and convert it
    to YAML if it does.

    The conversion is done inside a temporary directory below ``CFG_TMPDIR``;
    on success the result is handed to ``move_files`` to be placed at the path
    returned by ``get_data_path_for_record(str(id), timestamp, 'yaml')``.
    The temporary directory is removed on every exit path.

    :param input_directory: directory searched for a file ending in ``.oldhepdata``
    :param id: record identifier (converted to ``str`` for the data path)
    :param timestamp: timestamp string used for the data path
    :return: a ``{"Converter": [...]}`` error dictionary if no input file is
        found or the conversion fails; the value returned by ``move_files``
        if that is truthy; otherwise the result of searching the converted
        path for ``submission.yaml``
    """
    converted_path = get_data_path_for_record(str(id), timestamp, 'yaml')

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level": "error",
                "message": "No file with .oldhepdata extension has been found."
            }]
        }

    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')

    try:
        try:
            successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_temp_path)
            if not successful:
                # On failure the converter output is an HTML page; the error
                # message is its title with the part after the final "//" removed.
                with open(converted_temp_path) as error_page:
                    soup = BeautifulSoup(error_page, "lxml")
                title = soup.title.string if soup.title else None
                errormsg = title.rsplit("//", 1)[0] if title else "Unknown error"
        except Error as error:  # hepdata_converter_ws_client.Error
            successful = False
            errormsg = str(error)

        if not successful:
            return {
                "Converter": [{
                    "level": "error",
                    "message": "The conversion from oldhepdata "
                               "to the YAML format has not succeeded. "
                               "Error message from converter follows:<br/><br/>" + errormsg
                }]
            }

        copy_errors = move_files(converted_temp_path, converted_path)
        if copy_errors:
            return copy_errors
    finally:
        # Best-effort cleanup; harmless if the directory is already gone.
        shutil.rmtree(converted_temp_dir, ignore_errors=True)

    return find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file and convert it
    to YAML if it does.

    The conversion runs in a temporary directory below ``CFG_TMPDIR``. On
    success the converted files are copied into
    ``<CFG_DATADIR>/<id>/<timestamp>/yaml`` with an external copy command
    (``xrdcp`` when ``PRODUCTION_MODE`` is set, ``cp`` otherwise). The
    temporary directory is removed on every exit path, including when the
    copy command fails.

    :param input_directory: directory searched for a file ending in ``.oldhepdata``
    :param id: record identifier, used as a path component of the output location
    :param timestamp: timestamp string, used as a path component of the output location
    :return: a ``{"Converter": [...]}`` error dictionary if no input file is
        found or the conversion fails; otherwise the result of searching the
        converted path for ``submission.yaml``
    :raises subprocess.CalledProcessError: if the copy command exits non-zero
    """
    converted_path = os.path.join(current_app.config['CFG_DATADIR'], str(id), timestamp, 'yaml')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(converted_path, exist_ok=True)

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level": "error",
                "message": "No file with .oldhepdata extension or a submission.yaml"
                           " file has been found in the archive."
            }]
        }

    converted_temp_dir = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])
    converted_temp_path = os.path.join(converted_temp_dir, 'yaml')

    try:
        successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_temp_path)
        if not successful:
            # On failure the converter output is an HTML page; the error
            # message is its title with the part after the final "//" removed.
            with open(converted_temp_path) as error_page:
                soup = BeautifulSoup(error_page, "lxml")
            title = soup.title.string if soup.title else None
            errormsg = title.rsplit("//", 1)[0] if title else "Unknown error"
            return {
                "Converter": [{
                    "level": "error",
                    "message": "The conversion from oldhepdata "
                               "to the YAML format has not succeeded. "
                               "Error message from converter follows.\n" + errormsg
                }]
            }

        # Copy files from converted_temp_path to converted_path
        # (try to avoid problems on EOS disk).
        if current_app.config.get('PRODUCTION_MODE', False):  # production instance at CERN
            copy_command = ['xrdcp', '-N', '-f']
            copy_converted_path = converted_path.replace(current_app.config['CFG_DATADIR'],
                                                         current_app.config['EOS_DATADIR'])
        else:  # local instance
            copy_command = ['cp']
            copy_converted_path = converted_path
        print('Copying with: {} -r {} {}'.format(' '.join(copy_command),
                                                 converted_temp_path + '/.',
                                                 copy_converted_path))
        subprocess.check_output(copy_command + ['-r', converted_temp_path + '/.',
                                                copy_converted_path])
    finally:
        rmtree(converted_temp_dir, ignore_errors=True)

    return find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
def convert_zip_archive(input_archive, output_archive, options):
    """
    Convert a zip archive into a targz path with given options.

    The archive is extracted into a temporary directory, which is removed
    again on every exit path (also when no input is found or the conversion
    raises).

    :param input_archive: path (or file object) of the zip archive to convert
    :param output_archive: path the converter writes its output to
    :param options: converter options; ``options['input_format']`` selects
        between ``'yaml'`` (default, looks for ``submission.yaml``) and any
        other value (looks for a file ending in ``.oldhepdata``)
    :return: ``None`` if no suitable input was found in the archive; otherwise
        the path of the output file. If the conversion failed, the output is
        an HTML file and is renamed to end in ``.html``.
    """
    input_root_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        input_format = options.get('input_format', 'yaml')
        validation = find_file_in_directory(
            input_root_dir,
            lambda x: x == 'submission.yaml' if input_format == 'yaml' else x.endswith('.oldhepdata')
        )
        if not validation:
            return None

        input_directory, input_file = validation

        # YAML submissions are converted from their directory, oldhepdata
        # submissions from the single file.
        successful = convert(
            CFG_CONVERTER_URL,
            input_directory if input_format == 'yaml' else input_file,
            output=output_archive,
            options=options,
            extract=False,
        )
    finally:
        rmtree(input_root_dir)

    # Error occurred, the output is a HTML file
    if not successful:
        # Drops the last 7 characters, i.e. assumes output_archive ends in ".tar.gz".
        output_file = output_archive[:-7] + '.html'
    else:
        output_file = output_archive
    move(output_archive, output_file)

    return output_file
def process_zip_archive(file, id):
    """
    Save an uploaded file below ``<CFG_DATADIR>/<id>/<timestamp>`` and process it.

    Uploads whose name ends in ``.oldhepdata`` are stored in an ``oldhepdata``
    sub-directory and converted to YAML. A single ``.yaml`` file is split
    into a submission directory. Anything else is treated as an archive and
    extracted. If no ``submission.yaml`` is found after that, conversion from
    oldhepdata is attempted on the submission directory.

    :param file: uploaded file object providing ``filename`` and ``save()``
    :param id: record identifier
    :return: an error dictionary if splitting or conversion failed; otherwise
        the result of ``process_submission_directory``
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'], str(id), time_stamp)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(file_save_directory, exist_ok=True)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            error, _last_updated = split_files(file_path, submission_path)
            if error:
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        os.makedirs(file_path, exist_ok=True)

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id, time_stamp)

        # The converter reports errors as a dictionary, success as a pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
def prepare_data_folder(input_archive, input_format):
    """
    Extract a zip archive into a temporary directory and yield the input found in it.

    This is a generator that yields exactly once: the result of searching the
    extracted tree for ``submission.yaml`` (when ``input_format`` is
    ``'yaml'``) or for a file ending in ``.oldhepdata`` (any other format).
    The temporary directory, created below ``CFG_TMPDIR``, is removed when
    the generator is resumed or closed.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager``; the
    decorator is not visible here.

    :param input_archive: zip archive to extract
    :param input_format: ``'yaml'`` or another format name
    """
    def is_wanted(name):
        if input_format == 'yaml':
            return name == 'submission.yaml'
        return name.endswith('.oldhepdata')

    scratch_dir = tempfile.mkdtemp(dir=current_app.config['CFG_TMPDIR'])
    try:
        with zipfile.ZipFile(input_archive, 'r') as archive:
            archive.extractall(path=scratch_dir)

        # Locate the appropriate file/directory in the extracted archive
        located = find_file_in_directory(scratch_dir, is_wanted)
        yield located
    finally:
        rmtree(scratch_dir)
def check_and_convert_from_oldhepdata(input_directory, id, timestamp):
    """
    Check if the input directory contains a .oldhepdata file and convert it
    to YAML if it does.

    The conversion output is written to ``<CFG_DATADIR>/<id>/<timestamp>/yaml``.

    :param input_directory: directory searched for a file ending in ``.oldhepdata``
    :param id: record identifier, used as a path component of the output location
    :param timestamp: timestamp string, used as a path component of the output location
    :return: a ``{"Converter": [...]}`` error dictionary if no input file is
        found or the conversion fails (including the converter's own error
        message); otherwise the result of searching the converted path for
        ``submission.yaml``
    """
    converted_path = os.path.join(current_app.config['CFG_DATADIR'], str(id), timestamp, 'yaml')

    oldhepdata_found = find_file_in_directory(
        input_directory,
        lambda x: x.endswith('.oldhepdata'),
    )
    if not oldhepdata_found:
        return {
            "Converter": [{
                "level": "error",
                "message": "No file with .oldhepdata extension or a submission.yaml"
                           " file has been found in the archive."
            }]
        }

    successful = convert_oldhepdata_to_yaml(oldhepdata_found[1], converted_path)
    if not successful:
        # On failure the converter output is an HTML page; the error message
        # is its title with the part after the final "//" removed.
        with open(converted_path) as error_page:
            soup = BeautifulSoup(error_page, "lxml")
        # Guard against a page without a usable <title> instead of crashing.
        title = soup.title.string if soup.title else None
        errormsg = title.rsplit("//", 1)[0] if title else "Unknown error"

        return {
            "Converter": [{
                "level": "error",
                "message": "The conversion from oldhepdata "
                           "to the YAML format has not succeeded. "
                           "Error message from converter follows.\n" + errormsg
            }]
        }

    return find_file_in_directory(converted_path, lambda x: x == "submission.yaml")
def process_zip_archive(file, id):
    """
    Save an uploaded file below ``<CFG_DATADIR>/<id>/<timestamp>`` and process it.

    Uploads whose name contains ``.oldhepdata`` (optionally followed by a
    ``.txt`` suffix, which is stripped) are stored in an ``oldhepdata``
    sub-directory and converted to YAML. A single ``.yaml`` file is split
    into a submission directory. Anything else is treated as an archive and
    extracted. If no ``submission.yaml`` is found after that, conversion from
    oldhepdata is attempted on the submission directory.

    :param file: uploaded file object providing ``filename`` and ``save()``
    :param id: record identifier
    :return: an error dictionary if the conversion failed; otherwise the
        result of ``process_submission_directory``
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'], str(id), time_stamp)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(file_save_directory, exist_ok=True)

    # Substring test (not endswith) so that "name.oldhepdata.txt" is still
    # recognised as an oldhepdata upload.
    if '.oldhepdata' not in filename:
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        # Test the extension, not a substring: an archive whose name merely
        # contains "yaml" must still be extracted.
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            split_files(file_path, submission_path)
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(submission_path,
                                                  lambda x: x == "submission.yaml")
    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        os.makedirs(file_path, exist_ok=True)

        if filename.endswith('.txt'):
            # Strip only the trailing suffix, not every ".txt" in the name.
            filename = filename[:-len('.txt')]
        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id, time_stamp)

        # The converter reports errors as a dictionary, success as a pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
def create_original_with_resources(submission, data_filepath, output_path):
    """Copy or create 'original' zip file, i.e. yaml files with resources.

    If resources were imported from hepdata.cedar.ac.uk we create a new
    archive in a format that could be re-uploaded as a submission: the
    contents of ``data_filepath`` plus the record's ``resources`` directory,
    with ``location:`` entries of the form ``/resource/.../<name>`` in
    ``submission.yaml`` rewritten to just ``<name>``. Otherwise
    ``data_filepath`` is copied to ``output_path`` unchanged.

    The new archive is a zip if ``output_path`` ends in ``.zip`` and a
    gzipped tar otherwise; it is always placed exactly at ``output_path``.

    :param type submission: HEPSubmission object
    :param type data_filepath: Path to original file
    :param type output_path: Path to output file (in converted dir)
    :return: None
    """
    resource_location = os.path.join(
        get_data_path_for_record(str(submission.publication_recid)),
        'resources'
    )

    if not os.path.isdir(resource_location):
        shutil.copy2(data_filepath, output_path)
        return

    # There is a resources directory from when this record was imported
    # from the old hepdata site. We need to create a new archive with the
    # contents of data_filepath and resources.
    with tempfile.TemporaryDirectory(dir=current_app.config['CFG_TMPDIR']) as tmpdir:
        # Copy resources directory into 'contents' dir in temp directory
        contents_path = os.path.join(tmpdir, 'contents')
        shutil.copytree(resource_location, contents_path)

        # Unzip data_filepath into contents path
        shutil.unpack_archive(data_filepath, contents_path)

        # Need to go through the submission file and update the paths so
        # that all resources are at the top level. This should allow the
        # archive to be re-uploaded or imported.
        submission_found = find_file_in_directory(
            contents_path,
            lambda x: x == "submission.yaml"
        )
        if submission_found:
            p = re.compile(r'(\s+location: )\/resource\/.*\/([^\/]+)')
            # With inplace=True, print() writes back into the file being read.
            with fileinput.FileInput(submission_found[1], inplace=True) as file:
                for line in file:
                    print(p.sub(r'\g<1>\g<2>', line), end='')

        # Zip up contents dir into a new file
        base, ext = os.path.splitext(output_path)
        if ext == '.zip':
            zip_type = 'zip'
        else:
            zip_type = 'gztar'
            # splitext() only strips ".gz"; make_archive appends ".tar.gz"
            # itself, so strip the full suffix to avoid "x.tar.tar.gz".
            if output_path.endswith('.tar.gz'):
                base = output_path[:-len('.tar.gz')]

        print("Creating archive at %s" % output_path)
        archive_path = shutil.make_archive(base, zip_type, contents_path)
        # For any other extension (e.g. ".tgz") the generated name still
        # differs from the requested one; move it into place.
        if os.path.abspath(archive_path) != os.path.abspath(output_path):
            shutil.move(archive_path, output_path)
def process_zip_archive(file, id):
    """
    Save an uploaded file below ``<CFG_DATADIR>/<id>/<timestamp>`` and process it.

    Uploads whose name ends in ``.oldhepdata`` are stored in an ``oldhepdata``
    sub-directory and converted to YAML. Otherwise the upload is split (single
    ``.yaml`` file) or extracted (archive) into a temporary directory below
    ``CFG_TMPDIR`` and then copied to the submission directory with an
    external copy command (``xrdcp`` when ``PRODUCTION_MODE`` is set, ``cp``
    otherwise). The temporary directory is removed on every exit path. If no
    ``submission.yaml`` is found afterwards, conversion from oldhepdata is
    attempted.

    :param file: uploaded file object providing ``filename`` and ``save()``
    :param id: record identifier
    :return: an error dictionary if splitting, extraction or conversion
        failed; otherwise the result of ``process_submission_directory``
    :raises subprocess.CalledProcessError: if the copy command exits non-zero
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(current_app.config['CFG_DATADIR'], str(id), time_stamp)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(file_save_directory, exist_ok=True)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        print('Saving file to {}'.format(file_path))
        file.save(file_path)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission directory
                error, _last_updated = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            else:
                # we are dealing with a zip, tar, etc. so we extract the contents
                if not extract(file_path, submission_temp_path):
                    return {
                        "Archive file extractor": [{
                            "level": "error",
                            "message": "{} is not a valid zip or tar archive file.".format(file_path)
                        }]
                    }

            os.makedirs(submission_path, exist_ok=True)

            # Copy files from submission_temp_path to submission_path
            # (try to avoid problems with EOS disk).
            if current_app.config.get('PRODUCTION_MODE', False):  # production instance at CERN
                copy_command = ['xrdcp', '-N', '-f']
                copy_submission_path = submission_path.replace(
                    current_app.config['CFG_DATADIR'], current_app.config['EOS_DATADIR'])
            else:  # local instance
                copy_command = ['cp']
                copy_submission_path = submission_path
            print('Copying with: {} -r {} {}'.format(' '.join(copy_command),
                                                     submission_temp_path + '/.',
                                                     copy_submission_path))
            subprocess.check_output(
                copy_command + ['-r', submission_temp_path + '/.', copy_submission_path])
        finally:
            # Also runs on the early error returns above and when the copy fails.
            rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        os.makedirs(file_path, exist_ok=True)

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(file_path, id, time_stamp)

        # The converter reports errors as a dictionary, success as a pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
def get_file_in_directory(path, extension):
    """
    Return the file found under ``path`` whose name ends with ``extension``.

    :param path: directory to search
    :param extension: suffix the file name must end with
    :return: the second element of the ``(directory, file)`` pair returned by
        ``find_file_in_directory``, or ``None`` if nothing was found
    """
    found = find_file_in_directory(path, lambda x: x.endswith(extension))
    if not found:
        # Unpacking a falsy "not found" result would raise TypeError.
        return None
    _directory, file_path = found
    return file_path
def process_zip_archive(file_path, id, old_submission_schema=False, old_data_schema=False):
    """
    Process an already-saved upload: unpack or convert it, then process the
    resulting submission directory.

    * ``*.oldhepdata``: converted to YAML via
      ``check_and_convert_from_oldhepdata``; ``old_data_schema`` is forced to
      ``True`` for the subsequent processing.
    * ``*.yaml.gz``: decompressed next to the original, then this function is
      called again on the resulting ``.yaml`` file.
    * ``*.yaml``: split into a submission directory.
    * anything else: treated as a zip/tar archive and extracted.

    Splitting and extraction happen in a temporary directory below
    ``CFG_TMPDIR`` whose contents are then passed to ``move_files``; the
    temporary directory is removed on every exit path.

    :param file_path: path of the saved upload
    :param id: record identifier
    :param old_submission_schema: passed through to ``process_submission_directory``
    :param old_data_schema: passed through to ``process_submission_directory``
        (overridden as described above)
    :return: an error dictionary on failure; otherwise the result of
        ``process_submission_directory``
    """
    # Local import: this block does not otherwise reference shutil.
    import shutil

    (file_save_directory, filename) = os.path.split(file_path)

    if not filename.endswith('.oldhepdata'):
        if filename.endswith('.yaml.gz'):
            # Handled before any temporary directory is created, since the
            # recursive call sets up its own.
            unzipped_file_path = file_path[:-3]
            print('Extracting: {} to {}'.format(file_path, unzipped_file_path))
            if not extract(file_path, unzipped_file_path):
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message": "{} is not a valid .gz file.".format(file_path)
                    }]
                }
            # NOTE(review): old_data_schema is reset to False here rather than
            # passed through; kept as in the original, confirm this is intended.
            return process_zip_archive(unzipped_file_path, id,
                                       old_submission_schema=old_submission_schema,
                                       old_data_schema=False)

        submission_path = os.path.join(file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(dir=current_app.config["CFG_TMPDIR"])

        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission directory
                error, _last_updated = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            else:
                # we are dealing with a zip, tar, etc. so we extract the contents
                try:
                    unzipped_path = extract(file_path, submission_temp_path)
                except Exception:
                    # Deliberately broad: any extraction failure is reported
                    # to the user as an invalid archive.
                    unzipped_path = None

                if not unzipped_path:
                    return {
                        "Archive file extractor": [{
                            "level": "error",
                            "message": "{} is not a valid zip or tar archive file.".format(file_path)
                        }]
                    }

            copy_errors = move_files(submission_temp_path, submission_path)
            if copy_errors:
                return copy_errors
        finally:
            # Best-effort cleanup; harmless if the directory is already gone.
            shutil.rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(submission_path,
                                                  lambda x: x == "submission.yaml")
        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error",
                    "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found
    else:
        # The parent of the directory holding the file is searched for the
        # .oldhepdata file; its final path component is used as the timestamp.
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(file_dir, id, time_stamp)

        # The converter reports errors as a dictionary, success as a pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result
        old_data_schema = True

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_data_schema=old_data_schema,
                                        old_submission_schema=old_submission_schema)
def get_file_in_directory(path, extension):
    """
    Return the file found under ``path`` whose name ends with ``extension``.

    :param path: directory to search
    :param extension: suffix the file name must end with
    :return: element ``[1]`` of the ``find_file_in_directory`` result, or
        ``None`` when that result is falsy
    """
    match = find_file_in_directory(path, lambda name: name.endswith(extension))
    if not match:
        return None
    return match[1]