def process_zip_archive(file, id):
    """Save an uploaded submission file, unpack it and process the result.

    The upload is stored under ``CFG_DATADIR/<id>/<timestamp>``. A single
    ``.yaml`` file is split into a submission directory, any other
    non-oldhepdata file is extracted as an archive, and a file whose name
    contains ``.oldhepdata`` is stored in an ``oldhepdata`` sub-directory.
    When no ``submission.yaml`` is found the directory is handed to
    ``check_and_convert_from_oldhepdata``.

    :param file: uploaded file object; only its ``filename`` attribute and
        its ``save(path)`` method are used here.
    :param id: record identifier, used as a directory name and passed on to
        the conversion and processing helpers.
    :return: the dict returned by ``check_and_convert_from_oldhepdata`` when
        it reports errors, otherwise the result of
        ``process_submission_directory``.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(
        current_app.config['CFG_DATADIR'], str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if '.oldhepdata' not in filename:
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(
            file_save_directory, remove_file_extension(filename))

        # Test the extension rather than a substring, so that an archive
        # whose name merely contains "yaml" is still extracted.
        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            split_files(file_path, submission_path)
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        if filename.endswith('.txt'):
            # Drop only the trailing ".txt"; other occurrences of ".txt"
            # inside the name are left alone.
            filename = filename[:-len('.txt')]

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id, time_stamp)

        # A dict result carries conversion errors; anything else is the
        # (basepath, submission_file_path) pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
def test_file_download_and_split(app, migrator, identifiers):
    """Download every identified record and check that splitting it
    creates the expected output directory under ``CFG_TMPDIR``."""
    with app.app_context():
        for test_id in identifiers:
            downloaded = migrator.download_file(test_id["hepdata_id"])
            assert downloaded is not None

            # Directory that split_files should populate, and the archive
            # path handed to it as third argument.
            output_dir = os.path.join(
                app.config['CFG_TMPDIR'], test_id["hepdata_id"])
            archive_path = os.path.join(
                app.config['CFG_TMPDIR'], test_id["hepdata_id"] + ".zip")

            split_files(downloaded, output_dir, archive_path)

            split_dir = os.path.join(
                app.config['CFG_TMPDIR'], test_id["hepdata_id"])
            assert os.path.exists(split_dir)
def process_zip_archive(file, id):
    """Save an uploaded submission file, unpack it and process the result.

    The upload is stored under ``CFG_DATADIR/<id>/<timestamp>``. A ``.yaml``
    file is split into a submission directory, an ``.oldhepdata`` file is
    stored in an ``oldhepdata`` sub-directory, and anything else is
    extracted as an archive. When no ``submission.yaml`` is found the
    directory is handed to ``check_and_convert_from_oldhepdata``.

    :param file: uploaded file object; only its ``filename`` attribute and
        its ``save(path)`` method are used here.
    :param id: record identifier, used as a directory name and passed on to
        the conversion and processing helpers.
    :return: an error dict when the YAML splitter or the oldhepdata
        conversion reports a problem, otherwise the result of
        ``process_submission_directory``.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(
        current_app.config['CFG_DATADIR'], str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        file.save(file_path)

        submission_path = os.path.join(
            file_save_directory, remove_file_extension(filename))

        if filename.endswith('.yaml'):
            # we split the singular yaml file and create a submission directory
            # (the second element of the returned pair is not needed here)
            error, _ = split_files(file_path, submission_path)
            if error:
                return {
                    "Single YAML file splitter": [{
                        "level": "error",
                        "message": str(error)
                    }]
                }
        else:
            # we are dealing with a zip, tar, etc. so we extract the contents
            extract(filename, file_path, submission_path)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_path = os.path.join(file_save_directory, 'oldhepdata')
        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(submission_path, id, time_stamp)

        # A dict result carries conversion errors; anything else is the
        # (basepath, submission_file_path) pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
def prepare_files_for_submission(self, inspire_id, force_retrieval=False):
    """
    Return the data directory for a record, downloading and splitting its
    file first when the directory is missing or a retrieval is forced.

    :param inspire_id: record identifier; used as the directory name under
        ``CFG_DATADIR`` and passed to ``self.download_file``.
    :param force_retrieval: when true, download even if the directory
        already exists.
    :return: output location if successful, None if the download yields
        nothing
    """
    output_location = os.path.join(current_app.config['CFG_DATADIR'], inspire_id)

    # Nothing to do when the directory is already there and no refresh
    # was requested.
    if os.path.exists(output_location) and not force_retrieval:
        print('File for {0} already in system...no download required.'.format(inspire_id))
        return output_location

    print('Downloading file for {0}'.format(inspire_id))
    file_location = self.download_file(inspire_id)
    if not file_location:
        return None

    # Resolve the destination again now that the download has finished.
    output_location = os.path.join(current_app.config['CFG_DATADIR'], inspire_id)
    archive_name = '{0}.zip'.format(output_location)
    split_files(file_location, output_location, archive_name)

    return output_location
def prepare_files_for_submission(self, inspire_id, force_retrieval=False):
    """
    Either reuses the files already present for a record, or downloads the
    record's file and splits it.

    A download happens when the record directory is missing, when it holds
    no file found by ``get_file_in_directory(..., 'yaml')``, or when
    ``force_retrieval`` is set.

    :param inspire_id: record identifier; used as the directory name under
        ``CFG_DATADIR`` and passed to ``self.download_file``.
    :param force_retrieval: when true, download even if files are present.
    :return: tuple ``(output_location, last_updated)``. ``output_location``
        is None when the download yields nothing. ``last_updated`` is the
        value reported by ``split_files`` when a split ran, otherwise the
        ``datetime.now()`` taken at the start of the call.
    """
    output_location = os.path.join(current_app.config["CFG_DATADIR"], inspire_id)
    last_updated = datetime.now()

    download = not os.path.exists(output_location) or (
        get_file_in_directory(output_location, 'yaml') is None)

    if download or force_retrieval:
        print("Downloading file for {0}".format(inspire_id))
        file_location = self.download_file(inspire_id)

        if file_location:
            error, last_updated = split_files(
                file_location, output_location,
                "{0}.zip".format(output_location))

            # Surface splitter problems instead of dropping them silently;
            # the return value is unchanged so callers are unaffected.
            # NOTE(review): assumes `log` is a standard logging.Logger.
            if error:
                log.error('Error splitting file for {0}: {1}'.format(
                    inspire_id, error))

            # remove temporary download file after processing; failing to
            # do so is not fatal, but only filesystem errors are tolerated
            try:
                os.remove(file_location)
            except OSError:
                log.info('Unable to remove {0}'.format(file_location))
        else:
            output_location = None
    else:
        print("File for {0} already in system...no download required.".
              format(inspire_id))

    return output_location, last_updated
def process_zip_archive(file, id):
    """Save an uploaded submission file, unpack it and process the result.

    The upload is stored under ``CFG_DATADIR/<id>/<timestamp>``. A ``.yaml``
    file is split, and any other non-oldhepdata file is extracted, into a
    temporary directory below ``CFG_TMPDIR``; the contents are then copied
    to the submission directory. An ``.oldhepdata`` file is stored in an
    ``oldhepdata`` sub-directory. When no ``submission.yaml`` is found the
    upload is handed to ``check_and_convert_from_oldhepdata``.

    :param file: uploaded file object; only its ``filename`` attribute and
        its ``save(path)`` method are used here.
    :param id: record identifier, used as a directory name and passed on to
        the conversion and processing helpers.
    :return: an error dict when splitting, extraction or the oldhepdata
        conversion reports a problem, otherwise the result of
        ``process_submission_directory``.
    :raises subprocess.CalledProcessError: if the copy command exits with a
        non-zero status.
    """
    filename = secure_filename(file.filename)
    time_stamp = str(int(round(time.time())))
    file_save_directory = os.path.join(
        current_app.config['CFG_DATADIR'], str(id), time_stamp)

    if not os.path.exists(file_save_directory):
        os.makedirs(file_save_directory)

    if not filename.endswith('.oldhepdata'):
        file_path = os.path.join(file_save_directory, filename)
        print('Saving file to {}'.format(file_path))
        file.save(file_path)

        submission_path = os.path.join(
            file_save_directory, remove_file_extension(filename))
        submission_temp_path = tempfile.mkdtemp(
            dir=current_app.config["CFG_TMPDIR"])

        # The temporary directory is removed on every exit from this
        # section: normal completion, an error return, or an exception.
        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission
                # directory (the second returned value is not needed here)
                error, _ = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            elif not extract(file_path, submission_temp_path):
                # we are dealing with a zip, tar, etc. that could not be
                # extracted
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message": "{} is not a valid zip or tar archive file.".format(
                            file_path)
                    }]
                }

            if not os.path.exists(submission_path):
                os.makedirs(submission_path)

            # Move files from submission_temp_path to submission_path
            # (try to avoid problems with EOS disk).
            if current_app.config.get('PRODUCTION_MODE', False):
                # production instance at CERN
                copy_command = ['xrdcp', '-N', '-f']
                copy_submission_path = submission_path.replace(
                    current_app.config['CFG_DATADIR'],
                    current_app.config['EOS_DATADIR'])
            else:
                # local instance
                copy_command = ['cp']
                copy_submission_path = submission_path

            print('Copying with: {} -r {} {}'.format(
                ' '.join(copy_command),
                submission_temp_path + '/.',
                copy_submission_path))
            subprocess.check_output(
                copy_command + ['-r', submission_temp_path + '/.',
                                copy_submission_path])
        finally:
            rmtree(submission_temp_path, ignore_errors=True)

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")
    else:
        file_path = os.path.join(file_save_directory, 'oldhepdata')
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        print('Saving file to {}'.format(os.path.join(file_path, filename)))
        file.save(os.path.join(file_path, filename))

        submission_found = False

    if submission_found:
        basepath, submission_file_path = submission_found
    else:
        result = check_and_convert_from_oldhepdata(file_path, id, time_stamp)

        # A dict result carries conversion errors; anything else is the
        # (basepath, submission_file_path) pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result

    return process_submission_directory(basepath, submission_file_path, id)
def process_zip_archive(file_path, id, old_submission_schema=False,
                        old_data_schema=False):
    """Unpack an already-saved submission file and process its contents.

    Handling depends on the file name:

    * ``*.yaml.gz`` is extracted next to the original (the path minus its
      last three characters) and the function calls itself on that file;
    * ``*.yaml`` is split into a temporary directory;
    * ``*.oldhepdata`` is handed to ``check_and_convert_from_oldhepdata``
      and ``old_data_schema`` is forced to True;
    * anything else is extracted as an archive into a temporary directory.

    For the ``.yaml`` and archive cases the temporary directory is then
    passed to ``move_files`` and the result must contain a
    ``submission.yaml``.

    :param file_path: path of the file to process.
    :param id: record identifier, passed on to the conversion and
        processing helpers.
    :param old_submission_schema: forwarded to
        ``process_submission_directory``.
    :param old_data_schema: forwarded to ``process_submission_directory``
        (always True for ``.oldhepdata`` input).
    :return: an error dict when extraction, splitting, moving or conversion
        reports a problem or no ``submission.yaml`` is found, otherwise the
        result of ``process_submission_directory``.
    """
    # Function-scope import: used only to clean up the staging directory.
    import shutil

    (file_save_directory, filename) = os.path.split(file_path)

    if not filename.endswith('.oldhepdata'):
        if filename.endswith('.yaml.gz'):
            print('Extracting: {} to {}'.format(file_path, file_path[:-3]))
            if not extract(file_path, file_path[:-3]):
                return {
                    "Archive file extractor": [{
                        "level": "error",
                        "message": "{} is not a valid .gz file.".format(file_path)
                    }]
                }
            # Forward both schema flags unchanged to the call that handles
            # the decompressed YAML file.
            return process_zip_archive(file_path[:-3], id,
                                       old_submission_schema=old_submission_schema,
                                       old_data_schema=old_data_schema)

        submission_path = os.path.join(
            file_save_directory, remove_file_extension(filename))
        # Created only once we know it will be used, so the .yaml.gz branch
        # above does not leave an empty directory behind.
        submission_temp_path = tempfile.mkdtemp(
            dir=current_app.config["CFG_TMPDIR"])

        staged = False
        try:
            if filename.endswith('.yaml'):
                # we split the singular yaml file and create a submission
                # directory (the second returned value is not needed here)
                error, _ = split_files(file_path, submission_temp_path)
                if error:
                    return {
                        "Single YAML file splitter": [{
                            "level": "error",
                            "message": str(error)
                        }]
                    }
            else:
                # we are dealing with a zip, tar, etc. so we extract the
                # contents; any failure is reported as an invalid archive
                try:
                    unzipped_path = extract(file_path, submission_temp_path)
                except Exception:
                    unzipped_path = None

                if not unzipped_path:
                    return {
                        "Archive file extractor": [{
                            "level": "error",
                            "message": "{} is not a valid zip or tar archive file.".format(file_path)
                        }]
                    }
            staged = True
        finally:
            # Remove the staging directory only when we leave before
            # move_files takes over (error return or exception).
            if not staged:
                shutil.rmtree(submission_temp_path, ignore_errors=True)

        copy_errors = move_files(submission_temp_path, submission_path)
        if copy_errors:
            return copy_errors

        submission_found = find_file_in_directory(
            submission_path, lambda x: x == "submission.yaml")

        if not submission_found:
            return {
                "Archive file extractor": [{
                    "level": "error",
                    "message": "No submission.yaml file has been found in the archive."
                }]
            }

        basepath, submission_file_path = submission_found
    else:
        # The parent of the file's directory is what gets converted; its
        # last path component is passed along as the time stamp.
        file_dir = os.path.dirname(file_save_directory)
        time_stamp = os.path.split(file_dir)[1]
        result = check_and_convert_from_oldhepdata(file_dir, id, time_stamp)

        # A dict result carries conversion errors; anything else is the
        # (basepath, submission_file_path) pair.
        if isinstance(result, dict):
            return result
        basepath, submission_file_path = result
        old_data_schema = True

    return process_submission_directory(basepath, submission_file_path, id,
                                        old_data_schema=old_data_schema,
                                        old_submission_schema=old_submission_schema)