def test_convert_fileobj(self, oldhepdata_file, oldhepdata_yaml_path):
    """Convert the ``oldhepdata_file`` input and compare with reference YAML.

    The converted output is written to a ``yaml`` directory below
    ``self.current_tmp`` and compared against ``oldhepdata_yaml_path``.
    """
    # test fileobj
    target_dir = os.path.join(self.current_tmp, 'yaml')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_file,
        target_dir,
        options={'input_format': 'oldhepdata'},
    )
    self.assertDirsEqual(oldhepdata_yaml_path, target_dir)
def test_convert_no_extract(self, oldhepdata_path, oldhepdata_yaml_path):
    """Check ``extract=False`` with an in-memory buffer and with a file path.

    In both cases the produced gzip-compressed tar archive is unpacked
    manually and its ``ARCHIVE_NAME`` sub-directory is compared with the
    reference YAML directory.
    """
    # Case 1: the archive is written to an in-memory binary stream.
    archive_stream = BytesIO()
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        archive_stream,
        options={'input_format': 'oldhepdata'},
        extract=False,
    )
    # Rewind so tarfile reads the archive from its beginning.
    archive_stream.seek(0)
    first_extract_dir = os.path.join(self.current_tmp, '1')
    with tarfile.open(mode='r:gz', fileobj=archive_stream) as tar:
        tar.extractall(first_extract_dir)
    self.assertDirsEqual(
        os.path.join(first_extract_dir, ARCHIVE_NAME), oldhepdata_yaml_path)

    # Case 2: the archive is written to a path on disk.
    archive_path = os.path.join(self.current_tmp, 'data.tar.gz')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        archive_path,
        options={'input_format': 'oldhepdata'},
        extract=False,
    )
    second_extract_dir = os.path.join(self.current_tmp, '2')
    with tarfile.open(archive_path, mode='r:gz') as tar:
        tar.extractall(second_extract_dir)
    self.assertDirsEqual(
        os.path.join(second_extract_dir, ARCHIVE_NAME), oldhepdata_yaml_path)
def test_convert_fileobj(self, oldhepdata_file, oldhepdata_yaml_path):
    """Run a conversion on ``oldhepdata_file`` and verify the YAML output."""
    # test fileobj
    server_url = self.get_server_url()
    yaml_output_dir = os.path.join(self.current_tmp, 'yaml')

    hepdata_converter_ws_client.convert(
        server_url,
        oldhepdata_file,
        yaml_output_dir,
        options={'input_format': 'oldhepdata'},
    )

    # The converted directory must match the reference YAML directory.
    self.assertDirsEqual(oldhepdata_yaml_path, yaml_output_dir)
def test_caching(self, oldhepdata_path, oldhepdata_yaml_path):
    """Convert the same input twice with ``id=1`` and compare both outputs.

    Each output directory must match the reference YAML directory, and the
    two output directories must match each other.
    """
    # test paths
    first_dir = os.path.join(self.current_tmp, 'yaml', '1')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        first_dir,
        options={'input_format': 'oldhepdata'},
        id=1,
    )
    self.assertDirsEqual(oldhepdata_yaml_path, first_dir)

    # Second request reuses the same id.
    second_dir = os.path.join(self.current_tmp, 'yaml', '2')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        second_dir,
        options={'input_format': 'oldhepdata'},
        id=1,
    )
    self.assertDirsEqual(oldhepdata_yaml_path, second_dir)

    self.assertDirsEqual(first_dir, second_dir)
def convert_zip_archive(input_archive, output_archive, options):
    """
    Convert a zip archive into a targz path with given options.

    The archive is unpacked into a temporary directory, which is always
    removed again, including when no input is found or an error is raised.

    :param input_archive: zip archive to unpack and convert
    :param output_archive: path the converter output is written to
    :param options: converter options; ``input_format`` defaults to ``'yaml'``
    :return: path of the resulting file, or ``None`` when the archive
        contains no suitable input
    """
    input_format = options.get('input_format', 'yaml')

    def is_wanted(name):
        # YAML submissions are identified by their submission file,
        # anything else by the ``.oldhepdata`` extension.
        if input_format == 'yaml':
            return name == 'submission.yaml'
        return name.endswith('.oldhepdata')

    input_root_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        validation = find_file_in_directory(input_root_dir, is_wanted)
        if not validation:
            return None

        input_directory, input_file = validation

        successful = convert(
            CFG_CONVERTER_URL,
            input_directory if input_format == 'yaml' else input_file,
            output=output_archive,
            options=options,
            extract=False,
        )
    finally:
        # Clean up the unpacked input on every exit path.
        rmtree(input_root_dir)

    if successful:
        return output_archive

    # Error occurred, the output is a HTML file: replace the last seven
    # characters (presumably the '.tar.gz' suffix) with '.html'.
    output_file = output_archive[:-7] + '.html'
    move(output_archive, output_file)
    return output_file
def convert_zip_archive(input_archive, output_archive, options):
    """
    Convert a zip archive into a targz path with given options.

    :param input_archive: archive handed to ``prepare_data_folder``
    :param output_archive: path the converter output is written to
    :param options: converter options; ``input_format`` defaults to ``'yaml'``
    :return: output_file, or ``None`` when ``prepare_data_folder`` yields
        nothing usable
    """
    input_format = options.get('input_format', 'yaml')

    with prepare_data_folder(input_archive, input_format) as validation:
        if not validation:
            return None

        input_directory, input_file = validation
        # YAML input is converted from its directory, other formats from
        # the single input file.
        if input_format == 'yaml':
            source = input_directory
        else:
            source = input_file

        successful = convert(
            CFG_CONVERTER_URL,
            source,
            output=output_archive,
            options=options,
            extract=False,
            timeout=CFG_CONVERTER_TIMEOUT,
        )

    # Error occurred, the output is a HTML file
    output_file = output_archive if successful else output_archive[:-7] + '.html'
    move(output_archive, output_file)
    return output_file
def test_convert_404(self, oldhepdata_file):
    """Check that a request to an invalid URL raises the client ``Error``.

    The raised error must carry the failure message for the URL and be
    chained to a ``requests`` ``HTTPError`` describing the 404 response.
    """
    broken_url = self.get_server_url() + '/notavalidurl'

    with self.assertRaises(hepdata_converter_ws_client.Error) as cm:
        hepdata_converter_ws_client.convert(
            broken_url,
            oldhepdata_file,
            self.current_tmp,
            options={'input_format': 'oldhepdata'},
            timeout=5,
        )

    raised = cm.exception
    self.assertEqual('Request to %s failed' % broken_url, str(raised))

    # The underlying HTTP failure is exposed as the exception's cause.
    cause = raised.__cause__
    self.assertIsInstance(cause, requests.exceptions.HTTPError)
    expected_prefix = "404 Client Error: NOT FOUND for url: %s" % broken_url
    self.assertTrue(str(cause).startswith(expected_prefix))
def test_return_value(self, oldhepdata_file, oldhepdata_yaml_path):
    """Check the value returned by ``convert`` when no output is passed.

    The returned data is opened as a gzip-compressed tar archive, unpacked,
    and its ``ARCHIVE_NAME`` sub-directory compared with the reference YAML.
    """
    # test fileobj
    extract_dir = os.path.join(self.current_tmp, 'yaml')
    archive_data = hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_file,
        options={'input_format': 'oldhepdata'},
    )

    # Wrap the returned data so tarfile can read it like a file.
    archive_stream = StringIO.StringIO(archive_data)
    with tarfile.open(mode='r:gz', fileobj=archive_stream) as tar:
        tar.extractall(extract_dir)

    converted_dir = os.path.join(extract_dir, ARCHIVE_NAME)
    self.assertDirsEqual(oldhepdata_yaml_path, converted_dir)
def test_convert_timeout(self, oldhepdata_file):
    """Check that a connection timeout surfaces as the client ``Error``.

    The raised error must carry the failure message for the URL and be
    chained to a ``requests`` ``ConnectTimeout``.
    """
    broken_url = 'https://example.com:81'

    with self.assertRaises(hepdata_converter_ws_client.Error) as cm:
        hepdata_converter_ws_client.convert(
            broken_url,
            oldhepdata_file,
            self.current_tmp,
            options={'input_format': 'oldhepdata'},
            timeout=5,
        )

    raised = cm.exception
    self.assertEqual('Request to %s failed' % broken_url, str(raised))

    # The underlying connection failure is exposed as the exception's cause.
    cause = raised.__cause__
    self.assertIsInstance(cause, requests.exceptions.ConnectTimeout)
    expected_prefix = (
        "HTTPSConnectionPool(host='example.com', port=81): Max retries exceeded with url"
    )
    self.assertTrue(str(cause).startswith(expected_prefix))
def test_convert_no_extract(self, oldhepdata_path, oldhepdata_yaml_path):
    """Check ``extract=False`` with a ``StringIO`` buffer and a file path.

    Both resulting gzip-compressed tar archives are unpacked manually and
    their ``ARCHIVE_NAME`` sub-directory is compared with the reference
    YAML directory.
    """
    # Case 1: the archive is written to an in-memory stream.
    archive_stream = StringIO.StringIO()
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        archive_stream,
        options={'input_format': 'oldhepdata'},
        extract=False,
    )
    # Rewind so tarfile reads the archive from its beginning.
    archive_stream.seek(0)
    first_extract_dir = os.path.join(self.current_tmp, '1')
    with tarfile.open(mode='r:gz', fileobj=archive_stream) as tar:
        tar.extractall(first_extract_dir)
    self.assertDirsEqual(
        os.path.join(first_extract_dir, ARCHIVE_NAME), oldhepdata_yaml_path)

    # Case 2: the archive is written to a path on disk.
    archive_path = os.path.join(self.current_tmp, 'data.tar.gz')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        archive_path,
        options={'input_format': 'oldhepdata'},
        extract=False,
    )
    second_extract_dir = os.path.join(self.current_tmp, '2')
    with tarfile.open(archive_path, mode='r:gz') as tar:
        tar.extractall(second_extract_dir)
    self.assertDirsEqual(
        os.path.join(second_extract_dir, ARCHIVE_NAME), oldhepdata_yaml_path)
def test_return_value(self, oldhepdata_file, oldhepdata_yaml_path):
    """Check the bytes returned by ``convert`` when no output is passed.

    The returned data is opened as a gzip-compressed tar archive, unpacked,
    and its ``ARCHIVE_NAME`` sub-directory compared with the reference YAML.
    """
    # test fileobj
    extract_dir = os.path.join(self.current_tmp, 'yaml')
    archive_data = hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_file,
        options={'input_format': 'oldhepdata'},
    )

    # Wrap the returned data so tarfile can read it like a file.
    archive_stream = BytesIO(archive_data)
    with tarfile.open(mode='r:gz', fileobj=archive_stream) as tar:
        tar.extractall(extract_dir)

    converted_dir = os.path.join(extract_dir, ARCHIVE_NAME)
    self.assertDirsEqual(oldhepdata_yaml_path, converted_dir)
def test_caching(self, oldhepdata_path, oldhepdata_yaml_path):
    """Run two conversions sharing ``id=1`` and verify both outputs agree."""
    # test paths
    yaml_root = os.path.join(self.current_tmp, 'yaml')

    first_dir = os.path.join(yaml_root, '1')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        first_dir,
        options={'input_format': 'oldhepdata'},
        id=1,
    )
    self.assertDirsEqual(oldhepdata_yaml_path, first_dir)

    second_dir = os.path.join(yaml_root, '2')
    hepdata_converter_ws_client.convert(
        self.get_server_url(),
        oldhepdata_path,
        second_dir,
        options={'input_format': 'oldhepdata'},
        id=1,
    )
    self.assertDirsEqual(oldhepdata_yaml_path, second_dir)

    # Both conversions must have produced the same directory contents.
    self.assertDirsEqual(first_dir, second_dir)
def convert_oldhepdata_to_yaml(input_path, output_path):
    """
    Converts the data on the server from oldhepdata format to the new one.

    :param input_path: input passed to the converter
    :param output_path: location passed to the converter as ``output``
    :return: the value returned by ``convert``
    """
    return convert(
        CFG_CONVERTER_URL,
        input_path,
        output=output_path,
        options={
            'input_format': 'oldhepdata',
            'output_format': 'yaml',
        },
    )
def download_datatable(data_resource, file_format, *args, **kwargs):
    """
    Send a single data table in the requested format.

    :param data_resource: resource whose ``file_location`` points at the table
    :param file_format: requested output format; ``'yaml'`` sends the file as is
    :param args: not used
    :param kwargs: must contain ``submission_id``; may contain ``table_name``
    :return: the ``send_file`` response for the table, or for the converter's
        HTML error output when the conversion failed
    """
    record_path, table_name = os.path.split(data_resource.file_location)

    # Build the download name from the submission id and optional table name.
    filename = 'HEPData-{0}'.format(kwargs.pop('submission_id'))
    if 'table_name' in kwargs:
        filename += '-' + kwargs.pop('table_name').replace(' ', '')

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)

    if file_format == 'yaml':
        return send_file(
            data_resource.file_location,
            as_attachment=True,
        )

    converted_archive = output_path + '-dir'

    if os.path.exists(output_path):
        # Something already exists at the output path: skip the conversion.
        successful = True
    else:
        successful = convert(
            CFG_CONVERTER_URL,
            record_path,
            output=converted_archive,
            options={
                'input_format': 'yaml',
                'output_format': file_format,
                'table': table_name,
                'filename': table_name.split('.')[0],
            },
            extract=False,
        )

    if successful:
        extracted_path = extract(
            filename + ".tar.gz", converted_archive, output_path + "." + file_format)
        file_to_send = get_file_in_directory(extracted_path, file_format)
    else:
        # Error occurred, the output is a HTML file
        file_to_send = converted_archive
        file_format = 'html'

    return send_file(
        file_to_send,
        as_attachment=True,
        attachment_filename=filename + '.' + file_format,
    )
def convert_oldhepdata_to_yaml(input_path, output_path):
    """
    Converts the data on the server from oldhepdata format to the new YAML format.

    :param input_path: input passed to the converter
    :param output_path: location passed to the converter as ``output``
    :return: whether conversion was successful
    """
    conversion_options = dict(input_format='oldhepdata', output_format='yaml')
    return convert(
        CFG_CONVERTER_URL,
        input_path,
        output=output_path,
        options=conversion_options,
    )
def download_datatable(datasubmission, file_format, *args, **kwargs):
    """
    Download a particular data table given a ``datasubmission``.

    :param datasubmission: data submission whose table is requested
    :param file_format: requested output format
    :param args: not used
    :param kwargs: must contain ``submission_id``; may contain ``table_name``
        and, for YODA output, ``rivet_analysis_name``
    :return: display_error or send_file depending on success of conversion
        (a redirect for the ``json`` format)
    """
    if file_format == 'json':
        return redirect('/record/data/{0}/{1}/{2}'.format(
            datasubmission.publication_recid,
            datasubmission.id,
            datasubmission.version))
    elif file_format not in CFG_SUPPORTED_FORMATS:
        return display_error(
            title="The " + file_format + " output format is not supported",
            description="This output format is not supported. " +
                        "Currently supported formats: " + str(CFG_SUPPORTED_FORMATS),
        )

    dataresource = DataResource.query.filter_by(
        id=datasubmission.data_file).one()

    record_path, table_name = os.path.split(dataresource.file_location)

    filename = 'HEPData-{0}-v{1}'.format(kwargs.pop('submission_id'),
                                         datasubmission.version)
    if 'table_name' in kwargs:
        # Replace or strip characters that are awkward in a file name.
        filename += '-' + kwargs.pop('table_name').replace(' ', '_').replace(
            '/', '_').replace('$', '').replace('\\', '')

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)

    if file_format == 'yaml':
        return send_file(dataresource.file_location,
                         as_attachment=True,
                         attachment_filename=filename + '.yaml')

    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_name.split('.')[0],
        'validator_schema_version': '0.1.0',
    }

    hepsubmission = HEPSubmission.query.filter_by(
        publication_recid=datasubmission.publication_recid,
        version=datasubmission.version).first()

    # The DOI is only passed on for submissions outside the sandbox.
    if datasubmission.doi and hepsubmission.overall_status != 'sandbox':
        options['hepdata_doi'] = datasubmission.doi.rsplit(
            '/', 1)[0].encode('ascii')

    if file_format == 'yoda':
        rivet_analysis_name = kwargs.pop('rivet_analysis_name', '')
        if rivet_analysis_name:
            options['rivet_analysis_name'] = rivet_analysis_name
        elif datasubmission.publication_inspire_id:
            record = get_record_contents(datasubmission.publication_recid)
            if record:
                # Check if this record has a Rivet analysis, then extract
                # the Rivet analysis name from the URL.
                if 'analyses' in record:
                    for analysis in record['analyses']:
                        if analysis['type'] == 'rivet':
                            options['rivet_analysis_name'] = analysis[
                                'analysis'].split('/')[-1]
                # Otherwise guess the Rivet analysis name using the
                # collaboration name, the creation year of the INSPIRE
                # record, and the INSPIRE ID.
                if 'rivet_analysis_name' not in options:
                    try:
                        year = parse(record['creation_date']).year
                    except Exception:
                        # Missing or unparsable creation date; do not catch
                        # KeyboardInterrupt/SystemExit as a bare except would.
                        year = record['year']  # publication year
                    options['rivet_analysis_name'] = '{0}_{1}_I{2}'.format(
                        ''.join(record['collaborations']).upper(),
                        year,
                        datasubmission.publication_inspire_id)

    successful = convert(
        CFG_CONVERTER_URL,
        record_path,
        output=output_path + '-dir',
        options=options,
        extract=False,
    )

    if successful:
        new_path = output_path + "." + file_format
        new_path = extract(output_path + '-dir', new_path)
        # The downloaded archive is no longer needed once extracted.
        os.remove(output_path + '-dir')
        file_to_send = get_file_in_directory(new_path, file_format)
    else:
        # Error occurred, the output is a HTML file
        file_to_send = output_path + '-dir'
        file_format = 'html'

    return send_file(file_to_send, as_attachment=True,
                     attachment_filename=filename + '.' + file_format)
def download_datatable(datasubmission, file_format, *args, **kwargs):
    """
    Download a particular data table given a ``datasubmission``.

    :param datasubmission: data submission whose table is requested
    :param file_format: requested output format
    :param args: not used
    :param kwargs: must contain ``submission_id``; may contain ``table_name``
        and, for YODA output, ``rivet_analysis_name``
    :return: display_error or send_file depending on success of conversion
        (a redirect for the ``json`` format)
    """
    if file_format == 'json':
        json_url = '/record/data/{0}/{1}/{2}'.format(
            datasubmission.publication_recid,
            datasubmission.id,
            datasubmission.version)
        return redirect(json_url)

    if file_format not in CFG_SUPPORTED_FORMATS:
        return display_error(
            title="The " + file_format + " output format is not supported",
            description="This output format is not supported. " +
                        "Currently supported formats: " + str(CFG_SUPPORTED_FORMATS),
        )

    dataresource = DataResource.query.filter_by(id=datasubmission.data_file).one()
    record_path, table_name = os.path.split(dataresource.file_location)

    filename = 'HEPData-{0}-v{1}'.format(
        kwargs.pop('submission_id'), datasubmission.version)
    if 'table_name' in kwargs:
        # Replace or strip characters that are awkward in a file name.
        cleaned_table_name = kwargs.pop('table_name')
        cleaned_table_name = cleaned_table_name.replace(' ', '_')
        cleaned_table_name = cleaned_table_name.replace('/', '_')
        cleaned_table_name = cleaned_table_name.replace('$', '')
        cleaned_table_name = cleaned_table_name.replace('\\', '')
        filename += '-' + cleaned_table_name

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)

    # Both of these formats send the stored file without conversion.
    if file_format in ('yaml', 'original'):
        return send_file(
            dataresource.file_location,
            as_attachment=True,
            attachment_filename=filename + '.yaml'
        )

    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_name.split('.')[0],
        'validator_schema_version': '0.1.0',
    }

    hepsubmission = HEPSubmission.query.filter_by(
        publication_recid=datasubmission.publication_recid,
        version=datasubmission.version).first()

    # The DOI is only passed on when the status does not start with 'sandbox'.
    if datasubmission.doi and not hepsubmission.overall_status.startswith('sandbox'):
        options['hepdata_doi'] = datasubmission.doi.rsplit('/', 1)[0]

    if file_format == 'yoda':
        # Prefer an explicitly supplied name, otherwise fall back to a guess.
        rivet_analysis_name = (kwargs.pop('rivet_analysis_name', '')
                               or guess_rivet_analysis_name(hepsubmission))
        if rivet_analysis_name:
            options['rivet_analysis_name'] = rivet_analysis_name

    converted_archive = output_path + '-dir'
    try:
        successful = convert(
            CFG_CONVERTER_URL,
            record_path,
            output=converted_archive,
            options=options,
            extract=False,
            timeout=CFG_CONVERTER_TIMEOUT,
        )
    except Error as error:  # hepdata_converter_ws_client.Error
        # NOTE(review): the address in this title looks like an
        # email-obfuscation placeholder - confirm the intended text.
        return display_error(title='Report concerns to [email protected]',
                             description=str(error))

    if successful:
        extracted_path = extract(converted_archive, output_path + "." + file_format)
        # The downloaded archive is no longer needed once extracted.
        os.remove(converted_archive)
        file_to_send = get_file_in_directory(extracted_path, file_format)
    else:
        # Error occurred, the output is a HTML file
        file_to_send = converted_archive
        file_format = 'html'

    return send_file(
        file_to_send,
        as_attachment=True,
        attachment_filename=filename + '.' + file_format,
    )
def download_datatable(datasubmission, file_format, *args, **kwargs):
    """
    Download a particular data table given a ``datasubmission``.

    :param datasubmission: data submission whose table is requested
    :param file_format: requested output format
    :param args: not used
    :param kwargs: must contain ``submission_id``; may contain ``table_name``
    :return: a redirect for ``json``, ``display_error`` for unsupported
        formats, otherwise the ``send_file`` response
    """
    if file_format == 'json':
        json_url = '/record/data/{0}/{1}/{2}'.format(
            datasubmission.publication_recid,
            datasubmission.id,
            datasubmission.version)
        return redirect(json_url)

    if file_format not in CFG_SUPPORTED_FORMATS:
        return display_error(
            title="The " + file_format + " output format is not supported",
            description="This output format is not supported. " +
                        "Currently supported formats: " + str(CFG_SUPPORTED_FORMATS),
        )

    dataresource = DataResource.query.filter_by(
        id=datasubmission.data_file).one()
    record_path, table_name = os.path.split(dataresource.file_location)

    filename = 'HEPData-{0}-v{1}'.format(
        kwargs.pop('submission_id'), datasubmission.version)
    if 'table_name' in kwargs:
        filename += '-' + kwargs.pop('table_name').replace(' ', '')

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)

    if file_format == 'yaml':
        return send_file(
            dataresource.file_location,
            as_attachment=True,
            attachment_filename=filename + '.yaml',
        )

    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_name.split('.')[0],
    }

    if datasubmission.doi:
        options['hepdata_doi'] = datasubmission.doi.rsplit(
            '/', 1)[0].encode('ascii')

    if datasubmission.publication_inspire_id and file_format == 'yoda':
        record = get_record_contents(datasubmission.publication_recid)
        if record:
            # Rivet analysis name built from the collaboration names, the
            # record's year and the INSPIRE ID.
            collaboration = ''.join(record['collaborations']).upper()
            options['rivet_analysis_name'] = '{0}_{1}_I{2}'.format(
                collaboration,
                record['year'],
                datasubmission.publication_inspire_id)

    converted_archive = output_path + '-dir'

    if os.path.exists(output_path):
        # Something already exists at the output path: skip the conversion.
        successful = True
    else:
        successful = convert(
            CFG_CONVERTER_URL,
            record_path,
            output=converted_archive,
            options=options,
            extract=False,
        )

    if successful:
        extracted_path = extract(
            filename + ".tar.gz", converted_archive, output_path + "." + file_format)
        file_to_send = get_file_in_directory(extracted_path, file_format)
    else:
        # Error occurred, the output is a HTML file
        file_to_send = converted_archive
        file_format = 'html'

    return send_file(
        file_to_send,
        as_attachment=True,
        attachment_filename=filename + '.' + file_format,
    )