def __check_tname__(self, name):
    '''
    Check a candidate test name against the names stored in the tests log.

    When no logged test name contains `name`, the name is free and is
    returned unchanged. Otherwise the matching tests are listed (numbered
    from 1, following the order of the log) and one of them has to be
    selected, or a different name entered by typing 'New'. If only one
    logged test matches, it is selected without prompting.

    Parameters
    ----------
        name: string
            Candidate test name. A logged test matches when `name` is
            contained in its full name
    Returns
    ----------
        `name` if no logged test contains it.
        False if an existing test was selected; self.full_name and
        self.path are then set from the log entry.
        None if a new name was entered; the new name goes through this same
        check and, when the check returns a truthy value, is handed to
        self.__set_tname__.
    '''
    test_log = get_tests_log()
    logged_names = list(test_log.keys())

    # Guard clause: nothing in the log contains this name, so it is free
    if all(name not in logged for logged in logged_names):
        return name

    while True:
        std_out('Possible tests found', force=True)

        # 1-based positions (in log order) of every test containing `name`
        candidates = []
        for position, logged in enumerate(logged_names, start=1):
            if name not in logged:
                continue
            candidates.append(position)
            std_out(' --- '.join((str(position), logged)), force=True)
        std_out('// --- \\\\', force=True)

        # A single match needs no question
        if len(candidates) == 1:
            choice = str(candidates[0])
        else:
            choice = input(
                'Similar tests found, please select one or input other name [New]: '
            )

        if choice == 'New':
            new_name = input('Enter new name: ')
            break

        if not choice.isdigit():
            std_out("Type 'New' for other name, or test number", 'ERROR')
            continue

        if int(choice) not in candidates:
            std_out(
                "Type 'New' for other name, or test number in possible tests",
                'ERROR')
            continue

        # Valid selection of an already logged test
        self.full_name = logged_names[int(choice) - 1]
        self.path = test_log[self.full_name]['path']
        std_out(f'Test full name, {self.full_name}', force=True)
        return False

    # Only reached through 'New': validate the replacement name as well
    if self.__check_tname__(new_name):
        self.__set_tname__(new_name)
def zenodo_upload(upload_descritor, sandbox=True, dry_run=True):
    '''
    Upload a series of tests (and optional reports) to zenodo.org.

    This section uses code inspired by this repo
    https://github.com/darvasd/upload-to-zenodo

    Every top-level key of the descriptor file is handled as one submission:
    a template from /zenodo_templates is filled with the descriptor values,
    stored in data/uploads, and used to request a submission ID. The test
    descriptions, the device data files and the reports are then posted to
    that submission. It will need a zenodo_token environment variable to work.

    The submission needs an additional "Publish" step. This can also be done
    from a script, but to be on the safe side, it is not included. (The
    attached file cannot be changed after publication)

    Parameters
    ----------
        upload_descritor: string
            The descriptor's filename (yaml) in data/uploads. '.yaml' is
            appended when the name does not contain it.
            Check options in the example yaml
            Option defaults (applied to every option the descriptor omits):
                include_processed_data: True (for tests)
                include_footer_doi: True (for pdfs)
                include_td_html: False (for yaml test_description in html)
            Upload types:
                publication: not implemented
                dataset: implemented (can contain several tests in it)
        sandbox: boolean
            True
            Uses zenodo's sandbox at sandbox.zenodo.org
        dry_run: boolean
            True
            Fakes the uploads to check everything. Files are still opened
            locally, so missing files are reported; nothing is posted.
    Returns
    ----------
        False if a test description could not be uploaded or if the upload
        type is 'publication' (not implemented). True otherwise. Device data
        or report files that fail to upload are logged and skipped, and do
        not change the result.
    '''

    # Applied to every option that the descriptor does not set
    default_options = {
        'include_processed_data': True,
        'include_footer_doi': True,
        'include_td_html': False
    }

    # Template base name (in zenodo_templates/) for each known upload type
    template_names = {
        'dataset': 'template_zenodo_dataset',
        'publication': 'template_zenodo_publication'
    }

    def zenodo_token():
        '''Return the zenodo access token.'''
        # Attribute access is kept for backwards compatibility, but it fails
        # when `environ` is a plain mapping such as os.environ, so fall back
        # to key lookup in that case.
        # NOTE(review): confirm which object `environ` is in this module.
        try:
            return environ.zenodo_token
        except AttributeError:
            return environ['zenodo_token']

    def fill_template(individual_descriptor,
                      descriptor_file_name,
                      upload_type='dataset'):
        '''
        Fill the template of the upload type with the descriptor values,
        store it in data/uploads and return it as a json string.
        '''
        if upload_type not in template_names:
            raise ValueError(f'Unknown upload type: {upload_type}')

        # Open base template with all keys
        template_path = join('zenodo_templates',
                             f'{template_names[upload_type]}.json')
        with open(template_path, 'r') as template_file:
            filled_template = json.load(template_file)

        # Fill it up for each key. Keys that the template metadata does not
        # already have are ignored
        template_metadata = filled_template['metadata']
        for key, value in individual_descriptor.items():
            if key in template_metadata:
                template_metadata[key] = value

        descriptor_path = join(config.paths['dataDirectory'], 'uploads',
                               descriptor_file_name)
        with open(descriptor_path, 'w') as descriptor_json:
            json.dump(filled_template, descriptor_json, ensure_ascii=True)
        std_out(f'Created descriptor file for {descriptor_file_name}',
                'SUCCESS')

        return json.dumps(filled_template)

    def get_submission_id(metadata, base_url):
        '''Request a new submission and return its ID (None on error).'''
        url = f"{base_url}/api/deposit/depositions"
        headers = {"Content-Type": "application/json"}
        response = post(url,
                        params={'access_token': zenodo_token()},
                        data=metadata,
                        headers=headers)

        if response.status_code > 210:
            std_out(
                "Error happened during submission, status code: " +
                str(response.status_code), 'ERROR')
            std_out(response.json(), "ERROR")
            return None

        # Get the submission ID
        return json.loads(response.text)["id"]

    def upload_file(url, upload_metadata, files):
        '''Post the files to url and return the response status code.'''
        response = post(url,
                        params={'access_token': zenodo_token()},
                        data=upload_metadata,
                        files=files)
        return response.status_code

    def size_in_gb(file_path):
        '''Return the size of the file in Gb.'''
        return getsize(file_path) / (1024 * 1024.0 * 1024)

    def send_file(url, file_path, upload_metadata):
        '''
        Open the file and upload it (or fake the upload in a dry run).
        Returns the status code. The file is always closed afterwards.
        '''
        # Opened in dry runs too, so that missing files are detected
        with open(file_path, 'rb') as file_handle:
            if dry_run:
                return 200
            return upload_file(url, upload_metadata, {'file': file_handle})

    std_out(f'Uploading {upload_descritor} to zenodo')
    if dry_run:
        std_out('Dry run. Verify output before setting dry_run to False',
                'WARNING')

    # Sandbox or not
    if sandbox:
        std_out('Using sandbox. Verify output before setting sandbox to False',
                'WARNING')
        base_url = config.zenodo_sandbox_base_url
    else:
        base_url = config.zenodo_real_base_url

    if '.yaml' not in upload_descritor:
        upload_descritor = upload_descritor + '.yaml'

    with open(join(config.paths['dataDirectory'], 'uploads', upload_descritor),
              'r') as descriptor_file:
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted files and is rejected by recent PyYAML releases.
        # Consider yaml.safe_load once the descriptors are confirmed to use
        # plain yaml only.
        descriptor = yaml.load(descriptor_file)

    for key in descriptor:
        entry = descriptor[key]

        # Set options for processed and raw uploads. The descriptor options
        # override the defaults one by one
        options = {**default_options, **(entry.get('options') or {})}
        stage_list = ['base']
        if options['include_processed_data']:
            stage_list.append('processed')
        std_out(f'Options {options}')

        # Fill template
        if 'upload_type' not in entry:
            std_out(f'Upload type not set for key {key}. Skipping', 'ERROR')
            continue
        upload_type = entry['upload_type']
        if upload_type not in template_names:
            # There is no template to fill for this type
            std_out(
                f'Upload type {upload_type} not supported for key {key}. Skipping',
                'ERROR')
            continue
        metadata = fill_template(entry, key, upload_type=upload_type)

        # Get submission ID
        submission_id = 0 if dry_run else get_submission_id(metadata, base_url)
        if submission_id is None:
            std_out('Submission ID error', 'ERROR')
            continue

        # Dataset upload
        if upload_type == 'dataset':
            # Get the tests to upload
            tests = entry['tests']

            # Get url where to post the files
            url = f"{base_url}/api/deposit/depositions/{submission_id}/files"
            submitted_note = (
                f'submitted with submission ID = {submission_id} '
                f'(DOI: 10.5281/zenodo.{submission_id})')

            test_logs = get_tests_log()

            for test_name in tests:
                # Get test path
                std_out(f'Uploading data from test {test_name}')
                test_path = test_logs[test_name]['path']

                # Upload the test descriptor (yaml (and html) format)
                td_upload = ['yaml']
                with open(join(test_path, 'test_description.yaml'),
                          'r') as td:
                    # NOTE(review): same yaml.load remark as for the
                    # upload descriptor
                    yaml_td = yaml.load(td)

                if options['include_td_html']:
                    html_td = td_to_html(yaml_td, test_path)
                    if html_td:
                        td_upload.append('html')

                for td_format in td_upload:
                    td_path = join(test_path,
                                   f'test_description.{td_format}')
                    upload_metadata = {
                        'name': f'test_description_{test_name}.{td_format}'
                    }

                    file_size = size_in_gb(td_path)
                    if file_size > 50:
                        std_out(
                            f'File size for {test_name} is over 50Gb ({file_size})',
                            'WARNING')

                    status_code = send_file(url, td_path, upload_metadata)
                    if status_code > 210:
                        # Without its description the test is not usable,
                        # so the whole upload is aborted
                        std_out(
                            "Error happened during file upload, status code: "
                            + str(status_code), 'ERROR')
                        return False

                    std_out(f"{upload_metadata['name']} {submitted_note}",
                            "SUCCESS")

                devices = yaml_td['devices']

                # Load the api devices to have them up to date in the cache
                if any(devices[device]['source'] == 'api'
                       for device in devices):
                    test = Test(test_name)
                    test.load(options={'store_cached_api': True})

                for device in devices:
                    std_out(f'Uploading data from device {device}')

                    # Upload basic and processed data
                    for file_stage in stage_list:
                        # Reset for each stage, so that an error never
                        # reports the file of a previous iteration
                        file_name = None
                        file_path = ''

                        try:
                            # Find device files
                            if file_stage == 'processed':
                                file_name = f'{device}.csv'
                                file_path = join(test_path, 'processed',
                                                 file_name)
                                upload_metadata = {
                                    'name': f'{device}_PROCESSED.csv'
                                }
                            elif file_stage == 'base':
                                source = devices[device]['source']
                                if 'csv' in source:
                                    file_name = devices[device][
                                        'processed_data_file']
                                    file_path = join(test_path, file_name)
                                elif source == 'api':
                                    file_name = f'{device}.csv'
                                    file_path = join(test_path, 'cached',
                                                     file_name)
                                upload_metadata = {'name': file_name}

                            if file_path == '':
                                # No file is defined for this device source
                                continue

                            file_size = size_in_gb(file_path)
                            if file_size > 50:
                                std_out(
                                    f'File size for {file_name} over 50Gb ({file_size})',
                                    'WARNING')

                            status_code = send_file(url, file_path,
                                                    upload_metadata)
                            if status_code > 210:
                                std_out(
                                    f"Error happened during file upload, status code: {status_code}. Skipping",
                                    'ERROR')
                                continue

                            std_out(
                                f"{upload_metadata['name']} {submitted_note}",
                                "SUCCESS")
                        except Exception as error:
                            # Device files are uploaded on a best-effort
                            # basis: report the problem and go on with the
                            # next file
                            if file_path and not exists(file_path):
                                std_out(
                                    f'File {file_name} does not exist (type = {file_stage}). Skipping',
                                    'ERROR')
                            else:
                                std_out(
                                    f'Error uploading {file_stage} data from device {device}: {error!r}. Skipping',
                                    'ERROR')

            # Check if we have a report in the keys
            if 'report' in entry:
                for file_name in entry['report']:
                    file_path = join(config.paths['dataDirectory'], 'uploads',
                                     file_name)

                    if options['include_footer_doi'] and file_name.endswith(
                            '.pdf'):
                        # Strip only the extension: '.pdf' could also appear
                        # earlier in the path
                        output_file_path = file_path[:-len('.pdf')] + '_doi.pdf'
                        include_footer(
                            file_path,
                            output_file_path,
                            link=
                            f'https://doi.org/10.5281/zenodo.{submission_id}')
                        file_path = output_file_path

                    upload_metadata = {'name': file_name}

                    file_size = size_in_gb(file_path)
                    if file_size > 50:
                        std_out(
                            f'File size for {file_name} is over 50Gb({file_size})',
                            'WARNING')

                    status_code = send_file(url, file_path, upload_metadata)
                    if status_code > 210:
                        std_out(
                            f"Error happened during file upload, status code: {status_code}. Skipping",
                            'ERROR')
                        continue

                    std_out(f"{upload_metadata['name']} {submitted_note}",
                            "SUCCESS")

        if upload_type == 'publication':
            std_out('Not implemented')
            return False

        std_out(
            f'Submission completed - (DOI: 10.5281/zenodo.{submission_id})',
            'SUCCESS')
        std_out('------------------------------------------------------------')

    return True