def main(config, access_token, profile):
    """Re-upload metadata XML files and reconcile their SONG records.

    The listing file ``app_ctx[profile]`` holds one tab-separated row per
    file: project code, object id, analysis id, file name and the md5 that
    SONG has on record. For each row the XML is downloaded with
    ``score-client`` when no local copy exists, regenerated through
    ``generate_metadata_xml`` for EGA ``bundle*`` files, compared against the
    SONG and score md5 sums, uploaded again, copied to the meta bucket,
    updated in SONG when its md5 differs, and published. A
    ``<fileName>.fix`` marker in ``app_ctx['xml_fix_dir']`` records that a
    row was processed so that later runs skip it.

    Args:
        config: YAML document (text or stream) handed to ``yaml.safe_load``.
        access_token: Token passed to ``ApiConfig`` for the SONG API.
        profile: Selects the listing file (``app_ctx[profile]``), the
            ``score-client`` profile and the SONG server
            (``app_ctx['song'][profile]``).

    Raises:
        subprocess.CalledProcessError: If a ``score-client`` or ``aws``
            command exits with a non-zero status.
    """
    # NOTE(review): this used to be a bare ``yaml.load(config)``. Without a
    # Loader that call can build arbitrary Python objects and is rejected by
    # PyYAML >= 6, so the safe loader is used. Confirm that the config does
    # not rely on Python-specific YAML tags.
    app_ctx = yaml.safe_load(config)

    def log_issue(message):
        """Append *message* to the run log named by ``app_ctx['log']``."""
        with open(app_ctx['log'], 'a') as log_file:
            log_file.write(message)

    # Index the EGA job rows by their first column (the analysis id).
    ega_xml = {}
    with open(app_ctx['ega_xml_info'], 'r', newline='') as info_file:
        for row in info_file:
            ega_xml[row.split('\t')[0]] = row.rstrip()

    # Start every run with fresh report files.
    for report_key in ('log', 'xml_mismatch'):
        if os.path.isfile(app_ctx[report_key]):
            os.remove(app_ctx[report_key])

    if not os.path.isdir(app_ctx['xml_fix_dir']):
        os.mkdir(app_ctx['xml_fix_dir'])

    with open(app_ctx[profile], 'r', newline='') as listing:
        for row in listing:
            (projectCode, objectId, analysisId, fileName,
             songMd5) = row.rstrip().split('\t')

            # Double check for safety: only approved projects may go to AWS.
            if profile == 'aws' and projectCode not in app_ctx['aws_approved']:
                log_issue('{0}::{1} object {2}: is not allowed in AWS\n'.format(
                    projectCode, analysisId, objectId))
                continue

            # Only download if there is no local copy.
            fpath = os.path.join(app_ctx['xml_dir'], fileName)
            if not os.path.isfile(fpath):
                subprocess.check_output([
                    'score-client', '--profile', profile, 'download',
                    '--object-id', objectId, '--validate', 'false',
                    '--force', '--output-dir', app_ctx['xml_dir']
                ])

            # md5 of the copy obtained through score-client.
            scoreMd5 = get_md5(fpath)

            # EGA bundle XMLs are regenerated from the EGA job information.
            if analysisId.startswith('EGA') and fileName.startswith('bundle'):
                if not ega_xml.get(analysisId):
                    click.echo('{}::{}: the ega transfer job is missing'.format(
                        projectCode, analysisId))
                    log_issue(
                        '{0}::{1}: the ega transfer job is missing in the completed folder\n'
                        .format(projectCode, analysisId))
                    continue
                os.makedirs(app_ctx['ega_xml_dir'], exist_ok=True)
                fpath = os.path.join(app_ctx['ega_xml_dir'], fileName)
                if not os.path.isfile(fpath):
                    generate_metadata_xml(ega_xml[analysisId], app_ctx)

            # md5 and size of the file that is going to be uploaded.
            fileMd5 = get_md5(fpath)
            fileSize = os.path.getsize(fpath)

            if fileMd5 != songMd5 or fileMd5 != scoreMd5:
                with open(app_ctx['xml_mismatch'], 'a') as mismatch_file:
                    mismatch_file.write('\t'.join(
                        [projectCode, analysisId, scoreMd5, songMd5, fileMd5]))
                    mismatch_file.write('\n')

            # NOTE(review): the original indentation was not available; the
            # steps below are assumed to run for every row, not only for the
            # mismatching ones. Confirm against the intended behaviour.

            # Skip the rows that were already fixed by an earlier run.
            fixpath = os.path.join(app_ctx['xml_fix_dir'], fileName + '.fix')
            if os.path.isfile(fixpath):
                continue

            # Upload to storage.
            subprocess.check_output([
                'score-client', '--profile', profile, 'upload', '--md5',
                fileMd5, '--file', fpath, '--object-id', objectId, '--force'
            ])

            # Copy the XML to the open meta bucket.
            if profile == 'collab':
                copy_cmd = [
                    'aws', '--endpoint-url',
                    app_ctx[profile + '_endpoint_url'], '--profile', profile,
                    's3', 'cp', fpath, app_ctx['meta_bucket_url'] + objectId
                ]
            else:
                copy_cmd = [
                    'aws', '--profile', 'amazon_pay', 's3', 'cp', fpath,
                    app_ctx['meta_bucket_url'] + objectId
                ]
            subprocess.check_output(copy_cmd)

            # Update SONG.
            api = Api(ApiConfig(app_ctx['song'][profile], projectCode,
                                access_token))

            # The SONG file record only needs an update when its md5 differs.
            if fileMd5 != songMd5:
                fileUpdate = FileUpdateRequest()
                fileUpdate.fileSize = fileSize
                fileUpdate.fileMd5sum = fileMd5
                api.update_file(objectId, fileUpdate)

            # Publish the analysis unless it is already published.
            analysis_state = api.get_analysis(
                analysisId).__dict__['analysisState']
            if analysis_state != "PUBLISHED":
                try:
                    api.publish(analysisId)
                except Exception:
                    # Best effort: record the failure and carry on with the
                    # next row. KeyboardInterrupt/SystemExit still propagate.
                    log_issue('{0}::{1}: can not be published\n'.format(
                        projectCode, analysisId))

            # Empty marker file: this row is done.
            with open(fixpath, 'w') as marker:
                marker.write('')
def main():
    """Upload a SONG payload, upload its files and publish the analysis.

    Command-line arguments supply the study id, server URL, payload path,
    manifest file name, input directory, access token (defaulting to the
    ``ACCESSTOKEN`` environment variable) and an optional JSON output name.

    The analysis id is read from the payload. If the analysis can be fetched
    from the server, its files are compared with the payload's ``file``
    entries and the program exits with status 1 on a mismatch. If fetching
    fails before an analysis object is obtained, the payload is uploaded
    through ``FileUploadClient``. Afterwards a manifest is created, the files
    are uploaded with ``icgc-storage-client`` and the analysis is published.
    With ``--json`` the manifest is also written as JSON into the input
    directory.

    Raises:
        SystemExit: With status 1 when the server's files do not match the
            payload; argparse also exits on invalid arguments.
        subprocess.CalledProcessError: If ``icgc-storage-client`` fails.
    """
    parser = argparse.ArgumentParser(
        description='Generate a song payload using minimal arguments')
    parser.add_argument('-s', '--study-id', dest="study_id",
                        help="Study ID", required=True)
    parser.add_argument('-u', '--server-url', dest="server_url",
                        help="Server URL", required=True)
    parser.add_argument('-p', '--payload', dest="payload",
                        help="JSON Payload", required=True)
    parser.add_argument('-o', '--output', dest="output",
                        help="Output manifest file", required=True)
    parser.add_argument('-d', '--input-dir', dest="input_dir",
                        help="Payload files directory", required=True)
    # The help text used to be a copy of the --server-url one.
    parser.add_argument(
        '-t', '--access-token', dest="access_token",
        default=os.environ.get('ACCESSTOKEN', None),
        help="Access token (defaults to the ACCESSTOKEN environment variable)")
    parser.add_argument('-j', '--json', dest="json_output")
    results = parser.parse_args()

    payload = results.payload

    # Read the payload once and close the file deterministically.
    with open(payload) as payload_file:
        payload_json = json.load(payload_file)
    analysis_id = payload_json.get('analysisId')

    config = ApiConfig(results.server_url, results.study_id,
                       results.access_token, debug=True)
    api = Api(config)
    client = FileUploadClient(api, payload, is_async_validation=True,
                              ignore_analysis_id_collisions=True)

    analysis = None
    try:
        analysis = api.get_analysis(analysis_id)
        # Compare by value: ``is ''`` tested object identity, which is
        # implementation-dependent for strings.
        if analysis.analysisId == '':
            raise Exception('Analysis id ' + analysis_id + ' not found')

        payload_files = payload_json.get('file')
        for api_file in api.get_analysis_files(analysis_id):
            if not exists_in_file_array(file_name=api_file.fileName,
                                        file_md5=api_file.fileMd5sum,
                                        file_type=api_file.fileType,
                                        file_size=api_file.fileSize,
                                        file_access=api_file.fileAccess,
                                        files_array=payload_files):
                print(
                    "Files in payload do not match the files on song server.")
                # SystemExit is not an Exception subclass, so the handler
                # below does not intercept it.
                raise SystemExit(1)
    except Exception:
        # NOTE(review): only the "no analysis object was obtained" case leads
        # to an upload; any other failure in the block above is ignored, as
        # in the original. Confirm that this is intended.
        if analysis is None or analysis == '':
            client.upload()
            client.update_status()
            client.save()

    manifest_filename = results.output
    create_manifest(api, analysis_id, manifest_filename, results.input_dir)
    manifest_path = os.path.join(results.input_dir, manifest_filename)

    subprocess.check_output([
        'icgc-storage-client', 'upload', '--manifest', manifest_path,
        '--force'
    ])

    api.publish(analysis_id)

    if results.json_output:
        with open(manifest_path, 'r') as manifest_file:
            # The first manifest line carries the analysis id in column one.
            manifest_json = {
                'analysis_id': manifest_file.readline().split('\t')[0],
                'files': [],
            }
            for line in manifest_file:
                if not line.strip():
                    # A blank line would otherwise raise IndexError below.
                    continue
                fields = line.split('\t')
                manifest_json['files'].append({
                    'object_id': fields[0],
                    'file_name': fields[1],
                    'md5': fields[2].strip('\n'),
                })

        json_path = os.path.join(results.input_dir, results.json_output)
        with open(json_path, 'w') as outfile:
            json.dump(manifest_json, outfile)