# NOTE(review): fragment — this chunk opens with the continuation of a
# FilePayload(...) argument list (presumably inside a
# song_payload.add_file_payload(...) call) whose opening is outside this view.
FilePayload(file_access='controlled', file_name=file.get('file_name'),
            md5sum=md5sum(file_path),
            file_type=get_file_type(file.get('file_name')),
            file_size=os.stat(file_path).st_size))
# Register the index file with the payload only if it exists on disk.
if os.path.isfile(idx_file_path):
    song_payload.add_file_payload(
        FilePayload(file_access='controlled', file_name=idx_file,
                    md5sum=md5sum(idx_file_path),
                    file_type=get_file_type(idx_file),
                    file_size=os.stat(idx_file_path).st_size))
# The metadata file is registered with file_access='open', unlike the
# 'controlled' data/index files above.
file_path = os.path.join(input_dir, metadata_file_name)
song_payload.add_file_payload(
    FilePayload(file_access='open', file_name=metadata_file_name,
                md5sum=md5sum(file_path),
                file_type=get_file_type(metadata_file_name),
                file_size=os.stat(file_path).st_size))
# Serialize the assembled SONG payload to the requested output file.
song_payload.to_json_file(output_file)
# Report both the payload file name and its parsed JSON contents as this
# task's output. NOTE(review): assumes output_file is
# input_dir/payload.json — confirm against the caller.
save_output_json({
    'payload': 'payload.json',
    'payload_json': json.load(open(os.path.join(input_dir, 'payload.json')))
})
# Upload step: currently a no-op — the real storage-client and S3 calls are
# kept below as disabled comments. Any failure is logged to jt.log for
# JTracker and the task exits non-zero.
try:
    # print subprocess.check_output(['icgc-storage-client', '--profile', 'aws',
    #     'upload', '--file', file_, '--object-id', object_id,
    #     '--md5', file_md5sum, '--force'])
    pass
    # metadata step
    if file_.endswith('.xml'):
        # print subprocess.check_output(['aws', '--profile', 'amazon', 's3', 'cp',
        #     file_, os.path.join('s3://oicr.icgc.meta/metadata/', object_id)])
        pass
except Exception as e:  # was `except Exception, e:` — Python-2-only syntax
    # Record the failure for JTracker, then abort the task.
    with open('jt.log', 'w') as f:
        f.write(str(e))
    sys.exit(1)

task_stop = int(time.time())

# Summarize the (attempted) upload — file identities, checksums, sizes, and
# wall-clock runtime — as this task's output.
output_json = {
    'file': file_,
    'allowed_upload': run,
    'file_md5sum': file_md5sum,
    'idx_file': idx_file_,
    'idx_file_md5sum': idx_file_md5sum,
    'file_size': file_size,
    'idx_file_size': idx_file_size,
    'runtime': {
        'task_start': task_start,
        'task_stop': task_stop
    }
}

save_output_json(output_json)
# NOTE(review): fragment — the statements up to the `return` below are the
# tail of an upload_file(input_directory, study_id, payload) definition whose
# `def` line is outside this view.
# Run the SONG upload client inside a Docker container; the input directory
# is mounted at /app so payload and manifest files are shared with the host.
subprocess.check_output([
    'docker', 'run', '--net=host',
    '-e', 'ACCESSTOKEN',
    '-e', 'STORAGEURL=' + os.environ.get('STORAGEURL_AWS'),
    '-e', 'METADATAURL=' + os.environ.get('METADATAURL_AWS'),
    '-v', input_directory + ':/app',
    upload_container,
    'upload', '-s', study_id,
    '-u', os.environ.get('METADATAURL_AWS'),
    '-p', '/app/' + payload,
    '-o', 'manifest.txt', '-j', 'manifest.json', '-d', '/app/'
])
# Return the JSON manifest the container wrote into the mounted directory.
return json.load(open(os.path.join(input_directory, 'manifest.json')))

# --- module-level driver ---
# Read the JTracker task descriptor given on the command line.
task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()
payloads = task_dict.get('input').get('payloads')
input_directory = task_dict.get('input').get('input_directory')
study_id = task_dict.get('input').get('study_id')
task_start = int(time.time())

# Upload every payload, but only for studies in the allowed list; otherwise
# an empty manifest list is reported.
manifests = []
if study_id in allowed_codes:
    for i in range(0, len(payloads)):
        manifests.append(upload_file(input_directory, study_id, payloads[i]))
save_output_json({'manifests': manifests})
# Task inputs: the directory holding the payload file(s) and the SONG study
# the upload belongs to.
input_dir = task_dict.get('input').get('input_dir')
study_id = task_dict.get('input').get('study_id')

upload_container = "quay.io/oicr/dckr_song_upload:latest"
song_server = os.environ.get('SONG_SERVER_COLLAB')

# Always pull so the latest uploader image is used.
subprocess.check_output(['docker', 'pull', upload_container])

# Run the SONG upload client inside the container; the input directory is
# mounted at /app so the payload and the manifests it writes are shared with
# the host. NOTE(review): assumes `payload` is defined earlier in this
# script — confirm.
subprocess.check_output([
    'docker', 'run', '--net=host',
    '-e', 'ACCESSTOKEN',
    '-e', 'STORAGEURL=' + os.environ.get('STORAGEURL_COLLAB'),
    '-e', 'METADATAURL=' + os.environ.get('METADATAURL_COLLAB'),
    '-v', input_dir + ':/app',
    upload_container,
    'upload', '-s', study_id, '-u', song_server,
    '-p', '/app/' + payload,
    '-o', 'manifest.txt', '-j', 'manifest.json', '-d', '/app/'
])

# Load the manifest the container produced; use a context manager so the
# file handle is closed instead of leaked (was json.load(open(...))).
with open(os.path.join(input_dir, 'manifest.json')) as manifest_file:
    manifest = json.load(manifest_file)

# (Disabled) copy of XML metadata files to the Collaboratory object store.
#subprocess.check_output(['docker', 'pull', 'mesosphere/aws-cli'])
#for file in manifest.get('files'):
#    if file.get('file_name').endswith('.xml'):
#        subprocess.check_output(['docker', 'run',
#            '-e', 'AWS_ACCESS_KEY_ID='+os.environ.get('COLLAB_ACCESS_KEY_ID'),
#            '-e', 'AWS_SECRET_ACCESS_KEY='+os.environ.get('COLLAB_SECRET_ACCESS_KEY'),
#            '-v', input_dir + ':/project',
#            'mesosphere/aws-cli', 's3', 'cp',
#            os.path.join('/project', os.path.basename(file.get('file_name'))),
#            os.path.join('https://object.cancercollaboratory.org:9080', file.get('object_id'))])
save_output_json({'manifest': manifest})
if not os.path.isfile(os.path.join(out_dir, file_name)): raise ValueError( 'Object ID: ' + object_id + ' could not be downloaded. Try to download with icgc-storage-client for more info.' ) # Download normal bam file object_id = task_dict.get('input').get('normal_bam').get('object_id') file_name = task_dict.get('input').get('normal_bam').get('file_name') download_file(object_id, cwd, file_name) # Download tumour bam files for i in range(0, len(task_dict.get('input').get('tumour_bams'))): object_id = task_dict.get('input').get('tumour_bams')[i].get('object_id') file_name = task_dict.get('input').get('tumour_bams')[i].get('file_name') download_file(object_id, cwd, file_name) # Download VCF files for i in range(0, len(task_dict.get('input').get('vcf_files'))): file_name = task_dict.get('input').get('vcf_files')[i].get('file_name') if not task_dict.get('input').get('vcf_files')[i].get('is_smufin'): object_id = task_dict.get('input').get('vcf_files')[i].get('object_id') download_file(object_id, cwd, file_name) else: copyfile(os.path.join('/icgc_smufin_calls', file_name), os.path.join(cwd, file_name)) save_output_json({'directory': cwd})
# Assemble the cwltool input JSON for the PCAWG minibam workflow.
# Hoist the repeated task_dict.get('input') lookup out of the loops
# (was re-evaluated on every iteration via range(len(...)) indexing).
task_input = task_dict.get('input')

json_input['snv-padding'] = str(task_input.get('snv_padding'))
json_input['sv-padding'] = str(task_input.get('sv_padding'))
json_input['indel-padding'] = str(task_input.get('indel_padding'))

# Every tumour is associated with the full set of VCF call files; compute
# the name list once instead of once per tumour.
vcf_file_names = [vcf.get('file_name') for vcf in task_input.get('vcf_files')]

json_input['tumours'] = []
for tumour in task_input.get('tumour_bams'):
    bam_file_name = tumour.get('file_name')
    json_input['tumours'].append({
        # The tumour ID is the BAM file name without its .bam extension.
        'tumourId': bam_file_name.replace('.bam', ''),
        'bamFileName': bam_file_name,
        'oxoQScore': tumour.get('oxog_score'),
        # Fresh copy per tumour, matching the original per-iteration list
        # build (mutating one tumour's list must not affect the others).
        'associatedVcfs': list(vcf_file_names),
    })

json_file = 'run.json'
with open(json_file, 'w') as f:
    json.dump(json_input, f, indent=4, sort_keys=True)

# Run the workflow; check_output raises on a non-zero exit status.
subprocess.check_output([
    'cwltool', '--debug', '--relax-path-checks', '--non-strict',
    '/home/ubuntu/pcawg-minibam/pcawg_minibam_wf.cwl', json_file
])

save_output_json({'output_directory': os.getcwd()})
import os
import hashlib

# Read the JTracker task descriptor passed on the command line.
task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()

# Task inputs describing the minibam run: BAMs, experiment metadata,
# padding sizes, and associated VCFs.
input_directory = task_dict.get('input').get('input_directory')
normal_bam = task_dict.get('input').get('normal_bam')
tumour_bams = task_dict.get('input').get('tumour_bams')
experiment = task_dict.get('input').get('experiment')
indel_padding = task_dict.get('input').get('indel_padding')
snv_padding = task_dict.get('input').get('snv_padding')
sv_padding = task_dict.get('input').get('sv_padding')
associated_vcfs = task_dict.get('input').get('associated_vcfs')

# Echo the full task dict back as this task's output.
save_output_json(task_dict)


def create_payload_json(bam, experiment, input_directory, output_file,
                        associated_vcfs):
    """Build the SONG payload (donor, experiment, file entries) for one BAM.

    NOTE(review): this definition continues past the end of the visible
    chunk — the FilePayload(...) call at the bottom is cut off mid-argument
    list.
    """
    donor_payload = DonorPayload(
        donor_gender=bam.get('sample').get('donor').get('gender'),
        donor_submitter_id=bam.get('sample').get('donor').get('submitter_id'))
    experiment_payload = ExperimentPayload(
        aligned=experiment.get('aligned'),
        library_strategy=experiment.get('library_strategy'),
        reference_genome=experiment.get('reference_genome'))
    file_path = os.path.join(input_directory,
                             bam.get('minibam').get('bam_file_name'))
    minibam_payload = FilePayload(