FilePayload(file_access='controlled',
                    file_name=file.get('file_name'),
                    md5sum=md5sum(file_path),
                    file_type=get_file_type(file.get('file_name')),
                    file_size=os.stat(file_path).st_size))

    if os.path.isfile(idx_file_path):
        song_payload.add_file_payload(
            FilePayload(file_access='controlled',
                        file_name=idx_file,
                        md5sum=md5sum(idx_file_path),
                        file_type=get_file_type(idx_file),
                        file_size=os.stat(idx_file_path).st_size))

# Register the metadata file with the SONG payload; it is flagged with
# 'open' access.
file_path = os.path.join(input_dir, metadata_file_name)
song_payload.add_file_payload(
    FilePayload(file_access='open',
                file_name=metadata_file_name,
                md5sum=md5sum(file_path),
                file_type=get_file_type(metadata_file_name),
                file_size=os.stat(file_path).st_size))

# Serialise the assembled payload to disk.
song_payload.to_json_file(output_file)

# Read the payload back through a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
# NOTE(review): this reads <input_dir>/payload.json while the payload was
# written to `output_file` -- confirm both refer to the same file.
with open(os.path.join(input_dir, 'payload.json')) as payload_file:
    payload_json = json.load(payload_file)

# Report both the payload file name and its parsed content.
save_output_json({
    'payload': 'payload.json',
    'payload_json': payload_json
})
    try:
        # The storage-client upload is currently disabled; the original
        # command is kept below for reference.
        #print subprocess.check_output(['icgc-storage-client', '--profile', 'aws', 'upload','--file', file_, '--object-id', object_id, '--md5', file_md5sum, '--force'])
        pass
        #metadata step
        if file_.endswith('.xml'):
            # The S3 copy of the XML metadata is disabled as well.
            #print subprocess.check_output(['aws', '--profile', 'amazon', 's3', 'cp', file_, os.path.join('s3://oicr.icgc.meta/metadata/', object_id)])
            pass

    # `except ... as e` is accepted by Python 2.6+ and Python 3; the old
    # `except Exception, e` spelling is a SyntaxError on Python 3.
    except Exception as e:
        # Persist the error text to jt.log, then exit with a non-zero status.
        with open('jt.log', 'w') as f:
            f.write(str(e))
        sys.exit(1)

# Record when the task finished (whole seconds since the epoch).
task_stop = int(time.time())

# Start/stop timestamps reported alongside the file details.
runtime_info = dict(task_start=task_start, task_stop=task_stop)

# Summary of the processed file and its index file.
output_json = dict(
    file=file_,
    allowed_upload=run,
    file_md5sum=file_md5sum,
    idx_file=idx_file_,
    idx_file_md5sum=idx_file_md5sum,
    file_size=file_size,
    idx_file_size=idx_file_size,
    runtime=runtime_info,
)

save_output_json(output_json)
    subprocess.check_output([
        'docker', 'run', '--net=host', '-e', 'ACCESSTOKEN', '-e',
        'STORAGEURL=' + os.environ.get('STORAGEURL_AWS'), '-e',
        'METADATAURL=' + os.environ.get('METADATAURL_AWS'), '-v',
        input_directory + ':/app', upload_container, 'upload', '-s', study_id,
        '-u',
        os.environ.get('METADATAURL_AWS'), '-p', '/app/' + payload, '-o',
        'manifest.txt', '-j', 'manifest.json', '-d', '/app/'
    ])

    return json.load(open(os.path.join(input_directory, 'manifest.json')))


# Load the task description whose path is given as the first CLI argument.
task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()

# Pull the values this task needs out of the 'input' section.
task_input = task_dict.get('input')
payloads = task_input.get('payloads')
input_directory = task_input.get('input_directory')
study_id = task_input.get('study_id')

task_start = int(time.time())

# Upload every payload, but only when the study is in `allowed_codes`;
# otherwise the manifest list stays empty.
manifests = []
if study_id in allowed_codes:
    manifests.extend(
        upload_file(input_directory, study_id, payload)
        for payload in payloads)

save_output_json({'manifests': manifests})
Beispiel #4
0
# Inputs for this task, read from the task description.
input_dir = task_dict.get('input').get('input_dir')
study_id = task_dict.get('input').get('study_id')

upload_container = "quay.io/oicr/dckr_song_upload:latest"
song_server = os.environ.get('SONG_SERVER_COLLAB')

# Pull the upload image before running it.
subprocess.check_output(['docker', 'pull', upload_container])

# Run the upload with `input_dir` mounted at /app inside the container and
# the collab storage/metadata endpoints taken from the environment.
subprocess.check_output([
    'docker', 'run', '--net=host', '-e', 'ACCESSTOKEN', '-e',
    'STORAGEURL=' + os.environ.get('STORAGEURL_COLLAB'), '-e',
    'METADATAURL=' + os.environ.get('METADATAURL_COLLAB'), '-v',
    input_dir + ':/app', upload_container, 'upload', '-s', study_id, '-u',
    song_server, '-p', '/app/' + payload, '-o', 'manifest.txt', '-j',
    'manifest.json', '-d', '/app/'
])

# Load manifest.json from the input directory; the context manager closes
# the file handle instead of leaking it.
with open(os.path.join(input_dir, 'manifest.json')) as manifest_file:
    manifest = json.load(manifest_file)

# Disabled: copy of the XML metadata files via the aws-cli container.
#subprocess.check_output(['docker', 'pull', 'mesosphere/aws-cli'])
#for file in manifest.get('files'):
#    if file.get('file_name').endswith('.xml'):
#        subprocess.check_output(['docker', 'run',
#                                 '-e', 'AWS_ACCESS_KEY_ID='+os.environ.get('COLLAB_ACCESS_KEY_ID'),
#                                 '-e', 'AWS_SECRET_ACCESS_KEY='+os.environ.get('COLLAB_SECRET_ACCESS_KEY'),
#                                 '-v', input_dir + ':/project',
#                                 'mesosphere/aws-cli', 's3', 'cp',
#                                 os.path.join('/project', os.path.basename(file.get('file_name'))),
#                                 os.path.join('https://object.cancercollaboratory.org:9080', file.get('object_id'))])

save_output_json({'manifest': manifest})
    # Fail loudly when the expected file is not present in the output
    # directory.
    expected_path = os.path.join(out_dir, file_name)
    downloaded = os.path.isfile(expected_path)
    if not downloaded:
        message = (
            'Object ID: ' + object_id +
            ' could not be downloaded. Try to download with icgc-storage-client for more info.'
        )
        raise ValueError(message)


task_input = task_dict.get('input')

# Download normal bam file
normal_entry = task_input.get('normal_bam')
object_id = normal_entry.get('object_id')
file_name = normal_entry.get('file_name')
download_file(object_id, cwd, file_name)

# Download tumour bam files
for tumour_entry in task_input.get('tumour_bams'):
    object_id = tumour_entry.get('object_id')
    file_name = tumour_entry.get('file_name')
    download_file(object_id, cwd, file_name)

# Fetch VCF files: entries flagged 'is_smufin' are copied from the local
# /icgc_smufin_calls directory, all others are downloaded by object id.
for vcf_entry in task_input.get('vcf_files'):
    file_name = vcf_entry.get('file_name')
    if vcf_entry.get('is_smufin'):
        copyfile(os.path.join('/icgc_smufin_calls', file_name),
                 os.path.join(cwd, file_name))
    else:
        object_id = vcf_entry.get('object_id')
        download_file(object_id, cwd, file_name)

# Report the directory that was passed to every download/copy above.
save_output_json({'directory': cwd})
Beispiel #6
0
task_input = task_dict.get('input')

# Copy the padding settings into the workflow input as strings.
for option_name, input_key in (('snv-padding', 'snv_padding'),
                               ('sv-padding', 'sv_padding'),
                               ('indel-padding', 'indel_padding')):
    json_input[option_name] = str(task_input.get(input_key))

# One entry per tumour BAM; each entry lists every VCF file name.
json_input['tumours'] = []
for tumour_bam in task_input.get('tumour_bams'):
    bam_name = tumour_bam.get('file_name')
    json_input['tumours'].append({
        'tumourId': bam_name.replace('.bam', ''),
        'bamFileName': bam_name,
        'oxoQScore': tumour_bam.get('oxog_score'),
        'associatedVcfs': [
            vcf.get('file_name') for vcf in task_input.get('vcf_files')
        ],
    })

json_file = 'run.json'

# Write the workflow input to disk.
with open(json_file, 'w') as f:
    json.dump(json_input, f, indent=4, sort_keys=True)

# Run the CWL workflow on the generated input file.
workflow_command = [
    'cwltool', '--debug', '--relax-path-checks', '--non-strict',
    '/home/ubuntu/pcawg-minibam/pcawg_minibam_wf.cwl', json_file
]
subprocess.check_output(workflow_command)

save_output_json({'output_directory': os.getcwd()})
import os
import hashlib

# Load the task description whose path is given as the first CLI argument.
task_dict = get_task_dict(sys.argv[1])
cwd = os.getcwd()

# Unpack the values this task needs from the 'input' section.
task_input = task_dict.get('input')
input_directory = task_input.get('input_directory')
normal_bam = task_input.get('normal_bam')
tumour_bams = task_input.get('tumour_bams')
experiment = task_input.get('experiment')
indel_padding = task_input.get('indel_padding')
snv_padding = task_input.get('snv_padding')
sv_padding = task_input.get('sv_padding')
associated_vcfs = task_input.get('associated_vcfs')

# The task description itself is saved as the output JSON.
save_output_json(task_dict)


def create_payload_json(bam, experiment, input_directory, output_file,
                        associated_vcfs):
    donor_payload = DonorPayload(
        donor_gender=bam.get('sample').get('donor').get('gender'),
        donor_submitter_id=bam.get('sample').get('donor').get('submitter_id'))
    experiment_payload = ExperimentPayload(
        aligned=experiment.get('aligned'),
        library_strategy=experiment.get('library_strategy'),
        reference_genome=experiment.get('reference_genome'))

    file_path = os.path.join(input_directory,
                             bam.get('minibam').get('bam_file_name'))
    minibam_payload = FilePayload(