Example #1
0
def run_gnos_audit(args):
    """Print which known upload UUIDs are already listed on the GNOS servers.

    Loads the ``Broad_VCF_UUID``, ``Muse_VCF_UUID`` and ``Broad_TAR_UUID``
    value tables through ``synqueue.getValues``, inverts them (uuid -> table
    key), then reads the ``tcga_pancancer_vcf`` analysis-id listing of every
    server in the hard-coded server list.  Each ``<analysis_id>`` that matches
    a known UUID is recorded, and one line per matched key is printed in the
    form ``<key> <subset> (<server> : <uuid>)`` with subsets tab-separated and
    sorted by name.

    Args:
        args: ignored; the command line is re-parsed with the module-level
            ``parser`` (behaviour kept from the original).
    """
    from contextlib import closing

    # NOTE(review): this discards the ``args`` passed in.  Kept unchanged
    # because ``parser`` is defined outside this block.
    args = parser.parse_args()
    syn = synapseclient.Synapse()
    syn.login()

    # The server list is hard-coded.  An earlier, disabled version derived it
    # from the "Normal_WGS_alignment_GNOS_repos" values instead.
    server_list = ["https://gtrepo-osdc-tcga.annailabs.com"]

    # Invert every table so a UUID seen on the server maps back to its key.
    uuid_map = {}
    for subset, table in (('broad', "Broad_VCF_UUID"),
                          ('muse', "Muse_VCF_UUID"),
                          ('broad_tar', "Broad_TAR_UUID")):
        uuid_map[subset] = dict(
            (value, key)
            for key, value in synqueue.getValues(syn, table, **config).items())

    analysis_re = re.compile(r'<analysis_id>(.*)</analysis_id>')

    found = {}
    for server in server_list:
        url = server + "/cghub/metadata/analysisId?study=tcga_pancancer_vcf"
        # closing() guarantees the HTTP response is released even if parsing
        # fails part-way through the listing.
        with closing(urlopen(url)) as handle:
            for line in handle:
                res = analysis_re.search(line)
                if not res:
                    continue
                gid = res.group(1)
                for subset in uuid_map:
                    if gid in uuid_map[subset]:
                        pid = uuid_map[subset][gid]
                        found.setdefault(pid, {})[subset] = [server, gid]

    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and also parses on Python 3.
    print("\n\n")
    for pid in found:
        hits = "\t".join(
            "%s (%s : %s)" % (subset, location[0], location[1])
            for subset, location in sorted(found[pid].items()))
        print("%s %s" % (pid, hits))
def run_gnos_audit(args):
    """Print which known upload UUIDs are already listed on the GNOS servers.

    Loads the ``Broad_VCF_UUID``, ``Muse_VCF_UUID`` and ``Broad_TAR_UUID``
    value tables through ``synqueue.getValues``, inverts them (uuid -> table
    key), then reads the ``tcga_pancancer_vcf`` analysis-id listing of every
    server in the hard-coded server list.  Each ``<analysis_id>`` that matches
    a known UUID is recorded, and one line per matched key is printed in the
    form ``<key> <subset> (<server> : <uuid>)`` with subsets tab-separated and
    sorted by name.

    Args:
        args: ignored; the command line is re-parsed with the module-level
            ``parser`` (behaviour kept from the original).
    """
    from contextlib import closing

    # NOTE(review): this discards the ``args`` passed in.  Kept unchanged
    # because ``parser`` is defined outside this block.
    args = parser.parse_args()
    syn = synapseclient.Synapse()
    syn.login()

    # The server list is hard-coded.  An earlier, disabled version derived it
    # from the "Normal_WGS_alignment_GNOS_repos" values instead.
    server_list = ["https://gtrepo-osdc-tcga.annailabs.com"]

    # Invert every table so a UUID seen on the server maps back to its key.
    uuid_map = {}
    for subset, table in (('broad', "Broad_VCF_UUID"),
                          ('muse', "Muse_VCF_UUID"),
                          ('broad_tar', "Broad_TAR_UUID")):
        uuid_map[subset] = dict(
            (value, key)
            for key, value in synqueue.getValues(syn, table, **config).items())

    analysis_re = re.compile(r'<analysis_id>(.*)</analysis_id>')

    found = {}
    for server in server_list:
        url = server + "/cghub/metadata/analysisId?study=tcga_pancancer_vcf"
        # closing() guarantees the HTTP response is released even if parsing
        # fails part-way through the listing.
        with closing(urlopen(url)) as handle:
            for line in handle:
                res = analysis_re.search(line)
                if not res:
                    continue
                gid = res.group(1)
                for subset in uuid_map:
                    if gid in uuid_map[subset]:
                        pid = uuid_map[subset][gid]
                        found.setdefault(pid, {})[subset] = [server, gid]

    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and also parses on Python 3.
    print("\n\n")
    for pid in found:
        hits = "\t".join(
            "%s (%s : %s)" % (subset, location[0], location[1])
            for subset, location in sorted(found[pid].items()))
        print("%s %s" % (pid, hits))
Example #3
0
def run_uploadprep(args):
    """Stage pipeline outputs in ``args.workdir`` and write GNOS upload scripts.

    Walks every entry returned by ``doc.filter()`` on the store opened from
    ``args.out_base``.  Entries in state ``ok`` carrying a ``donor:<id>`` tag
    for a donor on the Synapse work list are processed:

    * job metrics are collected per donor / job / tool;
    * visible ``vcf`` / ``vcf_bgzip`` outputs are copied into the work
      directory under their submission name (plain ``vcf`` files are
      additionally compressed with ``bgzip``);
    * the path of a ``broad.tar.gz`` output is remembered.

    For each donor that has results in all three subsets (``broad``, ``muse``
    and ``broad_tar``) the function then writes, per subset, a
    ``<subset>.<donor>.timing.json`` file and a ``<subset>.<donor>.sh`` script
    that prepares the files and invokes ``gnos_upload_vcf.pl``.

    Args:
        args: parsed command-line namespace.  Reads ``workdir``, ``out_base``,
            ``keyfile``, ``upload_url``, ``study``, ``pipeline_src``,
            ``pipeline_name``, ``pipeline_version``, ``vm_instance_type``,
            ``vm_instance_cores``, ``vm_instance_mem_gb`` and
            ``vm_location_code``.

    Raises:
        Exception: if a visible VCF output cannot be attributed to a known
            pipeline, variant type or call type.
        subprocess.CalledProcessError: if ``bgzip`` exits non-zero.
        OSError: if ``bgzip`` cannot be started.
    """
    if not os.path.exists(args.workdir):
        os.mkdir(args.workdir)
    doc = from_url(args.out_base)
    file_map = {'broad': {}, 'muse': {}, 'broad_tar': {}}

    syn = synapseclient.Synapse()
    syn.login()

    wl_map = {}
    job_map = {}
    for ent in synqueue.listAssignments(syn, list_all=True, **config):
        wl_map[ent['id']] = ent['meta']

    # One UUID table per upload subset; entries without a value get a fresh
    # uuid4 through the orSet callback.
    uuid_map = {}
    for subset, table in (('broad', "Broad_VCF_UUID"),
                          ('muse', "Muse_VCF_UUID"),
                          ('broad_tar', "Broad_TAR_UUID")):
        uuid_map[subset] = synqueue.getValues(
            syn, table, orSet=lambda x: str(uuid.uuid4()), **config)

    broad_callers = ['broad-dRanger', 'broad-dRanger_snowman',
                     'broad-snowman', 'broad-mutect']

    # scan through all of the docs
    for _doc_id, entry in doc.filter():
        donor = None
        # look for docs with donor tags
        if 'tags' in entry and entry.get('state') == 'ok':
            for tag in entry['tags']:
                tmp = tag.split(":")
                # A bare "donor" tag carries no id; skip it instead of
                # failing with IndexError.
                if tmp[0] == 'donor' and len(tmp) > 1:
                    donor = tmp[1]
        if donor is None or donor not in wl_map:
            continue

        if donor not in job_map:
            job_map[donor] = {}
        # scan out the job metrics for this job
        if 'job' in entry and 'job_metrics' in entry['job']:
            job_id = entry['job']['id']
            tool_id = entry['job']['tool_id']
            metrics = {}
            for met in entry['job']['job_metrics']:
                metrics[met['name']] = met['raw_value']
            job_map[donor][job_id] = {tool_id: metrics}
        donor_tumor = wl_map[donor]['Tumour_WGS_alignment_GNOS_analysis_IDs']

        # look for the vcf output files
        if entry.get('visible', False) and entry.get(
                'extension', None) in ["vcf", "vcf_bgzip"]:
            # fill out the info depending on which caller created the file
            caller = entry['name'].split('.')[0]
            if caller in ['MUSE_1']:
                pipeline = "muse"
                method = entry['name'].replace(".", "-")
                variant_type = 'somatic'
                call_type = 'snv_mnv'
            elif caller in broad_callers:
                pipeline = "broad"
                method = caller
                if 'somatic' in entry['name']:
                    variant_type = 'somatic'
                elif 'germline' in entry['name']:
                    variant_type = 'germline'
                else:
                    raise Exception("Unknown variant type")
                if 'snv_mnv.vcf' in entry['name']:
                    call_type = 'snv_mnv'
                elif 'sv.vcf' in entry['name']:
                    call_type = 'sv'
                elif 'indel.vcf' in entry['name']:
                    call_type = 'indel'
                else:
                    raise Exception("Unknown call type: %s" %
                                    (entry['name']))
            else:
                raise Exception("Unknown pipeline %s" % (entry['name']))

            datestr = datetime.datetime.now().strftime("%Y%m%d")
            name = "%s.%s.%s.%s.%s" % (donor_tumor, method, datestr,
                                       variant_type, call_type)
            # The dot is escaped so only a literal ".vcf" suffix is stripped.
            name = re.sub(r'\.vcf$', '', name)
            if entry['extension'] == 'vcf':
                file_name = name + ".vcf"
            else:
                file_name = name + ".vcf.gz"

            target = Target(uuid=entry['uuid'])
            if doc.size(target) > 0:
                src_file = doc.get_filename(target)
                dst_file = os.path.join(args.workdir, file_name)
                shutil.copy(src_file, dst_file)
                # if the file wasn't compressed already, go ahead and do that
                if entry['extension'] == 'vcf':
                    # Argument list + redirected stdout instead of a shell
                    # string, so paths with spaces or shell metacharacters
                    # are handled safely.
                    with open(dst_file + ".gz", "wb") as gz_handle:
                        subprocess.check_call(["bgzip", "-c", dst_file],
                                              stdout=gz_handle)
                    dst_file = dst_file + ".gz"

                # add file to output map
                file_map[pipeline].setdefault(donor, []).append(
                    os.path.basename(dst_file))
        elif entry['name'] == "broad.tar.gz":
            target = Target(uuid=entry['uuid'])
            file_map['broad_tar'][donor] = [doc.get_filename(target)]

    # Regroup the metrics as donor -> tool -> [metrics of each job].
    timing_map = {}
    for donor, jobs in job_map.items():
        timing_map[donor] = {}
        for job_info in jobs.values():
            for tool_id, metrics in job_info.items():
                timing_map[donor].setdefault(tool_id, []).append(metrics)

    # Number of subsets in which each donor has at least one result.
    result_counts = {}
    for donors in file_map.values():
        for donor in donors:
            result_counts[donor] = result_counts.get(donor, 0) + 1

    script_template = string.Template(
        "#!/bin/bash\n"
        "set -ex\n"
        "${PREP}\n"
        "${SUBMIT}\n"
        "echo $$? > $$0.submitted\n"
        "#pushd ${SUBMIT_DIR}\n"
        "#gtupload -v -c ${KEY} -u ./manifest.xml\n"
        "#ECODE=$$?\n"
        "#popd\n"
        "#echo $$ECODE > $$0.uploaded\n")

    # go through every pipeline
    for pipeline, donors in file_map.items():
        # for that pipeline go through every donor
        for donor, files in donors.items():
            # we're only outputting data for work-list donors that have
            # results in all three subsets
            if donor not in wl_map or result_counts[donor] != 3:
                continue

            # output the timing json
            timing_json = os.path.abspath(
                os.path.join(args.workdir,
                             "%s.%s.timing.json" % (pipeline, donor)))
            with open(timing_json, "w") as handle:
                handle.write(json.dumps(timing_map[donor]))

            meta = wl_map[donor]
            urls = [
                "%scghub/metadata/analysisFull/%s" %
                (meta['Normal_WGS_alignment_GNOS_repos'],
                 meta['Normal_WGS_alignment_GNOS_analysis_ID']),
                "%scghub/metadata/analysisFull/%s" %
                (meta['Tumour_WGS_alignment_GNOS_repos'],
                 meta['Tumour_WGS_alignment_GNOS_analysis_IDs']),
            ]
            donor_tumor = meta['Tumour_WGS_alignment_GNOS_analysis_IDs']
            related_uuids = [
                uuid_map[p][donor] for p in uuid_map if p != pipeline
            ]

            if pipeline in ['broad', 'muse']:
                prep_lines = []
                for vcf in files:
                    prep_lines.append("tabix -p vcf %s\n" % (vcf))
                    prep_lines.append("mv %s.tbi %s.idx\n" % (vcf, vcf))
                    prep_lines.append(
                        "md5sum %s | awk '{print$1}' > %s.md5\n" % (vcf, vcf))
                    prep_lines.append(
                        "md5sum %s.idx | awk '{print$1}' > %s.idx.md5\n\n" %
                        (vcf, vcf))
                prep_cmd_str = "".join(prep_lines)
                file_opts = [
                    " --vcfs %s " % (",".join(files)),
                    " --vcf-md5sum-files %s " %
                    (",".join("%s.md5" % i for i in files)),
                    " --vcf-idxs %s" %
                    (",".join("%s.idx" % i for i in files)),
                    " --vcf-idx-md5sum-files %s" %
                    (",".join("%s.idx.md5" % i for i in files)),
                ]
                subset_opts = [
                    " --vm-location-code %s" % args.vm_location_code,
                    " --timing-metrics-json %s" % (timing_json),
                    " --workflow-file-subset %s" % (pipeline),
                ]
            else:
                # 'broad_tar' -- the only other key of file_map.
                prep_cmd_str = ""
                new_files = []
                # Each donor holds exactly one tarball path here; with more
                # than one the last remap command would win (as before).
                for tar in files:
                    basename = donor_tumor + ".broad.intermediate"
                    prep_cmd_str = "%s/remap_broad_tar.py %s %s %s --rename %s %s" % (
                        os.path.dirname(os.path.abspath(__file__)),
                        tar, "./", basename, donor, donor_tumor)
                    new_files.append(basename + ".tar")
                file_opts = [
                    " --tarballs %s " % (",".join(new_files)),
                    " --tarball-md5sum-files %s " %
                    (",".join("%s.md5" % i for i in new_files)),
                ]
                # NOTE(review): unlike the VCF subsets, the tarball command
                # never passed --vm-location-code; preserved as-is -- confirm
                # whether that omission is intentional.
                subset_opts = [
                    " --workflow-file-subset %s" % (pipeline),
                    " --timing-metrics-json %s" % (timing_json),
                ]

            # Options are emitted in the same order as before so the
            # generated scripts stay byte-identical.
            submit_parts = [
                "perl -I /opt/gt-download-upload-wrapper/gt-download-upload-wrapper-2.0.11/lib",
                " /opt/vcf-uploader/vcf-uploader-2.0.5/gnos_upload_vcf.pl",
                " --metadata-urls %s" % (",".join(urls)),
            ]
            submit_parts.extend(file_opts)
            submit_parts.extend([
                " --outdir %s.%s.dir" % (pipeline, donor_tumor),
                " --key %s " % (args.keyfile),
                " --upload-url %s" % (args.upload_url),
                " --study-refname-override %s" % (args.study),
                " --workflow-url '%s'" % args.pipeline_src,
                " --workflow-src-url '%s'" % args.pipeline_src,
                " --workflow-name '%s'" % args.pipeline_name,
                " --workflow-version '%s'" % args.pipeline_version,
                " --vm-instance-type '%s'" % args.vm_instance_type,
                " --vm-instance-cores %s" % args.vm_instance_cores,
                " --vm-instance-mem-gb %s" % args.vm_instance_mem_gb,
            ])
            submit_parts.extend(subset_opts)
            submit_parts.extend([
                " --related-file-subset-uuids %s" % (",".join(related_uuids)),
                " --uuid %s" % (uuid_map[pipeline][donor]),
                # " --skip-upload" can be appended here for a dry run
            ])
            submit_cmd_str = "".join(submit_parts)

            script = script_template.substitute(
                PREP=prep_cmd_str,
                SUBMIT=submit_cmd_str,
                SUBMIT_DIR=os.path.join(
                    os.path.abspath(args.workdir), "vcf",
                    pipeline + "." + donor_tumor + ".dir",
                    uuid_map[pipeline][donor]),
                KEY=args.keyfile)

            # output the uploader script; the file is opened only after its
            # contents were built, so a failed lookup cannot leave an empty
            # script behind
            script_path = os.path.join(args.workdir,
                                       "%s.%s.sh" % (pipeline, donor))
            with open(script_path, "w") as handle:
                handle.write(script)
def run_uploadprep(args):
    """Stage pipeline outputs in ``args.workdir`` and write GNOS upload scripts.

    Walks every entry returned by ``doc.filter()`` on the store opened from
    ``args.out_base``.  Entries in state ``ok`` carrying a ``donor:<id>`` tag
    for a donor on the Synapse work list are processed:

    * job metrics are collected per donor / job / tool;
    * visible ``vcf`` / ``vcf_bgzip`` outputs are copied into the work
      directory under their submission name (plain ``vcf`` files are
      additionally compressed with ``bgzip``);
    * the path of a ``broad.tar.gz`` output is remembered.

    For each donor that has results in all three subsets (``broad``, ``muse``
    and ``broad_tar``) the function then writes, per subset, a
    ``<subset>.<donor>.timing.json`` file and a ``<subset>.<donor>.sh`` script
    that prepares the files and invokes ``gnos_upload_vcf.pl``.

    Args:
        args: parsed command-line namespace.  Reads ``workdir``, ``out_base``,
            ``keyfile``, ``upload_url``, ``study``, ``pipeline_src``,
            ``pipeline_name``, ``pipeline_version``, ``vm_instance_type``,
            ``vm_instance_cores``, ``vm_instance_mem_gb`` and
            ``vm_location_code``.

    Raises:
        Exception: if a visible VCF output cannot be attributed to a known
            pipeline, variant type or call type.
        subprocess.CalledProcessError: if ``bgzip`` exits non-zero.
        OSError: if ``bgzip`` cannot be started.
    """
    if not os.path.exists(args.workdir):
        os.mkdir(args.workdir)
    doc = from_url(args.out_base)
    file_map = {
        'broad' : {},
        'muse' : {},
        'broad_tar' : {}
    }

    syn = synapseclient.Synapse()
    syn.login()

    wl_map = {}
    job_map = {}
    for ent in synqueue.listAssignments(syn, list_all=True, **config):
        wl_map[ent['id']] = ent['meta']

    # One UUID table per upload subset; entries without a value get a fresh
    # uuid4 through the orSet callback.
    uuid_map = {}
    for subset, table in (('broad', "Broad_VCF_UUID"),
                          ('muse', "Muse_VCF_UUID"),
                          ('broad_tar', "Broad_TAR_UUID")):
        uuid_map[subset] = synqueue.getValues(syn, table, orSet=lambda x: str(uuid.uuid4()), **config)

    broad_callers = ['broad-dRanger', 'broad-dRanger_snowman', 'broad-snowman', 'broad-mutect']

    #scan through all of the docs
    for _doc_id, entry in doc.filter():
        donor = None
        #look for docs with donor tags
        if 'tags' in entry and entry.get('state') == 'ok':
            for tag in entry['tags']:
                tmp = tag.split(":")
                # A bare "donor" tag carries no id; skip it instead of
                # failing with IndexError.
                if tmp[0] == 'donor' and len(tmp) > 1:
                    donor = tmp[1]
        if donor is None or donor not in wl_map:
            continue

        if donor not in job_map:
            job_map[donor] = {}
        #scan out the job metrics for this job
        if 'job' in entry and 'job_metrics' in entry['job']:
            job_id = entry['job']['id']
            tool_id = entry['job']['tool_id']
            metrics = {}
            for met in entry['job']['job_metrics']:
                metrics[met['name']] = met['raw_value']
            job_map[donor][job_id] = { tool_id : metrics }
        donor_tumor = wl_map[donor]['Tumour_WGS_alignment_GNOS_analysis_IDs']

        #look for the vcf output files
        if entry.get('visible', False) and entry.get('extension', None) in ["vcf", "vcf_bgzip"]:
            #fill out the info depending on which caller created the file
            caller = entry['name'].split('.')[0]
            if caller in ['MUSE_1']:
                pipeline = "muse"
                method = entry['name'].replace(".", "-")
                variant_type = 'somatic'
                call_type = 'snv_mnv'
            elif caller in broad_callers:
                pipeline = "broad"
                method = caller
                if 'somatic' in entry['name']:
                    variant_type = 'somatic'
                elif 'germline' in entry['name']:
                    variant_type = 'germline'
                else:
                    raise Exception("Unknown variant type")
                if 'snv_mnv.vcf' in entry['name']:
                    call_type = 'snv_mnv'
                elif 'sv.vcf' in entry['name']:
                    call_type = 'sv'
                elif 'indel.vcf' in entry['name']:
                    call_type = 'indel'
                else:
                    raise Exception("Unknown call type: %s" % (entry['name']))
            else:
                raise Exception("Unknown pipeline %s" % (entry['name']))

            datestr = datetime.datetime.now().strftime("%Y%m%d")
            name = "%s.%s.%s.%s.%s" % (donor_tumor, method, datestr, variant_type, call_type)
            # The dot is escaped so only a literal ".vcf" suffix is stripped.
            name = re.sub(r'\.vcf$', '', name)
            if entry['extension'] == 'vcf':
                file_name = name + ".vcf"
            else:
                file_name = name + ".vcf.gz"

            target = Target(uuid=entry['uuid'])
            if doc.size(target) > 0:
                src_file = doc.get_filename(target)
                dst_file = os.path.join(args.workdir, file_name)
                shutil.copy(src_file, dst_file)
                #if the file wasn't compressed already, go ahead and do that
                if entry['extension'] == 'vcf':
                    # Argument list + redirected stdout instead of a shell
                    # string, so paths with spaces or shell metacharacters
                    # are handled safely.
                    with open(dst_file + ".gz", "wb") as gz_handle:
                        subprocess.check_call(["bgzip", "-c", dst_file], stdout=gz_handle)
                    dst_file = dst_file + ".gz"

                #add file to output map
                file_map[pipeline].setdefault(donor, []).append(os.path.basename(dst_file))
        elif entry['name'] == "broad.tar.gz":
            target = Target(uuid=entry['uuid'])
            file_map['broad_tar'][donor] = [ doc.get_filename(target) ]

    # Regroup the metrics as donor -> tool -> [metrics of each job].
    timing_map = {}
    for donor, jobs in job_map.items():
        timing_map[donor] = {}
        for job_info in jobs.values():
            for tool_id, metrics in job_info.items():
                timing_map[donor].setdefault(tool_id, []).append(metrics)

    # Number of subsets in which each donor has at least one result.
    result_counts = {}
    for donors in file_map.values():
        for donor in donors:
            result_counts[donor] = result_counts.get(donor, 0) + 1

    script_template = string.Template(
        "#!/bin/bash\n"
        "set -ex\n"
        "${PREP}\n"
        "${SUBMIT}\n"
        "echo $$? > $$0.submitted\n"
        "#pushd ${SUBMIT_DIR}\n"
        "#gtupload -v -c ${KEY} -u ./manifest.xml\n"
        "#ECODE=$$?\n"
        "#popd\n"
        "#echo $$ECODE > $$0.uploaded\n")

    #go through every pipeline
    for pipeline, donors in file_map.items():
        #for that pipeline go through every donor
        for donor, files in donors.items():
            #we're only outputting data for work-list donors that have
            #results in all three subsets
            if donor not in wl_map or result_counts[donor] != 3:
                continue

            #output the timing json
            timing_json = os.path.abspath(os.path.join(args.workdir, "%s.%s.timing.json" % (pipeline, donor)))
            with open(timing_json, "w") as handle:
                handle.write(json.dumps(timing_map[donor]))

            meta = wl_map[donor]
            urls = [
                "%scghub/metadata/analysisFull/%s" % (meta['Normal_WGS_alignment_GNOS_repos'], meta['Normal_WGS_alignment_GNOS_analysis_ID']),
                "%scghub/metadata/analysisFull/%s" % (meta['Tumour_WGS_alignment_GNOS_repos'], meta['Tumour_WGS_alignment_GNOS_analysis_IDs'])
            ]
            donor_tumor = meta['Tumour_WGS_alignment_GNOS_analysis_IDs']
            related_uuids = [uuid_map[p][donor] for p in uuid_map if p != pipeline]

            if pipeline in ['broad', 'muse']:
                prep_lines = []
                for vcf in files:
                    prep_lines.append("tabix -p vcf %s\n" % (vcf))
                    prep_lines.append("mv %s.tbi %s.idx\n" % (vcf, vcf))
                    prep_lines.append("md5sum %s | awk '{print$1}' > %s.md5\n" % (vcf, vcf))
                    prep_lines.append("md5sum %s.idx | awk '{print$1}' > %s.idx.md5\n\n" % (vcf, vcf))
                prep_cmd_str = "".join(prep_lines)
                file_opts = [
                    " --vcfs %s " % (",".join(files)),
                    " --vcf-md5sum-files %s " % (",".join("%s.md5" % i for i in files)),
                    " --vcf-idxs %s" % (",".join("%s.idx" % i for i in files)),
                    " --vcf-idx-md5sum-files %s" % (",".join("%s.idx.md5" % i for i in files)),
                ]
                subset_opts = [
                    " --vm-location-code %s" % args.vm_location_code,
                    " --timing-metrics-json %s" % (timing_json),
                    " --workflow-file-subset %s" % (pipeline),
                ]
            else:
                # 'broad_tar' -- the only other key of file_map.
                prep_cmd_str = ""
                new_files = []
                # Each donor holds exactly one tarball path here; with more
                # than one the last remap command would win (as before).
                for tar in files:
                    basename = donor_tumor + ".broad.intermediate"
                    prep_cmd_str = "%s/remap_broad_tar.py %s %s %s --rename %s %s" % (
                        os.path.dirname(os.path.abspath(__file__)),
                        tar,
                        "./",
                        basename,
                        donor, donor_tumor
                    )
                    new_files.append( basename + ".tar" )
                file_opts = [
                    " --tarballs %s " % (",".join(new_files)),
                    " --tarball-md5sum-files %s " % (",".join("%s.md5" % i for i in new_files)),
                ]
                # NOTE(review): unlike the VCF subsets, the tarball command
                # never passed --vm-location-code; preserved as-is -- confirm
                # whether that omission is intentional.
                subset_opts = [
                    " --workflow-file-subset %s" % (pipeline),
                    " --timing-metrics-json %s" % (timing_json),
                ]

            # Options are emitted in the same order as before so the
            # generated scripts stay byte-identical.
            submit_parts = [
                "perl -I /opt/gt-download-upload-wrapper/gt-download-upload-wrapper-2.0.11/lib",
                " /opt/vcf-uploader/vcf-uploader-2.0.5/gnos_upload_vcf.pl",
                " --metadata-urls %s" % (",".join(urls)),
            ]
            submit_parts.extend(file_opts)
            submit_parts.extend([
                " --outdir %s.%s.dir" % (pipeline, donor_tumor),
                " --key %s " % (args.keyfile),
                " --upload-url %s" % (args.upload_url),
                " --study-refname-override %s" % (args.study),
                " --workflow-url '%s'" % args.pipeline_src,
                " --workflow-src-url '%s'" % args.pipeline_src,
                " --workflow-name '%s'" % args.pipeline_name,
                " --workflow-version '%s'" % args.pipeline_version,
                " --vm-instance-type '%s'" % args.vm_instance_type,
                " --vm-instance-cores %s" % args.vm_instance_cores,
                " --vm-instance-mem-gb %s" % args.vm_instance_mem_gb,
            ])
            submit_parts.extend(subset_opts)
            submit_parts.extend([
                " --related-file-subset-uuids %s" % (",".join(related_uuids)),
                " --uuid %s" % (uuid_map[pipeline][donor]),
                # " --skip-upload" can be appended here for a dry run
            ])
            submit_cmd_str = "".join(submit_parts)

            script = script_template.substitute(
                PREP=prep_cmd_str, SUBMIT=submit_cmd_str,
                SUBMIT_DIR=os.path.join(os.path.abspath(args.workdir), "vcf", pipeline + "." + donor_tumor + ".dir", uuid_map[pipeline][donor]),
                KEY=args.keyfile
            )

            #output the uploader script; the file is opened only after its
            #contents were built, so a failed lookup cannot leave an empty
            #script behind
            with open(os.path.join(args.workdir, "%s.%s.sh" % (pipeline, donor)), "w") as handle:
                handle.write(script)