コード例 #1
0
ファイル: run_bwa_mem.py プロジェクト: rruizcor/CFseq
def create_sorted_bam(samfile, outlabel, logfh, force, keepsam):
    """Coordinate-sort a SAM file into a BAM file with Picard SortSam.

    Args:
        samfile: path of the input SAM file.
        outlabel: output prefix; the BAM is written to outlabel + '.bam'.
        logfh: writable handle receiving log lines and SortSam's stderr.
        force: forwarded to have_file() when checking for an existing BAM.
        keepsam: when false, the input SAM is removed once the BAM exists.

    Returns:
        The BAM path. It is returned even when SortSam failed, as before,
        so callers keep working; the failure is written to logfh.
    """
    bamfile = outlabel + '.bam'
    logfh.write("\nSorting sam: {}\n".format(samfile))
    logfh.write("Creating bam: {}\n".format(bamfile))
    cmd = picardExe[:] + ['SortSam', 'I='+samfile, 'O='+bamfile,
                          'SORT_ORDER=coordinate']
    logfh.write(" ".join(cmd)+"\n")
    sort_ok = True
    if have_file(bamfile, force, stderr=logfh):
        logfh.write("Already have bam file: {}\n".format(bamfile))
    else:
        # Flush first so our log lines precede the child's stderr output.
        logfh.flush()
        retcode = subprocess.call(cmd, stderr=logfh)
        if retcode != 0:
            sort_ok = False
            logfh.write("SortSam exited with status {}\n".format(retcode))
    # Never discard the SAM after a failed sort: a partially written BAM
    # may exist on disk, and the SAM is then the only intact copy.
    if sort_ok and not keepsam and have_file(bamfile, stderr=logfh):
        remove_file(samfile, stderr=logfh)
    return bamfile
コード例 #2
0
def run_gatk(bamfile, label, ref, args, logfh=sys.stderr):
    """Run GATK HaplotypeCaller on a BAM file and return the VCF path.

    Args:
        bamfile: input BAM path (passed as -I).
        label: output prefix; the VCF is label + '.gatk.vcf', or
            label + '.gatk-cohort.vcf' when args.cohort is set.
        ref: reference path (passed as -R).
        args: options object; reads cohort, intervals, dbsnp, maxreads,
            debug and force.
        logfh: handle receiving the command line and GATK's stderr.

    Returns:
        Path of the output VCF.

    Exits with status 1 when the VCF does not exist afterwards.
    """
    caller = 'HaplotypeCaller'
    # Build the suffix directly: str.replace('.vcf', ...) on the full name
    # would also rewrite any '.vcf' that occurs inside label itself.
    suffix = '.gatk-cohort.vcf' if args.cohort else '.gatk.vcf'
    outvcf = label + suffix
    cmd = gatkExe[:] + ['-T', caller, '--genotyping_mode', 'DISCOVERY']
    cmd.extend(['-R', ref, '-I', bamfile, '-o', outvcf])
    if args.intervals:
        cmd.extend(['-L', args.intervals])
    if args.dbsnp:
        cmd.extend(["--dbsnp", args.dbsnp])
    if args.maxreads:
        cmd.extend(['--maxReadsInRegionPerSample', str(args.maxreads)])
    if args.debug:
        bamout = label + '.gatk-debug.bam'
        cmd.extend(['-bamout', bamout])
    if args.cohort:
        # gVCF mode plus the index settings that accompany it.
        cmd.extend(['-ERC', 'GVCF', '--variant_index_type', 'LINEAR'])
        cmd.extend(['--variant_index_parameter', '128000'])
    # Disabled option kept from the original source:
    #    if args.basequal:
    #        cmd.extend(['-mbq', str(args.basequal)])
    logfh.write("\nGATK: " + " ".join(cmd) + "\n")
    if have_file(outvcf, args.force, stderr=logfh):
        logfh.write("  Already have {}.\n".format(outvcf))
    else:
        # Flush so the logged command precedes the child's stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)
    if not os.path.isfile(outvcf):
        logfh.write("  Failed to create {}\n".format(outvcf))
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    return outvcf
コード例 #3
0
ファイル: calculate_depths.py プロジェクト: rruizcor/CFseq
def calculate_depths(label, infiles, args):
    """Compute per-variant depths for one sample and write a depths file.

    Args:
        label: key into infiles selecting the sample.
        infiles: mapping of label -> {'bam': path, 'vcf': path}.
        args: options object; reads outdir, logdir, tag and force (and is
            forwarded to the depth helpers).

    The visible code returns nothing. Any exception raised while the
    depths are computed is re-raised with the VCF path appended to its
    args so the failing sample can be identified.
    """
    bamfile = infiles[label]['bam']
    vcffile = infiles[label]['vcf']
    outlabel = get_outlabel(bamfile, 'bam', args.outdir)
    loglabel = get_outlabel(bamfile, 'bam', args.logdir) if args.logdir \
               else outlabel
    logfile = loglabel + ".calculate_depths.log"
    outfile = outlabel + '.depths{}.txt'.format(args.tag)
    sys.stderr.write("  Calculating depths for {}\n".format(outlabel))
    sys.stderr.write("    Log file: {}\n".format(logfile))
    # The context manager closes the log on success and on error alike.
    with open(logfile, 'w') as logfh:
        logfh.write("Bam file: {}\n".format(bamfile))
        logfh.write("VCF file: {}\n\n".format(vcffile))
        try:
            if have_file(outfile, args.force, stderr=logfh):
                logfh.write("  Already have {}\n".format(outfile))
            else:
                logfh.write("Start time: {}\n".format(timestamp()))
                (vcffields, variants) = parse_vcf(vcffile, logfh)
                depths = find_variant_depths(variants, bamfile, logfh, args)
                add_fwd_rev_depths(depths, variants, bamfile, args)
                print_depths(outfile, variants, depths, vcffields, logfh)
                logfh.write("End time: {}\n".format(timestamp()))
        except Exception as e:
            # 'as' form is valid on Python 2.6+ and Python 3.
            e.args += (vcffile, )
            raise
コード例 #4
0
def create_annovar_input_file(vcffiles, outlabel, args):
    """Convert VCF files into one de-duplicated ANNOVAR input file.

    Each VCF is run through the ANNOVAR conversion script; the first five
    columns of every output line are kept, the two position columns are
    mapped with cftr.CFTR_to_hg19, and the sorted unique lines are written
    to outlabel + '.avinput'.

    Returns the path of the .avinput file; exits with status 1 if it is
    missing after writing.
    """
    outfile = outlabel + '.avinput'
    cmd = [annovarInputExe, '-format', 'vcf4', '-allsample', '-withfreq',
           '-includeinfo']
    sys.stderr.write("\nCreating annovar input file: "+" ".join(cmd)+"\n")
    if have_file(outfile, args.force):
        sys.stderr.write("  Already have {}.\n".format(outfile))
        return outfile

    converted = []
    for vcffile in vcffiles:
        sys.stderr.write("    Running: {} {}\n".format(" ".join(cmd),
                         vcffile))
        output = subprocess.check_output(cmd + [vcffile])
        if not output:
            sys.stderr.write("  No output.\n")
            continue
        for rawline in output.split("\n"):
            fields = rawline.split("\t")[0:5]
            if len(fields) != 5:
                continue
            # Both conversions use the original chromosome name; the
            # chromosome column is replaced only after the second call.
            _, new_start = cftr.CFTR_to_hg19(fields[0], fields[1])
            fields[1] = new_start
            new_chrom, new_end = cftr.CFTR_to_hg19(fields[0], fields[2])
            fields[0] = new_chrom
            fields[2] = new_end
            converted.append("\t".join([str(f) for f in fields]))

    unique_sorted = list(set(converted))
    unique_sorted.sort()
    with open(outfile, 'w') as ofh:
        ofh.write("\n".join(unique_sorted))
    if not os.path.isfile(outfile):
        sys.stderr.write("  Failed to create {}\n".format(outfile))
        sys.exit(1)
    return outfile
コード例 #5
0
ファイル: run_gatk.py プロジェクト: eulaf/CFseq
def cohort_merge_gvcfs(vcfs, ref, args):
    """Jointly genotype per-sample gVCFs with GATK GenotypeGVCFs.

    Args:
        vcfs: list of gVCF paths, each passed as its own --variant option.
        ref: reference path (passed as -R).
        args: options object; reads cohort (used as the output label),
            outdir, logdir, intervals, dbsnp and force.

    Returns:
        Path of the merged VCF (<label>.gatk-merged.vcf).

    Exits with status 1 when the merged VCF does not exist afterwards.
    """
    sys.stderr.write("Genotyping gVCFs: {} vcfs\n".format(len(vcfs)))
    label = args.cohort
    outlabel = os.path.join(args.outdir, label) if args.outdir else label
    outvcf = outlabel + ".gatk-merged.vcf"
    cmd = gatkExe[:] + ["-T", "GenotypeGVCFs"]
    cmd.extend(["-R", ref, "-o", outvcf])
    if args.intervals:
        cmd.extend(["-L", args.intervals])
    if args.dbsnp:
        cmd.extend(["--dbsnp", args.dbsnp])
    # Add each path as its own argument. Joining into one string and
    # splitting on spaces would break paths that contain spaces and would
    # emit a stray empty argument for an empty list.
    for vcf in vcfs:
        cmd.extend(["--variant", vcf])
    if have_file(outvcf, args.force):
        sys.stderr.write("  Already have {}.\n".format(outvcf))
    else:
        loglabel = os.path.join(args.logdir, label) if args.logdir else label
        logfile = loglabel + ".gatk-merged.log"
        sys.stderr.write("    Log file {}\n".format(logfile))
        with open(logfile, "w") as logfh:
            logfh.write("CMD: {}\n".format(cmd))
            # Flush so the command line precedes GATK's stderr in the log.
            logfh.flush()
            check_call(cmd, stderr=logfh)
    if os.path.isfile(outvcf):
        sys.stderr.write("  Merged gVCF: {}\n".format(outvcf))
    else:
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    return outvcf
コード例 #6
0
ファイル: run_gatk.py プロジェクト: eulaf/CFseq
def run_gatk(bamfile, label, ref, args, logfh=sys.stderr):
    """Call variants on one BAM with GATK HaplotypeCaller.

    Args:
        bamfile: input BAM path (-I).
        label: output prefix. The VCF is written to label + ".gatk.vcf",
            or label + ".gatk-cohort.vcf" when args.cohort is set.
        ref: reference path (-R).
        args: options object; reads cohort, intervals, dbsnp, maxreads,
            debug and force.
        logfh: handle receiving the command line and GATK's stderr.

    Returns:
        Path of the output VCF.

    Exits with status 1 when the VCF is missing after the run.
    """
    caller = "HaplotypeCaller"
    # Choose the suffix up front instead of calling .replace(".vcf", ...)
    # on the full name, which would also alter a ".vcf" inside label.
    if args.cohort:
        outvcf = label + ".gatk-cohort.vcf"
    else:
        outvcf = label + ".gatk.vcf"
    cmd = gatkExe[:] + ["-T", caller, "--genotyping_mode", "DISCOVERY"]
    cmd.extend(["-R", ref, "-I", bamfile, "-o", outvcf])
    if args.intervals:
        cmd.extend(["-L", args.intervals])
    if args.dbsnp:
        cmd.extend(["--dbsnp", args.dbsnp])
    if args.maxreads:
        cmd.extend(["--maxReadsInRegionPerSample", str(args.maxreads)])
    if args.debug:
        bamout = label + ".gatk-debug.bam"
        cmd.extend(["-bamout", bamout])
    if args.cohort:
        # gVCF mode plus the index settings that accompany it.
        cmd.extend(["-ERC", "GVCF", "--variant_index_type", "LINEAR"])
        cmd.extend(["--variant_index_parameter", "128000"])
    # Disabled option kept from the original source:
    #    if args.basequal:
    #        cmd.extend(['-mbq', str(args.basequal)])
    logfh.write("\nGATK: " + " ".join(cmd) + "\n")
    if have_file(outvcf, args.force, stderr=logfh):
        logfh.write("  Already have {}.\n".format(outvcf))
    else:
        # Flush so the logged command precedes the child's stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)
    if not os.path.isfile(outvcf):
        logfh.write("  Failed to create {}\n".format(outvcf))
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    return outvcf
コード例 #7
0
def cohort_merge_gvcfs(vcfs, ref, args):
    """Merge and genotype a cohort's gVCFs via GATK GenotypeGVCFs.

    Args:
        vcfs: list of gVCF paths; each becomes a separate --variant option.
        ref: reference path (-R).
        args: options object; reads cohort (the output label), outdir,
            logdir, intervals, dbsnp and force.

    Returns:
        Path of the merged VCF (<label>.gatk-merged.vcf).

    Exits with status 1 when the merged VCF is missing afterwards.
    """
    sys.stderr.write("Genotyping gVCFs: {} vcfs\n".format(len(vcfs)))
    label = args.cohort
    outlabel = os.path.join(args.outdir, label) if args.outdir else label
    outvcf = outlabel + '.gatk-merged.vcf'
    cmd = gatkExe[:] + ['-T', 'GenotypeGVCFs']
    cmd.extend(['-R', ref, '-o', outvcf])
    if args.intervals:
        cmd.extend(['-L', args.intervals])
    if args.dbsnp:
        cmd.extend(["--dbsnp", args.dbsnp])
    # One argv entry per path: the previous join-then-split(' ') approach
    # split paths containing spaces and produced ['--variant', ''] when
    # vcfs was empty.
    for vcf in vcfs:
        cmd += ['--variant', vcf]
    if have_file(outvcf, args.force):
        sys.stderr.write("  Already have {}.\n".format(outvcf))
    else:
        loglabel = os.path.join(args.logdir, label) if args.logdir else label
        logfile = loglabel + '.gatk-merged.log'
        sys.stderr.write("    Log file {}\n".format(logfile))
        with open(logfile, 'w') as logfh:
            logfh.write("CMD: {}\n".format(cmd))
            # Flush so the command line precedes GATK's stderr in the log.
            logfh.flush()
            check_call(cmd, stderr=logfh)
    if os.path.isfile(outvcf):
        sys.stderr.write("  Merged gVCF: {}\n".format(outvcf))
    else:
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    return outvcf
コード例 #8
0
def run_annovar(annov_input, outlabel, refdir, args):
    """Run ANNOVAR with HGVS output and return its two result paths.

    The outputs are <outlabel>-hgvs.variant_function and
    <outlabel>-hgvs.exonic_variant_function. The run is skipped when
    have_file() reports both as present; otherwise the process exits with
    status 1 if either file is missing after the run.
    """
    prefix = outlabel + '-hgvs'
    variant_fn = prefix + '.variant_function'
    exonic_fn = prefix + '.exonic_variant_function'
    cmd = [annovarExe, '-build', 'hg19', '-hgvs', '-out', prefix,
           annov_input, refdir]
    sys.stderr.write("\nRunning annovar: "+" ".join(cmd)+"\n")

    if have_file(variant_fn, args.force) and have_file(exonic_fn, args.force):
        sys.stderr.write("  Already have {} and {}.\n".format(variant_fn,
                         exonic_fn))
        return (variant_fn, exonic_fn)

    subprocess.check_call(cmd)
    for expected in (variant_fn, exonic_fn):
        if not os.path.isfile(expected):
            sys.stderr.write("  Failed to create {}\n".format(expected))
            sys.exit(1)
    return (variant_fn, exonic_fn)
コード例 #9
0
ファイル: run_bwa_mem.py プロジェクト: rruizcor/CFseq
def run_bwa(sample, outlabel, ref, fqfiles, logfh, force):
    """Align FASTQ files with 'bwa mem' and write the result as SAM.

    Args:
        sample: sample name used for the ID, SM and LB read-group fields.
        outlabel: output prefix; SAM is outlabel + '.sam'.
        ref: reference passed to bwa.
        fqfiles: list of FASTQ paths appended to the command line.
        logfh: handle receiving log lines and bwa's stderr.
        force: forwarded to have_file() for the SAM and BAM checks.

    Returns:
        The SAM path. It is returned even when alignment was skipped
        because the BAM already exists.

    Raises:
        subprocess.CalledProcessError: when bwa exits non-zero.
    """
    # Local import so the block does not rely on a module-level 'import os'.
    import os

    samfile = outlabel + ".sam"
    bamfile = outlabel + ".bam"
    logfh.write("Output sam: {}\n".format(samfile))
    # bwa expects a literal backslash-t between read-group fields.
    readgroup = "\\t".join(['@RG', "ID:"+sample, "SM:"+sample, "PL:Illumina",
                            "LB:"+sample, "PU:unit1"])
    cmd = [bwaExe, 'mem', '-M', '-R', readgroup, ref] + fqfiles
    logfh.write(" ".join(cmd)+"\n")
    if have_file(samfile, force, stderr=logfh):
        logfh.write("Already have sam file: {}\n".format(samfile))
    elif have_file(bamfile, force, stderr=logfh):
        logfh.write("Already have bam file: {}\n".format(bamfile))
    else:
        logfh.flush()
        # Stream bwa's stdout straight to disk instead of buffering the
        # whole alignment in memory with check_output().
        finished = False
        try:
            with open(samfile, 'w') as ofh:
                subprocess.check_call(cmd, stdout=ofh, stderr=logfh)
            finished = True
        finally:
            # As before, a failed run must not leave a SAM behind that a
            # later have_file() check could mistake for a finished result.
            if not finished and os.path.isfile(samfile):
                os.remove(samfile)
    return samfile
コード例 #10
0
def index_bam(bamfile, args, logfh=sys.stderr):
    """Create a .bai index for a BAM file with Picard BuildBamIndex.

    Args:
        bamfile: path of the BAM to index.
        args: options object; reads force.
        logfh: handle receiving log lines and Picard's stderr.

    Indexing is skipped when have_file() reports the index as present.
    Exits with status 1 when the index file is missing afterwards.
    """
    # str.rstrip('bam') removes a trailing run of the characters b/a/m,
    # not the suffix, so strip the extension explicitly.
    # NOTE(review): for names not ending in '.bam' this assumes Picard
    # writes INPUT + '.bai' -- confirm against the Picard version in use.
    if bamfile.endswith('.bam'):
        label = bamfile[:-len('.bam')]
    else:
        label = bamfile
    outidx = label + '.bai'
    cmd = picardExe[:] + ['BuildBamIndex', 'I=' + bamfile]
    if not have_file(outidx, args.force, quiet=True, stderr=logfh):
        logfh.write("\nIndex bam: " + " ".join(cmd) + "\n")
        # Flush so the logged command precedes the child's stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)
    if not os.path.isfile(outidx):
        logfh.write("  Failed to create {}\n".format(outidx))
        sys.stderr.write("  Failed to create {}\n".format(outidx))
        sys.exit(1)
コード例 #11
0
ファイル: run_gatk.py プロジェクト: eulaf/CFseq
def index_bam(bamfile, args, logfh=sys.stderr):
    """Build the .bai index of a BAM file using Picard BuildBamIndex.

    Args:
        bamfile: path of the BAM to index.
        args: options object; reads force.
        logfh: handle receiving log lines and Picard's stderr.

    Nothing is run when have_file() reports the index as present.
    Exits with status 1 when the index file is missing afterwards.
    """
    # rstrip("bam").rstrip(".") strips character sets rather than the
    # ".bam" suffix (e.g. it collapses "x..bam" to "x"), so remove the
    # extension explicitly.
    # NOTE(review): for names without a ".bam" suffix this assumes Picard
    # writes INPUT + ".bai" -- confirm against the Picard version in use.
    suffix = ".bam"
    label = bamfile[:-len(suffix)] if bamfile.endswith(suffix) else bamfile
    outidx = label + ".bai"
    cmd = picardExe[:] + ["BuildBamIndex", "I=" + bamfile]
    if not have_file(outidx, args.force, quiet=True, stderr=logfh):
        logfh.write("\nIndex bam: " + " ".join(cmd) + "\n")
        # Flush so the logged command precedes the child's stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)
    if not os.path.isfile(outidx):
        logfh.write("  Failed to create {}\n".format(outidx))
        sys.stderr.write("  Failed to create {}\n".format(outidx))
        sys.exit(1)
コード例 #12
0
def create_fragment_report(fqpair, frag2primers, read_primer_file, force,
                           logfh):
    """Write a per-fragment primer report and tally fragment statuses.

    Args:
        fqpair: FASTQ file names; each contributes one primer column.
        frag2primers: mapping of fragment name -> dict with keys
            'primers' (fastq name -> primer), 'status' and 'ampsize'.
        read_primer_file: path of the tab-separated report to write.
        force: forwarded to have_file().
        logfh: handle receiving the summary table.

    Returns:
        A dict of counts per status plus 'tot_fragments' and 'paired', or
        None when have_file() reports the report as already present.

    Raises:
        KeyError: if a fragment has a status that is not one of the
            predefined count keys.
    """
    logfh.write("\nCreating fragment report\n")
    if have_file(read_primer_file, force, stderr=logfh):
        logfh.write("  Already have {}\n".format(read_primer_file))
        return
    readnum_patt = re.compile('.*_(R[12]).*')
    readnums = [readnum_patt.sub('\\1_primer', fqfile) for fqfile in fqpair]
    logfh.write("  Writing {}\n".format(read_primer_file))
    fragcounts = {
        'tot_fragments': 0,
        'unidentified': 0,
        'singleton': 0,
        'paired-good': 0,
        'paired-other': 0,
        'misprime': 0,
    }
    with open(read_primer_file, 'w') as ofh:
        ofh.write("Fragment\t" + "\t".join(readnums) +
                  "\tStatus\tEstimated_fragment_size\n")
        for fragname in sorted(frag2primers.keys()):
            info = frag2primers[fragname]
            row = [fragname]
            for fqfile in fqpair:
                # Empty cell when this read has no primer assignment.
                row.append(info['primers'].get(fqfile, '')
                           if hasattr(info['primers'], 'get')
                           else (info['primers'][fqfile]
                                 if fqfile in info['primers'] else ''))
            status = info['status']
            fragcounts[status] += 1
            row.extend([status, info['ampsize']])
            ofh.write("\t".join([str(r) for r in row]) + "\n")
    total = len(frag2primers)
    fragcounts['tot_fragments'] = total
    logfh.write("{:<12}\t{:<6} fragments\t{:<5}%\n".format(
        "Fragment status", "Number", "Percent"))
    for k in fragcounts:
        # Guard against an empty input: report 0% instead of dividing by 0.
        perc = fragcounts[k] * 100.0 / total if total else 0.0
        logfh.write("{:<12}\t{:>6} fragments\t{:5.1f}%\n".format(
            k, fragcounts[k], perc))
    logfh.write("{:<12}\t{:>6} fragments\n".format("Total", total))
    # 'paired' is added after the table so it is not printed as a row.
    fragcounts['paired'] = fragcounts['paired-good'] + \
                           fragcounts['paired-other']
    if os.path.isfile(read_primer_file):
        sys.stderr.write("    Fragment report {}\n".format(read_primer_file))
    return fragcounts
コード例 #13
0
def create_primer_report(primerreads,
                         primerlist,
                         primer_read_file,
                         logfh,
                         force=False,
                         debug=False):
    """Create file listing each primer and the reads that match it.

    Include counts of unidentified and mismatched reads.  Tally the number
    of reads amplified by each primer pair.

    Args:
        primerreads: mapping of primer name -> list of read names; must
            also contain the keys 'unidentified' and 'misprime'.
        primerlist: list of primer names, in report order.
        primer_read_file: path of the tab-separated report to write.
        logfh: handle receiving log lines.
        force: forwarded to have_file().
        debug: when true, also log a per-primer percentage table.

    Returns:
        (primercounts, primerkeys): read counts per primer pair plus
        'tot_reads', and the pair names in first-seen order with
        'tot_reads' first.  Returns None when have_file() reports the
        report as already present.
    """
    logfh.write("\nCreating primer report\n")
    if have_file(primer_read_file, force, stderr=logfh):
        logfh.write("  Already have {}\n".format(primer_read_file))
        return
    logfh.write("  Writing {}\n".format(primer_read_file))
    tot_reads = 0
    keylist = primerlist + ['unidentified', 'misprime']
    primercounts = {}
    primerkeys = []
    with open(primer_read_file, 'w') as ofh:
        ofh.write("Primer\tNum_reads\tReads\n")
        for primer in keylist:
            numreads = len(primerreads[primer])
            tot_reads += numreads
            readlist = ", ".join(primerreads[primer])
            # Drop exactly one '_F'/'_R' direction suffix.  str.rstrip()
            # strips a character set, so it would also eat trailing
            # 'F', 'R' or '_' characters that belong to the pair name.
            if primer.endswith('_F') or primer.endswith('_R'):
                primerpair = primer[:-2]
            else:
                primerpair = primer
            if primerpair in primercounts:
                primercounts[primerpair] += numreads
            else:
                primerkeys.append(primerpair)
                primercounts[primerpair] = numreads
            ofh.write("{}\t{}\t{}\n".format(primer, numreads, readlist))
    if debug:
        logfh.write("{:<12}\t{:<6} reads\t{:<5}%\n".format(
            "Primer", "Number", "Percent"))
        for k in keylist:
            numreads = len(primerreads[k])
            # Report 0% rather than dividing by zero when there are no reads.
            perc = numreads * 100.0 / tot_reads if tot_reads else 0.0
            logfh.write("{:<12}\t{:>6} reads\t{:5.2f}%\n".format(
                k, numreads, perc))
        logfh.write("{:<12}\t{:<6} reads\n".format("Total", tot_reads))
    primercounts['tot_reads'] = tot_reads
    primerkeys.insert(0, 'tot_reads')
    if os.path.isfile(primer_read_file):
        sys.stderr.write(
            "    Primer read report {}\n".format(primer_read_file))
    return (primercounts, primerkeys)
コード例 #14
0
def table_annovar(annov_input, outlabel, refdir, args):
    """Run ANNOVAR's table script and return the multianno output path.

    The protocol and operation lists come from annovar_protocol().  The
    run is skipped when have_file() reports
    <outlabel>.hg19_multianno.txt as present; otherwise the process exits
    with status 1 if that file is missing after the run.
    """
    multianno = outlabel + '.hg19_multianno.txt'
    protocols, operations = annovar_protocol(annovarDBs, refdir)
    # NOTE(review): '-out' is given twice with the same value; kept so the
    # command line stays identical to the original.
    cmd = [
        annovarTableExe, annov_input, refdir, '-out', outlabel,
        '-buildver', 'hg19', '-out', outlabel, '-nastring', '.',
        '-protocol', protocols, '-operation', operations,
    ]
    sys.stderr.write("\nRunning table_annovar: "+" ".join(cmd)+"\n")
    if have_file(multianno, args.force):
        sys.stderr.write("  Already have {}.\n".format(multianno))
        return (multianno)
    subprocess.check_call(cmd)
    if not os.path.isfile(multianno):
        sys.stderr.write("  Failed to create {}\n".format(multianno))
        sys.exit(1)
    return (multianno)
コード例 #15
0
ファイル: separate_vcf.py プロジェクト: eulaf/CFseq
def run_select_variants(vcffile, outvcf, sample, ref, logfh, args):
    """Extract one sample's variants from a VCF with GATK SelectVariants.

    Writes to outvcf and returns that path.  The run is skipped when
    have_file() reports outvcf as present.  A missing output after the run
    is only reported on stderr; this function does not exit.
    """
    logfh.write("\n-- SelectVariants --\n")
    cmd = gatkExe[:] + [
        '-T', 'SelectVariants', '--excludeNonVariants',
        '-R', ref,
        '--variant', vcffile,
        '-o', outvcf,
        '-sn', sample,
    ]
    logfh.write("  Sample {}:\t{}\n".format(sample, outvcf))

    if have_file(outvcf, args.force):
        sys.stderr.write("  Already have {}.\n".format(outvcf))
        return outvcf

    logfh.write(" ".join(cmd)+"\n")
    # Flush so the logged command precedes GATK's stderr output.
    logfh.flush()
    check_call(cmd, stderr=logfh)
    if not os.path.isfile(outvcf):
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
    else:
        sys.stderr.write("  Created {}\n".format(outvcf))
    return outvcf
コード例 #16
0
ファイル: primer_trim.py プロジェクト: eulaf/CFseq
def create_primer_report(primerreads, primerlist, primer_read_file, logfh,
                         force=False, debug=False):
    """Create file listing each primer and the reads that match it.

    Include counts of unidentified and mismatched reads.  Tally the number
    of reads amplified by each primer pair.

    Args:
        primerreads: mapping of primer name -> list of read names; must
            also contain the keys 'unidentified' and 'misprime'.
        primerlist: list of primer names, in report order.
        primer_read_file: path of the tab-separated report to write.
        logfh: handle receiving log lines.
        force: forwarded to have_file().
        debug: when true, also log a per-primer percentage table.

    Returns:
        (primercounts, primerkeys): read counts per primer pair plus
        'tot_reads', and the pair names in first-seen order with
        'tot_reads' first.  Returns None when have_file() reports the
        report as already present.
    """
    logfh.write("\nCreating primer report\n")
    if have_file(primer_read_file, force, stderr=logfh):
        logfh.write("  Already have {}\n".format(primer_read_file))
        return
    logfh.write("  Writing {}\n".format(primer_read_file))
    tot_reads = 0
    keylist = primerlist + ['unidentified', 'misprime']
    primercounts = {}
    primerkeys = []
    with open(primer_read_file, 'w') as ofh:
        ofh.write("Primer\tNum_reads\tReads\n")
        for primer in keylist:
            reads = primerreads[primer]
            numreads = len(reads)
            tot_reads += numreads
            readlist = ", ".join(reads)
            # Remove a single trailing '_F' or '_R'.  The previous
            # rstrip('_F').rstrip('_R') stripped character sets and could
            # shorten the pair name itself (e.g. 'exF_F' -> 'ex').
            primerpair = primer
            for direction in ('_F', '_R'):
                if primer.endswith(direction):
                    primerpair = primer[:-len(direction)]
                    break
            if primerpair in primercounts:
                primercounts[primerpair] += numreads
            else:
                primerkeys.append(primerpair)
                primercounts[primerpair] = numreads
            ofh.write("{}\t{}\t{}\n".format(primer, numreads, readlist))
    if debug:
        logfh.write("{:<12}\t{:<6} reads\t{:<5}%\n".format(
            "Primer", "Number", "Percent"))
        for k in keylist:
            numreads = len(primerreads[k])
            # Avoid ZeroDivisionError when no reads were seen at all.
            perc = numreads*100.0/tot_reads if tot_reads else 0.0
            logfh.write("{:<12}\t{:>6} reads\t{:5.2f}%\n".format(k,
                numreads, perc))
        logfh.write("{:<12}\t{:<6} reads\n".format("Total", tot_reads))
    primercounts['tot_reads'] = tot_reads
    primerkeys.insert(0, 'tot_reads')
    if os.path.isfile(primer_read_file):
        sys.stderr.write("    Primer read report {}\n".format(primer_read_file))
    return (primercounts, primerkeys)
コード例 #17
0
ファイル: primer_trim.py プロジェクト: eulaf/CFseq
def run_aligner(queryfiles, primerfa, outlabel, logfh, force):
    """Align each query file against the primer FASTA with CM_EXE.

    The stdout of every run is appended to one file,
    <outlabel>-primers.cm.out.

    Returns the output path after running the aligner.
    NOTE(review): when have_file() reports the output as present the
    return value is the tuple (outfile, None) instead -- callers must cope
    with both shapes.

    A subprocess.CalledProcessError is reported on stderr and re-raised.
    """
    outfile = outlabel + "-primers.cm.out"
    logfh.write("\nAlignment output in {}\n".format(outfile))
    if have_file(outfile, force, stderr=logfh):
        logfh.write("      Already have {}\n".format(outfile))
        return (outfile, None)

    aligner_opts = ["-minscore", "12", "-minmatch", "8", "-tags",
                    "-alignments"]
    try:
        with open(outfile, 'w') as ofh:
            for queryfile in queryfiles:
                logfh.write("    Aligning {}\n".format(queryfile))
                cmd = [CM_EXE, queryfile, primerfa] + aligner_opts
                logfh.write("      Running {}\n".format(" ".join(cmd)))
                # Flush so log lines precede the child's stderr output.
                logfh.flush()
                subprocess.check_call(cmd, stdout=ofh, stderr=logfh)
    except subprocess.CalledProcessError:
        sys.stderr.write("Error running cross_match for {}\n".format(outfile))
        raise

    if not os.path.isfile(outfile):
        sys.stderr.write("Output file {} not found.\n".format(outfile))
    return outfile
コード例 #18
0
ファイル: primer_trim.py プロジェクト: eulaf/CFseq
def trim_primers(fqfile, alignout, max_trim_len, primerinfo, outlabel, logfh,
                 args):
    """Trim primers from reads; return (trimmed fastq, sequence-list file).

    For each read in fqfile:
      * with an alignment whose primer has a true 'overlap' entry in
        primerinfo, the read is cut at the alignment's 'end' + 'left';
      * with an alignment to a primer without overlap, the read is kept
        whole;
      * without any alignment, the first max_trim_len bases are removed.

    The names of all reads are written, one per line, to
    <outlabel>.seqlist.txt.
    """
    trimmedfq = outlabel + ".trimmed.fastq"
    seqfile = outlabel + ".seqlist.txt"
    logfh.write("    Trimming fq: {}\n".format(trimmedfq))
    if have_files([trimmedfq, seqfile], args.force, stderr=logfh):
        logfh.write("      Already have {}\n".format(trimmedfq))
        return (trimmedfq, seqfile)

    alignments = parse_alignout(alignout)
    read_names = []
    with open(trimmedfq, 'w') as outfq:
        for record in FastQParser(fqfile):
            read_id = record.id
            read_names.append(read_id)
            if read_id not in alignments:
                # No primer hit: trim the default max_primer_len+2 bases.
                kept = record[max_trim_len:]
            else:
                hit = alignments[read_id]
                primer = hit['primer']
                if primerinfo[primer]['overlap']:
                    cut_at = hit['end'] + hit['left']
                    kept = record[cut_at:]
                    debug_line = "{}\tTrimming\t{}\n"
                else:
                    kept = record
                    debug_line = "{}\tNot trimming\t{}\n"
                if args.debug:
                    logfh.write(debug_line.format(primer, read_id))
            outfq.write("{}\n".format(kept.fastq()))

    logfh.write("    Seq list: {}\n".format(seqfile))
    # NOTE(review): a truthy have_file() result here aborts the program,
    # and sys.exit() without an argument exits with status 0.
    if have_file(seqfile, True, stderr=logfh):
        logfh.write("      Still have {}\n".format(seqfile))
        sys.exit()
    with open_file(seqfile, 'w') as seq_fh:
        seq_fh.write("\n".join(read_names)+"\n")
    return (trimmedfq, seqfile)
コード例 #19
0
ファイル: primer_trim.py プロジェクト: eulaf/CFseq
def create_fragment_report(fqpair, frag2primers, read_primer_file, force,
                           logfh):
    """Write a per-fragment primer report and tally fragment statuses.

    Args:
        fqpair: FASTQ file names; each contributes one primer column.
        frag2primers: mapping of fragment name -> dict with keys
            'primers' (fastq name -> primer), 'status' and 'ampsize'.
        read_primer_file: path of the tab-separated report to write.
        force: forwarded to have_file().
        logfh: handle receiving the summary table.

    Returns:
        A dict of counts per status plus 'tot_fragments' and 'paired', or
        None when have_file() reports the report as already present.

    Raises:
        KeyError: if a fragment has a status that is not one of the
            predefined count keys.
    """
    logfh.write("\nCreating fragment report\n")
    if have_file(read_primer_file, force, stderr=logfh):
        logfh.write("  Already have {}\n".format(read_primer_file))
        return
    readnum_patt = re.compile('.*_(R[12]).*')
    readnums = [ readnum_patt.sub('\\1_primer', fqfile) for fqfile in fqpair ]
    logfh.write("  Writing {}\n".format(read_primer_file))
    fragcounts = { 'tot_fragments':0, 'unidentified': 0, 'singleton': 0,
                   'paired-good': 0, 'paired-other': 0, 'misprime': 0, }
    with open(read_primer_file, 'w') as ofh:
        ofh.write("Fragment\t" + "\t".join(readnums) +
                  "\tStatus\tEstimated_fragment_size\n")
        for fragname in sorted(frag2primers.keys()):
            row = [fragname,]
            for fqfile in fqpair:
                # Empty cell when this read has no primer assignment.
                if fqfile in frag2primers[fragname]['primers']:
                    row.append(frag2primers[fragname]['primers'][fqfile])
                else:
                    row.append('')
            status = frag2primers[fragname]['status']
            fragcounts[status] += 1
            row.extend([status, frag2primers[fragname]['ampsize']])
            ofh.write("\t".join([str(r) for r in row])+"\n")
    tot_fragments = len(frag2primers)
    fragcounts['tot_fragments'] = tot_fragments
    logfh.write("{:<12}\t{:<6} fragments\t{:<5}%\n".format("Fragment status",
                "Number", "Percent"))
    for k in fragcounts:
        # With no fragments at all, report 0% instead of dividing by zero.
        if tot_fragments:
            perc = fragcounts[k]*100.0/tot_fragments
        else:
            perc = 0.0
        logfh.write("{:<12}\t{:>6} fragments\t{:5.1f}%\n".format(k,
                    fragcounts[k], perc))
    logfh.write("{:<12}\t{:>6} fragments\n".format("Total",
                tot_fragments))
    # 'paired' is added after the table so it is not printed as a row.
    fragcounts['paired'] = fragcounts['paired-good'] + \
                           fragcounts['paired-other']
    if os.path.isfile(read_primer_file):
        sys.stderr.write("    Fragment report {}\n".format(read_primer_file))
    return fragcounts
コード例 #20
0
ファイル: run_freebayes.py プロジェクト: eulaf/CFseq
def run_freebayes(bamfile, reffile, bedfile, logfh, args):
    """Call variants with FreeBayes and return the output VCF path.

    The VCF is <label>.freebayes.vcf, where label comes from
    get_outlabel(bamfile, args.outdir).  The optional -q and -C options
    are taken from args.basequal and args.minaltcount.  The run is skipped
    when have_file() reports the VCF as present; the process exits with
    status 1 when the VCF is missing afterwards.
    """
    outvcf = get_outlabel(bamfile, args.outdir) + '.freebayes.vcf'
    cmd = [freebayesExe, '-f', reffile, '-t', bedfile,
           '-b', bamfile, '-v', outvcf]
    # Disabled option kept from the original source:
    #    cmd.extend(['--max-complex-gap', '5',])
    optional = (('-q', args.basequal), ('-C', args.minaltcount))
    for flag, value in optional:
        if value:
            cmd += [flag, str(value)]
    logfh.write("\nCMD: "+" ".join(cmd)+"\n")

    if not have_file(outvcf, args.force):
        # Flush so the logged command precedes FreeBayes' stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)
    else:
        sys.stderr.write("  Already have {}.\n".format(outvcf))

    if not os.path.isfile(outvcf):
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    sys.stderr.write("  Freebayes result in {}\n".format(outvcf))
    return outvcf
コード例 #21
0
def trim_primers(fqfile, alignout, max_trim_len, primerinfo, outlabel, logfh,
                 args):
    """Write a primer-trimmed FASTQ and a list of its sequence names.

    Reads aligned to a primer whose primerinfo entry has a true 'overlap'
    are cut at the alignment's 'end' + 'left'; reads aligned to other
    primers are written unchanged; reads with no alignment lose their
    first max_trim_len bases.

    Returns (trimmed fastq path, sequence-list path).
    """
    trimmedfq = outlabel + ".trimmed.fastq"
    seqfile = outlabel + ".seqlist.txt"
    logfh.write("    Trimming fq: {}\n".format(trimmedfq))
    if have_files([trimmedfq, seqfile], args.force, stderr=logfh):
        logfh.write("      Already have {}\n".format(trimmedfq))
        return (trimmedfq, seqfile)

    hits = parse_alignout(alignout)
    names = []
    with open(trimmedfq, 'w') as outfq:
        for rec in FastQParser(fqfile):
            rec_id = rec.id
            names.append(rec_id)
            if rec_id in hits:
                aln = hits[rec_id]
                primer = aln['primer']
                overlaps = primerinfo[primer]['overlap']
                if overlaps:
                    out_rec = rec[aln['end'] + aln['left']:]
                else:
                    out_rec = rec
                if args.debug:
                    if overlaps:
                        logfh.write("{}\tTrimming\t{}\n".format(
                            primer, rec_id))
                    else:
                        logfh.write("{}\tNot trimming\t{}\n".format(
                            primer, rec_id))
            else:
                # Unaligned read: trim the default max_primer_len+2 bases.
                out_rec = rec[max_trim_len:]
            outfq.write("{}\n".format(out_rec.fastq()))

    logfh.write("    Seq list: {}\n".format(seqfile))
    # NOTE(review): a truthy have_file() result here aborts the program,
    # and sys.exit() without an argument exits with status 0.
    if have_file(seqfile, True, stderr=logfh):
        logfh.write("      Still have {}\n".format(seqfile))
        sys.exit()
    with open_file(seqfile, 'w') as listing:
        listing.write("\n".join(names) + "\n")
    return (trimmedfq, seqfile)
コード例 #22
0
ファイル: run_freebayes.py プロジェクト: rruizcor/CFseq
def run_freebayes(bamfile, reffile, bedfile, logfh, args):
    """Run FreeBayes on a BAM file and return the path of its VCF.

    Output goes to <label>.freebayes.vcf with label taken from
    get_outlabel(bamfile, args.outdir).  args.basequal and
    args.minaltcount, when set, add the -q and -C options.  Nothing is run
    when have_file() reports the VCF as present; the process exits with
    status 1 when the VCF is missing afterwards.
    """
    label = get_outlabel(bamfile, args.outdir)
    outvcf = label + '.freebayes.vcf'
    cmd = [
        freebayesExe,
        '-f', reffile,
        '-t', bedfile,
        '-b', bamfile,
        '-v', outvcf,
    ]
    # Disabled option kept from the original source:
    #    cmd.extend(['--max-complex-gap', '5',])
    if args.basequal:
        cmd += ['-q', str(args.basequal)]
    if args.minaltcount:
        cmd += ['-C', str(args.minaltcount)]
    logfh.write("\nCMD: " + " ".join(cmd) + "\n")

    already_done = have_file(outvcf, args.force)
    if already_done:
        sys.stderr.write("  Already have {}.\n".format(outvcf))
    else:
        # Flush so the logged command precedes FreeBayes' stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)

    if not os.path.isfile(outvcf):
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    sys.stderr.write("  Freebayes result in {}\n".format(outvcf))
    return outvcf
コード例 #23
0
def create_fasta_of_primer_region(fqfile, max_trim_len, outlabel, logfh,
                                  force):
    """Create fasta files of the first max_trim_len bases of each read.

    Reads from fqfile are written to
    <outlabel>.primer_region<max_trim_len>.fa; after every MAX_READS
    records a new file with a '-<n>' suffix is started.

    Args:
        fqfile: input FASTQ path.
        max_trim_len: number of leading bases copied from each read.
        outlabel: output path prefix.
        logfh: handle receiving log lines.
        force: forwarded to have_file().

    Returns:
        List of fasta paths written.
        NOTE(review): when have_file() reports the first file as present,
        only that first path is returned, even if later chunk files exist.
    """
    outfile = outlabel + ".primer_region{}.fa".format(max_trim_len)
    logfh.write("    Creating 5' fasta: {}\n".format(outfile))
    outfiles = [
        outfile,
    ]
    if have_file(outfile, force, stderr=logfh):
        logfh.write("      Already have {}\n".format(outfile))
    else:
        logfh.write("      Writing {}\n".format(outfile))
        numseqs = 0
        totseqs = 0
        outfa = open(outfile, 'w')
        # try/finally guarantees the last chunk is flushed and closed
        # before callers hand the files to another program.
        try:
            inseq = FastQParser(fqfile)
            for seqrec in inseq:
                numseqs += 1
                subseq = seqrec.seq[0:max_trim_len]
                outfa.write(">{}\n{}\n".format(seqrec.id, subseq))
                if numseqs == MAX_READS:
                    # Chunk is full: roll over to the next numbered file.
                    outfa.close()
                    logfh.write("      Wrote {} seqs\n".format(numseqs))
                    numfiles = len(outfiles) + 1
                    outfile = outlabel + ".primer_region{}-{}.fa".format(
                        max_trim_len, numfiles)
                    logfh.write("      Writing {}\n".format(outfile))
                    outfiles.append(outfile)
                    outfa = open(outfile, 'w')
                    totseqs += numseqs
                    numseqs = 0
        finally:
            outfa.close()
        logfh.write("      Wrote {} seqs\n".format(numseqs))
        if len(outfiles) > 1:
            totseqs += numseqs
            logfh.write("  Wrote {} total seqs\n".format(totseqs))
    return outfiles
コード例 #24
0
def run_aligner(queryfiles, primerfa, outlabel, logfh, force):
    """Run CM_EXE for every query file, collecting stdout in one file.

    The combined output is written to <outlabel>-primers.cm.out.

    Returns the output path once the aligner has run.
    NOTE(review): when have_file() reports the output as present, the
    tuple (outfile, None) is returned instead -- the two return shapes
    differ.

    A subprocess.CalledProcessError is reported on stderr and re-raised.
    """
    outfile = outlabel + "-primers.cm.out"
    logfh.write("\nAlignment output in {}\n".format(outfile))
    already_present = have_file(outfile, force, stderr=logfh)
    if already_present:
        logfh.write("      Already have {}\n".format(outfile))
        return (outfile, None)

    try:
        with open(outfile, 'w') as combined:
            for query in queryfiles:
                logfh.write("    Aligning {}\n".format(query))
                cmd = [CM_EXE, query, primerfa]
                cmd += ["-minscore", "12"]
                cmd += ["-minmatch", "8"]
                cmd += ["-tags", "-alignments"]
                logfh.write("      Running {}\n".format(" ".join(cmd)))
                # Flush so log lines precede the child's stderr output.
                logfh.flush()
                subprocess.check_call(cmd, stdout=combined, stderr=logfh)
    except subprocess.CalledProcessError:
        sys.stderr.write("Error running cross_match for {}\n".format(outfile))
        raise

    if not os.path.isfile(outfile):
        sys.stderr.write("Output file {} not found.\n".format(outfile))
    return outfile
コード例 #25
0
ファイル: run_freebayes.py プロジェクト: rruizcor/CFseq
def filter_freebayes_vcf(vcffile, outvcf, logfh, args):
    """Filter a FreeBayes VCF with the vcffilterExe script.

    Options -o, -a, -d and -q are added from args.outdir,
    args.altbasequal, args.dp and args.qual when set.  The run is skipped
    when have_file() reports outvcf as present.  Returns outvcf; exits
    with status 1 when outvcf is missing afterwards.
    """
    # Earlier form of the command, kept from the original source:
    #    cmd = [ vcffilterExe, '-f', "QUAL > 20", "-f", "DP > 10" ]
    cmd = [vcffilterExe, '-s']
    if args.outdir:
        cmd += ['-o', args.outdir]
    thresholds = (
        ('-a', args.altbasequal),
        ('-d', args.dp),
        ('-q', args.qual),
    )
    for flag, value in thresholds:
        if value:
            cmd += [flag, str(value)]
    cmd.append(vcffile)
    logfh.write("\nCMD: " + " ".join(cmd) + "\n")

    if not have_file(outvcf, args.force):
        # Flush so the logged command precedes the child's stderr output.
        logfh.flush()
        check_call(cmd, stderr=logfh)
    else:
        sys.stderr.write("  Already have {}.\n".format(outvcf))

    if not os.path.isfile(outvcf):
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    sys.stderr.write("  Filtered FreeBayes result {}\n".format(outvcf))
    return outvcf
コード例 #26
0
ファイル: primer_trim.py プロジェクト: eulaf/CFseq
def create_fasta_of_primer_region(fqfile, max_trim_len, outlabel, logfh,
                                  force):
    """Write the first max_trim_len bases of each read in fqfile as fasta.

    Records go to `<outlabel>.primer_region<max_trim_len>.fa`.  Each time
    MAX_READS records have been written to the current file it is closed
    and a new file `<outlabel>.primer_region<max_trim_len>-<N>.fa` is
    started, where N is the 1-based index of the new file.

    Args:
        fqfile: fastq input handed to FastQParser.
        max_trim_len: number of leading bases kept from each sequence.
        outlabel: prefix used to build the output file names.
        logfh: writable handle for progress messages.
        force: forwarded to have_file() for the first output file.

    Returns:
        List of the fasta file names written.  When have_file() reports
        the first output file as present, nothing is written and the list
        holds only that first name.
    """
    outfile = outlabel + ".primer_region{}.fa".format(max_trim_len)
    logfh.write("    Creating 5' fasta: {}\n".format(outfile))
    outfiles = [outfile,]
    if have_file(outfile, force, stderr=logfh):
        # NOTE(review): chunk files ("-2", "-3", ...) from an earlier run
        # are not added to the returned list here -- confirm whether
        # callers need them.
        logfh.write("      Already have {}\n".format(outfile))
        return outfiles

    logfh.write("      Writing {}\n".format(outfile))
    numseqs = 0   # records written to the current file
    totseqs = 0   # records written to files that are already closed
    outfa = open(outfile, 'w')
    try:
        for seqrec in FastQParser(fqfile):
            numseqs += 1
            subseq = seqrec.seq[0:max_trim_len]
            outfa.write(">{}\n{}\n".format(seqrec.id, subseq))
            if numseqs == MAX_READS:
                # Current file is full: roll over to the next chunk file.
                # The next file is opened even if no more reads follow, so
                # a read count that is an exact multiple of MAX_READS
                # leaves an empty trailing file in the returned list.
                outfa.close()
                logfh.write("      Wrote {} seqs\n".format(numseqs))
                numfiles = len(outfiles) + 1
                outfile = outlabel + ".primer_region{}-{}.fa".format(
                          max_trim_len, numfiles)
                logfh.write("      Writing {}\n".format(outfile))
                outfiles.append(outfile)
                outfa = open(outfile, 'w')
                totseqs += numseqs
                numseqs = 0
    finally:
        # Always close the handle still open, so the fasta is flushed to
        # disk before callers use it and is not leaked on a parser or
        # write error.  Closing an already-closed file is a no-op.
        outfa.close()

    logfh.write("      Wrote {} seqs\n".format(numseqs))
    if len(outfiles) > 1:
        totseqs += numseqs
        logfh.write("  Wrote {} total seqs\n".format(totseqs))
    return outfiles
コード例 #27
0
ファイル: run_freebayes.py プロジェクト: eulaf/CFseq
def filter_freebayes_vcf(vcffile, outvcf, logfh, args):
    """Filter a FreeBayes VCF by invoking the vcffilterExe command.

    Builds `vcffilterExe -s [-o outdir] [-a altbasequal] [-d dp] [-q qual]
    vcffile`, where each bracketed option is added only if the matching
    attribute of args is truthy.  The command line is written to logfh.
    The command runs unless have_file(outvcf, args.force) reports the
    output as present.

    Args:
        vcffile: VCF path passed as the final command argument.
        outvcf: path that must exist afterwards.
        logfh: log handle; also receives the command's stderr.
        args: object with outdir, altbasequal, dp, qual and force.

    Returns:
        outvcf on success.  Calls sys.exit(1) when outvcf is not a file.
    """
    # Previously filtered with: -f "QUAL > 20" -f "DP > 10"
    command = [vcffilterExe, '-s']
    if args.outdir:
        command += ['-o', args.outdir]
    if args.altbasequal:
        command += ['-a', str(args.altbasequal)]
    if args.dp:
        command += ['-d', str(args.dp)]
    if args.qual:
        command += ['-q', str(args.qual)]
    command += [vcffile]

    command_line = " ".join(command)
    logfh.write("\nCMD: " + command_line + "\n")

    already_present = have_file(outvcf, args.force)
    if already_present:
        sys.stderr.write("  Already have {}.\n".format(outvcf))
    else:
        # Push pending log text out before the child process writes.
        logfh.flush()
        check_call(command, stderr=logfh)

    created = os.path.isfile(outvcf)
    if not created:
        sys.stderr.write("  Failed to create {}\n".format(outvcf))
        sys.exit(1)
    sys.stderr.write("  Filtered FreeBayes result {}\n".format(outvcf))
    return outvcf
コード例 #28
0
ファイル: primer_trim.py プロジェクト: eulaf/CFseq
                        help="Name for summary file.")
    parser.add_argument("-o", "--outdir", help="Directory for output files.")
    parser.add_argument("-f", "--force", default=False, action='store_true',
                        help="Overwrite existing files.")
    parser.add_argument("-p", "--processes", default=1, type=int,
                        help="Number of processes to run.")
    parser.add_argument("--debug", default=False, action='store_true',
                        help="Debug mode.")
    parser.add_argument("--logdir", help="Directory for log files.")

    if len(sys.argv)<2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    primerfa = RESOURCE['primer_fa']
    if not os.path.isfile(primerfa):
        sys.exit("Could not find resource {}\n".format(primerfa))
    (primerinfo, max_primer_len) = primer_info(primerfa)
    find_overlaps(primerinfo, args.debug)
    outfiles = align_trim_all_fqfiles(args.fqfiles, primerfa, 
                                      primerinfo, max_primer_len, args)
    if args.outdir: 
        args.summary = os.path.join(args.outdir, args.summary)
    if have_file(args.summary, args.force):
        sys.stderr.write("Already have {}.\n".format(args.summary))
    else:
        samplecounts = assess_all_primers(args.fqfiles, outfiles, primerinfo, 
                                          args)
        print_summary(samplecounts, args.summary)
コード例 #29
0
                        " .variant_function and" +\
                        " .exonic_variant_function files.", )
    parser.add_argument("-o", "--outdir", help="Directory for output file.",)
    parser.add_argument("-l", "--label", help="Label for output file.",)
    parser.add_argument("-f", "--force", default=False, action='store_true',
                        help="Overwrite existing files.")

    if len(sys.argv)<2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    outlabel = get_outlabel(args.vcffiles, args)
    outfile = outlabel + ".results.txt"
    rejectfile = outlabel + ".rejects.txt"
    if have_file(outfile, args.force):
        sys.stderr.write("  Already have {}.\n".format(outfile))
    else:
        flatdata = defaultdict(dict)
        for vcffile in args.vcffiles:
            (fields, vcfdata) = parse_vcf(vcffile)
            (flatdata, newfields) = flatten_vcf_data(fields, vcfdata, flatdata)
        annovar_data = get_annovar_data(args.annovar)
        bedfile = cftr.RESOURCE['analysis_roi_bed']
        if not os.path.isfile(bedfile): 
            sys.exit("BED file {} not found\n".format(bedfile))
        roi = cftr.parse_bedfile(bedfile)
        create_spreadsheet(newfields, flatdata, annovar_data, roi, outfile,
                           rejectfile, args)

コード例 #30
0
ファイル: vcf_filter_freebayes.py プロジェクト: eulaf/CFseq
    parser.add_argument("-s", "--dbsnp", default=False, action='store_true',
                        help="Add dbsnp to ID field.",)
    parser.add_argument("-l", "--label", default='filtered',
                        help="Label for output files.",)
    parser.add_argument("-f", "--force", default=False, action='store_true',
                        help="Overwrite existing files.")

    if len(sys.argv)<2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    dbsnp = parse_dbsnp() if args.dbsnp else {}
    pos_removed = []
    for vcffile in args.vcffiles:
        sys.stderr.write("Processing {}.\n".format(vcffile))
        outlabel = os.path.basename(vcffile).rstrip('vcf').rstrip('.')
        outvcf = outlabel + '.{}.vcf'.format(args.label)
        if args.outdir:
            outvcf = os.path.join(args.outdir, outvcf)
        if have_file(outvcf, args.force):
            sys.stderr.write("  Already have {}.\n".format(outvcf))
        else:
            sys.stderr.write("Writing {}\n".format(outvcf))
            (header, fields, vcfinfo) = parse_vcf(vcffile)
            if args.dbsnp: add_dbsnp(vcfinfo, dbsnp)
            pos_rm = filter_vcf(header, fields, vcfinfo, outvcf, dbsnp, args)
            pos_removed.extend(pos_rm)
    sys.stderr.write("\nPositions removed: {}\n".format(", ".join(
        [str(j) for j in sorted([int(i) for i in set(pos_removed)])])))
コード例 #31
0
ファイル: add_mol2k.py プロジェクト: eulaf/CFseq
    #    parser.add_argument("mol2k", help="Mol2k file of variants.")
    parser.add_argument("spreadsheets", nargs="+", help="Spreadsheet to check")
    parser.add_argument("-o", "--outdir", help="Directory for output.")
    parser.add_argument("-s", "--seen", default=False, action="store_true", help="Reset Seen counter.")
    parser.add_argument("-f", "--force", default=False, action="store_true", help="Overwrite existing files.")

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    mol2kfile = cftr.RESOURCE["cftr_db"]
    if not os.path.isfile(mol2kfile):
        sys.exit("Could not find mol2k resource file: {}\n".format(mol2kfile))
    (mol2k, mol2k_fields) = parse_mol2k(mol2kfile, args.seen)
    seenfile = os.path.basename(mol2kfile).rstrip("txt").rstrip("csv").rstrip(".") + ".seen.txt"
    if args.outdir:
        seenfile = os.path.join(args.outdir, seenfile)
    for spreadsheet in args.spreadsheets:
        outfile = spreadsheet.rstrip("txt").rstrip(".") + ".mol2k.txt"
        if args.outdir:
            outfile = os.path.join(args.outdir, os.path.basename(outfile))
        if have_file(outfile, args.force):
            sys.stderr.write("  Already have {}.\n".format(outfile))
        else:
            find_mol2k_variants(spreadsheet, mol2k, outfile)
    if have_file(seenfile, args.force):
        sys.stderr.write("  Already have {}.\n".format(seenfile))
    else:
        print_mol2k_seen(mol2k, mol2k_fields, seenfile)
コード例 #32
0
ファイル: tgpolyt_depths.py プロジェクト: eulaf/CFseq
                        help="Label for output files.")
    parser.add_argument("--debug", action="store_true", default=False,
                        help="Debug mode.")
    parser.add_argument("-f", "--force", default=False, action="store_true",
                        help="Overwrite existing files.")

    if len(sys.argv)<2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    outfile = args.outlabel + ".tgpolyt_counts.txt"
    summaryfile = args.outlabel + ".tgpolyt.txt"
    sys.stderr.write("Writing {}\n".format(outfile))
    sys.stderr.write("Writing {}\n".format(summaryfile))
    if have_file(outfile, args.force) and have_file(summaryfile, args.force):
        sys.stderr.write("Already have {} and {}\n".format(
                         outfile, summaryfile))
        sys.exit()
    outfields = ['sample', 'TG-polyT', 'frequency', 'num_reads', 
                 'tot_reads']
    summfields = ['sample', 'TG-polyT', 'hom_het', 'frequency', 
                  'num_reads']
    with open(outfile, 'w') as ofh, open(summaryfile, 'w') as sfh:
        ofh.write("\t".join(outfields)+"\n")
        sfh.write("\t".join(summfields)+"\n")
        for bamfile in args.bamfiles:
            sample = get_samplename(bamfile)
            sys.stderr.write("\nReading bam file: {}\n".format(bamfile))
            sys.stderr.write("  Sample: {}\n".format(sample))
            reads = get_reads_covering_region(bamfile, REGION)
コード例 #33
0
ファイル: uniformity_coverage.py プロジェクト: eulaf/CFseq
    parser = ArgumentParser(description=descr)
    parser.add_argument("primer2readsdir", 
                        help="Directory with primer2reads.txt files.")
    parser.add_argument("bamfiles", nargs="+", help="Bam files.")
    parser.add_argument("-o", "--outfile", default="uniformity.txt",
                        help="Name for output file.")
    parser.add_argument("-f", "--force", default=False, action="store_true",
                        help="Overwrite existing files.")
    parser.add_argument("-d", "--debug", default=False, action="store_true",
                        help="Keep intermediate files.")

    if len(sys.argv)<2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    if have_file(args.outfile, args.force):
        sys.stderr.write("  Already have {}\n".format(args.outfile))
        sys.exit()
    roi = parse_roi()
    p2rdict = get_primer2reads_files(args.primer2readsdir)
    ampcov = {}
    for bamfile in sorted(args.bamfiles):
        sample = os.path.basename(bamfile).split('-')[0].split('.')[0]
        if not sample in p2rdict:
            sys.stderr.write("No primer2read file for {}\n".format(bamfile))
            continue
        p2r = parse_primer2reads_file(p2rdict[sample])
        ampcov[sample] = get_amplicon_coverage(sample, bamfile, p2r, roi)
    compile_data(ampcov, roi, args.outfile)
コード例 #34
0
    )
    parser.add_argument("-f",
                        "--force",
                        default=False,
                        action='store_true',
                        help="Overwrite existing files.")

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    dbsnp = parse_dbsnp() if args.dbsnp else {}
    pos_removed = []
    for vcffile in args.vcffiles:
        sys.stderr.write("Processing {}.\n".format(vcffile))
        outlabel = os.path.basename(vcffile).rstrip('vcf').rstrip('.')
        outvcf = outlabel + '.{}.vcf'.format(args.label)
        if args.outdir:
            outvcf = os.path.join(args.outdir, outvcf)
        if have_file(outvcf, args.force):
            sys.stderr.write("  Already have {}.\n".format(outvcf))
        else:
            sys.stderr.write("Writing {}\n".format(outvcf))
            (header, fields, vcfinfo) = parse_vcf(vcffile)
            if args.dbsnp: add_dbsnp(vcfinfo, dbsnp)
            pos_rm = filter_vcf(header, fields, vcfinfo, outvcf, dbsnp, args)
            pos_removed.extend(pos_rm)
    sys.stderr.write("\nPositions removed: {}\n".format(", ".join(
        [str(j) for j in sorted([int(i) for i in set(pos_removed)])])))
コード例 #35
0
ファイル: tgpolyt_depths.py プロジェクト: rruizcor/CFseq
    parser.add_argument("-f",
                        "--force",
                        default=False,
                        action="store_true",
                        help="Overwrite existing files.")

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    outfile = args.outlabel + ".tgpolyt_counts.txt"
    summaryfile = args.outlabel + ".tgpolyt.txt"
    sys.stderr.write("Writing {}\n".format(outfile))
    sys.stderr.write("Writing {}\n".format(summaryfile))
    if have_file(outfile, args.force) and have_file(summaryfile, args.force):
        sys.stderr.write("Already have {} and {}\n".format(
            outfile, summaryfile))
        sys.exit()
    outfields = ['sample', 'TG-polyT', 'frequency', 'num_reads', 'tot_reads']
    summfields = ['sample', 'TG-polyT', 'hom_het', 'frequency', 'num_reads']
    with open(outfile, 'w') as ofh, open(summaryfile, 'w') as sfh:
        ofh.write("\t".join(outfields) + "\n")
        sfh.write("\t".join(summfields) + "\n")
        for bamfile in args.bamfiles:
            sample = get_samplename(bamfile)
            sys.stderr.write("\nReading bam file: {}\n".format(bamfile))
            sys.stderr.write("  Sample: {}\n".format(sample))
            reads = get_reads_covering_region(bamfile, REGION)
            (tgpolyt, totreads) = count_tgpolyt(reads, REGION, args.debug)
            report_results(ofh, sfh, sample, tgpolyt, totreads, args)
コード例 #36
0
                        "--processes",
                        default=1,
                        type=int,
                        help="Number of processes to run.")
    parser.add_argument("--debug",
                        default=False,
                        action='store_true',
                        help="Debug mode.")
    parser.add_argument("--logdir", help="Directory for log files.")

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    primerfa = RESOURCE['primer_fa']
    if not os.path.isfile(primerfa):
        sys.exit("Could not find resource {}\n".format(primerfa))
    (primerinfo, max_primer_len) = primer_info(primerfa)
    find_overlaps(primerinfo, args.debug)
    outfiles = align_trim_all_fqfiles(args.fqfiles, primerfa, primerinfo,
                                      max_primer_len, args)
    if args.outdir:
        args.summary = os.path.join(args.outdir, args.summary)
    if have_file(args.summary, args.force):
        sys.stderr.write("Already have {}.\n".format(args.summary))
    else:
        samplecounts = assess_all_primers(args.fqfiles, outfiles, primerinfo,
                                          args)
        print_summary(samplecounts, args.summary)
コード例 #37
0
ファイル: add_mol2k.py プロジェクト: rruizcor/CFseq
                        "--force",
                        default=False,
                        action='store_true',
                        help="Overwrite existing files.")

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()
    args = parser.parse_args()

    mol2kfile = cftr.RESOURCE['cftr_db']
    if not os.path.isfile(mol2kfile):
        sys.exit("Could not find mol2k resource file: {}\n".format(mol2kfile))
    (mol2k, mol2k_fields) = parse_mol2k(mol2kfile, args.seen)
    seenfile = os.path.basename(mol2kfile).rstrip('txt').\
                       rstrip('csv').rstrip('.') + ".seen.txt"
    if args.outdir:
        seenfile = os.path.join(args.outdir, seenfile)
    for spreadsheet in args.spreadsheets:
        outfile = spreadsheet.rstrip('txt').rstrip('.') + ".mol2k.txt"
        if args.outdir:
            outfile = os.path.join(args.outdir, os.path.basename(outfile))
        if have_file(outfile, args.force):
            sys.stderr.write("  Already have {}.\n".format(outfile))
        else:
            find_mol2k_variants(spreadsheet, mol2k, outfile)
    if have_file(seenfile, args.force):
        sys.stderr.write("  Already have {}.\n".format(seenfile))
    else:
        print_mol2k_seen(mol2k, mol2k_fields, seenfile)