コード例 #1
0
def alnDict2output(aln_dict, dn, order='sorting'):
    """Write the alignments of ``aln_dict`` to ``dn``, one row per name.

    Each row is the name left-justified to the longest key, four spaces, and
    the joined alignment. An empty ``aln_dict`` only runs ``touch`` on ``dn``
    and returns ``None``.

    ``order`` selects the row order: ``'sorting'`` ranks names by
    ``number4sorting`` of their alignment, ``'grouping'`` ranks them by
    ``group_by_spnames`` of the name, and any other value sorts the names.
    """
    if not aln_dict:
        cmn.run('touch %s' % dn)
        return None

    # The padding target is pinned to 0, so the gap-padding branch below is
    # effectively switched off.
    pad_width = 0
    name_width = max(len(key) for key in aln_dict)
    name_template = '{:<%s}' % name_width

    if order == 'sorting':
        ordered = sorted(aln_dict.keys(),
                         key=lambda key: number4sorting(aln_dict[key]))
    elif order == 'grouping':
        # used to output inconsistent groups: rank by grouping of species IDs
        ordered = sorted(aln_dict.keys(), key=lambda key: group_by_spnames(key))
    else:
        ordered = sorted(aln_dict.keys())

    rows = []
    for key in ordered:
        aln = aln_dict[key]
        padded_name = name_template.format(key)
        missing = pad_width - len(aln)
        if missing > 0:
            aln += '-' * missing
        rows.append('%s    %s\n' % (padded_name, ''.join(aln)))
    cmn.write_file(''.join(rows), dn)
コード例 #2
0
def old_log_newBaits_ifPossible(seqs):
    """Record unseen bait sequences and rebuild the verification BLAST database.

    A name from ``seqs`` is added when it is neither a header of the species
    barcode fasta nor already present in the added-baits fasta; the stored
    sequence is the slice ``[20:678]`` of the input. The added-baits fasta is
    then rewritten, merged with the verification fasta (entries already in
    the verification file win on name clashes), and ``makeblastdb`` is re-run.

    Args:
        seqs: mapping of sequence name to sequence string.
    """
    fall = '/project/biophysics/Nick_lab/wli/sequencing/scripts/data/barcodes/species_barcodes_4mapping.fa'
    takenNames = {
        each.strip()[1:] for each in cmn.cmd2lines('grep ">" %s' % fall)
    }

    fnew = '/project/biophysics/Nick_lab/wli/sequencing/scripts/data/barcodes/addedBaits_fromPipeline.fa'
    seqDict = read_fa(fnew)

    # Track the names added by this call. The original tried to report them
    # while iterating seqDict itself, where "name not in seqDict" can never
    # be true, so the message was never printed.
    added = []
    for name in seqs:
        if name not in takenNames and name not in seqDict:
            seqDict[name] = seqs[name][20:678]
            added.append(name)

    for name in added:
        print('saving %s into database...' % name)

    with open(fnew, 'w') as dp:
        for name in seqDict:
            dp.write('>%s\n%s\n' % (name, seqDict[name]))

    fverify = '/project/biophysics/Nick_lab/wli/sequencing/scripts/data/barcodes/all_barcodes_4verify.fa'
    dict2 = read_fa(fverify)
    seqDict.update(dict2)
    with open(fverify, 'w') as dp:
        for name in seqDict:
            # headers lose the "(assembled)" tag and surrounding dots;
            # gap characters are written as N
            fasta = '>%s\n%s\n' % (name.replace(
                '(assembled)', '').strip('.'), seqDict[name].replace('-', 'N'))
            dp.write(fasta)
    cmd = 'module add blast;cd /project/biophysics/Nick_lab/wli/sequencing/scripts/data/barcodes; makeblastdb -in=all_barcodes_4verify.fa -dbtype=nucl; chmod a+w all_barcodes_4verify.*'
    cmn.run(cmd)
コード例 #3
0
def parse_IDmapping_and_newDict(fn):
    """Rename the fasta entries of ``fn`` and load the renamed sequences.

    Runs ``rename_fa_fullname.py`` on ``fn`` with its stdout redirected to
    ``tmp.namelog``, reads ``fn + '.renamed'`` with ``read_fa`` and passes the
    resulting names to ``names2IDs``.

    Returns:
        tuple: ``(IDmapping, seqDict)``.
    """
    print('processing fasta names...')
    cmn.run('source /home2/wli/.bash_profile; rename_fa_fullname.py %s > tmp.namelog' % fn)

    renamed = read_fa(fn + '.renamed')
    id_mapping = names2IDs(list(renamed.keys()))
    return id_mapping, renamed
コード例 #4
0
def compute_mash_distance(f1, f2):
    """Run ``mash dist`` on ``f1`` and ``f2`` and return its third output field.

    The command output is redirected to a file under ``/tmp`` named after the
    two inputs; the third whitespace-separated field of that file is returned
    without conversion. The thread count comes from the module-level ``cpu``.
    """
    global cpu
    out_path = '/tmp/%s-%s' % (cmn.lastName(f1), cmn.lastName(f2))
    mash_cmd = '/home2/wli/local/mash-Linux64-v1.1.1/mash dist -p %s %s %s > %s' % (
        cpu, f1, f2, out_path)
    cmn.run(mash_cmd)
    fields = cmn.txt_read(out_path).strip().split()
    return fields[2]
コード例 #5
0
def makeBlastDatabase(seqDict):
    """Write the usable sequences to ``db4picking.fa`` and index it for BLAST.

    Sequences consisting only of ``N``, ``-`` and ``X`` characters are left
    out. Returns the fasta file name, which is also the database name passed
    to ``makeblastdb``.
    """
    dn = 'db4picking.fa'
    records = []
    for name in seqDict:
        seq = seqDict[name]
        # nothing remains after stripping N, '-' and X: no usable bases
        if seq.strip('N-X') == '':
            continue
        records.append('>%s\n%s\n' % (name, seq))
    cmn.write_file(''.join(records), dn)
    cmn.run('module add blast; makeblastdb -dbtype=nucl -in=%s' % dn)
    return dn
コード例 #6
0
def transfer_alea_files(fnlist):
    """Rsync each path in ``fnlist`` into ``transferDir``.

    Returns the list of expected local paths, built as ``transferDir`` plus
    the last path component of each input.
    """
    global transferDir
    local_paths = []
    for remote_path in fnlist:
        print('transfering %s from archive server ...' % remote_path)
        # NOTE(review): the remote user@host in this command looks like a
        # scrubbed placeholder - confirm the real address before running.
        cmn.run('rsync -r [email protected]:%s %s' % (remote_path, transferDir))
        local_paths.append('%s/%s' % (transferDir, cmn.lastName(remote_path)))
    return local_paths
コード例 #7
0
def separate_by_pair(fastqs, wdir):
    """Split fastq libraries into pairs and concatenate the leftover singles.

    Libraries are keyed by file name without the last dot-separated
    extension. Characters are trimmed from the end of the still-unassigned
    names, one more per round; whenever a trimmed prefix occurs exactly
    twice, every unassigned name starting with that prefix is stored under it
    and removed from further rounds. The process exits if no pair is found.

    Returns:
        tuple: ``(pdict, singleFn)`` where ``pdict`` maps prefix to the list
        of fastq entries and ``singleFn`` is ``<wdir>/single.fq``.
    """
    print(wdir)

    # name without its last extension -> fastq entry as given
    mapdict = {}
    for fastq in fastqs:
        stem = '.'.join(cmn.lastName(fastq).split('.')[:-1])
        mapdict[stem] = fastq

    pdict = {}
    remaining = list(mapdict)
    longest = max(len(name) for name in remaining)
    for trim in range(1, longest + 1):
        if not remaining:
            break

        prefix_counts = Counter(name[:-trim] for name in remaining)
        for prefix, hits in prefix_counts.items():
            if hits != 2:
                continue
            # got paired
            # NOTE(review): startswith may select more names than the two
            # that produced the count - confirm this is acceptable.
            matched = [name for name in remaining if name.startswith(prefix)]
            pdict[prefix] = [mapdict[name] for name in matched]

            # assigned names take no part in later rounds
            for name in matched:
                remaining.remove(name)

    if not pdict:
        print('Error! fastq lib name not recognized, contact Wenlin for help!')
        sys.exit()

    singleLibs = [mapdict[name] for name in remaining]
    print(singleLibs)

    if len(singleLibs) > 1:
        print(
            'Warnning: more than one lib detected as single lib. below is the single list:'
        )
        print('\n'.join(singleLibs))
        print('Email Wenlin for help')

    singleFn = '%s/single.fq' % wdir
    # start from a clean file because the loop below appends
    if cmn.filexist(singleFn):
        cmn.run('rm %s' % singleFn)
    for single in singleLibs:
        cmn.run('cat %s/%s >> %s' % (wdir, single, singleFn))

    return pdict, singleFn
コード例 #8
0
def update_baits(bait_dict):
    """Write each bait to ``baits/bait<i>.fa`` and build a bwa index for it.

    Args:
        bait_dict: mapping of bait name to sequence (joined with ``''.join``).

    Returns:
        dict: bait name -> path of the fasta file written for it.
    """
    bait_files = {}
    for index, name in enumerate(bait_dict):
        label = 'bait%s' % index
        fasta_path = 'baits/%s.fa' % label
        record = '>%s\n%s\n' % (name, ''.join(bait_dict[name]))
        cmn.write_file(record, fasta_path)
        cmn.run('module add bwa; bwa index %s -p %s' % (fasta_path, label))
        bait_files[name] = fasta_path
    return bait_files
コード例 #9
0
def read_refN(ref_genomes):
    """Return ``{ref: N}`` where N is the first field of the cached count file.

    For each reference the count is read from
    ``<ref_dir>/<ref>_scaf_header.lines``; when that file is missing it is
    first produced with ``wc -l`` on ``<ref_dir>/<ref>_scaf.header``.
    """
    counts = {}
    for ref in ref_genomes:
        count_file = '%s/%s_scaf_header.lines' % (ref_dir, ref)
        if not cmn.filexist(count_file):
            header_file = '%s/%s_scaf.header' % (ref_dir, ref)
            cmn.run('wc -l %s > %s' % (header_file, count_file))
        first_field = cmn.txt_read(count_file).split()[0]
        counts[ref] = int(first_field)
    return counts
コード例 #10
0
def backup_finalStat(wdir):
    """Copy the ``*.report`` files of ``wdir`` into the final-stats archive.

    Listing entries containing ``all_genome`` are excluded, and reports whose
    label (minus ``_stat.report``) has no ``_`` are skipped. When the archive
    already holds a report of the same name, the new one replaces it only if
    it has fewer NA, or the same NA and more data, as reported by
    ``count_final_stat``. Finally all archived reports are concatenated into
    ``allstat.txt``.
    """
    ddir = '/project/biophysics/Nick_lab/mtang/archive/step4_postprocessing/final_stats/'
    for fn in cmn.cmd2lines('ls %s/*.report| grep -v all_genome' % wdir):
        print('processing %s...' % fn)
        fnlabel = cmn.lastName(fn)

        # don't back up the ones without species
        if len(fnlabel.replace('_stat.report', '').split('_')) == 1:
            print('skip the fasta without sp for %s' % fn)
            continue

        dn = '%s/%s' % (ddir, fnlabel)
        copy_cmd = 'cp %s %s' % (fn, dn)
        if not os.path.exists(dn):
            cmn.run(copy_cmd)
            continue

        # keep the one with least NA and, on a tie, more data
        print('merging new and old data for %s' % fnlabel)
        Nold_na, Nold_data = count_final_stat(dn)
        Nnew_na, Nnew_data = count_final_stat(fn)
        if Nnew_na < Nold_na or (Nnew_na == Nold_na and Nnew_data > Nold_data):
            cmn.run(copy_cmd)
    cmn.run('cd %s; cat *.report > allstat.txt' % ddir)
コード例 #11
0
def separate_by_pair_old(label, fns):
    """Symlink the two paired fastq files locally and merge the unpaired ones.

    Files whose path contains ``_pair`` or ``_R`` are treated as the pair; the
    process exits unless exactly two are found. Every other file is appended
    to ``<label>_single.fq``.

    Returns:
        tuple: ``(newPaired, singleFn)`` - the local link names of the pair
        and the name of the merged single-read file.
    """
    paired = sorted(fn for fn in fns if '_pair' in fn or '_R' in fn)
    if len(paired) != 2:
        print('error: wrong number of pairs as %s' % str(paired))
        print('from: %s' % str(fns))
        print('need to change the label criterion')
        sys.exit()

    unpaired = set(fns) - set(paired)

    # link each paired file into the current directory
    newPaired = []
    for fn in paired:
        link_name = cmn.lastName(fn)
        if os.path.exists(link_name):
            cmn.run('unlink %s;' % link_name)
        cmn.run('ln -s %s' % fn)
        newPaired.append(link_name)

    singleFn = '%s_single.fq' % label
    # start from a clean file because the loop below appends
    if cmn.filexist(singleFn):
        cmn.run('rm %s' % singleFn)
    for fn in unpaired:
        cmn.run('cat %s >> %s' % (fn, singleFn))

    return newPaired, singleFn
コード例 #12
0
def parse_inserted_gap(ID, seq, label):
    """Try to repair ``seq`` from a BLAST hit when a bait insertion was logged.

    Nothing is done unless ``sampleRun_<ID>/bait_insertion`` exists. Otherwise
    the sequence (gaps turned into N, outer N stripped) is blasted against the
    verification barcode database. The first hit whose subject/query
    coordinates match one of the accepted layouts is examined: if the query
    columns not aligned to a subject gap have the required length, they
    replace the hit region of ``seq``. An ID that could not be fixed is
    appended to ``cannot_fixed_indel.txt``.

    Returns:
        The repaired sequence, or ``seq`` unchanged.
    """
    if not cmn.filexist('sampleRun_%s/bait_insertion' % ID):
        return seq

    # check what is the right range of sequence
    print('runing blast to fix %s' % ID)
    fquery = 'tmpInput.fa'
    cmn.write_file('>input\n%s\n' % seq.replace('-', 'N').strip('N'), fquery)
    dn = 'tmpBr_%s.txt' % label
    cmn.run(
        'blastn -query %s -db /project/biophysics/Nick_lab/wli/sequencing/scripts/data/barcodes/all_barcodes_4verify.fa '
        '-task blastn-short -dust no -outfmt \'6 qseqid sseqid qstart qend sstart send evalue pident qseq sseq\' -out %s'
        % (fquery, dn))

    # (sstart, send, qstart) of an accepted hit -> required length of the
    # query columns that are not aligned to a subject gap
    accepted_layouts = {(1, 658, 21): 658, (2, 655, 22): 654}

    isFixed = False
    for line in cmn.file2lines(dn):
        items = line.strip().split()
        qstart, qend, sstart, send = list(map(int, items[2:6]))
        wanted_length = accepted_layouts.get((sstart, send, qstart))
        if wanted_length is None:
            continue

        qseq, sseq = items[-2:]
        kept = [qchar for qchar, schar in zip(qseq, sseq) if schar != '-']
        if len(kept) == wanted_length:
            seq = seq[:qstart - 1] + ''.join(kept) + seq[qend:]
            print('solution found for %s' % ID)
            isFixed = True
        # only the first hit with an accepted layout is ever considered
        break

    if not isFixed:
        cmn.append_file('%s\t%s\n' % (ID, label), 'cannot_fixed_indel.txt')
    return seq
コード例 #13
0
def separate_by_pair_vold(fastqs, wdir):
    """Group fastq libraries into pairs by a shared name prefix.

    Libraries are keyed by file name without the last dot-separated
    extension. Characters are trimmed from the end of every name, one more
    per round, until the most frequent trimmed prefix occurs exactly twice;
    all libraries are then grouped by that prefix. Groups that do not hold
    exactly two libraries are treated as single-end and concatenated into
    ``<wdir>/single.fq``. The process exits when no grouping is found,
    including when ``fastqs`` is empty.

    Returns:
        tuple: ``(pdict, singleFn)`` where ``pdict`` maps prefix to its two
        fastq entries.
    """
    pdict = {}
    mapdict = {}
    for fastq in fastqs:
        key = '.'.join(cmn.lastName(fastq).split('.')[:-1])
        mapdict[key] = fastq

    names = list(mapdict)
    # With no input there is no shortest name; skip the search so the
    # function reaches its own error exit instead of failing inside min().
    length = min(len(name) for name in names) if names else 0
    for i in range(length):
        checks = [name[:-1 - i] for name in names]
        if max(Counter(checks).values()) == 2:  # got paired
            for name, key in zip(names, checks):
                # setdefault replaces a bare "except:" that would also have
                # hidden unrelated errors
                pdict.setdefault(key, []).append(mapdict[name])
            break

    if not pdict:
        print('Error! fastq lib name not recognized, contact Wenlin for help!')
        sys.exit()

    singleLibs = []
    # iterate over a snapshot of the keys because entries are deleted
    for key in list(pdict):
        libs = pdict[key]
        if len(libs) != 2:
            singleLibs += libs
            del pdict[key]

    singleFn = '%s/single.fq' % wdir
    # start from a clean file because the loop below appends
    if cmn.filexist(singleFn):
        cmn.run('rm %s' % singleFn)
    for fn in singleLibs:
        cmn.run('cat %s >> %s' % (fn, singleFn))

    return pdict, singleFn
コード例 #14
0
def parse_fqlist(fqlist):
    """Return ``[[fq_a, fq_b]]`` for the two R1/R2 entries listed in ``fqlist``.

    A line is kept when the last component of its path contains ``R1`` or
    ``R2``. Unless exactly two lines are kept, the marker file
    ``fastq_error`` is touched and the process exits.
    """
    picked = []
    for line in cmn.file2lines(fqlist):
        label = cmn.lastName(line)
        if 'R1' in label or 'R2' in label:
            picked.append(line)

    if len(picked) == 2:
        return [picked]

    print('Error! can not recoginze fastq names in %s' % fqlist)
    cmn.run('touch fastq_error')
    sys.exit()
コード例 #15
0
def parse_ref(seqDict):
    """Write every reference to ``baits/bait<i>.fa`` and build a bwa index.

    Names are cleaned before use: ``*`` is removed and double quotes become
    single quotes.

    Returns:
        dict: cleaned name -> path of the fasta file written for it.
    """
    cmn.mkdir('baits')

    bait_files = {}
    for index, raw_name in enumerate(seqDict):
        label = 'bait%s' % index
        fasta_path = 'baits/%s.fa' % label
        clean_name = raw_name.replace('*', '').replace('"', "'")
        cmn.write_file('>%s\n%s\n' % (clean_name, seqDict[raw_name]), fasta_path)
        cmn.run('module add bwa; bwa index %s -p %s' % (fasta_path, label))
        bait_files[clean_name] = fasta_path
    return bait_files
コード例 #16
0
def get_mash_file(name, seq):
    """Return the mash sketch path for ``name``, creating it on first use.

    Paths are remembered in the module-level ``mash_file_dict``. On a miss
    the sequence is written to ``/tmp/<name>`` with ``-`` and ``N`` removed,
    sketched with ``mash sketch``, and ``/tmp/<name>.msh`` is recorded and
    returned.
    """
    global mash_file_dict, cpu
    try:
        return mash_file_dict[name]
    except KeyError:
        pass

    fasta_path = '/tmp/%s' % name
    cleaned = ''.join(seq).replace('-', '').replace('N', '')
    cmn.write_file('>%s\n%s\n' % (name, cleaned), fasta_path)
    cmn.run('/home2/wli/local/mash-Linux64-v1.1.1/mash sketch -n -p %s %s' % (
        cpu, fasta_path))
    sketch_path = fasta_path + '.msh'
    mash_file_dict[name] = sketch_path
    return sketch_path
コード例 #17
0
def do_barcode_blast(sequence, seqDict):
    """BLAST one fasta record against a database built from ``seqDict``.

    The database comes from ``makeBlastDatabase``. The query is written to
    ``/tmp/<label>.fa``, where the label is derived from the first token of
    the record header, and removed again after the search.

    Returns:
        The output lines of ``blastn`` in tabular format with the columns
        ``sseqid qlen slen length pident``.
    """
    fdb = makeBlastDatabase(seqDict)

    # file-system friendly label from the header: drop the leading '>', cut
    # at '|' and '[', remove '*' and quotes, turn '/' into '_'
    header_token = sequence.split()[0][1:].split()[0]
    namelabel = header_token.split('|')[0].replace('*', '').split('[')[0]
    namelabel = namelabel.replace('"', '').replace("'", '')
    namelabel = namelabel.replace('/', '_')

    fquery = '/tmp/%s.fa' % namelabel
    cmn.write_file(sequence, fquery)
    blast_cmd = 'module add blast; blastn -query %s -db %s -outfmt \'6 sseqid qlen slen length pident\'' % (
        fquery, fdb)
    lines = cmn.cmd2lines(blast_cmd)
    cmn.run('rm %s' % fquery)
    return lines
コード例 #18
0
def merge_sams(dn, fns):
    """Merge the SAM files ``fns`` into ``dn`` and return ``dn``.

    Any existing ``dn`` is removed first. The first input is passed to
    ``filter_and_write_lines`` with ``header=True``, the remaining ones
    without it.

    Args:
        dn: path of the merged output file.
        fns: input file paths; must contain at least one entry.
    """
    print('merging files: %s into %s' % (str(fns), dn))

    if cmn.filexist(dn):
        cmn.run('rm ' + dn)

    # the context manager closes the output even if a helper call raises
    with open(dn, "a") as fp_dn:
        filter_and_write_lines(fp_dn, fns[0], header=True)
        for fn in fns[1:]:
            filter_and_write_lines(fp_dn, fn)

    return dn
コード例 #19
0
def do_barcode_blast(sequence):
    """BLAST one fasta record against the verification barcode database.

    The query is written to ``/tmp/<label>.fa`` (label derived from the first
    token of the record header) and removed after the search; the tabular
    result is written to ``<query>.br`` and read back.

    Returns:
        The lines of the result file, with the columns
        ``sseqid slen length pident qstart qend qseq sseq``.
    """
    fdb = '/project/biophysics/Nick_lab/wli/sequencing/scripts/data/barcodes/all_barcodes_4verify.fa'

    # file-system friendly label from the header: drop the leading '>', cut
    # at '|' and '[', remove '*' and quotes, then cut at '(', '/' and '\'
    namelabel = sequence.split()[0][1:].split()[0]
    namelabel = namelabel.split('|')[0].replace('*', '').split('[')[0]
    namelabel = namelabel.replace('"', '').replace("'", '')
    for separator in ('(', '/', '\\'):
        namelabel = namelabel.split(separator)[0]

    fquery = '/tmp/%s.fa' % namelabel
    cmn.write_file(sequence, fquery)

    fbr = fquery + '.br'
    blast_cmd = 'module add blast; blastn -max_target_seqs 1000 -query %s -db %s -ungapped -outfmt \'6 sseqid slen length pident qstart qend qseq sseq\' -out %s ' % (
        fquery, fdb, fbr)
    cmn.run(blast_cmd)
    lines = cmn.file2lines(fbr)
    cmn.run('rm %s' % fquery)
    return lines
コード例 #20
0
ファイル: auto_rebait.py プロジェクト: ATPs/xiaolongTools
def extract_same_genus(genus, fall):
    """Copy the records of ``fall`` whose genus label equals ``genus``.

    A line containing ``>`` is treated as a header; its genus label is the
    text before the first ``_`` of the name. Matching records are written to
    ``genus_for_autoPicking.fa``, which is then indexed with ``bwa index``.

    Returns:
        tuple: ``(dn, namelist)`` - the output file name and the names of the
        records that were kept.
    """
    dn = 'genus_for_autoPicking.fa'
    namelist = []
    # Defined up front so that lines seen before the first header are skipped
    # instead of raising UnboundLocalError.
    isGood = False
    with open(fall) as fp, open(dn, 'w') as dp:
        for line in fp:
            if '>' in line:
                name = line[1:].strip()
                isGood = name.split('_')[0] == genus
                if isGood:
                    namelist.append(name)
            if isGood:
                dp.write(line)

    cmd = 'module add bwa; bwa index %s' % dn
    cmn.run(cmd)

    return dn, namelist
コード例 #21
0
def merge_sams(label, fns):
    """Merge the SAM files ``fns`` into ``<label>.sam`` and return that name.

    The first input is copied as is. From each further input, a line is
    appended only when it does not start with ``@`` or ``[`` and its third
    whitespace-separated field is not ``*``.

    Args:
        label: prefix of the output file name.
        fns: input file paths; must contain at least one entry.
    """
    dn = '%s.sam' % label

    print('merging files: %s into %s' % (str(fns), dn))

    if cmn.filexist(dn):
        cmn.run('rm ' + dn)

    cmn.run('cp %s %s' % (fns[0], dn))

    # context managers close both handles even when a malformed line raises
    with open(dn, "a") as fp_dn:
        for fn in fns[1:]:
            with open(fn) as fp:
                for line in fp:
                    if line.startswith(("@", "[")):
                        continue
                    if line.split()[2] != "*":
                        fp_dn.write(line)

    return dn
コード例 #22
0
ファイル: make_bwa_jobs.py プロジェクト: ATPs/xiaolongTools
def separate_by_pair(label, fns):
    """Symlink the two paired fastq files locally and merge the unpaired ones.

    Files whose path contains ``_paired`` or ``_R`` are treated as the pair.
    Unless exactly two are found the library is skipped and ``(None, None)``
    is returned. Every other file is appended to ``<label>_single.fq``.

    Returns:
        tuple: ``(newPaired, singleFn)`` - the local link names of the pair
        and the name of the merged single-read file - or ``(None, None)``.
    """
    paired = sorted(fn for fn in fns if '_paired' in fn or '_R' in fn)
    if len(paired) != 2:
        print('error: wrong number of pairs as %s' % str(paired))
        print('from: %s' % str(fns))
        print('skip this lib')
        return None, None

    unpaired = set(fns) - set(paired)

    # link each paired file into the current directory
    newPaired = []
    for fn in paired:
        cmn.run('ln -s %s' % fn)
        newPaired.append(cmn.lastName(fn))

    singleFn = '%s_single.fq' % label
    # The original tested the function object (`if cmn.filexist:`), which is
    # always true, so `rm` ran even when there was nothing to remove.
    if cmn.filexist(singleFn):
        cmn.run('rm %s' % singleFn)
    for fn in unpaired:
        cmn.run('cat %s >> %s' % (fn, singleFn))

    return newPaired, singleFn
コード例 #23
0
def backup_vcf_coverage(wdir):
    """Copy the ``*_vcf.cov`` files of ``wdir`` into the coverage archive.

    A file is skipped when its last line does not have six
    whitespace-separated fields (old format). When the archive already holds
    a file of the same name, the new one replaces it only if the
    second-to-last field of its last line is larger than that of the
    archived copy.
    """
    ddir = '/project/biophysics/Nick_lab/mtang/archive/step4_postprocessing/check_vcf_coverage'

    for fn in cmn.cmd2lines('ls %s/*_vcf.cov' % wdir):
        print('processing %s...' % fn)
        last_fields = cmn.file2lines(fn)[-1].strip().split()
        # only back up the new version of the cov file
        if len(last_fields) != 6:
            print('skip old format file %s' % fn)
            continue

        fnlabel = cmn.lastName(fn)
        dn = '%s/%s' % (ddir, fnlabel)
        if os.path.exists(dn):
            print('merging new and old data for %s' % fnlabel)
            archived_cov = float(cmn.file2lines(dn)[-1].split()[-2])
            new_cov = float(last_fields[-2])
            if not new_cov > archived_cov:
                continue
        cmn.run('cp %s %s' % (fn, dn))
コード例 #24
0
ファイル: auto_extend.py プロジェクト: ATPs/xiaolongTools
def grep_reads(read, f_libs, direction):
    """Run ``grep_reads.py`` on every library in the background and collect hits.

    One backgrounded command per library is written to ``grep_read.job``,
    followed by ``wait``, and the job file is run with bash. Everything under
    ``grep_out/`` is then concatenated into ``all_grep_reads.txt``.

    Returns:
        Whatever ``cmn.getid`` yields for ``all_grep_reads.txt``.
    """
    job_lines = [
        '/project/biophysics/Nick_lab/wli/sequencing/scripts/grep_reads.py %s %s %s &' % (
            read, lib, direction)
        for lib in f_libs
    ]
    # block until every backgrounded command has finished
    job_lines.append('\nwait;\n')

    f_job = 'grep_read.job'
    cmn.write_lines(job_lines, f_job)
    cmn.run('bash %s ' % f_job)

    # the output dir is grep_out
    merged = 'all_grep_reads.txt'
    cmn.run('cat grep_out/* > %s' % merged)
    return cmn.getid(merged)
コード例 #25
0
def backup_fasta(wdir):
    """Copy the ``*_m2s.fa`` files of ``wdir`` into the map2fasta archive.

    Listing entries containing ``all_genome`` are excluded, and files whose
    label (minus the ``_snp_step2[_MITO]_m2s.fa`` suffix) has no ``_`` are
    skipped. When the archive already holds a file of the same name, the new
    one replaces it only if ``count_fasta_nonGap`` is larger for it.
    """
    ddir = '/project/biophysics/Nick_lab/mtang/archive/step4_postprocessing/map2fasta'
    for fn in cmn.cmd2lines('ls %s/*_m2s.fa| grep -v all_genome' % wdir):
        print('processing %s...' % fn)
        fnlabel = cmn.lastName(fn)

        # don't back up the ones without species
        stem = fnlabel.replace('_snp_step2_MITO_m2s.fa', '')
        stem = stem.replace('_snp_step2_m2s.fa', '')
        if len(stem.split('_')) == 1:
            print('skip the fasta without sp for %s' % fn)
            continue

        # keep the least gapped one
        dn = '%s/%s' % (ddir, fnlabel)
        if os.path.exists(dn):
            print('merging new and old data for %s' % fnlabel)
            archived_count = count_fasta_nonGap(dn)
            new_count = count_fasta_nonGap(fn)
            if not new_count > archived_count:
                continue
        cmn.run('cp %s %s' % (fn, dn))
コード例 #26
0
def attempt_to_find_genus_by_abundence(ID, fqlist):
    """Run ``auto_rebait.py`` in a scratch directory and return the picked genus.

    The fastq list is written to ``tmp_<ID>/fqlist`` and the script is run
    from inside that directory. If it leaves a ``picked_bait.txt``, the genus
    is the first whitespace token of the text before the first ``_``.
    Afterwards ``mapping_stat.info`` is copied to ``tmpStat/`` and the
    scratch directory is removed.

    Returns:
        The genus string, or ``None`` when no ``picked_bait.txt`` was produced.
    """
    tmpdir = 'tmp_%s' % ID
    cmn.mkdir(tmpdir)

    start_dir = os.getcwd()
    os.chdir(tmpdir)
    try:
        cmn.write_lines(fqlist, 'fqlist')
        cmd = '/work/archive/biophysics/Nick_lab/wli/project/sequencing/scripts/barcode_scripts/auto_rebait.py fqlist'
        cmn.run(cmd)

        dn = 'picked_bait.txt'
        if cmn.filexist(dn):
            genus = cmn.txt_read(dn).strip().split('_')[0].split()[0]
        else:
            genus = None
    finally:
        # Always return to the starting directory, so a failure above does
        # not leave the process inside the scratch directory.
        os.chdir(start_dir)

    cmn.run('cp %s/mapping_stat.info tmpStat/%s_mapping_stat.info' % (tmpdir, ID))
    cmn.run('rm -r %s ' % tmpdir)
    return genus
コード例 #27
0
        print("Usage: *.py fa Ncores", file=sys.stderr)
        sys.exit()

    #if the nodes are less than 4 taxa, produce a random tree
    cmd = "grep '>' %s" % (fn)
    lines = [
        each[1:].strip() for each in cmn.cmd2lines(cmd) if each.strip() != ''
    ]

    N = len(lines)
    if N < 4:
        print('Warning: fastme can not make tree of less than 4 taxa')
        print('Warning: so I make a fake tree...')
        dn = '%s.phylip.fastme.tre' % cmn.lastName(fn)
        if N == 1:
            info = '(%s);\n' % lines[0]
        if N == 2:
            a, b = lines
            info = '(%s,%s);\n' % (a, b)
        elif N == 3:
            a, b, c = lines
            info = '((%s,%s),%s);\n' % (a, b, c)
        cmn.write_file(info, dn)
        sys.exit()

    label = cmn.lastName(fn)
    cmd = 'rm RAxML_*.%s;' % label
    cmd += '/home2/wli/local/RAxML/raxmlHPC-PTHREADS-SSE3 -m GTRGAMMA -p 7112 -T %s -s %s -n %s' % (
        Ncores, label, label)
    cmn.run(cmd)
コード例 #28
0
        info = info.replace('[WL_preprocessing]', '\n'.join(step1cmds))

        #make snp call cmds
        #f_sam = merge_sams(sp, fsams)

        info = info.replace('5328', sp)
        info = info.replace('[WL_cwd]', os.getcwd())

        info2 = template2.replace('assembly_selfref', asslabel)
        info2 = info2.replace('5328', sp)
        info2 = info2.replace('[WL_cwd]', os.getcwd())

        os.chdir('..')
        fjob = 'job_files/s1_%s.job' % sp
        cmn.write_file(info, fjob)
        cmn.run('cd job_files; sbatch s1_%s.job' % sp)

        if sp not in step1_finished:
            step1_jobs.append(fjob)

        fjob = 'job_files/s2_%s.job' % sp
        cmn.write_file(info2, fjob)
        step2_jobs.append(fjob)

        #cmn.run('cd job_files; sbatch sg%s.job' % sp)

    info = ['bash %s\n' % each for each in step1_jobs]
    cmn.write_file(''.join(info), 'step1todo.cmds')

    info = ['sbatch %s\n' % each for each in step2_jobs]
    cmn.write_file(''.join(info), 'step2todo.cmds')
コード例 #29
0
    header = lines[0].split()[2:]
    #cmn.write_lines(header, 'header_names')

    #new = [' '.join(['sp%s' % i for i in xrange(len(header))])]
    new = [' '.join(header)]
    for line in lines[1:]:
        firstChar = ''
        items = line.split()[2:]
        newline = []
        for item in items:
            chars = item.split()
            if firstChar == '':
                firstChar = chars[0]

            if len(chars) == 1:
                if chars[0] == '-':
                    newline.append('0,0')
                elif firstChar == chars[0]:
                    newline.append('1,0')
                else:
                    newline.append('0,1')

            else:  #have both char
                newline.append('1,1')
        new.append(' '.join(newline))

    dn = fn + '.tmix'
    cmn.write_lines(new, dn)

    cmn.run('gzip %s' % dn)
コード例 #30
0
    for sample in groupDict:
        fqlist = groupDict[sample]
        #fqlist = cmn.cmd2lines('ls /project/biophysics/Nick_lab/wli/sequencing/Eudamine/BEAST_timing/tmp_link_fastq/%s*.fastq' % sample)
        #fqlist = cmn.cmd2lines('ls /work/biophysics/wli/workspace/filtered_6313*q')
        wdir = 'mitoD_%s' % sample
        cmn.mkdir(wdir)
        os.chdir(wdir)
        cwd = os.getcwd()
        info = template.replace('[cwd]', cwd)
        info = info.replace('[fq_files]', ' '.join(fqlist))
        info = info.replace('[sample]', sample)

        #prepare quake infiles
        fqlist_local = []
        for fq in fqlist:
            cmn.run('ln -s ' + fq)
            fqlist_local.append(cmn.lastName(fq))
        cmn.write_lines(fqlist_local, 'fqlist')
        cmn.run('ln -s fqlist infiles')

        #make fq2fa comand
        quake_fqlist = [each.replace('.fastq', '.cor.fastq') for each in fqlist_local]
        fq2fa_cmds = ['rm %s.fa 2> /dev/null' % sample]
        for fq in quake_fqlist:
            cmd = 'fq2fa %s >> %s.fa;' % (fq, sample)
            fq2fa_cmds.append(cmd)

        cmn.write_lines(quake_fqlist, 'fqlist.cor')
        cmd = '\n'.join(fq2fa_cmds)
        info = info.replace('[fq2fa_commands]', cmd)