Exemple #1
0
def getAlleleCount(bamfile, snpfile, outfile):
	"""Count the alleles of a BAM file at given SNP sites and save them as TSV.

	The external `bamrc` command is run on `bamfile`, restricted to the sites
	listed in `snpfile`; its raw output goes to `outfile + '.tmp'`. The rows of
	that output matching a SNP are then reformatted into `outfile` with the
	fields Chrm, pos, A, C, G, T, Total, refCount and mutCount.

	The raw output and `snpfile` are walked in lockstep with a single forward
	pass, so both are presumably expected to list the sites in the same order
	(NOTE(review): confirm against the caller).

	Relies on `ref` and `bamrc` being defined in the enclosing scope.

	@params:
		bamfile: The BAM file to count reads from.
		snpfile: The SNP file, read without a header. Columns 1 and 3 are
			matched against chromosome and position; columns 7 and 8 are used
			as the reference and mutant alleles.
		outfile: The output file. `outfile + '.tmp'` is created as well and is
			left in place.
	"""
	brcparams   = Box()
	brcparams.f = ref
	brcparams.w = 0
	brcparams.l = snpfile

	# the empty key carries the positional argument (the bam file)
	brcparams[''] = bamfile
	cmd = '{bamrc} {args} > {outfile!r}'.format(
		bamrc = bamrc, args = cmdargs(brcparams, equal = ' '), outfile = outfile + '.tmp')
	runcmd(cmd)

	# reformat the raw output to the desired format; a raw row looks like:
	#chr1	564773	C	14	=:0:0.00:...	A:0:0.00:...	C:14:...	G:0:...	T:0:...	N:0:...
	reader = TsvReader(outfile + '.tmp', cnames = False)
	snper  = TsvReader(snpfile, cnames = False)
	writer = TsvWriter(outfile)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']

	try:
		for r in reader:
			try:
				# advance through the snps until one sits on this row's site;
				# use the end position, in case it's 0-based
				snp = next(snper)
				while snp[0] != r[0] or snp[2] != r[1]:
					snp = next(snper)
			except StopIteration:
				# no snps left, so none of the remaining rows can match
				break

			# columns 6-9 are "<base>:<count>:..." for A, C, G and T
			counts = {base: r[col].split(':', 2)[1] for col, base in enumerate('ACGT', 5)}
			rec    = TsvRecord()
			rec.Chrm  = r[0]
			rec.pos   = r[1]
			rec.Total = r[3]
			rec.A = counts['A']
			rec.C = counts['C']
			rec.G = counts['G']
			rec.T = counts['T']
			# if reference allele is unknown, assuming all are ref alleles
			rec.refCount = counts.get(snp[6].upper(), r[3])
			# if mut allele is unknown, assuming no mutations happened
			rec.mutCount = counts.get(snp[7].upper(), 0)
			writer.write(rec)
	finally:
		# release all file handles, even if a row could not be parsed
		writer.close()
		snper.close()
		reader.close()
Exemple #2
0
def main(opts):
    """Write the union of two TF-gene mappings to ``opts.outfile``.

    Both ``opts.origin`` and ``opts.addition`` are loaded with
    ``read_tfgenes``; the result is used as a mapping from gene to a set of
    TFs. One ``[tf, gene]`` row is written per pair: first the genes of the
    origin mapping (merged with any TFs the addition has for the same gene),
    then the genes that only occur in the addition.
    """
    original = read_tfgenes(opts.origin)
    extra = read_tfgenes(opts.addition)
    out = TsvWriter(opts.outfile)
    logger.info('Writing the union set to %s ...', opts.outfile)

    for target, regulators in original.items():
        # popping removes the gene from `extra`, so it is not written twice
        merged = regulators | extra.pop(target, set())
        for regulator in merged:
            out.write([regulator, target])

    # whatever is left in `extra` belongs to genes absent from the origin
    for target, regulators in extra.items():
        for regulator in regulators:
            out.write([regulator, target])

    out.close()
    logger.info('Done.')
Exemple #3
0
	S1	S2	..	Sn
G1	...
G2	...
"""
# Load only the expression rows whose first field is a gene or TF of interest
expreader  = TsvReader(expfile)
expdata    = [row for row in expreader if row[0] in genes or row[0] in tfs]
expreader.close()

# Write the transposed matrix: one output row per input column
datawriter = TsvWriter(outdata)
for i, cname in enumerate(expreader.cnames):
	if i > 0:
		# column i of every kept row becomes one output row named after it
		datawriter.write([cname] + [row[i] for row in expdata])
		continue
	# the first column holds the row names (genes + tfs): use them as header
	datawriter.cnames = [row[0] for row in expdata]
	datawriter.writeHead()
datawriter.close()
del expdata

# Drop the genes and TFs that were not found in the expression file
genes = [g for g in genes if g in datawriter.cnames]
tfs   = [g for g in tfs if g in datawriter.cnames]

# Restrict the gene -> TFs mapping to the genes and TFs that are left
genetfs = {
	gene: [tf for tf in genetfs_of_gene if tf in tfs]
	for gene, genetfs_of_gene in genetfs.items()
	if gene in genes
}

# save the group file
# mutfile
"""
	S1	S2	..	Sn
M1	... (0/1/2/NA)
M2	...
"""
# Keep only the mutation rows whose first field is one of `mutgenes`
mutreader = TsvReader(mutfile)
mutdata   = [
	row for row in mutreader
	if row[0] in mutgenes
]
Exemple #4
0
# Collect the distinct SNPs (1st column) and genes (2nd column) of the input
reader = TsvReader(infile, cnames=False)
allsnps = set(reader.dump(0))
reader.rewind()
allgenes = set(reader.dump(1))
reader.close()

# assign a probability to each snp: a random integer in
# [0, ngenes * snppergene), where the slice
# [i * snppergene, (i + 1) * snppergene) belongs to the i-th gene
nsnps = len(allsnps)
ngenes = len(allgenes)
snp_probs = dict(zip(allsnps, random.choices(range(ngenes * snppergene),
                                             k=nsnps)))

# Bucket the snps by gene index in a single pass instead of rescanning every
# snp for every gene. Iterating snp_probs keeps the snps of each gene in the
# same relative order as filtering snp_probs per gene would.
snps_by_gene = {}
for snp, prob in snp_probs.items():
    snps_by_gene.setdefault(prob // snppergene, []).append(snp)

genebed = TsvWriter(genefile)
snpbed = TsvWriter(snpfile)

geneperchr = math.ceil(float(ngenes) / float(nchr))
for i, gene in enumerate(allgenes):
    # genes are dealt round-robin to the chromosomes, `dist` apart on each
    chrname = 'chr' + str(int(i % nchr) + 1)
    start = (int(i / nchr) + 1) * dist
    end = start + 1
    # the snps of a gene take consecutive positions starting here
    first_snp_pos = int(start - dist/2.0 - snppergene)
    snps = snps_by_gene.get(i, ())
    genebed.write([chrname, start, end, gene, 0, '+'])
    for j, snp in enumerate(snps):
        snppos = first_snp_pos + j
        snpbed.write([chrname, snppos, snppos, snp, 0, '+'])

genebed.close()
snpbed.close()
Exemple #5
0
    for s in samples:
        tfamWriter.write([s, s, '0', '0', 'other', '-9'])
else:
    # Write one row of six fields per sample, taking each field from the
    # sample's metadata when available and falling back to a default otherwise.
    # NOTE(review): the fields presumably follow the PLINK .tfam layout
    # (family ID, individual ID, paternal ID, maternal ID, sex, phenotype)
    # - confirm.
    for s in samples:
        tfamWriter.write([
            # FID: defaults to the sample name itself; unlike the fields below,
            # an empty FID value is written as is
            metadata[s].FID if s in metadata and 'FID' in metadata[s] else s,
            # the sample name, then PID: '0' when missing or empty
            s, (metadata[s].PID or '0')
            if s in metadata and 'PID' in metadata[s] else '0',
            # MID: '0' when missing or empty
            (metadata[s].MID or '0')
            if s in metadata and 'MID' in metadata[s] else '0',
            # Sex: 'other' when missing or empty
            (metadata[s].Sex or 'other')
            if s in metadata and 'Sex' in metadata[s] else 'other',
            # Pheno: '-9' when missing or empty
            (metadata[s].Pheno or '-9')
            if s in metadata and 'Pheno' in metadata[s] else '-9'
        ])
tfamWriter.close()


def getCompondGT(gt, ref, alt):
    """Translate a genotype code into a space-separated pair of alleles.

    "0" gives ``ref ref``, "1" gives ``ref alt`` and "2" gives ``alt alt``;
    any other code (e.g. a missing value) gives ``'0 0'``.
    """
    hom_ref = ref + ' ' + ref
    het = ref + ' ' + alt
    hom_alt = alt + ' ' + alt
    by_code = dict(zip(("0", "1", "2"), (hom_ref, het, hom_alt)))
    try:
        return by_code[gt]
    except KeyError:
        # unknown or missing genotype
        return '0 0'


logger.info('Writing tped file ...')
tpedWriter = TsvWriter(tpedfile)
for r in inreader:
    (chrom, pos, _, ref, alt) = r[0].split('_')
Exemple #6
0
# Reduce two tables to the columns they have in common, written in the same
# order to both output files.
indata1 = TsvReader(infile1, **inopts1)
indata2 = TsvReader(infile2, **inopts2)

# data columns only: skip the leading row-name column when there is one
cnames1 = indata1.meta if not rnames1 else indata1.meta[1:]
cnames2 = indata2.meta if not rnames2 else indata2.meta[1:]
# Columns present in both files, ordered by first appearance in file 1.
# A bare set intersection would leave the output column order to the hash
# order, which for strings changes from one run to the next.
paired  = sorted(set(cnames1) & set(cnames2), key = list(cnames1).index)
cnames1 = cnames2 = paired

# put the row-name column back in front
if rnames1:
	cnames1 = [indata1.meta[0]] + cnames1
if rnames2:
	cnames2 = [indata2.meta[0]] + cnames2

# positions of the kept columns in each input file
cindex1 = [indata1.meta.index(c) for c in cnames1]
cindex2 = [indata2.meta.index(c) for c in cnames2]

outdata1 = TsvWriter(outfile1)
outdata2 = TsvWriter(outfile2)
outdata1.meta = cnames1
outdata2.meta = cnames2
outdata1.writeHead()
outdata2.writeHead()

for r1 in indata1:
	outdata1.write(r1[i] for i in cindex1)
outdata1.close()

for r2 in indata2:
	outdata2.write(r2[i] for i in cindex2)
outdata2.close()
Exemple #7
0
from bioprocs.utils.tsvio2 import TsvReader, TsvWriter

infile = {{i.infile | quote}}
outfile = {{o.outfile | quote}}
inopts = {{args.inopts | repr}}
infmt = {{args.infmt | quote}}
cutoff = {{args.cutoff | repr}}

# Count the degree of every node of a network given as a list of node pairs
# and write "<node> <degree>" rows, highest degree first.
degrees = defaultdict(int)
if not infmt.startswith('pair'):
    raise ValueError('Input format other than "pair" not supported yet.')

reader = TsvReader(infile, **inopts)
for r in reader:
    if cutoff:
        # only pairs scoring at least `cutoff` count as edges
        try:
            score = float(r[2])
        except (TypeError, ValueError) as exc:
            # float() raises ValueError for a non-numeric string and TypeError
            # for a non-string/non-number; explain both, keeping the type
            raise type(exc)(
                'The 3rd column should be a score for apply the cutoff.')
        if score < cutoff:
            continue
    degrees[r[0]] += 1
    degrees[r[1]] += 1

writer = TsvWriter(outfile)
for node in sorted(degrees, key=degrees.get, reverse=True):
    if infmt.endswith('complete'):
        # NOTE(review): halved presumably because a "complete" pair list
        # holds every edge in both directions - confirm
        writer.write([node, degrees[node] // 2])
    else:
        writer.write([node, degrees[node]])
writer.close()