Beispiel #1
0
import os
import sys
import csv
from DigitalFingerprint.DF import DFReader, DFWriter

# Filter a composite DF file down to the samples whose annotation note is
# listed in the pyro-overlap file, rename each kept record to
# "<note>-<name>", and write the kept records to stdout.
DF_file = 'DS19175toDS19354.composite.subsample500000.DF'

# Read the annotation table once (it used to be parsed twice) and close it
# as soon as both lookup tables are filled. Both are keyed by 'Sample'.
name_dict = {}
note_dict = {}
with open('sample.annotation.txt') as annotation_file:
    for r in csv.DictReader(annotation_file, delimiter='\t'):
        name_dict[r['Sample']] = r['Name']
        note_dict[r['Sample']] = r['Notes']

# One overlap name per line; a set makes the membership test in the loop
# below constant-time instead of a linear scan per record.
with open('sample.overlapWithPyroNames.txt') as overlap_file:
    pyrooverlap = set(line.strip() for line in overlap_file)

df_to_use = []
# NOTE(review): assumes DFReader yields fully parsed records that no longer
# need the file handle once iteration finishes -- confirm against DFReader.
with open(DF_file) as df_handle:
    for df in DFReader(df_handle):
        # Both lookups happen unconditionally so a sample missing from the
        # annotation table still fails with KeyError, as it did before.
        name = name_dict[df.name]
        note = note_dict[df.name]
        if note in pyrooverlap:
            df.name = note + '-' + name
            df_to_use.append(df)

w = DFWriter(sys.stdout)
w.writes(df_to_use)
import os, sys, glob
from DigitalFingerprint.DF import DFWriter
from DigitalFingerprint.Pyro import Pyro

if __name__ == "__main__":
	# Command-line entry point: build one DF record per input file matching
	# --pattern (via Pyro.make_DF) and write them all to the --output file.
	from optparse import OptionParser

	parser = OptionParser()
	parser.add_option("-p", "--pattern", dest="pattern", help="pattern for list of input files")
	parser.add_option("-o", "--output", dest="output", help="output filename (suffix should be .DF)")
	parser.add_option("--dup-ok", dest="rename_dups", action="store_true", default=False, help="ok to have duplicate IDs (renames them)")

	options, args = parser.parse_args()
	pattern, output = options.pattern, options.output

	# Both options default to None; without this check a missing option
	# surfaces as an opaque TypeError from open() or glob.iglob().
	if pattern is None or output is None:
		parser.error("both --pattern and --output must be provided")

	# The with-block closes the output file even if a Pyro conversion raises.
	with open(output, 'w') as f:
		w = DFWriter(f)
		for input_path in glob.iglob(pattern):
			sys.stderr.write("processing {0}\n".format(input_path))
			# The record is named after the file's base name.
			name = os.path.basename(input_path)
			pyro = Pyro(name, input_path, None, options.rename_dups)
			df = pyro.make_DF()
			w.write(df)

# Convert every bowtie output file matching the input pattern into a DF
# record and write the records to a single output DF file.
# NOTE(review): `parser` and `Read` are defined outside the lines visible
# here; this section relies on them already being in scope.
options, args = parser.parse_args()

pattern = options.pattern
output_df_filename = options.output

if pattern is None or output_df_filename is None:
	sys.stderr.write("Must provide input file name or pattern AND output filename!\n")
	sys.exit(-1)

sys.stderr.write("Reading RefMap....this may take a while\n")
if options.ref_gap_map is None:
	# No gap map given: fall back to the bundled SILVA104 gap map.
	refmap = Read.RefMap('../data/SILVA104.fece_augmented.fasta.gap_map.bz2', aln_length=50000)
else:
	refmap = Read.RefMap(options.ref_gap_map, options.ref_aln_len)

# The with-block closes the output file even if reading or converting one of
# the inputs raises part-way through.
with open(output_df_filename, 'w') as f:
	dfwriter = DFWriter(f)

	for bowtie_path in glob.iglob(pattern):
		# file name ex: O2.UC-1_090112.fq.bowtied
		name = os.path.basename(bowtie_path)
		sys.stderr.write("reading {0} for DF writing....\n".format(bowtie_path))
		readdict = Read.ReadsDict(refmap)
		readdict.read_bowtie_output(bowtie_path)
		# Add every read from this file to one ReadDF named after the file.
		readdf = Read.ReadDF(name, refmap)
		for read in readdict:
			readdf.add_read_to_vec(read)
		dfwriter.write(readdf)
import os
import sys
import csv
from DigitalFingerprint.DF import DFReader, DFWriter

# Keep only the DF records whose annotation note appears in the pyro-overlap
# list, rename each kept record to "<note>-<name>", and write them to stdout.
DF_file = 'DS19175toDS19354.composite.subsample500000.DF'

# Parse the annotation table in a single pass (previously read twice) and
# close it afterwards. Both lookup tables are keyed by the 'Sample' column.
name_dict = {}
note_dict = {}
with open('sample.annotation.txt') as annotation_file:
	for r in csv.DictReader(annotation_file, delimiter='\t'):
		name_dict[r['Sample']] = r['Name']
		note_dict[r['Sample']] = r['Notes']

# A set keeps the per-record membership test below constant-time.
with open('sample.overlapWithPyroNames.txt') as overlap_file:
	pyrooverlap = set(line.strip() for line in overlap_file)

df_to_use = []
# NOTE(review): assumes DFReader yields fully parsed records that do not
# need the file handle after iteration ends -- confirm against DFReader.
with open(DF_file) as df_handle:
	for df in DFReader(df_handle):
		# Unconditional lookups: an unannotated sample still raises KeyError.
		name = name_dict[df.name]
		note = note_dict[df.name]
		if note in pyrooverlap:
			df.name = note + '-' + name
			df_to_use.append(df)

w = DFWriter(sys.stdout)
w.writes(df_to_use)