# Select fingerprints from a composite DF file and write them to stdout.
#
# A fingerprint is kept when the 'Notes' annotation of its sample is listed in
# sample.overlapWithPyroNames.txt; kept fingerprints are renamed to
# "<Notes>-<Name>" before being written.
import os
import sys
import csv

from DigitalFingerprint.DF import DFReader, DFWriter

DF_file = 'DS19175toDS19354.composite.subsample500000.DF'

# Parse the annotation table once and fill both lookups in the same pass.
# As with dict(<pairs>), a later row with a repeated 'Sample' value
# overrides an earlier one.
name_dict = {}
note_dict = {}
with open('sample.annotation.txt') as annotation_file:
    for row in csv.DictReader(annotation_file, delimiter='\t'):
        name_dict[row['Sample']] = row['Name']
        note_dict[row['Sample']] = row['Notes']

with open('sample.overlapWithPyroNames.txt') as overlap_file:
    pyrooverlap = [line.strip() for line in overlap_file]

# Set copy so the per-fingerprint membership test does not rescan the list.
_pyro_notes = set(pyrooverlap)

df_to_use = []
with open(DF_file) as df_handle:
    for df in DFReader(df_handle):
        # Looked up unconditionally: a sample that is missing from the
        # annotation table raises KeyError, even if it would not be kept.
        name = name_dict[df.name]
        note = note_dict[df.name]
        # NOTE(review): the original line layout was lost; this assumes the
        # append belongs to the `if`, i.e. non-overlapping samples are
        # dropped rather than written unrenamed -- confirm.
        if note in _pyro_notes:
            df.name = note + '-' + name
            df_to_use.append(df)

w = DFWriter(sys.stdout)
w.writes(df_to_use)
# Convert every input file matching a glob pattern with Pyro and write the
# resulting fingerprints, one after another, into a single output file.
import os
import sys
import glob

from DigitalFingerprint.DF import DFWriter
from DigitalFingerprint.Pyro import Pyro


def main(argv=None):
    """Parse the command line and write one DF entry per matching input file.

    argv: list of command-line arguments without the program name; when
        None, optparse falls back to sys.argv[1:].

    Exits through parser.error() (usage message on stderr) when the
    pattern or the output filename is missing.
    """
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("-p", "--pattern", dest="pattern",
                      help="pattern for list of input files")
    parser.add_option("-o", "--output", dest="output",
                      help="output filename (suffix should be .DF)")
    parser.add_option("--dup-ok", dest="rename_dups", action="store_true",
                      default=False,
                      help="ok to have duplicate IDs (renames them)")
    options, args = parser.parse_args(argv)

    pattern, output = options.pattern, options.output
    # Without this check a missing option surfaces as a TypeError traceback
    # from glob.iglob(None) / open(None).
    if pattern is None or output is None:
        parser.error("both --pattern and --output are required")

    # The context manager closes the output even if an input file fails.
    with open(output, 'w') as f:
        w = DFWriter(f)
        for path in glob.iglob(pattern):
            # Same text as the former `print >> sys.stderr` statement, but
            # valid on both Python 2 and Python 3.
            sys.stderr.write("processing {0}\n".format(path))
            name = os.path.basename(path)
            # The third positional argument is passed as None, unchanged
            # from the original call.
            pyro = Pyro(name, path, None, options.rename_dups)
            df = pyro.make_DF()
            w.write(df)


if __name__ == "__main__":
    main()
# Read each bowtie output file matching the pattern and write one DF entry
# per file. `parser`, `Read`, `DFWriter` and the os/sys/glob imports are
# defined earlier in the script, outside this section.
options, args = parser.parse_args()
pattern = options.pattern
output_df_filename = options.output
if pattern is None or output_df_filename is None:
    sys.stderr.write("Must provide input file name or pattern AND output filename!\n")
    sys.exit(-1)

sys.stderr.write("Reading RefMap....this may take a while\n")
if options.ref_gap_map is None:
    # No gap map given: fall back to the bundled path and alignment length.
    refmap = Read.RefMap('../data/SILVA104.fece_augmented.fasta.gap_map.bz2', aln_length=50000)
else:
    refmap = Read.RefMap(options.ref_gap_map, options.ref_aln_len)

# The context manager closes the output even if reading an input fails.
with open(output_df_filename, 'w') as f:
    dfwriter = DFWriter(f)
    for input_path in glob.iglob(pattern):
        # file name ex: O2.UC-1_090112.fq.bowtied
        name = os.path.basename(input_path)
        sys.stderr.write("reading {0} for DF writing....\n".format(input_path))
        # A fresh reads container and a fresh DF are built for every input.
        readdict = Read.ReadsDict(refmap)
        readdict.read_bowtie_output(input_path)
        readdf = Read.ReadDF(name, refmap)
        for read in readdict:
            readdf.add_read_to_vec(read)
        dfwriter.write(readdf)
# Select fingerprints from a composite DF file and write them to stdout.
#
# A fingerprint is kept when the 'Notes' annotation of its sample is listed in
# sample.overlapWithPyroNames.txt; kept fingerprints are renamed to
# "<Notes>-<Name>" before being written.
import os
import sys
import csv

from DigitalFingerprint.DF import DFReader, DFWriter

DF_file = 'DS19175toDS19354.composite.subsample500000.DF'

# Parse the annotation table once and fill both lookups in the same pass.
# As with dict(<pairs>), a later row with a repeated 'Sample' value
# overrides an earlier one.
name_dict = {}
note_dict = {}
with open('sample.annotation.txt') as annotation_file:
    for row in csv.DictReader(annotation_file, delimiter='\t'):
        name_dict[row['Sample']] = row['Name']
        note_dict[row['Sample']] = row['Notes']

with open('sample.overlapWithPyroNames.txt') as overlap_file:
    pyrooverlap = [line.strip() for line in overlap_file]

# Set copy so the per-fingerprint membership test does not rescan the list.
_pyro_notes = set(pyrooverlap)

df_to_use = []
with open(DF_file) as df_handle:
    for df in DFReader(df_handle):
        # Looked up unconditionally: a sample that is missing from the
        # annotation table raises KeyError, even if it would not be kept.
        name = name_dict[df.name]
        note = note_dict[df.name]
        # NOTE(review): the original line layout was lost; this assumes the
        # append belongs to the `if`, i.e. non-overlapping samples are
        # dropped rather than written unrenamed -- confirm.
        if note in _pyro_notes:
            df.name = note + '-' + name
            df_to_use.append(df)

w = DFWriter(sys.stdout)
w.writes(df_to_use)