def test_get_ids(self):
    """get_ids should map each library to the ids found for it"""
    # Four FASTA-style records (label line followed by a sequence line).
    # The text starts at column 0 on purpose: it is the literal input
    # handed to get_ids, one element per line after splitlines().
    fasta_lines = """>S74_1 E86FECS01CEVAV orig_bc=ACATGTCACGTG new_bc=ACATGTCACGTG bc_diffs=0
CTCCTC
>Unassigned_2 E86FECS01EKKMF orig_bc=AGCGCTGATGTA new_bc=None bc_diffs=1
GGTGCCTCCCTCGC
>S80_3 E86FECS01EKKMF orig_bc=AGCGCTGATGTA new_bc=None bc_diffs=1
GGTGCCTCCCTCGC
>S80_4 E86FECS01CW66X orig_bc=AGTCCATAGCTG new_bc=AGTCCATAGCTG bc_diffs=0
GTCCTGGCAG""".splitlines()

    observed = get_ids(fasta_lines, 1)

    # Field 1 of each label line, grouped under the label prefix that
    # precedes the underscore; S80 collects two ids in input order.
    expected = {
        'S74': ['E86FECS01CEVAV'],
        'Unassigned': ['E86FECS01EKKMF'],
        'S80': ['E86FECS01EKKMF', 'E86FECS01CW66X'],
    }
    self.assertEqual(dict(observed), expected)
def main():
    """Write one id-list file per library found in the input FASTA file.

    Steps:
      1. Parse the command line.
      2. If ``options.screened_rep_seqs`` is given, read the OTU ids to
         screen out from it (via ``get_first_id``), then walk the OTU map in
         ``options.otus`` and collect every sequence id that belongs to one
         of those OTUs.
      3. Call ``get_ids`` on ``options.in_fasta`` with the selected field,
         the collected bad sequence ids and the debug flag.
      4. Write ``<outdir>/<library>.txt`` for every library, containing the
         sorted ids joined by newlines. An ``Unassigned.txt`` file is always
         produced, even when it is empty.

    Raises:
        RuntimeError: if a screening file is supplied without an OTU file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    # NOTE(review): the command line is parsed a second time here and the
    # ``opts`` returned above is never used; kept as-is to preserve behavior.
    options, args = option_parser.parse_args()

    # Single-argument print(...) produces identical output under the
    # Python 2 print statement and the Python 3 print function.
    if options.debug:
        print("PRODUCING DEBUG OUTPUT")

    bad_seq_ids = set()
    bad_otu_ids = None

    # if we got a file to screen against, find the relevant ids and delete them
    if options.screened_rep_seqs:
        # Context managers guarantee the handles are closed even on error.
        with open(options.screened_rep_seqs, 'U') as screened_file:
            bad_otu_ids = get_first_id(screened_file)
        if not options.otus:
            raise RuntimeError(
                "Must specify an OTU file if performing a screen.")
        with open(options.otus, 'U') as otu_file:
            for line in otu_file:
                fields = line.split()
                # Blank lines yield no fields; skip them rather than fail
                # with an IndexError on fields[0].
                if fields and fields[0] in bad_otu_ids:
                    # First field is the OTU id, the rest are its seq ids.
                    bad_seq_ids.update(fields[1:])

    if options.debug:
        if bad_otu_ids is not None:
            print("Found %s bad otu ids: %s" % (len(bad_otu_ids), bad_otu_ids))
        print("Found %s bad seq ids: %s" % (len(bad_seq_ids), bad_seq_ids))

    # The file is kept open for the whole call because get_ids consumes the
    # handle it is given.
    with open(options.in_fasta, 'U') as fasta_file:
        ids = get_ids(fasta_file, options.field, bad_seq_ids, options.debug)

    # add empty unassigned ids for file creation
    if 'Unassigned' not in ids:
        ids['Unassigned'] = []

    if not exists(options.outdir):
        makedirs(options.outdir)

    for k, idlist in ids.items():
        # No trailing newline is written, matching the previous output.
        with open(join(options.outdir, k + '.txt'), 'w') as outfile:
            outfile.write('\n'.join(sorted(idlist)))