def fastq_to_dict(fastq):
    """Load every record of a FASTQ file into a dictionary keyed by read name.

    Args:
        fastq: value handed to ``nopen`` (presumably a file path -- confirm
            against ``nopen``'s contract).

    Returns:
        dict mapping each record name to ``{'seq': <sequence>, 'qual': <quality>}``.
        If a name occurs more than once, the last record wins.
    """
    with nopen(fastq) as handle:
        return {
            read_name: {'seq': sequence, 'qual': quality}
            for read_name, sequence, quality in read_fastx(handle)
        }
def main(args):
    """Print, for each FASTQ file, a tally of the 2nd:3rd name-field pairs.

    For every entry in ``args.fastq`` the entry itself is printed first,
    followed by one ``<pair><TAB><count>`` line per distinct pair, where the
    pair is built from the second and third ":"-separated fields of the first
    whitespace-delimited token of each read name.

    Args:
        args: object exposing a ``fastq`` attribute that can be iterated to
            yield inputs for ``nopen`` (presumably an argparse namespace of
            file paths -- confirm with the caller).

    Raises:
        IndexError: if a read name is empty or whitespace-only.
        ValueError: if the first token of a read name does not split into
            exactly three ":"-separated fields.
    """
    for fastq in args.fastq:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(fastq)
        meta = Counter()
        with nopen(fastq) as fh:
            for name, _seq, _qual in read_fastx(fh):
                # Only the first whitespace-delimited token is parsed; its
                # leading field is discarded.
                _, cregion, fwork = name.split()[0].split(":")
                meta["%s:%s" % (cregion, fwork)] += 1
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for combination, count in meta.items():
            print("%s\t%d" % (combination, count))
def fq_to_set(fq):
    """Index a FASTQ file by read id and return the ids alongside the records.

    Args:
        fq: value handed to ``nopen`` (presumably a file path -- confirm
            against ``nopen``'s contract).

    Returns:
        A ``(ids, records)`` tuple where ``records`` maps each read id (the
        first whitespace-delimited token of the record name) to the record
        re-serialised as a four-line FASTQ string, and ``ids`` is the set of
        those read ids. If a read id repeats, the last record wins.

    Raises:
        IndexError: if a record name is empty or whitespace-only.
    """
    fq_d = {}
    with nopen(fq) as fh:
        for name, seq, qual in read_fastx(fh):
            # Split name on whitespace and keep the first token; per the
            # original note this leaves off the trailing "1" or "2".
            read_id = name.split()[0]
            # Original author's estimate: a million reads ~ 1 GB.
            fq_d[read_id] = "@%s\n%s\n+\n%s\n" % (name, seq, qual)
    # The dict keys are exactly the read ids, so no separate list is needed
    # to build the set.
    return set(fq_d), fq_d