コード例 #1
def main(args, options):
    """Split a fastq file into one output file per index, plus an "unmatched" file.

    Run the main functionality of the module (see module docstring for more
    information), excluding testing.

    Each read from the input fastq file is written to "<outfile_base>_<index>.fq"
    when its index is one of the expected indexes, and to
    "<outfile_base>_unmatched.fq" otherwise.  The index of a read is either:
      - the first N bases of the sequence (if options.index_in_sequence), in
        which case those bases (and the matching quality values) are trimmed
        off and the index is appended to the read name after a ':'; or
      - the last ':'-separated field of the read name, in which case the read
        is written out unchanged.

    Arguments:
      args - a two-element sequence: (infile, outfile_base).
      options - should be generated by an optparse parser; the attributes read
        here are index_list (comma-separated string of indexes),
        index_in_sequence and quiet.

    Exits via sys.exit if args doesn't contain exactly two values; raises
    Exception if options.index_in_sequence is set and the indexes don't all
    have the same length.
    """
    try:
        infile, outfile_base = args
    except ValueError:
        # (args,) rather than args, so the message formats correctly even if args is a tuple
        sys.exit("\nError: one infile and one outfile base name are required! Got %s"%(args,))

    indexes = options.index_list.split(',')
    index_OUTFILES = {index: open("%s_%s.fq"%(outfile_base, index), 'w') for index in indexes}
    unmatched_OUTFILE = open("%s_unmatched.fq"%outfile_base, 'w')
    index_counts = {index: 0 for index in indexes}
    unmatched_count = 0
    # everything that happens while the output files are open is inside try/finally,
    #  so the files get closed (and flushed) even if an error occurs partway through.
    try:
        if options.index_in_sequence:
            index_len = set(len(x) for x in indexes)
            if len(index_len) > 1:
                raise Exception("Indexes need to all have the same lengths!  Found lengths %s from indexes %s"%(index_len, indexes))
            index_len = index_len.pop()
            # index is the start of the sequence: trim it off seq/qual and append it to the read name
            get_index = lambda name,seq:  seq[:index_len]
            make_output = lambda name, seq, qual, index: ("%s:%s"%(name,index), seq[index_len:], qual[index_len:])
        else:
            # index is the last ':'-separated field of the read name: output the read unchanged
            get_index = lambda name, seq:  name.split(':')[-1]
            make_output = lambda name, seq, qual, index: (name, seq, qual)
        for (name,seq,qual) in basic_seq_utilities.parse_fastq(infile):
            index = get_index(name, seq)
            try:
                OUTFILE = index_OUTFILES[index]
            except KeyError:
                # not one of the expected indexes
                OUTFILE = unmatched_OUTFILE
                unmatched_count += 1
            else:
                index_counts[index] += 1
            basic_seq_utilities.write_fastq_line(*make_output(name, seq, qual, index), OUTFILE=OUTFILE)
    finally:
        for open_file in list(index_OUTFILES.values()) + [unmatched_OUTFILE]:
            open_file.close()
    if not options.quiet:
        total = unmatched_count + sum(index_counts.values())
        # single parenthesized expression: prints the same thing under python2 and python3
        print("%s reads:\n%s unmatched\n%s"%(total, value_and_percentages(unmatched_count, [total]),
                 '\n'.join("%s %s"%(value_and_percentages(count, [total]), index) for (index,count) in index_counts.items()) ))
コード例 #2
# parse the command-line: exactly one input file and one output file are required
(options, args) = parser.parse_args()
try:
    [infile,outfile] = args
except ValueError:
    sys.exit("\nError: exactly one input and output file required!")

# upper limit on how many distinct removed field values are remembered per field
max_values_kept = 10

# the sets of removed values are only created for the fields that are actually being removed
if options.remove_first_header_field:   first_field_values = set()
if options.remove_last_header_field:    last_field_values = set()

# go over all sequences, remove header fields as required, print all the sequences
with open(outfile,'w') as OUTFILE:
    for (header,sequence,quality) in parse_fastq(infile):
        if options.remove_first_header_field:
            # first field is everything up to the first ':'
            first_field,header = header.split(':',1)
            # NOTE(review): presumably the removed value is meant to be recorded here - confirm
            if len(first_field_values)<max_values_kept:
                first_field_values.add(first_field)
        if options.remove_last_header_field:
            # last field is everything after the last '#'
            header,last_field = header.rsplit('#',1)
            # NOTE(review): presumably the removed value is meant to be recorded here - confirm
            if len(last_field_values)<max_values_kept:
                last_field_values.add(last_field)
        # write the read as a four-line fastq record, with an empty '+' line
        OUTFILE.write( "@%s\n"%header)
        OUTFILE.write(sequence + '\n')
        OUTFILE.write( "+\n")
        OUTFILE.write(quality + '\n')

def print_info_line(field_name, field_values):
    global max_values_kept