Beispiel #1
0
    # Convert every peak file into FASTA records and gather them in one list.
    fasta_recs = []
    # file extensions recognized when --peak-format is auto
    ext_to_fmt = {'.bed': 'BED', '.xls': 'MACS'}
    for peak_fn in peak_fns :
        if opts.peak_format == 'auto' :
            # figure the format out from the (case-insensitive) extension
            fnext = os.path.splitext(peak_fn)[1].lower()
            peak_fmt = ext_to_fmt.get(fnext)
            if peak_fmt is None :
                # adjacent literals keep the message on one line; a backslash
                # continuation inside the string would embed the source
                # indentation in the warning text
                warnings.warn('Peak format specified as auto but file extension '
                              'not recognized in file %s, skipping'%peak_fn)
                continue
        else :
            # an explicitly requested format applies to every file; this loop
            # otherwise never assigns peak_fmt in the non-auto case
            peak_fmt = opts.peak_format

        # both converters are called with the same arguments and their
        # results are appended to the running list
        if peak_fmt == 'BED' :
            fasta_recs.extend(bed_to_fasta(peak_fn,nib_db,min_header=opts.min_header))
        elif peak_fmt == 'MACS' :
            fasta_recs.extend(macs_to_fasta(peak_fn,nib_db,min_header=opts.min_header))

    # write out foreground to file, or to stdout when no --output is given
    if opts.output :
        if opts.wrap_width == -1 :
            # -1 means "do not wrap": pass the largest available line length.
            # sys.maxsize is used because sys.maxint does not exist on
            # Python 3, while sys.maxsize is available on 2.6+ and 3.x.
            opts.wrap_width = sys.maxsize
        # NOTE(review): dict() keeps a single record per header, so duplicate
        # headers collapse here but are all printed in the stdout branch
        write_fasta_to_file(dict(fasta_recs),opts.output,linelen=opts.wrap_width)
    else :
        for header, seq in fasta_recs :
            # wrap the sequence only when a width was requested
            if opts.wrap_width != -1 :
                seq = textwrap.fill(seq,opts.wrap_width)
            sys.stdout.write('>%s\n%s\n'%(header,seq))
    # merge the records of every input FASTA file into a single mapping;
    # a key that appears in several files keeps the value loaded last
    fasta_recs = {}
    for fasta_fn in fasta_fns :
        fasta_recs.update(fasta_to_dict(fasta_fn))

    # parse --num-seqs argument: either an absolute count (e.g. "500") or a
    # factor of the number of loaded records ending with x (e.g. "2.5x")
    try :
        if opts.num_seqs.endswith('x') :
            num_seq_factor = float(opts.num_seqs[:-1])
            num_seqs = int(len(fasta_recs)*num_seq_factor)
        else :
            num_seqs = int(opts.num_seqs)
    except (TypeError, ValueError) :
        # int()/float() raise ValueError for malformed text, so it has to be
        # caught for the usage error to be reported; TypeError is kept so
        # anything that was handled before is still handled
        parser.error("Incorrect format of --num-seqs argument, must either be an integer or a factor ending with x, e.g. 2.5x")

    # draw the requested number of background sequences
    gen_seqs = rejection_sample_bg(
        fasta_recs,
        organism,
        num_samples=num_seqs,
        verbose=opts.verbose,
    )

    # emit the sequences as FASTA: to stdout unless --output names a file
    if not opts.output :
        fasta_text = ''.join('>%s\n%s\n'%(hdr,seq) for hdr,seq in gen_seqs.items())
        sys.stdout.write(fasta_text)
    else :
        write_fasta_to_file(gen_seqs,opts.output)

    # optionally write one tab-separated line per generated sequence header
    if opts.bed :
        # every ':' and '-' in a header becomes a tab
        # NOTE(review): presumably headers look like chrom:start-end - confirm;
        # any other '-' or ':' in a header is split into a column as well
        bed_lines = [k.replace(':','\t').replace('-','\t')+'\n' for k in gen_seqs.keys()]
        # the with block closes the file even if the write fails
        with open(opts.bed_output,'w') as bed_f :
            bed_f.write(''.join(bed_lines))