# Convert every peak file named on the command line into FASTA records and
# write them either to the --output file or to stdout.
#
# Names used here but defined outside this section: peak_fns, opts, nib_db,
# bed_to_fasta, macs_to_fasta, write_fasta_to_file.

# accumulated (header, sequence) pairs from all peak files
fasta_recs = []

for peak_fn in peak_fns:

    if opts.peak_format == 'auto':
        # if --peak-format is auto, figure format out from extension
        fnbase, fnext = os.path.splitext(peak_fn)
        ext = fnext.lower()
        if ext == '.bed':  # BED file
            peak_fmt = 'BED'
        elif ext == '.xls':  # MACS file
            peak_fmt = 'MACS'
        else:
            # Adjacent string literals instead of a backslash continuation
            # inside the literal, which would embed the source indentation
            # in the middle of the message.
            warnings.warn('Peak format specified as auto but file extension '
                          'not recognized in file %s, skipping' % peak_fn)
            continue
    else:
        # An explicit --peak-format applies to every file. Without this
        # assignment peak_fmt would be unbound (or stale) in this branch.
        # NOTE(review): assumes the option values are spelled 'BED'/'MACS',
        # matching the comparisons below -- confirm against the parser setup.
        peak_fmt = opts.peak_format

    # formats other than BED/MACS fall through and contribute no records
    if peak_fmt == 'BED':
        fasta_recs.extend(
            bed_to_fasta(peak_fn, nib_db, min_header=opts.min_header))
    elif peak_fmt == 'MACS':
        fasta_recs.extend(
            macs_to_fasta(peak_fn, nib_db, min_header=opts.min_header))

# write out foreground to file
if opts.output:
    if opts.wrap_width == -1:
        # -1 means "do not wrap": use the largest native int as line length.
        # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
        # script also runs on Python 3 without changing the Python 2 value.
        opts.wrap_width = getattr(sys, 'maxint', sys.maxsize)
    # dict() keeps a single sequence per header, as before
    write_fasta_to_file(dict(fasta_recs), opts.output,
                        linelen=opts.wrap_width)
else:
    for header, seq in fasta_recs:
        if opts.wrap_width != -1:
            seq = textwrap.fill(seq, opts.wrap_width)
        sys.stdout.write('>%s\n%s\n' % (header, seq))
# Sample background sequences matched to the input FASTA records and write
# them out as FASTA (to --output or stdout), optionally with a BED file.
#
# Names used here but defined outside this section: fasta_fns, opts, parser,
# organism, fasta_to_dict, rejection_sample_bg, write_fasta_to_file.

# load up all the fasta records
fasta_recs = {}
for fasta_fn in fasta_fns:
    fasta_recs.update(fasta_to_dict(fasta_fn))

# parse --num-seqs argument: either an absolute integer count or a
# multiplier of the number of input records written with a trailing 'x'
try:
    if opts.num_seqs.endswith('x'):
        num_seq_factor = float(opts.num_seqs[:-1])
        num_seqs = int(len(fasta_recs) * num_seq_factor)
    else:
        num_seqs = int(opts.num_seqs)
except (TypeError, ValueError):
    # int()/float() raise ValueError for malformed text, so catching only
    # TypeError let bad input escape as a traceback instead of a usage error.
    parser.error("Incorrect format of --num-seqs argument, must either be an integer or a factor ending with x, e.g. 2.5x")

# generate the sequences
gen_seqs = rejection_sample_bg(fasta_recs, organism,
                               num_samples=num_seqs, verbose=opts.verbose)

# write out to file
if opts.output:
    write_fasta_to_file(gen_seqs, opts.output)
else:
    sys.stdout.write(
        ''.join(['>%s\n%s\n' % (k, v) for k, v in gen_seqs.items()]))

if opts.bed:
    # Each header is turned into a tab-separated line by replacing ':' and
    # '-' with tabs.
    # NOTE(review): the flag tested is opts.bed but the path comes from
    # opts.bed_output -- confirm both options are defined by the parser.
    bed_lines = [k.replace(':', '\t').replace('-', '\t') + '\n'
                 for k in gen_seqs.keys()]
    # context manager closes the file even if the write fails
    with open(opts.bed_output, 'w') as bed_f:
        bed_f.write(''.join(bed_lines))