def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        sources = args[0].translate(tree_tx).split()
        ref_2bit = bx.seq.twobit.TwoBitFile(open(args[1]))
        index = maf.MultiIndexed(args[2:])
        out = maf.Writer(sys.stdout)
        missing_data = bool(options.missingData)
        use_strand = bool(options.strand)
    except:
        doc_optparse.exception()
    for line in sys.stdin:
        fields = line.split()
        ref_src, start, end = fields[0:3]
        if use_strand and len(fields) > 5:
            strand = fields[5]
        else:
            strand = '+'
        do_interval(sources, index, out, ref_src, int(start), int(end), ref_2bit, missing_data, strand)
    out.close()
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask = int(options.mask)
        else:
            mask = 0
    except:
        print >> sys.stderr, "Tool initialization error."
        sys.exit()
    reader = bx.align.maf.Reader(open(inp_file, 'r'))
    writer = bx.align.maf.Writer(open(out_file, 'w'))
    mask_chr_dict = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?', 5: 'N'}
    mask = mask_chr_dict[mask]
    if sitetype == "CpG":
        if int(definition) == 1:
            cpgfilter = bx.align.sitemask.cpg.Restricted(mask=mask)
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive(mask=mask)
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG(mask=mask)
        defn = "non-CpG"
    cpgfilter.run(reader, writer.write)
    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" % (float(cpgfilter.masked) / float(cpgfilter.total) * 100, mask, defn)
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        if options.comp:
            comp_type = options.comp
        else:
            comp_type = None
        score_fname = args[0]
        out_fname = args[1]
    except:
        doc_optparse.exit()
    scores = BinnedArray()
    ## last_chrom = None
    for i, (chrom, pos, val) in enumerate(bx.wiggle.Reader(misc.open_compressed(score_fname))):
        # if last_chrom is None:
        #     last_chrom = chrom
        # else:
        #     assert chrom == last_chrom, "This script expects a 'wiggle' input on only one chromosome"
        scores[pos] = val
        # Status
        if i % 10000 == 0:
            print i, "scores processed"
    out = open(out_fname, "w")
    if comp_type:
        scores.to_file(out, comp_type=comp_type)
    else:
        scores.to_file(out)
    out.close()
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    # common temp file setup
    tmpf = tempfile.NamedTemporaryFile()  # forward reads
    tmpqf = tempfile.NamedTemporaryFile()
    tmpqf = replaceNeg1(open(options.input2), tmpqf)
    # if paired-end data (have reverse input files)
    if options.input3 != "None" and options.input4 != "None":
        tmpr = tempfile.NamedTemporaryFile()  # reverse reads
        # replace the -1 in the qualities file
        tmpqr = tempfile.NamedTemporaryFile()
        tmpqr = replaceNeg1(open(options.input4, 'r'), tmpqr)
        cmd1 = "%s/bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s 2>&1" % (os.path.split(sys.argv[0])[0], tmpf.name, tmpr.name, options.input1, tmpqf.name, options.input3, tmpqr.name)
        try:
            os.system(cmd1)
            os.system('gunzip -c %s >> %s' % (tmpf.name, options.output1))
            os.system('gunzip -c %s >> %s' % (tmpr.name, options.output2))
        except Exception as eq:
            stop_err("Error converting data to fastq format.\n" + str(eq))
        tmpr.close()
        tmpqr.close()
    # if single-end data
    else:
        cmd1 = "%s/bwa_solid2fastq_modified.pl 'no' %s %s %s %s %s %s 2>&1" % (os.path.split(sys.argv[0])[0], tmpf.name, None, options.input1, tmpqf.name, None, None)
        try:
            os.system(cmd1)
            os.system('gunzip -c %s >> %s' % (tmpf.name, options.output1))
        except Exception as eq:
            stop_err("Error converting data to fastq format.\n" + str(eq))
    tmpqf.close()
    tmpf.close()
    sys.stdout.write('converted SOLiD data')
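# The SOLiD conversion snippets above call a replaceNeg1 helper that is not
# shown. A minimal, hedged sketch of what it might do: the assumption here is
# that it rewrites "-1" quality scores (which break downstream tools) before
# handing the temp file back; the real helper may use a different substitute.
def replaceNeg1(fin, tmp_out):
    for line in fin:
        if line.startswith('>'):
            tmp_out.write(line)
        else:
            quals = ['1' if q == '-1' else q for q in line.split()]
            tmp_out.write(' '.join(quals) + '\n')
    tmp_out.flush()
    fin.close()
    return tmp_out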
def __main__():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        keep_header = bool(options.header)
        keep_comments = bool(options.comments)
        expr = args[0]
        colname = args[1]
    except:
        doc_optparse.exception()
    # Compile expression for SPEED
    if expr:
        expr = compile(expr, '<expr arg>', 'eval')
    for element in bx.tabular.io.Reader(sys.stdin):
        if type(element) is bx.tabular.io.Header:
            if keep_header:
                print str(element) + "\t" + colname
        elif type(element) is bx.tabular.io.Comment:
            if keep_comments:
                print element
        else:
            val = eval(expr, dict(row=element))
            print str(element) + "\t" + str(val)
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        species = args
        # Allow a comma separated list, TODO: allow a newick format tree
        if len(species) == 1 and "," in species[0]:
            species = species[0].split(",")
        fuse = not bool(options.nofuse)
    except:
        doc_optparse.exit()
    maf_reader = bx.align.maf.Reader(sys.stdin)
    maf_writer = bx.align.maf.Writer(sys.stdout)
    if fuse:
        maf_writer = FusingAlignmentWriter(maf_writer)
    for m in maf_reader:
        new_components = get_components_for_species(m, species)
        if new_components:
            remove_all_gap_columns(new_components)
            m.components = new_components
            m.score = 0.0
            maf_writer.write(m)
    maf_reader.close()
    maf_writer.close()
def main():
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        inp_file, out_file, column, features = args
    except:
        stop_err("One or more arguments is missing or invalid.\nUsage: prog input output column features")
    try:
        column = int(column)
    except:
        stop_err("Column %s is an invalid column." % column)
    if features is None:
        stop_err("Column %d has no features to display, select another column." % (column + 1))
    fo = open(out_file, 'w')
    for i, line in enumerate(open(inp_file)):
        line = line.rstrip('\r\n')
        if line and line.startswith('#'):
            # Keep valid comment lines in the output
            fo.write("%s\n" % line)
        else:
            try:
                if line.split('\t')[column] in features.split(','):
                    fo.write("%s\n" % line)
            except:
                pass
    fo.close()
    print 'Column %d features: %s' % (column + 1, features)
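# stop_err is used throughout these tool scripts but never defined in the
# snippets themselves. In the legacy Galaxy tools it is typically just a
# report-and-exit wrapper; a minimal sketch of that assumption:
import sys

def stop_err(msg):
    sys.stderr.write(msg)
    sys.stderr.write('\n')
    sys.exit(1)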
def main():
    mincols = 1
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        if options.mincols:
            mincols = int(options.mincols)
        pieces = bool(options.pieces)
        in1_gff_format = bool(options.gff1)
        in2_gff_format = bool(options.gff2)
        in_fname, in2_fname, out_fname = args
    except:
        doc_optparse.exception()
    # Set readers to handle either GFF or default format.
    if in1_gff_format:
        in1_reader_wrapper = GFFReaderWrapper
    else:
        in1_reader_wrapper = NiceReaderWrapper
    if in2_gff_format:
        in2_reader_wrapper = GFFReaderWrapper
    else:
        in2_reader_wrapper = NiceReaderWrapper
    g1 = in1_reader_wrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    if in1_gff_format:
        # Intersect requires coordinates in BED format.
        g1.convert_to_bed_coord = True
    g2 = in2_reader_wrapper(fileinput.FileInput(in2_fname), chrom_col=chr_col_2, start_col=start_col_2, end_col=end_col_2, strand_col=strand_col_2, fix_strand=True)
    if in2_gff_format:
        # Intersect requires coordinates in BED format.
        g2.convert_to_bed_coord = True
    out_file = open(out_fname, "w")
    try:
        for feature in intersect([g1, g2], pieces=pieces, mincols=mincols):
            if isinstance(feature, GFFFeature):
                # Convert back to GFF coordinates since reader converted automatically.
                convert_bed_coords_to_gff(feature)
                for interval in feature.intervals:
                    out_file.write("%s\n" % "\t".join(interval.fields))
            elif isinstance(feature, GenomicInterval):
                out_file.write("%s\n" % "\t".join(feature.fields))
            else:
                out_file.write("%s\n" % feature)
    except ParseError, e:
        out_file.close()
        fail("Invalid file format: %s" % str(e))
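# The interval tools report parse failures through a fail helper, imported
# from the Galaxy utilities in the original scripts. A hedged sketch of the
# behaviour these snippets rely on: print the message and exit non-zero.
import sys

def fail(message):
    sys.stderr.write("%s\n" % message)
    sys.exit(1)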
def main():
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        inp_file, out_file, column, features = args
    except:
        stop_err("One or more arguments is missing or invalid.\nUsage: prog input output column features")
    try:
        column = int(column)
    except:
        stop_err("Column %s is an invalid column." % column)
    if features is None:
        stop_err("Column %d has no features to display, select another column." % (column + 1))
    fo = open(out_file, 'w')
    for i, line in enumerate(file(inp_file)):
        line = line.rstrip('\r\n')
        if line and line.startswith('#'):
            # Keep valid comment lines in the output
            fo.write("%s\n" % line)
        else:
            try:
                if line.split('\t')[column] in features.split(','):
                    fo.write("%s\n" % line)
            except:
                pass
    fo.close()
    print 'Column %d features: %s' % (column + 1, features)
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_file = open(args[1])
        if len(args) > 2:
            out_file = open(args[2], "w")
        else:
            out_file = sys.stdout
    except:
        doc_optparse.exit()
    # Use the parsed arguments consistently rather than raw sys.argv
    scores_by_chrom = read_scores(misc.open_compressed(score_fname))
    for line in interval_file:
        fields = line.split()
        chrom, start, stop = fields[0], int(fields[1]), int(fields[2])
        if chrom in scores_by_chrom:
            ba = scores_by_chrom[chrom]
            scores = [ba[i] for i in range(start, stop)]
        else:
            scores = []
        print >> out_file, " ".join(fields), " ".join(map(str, scores))
    interval_file.close()
    out_file.close()
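# read_scores is not defined in these snippets; presumably it loads a wiggle
# stream into one BinnedArray per chromosome. A sketch under that assumption:
import bx.wiggle
from bx.binned_array import BinnedArray

def read_scores(f):
    # Build one BinnedArray of scores per chromosome from a wiggle stream.
    scores_by_chrom = {}
    for chrom, pos, val in bx.wiggle.Reader(f):
        if chrom not in scores_by_chrom:
            scores_by_chrom[chrom] = BinnedArray()
        scores_by_chrom[chrom][pos] = val
    return scores_by_chrom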
def main():
    distance = 0
    minregions = 2
    output = 1
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        if options.distance:
            distance = int(options.distance)
        if options.overlap:
            distance = -1 * int(options.overlap)
        if options.output:
            output = int(options.output)
        if options.minregions:
            minregions = int(options.minregions)
        in_fname, out_fname = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    # Get the cluster tree
    try:
        clusters, extra = find_clusters(g1, mincols=distance, minregions=minregions)
    except ParseError, exc:
        fail("Invalid file format: %s" % str(exc))
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        in_fname, in2_fname, out_fname = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    g2 = NiceReaderWrapper(fileinput.FileInput(in2_fname), chrom_col=chr_col_2, start_col=start_col_2, end_col=end_col_2, strand_col=strand_col_2, fix_strand=True)
    out_file = open(out_fname, "w")
    try:
        for line in coverage([g1, g2]):
            if type(line) is GenomicInterval:
                out_file.write("%s\n" % "\t".join(line.fields))
            else:
                out_file.write("%s\n" % line)
    except ParseError, exc:
        out_file.close()
        fail("Invalid file format: %s" % str(exc))
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        species = args[0].split(',')
        nrequired = int(args[1])
    except:
        doc_optparse.exit()
    maf_reader = bx.align.maf.Reader(sys.stdin)
    interval_start = None
    interval_end = None
    for m in maf_reader:
        ref = m.components[0]
        # Does this alignment have enough of the required species
        if nrequired <= len([comp for comp in m.components if comp.src.split('.')[0] in species]):
            if interval_start is None:
                interval_start = ref.start
                interval_end = ref.end
            else:
                if ref.start - interval_end < SPAN:
                    interval_end = ref.end
                else:
                    if interval_end - interval_start >= MIN:
                        print(ref.src.split('.')[1], interval_start, interval_end)
                    interval_start = ref.start
                    interval_end = ref.end
        else:
            if interval_start is not None and interval_end - interval_start >= MIN:
                print(ref.src.split('.')[1], interval_start, interval_end)
            interval_start = None
            interval_end = None
def main():
    distance = 0
    minregions = 2
    output = 1
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        if options.distance:
            distance = int(options.distance)
        if options.overlap:
            distance = -1 * int(options.overlap)
        if options.output:
            output = int(options.output)
        if options.minregions:
            minregions = int(options.minregions)
        in_fname, out_fname = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    # Get the cluster tree
    try:
        clusters, extra = find_clusters(g1, mincols=distance, minregions=minregions)
    except ParseError, exc:
        fail("Invalid file format: %s" % str(exc))
def __main__():
    #
    # Parse options, args.
    #
    options, args = doc_optparse.parse(__doc__)
    try:
        if len(options.cols.split(',')) == 5:
            # BED file
            chrom_col, start_col, end_col, strand_col, name_col = parse_cols_arg(options.cols)
        else:
            # gff file
            chrom_col, start_col, end_col, strand_col = parse_cols_arg(options.cols)
            name_col = False
        dbkey = options.dbkey
        output_format = options.output_format
        gff_format = options.gff
        interpret_features = options.interpret_features
        GALAXY_DATA_INDEX_DIR = options.GALAXY_DATA_INDEX_DIR
        fasta_file = options.fasta
        input_filename, output_filename = args
    except:
        doc_optparse.exception()
    includes_strand_col = strand_col >= 0
    strand = None
    nibs = {}
    #
    # Set path to sequence data.
    #
    if fasta_file:
        # Need to create 2bit file from fasta file.
        try:
            seq_path = tempfile.NamedTemporaryFile(dir=".").name
            cmd = "faToTwoBit %s %s" % (fasta_file, seq_path)
            tmp_name = tempfile.NamedTemporaryFile(dir=".").name
            tmp_stderr = open(tmp_name, 'wb')
            proc = subprocess.Popen(args=cmd, shell=True, stderr=tmp_stderr.fileno())
            returncode = proc.wait()
            tmp_stderr.close()
            # Get stderr, allowing for case where it's very large.
            tmp_stderr = open(tmp_name, 'rb')
            stderr = ''
            buffsize = 1048576
            try:
                while True:
                    stderr += tmp_stderr.read(buffsize)
                    if not stderr or len(stderr) % buffsize != 0:
                        break
            except OverflowError:
                pass
            tmp_stderr.close()
            # Error checking.
            if returncode != 0:
                raise Exception(stderr)
        except Exception, e:
            stop_err('Error running faToTwoBit. ' + str(e))
def main():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        lnorm = bool(options.lnorm)
        recalculate = bool(options.recalculate)
    except:
        doc_optparse.exit()
    hox70 = score.build_scoring_scheme("""  A    C    G    T
                                           91 -114  -31 -123
                                         -114  100 -125  -31
                                          -31 -125  100 -114
                                         -123  -31 -114   91 """, 400, 30, default=0)
    maf_reader = maf.Reader(sys.stdin)
    for m in maf_reader:
        if m.text_size == 0:
            print "NA"
            continue
        s = m.score
        # Recalculate?
        if recalculate:
            s = hox70.score_alignment(m)
        # Normalize?
        if lnorm:
            s = s / m.text_size
        # Print
        print s
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        data_fname, model_fname, out_fname = args
        window = int(getopt(options, "window", 100))
        shift = int(getopt(options, "shift", 5))
        low = float(getopt(options, "low", -1.0))
        high = float(getopt(options, "high", 1.0))
        if options.mapping:
            align_count, mapping = rp.mapping.alignment_mapping_from_file(file(options.mapping))
        else:
            mapping = None
        modname = getattr(options, "model")
        if modname is None:
            modname = "standard"
        reorder = getopt(options, "reorder", None)
        if reorder:
            reorder = map(int, reorder.split(","))
    except:
        doc_optparse.exception()
    out = open(out_fname, "w")
    run(open(data_fname), modname, open(model_fname), out, mapping, window, shift, low, high, reorder)
    out.close()
def main():
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except:
        stop_err("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")
    fo = open(out_file, "w")
    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    if offset == 0:
        makesliding = 0
    for i, line in enumerate(file(inp_file)):
        line = line.strip()
        if line and line[0:1] != "#":
            try:
                elems = line.split("\t")
                start = int(elems[start_col_1])
                end = int(elems[end_col_1])
                if makesliding == 0:
                    numwin = (end - start) / winsize
                else:
                    numwin = (end - start) / offset
                if numwin > 0:
                    for win in range(numwin):
                        elems_1 = elems
                        elems_1[start_col_1] = str(start)
                        elems_1[end_col_1] = str(start + winsize)
                        fo.write("%s\n" % "\t".join(elems_1))
                        if makesliding == 0:
                            start = start + winsize
                        else:
                            start = start + offset
                            if start + winsize > end:
                                break
            except:
                skipped_lines += 1
                if not invalid_line:
                    first_invalid_line = i + 1
                    invalid_line = line
    fo.close()
    if makesliding == 1:
        print "Window size=%d, Sliding=Yes, Offset=%d" % (winsize, offset)
    else:
        print "Window size=%d, Sliding=No" % (winsize)
    if skipped_lines > 0:
        print 'Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line)
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_file = open(args[1])
        if len(args) > 2:
            out_file = open(args[2], 'w')
        else:
            out_file = sys.stdout
    except:
        doc_optparse.exit()
    # Use the parsed arguments consistently rather than raw sys.argv
    scores_by_chrom = read_scores(misc.open_compressed(score_fname))
    for line in interval_file:
        fields = line.split()
        chrom, start, stop = fields[0], int(fields[1]), int(fields[2])
        if chrom in scores_by_chrom:
            ba = scores_by_chrom[chrom]
            scores = [ba[i] for i in range(start, stop)]
        else:
            scores = []
        print >> out_file, " ".join(fields), " ".join(map(str, scores))
    interval_file.close()
    out_file.close()
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        in_fname, in2_fname, out_fname = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    g2 = NiceReaderWrapper(fileinput.FileInput(in2_fname), chrom_col=chr_col_2, start_col=start_col_2, end_col=end_col_2, strand_col=strand_col_2, fix_strand=True)
    out_file = open(out_fname, "w")
    try:
        for line in coverage([g1, g2]):
            if type(line) is GenomicInterval:
                out_file.write("%s\n" % "\t".join(line.fields))
            else:
                out_file.write("%s\n" % line)
    except ParseError, exc:
        out_file.close()
        fail("Invalid file format: %s" % str(exc))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    seq_path = check_seq_file(options.dbkey, options.indexDir)
    tmp_dir = tempfile.gettempdir()
    os.chdir(tmp_dir)
    tmpf0 = tempfile.NamedTemporaryFile(dir=tmp_dir)
    tmpf0bam = '%s.bam' % tmpf0.name
    tmpf0bambai = '%s.bam.bai' % tmpf0.name
    tmpf1 = tempfile.NamedTemporaryFile(dir=tmp_dir)
    tmpf1fai = '%s.fai' % tmpf1.name
    opts = '%s %s -M %s' % (('', '-s')[options.lastCol == 'yes'], ('', '-i')[options.indels == 'yes'], options.mapCap)
    if options.consensus == 'yes':
        opts += ' -c -T %s -N %s -r %s -I %s' % (options.theta, options.hapNum, options.fraction, options.phredProb)
    cmd1 = None
    cmd2 = 'cp %s %s; cp %s %s' % (options.input1, tmpf0bam, options.bamIndex, tmpf0bambai)
    cmd3 = 'samtools pileup %s -f %s %s > %s 2> /dev/null'
    if options.ref == 'indexed':
        full_path = "%s.fai" % seq_path
        if not os.path.exists(full_path):
            stop_err("No sequences are available for '%s', request them by reporting this error." % options.dbkey)
        cmd3 = cmd3 % (opts, seq_path, tmpf0bam, options.output1)
    elif options.ref == 'history':
        cmd1 = 'cp %s %s; samtools faidx %s' % (options.ownFile, tmpf1.name, tmpf1.name)
        cmd3 = cmd3 % (opts, tmpf1.name, tmpf0bam, options.output1)
    # index reference if necessary
    if cmd1:
        try:
            os.system(cmd1)
            if options.ref == 'history' and not os.path.exists(tmpf1fai):
                stop_err("Problem creating index file from history item.")
        except Exception, eq:
            stop_err('Error handling reference sequence\n' + str(eq))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    # common temp file setup
    tmpf = tempfile.NamedTemporaryFile()  # forward reads
    tmpqf = tempfile.NamedTemporaryFile()
    tmpqf = replaceNeg1(file(options.input2, "r"), tmpqf)
    # if paired-end data (have reverse input files)
    if options.input3 != "None" and options.input4 != "None":
        tmpr = tempfile.NamedTemporaryFile()  # reverse reads
        # replace the -1 in the qualities file
        tmpqr = tempfile.NamedTemporaryFile()
        tmpqr = replaceNeg1(file(options.input4, "r"), tmpqr)
        cmd1 = "%s/bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s 2>&1" % (
            os.path.split(sys.argv[0])[0],
            tmpf.name,
            tmpr.name,
            options.input1,
            tmpqf.name,
            options.input3,
            tmpqr.name,
        )
        try:
            os.system(cmd1)
            os.system("gunzip -c %s >> %s" % (tmpf.name, options.output1))
            os.system("gunzip -c %s >> %s" % (tmpr.name, options.output2))
        except Exception, eq:
            stop_err("Error converting data to fastq format.\n" + str(eq))
        tmpr.close()
        tmpqr.close()
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask = int(options.mask)
        else:
            mask = 0
    except:
        print >> sys.stderr, "Tool initialization error."
        sys.exit()
    reader = bx.align.maf.Reader(open(inp_file, 'r'))
    writer = bx.align.maf.Writer(open(out_file, 'w'))
    mask_chr_dict = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?', 5: 'N'}
    mask = mask_chr_dict[mask]
    if sitetype == "CpG":
        if int(definition) == 1:
            cpgfilter = bx.align.sitemask.cpg.Restricted(mask=mask)
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive(mask=mask)
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG(mask=mask)
        defn = "non-CpG"
    cpgfilter.run(reader, writer.write)
    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" % (
        float(cpgfilter.masked) / float(cpgfilter.total) * 100, mask, defn)
def __main__():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        keep_header = bool(options.header)
        keep_comments = bool(options.comments)
        expr = args[0]
        colname = args[1]
    except:
        doc_optparse.exception()
    # Compile expression for SPEED
    if expr:
        expr = compile(expr, '<expr arg>', 'eval')
    for element in bx.tabular.io.Reader(sys.stdin):
        if type(element) is bx.tabular.io.Header:
            if keep_header:
                print str(element) + "\t" + colname
        elif type(element) is bx.tabular.io.Comment:
            if keep_comments:
                print element
        else:
            val = eval(expr, dict(row=element))
            print str(element) + "\t" + str(val)
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        in_fname, in2_fname, out_fname, direction = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    g2 = NiceReaderWrapper(fileinput.FileInput(in2_fname), chrom_col=chr_col_2, start_col=start_col_2, end_col=end_col_2, strand_col=strand_col_2, fix_strand=True)
    out_file = open(out_fname, "w")
    try:
        for line in proximal_region_finder([g1, g2], direction):
            if type(line) is list:
                out_file.write("%s\n" % "\t".join(line))
            else:
                out_file.write("%s\n" % line)
    except ParseError, exc:
        fail("Invalid file format: %s" % str(exc))
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        species = args
        # Allow a comma separated list, TODO: allow a newick format tree
        if len(species) == 1 and ',' in species[0]:
            species = species[0].split(',')
        fuse = not bool(options.nofuse)
    except:
        doc_optparse.exit()
    maf_reader = bx.align.maf.Reader(sys.stdin)
    maf_writer = bx.align.maf.Writer(sys.stdout)
    if fuse:
        maf_writer = FusingAlignmentWriter(maf_writer)
    for m in maf_reader:
        new_components = get_components_for_species(m, species)
        if new_components:
            remove_all_gap_columns(new_components)
            m.components = new_components
            m.score = 0.0
            maf_writer.write(m)
    maf_reader.close()
    maf_writer.close()
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        in_fname, in2_fname, out_fname, direction = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    g2 = NiceReaderWrapper(fileinput.FileInput(in2_fname), chrom_col=chr_col_2, start_col=start_col_2, end_col=end_col_2, strand_col=strand_col_2, fix_strand=True)
    out_file = open(out_fname, "w")
    try:
        for line in proximal_region_finder([g1, g2], direction):
            if type(line) is list:
                out_file.write("%s\n" % "\t".join(line))
            else:
                out_file.write("%s\n" % line)
    except ParseError, exc:
        fail("Invalid file format: %s" % str(exc))
def main():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        lnorm = bool(options.lnorm)
        recalculate = bool(options.recalculate)
    except Exception:
        doc_optparse.exit()
    hox70 = score.build_scoring_scheme("""  A    C    G    T
                                           91 -114  -31 -123
                                         -114  100 -125  -31
                                          -31 -125  100 -114
                                         -123  -31 -114   91 """, 400, 30, default=0)
    maf_reader = maf.Reader(sys.stdin)
    for m in maf_reader:
        if m.text_size == 0:
            print("NA")
            continue
        s = m.score
        # Recalculate?
        if recalculate:
            s = hox70.score_alignment(m)
        # Normalize?
        if lnorm:
            s = s / m.text_size
        # Print
        print(s)
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        maf_file = args[0]
        # If it appears to be a bz2 file, attempt to open with table
        if maf_file.endswith(".bz2"):
            table_file = maf_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index bz2 compressed files first "
                                  "create a bz2t file with bzip-table.")
            # Open with SeekableBzip2File so we have tell support
            maf_in = SeekableBzip2File(maf_file, table_file)
            # Strip .bz2 from the filename before adding ".index"
            maf_file = maf_file[:-4]
        elif maf_file.endswith(".lzo"):
            from bx.misc.seeklzop import SeekableLzopFile
            table_file = maf_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index lzo compressed files first "
                                  "create a lzot file with lzop_build_offset_table.")
            # Open with SeekableLzopFile so we have tell support
            maf_in = SeekableLzopFile(maf_file, table_file)
            # Strip .lzo from the filename before adding ".index"
            maf_file = maf_file[:-4]
        else:
            maf_in = open(maf_file)
        # Determine the name of the index file
        if len(args) > 1:
            index_file = args[1]
        else:
            index_file = maf_file + ".index"
        if options.species:
            species = options.species.split(",")
        else:
            species = None
    except:
        doc_optparse.exception()
    maf_reader = bx.align.maf.Reader(maf_in)
    indexes = interval_index_file.Indexes()
    # Need to be a bit tricky in our iteration here to get the 'tells' right
    while 1:
        pos = maf_reader.file.tell()
        block = maf_reader.next()
        if block is None:
            break
        for c in block.components:
            if species is not None and c.src.split('.')[0] not in species:
                continue
            indexes.add(c.src, c.forward_strand_start, c.forward_strand_end, pos, max=c.src_size)
    out = open(index_file, 'w')
    indexes.write(out)
    out.close()
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_fname = args[1]
        if len(args) > 2:
            out_file = open(args[2], 'w')
        else:
            out_file = sys.stdout
        binned = bool(options.binned)
        mask_fname = options.mask
    except Exception:
        doc_optparse.exit()
    if binned:
        scores_by_chrom = load_scores_ba_dir(score_fname)
    else:
        scores_by_chrom = load_scores_wiggle(score_fname)
    if mask_fname:
        masks = binned_bitsets_from_file(open(mask_fname))
    else:
        masks = None
    for line in open(interval_fname):
        fields = line.split()
        chrom, start, stop = fields[0], int(fields[1]), int(fields[2])
        total = 0
        count = 0
        min_score = 100000000
        max_score = -100000000
        for i in range(start, stop):
            if chrom in scores_by_chrom and scores_by_chrom[chrom][i]:
                # Skip if base is masked
                if masks and chrom in masks:
                    if masks[chrom][i]:
                        continue
                # Get the score, only count if not 'nan'
                score = scores_by_chrom[chrom][i]
                if not isNaN(score):
                    total += score
                    count += 1
                    max_score = max(score, max_score)
                    min_score = min(score, min_score)
        if count > 0:
            avg = total / count
        else:
            avg = "nan"
            min_score = "nan"
            max_score = "nan"
        print("\t".join(map(str, [chrom, start, stop, avg, min_score, max_score])), file=out_file)
    out_file.close()
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    # get parameters for intersect command
    if options.priority == "second_file":
        file1 = options.input2
        file2 = options.input1
    else:
        file1 = options.input1
        file2 = options.input2
    cmd = "wc -l %s | cut -f1 -d ' '" % (file1)
    nrBefore = float(subprocess.check_output(cmd, shell=True))
    if options.intersect == 'keep_intersect':
        cmd = 'intersectBed -header -sorted -a %s -b %s > %s' % (file1, file2, options.output1)
        subprocess.check_call(cmd, shell=True)
        print cmd
    elif options.intersect == 'keep_unique':
        cmd = 'intersectBed -v -header -sorted -a %s -b %s > %s' % (file1, file2, options.output1)
        subprocess.check_call(cmd, shell=True)
        print cmd
    elif options.intersect == 'keep_allele':
        try:
            x, tmp1 = tempfile.mkstemp()
            x, tmp2 = tempfile.mkstemp()
            cmd = 'intersectBed -v -a %s -b %s > %s' % (file1, file2, tmp1)
            subprocess.check_call(cmd, shell=True)
            print cmd
            cmd = 'intersectBed -wa -wb -a %s -b %s > %s' % (file1, file2, tmp2)
            subprocess.check_call(cmd, shell=True)
            print cmd
            subtract(tmp2, tmp1)
            cmd = 'sortBed -i %s > %s' % (tmp1, options.output1)
            subprocess.check_call(cmd, shell=True)
            print cmd
        finally:
            os.remove(tmp1)
            os.remove(tmp2)
    cmd = "wc -l %s | cut -f1 -d ' '" % (options.output1)
    nrAfter = float(subprocess.check_output(cmd, shell=True))
    output_handle = file(options.output2, "w")
    output_handle.write("# SNPs before\tSNPs after\tfraction subtracted\tfraction retained\n")
    output_handle.write("%i\t%i\t%.4f\t%.4f\n" % (nrBefore, nrAfter, (nrBefore - nrAfter) / nrBefore, nrAfter / nrBefore))
    output_handle.close()
    # check that there are results in the output file
    print os.path.getsize(options.output1)
    sys.stdout.write('Intersected VCF A with VCF B\n')
def __main__():
    strout = ""
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    coverage = int(options.coverage)
    fin = file(options.input, "r")
    fout = file(options.output, "w")
    inLine = fin.readline()
    if options.format == "six":
        seqIndex = 0
        locIndex = 1
        baseIndex = 2
        covIndex = 3
    elif options.format == "ten":
        seqIndex = 0
        locIndex = 1
        if options.base == "first":
            baseIndex = 2
        else:
            baseIndex = 3
        covIndex = 7
    else:
        seqIndex = int(options.seq_column) - 1
        locIndex = int(options.loc_column) - 1
        baseIndex = int(options.base_column) - 1
        covIndex = int(options.cvrg_column) - 1
    lastSeq = ""
    lastLoc = -1
    locs = []
    startLoc = -1
    bases = []
    while inLine.strip() != "":
        lineParts = inLine.split("\t")
        try:
            seq, loc, base, cov = lineParts[seqIndex], int(lineParts[locIndex]), lineParts[baseIndex], int(lineParts[covIndex])
        except IndexError, ei:
            if options.format == "ten":
                stop_err("It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n" + str(ei))
            else:
                stop_err("There appears to be something wrong with your column index values.\n" + str(ei))
        except ValueError, ev:
            if options.format == "six":
                stop_err("It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n" + str(ev))
            else:
                stop_err("There appears to be something wrong with your column index values.\n" + str(ev))
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_fname = args[1]
        if len(args) > 2:
            out_file = open(args[2], "w")
        else:
            out_file = sys.stdout
        binned = bool(options.binned)
        mask_fname = options.mask
    except:
        doc_optparse.exit()
    if binned:
        scores_by_chrom = load_scores_ba_dir(score_fname)
    else:
        scores_by_chrom = load_scores_wiggle(score_fname)
    if mask_fname:
        masks = binned_bitsets_from_file(open(mask_fname))
    else:
        masks = None
    for line in open(interval_fname):
        fields = line.split()
        chrom, start, stop = fields[0], int(fields[1]), int(fields[2])
        total = 0
        count = 0
        min_score = 100000000
        max_score = -100000000
        for i in range(start, stop):
            if chrom in scores_by_chrom and scores_by_chrom[chrom][i]:
                # Skip if base is masked
                if masks and chrom in masks:
                    if masks[chrom][i]:
                        continue
                # Get the score, only count if not 'nan'
                score = scores_by_chrom[chrom][i]
                if not isNaN(score):
                    total += score
                    count += 1
                    max_score = max(score, max_score)
                    min_score = min(score, min_score)
        if count > 0:
            avg = total / count
        else:
            avg = "nan"
            min_score = "nan"
            max_score = "nan"
        print >> out_file, "\t".join(map(str, [chrom, start, stop, avg, min_score, max_score]))
    out_file.close()
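# Both interval-scoring snippets above skip 'nan' scores via an isNaN helper
# that is not shown; a minimal sketch relying on NaN comparing unequal to
# itself:
def isNaN(x):
    return x != x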
def __main__():
    strout = ''
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    coverage = int(options.coverage)
    fin = file(options.input, 'r')
    fout = file(options.output, 'w')
    inLine = fin.readline()
    if options.format == 'six':
        seqIndex = 0
        locIndex = 1
        baseIndex = 2
        covIndex = 3
    elif options.format == 'ten':
        seqIndex = 0
        locIndex = 1
        if options.base == 'first':
            baseIndex = 2
        else:
            baseIndex = 3
        covIndex = 7
    else:
        seqIndex = int(options.seq_column) - 1
        locIndex = int(options.loc_column) - 1
        baseIndex = int(options.base_column) - 1
        covIndex = int(options.cvrg_column) - 1
    lastSeq = ''
    lastLoc = -1
    locs = []
    startLoc = -1
    bases = []
    while inLine.strip() != '':
        lineParts = inLine.split('\t')
        try:
            seq, loc, base, cov = lineParts[seqIndex], int(lineParts[locIndex]), lineParts[baseIndex], int(lineParts[covIndex])
        except IndexError, ei:
            if options.format == 'ten':
                stop_err('It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n' + str(ei))
            else:
                stop_err('There appears to be something wrong with your column index values.\n' + str(ei))
        except ValueError, ev:
            if options.format == 'six':
                stop_err('It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n' + str(ev))
            else:
                stop_err('There appears to be something wrong with your column index values.\n' + str(ev))
def main():
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        sys.exit("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")
    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    if offset == 0:
        makesliding = 0
    with open(out_file, 'w') as fo, open(inp_file) as fi:
        for i, line in enumerate(fi):
            line = line.strip()
            if line and line[0:1] != "#":
                try:
                    elems = line.split('\t')
                    start = int(elems[start_col_1])
                    end = int(elems[end_col_1])
                    if makesliding == 0:
                        numwin = (end - start) // winsize
                    else:
                        numwin = (end - start) // offset
                    if numwin > 0:
                        for _ in range(numwin):
                            elems_1 = elems
                            elems_1[start_col_1] = str(start)
                            elems_1[end_col_1] = str(start + winsize)
                            fo.write("%s\n" % '\t'.join(elems_1))
                            if makesliding == 0:
                                start = start + winsize
                            else:
                                start = start + offset
                                if start + winsize > end:
                                    break
                except Exception:
                    skipped_lines += 1
                    if not invalid_line:
                        first_invalid_line = i + 1
                        invalid_line = line
    if makesliding == 1:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    db_build = options.db_build
    query_filename = options.input.strip()
    output_filename = options.output.strip()
    mega_word_size = options.word_size          # -W
    mega_iden_cutoff = options.identity_cutoff  # -p
    mega_evalue_cutoff = options.eval_cutoff    # -e
    mega_temp_output = tempfile.NamedTemporaryFile().name
    mega_filter = options.filter_query          # -F
    GALAXY_DATA_INDEX_DIR = options.index_dir
    DB_LOC = "%s/blastdb.loc" % GALAXY_DATA_INDEX_DIR
    # megablast parameters
    try:
        int(mega_word_size)
    except:
        stop_err('Invalid value for word size')
    try:
        float(mega_iden_cutoff)
    except:
        stop_err('Invalid value for identity cut-off')
    try:
        float(mega_evalue_cutoff)
    except:
        stop_err('Invalid value for Expectation value')
    # prepare the database
    db = {}
    for i, line in enumerate(file(DB_LOC)):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        db[fields[0]] = fields[1]
    if not db.has_key(db_build):
        stop_err('Cannot locate the target database. Please check your location file.')
    # arguments for megablast
    chunk = db[(db_build)]
    megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null 2>&1 " \
        % (chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter)
    print megablast_command
    try:
        os.system(megablast_command)
    except Exception, e:
        stop_err(str(e))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    # validate parameters
    error = ''
    try:
        read_len = int(options.read_len)
        if read_len <= 0:
            raise Exception, ' greater than 0'
    except TypeError, e:
        error = ': %s' % str(e)
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    # validate parameters
    error = ''
    try:
        read_len = int(options.read_len)
        if read_len <= 0:
            raise Exception(' greater than 0')
    except TypeError, e:
        error = ': %s' % str(e)
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        h5_fname = args[0]
        mapping_fname = args[1]
        in_fname = args[2]
        out_fname = args[3]
        chrom_col, start_col, end_col = map(lambda x: int(x) - 1, args[4:7])
        per_col = bool(options.perCol)
    except Exception, e:
        doc_optparse.exception()
def main():
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        h5_fname = args[0]
        mapping_fname = args[1]
        in_fname = args[2]
        out_fname = args[3]
        chrom_col, start_col, end_col = map(lambda x: int(x) - 1, args[4:7])
        per_col = bool(options.perCol)
    except Exception, e:
        doc_optparse.exception()
def main():
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)
    try:
        # chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, out_file, pri_species, mask_species, qual_cutoff, mask_chr, mask_region, mask_length, loc_file = args
        qual_cutoff = int(qual_cutoff)
        mask_chr = int(mask_chr)
        mask_region = int(mask_region)
        if mask_region != 3:
            mask_length = int(mask_length)
        else:
            mask_length_r = int(mask_length.split(',')[0])
            mask_length_l = int(mask_length.split(',')[1])
    except:
        stop_err("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")
    if pri_species == 'None':
        stop_err("No primary species selected, try again by selecting at least one primary species.")
    if mask_species == 'None':
        stop_err("No mask species selected, try again by selecting at least one species to mask.")
    mask_chr_count = 0
    mask_chr_dict = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?', 5: 'N'}
    mask_reg_dict = {0: 'Current pos', 1: 'Current+Downstream', 2: 'Current+Upstream', 3: 'Current+Both sides'}
    # ensure dbkey is present in the twobit loc file
    filepath = None
    try:
        pspecies_all = pri_species.split(',')
        pspecies_all2 = pri_species.split(',')
        pspecies = []
        filepaths = []
        for line in open(loc_file):
            if pspecies_all2 == []:
                break
            if line[0:1] == "#":
                continue
            fields = line.split('\t')
            try:
                build = fields[0]
                for i, dbkey in enumerate(pspecies_all2):
                    if dbkey == build:
                        pspecies.append(build)
                        filepaths.append(fields[1])
                        del pspecies_all2[i]
                    else:
                        continue
            except:
                pass
    except Exception, exc:
        stop_err('Initialization error: %s' % str(exc))
def main():
    mincols = 1
    upstream_pad = 0
    downstream_pad = 0
    leftfill = False
    rightfill = False
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(options.cols2)
        if options.mincols:
            mincols = int(options.mincols)
        if options.fill:
            if options.fill == "both":
                rightfill = leftfill = True
            else:
                rightfill = options.fill == "right"
                leftfill = options.fill == "left"
        in_fname, in2_fname, out_fname = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    g2 = NiceReaderWrapper(fileinput.FileInput(in2_fname), chrom_col=chr_col_2, start_col=start_col_2, end_col=end_col_2, strand_col=strand_col_2, fix_strand=True)
    out_file = open(out_fname, "w")
    try:
        for outfields in join(g1, g2, mincols=mincols, rightfill=rightfill, leftfill=leftfill):
            if type(outfields) is list:
                out_file.write("%s\n" % "\t".join(outfields))
            else:
                out_file.write("%s\n" % outfields)
    except ParseError, exc:
        out_file.close()
        fail("Invalid file format: %s" % str(exc))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    db_build = options.db_build
    query_filename = options.input.strip()
    output_filename = options.output.strip()
    mega_word_size = options.word_size          # -W
    mega_iden_cutoff = options.identity_cutoff  # -p
    mega_evalue_cutoff = options.eval_cutoff    # -e
    mega_temp_output = tempfile.NamedTemporaryFile().name
    mega_filter = options.filter_query          # -F
    GALAXY_DATA_INDEX_DIR = options.index_dir
    DB_LOC = "%s/blastdb.loc" % GALAXY_DATA_INDEX_DIR
    # megablast parameters
    try:
        int(mega_word_size)
    except:
        stop_err('Invalid value for word size')
    try:
        float(mega_iden_cutoff)
    except:
        stop_err('Invalid value for identity cut-off')
    try:
        float(mega_evalue_cutoff)
    except:
        stop_err('Invalid value for Expectation value')
    # prepare the database
    db = {}
    for i, line in enumerate(file(DB_LOC)):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        db[fields[0]] = fields[1]
    if not db.has_key(db_build):
        stop_err('Cannot locate the target database. Please check your location file.')
    # arguments for megablast
    chunk = db[(db_build)]
    megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null 2>&1 " \
        % (chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter)
    print megablast_command
    try:
        os.system(megablast_command)
    except Exception, e:
        stop_err(str(e))
def main():
    mincols = 1
    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols1)
        if options.mincols:
            mincols = int(options.mincols)
        in_fname, out_fname = args
    except:
        doc_optparse.exception()
    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname), chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1, strand_col=strand_col_1, fix_strand=True)
    out_file = open(out_fname, "w")
    try:
        for line in merge(g1, mincols=mincols):
            if options.threecol:
                if type(line) is GenomicInterval:
                    out_file.write("%s\t%s\t%s\n" % (line.chrom, str(line.startCol), str(line.endCol)))
                elif type(line) is list:
                    out_file.write("%s\t%s\t%s\n" % (line[chr_col_1], str(line[start_col_1]), str(line[end_col_1])))
                else:
                    out_file.write("%s\n" % line)
            else:
                if type(line) is GenomicInterval:
                    out_file.write("%s\n" % "\t".join(line.fields))
                elif type(line) is list:
                    out_file.write("%s\n" % "\t".join(line))
                else:
                    out_file.write("%s\n" % line)
    except ParseError as exc:
        out_file.close()
        fail("Invalid file format: %s" % str(exc))
    out_file.close()
    if g1.skipped > 0:
        print(skipped(g1, filedesc=" of 1st dataset"))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    try:
        range_filename = args[0]
        refindex = int(args[1])
        if options.mincols:
            mincols = int(options.mincols)
        else:
            mincols = 10
    except:
        doc_optparse.exit()
    # Load Intervals
    intersecter = intervals.Intersecter()
    for line in file(range_filename):
        fields = line.split()
        intersecter.add_interval(intervals.Interval(int(fields[0]), int(fields[1])))
    # Start axt on stdout
    out = bx.align.axt.Writer(sys.stdout)
    # Iterate over input axt
    for axt in bx.align.axt.Reader(sys.stdin):
        ref_component = axt.components[refindex]
        # Find overlap with reference component
        intersections = intersecter.find(ref_component.start, ref_component.end)
        # Keep output axt ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = axt.slice_by_component(refindex, start, end)
            good = True
            for c in sliced.components:
                if c.size < 1:
                    good = False
            if good and sliced.text_size > mincols:
                out.write(sliced)
    # Close output axt
    out.close()
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    try:
        maf_files = args
        if options.prefix:
            prefix = options.prefix
        else:
            prefix = None
    except:
        doc_optparse.exit()
    # Open indexed access to mafs
    indexes = [bx.align.maf.Indexed(maf_file, maf_file + ".index") for maf_file in maf_files]
    # Iterate over input ranges
    for line in sys.stdin:
        fields = line.split()
        src, start, end = fields[0], int(fields[1]), int(fields[2])
        if prefix:
            src = prefix + src
        total_length = end - start
        # Find overlap with reference component
        blocks = []
        for index in indexes:
            blocks += index.get(src, start, end)
        coverage = dict()
        for block in blocks:
            overlap_start = max(start, block.components[0].start)
            overlap_end = min(end, block.components[0].end)
            length = overlap_end - overlap_start
            assert length > 0
            for c in block.components[1:]:
                species = c.src.split('.')[0]
                try:
                    coverage[species] += length
                except:
                    coverage[species] = length
        print(line, end=' ')
        for key, value in coverage.items():
            print("   ", key.ljust(10), "%0.2f" % (value / total_length))
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    cmd = "fq_all2std.pl %s %s > %s"
    if options.command == "sol2std":
        cmd = cmd % (options.command, options.input, options.outputFastqsanger)
    elif options.command == "std2sol":
        cmd = cmd % (options.command, options.input, options.outputFastqsolexa)
    elif options.command == "fq2fa":
        cmd = cmd % (options.command, options.input, options.outputFasta)
    try:
        os.system(cmd)
    except Exception, eq:
        stop_err("Error converting data format.\n" + str(eq))
def __main__():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        refindex = int(args[0])
    except Exception:
        doc_optparse.exit()
    maf_reader = maf.Reader(sys.stdin)
    for m in maf_reader:
        c = m.components[refindex].src
        print(c[c.rfind("chr") + 3:])
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    cmd = "fq_all2std.pl %s %s > %s"
    if options.command == 'sol2std':
        cmd = cmd % (options.command, options.input, options.outputFastqsanger)
    elif options.command == 'std2sol':
        cmd = cmd % (options.command, options.input, options.outputFastqsolexa)
    elif options.command == 'fq2fa':
        cmd = cmd % (options.command, options.input, options.outputFasta)
    try:
        os.system(cmd)
    except Exception as eq:
        stop_err("Error converting data format.\n" + str(eq))
def __main__():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        refindex = int(args[0])
    except:
        doc_optparse.exit()
    maf_reader = maf.Reader(sys.stdin)
    for m in maf_reader:
        c = m.components[refindex].src
        print c[c.rfind("chr") + 3:]
def __main__():
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    try:
        range_filename = args[0]
        refindex = int(args[1])
        if options.mincols:
            mincols = int(options.mincols)
        else:
            mincols = 10
    except:
        doc_optparse.exit()
    # Load Intervals
    intersecter = intervals.Intersecter()
    for line in file(range_filename):
        fields = line.split()
        intersecter.add_interval(intervals.Interval(int(fields[0]), int(fields[1])))
    # Start axt on stdout
    out = bx.align.axt.Writer(sys.stdout)
    # Iterate over input axt
    for axt in bx.align.axt.Reader(sys.stdin):
        ref_component = axt.components[refindex]
        # Find overlap with reference component
        intersections = intersecter.find(ref_component.start, ref_component.end)
        # Keep output axt ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = axt.slice_by_component(refindex, start, end)
            good = True
            for c in sliced.components:
                if c.size < 1:
                    good = False
            if good and sliced.text_size > mincols:
                out.write(sliced)
    # Close output axt
    out.close()
def __main__():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        keep_header = bool(options.header)
        keep_comments = bool(options.comments)
        cols = []
        if options.cols:
            for c in options.cols.split(','):
                try:
                    v = int(c)
                except ValueError:
                    v = c
                cols.append(v)
        if len(args) > 0:
            expr = args[0]
        else:
            expr = None
        if options.force_header:
            force_header = bx.tabular.io.FIRST_LINE_IS_HEADER
        else:
            force_header = None
    except Exception:
        doc_optparse.exception()
    # Compile expression for SPEED
    if expr:
        expr = compile(expr, '<expr arg>', 'eval')
    for element in bx.tabular.io.TableReader(sys.stdin, force_header=force_header):
        if isinstance(element, bx.tabular.io.Header):
            if keep_header:
                if cols:
                    print("#" + "\t".join(element[c] for c in cols))
                else:
                    print(element)
        elif isinstance(element, bx.tabular.io.Comment):
            if keep_comments:
                print(element)
        else:
            if expr is None or bool(eval(expr, dict(row=element))):
                if cols:
                    print("\t".join(element[c] for c in cols))
                else:
                    print(element)
def main():
    options, args = doc_optparse.parse(__doc__)
    try:
        extension = options.ext
    except:
        doc_optparse.exception()
    # create datatype
    data = model.Dataset(extension=extension, id=int(args[0]))
    data.file_path = "/home/ian/trunk/database/files/"
    if options.metadata:
        data.metadata = util.string_to_object(options.metadata)
    errors = data.datatype.validate(data)
    print util.object_to_string(errors)
def __main__():
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        keep_header = bool(options.header)
        keep_comments = bool(options.comments)
        cols = []
        if options.cols:
            for c in options.cols.split(","):
                try:
                    v = int(c)
                except:
                    v = c
                # append the converted value so numeric column indices work
                cols.append(v)
        if len(args) > 0:
            expr = args[0]
        else:
            expr = None
        if options.force_header:
            force_header = bx.tabular.io.FIRST_LINE_IS_HEADER
        else:
            force_header = None
    except:
        doc_optparse.exception()
    # Compile expression for SPEED
    if expr:
        expr = compile(expr, "<expr arg>", "eval")
    for element in bx.tabular.io.TableReader(sys.stdin, force_header=force_header):
        if type(element) is bx.tabular.io.Header:
            if keep_header:
                if cols:
                    print "#" + "\t".join(element[c] for c in cols)
                else:
                    print element
        elif type(element) is bx.tabular.io.Comment:
            if keep_comments:
                print element
        else:
            if expr is None or bool(eval(expr, dict(row=element))):
                if cols:
                    print "\t".join([element[c] for c in cols])
                else:
                    print element
def __main__():
    options, args = doc_optparse.parse(__doc__)
    try:
        range_file = file(args[0])
        nib_file = file(args[1])
    except:
        doc_optparse.exit()
    nib = bx.seq.nib.NibFile(nib_file)
    for line in range_file:
        fields = line.split()
        start, end = int(fields[0]), int(fields[1])
        print ">", start, end
        print_wrapped(nib.get(start, end - start))
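# print_wrapped is not shown above; presumably it emits the sequence in
# fixed-width, FASTA-style lines. A sketch of that assumption (the 50-column
# width is a guess, not taken from the original tool):
import sys

def print_wrapped(s, width=50):
    for i in range(0, len(s), width):
        sys.stdout.write(s[i:i + width] + "\n")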