def main():
    """Bgzip-compress the file named on the command line.

    Command line: ``input_file output_file``. Both paths are handed to
    ``ctabix.tabix_compress`` together with ``force=True``.

    Exits with status 2 and a usage message (via ``parser.error``) when
    the number of positional arguments is not exactly two.
    """
    # Read options, args.
    parser = optparse.OptionParser(usage="%prog input_file output_file")
    (_options, args) = parser.parse_args()
    # Validate before unpacking so a wrong argument count yields a usage
    # message instead of a bare ValueError traceback.
    if len(args) != 2:
        parser.error(
            "expected an input file and an output file, got %d argument(s)"
            % len(args)
        )
    input_fname, output_fname = args

    ctabix.tabix_compress(input_fname, output_fname, force=True)
def main():
    """Sort a tabular genomic file and write it out bgzip-compressed.

    Command line: ``[options] tabular_input_file bgzip_output_file``.

    The sort order comes either from explicit column numbers
    (``-c``/``-s``/``-e``, all three required; they are passed straight to
    ``sort -k``) or from a ``-P`` preset (``bed``, ``vcf`` or ``gff``).
    Lines starting with ``#`` or ``track`` are dropped before sorting. The
    sorted rows are staged in a temporary file which is then handed to
    ``ctabix.tabix_compress`` with ``force=True``.

    Exits with status 2 (via ``parser.error``) on bad arguments and with
    status 1 when ``sort`` or ``grep`` fails.
    """
    # Read options, args.
    parser = optparse.OptionParser(
        usage="%prog [options] tabular_input_file bgzip_output_file"
    )
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error(
            "expected an input file and an output file, got %d argument(s)"
            % len(args)
        )
    input_fname, output_fname = args

    preset_sorts = {
        "bed": ["sort", "-k1,1", "-k2,2n", "-k3,3n"],
        "vcf": ["sort", "-k1,1", "-k2,2n"],
        # Stable sort on start column.
        "gff": ["sort", "-s", "-k1,1", "-k4,4n"],
    }
    if options.chrom_col and options.start_col and options.end_col:
        # Explicit columns take precedence over any preset.
        sort_params = [
            "sort",
            "-k%d,%d" % (options.chrom_col, options.chrom_col),
            "-k%d,%dn" % (options.start_col, options.start_col),
            "-k%d,%dn" % (options.end_col, options.end_col),
        ]
    else:
        sort_params = preset_sorts.get(options.preset)
    # Fail before spawning anything: without a sort command the pipeline
    # below cannot be built.
    if sort_params is None:
        parser.error(
            "no sort order: give -c, -s and -e together, or -P with one of: %s"
            % ", ".join(sorted(preset_sorts))
        )

    # Skip any lines starting with "#" and "track". No shell is involved,
    # so the pattern must not carry shell-style quotes: '^"#"' would only
    # match lines that literally begin with a quoted hash. "--" keeps an
    # input name starting with "-" from being parsed as a grep option.
    grep_cmd = ["grep", "-e", "^#", "-e", "^track", "-v", "--", input_fname]

    with tempfile.NamedTemporaryFile() as tmpfile, \
            tempfile.TemporaryFile() as grep_errors:
        # grep's stderr goes to a file rather than a pipe nobody drains,
        # so a chatty grep can never block the pipeline.
        grepped = subprocess.Popen(
            grep_cmd, stdout=subprocess.PIPE, stderr=grep_errors
        )
        after_sort = subprocess.Popen(
            sort_params,
            stdin=grepped.stdout,
            stdout=tmpfile,
            stderr=subprocess.PIPE,
        )
        # Drop our copy of the pipe so grep sees SIGPIPE if sort exits early.
        grepped.stdout.close()
        _, sort_err = after_sort.communicate()
        grep_status = grepped.wait()

        if after_sort.returncode != 0:
            raise SystemExit(
                "sort failed with status %d: %s"
                % (after_sort.returncode,
                   sort_err.decode("utf-8", "replace").strip())
            )
        # grep exits 1 when it selects no lines (e.g. a header-only input);
        # only other statuses indicate a real failure.
        if grep_status not in (0, 1):
            grep_errors.seek(0)
            raise SystemExit(
                "grep failed with status %d: %s"
                % (grep_status,
                   grep_errors.read().decode("utf-8", "replace").strip())
            )

        # Compress while the temporary file still exists.
        ctabix.tabix_compress(tmpfile.name, output_fname, force=True)
def main():
    """Filter, sort and bgzip-compress a tabular genomic file.

    Command line: ``[options] tabular_input_file bgzip_output_file``.

    Sorting is driven by ``-c``/``-s``/``-e`` (all three together; the
    values are passed directly to ``sort -k``) or, failing that, by the
    ``-P`` preset (``bed``, ``vcf`` or ``gff``). Lines starting with ``#``
    or ``track`` are removed first. The sorted output is written to a
    temporary file and then passed to ``ctabix.tabix_compress`` with
    ``force=True``.

    Exits with status 2 (via ``parser.error``) on bad arguments and with
    status 1 when ``sort`` or ``grep`` fails.
    """
    # Read options, args.
    parser = optparse.OptionParser(
        usage="%prog [options] tabular_input_file bgzip_output_file"
    )
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error(
            "expected an input file and an output file, got %d argument(s)"
            % len(args)
        )
    input_fname, output_fname = args

    # Work out the sort command up front so a missing or unknown sort
    # specification is reported before any subprocess is started.
    sort_params = None
    if options.chrom_col and options.start_col and options.end_col:
        sort_params = [
            "sort",
            "-k%d,%d" % (options.chrom_col, options.chrom_col),
            "-k%d,%dn" % (options.start_col, options.start_col),
            "-k%d,%dn" % (options.end_col, options.end_col),
        ]
    elif options.preset == "bed":
        sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
    elif options.preset == "vcf":
        sort_params = ["sort", "-k1,1", "-k2,2n"]
    elif options.preset == "gff":
        # Stable sort on start column.
        sort_params = ["sort", "-s", "-k1,1", "-k4,4n"]
    if sort_params is None:
        parser.error(
            "no sort order: give -c, -s and -e together, "
            "or -P with one of: bed, gff, vcf"
        )

    # Skip any lines starting with "#" and "track". The arguments go to
    # grep verbatim (no shell), so quoting the hash would make the pattern
    # match a literal '"#"' prefix instead of '#'. "--" protects against
    # input names that begin with "-".
    grep_cmd = ["grep", "-e", "^#", "-e", "^track", "-v", "--", input_fname]

    with tempfile.NamedTemporaryFile() as tmpfile, \
            tempfile.TemporaryFile() as grep_errors:
        # Collect grep's stderr in a file: an undrained pipe could fill up
        # and stall grep.
        grepped = subprocess.Popen(
            grep_cmd, stdout=subprocess.PIPE, stderr=grep_errors
        )
        after_sort = subprocess.Popen(
            sort_params,
            stdin=grepped.stdout,
            stdout=tmpfile,
            stderr=subprocess.PIPE,
        )
        # Close the parent's end so sort owns the only reader of the pipe.
        grepped.stdout.close()
        _, sort_err = after_sort.communicate()
        grep_status = grepped.wait()

        if after_sort.returncode != 0:
            raise SystemExit(
                "sort failed with status %d: %s"
                % (after_sort.returncode,
                   sort_err.decode("utf-8", "replace").strip())
            )
        # Status 1 from grep just means "no lines selected"; anything else
        # other than 0 is an actual error.
        if grep_status not in (0, 1):
            grep_errors.seek(0)
            raise SystemExit(
                "grep failed with status %d: %s"
                % (grep_status,
                   grep_errors.read().decode("utf-8", "replace").strip())
            )

        # The temporary file is deleted on leaving the with-block, so
        # compress it here.
        ctabix.tabix_compress(tmpfile.name, output_fname, force=True)
def main():
    """Bgzip-compress a tabular file and build a tabix index for the result.

    Command line: ``[options] tabular_input_file bgzip_output_file``.

    ``-c``, ``-s`` and ``-e`` select the chromosome, start and end columns
    used for indexing (defaults 0, 1 and 1). The output file's parent
    directory is created when it does not exist yet.

    Prints the usage line and exits with status 1 when the number of
    positional arguments is not exactly two.
    """
    # Read options, args.
    usage = "Usage: %prog [options] tabular_input_file bgzip_output_file"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-c', '--chr-col', type='int', default=0, dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', default=1, dest='start_col')
    parser.add_option('-e', '--end-col', type='int', default=1, dest='end_col')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_usage()
        # SystemExit directly: the exit() helper is injected by the site
        # module and is not guaranteed to exist.
        raise SystemExit(1)
    input_fname, output_fname = args

    # A bare filename has an empty dirname; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    output_dir = os.path.dirname(output_fname)
    if output_dir and not os.path.isdir(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            # Another process may have created it in the meantime; only
            # re-raise when the directory is still missing.
            if not os.path.isdir(output_dir):
                raise

    ctabix.tabix_compress(input_fname, output_fname, force=True)

    # Column indices are 0-based.
    # NOTE(review): unlike the compress step, no force flag is passed here;
    # confirm whether an existing index should be overwritten on reruns.
    ctabix.tabix_index(
        output_fname,
        seq_col=options.chrom_col,
        start_col=options.start_col,
        end_col=options.end_col,
    )