def main():
    """Entry point: run the transcript classification procedure.

    Parses command-line options, validates the classify directory and the
    required R tooling, configures logging, stages a tmp directory under
    the classify directory, runs classify_transcripts(), and removes the
    tmp directory afterwards.
    """
    multiprocessing.freeze_support()
    # parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        dest="verbose", default=False)
    parser.add_argument("-p", "--num-processors", type=int,
                        dest="num_processors", default=1)
    parser.add_argument("--gtf-score-attr", dest="gtf_score_attr",
                        default="FPKM", metavar="ATTR",
                        help="GTF attribute field containing node weight "
                             " [default=%(default)s]")
    parser.add_argument("classify_dir")
    args = parser.parse_args()
    # check command line parameters
    if not os.path.exists(args.classify_dir):
        parser.error("Classification directory %s not found" % (args.classify_dir))
    # R_SCRIPT is a module-level constant pointing at the classification R script
    if not os.path.exists(R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # set logging level
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # show parameters
    logging.info("Parameters:")
    logging.info("verbose logging: %s" % (args.verbose))
    logging.info("num processors: %s" % (args.num_processors))
    logging.info("gtf score attribute: %s" % (args.gtf_score_attr))
    logging.info("classify directory: %s" % (args.classify_dir))
    tmp_dir = os.path.join(args.classify_dir, "tmp")
    if not os.path.exists(tmp_dir):
        logging.info("Creating tmp directory '%s'" % (tmp_dir))
        os.makedirs(tmp_dir)
    # run classification procedure
    # fix: wrap in try/finally so the tmp directory is removed even when
    # classify_transcripts() raises (the original leaked tmp_dir on error)
    try:
        classify_transcripts(args.classify_dir, args.num_processors,
                             args.gtf_score_attr, tmp_dir)
    finally:
        # cleanup
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
def main():
    """Drive the split -> classify -> merge pipeline for an AssemblyLine
    run directory.

    Returns 0 on success, otherwise the failing step's return code.
    """
    multiprocessing.freeze_support()
    # command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        dest="verbose", default=False)
    parser.add_argument("--bufsize", dest="bufsize", type=int,
                        default=(1 << 30),
                        help="Size of buffer when splitting GTF file")
    parser.add_argument("-p", "--num-processors", type=int,
                        dest="num_processors", default=1)
    parser.add_argument("run_dir")
    args = parser.parse_args()
    # sanity checks on inputs and external tools
    if not os.path.exists(args.run_dir):
        parser.error("Run directory %s not found" % (args.run_dir))
    if not os.path.exists(CLASSIFY_R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # never run with fewer than one worker
    nprocs = max(1, args.num_processors)
    # logging setup (DEBUG when -v given)
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # echo parameters
    logging.info("Parameters:")
    logging.info("run directory: %s" % (args.run_dir))
    logging.info("num processors: %d" % (args.num_processors))
    logging.info("buffer size: %d" % (args.bufsize))
    logging.info("verbose logging: %s" % (args.verbose))
    logging.info("----------------------------------")
    # resolve the result-file layout for this run directory
    results = config.AssemblylineResults(args.run_dir)
    if not os.path.exists(results.classify_dir):
        os.makedirs(results.classify_dir)
    # split the annotated GTF into pieces for classification
    split_gtf_file(results.annotated_transcripts_gtf_file,
                   results.classify_dir,
                   results.ref_gtf_file,
                   results.category_stats_file,
                   args.bufsize)
    # classify
    retcode = classify_transcripts(results, nprocs)
    if retcode != 0:
        logging.error("ERROR")
        return retcode
    # merge
    retcode = merge_transcripts(results)
    logging.info("Done")
    return retcode
def main(): parser = argparse.ArgumentParser() parser.add_argument("--baseurl", dest="baseurl", default=None) parser.add_argument("bedgraph_file") parser.add_argument("chrom_sizes_file") args = parser.parse_args() # check args if not os.path.exists(args.bedgraph_file): parser.error("bedgraph file %s not found" % (args.bedgraph_file)) if not os.path.exists(args.chrom_sizes_file): parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file)) # check for executables if not check_executable("bedGraphToBigWig"): parser.error("'bedGraphToBigWig' executable not found in PATH") # convert bedgraph to bigwig prefix = os.path.splitext(args.bedgraph_file)[0] tmp_noheader_file = prefix + ".noheader.bedgraph" bigwig_file = prefix + ".bw" # format bigwig track line f = open(args.bedgraph_file) header_fields = f.next().strip().split() f.close() track_options = ["track"] for field in header_fields[1:]: k, v = field.split("=") if k == "type": track_options.append("type=bigWig") else: track_options.append(field) if args.baseurl is not None: bigwig_file_abspath = os.path.abspath(bigwig_file) track_options.append('bigDataUrl="http://%s%s"' % (args.baseurl, bigwig_file_abspath)) track_line = ' '.join(track_options) print track_line # remove header line of file outf = open(tmp_noheader_file, "w") retcode1 = subprocess.call(["sed", "1,1d", args.bedgraph_file], stdout=outf) outf.close() # convert to bigwig retcode2 = subprocess.call([ "bedGraphToBigWig", tmp_noheader_file, args.chrom_sizes_file, bigwig_file ]) # cleanup os.remove(tmp_noheader_file) if (retcode1 != 0) or (retcode2 != 0): return 1 return 0
def main():
    """Classify transcripts found in a classification directory.

    Command-line driver: validate arguments, configure logging, stage a
    scratch directory, run the classification procedure, then clean up.
    """
    multiprocessing.freeze_support()
    # command-line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        dest="verbose", default=False)
    parser.add_argument("-p", "--num-processors", type=int,
                        dest="num_processors", default=1)
    parser.add_argument("--gtf-score-attr", dest="gtf_score_attr",
                        default="FPKM", metavar="ATTR",
                        help="GTF attribute field containing node weight "
                             " [default=%(default)s]")
    parser.add_argument("classify_dir")
    args = parser.parse_args()
    # validate inputs before doing any work
    if not os.path.exists(args.classify_dir):
        parser.error("Classification directory %s not found" % (args.classify_dir))
    if not os.path.exists(R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # configure logging (DEBUG when -v given)
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # echo parameters
    logging.info("Parameters:")
    logging.info("verbose logging: %s" % (args.verbose))
    logging.info("num processors: %s" % (args.num_processors))
    logging.info("gtf score attribute: %s" % (args.gtf_score_attr))
    logging.info("classify directory: %s" % (args.classify_dir))
    # stage a scratch directory inside the classify dir
    work_dir = os.path.join(args.classify_dir, "tmp")
    if not os.path.exists(work_dir):
        logging.info("Creating tmp directory '%s'" % (work_dir))
        os.makedirs(work_dir)
    # run the classification procedure
    classify_transcripts(args.classify_dir, args.num_processors,
                         args.gtf_score_attr, work_dir)
    # remove the scratch directory
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
def main(): parser = argparse.ArgumentParser() parser.add_argument("--baseurl", dest="baseurl", default=None) parser.add_argument("bedgraph_file") parser.add_argument("chrom_sizes_file") args = parser.parse_args() # check args if not os.path.exists(args.bedgraph_file): parser.error("bedgraph file %s not found" % (args.bedgraph_file)) if not os.path.exists(args.chrom_sizes_file): parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file)) # check for executables if not check_executable("bedGraphToBigWig"): parser.error("'bedGraphToBigWig' executable not found in PATH") # convert bedgraph to bigwig prefix = os.path.splitext(args.bedgraph_file)[0] tmp_noheader_file = prefix + ".noheader.bedgraph" bigwig_file = prefix + ".bw" # format bigwig track line f = open(args.bedgraph_file) header_fields = f.next().strip().split() f.close() track_options = ["track"] for field in header_fields[1:]: k,v = field.split("=") if k == "type": track_options.append("type=bigWig") else: track_options.append(field) if args.baseurl is not None: bigwig_file_abspath = os.path.abspath(bigwig_file) track_options.append('bigDataUrl="http://%s%s"' % (args.baseurl, bigwig_file_abspath)) track_line = ' '.join(track_options) print track_line # remove header line of file outf = open(tmp_noheader_file, "w") retcode1 = subprocess.call(["sed", "1,1d", args.bedgraph_file], stdout=outf) outf.close() # convert to bigwig retcode2 = subprocess.call(["bedGraphToBigWig", tmp_noheader_file, args.chrom_sizes_file, bigwig_file]) # cleanup os.remove(tmp_noheader_file) if (retcode1 != 0) or (retcode2 != 0): return 1 return 0
def main(): parser = argparse.ArgumentParser() parser.add_argument("--baseurl", dest="baseurl", default=None) parser.add_argument("output_dir") parser.add_argument("chrom_sizes_file") args = parser.parse_args() if not check_executable("bedToBigBed"): parser.error("bedToBigBed binary not found in PATH") if not check_executable("bedGraphToBigWig"): parser.error("'bedGraphToBigWig' executable not found in PATH") if not os.path.exists(args.chrom_sizes_file): parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file)) if not os.path.exists(args.output_dir): parser.error("output dir %s not found" % (args.output_dir)) # input files prefix = os.path.dirname(args.output_dir) output_dir = os.path.abspath(args.output_dir) bed_file = os.path.join(output_dir, "assembly.bed") bed_track_file = bed_file + ".ucsc_track" bedgraph_files = [ os.path.join(output_dir, "assembly_none.bedgraph"), os.path.join(output_dir, "assembly_neg.bedgraph"), os.path.join(output_dir, "assembly_pos.bedgraph") ] bedgraph_track_files = [x + ".ucsc_track" for x in bedgraph_files] if not os.path.exists(bed_file): parser.error("BED file %s not found" % (bed_file)) if not os.path.exists(bed_track_file): parser.error("BED track file %s not found" % (bed_track_file)) for i in xrange(len(bedgraph_files)): if not os.path.exists(bedgraph_files[i]): parser.error("Bedgraph file %s not found" % (bedgraph_files[i])) if not os.path.exists(bedgraph_track_files[i]): parser.error("Bedgraph track file %s not found" % (bedgraph_track_files[i])) # convert to bigbed bigbed_file = os.path.join(output_dir, "assembly.bb") retcode = subprocess.call( ["bedToBigBed", bed_file, args.chrom_sizes_file, bigbed_file]) if retcode != 0: print >> sys.stderr, "bedToBigBed ERROR" return 1 # print track lines f = open(bed_track_file) fields = f.next().strip().split() f.close() track_options = ["track"] has_type = False for i, field in enumerate(fields): if field.startswith("type"): fields[i] = "type=bigBed" has_type = True break 
if not has_type: track_options.append("type=bigBed") track_options.extend(fields[1:]) if args.baseurl is not None: track_options.append('bigDataUrl="%s%s"' % (args.baseurl, os.path.abspath(bigbed_file))) track_line = ' '.join(track_options) print track_line # convert to bigwig for bedgraph_file in bedgraph_files: bwfile = os.path.splitext(bedgraph_file)[0] + ".bw" retcode = subprocess.call( ["bedGraphToBigWig", bedgraph_file, args.chrom_sizes_file, bwfile]) if retcode != 0: print >> sys.stderr, "bedGraphToBigWig ERROR" return 1 track_file = bedgraph_file + ".ucsc_track" f = open(track_file) fields = f.next().strip().split() f.close() track_options = ["track"] for field in fields[1:]: k, v = field.split("=") if k == "type": track_options.append("type=bigWig") else: track_options.append(field) if args.baseurl is not None: track_options.append('bigDataUrl="%s%s"' % (args.baseurl, os.path.abspath(bwfile))) track_line = ' '.join(track_options) print track_line return 0
def main():
    """Entry point: split the annotated GTF, classify transcripts, and
    merge the results for an AssemblyLine run directory.

    Returns 0 on success, or the failing step's nonzero return code.
    """
    multiprocessing.freeze_support()
    # parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        dest="verbose", default=False)
    parser.add_argument("--bufsize", dest="bufsize", type=int,
                        default=(1 << 30),
                        help="Size of buffer when splitting GTF file")
    parser.add_argument("-p", "--num-processors", type=int,
                        dest="num_processors", default=1)
    parser.add_argument("run_dir")
    args = parser.parse_args()
    # check command line parameters
    if not os.path.exists(args.run_dir):
        parser.error("Run directory %s not found" % (args.run_dir))
    # CLASSIFY_R_SCRIPT is a module-level path to the R classifier script
    if not os.path.exists(CLASSIFY_R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # clamp to at least one worker process
    num_processors = max(1, args.num_processors)
    # set logging level
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # show parameters
    logging.info("Parameters:")
    logging.info("run directory: %s" % (args.run_dir))
    # fix: log the clamped processor count actually used, not the raw
    # command-line value (they differ when the user passes a value < 1)
    logging.info("num processors: %d" % (num_processors))
    logging.info("buffer size: %d" % (args.bufsize))
    logging.info("verbose logging: %s" % (args.verbose))
    logging.info("----------------------------------")
    # setup results layout for this run directory
    results = config.AssemblylineResults(args.run_dir)
    if not os.path.exists(results.classify_dir):
        os.makedirs(results.classify_dir)
    # split gtf file into pieces for classification
    split_gtf_file(results.annotated_transcripts_gtf_file,
                   results.classify_dir,
                   results.ref_gtf_file,
                   results.category_stats_file,
                   args.bufsize)
    # run classification
    retcode = classify_transcripts(results, num_processors)
    if retcode != 0:
        logging.error("ERROR")
        return retcode
    # merge results
    retcode = merge_transcripts(results)
    logging.info("Done")
    return retcode
def main(): parser = argparse.ArgumentParser() parser.add_argument("--baseurl", dest="baseurl", default=None) parser.add_argument("output_dir") parser.add_argument("chrom_sizes_file") args = parser.parse_args() if not check_executable("bedToBigBed"): parser.error("bedToBigBed binary not found in PATH") if not check_executable("bedGraphToBigWig"): parser.error("'bedGraphToBigWig' executable not found in PATH") if not os.path.exists(args.chrom_sizes_file): parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file)) if not os.path.exists(args.output_dir): parser.error("output dir %s not found" % (args.output_dir)) # input files prefix = os.path.dirname(args.output_dir) output_dir = os.path.abspath(args.output_dir) bed_file = os.path.join(output_dir, "assembly.bed") bed_track_file = bed_file + ".ucsc_track" bedgraph_files = [os.path.join(output_dir, "assembly_none.bedgraph"), os.path.join(output_dir, "assembly_neg.bedgraph"), os.path.join(output_dir, "assembly_pos.bedgraph")] bedgraph_track_files = [x + ".ucsc_track" for x in bedgraph_files] if not os.path.exists(bed_file): parser.error("BED file %s not found" % (bed_file)) if not os.path.exists(bed_track_file): parser.error("BED track file %s not found" % (bed_track_file)) for i in xrange(len(bedgraph_files)): if not os.path.exists(bedgraph_files[i]): parser.error("Bedgraph file %s not found" % (bedgraph_files[i])) if not os.path.exists(bedgraph_track_files[i]): parser.error("Bedgraph track file %s not found" % (bedgraph_track_files[i])) # convert to bigbed bigbed_file = os.path.join(output_dir, "assembly.bb") retcode = subprocess.call(["bedToBigBed", bed_file, args.chrom_sizes_file, bigbed_file]) if retcode != 0: print >>sys.stderr, "bedToBigBed ERROR" return 1 # print track lines f = open(bed_track_file) fields = f.next().strip().split() f.close() track_options = ["track"] has_type = False for i,field in enumerate(fields): if field.startswith("type"): fields[i] = "type=bigBed" has_type = True break if 
not has_type: track_options.append("type=bigBed") track_options.extend(fields[1:]) if args.baseurl is not None: track_options.append('bigDataUrl="%s%s"' % (args.baseurl, os.path.abspath(bigbed_file))) track_line = ' '.join(track_options) print track_line # convert to bigwig for bedgraph_file in bedgraph_files: bwfile = os.path.splitext(bedgraph_file)[0] + ".bw" retcode = subprocess.call(["bedGraphToBigWig", bedgraph_file, args.chrom_sizes_file, bwfile]) if retcode != 0: print >>sys.stderr, "bedGraphToBigWig ERROR" return 1 track_file = bedgraph_file + ".ucsc_track" f = open(track_file) fields = f.next().strip().split() f.close() track_options = ["track"] for field in fields[1:]: k,v = field.split("=") if k == "type": track_options.append("type=bigWig") else: track_options.append(field) if args.baseurl is not None: track_options.append('bigDataUrl="%s%s"' % (args.baseurl, os.path.abspath(bwfile))) track_line = ' '.join(track_options) print track_line return 0