コード例 #1
0
def main():
    """Command-line entry point: run transcript classification.

    Parses the command line, validates inputs, configures logging,
    creates a temporary working directory under the classification
    directory, runs the classification procedure, and removes the
    temporary directory afterwards.
    """
    multiprocessing.freeze_support()
    # parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        dest="verbose",
                        default=False)
    parser.add_argument("-p",
                        "--num-processors",
                        type=int,
                        dest="num_processors",
                        default=1)
    parser.add_argument("--gtf-score-attr",
                        dest="gtf_score_attr",
                        default="FPKM",
                        metavar="ATTR",
                        help="GTF attribute field containing node weight "
                        " [default=%(default)s]")
    parser.add_argument("classify_dir")
    args = parser.parse_args()
    # check command line parameters
    if not os.path.exists(args.classify_dir):
        parser.error("Classification directory %s not found" %
                     (args.classify_dir))
    if not os.path.exists(R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # set logging level
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # show parameters
    logging.info("Parameters:")
    logging.info("verbose logging:         %s" % (args.verbose))
    logging.info("num processors:          %s" % (args.num_processors))
    logging.info("gtf score attribute:   %s" % (args.gtf_score_attr))
    logging.info("classify directory:      %s" % (args.classify_dir))
    tmp_dir = os.path.join(args.classify_dir, "tmp")
    if not os.path.exists(tmp_dir):
        logging.info("Creating tmp directory '%s'" % (tmp_dir))
        os.makedirs(tmp_dir)
    # run classification procedure
    # fix: cleanup was previously skipped when classify_transcripts
    # raised, leaving a stale tmp dir behind; try/finally guarantees it
    try:
        classify_transcripts(args.classify_dir, args.num_processors,
                             args.gtf_score_attr, tmp_dir)
    finally:
        # cleanup
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
コード例 #2
0
def main():
    """Script entry point: split the annotated GTF file, classify the
    transcript chunks, then merge the per-chunk results."""
    multiprocessing.freeze_support()
    # command-line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose",
                        action="store_true",
                        dest="verbose",
                        default=False)
    parser.add_argument("--bufsize",
                        type=int,
                        dest="bufsize",
                        default=(1 << 30),
                        help="Size of buffer when splitting GTF file")
    parser.add_argument("-p", "--num-processors",
                        dest="num_processors",
                        type=int,
                        default=1)
    parser.add_argument("run_dir")
    args = parser.parse_args()
    # validate inputs
    if not os.path.exists(args.run_dir):
        parser.error("Run directory %s not found" % (args.run_dir))
    if not os.path.exists(CLASSIFY_R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # never run with fewer than one worker
    nprocs = max(1, args.num_processors)
    # configure logging
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # echo parameters
    logging.info("Parameters:")
    logging.info("run directory:    %s" % (args.run_dir))
    logging.info("num processors:   %d" % (args.num_processors))
    logging.info("buffer size:      %d" % (args.bufsize))
    logging.info("verbose logging:  %s" % (args.verbose))
    logging.info("----------------------------------")
    # locate result paths; make the classification directory if absent
    results = config.AssemblylineResults(args.run_dir)
    if not os.path.exists(results.classify_dir):
        os.makedirs(results.classify_dir)
    # split the annotated GTF into per-chunk files
    split_gtf_file(results.annotated_transcripts_gtf_file,
                   results.classify_dir,
                   results.ref_gtf_file,
                   results.category_stats_file,
                   args.bufsize)
    # classify each chunk; abort on a non-zero return code
    retcode = classify_transcripts(results, nprocs)
    if retcode != 0:
        logging.error("ERROR")
        return retcode
    # merge the per-chunk outputs
    retcode = merge_transcripts(results)
    logging.info("Done")
    return retcode
コード例 #3
0
def main():
    """Convert a bedgraph file (first line = UCSC track header) to bigWig.

    Prints a rewritten 'track' line on stdout (type switched to bigWig,
    optionally with a bigDataUrl built from --baseurl), strips the header
    line via sed, and runs bedGraphToBigWig.  Returns 0 on success, 1 if
    either subprocess fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--baseurl", dest="baseurl", default=None)
    parser.add_argument("bedgraph_file")
    parser.add_argument("chrom_sizes_file")
    args = parser.parse_args()
    # check args
    if not os.path.exists(args.bedgraph_file):
        parser.error("bedgraph file %s not found" % (args.bedgraph_file))
    if not os.path.exists(args.chrom_sizes_file):
        parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file))
    # check for executables
    if not check_executable("bedGraphToBigWig"):
        parser.error("'bedGraphToBigWig' executable not found in PATH")
    # derive output names from the bedgraph file name
    prefix = os.path.splitext(args.bedgraph_file)[0]
    tmp_noheader_file = prefix + ".noheader.bedgraph"
    bigwig_file = prefix + ".bw"
    # format bigwig track line from the header (first) line of the file
    # fix: 'with' closes the handle even on error; next(f) replaces the
    # Python-2-only f.next()
    with open(args.bedgraph_file) as f:
        header_fields = next(f).strip().split()
    track_options = ["track"]
    for field in header_fields[1:]:
        # split on the first '=' only so values containing '=' survive
        key = field.split("=", 1)[0]
        if key == "type":
            track_options.append("type=bigWig")
        else:
            track_options.append(field)
    if args.baseurl is not None:
        bigwig_file_abspath = os.path.abspath(bigwig_file)
        track_options.append('bigDataUrl="http://%s%s"' %
                             (args.baseurl, bigwig_file_abspath))
    track_line = ' '.join(track_options)
    print(track_line)
    # remove header line of file
    with open(tmp_noheader_file, "w") as outf:
        retcode1 = subprocess.call(["sed", "1,1d", args.bedgraph_file],
                                   stdout=outf)
    # convert to bigwig
    retcode2 = subprocess.call([
        "bedGraphToBigWig", tmp_noheader_file, args.chrom_sizes_file,
        bigwig_file
    ])
    # cleanup
    os.remove(tmp_noheader_file)
    if (retcode1 != 0) or (retcode2 != 0):
        return 1
    return 0
コード例 #4
0
def main():
    """Script entry point: classify transcripts in a directory, using a
    temporary working directory that is removed on completion."""
    multiprocessing.freeze_support()
    # command-line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False)
    parser.add_argument("-p", "--num-processors",
                        dest="num_processors",
                        type=int,
                        default=1)
    parser.add_argument("--gtf-score-attr",
                        dest="gtf_score_attr",
                        metavar="ATTR",
                        default="FPKM",
                        help="GTF attribute field containing node weight "
                        " [default=%(default)s]")
    parser.add_argument("classify_dir")
    args = parser.parse_args()
    # validate inputs
    if not os.path.exists(args.classify_dir):
        parser.error("Classification directory %s not found" %
                     (args.classify_dir))
    if not os.path.exists(R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # configure logging
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # echo parameters
    logging.info("Parameters:")
    logging.info("verbose logging:         %s" % (args.verbose))
    logging.info("num processors:          %s" % (args.num_processors))
    logging.info("gtf score attribute:   %s" % (args.gtf_score_attr))
    logging.info("classify directory:      %s" % (args.classify_dir))
    # working directory for intermediate files
    tmp_dir = os.path.join(args.classify_dir, "tmp")
    if not os.path.exists(tmp_dir):
        logging.info("Creating tmp directory '%s'" % (tmp_dir))
        os.makedirs(tmp_dir)
    # run classification procedure
    classify_transcripts(args.classify_dir, args.num_processors,
                         args.gtf_score_attr, tmp_dir)
    # cleanup
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
コード例 #5
0
def main():
    """Convert a bedgraph file with a UCSC track header line to bigWig.

    Writes an updated 'track' line to stdout (type becomes bigWig; a
    bigDataUrl is added when --baseurl is given), removes the header line
    with sed, and invokes bedGraphToBigWig.  Returns 0 on success, 1 when
    either subprocess fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--baseurl", dest="baseurl", default=None)
    parser.add_argument("bedgraph_file")
    parser.add_argument("chrom_sizes_file")
    args = parser.parse_args()
    # check args
    if not os.path.exists(args.bedgraph_file):
        parser.error("bedgraph file %s not found" % (args.bedgraph_file))
    if not os.path.exists(args.chrom_sizes_file):
        parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file))
    # check for executables
    if not check_executable("bedGraphToBigWig"):
        parser.error("'bedGraphToBigWig' executable not found in PATH")
    # output names share the bedgraph file's prefix
    prefix = os.path.splitext(args.bedgraph_file)[0]
    tmp_noheader_file = prefix + ".noheader.bedgraph"
    bigwig_file = prefix + ".bw"
    # build the bigwig track line from the file's first (header) line
    # fix: use 'with' (handle closed even on error) and next(f) instead
    # of the Python-2-only f.next()
    with open(args.bedgraph_file) as f:
        header_fields = next(f).strip().split()
    track_options = ["track"]
    for field in header_fields[1:]:
        # only split at the first '=' so values containing '=' are kept
        key = field.split("=", 1)[0]
        if key == "type":
            track_options.append("type=bigWig")
        else:
            track_options.append(field)
    if args.baseurl is not None:
        bigwig_file_abspath = os.path.abspath(bigwig_file)
        track_options.append('bigDataUrl="http://%s%s"' %
                             (args.baseurl, bigwig_file_abspath))
    track_line = ' '.join(track_options)
    print(track_line)
    # remove header line of file
    with open(tmp_noheader_file, "w") as outf:
        retcode1 = subprocess.call(["sed", "1,1d", args.bedgraph_file],
                                   stdout=outf)
    # convert to bigwig
    retcode2 = subprocess.call(["bedGraphToBigWig", tmp_noheader_file,
                                args.chrom_sizes_file, bigwig_file])
    # cleanup
    os.remove(tmp_noheader_file)
    if (retcode1 != 0) or (retcode2 != 0):
        return 1
    return 0
コード例 #6
0
def main():
    """Convert assembly BED/bedgraph outputs to bigBed/bigWig and print
    the corresponding UCSC track lines.

    Expects output_dir to contain assembly.bed, the three stranded
    bedgraph files (none/neg/pos) and a '.ucsc_track' header file for
    each.  Returns 0 on success, 1 if any conversion subprocess fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--baseurl", dest="baseurl", default=None)
    parser.add_argument("output_dir")
    parser.add_argument("chrom_sizes_file")
    args = parser.parse_args()
    if not check_executable("bedToBigBed"):
        parser.error("bedToBigBed binary not found in PATH")
    if not check_executable("bedGraphToBigWig"):
        parser.error("'bedGraphToBigWig' executable not found in PATH")
    if not os.path.exists(args.chrom_sizes_file):
        parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file))
    if not os.path.exists(args.output_dir):
        parser.error("output dir %s not found" % (args.output_dir))
    # input files (fix: dropped unused 'prefix' local)
    output_dir = os.path.abspath(args.output_dir)
    bed_file = os.path.join(output_dir, "assembly.bed")
    bed_track_file = bed_file + ".ucsc_track"
    bedgraph_files = [
        os.path.join(output_dir, "assembly_none.bedgraph"),
        os.path.join(output_dir, "assembly_neg.bedgraph"),
        os.path.join(output_dir, "assembly_pos.bedgraph")
    ]
    bedgraph_track_files = [x + ".ucsc_track" for x in bedgraph_files]
    if not os.path.exists(bed_file):
        parser.error("BED file %s not found" % (bed_file))
    if not os.path.exists(bed_track_file):
        parser.error("BED track file %s not found" % (bed_track_file))
    # fix: zip instead of Python-2-only xrange index loop
    for bg_file, bg_track_file in zip(bedgraph_files, bedgraph_track_files):
        if not os.path.exists(bg_file):
            parser.error("Bedgraph file %s not found" % (bg_file))
        if not os.path.exists(bg_track_file):
            parser.error("Bedgraph track file %s not found" %
                         (bg_track_file))
    # convert to bigbed
    bigbed_file = os.path.join(output_dir, "assembly.bb")
    retcode = subprocess.call(
        ["bedToBigBed", bed_file, args.chrom_sizes_file, bigbed_file])
    if retcode != 0:
        sys.stderr.write("bedToBigWig ERROR\n" if False else "bedToBigBed ERROR\n")
        return 1
    # print track line for the bigBed, rewriting any type field
    # fix: 'with' closes the handle; next(f) replaces f.next()
    with open(bed_track_file) as f:
        fields = next(f).strip().split()
    track_options = ["track"]
    has_type = False
    for i, field in enumerate(fields):
        if field.startswith("type"):
            fields[i] = "type=bigBed"
            has_type = True
            break
    if not has_type:
        track_options.append("type=bigBed")
    track_options.extend(fields[1:])
    if args.baseurl is not None:
        track_options.append('bigDataUrl="%s%s"' %
                             (args.baseurl, os.path.abspath(bigbed_file)))
    track_line = ' '.join(track_options)
    print(track_line)
    # convert each bedgraph to bigwig and print its track line
    for bedgraph_file in bedgraph_files:
        bwfile = os.path.splitext(bedgraph_file)[0] + ".bw"
        retcode = subprocess.call(
            ["bedGraphToBigWig", bedgraph_file, args.chrom_sizes_file, bwfile])
        if retcode != 0:
            sys.stderr.write("bedGraphToBigWig ERROR\n")
            return 1
        track_file = bedgraph_file + ".ucsc_track"
        with open(track_file) as f:
            fields = next(f).strip().split()
        track_options = ["track"]
        for field in fields[1:]:
            # split at the first '=' only so values containing '=' survive
            key = field.split("=", 1)[0]
            if key == "type":
                track_options.append("type=bigWig")
            else:
                track_options.append(field)
        if args.baseurl is not None:
            track_options.append('bigDataUrl="%s%s"' %
                                 (args.baseurl, os.path.abspath(bwfile)))
        track_line = ' '.join(track_options)
        print(track_line)
    return 0
コード例 #7
0
def main():
    """Script entry point: split the GTF, classify transcripts, and
    merge the classified results for a run directory."""
    multiprocessing.freeze_support()
    # command-line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true",
                        dest="verbose", default=False)
    parser.add_argument("--bufsize", dest="bufsize", type=int,
                        default=(1 << 30),
                        help="Size of buffer when splitting GTF file")
    parser.add_argument("-p", "--num-processors", type=int,
                        dest="num_processors", default=1)
    parser.add_argument("run_dir")
    args = parser.parse_args()
    # validate inputs
    if not os.path.exists(args.run_dir):
        parser.error("Run directory %s not found" % (args.run_dir))
    if not os.path.exists(CLASSIFY_R_SCRIPT):
        parser.error("Classification R script not found")
    if not check_executable("Rscript"):
        parser.error("Rscript binary not found")
    # clamp worker count to at least one
    nprocs = max(1, args.num_processors)
    # configure logging
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logging.info("AssemblyLine %s" % (assemblyline.__version__))
    logging.info("----------------------------------")
    # echo parameters
    logging.info("Parameters:")
    logging.info("run directory:    %s" % (args.run_dir))
    logging.info("num processors:   %d" % (args.num_processors))
    logging.info("buffer size:      %d" % (args.bufsize))
    logging.info("verbose logging:  %s" % (args.verbose))
    logging.info("----------------------------------")
    # locate result paths; create classification directory if needed
    results = config.AssemblylineResults(args.run_dir)
    if not os.path.exists(results.classify_dir):
        os.makedirs(results.classify_dir)
    # split the annotated GTF into chunks
    split_gtf_file(results.annotated_transcripts_gtf_file,
                   results.classify_dir, results.ref_gtf_file,
                   results.category_stats_file, args.bufsize)
    # classify each chunk; bail out on failure
    retcode = classify_transcripts(results, nprocs)
    if retcode != 0:
        logging.error("ERROR")
        return retcode
    # merge the per-chunk outputs
    retcode = merge_transcripts(results)
    logging.info("Done")
    return retcode
コード例 #8
0
def main():
    """Convert the assembly BED file to bigBed and the stranded bedgraph
    files to bigWig, printing a UCSC track line for each output.

    output_dir must contain assembly.bed, assembly_{none,neg,pos}.bedgraph
    and a matching '.ucsc_track' header file for each.  Returns 0 on
    success, 1 if any conversion fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--baseurl", dest="baseurl", default=None)
    parser.add_argument("output_dir")
    parser.add_argument("chrom_sizes_file")
    args = parser.parse_args()
    if not check_executable("bedToBigBed"):
        parser.error("bedToBigBed binary not found in PATH")
    if not check_executable("bedGraphToBigWig"):
        parser.error("'bedGraphToBigWig' executable not found in PATH")
    if not os.path.exists(args.chrom_sizes_file):
        parser.error("chrom sizes file %s not found" % (args.chrom_sizes_file))
    if not os.path.exists(args.output_dir):
        parser.error("output dir %s not found" % (args.output_dir))
    # input files (fix: removed the unused 'prefix' variable)
    output_dir = os.path.abspath(args.output_dir)
    bed_file = os.path.join(output_dir, "assembly.bed")
    bed_track_file = bed_file + ".ucsc_track"
    bedgraph_files = [os.path.join(output_dir, "assembly_none.bedgraph"),
                      os.path.join(output_dir, "assembly_neg.bedgraph"),
                      os.path.join(output_dir, "assembly_pos.bedgraph")]
    bedgraph_track_files = [x + ".ucsc_track" for x in bedgraph_files]
    if not os.path.exists(bed_file):
        parser.error("BED file %s not found" % (bed_file))
    if not os.path.exists(bed_track_file):
        parser.error("BED track file %s not found" % (bed_track_file))
    # fix: pair the files with zip rather than a Python-2-only xrange loop
    for bg_file, bg_track_file in zip(bedgraph_files, bedgraph_track_files):
        if not os.path.exists(bg_file):
            parser.error("Bedgraph file %s not found" % (bg_file))
        if not os.path.exists(bg_track_file):
            parser.error("Bedgraph track file %s not found" % (bg_track_file))
    # convert to bigbed
    bigbed_file = os.path.join(output_dir, "assembly.bb")
    retcode = subprocess.call(["bedToBigBed", bed_file,
                               args.chrom_sizes_file, bigbed_file])
    if retcode != 0:
        sys.stderr.write("bedToBigBed ERROR\n")
        return 1
    # print the bigBed track line, rewriting any type field
    # fix: 'with' guarantees the handle closes; next(f) replaces f.next()
    with open(bed_track_file) as f:
        fields = next(f).strip().split()
    track_options = ["track"]
    has_type = False
    for i, field in enumerate(fields):
        if field.startswith("type"):
            fields[i] = "type=bigBed"
            has_type = True
            break
    if not has_type:
        track_options.append("type=bigBed")
    track_options.extend(fields[1:])
    if args.baseurl is not None:
        track_options.append('bigDataUrl="%s%s"' %
                             (args.baseurl, os.path.abspath(bigbed_file)))
    track_line = ' '.join(track_options)
    print(track_line)
    # convert each bedgraph to bigwig and print its track line
    for bedgraph_file in bedgraph_files:
        bwfile = os.path.splitext(bedgraph_file)[0] + ".bw"
        retcode = subprocess.call(["bedGraphToBigWig", bedgraph_file,
                                   args.chrom_sizes_file, bwfile])
        if retcode != 0:
            sys.stderr.write("bedGraphToBigWig ERROR\n")
            return 1
        track_file = bedgraph_file + ".ucsc_track"
        with open(track_file) as f:
            fields = next(f).strip().split()
        track_options = ["track"]
        for field in fields[1:]:
            # split at the first '=' only so values containing '=' are kept
            key = field.split("=", 1)[0]
            if key == "type":
                track_options.append("type=bigWig")
            else:
                track_options.append(field)
        if args.baseurl is not None:
            track_options.append('bigDataUrl="%s%s"' %
                                 (args.baseurl, os.path.abspath(bwfile)))
        track_line = ' '.join(track_options)
        print(track_line)
    return 0