Ejemplo n.º 1
0
    # Build the per-run output directory path from args.outdir and `date`
    # (`date` is defined before this excerpt). A "/" separator is inserted
    # only when args.outdir does not already end with one.
    # NOTE(review): args.outdir[-1] raises IndexError for an empty string.
    out_dir = args.outdir + date if args.outdir[
        -1] == "/" else args.outdir + "/" + date

    # Create the directory (and missing parents); os.makedirs raises OSError
    # if out_dir already exists.
    os.makedirs(out_dir)

    # Set up variables and output sample names.
    # Reference key: "nuc" when args.rna is truthy, otherwise "gen".
    ref_type = "nuc" if args.rna else "gen"
    # More than one input file is treated as a paired run.
    is_paired = len(args.input) > 1

    # Output file paths inside out_dir, both prefixed with `date`.
    out_csv = out_dir + "/%s_result.tsv" % date
    out_plot = out_dir + "/%s_coverage_plot.pdf" % date

    # Map each input file to the selected reference, one shell command per
    # input file.
    for i, sample in enumerate(args.input):
        if args.verbose:
            print "\n", ht.now(), "Mapping %s to %s reference..." % (
                os.path.basename(sample), ref_type.upper())
        # Output prefix for this input: <out_dir>/<date>_<index>.
        sample_out = out_dir + "/" + date + "_" + str(i)
        # MAPPING_CMD is filled with (threads from config, output prefix,
        # reference for ref_type, input path) and run through the shell.
        # NOTE(review): the exit status of subprocess.call is ignored, and the
        # paths are interpolated into a shell string (shell=True); whether
        # they are quoted depends on MAPPING_CMD, which is not visible here.
        subprocess.call(MAPPING_CMD % (config.get(
            "MAPPING", "THREADS"), sample_out, MAPPING_REF[ref_type], sample),
                        shell=True)

    # SAM to HDF5.
    # Presumably loads the 'table' and 'features' entries of ALLELE_HDF;
    # ht.load_hdf is defined elsewhere -- TODO confirm its arguments.
    table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features')
    if args.verbose:
        print "\n", ht.now(), "Generating binary hit matrix."

    if is_paired:
        # Combine matrices for paired-end mapping.
        # Paths for inputs 0 and 1: the loop's output prefix plus ".sam".
        # Assumes MAPPING_CMD writes "<prefix>.sam" -- TODO confirm.
        sample_1 = out_dir + "/" + date + "_0.sam"
        sample_2 = out_dir + "/" + date + "_1.sam"
Ejemplo n.º 2
0
    # Set up variables and output sample names.
    # Reference key: "nuc" when args.rna is truthy, otherwise "gen".
    ref_type = "nuc" if args.rna else "gen"
    # More than one input file is treated as a paired run.
    is_paired = len(args.input) > 1

    # Output file paths inside out_dir, both prefixed with `date`
    # (out_dir and date are defined before this excerpt).
    out_csv = os.path.join(out_dir, ("%s_result.tsv" % date))
    out_plot = os.path.join(out_dir, ("%s_coverage_plot.pdf" % date))

    # Map each input file to the selected reference; the mapping step is
    # skipped entirely when bam_input is truthy.
    if not bam_input:
        # Thread count: the integer "threads" option of the "mapping" config
        # section, passed through get_num_threads (defined elsewhere).
        threads = get_num_threads(config.getint("mapping", "threads"))
        if VERBOSE:
            print("\nmapping with %s threads..." % threads)
        # Pair each input with its output path from bam_paths; zip stops at
        # the shorter of the two sequences.
        # NOTE(review): the index `i` is not used in the loop body.
        for (i, sample), outbam in zip(enumerate(args.input), bam_paths):
            if VERBOSE:
                print(
                    "\n", ht.now(), "Mapping %s to %s reference..." %
                    (os.path.basename(sample), ref_type.upper()))

            # MAPPING_CMD is filled with (threads, output path, reference for
            # ref_type, input path) and run through the shell.
            # NOTE(review): the exit status of subprocess.call is ignored, and
            # the paths are interpolated into a shell string (shell=True);
            # whether they are quoted depends on MAPPING_CMD, not visible here.
            subprocess.call(MAPPING_CMD %
                            (threads, outbam, MAPPING_REF[ref_type], sample),
                            shell=True)

    # SAM to HDF5.
    # Presumably loads the 'table' and 'features' entries of ALLELE_HDF;
    # ht.load_hdf is defined elsewhere -- TODO confirm its arguments.
    table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features')
    if VERBOSE:
        print("\n", ht.now(), "Generating binary hit matrix.")

    if is_paired:
        # Combine matrices for paired-end mapping.
        # Read the first path in bam_paths via ht.pysam_to_hdf, which returns
        # a (pos, read_details) pair.
        pos, read_details = ht.pysam_to_hdf(bam_paths[0])
        # np.sign maps each entry to -1, 0 or 1; presumably pos is
        # non-negative so the result is a 0/1 hit matrix -- TODO confirm.
        binary1 = np.sign(pos)  # dtype=np.uint16
Ejemplo n.º 3
0
    # Timestamp string for the current local time, used to name the output
    # directory and the files inside it.
    date = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    # Per-run output directory: args.outdir plus `date`, with a "/" inserted
    # only when args.outdir does not already end with one.
    # NOTE(review): args.outdir[-1] raises IndexError for an empty string.
    out_dir = args.outdir+date if args.outdir[-1] == "/" else args.outdir+"/"+date

    # Create the directory (and missing parents); os.makedirs raises OSError
    # if out_dir already exists.
    os.makedirs(out_dir)

    # Set up variables and output sample names.
    # Reference key: "nuc" when args.rna is truthy, otherwise "gen".
    ref_type = "nuc" if args.rna else "gen"
    # More than one input file is treated as a paired run.
    is_paired = len(args.input) > 1
    
    # Output file paths inside out_dir, both prefixed with `date`.
    out_csv = out_dir+"/%s_result.tsv"%date
    out_plot = out_dir+"/%s_coverage_plot.pdf"%date

    # Map each input file to the selected reference, one shell command per
    # input file.
    for i, sample in enumerate(args.input):
        if args.verbose:
            print "\n", ht.now(), "Mapping %s to %s reference..."%(os.path.basename(sample), ref_type.upper())
        # Output prefix for this input: <out_dir>/<date>_<index>.
        sample_out = out_dir+"/"+date+"_"+str(i)
        # MAPPING_CMD is filled with (threads from config, output prefix,
        # reference for ref_type, input path) and run through the shell.
        # NOTE(review): the exit status of subprocess.call is ignored, and the
        # paths are interpolated into a shell string (shell=True); whether
        # they are quoted depends on MAPPING_CMD, which is not visible here.
        subprocess.call(MAPPING_CMD%(config.get("MAPPING", "THREADS"), sample_out,
                                     MAPPING_REF[ref_type], sample), shell=True)

    # SAM to HDF5.
    # Presumably loads the 'table' and 'features' entries of ALLELE_HDF;
    # ht.load_hdf is defined elsewhere -- TODO confirm its arguments.
    table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features')
    if args.verbose:
        print "\n", ht.now(), "Generating binary hit matrix."

    if is_paired:
        # Combine matrices for paired-end mapping.
        # Paths for inputs 0 and 1: the loop's output prefix plus ".sam".
        # Assumes MAPPING_CMD writes "<prefix>.sam" -- TODO confirm.
        sample_1 = out_dir+"/"+date+"_0.sam"
        sample_2 = out_dir+"/"+date+"_1.sam"
        # Parse the first SAM file; ht.sam_to_hdf returns three values, of
        # which only `pos` is used in the lines visible here.
        pos, etc, desc = ht.sam_to_hdf(sample_1, verbosity=args.verbose)
        # Element-wise bool -> int conversion: each entry becomes 1 if truthy,
        # otherwise 0. Presumably pos is a pandas DataFrame -- TODO confirm.
        binary1 = pos.applymap(bool).applymap(int)