# Run the external mapper only when `bam_input` is falsy; otherwise the
# paths in `bam_paths` are used as they are.
if not bam_input:
    threads = get_num_threads(config.getint("mapping", "threads"))
    if VERBOSE:
        print("\nmapping with %s threads..." % threads)

    # Each input file is paired positionally with its output BAM path.
    for (i, sample), outbam in zip(enumerate(args.input), bam_paths):
        if VERBOSE:
            progress_msg = "Mapping %s to %s reference..." % (
                os.path.basename(sample),
                ref_type.upper(),
            )
            print("\n", ht.now(), progress_msg)

        # The command line is built from the MAPPING_CMD template and handed
        # to the shell.  NOTE(review): the exit status returned by
        # subprocess.call is not inspected here.
        mapper_cmdline = MAPPING_CMD % (
            threads,
            outbam,
            MAPPING_REF[ref_type],
            sample,
        )
        subprocess.call(mapper_cmdline, shell=True)

# sam-to-hdf5: load the 'table' and 'features' objects from ALLELE_HDF.
# (The meaning of the positional `False` is defined by ht.load_hdf, which is
# not visible in this part of the file.)
table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features')

if VERBOSE:
    print("\n", ht.now(), "Generating binary hit matrix.")

if is_paired:
    # Paired-end mapping: build one hit matrix per mate file so that they
    # can be combined afterwards.
    pos, read_details = ht.pysam_to_hdf(bam_paths[0])
    # np.sign maps every positive entry to 1 and leaves zeros untouched.
    # Original note: dtype=np.uint16 (presumably the dtype of `pos` -
    # TODO confirm against ht.pysam_to_hdf).
    binary1 = np.sign(pos)

    pos2, read_details2 = ht.pysam_to_hdf(bam_paths[1])
    binary2 = np.sign(pos2)  # original note: dtype=np.uint16

    # Remove the BAM files only if this run created them (bam_input falsy)
    # and the 'behavior'/'deletebam' option is switched on.
    remove_bams = not bam_input and config.getboolean('behavior', 'deletebam')
    if remove_bams:
        for mapped_bam in (bam_paths[0], bam_paths[1]):
            os.remove(mapped_bam)
ref_type = "nuc" if args.rna else "gen" is_paired = len(args.input) > 1 out_csv = out_dir+"/%s_result.tsv"%date out_plot = out_dir+"/%s_coverage_plot.pdf"%date #mapping fished file to reference for i, sample in enumerate(args.input): if args.verbose: print "\n", ht.now(), "Mapping %s to %s reference..."%(os.path.basename(sample), ref_type.upper()) sample_out = out_dir+"/"+date+"_"+str(i) subprocess.call(MAPPING_CMD%(config.get("MAPPING", "THREADS"), sample_out, MAPPING_REF[ref_type], sample), shell=True) #sam-to-hd5 table, features = ht.load_hdf(ALLELE_HDF, False, 'table', 'features') if args.verbose: print "\n", ht.now(), "Generating binary hit matrix." if is_paired: #combine matrices for paired-end mapping sample_1 = out_dir+"/"+date+"_0.sam" sample_2 = out_dir+"/"+date+"_1.sam" pos, etc, desc = ht.sam_to_hdf(sample_1, verbosity=args.verbose) binary1 = pos.applymap(bool).applymap(int) pos2, etc2, desc2 = ht.sam_to_hdf(sample_2, verbosity=args.verbose) binary2 = pos2.applymap(bool).applymap(int) id1 = set(binary1.index) id2 = set(binary2.index)