def call_samtools(file, tempdir):
    """Convert a SAM file into a sorted, indexed BAM file with samtools.

    Two shell commands are run through the module's ``exe`` runner:
    ``samtools view | samtools sort`` and then ``samtools index``.

    Args:
        file: Path of the input SAM file.
        tempdir: Value handed to ``samtools sort -T`` (temporary-file prefix).

    Returns:
        Path of the BAM file that was written: ``file`` with its extension
        replaced by ``.bam``.

    Raises:
        SystemExit: Carrying the command's return code when either samtools
            step finishes with a non-zero status.
    """
    import os
    import sys
    try:
        from shlex import quote
    except ImportError:  # shlex.quote does not exist on Python 2
        from pipes import quote

    def run_step(step, cmd):
        """Run one samtools command line; log the details and exit on failure."""
        logging.info("Running Samtools %s...", step)
        r, o, e = exe(cmd)
        if r != 0:
            logging.error("Samtools %s failed!", step)
            logging.error("RETCODE %d", r)
            logging.error("STDOUT %s", o)
            logging.error("STDERR %s", e)
            logging.error("Exiting")
            # exit() is injected by the site module and may be absent;
            # sys.exit() is always available and raises the same SystemExit.
            sys.exit(r)
        logging.info("Finished Samtools %s.", step)

    # Replace the extension instead of chopping three characters, so a name
    # without a three-letter suffix is not mangled. "x.sam" still -> "x.bam".
    bam_path = os.path.splitext(file)[0] + ".bam"

    # Every path is quoted because the command line goes through a shell
    # (it relies on a pipe and a redirect); quote() leaves plain paths as-is.
    run_step("sort",
             "samtools view -Sb %s | samtools sort -@ 4 -O bam -T %s - > %s"
             % (quote(file), quote(tempdir), quote(bam_path)))
    run_step("index", "samtools index %s" % quote(bam_path))
    return bam_path
def check_bai(file, tempdir):
    """Make sure an index is available for a BAM file, building one if needed.

    When ``file + ".bai"`` already exists nothing is run. Otherwise the BAM
    is sorted into a new file and that file is indexed, both through the
    module's ``exe`` runner.

    Args:
        file: Path of the BAM file to check.
        tempdir: Value handed to ``samtools sort -T`` (temporary-file prefix).

    Returns:
        An empty string when the index already exists; otherwise the path of
        the newly written sorted BAM (``file`` with its extension replaced by
        ``.sorted.bam``).

    Raises:
        SystemExit: Carrying the command's return code when a samtools step
            finishes with a non-zero status.
    """
    import sys
    try:
        from shlex import quote
    except ImportError:  # shlex.quote does not exist on Python 2
        from pipes import quote

    def run_step(step, cmd):
        """Run one samtools command line; log the details and exit on failure."""
        logging.info("Running Samtools %s...", step)
        r, o, e = exe(cmd)
        if r != 0:
            logging.error("Samtools %s failed!", step)
            logging.error("RETCODE %d", r)
            logging.error("STDOUT %s", o)
            logging.error("STDERR %s", e)
            logging.error("Exiting")
            # exit() is injected by the site module and may be absent;
            # sys.exit() is always available and raises the same SystemExit.
            sys.exit(r)
        logging.info("Finished Samtools %s.", step)

    # Guard clause: an existing index means there is nothing to do.
    if os.path.exists(file + ".bai"):
        logging.info("The bam file is legal.")
        return ""

    logging.info("The bam.bai is missed.")
    # Replace the extension instead of chopping three characters, so names
    # without a three-letter suffix are not mangled.
    # "x.bam" still -> "x.sorted.bam".
    bam_path = os.path.splitext(file)[0] + ".sorted.bam"

    # Paths are quoted so spaces or shell metacharacters cannot split or
    # alter the command; quote() leaves plain paths untouched.
    run_step("sort", "samtools sort -@ 4 -O bam -T %s -o %s %s"
             % (quote(tempdir), quote(bam_path), quote(file)))
    run_step("index", "samtools index %s" % quote(bam_path))
    return bam_path
def load_sam_multi_processes(args):
    """Schedule per-contig signature extraction and merge the results.

    Steps, in order:
      1. create ``<temp_dir>/signatures``;
      2. open the alignment file and append the loaded reference to the
         module-level ``global_ref`` list;
      3. submit one ``multi_run_wrapper`` task per indexed contig to a
         process pool, largest read count first;
      4. concatenate everything in the signatures directory into
         ``<output_dir>/potential_ME.fa``;
      5. remove the signatures directory.

    Args:
        args: Object providing ``temp_dir``, ``input``, ``Reference``,
            ``threads``, ``min_distance``, ``min_support``, ``min_length``
            and ``output_dir``.

    Raises:
        SystemExit: Carrying the command's return code when the merge or the
            clean-up shell command finishes with a non-zero status.
    """
    import sys
    try:
        from shlex import quote
    except ImportError:  # shlex.quote does not exist on Python 2
        from pipes import quote

    def run_or_exit(cmd, failure_message):
        """Run a shell command; log the details and exit if it fails."""
        r, o, e = exe(cmd)
        if r != 0:
            logging.error(failure_message)
            logging.error("RETCODE %d", r)
            logging.error("STDOUT %s", o)
            logging.error("STDERR %s", e)
            logging.error("Exiting")
            # exit() is injected by the site module and may be absent;
            # sys.exit() is always available and raises the same SystemExit.
            sys.exit(r)

    temporary_dir = args.temp_dir if args.temp_dir.endswith('/') else \
        "%s/" % (args.temp_dir)
    signature_dir = "%ssignatures" % temporary_dir
    # Deliberately plain mkdir: a leftover directory from an earlier run
    # raises here instead of having its files merged into this run's output.
    os.mkdir(signature_dir)

    # loading alignment file: bam format
    samfile = pysam.AlignmentFile(args.input)
    try:
        # The reference is stored in global_ref before the pool is created,
        # matching the original statement order.
        Ref = load_ref(args.Reference)
        global_ref.append(Ref)

        # Query the index once and reuse it for both the count and the tasks.
        index_stats = samfile.get_index_statistics()
        logging.info("The total number of chromsomes: %d" % (len(index_stats)))

        # [contig name, read count] pairs, biggest first so that the longest
        # jobs are handed to the pool earliest.
        process_list = sorted(([stat[0], stat[3]] for stat in index_stats),
                              key=lambda x: -x[1])

        analysis_pools = Pool(processes=args.threads)
        pending = []
        for contig, _ in process_list:
            para = [(temporary_dir, contig, args.input, args.min_distance,
                     args.min_support, args.min_length)]
            # Keep the AsyncResult so failures can be reported after join().
            pending.append(
                (contig, analysis_pools.map_async(multi_run_wrapper, para)))
        analysis_pools.close()
        analysis_pools.join()
    finally:
        # Release the file handle even if scheduling raised.
        samfile.close()

    # A discarded AsyncResult hides any exception raised in the worker.
    # Report such failures, but keep going as the original code did.
    for contig, result in pending:
        try:
            result.get()
        except Exception:
            logging.exception("Signature extraction failed for %s", contig)

    output_p = args.output_dir if args.output_dir.endswith('/') else \
        "%s/" % (args.output_dir)
    if not os.path.exists(output_p):
        # makedirs also creates missing parent directories.
        os.makedirs(output_p)

    # Directory names are quoted; the glob itself must stay outside the
    # quotes so the shell still expands it.
    run_or_exit("cat %s/* > %s" % (quote(signature_dir),
                                   quote(output_p + "potential_ME.fa")),
                "Merging ME signatures failed!")

    logging.info("Cleaning temporary files.")
    run_or_exit("rm -r %s" % quote(signature_dir),
                "Cleaning temporary files failed!")
def call_ngmlr(inFile, ref, presets, nproc, outFile):
    """Run NGMLR to align reads and return the path of the SAM it writes.

    NOTE(review): this file defines a second ``call_ngmlr`` further down
    with two extra required parameters. At import time that later definition
    replaces this one, so this five-argument version cannot be reached by
    name. Confirm which is intended and rename one of them.

    Args:
        inFile: Query reads file, passed to ``ngmlr -q``.
        ref: Reference file, passed to ``ngmlr -r``.
        presets: Value passed to ``ngmlr -x``.
        nproc: Thread count, passed to ``ngmlr -t`` (formatted with ``%d``).
        outFile: Prefix to which ``"map.sam"`` is appended to form the
            output path.

    Returns:
        The output path, ``outFile + "map.sam"``.

    Raises:
        SystemExit: Carrying NGMLR's return code when it is non-zero.
    """
    import sys
    try:
        from shlex import quote
    except ImportError:  # shlex.quote does not exist on Python 2
        from pipes import quote

    outFile = outFile + "map.sam"
    logging.info("Running NGMLR...")
    # Quote every user-supplied value so spaces or shell metacharacters
    # cannot split or alter the command; plain values are left unchanged.
    cmd = ("ngmlr -r %s -q %s -o %s -t %d -x %s"
           % (quote(ref), quote(inFile), quote(outFile), nproc,
              quote(presets)))
    r, o, e = exe(cmd)
    if r != 0:
        logging.error("NGMLR mapping failed!")
        logging.error("RETCODE %d", r)
        logging.error("STDOUT %s", o)
        logging.error("STDERR %s", e)
        logging.error("Exiting")
        # exit() is injected by the site module and may be absent;
        # sys.exit() is always available and raises the same SystemExit.
        sys.exit(r)
    logging.info("Finished NGMLR mapping.")
    return outFile
def call_ngmlr(inFile, ref, presets, nproc, outFile, SUBREAD_LENGTH, SUBREAD_CORRIDOR):
    """Run NGMLR with explicit subread settings and return the SAM path.

    NOTE(review): an earlier five-argument ``call_ngmlr`` exists in this
    file; this definition replaces it at import time. Confirm that is
    intended, or rename one of them.

    Args:
        inFile: Query reads file, passed to ``ngmlr -q``.
        ref: Reference file, passed to ``ngmlr -r``.
        presets: Value passed to ``ngmlr -x``.
        nproc: Thread count, passed to ``ngmlr -t`` (formatted with ``%d``).
        outFile: Prefix to which ``"cluster.sam"`` is appended to form the
            output path.
        SUBREAD_LENGTH: Integer passed to ``ngmlr --subread-length``.
        SUBREAD_CORRIDOR: Integer passed to ``ngmlr --subread-corridor``.

    Returns:
        The output path, ``outFile + "cluster.sam"``.

    Raises:
        SystemExit: Carrying NGMLR's return code when it is non-zero.
    """
    import sys
    try:
        from shlex import quote
    except ImportError:  # shlex.quote does not exist on Python 2
        from pipes import quote

    outFile = outFile + "cluster.sam"
    logging.info("Running NGMLR...")
    # Quote every user-supplied value so spaces or shell metacharacters
    # cannot split or alter the command; plain values are left unchanged.
    cmd = ("ngmlr -r %s -q %s -o %s -t %d -x %s "
           "--subread-length %d --subread-corridor %d"
           % (quote(ref), quote(inFile), quote(outFile), nproc,
              quote(presets), SUBREAD_LENGTH, SUBREAD_CORRIDOR))
    r, o, e = exe(cmd)
    if r != 0:
        logging.error("NGMLR mapping failed!")
        logging.error("RETCODE %d", r)
        logging.error("STDOUT %s", o)
        logging.error("STDERR %s", e)
        logging.error("Exiting")
        # exit() is injected by the site module and may be absent;
        # sys.exit() is always available and raises the same SystemExit.
        sys.exit(r)
    logging.info("Finished NGMLR mapping.")
    return outFile