Beispiel #1
0
def call_samtools(file, tempdir):
    """
    Convert an alignment file into a sorted, indexed BAM with samtools.

    Two shell commands are run through ``exe``: a
    ``samtools view | samtools sort`` pipeline, then ``samtools index`` on
    the result. If either returns a non-zero code, the return code, stdout
    and stderr are logged and the process exits with that code.

    Args:
        file: Path of the input alignment; it is passed to
            ``samtools view -S``.
        tempdir: Value handed to ``samtools sort -T``.

    Returns:
        Path of the sorted BAM: ``file`` with its extension replaced by
        ``.bam``.
    """
    import os
    import sys

    def run_or_exit(cmd, step):
        # Run one command; on failure log the details and terminate with
        # the command's own return code.
        r, o, e = exe(cmd)
        if r != 0:
            logging.error("%s failed!", step)
            logging.error("RETCODE %d", r)
            logging.error("STDOUT %s", o)
            logging.error("STDERR %s", e)
            logging.error("Exiting")
            # sys.exit instead of the site-provided exit(), which is only
            # meant for the interactive interpreter.
            sys.exit(r)

    # Swap the real extension for ".bam" rather than blindly dropping the
    # last three characters, so inputs whose extension is not exactly three
    # characters long still yield a sensible name ("x.sam" -> "x.bam").
    bam_path = os.path.splitext(file)[0] + ".bam"

    logging.info("Running Samtools sort...")
    # NOTE(review): the paths are interpolated unquoted into a shell
    # pipeline, so paths containing spaces or shell metacharacters will
    # break this command.
    sort_cmd = "samtools view -Sb %s | samtools sort -@ 4 -O bam -T %s - > %s" % (
        file, tempdir, bam_path)
    run_or_exit(sort_cmd, "Samtools sort")
    logging.info("Finished Samtools sort.")

    logging.info("Running Samtools index...")
    run_or_exit("samtools index %s" % (bam_path), "Samtools index")
    logging.info("Finished Samtools index.")
    return bam_path
Beispiel #2
0
def check_bai(file, tempdir):
    """
    Ensure an indexed BAM is available, sorting and indexing when it is not.

    When ``file + ".bai"`` exists nothing is run. Otherwise ``file`` is
    sorted into a new BAM with ``samtools sort`` and that new file is
    indexed with ``samtools index``. If either command returns a non-zero
    code, the return code, stdout and stderr are logged and the process
    exits with that code.

    Args:
        file: Path of the BAM file to check.
        tempdir: Value handed to ``samtools sort -T``.

    Returns:
        An empty string when the index already exists; otherwise the path
        of the newly written sorted BAM (``file`` with its extension
        replaced by ``.sorted.bam``).
    """
    import sys

    def run_or_exit(cmd, step):
        # Run one command; on failure log the details and terminate with
        # the command's own return code.
        r, o, e = exe(cmd)
        if r != 0:
            logging.error("%s failed!", step)
            logging.error("RETCODE %d", r)
            logging.error("STDOUT %s", o)
            logging.error("STDERR %s", e)
            logging.error("Exiting")
            # sys.exit instead of the site-provided exit(), which is only
            # meant for the interactive interpreter.
            sys.exit(r)

    # Guard clause: an existing index means there is nothing to do.
    if os.path.exists(file + ".bai"):
        logging.info("The bam file is legal.")
        return ""

    logging.info("The bam.bai is missed.")
    logging.info("Running Samtools sort...")
    # Swap the real extension instead of dropping a fixed three characters;
    # identical for "x.bam" ("x.sorted.bam") and sane for other extensions.
    bam_path = os.path.splitext(file)[0] + ".sorted.bam"
    sort_cmd = "samtools sort -@ 4 -O bam -T %s -o %s %s" % (
        tempdir, bam_path, file)
    run_or_exit(sort_cmd, "Samtools sort")
    logging.info("Finished Samtools sort.")

    logging.info("Running Samtools index...")
    run_or_exit("samtools index %s" % (bam_path), "Samtools index")
    logging.info("Finished Samtools index.")
    return bam_path
Beispiel #3
0
def load_sam_multi_processes(args):
    """
    Schedule one worker task per contig and merge the resulting signatures.

    Steps:
      1. Create ``<temp_dir>/signatures`` (``os.mkdir`` raises if it already
         exists).
      2. Open the alignment file, load the reference and append it to
         ``global_ref``.
      3. Submit one ``multi_run_wrapper`` task per contig listed in the
         alignment index, contigs with the most reads first.
      4. Concatenate everything under ``<temp_dir>/signatures`` into
         ``<output_dir>/potential_ME.fa`` and delete the signatures folder.

    If the merge or cleanup command returns a non-zero code, the return
    code, stdout and stderr are logged and the process exits with that
    code.

    Args:
        args: Object providing the attributes ``temp_dir``, ``input``,
            ``Reference``, ``threads``, ``min_distance``, ``min_support``,
            ``min_length`` and ``output_dir``.
    """
    import sys

    def run_or_exit(cmd, step):
        # Run one command; on failure log the details and terminate with
        # the command's own return code.
        r, o, e = exe(cmd)
        if r != 0:
            logging.error("%s failed!", step)
            logging.error("RETCODE %d", r)
            logging.error("STDOUT %s", o)
            logging.error("STDERR %s", e)
            logging.error("Exiting")
            # sys.exit instead of the site-provided exit(), which is only
            # meant for the interactive interpreter.
            sys.exit(r)

    temporary_dir = args.temp_dir if args.temp_dir.endswith('/') else \
        "%s/" % (args.temp_dir)
    os.mkdir("%ssignatures" % temporary_dir)

    # Loading alignment file: bam format.
    samfile = pysam.AlignmentFile(args.input)
    try:
        # Loading reference genome; workers presumably read it back through
        # global_ref -- confirm against multi_run_wrapper.
        Ref = load_ref(args.Reference)
        global_ref.append(Ref)

        # Query the index once and reuse the result for both the count and
        # the scheduling list.
        index_stats = samfile.get_index_statistics()
        logging.info("The total number of chromsomes: %d", len(index_stats))

        # [chr, #read] pairs, largest first so the longest-running tasks
        # are started earliest.
        process_list = sorted(([i[0], i[3]] for i in index_stats),
                              key=lambda x: -x[1])

        analysis_pools = Pool(processes=args.threads)
        pending = []
        for chrom, _ in process_list:
            para = [(temporary_dir, chrom, args.input, args.min_distance,
                     args.min_support, args.min_length)]
            # Keep the AsyncResult: dropping it would silently discard any
            # exception raised inside the worker.
            pending.append(
                (chrom, analysis_pools.map_async(multi_run_wrapper, para)))
        analysis_pools.close()
        analysis_pools.join()

        # All tasks have finished, so get() returns immediately or re-raises
        # the worker's exception. Failures are reported but the run goes on,
        # matching the previous best-effort behaviour.
        for chrom, result in pending:
            try:
                result.get()
            except Exception:
                logging.exception(
                    "Worker for chromosome %s raised an exception", chrom)
    finally:
        # Release the file handle even if scheduling failed part-way.
        samfile.close()

    output_p = args.output_dir if args.output_dir.endswith('/') else \
        "%s/" % (args.output_dir)
    if not os.path.exists(output_p):
        # makedirs also creates missing parent directories.
        os.makedirs(output_p)

    # NOTE(review): relies on shell globbing/redirection; paths are
    # interpolated unquoted, so spaces in them will break the command.
    merge_cmd = ("cat %ssignatures/* > %spotential_ME.fa" %
                 (temporary_dir, output_p))
    run_or_exit(merge_cmd, "Merging ME signatures")

    logging.info("Cleaning temporary files.")
    cmd_remove_tempfile = ("rm -r %ssignatures" % (temporary_dir))
    run_or_exit(cmd_remove_tempfile, "Cleaning temporary files")
Beispiel #4
0
def call_ngmlr(inFile, ref, presets, nproc, outFile):
    """
    Run NGMLR to align reads against a reference.

    The command is executed through ``exe``. If it returns a non-zero code,
    the return code, stdout and stderr are logged and the process exits
    with that code.

    Args:
        inFile: Path passed to ``ngmlr -q``.
        ref: Path passed to ``ngmlr -r``.
        presets: Value passed to ``ngmlr -x``.
        nproc: Integer passed to ``ngmlr -t`` (formatted with ``%d``).
        outFile: Prefix of the output path; ``"map.sam"`` is appended
            verbatim, so a directory must already end with a separator.

    Returns:
        Path of the SAM file written by NGMLR (``outFile + "map.sam"``).
    """
    import sys

    outFile = outFile + "map.sam"
    logging.info("Running NGMLR...")
    cmd = "ngmlr -r %s -q %s -o %s -t %d -x %s" % (
        ref, inFile, outFile, nproc, presets)
    r, o, e = exe(cmd)

    if r != 0:
        logging.error("NGMLR mapping failed!")
        logging.error("RETCODE %d", r)
        logging.error("STDOUT %s", o)
        logging.error("STDERR %s", e)
        logging.error("Exiting")
        # sys.exit instead of the site-provided exit(), which is only meant
        # for the interactive interpreter.
        sys.exit(r)

    logging.info("Finished NGMLR mapping.")
    return outFile
Beispiel #5
0
def call_ngmlr(inFile, ref, presets, nproc, outFile, SUBREAD_LENGTH,
               SUBREAD_CORRIDOR):
    """
    Run NGMLR with explicit subread settings to align reads to a reference.

    The command is executed through ``exe``. If it returns a non-zero code,
    the return code, stdout and stderr are logged and the process exits
    with that code.

    Args:
        inFile: Path passed to ``ngmlr -q``.
        ref: Path passed to ``ngmlr -r``.
        presets: Value passed to ``ngmlr -x``.
        nproc: Integer passed to ``ngmlr -t`` (formatted with ``%d``).
        outFile: Prefix of the output path; ``"cluster.sam"`` is appended
            verbatim, so a directory must already end with a separator.
        SUBREAD_LENGTH: Integer passed to ``--subread-length``.
        SUBREAD_CORRIDOR: Integer passed to ``--subread-corridor``.

    Returns:
        Path of the SAM file written by NGMLR
        (``outFile + "cluster.sam"``).
    """
    import sys

    outFile = outFile + "cluster.sam"
    logging.info("Running NGMLR...")
    cmd = (
        "ngmlr -r %s -q %s -o %s -t %d -x %s --subread-length %d --subread-corridor %d"
        % (ref, inFile, outFile, nproc, presets, SUBREAD_LENGTH,
           SUBREAD_CORRIDOR))
    r, o, e = exe(cmd)

    if r != 0:
        logging.error("NGMLR mapping failed!")
        logging.error("RETCODE %d", r)
        logging.error("STDOUT %s", o)
        logging.error("STDERR %s", e)
        logging.error("Exiting")
        # sys.exit instead of the site-provided exit(), which is only meant
        # for the interactive interpreter.
        sys.exit(r)

    logging.info("Finished NGMLR mapping.")
    return outFile