Esempio n. 1
0
def hmmpred(obs_seq, na3, forw_rerv, hmmoptions, commonOptions):
    """Run Viterbi decoding of a sequence against the supplied HMM.

    Gap characters ('-') are removed from ``obs_seq``; every remaining
    character is mapped to its index in the model's symbol table and the
    resulting column vector is decoded with ``hmm.MultinomialHMM``.  The
    decoded state path is then handed to ``getPred``.

    Args:
        obs_seq: observed sequence as a string.
        na3: repeat pattern, forwarded to ``printHMMmatrix.get_len_repPat``.
        forw_rerv: not used inside this function.
        hmmoptions: 9-item sequence unpacked as (transition matrix, start
            probabilities, emission matrix, observation symbols, states,
            number of states, number of symbols, state3class, tol_info).
        commonOptions: shared options, forwarded to ``get_len_repPat``.

    Returns:
        ``[newstr, pre0, ststar]`` built from the three values returned by
        ``getPred`` (which returns them as ``newstr, ststar, pre0``).
    """
    cleaned = obs_seq.replace('-', '')

    # The result is not used below; the call is kept exactly as before.
    bp = getBasePair()
    unit_len = printHMMmatrix.get_len_repPat(na3, commonOptions)

    (trans_mat, start_prob, emis_mat, symbols, states,
     n_states, n_symbols, state3class, tol_info) = hmmoptions

    model = hmm.MultinomialHMM(n_states)
    model.transmat_ = trans_mat
    model.startprob_ = start_prob
    model.emissionprob_ = emis_mat
    model.n_features = n_symbols

    # Index of every observed character within the symbol table.
    symbol_idx = [(np.where(symbols == base))[0][0] for base in cleaned]
    observations = np.array([symbol_idx]).T

    logprob, state_path = model.decode(observations, algorithm="viterbi")

    newstr, ststar, pre0 = getPred(state_path, cleaned, state3class, unit_len)
    if cur_M_STAT <= M_DEBUG:
        rounded_units = int(len(newstr) / float(unit_len) + 0.5)
        print(' '.join(['hmmB:', cleaned, str(rounded_units)]))
        print(' '.join(['hmmB:', ''.join([str(st) for st in state_path])]))

    return [newstr, pre0, ststar]
Esempio n. 2
0
def getRepeatForGivenGene(commonOptions, specifiedOptions, moreOptions):
    """Estimate the repeat count of one locus from the reads of a BAM file.

    The function
      1. dumps the alignments overlapping the gene region to a temporary
         text file with ``samtools view`` (retrying with the first three
         characters of the chromosome name removed when the first attempt
         yields nothing),
      2. walks the CIGAR string of every alignment to locate, in read
         coordinates, where the flanked repeat region starts and ends,
      3. merges these coordinates per read id, cuts the region out of every
         read for which both ends were found and passes it to
         ``getUnsymAlignAndHMM``,
      4. passes the sorted repeat counts of those reads to
         ``myGaussianMixtureModel.get2Peaks``.

    Args:
        commonOptions: dict of shared settings; the keys read here are
            'repeatFlankLength', 'MinSup', 'MaxRep', 'outlog' and 'hgfile'.
            'rep_flanking_data' is written as a side effect.
        specifiedOptions: dict with 'bamfile', 'unique_file_id',
            'analysis_file_id' and 'align' (folder for the temporary
            alignment dump).  When the key 'thread' is present some log
            messages are suppressed.
        moreOptions: dict with 'chr', 'repeatName', 'gene_start_end',
            'repeat_start_end', 'repPat' and 'forw_rerv'.

    Returns:
        None when no alignment could be extracted; otherwise the list
        ``[repeatName, ref_repeat, p2, allocr, n_spanning, n_other]`` where
        ``p2`` and ``allocr`` come from ``get2Peaks``, ``n_spanning`` is the
        number of reads for which both ends of the repeat were located and
        ``n_other`` is the number of remaining reads plus the number of
        alignments starting to the right of the left flank boundary.
    """

    def _missing_or_empty(path):
        # True when the dump was not created at all or contains nothing.
        return (not os.path.isfile(path)) or os.path.getsize(path) == 0

    def _discard(path):
        # Best-effort removal of the temporary dump; a failure is reported
        # but never aborts the analysis.
        try:
            os.remove(path)
        except OSError as err:
            logging.warning('Cannot remove ' + path + ': ' + str(err))

    logging.info(moreOptions['chr'] + ' ' + str(moreOptions['repeat_start_end']))
    chrom = moreOptions['chr']
    repeatName = moreOptions['repeatName']
    gene_start_end = moreOptions['gene_start_end']
    repeat_start_end = moreOptions['repeat_start_end']
    repPat = moreOptions['repPat']
    forw_rerv = moreOptions['forw_rerv']

    bamfile = specifiedOptions['bamfile']
    unique_file_id = specifiedOptions['unique_file_id']
    analysis_file_id = specifiedOptions['analysis_file_id']

    repeatFlankLength = commonOptions['repeatFlankLength']
    MinSup = commonOptions['MinSup']

    len_repPat = printHMMmatrix.get_len_repPat(repPat, commonOptions)
    logging.info("len_repPat=" + str(len_repPat))

    alignfolder = specifiedOptions['align']  # 'align/'
    if not os.path.isdir(alignfolder):
        try:
            os.makedirs(alignfolder)
        except OSError:
            # Another worker may have created the folder in the meantime.
            if not os.path.isdir(alignfolder):
                raise

    # Number of repeat units spanned by the repeat in the reference.
    ref_repeat = (repeat_start_end[1] - repeat_start_end[0] + 1) / float(len_repPat)

    # ---- 1. extract the alignments of the gene region -------------------
    alignfile = alignfolder + repeatName + unique_file_id + '.alignment.txt'
    region = str(gene_start_end[0]) + '-' + str(gene_start_end[1])
    get_alg_cmd = 'samtools view ' + bamfile + ' ' + chrom + ':' + region + ' > ' + alignfile
    if 'thread' not in specifiedOptions:
        logging.info('Running ' + get_alg_cmd)
    os.system(get_alg_cmd)
    if _missing_or_empty(alignfile):
        if commonOptions['outlog'] <= M_WARNING:
            logging.info(get_alg_cmd + '\n')
            logging.info('The file %s have zero size\nTry without chr' % alignfile)
        # Second attempt with the first three characters ("chr") dropped.
        get_alg_cmd = 'samtools view ' + bamfile + ' ' + chrom[3:] + ':' + region + ' > ' + alignfile
        if commonOptions['outlog'] <= M_INFO and ('thread' not in specifiedOptions):
            logging.info('Running ' + get_alg_cmd)
        os.system(get_alg_cmd)
    if commonOptions['outlog'] <= M_INFO:
        logging.info('Produced ' + alignfile + ' done!')

    if _missing_or_empty(alignfile):
        # Give up regardless of the log level; only the message is optional.
        if commonOptions['outlog'] <= M_FATAL:
            logging.error('!!!!Cannot produce ' + alignfile + ' for ' + repeatName)
        _discard(alignfile)
        return None
    aligndata = myReadTxtFile(alignfile)
    _discard(alignfile)

    # The values of the first call are not used below; the call itself is
    # kept because getGene may have effects this function cannot see.
    predata, mfadata, sufdata = getGene(repeatName, chrom, gene_start_end, unique_file_id,
                                        analysis_file_id, commonOptions['hgfile'], repeatFlankLength, specifiedOptions)
    rep_predata, rep_mfadata, rep_sufdata = getGene(
        repeatName, chrom, repeat_start_end, unique_file_id, analysis_file_id, commonOptions['hgfile'], repeatFlankLength, specifiedOptions)

    commonOptions['rep_flanking_data'] = rep_predata, rep_sufdata

    wrongalign = 0

    hmmoptions = getHMMOptions(repeatFlankLength, repPat, forw_rerv, commonOptions)

    # ---- 2. locate the flanked repeat region inside every alignment -----
    left_edge = repeat_start_end[0] - repeatFlankLength
    right_edge = repeat_start_end[1] + repeatFlankLength

    numreg = re.compile(r'\d+')
    mdireg = re.compile(r'[MIDNSHPX=]{1}')

    # read id -> [start index in read, end index in read, read sequence]
    repeats_dict = {}
    for line in aligndata:
        lsp = line.split('\t')
        readid = lsp[0]
        cchr = lsp[2]

        if not (chrom == cchr or (len(chrom) > 3 and chrom[3:] == cchr) or (len(cchr) > 3 and cchr[3:] == chrom)):
            continue

        pos = int(lsp[3])
        aligninfo = lsp[5]
        aainfo = lsp[9]

        if pos > left_edge:
            wrongalign += 1
        if not (cchr == chrom or cchr == chrom[3:]):
            logging.error('Not same ' + cchr + ' ' + chrom)
            continue

        numinfo = numreg.findall(aligninfo)
        mdiinfo = mdireg.findall(aligninfo)

        if not len(numinfo) == len(mdiinfo):
            logging.error('Num is not equal to mid: ' + str(len(numinfo)) + ' ' + str(len(mdiinfo)))
            continue

        queryind = 0
        query_start_ind = None
        query_end_ind = None

        for count, mdi in zip(numinfo, mdiinfo):
            for _ in range(int(count)):
                # Which of reference position / read index an operator
                # advances follows the SAM specification.
                if mdi in ('M', '=', 'X'):
                    pos = pos + 1
                    queryind = queryind + 1
                elif mdi in ('I', 'S'):
                    queryind = queryind + 1
                elif mdi in ('D', 'N'):
                    pos = pos + 1
                elif mdi in ('H', 'P'):
                    pass  # advances neither the read nor the reference
                else:
                    logging.warning('Warning unknow CIGAR element ' + str(count) + ' ' + mdi)

                # Last read index seen while still left of the flanked region.
                if pos - 1 < left_edge:
                    query_start_ind = queryind - 1
                # Last read index seen while inside the right flank.
                if pos - 1 >= repeat_start_end[1] and pos - 1 < right_edge:
                    query_end_ind = queryind

        if readid not in repeats_dict:
            repeats_dict[readid] = [query_start_ind, query_end_ind, aainfo]
        else:
            # Several alignments of one read: keep the widest interval and
            # the longest sequence reported for it.
            known = repeats_dict[readid]
            if query_start_ind is not None:
                if known[0] is None or known[0] > query_start_ind:
                    known[0] = query_start_ind
            if query_end_ind is not None:
                if known[1] is None or known[1] < query_end_ind:
                    known[1] = query_end_ind
            if len(known[2]) < len(aainfo):
                known[2] = aainfo

    # ---- 3. cut the repeat region out of every read ---------------------
    # Each entry is [both ends found?, sequence or "start-to-end" text, id].
    repeats = []
    for rk in repeats_dict:
        start_ind, end_ind, readseq = repeats_dict[rk]
        if start_ind is None or end_ind is None:
            repeats.append([False, str(start_ind) + '-to-' + str(end_ind), rk])
        elif end_ind - start_ind > 0:
            repeats.append([True, readseq[start_ind:(end_ind + 1)], rk])
        else:
            if commonOptions['outlog'] <= M_WARNING:
                print(' '.join(['Warning!!! negative ', str(rk), str(repeats_dict[rk][:2])]))
            repeats.append([False, str(start_ind) + '-to-' + str(end_ind), rk])

    def _describe(entry):
        # Shared tail of the "partial" / "too long" warnings.
        return (str(len(entry[1])) + ' ' + chrom + ' ' + repeatName + ' ' + repPat + ' ' + str(entry[0]) +
                ' reads name:' + entry[2] + " " + str(commonOptions['MaxRep']) + " " +
                str(commonOptions['MaxRep'] * len_repPat))

    rptrue = []
    rpfalse = []
    orignial = []
    for currep in repeats:
        newstr = currep[1]
        pre0 = 0
        predstats = ''
        if len(newstr) < commonOptions['MaxRep'] * len_repPat:
            if currep[0]:
                newstr, pre0, predstats = getUnsymAlignAndHMM(
                    repPat, forw_rerv, repeatFlankLength, hmmoptions, currep[1], commonOptions, currep[2])
            elif 'thread' not in specifiedOptions:
                logging.warning('The sequence is partial: ' + _describe(currep))
                logging.warning(str(repeats_dict[currep[2]][:2]))
        else:
            logging.warning('The sequence is too long: ' + _describe(currep))
            logging.warning(str(repeats_dict[currep[2]][:2]))
        orignial.append([currep[1], pre0, predstats])
        currep[1] = newstr
        if currep[0]:
            rptrue.append(len(currep[1]) / float(len_repPat))
        else:
            rpfalse.append(len(currep[1]) / float(len_repPat))

    # ---- 4. summarise ---------------------------------------------------
    rptrue.sort()
    rpfalse.sort()
    logging.debug('true ' + str(len(rptrue)) + ' [' + ','.join(['%.0f' % rpt for rpt in rptrue]) + ']')

    p2, allocr = myGaussianMixtureModel.get2Peaks(rptrue, MinSup, commonoptions=commonOptions)

    if rpfalse:
        logging.debug('fals ' + str(len(rpfalse)) + ' [' + ','.join(['%.0f' % rpf for rpf in rpfalse]) + ']')

    logging.info('ref_repeat ' + ('%.0f' % ref_repeat) + '\t' + repPat + '\t' + forw_rerv)

    for currep, before in zip(repeats, orignial):
        aaprinindex = -1
        if not (currep[0]):
            aaprinindex = 300

        logging.debug('\t' + str(currep[0]) + ' o:' + str(len(before[0])) + '\t' + before[0][:aaprinindex])
        # Indent the processed sequence by the offset stored with the original.
        prestr = ' ' * before[1]
        logging.debug('\t' + str(currep[0]) + ' p:' + str(len(currep[1])) + '\t' + prestr + (currep[1][:aaprinindex]))

    return [repeatName, ref_repeat, p2, allocr, len(rptrue), len(rpfalse) + wrongalign]
def getTransition_start_emission_prob_x(repPat, commonOptions, forprint=False):
    """Build the transition, start and emission parameters of the repeat HMM.

    The state list is 'N' (index 0) followed by one state per position of
    the repeat unit for each of the three state types '', 'I' and 'D'
    (``r1..rk``, ``Ir1..Irk``, ``Drk..Drk`` in that block order), so there
    are ``3 * len_repPat + 1`` states.  The observation symbols are
    'A', 'C', 'G', 'T' and 'N'.

    Args:
        repPat: repeat pattern; surrounding whitespace is stripped.
        commonOptions: dict of settings.  Keys read here: 'CompRep',
            'outlog', 'hmm_insert_rate', 'hmm_del_rate', 'hmm_sub_rate'
            and the optional overrides 'transitionm' / 'emissionm', which
            replace the default matrices when present and not None.
        forprint: when true (and the log level allows it) the matrices are
            printed through ``printHMMmatrix.printHMMmatrix``.

    Returns:
        None when neither ``repPat`` nor ``commonOptions['CompRep']``
        provides a pattern; otherwise the list
        ``[trainsmat, startprob, emisionmat, obs_symbols, states (array),
        number of states, number of symbols, state3class, tol_info]``.
    """
    repPat = repPat.strip()
    comp_rep = commonOptions['CompRep']
    # Nothing to model: the simple pattern is empty while no composite
    # pattern is configured ('0'), or the composite pattern itself is empty.
    if (comp_rep == '0' and len(repPat) < 1) or (comp_rep != '0' and len(comp_rep) < 1):
        return None

    len_repPat = printHMMmatrix.get_len_repPat(repPat, commonOptions)
    if comp_rep == '0':
        CompRepPat = printHMMmatrix.getCompRepFromSimple(repPat)
    else:
        CompRepPat = comp_rep

    tol_info = produce_tolerate_mismatch(repPat, commonOptions)
    if commonOptions['outlog'] <= M_INFO:
        print(' '.join(['tol_info', str(tol_info)]))
    logging.info('tol_info=' + str(tol_info))

    avgsub = 1e-9

    typeOfRepEle = ['', 'I', 'D']
    repEle = ['r' + str(rp_ind + 1) for rp_ind in range(len_repPat)]
    states = ['N']
    for typRE in typeOfRepEle:
        for rp in repEle:
            states.append(typRE + rp)

    n_rep = len(repEle)
    n_types = len(typeOfRepEle)

    if 'transitionm' in commonOptions and commonOptions['transitionm'] is not None:
        trainsmat = commonOptions['transitionm']
    else:
        insert_rate = commonOptions['hmm_insert_rate']
        del_rate = commonOptions['hmm_del_rate']

        trainsmat = np.full((len(states), len(states)), 1e-9)
        # for N to N
        trainsmat[0][0] = 0.96
        # for N to rep
        if len_repPat < 2:
            trainsmat[0][1] = 0.04
        else:
            trainsmat[0][1] = 0.02
            trainsmat[0][1 + n_rep * 2] = 0.02
        # for rep to N
        trainsmat[n_rep][0] = 0.02
        trainsmat[n_rep * 2][0] = 0.02
        if not len_repPat < 2:
            trainsmat[n_rep * 3 - 1][0] = 0.02
        # avgsub: every non-N state to every plain repeat state
        for i in range(1, len(states)):
            for j in range(n_rep):
                trainsmat[i][j + 1] = avgsub
        # for insertion: targets are the 'I' block of columns
        add_index = n_rep + 1
        for typ_ind in range(n_types):
            for j in range(n_rep):
                if typ_ind < n_types - 1:
                    jind = j
                else:
                    jind = j + 1
                    if jind > n_rep - 1:
                        jind = 0
                trainsmat[n_rep * typ_ind + j + 1][jind + add_index] = insert_rate  # 0.11
        # for deletion: targets are the 'D' block of columns; skipping k
        # positions costs del_rate**k, floored at 1e-9
        add_index = n_rep * 2 + 1
        for typ_ind in range(n_types):
            for j in range(n_rep):
                row = n_rep * typ_ind + j + 1
                for k in range(1, n_rep):
                    if typ_ind < n_types - 1:
                        jind = j + k
                    else:
                        if k >= n_rep - 1:
                            continue
                        jind = j + k + 1
                    if jind > n_rep - 1:
                        jind -= n_rep
                    trainsmat[row][jind + add_index] = max(del_rate ** k, 1e-9)
        # for between rep: the regular successor state receives whatever
        # probability is left so that the row sums to 1
        add_index = 1
        for typ_ind in range(n_types):
            for j in range(n_rep):
                row = n_rep * typ_ind + j + 1
                if typ_ind < n_types - 1:
                    jind = j + 1
                    if jind > n_rep - 1:
                        jind = 0
                else:
                    jind = j + 2
                    if jind > n_rep - 1:
                        jind -= n_rep
                restprob = 1
                for jstat in range(len(states)):
                    if jstat != jind + add_index:
                        restprob -= trainsmat[row][jstat]
                trainsmat[row][jind + add_index] = restprob

    startprob = np.full(len(states), 1e-9)
    startprob[0] = 0.96
    if len_repPat < 2:
        startprob[1] = 0.04
    else:
        startprob[1] = 0.02
        startprob[1 + n_rep * 2] = 0.02

    # Defined before the branch so that it also exists when a caller
    # supplies its own emission matrix.
    obs_symbols = np.array(['A', 'C', 'G', 'T', 'N'])

    if 'emissionm' in commonOptions and commonOptions['emissionm'] is not None:
        emisionmat = commonOptions['emissionm']
    else:
        sub_rate = commonOptions['hmm_sub_rate']
        n_emis_rows = n_rep * n_types + 1
        emisionmat = np.full((n_emis_rows, 5), sub_rate / 4.0)
        # Rows emitting the four bases uniformly: N (row 0), the 'I' block
        # and, for a one-base pattern, the last row.
        randrow = [0]
        for j in range(n_rep):
            randrow.append(j + n_rep + 1)
        if len_repPat < 2:
            randrow.append(n_rep * n_types)
        for rdr in randrow:
            for jcol in range(4):
                if not rdr == 0:
                    emisionmat[rdr][jcol] = 0.25
                else:
                    emisionmat[rdr][jcol] = 0.2
        # Column 4 is the symbol 'N'.
        for nset in range(n_emis_rows):
            if not nset == 0:
                emisionmat[nset][4] = 1e-9
            else:
                emisionmat[nset][4] = 0.2

        for naind in range(len_repPat):
            # Plain repeat state: emits the bases of its own position.
            for k1 in CompRepPat[naind].keys():
                emind = (np.where(obs_symbols == k1))[0][0]
                emisionmat[naind + 1][emind] += (1 - sub_rate) * CompRepPat[naind][k1]
            if len_repPat < 2:
                continue
            # 'D' state: emits the bases of the following position
            # (wrapping to the first one after the last).
            if naind < len_repPat - 1:
                afterd = naind + 1
            else:
                afterd = 0
            for k2 in CompRepPat[afterd].keys():
                emind = (np.where(obs_symbols == k2))[0][0]
                emisionmat[naind + 1 + len_repPat * 2][emind] += (1 - sub_rate) * CompRepPat[afterd][k2]

    if forprint and commonOptions['outlog'] <= M_INFO:
        print('HMMmatrix1')
        printHMMmatrix.printHMMmatrix(states, obs_symbols, trainsmat, emisionmat, startprob)

    # State indices of the three blocks: plain, 'I' and 'D'.
    state3class = [list(range(1, len_repPat + 1)),
                   list(range(len_repPat + 1, 2 * len_repPat + 1)),
                   list(range(2 * len_repPat + 1, 3 * len_repPat + 1))]

    #           0         1           2           3            4                  5             6               7          8
    return [trainsmat, startprob, emisionmat, obs_symbols, np.array(states), len(states), len(obs_symbols), state3class, tol_info]
Esempio n. 4
0
def getSCA3ForGivenGene(commonOptions, specifiedOptions, moreOptions):
    """Align a read file with ``bwa mem`` and estimate the repeat count.

    The reads in ``specifiedOptions['fastafile']`` are aligned against
    ``commonOptions['hgfile']``, sorted and indexed with samtools.  Depending
    on ``commonOptions['SplitAndReAlign']`` (or the module-level ``testall``
    flag) the resulting BAM file is then analysed by

      * ``myBAMhandler.getRepeatForGivenGene``   (values 0 and 2) and/or
      * ``myRepeatReAlignment.getRepeatCounts``  (values 1 and 2),

    and the results are collected through ``myBAMhandler.addSumForAGene``.
    The BAM file and its index are removed before returning.

    Args:
        commonOptions: dict; keys read here are 'hgfile', 'SplitAndReAlign'
            and 'outlog'.
        specifiedOptions: dict with 'fastafile', 'unique_file_id' and
            'analysis_file_id'; 'bamfile' is written.  When the key 'thread'
            is present the progress messages on stdout are suppressed.
        moreOptions: dict with 'mgloc', 'repeatName', 'gene_start_end',
            'repeat_start_end' and 'repPat'; 'fafqfile' and 'fafqtype' are
            written before the re-alignment analysis.

    Returns:
        ``[myret, myretdetail]``, the two dicts filled by ``addSumForAGene``.

    The process is terminated with ``sys.exit(1)`` when ``get3part`` returns
    an empty repeat region.
    """
    mgloc = moreOptions['mgloc']
    repeatName = moreOptions['repeatName']
    gene_start_end = moreOptions['gene_start_end']
    repeat_start_end = moreOptions['repeat_start_end']

    fastafile = specifiedOptions['fastafile']
    unique_file_id = specifiedOptions['unique_file_id']
    analysis_file_id = specifiedOptions['analysis_file_id']

    hgfile = commonOptions['hgfile']

    # The pattern is read before produce_for_repPat is called, as before.
    repPat = moreOptions['repPat']

    myHMM.produce_for_repPat(commonOptions, moreOptions)
    len_repPat = printHMMmatrix.get_len_repPat(repPat, commonOptions)
    logging.info("len_repPat=" + str(len_repPat))

    upstreamstr, repregion, downstreamstr = get3part(
        mgloc, gene_start_end, repeat_start_end, repeatName, unique_file_id,
        analysis_file_id, hgfile, specifiedOptions)

    if len(repregion) == 0:
        logging.error("Not repeat region! please check!!" + repeatName +
                      (' gene_location=[%d, %d], repeat_location=[%d, %d]' %
                       (gene_start_end[0], gene_start_end[1],
                        repeat_start_end[0], repeat_start_end[1])))
        sys.exit(1)

    logging.info("Test " + repeatName + (
        ' gene_location=[%d, %d], repeat_location=[%d, %d]; upstreamsize=%d, downstreamsize=%d'
        % (gene_start_end[0], gene_start_end[1], repeat_start_end[0],
           repeat_start_end[1], repeat_start_end[0] - gene_start_end[0],
           gene_start_end[1] - repeat_start_end[1])))
    logging.info("Normal/Pathogenic repeats: %s" % mgloc[5])

    orirepeat = int(len(repregion) / float(len_repPat))

    logging.info("Orignal Test read=" + '<<<' + repregion + '>>>' +
                 (" #repeat=%d; #len=%d" % (orirepeat, len(repregion))))

    # Band width for bwa mem (-w): a base value clamped to [100, 500],
    # enlarged by 40% of the reference stretch and capped at 500 again.
    max_w_option, min_w_option = 500, 100
    bwamem_w_option = min(max(90 * 4, min_w_option), max_w_option)
    bwamem_w_option = bwamem_w_option + int(
        len(upstreamstr + repregion + downstreamstr) * 0.4)
    if bwamem_w_option > max_w_option:
        bwamem_w_option = max_w_option

    bamfile = fastafile + unique_file_id + '.bam'
    specifiedOptions['bamfile'] = bamfile

    myret = {}
    myretdetail = {}

    cmd = 'bwa mem -k17 -w' + str(
        bwamem_w_option
    ) + ' -W40 -r10 -A1 -B1 -O1 -E1 -L1 -t ' + mthreads + ' -v 2 ' + hgfile + ' ' + fastafile + ' | samtools view -S -b | samtools sort > ' + bamfile
    logging.info(cmd)
    os.system(cmd)

    cmd = 'samtools index ' + bamfile
    logging.info(cmd)
    os.system(cmd)

    if (commonOptions['SplitAndReAlign'] in [0, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and ('thread' not in specifiedOptions):
            print('p2bamhmm start')
            sys.stdout.flush()
        p2bamhmm = myBAMhandler.getRepeatForGivenGene(commonOptions,
                                                      specifiedOptions,
                                                      moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2bamhmm is None:
            print(' '.join(['ERROR None detection',
                            str(moreOptions['repeatName']), str(mgloc)]))
            logging.error('ERROR None detection: ' +
                          str(moreOptions['repeatName']) + ' ' + str(mgloc))
        else:
            myBAMhandler.addSumForAGene(p2bamhmm, myret, myretdetail,
                                        'p2bamhmm', 2)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and ('thread' not in specifiedOptions):
            print('p2bamhmm end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    if (commonOptions['SplitAndReAlign'] in [1, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and ('thread' not in specifiedOptions):
            print('start p2sp')
            sys.stdout.flush()

        # The re-alignment step reads its input from the BAM file.
        moreOptions['fafqfile'] = bamfile
        moreOptions['fafqtype'] = 'bam'

        p2sp = myRepeatReAlignment.getRepeatCounts(commonOptions,
                                                   specifiedOptions,
                                                   moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2sp is None:
            print(' '.join(['ERROR None detection (sp)',
                            str(moreOptions['repeatName']), str(mgloc)]))
            logging.error('ERROR None detection (sp): ' +
                          str(moreOptions['repeatName']) + ' ' + str(mgloc))
        else:
            myBAMhandler.addSumForAGene(p2sp, myret, myretdetail, 'p2sp', 2)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and ('thread' not in specifiedOptions):
            print('p2sp end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    # Best-effort cleanup of the intermediate BAM file and its index.
    for leftover in (bamfile, bamfile + '.bai'):
        try:
            os.remove(leftover)
        except OSError as err:
            logging.warning('Cannot remove ' + leftover + ': ' + str(err))

    return [myret, myretdetail]