def getRepeatForKnownGene(commonOptions, specifiedOptions, moreOptions=None):
    """Detect repeat counts for a gene that is looked up by its repeat name.

    The repeat name is read from ``moreOptions['repeatName']`` when that key
    exists, otherwise from ``commonOptions['repeatName']``; either way it is
    lower-cased before being passed to ``get_gLoc``. Everything ``get_gLoc``
    returns (including ``'mgloc'``) is merged into ``moreOptions``.

    Depending on ``commonOptions['SplitAndReAlign']`` (or the module-level
    ``testall`` flag) one or both detectors are run:

    * value in ``[0, 2]``: ``getRepeatForGivenGene`` (labelled ``'p2bamhmm'``)
    * value in ``[1, 2]``: ``myRepeatReAlignment.getRepeatCounts``
      (labelled ``'p2sp'``)

    Args:
        commonOptions: Mapping of shared settings. Keys read here:
            ``'outlog'``, ``'SplitAndReAlign'`` and, as a fallback,
            ``'repeatName'``.
        specifiedOptions: Mapping of run-specific settings. Reads
            ``'SepbamfileTemp'`` and ``'bamfile'``; ``'bamfile'`` is
            overwritten when ``'SepbamfileTemp'`` is not None. Progress
            messages are suppressed when the key ``'thread'`` is present.
        moreOptions: Optional mapping that is updated in place with
            ``'repeatName'`` (when missing), the ``get_gLoc`` result and,
            for the re-alignment detector, ``'fafqfile'``/``'fafqtype'``.
            A fresh dict is used when omitted.

    Returns:
        ``[myret, myretdetail]``: the two dicts handed to ``addSumForAGene``
        for every detector that produced a non-None result.
    """
    # A literal ``{}`` default is created once and shared by all calls; since
    # this function writes into moreOptions, entries from one call would leak
    # into the next. Create a fresh dict per call instead.
    if moreOptions is None:
        moreOptions = {}

    if "repeatName" in moreOptions:
        repeatName = moreOptions['repeatName'].lower()
    else:
        repeatName = commonOptions['repeatName'].lower()
        moreOptions['repeatName'] = repeatName

    retoptions = get_gLoc(repeatName, commonOptions)
    mgloc = retoptions['mgloc']
    if commonOptions['outlog'] <= M_INFO:
        print('mgloc', mgloc)
    moreOptions.update(retoptions)

    myHMM.produce_for_repPat(commonOptions, moreOptions)

    if specifiedOptions["SepbamfileTemp"] is not None:
        # The template is filled with moreOptions['chr'] minus its first three
        # characters (presumably a "chr" prefix -- confirm against callers).
        specifiedOptions["bamfile"] = (
            specifiedOptions["SepbamfileTemp"] % moreOptions['chr'][3:])

    myret = {}
    myretdetail = {}

    if (commonOptions['SplitAndReAlign'] in [0, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and 'thread' not in specifiedOptions:
            print('p2bamhmm start')
        p2bamhmm = getRepeatForGivenGene(commonOptions, specifiedOptions,
                                         moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2bamhmm is None:
            print('ERROR None detection', moreOptions['repeatName'], mgloc)
            logging.error('ERROR None detection: ' +
                          str(moreOptions['repeatName']) + ' ' + str(mgloc))
        else:
            addSumForAGene(p2bamhmm, myret, myretdetail, 'p2bamhmm', 2)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and 'thread' not in specifiedOptions:
            print('p2bamhmm end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    if (commonOptions['SplitAndReAlign'] in [1, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and 'thread' not in specifiedOptions:
            print('p2sp start')
        # The re-alignment detector is pointed at the same BAM file.
        moreOptions['fafqfile'] = specifiedOptions["bamfile"]
        moreOptions['fafqtype'] = 'bam'
        p2sp = myRepeatReAlignment.getRepeatCounts(commonOptions,
                                                   specifiedOptions,
                                                   moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2sp is None:
            print('ERROR None detection (sp)', moreOptions['repeatName'], mgloc)
            logging.error('ERROR None detection (sp): ' +
                          str(moreOptions['repeatName']) + ' ' + str(mgloc))
        else:
            addSumForAGene(p2sp, myret, myretdetail, 'p2sp', 2)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and 'thread' not in specifiedOptions:
            print('p2sp end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    return [myret, myretdetail]
def detectRepCounts(commonOptions, specifiedOptions, moreOptions):
    """Run the configured repeat detectors for the location in ``moreOptions``.

    ``myBAMhandler.get_Loc1`` is called with ``moreOptions['mgloc']`` and its
    result is merged into ``moreOptions``. Depending on
    ``commonOptions['SplitAndReAlign']`` (or the module-level ``testall``
    flag) one or both detectors are run:

    * value in ``[0, 2]``: ``myBAMhandler.getRepeatForGivenGene``
      (labelled ``'p2bamhmm'``)
    * value in ``[1, 2]``: ``myRepeatReAlignment.getRepeatCounts``
      (labelled ``'p2sp'``)

    Each non-None result is forwarded to ``addSumForAGene``; a None result is
    printed and logged as an error.

    Args:
        commonOptions: Mapping of shared settings. Keys read here:
            ``'outlog'`` and ``'SplitAndReAlign'``.
        specifiedOptions: Mapping of run-specific settings. Reads
            ``'SepbamfileTemp'`` and ``'bamfile'``; ``'bamfile'`` is
            overwritten when ``'SepbamfileTemp'`` is not None. Progress
            messages are suppressed when the key ``'thread'`` is present.
        moreOptions: Mapping that must contain ``'mgloc'``; it is updated in
            place with the ``get_Loc1`` result and, for the re-alignment
            detector, ``'fafqfile'``/``'fafqtype'``.

    Returns:
        None. Results are only passed on to ``addSumForAGene``.
    """
    retoptions = myBAMhandler.get_Loc1(moreOptions['mgloc'], commonOptions)
    if commonOptions['outlog'] <= M_INFO:
        # Single-argument print() emits the same text on Python 2 and 3.
        print('mgloc %s' % (moreOptions['mgloc'],))
    moreOptions.update(retoptions)

    myHMM.produce_for_repPat(commonOptions, moreOptions)

    if specifiedOptions["SepbamfileTemp"] is not None:
        # The template is filled with moreOptions['chr'] minus its first three
        # characters (presumably a "chr" prefix -- confirm against callers).
        specifiedOptions["bamfile"] = (
            specifiedOptions["SepbamfileTemp"] % moreOptions['chr'][3:])

    if (commonOptions['SplitAndReAlign'] in [0, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and 'thread' not in specifiedOptions:
            print('p2bamhmm start')
        p2bamhmm = myBAMhandler.getRepeatForGivenGene(
            commonOptions, specifiedOptions, moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2bamhmm is None:
            print('ERROR None detection %s %s' %
                  (moreOptions['repeatName'], moreOptions['mgloc']))
            logging.error('ERROR None detection: ' +
                          str(moreOptions['repeatName']) + ' ' +
                          str(moreOptions['mgloc']))
        else:
            # NOTE(review): other call sites in this source pass
            # (result, myret, myretdetail, label, 2); confirm the argument
            # order here matches the addSumForAGene that is in scope.
            addSumForAGene(p2bamhmm, 'p2bamhmm', 2, commonOptions,
                           specifiedOptions, moreOptions)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and 'thread' not in specifiedOptions:
            print('p2bamhmm end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    if (commonOptions['SplitAndReAlign'] in [1, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and 'thread' not in specifiedOptions:
            print('p2sp start')
        # The re-alignment detector is pointed at the same BAM file.
        moreOptions['fafqfile'] = specifiedOptions["bamfile"]
        moreOptions['fafqtype'] = 'bam'
        p2sp = myRepeatReAlignment.getRepeatCounts(
            commonOptions, specifiedOptions, moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2sp is None:
            print('ERROR None detection (sp) %s %s' %
                  (moreOptions['repeatName'], moreOptions['mgloc']))
            logging.error('ERROR None detection (sp): ' +
                          str(moreOptions['repeatName']) + ' ' +
                          str(moreOptions['mgloc']))
        else:
            addSumForAGene(p2sp, 'p2sp', 2, commonOptions, specifiedOptions,
                           moreOptions)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and 'thread' not in specifiedOptions:
            print('p2sp end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()
def getSCA3ForGivenGene(commonOptions, specifiedOptions, moreOptions):
    """Align a FASTA file with bwa/samtools and run the repeat detectors on it.

    Steps, in order:

    1. Split the gene region into upstream / repeat / downstream parts with
       ``get3part`` and exit the process (status 1) if the repeat part is
       empty.
    2. Build a temporary BAM file named
       ``fastafile + unique_file_id + '.bam'`` by piping ``bwa mem`` through
       ``samtools view`` and ``samtools sort``, then index it. The path is
       stored in ``specifiedOptions['bamfile']``.
    3. Depending on ``commonOptions['SplitAndReAlign']`` (or the module-level
       ``testall`` flag) run ``myBAMhandler.getRepeatForGivenGene`` (values
       0 or 2) and/or ``myRepeatReAlignment.getRepeatCounts`` (values 1 or 2)
       and collect non-None results via ``myBAMhandler.addSumForAGene``.
    4. Delete the BAM file and its ``.bai`` index.

    Args:
        commonOptions: Mapping of shared settings. Keys read here:
            ``'hgfile'``, ``'outlog'`` and ``'SplitAndReAlign'``.
        specifiedOptions: Mapping of run-specific settings. Reads
            ``'fastafile'``, ``'unique_file_id'`` and ``'analysis_file_id'``;
            writes ``'bamfile'``. Progress messages are suppressed when the
            key ``'thread'`` is present.
        moreOptions: Mapping providing ``'mgloc'``, ``'repeatName'``,
            ``'gene_start_end'``, ``'repeat_start_end'`` and ``'repPat'``;
            ``'fafqfile'``/``'fafqtype'`` are written for the re-alignment
            detector.

    Returns:
        ``[myret, myretdetail]``: the two dicts handed to
        ``myBAMhandler.addSumForAGene``.

    Raises:
        SystemExit: when ``get3part`` yields an empty repeat region.
    """
    mgloc = moreOptions['mgloc']
    repeatName = moreOptions['repeatName']
    gene_start_end = moreOptions['gene_start_end']
    repeat_start_end = moreOptions['repeat_start_end']
    fastafile = specifiedOptions['fastafile']
    unique_file_id = specifiedOptions['unique_file_id']
    analysis_file_id = specifiedOptions['analysis_file_id']
    hgfile = commonOptions['hgfile']

    # The pattern is read *before* produce_for_repPat runs, so the length
    # below is computed from the value the caller supplied.
    repPat = moreOptions['repPat']
    myHMM.produce_for_repPat(commonOptions, moreOptions)
    len_repPat = printHMMmatrix.get_len_repPat(repPat, commonOptions)
    logging.info("len_repPat=" + str(len_repPat))

    upstreamstr, repregion, downstreamstr = get3part(
        mgloc, gene_start_end, repeat_start_end, repeatName, unique_file_id,
        analysis_file_id, hgfile, specifiedOptions)

    if len(repregion) == 0:
        logging.error("Not repeat region! please check!!" + repeatName +
                      (' gene_location=[%d, %d], repeat_location=[%d, %d]' %
                       (gene_start_end[0], gene_start_end[1],
                        repeat_start_end[0], repeat_start_end[1])))
        sys.exit(1)

    logging.info("Test " + repeatName + (
        ' gene_location=[%d, %d], repeat_location=[%d, %d]; upstreamsize=%d, downstreamsize=%d'
        % (gene_start_end[0], gene_start_end[1], repeat_start_end[0],
           repeat_start_end[1], repeat_start_end[0] - gene_start_end[0],
           gene_start_end[1] - repeat_start_end[1])))
    logging.info("Normal/Pathogenic repeats: %s" % mgloc[5])
    orirepeat = int(len(repregion) / float(len_repPat))
    logging.info("Orignal Test read=" + '<<<' + repregion + '>>>' +
                 (" #repeat=%d; #len=%d" % (orirepeat, len(repregion))))

    # Value for bwa's -w option: start from 90 * 4 clamped to [100, 500], add
    # 40% of the total region length, then cap at 500 again.
    max_w_option, min_w_option = 500, 100
    bwamem_w_option = min(max(90 * 4, min_w_option), max_w_option)
    bwamem_w_option = min(
        bwamem_w_option +
        int(len(upstreamstr + repregion + downstreamstr) * 0.4),
        max_w_option)

    bamfile = fastafile + unique_file_id + '.bam'
    specifiedOptions['bamfile'] = bamfile

    myret = {}
    myretdetail = {}

    # NOTE(review): paths are interpolated unquoted into shell command lines;
    # a path with spaces or shell metacharacters will break these commands.
    align_cmd = ('bwa mem -k17 -w' + str(bwamem_w_option) +
                 ' -W40 -r10 -A1 -B1 -O1 -E1 -L1 -t ' + mthreads + ' -v 2 ' +
                 hgfile + ' ' + fastafile +
                 ' | samtools view -S -b | samtools sort > ' + bamfile)
    for cmd in (align_cmd, 'samtools index ' + bamfile):
        logging.info(cmd)
        status = os.system(cmd)
        if status != 0:
            # Previously ignored; record it so a failed alignment is visible.
            logging.error('Command exited with status %s: %s' % (status, cmd))

    if (commonOptions['SplitAndReAlign'] in [0, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and 'thread' not in specifiedOptions:
            print('p2bamhmm start')
            sys.stdout.flush()
        p2bamhmm = myBAMhandler.getRepeatForGivenGene(
            commonOptions, specifiedOptions, moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2bamhmm is None:
            # Single-argument print() emits the same text on Python 2 and 3.
            print('ERROR None detection %s %s' %
                  (moreOptions['repeatName'], mgloc))
            logging.error('ERROR None detection: ' +
                          str(moreOptions['repeatName']) + ' ' + str(mgloc))
        else:
            myBAMhandler.addSumForAGene(p2bamhmm, myret, myretdetail,
                                        'p2bamhmm', 2)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and 'thread' not in specifiedOptions:
            print('p2bamhmm end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    if (commonOptions['SplitAndReAlign'] in [1, 2]) or testall:
        start_time = time.time()
        if commonOptions['outlog'] <= M_INFO and 'thread' not in specifiedOptions:
            print('start p2sp')
            sys.stdout.flush()
        # The re-alignment detector reads the freshly built BAM file.
        moreOptions['fafqfile'] = bamfile
        moreOptions['fafqtype'] = 'bam'
        p2sp = myRepeatReAlignment.getRepeatCounts(
            commonOptions, specifiedOptions, moreOptions)
        memres = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        if p2sp is None:
            print('ERROR None detection (sp) %s %s' %
                  (moreOptions['repeatName'], mgloc))
            logging.error('ERROR None detection (sp): ' +
                          str(moreOptions['repeatName']) + ' ' + str(mgloc))
        else:
            myBAMhandler.addSumForAGene(p2sp, myret, myretdetail, 'p2sp', 2)
        end_time = time.time()
        if commonOptions['outlog'] <= M_WARNING and 'thread' not in specifiedOptions:
            print('p2sp end---running time%.0f mem%d' %
                  (end_time - start_time, memres))
            sys.stdout.flush()

    # Remove the temporary alignment and its index; a missing file is logged
    # rather than raised so cleanup stays best-effort.
    for leftover in (bamfile, bamfile + '.bai'):
        try:
            os.remove(leftover)
        except OSError as err:
            logging.warning('Could not remove %s: %s' % (leftover, err))

    return [myret, myretdetail]