def find_parameters(filenames,pbc,model, dv,dw,dwrad,D0,dtimezero,temp,temp_end,nmc,nmc_update,seed,outfile, ncosF,ncosD,ncosDrad, move_timezero,initfile,k, lmax,reduction):
    # Run a Monte Carlo fit of a diffusion/free-energy model to transition
    # counts and dump the final state to `outfile` (text) and a pickle.
    # NOTE: Python 2 only — uses `print` statements and the `file()` builtin.
    # Parameters mirror MCState.set_MC_params / set_model; `seed` (optional)
    # makes the run reproducible, `outfile` None sends the report to stdout.
    print "python program to extract diffusion coefficient and free energy from transition counts"
    print "copyright: Gerhard Hummer (NIH, July 2012)"
    print "adapted by An Ghysels (August 2012)\n"
    if seed is not None:
        np.random.seed(seed)
    # start Monte Carlo object
    MC = MCState(pbc,lmax)
    # settings
    MC.set_MC_params(dv,dw,dwrad,D0,dtimezero,temp,nmc,nmc_update,move_timezero,k,temp_end=temp_end,)
    #MC.print_MC_params()
    # INPUT and INITIALIZATION model/MC
    # radial runs read a different transition-count format
    if MC.do_radial:
        data = RadTransitions(filenames)
    else:
        data = Transitions(filenames,reduction=reduction)
    MC.set_model(model,data,ncosF,ncosD,ncosDrad)
    # USE INFO from INITFILE
    if initfile is not None:
        import sys
        f = sys.stdout
        MC.use_initfile(initfile)
        MC.print_MC_params(f)
        MC.print_coeffs_laststate(f)
    logger = Logger(MC)
    # MONTE CARLO OPTIMIZATION
    do_mc_cycles(MC,logger)
    # print final results (potential and diffusion coefficient)
    #----------------------------------------------------------
    # choose filename for pickle object
    if outfile is None:
        import sys
        f = sys.stdout
        picfile = "mc.pic"
    else:
        f = file(outfile,"w+")  # print final model to a file
        picfile = outfile+".pic"
    # print to screen
    #MC.print_log_like()
    MC.print_statistics()
    MC.print_laststate(f,final=True)  # print model, coeffs
    if outfile is not None:
        f.close()
    logger.model = MC.model  # this is not a hard copy
    logger.dump(picfile)
    logger.statistics(MC)  #st=1000)
    return()
def __init__(self, file):
    """Create an empty automaton and populate it from *file*.

    All containers start empty; the actual content is loaded by the
    private reader invoked at the end.
    """
    # structural components of the automaton
    self.q = set()            # states
    self.F = set()            # final states
    self.alphabet = set()     # sigma
    self.transitions = Transitions()
    # remember the source, then parse it
    self.__file = file
    self.__read_file()
def __init__(self, file_name):
    """Initialise an empty automaton, then load it from *file_name*."""
    self.file_name = file_name
    # state set, alphabet, initial state, final states, transition map
    self.Q = set()
    self.E = set()
    self.F = set()
    self.q0 = None
    self.delta = Transitions()
    # populate everything from the file
    self.read_file()
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform, maxIter ):
    # Classify genomic bins by logistic regression, optionally decode the
    # per-sample label sequences with an HMM-style algorithm, and write a TSV.
    # decoding: 'V' Viterbi, 'F' forward-backward, 'A' both, 'N' none.
    # NOTE(review): `maxIter` is accepted but never used in this body — confirm
    # against callers whether it was meant to drive an iteration loop.
    # human-readable name of the decoding algorithm, for log messages
    dType = ('Viterbi' if decoding == 'V' else ('Forward-backward' if decoding == 'F' else ('Viterbi and Forward-backward' if decoding == 'A' else 'None') ) )
    # provenance header written as the first line of the output file
    info = '#from_script:epigenotype_by_logreg.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}\n'.format( os.path.basename( inFileStr), bth_util.binSizeToStr( binSize ), dType.lower().replace(' and ', ','), str(isUniform) )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label:', parentLabelAr[0] )
    print( 'Father label:', parentLabelAr[1] )
    print( 'Uniform classification probabilities:', str( isUniform ) )
    print( 'Decoding algorithm:', dType)
    # build data frame
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    checkParents( df['sample'], parentLabelAr )
    # group by bin and analyze
    df['bin'] = df.pos // binSize
    nbins = max(df['bin'])+1
    dfg = df.groupby('bin')
    if numProc > 1:
        print( 'Begin classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
        res_class = runMultiClassification( dfg, numProc, parentLabelAr, isUniform )
    else:
        print( 'Begin classifying {:d} bins'.format( nbins ) )
        res_class = dfg.apply( classLogReg, pla=parentLabelAr, u=isUniform )
    res_class.reset_index(inplace=True)
    # decode if necessary
    if decoding != 'N':
        # parents and mid-parent pseudo-sample are excluded from the transitions
        ignoreAr = parentLabelAr + ['MPV']
        print( 'Generating transition matrix' )
        transition = Transitions( res_class, ignore=ignoreAr )
        transitions = transition.getTransitions()
        # find optimum path for all samples
        groups = res_class.groupby( 'sample' )
        nsamples = len(groups.groups)
        if numProc > 1:
            print( 'Begin {:s} decoding {:d} samples with {:d} processors'.format( dType, nsamples, numProc ) )
            results = runMultiPath( groups, numProc, transitions, isUniform, decoding )
        else:
            print( 'Begin {:s} decoding {:d} samples'.format( dType, nsamples ) )
            results = groups.apply( findOptimalPath, trans=transitions, u=isUniform, d=decoding )
        results.set_index( ['bin', 'sample'], inplace=True )
    else:
        results = res_class
    # output file
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform )
    # write output
    print( 'Writing output to', outFileStr )
    # header is written and flushed (with-block closes) before the table is
    # appended in a separate open — keep to_csv outside the with
    with open( outFileStr, 'w' ) as f:
        f.write(info)
    results.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform ):
    # Paired-end variant: classify bins (always multiprocessed), optionally
    # decode per-sample label paths, and write the result as a TSV with a
    # provenance header line.
    info = '#from_script: epigenotyping_pe.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}'.format( os.path.basename( inFileStr ), bth_util.binSizeToStr( binSize ), formatDecoding( decoding).lower().replace('and',','), str(isUniform).lower() )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label:', parentLabelAr[0] )
    print( 'Father label:', parentLabelAr[1] )
    print( 'Uniform classification probabilities:', str(isUniform) )
    print( 'Decoding algorithm:', formatDecoding( decoding ) )
    # build dataframe
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    checkParents( df['sample'], parentLabelAr )
    # group by bin
    df['bin'] = df.pos // binSize
    nbins = max(df['bin'])+1
    dfBinGroup = df.groupby( 'bin' )
    # classify by bin
    print( 'Begin classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
    dfClass = runClassification( dfBinGroup, numProc, parentLabelAr, isUniform )
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    # free the raw frame before decoding
    del(df, dfBinGroup )
    # decode, if necessary
    if decoding != 'N':
        # exclude parents and the mid-parent pseudo-sample from transitions
        ignoreAr = parentLabelAr[:2] + ['MPV']
        transition = Transitions( dfClass, ignore = ignoreAr )
        transitionMatrix = transition.getTransitions()
        # group by sample
        dfSampleGroup = dfClass.groupby( 'sample' )
        nsamples = len(dfSampleGroup.groups )
        print( 'Begin {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding(decoding), nsamples, numProc ) )
        dfOutput = runDecoding( dfSampleGroup, numProc, transitionMatrix, decoding )
        dfOutput.set_index( ['bin', 'sample'], inplace=True )
        del( dfSampleGroup )
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform )
    print( 'Writing output to', outFileStr )
    # header first, then the table is appended in a separate open
    with open( outFileStr, 'w' ) as f:
        f.write(info+'\n')
    dfOutput.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def findOptimalPath(df, trans=None, u=False, d='V'):
    """Decode the most likely label path for one sample's bins.

    Parameters
    ----------
    df : per-sample DataFrame of classification scores.
    trans : transition matrix; when omitted (or empty) it is estimated
        from *df* itself.
    u : use uniform class probabilities (passed through to the path objects).
    d : decoding mode — 'A' all, 'F' forward-backward, otherwise Viterbi/simple.

    Returns the DataFrame produced by the chosen path object's ``run()``.
    """
    # Fix: the original used a module-level ``np.array([])`` as the default
    # argument — a shared mutable default. Use a None sentinel instead;
    # an explicitly passed empty array is still treated as "estimate here".
    if trans is None or trans.size == 0:
        # generate individual transition matrix
        transObj = Transitions(df, ignore=[])
        trans = transObj.getTransitions()
    if d == 'A':
        path = ProbPathAll(df, trans, u)
    elif d == 'F':
        path = ProbPathFB(df, trans, u)
    else:
        #path = ProbPathViterbi( df, trans, u )
        path = ProbPathSimple(df, trans)
    outDf = path.run()
    return outDf
def run(self):
    """Refine the transition matrix by repeated forward-backward passes.

    Each pass re-predicts every sample group with the current matrix,
    re-estimates transitions from those predictions, and stops early once
    the matrix change (Frobenius norm) falls below EPS.

    Returns (iterations_performed, final_transition_matrix).
    """
    n_done = 0
    for step in range(1, self.maxIter + 1):
        n_done = step
        # snapshot the current matrix so convergence can be measured
        previous = np.copy(self.transitions)
        # re-predict every sample with the previous matrix
        predictions = self.groups.apply(runFB, previous)
        # re-estimate transitions from the fresh predictions
        self.transitions = Transitions(predictions, self.ignore).getTransitions()
        # converged?
        if np.linalg.norm(previous - self.transitions) < EPS:
            break
    return n_done, self.transitions
def run(self):
    """Iteratively refine the transition matrix via forward-backward.

    Repeats: predict with the current matrix, re-estimate transitions from
    the predictions, until the matrix stops changing (Frobenius norm < EPS)
    or ``self.maxIter`` passes have run. Prints a convergence summary and
    returns the final matrix as a labelled DataFrame.
    """
    curIter = 0
    while curIter < self.maxIter:
        curIter += 1
        oldTransitions = np.copy(self.transitions)
        # get predictions with the matrix from the previous pass
        newPredictions = runFB(self.data, oldTransitions)
        # re-estimate transitions from those predictions
        t = Transitions(newPredictions, [])
        self.transitions = t.getTransitions()
        # check if transitions converged
        if np.linalg.norm(oldTransitions - self.transitions) < EPS:
            break
    # Fix: typo 'coverged' -> 'converged' in the user-facing message.
    # NOTE(review): convergence exactly on the final pass still reports
    # 'did not converge' (curIter == maxIter) — confirm intended semantics.
    print('did not converge' if curIter == self.maxIter else
          'converged in {:d} iterations'.format(curIter))
    return pd.DataFrame(self.transitions, index=self.labels, columns=self.labels)
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr, decoding, classProbs, combineBins, cent, scaleTransitions, isPrint):
    # v9 pipeline: classify bins, optionally merge sparse ("non-functional")
    # bins, decode per-sample paths (with an extra Viterbi pass when
    # decoding == 'B'), then undo the bin merge and write a TSV.
    # cent: flat list of centromere start/end coordinates (pairs), or None.
    info = '#from_script: epigenotyping_pe_v9.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; class_prob:{:s}; combine_bins_threshold:{:d}; centromere_{:s}; scale_transitions:{:s}'.format( os.path.basename(inFileStr), bth_util.binSizeToStr(binSize), formatDecoding(decoding).lower().replace('and', ','), formatClassProbs(classProbs).lower(), combineBins, ('None' if cent == None else '{:s}-{:s}'.format( bth_util.binSizeToStr(cent[0]), bth_util.binSizeToStr(cent[1]))), str(scaleTransitions))
    if isPrint:
        print('Weighted methylation file:', os.path.basename(inFileStr))
        print('Bin size:', bth_util.binSizeToStr(binSize))
        print('Mother label(s):', parentLabelAr[0])
        print('Father label(s):', parentLabelAr[1])
        print('Classification probabilities:', formatClassProbs(classProbs))
        print('Decoding algorithm:', formatDecoding(decoding))
        print('Combine bin feature threshold:', combineBins)
        print('Scale transitions by sample size:', scaleTransitions)
    # pretty-print the centromere region pairs
    if cent == None:
        centStr = 'None'
    else:
        centStr = ''
        for i in range(len(cent) // 2):
            si = i * 2
            centStr += '; {:s}-{:s}'.format( bth_util.binSizeToStr(cent[si]), bth_util.binSizeToStr(cent[si + 1]))
        centStr = centStr[2:]
    if isPrint:
        print('Centromere:', centStr)
    # build dataframe
    if isPrint:
        print(' Reading input file', os.path.basename(inFileStr))
    df = pd.read_table(inFileStr, header=1)
    # check parent labels
    newParentLabelAr = checkParents(df['sample'], parentLabelAr)
    # samples excluded from transition estimation: all parents + MPV pseudo-samples
    tIgnoreAr = flattenList(newParentLabelAr[:2])
    for i in range(len(newParentLabelAr[0])):
        tIgnoreAr += ['MPV{:d}'.format(i)]
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # get centromere bins if necessary (convert coordinates to bin indices)
    if cent == None:
        centBins = []
    else:
        cent = [x // binSize for x in cent]
        centBins = []
        #centBins = list( range(cent[0], cent[1]+1) )
        for i in range(len(cent) // 2):
            si = i * 2
            centBins += list(range(cent[si], cent[si + 1] + 1))
    # combine bins if necessary
    nbins = max(df['bin']) + 1
    if combineBins > 0:
        if isPrint:
            print(' Merging bins', end=' ... ')
        df['tBin'] = df['bin']
        transformation = binTransformation(df, combineBins)
        # apply the transformation
        df['bin'] = df['tBin'].apply(lambda x: transformation[x])
    dfBinGroup = df.groupby('bin')
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups)
        info += '; non-functional_bins:{:d}'.format(nbins - newNBins)
        if isPrint:
            print('combined {:d} non-functional bins'.format(nbins - newNBins))
    # classify by bin
    if isPrint:
        print(' Classifying {:d} bins with {:d} processors'.format( nbins, numProc))
    dfClass = runClassification(dfBinGroup, numProc, newParentLabelAr, classProbs)
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    del (df, dfBinGroup)
    # decode, if necessary
    if decoding != 'N':
        #ignoreAr = parentLabelAr[:2] + ['MPV']
        transition = Transitions(dfClass, ignore=tIgnoreAr)
        transitionMatrix = transition.getTransitions()
        # write this matrix to file
        '''outFStr = determineTransFileName(inFileStr, outID, binSize, combineBins ) tLabels = [ 'mother', 'MPV', 'father' ] transData = pd.DataFrame( transitionMatrix, index=tLabels, columns= tLabels ) with open( outFStr, 'w' ) as f: f.write(info+'\n') transData.to_csv( outFStr, sep='\t', mode='a' )'''
        # group by sample
        dfSampleGroup = dfClass.groupby('sample')
        nsamples = len(dfSampleGroup.groups)
        # unbiased-style correction factor based on decoded (non-ignored) samples
        if scaleTransitions:
            scaleFactor = float(nsamples - len(tIgnoreAr) - 1) / float(nsamples - len(tIgnoreAr))
        else:
            scaleFactor = 1
        # 'B' = forward-backward pass first, then a Viterbi pass below
        tmpDecoding = ('F' if decoding == 'B' else decoding)
        if isPrint:
            print(' {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding(tmpDecoding), nsamples, numProc))
        dfOutput = runDecoding(dfSampleGroup, numProc, transitionMatrix, tmpDecoding, centBins, scaleFactor)
        if decoding == 'B':
            # re-estimate transitions from FB scores (log space), then Viterbi
            dfNew = dfOutput.loc[:, ['bin', 'sample']].copy()
            dfNew['MPV'] = np.log(dfOutput['fb.score.MPV'])
            dfNew['mother'] = np.log(dfOutput['fb.score.mother'])
            dfNew['father'] = np.log(dfOutput['fb.score.father'])
            dfNew['prediction'] = dfOutput['fb.prediction']
            #print(dfOutput.head())
            #print(dfNew.head())
            transition = Transitions(dfNew, ignore=tIgnoreAr)
            transitionMatrix = transition.getTransitions()
            dfSampleGroup = dfNew.groupby('sample')
            nsamples = len(dfSampleGroup.groups)
            if isPrint:
                print( ' {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding('V'), nsamples, numProc))
            dfOutputN = runDecoding(dfSampleGroup, numProc, transitionMatrix, 'V', centBins, scaleFactor)
            # merge the Viterbi columns back into the FB output
            dfOutput[[ 'vit.score.mother', 'vit.score.father', 'vit.score.MPV', 'vit.prob.mother', 'vit.prob.father', 'vit.prob.MPV', 'vit.prediction' ]] = dfOutputN[[ 'vit.score.mother', 'vit.score.father', 'vit.score.MPV', 'vit.prob.mother', 'vit.prob.father', 'vit.prob.MPV', 'vit.prediction' ]]
            #print( dfOutput.head() )
        # end decoding == B
        dfOutput.set_index(['bin', 'sample'], inplace=True)
        del (dfSampleGroup)
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName(inFileStr, outID, binSize, decoding, classProbs, scaleTransitions, combineBins)
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation(dfOutput, transformation)
    else:
        # NOTE(review): this branch drops 'cBin', but 'cBin' is only added in
        # the combineBins > 0 branch above — verify this does not raise when
        # combineBins == 0 (it would suggest the branches are swapped).
        dfOutputT = dfOutput.drop('cBin', axis=1)
    if isPrint:
        print(' Writing output to', outFileStr)
    with open(outFileStr, 'w') as f:
        f.write(info + '\n')
    dfOutputT.to_csv(outFileStr, sep='\t', mode='a')
    if isPrint:
        print('Done')
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform, combineBins, maxIter=0):
    """Classify bins, optionally merge sparse bins and iteratively refine
    per-bin transition matrices, decode per-sample paths, and write a TSV.

    Fixes over the original:
    - ``maxIter`` was referenced (``if maxIter > 0``) but was not a
      parameter, and ``trInfo`` was appended to before being initialised —
      both raised NameError when decoding was enabled. ``maxIter`` is now a
      backward-compatible keyword (default 0 disables refinement) and
      ``trInfo`` is initialised and folded into the output header.
    - the non-convergence message hard-coded "10 iterations"; it now
      reports the actual ``maxIter``.
    """
    info = '#from_script: epigenotyping_pe_combbin.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; combine_bins_threshold:{:d}'.format(
        os.path.basename(inFileStr), bth_util.binSizeToStr(binSize),
        formatDecoding(decoding).lower().replace('and', ','),
        str(isUniform).lower(), combineBins)
    print('Weighted methylation file:', os.path.basename(inFileStr))
    print('Bin size:', bth_util.binSizeToStr(binSize))
    print('Mother label:', parentLabelAr[0])
    print('Father label:', parentLabelAr[1])
    print('Uniform classification probabilities:', str(isUniform))
    print('Decoding algorithm:', formatDecoding(decoding))
    print('Combine bin feature threshold:', combineBins)
    # build dataframe
    print(' Reading input file', os.path.basename(inFileStr))
    df = pd.read_table(inFileStr, header=1)
    # check parent labels
    checkParents(df['sample'], parentLabelAr)
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # combine bins if necessary
    nbins = max(df['bin']) + 1
    if combineBins > 0:
        print(' Merging bins', end=' ... ')
        df['tBin'] = df['bin']
        transformation = binTransformation(df, combineBins)
        # apply the transformation
        df['bin'] = df['tBin'].apply(lambda x: transformation[x])
    dfBinGroup = df.groupby('bin')
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups)
        print('combined {:d} non-functional bins'.format(nbins - newNBins))
    # classify by bin
    print(' Classifying {:d} bins with {:d} processors'.format(nbins, numProc))
    dfClass = runClassification(dfBinGroup, numProc, parentLabelAr, isUniform)
    dfClass.reset_index(inplace=True)
    del (df, dfBinGroup)
    # decode, if necessary
    if decoding != 'N':
        # parents and mid-parent pseudo-sample excluded from transitions
        ignoreAr = parentLabelAr[:2] + ['MPV']
        print(' Obtaining initial transitions')
        transition = Transitions(dfClass, ignore=ignoreAr)
        transitionMatrix = transition.getTransitions()
        # replicate the single matrix per bin for the decoder
        transitionMatrixArray = np.array(
            [np.copy(transitionMatrix) for i in range(nbins)])
        trInfo = ''  # fix: was used uninitialised
        if maxIter > 0:
            print(' Iteratively improving transitions with maximum', maxIter, 'iterations')
            at = AdaptiveTransitions(dfClass, transitionMatrixArray, ignoreAr, maxIter)
            iterations, transitionMatrix = at.run()
            trInfo += '; iterations_to_convergence:'
            if iterations == maxIter:
                trInfo += 'NA'
                # fix: message no longer hard-codes "10"
                print(' Did not converge in {:d} iterations'.format(maxIter))
            else:
                trInfo += str(iterations)
                print(' Convergence in', iterations, 'iterations')
        info += trInfo  # record refinement outcome in the provenance header
        # group by sample
        dfSampleGroup = dfClass.groupby('sample')
        nsamples = len(dfSampleGroup.groups)
        print(' {:s} decoding {:d} samples with {:d} processors'.format(
            formatDecoding(decoding), nsamples, numProc))
        dfOutput = runDecoding(dfSampleGroup, numProc, transitionMatrixArray, decoding)
        dfOutput.set_index(['bin', 'sample'], inplace=True)
        del (dfSampleGroup)
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName(inFileStr, outID, binSize, decoding, isUniform, combineBins)
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation(dfOutput, transformation)
    else:
        # NOTE(review): 'cBin' is only created in the branch above — confirm
        # this drop does not raise when combineBins == 0.
        dfOutputT = dfOutput.drop('cBin', axis=1)
    print(' Writing output to', outFileStr)
    with open(outFileStr, 'w') as f:
        f.write(info + '\n')
    dfOutputT.to_csv(outFileStr, sep='\t', mode='a')
    print('Done')
def __init__(self, target_position):
    """Set up the learning environment around *target_position*.

    Builds empty transition, Q-value, and V-value tables plus a reward
    model tied to the target.
    """
    self.target_position = target_position
    # reward model depends on the goal position
    self._rewards = Rewarder(target_position)
    # learning state: transitions plus Q/V tables start empty
    self._tran = Transitions()
    self._q_tab = QTable()
    self._v_tab = VTable()
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform, combineBins, maxIter):
    # Variant where transition-matrix optimisation happens inside the
    # per-sample decoding step (up to maxIter refinement iterations);
    # classifies bins, optionally merges sparse bins, decodes, writes a TSV.
    info = '#from_script: epigenotyping_combin_smp-iter-trans.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; combine_bins_threshold:{:d}; maximum_iterations:{:d}'.format( os.path.basename(inFileStr), bth_util.binSizeToStr(binSize), formatDecoding(decoding).lower().replace('and', ','), str(isUniform).lower(), combineBins, maxIter)
    print('Weighted methylation file:', os.path.basename(inFileStr))
    print('Bin size:', bth_util.binSizeToStr(binSize))
    print('Mother label:', parentLabelAr[0])
    print('Father label:', parentLabelAr[1])
    print('Uniform classification probabilities:', str(isUniform))
    print('Decoding algorithm:', formatDecoding(decoding))
    print('Combine bin feature threshold:', combineBins)
    print('Maximum transition matrix iterations:', maxIter)
    # build dataframe
    print(' Reading input file', os.path.basename(inFileStr))
    df = pd.read_table(inFileStr, header=1)
    # check parent labels
    checkParents(df['sample'], parentLabelAr)
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # combine bins if necessary
    nbins = max(df['bin']) + 1
    if combineBins > 0:
        print(' Merging bins', end=' ... ')
        df['tBin'] = df['bin']
        transformation = binTransformation(df, combineBins)
        # apply the transformation
        df['bin'] = df['tBin'].apply(lambda x: transformation[x])
    dfBinGroup = df.groupby('bin')
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups)
        print('combined {:d} non-functional bins'.format(nbins - newNBins))
    # classify by bin
    print(' Classifying {:d} bins with {:d} processors'.format(nbins, numProc))
    dfClass = runClassification(dfBinGroup, numProc, parentLabelAr, isUniform)
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    del (df, dfBinGroup)
    # decode, if necessary
    if decoding != 'N':
        # parents and mid-parent pseudo-sample excluded from transitions
        ignoreAr = parentLabelAr[:2] + ['MPV']
        print(' Obtaining initial transitions')
        transition = Transitions(dfClass, ignore=ignoreAr)
        transitionMatrix = transition.getTransitions()
        # per-sample transition logs are appended to this file by the decoder
        # (only the provenance header is written here)
        outFStr = determineTransFileName(inFileStr, outID, binSize, combineBins)
        with open(outFStr, 'w') as f:
            f.write(info + '\n')
        # group by sample
        #print(dfClass.head())
        dfSampleGroup = dfClass.groupby('sample')
        nsamples = len(dfSampleGroup.groups)
        print( ' {:s} decoding and optimizing transition matrices for {:d} samples with {:d} processors' .format(formatDecoding(decoding), nsamples, numProc))
        ## note: decoding will now include improved transition matrix calculations
        dfOutput = runDecoding(dfSampleGroup, numProc, transitionMatrix, decoding, maxIter, outFStr)
        dfOutput.set_index(['bin', 'sample'], inplace=True)
        del (dfSampleGroup)
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName(inFileStr, outID, binSize, decoding, isUniform, combineBins)
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation(dfOutput, transformation)
    else:
        # NOTE(review): 'cBin' is only created in the branch above — confirm
        # this drop does not raise when combineBins == 0.
        dfOutputT = dfOutput.drop('cBin', axis=1)
    print(' Writing output to', outFileStr)
    with open(outFileStr, 'w') as f:
        f.write(info + '\n')
    dfOutputT.to_csv(outFileStr, sep='\t', mode='a')
    print('Done')