def __init__(self, file):
    """Create an empty automaton and immediately load its definition from *file*."""
    # Remember the source path first; __read_file() consumes it.
    self.__file = file
    # Empty components until the definition file is parsed.
    self.q = set()             # states
    self.alphabet = set()      # sigma (input symbols)
    self.F = set()             # final (accepting) states
    self.transitions = Transitions()
    self.__read_file()
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform, isIndiv ):
    """Classify genome bins by epigenotype and optionally decode sample paths.

    Reads a weighted-methylation table, classifies each bin by logistic
    regression, then (unless decoding == 'N') decodes an optimal state path
    per sample and writes the combined result to a tab-separated file.

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors; >1 enables the multiprocessing paths
        binSize: genomic bin width used to derive df['bin'] from position
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: [mother_label, father_label] sample labels
        decoding: 'V' (Viterbi), 'F' (forward-backward), 'A' (both), 'N' (none)
        isUniform: use uniform classification probabilities
        isIndiv: per-sample transition matrices (skips the shared matrix)
    """
    # Human-readable name for the chosen decoding algorithm.
    dType = ('Viterbi' if decoding == 'V' else ('Forward-backward' if decoding == 'F' else ('Viterbi and Forward-backward' if decoding == 'A' else 'None') ) )
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script:epigenotype_by_logreg.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; indiv_transitions:{:s}\n'.format( os.path.basename( inFileStr), bth_util.binSizeToStr( binSize ), dType.lower().replace(' and ', ','), str(isUniform), str(isIndiv) )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label:', parentLabelAr[0] )
    print( 'Father label:', parentLabelAr[1] )
    print( 'Uniform classification probabilities:', str( isUniform ) )
    print( 'Decoding algorithm:', dType)
    print( 'Individual transition probabilities:', str( isIndiv ) )
    # build data frame
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    checkParents( df['sample'], parentLabelAr )
    # group by bin and analyze
    df['bin'] = df.pos // binSize
    nbins = max(df['bin'])+1
    dfg = df.groupby('bin')
    if numProc > 1:
        print( 'Begin classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
        res_class = runMultiClassification( dfg, numProc, parentLabelAr, isUniform )
    else:
        print( 'Begin classifying {:d} bins'.format( nbins ) )
        res_class = dfg.apply( classLogReg, pla=parentLabelAr, u=isUniform )
    res_class.reset_index(inplace=True)
    # decode if necessary
    if decoding != 'N':
        # parent samples (and MPV) are excluded from transition estimation
        ignoreAr = parentLabelAr + ['MPV']
        if isIndiv:
            # empty matrix signals findOptimalPath to build one per sample
            transitions = np.array([])
        else:
            print( 'Generating transition matrix' )
            transition = Transitions( res_class, ignore=ignoreAr )
            transitions = transition.getTransitions()
            print(transitions)
        # find optimum path for all samples
        groups = res_class.groupby( 'sample' )
        nsamples = len(groups.groups)
        if numProc > 1:
            print( 'Begin {:s} decoding {:d} samples with {:d} processors'.format( dType, nsamples, numProc ) )
            results = runMultiPath( groups, numProc, transitions, isUniform, decoding )
        else:
            print( 'Begin {:s} decoding {:d} samples'.format( dType, nsamples ) )
            results = groups.apply( findOptimalPath, trans=transitions, u=isUniform, d=decoding )
        results.set_index( ['bin', 'sample'], inplace=True )
    else:
        results = res_class
    # output file
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform, isIndiv )
    # write output: header line first, then the dataframe appended below it
    print( 'Writing output to', outFileStr )
    with open( outFileStr, 'w' ) as f:
        f.write(info)
    results.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, parentAddLabelAr, decoding, isUniform ):
    """Bin-classify epigenotypes with optional extra parent training labels.

    Variant that accepts additional mother/father training samples
    (parentAddLabelAr) alongside the primary parent labels; both sets are
    excluded from transition estimation during decoding.

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors for classification/decoding
        binSize: genomic bin width
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: [mother_labels, father_labels] (lists of sample names)
        parentAddLabelAr: additional [mother, father] training label lists
        decoding: decoding algorithm code; 'N' disables decoding
        isUniform: use uniform classification probabilities
    """
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script: epigenotyping_pe.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; mother_samples:{:s}; father_samples:{:s}'.format( os.path.basename( inFileStr ), bth_util.binSizeToStr( binSize ), formatDecoding( decoding).lower().replace('and',','), str(isUniform).lower(), ','.join(parentLabelAr[0]), ','.join(parentLabelAr[1]) )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label(s):', ', '.join(parentLabelAr[0]) )
    print( 'Father label(s):', ', '.join(parentLabelAr[1]) )
    if len(parentAddLabelAr[0]) != 0 or len(parentAddLabelAr[1]) != 0:
        print( 'Additional mother training label(s):', ('None' if len(parentAddLabelAr[0])==0 else ', '.join(parentAddLabelAr[0])) )
        print( 'Additional father training label(s):', ('None' if len(parentAddLabelAr[1]) == 0 else ', '.join(parentAddLabelAr[1])) )
    print( 'Uniform classification probabilities:', str(isUniform) )
    print( 'Decoding algorithm:', formatDecoding( decoding ) )
    # build dataframe
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    parentLabelAr = checkParents( df['sample'], parentLabelAr )
    # check additional training data labels
    if len(parentAddLabelAr[0]) != 0 or len(parentAddLabelAr[1]) != 0:
        parentAddLabelAr = checkParents( df['sample'], parentAddLabelAr )
    # group by bin
    df['bin'] = df.pos // binSize
    nbins = max(df['bin'])+1
    dfBinGroup = df.groupby( 'bin' )
    # classify by bin
    print( 'Begin classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
    dfClass = runClassification( dfBinGroup, numProc, parentLabelAr, parentAddLabelAr, isUniform )
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    del(df, dfBinGroup )
    # decode, if necessary
    if decoding != 'N':
        # all parent samples (primary + additional) plus MPV are excluded
        # from transition-matrix estimation
        totalParentLabelAr = [parentLabelAr[0] + parentAddLabelAr[0], parentLabelAr[1] + parentAddLabelAr[1]]
        ignoreAr = flattenList( totalParentLabelAr ) + ['MPV']
        transition = Transitions( dfClass, ignore = ignoreAr )
        transitionMatrix = transition.getTransitions()
        # group by sample
        dfSampleGroup = dfClass.groupby( 'sample' )
        nsamples = len(dfSampleGroup.groups )
        print( 'Begin {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding(decoding), nsamples, numProc ) )
        dfOutput = runDecoding( dfSampleGroup, numProc, transitionMatrix, decoding )
        dfOutput.set_index( ['bin', 'sample'], inplace=True )
        del( dfSampleGroup )
    else:
        dfOutput = dfClass
    # write output: header line first, then the dataframe appended below it
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform )
    print( 'Writing output to', outFileStr )
    with open( outFileStr, 'w' ) as f:
        f.write(info+'\n')
    dfOutput.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def __init__(self, file_name):
    """Initialise the automaton's components and parse *file_name*."""
    self.file_name = file_name
    # Formal components, filled in by read_file().
    self.Q = set()              # state set
    self.E = set()              # input alphabet
    self.F = set()              # accepting states
    self.q0 = None              # initial state
    self.delta = Transitions()  # transition function
    self.read_file()
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform, maxIter ):
    """Classify genome bins by epigenotype and optionally decode sample paths.

    Same pipeline as the logreg variant: read methylation table, classify
    each bin, then (unless decoding == 'N') estimate a shared transition
    matrix and decode an optimal path per sample.

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors; >1 enables the multiprocessing paths
        binSize: genomic bin width used to derive df['bin'] from position
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: [mother_label, father_label] sample labels
        decoding: 'V' (Viterbi), 'F' (forward-backward), 'A' (both), 'N' (none)
        isUniform: use uniform classification probabilities
        maxIter: NOTE(review): accepted but never used in this body — confirm
            whether it should be forwarded to the decoding step
    """
    dType = ('Viterbi' if decoding == 'V' else ('Forward-backward' if decoding == 'F' else ('Viterbi and Forward-backward' if decoding == 'A' else 'None') ) )
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script:epigenotype_by_logreg.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}\n'.format( os.path.basename( inFileStr), bth_util.binSizeToStr( binSize ), dType.lower().replace(' and ', ','), str(isUniform) )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label:', parentLabelAr[0] )
    print( 'Father label:', parentLabelAr[1] )
    print( 'Uniform classification probabilities:', str( isUniform ) )
    print( 'Decoding algorithm:', dType)
    # build data frame
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    checkParents( df['sample'], parentLabelAr )
    # group by bin and analyze
    df['bin'] = df.pos // binSize
    nbins = max(df['bin'])+1
    dfg = df.groupby('bin')
    if numProc > 1:
        print( 'Begin classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
        res_class = runMultiClassification( dfg, numProc, parentLabelAr, isUniform )
    else:
        print( 'Begin classifying {:d} bins'.format( nbins ) )
        res_class = dfg.apply( classLogReg, pla=parentLabelAr, u=isUniform )
    res_class.reset_index(inplace=True)
    # decode if necessary
    if decoding != 'N':
        # parent samples (and MPV) are excluded from transition estimation
        ignoreAr = parentLabelAr + ['MPV']
        print( 'Generating transition matrix' )
        transition = Transitions( res_class, ignore=ignoreAr )
        transitions = transition.getTransitions()
        # find optimum path for all samples
        groups = res_class.groupby( 'sample' )
        nsamples = len(groups.groups)
        if numProc > 1:
            print( 'Begin {:s} decoding {:d} samples with {:d} processors'.format( dType, nsamples, numProc ) )
            results = runMultiPath( groups, numProc, transitions, isUniform, decoding )
        else:
            print( 'Begin {:s} decoding {:d} samples'.format( dType, nsamples ) )
            results = groups.apply( findOptimalPath, trans=transitions, u=isUniform, d=decoding )
        results.set_index( ['bin', 'sample'], inplace=True )
    else:
        results = res_class
    # output file
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform )
    # write output: header line first, then the dataframe appended below it
    print( 'Writing output to', outFileStr )
    with open( outFileStr, 'w' ) as f:
        f.write(info)
    results.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform ):
    """Bin-classify epigenotypes and optionally decode sample state paths.

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors for classification/decoding
        binSize: genomic bin width
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: parent sample labels (mother first, father second)
        decoding: decoding algorithm code; 'N' disables decoding
        isUniform: use uniform classification probabilities
    """
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script: epigenotyping_pe.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}'.format( os.path.basename( inFileStr ), bth_util.binSizeToStr( binSize ), formatDecoding( decoding).lower().replace('and',','), str(isUniform).lower() )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label:', parentLabelAr[0] )
    print( 'Father label:', parentLabelAr[1] )
    print( 'Uniform classification probabilities:', str(isUniform) )
    print( 'Decoding algorithm:', formatDecoding( decoding ) )
    # build dataframe
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    checkParents( df['sample'], parentLabelAr )
    # group by bin
    df['bin'] = df.pos // binSize
    nbins = max(df['bin'])+1
    dfBinGroup = df.groupby( 'bin' )
    # classify by bin
    print( 'Begin classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
    dfClass = runClassification( dfBinGroup, numProc, parentLabelAr, isUniform )
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    del(df, dfBinGroup )
    # decode, if necessary
    if decoding != 'N':
        # parents (first two labels) and MPV excluded from transition estimation
        ignoreAr = parentLabelAr[:2] + ['MPV']
        transition = Transitions( dfClass, ignore = ignoreAr )
        transitionMatrix = transition.getTransitions()
        # group by sample
        dfSampleGroup = dfClass.groupby( 'sample' )
        nsamples = len(dfSampleGroup.groups )
        print( 'Begin {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding(decoding), nsamples, numProc ) )
        dfOutput = runDecoding( dfSampleGroup, numProc, transitionMatrix, decoding )
        dfOutput.set_index( ['bin', 'sample'], inplace=True )
        del( dfSampleGroup )
    else:
        dfOutput = dfClass
    # write output: header line first, then the dataframe appended below it
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform )
    print( 'Writing output to', outFileStr )
    with open( outFileStr, 'w' ) as f:
        f.write(info+'\n')
    dfOutput.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def findOptimalPath(df, trans=np.array([]), u=False, d='V'):
    """Decode the best state path for one sample's bins.

    When *trans* is empty, a per-sample transition matrix is estimated from
    *df* first. The decoder is chosen by *d*: 'A' runs both algorithms,
    'F' forward-backward, anything else the simple path.
    """
    # generate individual transition matrix when none was provided
    if trans.size == 0:
        trans = Transitions(df, ignore=[]).getTransitions()
    if d == 'A':
        decoder = ProbPathAll(df, trans, u)
    elif d == 'F':
        decoder = ProbPathFB(df, trans, u)
    else:
        # Viterbi decoder intentionally disabled in favor of the simple path
        #decoder = ProbPathViterbi( df, trans, u )
        decoder = ProbPathSimple(df, trans)
    return decoder.run()
def findOptimalPath( df, trans=np.array([]), u=False, d='V' ):
    """Decode the best state path for one sample's bins.

    An empty *trans* triggers estimation of an individual transition matrix
    from *df*. Dispatches on *d*: 'A' -> both decoders, 'F' ->
    forward-backward, otherwise the simple path decoder.
    """
    # build an individual transition matrix if the caller supplied none
    if trans.size == 0:
        transEstimator = Transitions( df, ignore=[] )
        trans = transEstimator.getTransitions()
    if d == 'A':
        pathDecoder = ProbPathAll( df, trans, u )
    elif d == 'F':
        pathDecoder = ProbPathFB( df, trans, u )
    else:
        # Viterbi variant deliberately commented out upstream; simple path used
        #pathDecoder = ProbPathViterbi( df, trans, u )
        pathDecoder = ProbPathSimple( df, trans )
    return pathDecoder.run()
class FiniteAutomaton:
    """Finite automaton loaded from a text definition file.

    File format (space-separated, one section per line): states, alphabet,
    final states, then one transition per line as `src dst symbol...`.
    The first state listed is taken as the initial state.
    """

    def __init__(self, file):
        """Initialise empty components and parse the definition in *file*."""
        self.q = set()  # states (replaced by a list in __read_file)
        self.alphabet = set()  # sigma (replaced by a list in __read_file)
        self.transitions = Transitions()
        self.F = set()  # final states (replaced by a list in __read_file)
        self.__file = file
        self.__read_file()

    def __read_file(self):
        """Parse the definition file; raises ValueError on unknown states/symbols."""
        with open(self.__file, 'r') as file:
            self.q = file.readline().strip().split(' ')
            self.alphabet = file.readline().strip().split(' ')
            # initial state is the first state listed
            self.__q0 = self.q[0]
            self.F = file.readline().strip().split(' ')
            # remaining lines: src dst symbol [symbol ...]
            line = file.readline().strip()
            while line:
                line = line.split(' ')
                if line[0] not in self.q or line[1] not in self.q:
                    raise ValueError("State does not exist")
                for i in range(2, len(line)):
                    if line[i] not in self.alphabet:
                        raise ValueError("Symbol " + line[i] + " is not in the alphabet")
                    self.transitions.add(line[0], line[1], line[i])
                line = file.readline().strip()

    def is_deterministic(self):
        """Return True when no (state, symbol) pair has more than one transition."""
        keys = self.transitions.get_keys()
        for start in keys:
            for letter in self.alphabet:
                if len(self.transitions.get_transitions_to(start, letter)) > 1:
                    return False
        return True

    def is_accepted(self, word):
        """Simulate *word* from the initial state.

        Returns True/False for acceptance, or None when the automaton is
        non-deterministic (simulation is only defined for DFAs here).
        """
        if not self.is_deterministic():
            return None
        currentState = self.__q0
        for char in word:
            nextTransitions = self.transitions.get_transitions_to( currentState, char)
            # missing transition: the word is rejected
            if len(nextTransitions) == 0:
                return False
            nextTransition = nextTransitions[0]
            currentState = nextTransition[0]
        if currentState not in self.F:
            return False
        return True
def read_config(filename): global log, Args # load INI files config = SafeConfigParser() config.read(filename) # read frame size size = config.get('output', 'size').split('x') size = [int(size[0]), int(size[1])] # read frames per second fps = int(config.get('output', 'fps')) # read composites from configuration log.info("reading composites from configuration...") composites = Composites.configure(config.items('composites'), size) log.debug("read %d composites:\n\t%s\t" % (len(composites), '\n\t'.join(sorted(composites)))) # maybe overwirte targets by arguments if Args.composite: # check for composites in arguments targets = [composites[c] for c in set(Args.composite)] else: # list of all relevant composites we like to target targets = Composites.targets(composites) intermediates = Composites.intermediates(composites) # list targets and itermediates if Args.list: print("%d targetable composite(s):\n\t%s\t" % (len(targets), '\n\t'.join([t.name for t in targets]))) print("%d intermediate composite(s):\n\t%s\t" % (len(intermediates), '\n\t'.join([t.name for t in intermediates]))) # read transitions from configuration log.info("reading transitions from configuration...") transitions = Transitions.configure(config.items('transitions'), composites, targets, fps) log.info("read %d transition(s)" % transitions.count()) if Args.map: print("transition table:\n%s" % transitions) # maybe overwirte targets by arguments if Args.composite: # check for composites in arguments sequence = Args.composite else: # generate sequence of targets sequence = Transitions.travel([t.name for t in targets]) log.debug("using %d target composite(s):\n\t%s\t" % (len(targets), '\n\t'.join([t.name for t in targets]))) # return config return size, fps, sequence, transitions, composites
def find_parameters(filenames,pbc,model, dv,dw,dwrad,D0,dtimezero,temp,temp_end,nmc,nmc_update,seed,outfile, ncosF,ncosD,ncosDrad, move_timezero,initfile,k, lmax,reduction):
    """Run the Monte Carlo optimization extracting diffusion/free-energy profiles.

    NOTE: this function is Python 2 (`print` statements, `file()` builtin).
    Builds an MCState from transition-count files, optionally seeds it from
    *initfile*, runs the MC cycles, then writes results to *outfile* (or
    stdout) and dumps a pickled logger next to it. Returns an empty tuple.
    """
    print "python program to extract diffusion coefficient and free energy from transition counts"
    print "copyright: Gerhard Hummer (NIH, July 2012)"
    print "adapted by An Ghysels (August 2012)\n"
    # make runs reproducible when a seed is supplied
    if seed is not None:
        np.random.seed(seed)
    # start Monte Carlo object
    MC = MCState(pbc,lmax)
    # settings
    MC.set_MC_params(dv,dw,dwrad,D0,dtimezero,temp,nmc,nmc_update,move_timezero,k,temp_end=temp_end,)
    #MC.print_MC_params()
    # INPUT and INITIALIZATION model/MC
    # radial runs read a different transition-count format
    if MC.do_radial:
        data = RadTransitions(filenames)
    else:
        data = Transitions(filenames,reduction=reduction)
    MC.set_model(model,data,ncosF,ncosD,ncosDrad)
    # USE INFO from INITFILE
    if initfile is not None:
        import sys
        f = sys.stdout
        MC.use_initfile(initfile)
        MC.print_MC_params(f)
        MC.print_coeffs_laststate(f)
    logger = Logger(MC)
    # MONTE CARLO OPTIMIZATION
    do_mc_cycles(MC,logger)
    # print final results (potential and diffusion coefficient)
    #----------------------------------------------------------
    # choose filename for pickle object
    if outfile is None:
        import sys
        f = sys.stdout
        picfile = "mc.pic"
    else:
        # py2 `file` builtin; "w+" truncates and allows read-back
        f = file(outfile,"w+")
        # print final model to a file
        picfile = outfile+".pic"
    # print to screen
    #MC.print_log_like()
    MC.print_statistics()
    MC.print_laststate(f,final=True)
    # print model, coeffs
    if outfile is not None:
        f.close()
    logger.model = MC.model  # this is not a hard copy
    logger.dump(picfile)
    logger.statistics(MC)  #st=1000)
    return()
def run(self):
    """Refine the transition matrix by repeated forward-backward passes.

    Each sweep decodes every group with the current matrix, re-estimates the
    matrix from the predictions, and stops early once the matrix change
    (Frobenius norm) falls below EPS or self.maxIter sweeps have run.
    Returns (iterations_used, final_matrix).
    """
    sweep = 0
    while sweep < self.maxIter:
        sweep += 1
        # keep a snapshot so convergence can be measured afterwards
        previous = np.copy(self.transitions)
        predictions = self.groups.apply(runFB, previous)
        # re-estimate the matrix from the fresh predictions
        self.transitions = Transitions(predictions, self.ignore).getTransitions()
        if np.linalg.norm(previous - self.transitions) < EPS:
            break
    return sweep, self.transitions
def run( self ):
    """Iteratively re-estimate transitions via forward-backward decoding.

    Repeats decode-then-reestimate until the matrix stabilises (change below
    EPS) or self.maxIter iterations elapse; returns (iterations, matrix).
    """
    iteration = 0
    while iteration < self.maxIter:
        iteration += 1
        snapshot = np.copy( self.transitions )
        # decode all groups with the current matrix
        predicted = self.groups.apply( runFB, snapshot )
        # rebuild the matrix from those predictions
        estimator = Transitions( predicted, self.ignore )
        self.transitions = estimator.getTransitions()
        # converged once the matrix stops moving
        if np.linalg.norm( snapshot - self.transitions ) < EPS:
            break
    return iteration, self.transitions
class ValueIterator:
    """Tabular value iteration over a grid of (letter, number, orientation) states."""

    def __init__(self, target_position):
        self.target_position = target_position
        self._tran = Transitions()
        self._rewards = Rewarder(target_position)
        self._q_tab = QTable()
        self._v_tab = VTable()

    def update(self, debug=False):
        """Perform one backup sweep: refresh Q for every state/action, then V."""
        for state in self.all_states():
            for action in range(len(Config.actions)):
                successor = self._tran.run(state, action)
                reward = self._rewards[state, successor]
                # forbidden moves (no successor) contribute the bare reward
                value = reward + Config.gamma * self._v_tab[successor] if successor else reward
                self._q_tab[state, action] = value
                if debug:
                    pprint_transition(state, action, successor, reward)
        self._v_tab.update_from_q_table(self._q_tab)

    # noinspection PyMethodMayBeStatic
    def all_states(self):
        """Yield every (col, row, orientation) triple except the target cell."""
        for col in range(len(Config.letters)):
            for row in range(len(Config.numbers)):
                if (col, row) == self.target_position:
                    continue
                for orientation in range(len(Config.orientations)):
                    yield col, row, orientation

    def path(self, s0):
        """Greedy rollout from *s0* to the target; raises on a forbidden move."""
        action, _ = self._q_tab.get_best_action(s0)
        successor = self._tran.run(s0, action)
        if not successor:
            raise ValueError("Переход в запрещенное состояние: " + state_to_str(s0) + "-" + action_to_str(action) + "-> None")
        if (successor[0], successor[1]) == self.target_position:
            return [s0, action, successor]
        return [s0, action] + self.path(successor)
def run( self ):
    """Iteratively re-estimate the transition matrix until convergence.

    Each iteration decodes self.data by forward-backward with the current
    matrix, rebuilds the matrix from the predictions, and stops once the
    change (Frobenius norm) drops below EPS or self.maxIter is reached.
    Returns the final matrix as a labeled DataFrame.
    """
    curIter = 0
    converged = False
    while curIter < self.maxIter:
        curIter += 1
        oldTransitions = np.copy( self.transitions )
        # decode with the current matrix ...
        newPredictions = runFB( self.data, oldTransitions )
        # ... then re-estimate the matrix from those predictions
        t = Transitions( newPredictions, [] )
        self.transitions = t.getTransitions()
        if np.linalg.norm( oldTransitions - self.transitions ) < EPS:
            converged = True
            break
    # fixes: 'coverged' typo, and convergence reached exactly on the final
    # allowed iteration is no longer misreported as 'did not converge'
    print( 'converged in {:d} iterations'.format( curIter ) if converged else 'did not converge' )
    return pd.DataFrame( self.transitions, index=self.labels, columns=self.labels )
def run(self):
    """Iteratively re-estimate the transition matrix until convergence.

    Decodes self.data by forward-backward with the current matrix, rebuilds
    the matrix from the predictions, and stops when the matrix change
    (Frobenius norm) is below EPS or self.maxIter iterations have run.
    Returns the final matrix as a labeled DataFrame.
    """
    curIter = 0
    converged = False
    while curIter < self.maxIter:
        curIter += 1
        oldTransitions = np.copy(self.transitions)
        # decode with the current matrix, then re-estimate from the result
        newPredictions = runFB(self.data, oldTransitions)
        t = Transitions(newPredictions, [])
        self.transitions = t.getTransitions()
        if np.linalg.norm(oldTransitions - self.transitions) < EPS:
            converged = True
            break
    # fixes: 'coverged' typo, and convergence on the final allowed iteration
    # is no longer misreported as failure
    print('converged in {:d} iterations'.format(curIter) if converged else 'did not converge')
    return pd.DataFrame(self.transitions, index=self.labels, columns=self.labels)
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, classProbs, combineBins, cent, isPrint ):
    """Epigenotype bins (v7): classify, optionally merge sparse bins, decode.

    Adds bin combining (combineBins threshold), centromere exclusion regions
    (cent, flat list of start/end pairs), and a two-stage 'B' decoding mode
    (forward-backward first, then Viterbi on its log-scores).

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors
        binSize: genomic bin width
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: parent label lists (mother first, father second)
        decoding: decoding code; 'B' = FB then Viterbi, 'N' = none
        classProbs: classification probability mode (see formatClassProbs)
        combineBins: feature-count threshold for merging bins (0 disables)
        cent: flat [start, end, ...] centromere coordinates, or None
        isPrint: suppress progress output when False
    """
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script: epigenotyping_pe_v7.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; class_prob:{:s}; combine_bins_threshold:{:d}; centromere_{:s}'.format( os.path.basename( inFileStr ), bth_util.binSizeToStr( binSize ), formatDecoding( decoding).lower().replace('and',','), formatClassProbs(classProbs).lower(), combineBins, ('None' if cent == None else '{:s}-{:s}'.format( bth_util.binSizeToStr( cent[0] ), bth_util.binSizeToStr( cent[1] ) ) ) )
    if isPrint:
        print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
        print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
        print( 'Mother label(s):', parentLabelAr[0] )
        print( 'Father label(s):', parentLabelAr[1] )
        print( 'Classification probabilities:', formatClassProbs( classProbs ) )
        print( 'Decoding algorithm:', formatDecoding( decoding ) )
        print( 'Combine bin feature threshold:', combineBins )
    # render the centromere region list for display
    if cent == None:
        centStr = 'None'
    else:
        centStr = ''
        for i in range(len(cent)//2):
            si = i*2
            centStr += '; {:s}-{:s}'.format( bth_util.binSizeToStr( cent[si] ), bth_util.binSizeToStr( cent[si+1] ) )
        centStr = centStr[2:]
    if isPrint:
        print( 'Centromere:', centStr )
    # build dataframe
    if isPrint:
        print( ' Reading input file', os.path.basename( inFileStr ) )
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    newParentLabelAr = checkParents( df['sample'], parentLabelAr )
    # exclude parents and every per-mother MPV pseudo-sample from transitions
    tIgnoreAr = flattenList( newParentLabelAr[:2] )
    for i in range(len(newParentLabelAr[0])):
        tIgnoreAr += [ 'MPV{:d}'.format( i ) ]
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # get centromere bins if necessary
    if cent == None:
        centBins = []
    else:
        # convert coordinates to bin indices, then expand each region
        cent = [ x // binSize for x in cent ]
        centBins = []
        #centBins = list( range(cent[0], cent[1]+1) )
        for i in range(len(cent) // 2 ):
            si = i * 2
            centBins += list( range(cent[si], cent[si+1]+1) )
    # combine bins if necessary
    nbins = max(df['bin'])+1
    if combineBins > 0:
        if isPrint:
            print( ' Merging bins', end=' ... ' )
        df['tBin'] = df['bin']
        transformation = binTransformation( df, combineBins )
        # apply the transformation
        df['bin'] = df['tBin'].apply( lambda x: transformation[x] )
    dfBinGroup = df.groupby( 'bin' )
    if combineBins > 0:
        newNBins = len( dfBinGroup.groups )
        info += '; non-functional_bins:{:d}'.format( nbins - newNBins )
        if isPrint:
            print( 'combined {:d} non-functional bins'.format( nbins - newNBins ) )
    # classify by bin
    if isPrint:
        print( ' Classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
    dfClass = runClassification( dfBinGroup, numProc, newParentLabelAr, classProbs )
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    del(df, dfBinGroup )
    # decode, if necessary
    if decoding != 'N':
        #ignoreAr = parentLabelAr[:2] + ['MPV']
        transition = Transitions( dfClass, ignore = tIgnoreAr )
        transitionMatrix = transition.getTransitions()
        # write this matrix to file
        #outFStr = determineTransFileName(inFileStr, outID, binSize, combineBins )
        #tLabels = [ 'mother', 'MPV', 'father' ]
        #transData = pd.DataFrame( transitionMatrix, index=tLabels, columns= tLabels )
        #with open( outFStr, 'w' ) as f:
        #	f.write(info+'\n')
        #transData.to_csv( outFStr, sep='\t', mode='a' )
        # group by sample
        dfSampleGroup = dfClass.groupby( 'sample' )
        nsamples = len( dfSampleGroup.groups )
        # 'B' means: forward-backward first, then a Viterbi pass below
        tmpDecoding = ( 'F' if decoding == 'B' else decoding )
        if isPrint:
            print( ' {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding(tmpDecoding), nsamples, numProc ) )
        dfOutput = runDecoding( dfSampleGroup, numProc, transitionMatrix, tmpDecoding, centBins )
        if decoding == 'B':
            # second stage: Viterbi on the log FB scores with re-estimated transitions
            dfNew = dfOutput.loc[:,['bin','sample']].copy()
            dfNew['MPV'] = np.log(dfOutput['fb.score.MPV'])
            dfNew['mother'] = np.log(dfOutput['fb.score.mother'])
            dfNew['father'] = np.log(dfOutput['fb.score.father'])
            dfNew['prediction'] = dfOutput['fb.prediction']
            #print(dfOutput.head())
            #print(dfNew.head())
            transition = Transitions( dfNew, ignore = tIgnoreAr )
            transitionMatrix = transition.getTransitions()
            dfSampleGroup = dfNew.groupby( 'sample' )
            nsamples = len( dfSampleGroup.groups )
            if isPrint:
                print( ' {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding('V'), nsamples, numProc ) )
            dfOutputN = runDecoding( dfSampleGroup, numProc, transitionMatrix, 'V', centBins )
            # copy the Viterbi columns back onto the FB output
            dfOutput[['vit.score.mother', 'vit.score.father', 'vit.score.MPV', 'vit.prob.mother', 'vit.prob.father', 'vit.prob.MPV', 'vit.prediction']] = dfOutputN[['vit.score.mother', 'vit.score.father', 'vit.score.MPV', 'vit.prob.mother', 'vit.prob.father', 'vit.prob.MPV', 'vit.prediction']]
            #print( dfOutput.head() )
        # end decoding == B
        dfOutput.set_index( ['bin', 'sample'], inplace=True )
        del( dfSampleGroup )
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, classProbs, combineBins )
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation( dfOutput, transformation )
    else:
        # NOTE(review): dfOutput is only given a 'cBin' column in the
        # combineBins branch above — confirm this drop cannot KeyError here
        dfOutputT = dfOutput.drop('cBin', axis=1)
    if isPrint:
        print( ' Writing output to', outFileStr )
    with open( outFileStr, 'w' ) as f:
        f.write(info+'\n')
    dfOutputT.to_csv( outFileStr, sep='\t', mode='a' )
    if isPrint:
        print( 'Done' )
def processInputs( inFileStr, numProc, binSize, outID, parentLabelAr, decoding, isUniform, combineBins, maxIter=0 ):
    """Epigenotype bins with optional bin combining and adaptive transitions.

    Classifies bins, optionally merges sparse bins (combineBins threshold),
    then decodes each sample with per-bin transition matrices that can be
    iteratively refined.

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors
        binSize: genomic bin width
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: parent labels (mother first, father second)
        decoding: decoding code; 'N' disables decoding
        isUniform: use uniform classification probabilities
        maxIter: maximum AdaptiveTransitions refinement iterations; 0 skips
            refinement. (Bug fix: this was referenced but missing from the
            signature, so any decoding run raised NameError.)
    """
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script: epigenotyping_pe_combbin.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; combine_bins_threshold:{:d}'.format( os.path.basename( inFileStr ), bth_util.binSizeToStr( binSize ), formatDecoding( decoding).lower().replace('and',','), str(isUniform).lower(), combineBins )
    print( 'Weighted methylation file:', os.path.basename( inFileStr ) )
    print( 'Bin size:', bth_util.binSizeToStr( binSize ) )
    print( 'Mother label:', parentLabelAr[0] )
    print( 'Father label:', parentLabelAr[1] )
    print( 'Uniform classification probabilities:', str(isUniform) )
    print( 'Decoding algorithm:', formatDecoding( decoding ) )
    print( 'Combine bin feature threshold:', combineBins )
    # build dataframe
    print( ' Reading input file', os.path.basename( inFileStr ) )
    df = pd.read_table( inFileStr, header=1 )
    # check parent labels
    checkParents( df['sample'], parentLabelAr )
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # combine bins if necessary
    nbins = max(df['bin'])+1
    if combineBins > 0:
        print( ' Merging bins', end=' ... ' )
        df['tBin'] = df['bin']
        transformation = binTransformation( df, combineBins )
        # apply the transformation
        df['bin'] = df['tBin'].apply( lambda x: transformation[x] )
    dfBinGroup = df.groupby( 'bin' )
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups )
        print( 'combined {:d} non-functional bins'.format( nbins - newNBins ) )
    # classify by bin
    print( ' Classifying {:d} bins with {:d} processors'.format( nbins, numProc ) )
    dfClass = runClassification( dfBinGroup, numProc, parentLabelAr, isUniform )
    dfClass.reset_index(inplace=True)
    del(df, dfBinGroup )
    # decode, if necessary
    if decoding != 'N':
        # parents and MPV excluded from transition estimation
        ignoreAr = parentLabelAr[:2] + ['MPV']
        print( ' Obtaining initial transitions' )
        transition = Transitions( dfClass, ignore = ignoreAr )
        transitionMatrix = transition.getTransitions()
        # replicate the shared matrix so each bin can be refined separately
        # NOTE(review): uses the pre-merge bin count nbins — confirm this is
        # intended when combineBins > 0
        transitionMatrixArray = np.array( [ np.copy( transitionMatrix ) for i in range(nbins) ] )
        if maxIter > 0:
            print( ' Iteratively improving transitions with maximum', maxIter, 'iterations' )
            at = AdaptiveTransitions( dfClass, transitionMatrixArray, ignoreAr, maxIter )
            # Bug fix: keep the refined matrices (the result was previously
            # assigned to an unused name while the stale array was decoded).
            iterations, transitionMatrixArray = at.run()
            # Bug fix: record convergence in the header via `info`
            # (was an undefined `trInfo`, which raised NameError).
            info += '; iterations_to_convergence:'
            if iterations == maxIter:
                info += 'NA'
                # Bug fix: report the actual limit instead of hardcoded "10"
                print( ' Did not converge in', maxIter, 'iterations' )
            else:
                info += str(iterations)
                print( ' Convergence in', iterations, 'iterations' )
        # group by sample
        dfSampleGroup = dfClass.groupby( 'sample' )
        nsamples = len( dfSampleGroup.groups )
        print( ' {:s} decoding {:d} samples with {:d} processors'.format( formatDecoding(decoding), nsamples, numProc ) )
        dfOutput = runDecoding( dfSampleGroup, numProc, transitionMatrixArray, decoding )
        dfOutput.set_index( ['bin', 'sample'], inplace=True )
        del( dfSampleGroup )
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName( inFileStr, outID, binSize, decoding, isUniform, combineBins )
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation( dfOutput, transformation )
    else:
        # NOTE(review): 'cBin' only exists after undoBinTransformation —
        # confirm this drop cannot KeyError when combineBins == 0
        dfOutputT = dfOutput.drop('cBin', axis=1)
    print( ' Writing output to', outFileStr )
    with open( outFileStr, 'w' ) as f:
        f.write(info+'\n')
    dfOutputT.to_csv( outFileStr, sep='\t', mode='a' )
    print( 'Done' )
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr, decoding, classProbs, combineBins, cent, scaleTransitions, isPrint):
    """Epigenotype bins (v9): v7 pipeline plus sample-size transition scaling.

    Parameters:
        inFileStr: path to the tab-separated methylation input file
        numProc: number of processors
        binSize: genomic bin width
        outID: identifier forwarded to determineOutputFileName
        parentLabelAr: parent label lists (mother first, father second)
        decoding: decoding code; 'B' = FB then Viterbi, 'N' = none
        classProbs: classification probability mode (see formatClassProbs)
        combineBins: feature-count threshold for merging bins (0 disables)
        cent: flat [start, end, ...] centromere coordinates, or None
        scaleTransitions: scale transitions by (non-ignored samples - 1)/(non-ignored samples)
        isPrint: suppress progress output when False
    """
    # Header line recording the run parameters; written before the CSV body.
    info = '#from_script: epigenotyping_pe_v9.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; class_prob:{:s}; combine_bins_threshold:{:d}; centromere_{:s}; scale_transitions:{:s}'.format(
        os.path.basename(inFileStr), bth_util.binSizeToStr(binSize),
        formatDecoding(decoding).lower().replace('and', ','),
        formatClassProbs(classProbs).lower(), combineBins,
        ('None' if cent == None else '{:s}-{:s}'.format(
            bth_util.binSizeToStr(cent[0]), bth_util.binSizeToStr(cent[1]))),
        str(scaleTransitions))
    if isPrint:
        print('Weighted methylation file:', os.path.basename(inFileStr))
        print('Bin size:', bth_util.binSizeToStr(binSize))
        print('Mother label(s):', parentLabelAr[0])
        print('Father label(s):', parentLabelAr[1])
        print('Classification probabilities:', formatClassProbs(classProbs))
        print('Decoding algorithm:', formatDecoding(decoding))
        print('Combine bin feature threshold:', combineBins)
        print('Scale transitions by sample size:', scaleTransitions)
    # render the centromere region list for display
    if cent == None:
        centStr = 'None'
    else:
        centStr = ''
        for i in range(len(cent) // 2):
            si = i * 2
            centStr += '; {:s}-{:s}'.format(
                bth_util.binSizeToStr(cent[si]), bth_util.binSizeToStr(cent[si + 1]))
        centStr = centStr[2:]
    if isPrint:
        print('Centromere:', centStr)
    # build dataframe
    if isPrint:
        print(' Reading input file', os.path.basename(inFileStr))
    df = pd.read_table(inFileStr, header=1)
    # check parent labels
    newParentLabelAr = checkParents(df['sample'], parentLabelAr)
    # exclude parents and every per-mother MPV pseudo-sample from transitions
    tIgnoreAr = flattenList(newParentLabelAr[:2])
    for i in range(len(newParentLabelAr[0])):
        tIgnoreAr += ['MPV{:d}'.format(i)]
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # get centromere bins if necessary
    if cent == None:
        centBins = []
    else:
        # convert coordinates to bin indices, then expand each region
        cent = [x // binSize for x in cent]
        centBins = []
        #centBins = list( range(cent[0], cent[1]+1) )
        for i in range(len(cent) // 2):
            si = i * 2
            centBins += list(range(cent[si], cent[si + 1] + 1))
    # combine bins if necessary
    nbins = max(df['bin']) + 1
    if combineBins > 0:
        if isPrint:
            print(' Merging bins', end=' ... ')
        df['tBin'] = df['bin']
        transformation = binTransformation(df, combineBins)
        # apply the transformation
        df['bin'] = df['tBin'].apply(lambda x: transformation[x])
    dfBinGroup = df.groupby('bin')
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups)
        info += '; non-functional_bins:{:d}'.format(nbins - newNBins)
        if isPrint:
            print('combined {:d} non-functional bins'.format(nbins - newNBins))
    # classify by bin
    if isPrint:
        print(' Classifying {:d} bins with {:d} processors'.format(
            nbins, numProc))
    dfClass = runClassification(dfBinGroup, numProc, newParentLabelAr, classProbs)
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    del (df, dfBinGroup)
    # decode, if necessary
    if decoding != 'N':
        #ignoreAr = parentLabelAr[:2] + ['MPV']
        transition = Transitions(dfClass, ignore=tIgnoreAr)
        transitionMatrix = transition.getTransitions()
        # write this matrix to file (dead code kept verbatim)
        '''outFStr = determineTransFileName(inFileStr, outID, binSize, combineBins ) tLabels = [ 'mother', 'MPV', 'father' ] transData = pd.DataFrame( transitionMatrix, index=tLabels, columns= tLabels ) with open( outFStr, 'w' ) as f: f.write(info+'\n') transData.to_csv( outFStr, sep='\t', mode='a' )'''
        # group by sample
        dfSampleGroup = dfClass.groupby('sample')
        nsamples = len(dfSampleGroup.groups)
        # scale by (n-ignored-1)/(n-ignored) when requested, else leave as-is
        if scaleTransitions:
            scaleFactor = float(nsamples - len(tIgnoreAr) - 1) / float(nsamples - len(tIgnoreAr))
        else:
            scaleFactor = 1
        # 'B' means: forward-backward first, then a Viterbi pass below
        tmpDecoding = ('F' if decoding == 'B' else decoding)
        if isPrint:
            print(' {:s} decoding {:d} samples with {:d} processors'.format(
                formatDecoding(tmpDecoding), nsamples, numProc))
        dfOutput = runDecoding(dfSampleGroup, numProc, transitionMatrix, tmpDecoding, centBins, scaleFactor)
        if decoding == 'B':
            # second stage: Viterbi on log FB scores with re-estimated transitions
            dfNew = dfOutput.loc[:, ['bin', 'sample']].copy()
            dfNew['MPV'] = np.log(dfOutput['fb.score.MPV'])
            dfNew['mother'] = np.log(dfOutput['fb.score.mother'])
            dfNew['father'] = np.log(dfOutput['fb.score.father'])
            dfNew['prediction'] = dfOutput['fb.prediction']
            #print(dfOutput.head())
            #print(dfNew.head())
            transition = Transitions(dfNew, ignore=tIgnoreAr)
            transitionMatrix = transition.getTransitions()
            dfSampleGroup = dfNew.groupby('sample')
            nsamples = len(dfSampleGroup.groups)
            if isPrint:
                print(
                    ' {:s} decoding {:d} samples with {:d} processors'.format(
                        formatDecoding('V'), nsamples, numProc))
            dfOutputN = runDecoding(dfSampleGroup, numProc, transitionMatrix, 'V', centBins, scaleFactor)
            # copy the Viterbi columns back onto the FB output
            dfOutput[[
                'vit.score.mother', 'vit.score.father', 'vit.score.MPV',
                'vit.prob.mother', 'vit.prob.father', 'vit.prob.MPV',
                'vit.prediction'
            ]] = dfOutputN[[
                'vit.score.mother', 'vit.score.father', 'vit.score.MPV',
                'vit.prob.mother', 'vit.prob.father', 'vit.prob.MPV',
                'vit.prediction'
            ]]
            #print( dfOutput.head() )
        # end decoding == B
        dfOutput.set_index(['bin', 'sample'], inplace=True)
        del (dfSampleGroup)
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName(inFileStr, outID, binSize, decoding, classProbs, scaleTransitions, combineBins)
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation(dfOutput, transformation)
    else:
        # NOTE(review): dfOutput is only given a 'cBin' column in the
        # combineBins branch above — confirm this drop cannot KeyError here
        dfOutputT = dfOutput.drop('cBin', axis=1)
    if isPrint:
        print(' Writing output to', outFileStr)
    with open(outFileStr, 'w') as f:
        f.write(info + '\n')
    dfOutputT.to_csv(outFileStr, sep='\t', mode='a')
    if isPrint:
        print('Done')
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr,
                  decoding, isUniform, combineBins, maxIter):
    """Run the full epigenotyping pipeline on one weighted-methylation file.

    Steps: read input, assign positions to bins, optionally merge sparse
    ("non-functional") bins, classify each bin, then (unless decoding == 'N')
    decode each sample while iteratively optimizing transition matrices, and
    finally write the result table with a metadata header line.

    Parameters:
        inFileStr     -- path to the tab-separated weighted methylation file
        numProc       -- number of worker processes for classification/decoding
        binSize       -- genomic bin width in bp (positions are floor-divided)
        outID         -- identifier used when building output file names
        parentLabelAr -- [mother_label, father_label, ...]; first two are parents
        decoding      -- decoding algorithm code; 'N' means no decoding
        isUniform     -- use uniform classification probabilities
        combineBins   -- threshold for merging bins (0 disables merging)
        maxIter       -- maximum transition-matrix optimization iterations
    """
    # Metadata header line; later written as the first line of every output file.
    info = '#from_script: epigenotyping_combin_smp-iter-trans.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; combine_bins_threshold:{:d}; maximum_iterations:{:d}'.format(
        os.path.basename(inFileStr), bth_util.binSizeToStr(binSize),
        formatDecoding(decoding).lower().replace('and', ','),
        str(isUniform).lower(), combineBins, maxIter)
    print('Weighted methylation file:', os.path.basename(inFileStr))
    print('Bin size:', bth_util.binSizeToStr(binSize))
    print('Mother label:', parentLabelAr[0])
    print('Father label:', parentLabelAr[1])
    print('Uniform classification probabilities:', str(isUniform))
    print('Decoding algorithm:', formatDecoding(decoding))
    print('Combine bin feature threshold:', combineBins)
    print('Maximum transition matrix iterations:', maxIter)
    # build dataframe
    print(' Reading input file', os.path.basename(inFileStr))
    # header=1: the real column header is the file's second line (first is a comment).
    df = pd.read_table(inFileStr, header=1)
    # check parent labels
    checkParents(df['sample'], parentLabelAr)
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # combine bins if necessary
    nbins = max(df['bin']) + 1
    if combineBins > 0:
        print(' Merging bins', end=' ... ')
        # keep the original bin id in 'tBin', then remap 'bin' through the
        # merge transformation so sparse bins collapse into neighbors
        df['tBin'] = df['bin']
        transformation = binTransformation(df, combineBins)
        # apply the transformation
        df['bin'] = df['tBin'].apply(lambda x: transformation[x])
    dfBinGroup = df.groupby('bin')
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups)
        print('combined {:d} non-functional bins'.format(nbins - newNBins))
    # classify by bin
    print(' Classifying {:d} bins with {:d} processors'.format(nbins, numProc))
    dfClass = runClassification(dfBinGroup, numProc, parentLabelAr, isUniform)
    dfClass.reset_index(inplace=True)
    #print( dfClass.head )
    # free the raw data before the (memory-heavy) decoding step
    del (df, dfBinGroup)
    # decode, if necessary
    if decoding != 'N':
        # parent samples and the mid-parent value are excluded from transitions
        ignoreAr = parentLabelAr[:2] + ['MPV']
        print(' Obtaining initial transitions')
        transition = Transitions(dfClass, ignore=ignoreAr)
        transitionMatrix = transition.getTransitions()
        outFStr = determineTransFileName(inFileStr, outID, binSize, combineBins)
        # write only the metadata header now; runDecoding receives outFStr and
        # presumably appends the per-sample transition matrices — TODO confirm
        with open(outFStr, 'w') as f:
            f.write(info + '\n')
        # group by sample
        #print(dfClass.head())
        dfSampleGroup = dfClass.groupby('sample')
        nsamples = len(dfSampleGroup.groups)
        print(' {:s} decoding and optimizing transition matrices for {:d} samples with {:d} processors'.format(
            formatDecoding(decoding), nsamples, numProc))
        ## note: decoding will now include improved transition matrix calculations
        dfOutput = runDecoding(dfSampleGroup, numProc, transitionMatrix,
                               decoding, maxIter, outFStr)
        dfOutput.set_index(['bin', 'sample'], inplace=True)
        del (dfSampleGroup)
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName(inFileStr, outID, binSize, decoding,
                                         isUniform, combineBins)
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation(dfOutput, transformation)
    else:
        # NOTE(review): this assumes a 'cBin' column exists when no merging was
        # done; if it does not, drop() raises KeyError — verify upstream output
        dfOutputT = dfOutput.drop('cBin', axis=1)
    print(' Writing output to', outFileStr)
    # header line first, then append the table so the comment survives to_csv
    with open(outFileStr, 'w') as f:
        f.write(info + '\n')
    dfOutputT.to_csv(outFileStr, sep='\t', mode='a')
    print('Done')
def __init__(self, target_position): self.target_position = target_position self._tran = Transitions() self._rewards = Rewarder(target_position) self._q_tab = QTable() self._v_tab = VTable()
# NOTE(review): class name is misspelled ("Automatan" vs "Automaton") but
# renaming would break existing callers, so it is left as-is.
class FiniteAutomatan:
    """A finite automaton loaded from a text file.

    Expected file layout (space-separated):
        line 1: states            line 2: alphabet
        line 3: initial state     line 4: final states
        remaining lines: <from> <to> <symbol> [<symbol> ...]
    """

    def __init__(self, file_name):
        # NOTE(review): Q/E/F are initialized as sets but read_file() replaces
        # them with lists from split(); membership tests still work either way.
        self.file_name = file_name
        self.Q = set()          # states
        self.E = set()          # alphabet (sigma)
        self.q0 = None          # initial state
        self.F = set()          # final states
        self.delta = Transitions()  # transition function (project class)
        self.read_file()        # eagerly load — constructor performs file I/O

    def read_file(self):
        """Parse the automaton definition file, validating each element.

        Raises Exception for any state/symbol not declared in the header lines.
        """
        with open(self.file_name, 'r') as file:
            self.Q = file.readline().strip('\n').split(' ')
            self.E = file.readline().strip('\n').split(' ')
            self.q0 = file.readline().strip('\n')
            if self.q0 not in self.Q:
                raise Exception("Invalid initial state " + self.q0)
            self.F = file.readline().strip('\n').split(' ')
            for state in self.F:
                if state not in self.Q:
                    raise Exception("Invalid final state " + state)
            line = file.readline().strip('\n')
            # one transition line may carry several symbols (tokens 2..n)
            while line:
                line = line.split(' ')
                if line[0] not in self.Q or line[1] not in self.Q:
                    raise Exception("Invalid transition " + line[0] + "->" +
                                    line[1] + ", one state is invalid")
                for i in range(2, len(line)):
                    if line[i] not in self.E:
                        raise Exception("Invalid symbol " + line[i] +
                                        " in transition " + ' '.join(line))
                    self.delta.add_transition(line[0], line[1], line[i])
                line = file.readline().strip('\n')

    def print_states(self):
        """Print every state, one per line."""
        print("List of states:")
        for state in self.Q:
            print(state)

    def print_alphabet(self):
        """Print every alphabet symbol, one per line."""
        print("The alphabet of the FA:")
        for val in self.E:
            print(val)

    def print_initial_state(self):
        """Print the initial state."""
        print("The initial state of the FA: ", self.q0)

    def print_final_states(self):
        """Print every final state, one per line."""
        print("The list of final states:")
        for state in self.F:
            print(state)

    def print_transitions(self):
        """Print the transition function (delegates to Transitions.__str__)."""
        print("The list of transitions:")
        print(self.delta)

    def is_deterministic(self):
        """Return True if no state has two outgoing transitions on one symbol.

        NOTE(review): relies on each transition tuple holding the symbol at
        index 1 (see Transitions.get_transitions_with_start) — confirm.
        """
        for state in self.Q:
            transitions = self.delta.get_transitions_with_start(state)
            for transition in transitions:
                # transitions sharing this one's symbol, other than itself,
                # would make len(unique_transitions) < len(transitions) - 1
                unique_transitions = list(
                    filter(lambda x: x[1] != transition[1], transitions))
                if len(unique_transitions) != len(transitions) - 1:
                    return False
        return True

    def is_accepted(self, string):
        """Simulate the DFA on *string*; return True iff it ends in a final state.

        Raises Exception when the automaton is not deterministic.
        """
        if not self.is_deterministic():
            raise Exception("The FA is not deterministic")
        current_state = self.q0
        for i in range(len(string)):
            possible_transitions = self.delta.get_transitions_with_start(
                current_state)
            current_value = string[i]
            current_state = None
            for transition in possible_transitions:
                if transition[1] == current_value:
                    # transition[0] is presumably the destination state —
                    # TODO confirm against the Transitions class
                    current_state = transition[0]
            if current_state is None:
                # no transition on this symbol: reject
                return False
        if current_state not in self.F:
            return False
        return True
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr,
                  decoding, isUniform, combineBins, maxIter=0):
    """Run the epigenotyping pipeline with per-bin transition matrices.

    Steps: read input, bin positions, optionally merge sparse bins, classify
    each bin, then (unless decoding == 'N') build one transition matrix per
    bin, optionally refine them iteratively, decode each sample, and write the
    result table with a metadata header line.

    Parameters:
        inFileStr     -- path to the tab-separated weighted methylation file
        numProc       -- number of worker processes
        binSize       -- genomic bin width in bp
        outID         -- identifier used when building output file names
        parentLabelAr -- [mother_label, father_label, ...]
        decoding      -- decoding algorithm code; 'N' means no decoding
        isUniform     -- use uniform classification probabilities
        combineBins   -- threshold for merging bins (0 disables merging)
        maxIter       -- max transition-matrix refinement iterations
                         (default 0 = no refinement; new parameter, added with
                         a default so existing callers are unaffected —
                         previously this name was referenced but never defined,
                         causing a NameError whenever decoding ran)
    """
    # Metadata header; written as the first line of the output file.
    info = '#from_script: epigenotyping_pe_combbin.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; combine_bins_threshold:{:d}'.format(
        os.path.basename(inFileStr), bth_util.binSizeToStr(binSize),
        formatDecoding(decoding).lower().replace('and', ','),
        str(isUniform).lower(), combineBins)
    print('Weighted methylation file:', os.path.basename(inFileStr))
    print('Bin size:', bth_util.binSizeToStr(binSize))
    print('Mother label:', parentLabelAr[0])
    print('Father label:', parentLabelAr[1])
    print('Uniform classification probabilities:', str(isUniform))
    print('Decoding algorithm:', formatDecoding(decoding))
    print('Combine bin feature threshold:', combineBins)
    # build dataframe
    print(' Reading input file', os.path.basename(inFileStr))
    # header=1: the real column header is the file's second line.
    df = pd.read_table(inFileStr, header=1)
    # check parent labels
    checkParents(df['sample'], parentLabelAr)
    # group by bin
    df['bin'] = df.pos // binSize
    transformation = None
    # combine bins if necessary
    nbins = max(df['bin']) + 1
    if combineBins > 0:
        print(' Merging bins', end=' ... ')
        df['tBin'] = df['bin']
        transformation = binTransformation(df, combineBins)
        # apply the transformation
        df['bin'] = df['tBin'].apply(lambda x: transformation[x])
    dfBinGroup = df.groupby('bin')
    if combineBins > 0:
        newNBins = len(dfBinGroup.groups)
        print('combined {:d} non-functional bins'.format(nbins - newNBins))
    # classify by bin
    print(' Classifying {:d} bins with {:d} processors'.format(nbins, numProc))
    dfClass = runClassification(dfBinGroup, numProc, parentLabelAr, isUniform)
    dfClass.reset_index(inplace=True)
    del (df, dfBinGroup)
    # decode, if necessary
    if decoding != 'N':
        ignoreAr = parentLabelAr[:2] + ['MPV']
        print(' Obtaining initial transitions')
        transition = Transitions(dfClass, ignore=ignoreAr)
        transitionMatrix = transition.getTransitions()
        # one independent copy of the matrix per bin
        transitionMatrixArray = np.array(
            [np.copy(transitionMatrix) for i in range(nbins)])
        if maxIter > 0:
            print(' Iteratively improving transitions with maximum',
                  maxIter, 'iterations')
            at = AdaptiveTransitions(dfClass, transitionMatrixArray,
                                     ignoreAr, maxIter)
            # NOTE(review): the refined matrices are assumed to be reflected in
            # transitionMatrixArray (mutated in place or via this return) —
            # confirm against AdaptiveTransitions; runDecoding below uses the
            # array, not the returned matrix.
            iterations, transitionMatrix = at.run()
            # Fold convergence metadata into the output header.
            # (Fix: this previously appended to an undefined name 'trInfo',
            # raising NameError.)
            info += '; iterations_to_convergence:'
            if iterations == maxIter:
                info += 'NA'
                # Fix: message previously hard-coded "10" regardless of maxIter.
                print(' Did not converge in', maxIter, 'iterations')
            else:
                info += str(iterations)
                print(' Convergence in', iterations, 'iterations')
        # group by sample
        dfSampleGroup = dfClass.groupby('sample')
        nsamples = len(dfSampleGroup.groups)
        print(' {:s} decoding {:d} samples with {:d} processors'.format(
            formatDecoding(decoding), nsamples, numProc))
        dfOutput = runDecoding(dfSampleGroup, numProc, transitionMatrixArray,
                               decoding)
        dfOutput.set_index(['bin', 'sample'], inplace=True)
        del (dfSampleGroup)
    else:
        dfOutput = dfClass
    # write output
    outFileStr = determineOutputFileName(inFileStr, outID, binSize, decoding,
                                         isUniform, combineBins)
    # if combination, undo transformation by applying the predictions to additional bins
    if combineBins > 0:
        dfOutput.reset_index(inplace=True)
        dfOutput['cBin'] = dfOutput['bin']
        dfOutputT = undoBinTransformation(dfOutput, transformation)
    else:
        # NOTE(review): assumes a 'cBin' column exists even without merging;
        # drop() raises KeyError if absent — verify upstream output.
        dfOutputT = dfOutput.drop('cBin', axis=1)
    print(' Writing output to', outFileStr)
    with open(outFileStr, 'w') as f:
        f.write(info + '\n')
    dfOutputT.to_csv(outFileStr, sep='\t', mode='a')
    print('Done')
def processInputs(inFileStr, numProc, binSize, outID, parentLabelAr,
                  decoding, isUniform, combineBins, maxIter):
    """Drive one full epigenotyping run: bin, merge, classify, decode, write.

    Parameters:
        inFileStr     -- weighted methylation input file (tab-separated)
        numProc       -- worker process count
        binSize       -- bin width in bp
        outID         -- id string used in generated file names
        parentLabelAr -- [mother_label, father_label, ...]
        decoding      -- decoding algorithm code ('N' disables decoding)
        isUniform     -- use uniform classification probabilities
        combineBins   -- merge threshold for sparse bins (0 = off)
        maxIter       -- cap on transition-matrix optimization iterations
    """
    # Header line carrying run metadata; prepended to every output file.
    header = '#from_script: epigenotyping_combin_smp-iter-trans.py; in_file:{:s}; bin_size:{:s}; decoding:{:s}; uni_class_prob:{:s}; combine_bins_threshold:{:d}; maximum_iterations:{:d}'.format(
        os.path.basename(inFileStr), bth_util.binSizeToStr(binSize),
        formatDecoding(decoding).lower().replace('and', ','),
        str(isUniform).lower(), combineBins, maxIter)
    # Echo the run configuration.
    print('Weighted methylation file:', os.path.basename(inFileStr))
    print('Bin size:', bth_util.binSizeToStr(binSize))
    print('Mother label:', parentLabelAr[0])
    print('Father label:', parentLabelAr[1])
    print('Uniform classification probabilities:', str(isUniform))
    print('Decoding algorithm:', formatDecoding(decoding))
    print('Combine bin feature threshold:', combineBins)
    print('Maximum transition matrix iterations:', maxIter)
    # Load the data (real column header is on the file's second line).
    print(' Reading input file', os.path.basename(inFileStr))
    methData = pd.read_table(inFileStr, header=1)
    checkParents(methData['sample'], parentLabelAr)
    # Assign each position to a bin.
    methData['bin'] = methData.pos // binSize
    binMap = None
    totalBins = max(methData['bin']) + 1
    if combineBins > 0:
        # Merge sparse bins: remember originals in 'tBin', remap 'bin'.
        print(' Merging bins', end=' ... ')
        methData['tBin'] = methData['bin']
        binMap = binTransformation(methData, combineBins)
        methData['bin'] = methData['tBin'].apply(lambda b: binMap[b])
    binGroups = methData.groupby('bin')
    if combineBins > 0:
        print('combined {:d} non-functional bins'.format(
            totalBins - len(binGroups.groups)))
    # Per-bin classification.
    print(' Classifying {:d} bins with {:d} processors'.format(
        totalBins, numProc))
    classDf = runClassification(binGroups, numProc, parentLabelAr, isUniform)
    classDf.reset_index(inplace=True)
    # Raw data no longer needed; reclaim memory before decoding.
    del methData, binGroups
    if decoding == 'N':
        decoded = classDf
    else:
        # Parent samples and the mid-parent value are excluded from transitions.
        skipLabels = parentLabelAr[:2] + ['MPV']
        print(' Obtaining initial transitions')
        transMat = Transitions(classDf, ignore=skipLabels).getTransitions()
        transFile = determineTransFileName(inFileStr, outID, binSize,
                                           combineBins)
        # Seed the transition file with the metadata header only.
        with open(transFile, 'w') as handle:
            handle.write(header + '\n')
        sampleGroups = classDf.groupby('sample')
        sampleCount = len(sampleGroups.groups)
        print(' {:s} decoding and optimizing transition matrices for {:d} samples with {:d} processors'.format(
            formatDecoding(decoding), sampleCount, numProc))
        # Decoding also performs the iterative transition-matrix refinement.
        decoded = runDecoding(sampleGroups, numProc, transMat, decoding,
                              maxIter, transFile)
        decoded.set_index(['bin', 'sample'], inplace=True)
        del sampleGroups
    outFile = determineOutputFileName(inFileStr, outID, binSize, decoding,
                                      isUniform, combineBins)
    if combineBins > 0:
        # Expand merged-bin predictions back onto the original bins.
        decoded.reset_index(inplace=True)
        decoded['cBin'] = decoded['bin']
        finalDf = undoBinTransformation(decoded, binMap)
    else:
        finalDf = decoded.drop('cBin', axis=1)
    print(' Writing output to', outFile)
    # Header first, then append the table so the comment line survives to_csv.
    with open(outFile, 'w') as handle:
        handle.write(header + '\n')
    finalDf.to_csv(outFile, sep='\t', mode='a')
    print('Done')