def load(self, dictionary):
    """Populate this state from a parsed configuration dictionary.

    Runs ``ConfigObject.load`` first, then reads the state name, its
    one-character label, the start/end probabilities and the emission table.

    Required keys: ``emission``, ``name``, ``startprob``, ``endprob``.
    Optional keys: ``serialize``, ``onechar``.

    Raises:
        ParseException: if a required key is missing, or if ``onechar`` is
            present but its length is not exactly 1.
    """
    ConfigObject.load(self, dictionary)

    # Checked in this order so the first missing key decides the message.
    required = (
        ("emission", "Emission not found in state"),
        ("name", "Name not found in state"),
        ("startprob", "startprob not found in state"),
        ("endprob", "endprob not found in state"),
    )
    for field, message in required:
        if field not in dictionary:
            raise ParseException(message)

    if "serialize" in dictionary:
        self.serialize = dictionary["serialize"]
    self.stateName = dictionary["name"]

    if "onechar" not in dictionary:
        # No explicit label: use the first character of the name, or "?"
        # when the name is empty.
        self.onechar = self.stateName[0] if len(self.stateName) > 0 else "?"
    elif len(dictionary['onechar']) != 1:
        raise ParseException('onechar has wrong length')
    else:
        self.onechar = dictionary["onechar"]

    self.startProbability = self.mathType(dictionary["startprob"])
    self.endProbability = self.mathType(dictionary["endprob"])

    self.emissions = {}
    for symbol, weight in dictionary["emission"]:
        # Lists are not hashable, so list-typed keys are stored as tuples.
        if symbol.__class__.__name__ == "list":
            symbol = tuple(symbol)
        try:
            converted = self.mathType(weight)
        except ValueError:
            # Values that mathType cannot convert are stored unchanged.
            converted = weight
        self.emissions[symbol] = converted
def loadTransitions(self, dictionary):
    """Register every transition listed under ``dictionary["transitions"]``.

    Each entry must provide ``from``, ``to`` and ``prob``. State names are
    translated through ``self.statenameToID``, the probability is converted
    with ``self.mathType``, and the result is passed to
    ``self.addTransition``.

    Raises:
        ParseException: if ``transitions`` is missing or an entry lacks one
            of the three required keys.
    """
    if "transitions" not in dictionary:
        raise ParseException("transitions are missing in HMM object")

    needed = ("from", "to", "prob")
    for edge in dictionary["transitions"]:
        if not all(field in edge for field in needed):
            raise ParseException("transitions are not properly defined")
        source_id = self.statenameToID[edge["from"]]
        target_id = self.statenameToID[edge["to"]]
        probability = self.mathType(edge["prob"])
        self.addTransition(source_id, target_id, probability)
def load(self, dictionary):
    """Load this state's settings from a configuration dictionary.

    Runs ``State.load`` first, then reads the mandatory ``backgroundprob``,
    ``time`` and ``transitionmatrix`` entries and the optional
    ``consensusdistribution``, ``repeatlengthdistribution``,
    ``trackemissions``, ``version`` (default ``'v1'``) and ``repprob``
    entries. Finally builds ``self.factory`` (a ``RepeatProfileFactory``)
    and copies the background probability, time and transition matrix
    onto it.

    Raises:
        ParseException: if a mandatory entry is missing.
    """
    State.load(self, dictionary)

    def require(key, message):
        # Return a mandatory entry or fail with the caller's message.
        if key not in dictionary:
            raise ParseException(message)
        return dictionary[key]

    self.backgroundProbability = list(map(tuple, require(
        'backgroundprob',
        "Backround probability was not found in state",
    )))
    self.time = require('time', 'Time was not found in state')
    self.transitionMatrix = require(
        'transitionmatrix',
        'Transition matrix not found in state',
    )

    if 'consensusdistribution' in dictionary:
        normalized = normalize_dict(
            dictionary['consensusdistribution'],
            mathType=self.mathType
        )
        self.consensusDistribution = default_dist(normalized)
    else:
        # Without a configured distribution every lookup yields mathType(1.0).
        self.consensusDistribution = defaultdict(lambda *x: self.mathType(1.0))

    if 'repeatlengthdistribution' in dictionary:
        configured = dictionary['repeatlengthdistribution']
        if type(configured) in [dict, defaultdict]:
            # Plain mappings are normalized and wrapped.
            self.repeatLengthDistribution = default_dist(normalize_dict(
                configured,
                mathType=self.mathType
            ))
        else:
            # Any other object is used as-is and also supplies repProb
            # through its ``p`` attribute.
            self.repeatLengthDistribution = configured
            self.repProb = self.repeatLengthDistribution.p

    if 'trackemissions' in dictionary:
        self.trackEmissions = dictionary['trackemissions']

    self.version = dictionary['version'] if 'version' in dictionary else 'v1'

    # An explicit 'repprob' overrides the value taken from the distribution.
    # NOTE(review): if neither source sets repProb, the factory call below
    # relies on the attribute existing already -- confirm against the class.
    if 'repprob' in dictionary:
        self.repProb = self.mathType(dictionary['repprob'])

    if self.version == 'v2':
        # v2 replaces both tables with defaultdicts yielding mathType(1.0).
        self.trackEmissions = defaultdict(lambda *_: self.mathType(1.0))
        self.trackEmissions['MM'] = self.mathType(1.0)
        self.repeatLengthDistribution = defaultdict(
            lambda *_: self.mathType(1.0))
        self.repeatLengthDistribution[10] = self.mathType(1.0)

    self.factory = factory = RepeatProfileFactory(
        self.mathType, self.version, self.repProb)
    # NOTE(review): 'backgroudProbability' (sic) is the attribute name the
    # original code sets; presumably the factory reads that exact spelling.
    factory.backgroudProbability = self.backgroundProbability
    factory.time = self.time
    factory.transitionMatrix = self.transitionMatrix
def BackgroundProbabilityGenerator(dictionary, mathType):
    """Build a list of ``(key, probability)`` pairs over an alphabet.

    Args:
        dictionary: configuration with a mandatory ``alphabet`` and optional
            ``track`` (default 0), ``tracks`` (default 1) and
            ``distribution`` (anything accepted by ``dict()``, mapping each
            symbol to its probability).
        mathType: callable applied to the uniform probability
            ``1 / len(alphabet)``. Values taken from ``distribution`` are
            returned as given, without conversion.

    Returns:
        One pair per alphabet symbol, in alphabet order. With a single track
        the key is the symbol itself; otherwise it is a tuple of ``tracks``
        empty strings with the symbol placed at index ``track``.

    Raises:
        ParseException: if ``alphabet`` is missing.
    """
    if "alphabet" not in dictionary:
        raise ParseException("Alphabet not found in background probability")

    track = dictionary['track'] if "track" in dictionary else 0
    tracks = dictionary['tracks'] if "tracks" in dictionary else 1
    distribution = None
    if 'distribution' in dictionary:
        distribution = dict(dictionary['distribution'])

    alphabet = dictionary['alphabet']
    uniform = mathType(1.0 / float(len(alphabet)))

    output = []
    for symbol in alphabet:
        weight = uniform if distribution is None else distribution[symbol]
        if tracks == 1:
            key = symbol
        else:
            # Multi-track key: the symbol sits in its own track, the other
            # tracks are empty strings.
            slots = [""] * tracks
            slots[track] = symbol
            key = tuple(slots)
        output.append((key, weight))
    return output
def load(self, dictionary):
    """Load the state and its ``durations`` list from *dictionary*.

    Runs ``State.load`` first, then stores every entry of
    ``dictionary["durations"]`` as a tuple in ``self.durations``.

    Raises:
        ParseException: if ``durations`` is missing.
    """
    State.load(self, dictionary)
    if "durations" not in dictionary:
        raise ParseException(
            "durations were not found in GeneralizedState")
    # Built in one pass so the attribute is only assigned once every entry
    # has been converted.
    self.durations = [tuple(duration) for duration in dictionary["durations"]]
def JukesCantorGenerator(dictionary, mathType):
    """Build a list of ``((x, y), probability)`` pairs from two JC models.

    For every symbol ``c`` of the alphabet, each pair ``(x, px)`` yielded by
    ``JCModel(c, timeX, alphabet)`` is combined with each pair ``(y, py)``
    yielded by ``JCModel(c, timeY, alphabet)``, and
    ``background[c] * px * py`` is accumulated under the key ``(x, y)``.

    Args:
        dictionary: configuration with ``alphabet``, ``timeX``, ``timeY``
            and ``backgroundprob`` (an iterable of ``(symbol, value)``
            pairs).
        mathType: zero-argument callable providing the initial value of each
            accumulator (used as the ``defaultdict`` factory).

    Returns:
        A list of ``(key, value)`` items of the accumulated table.

    Raises:
        ParseException: if any of the required entries is missing.
    """
    if "alphabet" not in dictionary:
        raise ParseException("Alphabet not found for JC model")
    if "timeX" not in dictionary or "timeY" not in dictionary:
        raise ParseException("Time not found for JC model")
    if "backgroundprob" not in dictionary:
        raise ParseException("backgroundprob not in JC model")

    alphabet = dictionary["alphabet"]
    timeX = dictionary["timeX"]
    timeY = dictionary["timeY"]
    background = dict()
    for (key, value) in dictionary["backgroundprob"]:
        background[key] = value

    dst = defaultdict(mathType)
    for c in alphabet:
        for (cc, prob) in JCModel(c, timeX, alphabet):
            for (ccc, prob2) in JCModel(c, timeY, alphabet):
                dst[(cc, ccc)] += background[c] * prob * prob2
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # list() gives the same concrete list on either version.
    return list(dst.items())
def load(self, dictionary):
    """Load the state, then set up its data preparer and classifier.

    Runs ``ClassifierState.load`` and maps ``self.onechar`` to a sequence
    index (``'X'`` -> 0, ``'Y'`` -> 1). That index is passed to
    ``self._get_preparer``; the classifier comes from
    ``self._get_classifier``.

    Returns:
        Whatever ``ClassifierState.load`` returned.

    Raises:
        ParseException: if ``self.onechar`` is neither ``'X'`` nor ``'Y'``.
    """
    res = ClassifierState.load(self, dictionary)
    if self.onechar == 'X':
        seq = 0
    elif self.onechar == 'Y':
        seq = 1
    else:
        raise ParseException('Invalid state onechar')
    # The original assigned self.dp twice in one chained statement; a single
    # assignment is sufficient.
    self.dp = self._get_preparer(seq)
    self.clf = self._get_classifier()
    return res
def getSequences(self, fname, sequence_regexp=None):
    """Return the first two sequences of the first alignment in *fname*.

    Args:
        fname: file passed to ``Fasta.load``.
        sequence_regexp: list of patterns passed to ``Fasta.load``; defaults
            to ``["^sequence1$", "^sequence2$"]``. The value actually used
            is stored in ``self.sequence_regexp``.

    Returns:
        Tuple ``(seq1, seq2)`` taken from ``aln.sequences[0]`` and
        ``aln.sequences[1]``.

    Raises:
        ParseException: if the file yields no alignment or the alignment has
            fewer than two sequences.
    """
    alignment_regexp = ''
    if sequence_regexp is None:
        sequence_regexp = ["^sequence1$", "^sequence2$"]
    self.sequence_regexp = sequence_regexp

    alignments = Fasta.load(
        fname, alignment_regexp, Alignment, sequence_regexp)
    # A default is required here: plain next() raises StopIteration when
    # nothing is yielded, which made the `aln is None` check unreachable.
    aln = next(alignments, None)
    if aln is None or len(aln.sequences) < 2:
        raise ParseException('Not enough sequences in file\n')
    seq1 = aln.sequences[0]
    seq2 = aln.sequences[1]
    return seq1, seq2
def load(self, dictionary):
    """Configure the distribution from explicit parameters or from data.

    Runs ``ConfigObject.load`` first. ``start`` is read as a float when
    present and defaults to 0. Then:

    * if ``data`` is present, ``fractionssize`` is required and
      ``self.train(data, int(fractionssize), start)`` is called;
    * otherwise ``p`` and ``fractions`` are required and
      ``self.setParams(float(p), start, fractions)`` is called.

    Raises:
        ParseException: if an entry required by the chosen mode is missing.
    """
    ConfigObject.load(self, dictionary)
    start = float(dictionary['start']) if 'start' in dictionary else 0

    if 'data' in dictionary:
        # The user supplied data: train on it.
        samples = dictionary['data']
        if 'fractionssize' not in dictionary:
            raise ParseException(
                'Number of fractions is missing in RepeatLengthDistribution'
            )
        self.train(samples, int(dictionary['fractionssize']), start)
        return

    # No data: the parameters must be given explicitly.
    if 'p' not in dictionary:
        raise ParseException(
            'Probability is missing in RepeatLengthDistribution')
    probability = float(dictionary['p'])
    if 'fractions' not in dictionary:
        raise ParseException(
            'Fractions are missing in RepeatLengthDistribution')
    self.setParams(probability, start, dictionary['fractions'])
def load(self, dictionary):
    """Load the annotation names and their per-sequence files.

    Runs ``ConfigObject.load`` first. ``self.annotations`` is taken from
    ``dictionary['annotations']``. ``self.sequences`` maps each sequence
    name to a dict ``{annotation id: (file, offset)}``, keeping only
    annotations whose id is in ``self.annotations``. A missing ``offset``
    defaults to 0.

    Raises:
        ParseException: if ``sequences`` or ``annotations`` is missing.
    """
    ConfigObject.load(self, dictionary)
    if "sequences" not in dictionary:
        raise ParseException("Sequences not in AnnotationConfig")
    # 'annotations' is read unconditionally below, so validate it the same
    # way as 'sequences' instead of surfacing a bare KeyError.
    if "annotations" not in dictionary:
        raise ParseException("Annotations not in AnnotationConfig")

    self.annotations = dictionary['annotations']
    self.sequences = dict()
    for sequence in dictionary["sequences"]:
        per_sequence = self.sequences[sequence['name']] = dict()
        for annotation in sequence['annotations']:
            if annotation['id'] not in self.annotations:
                # Annotations that were not declared are skipped.
                continue
            offset = annotation['offset'] if 'offset' in annotation else 0
            per_sequence[annotation['id']] = (annotation['file'], offset)
def load(self, dictionary):
    """Load the state and store its mandatory ``order`` entry.

    Runs ``State.load`` first, then copies ``dictionary['order']`` to
    ``self.order``.

    Raises:
        ParseException: if ``order`` is missing.
    """
    State.load(self, dictionary)
    if 'order' in dictionary:
        self.order = dictionary['order']
    else:
        raise ParseException('order was not found in state')
def load(self, dictionary):
    """Load the object and store ``dictionary['val']`` as a float.

    Runs ``ConfigObject.load`` first, then sets ``self.value``.

    Raises:
        ParseException: if ``val`` is missing.
    """
    ConfigObject.load(self, dictionary)
    if 'val' not in dictionary:
        raise ParseException("Value ('val') not found in state")
    raw_value = dictionary['val']
    self.value = float(raw_value)
def loadStates(self, dictionary):
    """Pass every entry of ``dictionary["states"]`` to ``self.addState``.

    Entries are added in the order they are listed.

    Raises:
        ParseException: if ``states`` is missing.
    """
    if "states" not in dictionary:
        raise ParseException("states are missing in HMM object")
    listed_states = dictionary["states"]
    for state_entry in listed_states:
        self.addState(state_entry)
def _prepare_base(self, base):
    """Return the value stored for *base* in ``self.m``.

    Raises:
        ParseException: if *base* is not in ``self.m``.
    """
    if base in self.m:
        return self.m[base]
    raise ParseException('Invalid base')
def check_base(b):
    """Validate *b* against ``constants.bases``.

    A base is accepted when it is contained in ``constants.bases`` and is
    not the ``'-'`` character. Returns ``None`` on success.

    Raises:
        ParseException: if *b* is not accepted.
    """
    accepted = b in constants.bases and b != '-'
    if not accepted:
        raise ParseException('Invalid base')