Exemple #1
0
 def load(self, dictionary):
     """Populate this state from a parsed configuration dictionary.

     Required keys are "emission", "name", "startprob" and "endprob".
     Optional keys are "serialize" and "onechar".  When "onechar" is
     absent, the first character of the state name is used, or "?" if
     the name is empty.

     "emission" is iterated as (key, probability) pairs.  Keys that are
     lists (or list subclasses) are converted to tuples so that they can
     serve as dictionary keys.

     Raises:
         ParseException: if a required key is missing or "onechar" does
             not have length 1.
     """
     ConfigObject.load(self, dictionary)
     if "emission" not in dictionary:
         raise ParseException("Emission not found in state")
     if "name" not in dictionary:
         raise ParseException("Name not found in state")
     if "startprob" not in dictionary:
         raise ParseException("startprob not found in state")
     if "endprob" not in dictionary:
         raise ParseException("endprob not found in state")
     if "serialize" in dictionary:
         self.serialize = dictionary["serialize"]
     self.stateName = dictionary["name"]
     if "onechar" in dictionary:
         if len(dictionary['onechar']) != 1:
             raise ParseException('onechar has wrong length')
         self.onechar = dictionary["onechar"]
     elif len(self.stateName) > 0:
         self.onechar = self.stateName[0]
     else:
         self.onechar = "?"
     self.startProbability = self.mathType(dictionary["startprob"])
     self.endProbability = self.mathType(dictionary["endprob"])
     self.emissions = dict()
     for key, prob in dictionary["emission"]:
         # Lists are unhashable; isinstance also covers list subclasses,
         # which a comparison of the class name would miss.
         if isinstance(key, list):
             key = tuple(key)
         try:
             self.emissions[key] = self.mathType(prob)
         except ValueError:
             # mathType could not convert the value; store it unchanged.
             self.emissions[key] = prob
Exemple #2
0
 def loadTransitions(self, dictionary):
     """Register every transition listed under "transitions".

     Each transition must provide "from", "to" and "prob".  The two state
     names are translated through self.statenameToID, the probability is
     converted with self.mathType, and the result is handed to
     self.addTransition.

     Raises:
         ParseException: if "transitions" is missing or an entry lacks
             one of the required fields.
     """
     if "transitions" not in dictionary:
         raise ParseException("transitions are missing in HMM object")
     required = ("from", "to", "prob")
     for entry in dictionary["transitions"]:
         if not all(field in entry for field in required):
             raise ParseException("transitions are not properly defined")
         source = self.statenameToID[entry["from"]]
         target = self.statenameToID[entry["to"]]
         weight = self.mathType(entry["prob"])
         self.addTransition(source, target, weight)
Exemple #3
0
 def load(self, dictionary):
     """Load the repeat-state configuration from a parsed dictionary.

     In addition to whatever State.load consumes, the keys
     'backgroundprob', 'time' and 'transitionmatrix' are required.
     Optional keys are 'consensusdistribution',
     'repeatlengthdistribution', 'trackemissions', 'version' (defaults
     to 'v1') and 'repprob'.

     For version 'v2', trackEmissions and repeatLengthDistribution are
     replaced by defaultdicts that yield mathType(1.0) for any key.
     Finally a RepeatProfileFactory is created and given the background
     probability, time and transition matrix.

     Raises:
         ParseException: if a required key is missing.
     """
     State.load(self, dictionary)
     if 'backgroundprob' not in dictionary:
         raise ParseException(
             "Background probability was not found in state")
     self.backgroundProbability = [tuple(x)
                                   for x in dictionary['backgroundprob']]
     if 'time' not in dictionary:
         raise ParseException('Time was not found in state')
     self.time = dictionary['time']
     if 'transitionmatrix' not in dictionary:
         raise ParseException('Transition matrix not found in state')
     self.transitionMatrix = dictionary['transitionmatrix']
     if 'consensusdistribution' in dictionary:
         self.consensusDistribution = default_dist(normalize_dict(
             dictionary['consensusdistribution'],
             mathType=self.mathType
         ))
     else:
         self.consensusDistribution = defaultdict(
             lambda *x: self.mathType(1.0))
     if 'repeatlengthdistribution' in dictionary:
         length_dist = dictionary['repeatlengthdistribution']
         # isinstance accepts every dict subclass (defaultdict,
         # OrderedDict, ...), not only the two exact types.
         if isinstance(length_dist, dict):
             self.repeatLengthDistribution = default_dist(normalize_dict(
                 length_dist,
                 mathType=self.mathType
             ))
         else:
             # Non-dict values are stored as-is and must expose `.p`.
             self.repeatLengthDistribution = length_dist
             self.repProb = length_dist.p
     if 'trackemissions' in dictionary:
         self.trackEmissions = dictionary['trackemissions']
     if 'version' in dictionary:
         self.version = dictionary['version']
     else:
         self.version = 'v1'
     # An explicit 'repprob' overrides the value taken from the
     # distribution object above.
     if 'repprob' in dictionary:
         self.repProb = self.mathType(dictionary['repprob'])
     if self.version == 'v2':
         self.trackEmissions = defaultdict(lambda *_: self.mathType(1.0))
         self.trackEmissions['MM'] = self.mathType(1.0)
         self.repeatLengthDistribution = defaultdict(
             lambda *_: self.mathType(1.0))
         self.repeatLengthDistribution[10] = self.mathType(1.0)
     self.factory = RepeatProfileFactory(self.mathType, self.version,
                                         self.repProb)
     # NOTE(review): the attribute is spelled 'backgroudProbability';
     # kept unchanged because the factory class is not visible here --
     # confirm which spelling the factory actually reads.
     self.factory.backgroudProbability = self.backgroundProbability
     self.factory.time = self.time
     self.factory.transitionMatrix = self.transitionMatrix
Exemple #4
0
def BackgroundProbabilityGenerator(dictionary, mathType):
    """Build a list of (symbol, probability) pairs for an alphabet.

    Args:
        dictionary: configuration with the required key "alphabet" and
            the optional keys "distribution" (anything accepted by
            dict(), mapping symbol to probability), "tracks" (number of
            tracks, default 1) and "track" (index of the track the
            symbol is placed on, default 0).
        mathType: callable used to convert the uniform probability.

    Returns:
        A list with one (symbol, probability) pair per alphabet symbol.
        With more than one track the symbol is a tuple of length
        "tracks" holding the character at position "track" and empty
        strings elsewhere.  Without "distribution" every symbol gets
        mathType(1 / len(alphabet)); with it, the probability is looked
        up in the distribution unchanged.

    Raises:
        ParseException: if "alphabet" is missing.
        KeyError: if a distribution is given but lacks a symbol.
    """
    if "alphabet" not in dictionary:
        raise ParseException("Alphabet not found in background probability")
    tracks = dictionary['tracks'] if "tracks" in dictionary else 1
    track = dictionary['track'] if "track" in dictionary else 0
    distribution = None
    if 'distribution' in dictionary:
        distribution = dict(dictionary['distribution'])
    alphabet = dictionary['alphabet']
    # The uniform probability is only needed when no explicit distribution
    # is given; skipping it otherwise also avoids dividing by zero for an
    # empty alphabet.
    uniform = None
    if distribution is None and len(alphabet) > 0:
        uniform = mathType(1.0 / float(len(alphabet)))
    output = []
    for c in alphabet:
        p = uniform if distribution is None else distribution[c]
        if tracks == 1:
            output.append((c, p))
        else:
            cc = [""] * tracks
            cc[track] = c
            output.append((tuple(cc), p))
    return output
Exemple #5
0
 def load(self, dictionary):
     """Load the state and its mandatory "durations" entry.

     Every item of "durations" is converted to a tuple and the resulting
     list is stored in self.durations.

     Raises:
         ParseException: if "durations" is missing.
     """
     State.load(self, dictionary)
     if "durations" not in dictionary:
         raise ParseException(
             "durations were not found in GeneralizedState")
     self.durations = [tuple(entry) for entry in dictionary["durations"]]
Exemple #6
0
def JukesCantorGenerator(dictionary, mathType):
    """Build a pair distribution from the JC model and a background.

    Args:
        dictionary: configuration with the keys "alphabet", "timeX",
            "timeY" and "backgroundprob" (an iterable of
            (symbol, probability) pairs).
        mathType: zero-argument-callable numeric type used as the
            default value of the accumulator.

    Returns:
        A list of ((x, y), probability) pairs.  For every alphabet symbol
        c, each pair produced by JCModel(c, timeX, alphabet) is combined
        with each pair produced by JCModel(c, timeY, alphabet), weighted
        by the background probability of c, and summed per (x, y).

    Raises:
        ParseException: if a required key is missing.
    """
    if "alphabet" not in dictionary:
        raise ParseException("Alphabet not found for JC model")
    if "timeX" not in dictionary or "timeY" not in dictionary:
        raise ParseException("Time not found for JC model")
    if "backgroundprob" not in dictionary:
        raise ParseException("backgroundprob not in JC model")
    alphabet = dictionary["alphabet"]
    timeX = dictionary["timeX"]
    timeY = dictionary["timeY"]
    background = {key: value
                  for (key, value) in dictionary["backgroundprob"]}
    dst = defaultdict(mathType)
    for c in alphabet:
        for (cc, prob) in JCModel(c, timeX, alphabet):
            for (ccc, prob2) in JCModel(c, timeY, alphabet):
                dst[(cc, ccc)] += background[c] * prob * prob2
    # dict.iteritems() exists only on Python 2; items() works on both.
    return list(dst.items())
Exemple #7
0
 def load(self, dictionary):
     """Load the classifier state and set up its preparer and classifier.

     After delegating to ClassifierState.load, self.onechar selects the
     sequence index passed to self._get_preparer: 'X' maps to 0 and 'Y'
     maps to 1.

     Returns:
         Whatever ClassifierState.load returned.

     Raises:
         ParseException: if self.onechar is neither 'X' nor 'Y'.
     """
     res = ClassifierState.load(self, dictionary)
     if self.onechar == 'X':
         seq = 0
     elif self.onechar == 'Y':
         seq = 1
     else:
         raise ParseException('Invalid state onechar')
     # Assigned once; the original repeated the target (`self.dp = self.dp =`).
     self.dp = self._get_preparer(seq)
     self.clf = self._get_classifier()
     return res
Exemple #8
0
 def getSequences(self, fname, sequence_regexp=None):
     """Return the first two sequences of the first alignment in a file.

     Args:
         fname: file handed to Fasta.load.
         sequence_regexp: list of regular expressions selecting the
             sequences; defaults to ["^sequence1$", "^sequence2$"].  The
             value used is also stored in self.sequence_regexp.

     Returns:
         A (seq1, seq2) tuple taken from the alignment's `sequences`.

     Raises:
         ParseException: if no alignment could be loaded or it contains
             fewer than two sequences.
     """
     alignment_regexp = ''
     if sequence_regexp is None:
         sequence_regexp = ["^sequence1$", "^sequence2$"]
     self.sequence_regexp = sequence_regexp
     # A default for next() turns "nothing loaded" into None, so the check
     # below reports it as a ParseException instead of leaking
     # StopIteration to the caller.
     aln = next(
         Fasta.load(fname, alignment_regexp, Alignment, sequence_regexp),
         None)
     if aln is None or len(aln.sequences) < 2:
         raise ParseException('Not enough sequences in file\n')
     seq1 = aln.sequences[0]
     seq2 = aln.sequences[1]
     return seq1, seq2
 def load(self, dictionary):
     """Configure the distribution from explicit parameters or from data.

     'start' is optional (converted with float, default 0).  When 'data'
     is present the distribution is trained on it and 'fractionssize' is
     required; otherwise 'p' and 'fractions' are required and passed to
     self.setParams.

     Raises:
         ParseException: if a key required by the chosen mode is missing.
     """
     ConfigObject.load(self, dictionary)
     start = float(dictionary['start']) if 'start' in dictionary else 0

     # The presence of 'data' tells whether the user supplied training data.
     if 'data' in dictionary:
         data = dictionary['data']
         if 'fractionssize' not in dictionary:
             raise ParseException(
                 'Number of fractions is missing in RepeatLengthDistribution'
             )
         self.train(data, int(dictionary['fractionssize']), start)
         return

     if 'p' not in dictionary:
         raise ParseException(
             'Probability is missing in RepeatLengthDistribution')
     p = float(dictionary['p'])
     if 'fractions' not in dictionary:
         raise ParseException(
             'Fractions are missing in RepeatLengthDistribution')
     self.setParams(p, start, dictionary['fractions'])
Exemple #10
0
    def load(self, dictionary):
        """Load the annotation configuration.

        self.annotations is taken from 'annotations'.  self.sequences maps
        each sequence name to a dict from annotation id to a
        (file, offset) tuple; annotations whose id is not in
        self.annotations are skipped and a missing 'offset' counts as 0.

        Raises:
            ParseException: if "sequences" is missing.
        """
        ConfigObject.load(self, dictionary)
        if "sequences" not in dictionary:
            raise ParseException("Sequences not in AnnotationConfig")

        self.annotations = dictionary['annotations']
        self.sequences = dict()

        for sequence in dictionary["sequences"]:
            files = dict()
            self.sequences[sequence['name']] = files
            for annotation in sequence['annotations']:
                if annotation['id'] not in self.annotations:
                    continue
                offset = annotation['offset'] if 'offset' in annotation else 0
                files[annotation['id']] = (annotation['file'], offset)
 def load(self, dictionary):
     """Load the state and store its mandatory 'order' entry.

     Raises:
         ParseException: if 'order' is missing.
     """
     State.load(self, dictionary)
     if 'order' in dictionary:
         self.order = dictionary['order']
     else:
         raise ParseException('order was not found in state')
Exemple #12
0
 def load(self, dictionary):
     """Load the object and store 'val', converted to float, in self.value.

     Raises:
         ParseException: if 'val' is missing.
     """
     ConfigObject.load(self, dictionary)
     if 'val' in dictionary:
         self.value = float(dictionary['val'])
         return
     raise ParseException("Value ('val') not found in state")
Exemple #13
0
 def loadStates(self, dictionary):
     """Pass every entry of "states" to self.addState, in order.

     Raises:
         ParseException: if "states" is missing.
     """
     if "states" in dictionary:
         for entry in dictionary["states"]:
             self.addState(entry)
         return
     raise ParseException("states are missing in HMM object")
Exemple #14
0
 def _prepare_base(self, base):
     if base not in self.m:
         raise ParseException('Invalid base')
     return self.m[base]
Exemple #15
0
def check_base(b):
    """Validate a single base symbol.

    A base is accepted when it is contained in constants.bases and is not
    the '-' character.

    Raises:
        ParseException: if the base is not accepted.
    """
    accepted = b in constants.bases and b != '-'
    if not accepted:
        raise ParseException('Invalid base')