Example #1
0
 def load(self, dictionary):
     """Initialise this state from a parsed model dictionary.

     Required keys are 'backgroundprob', 'time' and 'transitionmatrix'.
     Optional keys are 'consensusdistribution',
     'repeatlengthdistribution', 'trackemissions', 'version' (defaults
     to 'v1') and 'repprob'.  At the end a RepeatProfileFactory is
     created and handed the background probability, time and
     transition matrix.

     Raises:
         ParseException: if one of the required keys is missing.
     """
     State.load(self, dictionary)

     # --- required entries -------------------------------------------
     if 'backgroundprob' not in dictionary:
         raise ParseException(
             "Background probability was not found in state")
     self.backgroundProbability = [tuple(x)
                                   for x in dictionary['backgroundprob']]
     if 'time' not in dictionary:
         raise ParseException('Time was not found in state')
     self.time = dictionary['time']
     if 'transitionmatrix' not in dictionary:
         raise ParseException('Transition matrix not found in state')
     self.transitionMatrix = dictionary['transitionmatrix']

     # --- optional entries -------------------------------------------
     if 'consensusdistribution' in dictionary:
         self.consensusDistribution = default_dist(normalize_dict(
             dictionary['consensusdistribution'],
             mathType=self.mathType
         ))
     else:
         # No distribution supplied: every lookup yields mathType(1.0).
         self.consensusDistribution = defaultdict(
             lambda *x: self.mathType(1.0))

     if 'repeatlengthdistribution' in dictionary:
         distribution = dictionary['repeatlengthdistribution']
         # isinstance (rather than an exact type comparison) so that any
         # dict subclass is normalised like a plain dict.
         if isinstance(distribution, dict):
             self.repeatLengthDistribution = default_dist(normalize_dict(
                 distribution,
                 mathType=self.mathType
             ))
         else:
             # Anything else is stored as is and must expose a `p`
             # attribute, which becomes the repeat probability.
             self.repeatLengthDistribution = distribution
             self.repProb = self.repeatLengthDistribution.p

     if 'trackemissions' in dictionary:
         self.trackEmissions = dictionary['trackemissions']

     if 'version' in dictionary:
         self.version = dictionary['version']
     else:
         self.version = 'v1'

     # An explicit 'repprob' wins over the value taken from the
     # distribution object above.
     if 'repprob' in dictionary:
         self.repProb = self.mathType(dictionary['repprob'])

     if self.version == 'v2':
         # Version 'v2' discards whatever was loaded for the track
         # emissions and the repeat length distribution and replaces
         # both with defaultdicts that yield mathType(1.0).
         self.trackEmissions = defaultdict(lambda *_: self.mathType(1.0))
         self.trackEmissions['MM'] = self.mathType(1.0)
         self.repeatLengthDistribution = defaultdict(
             lambda *_: self.mathType(1.0))
         self.repeatLengthDistribution[10] = self.mathType(1.0)

     self.factory = RepeatProfileFactory(self.mathType, self.version,
                                         self.repProb)
     # NOTE(review): the attribute spelling 'backgroudProbability' is kept
     # exactly as in the original code; confirm it against
     # RepeatProfileFactory before renaming.
     self.factory.backgroudProbability = self.backgroundProbability
     self.factory.time = self.time
     self.factory.transitionMatrix = self.transitionMatrix
Example #2
0
 def improveModel(self, transitions, emissions):
     """Re-estimate background, time and transition matrix.

     The cache is cleared first.  The background probability is the
     normalised content of emissions['I'].  The time is derived from
     the share of emissions['M'][1] in the sum of all emissions['M']
     values.  `transitions` is normalised in place (each entry is
     divided by the total of all entries whose key shares the same
     key[:-1] prefix) and then stored.  Every value is mirrored onto
     self.factory.
     """
     self.clearCache()

     # Background distribution, as a list of (key, value) pairs.
     background = list(
         normalize_dict(emissions['I'], self.mathType).iteritems())
     self.backgroundProbability = background
     self.factory.backgroudProbability = background

     # NOTE(review): the expression below has the form of a
     # Jukes-Cantor style correction; confirm the intended model.
     match_emissions = emissions['M']
     match_share = match_emissions[1] / sum(match_emissions.values())
     corrected = (self.mathType(4.0) * match_share - 1.0) / 3.0
     divergence = -3.0 / 4.0 * math.log(corrected)
     self.time = divergence
     self.factory.time = divergence

     # Sum the entries per key prefix, then divide each entry by the
     # sum of its own prefix group.
     prefix_totals = defaultdict(self.mathType)
     for key, amount in transitions.iteritems():
         prefix = key[:-1]
         prefix_totals[prefix] += amount
     for key in transitions:
         prefix = key[:-1]
         transitions[key] /= prefix_totals[prefix]

     self.transitionMatrix = transitions
     self.factory.transitionMatrix = transitions
Example #3
0
def main(model_file, additional_parameters,
         emmisions_file, transitions_file, repeat_consensus_file,
         repeat_length_file, trf_cover_file, output_file, simple_model):
    """Assemble a model from statistics files and dump it as JSON.

    The statistics are registered with an HMMLoader, `model_file` is
    loaded through it, and the resulting model's toJSON() output is
    written to `output_file` under the key 'model'.

    When `simple_model` is false, repeat-related values derived from
    `trf_cover_file` are registered too, the transition table gets the
    repeat-state entries, and the consensus / repeat length files are
    attached to the loader.

    Returns:
        `output_file`, unchanged.
    """
    loader = HMMLoader()

    with Open(trf_cover_file, 'r') as handle:
        trf_cover = json.load(handle)

    if not simple_model:
        segment_count = trf_cover['R_segment_count']
        repeat_probability = (
            float(segment_count) / (segment_count + trf_cover['M_count'])
        )
        repeat_count = sum(trf_cover[key] for key in ('RR', 'RM', 'MR'))
        # Shares are computed in the order RR, MR, RM.
        shares = dict(
            (key, float(trf_cover[key]) / repeat_count)
            for key in ('RR', 'MR', 'RM')
        )
        # The 'RR' entry is pinned to 0.0 here; the measured share
        # (shares['RR']) is deliberately not used.
        track_emissions = {
            'RR': 0.0,
            'RM': shares['RM'],
            'MR': shares['MR'],
        }
        loader.addDictionary('trackemi', {"value": track_emissions})

    for name, payload in additional_parameters.iteritems():
        loader.addDictionary(name, payload)

    # Emissions: keys are stored as strings holding Python literals.
    with Open(emmisions_file, 'r') as handle:
        raw_emissions = normalize_dict(json.load(handle))
    emissions = [(ast.literal_eval(key), weight)
                 for key, weight in raw_emissions.iteritems()]
    loader.addDictionary('MatchStateEmissions', {'value': emissions})

    # Background: every emission pair adds its weight to both members.
    symbol_weight = defaultdict(int)
    for (left, right), weight in emissions:
        for symbol in (left, right):
            symbol_weight[symbol] += weight
    background = list(normalize_dict(symbol_weight).iteritems())
    loader.addDictionary('background-probability', {'value': background})

    # Transitions: literal keys are joined into plain strings.
    with Open(transitions_file, 'r') as handle:
        raw_transitions = json.load(handle)
    trans = dict(
        (''.join(ast.literal_eval(key)), weight)
        for key, weight in raw_transitions.iteritems()
    )
    trans = normalize_tuple_dict(trans)
    if not simple_model:
        remainder = 1 - repeat_probability
        for key in trans:
            trans[key] *= remainder
        for source in ('M', 'X', 'Y', 'R'):
            trans[source + 'R'] = repeat_probability
        for target in ('X', 'Y', 'M'):
            trans['R' + target] = remainder / 3

    loader.addDictionary('trans', trans)

    # Files with repeat statistics produced from TRF output.
    if not simple_model:
        consensus_path = os.path.relpath(
            os.path.abspath(repeat_consensus_file),
            os.path.dirname(model_file),
        )
        loader.addFile('consensus.js', consensus_path)
        loader.addFile('repeatlength.js',
                       os.path.abspath(repeat_length_file))

    model = loader.load(model_file)

    with Open(output_file, 'w') as handle:
        json.dump({'model': model['model'].toJSON()}, handle, indent=4)
    return output_file
Example #4
0
def main(model_file, additional_parameters,
         emmisions_file, transitions_file, repeat_consensus_file,
         repeat_length_file, trf_cover_file, output_file, simple_model):
    """Assemble a model from statistics files and dump it as JSON.

    The statistics are registered with an HMMLoader, `model_file` is
    loaded through it, and the resulting model's toJSON() output is
    written to `output_file` under the key 'model'.

    When `simple_model` is false, repeat-related values derived from
    `trf_cover_file` are registered too, the transition table gets the
    repeat-state entries, and the consensus / repeat length files are
    attached to the loader.

    Returns:
        `output_file`, unchanged.
    """
    loader = HMMLoader()

    with Open(trf_cover_file, 'r') as handle:
        trf_cover = json.load(handle)

    if not simple_model:
        segment_count = trf_cover['R_segment_count']
        repeat_probability = (
            float(segment_count) / (segment_count + trf_cover['M_count'])
        )
        repeat_count = sum(trf_cover[key] for key in ('RR', 'RM', 'MR'))
        # Shares are computed in the order RR, MR, RM.
        shares = dict(
            (key, float(trf_cover[key]) / repeat_count)
            for key in ('RR', 'MR', 'RM')
        )
        track_emissions = {
            'RR': shares['RR'],
            'RM': shares['RM'],
            'MR': shares['MR'],
        }
        loader.addDictionary('trackemi', {"value": track_emissions})

    for name, payload in additional_parameters.iteritems():
        loader.addDictionary(name, payload)

    # Emissions: keys are stored as strings holding Python literals.
    with Open(emmisions_file, 'r') as handle:
        raw_emissions = normalize_dict(json.load(handle))
    emissions = [(ast.literal_eval(key), weight)
                 for key, weight in raw_emissions.iteritems()]
    loader.addDictionary('MatchStateEmissions', {'value': emissions})

    # Background: every emission pair adds its weight to both members.
    symbol_weight = defaultdict(int)
    for (left, right), weight in emissions:
        for symbol in (left, right):
            symbol_weight[symbol] += weight
    background = list(normalize_dict(symbol_weight).iteritems())
    loader.addDictionary('background-probability', {'value': background})

    # Transitions: literal keys are joined into plain strings.
    with Open(transitions_file, 'r') as handle:
        raw_transitions = json.load(handle)
    trans = dict(
        (''.join(ast.literal_eval(key)), weight)
        for key, weight in raw_transitions.iteritems()
    )
    trans = normalize_tuple_dict(trans)
    if not simple_model:
        remainder = 1 - repeat_probability
        for key in trans:
            trans[key] *= remainder
        for source in ('M', 'X', 'Y', 'R'):
            trans[source + 'R'] = repeat_probability
        for target in ('X', 'Y', 'M'):
            trans['R' + target] = remainder / 3

    loader.addDictionary('trans', trans)

    # Files with repeat statistics produced from TRF output.
    if not simple_model:
        consensus_path = os.path.relpath(
            os.path.abspath(repeat_consensus_file),
            os.path.dirname(model_file),
        )
        loader.addFile('consensus.js', consensus_path)
        loader.addFile('repeatlength.js',
                       os.path.abspath(repeat_length_file))

    model = loader.load(model_file)

    with Open(output_file, 'w') as handle:
        json.dump({'model': model['model'].toJSON()}, handle, indent=4)
    return output_file