예제 #1
0
 def test_hmm(tag, pwm):
     """
     Save a logo and a state graph for one PWM and return the model built
     from it.

     tag is interpolated into the output names ('<tag>-logo.png' and
     '<tag>-states') and prefixes every log message.
     pwm is unpacked into two values: the first is drawn as the logo and
     the second is passed as the logo's transparencies. Both are handed to
     build_hmm_model.

     Returns whatever build_hmm_model returns.
     """
     frequencies, gap_values = pwm

     # draw the logo and write it next to the other outputs for this tag
     logo_image = L.pssm_as_image(
         frequencies, size=None, transparencies=gap_values)
     logo_path = '%s-logo.png' % tag
     logo_image.save(logo_path)
     logging.info('%s: Created logo: %s', tag, logo_path)

     # third argument is presumably the binding probability - confirm
     # against build_hmm_model
     built_model = build_hmm_model(frequencies, gap_values, .1)
     logging.debug('%s: Created model', tag)

     graph_name = '%s-states' % tag
     hmm.graph_as_svg(built_model, graph_name, neato_properties={'-Elen':3.})
     logging.debug('%s: Graphed model', tag)

     return built_model
예제 #2
0
 def test_hmm(tag, pwm):
     """
     Build the model for a PWM, writing its logo and state graph on the way.

     The pwm argument must unpack into exactly two values; they are used as
     the logo's frequencies and transparencies and as the first two
     arguments to build_hmm_model. Output names and log lines are derived
     from tag.

     Returns the model produced by build_hmm_model.
     """
     pssm_freqs, pssm_gaps = pwm

     # logo first: '<tag>-logo.png'
     image = L.pssm_as_image(pssm_freqs, size=None, transparencies=pssm_gaps)
     image_filename = '%s-logo.png' % tag
     image.save(image_filename)
     logging.info('%s: Created logo: %s', tag, image_filename)

     # then the model itself
     result = build_hmm_model(pssm_freqs, pssm_gaps, .1)
     logging.debug('%s: Created model', tag)

     # and finally its graph, named '<tag>-states'
     neato_options = {'-Elen': 3.}
     hmm.graph_as_svg(result, '%s-states' % tag, neato_properties=neato_options)
     logging.debug('%s: Graphed model', tag)
     return result
예제 #3
0
def run_pwm_viterbi(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Evaluate a PWM as a sequence classifier, one ROC point per binding
    probability.

    A logo for the PWM is saved to '<tag>-logo.png'. Then, for every value
    in the module-level p_binding_params, a model is built with
    build_hmm_model, graphed under the name '<tag>-states' (the same name
    on every pass) and run over both sequence sets with run_on_seqs. The
    third value returned by run_on_seqs is used as the number of sequences
    in which a site was found.

    Returns the list of roc.RocCalculator objects, in the order the
    parameters were visited.
    """
    logging.info('Running PWM: %s', tag)

    logo_image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    logo_path = '%s-logo.png' % tag
    logo_image.save(logo_path)
    logging.info('%s: Created logo: %s', tag, logo_path)

    results = []
    for p_binding in p_binding_params:
        # build and graph the model for this binding probability
        model = build_hmm_model(freqs, gaps, p_binding)
        graph_name = '%s-states' % tag
        hmm.graph_as_svg(model, graph_name, neato_properties={'-Elen':1.4})
        logging.debug('%s: Graphed model', tag)

        # positive sequences
        pos_sites_pos, pos_sites_neg, pos_hits = run_on_seqs(model, positive_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Positive sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, pos_sites_pos, pos_sites_neg, pos_hits, len(positive_seqs))

        # negative sequences
        neg_sites_pos, neg_sites_neg, neg_hits = run_on_seqs(model, negative_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Negative sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, neg_sites_pos, neg_sites_neg, neg_hits, len(negative_seqs))

        # a sequence counts as predicted positive when a site was found in it
        true_pos = pos_hits
        false_pos = neg_hits
        false_neg = len(positive_seqs) - pos_hits
        true_neg = len(negative_seqs) - neg_hits
        point = roc.RocCalculator(tp=true_pos, fp=false_pos, tn=true_neg, fn=false_neg)

        logging.info(
            '%s: p(binding)=%.1e; Specificity=%.3f; Sensitivity=%.3f',
            tag, p_binding, point.specificity(), point.sensitivity())
        results.append(point)

    return results
예제 #4
0
def run_pwm_forward_backward(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Score both sequence sets with the PWM's model and turn the scores into
    ROCs.

    Saves a logo to '<tag>-logo.png', builds a single model with
    build_hmm_model (third argument fixed at .001), graphs it under the
    name '<tag>-states', then scores positive_seqs.values() and
    negative_seqs.values() with test_hmm_forward_backward. Both sequence
    arguments therefore need a values() method.

    Returns the result of roc.picked_rocs_from_thresholds on the two score
    collections.
    """
    logging.info('Running PWM: %s', tag)

    # logo
    logo_path = '%s-logo.png' % tag
    L.pssm_as_image(freqs, size=None, transparencies=gaps).save(logo_path)
    logging.info('%s: Created logo: %s', tag, logo_path)

    # model and its graph
    pwm_model = build_hmm_model(freqs, gaps, .001)
    graph_name = '%s-states' % tag
    hmm.graph_as_svg(pwm_model, graph_name, neato_properties={'-Elen': 1.4})
    logging.debug('%s: Graphed model', tag)

    # positives are scored before negatives, as the scorer may log or keep state
    scores_for_positives = test_hmm_forward_backward(pwm_model, positive_seqs.values())
    scores_for_negatives = test_hmm_forward_backward(pwm_model, negative_seqs.values())
    return roc.picked_rocs_from_thresholds(scores_for_positives, scores_for_negatives)
예제 #5
0
def run_pwm_forward_backward(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Produce ROCs for a PWM from scores over positive and negative
    sequences.

    Steps, in order: write the logo image '<tag>-logo.png'; build one model
    via build_hmm_model(freqs, gaps, .001); graph it as '<tag>-states';
    score the values() of each sequence collection with
    test_hmm_forward_backward.

    Returns what roc.picked_rocs_from_thresholds gives for the positive and
    negative scores.
    """
    logging.info('Running PWM: %s', tag)

    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_name = '%s-logo.png' % tag
    image.save(image_name)
    logging.info('%s: Created logo: %s', tag, image_name)

    # one model is shared by both scoring passes
    shared_model = build_hmm_model(freqs, gaps, .001)
    neato_options = {'-Elen':1.4}
    hmm.graph_as_svg(shared_model, '%s-states' % tag, neato_properties=neato_options)
    logging.debug('%s: Graphed model', tag)

    pos = test_hmm_forward_backward(shared_model, positive_seqs.values())
    neg = test_hmm_forward_backward(shared_model, negative_seqs.values())
    rocs = roc.picked_rocs_from_thresholds(pos, neg)
    return rocs
예제 #6
0
def run_pwm_viterbi(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Sweep the binding probability and record a ROC point for each value.

    The PWM's logo is written to '<tag>-logo.png' once, up front. For each
    entry of the module-level p_binding_params a fresh model is built,
    graphed as '<tag>-states' and applied to both sequence collections via
    run_on_seqs. The number of sequences with a site (third value from
    run_on_seqs) gives the true and false positives; the remaining
    sequences in each collection give the false and true negatives.

    Returns a list holding one roc.RocCalculator per parameter value.
    """
    logging.info('Running PWM: %s', tag)

    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_name = '%s-logo.png' % tag
    image.save(image_name)
    logging.info('%s: Created logo: %s', tag, image_name)

    points = []
    for p_binding in p_binding_params:
        current_model = build_hmm_model(freqs, gaps, p_binding)
        hmm.graph_as_svg(
            current_model, '%s-states' % tag, neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)

        # run over the positives and report what was found
        positive_result = run_on_seqs(current_model, positive_seqs)
        p_total_pos, p_total_neg, p_seqs_with_site = positive_result
        logging.debug(
            '%s: p(binding)=%.1e: Positive sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag,
            p_binding,
            p_total_pos,
            p_total_neg,
            p_seqs_with_site,
            len(positive_seqs),
        )

        # same again for the negatives
        negative_result = run_on_seqs(current_model, negative_seqs)
        n_total_pos, n_total_neg, n_seqs_with_site = negative_result
        logging.debug(
            '%s: p(binding)=%.1e: Negative sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag,
            p_binding,
            n_total_pos,
            n_total_neg,
            n_seqs_with_site,
            len(negative_seqs),
        )

        # confusion counts at the level of whole sequences
        missed_positives = len(positive_seqs) - p_seqs_with_site
        clean_negatives = len(negative_seqs) - n_seqs_with_site
        calculator = roc.RocCalculator(
            tp=p_seqs_with_site,
            fp=n_seqs_with_site,
            tn=clean_negatives,
            fn=missed_positives,
        )
        logging.info(
            '%s: p(binding)=%.1e; Specificity=%.3f; Sensitivity=%.3f',
            tag, p_binding, calculator.specificity(), calculator.sensitivity())
        points.append(calculator)

    return points
예제 #7
0
파일: io.py 프로젝트: JohnReid/HMM
TRANSITION:4;5;1
TRANSITION:5;6;1
TRANSITION:6;7;1
TRANSITION:7;8;1

# EMISSIONS:i;b1,b2,b3,b4 - where p(state i emits base i) = bi
EMISSIONS:0;.5,.5,0,0
EMISSIONS:1;0,0,.5,.5
EMISSIONS:2;1,0,0,0
EMISSIONS:3;0,0,0,1
EMISSIONS:4;0,0,1,0
EMISSIONS:5;0,1,0,0
EMISSIONS:6;1,0,0,0
EMISSIONS:7;1,0,0,0
EMISSIONS:8;0,0,1,0
"""
    model = build_model( to_parse.split('\n') )
    m = hmm.model_states_2_model(model)
    if False:
        hmm.graph_as_svg(
                m,
                'test',
                'model_io_test',
                graphing_keywords = {
                        'show_dists' : lambda l: True,
                        'state_labels' : None
                },
                neato_properties = { '-Elen' : '2' }
        )
    with open('model.mdl', 'w') as f: write_model(model, f)
예제 #8
0
TRANSITION:3;5;.9
TRANSITION:4;5;1
TRANSITION:5;6;1
TRANSITION:6;7;1
TRANSITION:7;8;1

# EMISSIONS:i;b1,b2,b3,b4 - where p(state i emits base i) = bi
EMISSIONS:0;.5,.5,0,0
EMISSIONS:1;0,0,.5,.5
EMISSIONS:2;1,0,0,0
EMISSIONS:3;0,0,0,1
EMISSIONS:4;0,0,1,0
EMISSIONS:5;0,1,0,0
EMISSIONS:6;1,0,0,0
EMISSIONS:7;1,0,0,0
EMISSIONS:8;0,0,1,0
"""
    model = build_model(to_parse.split('\n'))
    m = hmm.model_states_2_model(model)
    if False:
        hmm.graph_as_svg(m,
                         'test',
                         'model_io_test',
                         graphing_keywords={
                             'show_dists': lambda l: True,
                             'state_labels': None
                         },
                         neato_properties={'-Elen': '2'})
    with open('model.mdl', 'w') as f:
        write_model(model, f)
예제 #9
0
      [ 1., 0., 0., 0. ],
      [ 0., 1., 0., 0. ],
      [ 0., 0., 0., 1. ],
    ]
    K = 7
    test_seq = 'acgtgat' # matches dist above
    test_seq_order_0 = hmm.pssm.seq_to_numpy(test_seq)

    # for various different orders
    for order in [0, 1, 2]:

        # build a model of distribution above
        traits = GappedPssmTraits(K, p_binding_site, order, num_background_states, create_background_model, emission_dists=emission_dists)
        model = traits.new_model()
        converted = hmm.model_states_2_model(model)
        B = converted.B
        hmm.graph_as_svg(converted, 'gapped_pssm')


        # check the reverse complement states are correct
        for n in xrange(model.N):
            for o in xrange(model.M):
                rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n,o)
                assert check_is_close_2(B[rev_comp_state,rev_comp_obs], B[n,o]), ('%d,%d %d,%d: %f %f' % (rev_comp_state,rev_comp_obs,n,o,B[rev_comp_state,rev_comp_obs],B[n,o]))

        # check viterbi gives correct result
        test_seq_order_n = converted.converter.to_order_n(test_seq_order_0)
        LL, states = converted.viterbi(test_seq_order_n)
        for i, state in enumerate(states):
            assert (state-num_background_states)/2 == i
예제 #10
0
        hmm.dirichlet_draw(numpy.ones(builder.M) * .1)
        for k in xrange(builder.K)
    ])
    emissions[builder.gap_index] = hmm.dirichlet_draw(
        numpy.ones(builder.M) * .3)
    model_by_states, in_states, out_states = builder.create(
        p_gap=.6, emissions=emissions)

    # create a background model and add the single gapped pssm to it
    complete_model = add_to_simple_background_model(model_by_states,
                                                    in_states,
                                                    out_states,
                                                    p_binding_site=.01)

    # convert to other type of model
    model = hmm.as_model(complete_model)

    # write as a graph
    hmm.graph_as_svg(model,
                     'single-gapped-hmm',
                     graphing_keywords={'include_emissions': False},
                     neato_properties={'-Elen': 2})

    # get the emissions and gap probabilities and write a logo
    emissions_copy, gap_probs = builder.get_emissions_and_gap_probabilities(
        model, offset=1)
    assert (emissions_copy - emissions).sum() < 1e-10
    import hmm.pssm.logo as logo
    image = logo.pssm_as_image(emissions, transparencies=gap_probs)
    image.save("single-gapped-pssm-logo.png", "PNG")
예제 #11
0
sp1_pssms = all_sp1_pssms()
for tag in methods:
    # Scores for each method are cached in '<tag>-scores.pickle'; they are
    # only recomputed when that cache cannot be loaded.
    score_pickle_file = '%s-scores.pickle' % tag
    try:
        # NOTE(review): unpickling can execute arbitrary code. This is only
        # acceptable if the file is one this script wrote itself - confirm
        # the cache directory is trusted.
        # Read in binary mode to match the 'wb' used when dumping; the
        # with-block closes the handle even when loading fails.
        with open(score_pickle_file, 'rb') as score_file:
            positive_scores, negative_scores = cPickle.load(score_file)
    except Exception:
        # Any failure to load (missing, truncated or corrupt cache) falls
        # back to recomputation. Catching Exception rather than using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        logging.info('%s: Could not ROCs from unpickle %s, calculating from scratch.', tag, score_pickle_file)
        freqs, gaps = sp1_pssms[tag]
        # normalise each row of freqs so that it sums to 1
        freqs = (freqs.T / freqs.sum(axis=1)).T
        logo = L.pssm_as_image(freqs, size=None, transparencies=gaps)
        logo_filename = '%s-logo.png' % tag
        logo.save(logo_filename)
        logging.info('%s: Created logo: %s', tag, logo_filename)
        model = build_hmm_model(freqs, gaps, .001)
        hmm.graph_as_svg(model, '%s-states' % tag, neato_properties={'-Elen':1.4})
        logging.debug('%s: Graphed model', tag)
        positive_scores = test_hmm_forward_backward(model, sequences['positive'].values())
        # one set of negative scores per background, keyed by background
        negative_scores = dict(
            (bg, test_hmm_forward_backward(model, sequences[bg].values()))
            for bg in backgrounds
        )
        # close (and so flush) the cache file as soon as it is written
        with open(score_pickle_file, 'wb') as score_file:
            cPickle.dump((positive_scores, negative_scores), score_file)
    else:
        # logged outside the try so that a logging problem cannot be
        # mistaken for a cache miss
        logging.info('%s: Unpickled ROCs from %s.', tag, score_pickle_file)
    # positives are keyed by (tag,), negatives by (tag, background)
    scores[(tag,)] = positive_scores
    for bg, score in negative_scores.iteritems():
        scores[(tag, bg)] = score


#
# Generate ROCs
#
예제 #12
0
        positive_scores, negative_scores = cPickle.load(
            open(score_pickle_file))
        logging.info('%s: Unpickled ROCs from %s.', tag, score_pickle_file)
    except:
        logging.info(
            '%s: Could not ROCs from unpickle %s, calculating from scratch.',
            tag, score_pickle_file)
        freqs, gaps = sp1_pssms[tag]
        freqs = (freqs.T / freqs.sum(axis=1)).T
        logo = L.pssm_as_image(freqs, size=None, transparencies=gaps)
        logo_filename = '%s-logo.png' % tag
        logo.save(logo_filename)
        logging.info('%s: Created logo: %s', tag, logo_filename)
        model = build_hmm_model(freqs, gaps, .001)
        hmm.graph_as_svg(model,
                         '%s-states' % tag,
                         neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)
        positive_scores = test_hmm_forward_backward(
            model, sequences['positive'].values())
        negative_scores = dict(
            (bg, test_hmm_forward_backward(model, sequences[bg].values()))
            for bg in backgrounds)
        cPickle.dump((positive_scores, negative_scores),
                     open(score_pickle_file, 'wb'))
    scores[(tag, )] = positive_scores
    for bg, score in negative_scores.iteritems():
        scores[(tag, bg)] = score

#
# Generate ROCs
예제 #13
0
파일: single_gap.py 프로젝트: JohnReid/HMM
    emissions[builder.gap_index] = hmm.dirichlet_draw(numpy.ones(builder.M) * .3)
    model_by_states, in_states, out_states = builder.create(
      p_gap=.6,
      emissions=emissions
    )

    # create a background model and add the single gapped pssm to it
    complete_model = add_to_simple_background_model(
      model_by_states,
      in_states,
      out_states,
      p_binding_site=.01)

    # convert to other type of model
    model = hmm.as_model(complete_model)

    # write as a graph
    hmm.graph_as_svg(
      model,
      'single-gapped-hmm',
      graphing_keywords={'include_emissions':False},
      neato_properties={'-Elen':2}
    )

    # get the emissions and gap probabilities and write a logo
    emissions_copy, gap_probs = builder.get_emissions_and_gap_probabilities(model, offset=1)
    assert (emissions_copy - emissions).sum() < 1e-10
    import hmm.pssm.logo as logo
    image = logo.pssm_as_image(emissions, transparencies=gap_probs)
    image.save("single-gapped-pssm-logo.png", "PNG")