def test_simple_model_initialisation_1():
    """Test how the table counts are initialised in a simple model.

    A simple model that has emitted one of each base from the empty
    context must have one table for each base in the root context and
    no other tables.
    """
    model = cacto.cactomodelfromseqs(('A', 'C', 'G', 'T'))
    counts = model.t.copy()
    # Remove the one table expected at the root context for each base.
    root = model.prefixindex.topdown()
    counts[root.value.id] -= 1
    # Nothing should remain anywhere in the table counts.
    assert (counts == 0).all()
def test_seqs_log_likelihood():
    """Log the per-base likelihood of each prediction set under each trained model."""
    for trainingseqs in prefix_seq_sets:
        model = cacto.cactomodelfromseqs(trainingseqs)
        for predictionseqs in prefix_seq_sets:
            total_bases = sum(map(len, predictionseqs))
            per_base = math.exp(
                model.seqsloglikelihood(predictionseqs) / total_bases)
            logging.info('likelihood/base: %.3f', per_base)
def test_model_initialisation_1():
    """Test how the table counts are initialised."""
    model = cacto.cactomodelfromseqs(('CGAT',))
    seqan.traverse.depthfirsttraversal(
        model.prefixindex, model.log_table_counts)
    counts = model.t.copy()
    it = model.prefixindex.topdown()
    # The prefix is looked up reversed ('AGC') — presumably the index is
    # keyed on reversed prefixes; TODO confirm against the cacto module.
    if not it.goDown('CGA'[::-1]):
        raise ValueError('Should have been able to find prefix "CGA"')
    # Exactly one table for the final 'T' emission, none for the others.
    assert (counts[it.value.id] == [0, 0, 0, 1]).all()
def test_model_initialisation_1():
    """Test how the table counts are initialised."""
    model = cacto.cactomodelfromseqs(('CGAT', ))
    seqan.traverse.depthfirsttraversal(
        model.prefixindex, model.log_table_counts)
    table_counts = model.t.copy()
    iterator = model.prefixindex.topdown()
    # Descend to the context 'CGA' (looked up reversed).
    found = iterator.goDown('CGA'[::-1])
    if not found:
        raise ValueError('Should have been able to find prefix "CGA"')
    expected = [0, 0, 0, 1]
    assert (expected == table_counts[iterator.value.id]).all()
def _test_empty_model_predictions():
    """Currently dumps core due to seqan bug.

    A model trained on a single empty sequence should predict
    p(x|u) = 1/4 for every base x regardless of the context u.
    """
    # BUG FIX: tuple('', ) is tuple('') == (), the EMPTY tuple — the model
    # was being built from zero sequences. We want one empty sequence.
    seqs = ('',)
    model = cacto.cactomodelfromseqs(seqs)
    #
    # No matter what the context we should see p = 1/4
    #
    for u in ('', 'A', 'GC'):
        x = cacto.Value('A')
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        # Assert on the same quantity that is logged (p_x_given_u),
        # matching the sibling prediction tests; the original asserted on
        # model.p(x, u) instead.
        assert abs(.25 - p) < 1e-15
def _test_empty_model_predictions():
    """Currently dumps core due to seqan bug.

    A model trained on one empty sequence should give the uniform
    prediction p(x|u) = 1/4 in every context u.
    """
    # BUG FIX: the original tuple('',) equals tuple('') == () — an empty
    # tuple of sequences. Use a literal tuple holding one empty string.
    seqs = ('',)
    model = cacto.cactomodelfromseqs(seqs)
    #
    # No matter what the context we should see p = 1/4
    #
    for u in ('', 'A', 'GC'):
        x = cacto.Value('A')
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        # Check the logged quantity itself (p_x_given_u) for consistency
        # with test_simple_model_predictions; the original checked
        # model.p(x, u).
        assert abs(.25 - p) < 1e-15
def test_simple_model_predictions():
    """After emitting one of each base, p(x|u) must be 1/4 in any context."""
    seqs = ('A', 'C', 'G', 'T')
    model = cacto.cactomodelfromseqs(seqs)
    #
    # No matter what the context we should see p(x|u) = 1/4
    #
    for u in ('', 'A', 'GC'):
        x = cacto.Value('A')
        # Evaluate once and reuse — the original recomputed
        # model.p_x_given_u(x, u) three times (log, assignment, condition).
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        if abs(.25 - p) >= 1e-15:
            raise ValueError('p not close to 1/4')
def test_model_predictions():
    """Cross-check the three ways of computing p(x|u) and the posterior."""
    import seqan
    for seqs, test_xs_us in prediction_sets:
        model = cacto.cactomodelfromseqs(seqs)
        posterior = model.calculateposterior()
        for x, u in test_xs_us:
            logging.debug('%s|%s', x, u)
            value = cacto.Value(x)
            p = model.p_x_given_u(value, u)
            context_it = model._locate_context(u, topdownhistory=True)
            post = posterior[context_it.value.id]
            p2 = model.p_xord_given_ui(value.ordValue, context_it)
            # Check that the three different methods of calculating
            # likelihoods give similar results
            assertareclose(p, p2)
            assertareclose(p, post[value.ordValue])
            # Check posterior adds to 1
            assertareclose(1., post.sum())
        if False:  # Choose whether to build graph or not
            import seqan.io.graphtool
            builder = seqan.io.graphtool.Builder(model.prefixindex)
            seqan.io.graphtool.GT.graph_draw(
                builder.graph,
                pos=seqan.io.graphtool.GT.sfdp_layout(builder.graph),
                vertex_size=2,
                vertex_fill_color="lightgrey",
                vertex_font_size=8,
                vertex_text=builder.map_vertices(
                    lambda it: '{0} {1} {2} {3}'.format(
                        *map(int, model._su(it)))),
                vertex_pen_width=seqan.io.graphtool.root_vertex_property(
                    builder),
                edge_text=seqan.io.graphtool.edge_labels_for_output(builder),
                edge_color=seqan.io.graphtool.color_edges_by_first_symbol(
                    builder),
                edge_end_marker="none",
                edge_pen_width=2,
                #edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(builder, suffix),
                #edge_pen_width=builder.edge_lengths,
                #output="graphtool.png"
            )
def test_model_predictions():
    """Verify that p_x_given_u, p_xord_given_ui and the posterior agree."""
    import seqan
    for training, queries in prediction_sets:
        m = cacto.cactomodelfromseqs(training)
        post_by_node = m.calculateposterior()
        for sym, ctx in queries:
            logging.debug('%s|%s', sym, ctx)
            direct = m.p_x_given_u(cacto.Value(sym), ctx)
            ctx_it = m._locate_context(ctx, topdownhistory=True)
            node_post = post_by_node[ctx_it.value.id]
            via_ord = m.p_xord_given_ui(cacto.Value(sym).ordValue, ctx_it)
            # The three ways of computing the likelihood must agree...
            assertareclose(direct, via_ord)
            assertareclose(direct, node_post[cacto.Value(sym).ordValue])
            # ...and the posterior at the located context must sum to one.
            assertareclose(1., node_post.sum())
        if False:  # Choose whether to build graph or not
            import seqan.io.graphtool
            builder = seqan.io.graphtool.Builder(m.prefixindex)
            seqan.io.graphtool.GT.graph_draw(
                builder.graph,
                pos=seqan.io.graphtool.GT.sfdp_layout(builder.graph),
                vertex_size=2,
                vertex_fill_color="lightgrey",
                vertex_font_size=8,
                vertex_text=builder.map_vertices(
                    lambda it: '{0} {1} {2} {3}'.format(*map(int, m._su(it)))),
                vertex_pen_width=seqan.io.graphtool.root_vertex_property(builder),
                edge_text=seqan.io.graphtool.edge_labels_for_output(builder),
                edge_color=seqan.io.graphtool.color_edges_by_first_symbol(builder),
                edge_end_marker="none",
                edge_pen_width=2,
                #edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(builder, suffix),
                #edge_pen_width=builder.edge_lengths,
                #output="graphtool.png"
            )
def test_seqs_log_likelihood():
    """Log the geometric-mean per-base likelihood for every train/predict pair."""
    for train in prefix_seq_sets:
        model = cacto.cactomodelfromseqs(train)
        for predict in prefix_seq_sets:
            loglik = model.seqsloglikelihood(predict)
            nbases = sum(len(s) for s in predict)
            logging.info('likelihood/base: %.3f', math.exp(loglik / nbases))