def test_tag_type(self):
    """Assert that every origin value is exactly of type ``str``.

    Both the plain and the distributive transition tables built from
    ``self.agg_6074_0_tags`` are checked.
    """
    dist_trans_proba = distributive_transition_probabilities(self.agg_6074_0_tags)
    trans_proba = transition_probabilities(self.agg_6074_0_tags)
    # Plain table first, then the distributive one.
    for table in (trans_proba, dist_trans_proba):
        for origin in table["origin"].tolist():
            self.assertEqual(type(origin), str)
def markov_cluster_transition_probabilities(train, is_distributive, random_dummy_mode=None):
    """Build the transition-probability dictionary for ``train``.

    Args:
        train: Training data, passed unchanged to the selected ``mk`` builder.
        is_distributive: When truthy, ``mk.distributive_transition_probabilities``
            is used; otherwise ``mk.transition_probabilities``.
        random_dummy_mode: Any value other than ``None`` (falsy values
            included) routes the table through ``mk.equalize_transition_prob``.

    Returns:
        The result of ``mk.to_dict`` applied to the final table.
    """
    build_table = (
        mk.distributive_transition_probabilities
        if is_distributive
        else mk.transition_probabilities
    )
    table = build_table(train)
    # Only the presence of a mode matters here, not its value.
    if random_dummy_mode is not None:
        table = mk.equalize_transition_prob(table)
    return mk.to_dict(table)
def test_clean_data(self):
    """Assert that no empty tag list shows up as an origin or destination.

    Both the plain and the distributive transition tables built from
    ``self.agg_6074_0_tags`` are checked, for the empty list ``[]`` as well
    as for its string form ``"[]"``.
    """
    dist_trans_proba = distributive_transition_probabilities(self.agg_6074_0_tags)
    trans_proba = transition_probabilities(self.agg_6074_0_tags)
    for column in ("origin", "destination"):
        for table in (trans_proba, dist_trans_proba):
            # NOTE(review): the tables appear to be pandas objects, where
            # `x in series` tests the index labels rather than the values.
            # Materialise the column as a list so both membership checks
            # actually inspect the stored tags.
            values = table[column].tolist()
            self.assertNotIn([], values)
            self.assertNotIn("[]", values)
def test_equalize_transition_prob(self):
    """Assert that equalized frequencies are uniform within each origin.

    Every ``transition_freq`` of an origin must lie within 0.01 of that
    origin's mean, and the mean for origin "A" must be about 0.16667.
    """
    sequence = ["A", "B", "A", "C", "A", "D", "B", "C", "B", "C", "A",
                "Z", "B", "C", "B", "C", "J", "A", "T", "A", "S"]
    raw_table = transition_probabilities(sequence)
    trans_proba = equalize_transition_prob(raw_table)

    for origin in trans_proba["origin"]:
        origin_freqs = trans_proba[trans_proba["origin"] == origin]["transition_freq"]
        for freq in origin_freqs:
            self.assertAlmostEqual(origin_freqs.mean(), freq, delta=0.01)

    freqs_from_a = trans_proba[trans_proba["origin"] == "A"]["transition_freq"]
    self.assertAlmostEqual(0.16667, freqs_from_a.mean(), delta=0.001)
def test_transition_prob_df(self):
    """Check transition frequencies computed for a small hand-made sequence.

    The outgoing frequencies of origins "A", "B" and "C" must each sum to 1,
    A -> B must have frequency 0.25 and B -> C frequency 0.8.
    """
    sequence = ["A", "B", "A", "C", "A", "D", "B", "C",
                "B", "C", "A", "Z", "B", "C", "B", "C"]
    trans_proba = transition_probabilities(sequence)

    def outgoing_total(origin):
        # Sum of all transition frequencies leaving `origin`.
        return trans_proba[trans_proba["origin"] == origin]["transition_freq"].sum()

    def frequency(origin, destination):
        # Frequency of the single origin -> destination row.
        pair = (trans_proba["origin"] == origin) & (trans_proba["destination"] == destination)
        return trans_proba[pair]["transition_freq"].item()

    # Frequencies are floats, so compare with a tolerance instead of exact
    # equality to stay robust against rounding in the computation.
    for origin in ("A", "B", "C"):
        self.assertAlmostEqual(1, outgoing_total(origin))
    self.assertAlmostEqual(0.25, frequency("A", "B"))
    self.assertAlmostEqual(0.8, frequency("B", "C"))
def test_markov(train, test, is_distributive, random_dummy_mode=None):
    """Build a transition dictionary from ``train`` and evaluate it on ``test``.

    Args:
        train: Training data, passed unchanged to the selected ``mk`` builder.
        test: Test data forwarded to ``do_markov_test``.
        is_distributive: When truthy, ``mk.distributive_transition_probabilities``
            is used; otherwise ``mk.transition_probabilities``. The flag is
            also forwarded to ``do_markov_test``.
        random_dummy_mode: Any value other than ``None`` (falsy values
            included) routes the table through ``mk.equalize_transition_prob``.

    Returns:
        Whatever ``do_markov_test`` returns.
    """
    build_table = (
        mk.distributive_transition_probabilities
        if is_distributive
        else mk.transition_probabilities
    )
    table = build_table(train)
    # Only the presence of a mode matters here, not its value.
    if random_dummy_mode is not None:
        table = mk.equalize_transition_prob(table)
    return do_markov_test(
        trans_proba_dict=mk.to_dict(table),
        test=test,
        is_distributive=is_distributive,
    )
def test_transitions_origins_and_destinations(self):
    """Check transition counts aggregated over several tag sequences.

    The per-sequence tables are first combined by hand (concatenate, then
    sum ``transition_count`` per origin/destination pair). The same
    expectations are then applied to ``cluster_transition_probabilities``,
    which must additionally report a frequency of 1/9 for A -> B.
    """
    tags_1 = ["A", "B", "A", "C", "A", "D"]
    tags_2 = ["A", "X", "A", "C", "A", "D", "X"]
    tags_3 = ["A", "Y", "A", "Y", "C", "A", "D", "Y"]
    sequences = [tags_1, tags_2, tags_3]

    def pair_rows(table, origin, destination):
        # Rows of `table` describing the origin -> destination transition.
        mask = (table["origin"] == origin) & (table["destination"] == destination)
        return table[mask]

    def check_counts(table):
        # Expectations shared by the manual and the library aggregation.
        expected_counts = (("A", "D", 3), ("C", "A", 3), ("X", "A", 1), ("Y", "A", 1))
        for origin, destination, count in expected_counts:
            self.assertEqual(pair_rows(table, origin, destination)["transition_count"].item(), count)
        # "D" only ever ends a sequence or precedes X/Y, so D -> A never occurs.
        self.assertEqual(len(pair_rows(table, "D", "A")), 0)

    # pd.concat replaces repeated DataFrame.append calls (removed in pandas 2.0).
    stacked = pd.concat([transition_probabilities(tags) for tags in sequences])
    cluster_transitions = (
        stacked.groupby(["origin", "destination"])["transition_count"]
        .sum()
        .to_frame()
        .reset_index()
    )
    check_counts(cluster_transitions)

    cluster_transitions = cluster_transition_probabilities(sequences)
    check_counts(cluster_transitions)
    self.assertAlmostEqual(pair_rows(cluster_transitions, "A", "B")["transition_freq"].item(), 1 / 9)
def test_transitions_origins_and_destinations(self):
    """Check that expected tag groups occur as origins and destinations.

    Each expected tag-list string must appear in the plain transition table,
    and each individual tag parsed from it must appear in the distributive
    table. The last origin entry and the first destination entry are left
    out of the checks by the slices below.
    """
    dist_trans_proba = distributive_transition_probabilities(self.agg_6074_0_tags)
    trans_proba = transition_probabilities(self.agg_6074_0_tags)
    tags_origin = [
        "['WORK']",
        "['cafe', 'food']",
        "['bus_station', 'transit_station']",
        "['lodging']",
        "['transit_station']",
        "['clothing_store', 'store']",
        "['liquor_store', 'store']",
        "['school']",
        "['accounting', 'finance']",
        "['moving_company', 'storage']",
        "['meal_takeaway', 'restaurant', 'food']",
    ]
    tags_destination = [
        "['WORK']",
        "['cafe', 'food']",
        "['bus_station', 'transit_station']",
        "['lodging']",
        "['transit_station']",
        "['clothing_store', 'store']",
        "['liquor_store', 'store']",
        "['school']",
        "['restaurant', 'food']",
        "['accounting', 'finance']",
        "['moving_company', 'storage']",
        "['meal_takeaway', 'restaurant', 'food']",
    ]
    expectations = (
        ("origin", tags_origin[:-1]),
        ("destination", tags_destination[1:]),
    )
    for column, expected_groups in expectations:
        for group in expected_groups:
            self.assertIn(group, trans_proba[column].tolist())
            # The group is the string form of a list; parse it to get single tags.
            for tag in ast.literal_eval(group):
                self.assertIn(tag, dist_trans_proba[column].tolist())