def setUp(self) -> None:
    """Create the three session fixtures used by the tests.

    * ``sessions1`` - sessions given as lists of command-name strings.
    * ``sessions2`` - the same sessions as ``Cmd`` objects whose second
      argument is a set of parameter names.
    * ``sessions3`` - the same sessions as ``Cmd`` objects whose second
      argument is a dict mapping parameter names to values.
    """
    set_user = "Set-User"
    set_mailbox = "Set-Mailbox"

    # Command names only.
    names_first = [set_user, set_user]
    names_second = [set_mailbox, set_user, set_user]
    self.sessions1 = [names_first, names_second]

    # Commands with parameter names (sets).
    params_first = [
        Cmd(set_user, {"Identity"}),
        Cmd(set_user, {"Identity", "City", "Name"}),
    ]
    params_second = [
        Cmd(set_mailbox, {"Identity"}),
        Cmd(set_user, {"Identity", "City"}),
        Cmd(set_user, {"Identity"}),
    ]
    self.sessions2 = [params_first, params_second]

    # Commands with parameter names and values (dicts).
    values_first = [
        Cmd(set_user, {"Identity": "blah"}),
        Cmd(set_user, {"Identity": "haha", "City": "york", "Name": "bob"}),
    ]
    values_second = [
        Cmd(set_mailbox, {"Identity": "blah"}),
        Cmd(set_user, {"Identity": "blah", "City": "london"}),
        Cmd(set_user, {"Identity": "haha"}),
    ]
    self.sessions3 = [values_first, values_second]
def test_rarest_window_session(self):
    """Check ``cmds_params_only.rarest_window_session`` on small sessions.

    Every call uses the ``data3`` probabilities prepared by ``setUp`` and
    ``use_geo_mean=False``. Four cases are covered:

    1. empty session, ``window_len=1``, no start/end tokens:
       empty window and a NaN likelihood;
    2. empty session, ``window_len=1``, with start/end tokens:
       empty window and a likelihood of exactly 0.25;
    3. two-command session, ``window_len=3``, no start/end tokens:
       empty window and a NaN likelihood;
    4. two-command session, ``window_len=3``, with start/end tokens:
       a window of two commands and a likelihood that is not NaN.
    """

    def rarest(session, window_len, use_start_end_tokens):
        # Only these three arguments vary between the cases below.
        return cmds_params_only.rarest_window_session(
            session=session,
            prior_probs=self.data3["prior_probs"],
            trans_probs=self.data3["trans_probs"],
            param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
            window_len=window_len,
            use_start_end_tokens=use_start_end_tokens,
            start_token=START_TOKEN,
            end_token=END_TOKEN,
            use_geo_mean=False,
        )

    actual = rarest(session=[], window_len=1, use_start_end_tokens=False)
    self.assertListEqual(actual[0], [])
    self.assertTrue(np.isnan(actual[1]))

    actual = rarest(session=[], window_len=1, use_start_end_tokens=True)
    self.assertListEqual(actual[0], [])
    self.assertEqual(actual[1], 0.25)

    actual = rarest(
        session=[Cmd("Set-User", {"City"}), Cmd("drfjh", {})],
        window_len=3,
        use_start_end_tokens=False,
    )
    self.assertListEqual(actual[0], [])
    self.assertTrue(np.isnan(actual[1]))

    actual = rarest(
        session=[Cmd("Set-User", {"City"}), Cmd("drfjh", {})],
        window_len=3,
        use_start_end_tokens=True,
    )
    self.assertEqual(len(actual[0]), 2)
    # assertFalse rather than assertTrue(~isnan): bitwise NOT on a plain
    # Python bool gives -1 / -2, both truthy, so that form could never fail.
    self.assertFalse(np.isnan(actual[1]))
def test_compute_likelihood_window(self):
    """Check ``cmds_params_only.compute_likelihood_window``.

    Every call uses the ``data3`` probabilities prepared by ``setUp``.
    An empty window without start/end tokens must give NaN. A window
    holding a single ``Set-User`` command with the ``Identity`` parameter
    is then scored with no tokens, with the start token only, and with
    the end token only, and compared against reference values.

    The reference values are computed floats, so they are compared with
    ``assertAlmostEqual`` (default 7 decimal places) rather than exact
    equality, which could fail on last-digit rounding differences.
    """

    def likelihood(window, use_start_token, use_end_token):
        # Only the window and the two token flags vary between cases.
        return cmds_params_only.compute_likelihood_window(
            window=window,
            prior_probs=self.data3["prior_probs"],
            trans_probs=self.data3["trans_probs"],
            param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
            use_start_token=use_start_token,
            use_end_token=use_end_token,
            start_token=START_TOKEN,
            end_token=END_TOKEN,
        )

    actual = likelihood(window=[], use_start_token=False, use_end_token=False)
    self.assertTrue(np.isnan(actual))

    actual = likelihood(
        window=[Cmd("Set-User", {"Identity"})],
        use_start_token=False,
        use_end_token=False,
    )
    self.assertAlmostEqual(actual, 0.22787717886202657)

    actual = likelihood(
        window=[Cmd("Set-User", {"Identity"})],
        use_start_token=True,
        use_end_token=False,
    )
    self.assertAlmostEqual(actual, 0.31333112093528653)

    actual = likelihood(
        window=[Cmd("Set-User", {"Identity"})],
        use_start_token=False,
        use_end_token=True,
    )
    self.assertAlmostEqual(actual, 0.09115087154481064)
def setUp(self) -> None:
    """Create session fixtures and wrap each one in a DataFrame.

    ``sessions1`` holds command-name strings, ``sessions2`` holds ``Cmd``
    objects with sets of parameter names, and ``sessions3`` holds ``Cmd``
    objects with parameter-to-value dicts. ``data1``..``data3`` are
    two-row DataFrames with a ``session`` column (the matching fixture)
    and a ``time`` column (``self.times``).
    """
    set_user = "Set-User"
    set_mailbox = "Set-Mailbox"

    # Command names only.
    self.sessions1 = [
        [set_user, set_user],
        [set_mailbox, set_user, set_user],
    ]

    # Commands with parameter names (sets).
    short_params = [
        Cmd(set_user, {"Identity"}),
        Cmd(set_user, {"Identity", "City", "Name"}),
    ]
    long_params = [
        Cmd(set_mailbox, {"Identity"}),
        Cmd(set_user, {"Identity", "City"}),
        Cmd(set_user, {"Identity"}),
    ]
    self.sessions2 = [short_params, long_params]

    # Commands with parameter names and values (dicts).
    short_values = [
        Cmd(set_user, {"Identity": "blah"}),
        Cmd(set_user, {"Identity": "haha", "City": "york", "Name": "bob"}),
    ]
    long_values = [
        Cmd(set_mailbox, {"Identity": "blah"}),
        Cmd(set_user, {"Identity": "blah", "City": "london"}),
        Cmd(set_user, {"Identity": "haha"}),
    ]
    self.sessions3 = [short_values, long_values]

    # One timestamp per session.
    self.times = [datetime(2019, 3, 1), datetime(2019, 5, 6)]

    def as_frame(sessions):
        # Pair each session with its timestamp.
        return pd.DataFrame({"session": sessions, "time": self.times})

    self.data1 = as_frame(self.sessions1)
    self.data2 = as_frame(self.sessions2)
    self.data3 = as_frame(self.sessions3)
def test_compute_likelihood_windows_in_session(self):
    """Check ``cmds_params_values.compute_likelihood_windows_in_session``.

    Every call uses the ``data3`` probabilities prepared by ``setUp``,
    an empty ``modellable_params`` set, ``window_len=1`` and
    ``use_geo_mean=False``. Three cases are covered:

    1. empty session, no start/end tokens: an empty list;
    2. empty session, with start/end tokens: ``[0.25]``;
    3. a single ``Set-User`` command with ``Identity="blah"``, no tokens:
       one likelihood, compared against a reference value.

    The reference value in case 3 is a computed float, so it is compared
    with ``assertAlmostEqual`` (default 7 decimal places) rather than
    exact list equality, which could fail on last-digit rounding
    differences.
    """

    def window_likelihoods(session, use_start_end_tokens):
        # Only the session and the token flag vary between cases.
        return cmds_params_values.compute_likelihood_windows_in_session(
            session=session,
            prior_probs=self.data3["prior_probs"],
            trans_probs=self.data3["trans_probs"],
            param_cond_cmd_probs=self.data3["param_cond_cmd_probs"],
            value_cond_param_probs=self.data3["value_cond_param_probs"],
            modellable_params=set(),
            window_len=1,
            use_start_end_tokens=use_start_end_tokens,
            start_token=START_TOKEN,
            end_token=END_TOKEN,
            use_geo_mean=False,
        )

    actual = window_likelihoods(session=[], use_start_end_tokens=False)
    self.assertListEqual(actual, [])

    actual = window_likelihoods(session=[], use_start_end_tokens=True)
    self.assertListEqual(actual, [0.25])

    actual = window_likelihoods(
        session=[Cmd("Set-User", {"Identity": "blah"})],
        use_start_end_tokens=False,
    )
    # Keep the type and length guarantees that assertListEqual gave.
    self.assertIsInstance(actual, list)
    self.assertEqual(len(actual), 1)
    self.assertAlmostEqual(actual[0], 0.22787717886202657)
def setUp(self):
    """Build the count fixtures ``data1``, ``data2`` and ``data3``.

    Each fixture is a plain dict holding the input ``sessions``, raw
    count tables (``defaultdict`` objects whose missing keys read as 0),
    the matching ``*_ls`` tables as ordinary dicts, and the ``cmds`` and
    ``params`` lists.

    * ``data1`` - no sessions at all.
    * ``data2`` - one empty session.
    * ``data3`` - one session of two ``Set-User`` commands.

    NOTE(review): the ``*_ls`` tables look like the smoothed counts the
    code under test is expected to produce - confirm against that module.
    """

    def flat_counts(seed=()):
        # Flat counter; unseen keys read as 0.
        return defaultdict(lambda: 0, seed)

    def nested_counts():
        # Two-level counter; unseen inner keys read as 0.
        return defaultdict(lambda: defaultdict(lambda: 0))

    # ---- data1: no sessions -------------------------------------------
    self.data1 = {
        "sessions": [],
        "seq1_counts": flat_counts(),
        "seq1_counts_ls": {UNK_TOKEN: 2},
        "seq2_counts": nested_counts(),
        "seq2_counts_ls": {UNK_TOKEN: {UNK_TOKEN: 1}},
        "param_counts": flat_counts(),
        "param_counts_ls": {UNK_TOKEN: 1},
        "cmd_param_counts": nested_counts(),
        "cmd_param_counts_ls": {UNK_TOKEN: {UNK_TOKEN: 1}},
        "value_counts": flat_counts(),
        "value_counts_ls": {UNK_TOKEN: 1},
        "param_value_counts": nested_counts(),
        "param_value_counts_ls": {UNK_TOKEN: {UNK_TOKEN: 1}},
        "cmds": [UNK_TOKEN],
        "params": [UNK_TOKEN],
    }

    # ---- data2: a single empty session --------------------------------
    empty_seq2 = nested_counts()
    empty_seq2[START_TOKEN][END_TOKEN] = 1
    self.data2 = {
        "sessions": [[]],
        "seq1_counts": flat_counts({START_TOKEN: 1, END_TOKEN: 1}),
        "seq1_counts_ls": {UNK_TOKEN: 4, START_TOKEN: 3, END_TOKEN: 3},
        "seq2_counts": empty_seq2,
        "seq2_counts_ls": {
            START_TOKEN: {END_TOKEN: 2, UNK_TOKEN: 1},
            UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1},
        },
        "param_counts": flat_counts(),
        "param_counts_ls": {UNK_TOKEN: 3},
        "cmd_param_counts": nested_counts(),
        "cmd_param_counts_ls": {
            START_TOKEN: {UNK_TOKEN: 1},
            END_TOKEN: {UNK_TOKEN: 1},
            UNK_TOKEN: {UNK_TOKEN: 1},
        },
        "value_counts": flat_counts(),
        "value_counts_ls": {UNK_TOKEN: 1},
        "param_value_counts": nested_counts(),
        "param_value_counts_ls": {UNK_TOKEN: {UNK_TOKEN: 1}},
        "cmds": [START_TOKEN, END_TOKEN, UNK_TOKEN],
        "params": [UNK_TOKEN],
    }

    # ---- data3: one session of two Set-User commands ------------------
    cmd = "Set-User"

    cmd_seq2 = nested_counts()
    cmd_seq2[START_TOKEN][cmd] = 1
    cmd_seq2[cmd][cmd] = 1
    cmd_seq2[cmd][END_TOKEN] = 1

    cmd_params = nested_counts()
    cmd_params[cmd]["City"] = 1
    cmd_params[cmd]["Identity"] = 2

    param_values = nested_counts()
    param_values["City"]["york"] = 1
    param_values["Identity"]["blah"] = 2

    self.data3 = {
        "sessions": [
            [
                Cmd(name="Set-User", params={"City": "york", "Identity": "blah"}),
                Cmd(name="Set-User", params={"Identity": "blah"}),
            ]
        ],
        "seq1_counts": flat_counts({START_TOKEN: 1, cmd: 2, END_TOKEN: 1}),
        "seq1_counts_ls": {
            UNK_TOKEN: 6,
            START_TOKEN: 4,
            END_TOKEN: 4,
            cmd: 8,
        },
        "seq2_counts": cmd_seq2,
        "seq2_counts_ls": {
            START_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 2},
            UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 1},
            cmd: {cmd: 2, END_TOKEN: 2, UNK_TOKEN: 1},
        },
        "param_counts": flat_counts({"City": 1, "Identity": 2}),
        "param_counts_ls": {UNK_TOKEN: 4, "City": 2, "Identity": 3},
        "cmd_param_counts": cmd_params,
        "cmd_param_counts_ls": {
            START_TOKEN: {UNK_TOKEN: 1},
            END_TOKEN: {UNK_TOKEN: 1},
            UNK_TOKEN: {UNK_TOKEN: 1},
            cmd: {"City": 2, "Identity": 3, UNK_TOKEN: 1},
        },
        "value_counts": flat_counts({"york": 1, "blah": 2}),
        "value_counts_ls": {"york": 2, "blah": 3, UNK_TOKEN: 3},
        "param_value_counts": param_values,
        "param_value_counts_ls": {
            "City": {"york": 2, UNK_TOKEN: 1},
            "Identity": {"blah": 3, UNK_TOKEN: 1},
            UNK_TOKEN: {UNK_TOKEN: 1},
        },
        "cmds": [START_TOKEN, END_TOKEN, UNK_TOKEN, cmd],
        "params": [UNK_TOKEN, "City", "Identity"],
    }
def setUp(self):
    """Build the ``StateMatrix`` fixtures ``data1``, ``data2`` and ``data3``.

    Each fixture is a plain dict holding the input ``sessions`` plus
    count and probability tables for commands, parameters and values.
    Every table is a ``StateMatrix`` constructed with ``UNK_TOKEN`` as
    its second argument.

    * ``data1`` - no sessions at all.
    * ``data2`` - one empty session.
    * ``data3`` - one session of two ``Set-User`` commands with
      parameter values.
    """

    def matrix(states):
        # Every table in this fixture uses UNK_TOKEN as second argument.
        return StateMatrix(states, UNK_TOKEN)

    # ---- data1: no sessions -------------------------------------------
    self.data1 = {
        "sessions": [],
        "seq1_counts": matrix({UNK_TOKEN: 2}),
        "seq2_counts": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "param_counts": matrix({UNK_TOKEN: 1}),
        "cmd_param_counts": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "value_counts": matrix({UNK_TOKEN: 1}),
        "param_value_counts": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "prior_probs": matrix({UNK_TOKEN: 1}),
        "trans_probs": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "param_probs": matrix({UNK_TOKEN: 0.5}),
        "param_cond_cmd_probs": matrix({UNK_TOKEN: {UNK_TOKEN: 0.5}}),
        "value_probs": matrix({UNK_TOKEN: 1}),
        "value_cond_param_probs": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
    }

    # ---- data2: a single empty session --------------------------------
    self.data2 = {
        "sessions": [[]],
        "seq1_counts": matrix({UNK_TOKEN: 4, START_TOKEN: 3, END_TOKEN: 3}),
        "seq2_counts": matrix(
            {
                START_TOKEN: {END_TOKEN: 2, UNK_TOKEN: 1},
                UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1},
            }
        ),
        "param_counts": matrix({UNK_TOKEN: 3}),
        "cmd_param_counts": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 1},
                END_TOKEN: {UNK_TOKEN: 1},
                UNK_TOKEN: {UNK_TOKEN: 1},
            }
        ),
        "value_counts": matrix({UNK_TOKEN: 1}),
        "param_value_counts": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "prior_probs": matrix(
            {START_TOKEN: 0.3, END_TOKEN: 0.3, UNK_TOKEN: 0.4}
        ),
        "trans_probs": matrix(
            {
                START_TOKEN: {
                    END_TOKEN: 0.6666666666666666,
                    UNK_TOKEN: 0.3333333333333333,
                },
                UNK_TOKEN: {END_TOKEN: 0.5, UNK_TOKEN: 0.5},
            }
        ),
        "param_probs": matrix({UNK_TOKEN: 0.3}),
        "param_cond_cmd_probs": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 0.3333333333333333},
                END_TOKEN: {UNK_TOKEN: 0.3333333333333333},
                UNK_TOKEN: {UNK_TOKEN: 0.25},
            }
        ),
        "value_probs": matrix({UNK_TOKEN: 1}),
        "value_cond_param_probs": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
    }

    # ---- data3: one session of two Set-User commands ------------------
    cmd = "Set-User"
    self.data3 = {
        "sessions": [
            [
                Cmd(name="Set-User", params={"City": "york", "Identity": "blah"}),
                Cmd(name="Set-User", params={"Identity": "blah"}),
            ]
        ],
        "seq1_counts": matrix(
            {UNK_TOKEN: 6, START_TOKEN: 4, END_TOKEN: 4, cmd: 8}
        ),
        "seq2_counts": matrix(
            {
                START_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 2},
                UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 1},
                cmd: {cmd: 2, END_TOKEN: 2, UNK_TOKEN: 1},
            }
        ),
        "param_counts": matrix({UNK_TOKEN: 4, "City": 2, "Identity": 3}),
        "cmd_param_counts": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 1},
                END_TOKEN: {UNK_TOKEN: 1},
                UNK_TOKEN: {UNK_TOKEN: 1},
                cmd: {"City": 2, "Identity": 3, UNK_TOKEN: 1},
            }
        ),
        "value_counts": matrix({"york": 2, "blah": 3, UNK_TOKEN: 3}),
        "param_value_counts": matrix(
            {
                "City": {"york": 2, UNK_TOKEN: 1},
                "Identity": {"blah": 3, UNK_TOKEN: 1},
                UNK_TOKEN: {UNK_TOKEN: 1},
            }
        ),
        "prior_probs": matrix(
            {
                START_TOKEN: 0.18181818181818182,
                END_TOKEN: 0.18181818181818182,
                UNK_TOKEN: 0.2727272727272727,
                cmd: 0.36363636363636365,
            }
        ),
        "trans_probs": matrix(
            {
                START_TOKEN: {END_TOKEN: 0.25, UNK_TOKEN: 0.25, cmd: 0.5},
                UNK_TOKEN: {END_TOKEN: 1 / 3, UNK_TOKEN: 1 / 3, cmd: 1 / 3},
                cmd: {END_TOKEN: 0.4, UNK_TOKEN: 0.2, cmd: 0.4},
            }
        ),
        "param_probs": matrix(
            {
                UNK_TOKEN: 0.18181818181818182,
                "Identity": 0.13636363636363635,
                "City": 0.09090909090909091,
            }
        ),
        "param_cond_cmd_probs": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 0.25},
                END_TOKEN: {UNK_TOKEN: 0.25},
                UNK_TOKEN: {UNK_TOKEN: 0.16666666666666666},
                cmd: {"City": 0.25, "Identity": 0.375, UNK_TOKEN: 0.125},
            }
        ),
        "value_probs": matrix({"york": 0.25, "blah": 0.375, UNK_TOKEN: 0.375}),
        "value_cond_param_probs": matrix(
            {
                "City": {
                    "york": 0.6666666666666666,
                    UNK_TOKEN: 0.3333333333333333,
                },
                "Identity": {"blah": 0.75, UNK_TOKEN: 0.25},
                UNK_TOKEN: {UNK_TOKEN: 1.0},
            }
        ),
    }
def setUp(self):
    """Build the command/parameter fixtures ``data1``, ``data2``, ``data3``.

    Each fixture is a plain dict holding the input ``sessions``, raw
    count tables under underscore-prefixed keys (``defaultdict`` objects
    whose missing keys read as 0), and ``StateMatrix`` count and
    probability tables constructed with ``UNK_TOKEN`` as their second
    argument.

    * ``data1`` - no sessions at all.
    * ``data2`` - one empty session.
    * ``data3`` - one session of two ``Set-User`` commands whose
      parameters are given as sets of names.
    """

    def flat_counts(seed=()):
        # Flat counter; unseen keys read as 0.
        return defaultdict(lambda: 0, seed)

    def nested_counts():
        # Two-level counter; unseen inner keys read as 0.
        return defaultdict(lambda: defaultdict(lambda: 0))

    def matrix(states):
        # Every table in this fixture uses UNK_TOKEN as second argument.
        return StateMatrix(states, UNK_TOKEN)

    # ---- data1: no sessions -------------------------------------------
    self.data1 = {
        "sessions": [],
        "_seq1_counts": flat_counts(),
        "seq1_counts": matrix({UNK_TOKEN: 2}),
        "_seq2_counts": nested_counts(),
        "seq2_counts": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "_param_counts": flat_counts(),
        "param_counts": matrix({UNK_TOKEN: 1}),
        "_cmd_param_counts": nested_counts(),
        "cmd_param_counts": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "prior_probs": matrix({UNK_TOKEN: 1}),
        "trans_probs": matrix({UNK_TOKEN: {UNK_TOKEN: 1}}),
        "param_probs": matrix({UNK_TOKEN: 0.5}),
        "param_cond_cmd_probs": matrix({UNK_TOKEN: {UNK_TOKEN: 0.5}}),
    }

    # ---- data2: a single empty session --------------------------------
    empty_seq2 = nested_counts()
    empty_seq2[START_TOKEN][END_TOKEN] = 1
    self.data2 = {
        "sessions": [[]],
        "_seq1_counts": flat_counts({START_TOKEN: 1, END_TOKEN: 1}),
        "seq1_counts": matrix({UNK_TOKEN: 4, START_TOKEN: 3, END_TOKEN: 3}),
        "_seq2_counts": empty_seq2,
        "seq2_counts": matrix(
            {
                START_TOKEN: {END_TOKEN: 2, UNK_TOKEN: 1},
                UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1},
            }
        ),
        "_param_counts": flat_counts(),
        "param_counts": matrix({UNK_TOKEN: 3}),
        "_cmd_param_counts": nested_counts(),
        "cmd_param_counts": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 1},
                END_TOKEN: {UNK_TOKEN: 1},
                UNK_TOKEN: {UNK_TOKEN: 1},
            }
        ),
        "prior_probs": matrix(
            {START_TOKEN: 0.3, END_TOKEN: 0.3, UNK_TOKEN: 0.4}
        ),
        "trans_probs": matrix(
            {
                START_TOKEN: {
                    END_TOKEN: 0.6666666666666666,
                    UNK_TOKEN: 0.3333333333333333,
                },
                UNK_TOKEN: {END_TOKEN: 0.5, UNK_TOKEN: 0.5},
            }
        ),
        "param_probs": matrix({UNK_TOKEN: 0.3}),
        "param_cond_cmd_probs": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 0.3333333333333333},
                END_TOKEN: {UNK_TOKEN: 0.3333333333333333},
                UNK_TOKEN: {UNK_TOKEN: 0.25},
            }
        ),
    }

    # ---- data3: one session of two Set-User commands ------------------
    cmd = "Set-User"

    cmd_seq2 = nested_counts()
    cmd_seq2[START_TOKEN][cmd] = 1
    cmd_seq2[cmd][END_TOKEN] = 1
    cmd_seq2[cmd][cmd] = 1

    cmd_params = nested_counts()
    cmd_params[cmd]["Identity"] = 2
    cmd_params[cmd]["City"] = 1

    self.data3 = {
        "sessions": [
            [
                Cmd(name="Set-User", params={"City", "Identity"}),
                Cmd(name="Set-User", params={"Identity"}),
            ]
        ],
        "_seq1_counts": flat_counts({START_TOKEN: 1, END_TOKEN: 1, cmd: 2}),
        "seq1_counts": matrix(
            {UNK_TOKEN: 6, START_TOKEN: 4, END_TOKEN: 4, cmd: 8}
        ),
        "_seq2_counts": cmd_seq2,
        "seq2_counts": matrix(
            {
                START_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 2},
                UNK_TOKEN: {END_TOKEN: 1, UNK_TOKEN: 1, cmd: 1},
                cmd: {cmd: 2, END_TOKEN: 2, UNK_TOKEN: 1},
            }
        ),
        "_param_counts": flat_counts({"Identity": 2, "City": 1}),
        "param_counts": matrix({UNK_TOKEN: 4, "City": 2, "Identity": 3}),
        "_cmd_param_counts": cmd_params,
        "cmd_param_counts": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 1},
                END_TOKEN: {UNK_TOKEN: 1},
                UNK_TOKEN: {UNK_TOKEN: 1},
                cmd: {"City": 2, "Identity": 3, UNK_TOKEN: 1},
            }
        ),
        "prior_probs": matrix(
            {
                START_TOKEN: 0.18181818181818182,
                END_TOKEN: 0.18181818181818182,
                UNK_TOKEN: 0.2727272727272727,
                cmd: 0.36363636363636365,
            }
        ),
        "trans_probs": matrix(
            {
                START_TOKEN: {END_TOKEN: 0.25, UNK_TOKEN: 0.25, cmd: 0.5},
                UNK_TOKEN: {END_TOKEN: 1 / 3, UNK_TOKEN: 1 / 3, cmd: 1 / 3},
                cmd: {END_TOKEN: 0.4, UNK_TOKEN: 0.2, cmd: 0.4},
            }
        ),
        "param_probs": matrix(
            {
                UNK_TOKEN: 0.18181818181818182,
                "Identity": 0.13636363636363635,
                "City": 0.09090909090909091,
            }
        ),
        "param_cond_cmd_probs": matrix(
            {
                START_TOKEN: {UNK_TOKEN: 0.25},
                END_TOKEN: {UNK_TOKEN: 0.25},
                UNK_TOKEN: {UNK_TOKEN: 0.16666666666666666},
                cmd: {"City": 0.25, "Identity": 0.375, UNK_TOKEN: 0.125},
            }
        ),
    }