def test_get_word_type_for_state(self):
    """Check the word-type lookup for a plain state, a tuple state and s0."""
    chain = MarkovChain(self.states)
    self.markov_chain = chain
    lookup = chain.get_word_type_for_state

    # "s1" is expected to resolve to itself, "s4" to "dict".
    for state, expected_word_type in (("s1", "s1"), ("s4", "dict")):
        self.assertEqual(expected_word_type, lookup(state))

    # Asking for the word-type of "s0" must raise.
    with self.assertRaises(ValueError):
        lookup("s0")
def test_get_current_word_type(self):
    """Check the word-type reported for the chain's current state."""
    chain = MarkovChain(self.states)
    self.markov_chain = chain

    for state, expected_word_type in (("s1", "s1"), ("s4", "dict")):
        chain.current_state = state
        self.assertEqual(expected_word_type, chain.get_current_word_type())

    # At "s0" the current word-type is expected to be None.
    chain.current_state = "s0"
    self.assertIsNone(chain.get_current_word_type())
def init_markov_chain(filename: str) -> MarkovChain:
    """
    Load a Markov chain from a file, falling back to an empty chain.

    :param filename: passed to markov.load_markov_chain
    :return: the loaded chain, or MarkovChain(set()) when loading returns
        None or raises ValueError
    """
    try:
        chain = markov.load_markov_chain(filename)
    except ValueError:
        chain = None
    return MarkovChain(set()) if chain is None else chain
def test_dict(self):
    """Check the serialised form of a chain that has no transitions set."""
    self.markov_chain = MarkovChain(self.states)
    serialised = self.markov_chain.__dict__()

    self.assertIsInstance(serialised, dict)
    self.assertEqual(2, len(serialised))

    # Both entries must be dicts: "chain" is empty, "wt_refs" matches the
    # fixture's expected references.
    expected_entries = (("chain", {}), ("wt_refs", self.test_wt_refs))
    for key, expected in expected_entries:
        entry = serialised.get(key)
        self.assertIsInstance(entry, dict)
        self.assertDictEqual(expected, entry)
def setUp(self):
    """Build a chain over s1..s4 and a set of (from, to, weight) transitions."""
    self.states = {"s1", "s2", "s3", ("s4", "dict")}
    self.markov_chain = MarkovChain(self.states)
    # The transitions are only prepared here; each test applies them itself.
    self.transitions = {
        ("s0", "s1", 2),
        ("s0", "s2", 3),
        ("s1", "s3", 0.5),
        ("s1", "s4", 0.5),
        ("s2", "s4", 1),
        ("s3", "s0", 1),
        ("s4", "s0", 2),
    }
def test_get_number_of_paths(self):
    """Check the path count before and after adding an extra state s5."""
    self.markov_chain.set_transitions(self.transitions)
    self.assertEqual(3, markov.get_number_of_paths(self.markov_chain))

    # Rebuild the chain with an additional state reachable from s2 and s4.
    self.states.add("s5")
    self.markov_chain = MarkovChain(self.states)
    self.transitions.update({
        ("s2", "s5", 1),
        ("s4", "s5", 1),
        ("s5", "s0", 1),
    })
    self.markov_chain.set_transitions(self.transitions)
    self.assertEqual(6, markov.get_number_of_paths(self.markov_chain))
def test_init(self):
    """Check the states, start state, empty chain and word-type references."""
    chain = MarkovChain(self.states)
    self.markov_chain = chain

    self.assertSetEqual(self.test_states, chain.states)
    self.assertEqual("s0", chain.current_state)

    # Before any transitions are set, every state maps to None.
    untouched_chain = dict.fromkeys(("s0", "s1", "s2", "s3", "s4"))
    self.assertDictEqual(untouched_chain, chain.markov_chain)
    self.assertDictEqual(self.test_wt_refs, chain.wt_refs)
def setUp(self):
    """Build three mapping dictionaries, a word-type dictionary and a chain."""
    word_sets = (
        ("animals", {
            ("penguin", Bits(bin="000")),
            ("tiger", Bits(bin="001")),
            ("giraffe", Bits(bin="01")),
            ("dog", Bits(bin="10")),
            ("cat", Bits(bin="11")),
        }),
        ("stationery", {
            ("pen", Bits(bin="1")),
            ("pencil", Bits(bin="00")),
            ("paper", Bits(bin="01")),
        }),
        ("colours", {
            ("red", Bits(bin="111")),
            ("orange", Bits(bin="110")),
            ("blue", Bits(bin="10")),
            ("yellow", Bits(bin="01")),
            ("green", Bits(bin="00")),
        }),
    )

    # self.mappings / self.mapping_dict end up holding the last ("colours")
    # entry.
    self.input_dict = {}
    for word_type, word_set in word_sets:
        self.mappings = word_set
        self.mapping_dict = MappingDictionary(word_set)
        self.input_dict[word_type] = self.mapping_dict
    self.wt_dict = WordTypeDictionary(self.input_dict)

    self.states = {"animals", "stationery", "colours"}
    self.markov_chain = MarkovChain(self.states)
    self.transitions = {
        ("s0", "animals", 4),
        ("s0", "stationery", 2),
        ("animals", "colours", 0.3),
        ("animals", "stationery", 0.7),
        ("stationery", "s0", 0.5),
        ("stationery", "colours", 0.5),
        ("colours", "s0", 1),
    }
    self.markov_chain.set_transitions(self.transitions)
class TestMarkovChain(unittest.TestCase):
    """Tests for MarkovChain construction, serialisation and word-type lookup."""

    def setUp(self):
        # Input states: three plain labels plus one (label, word-type) tuple.
        self.states = {"s1", "s2", "s3", ("s4", "dict")}
        # Expected state labels and word-type references after construction.
        self.test_states = {"s0", "s1", "s2", "s3", "s4"}
        self.test_wt_refs = {
            "s1": "s1",
            "s2": "s2",
            "s3": "s3",
            "s4": "dict",
        }

    def _assert_initial_state(self):
        """Assertions shared by the constructor tests."""
        chain = self.markov_chain
        self.assertSetEqual(self.test_states, chain.states)
        self.assertEqual("s0", chain.current_state)

    def test_init(self):
        self.markov_chain = MarkovChain(self.states)
        self._assert_initial_state()
        # Before any transitions are set, every state maps to None.
        untouched_chain = dict.fromkeys(("s0", "s1", "s2", "s3", "s4"))
        self.assertDictEqual(untouched_chain, self.markov_chain.markov_chain)
        self.assertDictEqual(self.test_wt_refs, self.markov_chain.wt_refs)

    def test_init_s0(self):
        # Passing "s0" explicitly must give the same result as omitting it.
        self.markov_chain = MarkovChain(self.states | {"s0"})
        self._assert_initial_state()
        self.assertDictEqual(self.test_wt_refs, self.markov_chain.wt_refs)

    def test_init_s0_tuple(self):
        # An ("s0", word-type) tuple must not add a word-type reference.
        self.markov_chain = MarkovChain(self.states | {("s0", "any")})
        self._assert_initial_state()
        self.assertDictEqual(self.test_wt_refs, self.markov_chain.wt_refs)

    def test_dict(self):
        self.markov_chain = MarkovChain(self.states)
        serialised = self.markov_chain.__dict__()
        self.assertIsInstance(serialised, dict)
        self.assertEqual(2, len(serialised))
        expected_entries = (("chain", {}), ("wt_refs", self.test_wt_refs))
        for key, expected in expected_entries:
            entry = serialised.get(key)
            self.assertIsInstance(entry, dict)
            self.assertDictEqual(expected, entry)

    def test_get_word_type_for_state(self):
        self.markov_chain = MarkovChain(self.states)
        lookup = self.markov_chain.get_word_type_for_state
        for state, expected_word_type in (("s1", "s1"), ("s4", "dict")):
            self.assertEqual(expected_word_type, lookup(state))
        with self.assertRaises(ValueError):
            lookup("s0")

    def test_get_current_word_type(self):
        chain = MarkovChain(self.states)
        self.markov_chain = chain
        for state, expected_word_type in (("s1", "s1"), ("s4", "dict")):
            chain.current_state = state
            self.assertEqual(expected_word_type,
                             chain.get_current_word_type())
        chain.current_state = "s0"
        self.assertIsNone(chain.get_current_word_type())
def _encode_next_word(chain: MarkovChain, wt_dict: WordTypeDictionary,
                      bits: Bits) -> Tuple[str, Bits, bool]:
    """
    Pick the next word for the chain's current state.

    Looks up the mapping dictionary for the chain's current word-type and
    retrieves a word from it for the given bits.

    :param chain: the Markov chain; only its current word-type is read
    :param wt_dict: word-type dictionary whose wt_dict attribute maps
        word-types to mapping dictionaries
    :param bits: the bits to match against the mapping dictionary
    :return: a tuple of (word, the bits mapped to that word in the mapping
        dictionary, the mapping dictionary's encode_spaces attribute)
    :raises ValueError: if no mapping dictionary exists for the current
        word-type, or if no word could be retrieved for the bits
    """
    word_type = chain.get_current_word_type()
    mapping_dict = wt_dict.wt_dict.get(word_type)
    if mapping_dict is None:
        raise ValueError(
            "Unable to find mapping dictionary for word-type {}".format(
                word_type))

    try:
        # NOTE(review): the third argument presumably enables padding of the
        # input bits -- confirm against retrieve_word_from_mappings.
        word = retrieve_word_from_mappings(bits, mapping_dict, True)
    except ValueError as err:
        # Chain explicitly so the underlying lookup failure is kept as the
        # cause of the more descriptive error.
        raise ValueError(
            "Failed to retrieve a word for word-type {}".format(
                word_type)) from err

    return word, mapping_dict.mappings.get(word), mapping_dict.encode_spaces
def encode_bits_as_words(chain: MarkovChain,
                         wt_dict: WordTypeDictionary,
                         bits: Bits,
                         pad_text=True) -> list:
    """
    Given a bit stream, a Markov chain, and a word-type dictionary, retrieve
    a corresponding list of words.

    Every state in the Markov chain, except the start state s0, must have a
    corresponding word-type in the given dictionary with at least one word.

    If the word-type dictionary does not have path bits to match the end of
    the input exactly, it will append 0s until the function can complete.

    :param chain: a Markov chain with states; it is advanced in place
    :param wt_dict: a corresponding dictionary of word-types
    :param bits: the input bits
    :param pad_text: if true, generate cover text from random bits until the
        Markov chain reaches state s0
    :return: an ordered list of (word, encode_spaces) tuples encoded by the
        system
    :raises ValueError: if bits is None or empty, or if a word cannot be
        retrieved for some state
    """
    if bits is None or bits == Bits():
        raise ValueError("Bits cannot be None or empty.")

    words = []
    prefix = bits
    while len(prefix) > 0:
        chain.transition()
        # The start state has no word of its own, so step past it.
        if chain.current_state == START_STATE_LABEL:
            chain.transition()

        word, word_bits, encode_spaces = _encode_next_word(
            chain, wt_dict, prefix)
        words.append((word, encode_spaces))
        # Drop the bits consumed by the word just emitted.
        prefix = prefix[len(word_bits):]

    if pad_text:
        # Add filler words until s0 is reached. The filler length does not
        # change between iterations, so it is computed once, on first use.
        filler_length = None
        while chain.current_state != START_STATE_LABEL:
            chain.transition()
            if chain.current_state == START_STATE_LABEL:
                break

            if filler_length is None:
                # As many random bits as len() of the dictionary's longest
                # word.
                filler_length = len(get_longest_word_in_dictionary(wt_dict))
            pseudo_random_bits = Bits(bin="".join(
                random.choice(["0", "1"]) for _ in range(filler_length)))
            word, _, encode_spaces = _encode_next_word(
                chain, wt_dict, pseudo_random_bits)
            words.append((word, encode_spaces))

    return words
class TestEncodeBits(unittest.TestCase):
    """Tests for extendedcoder.encode_bits_as_words."""

    def setUp(self):
        animals = {
            ("penguin", Bits(bin="000")),
            ("tiger", Bits(bin="001")),
            ("giraffe", Bits(bin="01")),
            ("dog", Bits(bin="10")),
            ("cat", Bits(bin="11")),
        }
        stationery = {
            ("pen", Bits(bin="1")),
            ("pencil", Bits(bin="00")),
            ("paper", Bits(bin="01")),
        }
        colours = {
            ("red", Bits(bin="111")),
            ("orange", Bits(bin="110")),
            ("blue", Bits(bin="10")),
            ("yellow", Bits(bin="01")),
            ("green", Bits(bin="00")),
        }

        # self.mappings / self.mapping_dict end up holding the last
        # ("colours") entry.
        self.input_dict = {}
        for word_type, word_set in (("animals", animals),
                                    ("stationery", stationery),
                                    ("colours", colours)):
            self.mappings = word_set
            self.mapping_dict = MappingDictionary(word_set)
            self.input_dict[word_type] = self.mapping_dict
        self.wt_dict = WordTypeDictionary(self.input_dict)

        # State "x" draws its words from the "colours" word-type.
        self.states = {"animals", "stationery", ("x", "colours")}
        self.markov_chain = MarkovChain(self.states)
        self.transitions = {
            ("s0", "animals", 4),
            ("s0", "stationery", 2),
            ("animals", "x", 0.3),
            ("animals", "stationery", 0.7),
            ("stationery", "x", 0.5),
            ("x", "s0", 1),
        }
        self.markov_chain.set_transitions(self.transitions)

    def test_encode_bits_as_string(self):
        bits = Bits(bin="00101011100101010")
        words = extendedcoder.encode_bits_as_words(self.markov_chain,
                                                   self.wt_dict, bits)
        longest_bit_string = 3
        shortest_bit_string = 1
        # -(-a // b) is ceiling division.
        fewest_words = -(-len(bits) // longest_bit_string)
        most_words = -(-len(bits) // shortest_bit_string) + 8  # accounting for padding
        word_count = len(words)
        self.assertGreaterEqual(word_count, fewest_words)
        self.assertLessEqual(word_count, most_words)

    def test_encode_bits_as_string_padded(self):
        bits = Bits(bin="01")
        words = extendedcoder.encode_bits_as_words(self.markov_chain,
                                                   self.wt_dict, bits)
        word_count = len(words)
        self.assertGreaterEqual(word_count, 2)  # s0 > stationery > x > s0
        self.assertLessEqual(word_count, 3)  # s0 > animals > stationery > x > s0

    def test_encode_bits_as_string_no_pad(self):
        bits = Bits(bin="01")
        words = extendedcoder.encode_bits_as_words(self.markov_chain,
                                                   self.wt_dict, bits, False)
        self.assertEqual(1, len(words))  # 1 word from stationery or animals
class TestEncodeMessage(unittest.TestCase):
    """Tests for extendedcoder header helpers and encode_message."""

    def setUp(self):
        animals = {
            ("penguin", Bits(bin="000")),
            ("tiger", Bits(bin="001")),
            ("giraffe", Bits(bin="01")),
            ("dog", Bits(bin="10")),
            ("cat", Bits(bin="11")),
        }
        stationery = {
            ("pen", Bits(bin="1")),
            ("pencil", Bits(bin="00")),
            ("paper", Bits(bin="01")),
        }
        colours = {
            ("red", Bits(bin="111")),
            ("orange", Bits(bin="110")),
            ("blue", Bits(bin="10")),
            ("yellow", Bits(bin="01")),
            ("green", Bits(bin="00")),
        }

        # self.mappings / self.mapping_dict end up holding the last
        # ("colours") entry.
        self.input_dict = {}
        for word_type, word_set in (("animals", animals),
                                    ("stationery", stationery),
                                    ("colours", colours)):
            self.mappings = word_set
            self.mapping_dict = MappingDictionary(word_set)
            self.input_dict[word_type] = self.mapping_dict
        self.wt_dict = WordTypeDictionary(self.input_dict)

        self.states = {"animals", "stationery", "colours"}
        self.markov_chain = MarkovChain(self.states)
        self.transitions = {
            ("s0", "animals", 4),
            ("s0", "stationery", 2),
            ("animals", "colours", 0.3),
            ("animals", "stationery", 0.7),
            ("stationery", "s0", 0.5),
            ("stationery", "colours", 0.5),
            ("colours", "s0", 1),
        }
        self.markov_chain.set_transitions(self.transitions)

    def test_get_fixed_length_header(self):
        # (expected header bits, message length, header length)
        cases = (
            ("1100101011110000", 618, 16),
            ("11001000", 1, 8),
            ("00110111", 256, 8),
        )
        for expected_bin, message_length, header_length in cases:
            header = extendedcoder.get_fixed_length_header(
                message_length, header_length)
            self.assertEqual(expected_bin, header.bin)

    def test_get_fixed_length_header_inverses(self):
        # Headers generated should have a property whereby each number has
        # an inverse number, whose inverse is the original number.
        def invert(value):
            header_bin = extendedcoder.get_fixed_length_header(value, 16).bin
            return Bits(bin=header_bin).uint + 1

        numbers_to_test = {618, 1, 65536}
        for number in numbers_to_test:
            self.assertEqual(number, invert(invert(number)))

    def test_get_fixed_length_header_exact(self):
        header = extendedcoder.get_fixed_length_header(17, 4)
        self.assertEqual("01001", header.bin)

    def test_get_fixed_length_header_long_message(self):
        with self.assertRaises(ValueError):
            extendedcoder.get_fixed_length_header(18, 4)

    def test_get_fixed_length_header_zero_length_message(self):
        with self.assertRaises(ValueError):
            extendedcoder.get_fixed_length_header(0, 4)

    def test_get_message_length_from_header(self):
        # (expected message length, header bits)
        cases = (
            (618, "1100101011110000"),
            (1, "11001000"),
            (256, "00110111"),
        )
        for expected_length, header_bin in cases:
            decoded = extendedcoder.get_message_length_from_header(
                Bits(bin=header_bin))
            self.assertEqual(expected_length, decoded)

    def test_encode_message(self):
        bits = Bits(bin="0100101110010110100101")
        header_length = 6
        cover_text = extendedcoder.encode_message(self.markov_chain,
                                                  self.wt_dict, bits,
                                                  header_length)
        longest_bit_string = 3
        longest_word = 7
        shortest_bit_string = 1
        shortest_word = 3

        # Ceiling division gives the fewest / most words the payload can
        # occupy; each word is costed at its length plus one, minus one for
        # the whole text.
        total_message_length = len(bits) + header_length
        fewest_words = -(-total_message_length // longest_bit_string)
        most_words = -(-total_message_length // shortest_bit_string)
        minimum_chars = fewest_words * (shortest_word + 1) - 1
        maximum_chars = most_words * (longest_word + 1) - 1

        self.assertGreaterEqual(len(cover_text), minimum_chars)
        self.assertLessEqual(len(cover_text), maximum_chars)

    def test_encode_message_zero_header(self):
        bits = Bits(bin="0100101110010110100101")
        header_length = 0
        with self.assertRaises(ValueError):
            extendedcoder.encode_message(self.markov_chain, self.wt_dict,
                                         bits, header_length)

    def test_encode_message_empty_bits(self):
        bits = Bits()
        header_length = 6
        with self.assertRaises(ValueError):
            extendedcoder.encode_message(self.markov_chain, self.wt_dict,
                                         bits, header_length)
raise ValueError("Filename for Markov chain was not provided.") else: chain_filename = prefix_filename(args.subfolder, chain_filename) if no_of_states is None: no_of_states = 2 elif no_of_states < 2: raise ValueError("Number of states provided must be at least 2 (" "including start state \"s0\").") elif no_of_states > 100: raise ValueError("Number of states provided cannot exceed 100.") new_states = set() for state_index in range(1, no_of_states): new_states.add( ("state_name" + str(state_index), "word_type" + str(state_index))) markov_chain = MarkovChain(new_states) from_state = "s0" to_state = "state_name1" transitions = {(from_state, to_state, 1)} for state_index in range(1, no_of_states - 1): from_state = "state_name" + str(state_index) to_state = "state_name" + str(state_index + 1) transitions.add((from_state, to_state, 1)) transitions.add((to_state, "s0", 1)) markov_chain.set_transitions(transitions) markov.save_markov_chain(markov_chain, chain_filename) print("Saved to {}.".format(chain_filename)) elif operation.__eq__("resetChain"):
class TestMarkovChainTransitions(unittest.TestCase):
    """Tests for setting, validating and walking Markov chain transitions."""

    def setUp(self):
        self.states = {"s1", "s2", "s3", ("s4", "dict")}
        self.markov_chain = MarkovChain(self.states)
        # (from_state, to_state, weight); applied by each test as needed.
        self.transitions = {
            ("s0", "s1", 2),
            ("s0", "s2", 3),
            ("s1", "s3", 0.5),
            ("s1", "s4", 0.5),
            ("s2", "s4", 1),
            ("s3", "s0", 1),
            ("s4", "s0", 2),
        }

    def _assert_transitions_rejected(self):
        """Assert that the current transition set raises MarkovError."""
        with self.assertRaises(MarkovError):
            self.markov_chain.set_transitions(self.transitions)

    def test_set_transitions(self):
        self.markov_chain.set_transitions(self.transitions)
        chain = self.markov_chain.markov_chain
        self.assertEqual(5, len(chain))
        self.assertIsInstance(chain.get("s0"), StateTransitions)
        self.assertDictEqual({"s1": 2, "s2": 3},
                             chain.get("s0").transitions)

    def test_set_transitions_bad_outbound(self):
        # "s5" is not a state of the chain.
        self.transitions.add(("s4", "s5", 1))
        self._assert_transitions_rejected()

    def test_set_transitions_bad_inbound(self):
        self.transitions.add(("s5", "s1", 1))
        self._assert_transitions_rejected()

    def test_set_transitions_neg_probability(self):
        self.transitions.add(("s3", "s4", -1))
        self._assert_transitions_rejected()

    def test_set_transitions_zero_probability(self):
        self.transitions.add(("s3", "s4", 0))
        self._assert_transitions_rejected()

    def test_set_transitions_s0_no_outbound(self):
        self.transitions = {x for x in self.transitions if x[0] != "s0"}
        self._assert_transitions_rejected()

    def test_set_transitions_s0_no_inbound(self):
        self.transitions = {x for x in self.transitions if x[1] != "s0"}
        self._assert_transitions_rejected()

    def test_dict(self):
        self.markov_chain.set_transitions(self.transitions)
        serial_dict = self.markov_chain.__dict__()
        self.assertEqual(2, len(serial_dict))
        chain_dict = serial_dict.get("chain")
        self.assertIsInstance(chain_dict, dict)
        self.assertEqual(5, len(chain_dict))
        self.assertIsInstance(chain_dict.get("s0"), dict)
        self.assertDictEqual({"s1": 2, "s2": 3}, chain_dict.get("s0"))

    def test_get_outbound_transitions(self):
        self.markov_chain.set_transitions(self.transitions)

        self.markov_chain.current_state = "s0"
        outbound = self.markov_chain.get_outbound_transitions()
        self.assertIsInstance(outbound, StateTransitions)
        self.assertEqual(2, len(outbound.transitions))
        self.assertEqual(2, outbound.transitions.get("s1"))
        self.assertEqual(3, outbound.transitions.get("s2"))

        self.markov_chain.current_state = "s4"
        outbound = self.markov_chain.get_outbound_transitions()
        self.assertIsInstance(outbound, StateTransitions)
        self.assertEqual(1, len(outbound.transitions))
        self.assertEqual(2, outbound.transitions.get("s0"))

    def test_accumulate_transitions(self):
        self.markov_chain.set_transitions(self.transitions)
        self.markov_chain.current_state = "s0"
        accum = self.markov_chain.accumulate_transitions()
        self.assertIsInstance(accum, list)
        self.assertEqual(2, len(accum))
        # The generator must sit inside any(); passing the generator itself
        # to assertTrue is always truthy and can never fail. The weights out
        # of s0 are 2 and 3, so one entry must carry the total of 5.
        self.assertTrue(any(x[1] == 5 for x in accum))

    def test_transition(self):
        self.markov_chain.set_transitions(self.transitions)
        self.markov_chain.current_state = "s0"
        self.markov_chain.transition()
        self.assertIn(self.markov_chain.current_state, {"s1", "s2"})
        self.markov_chain.transition()
        self.assertIn(self.markov_chain.current_state, {"s3", "s4"})
        self.markov_chain.transition()
        self.assertEqual("s0", self.markov_chain.current_state)

    def test_find_2_cycle(self):
        # s2 > s4 > s2 forms a cycle that does not pass through s0.
        self.transitions.add(("s4", "s2", 1))
        self._assert_transitions_rejected()

    def test_find_3_cycle(self):
        # Replace the inbound edge of s3 so that s1 > s4 > s3 > s1 is a cycle.
        self.transitions = {x for x in self.transitions if x[1] != "s3"}
        self.transitions.add(("s4", "s3", 1))
        self.transitions.add(("s3", "s1", 1))
        self._assert_transitions_rejected()

    def test_ignore_non_cycle(self):
        # An extra forward edge creates no cycle, so this must not raise.
        self.transitions.add(("s1", "s2", 1))
        self.markov_chain.set_transitions(self.transitions)

    def test_get_number_of_paths(self):
        self.markov_chain.set_transitions(self.transitions)
        no_of_paths = markov.get_number_of_paths(self.markov_chain)
        self.assertEqual(3, no_of_paths)

        self.states.add("s5")
        self.markov_chain = MarkovChain(self.states)
        self.transitions.add(("s2", "s5", 1))
        self.transitions.add(("s4", "s5", 1))
        self.transitions.add(("s5", "s0", 1))
        self.markov_chain.set_transitions(self.transitions)
        no_of_paths = markov.get_number_of_paths(self.markov_chain)
        self.assertEqual(6, no_of_paths)

    # NOTE(review): skipped without a recorded reason -- presumably because
    # it saves a chain via markov.save_markov_chain; confirm.
    @unittest.skip
    def test_save(self):
        self.markov_chain.set_transitions(self.transitions)
        markov.save_markov_chain(self.markov_chain)
def test_init_s0_tuple(self):
    """Check that an ("s0", word-type) tuple adds no state or reference."""
    states_with_start = self.states | {("s0", "any")}
    chain = MarkovChain(states_with_start)
    self.markov_chain = chain

    self.assertSetEqual(self.test_states, chain.states)
    self.assertEqual("s0", chain.current_state)
    self.assertDictEqual(self.test_wt_refs, chain.wt_refs)