def test_get_sequence_nodes():
    """Verify the sequence string and node path reported by a predicted node.

    A ten-token sequence is inserted, then the first nine tokens are looked
    ahead. The single predicted node must report the full ten-token sequence
    and a node path whose string form lists the nine preceding tokens.
    """
    hydra = Hydraseq('_')
    hydra.insert('a b c d e f g h i j')

    predicted = hydra.look_ahead('a b c d e f g h i').next_nodes
    node = predicted.pop()

    expected_path = '[[a], [b], [c], [d], [e], [f], [g], [h], [i]]'
    assert node.get_sequence() == 'a b c d e f g h i j'
    assert str(node.get_sequence_nodes()) == expected_path
def test_get_sequence_multiple():
    """Verify node paths when a look-ahead step offers alternative tokens.

    Two sequences differing only in their middle token ('B' vs 'b') are
    inserted. Looking ahead with both alternatives in the second position
    must yield next nodes whose node paths cover both branches.
    """
    hydra = Hydraseq('_')
    for sequence in ([['a'], ['B'], ['c']], [['a'], ['b'], ['c']]):
        hydra.insert(sequence)

    candidates = hydra.look_ahead([['a'], ['b', 'B']]).next_nodes

    paths = []
    for node in candidates:
        paths.append(str(node.get_sequence_nodes()))
    paths.sort()

    assert paths == ['[[a], [B]]', '[[a], [b]]']
def test_streaming():
    """Verify that predictions can be advanced one token at a time.

    After a batch look-ahead and a reset, a look-ahead on the first word
    followed by a single ``hit`` on the second word must predict the third.
    """
    hydra = Hydraseq('streaming')
    hydra.insert("the quick brown fox")

    batch_prediction = hydra.look_ahead("the quick").get_next_values()
    assert batch_prediction == ["brown"]

    hydra.reset()

    first_step = hydra.look_ahead("the").get_next_values()
    assert first_step == ["quick"]

    second_step = hydra.hit("quick", None).get_next_values()
    assert second_step == ["brown"]
def test_autocomplete():
    """Verify prefix completion built on look_ahead plus forward_prediction.

    Several '$'-terminated names are inserted after being passed through
    ``expand``. Completing the prefix 'efra' must return exactly the stored
    names that start with it.
    """
    completer = Hydraseq('auto')
    names = ['efrain', 'efrom', 'efren', 'ephrem', 'efrainium']
    for name in names:
        completer.insert(expand(name + '$'))

    def autocomp(st):
        """Return the compacted sequences predicted forward from prefix *st*."""

        def _compact(seq):
            # Drop the final element (the terminator) and remove the spaces.
            return "".join(seq[:-1]).replace(' ', '')

        completer.look_ahead(expand(st))
        predicted = completer.forward_prediction()
        return [_compact(node.get_sequence()) for node in predicted]

    completions = autocomp('efra')
    assert sorted(completions) == sorted(['efrainium', 'efrain'])
def test_get_downwards():
    """Verify get_downwards for a marker shared by three sequences.

    Three sequences ending in '_D' are inserted; asking for ['_D'] must
    return the listed tokens in the asserted order.
    """
    hydra = Hydraseq("_")
    for sequence in ("a b c _D", "e f g _D", "c e f _D"):
        hydra.insert(sequence)

    reachable = hydra.get_downwards(["_D"])

    assert reachable == ['a', 'b', 'c', 'e', 'f', 'g']
def test_01_02_01_B_sequence():
    """Verify active/next state for two sequences that fork and rejoin.

    'a b d' and 'a c d' share their first and last tokens. State is checked
    right after the inserts, after following each branch on its own, and
    after following both branches at once through the shared last token.
    """
    hydra = Hydraseq('main')
    hydra.insert([['a'], ['b'], ['d']])
    hydra.insert([['a'], ['c'], ['d']])

    # State left behind by the last insert, before any look-ahead.
    check_active(hydra, 1, ['a c d'], ['d'])
    check_next(hydra, 0, [], [])

    # Follow the 'b' branch only.
    hydra.look_ahead([['a'], ['b']])
    check_active(hydra, 1, ['a b'], ['b'])
    check_next(hydra, 1, ['a b d'], ['d'])

    # Follow the 'c' branch only.
    hydra.look_ahead([['a'], ['c']])
    check_active(hydra, 1, ['a c'], ['c'])
    check_next(hydra, 1, ['a c d'], ['d'])

    # Follow both branches to the end of both sequences.
    hydra.look_ahead([['a'], ['b', 'c'], ['d']])
    check_active(hydra, 2, ['a b d', 'a c d'], ['d'])
    check_next(hydra, 0, [], [])
def test_surpise_flag():
    """Verify the ``surprise`` attribute after each insert.

    The flag starts out equal to False, and after each insert it must equal
    the value paired with that sentence below.
    """
    hydra = Hydraseq('main')
    assert hydra.surprise == False

    steps = [
        ("This is a test", True),
        ("This is a test", False),
        ("This is NOT a test", True),
        ("This is a test", False),
    ]
    for sentence, expected_surprise in steps:
        hydra.insert(sentence)
        assert hydra.surprise == expected_surprise
def test_run_convolutions_json():
    """Verify the dictionaries returned by ``convolutions``.

    Two labelled patterns are inserted. Each expected match is a dict with
    'convo', 'start', 'end' and 'words' keys.
    """
    hydra = Hydraseq('_')
    hydra.insert("a b c _ALPHA")
    hydra.insert("1 2 3 _DIGIT")

    def match(convo, start, end, words):
        """Build one expected convolution record."""
        return {'convo': convo, 'start': start, 'end': end, 'words': words}

    # Both patterns occur back to back.
    both_found = hydra.convolutions("a b c 1 2 3".split())
    assert both_found == [
        match('_ALPHA', 0, 3, ['a', 'b', 'c']),
        match('_DIGIT', 3, 6, ['1', '2', '3']),
    ]

    # Only the digit pattern occurs in full.
    digits_only = hydra.convolutions("a b 1 2 3 a b".split())
    assert digits_only == [
        match('_DIGIT', 2, 5, ['1', '2', '3']),
    ]
def test_self_insert():
    """Verify column count and predictions after ``self_insert`` of a text.

    After self-inserting the two-sentence text, the hydra must have 38
    columns, and looking ahead with the first three words must give the
    asserted next and active values.
    """
    text = (
        "Burger King wants people to download its app. "
        "So it's sending them to McDonald's for access to a one-cent Whopper."
    )
    hydra = Hydraseq('_')
    hydra.self_insert(text)

    assert len(hydra.columns) == 38

    next_values = hydra.look_ahead("Burger King wants").get_next_values()
    assert sorted(next_values) == sorted(['people', '_3'])

    active_values = hydra.look_ahead("Burger King wants").get_active_values()
    assert sorted(active_values) == sorted(['wants'])
def test_run_convolutions_overlap():
    """Verify ``convolutions`` output when stored patterns overlap in the input.

    Five labelled patterns are inserted; running convolutions over
    'b a d a n d y' must return the five asserted matches in this order.
    """
    hydra = Hydraseq('_')
    patterns = (
        "b a d _1",
        "a n d _2",
        "a d a _3",
        "a n d y _4",
        "a d a n _5",
    )
    for pattern in patterns:
        hydra.insert(pattern)

    tester = "b a d a n d y".split()

    # (label, start, end, matched words) for each expected record, in order.
    expected_rows = [
        ('_1', 0, 3, ['b', 'a', 'd']),
        ('_3', 1, 4, ['a', 'd', 'a']),
        ('_5', 1, 5, ['a', 'd', 'a', 'n']),
        ('_2', 3, 6, ['a', 'n', 'd']),
        ('_4', 3, 7, ['a', 'n', 'd', 'y']),
    ]
    expected = [
        {'convo': label, 'end': end, 'start': start, 'words': words}
        for label, start, end, words in expected_rows
    ]

    assert hydra.convolutions(tester) == expected
def test_cloning_hydra():
    """Verify that a Hydraseq built from another one predicts from its data.

    ``hdr1`` is constructed with ``hdr0`` as its second argument and nothing
    is inserted into it directly; its predictions must still reflect the
    sequence inserted into ``hdr0``.
    """
    source = Hydraseq('zero')
    source.insert("the quick brown fox")

    clone = Hydraseq('one', source)

    batch_prediction = clone.look_ahead("the quick").get_next_values()
    assert batch_prediction == ["brown"]

    clone.reset()

    first_step = clone.look_ahead("the").get_next_values()
    assert first_step == ["quick"]

    second_step = clone.hit("quick", None).get_next_values()
    assert second_step == ["brown"]
def initialize_seq(self, lst_typs, fpath, id):
    """Create a Hydraseq named ``id`` and train it from one CSV per type.

    For every entry ``typ`` of ``lst_typs``, calls
    ``self.train_sequences_from_file`` with the new sequence, the path
    ``<fpath>/<typ>.csv`` and the identifier ``[['_<TYP>_']]`` (type name
    upper-cased).

    Returns the created Hydraseq.
    """
    hydra = Hydraseq(id)
    for typ in lst_typs:
        csv_path = '{}/{}.csv'.format(fpath, typ)
        identifier = [['_{}_'.format(typ.upper())]]
        self.train_sequences_from_file(hydra, csv_path, identifier)
    return hydra
def test_get_sequence_nodes_out_of_order():
    """Verify get_sequence keeps insertion order for non-alphabetical tokens.

    'C A B' is inserted; the node predicted after 'C A' must report the
    sequence as 'C A B'.
    """
    hydra = Hydraseq('_')
    hydra.insert('C A B')

    predicted = hydra.look_ahead('C A').next_nodes
    node = predicted.pop()

    assert node.get_sequence() == 'C A B'
def test_compare():
    """Verify next values when each look-ahead step carries several tokens.

    Six two-token patterns, each followed by a label, are inserted. Each
    query below is a pair of multi-token steps; the labels returned by
    ``get_next_values`` must match the paired expectation.
    """
    hydra = Hydraseq('_')
    labelled_patterns = [
        'small large GROWING',
        'large small SHRINKING',
        'left right EAST',
        'right left WEST',
        'circle square SHAPER',
        'square circle BLUNTER',
    ]
    for pattern in labelled_patterns:
        hydra.insert(pattern)

    cases = [
        ([['small', 'left', 'circle'], ['small', 'right', 'circle']],
         ['EAST']),
        ([['small', 'left', 'square'], ['small', 'right', 'circle']],
         ['BLUNTER', 'EAST']),
        ([['small', 'left', 'circle'], ['large', 'right', 'circle']],
         ['EAST', 'GROWING']),
        ([['large', 'right', 'circle'], ['small', 'left', 'circle']],
         ['SHRINKING', 'WEST']),
    ]
    for query, expected_labels in cases:
        hydra.look_ahead(query)
        assert hydra.get_next_values() == expected_labels
0 0 0 0 0 0 o o 0 0 0 0 db 0 0 0 v u v 0 0 0 0 0 0 _end """ face = """ o o db v u v _end """ hdq1 = Hydraseq('0_') for pattern in [ "o 0_eye", "db 0_nose", "v 0_left_mouth", "u 0_mid_mouth", "v 0_right_mouth", ]: hdq1.insert(pattern) hdq2 = Hydraseq('1_') for pattern in [ "0_eye 0_eye 1_eyes", "0_nose 1_nose", "0_left_mouth 0_mid_mouth 0_right_mouth 1_mouth", ]:
def test_sentence():
    """Verify predictions as sentences sharing a prefix are inserted.

    The next values after 'The quick brown' are checked after each of three
    inserts. Both branches are then followed explicitly, and the active and
    next state is checked with ``check_active`` / ``check_next``.
    """
    prefix = "The quick brown"
    hydra = Hydraseq('main')

    hydra.insert("The quick brown fox jumped over the lazy dog")
    assert hydra.look_ahead(prefix).get_next_values() == ['fox']

    hydra.insert("The quick brown cat jumped over the lazy dog")
    assert hydra.look_ahead(prefix).get_next_values() == ['cat', 'fox']

    # This sentence differs only in its last word; the prediction is unchanged.
    hydra.insert("The quick brown cat jumped over the lazy hound")
    assert hydra.look_ahead(prefix).get_next_values() == ['cat', 'fox']

    hydra.look_ahead([['The'], ['quick'], ['brown'], ['fox', 'cat']])
    check_active(
        hydra,
        2,
        ['The quick brown cat', 'The quick brown fox'],
        ['cat', 'fox'],
    )
    check_next(
        hydra,
        2,
        ['The quick brown cat jumped', 'The quick brown fox jumped'],
        ['jumped'],
    )
def test_hydra_depths():
    """Verify the ``d_depths`` mapping and the ``depth`` of next nodes.

    ``d_depths`` must be empty on a fresh hydra and hold one single-node
    entry per token after a ten-word insert, keyed so that key ``k`` holds
    the node for word ``k - 1``. The depth of a node taken from
    ``next_nodes`` is then checked after a look-ahead and further inserts.
    """
    hydra = Hydraseq('main')
    assert len(hydra.d_depths) == 0, \
        "Initially a hydraseq should have no depth sets"

    sentence = "one two three four five six seven eight nine ten"
    hydra.insert(sentence)
    assert len(hydra.d_depths) == 10, \
        "there should be one set per depht traversed"

    words = sentence.split()
    for depth, nodes in hydra.d_depths.items():
        assert len(nodes) == 1, \
            "there should be one item in each depth level"
        assert next(iter(nodes)).key == words[depth - 1], \
            "the nodes should be in depth order"

    def first_next_node():
        """Return the first node yielded by iterating ``next_nodes``."""
        return next(iter(hydra.next_nodes))

    hydra.look_ahead("one two three four five six seven eight nine")
    assert first_next_node().depth == 10

    hydra.insert("one two three four five six siete ocho nueve diez")
    hydra.insert("one two three four five six siete ocho nueve")
    assert first_next_node().depth == 10

    hydra.insert("one two three four five six siete")
    assert first_next_node().depth == 8
def test_activate_node_pathway():
    """Verify look-ahead results while a node pathway is activated and after reset.

    With the 'LETTERS' pathway activated, ``path_nodes`` must hold the keys
    of the letters sequence, the letters look-ahead still yields values and
    the numbers look-ahead yields none. After ``reset_node_pathway``,
    ``path_nodes`` is empty and both sequences yield values again.
    """
    hydra = Hydraseq('main')
    hydra.insert("a b c d e f LETTERS")
    hydra.insert("1 2 3 4 5 6 NUMBERS")
    hydra.insert("a1 2b b4 MIXED")

    letters_path = {"a", "b", "c", "d", "e", "f", "LETTERS"}

    def path_keys():
        """Return the set of keys currently in ``path_nodes``."""
        return {node.key for node in hydra.path_nodes}

    def active_after(words):
        return hydra.look_ahead(words).get_active_values()

    def next_after(words):
        return hydra.look_ahead(words).get_next_values()

    hydra.activate_node_pathway('LETTERS')
    assert path_keys() == letters_path

    assert active_after("a b c d") == ["d"]
    assert next_after("a b c d") == ["e"]
    # Looking ahead must not disturb the activated pathway.
    assert path_keys() == letters_path

    assert active_after("1 2 3 4") == []
    assert next_after("1 2 3 4") == []

    hydra.reset_node_pathway()
    assert path_keys() == set()

    assert active_after("a b c d") == ["d"]
    assert next_after("a b c d") == ["e"]
    assert path_keys() == set()

    assert active_after("1 2 3 4") == ["4"]
    assert next_after("1 2 3 4") == ["5"]
def test_active_synapses():
    """Verify active values before, during and after ``set_active_synapses``.

    With synapses set to ['f'], look-aheads along the letters sequence still
    report active values while the digits look-ahead reports none. After
    ``reset_active_synapses`` both sequences report active values again.
    """
    hydra = Hydraseq('main')
    hydra.insert("a b c d e f")
    hydra.insert("1 2 3 4 5 6")

    def active_after(words):
        """Look ahead with *words* and return the resulting active values."""
        return hydra.look_ahead(words).get_active_values()

    # Baseline: both sequences respond.
    assert active_after("a b c d e") == ['e']
    assert active_after("1 2 3 4 5") == ['5']

    hydra.set_active_synapses(['f'])
    assert active_after("a b c d e") == ['e']
    assert active_after("1 2 3 4 5") == []
    assert active_after("a b c d") == ['d']

    hydra.reset_active_synapses()
    assert active_after("a b c d e") == ['e']
    assert active_after("1 2 3 4 5") == ['5']
    assert active_after("a b c d") == ['d']
filepath - csv two column file, second column 'SEQUENCE' contains lists of lists lst_lst_identifier - what identifier to use to cap seqennces, [['mysequence']] for example """ with open(filepath, 'r') as source: csv_file = csv.DictReader(source) for row in csv_file: str_sequence = row['SEQUENCE'].strip() if len(str_sequence.strip()) == 0: continue else: lst_sequence = eval(str_sequence) train_with_provided_list(_seq, lst_sequence + lst_lst_identifier) seq = Hydraseq('input') for typ in ['suite', 'address', 'dir', 'pobox', 'attn']: train_sequences_from_file(seq, 'data/address_{}.csv'.format(typ), [['_{}_'.format(typ.upper())]]) def encode_from_word_list(arr_st): """Expects ['123', 'main', 'st]""" assert isinstance(arr_st, list) if arr_st: assert isinstance(arr_st[0], str) return [encoder(word) for word in arr_st] def is_address(seq, arr_st): """Expects ["123","main","st"]""" return any([