def test_input_diff_constraint(self):
    """Mine with loosened bounds on both attributes and compare to captured output.

    API and Cython object test. Replicates the command line

        ./MPP -file input.txt -thr 0.001
              -att input_att1.txt -lg 20 -ug 1000 -ls 800 -us 3700
              -att input_att2.txt -la 20 -ua 80 -lm 30 -um 70
              -out -write BMS_patt.txt

    input on Main.cpp and verifies the output against data captured from
    the original implementation.

    Same constraint types as ``test_seq2patfinder_default``, but every lower
    bound is smaller and every upper bound is larger than in that test.
    Per the original note, the results differ significantly from the default.
    """
    # Sequence database
    seq2pat = Seq2Pat(read_data(self.DATA_DIR + "input.txt"))

    # Attributes, loaded in the same order as on the command line
    att1 = Attribute(read_data(self.DATA_DIR + "input_att1.txt"))
    att2 = Attribute(read_data(self.DATA_DIR + "input_att2.txt"))

    # Two-sided bounds on every constraint type
    seq2pat.add_constraint(20 <= att1.gap() <= 1000)
    seq2pat.add_constraint(800 <= att1.span() <= 3700)
    seq2pat.add_constraint(20 <= att2.average() <= 80)
    seq2pat.add_constraint(30 <= att2.median() <= 70)

    mined = seq2pat.get_patterns(.001)

    # The captured reference output is sorted before comparison.
    expected = sort_pattern(
        read_data(self.DATA_DIR + "diff_constraints_results.txt"))
    self.assertListEqual(expected, mined)
def test_input_no_upper_constraint(self):
    """Mine with lower bounds only and compare to captured output.

    API and Cython object test. Replicates the command line

        ./MPP -file input.txt -thr 0.01
              -att input_att1.txt -lg 30 -ls 900
              -att input_att2.txt -la 30 -lm 40
              -out -write BMS_patt.txt

    input on Main.cpp and verifies the output against data captured from
    the original implementation.

    Similar to the default test, but no upper constraints are imposed.
    """
    # Sequence database
    seq2pat = Seq2Pat(read_data(self.DATA_DIR + "input.txt"))

    # Attributes, loaded in the same order as on the command line
    att1 = Attribute(read_data(self.DATA_DIR + "input_att1.txt"))
    att2 = Attribute(read_data(self.DATA_DIR + "input_att2.txt"))

    # One-sided (lower-bound only) constraints
    seq2pat.add_constraint(30 <= att1.gap())
    seq2pat.add_constraint(900 <= att1.span())
    seq2pat.add_constraint(30 <= att2.average())
    seq2pat.add_constraint(40 <= att2.median())

    mined = seq2pat.get_patterns(.01)

    # The captured reference output is sorted before comparison.
    expected = sort_pattern(
        read_data(self.DATA_DIR + "no_upper_constraint_results.txt"))
    self.assertListEqual(expected, mined)
def test_input_one_constraint(self):
    """Mine with constraints on a single attribute and compare to captured output.

    API and Cython object test. Replicates the command line

        ./MPP -file input.txt -thr 0.001
              -att input_att1.txt -lg 30 -ug 900 -ls 900 -us 3600
              -out -write BMS_patt.txt

    input on Main.cpp and verifies the output against data captured from
    the original implementation.

    Similar to the default test, but only input_att1.txt is constrained.

    NOTE(review): the command line above lists ``-us 3600``, yet this test
    adds only a lower bound on span. Confirm which of the two is intended.
    """
    # Sequence database
    patterns_file = self.DATA_DIR + "input.txt"
    sequences = read_data(patterns_file)
    seq2pat = Seq2Pat(sequences)

    # Single attribute
    attribute_file = self.DATA_DIR + "input_att1.txt"
    att1 = Attribute(read_data(attribute_file))

    seq2pat.add_constraint(30 <= att1.gap() <= 900)
    seq2pat.add_constraint(900 <= att1.span())

    test_patterns = seq2pat.get_patterns(.001)

    # The captured reference output is sorted before comparison.
    results_file = self.DATA_DIR + "one_constraint_results.txt"
    sorted_controls = sort_pattern(read_data(results_file))
    self.assertListEqual(sorted_controls, test_patterns)

    # Guard against accidentally reproducing the default run's output.
    # assertNotEqual reports both operands on failure, unlike
    # assertFalse(a == b), which only reports "True is not false".
    default_patterns = read_data(self.DATA_DIR + "default_results.txt")
    self.assertNotEqual(test_patterns, default_patterns)
def test_usage(self):
    """Walk through the basic API end to end.

    Loads the sequences and one attribute file, registers one constraint of
    each type on that attribute, and runs the miner. The test makes no
    assertions; it only passes if none of the calls raise.
    """
    # Pattern data
    sequences = read_data(self.DATA_DIR + "input.txt")

    # Attribute data
    attribute_1 = read_data(self.DATA_DIR + "input_att1.txt")

    # Sequential pattern finder
    seq2pat = Seq2Pat(sequences)

    # Constraints on attribute 1: lower-only, upper-only and two-sided forms
    att1 = Attribute(attribute_1)
    seq2pat.add_constraint(5 <= att1.average())
    seq2pat.add_constraint(att1.gap() <= 10)
    seq2pat.add_constraint(10 <= att1.median() <= 15)
    seq2pat.add_constraint(att1.span() <= 20)

    seq2pat.get_patterns(min_frequency=100)
def test_input(self):
    """Check the container type and dimensions of the loaded input files.

    Both input.txt and input_att1.txt are expected to load as a list of
    52619 rows whose first row has 5 entries.
    """
    patterns_file = self.DATA_DIR + "input.txt"
    sequences = read_data(patterns_file)

    # Dedicated assertions report the actual value on failure, whereas
    # assertTrue(<comparison>) only reports "False is not true".
    self.assertIsInstance(sequences, list)
    self.assertEqual(len(sequences), 52619)
    self.assertEqual(len(sequences[0]), 5)

    attribute_file = self.DATA_DIR + "input_att1.txt"
    attribute_1 = read_data(attribute_file)

    self.assertIsInstance(attribute_1, list)
    self.assertEqual(len(attribute_1), 52619)
    self.assertEqual(len(attribute_1[0]), 5)
def test_input_no_constraint(self):
    """Mine without any constraints and compare to captured output.

    API and Cython object test. Replicates the command line

        ./MPP -file input.txt -thr 0.01 -out

    input on Main.cpp and verifies the output against data captured from
    the original implementation.

    Unconstrained call; per the original note, the results are
    significantly different from, and larger than, the default run.
    """
    # Sequence database
    patterns_file = self.DATA_DIR + "input.txt"
    sequences = read_data(patterns_file)
    seq2pat = Seq2Pat(sequences)

    test_patterns = seq2pat.get_patterns(.01)

    # The captured reference output is sorted before comparison.
    results_file = self.DATA_DIR + "no_constraints_results.txt"
    sorted_results = sort_pattern(read_data(results_file))
    self.assertListEqual(sorted_results, test_patterns)

    # Guard against accidentally reproducing the default run's output.
    # assertNotEqual reports both operands on failure, unlike
    # assertFalse(a == b), which only reports "True is not false".
    default_patterns = read_data(self.DATA_DIR + "default_results.txt")
    self.assertNotEqual(test_patterns, default_patterns)
def test_input_item_variables(self):
    """Check the size statistics computed from input.txt.

    Expects a maximum column size of 161, 52619 sequences, and a maximum
    value of 3340.
    """
    sequences = read_data(self.DATA_DIR + "input.txt")

    max_column_size = get_max_column_size(sequences)
    sequence_count = len(sequences)
    max_value = get_max_value(sequences)

    self.assertEqual(161, max_column_size)
    self.assertEqual(52619, sequence_count)
    self.assertEqual(3340, max_value)
def test_seq2patfinder_default(self):
    """Default two-attribute run: check the Cython object state and the mined patterns.

    API and Cython object test. Replicates the command line

        > ./MPP.exe
            -file input.txt
            -thr 0.001
            -att input_att1.txt -lg 30 -ug 900 -ls 900 -us 3600
            -att input_att2.txt -la 30 -ua 70 -lm 40 -um 60
            -out -write BMS_patt.txt

    input on Main.cpp and verifies the output against data captured from
    the original implementation.

    After the full run, the median and average constraints are removed and
    the result is compared with the one-constraint reference output.

    NOTE(review): the command line above lists ``-us 3600``, yet this test
    adds only a lower bound on span and asserts that ``uspn`` is empty.
    Confirm which of the two is intended.
    """
    # Sequence database
    seq2pat = Seq2Pat(read_data(self.DATA_DIR + "input.txt"))

    # Attributes, loaded in the same order as on the command line
    att1 = Attribute(read_data(self.DATA_DIR + "input_att1.txt"))
    att2 = Attribute(read_data(self.DATA_DIR + "input_att2.txt"))

    seq2pat.add_constraint(30 <= att1.gap() <= 900)
    seq2pat.add_constraint(900 <= att1.span())
    seq2pat.add_constraint(30 <= att2.average() <= 70)
    seq2pat.add_constraint(40 <= att2.median() <= 60)

    cython_imp = seq2pat._get_cython_imp(-1)

    # Constraint bound values
    self.assertListEqual([30], cython_imp.lgap)
    self.assertListEqual([900], cython_imp.ugap)
    self.assertListEqual([30], cython_imp.lavr)
    self.assertListEqual([70], cython_imp.uavr)
    self.assertListEqual([900], cython_imp.lspn)
    self.assertListEqual([], cython_imp.uspn)
    self.assertListEqual([40], cython_imp.lmed)

    # Index lists paired with the bounds (the "...i" fields)
    self.assertListEqual([0], cython_imp.ugapi)
    self.assertListEqual([0], cython_imp.lgapi)
    self.assertListEqual([], cython_imp.uspni)
    self.assertListEqual([0], cython_imp.lspni)
    self.assertListEqual([1], cython_imp.uavri)
    self.assertListEqual([1], cython_imp.lavri)
    self.assertListEqual([1], cython_imp.umedi)
    self.assertListEqual([1], cython_imp.lmedi)

    # Bookkeeping lists
    self.assertListEqual([2, 0], cython_imp.num_minmax)
    self.assertListEqual([0, 2], cython_imp.num_avr)
    self.assertListEqual([0, 2], cython_imp.num_med)
    self.assertListEqual([0], cython_imp.tot_gap)
    self.assertListEqual([0], cython_imp.tot_spn)
    self.assertListEqual([1], cython_imp.tot_avr)

    # Input size statistics and attribute extrema
    self.assertEqual(161, cython_imp.M)
    self.assertEqual(52619, cython_imp.N)
    self.assertEqual(3340, cython_imp.L)
    self.assertListEqual([284871, 100], cython_imp.max_attrs)
    self.assertListEqual([0, 1], cython_imp.min_attrs)

    mined = seq2pat.get_patterns(.001)

    # Consistency sanity check: a second run must give the same patterns.
    mined_again = seq2pat.get_patterns(.001)
    self.assertListEqual(mined, mined_again)

    expected_default = sort_pattern(
        read_data(self.DATA_DIR + "default_results.txt"))
    self.assertListEqual(expected_default, mined)

    # Remove the attribute-2 constraints and test again
    seq2pat.remove_constraint(40 <= att2.median() <= 60)
    seq2pat.remove_constraint(30 <= att2.average() <= 70)

    cython_imp = seq2pat._get_cython_imp(-1)
    self.assertListEqual([], cython_imp.umedi)
    self.assertListEqual([], cython_imp.lmedi)
    self.assertListEqual([], cython_imp.uavr)
    self.assertListEqual([], cython_imp.uavri)
    self.assertListEqual([0], cython_imp.num_med)

    remaining = seq2pat.get_patterns(.001)
    expected_remaining = sort_pattern(
        read_data(self.DATA_DIR + "one_constraint_results.txt"))
    self.assertListEqual(expected_remaining, remaining)
def test_setter(self):
    """Round-trip every settable field of the Cython object.

    Each group of attributes on a fresh ``stp.PySeq2pat`` is assigned and
    then read back, checking that the getter returns the assigned value.
    """
    finder = stp.PySeq2pat()

    # A Seq2Pat instance is built from input.txt here as in the original
    # test; nothing below reads it.
    sequences = read_data(self.DATA_DIR + "input.txt")
    seq2pat = Seq2Pat(sequences)

    # Constraint bound values
    finder.lgap = [30]
    finder.ugap = [900]
    finder.lspn = [77]
    finder.uspn = [9, 80]
    finder.lavr = [9, 88]
    finder.uavr = [7]
    finder.lmed = [9, 9, 8]
    finder.umed = [99999]

    self.assertListEqual(finder.lgap, [30])
    self.assertListEqual(finder.ugap, [900])
    self.assertListEqual(finder.lspn, [77])
    self.assertListEqual(finder.uspn, [9, 80])
    self.assertListEqual(finder.lavr, [9, 88])
    self.assertListEqual(finder.uavr, [7])
    self.assertListEqual(finder.lmed, [9, 9, 8])
    self.assertListEqual(finder.umed, [99999])

    # Index lists paired with the bounds (the "...i" fields)
    finder.lgapi = [0]
    finder.ugapi = [0]
    finder.lspni = [1]
    finder.uspni = [1, 0]
    finder.lavri = [0, 1]
    finder.uavri = [0]
    finder.lmedi = [0, 1, 2]
    finder.umedi = [2]

    # NOTE(review): in the original a bare '#' directly precedes the lgapi
    # check; it is read here as an empty comment line, not a disabled
    # assertion. Confirm against version control.
    self.assertListEqual(finder.lgapi, [0])
    self.assertListEqual(finder.ugapi, [0])
    self.assertListEqual(finder.lspni, [1])
    self.assertListEqual(finder.uspni, [1, 0])
    self.assertListEqual(finder.lavri, [0, 1])
    self.assertListEqual(finder.uavri, [0])
    self.assertListEqual(finder.lmedi, [0, 1, 2])
    self.assertListEqual(finder.umedi, [2])

    # Bookkeeping lists
    finder.num_minmax = [0, 0, 0]
    finder.num_avr = [1, 1, 1]
    finder.num_med = [0, 1, 2]
    finder.tot_gap = [0, 1, 0]
    finder.tot_spn = [2, 2, 2]
    finder.tot_avr = [0, 1, 1]

    self.assertListEqual(finder.num_minmax, [0, 0, 0])
    self.assertListEqual(finder.num_avr, [1, 1, 1])
    self.assertListEqual(finder.num_med, [0, 1, 2])
    self.assertListEqual(finder.tot_gap, [0, 1, 0])
    self.assertListEqual(finder.tot_spn, [2, 2, 2])
    self.assertListEqual(finder.tot_avr, [0, 1, 1])

    # Scalar fields
    finder.num_att = 3
    finder.N = 200
    finder.M = 999
    finder.L = 89
    finder.theta = 89

    self.assertEqual(finder.num_att, 3)
    self.assertEqual(finder.N, 200)
    self.assertEqual(finder.M, 999)
    self.assertEqual(finder.L, 89)
    self.assertEqual(finder.theta, 89)