def get_data(self, data_file):
    '''Populate ``self.training_set`` and ``self.test_set`` from data_file.

    ``data_file`` is handed straight to ``DataParser``. A single training
    partition is requested and its first element is kept as the training
    set; the test set comes from the parser's ``get_test_set``.
    '''
    parser = DataParser(data_file)
    # Only one partition is requested, so index 0 is the whole thing.
    only_partition = parser.get_training_sets(1)[0]
    # The first element of the partition is the training portion
    # (i.e. not the test set).
    self.training_set = only_partition[0]
    self.test_set = parser.get_test_set()
def set_up(numNodes, comm):
    '''Run by the master node: choose the ANN architecture, load the
    training and test data, and broadcast the architecture to all nodes.

    The architecture is read from the command line as either
    ``in hidden out file`` or ``in hidden1 hidden2 out file`` (the second
    form is selected when ``len(sys.argv) == 6``). If the arguments are
    missing or not integers, the defaults below are used instead.

    numNodes -- number of training sets requested from the parser.
    comm     -- communicator object; only its ``bcast`` method is used here.

    Returns ``(training_data, test_data, in_layer, hidden_layers,
    out_layer)``. The data sets are returned, not sent; distributing them
    is left to the caller.
    '''
    try:
        in_layer = int(sys.argv[1])
        hidden_layers = [int(sys.argv[2])]
        if len(sys.argv) == 6:
            # Two hidden layers were given.
            hidden_layers.append(int(sys.argv[3]))
            out_layer = int(sys.argv[4])
            fileName = sys.argv[5]
        else:
            out_layer = int(sys.argv[3])
            fileName = sys.argv[4]
    except (IndexError, ValueError):
        # Too few arguments or a non-integer layer size: fall back to the
        # defaults. Only these two errors are caught so that things like
        # KeyboardInterrupt are no longer silently swallowed.
        in_layer = 9
        hidden_layers = [1]
        out_layer = 1
        fileName = 'breast_cancer.dat'

    # Break up data (for each node and into testing/training). The parser
    # is used entirely inside the ``with`` block so the file is still open
    # while it is read and is closed even if parsing raises.
    with open(fileName) as f:
        parser = DataParser(f)
        training_data = parser.get_training_sets(numNodes)
        test_data = parser.get_test_set()

    # Broadcast ANN architecture to all nodes.
    comm.bcast((in_layer, hidden_layers, out_layer, fileName), root=0)

    return training_data, test_data, in_layer, hidden_layers, out_layer
class TestDataParser(unittest.TestCase):
    '''Unit tests for DataParser, run against the ``test/data.dat`` fixture.'''

    def setUp(self):
        # Kept on self so tearDown can close the handle.
        self.file = open('test/data.dat')
        self.instance = DataParser(self.file)

    def test_create_tuple(self):
        nums = ['1', '2', '3', '4']
        output = self.instance._create_tuple(nums)
        self.assertEqual(output, (1.0, 2.0, 3.0, 4.0))

    def test_parse_line(self):
        line = "1,2,3,4,5,7"
        output = self.instance._parse_line(line)
        # Expected shape: the first field is dropped and the last field
        # becomes its own one-element tuple.
        self.assertEqual(output, ((2, 3, 4, 5), (7,)))

    def test_parse_file(self):
        # Parsed output gets randomized, so there is nothing stable to
        # assert here.
        pass

    def test_get_training_sets(self):
        sets = self.instance.get_training_sets(2)
        self.assertEqual(len(sets), 2)

    def test_divide_data(self):
        self.assertEqual(len(self.instance.test), 1)
        self.assertEqual(len(self.instance.training), 9)

    def test_separate_training_test(self):
        samples = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1, 2, 3, 4, 5, 6, 7, 8], [9, 0]), out)

        samples = [1, 2, 3, 4, 5, 6]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1, 2, 3, 4], [5, 6]), out)

    def test_whole(self):
        # Smoke test on the full data set: prints a size, asserts nothing.
        # The parser is used inside the ``with`` block so the file is open
        # while it is read and is closed afterwards.
        with open('breast_cancer.dat') as data_file:
            inst = DataParser(data_file)
            # Parenthesised form prints the same on Python 2 and 3.
            print(len(inst.get_training_sets(5)[1][0]))

    def tearDown(self):
        # Release the fixture file opened in setUp.
        self.file.close()
class TestDataParser(unittest.TestCase):
    '''Unit tests for DataParser, run against the ``test/data.dat`` fixture.'''

    def setUp(self):
        # Kept on self so tearDown can close the handle.
        self.file = open('test/data.dat')
        self.instance = DataParser(self.file)

    def test_create_tuple(self):
        nums = ['1', '2', '3', '4']
        output = self.instance._create_tuple(nums)
        self.assertEqual(output, (1.0, 2.0, 3.0, 4.0))

    def test_parse_line(self):
        line = "1,2,3,4,5,7"
        output = self.instance._parse_line(line)
        # Expected shape: the first field is dropped and the last field
        # becomes its own one-element tuple.
        self.assertEqual(output, ((2, 3, 4, 5), (7, )))

    def test_parse_file(self):
        # Parsed output gets randomized, so there is nothing stable to
        # assert here.
        pass

    def test_get_training_sets(self):
        sets = self.instance.get_training_sets(2)
        self.assertEqual(len(sets), 2)

    def test_divide_data(self):
        self.assertEqual(len(self.instance.test), 1)
        self.assertEqual(len(self.instance.training), 9)

    def test_separate_training_test(self):
        samples = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1, 2, 3, 4, 5, 6, 7, 8], [9, 0]), out)

        samples = [1, 2, 3, 4, 5, 6]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1, 2, 3, 4], [5, 6]), out)

    def test_whole(self):
        # Smoke test on the full data set: prints a size, asserts nothing.
        # The parser is used inside the ``with`` block so the file is open
        # while it is read and is closed afterwards.
        with open('breast_cancer.dat') as data_file:
            inst = DataParser(data_file)
            # Parenthesised form prints the same on Python 2 and 3.
            print(len(inst.get_training_sets(5)[1][0]))

    def tearDown(self):
        # Release the fixture file opened in setUp.
        self.file.close()
def setUp(self):
    '''Open the ``test/data.dat`` fixture and build a DataParser over it.'''
    self.file = open('test/data.dat')
    # Register the close right after opening so the handle is released
    # after every test, even if building the parser below raises.
    self.addCleanup(self.file.close)
    self.instance = DataParser(self.file)
def test_whole(self):
    '''Smoke test on ``breast_cancer.dat``: print the size of one split.

    Nothing is asserted; the test only checks that parsing the full data
    set and requesting five training sets runs without raising.
    '''
    # The parser is used inside the ``with`` block so the file is open
    # while it is read and is closed afterwards.
    with open('breast_cancer.dat') as data_file:
        inst = DataParser(data_file)
        # Parenthesised form prints the same on Python 2 and 3.
        print(len(inst.get_training_sets(5)[1][0]))