예제 #1
0
 def get_data(self, data_file):
     """Load the training and test sets from data_file via DataParser.

     Stores the results on self.training_set and self.test_set.
     """
     reader = DataParser(data_file)
     # Exactly one partition is requested, so use the only entry returned;
     # its first element is kept as the training data (per the original
     # note, the other part is the test portion).
     only_partition = reader.get_training_sets(1)[0]
     self.training_set = only_partition[0]
     self.test_set = reader.get_test_set()
예제 #2
0
def set_up(numNodes, comm):
    '''Run by the master node, this method picks the ANN architecture,
    reads in the training and test data sets, and broadcasts the
    architecture to all the other nodes.

    The architecture and data file come from the command line as
    ``in_layer hidden1 [hidden2] out_layer fileName``; the second hidden
    layer is only read when exactly six argv entries are present. If an
    argument is missing or a layer size is not an integer, the built-in
    defaults below are used instead.

    numNodes -- number of training sets requested from DataParser.
    comm     -- communicator whose bcast() is called with root=0.

    Returns (training_data, test_data, in_layer, hidden_layers, out_layer).
    '''
    try:
        in_layer = int(sys.argv[1])
        hidden_layers = [int(sys.argv[2])]

        if len(sys.argv) == 6:
            hidden_layers.append(int(sys.argv[3]))
            out_layer = int(sys.argv[4])
            fileName = sys.argv[5]
        else:
            out_layer = int(sys.argv[3])
            fileName = sys.argv[4]
    except (IndexError, ValueError):
        # Missing argument (IndexError) or non-integer layer size
        # (ValueError): discard any partial result and use the defaults.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        in_layer = 9
        hidden_layers = [1]
        out_layer = 1
        fileName = 'breast_cancer.dat'

    #break up data (for each node and into testing/training)
    # 'with' guarantees the file is closed even if parsing raises.
    with open(fileName) as f:
        parser = DataParser(f)
        training_data = parser.get_training_sets(numNodes)
        test_data = parser.get_test_set()

    # broadcast ANN architecture to all nodes
    comm.bcast((in_layer, hidden_layers, out_layer, fileName), root=0)
    return training_data, test_data, in_layer, hidden_layers, out_layer
예제 #3
0
def set_up(numNodes, comm):
    '''Run by the master node, this method picks the ANN architecture,
    reads in the training and test data sets, and broadcasts the
    architecture to all the other nodes.

    Command-line layout: ``in_layer hidden1 [hidden2] out_layer fileName``.
    The optional second hidden layer is read only when there are exactly
    six argv entries. When an argument is absent or a layer size cannot
    be converted to int, the hard-coded defaults are used.

    numNodes -- number of training sets requested from DataParser.
    comm     -- communicator whose bcast() is called with root=0.

    Returns (training_data, test_data, in_layer, hidden_layers, out_layer).
    '''
    try:
        in_layer = int(sys.argv[1])
        hidden_layers = [int(sys.argv[2])]

        if len(sys.argv) == 6:
            hidden_layers.append(int(sys.argv[3]))
            out_layer = int(sys.argv[4])
            fileName = sys.argv[5]
        else:
            out_layer = int(sys.argv[3])
            fileName = sys.argv[4]
    except (IndexError, ValueError):
        # Only argument-parsing failures trigger the fallback; every value
        # is reset so a half-parsed command line is never used.
        in_layer = 9
        hidden_layers = [1]
        out_layer = 1
        fileName = 'breast_cancer.dat'

    #break up data (for each node and into testing/training)
    # The context manager closes the file on both success and error.
    with open(fileName) as f:
        parser = DataParser(f)
        training_data = parser.get_training_sets(numNodes)
        test_data = parser.get_test_set()

    # broadcast ANN architecture to all nodes
    comm.bcast((in_layer, hidden_layers, out_layer, fileName), root=0)
    return training_data, test_data, in_layer, hidden_layers, out_layer
예제 #4
0
 def get_data(self, data_file):
     """Populate self.training_set and self.test_set from data_file.

     data_file is handed straight to DataParser.
     """
     data_parser = DataParser(data_file)
     partitions = data_parser.get_training_sets(1)
     # A single partition was requested: take it, then take its first
     # element (the original note says this is the non-test part).
     first_partition = partitions[0]
     self.training_set = first_partition[0]
     self.test_set = data_parser.get_test_set()
예제 #5
0
class TestDataParser(unittest.TestCase):
    """Unit tests for DataParser, driven by the 'test/data.dat' fixture."""

    def setUp(self):
        # A fresh parser is built for every test. The cleanup closes the
        # fixture file after each test, even if the test (or the rest of
        # setUp) fails.
        self.file = open('test/data.dat')
        self.addCleanup(self.file.close)
        self.instance = DataParser(self.file)

    def test_create_tuple(self):
        # String fields are expected to come back as a tuple of floats.
        nums = ['1','2','3','4']
        output = self.instance._create_tuple(nums)
        self.assertEqual(output, (1.0,2.0,3.0,4.0))

    def test_parse_line(self):
        # Per the expected value: the first field is dropped, the last
        # field forms its own one-element tuple.
        line = "1,2,3,4,5,7"
        output = self.instance._parse_line(line)
        self.assertEqual(output, ((2,3,4,5),(7,)))

    def test_parse_file(self):
        #it gets randomized
        pass

    def test_get_training_sets(self):
        # Asking for two sets must yield two sets.
        sets = self.instance.get_training_sets(2)
        self.assertEqual(len(sets), 2)

    def test_divide_data(self):
        # The fixture is expected to split into 1 test and 9 training items.
        self.assertEqual(len(self.instance.test), 1)
        self.assertEqual(len(self.instance.training), 9)

    def test_separate_training_test(self):
        # In both cases the last two samples are expected in the second list.
        samples = [1,2,3,4,5,6,7,8,9,0]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1,2,3,4,5,6,7,8], [9,0]), out)

        samples = [1,2,3,4,5,6]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1,2,3,4], [5,6]), out)

    def test_whole(self):
        # Smoke test on the full data set: only prints a length, asserts
        # nothing. The parser is used inside the 'with' so the file is
        # still open for any reads and is closed afterwards.
        with open('breast_cancer.dat') as data_file:
            inst = DataParser(data_file)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(len(inst.get_training_sets(5)[1][0]))

    def tearDown(self):
        # Nothing to do here: the fixture file is closed by the cleanup
        # registered in setUp.
        pass
예제 #6
0
class TestDataParser(unittest.TestCase):
    """Test case for DataParser using the 'test/data.dat' fixture file."""

    def setUp(self):
        # Open the fixture and register its close() immediately, so the
        # handle is released after every test, including when the test
        # or the parser construction below fails.
        self.file = open('test/data.dat')
        self.addCleanup(self.file.close)
        self.instance = DataParser(self.file)

    def test_create_tuple(self):
        # A list of numeric strings should become a tuple of floats.
        nums = ['1', '2', '3', '4']
        output = self.instance._create_tuple(nums)
        self.assertEqual(output, (1.0, 2.0, 3.0, 4.0))

    def test_parse_line(self):
        # Expected result omits the first field and puts the last field
        # in a separate one-element tuple.
        line = "1,2,3,4,5,7"
        output = self.instance._parse_line(line)
        self.assertEqual(output, ((2, 3, 4, 5), (7, )))

    def test_parse_file(self):
        #it gets randomized
        pass

    def test_get_training_sets(self):
        # Requesting two training sets should return exactly two.
        sets = self.instance.get_training_sets(2)
        self.assertEqual(len(sets), 2)

    def test_divide_data(self):
        # The fixture should divide into 1 test item and 9 training items.
        self.assertEqual(len(self.instance.test), 1)
        self.assertEqual(len(self.instance.training), 9)

    def test_separate_training_test(self):
        # The trailing two samples are expected in the second list.
        samples = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1, 2, 3, 4, 5, 6, 7, 8], [9, 0]), out)

        samples = [1, 2, 3, 4, 5, 6]
        out = self.instance.separate_training_test(samples)
        self.assertEqual(([1, 2, 3, 4], [5, 6]), out)

    def test_whole(self):
        # Smoke test against the full data file; it prints a length and
        # makes no assertion. All parser calls stay inside the 'with' so
        # the file remains open while it may be read.
        with open('breast_cancer.dat') as data_file:
            inst = DataParser(data_file)
            # print(...) with one argument works on both Python 2 and 3.
            print(len(inst.get_training_sets(5)[1][0]))

    def tearDown(self):
        # The fixture file is closed by the cleanup registered in setUp,
        # so there is nothing left to release here.
        pass
예제 #7
0
 def setUp(self):
     """Open the 'test/data.dat' fixture and build a DataParser on it."""
     self.file = open('test/data.dat')
     # Register the close right after opening so the handle is released
     # after each test, even if DataParser() or the test itself raises.
     self.addCleanup(self.file.close)
     self.instance = DataParser(self.file)
예제 #8
0
 def test_whole(self):
     """Smoke test on 'breast_cancer.dat': prints a length, asserts nothing."""
     # Keep every parser call inside the 'with' so the file is open while
     # it may be read and is always closed afterwards.
     with open('breast_cancer.dat') as data_file:
         inst = DataParser(data_file)
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print(len(inst.get_training_sets(5)[1][0]))
예제 #9
0
 def setUp(self):
     """Create the per-test DataParser from the 'test/data.dat' fixture."""
     self.file = open('test/data.dat')
     # The cleanup runs after every test (pass or fail) and also when the
     # parser construction below raises, so the file never stays open.
     self.addCleanup(self.file.close)
     self.instance = DataParser(self.file)
예제 #10
0
 def test_whole(self):
     """Run DataParser over 'breast_cancer.dat' and print one set's length."""
     # The context manager closes the data file once the parser is done;
     # the parser is only used while the file is still open.
     with open('breast_cancer.dat') as data_file:
         inst = DataParser(data_file)
         # print(...) with one argument works on both Python 2 and 3.
         print(len(inst.get_training_sets(5)[1][0]))