Example #1
def predictLanguage(inputTrans):
    # Load the classifier created and saved by CreateClassifier.py
    classifier = utl.ReadModel('languageClassifierThree.pickle')

    # Extract the features, feed them into the classifier, and return the result
    languageFeatures = utl.get_features(inputTrans)
    return classifier.classify(languageFeatures)
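`utl.ReadModel` is not shown in these examples. Assuming CreateClassifier.py saved the model with the standard pickle module (an assumption, not confirmed here), the helper presumably looks something like this:

import pickle

def ReadModel(path):
    # Hypothetical reconstruction of utl.ReadModel: load a classifier
    # that was previously written to disk with pickle.dump.
    with open(path, 'rb') as f:
        return pickle.load(f)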
Example #2
    def format_content(self):
        # Convert each character of the content to its integer code
        unpadded_formatted = MLUtils.format_line(self.content)

        # Convert the integer codes to floats
        float_unpadded = [float(n) for n in unpadded_formatted]

        # Pad the float list out to the block size
        self.formatted_content = MLUtils.pad_float(float_unpadded, self.size)
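`MLUtils.pad_float` is not shown either. Based on how it is called here and in the tests below (a list of floats plus a target size), a plausible sketch, assuming zero-padding, is:

def pad_float(values, size, fill=0.0):
    # Plausible reconstruction of MLUtils.pad_float: extend the list
    # with `fill` until it has `size` elements; the padding value is
    # an assumption.
    return list(values) + [fill] * (size - len(values))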
Example #3
def load_artist(artist):
    '''Load the images for a single artist.'''
    print("Loading images for artist", artist)
    mat_contents = sio.loadmat(DATA_PATH + FILENAME, struct_as_record=False)

    temp_data = mat_contents[SHAPE]

    # This is a 1xn array that needs to be reorganized
    points = temp_data[0, 0].Points

    Runs = temp_data[0, 0].Runs

    features = np.empty([Runs.shape[0], points.shape[1], NUM_FOCAL_PLANE_DOF],
                        dtype=np.float32)
    print(points.shape)
    print(Runs.shape)
    outputs = np.empty([Runs.shape[0], NUM_DOF], dtype=np.float32)
    count1 = 0
    for i in range(0, Runs.shape[0]):
        for j in range(0, points.shape[1]):
            # Occlusion handling is disabled here; the original check was
            # Runs[i, 0].Occluded[j][0] == 1
            if False:
                features[i, j, 0] = 0
                features[i, j, 1] = 0
            else:
                count1 += 1
                features[i, j, 0] = Runs[i, 0].Points[0, j]
                features[i, j, 1] = Runs[i, 0].Points[1, j]
        temp_dcm = mlu.pose2DCM(Runs[i, 0].yaw[0, 0], Runs[i, 0].pitch[0, 0],
                                Runs[i, 0].roll[0, 0])
        outputs[i][0] = Runs[i, 0].x[0, 0]
        outputs[i][1] = Runs[i, 0].y[0, 0]
        outputs[i][2] = Runs[i, 0].z[0, 0]
        outputs[i][3] = temp_dcm[0][0]
        outputs[i][4] = temp_dcm[0][1]
        outputs[i][5] = temp_dcm[0][2]
        outputs[i][6] = temp_dcm[1][0]
        outputs[i][7] = temp_dcm[1][1]
        outputs[i][8] = temp_dcm[1][2]
        outputs[i][9] = temp_dcm[2][0]
        outputs[i][10] = temp_dcm[2][1]
        outputs[i][11] = temp_dcm[2][2]

    print('Shape dataset size:', features.shape)
    print('Mean:', np.mean(features))
    print('Standard deviation:', np.std(features))
    print('')
    print('Feature points written:', count1)
    return features, outputs
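`mlu.pose2DCM` turns a yaw/pitch/roll pose into the nine direction-cosine-matrix entries stored in `outputs[i][3:12]`. A minimal sketch of such a helper, assuming a ZYX Euler convention with angles in radians (the original's convention is not shown):

import numpy as np

def pose2DCM(yaw, pitch, roll):
    # Sketch of a yaw/pitch/roll -> direction cosine matrix conversion,
    # assuming R = Rz(yaw) @ Ry(pitch) @ Rx(roll); the real helper's
    # convention may differ.
    cy, sy = np.cos(yaw), np.sin(yaw)
    cp, sp = np.cos(pitch), np.sin(pitch)
    cr, sr = np.cos(roll), np.sin(roll)
    Rz = np.array([[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]])
    Ry = np.array([[cp, 0.0, sp], [0.0, 1.0, 0.0], [-sp, 0.0, cp]])
    Rx = np.array([[1.0, 0.0, 0.0], [0.0, cr, -sr], [0.0, sr, cr]])
    return Rz @ Ry @ Rx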
Example #4
    def split(self):

        inverse_map = {}
        char_index = 0
        line_index = 1

        # Map each character offset in the source file to its line number
        with open(self.source_p, 'r') as source_file:
            for line in source_file:
                for _ in line:
                    inverse_map[char_index] = line_index
                    char_index += 1
                line_index += 1

        with open(self.source_p, 'r') as source_file:
            source_text = source_file.read()

        all_blocks = []
        i = 0
        while i < len(source_text):
            j = 0
            current_block = Block(inverse_map[i], i, self.BLOCK_SIZE)
            while j < self.BLOCK_SIZE and i < len(source_text):
                current_block.content += source_text[i]
                i += 1
                j += 1
            current_block.content = MLUtils.pad(current_block.content,
                                                self.BLOCK_SIZE)
            all_blocks.append(current_block)
            if i >= len(source_text):
                break
            i -= self.BLOCK_SIZE - self.BLOCK_OFFSET

        self.container = all_blocks
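After consuming a full block the loop rewinds `i` by `BLOCK_SIZE - BLOCK_OFFSET`, so each new block starts `BLOCK_OFFSET` characters after the previous one and consecutive blocks share `BLOCK_SIZE - BLOCK_OFFSET` characters. A standalone trace of that windowing with made-up values, using `str.ljust` as a stand-in for `MLUtils.pad`:

text = "abcdefgh"
BLOCK_SIZE, BLOCK_OFFSET = 4, 2
blocks, start = [], 0
while True:
    # Each window is BLOCK_SIZE long; a short tail is padded with spaces
    blocks.append(text[start:start + BLOCK_SIZE].ljust(BLOCK_SIZE))
    if start + BLOCK_SIZE >= len(text):
        break
    start += BLOCK_OFFSET
print(blocks)  # ['abcd', 'cdef', 'efgh']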
Example #5
    def test_format_training(self):
        marker = Marker('testFiles/training.txt', None, 'testFiles/category_map_test.csv', 550)
        formatted = marker.format_training_file()
        form_line = MLUtils.format_line((' | otherwise = y\n\n').replace('\n', ''))
        padded = MLUtils.pad_float(form_line, 550)
        self.assertTrue((0, padded) in formatted)
Example #6
    def test_format(self):
        formatted = MLUtils.format_line('abcde')
        expected = [97, 98, 99, 100, 101]
        self.assertEqual(formatted, expected)
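The expected list is the ASCII code points of 'abcde', so `MLUtils.format_line` evidently maps each character of a line to its integer code. A one-line sketch consistent with this test:

def format_line(line):
    # Map each character to its code point, e.g. 'a' -> 97
    return [ord(c) for c in line]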
Example #7
    def test_parse_training(self):
        parsed = MLUtils.parse_training_file('testFiles/training.txt')
        self.assertTrue(('ok', ' | otherwise = y') in parsed)
        self.assertTrue(('comment', '-- Geometric sequence') in parsed)
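The test shows that `MLUtils.parse_training_file` yields (label, text) pairs such as ('ok', ' | otherwise = y'). The on-disk format of training.txt is not shown; assuming, purely for illustration, one tab-separated pair per line, a parser could look like:

def parse_training_file(path):
    # ASSUMPTION: each line holds "<label>\t<source text>"; the real
    # file format is not documented in these examples.
    pairs = []
    with open(path, 'r') as f:
        for line in f:
            label, _, text = line.rstrip('\n').partition('\t')
            pairs.append((label, text))
    return pairs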
Example #8
def make_basic_datasets():
    artist_path = DATA_PATH
    artists = [1]
    assert len(artists) == NUM_ARTISTS

    train_data, train_labels = make_dataset_arrays()
    val_data, val_labels = make_dataset_arrays()
    test_data, test_labels = make_dataset_arrays()
    num_train = num_val = num_test = 0

    temp_d = np.loadtxt(DATA_PATH + '../poses.txt',
                        dtype=np.float32,
                        delimiter=" ")
    d = np.empty([temp_d.shape[0], 12], dtype=np.float32)
    # NOTE: the loop stops at len(temp_d) - 1, so the last row of d is
    # never filled in
    for i in range(0, len(temp_d) - 1):
        temp_dcm = mlu.pose2DCM(temp_d[i][3], temp_d[i][4], temp_d[i][5])
        d[i][0] = temp_d[i][0]
        d[i][1] = temp_d[i][1]
        d[i][2] = temp_d[i][2]
        d[i][3] = temp_dcm[0][0]
        d[i][4] = temp_dcm[0][1]
        d[i][5] = temp_dcm[0][2]
        d[i][6] = temp_dcm[1][0]
        d[i][7] = temp_dcm[1][1]
        d[i][8] = temp_dcm[1][2]
        d[i][9] = temp_dcm[2][0]
        d[i][10] = temp_dcm[2][1]
        d[i][11] = temp_dcm[2][2]
        print(d[i])

    for label, artist in enumerate(artists):
        # load in the images and poses
        artist_data, num_indices = load_artist(artist)
        artist_label = d[num_indices]
        # randomize the data
        artist_data, artist_label = randomize(artist_data, artist_label)
        # scale the pixel values
        artist_data = scale_pixel_values(artist_data)
        num_paintings = len(artist_data)
        num_paintings = len(artist_data)

        # an extra in-place shuffle is disabled; randomize above already
        # shuffles the data
        #np.random.shuffle(artist_data)

        nv = int(num_paintings * VALIDATION_PERCENT)

        # partition validation data
        artist_val = artist_data[0:nv, :, :, :]
        val_data[num_val:num_val + nv, :, :, :] = artist_val
        # labels come from the same rows of artist_label as the data slice
        val_labels[num_val:num_val + nv, :] = artist_label[0:nv]

        num_val += nv

        # partition test data
        if PARTITION_TEST:
            nt = int(num_paintings * TEST_PERCENT)
            artist_test = artist_data[nv:nv + nt, :, :, :]
            test_data[num_test:num_test + nt, :, :, :] = artist_test
            test_labels[num_test:num_test + nt, :] = artist_label[nv:nv + nt]
            num_test += nt
        else:
            nt = 0

        # partition train data
        artist_train = artist_data[nv + nt:, :, :, :]
        ntr = len(artist_train)
        train_data[num_train:num_train + ntr, :, :, :] = artist_train
        train_labels[num_train:num_train + ntr, :] = artist_label[nv + nt:]
        num_train += ntr

    # throw out extra allocated rows
    train_data, train_labels = trim_dataset_arrays(train_data, train_labels,
                                                   num_train)
    val_data, val_labels = trim_dataset_arrays(val_data, val_labels, num_val)

    # shuffling across artists is currently disabled; re-enable these
    # calls to distribute samples from different artists randomly
    #train_data, train_labels = randomize(train_data, train_labels)
    #val_data, val_labels = randomize(val_data, val_labels)

    print('Training set:', train_data.shape, train_labels.shape)
    print('Validation:', val_data.shape, val_labels.shape)

    if PARTITION_TEST:
        test_data, test_labels = trim_dataset_arrays(test_data, test_labels,
                                                     num_test)
        test_data, test_labels = randomize(test_data, test_labels)
        print('Testing:', test_data.shape, test_labels.shape)
        print('')

    # save all the datasets in a pickle file
    pickle_file = 'art_data.pickle'
    save = {
        'train_data': train_data,
        'train_labels': train_labels,
        'val_data': val_data,
        'val_labels': val_labels
    }

    if PARTITION_TEST:
        save['test_data'] = test_data
        save['test_labels'] = test_labels
    save_pickle_file(pickle_file, save)
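`save_pickle_file` is not shown; a plausible implementation, assuming it simply serializes the dataset dict with the standard pickle module:

import pickle

def save_pickle_file(path, data):
    # Hypothetical reconstruction: write the dataset dict to disk
    with open(path, 'wb') as f:
        pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)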
Example #9
    # result list instantiation and a definition of possible classes
    labeledTrain = []
    labeledTest = []
    possibleClassifications = ['en-GB', 'fr-FR', 'de-DE', 'it-IT', 'es-DO']

    print("----------EXTRACTING FEATURES----------")
    print("TRAINING SET")
    # Extract features (and print progress updates) for the training set
    for classification, transcriptions in train.items():
        print("\t" + classification)
        for position, transcription in enumerate(transcriptions):
            print("\t\t" + str(position + 1) + " of " +
                  str(len(transcriptions)))
            labeledTrain.append(
                (utl.get_features(transcription), classification))

    print("TEST SET (for accuracy purposes)")
    # Extract features and print progress updates for the test set (used
    # only for evaluation; it plays no part in training)
    for classification, transcriptions in test.items():
        print("\t" + classification)
        for position, transcription in enumerate(transcriptions):
            print("\t\t" + str(position + 1) + " of " +
                  str(len(transcriptions)))
            labeledTest.append(
                (utl.get_features(transcription), classification))

    # Train the classifiers on the feature-extracted, labeled training set
    print("\n\n----------TRAINING CLASSIFIERS----------")
    print("\tTraining Bernoulli")
    bern = SklearnClassifier(BernoulliNB()).train(labeledTrain)
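Once trained, the held-out `labeledTest` pairs can be scored. A short usage sketch with NLTK's standard accuracy helper:

import nltk

# Fraction of test transcriptions whose predicted language matches the label
print('Bernoulli accuracy:', nltk.classify.accuracy(bern, labeledTest))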
Example #10
        " alpha: "+str(alpha)
    )
    print()

    np.save("saved_layers/layer1.nn", layer1)
    np.save("saved_layers/layer2.nn", layer2)

#   ----    visualize weights   ----    #
if load:
    layer1 = np.load("saved_layers/layer1.nn.npy")
    layer2 = np.load("saved_layers/layer2.nn.npy")

if visualize:
    layers = [layer1, layer2]
    MLUtils.visualizeLayers(layers)

#   ----    test    ----    #
if test:
    error = 0.0
    numCorrect = 0
    for i in range(0, numTestData):
        index = testDataStartIndex + i
        inputData = x[index]
        label = y[index]

        layer1Output = activFuncs.lrelu(np.sum(layer1[inputData], axis=0))
        layer2Output = layer1Output.dot(layer2)

        layer2Delta = label - layer2Output
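`activFuncs.lrelu` is not shown; it is presumably a standard leaky ReLU. A sketch with an assumed negative slope of 0.01:

import numpy as np

def lrelu(x, alpha=0.01):
    # Leaky ReLU: pass positives through, scale negatives by alpha
    return np.where(x > 0, x, alpha * x)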