예제 #1
0
def createTree(X, y, minSampleSplit=2, maxDepth=None, depth=0):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        X: Feature samples; converted with ``np.array``.
        y: Labels belonging to ``X``; converted with ``np.array``.
        minSampleSplit: Minimum number of labels a node must hold before it
            may be split.
        maxDepth: Deepest level at which a node may still be split, or
            ``None`` for no limit. A value that is neither ``None`` nor a
            number disables splitting altogether.
        depth: Depth of the calling node. The root call uses the default 0,
            so the root itself is evaluated at depth 1.

    Returns:
        ``{columnIndex: [threshold, lowerNode, upperNode]}`` for an internal
        node, where both children are themselves results of ``createTree``;
        otherwise the value of ``selectClass(y)`` for a leaf.
    """
    X = np.array(X)
    y = np.array(y)

    depth = depth + 1

    # Evaluation order matters: the entropy is only computed once the cheaper
    # depth and sample-count checks have passed.
    withinDepth = maxDepth is None or (
        isinstance(maxDepth, (int, float)) and depth <= maxDepth)
    if not (withinDepth and len(y) >= minSampleSplit
            and computeEntropy(y) != 0):
        return selectClass(y)

    splits = computeSplits(X)
    # optimumSplit returns a single-entry dict {column: threshold}.
    columnIndex, threshold = next(iter(optimumSplit(X, y, splits).items()))
    if columnIndex is None:
        # No candidate split was available, so this node has to be a leaf.
        return selectClass(y)

    lowerX, upperX, lowerY, upperY = splitData(X, y, columnIndex, threshold)
    if np.size(lowerY) == 0 or np.size(upperY) == 0:
        # A split that leaves one side empty makes no progress; stop here
        # instead of recursing on an empty label set.
        return selectClass(y)

    lowerNode = createTree(lowerX,
                           lowerY,
                           minSampleSplit=minSampleSplit,
                           depth=depth,
                           maxDepth=maxDepth)
    upperNode = createTree(upperX,
                           upperY,
                           minSampleSplit=minSampleSplit,
                           depth=depth,
                           maxDepth=maxDepth)

    return {columnIndex: [threshold, lowerNode, upperNode]}
def optimumSplit(X, y, splits):
    """Find the column/threshold pair with the lowest branch entropy.

    Args:
        X: Feature samples; converted with ``np.array``.
        y: Labels belonging to ``X``; converted with ``np.array``.
        splits: Mapping of column index to an iterable of candidate
            thresholds for that column.

    Returns:
        A single-entry dict ``{column: threshold}`` for the best candidate.
        The first candidate wins on ties. ``{None: None}`` is returned when
        no candidate improves on the initial (infinite) bound, e.g. when
        ``splits`` holds no thresholds.
    """
    X = np.array(X)
    y = np.array(y)

    # Start from +inf so any finite entropy is accepted as an improvement.
    minimumEntropy = float('inf')
    optimumColumn = None
    optimumThreshold = None

    for columnIndex, thresholdList in splits.items():
        for threshold in thresholdList:
            # Only the label partitions are needed to score the candidate.
            _, _, lowerY, upperY = splitData(X, y, columnIndex, threshold)
            # Score each candidate once and reuse the value.
            branchEntropy = computeBranchEntropy(lowerY, upperY)
            if branchEntropy < minimumEntropy:
                minimumEntropy = branchEntropy
                optimumColumn = columnIndex
                optimumThreshold = threshold

    return {optimumColumn: optimumThreshold}
# Load the dataset from the MATLAB file.
data = loadmat('Data/Data.mat')
# X is a matrix containing Training Data
# y is a matrix containing Training Labels
X = data['X']
y = data['y']

print('Displaying 100 Random Images')

# Shuffle the row indices of X and display the first (up to) 100 rows.
rand_indices = np.random.permutation(range(X.shape[0]))
sel = X[rand_indices[0:100], :]
displayData(sel)

print('Seperating Data into Test and Training Sets')
print('\n')
# Create test and train examples.
X_test, X_train, y_train, Y_test, Y = splitData(X, y)
print('One Hot Encoding Labels')
print('\n')
encoder = OneHotEncoder(sparse=False, categories='auto')
y_onehot = encoder.fit_transform(y)
# Reuse the categories fitted on the full label set instead of re-fitting on
# the training subset: re-fitting would drop the column of any class missing
# from y_train and leave the two encodings with different layouts.
y_train = encoder.transform(y_train)

print('Setting up Neural Network')
print('\n')

# initial setup
input_size = 400
hidden_size = 25
num_labels = 10
learning_rate = .9
예제 #4
0
	reviews[i] = ' '.join(data[i][0:-1])
	labels[i] = int(data[i][-1])

##### convert labels with 0 to -1 ###########
for i in range(len(labels)):
	if labels[i]==0:
		labels[i]=-1

labels = np.asarray(labels)
path_to_weight_matrix = 'path to weight matrix'
fname = 'weightmatrix.h5'
weight_matrix_df = pd.read_hdf(fname)
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and is available on both old and new pandas versions.
weight_matrix = weight_matrix_df.values

splits = 10
trainb_ilst,trainy_ilst,testb_ilst,testy_ilst = splitData(weight_matrix,labels,splits)

# Index into the lists returned by splitData: which split to use below.
split_idx = 0
#### determine train and test data #####
train_mat = np.array(trainb_ilst[split_idx])
train_labels = np.array(trainy_ilst[split_idx])

test_mat = np.array(testb_ilst[split_idx])
test_labels = np.array(testy_ilst[split_idx])

num_features = 80
#### set training parameters ##########
# load guess matrix either with LSA or word2vec
fname = 'lsaguessvectors.h5'
word_guess_df = pd.read_hdf(fname)
# Same as_matrix() -> .values replacement as for the weight matrix.
word_guess = word_guess_df.values