Python splitData 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: SplitData

메소드/함수: splitData

hotexamples.com에서의 예제들: 4

Python splitData - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 SplitData.splitData에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

def createTree(X, y, minSampleSplit=2, maxDepth=None, depth=0):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        X: Feature data; converted with ``np.array`` before use.
        y: Labels; converted with ``np.array`` before use.
        minSampleSplit: Minimum number of labels required to attempt a split.
        maxDepth: Depth limit (int or float). ``None`` disables the limit.
        depth: Depth of the calling node; incremented by one on entry.

    Returns:
        For an internal node, a one-entry dict
        ``{columnIndex: [threshold, lowerNode, upperNode]}`` where both
        child nodes are produced by recursive calls. Otherwise the value
        returned by ``selectClass(y)``.
    """
    X = np.array(X)
    y = np.array(y)
    depth = depth + 1

    # A non-numeric, non-None maxDepth never satisfies the depth check, so
    # such a call always ends in a leaf (same as the original condition).
    withinDepth = maxDepth is None or (
        isinstance(maxDepth, (int, float)) and depth <= maxDepth)

    # Stop splitting when the depth limit is exceeded, too few samples are
    # left, or the labels already have zero entropy.
    if not (withinDepth and len(y) >= minSampleSplit
            and computeEntropy(y) != 0):
        return selectClass(y)

    splits = computeSplits(X)
    # optimumSplit returns a dict whose first entry is the chosen split.
    columnIndex, threshold = next(iter(optimumSplit(X, y, splits).items()))

    lowerX, upperX, lowerY, upperY = splitData(X, y, columnIndex, threshold)
    lowerNode = createTree(lowerX,
                           lowerY,
                           minSampleSplit=minSampleSplit,
                           depth=depth,
                           maxDepth=maxDepth)
    upperNode = createTree(upperX,
                           upperY,
                           minSampleSplit=minSampleSplit,
                           depth=depth,
                           maxDepth=maxDepth)

    return {columnIndex: [threshold, lowerNode, upperNode]}

예제 #2

파일 보기

파일: OptimumSplit.py 프로젝트: Ratansingh648/ML101-Machine-Learning-from-Scratch

def optimumSplit(X, y, splits):
    """Find the (column, threshold) pair with the lowest branch entropy.

    Args:
        X: Feature data; converted with ``np.array`` before use.
        y: Labels; converted with ``np.array`` before use.
        splits: Mapping of column index to an iterable of candidate
            thresholds for that column.

    Returns:
        A one-entry dict ``{column: threshold}`` for the candidate whose
        ``computeBranchEntropy`` value is smallest (the first one wins on
        ties). ``{None: None}`` when ``splits`` offers no candidate.
    """
    X = np.array(X)
    y = np.array(y)

    # Start from +inf so any finite entropy is accepted; a fixed numeric
    # sentinel would silently reject values at or above it.
    minimumEntropy = float("inf")
    optimumColumn = None
    optimumThreshold = None

    for columnIndex, thresholdList in splits.items():
        for threshold in thresholdList:
            # Only the label partitions are needed to score a candidate.
            _, _, y1, y2 = splitData(X, y, columnIndex, threshold)
            # Evaluate the entropy once and reuse it for the update.
            branchEntropy = computeBranchEntropy(y1, y2)
            if branchEntropy < minimumEntropy:
                minimumEntropy = branchEntropy
                optimumColumn = columnIndex
                optimumThreshold = threshold

    return {optimumColumn: optimumThreshold}

예제 #3

파일 보기

파일: main.py 프로젝트: Mbamin/HandWritten-Digits-Classifier-Neural-Network

# Script excerpt: load the dataset, preview a random sample, split it into
# train/test parts, one-hot encode the labels and define network constants.
data = loadmat('Data/Data.mat')
# X is a matrix containing the training data (the 'X' entry of the file).
# y is a matrix containing the training labels (the 'y' entry of the file).
X = data['X']
y = data['y']

print('Displaying 100 Random Images')

# Shuffle the row indices of X and show the first 100 rows of that shuffle.
rand_indices = np.random.permutation(range(X.shape[0]))
sel = X[rand_indices[0:100], :]
displayData(sel)

print('Seperating Data into Test and Training Sets')
print('\n')
# Create test and train examples. splitData is defined outside this excerpt;
# it is unpacked here into five values in exactly this order.
# NOTE(review): the order (X_test before X_train, y_train before Y_test) is
# unusual - confirm it matches what splitData actually returns.
X_test, X_train, y_train, Y_test, Y = splitData(X, y)
print('One Hot Encoding Labels')
print('\n')
# NOTE(review): newer scikit-learn releases renamed the `sparse` keyword to
# `sparse_output` - confirm against the installed version.
encoder = OneHotEncoder(sparse=False, categories='auto')
y_onehot = encoder.fit_transform(y)
# NOTE(review): this second fit_transform refits the encoder on y_train alone,
# so its categories come from y_train rather than from y - confirm intended.
y_train = encoder.fit_transform(y_train)

print('Setting up Neural Network')
print('\n')

# Initial setup: constants for the network; the code that consumes them is
# outside this excerpt.
input_size = 400
hidden_size = 25
num_labels = 10
learning_rate = .9

예제 #4

파일 보기

	reviews[i] = ' '.join(data[i][0:-1])
	labels[i] = int(data[i][-1])

##### convert labels with 0 to -1 ###########
# Relabel in place so the two classes are encoded as -1 and the original
# non-zero value instead of 0 and that value.
for i in range(len(labels)):
	if labels[i]==0:
		labels[i]=-1

labels = np.asarray(labels)
path_to_weight_matrix = 'path to weight matrix'
fname = 'weightmatrix.h5'
weight_matrix_df = pd.read_hdf(fname)
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the
# replacement named by its deprecation notice and works on old and new pandas.
weight_matrix = weight_matrix_df.values

# splitData (defined outside this excerpt) is unpacked into four sequences
# that are indexed by split number below.
splits = 10
trainb_ilst,trainy_ilst,testb_ilst,testy_ilst = splitData(weight_matrix,labels,splits)

split_idx = 0
#### determine train and test data #####
# Only the split selected by split_idx is materialised as arrays.
train_mat = np.array(trainb_ilst[split_idx])
train_labels = np.array(trainy_ilst[split_idx])

test_mat = np.array(testb_ilst[split_idx])
test_labels = np.array(testy_ilst[split_idx])

num_features = 80
#### set training parameters ##########
# load guess matrix either with LSA or word2vec
fname = 'lsaguessvectors.h5'
word_guess_df = pd.read_hdf(fname)
# Same as_matrix() -> .values replacement as for the weight matrix.
word_guess = word_guess_df.values