def deserialize_random_forest(model_dict):
    """Rebuild a ``RandomForestClassifier`` from its dictionary form.

    Args:
        model_dict: Mapping that holds the constructor keyword arguments
            under ``'params'``, one serialized tree per entry under
            ``'estimators_'``, and the remaining attributes under their own
            names. ``'oob_score_'`` and ``'oob_decision_function_'`` are
            optional; every other key read below is required.

    Returns:
        The classifier with the stored attributes assigned onto it.

    Raises:
        KeyError: If a required key is missing from ``model_dict``.
    """
    forest = RandomForestClassifier(**model_dict['params'])
    forest.estimators_ = np.array(
        [deserialize_decision_tree(tree) for tree in model_dict['estimators_']]
    )
    forest.classes_ = np.array(model_dict['classes_'])

    # Attributes that are copied over exactly as stored.
    required_attributes = (
        'n_features_',
        'n_outputs_',
        'max_depth',
        'min_samples_split',
        'min_samples_leaf',
        'min_weight_fraction_leaf',
        'max_features',
        'max_leaf_nodes',
        'min_impurity_decrease',
        'min_impurity_split',
    )
    for attribute in required_attributes:
        setattr(forest, attribute, model_dict[attribute])

    # Out-of-bag results are restored only when the dictionary carries them.
    for attribute in ('oob_score_', 'oob_decision_function_'):
        if attribute in model_dict:
            setattr(forest, attribute, model_dict[attribute])

    # A list-valued class count is turned back into an array; any other
    # value is stored unchanged.
    stored_n_classes = model_dict['n_classes_']
    if isinstance(stored_n_classes, list):
        forest.n_classes_ = np.array(stored_n_classes)
    else:
        forest.n_classes_ = stored_n_classes
    return forest
def main():
    """Train a random forest and save it, or evaluate a previously saved one.

    The mode is chosen by the parsed command-line arguments:

    * ``args.test`` is set: build the test dataset and call ``test`` with
      ``load_path=args.test``.
    * ``args.test`` is ``None`` and ``args.whole`` is truthy: train a
      200-tree forest through ``train_forest_whole``.
    * otherwise: train through ``train_forest_sequential`` on a cropped
      training dataset, starting from an empty warm-start forest.

    In both training modes the resulting forest is dumped with ``joblib`` to
    ``rfc_test_<int(whole)>.pkl``.
    """
    n_classes = 2
    sigmas = [0.3, 0.7, 1., 3.5]
    args = parse_args()
    data_path = args.path
    train_split = args.train_split

    # Evaluation path: handled first so the training code below stays flat.
    if args.test is not None:
        test_set = Brats15NumpyDataset(data_path, False, train_split,
                                       _random_state,
                                       transform=None,
                                       np_transform=None,
                                       np_transform_params=None,
                                       tensor_conversion=False)
        test(test_set, sigmas, load_path=args.test,
             sample_imgs=args.sample_imgs)
        return

    whole = args.whole
    if whole:
        forest = RandomForestClassifier(n_estimators=200, n_jobs=-1)
        forest.n_classes_ = n_classes
        forest = train_forest_whole(forest, data_path)
    else:
        batch_size = args.batch_size
        n_estimators = 100
        # Shape of the first item's first element without its leading axis;
        # used only to size the crop below.
        shape = Brats15NumpyDataset(data_path, True, 1., -1)[0][0].shape[1:]
        # Only a crop to 80% of that shape is applied. (The original also
        # carried commented-out variants: crop + resize to 40%, and no
        # transform at all.)
        np_transform = [crop]
        np_params = [{'size': np.multiply(shape, 0.8).astype(int)}]
        train_set = Brats15NumpyDataset(data_path, True, train_split,
                                        _random_state,
                                        transform=None,
                                        np_transform=np_transform,
                                        np_transform_params=np_params,
                                        tensor_conversion=False)
        # Starts with zero trees and warm_start=True; the trees are added by
        # train_forest_sequential.
        forest = RandomForestClassifier(n_estimators=0, warm_start=True,
                                        n_jobs=-1)
        forest.n_classes_ = n_classes
        forest = train_forest_sequential(forest, train_set, sigmas,
                                         batch_size=batch_size,
                                         n_estimators=n_estimators)

    joblib.dump(forest, 'rfc_test_{}.pkl'.format(int(whole)))
def ReprodIndividualsFromRF(list_indiv, max_id, options):
    """Create one new individual from the classifiers of several individuals.

    The ``clf`` of every individual is placed in a ``RandomForestClassifier``
    shell, which is handed to ``eqtree_rec_rf``; the result of that call is
    wrapped in a new ``genetic.individual``.

    Args:
        list_indiv: Iterable of individuals, each exposing a ``clf`` attribute.
        max_id: Highest identifier in use; the new individual gets
            ``max_id + 1``.
        options: Mapping with the keys ``'max_depth'``, ``'alpha'``,
            ``'on_data'``, ``'X'`` and ``'y'``.

    Returns:
        The newly created ``genetic.individual``.
    """
    parents = list(list_indiv)
    forest = RandomForestClassifier(n_estimators=len(parents))
    forest.estimators_ = [parent.clf for parent in parents]

    # Class metadata is taken from the first tree only.
    reference_tree = forest.estimators_[0]
    forest.n_classes_ = reference_tree.n_classes_
    forest.classes_ = reference_tree.classes_

    merged_tree = eqtree_rec_rf(forest, 0,
                                max_depth=options['max_depth'],
                                smallest_tree=False)
    return genetic.individual(merged_tree, max_id + 1,
                              type_rf=False,
                              alpha=options['alpha'],
                              evaluate_on_data=options['on_data'],
                              X=options['X'],
                              y=options['y'])
def train_random_forest(X, Y, estimators, classes, features, train_size=3600):
    """Fit a random forest on a leading slice and score it on the remainder.

    Args:
        X: 2-D feature array; rows ``[:train_size]`` are used for training and
            the remaining rows for validation.
        Y: Labels aligned with the rows of ``X``.
        estimators: Number of trees in the forest.
        classes: Value assigned to ``n_classes_`` before fitting.
        features: Value assigned to ``n_features_`` before fitting.
        train_size: Number of leading rows used for training. Defaults to
            3600, the value that used to be hard-coded.

    Returns:
        Validation accuracy as a percentage (0-100).

    Raises:
        ValueError: If no rows are left for validation.
    """
    train_x, validate_x = X[:train_size, :], X[train_size:, :]
    train_y, validate_y = Y[:train_size], Y[train_size:]

    n_validate = validate_y.shape[0]
    if n_validate == 0:
        raise ValueError(
            "no validation rows left after taking %d training rows" % train_size)

    clf = RandomForestClassifier(n_estimators=estimators)
    # NOTE(review): fit() presumably recomputes these attributes itself;
    # the assignments are kept so the parameters stay in use — confirm
    # whether they are needed at all.
    clf.n_classes_ = classes
    clf.n_features_ = features
    clf.fit(train_x, train_y)

    # The column shape is derived from the validation set instead of the
    # former literal 400, which only worked for inputs of exactly 4000 rows.
    predictions = clf.predict(validate_x).reshape(n_validate, 1)
    hits = sum(1 for i in range(n_validate)
               if predictions[i] == validate_y[i])
    # float() keeps this a true division on Python 2 as well.
    return hits / float(n_validate) * 100
def random_forest(X, Y, n_samples):
    """Train on the first half of the data and time prediction on the rest.

    Args:
        X: Samples; the first ``n_samples // 2`` entries are used for fitting.
        Y: Targets aligned with ``X``.
        n_samples: Total sample count used to locate the split point.

    Returns:
        ``[elapsed_time, accuracy]`` where ``elapsed_time`` is the wall-clock
        duration of the ``predict`` call in seconds and ``accuracy`` is the
        fraction of second-half targets that were predicted correctly.
    """
    split = n_samples // 2

    forest = RandomForestClassifier(n_estimators=500, max_depth=10,
                                    random_state=0)
    forest.n_classes_ = list(range(10))
    forest.fit(X[:split], Y[:split])

    # Only the predict call (including its input slice) is timed.
    started = time.time()
    predicted = forest.predict(X[split:])
    elapsed_time = time.time() - started

    expected = Y[split:]
    accuracy = sum(expected == predicted) * 1. / len(expected)
    return [elapsed_time, accuracy]
def learn(train_set):
    """Fit a 150-tree random forest and pickle it to the file ``classifier``.

    Args:
        train_set: Iterable of items; each one is passed to
            ``get_features_value`` and ``get_segments_classes`` to obtain its
            feature rows and class labels.
    """
    clf = RandomForestClassifier(n_estimators=150)
    clf.n_classes_ = len(object_types)

    # x -- 2d array, x[i] -- list of feature values for segment i
    # y -- 1d array, y[i] -- true class of segment i
    x = []
    y = []
    for num in train_set:
        x += get_features_value(num)
        y += get_segments_classes(num)
    clf.fit(x, y)

    # The context manager closes the file even if pickling fails; the file
    # object was previously left for the garbage collector to close.
    with open("classifier", "wb+") as model_file:
        cPickle.dump(clf, model_file)
def build_classifier(trees):
    """Wrap low-level tree objects in scikit-learn estimator shells.

    Args:
        trees: Sequence of tree objects exposing ``n_features``,
            ``n_outputs`` and ``n_classes``.

    Returns:
        A ``RandomForestClassifier`` holding one wrapped tree per entry when
        more than one tree is given, otherwise a single
        ``DecisionTreeClassifier`` wrapping ``trees[0]``.
    """
    def build_decision_tree(t):
        # Shell estimator whose attributes are filled in by hand from ``t``.
        wrapped = DecisionTreeClassifier(random_state=0)
        wrapped.n_features_ = t.n_features
        wrapped.n_outputs_ = t.n_outputs
        wrapped.n_classes_ = t.n_classes[0]
        # Class labels are taken to be 0 .. n_classes_ - 1.
        wrapped.classes_ = np.array(list(range(wrapped.n_classes_)))
        wrapped.tree_ = t
        return wrapped

    if len(trees) <= 1:
        return build_decision_tree(trees[0])

    forest = RandomForestClassifier(random_state=0, n_estimators=len(trees))
    forest.estimators_ = list(map(build_decision_tree, trees))

    # Forest-level metadata is copied from the first tree.
    first = trees[0]
    forest.n_features_ = first.n_features
    forest.n_outputs_ = first.n_outputs
    forest.n_classes_ = first.n_classes[0]
    forest.classes_ = np.array(list(range(forest.n_classes_)))
    return forest
# Script: fit a random forest and write its predictions to random_forest.csv.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import sys

# Make the parent directory importable before loading data_loader.
sys.path.append('..')
import data_loader
from data_loader import *

# NOTE(review): train_data, label_data and test are presumably supplied by
# the star import above — confirm against data_loader.
classifier = RandomForestClassifier(n_estimators=20, min_samples_leaf=200)
classifier.n_classes_ = 2
classifier.fit(train_data, label_data.ravel())

prediction = classifier.predict(test)

# One row per prediction, with the row index shifted to start at 1.
frame = pd.DataFrame(data=prediction)
frame.index = frame.index + 1
frame.to_csv("random_forest.csv",
             index=True,
             header=['Solution'],
             index_label='Id')

if __name__ == "__main__":
    pass
X[i,:]=image #images[i,:,:] = image #print(X) #print(y) clf = RandomForestClassifier(n_estimators=500, max_depth=10, random_state=0) clf.n_classes_=[0,1,2,3,4,5,6,7,8,9] #clf.n_classes_=[0,1,2,3,4,5,6,7,8,9] clf.fit(X[:n_samples //2], Y[:n_samples // 2]) predicted = clf.predict(X[n_samples//2:]) expected = Y[n_samples//2:] print("Classification report for classifier %s:\n%s\n" % (clf, metrics.classification_report(expected, predicted))) print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted)) images_and_predictions = list(zip(images[n_samples // 2:], predicted)) for index, (image, prediction) in enumerate(images_and_predictions[:25]): plt.subplot(5, 5, index+1) plt.axis('off') plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
# Fill the count matrix: row `index` belongs to the current tweet and the
# column is looked up per word in bagOfWords.
# NOTE(review): `index += 1` is assumed to sit at tweet level (one row per
# tweet); the excerpt's indentation is lost — confirm against the full file.
index = 0
for tweet in tweets:
    words = tweet.split(' ')
    for word in words:
        tweetX[index][bagOfWords[word]] += 1
    index += 1
#75-25 Train-Test split results
X_train, X_test, y_train, y_test = train_test_split(tweetX, labels, test_size=0.25, random_state=42)
rf = RandomForestClassifier(n_estimators=50)
# NOTE(review): fit() presumably recomputes classes_ and n_classes_ —
# confirm that these two assignments have any effect.
rf.classes_ = [0, 1, -1]
rf.n_classes_ = 3
rf.fit(X_train, y_train)
a = rf.predict(X_test)
# Python 2 print statements: report accuracy on the test split in percent.
print 'Train-Test Split Results'
print np.sum(a == y_test) * 100.0 / len(y_test)
#Cross Validation Results
# The triple-quoted block below is not closed within this excerpt.
''' rf = RandomForestClassifier(n_estimators=50) rf.classes_ = [0,1,-1] rf.n_classes_ = 3 rf.fit(tweetX,labels)
# Baseline forest trained on train_x / train_y and saved to <model_path>/RF.m.
RF = RandomForestClassifier(n_estimators=rfSize)
RF.fit(train_x, train_y)
RF_path = model_path + '/RF.m'
joblib.dump(RF, RF_path)
# BRAF
# rf3 is trained on training_c_x / training_c_y and saved on its own.
rf3 = RandomForestClassifier(n_estimators=rf2_size)
rf3.fit(training_c_x, training_c_y)
rf3_path = model_path + '/rf3.m'
joblib.dump(rf3, rf3_path)
# RF1 is never fitted: its trees are the concatenation of rf1's and rf3's
# trees, with class metadata copied from rf1.
# NOTE(review): rf1 and rf2 are not defined in this excerpt — presumably
# fitted earlier in the file; confirm.
RF1 = RandomForestClassifier(n_estimators=rfSize)
Gobaltree = rf1.estimators_ + rf3.estimators_
RF1.estimators_ = Gobaltree
RF1.classes_ = rf1.classes_
RF1.n_classes_ = rf1.n_classes_
RF1.n_outputs_ = rf1.n_outputs_
RF1_path = model_path + '/braf.m'
joblib.dump(RF1, RF1_path)
# DBRF
# RF2 is assembled the same way from rf1's and rf2's trees, but its class
# metadata is copied from rf2.
RF2 = RandomForestClassifier(n_estimators=rfSize)
mod_Gobaltree = rf1.estimators_ + rf2.estimators_
RF2.estimators_ = mod_Gobaltree
RF2.classes_ = rf2.classes_
RF2.n_classes_ = rf2.n_classes_
RF2.n_outputs_ = rf2.n_outputs_
RF2_path = model_path + '/borderlindbscan.m'
joblib.dump(RF2, RF2_path)
from sklearn import metrics
def Train1(X, y):
    """Fit a 10-tree random forest with out-of-bag scoring enabled.

    Args:
        X: Training samples.
        y: Training targets.

    Returns:
        The object returned by ``fit``.
    """
    forest = RandomForestClassifier(n_estimators=10, oob_score=True)
    forest.n_classes_ = 3
    return forest.fit(X, y)
def _predicted_class_error(pairs, label, labels=('1', '2', '3')):
    """Return the share of wrong predictions among samples predicted as `label`.

    Only pairs whose true label is one of `labels` are counted. Returns 0.0
    when nothing was predicted as `label`, so such a fold adds no error
    instead of raising ZeroDivisionError.
    """
    predicted = 0
    wrong = 0
    for guess, truth in pairs:
        if guess == label and truth in labels:
            predicted += 1
            if truth != label:
                wrong += 1
    if not predicted:
        return 0.0
    return wrong / float(predicted)


def Train_Kfold(X, y, K):
    """Cross-validate a 5-tree random forest and average its error rates.

    For every fold a fresh forest is fitted on the training indices and
    evaluated on the test indices. Labels are compared as the strings
    ``'1'``, ``'2'`` and ``'3'``.

    Args:
        X: Sample array, indexable by the fold index arrays.
        y: Label array aligned with ``X``.
        K: Number of folds; also the divisor used for averaging.

    Returns:
        ``(err, Aerr, Berr)``, each averaged over the folds:
        ``err`` is the overall misclassification rate, ``Aerr`` the share of
        wrong predictions among samples predicted ``'1'``, and ``Berr`` the
        same for samples predicted ``'2'``.
    """
    kf = KFold(X.shape[0], n_folds=K)
    err, Aerr, Berr = 0.0, 0.0, 0.0
    for train_index, test_index in kf:
        rfc = RandomForestClassifier(n_estimators=5, oob_score=True)
        rfc.n_classes_ = 3
        model = rfc.fit(X[train_index], y[train_index])
        pred = model.predict(X[test_index])

        # Index the fold's true labels once rather than on every comparison.
        truth = y[test_index]
        pairs = list(zip(pred, truth))

        misses = sum(1 for guess, actual in pairs if guess != actual)
        err += misses / float(len(pred))
        Aerr += _predicted_class_error(pairs, '1')
        # Wrong '2' predictions are those whose true label is '1' or '3';
        # the previous index arithmetic summed the correct '2' predictions
        # in place of the '3' cases.
        Berr += _predicted_class_error(pairs, '2')

    # Each accumulator is averaged on its own; Aerr and Berr used to be
    # overwritten with err / K here, discarding the per-class totals.
    folds = float(K)
    return (err / folds, Aerr / folds, Berr / folds)
local_dict = sorted(local_dict) ############################# Part 3: Linearity ########################## # random forest 1 rf1 = RandomForestClassifier(random_state=10) rf1.fit(train_vectors, y_train) # random forest 2 rf2 = RandomForestClassifier(random_state=15) rf2.fit(train_vectors, y_train) # random forest 3 rf3 = RandomForestClassifier(random_state=22) rf3.estimators_ = rf1.estimators_ + rf2.estimators_ rf3.n_classes_ = rf1.n_classes_ # model 1 def model_rf1(data): n_data = len(data) res = np.zeros((n_data, 2)) tfidf = vectorizer.transform(data) p = rf1.predict_proba(tfidf) res[:, 0] = p[:, 1] res[:, 1] = p[:, 1] return res # model 2 def model_rf2(data): n_data = len(data) res = np.zeros((n_data, 2))