import time

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_curve, auc
from dimension_reduction import elasticNet
import utils.tools as utils
from gcforest.gcforest import GCForest
from gcforest.utils.config_utils import load_json

start = time.time()

# Build the gcForest model from its JSON configuration
path1 = 'gcforest4.json'
config = load_json(path1)
gc = GCForest(config)

# Load the elastic-net feature mask and the extracted protein features
mask_data = sio.loadmat('yeast_elastic_mask_scale_0.03_0.1.mat')
mask = mask_data.get('yeast_elastic_mask')
extraction = sio.loadmat('yeast_feature_end.mat')
proteinA = extraction.get('feature_A')
protein_A = np.array(proteinA)
proteinB = extraction.get('feature_B')
protein_B = np.array(proteinB)

# Concatenate the two protein feature blocks into one sample matrix
X_ = np.concatenate((protein_A, protein_B), axis=1)
X_ = np.array(X_)
[row, column] = np.shape(X_)

# The first half of the pairs are positives (label 1), the second half negatives (label 0)
label_P = np.ones(int(row / 2))
label_N = np.zeros(int(row / 2))
label_ = np.hstack((label_P, label_N))
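# --- Illustrative sketch, not part of the original file ---
# One plausible way to evaluate the prepared pairs: a stratified 5-fold split
# on X_ / label_, fitting a fresh cascade per fold. The shuffle seed, the fold
# count, and the use of the raw (unmasked) features are assumptions made here.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
fold_acc = []
for train_idx, test_idx in skf.split(X_, label_):
    fold_gc = GCForest(load_json(path1))  # fresh model per fold
    fold_gc.fit_transform(X_[train_idx], label_[train_idx])
    y_pred = fold_gc.predict(X_[test_idx])
    fold_acc.append(accuracy_score(label_[test_idx], y_pred))
print("mean 5-fold accuracy:", np.mean(fold_acc))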
import argparse

from gcforest.gcforest import GCForest
from gcforest.utils.config_utils import load_json


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", dest="model", type=str, default=None,
                        help="gcforest Net Model File")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    # config
    args = parse_args()
    if args.model == 'ca':
        config = load_json('./mnist-ca.json')
    elif args.model == 'gc':
        config = load_json('./mnist-gc.json')
    else:
        config = load_json('./mnist-gc.json')
    gc = GCForest(config)
    # Keep the fitted model in memory (set to False to save RAM)
    # gc.set_keep_model_in_mem(False)
    gc.set_keep_model_in_mem(True)

    # data
    data_num_train = 60000  # number of training figures
    data_num_test = 10000   # number of test figures
    fig_w = 45              # width of each figure
"n_jobs": -1 }) ca_config["estimators"].append({ "n_folds": 5, "type": "LogisticRegression" }) config["cascade"] = ca_config return config if __name__ == "__main__": args = parse_args() if args.model is None: config = get_toy_config() else: config = load_json(args.model) gc = GCForest(config) # If the model you use cost too much memory for you. # You can use these methods to force gcforest not keeping model in memory gc.set_keep_model_in_mem(False) # default is TRUE. (X_train, y_train), (X_test, y_test) = mnist.load_data() # X_train, y_train = X_train[:2000], y_train[:2000] X_train = X_train[:, np.newaxis, :, :] X_test = X_test[:, np.newaxis, :, :] X_train_enc = gc.fit_transform(X_train, y_train) # X_enc is the concatenated predict_proba result of each estimators of the last layer of the GCForest model # X_enc.shape = # (n_datas, n_estimators * n_classes): If cascade is provided
def claim_data_load():
    training = np.loadtxt(open("/home/jluo80/Desktop/training_1.csv", "rb"),
                          delimiter=",", skiprows=1)
    testing = np.loadtxt(open("/home/jluo80/Desktop/testing_1.csv", "rb"),
                         delimiter=",", skiprows=1)
    # The last column is the label, the remaining columns are the features
    return (training[:, :-1], training[:, -1]), (testing[:, :-1], testing[:, -1])


if __name__ == "__main__":
    args = parse_args()
    if args.model is None:
        config = get_toy_config()
    else:
        config = load_json(args.model)  # type(config): <class 'dict'>
    gc = GCForest(config)
    # If the model costs too much memory, force gcforest not to keep the
    # fitted model in memory (the default is True):
    # gc.set_keep_model_in_mem(False)

    (X_train, y_train), (X_test, y_test) = claim_data_load()
    # X_train, y_train = X_train[:2000], y_train[:2000]
    X_train = X_train[:, np.newaxis, :, np.newaxis]
    X_test = X_test[:, np.newaxis, :, np.newaxis]

    X_train_enc = gc.fit_transform(X_train, y_train)
    # X_train_enc is the concatenated predict_proba output of every estimator
    # in the last layer of the GCForest model.
    # X_train_enc.shape = (n_datas, n_estimators * n_classes) if a cascade is provided.
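    # --- Illustrative sketch, not part of the original fragment ---
    # Direct evaluation with the cascade itself; accuracy_score is assumed to
    # be imported in the full script (it is not shown in this fragment).
    from sklearn.metrics import accuracy_score
    y_pred = gc.predict(X_test)
    print("gcForest test accuracy:", accuracy_score(y_test, y_pred))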
import argparse

import h5py
import numpy as np

from gcforest.gcforest import GCForest
from gcforest.utils.config_utils import load_json


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", dest="model", type=str, default=None,
                        help="gcforest Net Model File")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    config = load_json('IP.json')
    gc = GCForest(config)
    # If the model costs too much memory, force gcforest not to keep the
    # fitted model in memory (the default is True):
    # gc.set_keep_model_in_mem(False)

    # Load the 28x28x27 patches and their labels from the HDF5 file
    f = h5py.File('IP28-28-27.h5', 'r')
    train_images = f['data'][:]
    train_labels = f['label'][:]
    f.close()
    # f = open('paviaU.data', 'rb')
    # train_images = pickle.load(f)
    # train_labels = pickle.load(f)
    # f.close()

    # Convert one-hot labels to class indices
    train_labels = np.argmax(train_labels, 1)
def model_gcforest(config_file):
    # Build a GCForest model from a JSON configuration file
    config = load_json(config_file)
    model = GCForest(config)
    return model
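# Illustrative usage of the helper above (a sketch, not from the original
# code; the JSON path and the data arrays are placeholders):
#   model = model_gcforest("demo_ca.json")
#   X_train_enc = model.fit_transform(X_train, y_train)
#   y_pred = model.predict(X_test)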
    def _make_clf(self, args):
        # Build a gcForest classifier from the JSON config path given in args
        config = load_json(str(args.config))
        return gcforest.GCForest(config)
ca_config["estimators"].append( {"n_folds": 3, "type": "ExtraTreesClassifier", "n_estimators": 500, "max_depth": None, "n_jobs": -1,"max_features":1}) ca_config["estimators"].append( {"n_folds": 3, "type": "ExtraTreesClassifier", "n_estimators": 500, "max_depth": None, "n_jobs": -1,"max_features":1}) config["cascade"] = ca_config return config if __name__ == "__main__": # args = parse_args() # if args.model is None: # config = get_toy_config() # else: # config = load_json(args.model) config1 = load_json("/home/qiang/repo/python/experiment-gcForest/cascade_clf/examples/demo_ca.json") # If the model you use cost too much memory for you. # You can use these methods to force gcforest not keeping model in memory # gc.set_keep_model_in_mem(False), default is TRUE. config2 = get_toy_config() acc_st = [] acc_gc = [] acc_rf = [] for i in range(10): (X_train, y_train), (X_test, y_test) = uci_yeast.load_data() gc1 = GCForest(config1) gc1.fit_transform(X_train, y_train) y_pred = gc1.predict(X_test) acc = accuracy_score(y_test, y_pred)