def runTests(self, data, sizes, iterations, graph=False):
    """
    Train and score every configured model for each size configuration.

    Parameters
    ----------
    data : dataset
        data to be sampled from for testing
    sizes : array
        array containing tuples of (start size, step size, end size,
        random_size (optional), outlier_size (optional)) to be used
        for testing.
    iterations : int
        Number of times to run each test defined by sizes array
    graph : bool, optional
        Not used by this method.

    Returns
    -------
    dict
        Maps each end size (``size[2]``) to a list holding, for every
        entry of ``self.models`` (same order), the list of values returned
        by ``test_metric(..., f1=True)`` over all iterations. The same
        dict is also stored in ``self.currentResults``.
    """
    results = {}
    for size in tqdm(sizes):
        start_size, step_size, end_size = size[0], size[1], size[2]
        # The two trailing entries are independently optional: a 4-tuple
        # carries only random_size, so outlier_size must be guarded by its
        # own length check (the old `len(size) < 4` test raised IndexError
        # on 4-tuples).
        r_size = size[3] if len(size) > 3 else 0
        o_size = size[4] if len(size) > 4 else 0

        size_F1s = [[] for _ in self.models]
        for iteration in tqdm(range(iterations)):
            # A fresh split per iteration, shared by all models.
            (train, test) = data.test_train_split(train_percent=.9)

            # train and test models
            for i, spec in enumerate(self.models):
                model = Model(type=spec[0],
                              sample=spec[1],
                              name=(spec[2] + str(iteration)))
                if model.sample == 'Active':
                    model.activeLearn(train.get_x(), train.get_y(),
                                      start_size=start_size,
                                      end_size=end_size,
                                      step_size=step_size,
                                      random_size=r_size,
                                      outlier_size=o_size)
                else:
                    # Non-active models train on a random sample of the
                    # same final size the active learner ends with.
                    rand_train = train.random_sample(size=end_size)
                    model.fit(rand_train.get_x(), rand_train.get_y())

                size_F1s[i].append(
                    model.test_metric(test.get_x(), test.get_y(), f1=True))

                # NOTE(review): source indentation was lost; this cleanup
                # is assumed to run per model, right after it is scored.
                if model.type == 'NN':
                    for f in glob.glob('NN/*'):
                        os.remove(f)

        # Keyed by end size only: two entries in `sizes` sharing the same
        # end size overwrite each other.
        results[end_size] = size_F1s

    self.currentResults = results
    return results
# Load the pickled dataset object used by the experiments below.
# NOTE(review): the file name suggests MNIST reduced to 50 PCA components
# -- confirm. pickle.load can execute arbitrary code, so only load data
# files from a trusted source.
with open("../data/pickled/mnist_data_pca50.p", "rb") as mnist_file:
    mnist_pca = pickle.load(mnist_file)

# NOTE(review): the original comment here read "24 instances" -- confirm
# what that refers to.
mnist_pca_sample = mnist_pca.random_sample(percent=.5)

# Collect one test_metric(..., f1=True) result per run of the active SVM.
activeSVMF1s = []
for _ in range(5):
    # getting test data to use for both models
    (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)

    # make active model
    active_SVM = Model('SVM', sample='Active')
    active_SVM.activeLearn(train_pca.get_x(), train_pca.get_y(),
                           start_size=10, end_size=150, step_size=10,
                           random_size=2, outlier_size=1)
    activeSVMF1s.append(
        active_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))

# NOTE(review): source indentation was lost; the results are assumed to be
# written once, after all runs have finished.
with open("activeSVMDistF1s150.p", "wb") as results_file:
    pickle.dump(activeSVMF1s, results_file)

print(datetime.datetime.now())
print(activeSVMF1s)
# Score a randomly sampled Model('RF') against an actively learned one on
# the same train/test split, repeated 1000 times.
randRFF1s = []
activeRFF1s = []
for _ in range(1000):
    # getting test data to use for both models
    (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)

    # make random train data and model; the sample size (250) matches the
    # end_size the active model is grown to below
    rand_train_PCA = train_pca.random_sample(size=250)
    rand_RF = Model('RF')
    rand_RF.fit(rand_train_PCA.get_x(), rand_train_PCA.get_y())
    randRFF1s.append(
        rand_RF.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))

    # make active model
    active_RF = Model('RF', sample='Active')
    active_RF.activeLearn(train_pca.get_x(), train_pca.get_y(),
                          start_size=150, end_size=250, step_size=10)
    activeRFF1s.append(
        active_RF.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))

# NOTE(review): source indentation was lost; the results are assumed to be
# written once after the loop rather than checkpointed every iteration.
# Context managers ensure each output file is flushed and closed.
with open("randRFF1s.p", "wb") as rand_out:
    pickle.dump(randRFF1s, rand_out)
with open("activeRFF1s.p", "wb") as active_out:
    pickle.dump(activeRFF1s, active_out)

print(datetime.datetime.now())
# NOTE(review): this is a fragment -- rand_train_PCA, train_pca, test_pca,
# startSize, sampleSize, the result containers and the output file names
# are all defined before this point. It may originally have been nested
# inside an enclosing loop; confirm before relying on the flat layout.

# Baseline: Model('SVM') fitted on the random training sample.
rand_SVM = Model('SVM')
rand_SVM.fit(rand_train_PCA.get_x(), rand_train_PCA.get_y())
randSVMF1s.append(
    rand_SVM.test_metric(test_pca.get_x(), test_pca.get_y(),
                         f1=True, avg='weighted'))
randSVMRaw.append(rand_SVM.predict(test_pca.get_x()))

# make active model for step size 5, 10, 15
for stepSize in [5, 10, 15]:
    active_SVM = Model('SVM', sample='Active')
    active_SVM.activeLearn(train_pca.get_x(), train_pca.get_y(),
                           start_size=startSize, end_size=sampleSize,
                           step_size=stepSize)
    # Scores and raw predictions are stored per step size.
    activeSVMF1s[stepSize].append(
        active_SVM.test_metric(test_pca.get_x(), test_pca.get_y(),
                               f1=True, avg='weighted'))
    activeSVMRaw[stepSize].append(active_SVM.predict(test_pca.get_x()))

# Persist the collected data; context managers ensure every output file is
# flushed and closed.
with open(testfnam, "wb") as out_file:
    pickle.dump(TESTDATA, out_file)
with open(randtrainfnam, "wb") as out_file:
    pickle.dump(RANDTRAIN, out_file)
with open(rSVMF1fname, "wb") as out_file:
    pickle.dump(randSVMF1s, out_file)
with open(rSVMRawfname, "wb") as out_file:
    pickle.dump(randSVMRaw, out_file)
with open(aSVMF1fname, "wb") as out_file:
    pickle.dump(activeSVMF1s, out_file)
# NOTE(review): activeSVMRaw is not written in the visible portion of the
# script -- confirm it is saved further down.