def run_test(sampler_type, X_train, y_train, X_test, y_test):
    """Run one active-learning experiment and publish its error curve.

    A sampler is built from the labelled seed set and the module-level
    unlabeled pool. Samples are then drawn from it in batches; after each
    batch the samples are appended to the training set, a fresh multinomial
    logistic regression is fitted, and its test error is recorded.

    Besides its arguments the function reads the module-level names
    ``X_unlabeled``, ``y_unlabeled``, ``max_unlabeled_size``, ``batch_size``
    and ``output``.

    Args:
        sampler_type: ``'rs'`` (RandomSampler), ``'ms'`` (MarginSampler) or
            ``'hs'`` (HierarchicalSampler).
        X_train: Feature matrix of the labelled seed set, passed to the
            sampler constructor.
        y_train: Labels of the seed set, passed to the sampler constructor.
        X_test: Feature matrix the fitted model is scored on.
        y_test: Labels the fitted model is scored against.

    Returns:
        None. The ``(sampler_type, errors)`` tuple is handed to
        ``output.put`` (presumably a queue shared with the caller -- confirm),
        where ``errors`` holds one test error per batch.

    Raises:
        ValueError: If ``sampler_type`` is not one of the known codes.
    """
    sampler_names = ('rs', 'ms', 'hs')
    # Validate up front so a typo fails with a readable message before any
    # sampler is constructed.
    if sampler_type not in sampler_names:
        raise ValueError(
            "unknown sampler_type " + repr(sampler_type)
            + "; expected one of " + ", ".join(sampler_names))

    sampler_classes = {
        'rs': RandomSampler,
        'ms': MarginSampler,
        'hs': HierarchicalSampler,
    }
    sampler = sampler_classes[sampler_type](
        X_train, y_train, X_unlabeled, y_unlabeled)
    print("Finish constructing sampler class " + sampler_type)

    errors = []
    # Continue from the sampler's own view of the training set.
    X_train, y_train = sampler.X_train, sampler.y_train
    # Stacking rows never changes the column count, so read it once.
    n_features = X_train.shape[1]

    for _ in range(0, max_unlabeled_size, batch_size):
        x_samples = np.empty((batch_size, n_features), float)
        y_samples = np.empty((batch_size, ), int)
        for b in range(batch_size):
            x_sample, y_sample = sampler.sample()
            # Samplers may hand back a sparse row; densify whenever the
            # sample supports it instead of keying on the sampler type.
            if hasattr(x_sample, "toarray"):
                x_sample = x_sample.toarray()
            x_samples[b] = x_sample
            y_samples[b] = y_sample

        X_train = vstack([X_train, x_samples])
        y_train = np.append(y_train, y_samples)

        model = LogisticRegression(
            multi_class="multinomial", solver="lbfgs", max_iter=200)
        model.fit(X_train, y_train)
        error = 1 - model.score(X_test, y_test)

        # Report the number of labels the model was actually fitted on,
        # i.e. including the batch that was just appended.
        print(sampler_type + ' number of labels: ' + str(X_train.shape[0])
              + ' error=' + str(error))
        errors.append(error)

    output.put((sampler_type, errors))
X_test = test_dataset.data X_test = vstack([X_test, X_train_base[2000:, :]]).toarray() y_test = test_dataset.target y_test = np.append(y_test, y_train_base[2000:]) X_train_base = X_train_base[:2000, :] y_train_base = y_train_base[:2000] X_train, y_train = X_train_base[: training_size], y_train_base[: training_size] X_unlabeled, y_unlabeled = X_train_base[training_size:], y_train_base[ training_size:] rs = RandomSampler(X_train, y_train, X_unlabeled, y_unlabeled) ms = MarginSampler(X_train, y_train, X_unlabeled, y_unlabeled) hs = HierarchicalSampler(X_train, y_train, X_unlabeled, y_unlabeled) x_train_random = X_train y_train_random = y_train x_train_margin = X_train y_train_margin = y_train x_train_Hierarchical = X_train y_train_Hierarchical = y_train print( 'Successfully loaded the Newsgroups dataset into train and test set.') for num_samples in range(max_unlabeled_size): #Add data, random x_sample, y_sample = rs.sample()