def rfc_learner(option):
    """Run a pool-based active-learning loop with a random forest.

    Up to 256 rows of the pool returned by ``pool_reader()`` are labelled,
    one oracle query per iteration.  After every query a fresh forest is
    trained on the labelled rows, the whole pool is predicted, and the value
    returned by ``err.generalization_error`` is recorded.

    Args:
        option: ``'select'`` enables uncertainty sampling once the oracle has
            returned a label equal to 1 (until then rows are drawn at
            random).  Any other value draws every row at random.

    Returns:
        The list of recorded ``err.generalization_error`` values, one per
        query.

    Side effects:
        Plots the recorded values with ``plt`` and prints the F1 score of
        the final predictions.
    """
    accuracy = []
    data = pool_reader()
    true_labels = oracle.read_mat()
    row_size, col_size = data.shape
    points = np.empty([0, col_size])
    labels = []
    used = set()
    seeding = True  # True until the oracle has returned a label equal to 1
    predictions = np.zeros(row_size)

    def draw_unused():
        # Rejection sampling.  It terminates because the budget below never
        # exceeds the pool size, so an unused row always exists here.
        while True:
            candidate = random.randrange(row_size)
            if candidate not in used:
                return candidate

    # Never ask for more labels than the pool holds; otherwise the search
    # for an unused row could not succeed.
    budget = min(256, row_size)
    for _ in range(budget):
        if option == 'select' and not seeding:
            clf = RandomForestClassifier(n_estimators=10, criterion='entropy')
            clf.fit(points, np.array(labels))
            prob = clf.predict_proba(data)
            # Distance of the first probability column from 0.5; argsort is
            # ascending, so the least certain rows come first.
            weight = np.abs(prob[:, 0] - 0.5)
            pick = next(
                int(idx) for idx in np.argsort(weight) if int(idx) not in used
            )
        else:
            # Random draw, also used while seeding in 'select' mode so that
            # no row is ever labelled twice.
            pick = draw_unused()

        used.add(pick)
        points = np.vstack([points, data[pick]])
        label = oracle.oracle1(true_labels, pick)  # one oracle query per pick
        if label == 1:
            seeding = False
        labels.append(label)

        clf = RandomForestClassifier(n_estimators=10, criterion='entropy')
        clf.fit(points, np.array(labels))
        predictions = clf.predict(data)
        accuracy.append(err.generalization_error(predictions, true_labels))

    plt.plot(accuracy)
    plt.show()
    # NOTE(review): if f1_score is scikit-learn's, its documented argument
    # order is (y_true, y_pred); the original order is kept here -- confirm.
    score = f1_score(predictions, true_labels)
    # A single-argument print() emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("f1  %s" % (score,))
    return accuracy
def lrc_learner(option):
    """Run a pool-based active-learning loop with logistic regression.

    Up to 256 rows of the pool returned by ``pool_reader()`` are labelled,
    one oracle query per iteration (seeding draws included).  A model is
    only trained once the collected labels contain at least two distinct
    values.

    Args:
        option: ``"select"`` draws rows at random until two distinct labels
            have been collected, then switches to uncertainty sampling; no
            model is trained or evaluated on the seeding iterations.  Any
            other value draws every row at random and trains/evaluates after
            each query as soon as two distinct labels are available.

    Returns:
        The list of ``err.generalization_error`` values recorded after each
        evaluated query.

    Side effects:
        Plots the recorded values with ``plt`` and prints the F1 score of
        the final predictions (all zeros if no model was ever trained).
    """
    accuracy = []
    data = pool_reader()
    true_labels = oracle.read_mat()
    row_size, col_size = data.shape
    predictions = np.zeros(row_size)
    points = np.empty([0, col_size])
    labels = []
    used = set()

    def draw_unused():
        # Rejection sampling.  It terminates because the budget below never
        # exceeds the pool size, so an unused row always exists here.
        while True:
            candidate = random.randrange(row_size)
            if candidate not in used:
                return candidate

    def has_two_classes():
        # Assuming LogisticRegression is scikit-learn's, fit() rejects
        # training data with a single class, so "a label equal to 1 was
        # seen" is not a sufficient condition for training.
        return np.unique(np.asarray(labels)).size >= 2

    # Never ask for more labels than the pool holds; otherwise the search
    # for an unused row could not succeed.
    budget = min(256, row_size)
    for _ in range(budget):
        if option == "select":
            if not has_two_classes():
                # Seeding: collect one random label and move on.
                pick = draw_unused()
                used.add(pick)
                points = np.vstack([points, data[pick]])
                labels.append(oracle.oracle1(true_labels, pick))
                continue

            clf = LogisticRegression()
            clf.fit(points, np.array(labels))
            prob = clf.predict_proba(data)
            # Distance of the first probability column from 0.5; argsort is
            # ascending, so the least certain rows come first.
            weight = np.abs(prob[:, 0] - 0.5)
            pick = next(
                int(idx) for idx in np.argsort(weight) if int(idx) not in used
            )
            used.add(pick)
            points = np.vstack([points, data[pick]])
            labels.append(oracle.oracle1(true_labels, pick))
            # Refit on the enlarged training set before evaluating.
            clf.fit(points, np.array(labels))
        else:
            pick = draw_unused()
            used.add(pick)
            points = np.vstack([points, data[pick]])
            labels.append(oracle.oracle1(true_labels, pick))
            if not has_two_classes():
                continue
            clf = LogisticRegression()
            clf.fit(points, np.array(labels))

        predictions = clf.predict(data)
        accuracy.append(err.generalization_error(predictions, true_labels))

    plt.plot(accuracy)
    plt.show()
    # NOTE(review): if f1_score is scikit-learn's, its documented argument
    # order is (y_true, y_pred); the original order is kept here -- confirm.
    score = f1_score(predictions, true_labels)
    # A single-argument print() emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("f1  %s" % (score,))
    return accuracy