def test_multilabel_with_auxiliary_learner_mmr(self):
    """Regression test: the 'mmr' criterion reproduces a fixed query order.

    Starts from a dataset where only the first 5 labels are known and
    checks the exact sequence of queried indices against a recorded run
    (random_state pinned to 1126).
    """
    # Keep the first 5 labels; everything else starts unlabeled.
    partial_labels = self.y[:5] + [None] * (len(self.y) - 5)
    trn_ds = Dataset(self.X, partial_labels)
    # Solver/gamma pinned explicitly so sklearn default changes cannot
    # alter the recorded query sequence.
    major = BinaryRelevance(
        LogisticRegression(solver='liblinear', multi_class="ovr"))
    auxiliary = BinaryRelevance(SVM(gamma="auto"))
    strategy = MultilabelWithAuxiliaryLearner(
        trn_ds,
        major_learner=major,
        auxiliary_learner=auxiliary,
        criterion='mmr',
        random_state=1126)
    queried = run_qs(trn_ds, strategy, self.y, self.quota)
    expected = np.array(
        [1258, 1461, 231, 1198, 1498, 1374, 955, 1367, 265, 144])
    assert_array_equal(queried, expected)
def test_multilabel_with_auxiliary_learner_hlr(self):
    """Regression test: the 'hlr' criterion reproduces a fixed query order.

    Starts from a dataset where only the first 5 labels are known and
    checks the exact sequence of queried indices against a recorded run
    (random_state pinned to 1126).
    """
    trn_ds = Dataset(self.X, self.y[:5] + [None] * (len(self.y) - 5))
    # Pin solver/multi_class/gamma explicitly (as the sibling mmr/shlr
    # tests do): sklearn changed these defaults across releases, which
    # would silently invalidate the hard-coded expected query sequence.
    qs = MultilabelWithAuxiliaryLearner(
        trn_ds,
        major_learner=BinaryRelevance(
            LogisticRegression(solver='liblinear', multi_class="ovr")),
        auxiliary_learner=BinaryRelevance(SVM(gamma="auto")),
        criterion='hlr',
        random_state=1126)
    qseq = run_qs(trn_ds, qs, self.y, self.quota)
    assert_array_equal(
        qseq, np.array([701, 1403, 147, 897, 974, 1266, 870, 703, 292, 1146]))
def test_multilabel_with_auxiliary_learner_shlr(self):
    """Regression test: the 'shlr' criterion reproduces a fixed query order.

    Starts from a dataset where only the first 5 labels are known and
    checks the exact sequence of queried indices against a recorded run
    (random_state pinned to 1126, b pinned to 1.0).
    """
    # Keep the first 5 labels; everything else starts unlabeled.
    partial_labels = self.y[:5] + [None] * (len(self.y) - 5)
    trn_ds = Dataset(self.X, partial_labels)
    # Solver/gamma pinned explicitly so sklearn default changes cannot
    # alter the recorded query sequence.
    major = BinaryRelevance(
        LogisticRegression(solver='liblinear', multi_class="ovr"))
    auxiliary = BinaryRelevance(SVM(gamma="auto"))
    strategy = MultilabelWithAuxiliaryLearner(
        trn_ds,
        major_learner=major,
        auxiliary_learner=auxiliary,
        criterion='shlr',
        b=1.,
        random_state=1126)
    queried = run_qs(trn_ds, strategy, self.y, self.quota)
    expected = np.array(
        [1258, 805, 459, 550, 783, 964, 736, 1004, 38, 750])
    assert_array_equal(queried, expected)
def main():
    """Compare six multilabel active-learning query strategies.

    Repeats the train/test split `n_repeats` times; in each repeat every
    strategy queries `quota` samples from its own deep copy of the
    training set. The per-query out-of-sample loss curves are averaged
    over the repeats, a few sampled points of each curve are printed,
    and all curves are plotted against the number of queries.
    """
    test_size = 0.25   # fraction of samples randomly held out as the test set
    quota = 150        # number of samples each strategy may query
    n_repeats = 10     # independent experiment repetitions to average over

    # (result key, printed label, plot color, strategy factory).
    # Each factory receives the Dataset copy the strategy will query from.
    strategies = [
        ('E1', 'MMC: ', 'g', 'MMC',
         lambda ds: MMC(ds, br_base=LogisticRegression())),
        ('E2', 'Random: ', 'k', 'Random',
         lambda ds: RandomSampling(ds)),
        ('E3', 'MultilabelWithAuxiliaryLearner_hlr: ', 'r',
         'AuxiliaryLearner_hlr',
         lambda ds: MultilabelWithAuxiliaryLearner(
             ds, BinaryRelevance(LogisticRegression()),
             BinaryRelevance(SVM()), criterion='hlr')),
        ('E4', 'MultilabelWithAuxiliaryLearner_shlr: ', 'b',
         'AuxiliaryLearner_shlr',
         lambda ds: MultilabelWithAuxiliaryLearner(
             ds, BinaryRelevance(LogisticRegression()),
             BinaryRelevance(SVM()), criterion='shlr')),
        ('E5', 'MultilabelWithAuxiliaryLearner_mmr: ', 'c',
         'AuxiliaryLearner_mmr',
         lambda ds: MultilabelWithAuxiliaryLearner(
             ds, BinaryRelevance(LogisticRegression()),
             BinaryRelevance(SVM()), criterion='mmr')),
        ('E6', 'BinaryMinimization: ', 'm', 'BinaryMinimization',
         lambda ds: BinaryMinimization(ds, LogisticRegression())),
    ]

    result = {key: [] for key, _, _, _, _ in strategies}
    for _ in range(n_repeats):
        trn_ds, tst_ds, fully_labeled_trn_ds = split_train_test(test_size)
        lbr = IdealLabeler(fully_labeled_trn_ds)
        model = BinaryRelevance(LogisticRegression())
        for key, _, _, _, make_qs in strategies:
            # Fresh copy per strategy so the runs cannot interfere.
            ds = copy.deepcopy(trn_ds)
            qs = make_qs(ds)
            _, e_out = run(ds, tst_ds, lbr, model, qs, quota)
            result[key].append(e_out)

    # Average the loss curves over the repeats.
    curves = {key: np.mean(result[key], axis=0) for key in result}

    # Print every 5th point of each averaged curve.
    for key, print_label, _, _, _ in strategies:
        print(print_label, curves[key][::5].tolist())

    query_num = np.arange(1, quota + 1)
    plt.figure(figsize=(9, 6))
    ax = plt.subplot(111)
    for key, _, color, plot_label, _ in strategies:
        ax.plot(query_num, curves[key], color, label=plot_label)
    # Shrink the axes so the legend fits outside on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
    plt.legend(loc=2, bbox_to_anchor=(1.05, 1), borderaxespad=0.)
    plt.xlabel('Number of Queries')
    plt.ylabel('Loss')
    plt.title('Experiment Result (Hamming Loss)')
    plt.show()
''' MAIN FUNCTION '''
# Flat driver script: query with an auxiliary-learner strategy, retrain
# the BinaryRelevance model, and collect its per-label test predictions.
result = {'Hamming': [], 'F1': []}
model = BinaryRelevance(LogisticRegression())
quota = 20  # number of samples to query

# EXECUTE FROM HERE FOR ITERATIONS
qs1 = MultilabelWithAuxiliaryLearner(
    trn_ds,
    BinaryRelevance(LogisticRegression()),
    BinaryRelevance(SVM()),
    criterion='hlr')
# NOTE(review): this `run` takes (data_CV_train, trn_ds, qs, quota) —
# a different signature from the run(trn_ds, tst_ds, lbr, model, qs, quota)
# helper used elsewhere; confirm the intended overload.
run(data_CV_train, trn_ds, qs1, quota)

model.train(trn_ds)
X, y = zip(*tst_ds.get_labeled_entries())
pred = model.predict(X)

output = pd.DataFrame()
# One column per label position in the multilabel prediction
# (presumably UE/BR/FR are the three label names — verify upstream).
for col_idx, col_name in enumerate(['UE_pred', 'BR_pred', 'FR_pred']):
    output[col_name] = [row[col_idx] for row in pred]