import itertools

import numpy as np
from sklearn.metrics import accuracy_score

# `load_data_set` and `get_classifier` are project-local helpers defined
# elsewhere in this repo.


def baseline_experiments_synthetic():
    results = dict()
    data = load_data_set('synthetic')

    # Baseline: train on the labeled source data only.
    clf = get_classifier('logistic')
    clf.fit(data.XS, data.yS)
    yP = clf.predict(data.XT)
    results['logistic'] = accuracy_score(data.yT, yP)

    # Transfer-learning models: fit with access to the unlabeled target data.
    classifiers = ['tca', 'suba', 'rba', 'flda', 'tcpr']
    for classifier in classifiers:
        clf = get_classifier(classifier)
        clf.fit(data.XS, data.yS, data.XT)
        yP = clf.predict(data.XT)
        results[classifier] = accuracy_score(data.yT, yP)

    return results
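# Example usage (a sketch; assumes the 'synthetic' dataset ships with this
# repo and all five transfer-learning classifiers above are registered in
# `get_classifier`):
synthetic_results = baseline_experiments_synthetic()
for name, acc in synthetic_results.items():
    print('{}: {:.3f}'.format(name, acc))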
from extl.influence.influence import DataSet
from extl.models.suba import SubspaceAlignedClassifier

# Set up.
domains = ['mnist', 'usps']
feat_type = 'surf'
res_all = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    dataset = load_data_set('mnist', source=source_domain,
                            target=target_domain, feat_type=feat_type)
    XS = dataset.XS.reshape(-1, 784)
    XT = dataset.XT.reshape(-1, 784)
    YS = dataset.yS
    YT = dataset.yT
    print(np.unique(YS), np.unique(YT))

    # TODO: normalize the images (see the sketch below).
    print('Shape of the data (S and T)', XS.shape, XT.shape)
    # Smallest dimension across both data matrices.
    min_value = min(min(XS.shape), min(XT.shape))
    print('Min value {}'.format(min_value))
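# A minimal sketch for the normalization TODO above (an assumption: raw pixel
# intensities in [0, 255]; swap in per-feature standardization if the loader
# already rescales):
def normalize_images(X):
    """Min-max scale an (n_samples, n_features) image matrix to [0, 1]."""
    X = X.astype(np.float64)
    x_min, x_max = X.min(), X.max()
    if x_max > x_min:
        X = (X - x_min) / (x_max - x_min)
    return X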
from extl.models.iw import ImportanceWeightedClassifier
from extl.models.suba import SubspaceAlignedClassifier


def baseline_experiments(data_set, source=None, target=None):
    data = load_data_set(name=data_set, source=source, target=target)
    XS = data.XS
    XT = data.XT

    # Jointly rescale source and target to [0, 1].
    m = XS.shape[0]
    X = np.vstack([XS, XT])
    X = X / np.max(X)
    XS = X[:m, :]
    XT = X[m:, :]

    # Move nTL labeled target samples per class into the source set; the
    # remaining target samples form the evaluation set.
    random_state = 0
    nTL = 200
    pos_inds = np.where(data.yT > 0)[0]
    neg_inds = np.where(data.yT < 0)[0]
    np.random.seed(random_state)
    np.random.shuffle(pos_inds)
    np.random.seed(random_state)
    np.random.shuffle(neg_inds)
    inds_l = np.hstack([pos_inds[:nTL], neg_inds[:nTL]])
    inds_u = np.hstack([pos_inds[nTL:], neg_inds[nTL:]])

    yS = np.hstack([data.yS, data.yT[inds_l]])
    yT = data.yT[inds_u]
    XS = np.vstack([XS, XT[inds_l, :]])
    XT = XT[inds_u, :]
    print(XS.shape, XT.shape, yS.shape, yT.shape)

    results = []

    # Source-only baselines.
    for name in ['logistic', 'svm']:
        clf = get_classifier(name)
        clf.fit(XS, yS)
        yp = clf.predict(XT)
        results.append(accuracy_score(yT, yp))

    # Subspace alignment.
    clf = SubspaceAlignedClassifier(num_components=1500, loss='logistic', l2=10)
    clf.fit(XS, yS, XT)
    yp = clf.predict(XT)
    results.append(accuracy_score(yT, yp))

    # Importance weighting: three weight estimators, two losses.
    for loss in ['logistic', 'hinge']:
        for iwe in ['lr', 'nn', 'kmm']:
            clf = ImportanceWeightedClassifier(iwe=iwe, loss=loss)
            clf.fit(XS, yS, XT)
            yp = clf.predict(XT)
            results.append(accuracy_score(yT, yp))

    print(results)
    return results
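# Example usage (a sketch; the dataset name and domain pair mirror the
# multi-domain sentiment script below, so only the call itself is assumed):
res_books_dvd = baseline_experiments('multi-domain-sentiment',
                                     source='books', target='dvd')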
all_results = []
all_data = []
domains = ['books', 'kitchen', 'electronics', 'dvd']
res = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    data = load_data_set('multi-domain-sentiment', source=d[0], target=d[1])
    XS = data.XS
    XT = data.XT
    # Map labels from {-1, +1} to {0, 1}.
    YS = data.yS * 0.5 + 0.5
    YT = data.yT * 0.5 + 0.5

    # Semi-supervised split: nTL labeled target samples per class.
    random_state = 0
    nTL = 50
    pos_inds = np.where(YT > 0.5)[0]
    neg_inds = np.where(YT < 0.5)[0]
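    # The script is truncated here; presumably the split continues as in
    # `baseline_experiments` above. A sketch under that assumption:
    np.random.seed(random_state)
    np.random.shuffle(pos_inds)
    np.random.seed(random_state)
    np.random.shuffle(neg_inds)
    inds_l = np.hstack([pos_inds[:nTL], neg_inds[:nTL]])
    inds_u = np.hstack([pos_inds[nTL:], neg_inds[nTL:]])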
from extl.models.suba import SubspaceAlignedClassifier

# Set up.
domains = ['amazon', 'caltech10', 'dslr', 'webcam']
feat_type = 'surf'
res_all = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    dataset = load_data_set('office-caltech', source=source_domain,
                            target=target_domain, feat_type=feat_type)
    XS = dataset.XS
    XT = dataset.XT
    # Shift labels from 1-based to 0-based.
    YS = dataset.yS - 1
    YT = dataset.yT - 1

    # TODO: normalize the images.
    print('Shape of the data (S and T)', XS.shape, XT.shape)
    # Smallest dimension across both data matrices.
    min_value = min(min(XS.shape), min(XT.shape))
    print('Min value {}'.format(min_value))
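    # The script is truncated before the classifier is fit; a sketch of the
    # presumable next step, following `baseline_experiments` above (bounding
    # the subspace size by min_value is an assumption):
    clf = SubspaceAlignedClassifier(num_components=min(min_value, 100),
                                    loss='logistic', l2=10)
    clf.fit(XS, YS, XT)
    acc = accuracy_score(YT, clf.predict(XT))
    res_all.append(acc)
    print('SubA accuracy: {:.3f}'.format(acc))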
from extl.influence.influence import DataSet

all_results = []
all_data = []
domains = ['amazon', 'imdb', 'yelp']
n_top = 3000
res = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    data = load_data_set('sentiment', source=d[0], target=d[1], n_top=n_top)
    XS = data.XS
    XT = data.XT
    # Map labels from {-1, +1} to {0, 1}.
    YS = data.yS * 0.5 + 0.5
    YT = data.yT * 0.5 + 0.5

    # Compute the influence: package source as training data and target as
    # test data.
    train = DataSet(XS, YS)
    validation = None
    test = DataSet(XT, YT)
    data_sets = base.Datasets(train=train, validation=validation, test=test)
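# `base` is not imported above; it presumably refers to the TF 1.x container
# tensorflow.contrib.learn.python.learn.datasets.base, whose `Datasets` is a
# plain named tuple. A minimal stand-in (an assumption; place it above the
# loop) if TF contrib is unavailable:
import collections
import types

base = types.SimpleNamespace(
    Datasets=collections.namedtuple('Datasets', ['train', 'validation', 'test']))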