import itertools

import numpy as np
from sklearn.metrics import accuracy_score

# `load_data_set` and `get_classifier` are project-local helpers defined
# elsewhere in this repo.


def baseline_experiments_synthetic():
    results = dict()
    data = load_data_set('synthetic')

    # Baseline: train on the labeled source data only.
    clf = get_classifier('logistic')
    clf.fit(data.XS, data.yS)
    yP = clf.predict(data.XT)
    results['logistic'] = accuracy_score(data.yT, yP)

    # Transfer-learning models: fit with access to the unlabeled target data.
    classifiers = ['tca', 'suba', 'rba', 'flda', 'tcpr']
    for classifier in classifiers:
        clf = get_classifier(classifier)
        clf.fit(data.XS, data.yS, data.XT)
        yP = clf.predict(data.XT)
        results[classifier] = accuracy_score(data.yT, yP)

    return results
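# Example usage (a sketch; assumes the 'synthetic' dataset ships with this
# repo and all five transfer-learning classifiers above are registered in
# `get_classifier`):
synthetic_results = baseline_experiments_synthetic()
for name, acc in synthetic_results.items():
    print('{}: {:.3f}'.format(name, acc))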
from extl.influence.influence import DataSet
from extl.models.suba import SubspaceAlignedClassifier

# Set up.
domains = ['mnist', 'usps']
feat_type = 'surf'
res_all = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    dataset = load_data_set('mnist', source=source_domain,
                            target=target_domain, feat_type=feat_type)
    XS = dataset.XS.reshape(-1, 784)
    XT = dataset.XT.reshape(-1, 784)
    YS = dataset.yS
    YT = dataset.yT
    print(np.unique(YS), np.unique(YT))

    # TODO: normalize the images (see the sketch below).
    print('Shape of the data (S and T)', XS.shape, XT.shape)
    # Smallest dimension across both data matrices.
    min_value = min(min(XS.shape), min(XT.shape))
    print('Min value {}'.format(min_value))
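# A minimal sketch for the normalization TODO above (an assumption: raw pixel
# intensities in [0, 255]; swap in per-feature standardization if the loader
# already rescales):
def normalize_images(X):
    """Min-max scale an (n_samples, n_features) image matrix to [0, 1]."""
    X = X.astype(np.float64)
    x_min, x_max = X.min(), X.max()
    if x_max > x_min:
        X = (X - x_min) / (x_max - x_min)
    return X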
from extl.models.iw import ImportanceWeightedClassifier
from extl.models.suba import SubspaceAlignedClassifier


def baseline_experiments(data_set, source=None, target=None):
    data = load_data_set(name=data_set, source=source, target=target)
    XS = data.XS
    XT = data.XT

    # Jointly rescale source and target to [0, 1].
    m = XS.shape[0]
    X = np.vstack([XS, XT])
    X = X / np.max(X)
    XS = X[:m, :]
    XT = X[m:, :]

    # Move nTL labeled target samples per class into the source set; the
    # remaining target samples form the evaluation set.
    random_state = 0
    nTL = 200
    pos_inds = np.where(data.yT > 0)[0]
    neg_inds = np.where(data.yT < 0)[0]
    np.random.seed(random_state)
    np.random.shuffle(pos_inds)
    np.random.seed(random_state)
    np.random.shuffle(neg_inds)
    inds_l = np.hstack([pos_inds[:nTL], neg_inds[:nTL]])
    inds_u = np.hstack([pos_inds[nTL:], neg_inds[nTL:]])

    yS = np.hstack([data.yS, data.yT[inds_l]])
    yT = data.yT[inds_u]
    XS = np.vstack([XS, XT[inds_l, :]])
    XT = XT[inds_u, :]
    print(XS.shape, XT.shape, yS.shape, yT.shape)

    results = []

    # Source-only baselines.
    for name in ['logistic', 'svm']:
        clf = get_classifier(name)
        clf.fit(XS, yS)
        yp = clf.predict(XT)
        results.append(accuracy_score(yT, yp))

    # Subspace alignment.
    clf = SubspaceAlignedClassifier(num_components=1500, loss='logistic', l2=10)
    clf.fit(XS, yS, XT)
    yp = clf.predict(XT)
    results.append(accuracy_score(yT, yp))

    # Importance weighting: three weight estimators, two losses.
    for loss in ['logistic', 'hinge']:
        for iwe in ['lr', 'nn', 'kmm']:
            clf = ImportanceWeightedClassifier(iwe=iwe, loss=loss)
            clf.fit(XS, yS, XT)
            yp = clf.predict(XT)
            results.append(accuracy_score(yT, yp))

    print(results)
    return results
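# Example usage (a sketch; the dataset name and domain pair mirror the
# multi-domain sentiment script below, so only the call itself is assumed):
res_books_dvd = baseline_experiments('multi-domain-sentiment',
                                     source='books', target='dvd')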
all_results = []
all_data = []
domains = ['books', 'kitchen', 'electronics', 'dvd']
res = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    data = load_data_set('multi-domain-sentiment', source=d[0], target=d[1])
    XS = data.XS
    XT = data.XT
    # Map labels from {-1, +1} to {0, 1}.
    YS = data.yS * 0.5 + 0.5
    YT = data.yT * 0.5 + 0.5

    # Semi-supervised split: nTL labeled target samples per class.
    random_state = 0
    nTL = 50
    pos_inds = np.where(YT > 0.5)[0]
    neg_inds = np.where(YT < 0.5)[0]
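    # The script is truncated here; presumably the split continues as in
    # `baseline_experiments` above. A sketch under that assumption:
    np.random.seed(random_state)
    np.random.shuffle(pos_inds)
    np.random.seed(random_state)
    np.random.shuffle(neg_inds)
    inds_l = np.hstack([pos_inds[:nTL], neg_inds[:nTL]])
    inds_u = np.hstack([pos_inds[nTL:], neg_inds[nTL:]])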
from extl.models.suba import SubspaceAlignedClassifier

# Set up.
domains = ['amazon', 'caltech10', 'dslr', 'webcam']
feat_type = 'surf'
res_all = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    dataset = load_data_set('office-caltech', source=source_domain,
                            target=target_domain, feat_type=feat_type)
    XS = dataset.XS
    XT = dataset.XT
    # Shift labels from 1-based to 0-based.
    YS = dataset.yS - 1
    YT = dataset.yT - 1

    # TODO: normalize the images.
    print('Shape of the data (S and T)', XS.shape, XT.shape)
    # Smallest dimension across both data matrices.
    min_value = min(min(XS.shape), min(XT.shape))
    print('Min value {}'.format(min_value))
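    # The script is truncated before the classifier is fit; a sketch of the
    # presumable next step, following `baseline_experiments` above (bounding
    # the subspace size by min_value is an assumption):
    clf = SubspaceAlignedClassifier(num_components=min(min_value, 100),
                                    loss='logistic', l2=10)
    clf.fit(XS, YS, XT)
    acc = accuracy_score(YT, clf.predict(XT))
    res_all.append(acc)
    print('SubA accuracy: {:.3f}'.format(acc))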
from extl.influence.influence import DataSet

all_results = []
all_data = []
domains = ['amazon', 'imdb', 'yelp']
n_top = 3000
res = []
for d in itertools.permutations(domains, 2):
    source_domain = d[0]
    target_domain = d[1]
    print('***************************************************')
    print(' Source: {} and Target: {}'.format(source_domain, target_domain))

    data = load_data_set('sentiment', source=d[0], target=d[1], n_top=n_top)
    XS = data.XS
    XT = data.XT
    # Map labels from {-1, +1} to {0, 1}.
    YS = data.yS * 0.5 + 0.5
    YT = data.yT * 0.5 + 0.5

    # Compute the influence: package source as training data and target as
    # test data.
    train = DataSet(XS, YS)
    validation = None
    test = DataSet(XT, YT)
    data_sets = base.Datasets(train=train, validation=validation, test=test)
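# `base` is not imported above; it presumably refers to the TF 1.x container
# tensorflow.contrib.learn.python.learn.datasets.base, whose `Datasets` is a
# plain named tuple. A minimal stand-in (an assumption; place it above the
# loop) if TF contrib is unavailable:
import collections
import types

base = types.SimpleNamespace(
    Datasets=collections.namedtuple('Datasets', ['train', 'validation', 'test']))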