Ejemplo n.º 1
0
# --- Experiment configuration ------------------------------------------------
optimize = True

mds_home = '../datasets/MDS'
dataset_home = '../datasets/Webis-CLS-10'

nfolds = 5
outfile = './DCI.varpivot.dat'

# Pivot-set sizes to evaluate.
pivot_range = [10, 25, 50, 100, 250, 500, 1000, 1500, 2000, 2500, 5000]

# Reuse the results already stored in `outfile` when it exists; otherwise start
# from an empty table with one column per recorded quantity.
# NOTE(review): the meaning of the second positional argument of Result.load
# is not visible here -- confirm against the Result class.
result_columns = ['dataset', 'task', 'method', 'fold', 'npivots', 'acc', 'dci_time', 'svm_time']
rperf = Result.load(outfile, False) if exists(outfile) else Result(result_columns)

# MDS experiments: for every (source, target, fold) task, pivots are selected
# once at the largest requested size and DCI is then evaluated on growing
# prefixes of that selection, for each distributional correspondence function.
for source, target, fold, taskname in MDS_task_generator(abspath(mds_home), nfolds=nfolds):
    # Select max(pivot_range) pivots a single time; the smaller settings below
    # are obtained by slicing, so selection is not repeated per pivot count.
    s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U,
                                         source.V, target.V,
                                         phi=1, cross=True)

    for npivots in pivot_range:
        for dcf in ['cosine', 'linear']:
            dci = DCI(dcf=dcf, unify=False, post='normal')
            # Pass the module-level `optimize` switch through instead of a
            # hard-coded True, so that changing the flag actually has an effect.
            acc, dci_time, svm_time, _ = DCIclassify(source, target,
                                                     s_pivots[:npivots], t_pivots[:npivots],
                                                     dci, optimize=optimize)
            rperf.add(dataset='MDS', task=taskname, method=str(dci), fold=fold, npivots=npivots,
                      acc=acc, dci_time=dci_time, svm_time=svm_time)
            # Called after every single run; presumably this reports a running
            # summary and checkpoints the results to `outfile` -- confirm
            # against the Result class.
            rperf.pivot(index=['dataset', 'task', 'npivots'], values=['acc', 'dci_time', 'svm_time'])
            rperf.dump(outfile)

for source, target, oracle, taskname in WebisCLS10_task_generator(abspath(dataset_home)):
    s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U,
                                         source.V, target.V,
Ejemplo n.º 2
0

# --- Experiment configuration ------------------------------------------------
dataset_home = '../datasets/MDS'
svmlight_home = '../../svm_light'

nfolds = 5

# UPPER: mean cross-validated score of an SVMlight classifier (no transduction)
# evaluated within each domain yielded by UpperMDS_task_generator.
results = Result(['dataset', 'task', 'method', 'acc'])
for domain in UpperMDS_task_generator(abspath(dataset_home)):
    isvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=None)
    fold_scores = cross_val_score(isvm, domain.X, domain.y, cv=nfolds)
    score = fold_scores.mean()
    results.add(dataset='MDS', task=domain.domain, method='UPPER', acc=score)
results.pivot(grand_totals=True)

# Cross-domain comparison: for every (source, target, fold) task, train on the
# source data and measure accuracy on the target data with an inductive SVM
# (ISVM) and with an SVM that is given the target documents at training time
# through `transduction` (TSVM).
results = Result(['dataset', 'task', 'method', 'fold', 'acc'])
for source, target, fold, task in MDS_task_generator(abspath(dataset_home), nfolds=nfolds):
    # Domain names are read before source/target are rebound below.
    source_name, target_name = source.domain, target.domain
    source, target = unify_feat_space(source, target)

    # Both learners are fitted before any prediction is requested; this order
    # of the SVMlight calls is kept as in the original script.
    isvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=None).fit(source.X, source.y)
    tsvm = SVMlight(svmlightbase=svmlight_home, verbose=0, transduction=target.X).fit(source.X, source.y)

    yte_ = isvm.predict(target.X)     # inductive predictions on the target set
    tyte_ = tsvm.transduced_labels    # labels exposed by the transductive run

    # Accuracy = fraction of target labels matched.
    iacc = (yte_ == target.y).mean()
    tacc = (tyte_ == target.y).mean()

    for method, acc in (('ISVM', iacc), ('TSVM', tacc)):
        results.add(dataset='MDS', task=task, method=method, fold=fold, acc=acc)