Ejemplo n.º 1
0
def pivot_selection_timed(*args, **kwargs):
    """Call ``pivot_selection`` and measure how long the call takes.

    Every positional and keyword argument is forwarded untouched to
    ``pivot_selection``. A banner is printed before the call and the
    elapsed time is printed after it.

    Returns:
        A 3-tuple ``(s_pivots, t_pivots, pivot_time)``: the two values
        unpacked from the result of ``pivot_selection``, followed by the
        difference between two ``time()`` readings taken around the call
        (reported as seconds in the printed message).
    """
    print('Pivot selection')

    # Bracket only the selection call itself with the two clock readings.
    started_at = time()
    source_side, target_side = pivot_selection(*args, **kwargs)
    pivot_time = time() - started_at

    print(f'\t[pivot selection took {pivot_time:.3f} seconds]')
    return (source_side, target_side, pivot_time)
Ejemplo n.º 2
0
# Experiment script: runs DCIclassify for every pivot count in `pivot_range`
# and for two `dcf` settings, recording accuracy and timings in `rperf` and
# dumping the results to `outfile` after every single run.

# Dataset locations, relative to the working directory.
mds_home= '../datasets/MDS'
dataset_home='../datasets/Webis-CLS-10'

nfolds=5
outfile = './DCI.varpivot.dat'
# Start from the results stored in `outfile` when that file already exists;
# otherwise create an empty Result with the columns filled in by rperf.add below.
# NOTE(review): the meaning of the second positional argument (False) of
# Result.load is not visible in this file — confirm against Result.
if exists(outfile):
    rperf = Result.load(outfile, False)
else:
    rperf = Result(['dataset', 'task', 'method', 'fold', 'npivots', 'acc', 'dci_time', 'svm_time'])


# Numbers of pivots to evaluate.
pivot_range = [10,25,50,100,250,500,1000,1500,2000,2500,5000]

for source, target, fold, taskname in MDS_task_generator(abspath(mds_home), nfolds=nfolds):
    # Pivots are selected only once per task, requesting the largest count in
    # pivot_range; each smaller setting below reuses a prefix slice of this result.
    # Presumably the pivots come back ordered best-first, so a prefix is the
    # top-n selection — TODO confirm against pivot_selection.
    s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U,
                                         source.V, target.V,
                                         phi=1, cross=True)

    for npivots in pivot_range:
        for dcf in ['cosine','linear']:
            dci = DCI(dcf=dcf, unify=False, post='normal')
            # The fourth value returned by DCIclassify is discarded here.
            acc, dci_time, svm_time, _ = DCIclassify(source, target, s_pivots[:npivots], t_pivots[:npivots], dci, optimize=True)
            # str(dci) is used as the method label of the recorded row.
            rperf.add(dataset='MDS', task=taskname, method=str(dci), fold=fold, npivots=npivots, acc=acc, dci_time=dci_time, svm_time=svm_time)
            # Presumably displays a summary table of the results so far — verify
            # against Result.pivot; its return value is not used.
            rperf.pivot(index=['dataset', 'task','npivots'], values=['acc', 'dci_time', 'svm_time'])
            # Dump after every run, so `outfile` is refreshed continuously
            # rather than only once at the end of the sweep.
            rperf.dump(outfile)

for source, target, oracle, taskname in WebisCLS10_task_generator(abspath(dataset_home)):
    s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U,
                                         source.V, target.V,
                                         oracle=oracle, phi=30, cross=False)
    for npivots in pivot_range:
# Experiment script fragment: for every task yielded by the Webis-CLS-10
# cross-domain cross-lingual generator, select pivots (timed) and run
# DCIclassify.
# NOTE(review): `npivots`, `dcf` and `optimize` are read below but are not
# bound anywhere in the visible lines — they must be defined earlier.
dataset_home = '../datasets/Webis-CLS-10'

# Result table created with the column names listed here.
rperf = Result([
    'dataset', 'task', 'method', 'acc', 'pivot_t', 'dci_t', 'svm_t', 'test_t'
])
for source, target, oracle, taskname in WebisCLS10_crossdomain_crosslingual_task_generator(
        os.path.abspath(dataset_home)):

    # Pivot selection, bracketed by two time() readings so its duration can
    # be reported separately from the timings returned by DCIclassify.
    tinit = time()
    s_pivots, t_pivots = pivot_selection(npivots,
                                         source.X,
                                         source.y,
                                         source.U,
                                         target.U,
                                         source.V,
                                         target.V,
                                         oracle=oracle,
                                         phi=30,
                                         show=min(10, npivots),
                                         cross=True)
    pivot_time = time() - tinit
    print('pivot selection took {:.3f} seconds'.format(pivot_time))

    # All selected pivots are passed on unsliced to DCIclassify.
    dci = DCI(dcf=dcf, unify=True, post='normal')
    acc, dci_time, svm_time, test_time = DCIclassify(source,
                                                     target,
                                                     s_pivots,
                                                     t_pivots,
                                                     dci,
                                                     optimize=optimize)