Beispiel #1
0
def main(fn, output_fn):
    """Cross-validate a DAgger-trained sequence model, then fit and save a final one.

    The dataset is read with ``readDataset(fn)``.  For each of 5 folds a
    ``Dagger`` trainer is fitted on the training sequences (with the held-out
    fold as validation set), wrapped in a ``Sequencer`` and evaluated with
    ``test``; a classification report is printed per fold and once more over
    the predictions of all folds.  Finally a model is trained on every
    sequence and handed to ``save``.

    Args:
        fn: Dataset location, passed unchanged to ``readDataset``.
        output_fn: Destination passed to ``save`` together with the final
            ``Sequencer``.
    """
    print("Reading in dataset")
    data, classes = readDataset(fn)
    print(len(data), " sequences found")
    print("Found classes:", sorted(classes))
    proc = Processor(classes, 2, 2, prefix=(1,3), affix=(2,1), hashes=2,
            features=100000, stem=False, ohe=False)

    # yss holds the raw 'output' labels per sequence, ryss the same labels
    # after proc.encode_target (used as ground truth when testing).
    yss = []
    ryss = []
    for Xs in data:
        ys = [x['output'] for x in Xs]
        yss.append(ys)
        ryss.append([proc.encode_target(ys, i) for i in range(len(ys))])

    rs = np.random.RandomState(seed=2016)
    print("Starting KFolding")
    y_trues, y_preds = [], []
    # No random_state here: without shuffle=True it never influenced the
    # splits, and recent scikit-learn releases reject the combination with a
    # ValueError.  The folds stay the same contiguous, unshuffled ones.
    fold_object = KFold(5)
    for train_idx, test_idx in fold_object.split(data):
        tr_X, tr_y = subset(data, yss, train_idx, rs)
        test_data = subset(data, yss, test_idx, rs, False)

        print("Training")
        d = Dagger(proc, tr_X, tr_y, validation_set=test_data)
        clf = d.train(10)

        seq = Sequencer(proc, clf)

        print("Testing")
        y_true, y_pred = test(data, ryss, test_idx, seq)
        print( classification_report(y_true, y_pred))

        y_trues.extend(y_true)
        y_preds.extend(y_pred)

    print("Total Report")
    print(classification_report(y_trues, y_preds, target_names=proc.labels))

    # Final model: same pipeline, fitted on every sequence and without a
    # validation set.
    print("Training all")
    idxs = range(len(data))
    tr_X, tr_y = subset(data, yss, idxs, rs)
    d = Dagger(proc, tr_X, tr_y)
    clf = d.train()
    seq = Sequencer(proc, clf)

    save(output_fn, seq)
import tensorflow as tf
import numpy as np
import os
import time
from tensorflow.python.framework import ops
from shutil import copyfile
import pvmdnn_model as model
import utils

# ======================================================================================================================
# Read the model settings (options) & Dataset
# Argument: 0 for training, 1 for testing (error regression, entrainment)
flag, dbs = utils.readDataset(1)

# ======================================================================================================================
# The log directory itself must already exist; abort otherwise.
# Only the "errorRegression" sub-directory is created on demand, and the
# settings file is copied into it.
isdir = os.path.exists(flag.log_dir)
if not isdir:
    print('Please check the log directory')
    # Raise explicitly instead of `assert False`: assert statements are
    # removed when Python runs with -O, which would let the script continue.
    raise AssertionError('log directory does not exist: %s' % flag.log_dir)
isdir = os.path.exists(flag.log_dir + "errorRegression")
if not isdir:
    os.makedirs(flag.log_dir + "errorRegression")
copyfile('./settings.ini', flag.log_dir + 'errorRegression/' + 'settings.ini')

# Check the device (either CPU or GPU): only the first four characters of
# flag.device are compared.
device_name = flag.device[0:4]
if device_name not in ('/cpu', '/gpu'):
    print('The device should be either cpu or gpu')
    raise AssertionError('unsupported device: %s' % flag.device)
Beispiel #3
0
    plt_trsv = np.mean(trains_svc, axis=0)
    plt_tesv = np.mean(test_svc, axis=0)
    ns = range(len(plt_teal))
    plt.ylim = (0, 1)
    plt.plot(ns, plt_tral, color="green")
    plt.plot(ns, plt_teal, color="red")

    plt.plot(ns, plt_trsv, color="blue")
    plt.plot(ns, plt_tesv, color="black")
    plt.show()


#iris = load_iris()

#X = iris['data']

# Load the samples and their string labels from the JSON dataset.
X, Y = readDataset('dataset/data.json')

# Integer code assigned to each label string.
replace_dict = {
    'Objetivo': 2,
    'Negativo': 0,
    'Positivo': 3,
    "Neutro": 1,
}
Y = np.array([replace_dict[y] for y in Y])

# Replace the raw samples by their doc2vecMatrix representation; the matrix
# stays reachable under both names.
X = X_features = doc2vecMatrix(X)
visualize_data(X, Y)

# Estimators handed to N_iter: two k-NN models, two SVCs and a naive Bayes.
e1 = KNeighborsClassifier(n_neighbors=20)
e2 = SVC(kernel="linear", probability=True, gamma=0.001)
e3 = KNeighborsClassifier(n_neighbors=10)
e4 = SVC(kernel="rbf", probability=True, gamma=0.001)
# Earlier alternative: SVC(kernel="rbf",probability=True,gamma="auto")
e5 = GaussianNB()

N_iter(X, Y, [e1, e2, e3, e4, e5])
def run(path, fname):
    '''
    if len(sys.argv) != 3:
        print("Usage: python we_sensesim.py SenseEmbedding Datasets")
        exit(0)
    '''
    wvs = utils.readWordVecs(sys.argv[1])
    print("Finish reading vector!")
    wvssen = {}
    s_list = defaultdict(list)
    for sense in wvs:
        wvssen[sense.split("%")[0]] = ''
        s_list[sense.split("%")[0]].append(sense)
    mean_vector = np.mean(wvs.values(), axis=0)

    spear_score_max = []
    spear_score_avg = []
    f_name = []

    for name in fname:
        filenames = os.path.join(path, name)
        #full_path = os.path.join(path, name)
        #filenames = os.path.expanduser(full_path).split(',')
        pairs, scores = utils.readDataset(filename, no_skip=True)
        coefs_max = []
        coefs_avg = []
        missing = 0
        for pair in pairs:
            vecs0 = []
            trimed_p0 = trim(pair[0], wvssen)
            if trimed_p0 not in wvssen:
                vecs0.append(mean_vector)
                missing += 1
            else:
                for sense in s_list[trimed_p0]:
                    vecs0.append(wvs[sense])
            vecs1 = []
            trimed_p1 = trim(pair[1], wvssen)
            if trimed_p1 not in wvssen:
                vecs1.append(mean_vector)
                missing += 1
            else:
                for sense in s_list[trimed_p1]:
                    vecs1.append(wvs[sense])
            '''
                max_value and avg_value: see "Multi-Prototype Vector-Space Models of Word Meaning" section 3.2 Measuring Semantic Similarity
                http://www.cs.utexas.edu/~ml/papers/reisinger.naacl-2010.pdf
            '''
            max_value = max([1 - cosine(a, b) for a in vecs0 for b in vecs1])
            avg_value = np.mean(
                [1 - cosine(a, b) for a in vecs0 for b in vecs1])
            coefs_max.append(max_value)
            coefs_avg.append(avg_value)

        spear_max = spearmanr(scores, coefs_max)
        pearson_max = pearsonr(scores, coefs_max)
        spear_avg = spearmanr(scores, coefs_avg)
        pearson_avg = pearsonr(scores, coefs_avg)
        spear_score_max.append(spear_max[0])
        spear_score_avg.append(spear_avg[0])
    print 'type     \t',
    for i in range(len(fname)):
        print fname[i],
    print '\nspear max\t',
    for i in range(len(fname)):
        print '%.06f   ' % (spear_score_max[i]),
    print '\nspear avg\t',
    for i in range(len(fname)):
        print '%.06f   ' % (spear_score_avg[i]),
Beispiel #5
0
def main(fn, sp):
    """Cross-validate a sequence classifier on precomputed features, then save a final model.

    The dataset is read with ``readDataset(fn)``.  Every position of every
    sequence is converted once with ``proc.transform`` / ``proc.encode_target``.
    For each of 5 folds a classifier is trained on the training sequences
    (via ``build`` and ``train``), wrapped in a ``Sequencer`` and evaluated
    with ``test``; a classification report is printed per fold and once more
    over the predictions of all folds.  Finally a classifier is trained on
    all sequences and handed to ``save``.

    Args:
        fn: Dataset location, passed unchanged to ``readDataset``.
        sp: Destination passed to ``save`` together with the final
            ``Sequencer``.
    """
    print("Reading in dataset")
    data, classes = readDataset(fn)
    print(len(data), " sequences found")
    print("Found classes:", sorted(classes))
    proc = Processor(classes,
                     2,
                     2,
                     prefix=(1, 3),
                     affix=(2, 1),
                     hashes=2,
                     features=100000,
                     stem=False,
                     ohe=False)

    print("Converting to features")
    Xs, ys = [], []
    sTime = time.time()
    for i, d in enumerate(data):
        # Progress line every 100 sequences (DPS = sequences per second).
        if i % 100 == 0 and i:
            print("Converted %s of %s: %s DPS" % (i, len(data), i /
                                                  (time.time() - sTime)))

        X, y = [], []
        trad = [x['output'] for x in d]
        # Use a separate index name so the sequence counter `i` of the
        # enclosing loop is not shadowed.
        for j in range(len(d)):
            X.append(proc.transform(d, trad, j))
            y.append(proc.encode_target(trad, j))

        Xs.append(X)
        ys.append(y)

    print("Starting KFolding")
    rs = np.random.RandomState(seed=2016)
    y_trues, y_preds = [], []
    # No random_state here: without shuffle=True it never influenced the
    # splits, and recent scikit-learn releases reject the combination with a
    # ValueError.  The folds stay the same contiguous, unshuffled ones.
    fold_object = KFold(5)
    for train_idx, test_idx in fold_object.split(data):
        tr_X, tr_y = build(Xs, ys, train_idx, rs)

        print("Training")
        clf = train(tr_X, tr_y)

        seq = Sequencer(proc, clf)

        print("Testing")
        y_true, y_pred = test(data, ys, test_idx, seq)
        print(classification_report(y_true, y_pred))

        y_trues.extend(y_true)
        y_preds.extend(y_pred)

    print("Total Report")
    print(classification_report(y_trues, y_preds))

    # Final model: fitted on every sequence.
    print("Training all")
    idxs = range(len(Xs))
    tr_X, tr_y = build(Xs, ys, idxs, rs)
    clf = train(tr_X, tr_y)
    seq = Sequencer(proc, clf)

    save(sp, seq)
Beispiel #6
0
from utils import readDataset, getSurgeryByPriority
import guloso

# Create the decision variables (currently disabled):
# Xcstd, yesd, z = utils.createDecisionVar(C, S, T, D, E)

# Value passed as second argument to the greedy initial-solution generator.
S = 1

# Read the dataset.
dataset = readDataset('toy2.txt')

# Build the initial solution with the greedy ("guloso") module.
solucao = guloso.gerarSolucaoInicial(dataset, S)

# Entries of the dataset selected for priority 1.
priority1 = getSurgeryByPriority(dataset, 1)

# print(solucao)

# For each priority-1 entry, print a separator followed by the first element
# stored for it in the solution.
for surgery in priority1:
    print('----------\n\n')
    print(solucao[surgery][0])