import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

# readDataset, Processor, Dagger, Sequencer, subset, test, and save are
# project-local helpers assumed to be in scope.


def main(fn, output_fn):
    print("Reading in dataset")
    data, classes = readDataset(fn)
    print(len(data), " sequences found")
    print("Found classes:", sorted(classes))
    proc = Processor(classes, 2, 2, prefix=(1, 3), affix=(2, 1),
                     hashes=2, features=100000, stem=False, ohe=False)

    # Collect raw and encoded target sequences.
    yss = []
    ryss = []
    for Xs in data:
        ys = [x['output'] for x in Xs]
        yss.append(ys)
        ryss.append([proc.encode_target(ys, i) for i in range(len(ys))])

    rs = np.random.RandomState(seed=2016)

    print("Starting KFolding")
    y_trues, y_preds = [], []
    # shuffle=True is required when a random_state is supplied to KFold.
    fold_object = KFold(5, shuffle=True, random_state=1)
    for train_idx, test_idx in fold_object.split(data):
        tr_X, tr_y = subset(data, yss, train_idx, rs)
        test_data = subset(data, yss, test_idx, rs, False)

        print("Training")
        d = Dagger(proc, tr_X, tr_y, validation_set=test_data)
        clf = d.train(10)
        seq = Sequencer(proc, clf)

        print("Testing")
        y_true, y_pred = test(data, ryss, test_idx, seq)
        # print(y_true, y_pred, proc.labels)
        print(classification_report(y_true, y_pred))
        y_trues.extend(y_true)
        y_preds.extend(y_pred)

    print("Total Report")
    print(classification_report(y_trues, y_preds, target_names=proc.labels))

    # Retrain on the full dataset and persist the final sequencer.
    print("Training all")
    idxs = range(len(data))
    tr_X, tr_y = subset(data, yss, idxs, rs)
    d = Dagger(proc, tr_X, tr_y)
    clf = d.train()
    seq = Sequencer(proc, clf)
    save(output_fn, seq)
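# ----------------------------------------------------------------------
# A hedged sketch of the `subset` helper used above. This is NOT the
# project's actual implementation, only a guess at its contract: select
# the fold's sequences and, when the final flag is true, shuffle them
# with the supplied RandomState.
def subset_sketch(data, yss, idxs, rs, shuffle=True):
    idxs = list(idxs)
    if shuffle:
        rs.shuffle(idxs)
    return [data[i] for i in idxs], [yss[i] for i in idxs]
# ----------------------------------------------------------------------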
import os
import time
from shutil import copyfile

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

import pvmdnn_model as model
import utils

# ======================================================================
# Read the model settings (options) & dataset.
# Argument: 0 for training, 1 for testing (error regression, entrainment)
flag, dbs = utils.readDataset(1)

# ======================================================================
# The log directory must already exist; inside it, create the
# errorRegression subdirectory and copy the settings file there.
if not os.path.exists(flag.log_dir):
    print('Please check the log directory')
    assert False

if not os.path.exists(flag.log_dir + "errorRegression"):
    os.makedirs(flag.log_dir + "errorRegression")
copyfile('./settings.ini', flag.log_dir + 'errorRegression/' + 'settings.ini')

# Check the device (either CPU or GPU)
device_name = flag.device[0:4]
if device_name != '/cpu' and device_name != '/gpu':
    print('The device should be either cpu or gpu')
    assert False
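# ----------------------------------------------------------------------
# Illustration (not part of the original script): device strings that
# pass the prefix check above look like '/cpu:0' or '/gpu:1'; such a
# string is typically handed to tf.device(...) when building the graph.
for example_device in ('/cpu:0', '/gpu:1'):
    assert example_device[0:4] in ('/cpu', '/gpu')
# ----------------------------------------------------------------------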
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# readDataset, doc2vecMatrix, visualize_data, and N_iter are
# project-local helpers assumed to be in scope; trains_svc, test_svc,
# plt_tral, and plt_teal are computed earlier in the script.

plt_trsv = np.mean(trains_svc, axis=0)
plt_tesv = np.mean(test_svc, axis=0)
ns = range(len(plt_teal))
plt.ylim(0, 1)  # ylim is a function; assigning to it would shadow it
plt.plot(ns, plt_tral, color="green")
plt.plot(ns, plt_teal, color="red")
plt.plot(ns, plt_trsv, color="blue")
plt.plot(ns, plt_tesv, color="black")
plt.show()

# iris = load_iris()
# X = iris['data']
X, Y = readDataset('dataset/data.json')
replace_dict = {'Objetivo': 2, 'Negativo': 0, 'Positivo': 3, 'Neutro': 1}
Y = np.array([replace_dict[y] for y in Y])

X_features = doc2vecMatrix(X)
X = X_features
visualize_data(X, Y)

e1 = KNeighborsClassifier(n_neighbors=20)
e2 = SVC(kernel="linear", probability=True, gamma=0.001)
e3 = KNeighborsClassifier(n_neighbors=10)
e4 = SVC(kernel="rbf", probability=True, gamma=0.001)
e5 = GaussianNB()  # SVC(kernel="rbf", probability=True, gamma="auto")
N_iter(X, Y, [e1, e2, e3, e4, e5])
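# ----------------------------------------------------------------------
# A hedged sketch of what `doc2vecMatrix` could look like if it wraps
# gensim's Doc2Vec; the real helper may differ, and the hyperparameters
# here are assumptions, not the project's actual values.
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

def doc2vecMatrix_sketch(texts, size=100):
    # Tag each document with its index, train a small Doc2Vec model,
    # then re-infer a vector per document to build the feature matrix.
    tagged = [TaggedDocument(t.split(), [i]) for i, t in enumerate(texts)]
    model = Doc2Vec(tagged, vector_size=size, min_count=2, epochs=20)
    return np.array([model.infer_vector(t.split()) for t in texts])
# ----------------------------------------------------------------------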
import os
import sys
from collections import defaultdict

import numpy as np
from scipy.spatial.distance import cosine
from scipy.stats import spearmanr, pearsonr

import utils

# trim is a project-local helper assumed to be in scope.


def run(path, fname):
    '''
    if len(sys.argv) != 3:
        print("Usage: python we_sensesim.py SenseEmbedding Datasets")
        exit(0)
    '''
    wvs = utils.readWordVecs(sys.argv[1])
    print("Finished reading vectors!")
    wvssen = {}
    s_list = defaultdict(list)
    for sense in wvs:
        word = sense.split("%")[0]
        wvssen[word] = ''
        s_list[word].append(sense)
    mean_vector = np.mean(list(wvs.values()), axis=0)

    spear_score_max = []
    spear_score_avg = []
    for name in fname:
        filename = os.path.join(path, name)
        pairs, scores = utils.readDataset(filename, no_skip=True)
        coefs_max = []
        coefs_avg = []
        missing = 0
        for pair in pairs:
            # Gather all sense vectors for each word; fall back to the
            # mean vector when a word is out of vocabulary.
            vecs0 = []
            trimmed_p0 = trim(pair[0], wvssen)
            if trimmed_p0 not in wvssen:
                vecs0.append(mean_vector)
                missing += 1
            else:
                for sense in s_list[trimmed_p0]:
                    vecs0.append(wvs[sense])
            vecs1 = []
            trimmed_p1 = trim(pair[1], wvssen)
            if trimmed_p1 not in wvssen:
                vecs1.append(mean_vector)
                missing += 1
            else:
                for sense in s_list[trimmed_p1]:
                    vecs1.append(wvs[sense])
            '''
            max_value and avg_value: see "Multi-Prototype Vector-Space
            Models of Word Meaning", section 3.2, Measuring Semantic
            Similarity.
            http://www.cs.utexas.edu/~ml/papers/reisinger.naacl-2010.pdf
            '''
            max_value = max(1 - cosine(a, b) for a in vecs0 for b in vecs1)
            avg_value = np.mean([1 - cosine(a, b)
                                 for a in vecs0 for b in vecs1])
            coefs_max.append(max_value)
            coefs_avg.append(avg_value)
        spear_max = spearmanr(scores, coefs_max)
        spear_avg = spearmanr(scores, coefs_avg)
        # Pearson correlations are computed but not reported below.
        pearson_max = pearsonr(scores, coefs_max)
        pearson_avg = pearsonr(scores, coefs_avg)
        spear_score_max.append(spear_max[0])
        spear_score_avg.append(spear_avg[0])

    print('type\t', end='')
    for name in fname:
        print(name, end=' ')
    print('\nspear max\t', end='')
    for score in spear_score_max:
        print('%.06f ' % score, end='')
    print('\nspear avg\t', end='')
    for score in spear_score_avg:
        print('%.06f ' % score, end='')
    print()
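# ----------------------------------------------------------------------
# Toy illustration (not from the script) of the MaxSim / AvgSim measures
# cited above from Reisinger & Mooney (2010), section 3.2. The vectors
# are made up purely to show the arithmetic.
def _maxsim_avgsim_demo():
    a_senses = [np.array([1.0, 0.0]), np.array([0.0, 1.0])]
    b_senses = [np.array([1.0, 0.0])]
    sims = [1 - cosine(a, b) for a in a_senses for b in b_senses]
    print('MaxSim: %.4f' % max(sims))       # 1.0000: best-matching sense pair
    print('AvgSim: %.4f' % np.mean(sims))   # 0.5000: mean over all sense pairs
# ----------------------------------------------------------------------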
import time

import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

# readDataset, Processor, Sequencer, build, train, test, and save are
# project-local helpers assumed to be in scope.


def main(fn, sp):
    print("Reading in dataset")
    data, classes = readDataset(fn)
    print(len(data), " sequences found")
    print("Found classes:", sorted(classes))
    proc = Processor(classes, 2, 2, prefix=(1, 3), affix=(2, 1),
                     hashes=2, features=100000, stem=False, ohe=False)

    print("Converting to features")
    Xs, ys = [], []
    sTime = time.time()
    for i, d in enumerate(data):
        if i % 100 == 0 and i:
            print("Converted %s of %s: %s DPS"
                  % (i, len(data), i / (time.time() - sTime)))
        X, y = [], []
        trad = [x['output'] for x in d]
        for j in range(len(d)):  # 'j' avoids shadowing the progress index 'i'
            X.append(proc.transform(d, trad, j))
            y.append(proc.encode_target(trad, j))
        Xs.append(X)
        ys.append(y)

    print("Starting KFolding")
    rs = np.random.RandomState(seed=2016)
    y_trues, y_preds = [], []
    # shuffle=True is required when a random_state is supplied to KFold.
    fold_object = KFold(5, shuffle=True, random_state=1)
    for train_idx, test_idx in fold_object.split(data):
        tr_X, tr_y = build(Xs, ys, train_idx, rs)

        print("Training")
        clf = train(tr_X, tr_y)
        seq = Sequencer(proc, clf)

        print("Testing")
        y_true, y_pred = test(data, ys, test_idx, seq)
        print(classification_report(y_true, y_pred))
        y_trues.extend(y_true)
        y_preds.extend(y_pred)

    print("Total Report")
    print(classification_report(y_trues, y_preds))

    # Retrain on the full dataset and persist the final sequencer.
    print("Training all")
    idxs = range(len(Xs))
    tr_X, tr_y = build(Xs, ys, idxs, rs)
    clf = train(tr_X, tr_y)
    seq = Sequencer(proc, clf)
    save(sp, seq)
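# ----------------------------------------------------------------------
# A hedged sketch of the `build` helper used above (an assumption about
# its contract, not the project's actual code): flatten the selected
# sequences' feature and label lists into one shuffled training set.
def build_sketch(Xs, ys, idxs, rs):
    examples = [(x, y)
                for i in idxs
                for x, y in zip(Xs[i], ys[i])]
    rs.shuffle(examples)
    tr_X, tr_y = zip(*examples)
    return list(tr_X), list(tr_y)
# ----------------------------------------------------------------------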
from utils import readDataset, getSurgeryByPriority
import guloso

# Create the decision variables
# Xcstd, yesd, z = utils.createDecisionVar(C, S, T, D, E)
S = 1

# Read the dataset and build an initial greedy solution
dataset = readDataset('toy2.txt')
solucao = guloso.gerarSolucaoInicial(dataset, S)
priority1 = getSurgeryByPriority(dataset, 1)
# print(solucao)
for c in priority1:
    print('----------\n\n')
    print(solucao[c][0])
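# ----------------------------------------------------------------------
# A hedged sketch of what utils.getSurgeryByPriority might do (an
# assumption about the dataset layout, not the real helper): return the
# ids of surgeries whose priority matches the requested level.
def getSurgeryByPriority_sketch(dataset, priority):
    return [c for c, surgery in dataset.items()
            if surgery.get('priority') == priority]
# ----------------------------------------------------------------------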