def train(parameters):
    """End-to-end training entry point: build train/test datasets, then fit.

    `parameters` is the experiment config dict (paths, seed, embeddings, ...).
    """
    # Output directory for this run, e.g. './results/docred-dev/docred_basebert_full/'
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])

    ###################################
    # Data Loading
    ###################################
    print('加载训练数据 ...')
    train_loader = DataLoader(parameters['train_data'], parameters)
    train_loader(embeds=parameters['embeds'], parameters=parameters)
    train_data, _ = DocRelationDataset(
        train_loader, 'train', parameters, train_loader).__call__()

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
    test_loader(parameters=parameters)
    test_data, prune_recall = DocRelationDataset(
        test_loader, 'test', parameters, train_loader).__call__()

    ###################################
    # Training
    ###################################
    datasets = {'train': train_data, 'test': test_data}
    trainer = Trainer(train_loader, parameters, datasets, model_folder, prune_recall)
    trainer.run()
def measure_accuracy(self, nregistered=40, nunregistered=40):
    """Run identification checks on random samples of registered and
    unregistered images against a fresh System instance."""
    system = System()
    known = sample(list(DataLoader.get_img_data('registered')), nregistered)
    unknown = sample(list(DataLoader.get_img_data('unregistered')), nunregistered)
    self._test_identification(system, known)
    self._test_unregistered(system, unknown)
def evaluate_model(evalparams):
    """Load a saved RelationModel checkpoint and evaluate it on a dataset.

    Returns a tuple of (gold labels, predicted labels, loaded model).
    Optionally pickles the per-example probability scores to `evalparams.out`.
    """
    torch.manual_seed(evalparams.seed)
    random.seed(1234)
    if evalparams.cpu:
        evalparams.cuda = False
    elif evalparams.cuda:  # fix: was `evalparams.cud` (typo -> AttributeError)
        torch.cuda.manual_seed(evalparams.seed)  # fix: was `args.seed` (NameError)

    # load opt
    print(evalparams.model_dir, evalparams.model)
    # fix: honor the model dir/name arguments instead of a hard-coded 'best_model.pt'
    model_file = evalparams.model_dir + "/" + evalparams.model
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = evalparams.model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

    # load data
    data_file = opt['data_dir'] + '/{}.json'.format(evalparams.dataset)
    print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
    batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

    helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)

    # save probability scores
    if len(evalparams.out) > 0:
        helper.ensure_dir(os.path.dirname(evalparams.out))
        with open(evalparams.out, 'wb') as outfile:
            pickle.dump(all_probs, outfile)
        print("Prediction scores saved to {}.".format(evalparams.out))
    print("Evaluation ended.")

    return (batch.gold(), predictions, model)
def __init__(self, config):
    """Copy run settings off `config` and build the sample-data loader."""
    self.arch_type = config.arch_type
    self.batch_size = config.batch_size
    self.epochs = config.epochs
    self.learning_rate = config.learning_rate
    self.input_shape = config.input_shape
    self.checkpoint_path = config.checkpoint_path
    self.model_path = config.model_save_path
    self.sample_data_file = config.sample_data_file
    self.data_loader = DataLoader(self.sample_data_file)
def train(parameters):
    """Train with on-disk caching of the preprocessed loaders/datasets.

    When `flag` is True and pickles exist under parameters['remodelfile'],
    the expensive preprocessing step is skipped and the cached objects are
    loaded instead; otherwise the data is rebuilt and re-pickled.
    """
    model_folder = setup_log(parameters, parameters['save_pred'] + '_train')
    set_seed(parameters['seed'])
    ###################################
    # Data Loading
    ###################################
    flag = False  # cache toggle: set True to reuse previously pickled data
    processed_dataset = parameters['remodelfile']
    if flag and os.path.exists(os.path.join(processed_dataset, 'train_loader.pkl')):
        # Cache hit: reload everything the preprocessing step produced.
        with open(os.path.join(processed_dataset, 'train_loader.pkl'), 'rb') as f:
            train_loader = pkl.load(f)
        with open(os.path.join(processed_dataset, 'train_data.pkl'), 'rb') as f:
            train_data = pkl.load(f)
        with open(os.path.join(processed_dataset, 'test_data.pkl'), 'rb') as f:
            test_data = pkl.load(f)
        with open(os.path.join(processed_dataset, 'prune_recall.pkl'), 'rb') as f:
            prune_recall = pkl.load(f)
    else:
        train_loader = DataLoader(parameters['train_data'], parameters)
        train_loader(embeds=parameters['embeds'], parameters=parameters)
        train_data, _ = DocRelationDataset(
            train_loader, 'train', parameters, train_loader).__call__()
        # operate_data(train_data, "train_data.json")

        print('\nLoading testing data ...')
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(
            test_loader, 'test', parameters, train_loader).__call__()

        # fix: ensure the cache directory exists before opening pickles for write
        os.makedirs(processed_dataset, exist_ok=True)
        with open(os.path.join(processed_dataset, 'train_loader.pkl'), 'wb') as f:
            pkl.dump(train_loader, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'train_data.pkl'), 'wb') as f:
            pkl.dump(train_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'test_data.pkl'), 'wb') as f:
            pkl.dump(test_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'prune_recall.pkl'), 'wb') as f:
            pkl.dump(prune_recall, f, pkl.HIGHEST_PROTOCOL)
################################### trainer = Trainer(train_loader, parameters, {'train': train_data, 'test': test_data}, model_folder, prune_recall) trainer.run() write_metrics(trainer,model_folder) if parameters['plot']: plot_learning_curve(trainer, model_folder) plot_P_R(trainer, model_folder)
def data_processor(bs, url="MNIST_URL"):
    """Fetch the dataset at `url`, normalize it, and return (train_dl, valid_dl)
    DataLoaders built with batch size `bs`."""
    x_train, y_train, x_valid, y_valid = get_data(url)
    mean, std = x_train.mean(), x_train.std()
    x_train = normalize(x_train, mean, std)
    # NB: validation is normalized with *training* statistics to avoid leakage.
    x_valid = normalize(x_valid, mean, std)

    train_ds = Dataset(x_train, y_train)
    valid_ds = Dataset(x_valid, y_valid)
    train_dl = DataLoader(train_ds, sampler=Sampler(train_ds, bs, shuffle=True))
    valid_dl = DataLoader(valid_ds, sampler=Sampler(valid_ds, bs, shuffle=False))
    return train_dl, valid_dl
def predict(sentences):
    """Tokenize `sentences` with jieba and predict aspect/polarity pairs.

    Returns a list of dicts with 'text', 'aspects', 'polarities' per sentence.
    Uses module-level `opt`, `vocab`, and `trainer`.
    """
    test_data = []
    for sent in sentences:
        test_data.append({
            'text': jieba.lcut(sent, cut_all=False),
            # Placeholder labels — the loader requires these fields to exist.
            'aspects': [constant.ID_TO_ASP[0]],
            'polarities': [constant.ID_TO_LABEL[0]]
        })
    test_batch = DataLoader(test_data, opt['batch_size'], opt, vocab)

    print("Predicting on test set...")
    labels = []
    for batch, indices in test_batch:
        predicts = trainer.predict(batch)
        # Restore the original (pre-shuffle) ordering via `indices`.
        labels.extend(predicts[k] for k in indices)

    results = []
    for i, label in enumerate(labels):
        results.append({
            'text': test_data[i]['text'],
            'aspects': [asp for asp, _ in label],
            'polarities': [pol for _, pol in label],
        })
    return results
def _test(parameters):
    """Evaluate a saved model on the test split, caching preprocessed test data
    as pickles under parameters['remodelfile'], and save P/R vectors to disk."""
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])
    flag = True  # cache toggle: reuse previously pickled test data when present
    print('\nLoading testing data ...')
    processed_dataset = parameters['remodelfile']
    if flag and os.path.exists(os.path.join(processed_dataset, 'test_test_data.pkl')):
        with open(os.path.join(processed_dataset, 'test_test_data.pkl'), 'rb') as f:
            test_data = pkl.load(f)
        with open(os.path.join(processed_dataset, 'test_prune_recall.pkl'), 'rb') as f:
            prune_recall = pkl.load(f)
    else:
        test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
        test_loader(parameters=parameters)
        test_data, prune_recall = DocRelationDataset(test_loader, 'test', parameters, train_loader).__call__()
        with open(os.path.join(processed_dataset, 'test_test_data.pkl'), 'wb') as f:
            pkl.dump(test_data, f, pkl.HIGHEST_PROTOCOL)
        with open(os.path.join(processed_dataset, 'test_prune_recall.pkl'), 'wb') as f:
            pkl.dump(prune_recall, f, pkl.HIGHEST_PROTOCOL)

    # Build a Trainer with an empty train split, then load saved weights into it.
    m = Trainer(train_loader, parameters, {'train': [], 'test': test_data}, model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], m)
    _, _, _, p, r = trainer.eval_epoch(final=True, save_predictions=True)

    print('Saving test metrics ... ', end="")
    np.savetxt(parameters['remodelfile'] + "/p.txt", p)
    np.savetxt(parameters['remodelfile'] + "/r.txt", r)
    # b = numpy.loadtxt("filename.txt", delimiter=',')
    print('DONE')
def _create_database(self):
    """Index images by key (first occurrence wins) and precompute graph stats."""
    self._images = {}
    for key, img in DataLoader.get_img_data():
        # Keep only the first image seen for each key.
        if key not in self._images:  # idiom fix: membership test, not .keys()
            self._images[key] = img
    self._graphs = {k: Graph(i).get_stats() for k, i in self._images.items()}
def __init__(self): print("init") #self.clasificador = Clasificador() #self.autoencoder = Autoencoder() self.pathLib = os.path.normpath(os.getcwd() + "/lib/deepfakes/faceswap.py") self.pathImgGenerada = os.path.normpath(os.getcwd() + "/tmp/faces") self.modeloIniciado = False self.dataLoader = DataLoader() self.dataLoader.setPathClassData( os.path.normpath(os.getcwd() + "/bd/categoriasImg")) self.dataLoader.setPathTrainingData( os.path.normpath(os.getcwd() + "/bd/categoriasImg")) self.dataLoader.cargarClases() self.setNumClasses(self.dataLoader.getNumClasses()) self.classes = self.dataLoader.getClasses() self.threshold = 5e-8 self.inputDim = 64 self.batchSize = 40 self.dataLoader.setBatchSize(self.batchSize) self.epochs = 10
def get_scores(data_file, opt, vocab, model): print( "Loading data from {} with batch size {}...".format( data_file, opt["batch_size"] ) ) batch = DataLoader(data_file, opt["batch_size"], opt, vocab, evaluation=True) predictions = [] all_probs = [] for i, b in enumerate(batch): preds, probs, attn_weights, _ = model.predict(b) predictions += preds all_probs += probs predictions = [id2label[p] for p in predictions] # print("predictions") # for a, b in zip(batch.gold(), predictions): # print(f"{a:<28} {b:<28}") p, r, f1 = scorer.score(batch.gold(), predictions, verbose=False) return p, r, f1
def modelfn(inp):
    """Score a single input dict `inp`; return (its probability vector, labels).

    Relies on module-level `opt`, `vocab`, `model`, and `id2label`.
    """
    batch = DataLoader(json.dumps([inp]), 3, opt, vocab,
                       evaluation=True, load_from_file=False)
    predictions = []
    all_probs = []
    for b in batch:
        preds, probs, _ = model.predict(b)
        predictions.extend(preds)
        all_probs.extend(probs)
    predictions = [id2label[p] for p in predictions]
    return all_probs[0], predictions
def _test(parameters):
    """Rebuild the test split and run a final evaluation of a saved model."""
    model_folder = setup_log(parameters, parameters['save_pred'] + '_test')

    print('\nLoading mappings ...')
    train_loader = load_mappings(parameters['remodelfile'])

    print('\nLoading testing data ...')
    test_loader = DataLoader(parameters['test_data'], parameters, train_loader)
    test_loader(parameters=parameters)
    test_data, prune_recall = DocRelationDataset(
        test_loader, 'test', parameters, train_loader).__call__()

    # Build a skeleton Trainer (empty train split), then load saved weights.
    datasets = {'train': [], 'test': test_data}
    skeleton = Trainer(train_loader, parameters, datasets, model_folder, prune_recall)
    trainer = load_model(parameters['remodelfile'], skeleton)
    trainer.eval_epoch(final=True, save_predictions=True)
def get_biased_model_class_probs(args):
    """Load a saved RelationModel and return its class probabilities per batch."""
    # load opt
    model_file = args.model_dir + "/" + args.model
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = args.model_dir + "/vocab.pkl"
    vocab = Vocab(vocab_file, load=True)
    assert opt["vocab_size"] == vocab.size, "Vocab size must match that in the saved model."
    opt["vocab_size"] = vocab.size
    emb_file = opt["vocab_dir"] + "/embedding.npy"
    emb_matrix = np.load(emb_file)
    # Embedding matrix must line up with the vocab and configured dim.
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt["emb_dim"]

    # load data
    data_file = args.data_dir + "/{}".format(args.data_name)
    print("Loading data from {} with batch size {}...".format(data_file, opt["batch_size"]))
    batch = DataLoader(data_file, opt["batch_size"], opt, vocab, evaluation=True)

    # helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        # NOTE(review): probs are appended per batch (list of per-batch lists),
        # unlike sibling eval code that flattens with `all_probs += probs` —
        # presumably intentional here; confirm with callers.
        all_probs.append(probs)
    return all_probs
# load vocab vocab_file = opt['vocab_dir'] + '/vocab.pkl' vocab = Vocab(vocab_file, load=True) opt['vocab_size'] = vocab.size emb_file = opt['vocab_dir'] + '/embedding.npy' emb_matrix = np.load(emb_file) assert emb_matrix.shape[0] == vocab.size assert emb_matrix.shape[1] == opt['emb_dim'] # load data print("Loading data from {} with batch size {}...".format( opt['data_dir'], opt['batch_size'])) train_batch = DataLoader(opt['data_dir'] + '/train.json', opt['batch_size'], opt, vocab, evaluation=False) dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'], opt, vocab, evaluation=True) model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id'] model_save_dir = opt['save_dir'] + '/' + model_id opt['model_save_dir'] = model_save_dir helper.ensure_dir(model_save_dir, verbose=True) # save config helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
# print("eval samples of subj:"+subj+" obj:"+obj) # args.model_dir = 'saved_models/02' # if os.path.exists(args.model_dir+'/'+subj+"_"+obj+"_"+"best_model.pt"): # model_file = args.model_dir +'/'+subj+"_"+obj+"_"+"best_model.pt" # else: # model_file = args.model_dir + '/best_model.pt' model_file=args.model_dir+'/best_model' \ '.pt' print("Loading model from {}".format(model_file)) opt = torch_utils.load_config(model_file) data_file = opt['data_dir'] + '/{}.json'.format(args.dataset) trainer = GCNTrainer(opt, lbstokens=lbstokens) trainer.load(model_file) batch = DataLoader([data_file], opt['batch_size'], opt, vocab, evaluation=True, corefresolve=True) batch_iter = tqdm(batch) all_probs = [] samples = [] for i, b in enumerate(batch_iter): preds, probs, _, sample = trainer.predict(b) predictions += preds all_probs += probs # effsum+=lab_eff # lab_nums+=lab_num samples = samples + sample key += batch.gold()
# load spacy model spacy_model = spacy.load("en_core_web_lg") # load vocab vocab_file = args.model_dir + '/vocab.pkl' vocab = Vocab(vocab_file, load=True) assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model." # load data data_file = opt['data_dir'] + '/{}.json'.format(args.dataset) print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size'])) batch = DataLoader(data_file, opt['batch_size'], opt, vocab, spacy_model, evaluation=True) # predict predictions = [] all_probs = [] for i, b in enumerate(batch): preds, probs, _ = model.predict(b) predictions += preds all_probs += probs # class to label class2id = dict([(v, k) for k, v in constant.ID_TO_CLASS.items()]) id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()]) predictions = [class2id[p] for p in predictions] predictions = [id2label[p] for p in predictions]
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
# Embedding matrix must line up with the vocab and configured embedding dim.
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# Add subject/object indices
opt['subject_indices'] = vocab.subj_idxs
opt['object_indices'] = vocab.obj_idxs

# load data; dev and test reuse the KG graph built by the preceding loader
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(opt['data_dir'] + '/train.json', opt['batch_size'], opt,
                         vocab, evaluation=False)
dev_batch = DataLoader(opt['data_dir'] + '/kg_dev_reduced.json', opt['batch_size'], opt,
                       vocab, evaluation=True, kg_graph=train_batch.kg_graph)
test_batch = DataLoader(opt['data_dir'] + '/kg_test_reduced.json', opt['batch_size'], opt,
                        vocab, evaluation=True, kg_graph=dev_batch.kg_graph)
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."
char_vocab_file = args.model_dir + '/vocab_char.pkl'
char_vocab = Vocab(char_vocab_file, load=True)
assert opt['char_vocab_size'] == char_vocab.size, "Char vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.jsonl'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, char_vocab, evaluation=True)
helper.print_config(opt)

# Pick the tag set matching the configured tagging scheme (IOB vs IOBES).
if opt['scheme'] == 'iob':
    label2id = constant.TYPE_TO_ID_IOB
elif opt['scheme'] == 'iobes':
    label2id = constant.TYPE_TO_ID_IOBES
else:
    raise Exception("Tagging scheme not found: " + opt['scheme'])
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
for i, b in enumerate(tqdm(batch)):
    preds, _ = trainer.predict(b)
    # (loop body continues beyond this chunk)
label2id = constant.LABEL_TO_ID[type_pair_id]
opt['num_class'] = len(label2id)

# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
# Embedding matrix must line up with the vocab and configured embedding dim.
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# load data: split files are named "<type-pair>_train.json" / "<type-pair>_dev.json"
print("Loading data from {} with batch size {}...".format(opt['data_dir'], opt['batch_size']))
train_batch = DataLoader(
    '%s/%s_%s' % (opt['data_dir'], constant.ID_TO_TYPE_PAIR[type_pair_id], 'train.json'),
    opt['batch_size'], opt, vocab, data_type='training')
dev_batch = DataLoader(
    '%s/%s_%s' % (opt['data_dir'], constant.ID_TO_TYPE_PAIR[type_pair_id], 'dev.json'),
    opt['batch_size'], opt, vocab, data_type='development')

# model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
model_id = opt['id'].strip()
model_save_dir = "%s/%d-%s" % (opt['save_dir'], type_pair_id, model_id)
opt['model_save_dir'] = model_save_dir
opt['log'] = 'log.%d-%s.txt' % (type_pair_id, model_id)
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(
    model_save_dir + '/' + opt['log'],
    header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")
# load vocab vocab_file = opt['vocab_dir'] + '/vocab.pkl' vocab = Vocab(vocab_file, load=True) opt['vocab_size'] = vocab.size emb_file = opt['vocab_dir'] + '/embedding.npy' emb_matrix = np.load(emb_file) assert emb_matrix.shape[0] == vocab.size assert emb_matrix.shape[1] == opt['emb_dim'] # load data print("Loading data from {} with batch size {}...".format( opt['data_dir'], opt['batch_size'])) train_batch = DataLoader(opt['data_dir'] + '/train.json', opt['batch_size'], opt, vocab, evaluation=False, pattern_file=opt['pattern_file']) dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'], opt, vocab, evaluation=True, pattern_file=opt['pattern_file']) test_batch = DataLoader(opt['data_dir'] + '/test.json', opt['batch_size'], opt, vocab, evaluation=True, pattern_file=opt['pattern_file'])
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
student_model = StudentModel(opt)
student_model.load(model_file)

# load vocab
vocab_file = 'saved_models/' + args.model_id + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)
helper.print_config(opt)
id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

# Collect the student's three output heads: final, instance, and auxiliary.
final_predictions, inst_predictions, aux_predictions = [], [], []
all_final_probs, all_inst_probs, all_aux_probs = [], [], []
for i, b in enumerate(batch):
    final_preds, inst_preds, aux_preds, final_probs, inst_probs, aux_probs = student_model.predict_all(b)
    final_predictions += final_preds
    inst_predictions += inst_preds
    aux_predictions += aux_preds
    all_final_probs += final_probs
    all_inst_probs += inst_probs
    all_aux_probs += aux_probs
helper.print_config(opt) # model id2label = dict([(v, k) for k, v in label2id.items()]) aug_train_epoch = 5 # for subj in SUBJ_LIST: # for obj in OBJ_LIST: print("labeled dataset for class with subj:" + str(subj) + " and obj: " + str(obj)) # model_file = "saved_models/02/" + subj + "_" + obj + "_" + "best_model.pt" # if not os.path.exists(model_file): # model_file="saved_models/02/"+"best_model_aug.pt" train_batch = DataLoader([opt['data_dir'] + '/train_coref.json'], opt['batch_size'], opt, vocab, evaluation=False, is_aug=False, corefresolve=True, subj=subj, obj=obj) dev_batch = DataLoader([opt['data_dir'] + '/dev_rev_coref.json'], opt['batch_size'], opt, vocab, evaluation=True, corefresolve=True, subj=subj, obj=obj) test_batch = DataLoader([opt['data_dir'] + '/test_rev_coref.json'], opt['batch_size'], opt,
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)
helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
batch_iter = tqdm(batch)
index = 0
# NOTE(review): presumably error[i][j] buckets examples by (gold, predicted)
# label pair for error analysis — confirm with the code that fills it.
error = [[[] for j in range(len(id2label.keys()))] for i in range(len(id2label.keys()))]
for i, b in enumerate(batch_iter):
    preds, probs, label, token, sub_pos, obj_pos, _ = trainer.predict(b)
    for j in range(len(token)):
        idx = index
        # (inner loop body continues beyond this chunk)
# load vocab vocab_file = opt['vocab_dir'] + '/vocab.pkl' vocab = Vocab(vocab_file, load=True) opt['vocab_size'] = vocab.size emb_file = opt['vocab_dir'] + '/embedding.npy' emb_matrix = np.load(emb_file) assert emb_matrix.shape[0] == vocab.size assert emb_matrix.shape[1] == opt['emb_dim'] # load data print("Loading data from {} with batch size {}...".format( opt['data_dir'], opt['batch_size'])) train_batch = DataLoader(opt['data_dir'] + '/train_full.json', opt['batch_size'], opt, vocab, evaluation=False) dev_batch = DataLoader(opt['data_dir'] + '/dev_full.json', opt['batch_size'], opt, vocab, evaluation=True) test_batch = DataLoader(opt['data_dir'] + '/test_full.json', opt['batch_size'], opt, vocab, evaluation=True) model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id'] model_save_dir = opt['save_dir'] + '/' + model_id
class faceSearch:
    """Face search via a small CNN classifier over face crops that are
    extracted from input images with the bundled deepfakes faceswap tool."""

    def __init__(self):
        """Configure paths, load class metadata via DataLoader, set hyper-parameters."""
        print("init")
        #self.clasificador = Clasificador()
        #self.autoencoder = Autoencoder()
        # Path to the bundled faceswap extraction script.
        self.pathLib = os.path.normpath(os.getcwd() + "/lib/deepfakes/faceswap.py")
        # Directory where extracted face crops are written.
        self.pathImgGenerada = os.path.normpath(os.getcwd() + "/tmp/faces")
        self.modeloIniciado = False  # model not built yet
        self.dataLoader = DataLoader()
        self.dataLoader.setPathClassData(
            os.path.normpath(os.getcwd() + "/bd/categoriasImg"))
        self.dataLoader.setPathTrainingData(
            os.path.normpath(os.getcwd() + "/bd/categoriasImg"))
        self.dataLoader.cargarClases()
        self.setNumClasses(self.dataLoader.getNumClasses())
        self.classes = self.dataLoader.getClasses()
        self.threshold = 5e-8  # target test loss for the training loop
        self.inputDim = 64     # default input image side length
        self.batchSize = 40
        self.dataLoader.setBatchSize(self.batchSize)
        self.epochs = 10

    #def setEncoderDim(self, dim):
    #    self.clasificador.setEncoderDim(dim)
    #    self.autoencoder.setEncoderDim(dim)

    def setInputDim(self, dim):
        # Side length of the (square, single-channel) input images.
        self.inputDim = dim

    def setNumClasses(self, num):
        self.numClasses = num

    def entrenar(self):
        """Train the classifier batch-by-batch until test loss < self.threshold,
        checkpointing weights every 100 iterations."""
        self.dataLoader.cargarData()
        if (self.modeloIniciado == False):
            self.setInputDim(self.dataLoader.getInputDim())
            self.initModel()
        #exit()
        loss = 1
        lossBuscador = 1
        contador = 0  # iteration counter used for periodic checkpointing
        #optimizer = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
        while lossBuscador > self.threshold:
            self.trainingSet, self.labelsSet = self.dataLoader.nextTrainingData(
                labels=True)
            self.testingSet, self.testLabelSet = self.dataLoader.nextTestingData(
                labels=True)
            #loss = self.buscador.fit(self.trainingSet, self.labelsSet,
            #                         batch_size=self.batchSize,
            #                         epochs=self.epochs,
            #                         verbose=0,
            #                         validation_data=(self.testingSet, self.testLabelSet))
            loss = self.buscador.train_on_batch(self.trainingSet, self.labelsSet)
            score = self.buscador.evaluate(self.testingSet, self.testLabelSet, verbose=0)
            lossBuscador = score[0]  # score = [test loss, test accuracy]
            #print('Test loss:', score[0])
            #print('Test accuracy:', score[1])
            #
            #lossBuscador = self.buscador.train_on_batch(self.trainingSet, self.labelsSet)
            #print("% Completado " + str(score[0]) + " ", end='\r')
            print("% Completado " + str(
                (self.threshold / lossBuscador) * 100) + " loss: " + str(score[0]) + " accurracy " + str(score[1]), end='\r')
            contador += 1
            if contador % 100 == 0:
                self.guardarAvance()
            if contador > 1000:
                contador = 0

    def guardarAvance(self):
        # Checkpoint the current weights (overwrite=True).
        self.buscador.save_weights(
            os.path.normpath(os.getcwd() + "/models/pesos/model.h5"), True)

    def initModel(self):
        """Build and compile the CNN classifier; load saved weights if present."""
        if (self.modeloIniciado == True):
            return
        self.buscador = Sequential()
        self.buscador.add(
            Conv2D(64,
                   kernel_size=(5, 5),
                   activation='relu',
                   data_format='channels_first',
                   border_mode='same',
                   input_shape=(1, self.inputDim, self.inputDim)))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(32, (10, 10), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(16, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(8, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(4, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        self.buscador.add(Conv2D(2, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(12, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(20, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(12, (5, 5), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(12, (1, 1), activation='relu'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(Conv2D(64, (5, 5), activation='tanh'))
        #self.buscador.add(MaxPooling2D(pool_size=(2, 2)))
        #self.buscador.add(AveragePooling2D(pool_size=(2, 2)))
        #self.buscador.add(Dropout(0.25))
        self.buscador.add(Flatten())
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.25))
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.25))
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.25))
        #self.buscador.add(Dense(128, activation='tanh'))
        #, kernel_regularizer=keras.regularizers.l2(0.01)
        #, activity_regularizer=keras.regularizers.l1(0.01)))
        #self.buscador.add(Dropout(0.5))
        self.buscador.add(Dropout(0.25))
        self.buscador.add(Dense(self.numClasses, activation='softmax'))
        #self.buscador.summary()
        #exit()
        #optimizer = Adam(lr=5e-9, beta_1=0.5, beta_2=0.999)
        # this one works!
        optimizer = keras.optimizers.Adadelta()
        self.buscador.compile(loss=keras.losses.categorical_crossentropy,
                              optimizer=optimizer,
                              metrics=['accuracy'])
        try:
            self.buscador.load_weights(
                os.path.normpath(os.getcwd() + "/models/pesos/model.h5"), True)
            print("pesos cargados")
        except OSError:
            # No saved weights yet — start from random initialization.
            print("no se han creado los pesos")
        self.modeloIniciado = True

    def search(self, pathImage):
        """Extract faces from `pathImage` via the faceswap script, then classify
        each extracted crop and print the predicted class."""
        command_line = "python " + self.pathLib + " extract -v -i " + pathImage + " -o " + self.pathImgGenerada
        p = subprocess.Popen(command_line,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # Success!
        result = []
        for line in p.stdout:
            result.append(line)
        errcode = p.returncode
        print(errcode)
        for line in result:
            print(line)
        p.kill()
        filesList = []
        for subdir, dirs, files in os.walk(self.pathImgGenerada):
            for file in files:
                filesList.append(os.path.join(subdir, file))
        imgs = []
        for file in filesList:
            # Load crop, resize to 64x64 grayscale, reshape to NCHW (1,1,H,W).
            im = cv2.imread(file)
            im = cv2.resize(im, (64, 64), interpolation=cv2.INTER_AREA)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            #cv2.imwrite(file,im)
            im = np.reshape(im, (1, 1, im.shape[0], im.shape[1]))
            if (self.modeloIniciado == False):
                self.setInputDim(im.shape[2])
                self.initModel()
            #imgs.append(im)
            #predicted = self.buscador.predict(imgs)
            # NOTE(review): assumed to run once per crop inside this loop —
            # the original chunk's indentation is ambiguous; confirm.
            print("prediccion " + str(self.classes[self.buscador.predict(im).argmax()]))
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/test.json'
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
data = read_file(data_file, vocab, opt, False)
batch = DataLoader(data, opt['batch_size'], opt, evaluation=True)
helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
cross_list = []
batch_iter = tqdm(batch)
for i, b in enumerate(batch_iter):
    # NOTE(review): b[8] presumably carries per-example cross-sentence
    # indicators — confirm against the project DataLoader's batch layout.
    cross_list += b[8]
    preds, probs, _ = trainer.predict(b)
    predictions += preds
    all_probs += probs
# load vocab vocab_file = opt['vocab_dir'] + '/vocab.pkl' vocab = Vocab(vocab_file, load=True) opt['vocab_size'] = vocab.size emb_file = opt['vocab_dir'] + '/embedding.npy' emb_matrix = np.load(emb_file) assert emb_matrix.shape[0] == vocab.size assert emb_matrix.shape[1] == opt['emb_dim'] # load data print("Loading data from {} with batch size {}...".format( opt['data_dir'], opt['batch_size'])) train_batch = DataLoader(opt['data_dir'] + '/train.json', opt['batch_size'], opt, vocab, evaluation=False) dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'], opt, vocab, evaluation=True) test_batch = DataLoader(opt['data_dir'] + '/test.json', opt['batch_size'], opt, vocab, evaluation=True) model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id'] model_save_dir = opt['save_dir'] + '/' + model_id
# Select the dataset: generic CSV splits vs. the two IMDB text variants.
if config.dataset_path != 'imdb' and config.dataset_path != 'imdb_small':
    train = pd.read_csv(config.dataset_path + '/6_train.csv')
    test = pd.read_csv(config.dataset_path + '/6_test.csv')
    val = pd.read_csv(config.dataset_path + '/6_val.csv')
    X_train, meta_train, Y_train, label_encoder_train = splitFeatures(train)
    X_test, meta_test, Y_test, label_encoder_test = splitFeatures(test)
    X_val, meta_val, Y_val, label_encoder_val = splitFeatures(val)
    # Use the largest label set seen across the three splits.
    n_classes = max(len(label_encoder_val.classes_),
                    len(label_encoder_test.classes_),
                    len(label_encoder_train.classes_))
elif config.dataset_path == 'imdb_small':
    dl = DataLoader()
    X_train, X_val, X_test, Y_train, Y_val, Y_test, _, _, _ = dl.load_data(
        data_path='../imdb_small/budgetandactors2.txt')
    # Binarize labels: class 1 vs. everything else.
    Y_val = [1 if y == 1 else 0 for y in Y_val]
    Y_test = [1 if y == 1 else 0 for y in Y_test]
    n_classes = 2
else:
    dl = DataLoader()
    X_train, X_val, X_test, Y_train, Y_val, Y_test, _, _, _ = dl.load_data(
        data_path='./data/imdb/budgetandactors.txt')
    # Binarize labels: class 1 vs. everything else.
    Y_val = [1 if y == 1 else 0 for y in Y_val]
    Y_test = [1 if y == 1 else 0 for y in Y_test]
    n_classes = 2
# print("X_val", X_val)
opt = torch_utils.load_config(args.model_dir)
helper.print_config(opt)
model = MyTrainer(opt)
model.load(args.model_dir)

print("Loading data from {} with batch size {}...".format(
    os.path.join(args.data_dir, args.test_filename), opt['batch_size']))
# split_test_data for multi
if opt['type'] == 'multi':
    split_test_data(opt['coarse_name'])
is_multi_eval = False
if opt['type'] == 'multi':
    is_multi_eval = True
test_batch = DataLoader(os.path.join(args.data_dir, args.test_filename),
                        opt['batch_size'], opt, is_multi_eval)

print("Evaluating...")
if opt['type'] == 'multi':
    # Multi (coarse/fine) mode: collect only predictions and example ids.
    predictions, data_ids = [], []
    test_step = 0
    for i, batch in enumerate(test_batch):
        pred, data_id = model.predict(batch, only_pred=True)
        predictions += pred
        test_step += 1
        data_ids += data_id
else:
    # Single-task mode: also accumulate loss/accuracy and gold labels.
    predictions, labels, data_ids = [], [], []
    test_loss, test_acc, test_step = 0., 0., 0
    for i, batch in enumerate(test_batch):
        loss, acc, pred, label, data_id = model.predict(batch)
        test_loss += loss
        # (loop body continues beyond this chunk)