def __init__(self, training_properties, train_iter, dev_iter, test_iter,
             device):
    self.optimizer_type = training_properties["optimizer"]
    self.learning_rate = training_properties["learning_rate"]
    self.weight_decay = training_properties["weight_decay"]
    self.momentum = training_properties["momentum"]
    self.norm_ratio = training_properties["norm_ratio"]
    self.epoch = training_properties["epoch"]
    self.topk = training_properties["topk"]
    self.print_every = training_properties["print_every_batch_step"]
    self.save_every = training_properties["save_every_epoch"]
    self.eval_every = training_properties["eval_every"]
    self.save_path = training_properties["save_path"]
    self.openAIAdamSchedulerType = training_properties["scheduler_type"]
    self.amsgrad = training_properties["amsgrad"]
    self.partial_adam = training_properties["partial_adam"]

    self.train_iter = train_iter
    self.dev_iter = dev_iter
    self.test_iter = test_iter

    self.device = device

    self.dev_evaluator, self.test_evaluator = Evaluator().evaluator_factory(
        "single_model_evaluator", self.device)
def compare_results(self):
    model_results = {}
    dataset_results = {}
    for eval_space in self.errors.keys():
        model_results[eval_space] = Evaluator.means_per_metric(
            self.errors[eval_space])
        dataset_results[eval_space] = Evaluator.means_per_metric(
            self.dataset_errors[eval_space])
    Evaluator.print_comparison(model_results, dataset_results)
def test_evaluate(self):
    evaluator = Evaluator(model_dir=self.model_dir,
                          input_shape=self.input_shape)
    BAR, EAR = evaluator.evaluate(basic_model=self.basic_model,
                                  evaluate_model=self.evaluate_model,
                                  valid_stocks=self.valid_stocks,
                                  rounds=3)
    print(BAR, EAR)
    self.assertNotEqual(BAR, EAR)
def prec_recall(data, gt):
    search_engine = SearchEngine(data)
    print('\n> Running Evaluation...\n', end='')
    evaluator = Evaluator(search_engine, gt)
    prec, avg_prec_recall = evaluator.evaluate()
    mkdir(EVALUATION_PATH)
    save_to_csv(prec, os.path.join(EVALUATION_PATH, 'precision.csv'))
    save_to_csv(avg_prec_recall,
                os.path.join(EVALUATION_PATH, 'avg_prec_recall.csv'),
                index=True)
    print('\n Results of evaluation saved to directory "%s"'
          % os.path.relpath(EVALUATION_PATH, PROJ_ROOT))
def _initial_performance(self, session):
    val_start = dt.now()
    validation_performance = session.test_model(self.val_loader)
    target_performance = Evaluator.means_over_subsets(
        validation_performance)['distance']
    val_time = dt.now() - val_start
    Evaluator.print_result_summary_flat(validation_performance, '\t')
    # total_seconds() covers runs longer than one second; the bare
    # timedelta.microseconds component would drop everything above it.
    print('\t\tThat took {} milliseconds.'.format(
        int(val_time.total_seconds() * 1000)))
    return target_performance, copy.deepcopy(session.model.state_dict())
def evaluate_model(self, mode='mean'):
    self.errors = {
        'default': Evaluator.to_model(self.data_loader, self.model,
                                      space='default', mode=mode)
    }
    if type(self.dataset) == datasets.NormalizedPairedPoseDataset:
        self.errors['original'] = Evaluator.to_model(self.data_loader,
                                                     self.model,
                                                     space='original',
                                                     mode=mode)
def eval_run_func(params):
    from evaluation.evaluator import Evaluator

    # get input parameters
    model_dir = params['model_dir']
    basic_model = params['basic_model']
    evaluate_model = params['evaluate_model']
    input_shape = params['input_shape']
    rounds = params['rounds']
    valid_stocks = params['valid_stocks']

    _evaluator = Evaluator(model_dir=model_dir, input_shape=input_shape)
    BAR, EAR = _evaluator.evaluate(basic_model, evaluate_model, valid_stocks,
                                   rounds)
    return BAR, EAR
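# Hedged usage sketch (not from the original source): because eval_run_func
# takes a plain dict and re-imports Evaluator inside the worker, it can be
# fanned out with multiprocessing.Pool. All parameter values below are
# illustrative assumptions.
from multiprocessing import Pool

param_grid = [
    {'model_dir': 'models/run_%d' % i,
     'basic_model': 'baseline',
     'evaluate_model': 'candidate',
     'input_shape': (30, 5),
     'rounds': 3,
     'valid_stocks': ['AAPL', 'MSFT']}
    for i in range(4)
]

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        # Each worker returns one (BAR, EAR) tuple per parameter set.
        for bar, ear in pool.map(eval_run_func, param_grid):
            print(bar, ear)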
def evaluate_dataset(self, precompute_prefix=None, mode='mean'):
    if precompute_prefix is not None:
        path = self._construct_error_file_path(precompute_prefix)
        self.dataset_errors = torch.load(path)
    else:
        self.dataset_errors = {
            'default': Evaluator.to_dataset(self.data_loader,
                                            space='default', mode=mode)
        }
        if type(self.dataset) == datasets.NormalizedPairedPoseDataset:
            self.dataset_errors['original'] = Evaluator.to_dataset(
                self.data_loader, space='original', mode=mode)
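# Hedged usage sketch (assumption, not in the original sources): the three
# methods above (evaluate_model, evaluate_dataset, compare_results) read like
# one pipeline on a single comparison object. The owner class name
# "PoseComparison" and its constructor are hypothetical.
comparison = PoseComparison(model, dataset, data_loader)
comparison.evaluate_model(mode='mean')            # fills self.errors
comparison.evaluate_dataset(mode='mean')          # fills self.dataset_errors
comparison.compare_results()                      # prints side-by-side means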
def __init__(self, output_dir):
    if cfg.TRAIN.FLAG:
        self.model_dir = os.path.join(output_dir, 'Model')
        self.image_dir = os.path.join(output_dir, 'Image')
        self.log_dir = os.path.join(output_dir, 'Log')
        mkdir_p(self.model_dir)
        mkdir_p(self.image_dir)
        mkdir_p(self.log_dir)
        self.summary_writer = FileWriter(self.log_dir)

    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

    s_gpus = cfg.GPU_ID.split(',')
    self.gpus = [int(ix) for ix in s_gpus]
    self.num_gpus = len(self.gpus)
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    torch.cuda.set_device(self.gpus[0])
    cudnn.benchmark = True

    # load fasttext embeddings (e.g., birds.en.vec)
    path = os.path.join(cfg.DATA_DIR, cfg.DATASET_NAME + ".en.vec")
    txt_dico, _txt_emb = load_external_embeddings(path)
    txt_emb = nn.Embedding(len(txt_dico), 300, sparse=False)
    txt_emb.weight.data.copy_(_txt_emb)
    txt_emb.weight.requires_grad = False
    self.txt_dico = txt_dico
    self.txt_emb = txt_emb

    # load networks and evaluator
    self.networks = self.load_network()
    self.evaluator = Evaluator(self.networks, self.txt_emb)

    # visualizer to visdom server
    self.vis = Visualizer(cfg.VISDOM_HOST, cfg.VISDOM_PORT, output_dir)
    self.vis.make_img_window("real_im")
    self.vis.make_img_window("fake_im")
    self.vis.make_txt_window("real_captions")
    self.vis.make_txt_window("genr_captions")
    self.vis.make_plot_window("G_loss", num=7,
                              legend=["errG", "uncond", "cond", "latent",
                                      "cycltxt", "autoimg", "autotxt"])
    self.vis.make_plot_window("D_loss", num=4,
                              legend=["errD", "uncond", "cond", "latent"])
    self.vis.make_plot_window("KL_loss", num=4,
                              legend=["kl", "img", "txt", "fakeimg"])
    self.vis.make_plot_window("inception_score", num=2,
                              legend=["real", "fake"])
    self.vis.make_plot_window("r_precision", num=1)
def __init__(self, use_old_model, use_time, port, python_port, train,
             evaluate):
    self._use_old_model = use_old_model
    self._use_time = use_time
    self._port = port
    self._python_port = python_port
    self._train = train
    self._evaluate = evaluate
    if use_old_model:
        self._models_folder = 'old_model'
    else:
        self._models_folder = 'new_model'
    self._evaluator = Evaluator(self._port, self._python_port)
    print(self._models_folder)
def evaluate_multiple_experiments(name_pattern, config_base_dir,
                                  user_dir_override):
    for config_path in glob(path.join(config_base_dir, name_pattern)):
        print('Evaluating experiment ',
              path.basename(config_path).replace('.json', ''))
        params = get_params_from_config(config_path, user_dir_override)
        model = Model(checkpoint_interval=10, model_params=params)
        evaluator = Evaluator(model)
        all_flows = evaluator.flows_over_epochs(every_nth=10)
        result_path = path.join(
            user_dir_override, 'training', 'results',
            path.basename(config_path).replace('.json', '.pkl'))
        with open(result_path, 'wb') as f:
            pkl.dump(all_flows, f)
def train_4v4():
    envs_config_paths = [
        Path(JSONS_FOLDER, 'configs', '4v4', 'all_equal.json'),
        Path(JSONS_FOLDER, 'configs', '4v4', 'more_horizontally.json'),
        Path(JSONS_FOLDER, 'configs', '4v4', 'more_vertically.json')
    ]
    training_state = get_frap_training_4v4()
    training_state.junctions = [
        "gneJ1", "gneJ2", "gneJ3", "gneJ4",
        "gneJ7", "gneJ8", "gneJ9", "gneJ10",
        "gneJ14", "gneJ15", "gneJ16", "gneJ17",
        "gneJ20", "gneJ21", "gneJ22", "gneJ23"
    ]
    evaluator = Evaluator.from_file(
        Path(JSONS_FOLDER, 'evaluators', '4v4_eq_vert_hori.json'))
    main_train(
        training_state,
        [SumoEnv.from_config_file(env_config_path, 3000)
         for env_config_path in envs_config_paths],
        evaluator,
        Path('saved', 'aaai-multi', 'frap', '4v4'),
    )
def _logging(self, log, loss, train_performance, val_performance, i):
    for key, value in train_performance['DEFAULT'].items():
        log['train'][key].append(value)
    for subset_key, subset_performance in val_performance.items():
        for key, value in subset_performance.items():
            log['val'][subset_key][key].append(value)
    if self.config['verbose']:
        print('Iteration {}/{}:'.format(i, self.iters_per_epoch - 1))
        print('Loss: {:.2e}'.format(loss))
        Evaluator.print_result_summary_flat(train_performance,
                                            '\tTraining : ')
        Evaluator.print_result_summary_flat(val_performance,
                                            '\tValidation: ')
        print()
def test_to_model():
    distorter = distorters.NoDistorter()
    model = helpers.DummyModel()
    dataset_no_subs = datasets.NormalizedPairedPoseDataset(
        'unit_test/dummy42', distorter, norm.NoNorm, False, device='cuda:0')
    dataset_subs = datasets.NormalizedPairedPoseDataset(
        'unit_test/ident42', distorter, norm.NoNorm, True, device='cuda:0')
    data_loader_no_subs = datasets.DataLoader(dataset_no_subs, 6)
    data_loader_subs = datasets.DataLoader(dataset_subs, 6)
    batch_size = 42
    true_results = {
        'coord_diff': torch.zeros(batch_size, device='cuda:0'),
        'distance': torch.zeros(batch_size, device='cuda:0'),
        'bone_length': torch.zeros(batch_size, device='cuda:0'),
        'proportion': torch.zeros(batch_size, device='cuda:0'),
    }

    results_norm_no_subs = Evaluator.to_model(data_loader_no_subs, model)
    results_orig_no_subs = Evaluator.to_model(data_loader_no_subs, model,
                                              space='original')
    results_norm_subs = Evaluator.to_model(data_loader_subs, model)
    results_orig_subs = Evaluator.to_model(data_loader_subs, model,
                                           space='original')

    for metric_name in Evaluator.metric_names:
        for subset_name in ['sub1', 'sub2']:
            assert torch.allclose(results_norm_subs[subset_name][metric_name],
                                  true_results[metric_name], atol=1e-5)
            assert torch.allclose(results_orig_subs[subset_name][metric_name],
                                  true_results[metric_name], atol=1e-5)
        assert torch.allclose(results_norm_no_subs['DEFAULT'][metric_name],
                              true_results[metric_name], atol=1e-5)
        assert torch.allclose(results_orig_no_subs['DEFAULT'][metric_name],
                              true_results[metric_name], atol=1e-5)
def train_aaai():
    env_config_path = Path(JSONS_FOLDER, 'configs', 'aaai_random.json')
    evaluator = Evaluator.from_file(
        Path(JSONS_FOLDER, 'evaluators', 'example_test.json'))
    main_train(
        get_frap_training(),
        SumoEnv.from_config_file(env_config_path),
        evaluator,
        Path('saved', 'aaai-random', 'frap'),
    )
def run_nrt_experiment(self):
    self.history_logs = edict()
    self.history_logs['Train'] = []
    self.history_logs['Val'] = []

    for dataName in sorted(os.listdir(
            Path(self.cfg.nrt_data_folder) / self.cfg.input_folder)):
        self.dataName = dataName
        print(f"\n==> {self.dataName}")

        self.dataloaders = self.train_val_loader(num=self.cfg.size_of_train)
        self.optimizer = torch.optim.Adam([
            dict(params=self.model.parameters(),
                 lr=self.cfg.learning_rate,
                 weight_decay=self.cfg.weight_decay)
        ])
        # self.history_logs = {'Train': np.zeros((len(metrics)+1, self.cfg.max_epoch)),
        #                      'Val': np.zeros((len(metrics)+1, self.cfg.max_epoch))}

        # --------------------------------- Train -------------------------------------------
        for epoch in range(0, self.cfg.max_epoch):
            print(f"\n==> train epoch: {epoch}/{self.cfg.max_epoch}")
            valid_logs = self.train_one_epoch(epoch)

            # do something (save model, change lr, etc.)
            # Track the best validation IoU so far and checkpoint on improvement.
            if self.cfg.max_score < valid_logs['iou_score']:
                self.cfg.max_score = valid_logs['iou_score']
                torch.save(self.model, self.model_url)
                print('Model saved!')

            if epoch == 10:
                self.optimizer.param_groups[0]['lr'] = self.cfg.learning_rate * 0.1
                print(f"Decrease decoder learning rate to "
                      f"{self.optimizer.param_groups[0]['lr']}!")

        # save learning history
        self.plot_and_save_learnHistory()

        self.cfg.data_folder = self.cfg.nrt_data_folder
        self.cfg.modelPath = self.savePath
        self.evaluator = Evaluator(self.cfg)
        url = Path(self.cfg.nrt_data_folder) / self.cfg.input_folder / self.dataName
        print(url)
        predMap = self.evaluator.inference(url, self.savePath)
def test_means_over_subsets():
    true_mean_results = {
        'coord_diff': torch.tensor(1.5),
        'distance': torch.tensor(1.5),
        'bone_length': torch.tensor(2.0),
        'proportion': torch.tensor(2.0),
    }
    mean_results = Evaluator.means_over_subsets(example_results)
    for metric_name in Evaluator.metric_names:
        assert torch.allclose(mean_results[metric_name],
                              true_mean_results[metric_name])
def test(cls, X_train, y_train, X_test, y_test, X_syn, y_syn, train_on,
         title, repeated, save_path=None):
    """
    Train and test a classification model for inner-corpus evaluation.

    @param cls: Initialized classifier given hyperparameters
    @param X_train: Real training data
    @param y_train: Labels for real training data
    @param X_test: Test data
    @param y_test: Labels for test data
    @param X_syn: Synthetic training data
    @param y_syn: Labels for synthetic training data
    @param train_on: Which training data will be used ('real', 'syn', or 'real+syn')
    @param title: Title for the generated confusion-matrix image
    @param repeated: Number of repetitions for each classification
    @param save_path: File path to save the generated image
    @return: Mean and standard deviation of test recall
    """
    start = time.time()
    eva = Evaluator(X_train, y_train, X_test, y_test, X_syn, y_syn, cls=cls,
                    repeated=repeated)
    if train_on == 'real':
        mean_recall, std_recall, mean_cm = eva.real()
    elif train_on == 'syn':
        mean_recall, std_recall, mean_cm = eva.syn()
    elif train_on == 'real+syn':
        mean_recall, std_recall, mean_cm = eva.real_plus_syn()
    else:
        raise ValueError("train_on must be 'real', 'syn' or 'real+syn'")
    end = time.time()

    print("time used: {} s".format(time_converter(start, end)))
    print("inner-corpus test - mean: {}, std: {}".format(mean_recall,
                                                         std_recall))
    plot_confusion_matrix(mean_cm, title=title)
    if save_path:
        plt.savefig(save_path, bbox_inches='tight')
        print("Successfully generated {}".format(save_path))
    else:
        plt.show()
    plt.close()
    return mean_recall, std_recall
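# Hedged usage sketch (illustrative, not from the source): driving the
# inner-corpus test above with a scikit-learn classifier. The SVC settings
# and output path are assumptions; the X_*/y_* arrays are feature matrices
# and label vectors prepared upstream.
from sklearn.svm import SVC

mean_recall, std_recall = test(
    SVC(kernel='rbf', C=1.0),
    X_train, y_train, X_test, y_test, X_syn, y_syn,
    train_on='real+syn',
    title='Real + synthetic training data',
    repeated=5,
    save_path='figures/cm_real_plus_syn.png',
)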
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file_name', type=str,
                        help='File name in manual_src to run.', default=None)
    parser.add_argument('-g', '--gen_src_count', type=int,
                        help='Number of automatically generated sources to run.',
                        default=0)
    parser.add_argument('-n', '--num_seeds', type=int,
                        help='Number of seeds that will run per source file.',
                        default=2)
    args = parser.parse_args()

    seeds = [10 * s for s in range(args.num_seeds)]
    evaluator = Evaluator(seeds, 'evaluation/manual_src',
                          'evaluation/__genned')
    if args.file_name is not None:
        evaluator.eval_with_manual_src_file(args.file_name)
    elif args.gen_src_count > 0:
        for _ in range(args.gen_src_count):
            evaluator.eval_with_gen_src()
    else:
        evaluator.eval_with_manual_src_all()

    dnn_approx_result, dnn_result, vanila_result = evaluator.get_all_results()
    print('All results.')
    print('DNN + Approx')
    print_result(dnn_approx_result)
    print('DNN')
    print_result(dnn_result)
    print('Vanila')
    print_result(vanila_result)

    dnn_approx_avg, dnn_avg, vanila_avg = evaluator.get_all_avg_results()
    print(print_bar_double)
    print('Overall avg results.')
    print('DNN + Approx')
    print(dnn_approx_avg)
    print('DNN')
    print(dnn_avg)
    print('Vanila')
    print(vanila_avg)
def test_to_batch():
    batch_size = 42
    poses = torch.rand(batch_size, 21, 3)
    labels = poses + torch.ones(batch_size, 21, 3)
    batch = datasets.PoseCorrectionBatch(poses, labels, poses, labels)
    true_results = {
        'coord_diff': torch.ones(batch_size),
        'distance': torch.sqrt(3.0 * torch.ones(batch_size)),
        'bone_length': torch.zeros(batch_size),
        'proportion': torch.zeros(batch_size),
    }
    results_norm = Evaluator.to_batch(batch)
    results_orig = Evaluator.to_batch(batch, space='original')
    for metric_name in Evaluator.metric_names:
        assert torch.allclose(results_norm[metric_name],
                              true_results[metric_name], atol=1e-6)
        assert torch.allclose(results_orig[metric_name],
                              true_results[metric_name], atol=1e-6)
def train_2v2():
    env_config_path = Path(JSONS_FOLDER, 'configs', '2v2', 'all_equal.json')
    training_state = get_frap_training_2v2()
    training_state.junctions = ['gneJ25', 'gneJ26', 'gneJ27', 'gneJ28']
    evaluator = Evaluator.from_file(
        Path(JSONS_FOLDER, 'evaluators', '2v2_small_subset.json'))
    main_train(
        training_state,
        SumoEnv.from_config_file(env_config_path, 3000),
        evaluator,
        Path('saved', 'aaai-multi', 'frap'),
    )
def _full_evaluation(self, model, eval_space):
    default_results = Evaluator.means_per_metric(
        Evaluator.to_model(self.val_loader, model, 'default'))
    eval_results = {'default': default_results}
    if eval_space == 'original':
        original_results = Evaluator.means_per_metric(
            Evaluator.to_model(self.val_loader, model, 'original'))
        eval_results['original'] = original_results
    for eval_space_results in eval_results.values():
        Evaluator.results_to_cpu(eval_space_results)
    return eval_results
def _intermediate_eval(self, session, batch):
    if session.params['eval_space'] == 'original':
        batch.original_poses = self.normalizer.denormalize(
            batch.poses, batch.normalization_params)
    train_results = {
        'DEFAULT': Evaluator.to_batch(batch, session.params['eval_space'])
    }
    train_mean_results = Evaluator.means_per_metric(train_results)
    Evaluator.results_to_cpu(train_mean_results)
    val_mean_results = session.test_model(self.val_loader)
    Evaluator.results_to_cpu(val_mean_results)
    return train_mean_results, val_mean_results
def test_means_per_metric():
    true_mean_results = {
        'sub1': {
            'coord_diff': torch.tensor(1.0),
            'distance': torch.tensor(1.0),
            'bone_length': torch.tensor(1.0),
            'proportion': torch.tensor(1.0),
        },
        'sub2': {
            'coord_diff': torch.tensor(2.0),
            'distance': torch.tensor(2.0),
            'bone_length': torch.tensor(3.0),
            'proportion': torch.tensor(3.0),
        }
    }
    mean_results = Evaluator.means_per_metric(example_results)
    for subset_name in mean_results.keys():
        for metric_name in Evaluator.metric_names:
            assert torch.allclose(mean_results[subset_name][metric_name],
                                  true_mean_results[subset_name][metric_name])
def evaluate_labeling(dir_path, labeling: Dict[str, Dict[str, int]],
                      key_path: str = None, maxLabels=2) \
        -> Dict[str, Dict[str, float]]:  # RL: maxLabels added
    """
    Labeling example: {'become.v.3': {'become.sense.1': 3, 'become.sense.5': 17}, ...}
    means instance 'become.v.3' is 17/20 in sense 'become.sense.5' and 3/20 in
    sense 'become.sense.1'.

    :param key_path: write the produced key to this file
    :param dir_path: SemEval dir
    :param labeling: instance id labeling
    :return: FNMI, FBC as calculated by the SemEval-provided code
    """
    logging.info('starting evaluation key_path: %s' % key_path)

    def get_scores(gold_key, eval_key):
        ret = {}
        # Original SemEval-2013 jar-based scoring, kept for reference:
        # for metric, jar, column in [
        #     # ('jaccard-index', 'SemEval-2013-Task-13-test-data/scoring/jaccard-index.jar'),
        #     # ('pos-tau', 'SemEval-2013-Task-13-test-data/scoring/positional-tau.jar'),
        #     # ('WNDC', 'SemEval-2013-Task-13-test-data/scoring/weighted-ndcg.jar'),
        #     ('FNMI', os.path.join(dir_path, 'scoring/fuzzy-nmi.jar'), 1),
        #     ('FBC', os.path.join(dir_path, 'scoring/fuzzy-bcubed.jar'), 3),
        # ]:
        #     logging.info('calculating metric %s' % metric)
        #     res = subprocess.Popen(['java', '-jar', jar, gold_key, eval_key],
        #                            stdout=subprocess.PIPE).stdout.readlines()
        #     for line in res:
        #         line = line.decode().strip()
        #         if line.startswith('term'):
        #             pass  # columns = line.split('\t')
        #         else:
        #             split = line.split('\t')
        #             if len(split) > column:
        #                 word = split[0]
        #                 result = split[column]
        #                 if word not in ret:
        #                     ret[word] = {}
        #                 ret[word][metric] = float(result)

        #+RL
        script = [
            "python2.7", "./spanish-lex-sample/score/score", eval_key,
            gold_key, './spanish-lex-sample/test/emptysensemap'
        ]
        res = subprocess.Popen(" ".join(script), shell=True,
                               env={"PYTHONPATH": "."},
                               stdout=subprocess.PIPE).stdout.readlines()
        # The scorer's stdout lines are bytes; decode them before parsing.
        out = [line.decode().strip() for line in res]
        ret['all'] = {}
        splitted = out[2].split()
        ret['all']['precision'] = float(splitted[1])
        ret['all']['correct'] = float(splitted[2].replace('(', ''))
        ret['all']['attempted'] = float(splitted[5])
        splitted = out[3].split()
        ret['all']['recall'] = float(splitted[1])
        ret['all']['total'] = float(splitted[5])
        splitted = out[4].split()
        ret['all']['attemptedPct'] = float(splitted[1])
        #-
        return ret

    def getGoldKeySENSEVAL2(goldPath):  #+RL
        with open(os.path.join(dir_path, goldPath), 'r') as fgold:
            goldKey = dict()
            for line in fgold.readlines():
                splitted = line.strip().split()
                # if splitted[0] == lemma:
                instance = dict()
                graded = dict()
                rest = splitted[2:]
                for index in rest:
                    graded[splitted[0] + '.' + index] = 1.0 / len(rest)
                instance[splitted[1]] = graded
                if splitted[0] not in goldKey:
                    goldKey[splitted[0]] = instance
                else:
                    goldKey[splitted[0]].update(instance)
        return goldKey

    def dictToJ(dictionary):  #+RL
        HashMap = autoclass('java.util.HashMap')
        String = autoclass('java.lang.String')
        Double = autoclass('java.lang.Double')
        map = HashMap()
        for token, instances in dictionary.items():
            jToken = String(token)
            instanceMap = HashMap()
            for instance, labels in instances.items():
                jInstance = String(instance)
                labelMap = HashMap()
                sum_applicabilities = sum([a for _, a in labels.items()])
                for label, applicability in labels.items():
                    if sum_applicabilities > 1:
                        applicability /= sum_applicabilities
                    jLabel = String(label)
                    jApplicability = Double(applicability)
                    labelMap.put(jLabel, jApplicability)
                instanceMap.put(jInstance, labelMap)
            map.put(jToken, instanceMap)
        return map

    def getTrainingInstances(trainingSets):  #+RL
        HashSet = autoclass('java.util.HashSet')
        String = autoclass('java.lang.String')
        listJTrainingSets = []
        for trainingSet in trainingSets:
            jTrainingSet = HashSet()
            for instance in trainingSet:
                jInstance = String(instance)
                jTrainingSet.add(jInstance)
            listJTrainingSets.append(jTrainingSet)
        return listJTrainingSets

    def printTrainingSets(listJTrainingSets):  #+RL
        trainingSet = 1
        for trainingInstances in listJTrainingSets:
            print('---------------------------------------------Training set %d \n'
                  % trainingSet)
            entrySetIterator = trainingInstances.iterator()
            string = ''
            while entrySetIterator.hasNext():
                e = entrySetIterator.next()
                string += e + ', '
            print(string)
            trainingSet += 1

    def mapSenses(trainingInstances, goldMap, labelingMap, maxLabels):  #+RL
        GradedReweightedKeyMapper = autoclass(
            'edu.ucla.clustercomparison.GradedReweightedKeyMapper')
        mapper = GradedReweightedKeyMapper()
        allRemappedTestKey = {}
        remappedTestKey = mapper.convert(goldMap, labelingMap,
                                         trainingInstances)
        # print(remappedTestKey)
        convertedSet = remappedTestKey.entrySet()
        convertedIterator = convertedSet.iterator()
        while convertedIterator.hasNext():
            e = convertedIterator.next()
            doc = e.getKey()
            instRatings = e.getValue()
            instanceIterator = instRatings.entrySet().iterator()
            while instanceIterator.hasNext():
                i = instanceIterator.next()
                instance = i.getKey()
                labelIterator = i.getValue().entrySet().iterator()
                labelList = []
                while labelIterator.hasNext():
                    l = labelIterator.next()
                    label = l.getKey()
                    applicability = l.getValue()
                    # print(f'{label} -----{applicability}')
                    labelList.append((label, applicability))
                labelList.sort(key=lambda x: x[1], reverse=True)
                allRemappedTestKey[instance] = labelList[0:maxLabels]
        return allRemappedTestKey

    with tempfile.NamedTemporaryFile('wt') as fout:
        lines = []
        #+RL
        goldPath = 'key'
        goldKey = getGoldKeySENSEVAL2(goldPath)
        allInstances = []
        for _, v in goldKey.items():
            for k1, _ in v.items():
                allInstances.append(k1)
        indices = list(range(0, len(allInstances)))
        random.seed(18)
        random.shuffle(indices)
        trainingSets = [set() for _ in range(0, 5)]
        for i in range(0, len(allInstances)):
            instance = allInstances[i]
            toExclude = i % len(trainingSets)
            for j in range(0, len(trainingSets)):
                if j != toExclude:
                    trainingSets[j].add(instance)
        # print(trainingSets)
        # termToNumberSenses = {}
        # for e in goldKey.items():
        #     term = e[0]
        #     senses = set()
        #     for ratings in goldKey[term].values():
        #         for sense in ratings.keys():
        #             senses.update(sense)
        #     termToNumberSenses[term] = len(senses)
        listJTrainingInstances = getTrainingInstances(trainingSets)
        # printTrainingSets(listJTrainingInstances)
        goldMap = dictToJ(goldKey)
        lemmaLabeling = {}
        # print(labeling)
        for k, v in labeling.items():
            lemma = k.split('.')[0]
            if lemma not in lemmaLabeling:
                lemmaLabeling[lemma] = {k: v}
            else:
                lemmaLabeling[lemma][k] = v
        labelingMap = dictToJ(lemmaLabeling)
        lines = []
        global_test_key = {}
        for jTrainingInstances in listJTrainingInstances:
            testKey = mapSenses(jTrainingInstances, goldMap, labelingMap,
                                maxLabels)
            # print(sorted(testKey.items(), key=lambda x: x[0]))
            global_test_key.update(testKey)
            for instance, label in testKey.items():
                clusters_str = ' '.join(x[0].split('.')[1]
                                        for x in label[0:maxLabels])
                lines.append('%s %s %s' % (instance.split('.')[0], instance,
                                           clusters_str))
        evaluator = Evaluator(goldKey, global_test_key)
        evals = evaluator.semeval_2013_task_13_metrics()
        evalKey = key_path
        logging.info('writing key to file %s' % evalKey)
        with open(evalKey, 'w', encoding="utf-8") as fout2:
            lines = sorted(lines)
            fout2.write('\n'.join(lines))
        scores = get_scores(os.path.join(dir_path, goldPath),  # RL: goldPath added
                            evalKey)
        scores['all'].update(evals)
        print(scores)
        #-
        # Original flow, kept for reference:
        # goldPath = 'keys/gold/all.key'
        # for instance_id, clusters_dict in labeling.items():
        #     clusters = sorted(clusters_dict.items(), key=lambda x: x[1])
        #     clusters_str = ' '.join([('%s/%d' % (cluster_name, count))
        #                              for cluster_name, count in clusters])
        #     lemma_pos = instance_id.rsplit('.', 1)[0]
        #     lines.append('%s %s %s' % (lemma_pos, instance_id, clusters_str))
        # fout.write('\n'.join(lines))
        # fout.flush()
        # scores = get_scores(os.path.join(dir_path, goldPath), fout.name)
        # if key_path:
        #     logging.info('writing key to file %s' % key_path)
        #     with open(key_path, 'w', encoding="utf-8") as fout2:
        #         fout2.write('\n'.join(lines))
    return scores
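# Hedged usage sketch (assumption): evaluate_labeling expects a fuzzy sense
# labeling keyed by instance id; dir_path points at the evaluation data
# directory. The paths and counts below are illustrative only.
labeling = {
    'become.v.3': {'become.sense.1': 3, 'become.sense.5': 17},
}
scores = evaluate_labeling('spanish-lex-sample', labeling,
                           key_path='keys/produced.key', maxLabels=2)
print(scores['all'])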
datasets = {}
dataset_splits = DatasetSplitter.generate_splits(config)
transformations = TransformsGenerator.get_final_transforms(config)
for key in dataset_splits:
    path, batching_config, split = dataset_splits[key]
    transform = transformations[key]
    datasets[key] = VideoDataset(path, batching_config, transform, split)

trainer = Trainer(config, model, datasets["train"], logger)
evaluator = Evaluator(config, datasets["validation"], logger,
                      action_sampler=None, logger_prefix="validation")

# Resume training
try:
    trainer.load_checkpoint(model)
except Exception as e:
    logger.print(e)
    logger.print("Cannot play without loading checkpoint")
    exit(1)

model.eval()

dataloader = evaluator.dataloader  # Uses validation dataloader

# dataset_index = int(input(f"- Insert start sample index in [0, {len(dataloader)}): "))
dataset_index = 0
def train(self, model, hyperparams):
    """
    Trains the passed model on the training set with the specified
    hyper-parameters. Loss, validation errors or other intermediate results
    are logged (or printed/plotted during the training) and returned at the
    end, together with the best weights according to the validation
    performance.

    :param model: Model to train.
    :type model: torch.nn.Module
    :param hyperparams: Dictionary with all the hyperparameters required for
        training.
    :type hyperparams: dict
    :return: log: Dictionary containing all logs collected during training.
    :type: log: dict
    :return: final_val_results: The validation results (all metrics) of the
        model using the best weights after training finished.
    :type: final_val_results: dict
    :return: best_weights: Model weights that performed best on the
        validation set during the whole training.
    :type: best_weights: dict
    :return: example_predictions: Corrected example poses from the validation
        set collected during training.
    :type: example_predictions: torch.FloatTensor
    """
    start_time = dt.now()
    print('Time: ', start_time.strftime('%H:%M:%S'))
    print()
    print('Setting things up...')

    session = TrainingSession(model, hyperparams, self.normalizer)
    self.train_loader.set_augmenters(hyperparams['augmenters'])
    helper.print_hyperparameters(hyperparams, self.config['interest_keys'],
                                 indent=1)
    log, example_predictions, log_iterations = self._initialize_logs()

    print('\n\tChecking initial validation performance:')
    best_val_performance, best_weights = self._initial_performance(session)
    print()
    print('All set, let\'s get started!')

    for epoch in range(self.config['num_epochs']):
        running_loss = 0.0
        session.schedule_learning_rate()

        for i, batch in enumerate(self.train_loader):
            loss, output_batch = session.train_batch(batch)

            if self.config['log_loss']:
                log['train']['loss'].append(loss)
            if self.config['log_grad']:
                log['train']['grad'].append(self._sum_gradients(model))

            if i in log_iterations:
                train_performance, val_performance = self._intermediate_eval(
                    session, output_batch)
                self._logging(log, loss, train_performance, val_performance,
                              i)
                if len(self.config['val_example_indices']) > 0:
                    example_predictions.append(
                        self._example_predictions(session))

            running_loss += loss

        # Training for this epoch finished.
        session.scheduler_metric = running_loss / self.iters_per_epoch
        val_performance = session.test_model(self.val_loader)
        target_performance = Evaluator.means_over_subsets(
            val_performance)['distance']
        if target_performance < best_val_performance:
            best_val_performance = target_performance
            best_weights = copy.deepcopy(model.state_dict())
            log['best_epoch'] = epoch
        self._print_epoch_end_info(session, val_performance, start_time,
                                   epoch, best_val_performance)

    model.load_state_dict(best_weights)
    final_val_results = self._full_evaluation(model,
                                              session.params['eval_space'])

    print()
    print('-' * 30)
    print('FINISH')
    print('Final validation errors:')
    Evaluator.print_results(final_val_results)
    print()

    return log, final_val_results, best_weights, torch.stack(
        example_predictions).cpu()
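# Hedged usage sketch (illustrative): how train() above might be driven. The
# trainer construction and the hyperparameter keys are assumptions inferred
# from the attributes the method accesses.
hyperparams = {
    'eval_space': 'original',   # consumed via session.params['eval_space']
    'augmenters': [],           # passed to train_loader.set_augmenters
    # ... plus optimizer/scheduler settings consumed by TrainingSession ...
}
log, final_val_results, best_weights, example_predictions = trainer.train(
    model, hyperparams)
model.load_state_dict(best_weights)   # keep the best validation weights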
import json

from evaluation.evaluator import Evaluator

if __name__ == '__main__':
    with open('dataset/dev-predictions-final-it4.json', 'r') as f:
        bad_format_predictions = json.loads(f.read())

    predictions = {}
    for question_id, predictions_list in bad_format_predictions.items():
        predictions[question_id] = predictions_list[0]

    evaluator = Evaluator('dataset/dev.json')
    print(evaluator.ExactMatch(predictions))
    print(evaluator.F1(predictions))
                      training_properties, datasetloader, device)
    if dataset_properties["checkpoint_path"] is None or \
            dataset_properties["checkpoint_path"] == "":
        logger.info("Train process is starting from scratch!")
        trainer.train_iters(model)
    else:
        checkpoint = torch.load(dataset_properties["checkpoint_path"])
        logger.info("Train process is reloading from epoch {}".format(
            checkpoint["epoch"]))
        trainer.train_iters(model, checkpoint)
elif model_properties["common_model_properties"]["run_mode"] == "eval_interactive":
    interactive_evaluator = Evaluator.evaluator_factory(
        "interactive_evaluator", "cpu")
    model_path = evaluation_properties["model_path"]
    sentence_vocab_path = evaluation_properties["sentence_vocab"]
    category_vocab_path = evaluation_properties["category_vocab"]
    logger.info("Interactive evaluation mode for model {}:".format(model_path))
    interactive_evaluator.evaluate_interactive(
        model_path=model_path,
        sentence_vocab_path=sentence_vocab_path,
        category_vocab_path=category_vocab_path,
        preprocessor=preprocessor.preprocess,
        topk=training_properties["topk"])

logger.info("Done!")
            (int(line[0]), int(line[1]), int(line[2]), int(line[3])))

eval_list = []
for word in ["immediately"]:
    if len(positions[word]) > 1:
        for position in positions[word]:
            print("Word: %s" % word)
            query_width = position[2] - position[0]
            query_height = position[3] - position[1]
            width = roundTo(query_width, 10)
            # print(width)
            # print(query_height)
            my_finder = Word_finder(sift_step_size, sift_cell_size,
                                    sift_n_classes, width, patch_height,
                                    patch_hop_size, flatten_dimensions,
                                    searchfile, visualize_progress, tf_idf)
            result = my_finder.search(position)
            recall = Eva.calculate_recall(positions[word], result, threshold)
            precision = Eva.calculate_precision(positions[word], result,
                                                threshold)
            avg_precision = Eva.calculate_avg_precision(positions[word],
                                                        result, threshold)
            eval_list.append((recall, precision, avg_precision))
            print("Word: %s \nRecall %g \nPrecision %g \navg_precision %g"
                  % (word, recall, precision, avg_precision))
Eva.calculate_mean(eval_list)


if __name__ == '__main__':
    pass