def __init__(self, class_train, class_test, min_division=2, min_info_gain=0.05, number_of_trees=15):
    """Build a forest of decision trees from CSV data and score it on the test set.

    Args:
        class_train: Path of the training CSV file.
        class_test: Path of the test CSV file.
        min_division: Stored on the instance and passed to every
            ``DecisionTree`` as its last argument.
        min_info_gain: Stored on the instance and passed to every
            ``DecisionTree``.
        number_of_trees: Number of trees to build.

    The F1 score of the forest on the test rows is stored in ``self.f1``.
    """
    # Labels are appended to X_train & X_test.
    # The files are closed as soon as their rows have been read.
    with open(class_train, 'r', newline='') as train_file:
        self.X_train = list(csv.reader(train_file))
    with open(class_test, 'r', newline='') as test_file:
        self.X_test = list(csv.reader(test_file))

    # parameter initialization
    self.number_of_trees = number_of_trees
    self.min_info_gain = min_info_gain
    self.min_division = min_division

    # every tree is trained on floor(sqrt(7)) == 2 feature columns
    feature_number_for_each_tree = math.floor(math.sqrt(7))
    forest = []

    # initialization of decision trees
    for _ in range(number_of_trees):
        # range(1, 8) makes column 7 eligible as well; column 8 is read as
        # the label below.
        # NOTE(review): assumes the seven features live in columns 1..7 --
        # confirm against the CSV layout.
        feature_indexes = random.sample(range(1, 8), feature_number_for_each_tree)
        tree = DecisionTree(
            self.get_random_subsets(feature_indexes),
            feature_indexes[0],
            feature_indexes[1],
            min_info_gain,
            min_division,
        )
        # Add tree instance to forest
        forest.append(tree)

    # making predictions and calculating the f1 score
    pred = self.prediction(forest, number_of_trees)
    # Keep the score so callers can read it instead of discarding it.
    self.f1 = f1_score([row[8] for row in self.X_test], pred)
def test(opt, model, test_dataloader, threshold=config.PRED_THRESHOLD,
         record_result=False, analysis_result=False, mode=config.TEST):
    """Evaluate ``model`` on ``test_dataloader`` and return macro/micro/strict scores.

    Gold labels and predictions of all batches are concatenated first, and
    the metrics are computed once over the concatenated tensors.

    Args:
        opt: Options object; ``corpus_dir`` and ``batch_size`` are read here
            and the object is forwarded to the loss and metric helpers.
        model: Called as
            ``model([mention, mention_len, lcontext, rcontext, mention_char])``.
        test_dataloader: Iterable of 6-tuples
            ``(mention, mention_len, lcontext, rcontext, mention_char, y)``
            whose items support ``.to(device)``.
        threshold: Forwarded to ``bce_loss``.
        record_result: When true, ``util.record_result`` is called per batch.
        analysis_result: When true, ``util.analysis_result`` is called on the
            concatenated gold/pred tensors.
        mode: Forwarded to ``util.record_result``.

    Returns:
        Three tuples ``(F1, precision, recall)`` for the loose-macro,
        loose-micro and strict metrics, in that order.
    """
    device = torch.device(config.CUDA) if torch.cuda.is_available() else "cpu"
    model.eval()

    types_path = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"
    # Close the pickle file as soon as it has been loaded.
    with open(types_path, 'rb') as types_file:
        hierarchical_types = pickle.load(types_file)

    bgn = 0
    gold_all, pred_all = [], []
    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for batch in test_dataloader:
            mention, mention_len, lcontext, rcontext, mention_char, y = batch
            mention = mention.to(device)
            mention_len = mention_len.to(device)
            lcontext = lcontext.to(device)
            rcontext = rcontext.to(device)
            mention_char = mention_char.to(device)
            y = y.to(device)

            model_output = model(
                [mention, mention_len, lcontext, rcontext, mention_char])
            _, gold, pred, prob = bce_loss(
                model, model_output, y, opt, hierarchical_types, threshold)

            if record_result:
                util.record_result(gold, pred, prob, opt, bgn, mode)
            gold_all.append(gold)
            pred_all.append(pred)
            # Offset of the next batch, passed to util.record_result.
            bgn += opt.batch_size

    gold_all = torch.cat(gold_all)
    pred_all = torch.cat(pred_all)
    if analysis_result:
        util.analysis_result(gold_all, pred_all)

    pmacro, remacro = e.loose_macro_PR(gold_all, pred_all, opt)
    pmicro, remicro = e.loose_micro_PR(gold_all, pred_all, opt)
    pstrict, restrict = e.strict_PR(gold_all, pred_all, opt)
    macro_F1 = e.f1_score(pmacro, remacro)
    micro_F1 = e.f1_score(pmicro, remicro)
    strict_F1 = e.f1_score(pstrict, restrict)

    return (macro_F1, pmacro, remacro), \
           (micro_F1, pmicro, remicro), \
           (strict_F1, pstrict, restrict)
def test_f1_score(self):
    """Check ``f1_score`` with ``threshold=0.5`` and with ``use_threshold=False``.

    Both calls are expected to give macro F1 == 7/15 and micro F1 == 2/3.
    """
    target = FloatTensor([[1, 1, 1, 0, 1],
                          [1, 0, 1, 0, 1]])

    # First case: predictions in {-1, 1}, scored with threshold=0.5.
    signed_pred = FloatTensor([[1, -1, 1, -1, 1],
                               [-1, 1, 1, -1, -1]])
    macro, micro = f1_score(target, signed_pred, 5, threshold=0.5)
    self.assertAlmostEqual(7 / 15, macro, 5)
    self.assertAlmostEqual(2 / 3, micro, 5)

    # Second case: predictions in {0, 1}, scored with use_threshold=False.
    binary_pred = FloatTensor([[1, 0, 1, 0, 1],
                               [0, 1, 1, 0, 0]])
    macro, micro = f1_score(target, binary_pred, 5, use_threshold=False)
    self.assertAlmostEqual(7 / 15, macro, 5)
    self.assertAlmostEqual(2 / 3, micro, 5)
def get_metric(logits, data):
    """Return the mean answer F1 of a batch, scaled to 0-100.

    Args:
        logits: Pair ``(start_logits, end_logits)`` of tensors.
        data: Mapping with the keys ``'answer'``, ``'context'``,
            ``'offset_mapping'`` and ``'cw_ids'``.

    Returns:
        ``100.0 * sum(f1_score(pred, true)) / n_samples``, or ``0.0`` when
        the batch contains no answers.
    """
    answers = data['answer']
    n_samples = len(answers)
    if n_samples == 0:
        # Nothing to score; avoids a ZeroDivisionError in the final average.
        return 0.0

    contexts = data['context']
    offsets = data['offset_mapping']
    cw_ids = data['cw_ids']

    start_logits, end_logits = logits
    start_logits = start_logits.squeeze().cpu()
    end_logits = end_logits.squeeze().cpu()

    # Positions whose cw_id is 0 get `_neg_inf` in place of their logit.
    mask = (cw_ids != 0).type(torch.float32)
    start_logits = mask * start_logits + (1 - mask) * _neg_inf
    end_logits = mask * end_logits + (1 - mask) * _neg_inf

    pred_starts = torch.argmax(start_logits, dim=1)
    pred_ends = torch.argmax(end_logits, dim=1)

    f1 = 0
    for i, true_answer in enumerate(answers):
        start_token_idx, end_token_idx = pred_starts[i], pred_ends[i]
        start_char_idx = offsets[i][start_token_idx][0]
        end_char_idx = offsets[i][end_token_idx][1]
        # NOTE(review): the end offset is treated as inclusive (+1) --
        # confirm this matches the convention of `offset_mapping`.
        pred_answer = contexts[i][start_char_idx:end_char_idx + 1]
        f1 += f1_score(pred_answer, true_answer)

    return 100.0 * f1 / n_samples
def evaluate(self, x, y):
    """Score the model's output for ``x`` against ``y``.

    Switches the module to eval mode, runs it on ``x`` and passes the
    output to ``f1_score`` together with ``self.number_of_class`` and
    ``self.best_threshold``.

    Returns:
        ``(f1_macro, f1_micro)``, each taken as element 0 of the
        corresponding result converted to a NumPy array.
    """
    self.eval()
    prediction = self(x)
    macro_t, micro_t = f1_score(
        y,
        prediction,
        self.number_of_class,
        use_threshold=True,
        threshold=self.best_threshold,
    )
    macro_value = macro_t.data.cpu().numpy()[0]
    micro_value = micro_t.data.cpu().numpy()[0]
    return macro_value, micro_value
def __init__(self, class_train="Classification_Train.csv", class_test="Classification_Test_Data.csv",
             max_depth=5, min_info_gain=0.03, number_of_trees=15):
    """Build a forest of decision trees from CSV data and score it on the test set.

    Args:
        class_train: Path of the training CSV file.
        class_test: Path of the test CSV file.
        max_depth: Stored on the instance and passed to every
            ``DecisionTree`` as its last argument.
        min_info_gain: Stored on the instance and passed to every
            ``DecisionTree``.
        number_of_trees: Number of trees to build.

    The F1 score of the forest on the test rows is stored in ``self.f1``.
    """
    # Labels are appended to X_train & X_test.
    # The files are closed as soon as their rows have been read.
    with open(class_train, 'r', newline='') as train_file:
        self.X_train = list(csv.reader(train_file))
    with open(class_test, 'r', newline='') as test_file:
        self.X_test = list(csv.reader(test_file))

    # parameter initialization
    self.number_of_trees = number_of_trees
    self.min_info_gain = min_info_gain
    self.max_depth = max_depth

    # every tree is trained on floor(sqrt(7)) == 2 of the columns 1..7
    feature_number_for_each_tree = math.floor(math.sqrt(7))
    forest = []

    # initialization of decision trees
    for _ in range(number_of_trees):
        feature_indexes = random.sample(range(1, 8), feature_number_for_each_tree)
        tree = DecisionTree(
            self.get_random_subsets(feature_indexes),
            feature_indexes[0],
            feature_indexes[1],
            min_info_gain,
            max_depth,
        )
        # Decision tree class automatically calls the train method
        forest.append(tree)

    # making predictions and calculating the f1 score (column 8 is the label)
    pred = self.prediction(forest, number_of_trees)
    self.f1 = f1_score([row[8] for row in self.X_test], pred)
def SVM_machine(class_train, class_test):
    """Run the SVM pipeline on two CSV files and print its results.

    Prints the predictions for the test file, the classification error
    percentage and the F1 score.

    Args:
        class_train: Path of the training CSV file.
        class_test: Path of the test CSV file.
    """
    print("\nSVM Initialization\n")

    # dividing the train data as input and output for SVM algorithm
    with open(class_train, 'r', newline='') as train_file:
        train_rows = list(csv.reader(train_file))
    # NOTE(review): X and y are computed but never handed to the SVM object
    # below -- confirm whether a fit step is missing here or whether
    # SupportVectorMachine obtains its training data some other way.
    X, y = calculateInputOutput(convertSVM(train_rows))

    # creating SVM object from SupportVectorMachine class
    SVM = SupportVectorMachine()

    # calculating the 5 fold cross validation
    SVM.crossValidation()

    # dividing the test data as input and output for SVM algorithm
    with open(class_test, 'r', newline='') as test_file:
        test_rows = list(csv.reader(test_file))
    X_t, y_t = calculateInputOutput(convertSVM(test_rows))

    # classifying the test data with the SVM machine
    y_p = SVM.predict(X_t)

    # printing the label predictions
    print('The SVM predictions are:')
    print(np.array(y_p))

    # calculating the prediction error on the test set and printing
    error = classError(y_p, y_t)
    print('\nThe SVM prediction error percentage is {0:.2f} \n'.format(error))

    score = f1_score(y_t, y_p)
    print('f1(SVM) = {0:.2f} \n'.format(score))
def evaluate(self, criterion):
    """Evaluate ``self.model`` on ``self.val_loader``.

    Labels and predictions are compared against the values 0 and 1 only,
    i.e. the confusion-matrix counts assume a binary task.

    Args:
        criterion: Loss function, called as ``criterion(outputs, labels)``.

    Returns:
        A tuple ``(score, accuracy, total_loss)``: the result of
        ``f1_score(tp, fp, fn, tn)``, the accuracy computed from the
        confusion-matrix counts, and the sum of the per-batch losses.
    """
    total_loss = 0.0
    tp, fp, fn, tn = 0, 0, 0, 0

    self.model.eval()
    with torch.no_grad():
        for images, labels in self.val_loader:
            images = images.to(self.device)
            labels = labels.to(self.device).long().flatten()

            # Forward propagate and evaluate
            outputs = self.model(images)
            loss = criterion(outputs, labels)

            # Compute class log-probabilities -> predictions
            probabilities = self.model.log_softmax(outputs)
            predictions = torch.argmax(probabilities, dim=1)

            # Compute confusion matrix terms
            is_positive = labels == 1
            is_negative = labels == 0
            tp += (predictions[is_positive] == 1).sum().item()
            fp += (predictions[is_negative] == 1).sum().item()
            fn += (predictions[is_positive] == 0).sum().item()
            tn += (predictions[is_negative] == 0).sum().item()

            # Compute total loss
            total_loss += loss.item()

    # Compute model accuracy and f1-score; the epsilon keeps the division
    # defined when no sample was counted.
    accuracy = (tp + tn) / (tp + fp + fn + tn + 1e-10)
    score = f1_score(tp, fp, fn, tn)

    return score, accuracy, total_loss
# Select the image
index = 4
image_path = os.path.join(image_dir, image_files[index])
img = cv2.imread(image_path)
gt = annotation[index]
answer = ans[index]

# Predict: keep detections whose score reaches the confidence threshold,
# ordered from highest to lowest score
confidence_threshold = 0.25
out = sorted(
    (det for det in ssd.predict(image_path) if det["score"] >= confidence_threshold),
    key=lambda det: det["score"],
    reverse=True,
)

# Evaluate
pred = [det['bbox'] for det in out]
evaluation = f1_score(pred, answer)
print('f1score:', evaluation)

# Display
plot_bbox(img, gt, out)
print("1つ目:正解と予測の重複度を表すIoUが規定値(0.5)に達していないことから, 誤検出(FP)となっており、正解bboxも検出できていない為、未検出(FN)")

##################
# Select the image
index = 4
image_path = os.path.join(image_dir, image_files[index])
img = cv2.imread(image_path)
gt = annotation[index]
answer = ans[index]
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Each epoch shuffles the training instances, draws negative samples and
    minimises a binary cross-entropy loss over target and negative items.
    When ``verbose`` is true the model is evaluated after every epoch, the
    metrics are printed and recorded, and a checkpoint is saved whenever
    the MAP improves. The recorded metrics are written to
    ``results/ml1m_hold`` at the end.

    Parameters
    ----------

    train: :class:`spotlight.interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`spotlight.interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        evaluate, print the logs and save checkpoints after every epoch
    """

    # convert to sequences, targets and users
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    n_train = sequences_np.shape[0]

    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    best_map = 0

    # create the checkpoint directory if it does not exist
    save_dir = args.save_root + args.dataset + '/'
    os.makedirs(save_dir, exist_ok=True)

    # One dict per evaluated epoch; turned into a DataFrame once at the end
    # (DataFrame.append was removed in pandas 2.0).
    epoch_rows = []

    for epoch_num in range(start_epoch, self._n_iter):

        t1 = time()

        # set model to training mode
        self._net.train()

        users_np, sequences_np, targets_np = shuffle(users_np,
                                                     sequences_np,
                                                     targets_np)

        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

        # convert numpy arrays to PyTorch tensors and move them to the device
        users, sequences, targets, negatives = (
            torch.from_numpy(array).long().to(self._device)
            for array in (users_np, sequences_np, targets_np, negatives_np)
        )

        epoch_loss = 0.0
        n_batches = 0

        batches = minibatch(users, sequences, targets, negatives,
                            batch_size=self._batch_size)
        for n_batches, (batch_users,
                        batch_sequences,
                        batch_targets,
                        batch_negatives) in enumerate(batches, start=1):
            items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
            items_prediction = self._net(batch_sequences,
                                         batch_users,
                                         items_to_predict)

            (targets_prediction,
             negatives_prediction) = torch.split(items_prediction,
                                                 [batch_targets.size(1),
                                                  batch_negatives.size(1)], dim=1)

            self._optimizer.zero_grad()
            # compute the binary cross-entropy loss
            positive_loss = -torch.mean(
                torch.log(torch.sigmoid(targets_prediction)))
            negative_loss = -torch.mean(
                torch.log(1 - torch.sigmoid(negatives_prediction)))
            loss = positive_loss + negative_loss

            epoch_loss += loss.item()

            loss.backward()
            self._optimizer.step()

        # Average over the batches of this epoch (skipped when there were none).
        if n_batches:
            epoch_loss /= n_batches

        t2 = time()
        if verbose:
            precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])

            # Mean precision/recall and the derived F1 for k = 1, 5, 10.
            mean_prec = [np.mean(precision[i]) for i in range(3)]
            mean_rec = [np.mean(recall[i]) for i in range(3)]
            f1_at_k = [f1_score(p, r) for p, r in zip(mean_prec, mean_rec)]
            eval_time = time() - t2

            output_str = ("Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, "
                          "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, "
                          "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f,"
                          "f1_score@1=%.4f,f1_score@5=%.4f,f1_score@10=%.4f,[%.1f s]"
                          % (epoch_num + 1,
                             t2 - t1,
                             epoch_loss,
                             mean_aps,
                             mean_prec[0], mean_prec[1], mean_prec[2],
                             mean_rec[0], mean_rec[1], mean_rec[2],
                             f1_at_k[0], f1_at_k[1], f1_at_k[2],
                             eval_time))

            parameterset = {
                "Epoch": epoch_num + 1,
                "time1": t2 - t1,
                "loss": epoch_loss,
                "map": mean_aps,
                "prec@1": mean_prec[0],
                "prec@5": mean_prec[1],
                "prec@10": mean_prec[2],
                "recall@1": mean_rec[0],
                "recall@5": mean_rec[1],
                "recall@10": mean_rec[2],
                "f1_score@1": f1_at_k[0],
                "f1_score@5": f1_at_k[1],
                "f1_score@10": f1_at_k[2],
                "time2": eval_time,
            }
            epoch_rows.append(parameterset)
            print(output_str)

            # Keep a checkpoint of the best model seen so far (by MAP).
            if mean_aps > best_map:
                best_map = mean_aps
                checkpoint_name = "best_model.pth.tar"
                save_checkpoint({
                    'epoch': epoch_num + 1,
                    'state_dict': self._net.state_dict(),
                    'optimizer': self._optimizer.state_dict(),
                }, checkpoint_name, save_dir)

    # NOTE(review): the summary and the CSV export are placed after the
    # epoch loop; the original indentation was not recoverable -- confirm.
    print ('***** Best map:{0:.4f} *****'.format(best_map))
    # Make sure the output directory exists so the metrics are not lost
    # after training has finished.
    os.makedirs("results", exist_ok=True)
    results = pd.DataFrame(epoch_rows)
    results.to_csv("results/ml1m_hold", index=False)
def train(opt, model, optim, tr_dataloader, test_dataloader,
          dev_dataloader, lr_scheduler, logger):
    """Train ``model`` for ``opt.epochs`` epochs, evaluating after each one.

    After every epoch the running average loss and macro-F1 are printed.
    These averages cover all batches seen so far, because the lists are
    never reset between epochs. The model is then evaluated with ``test``
    on the dev and test loaders when they are given. Whenever the sum of
    the three test F1 values improves, the weights are saved to
    ``best_model.pth``; the final weights go to ``last_model.pth``. Both
    files are written under ``opt.experiment_root``.

    ``lr_scheduler`` and ``logger`` are accepted but not used here.
    """
    use_cuda = torch.cuda.is_available() and opt.cuda
    device = torch.device(config.CUDA) if use_cuda else "cpu"

    best_state = None
    train_loss = []
    train_f = []
    best_t_macro_f, best_t_micro_f, best_t_strict_f = 0, 0, 0
    best_model_path = os.path.join(opt.experiment_root, "best_model.pth")
    last_model_path = os.path.join(opt.experiment_root, "last_model.pth")

    # These tensors are only consumed by the disabled hier_loss call below.
    p = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"
    prior = torch.tensor(util.create_prior(p), requires_grad=False,
                         dtype=torch.long).to(device)
    tune = torch.tensor(util.create_prior(p, config.BETA), requires_grad=False,
                        dtype=torch.float).to(device)
    mask = torch.tensor(util.create_mask(p), requires_grad=False,
                        dtype=torch.long).to(device)

    for epoch in range(opt.epochs):
        print(f"====Epoch: {epoch}====")
        model.train()

        for batch in tqdm(iter(tr_dataloader)):
            optim.zero_grad()

            # Move every tensor of the batch to the target device.
            (mention, mention_len, mention_neighbor,
             lcontext, rcontext, y) = (item.to(device) for item in batch)

            model_output = model(
                [mention, mention_len, mention_neighbor, lcontext, rcontext])

            # loss, gold, pred = hier_loss(model, model_output, y, opt, tune, prior, mask)
            loss, gold, pred = bce_loss(model, model_output, y, opt, "train")

            train_loss.append(float(loss.item()))
            precision, recall = e.loose_macro_PR(gold, pred, opt)
            train_f.append(e.f1_score(float(precision), float(recall)))

            loss.backward()
            optim.step()

        avg_loss = np.mean(train_loss)
        avg_f = np.mean(train_f)
        print(f"Avg train loss: {avg_loss}, Avg train macro-f1 score: {avg_f}")

        if dev_dataloader is not None:
            dev_ma, dev_mi, dev_str = test(opt, model, dev_dataloader)
            print(
                f"Model acc in dev data:\n"
                f" \nmacro: F1: {dev_ma[0]}, P: {dev_ma[1]}, R: {dev_ma[2]}"
                f" \nmicro: F1: {dev_mi[0]}, P: {dev_mi[1]}, R: {dev_mi[2]}"
                f" \nstrict: F1: {dev_str[0]}, P: {dev_str[1]}, R: {dev_str[2]}"
            )

        if test_dataloader is not None:
            test_ma, test_mi, test_str = test(opt, model, test_dataloader,
                                              record_result=False)
            print(
                f"Model acc in test data:\n"
                f" \nmacro: F1: {test_ma[0]}, P: {test_ma[1]}, R: {test_ma[2]}"
                f" \nmicro: F1: {test_mi[0]}, P: {test_mi[1]}, R: {test_mi[2]}"
                f" \nstrict: F1: {test_str[0]}, P: {test_str[1]}, R: {test_str[2]}"
            )

            # NOTE(review): best-model bookkeeping is nested under the test
            # branch because it reads test_ma/test_mi/test_str; the original
            # indentation was not recoverable -- confirm.
            best_total = best_t_macro_f + best_t_micro_f + best_t_strict_f
            current_total = test_ma[0] + test_mi[0] + test_str[0]
            if current_total > best_total:
                best_t_macro_f = test_ma[0]
                best_t_micro_f = test_mi[0]
                best_t_strict_f = test_str[0]
                best_state = model.state_dict()
                print(f"save best model in: {best_model_path}")
                torch.save(best_state, best_model_path)

            print(
                f"Best Model F values:"
                f"\nmacro: {best_t_macro_f}, micro: {best_t_micro_f}, strict: {best_t_strict_f}"
            )

    # NOTE(review): the final save is placed after the epoch loop -- confirm.
    torch.save(model.state_dict(), last_model_path)
def test(opt, model, test_dataloader, record_result=False):
    """Evaluate ``model`` and return metrics averaged over the batches.

    Precision, recall and F1 are computed per batch for the loose-macro,
    loose-micro and strict metrics and then divided by
    ``len(test_dataloader)``.

    Args:
        opt: Options object; ``corpus_dir`` and ``batch_size`` are read here
            and the object is forwarded to the loss and metric helpers.
        model: Called as
            ``model([mention, mention_len, mention_neighbor, lcontext, rcontext])``.
        test_dataloader: Sized iterable of 6-tuples
            ``(mention, mention_len, mention_neighbor, lcontext, rcontext, y)``
            whose items support ``.to(device)``.
        record_result: When true, ``util.record_result`` is called per batch.

    Returns:
        Three tuples ``(F1, precision, recall)`` for the loose-macro,
        loose-micro and strict metrics, in that order.
    """
    device = torch.device(config.CUDA) if torch.cuda.is_available() else "cpu"
    model.eval()

    macro_F1, micro_F1, strict_F1 = 0, 0, 0
    pmacro, remacro = 0, 0
    pmicro, remicro = 0, 0
    pstrict, restrict = 0, 0
    bgn = 0
    total = len(test_dataloader)

    # These tensors are only consumed by the disabled hier_loss call below.
    p = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"
    prior = torch.tensor(util.create_prior(p), requires_grad=False,
                         dtype=torch.long).to(device)
    tune = torch.tensor(util.create_prior(p, config.BETA), requires_grad=False,
                        dtype=torch.float).to(device)
    mask = torch.tensor(util.create_mask(p), requires_grad=False,
                        dtype=torch.long).to(device)

    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for batch in test_dataloader:
            mention, mention_len, mention_neighbor, lcontext, rcontext, y = batch
            mention = mention.to(device)
            mention_len = mention_len.to(device)
            mention_neighbor = mention_neighbor.to(device)
            lcontext = lcontext.to(device)
            rcontext = rcontext.to(device)
            y = y.to(device)

            model_output = model(
                [mention, mention_len, mention_neighbor, lcontext, rcontext])

            # loss, gold, pred = hier_loss(model, model_output, y, opt, tune, prior, mask)
            _, gold, pred = bce_loss(model, model_output, y, opt, "test")

            if record_result:
                util.record_result(gold, pred, opt, bgn)
            # Offset of the next batch, passed to util.record_result.
            bgn += opt.batch_size

            pma, rema = e.loose_macro_PR(gold, pred, opt)
            macro_F1 += e.f1_score(pma, rema)
            pmacro += pma
            remacro += rema

            pmi, remi = e.loose_micro_PR(gold, pred, opt)
            micro_F1 += e.f1_score(pmi, remi)
            pmicro += pmi
            remicro += remi

            pstr, restr = e.strict_PR(gold, pred, opt)
            strict_F1 += e.f1_score(pstr, restr)
            pstrict += pstr
            restrict += restr

    return (macro_F1/total, pmacro/total, remacro/total), \
           (micro_F1/total, pmicro/total, remicro/total), \
           (strict_F1/total, pstrict/total, restrict/total)
def test(opt, model, test_dataloader, record_result=False,
         analysis_result=False, mode=config.TEST):
    """Evaluate ``model`` and return metrics averaged over the batches.

    Precision, recall and F1 are computed per batch for the loose-macro,
    loose-micro and strict metrics and then divided by
    ``len(test_dataloader)``.

    Args:
        opt: Options object; ``corpus_dir`` and ``batch_size`` are read here
            and the object is forwarded to the loss and metric helpers.
        model: Called as
            ``model([mention, mention_len, lcontext, rcontext, mention_char])``.
        test_dataloader: Sized iterable of 6-tuples
            ``(mention, mention_len, lcontext, rcontext, mention_char, y)``
            whose items support ``.to(device)``.
        record_result: When true, ``util.record_result`` is called per batch.
        analysis_result: When true, gold/pred tensors of all batches are
            collected and passed to ``util.analysis_result`` at the end.
        mode: Accepted for interface compatibility; not used in this body.

    Returns:
        Three tuples ``(F1, precision, recall)`` for the loose-macro,
        loose-micro and strict metrics, in that order.
    """
    device = torch.device(config.CUDA) if torch.cuda.is_available() else "cpu"
    model.eval()

    macro_F1, micro_F1, strict_F1 = 0, 0, 0
    pmacro, remacro = 0, 0
    pmicro, remicro = 0, 0
    pstrict, restrict = 0, 0
    bgn = 0
    total = len(test_dataloader)
    gold_all, pred_all = [], []

    p = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"
    # Close the pickle file as soon as it has been loaded.
    with open(p, 'rb') as types_file:
        hierarchical_types = pickle.load(types_file)

    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for batch in test_dataloader:
            mention, mention_len, lcontext, rcontext, mention_char, y = batch
            mention = mention.to(device)
            mention_len = mention_len.to(device)
            lcontext = lcontext.to(device)
            rcontext = rcontext.to(device)
            mention_char = mention_char.to(device)
            y = y.to(device)

            model_output = model(
                [mention, mention_len, lcontext, rcontext, mention_char])

            _, gold, pred, prob = bce_loss(
                model, model_output, y, opt, hierarchical_types)

            if record_result:
                util.record_result(gold, pred, prob, opt, bgn)
            if analysis_result:
                gold_all.append(gold)
                pred_all.append(pred)
            # Offset of the next batch, passed to util.record_result.
            bgn += opt.batch_size

            pma, rema = e.loose_macro_PR(gold, pred, opt)
            macro_F1 += e.f1_score(pma, rema)
            pmacro += pma
            remacro += rema

            pmi, remi = e.loose_micro_PR(gold, pred, opt)
            micro_F1 += e.f1_score(pmi, remi)
            pmicro += pmi
            remicro += remi

            pstr, restr = e.strict_PR(gold, pred, opt)
            strict_F1 += e.f1_score(pstr, restr)
            pstrict += pstr
            restrict += restr

    if analysis_result:
        util.analysis_result(torch.cat(gold_all), torch.cat(pred_all))

    return (macro_F1/total, pmacro/total, remacro/total), \
           (micro_F1/total, pmicro/total, remicro/total), \
           (strict_F1/total, pstrict/total, restrict/total)