def main():
    """Train a perceptron tagger and evaluate it on the test corpus.

    Builds features from the training sentences with threshold ``idxs``,
    trains the perceptron for ``jdx`` iterations, tags the test set, and
    writes timing and accuracy information to the "result" file.
    """
    idxs = 3   # feature-count threshold passed to CFeatures
    jdx = 20   # number of perceptron training iterations
    # `with` guarantees the results file is flushed and closed even if
    # training raises (the original left the handle open).
    with open("result", "w") as results:
        d = Parser(utils.TRAIN)
        f = CFeatures(d.sentences, idxs)
        w = np.zeros(f.f_len)
        tt = time()
        results.write("Training model for {} iterations with threshold {}...\n".format(jdx, idxs))
        perc = Perceptron(d.sentences, w, f, utils.MODE)
        for i in range(jdx):
            perc.train()
        w = perc.getW()
        results.write("time in seconds: {}\n".format(time() - tt))
        t = Parser(utils.TEST)
        inf = Inference(w, t.sentences, f, utils.MODE)
        inf.tag_text(utils.TEST_R)
        res = evaluate(utils.TEST, utils.TEST_R)
        results.write("correct: {}\n\n\n".format(res))
# net_A2.load_state_dict(p1) # net1的参数更新为平均参数 # net_B2.load_state_dict(p1) # net2的参数更新为平均参数 p1 = net_A3.state_dict() # 拿到net1参数字典 p2 = net_B3.state_dict() # 拿到net2参数字典 for key, value in p2.items(): # p1等于两个字典平均 p1[key] = (p1[key] + value) / 2 net_A3.load_state_dict(p1) # net1的参数更新为平均参数 net_B3.load_state_dict(p1) # net2的参数更新为平均参数 loss = (loss1 + loss2) / 2 g1.ndata['h'] = features1 g2.ndata['h'] = features2 upload_en(g1, list_up1) # 重新给服务器上的字典赋值初始特征 upload_en(g2, list_up2) acc1 = evaluate(net_A1, net_A2, net_A3, g, features, labels, test_mask) acc.append(acc1) # print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format( # epoch, loss.item(), np.mean(dur))) print( "Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format( epoch, loss.item(), acc1, np.mean(dur))) import pandas as pd acc = pd.DataFrame(data=acc) acc.to_csv('./test2.csv')
def validate(val_loader, net, criterion, optimizer, epoch, iter_num,
             train_args, visualize):
    """Run sliding-window validation over `val_loader` and log the results.

    Each sample is delivered as a batch of overlapping slices plus their
    placement info; per-slice network outputs are accumulated into a
    full-size prediction map and normalised by the per-pixel overlap count.
    Saves a snapshot (and optionally colourised prediction images) whenever
    the mean validation loss improves on ``train_args['best_record']``.
    Returns the average validation loss.
    """
    net.eval()
    val_loss = AverageMeter()
    # Full-resolution ground-truth / prediction buffers, one slice per sample.
    # assumes each full image is (longer_size/2, longer_size) — TODO confirm
    gts_all = np.zeros((len(val_loader), int(
        args['longer_size'] / 2), int(args['longer_size'])), dtype=int)
    predictions_all = np.zeros((len(val_loader), int(
        args['longer_size'] / 2), int(args['longer_size'])), dtype=int)
    for vi, data in enumerate(val_loader):
        input, gt, slices_info = data
        # input: 5-D, gt: 4-D, slices_info: 3-D (enforced below).
        assert len(input.size()) == 5 and len(gt.size()) == 4 and len(
            slices_info.size()) == 3
        # Move the slice dimension first so we can iterate slice-by-slice.
        input.transpose_(0, 1)
        gt.transpose_(0, 1)
        slices_info.squeeze_(0)
        assert input.size()[3:] == gt.size()[2:]
        # Per-pixel overlap counter and class-score accumulator.
        count = torch.zeros(int(args['longer_size'] / 2),
                            args['longer_size'])  # .cuda()
        output = torch.zeros(cityscapes.num_classes, int(
            args['longer_size'] / 2), args['longer_size'])  # .cuda()
        slice_batch_pixel_size = input.size(1) * input.size(3) * input.size(4)
        for input_slice, gt_slice, info in zip(input, gt, slices_info):
            input_slice = Variable(input_slice)  # .cuda()
            gt_slice = Variable(gt_slice)  # .cuda()
            output_slice = net(input_slice)
            assert output_slice.size()[2:] == gt_slice.size()[1:]
            assert output_slice.size()[1] == cityscapes.num_classes
            # info = (row0, row1, col0, col1, valid_h, valid_w) — presumably;
            # verify against the loader that produces slices_info.
            output[:, info[0]:info[1], info[2]:info[3]] += output_slice[
                0, :, :info[4], :info[5]].data
            gts_all[vi, info[0]:info[1], info[2]:info[3]] += gt_slice[
                0, :info[4], :info[5]].data.cpu().numpy()
            count[info[0]:info[1], info[2]:info[3]] += 1
            # NOTE(review): `.data[0]` is the pre-0.4 PyTorch scalar access;
            # modern PyTorch would use `.item()` — confirm the pinned version.
            val_loss.update(
                criterion(output_slice, gt_slice).data[0],
                slice_batch_pixel_size)
        # Average overlapping contributions.
        output /= count
        # NOTE(review): in-place `/=` of an int numpy array by an int array
        # relies on old numpy casting rules (and assumes labels are identical
        # across overlaps) — verify this doesn't raise on current numpy.
        gts_all[vi, :, :] /= count.cpu().numpy().astype(int)
        predictions_all[vi, :, :] = output.max(0)[1].squeeze_(0).cpu().numpy()
        print('validating: %d / %d' % (vi + 1, len(val_loader)))
    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all,
                                              cityscapes.num_classes)
    # New best validation loss: record metrics, save model/optimizer
    # snapshots and (optionally) colourised prediction images.
    if val_loss.avg < train_args['best_record']['val_loss']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['iter'] = iter_num
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_iter_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
            epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc,
            optimizer.param_groups[1]['lr'])
        torch.save(net.state_dict(),
                   os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
        torch.save(
            optimizer.state_dict(),
            os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))
        if train_args['val_save_to_img_file']:
            to_save_dir = os.path.join(ckpt_path, exp_name,
                                       '%d_%d' % (epoch, iter_num))
            check_mkdir(to_save_dir)
        val_visual = []
        for idx, data in enumerate(zip(gts_all, predictions_all)):
            gt_pil = cityscapes.colorize_mask(data[0])
            predictions_pil = cityscapes.colorize_mask(data[1])
            if train_args['val_save_to_img_file']:
                predictions_pil.save(
                    os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            val_visual.extend([
                visualize(gt_pil.convert('RGB')),
                visualize(predictions_pil.convert('RGB'))
            ])
        val_visual = torch.stack(val_visual, 0)
        val_visual = torchvision.utils.make_grid(val_visual, nrow=2, padding=5)
        writer.add_image(snapshot_name, val_visual)
    print(
        '-----------------------------------------------------------------------------------------------------------'
    )
    print(
        '[epoch %d], [iter %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' %
        (epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print(
        'best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d], '
        '[iter %d]' % (train_args['best_record']['val_loss'],
                       train_args['best_record']['acc'],
                       train_args['best_record']['acc_cls'],
                       train_args['best_record']['mean_iu'],
                       train_args['best_record']['fwavacc'],
                       train_args['best_record']['epoch'],
                       train_args['best_record']['iter']))
    print(
        '-----------------------------------------------------------------------------------------------------------'
    )
    # TensorBoard scalars, logged every validation pass.
    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    net.train()
    return val_loss.avg
# Evaluate a trained DeepJet model on a test DataCollection.
sampleDatasets_cpf_sv = ["db", "cpf", "sv"]
sampleDatasets_sv = ["db", "sv"]

# select model and eval functions
from models.DeepJet_models_final import conv_model_final as trainingModel
from DeepJetCore.training.training_base import training_base
from funcs import loadModel, evaluate

# NOTE(review): sampleDatasets_pf_cpf_sv is presumably defined earlier in the
# file (only the cpf_sv / sv variants are visible here) — confirm.
inputDataset = sampleDatasets_pf_cpf_sv
trainDir = opts.d
inputTrainDataCollection = opts.t
inputTestDataCollection = opts.i
LoadModel = True
removedVars = None

# (dead `if True:` wrapper removed — it only added a level of indentation)
evalModel = loadModel(trainDir, inputTrainDataCollection, trainingModel,
                      LoadModel, inputDataset, removedVars)
evalDir = opts.o

from DeepJetCore.DataCollection import DataCollection

testd = DataCollection()
testd.readFromFile(inputTestDataCollection)

# Refuse to clobber an existing output directory.
if os.path.isdir(evalDir):
    raise Exception('output directory: %s must not exist yet' % evalDir)
os.mkdir(evalDir)

df = evaluate(testd, evalModel, evalDir)
from Inference import *
from main import *
from funcs import evaluate
import pickle
import utils

# --- inference on the TEST set ---
t = Parser(utils.TEST)
d = Parser(utils.TRAIN)
f = Features(d.sentences)
# Load the trained weight vector; `with` closes the file handle
# (the original `pickle.load(open(...))` leaked it).
with open(utils.W_VEC, 'rb') as w_file:
    w = pickle.load(w_file)
inf = Inference(w, t.sentences, f)
inf.tag_text(utils.TEST_R)
evaluate(utils.TEST, utils.TEST_R)

# --- inference on the TRAIN set (alternate mode, kept for reference) ---
# t = Parser(utils.TEST)
# d = Parser(utils.TRAIN)
# f = Features(d.sentences)
# w = pickle.load(open(utils.W_VEC, 'rb'))
# inf = Inference(w, d.sentences, f)
# inf.tag_text(utils.TRAIN_R)
# evaluate(utils.TRAIN, utils.TRAIN_R)
# NOTE(review): Python 2 fragment of a per-sample experiment loop; `sample`,
# `ratings`, `user_counts`, `movie_counts`, `F`, `train_test`, `all_results`
# and `time` come from outside this excerpt, and the inner loop appears to
# continue past the end of it.
i, j = sample  # i = number of users, j = number of items in this sample
subset = F.sample(ratings, user_counts, movie_counts, i, j)
# Smaller item samples get a smaller range of neighbourhood sizes.
if j == 100:
    max_k = 25
else:
    max_k = 45
k_s = range(5, max_k, 5)
train, test = train_test(subset)
print "Running Baseline, KNN on the dataset with {} users and {} items".format(
    i, j)
# Baseline model: train, evaluate, and record the result.
base, base_test = F.train_baseline(subset)
base_eval = F.evaluate(base, subset, base_test)
base_eval['name'] = 'baseline'
base_eval['sample'] = sample
all_results.append(base_eval)
for k in k_s:
    # Derive the MF factor count from k, capped at 25.
    f = k
    if f > 25:
        f = 25
    # MF model
    t_0 = time.time()
    mf, mf_test = F.train_matrix(subset, f, 5)
# NOTE(review): Python 2 fragment of a hyper-parameter sweep; `ratings`,
# `samples`, `F` and `time` are defined outside this excerpt, and the inner
# loop appears to continue past the end of it.
k_s = range(5, 60, 5)  # candidate neighbourhood sizes (not used in this excerpt)
factor_sizes = range(5, 60, 5)  # MF latent-factor sizes to sweep
top_k = 5
all_results = []
user_value_counts = ratings['UserId'].value_counts()
movie_value_counts = ratings['MovieId'].value_counts()
for sample in samples:
    i, j = sample  # i = number of users, j = number of items
    _dataset = F.sample(ratings, user_value_counts, movie_value_counts, i, j)
    print "Running Baseline, MF, KNN on the dataset with {} users and {} items".format(i, j)
    # Baseline model: train, evaluate, and record the result.
    base, base_test = F.train_baseline(_dataset)
    base_eval = F.evaluate(base, _dataset, base_test)
    base_eval['name'] = 'baseline'
    base_eval['sample'] = sample
    all_results.append(base_eval)
    for f in factor_sizes:
        t_0 = time.time()
        mf, mf_test = F.train_matrix(_dataset, f, 5)
        print "Running MF with F of {}".format(f)
        results = F.evaluate(mf, _dataset, mf_test, top_k)
        # add k, and sample size to results
        results['sample'] = sample
        results['f'] = f
plot = False  # set True to plot the projected class distributions

#####################################################
# Linear-discriminant classification over the first two datasets: project
# samples onto inv(pooled covariance) @ (mean difference), then threshold.
for i in range(2):
    x = pd.read_csv("../Data/" + datasets[i] + ".csv")
    xtrain = x
    # NOTE(review): ytrain is never read below — ytest carries the labels
    # used for evaluation; confirm before removing.
    ytrain = xtrain["y"].to_numpy()
    ytest = xtrain["y"].to_numpy()
    xpos, xneg, x, xtest = funcs.generate_two_classes(xtrain, xtrain)
    # Discriminant direction: inverse pooled covariance times mean difference.
    mean_diff = xpos.mean(0) - xneg.mean(0)
    cov_pos = funcs.covariance_matrix(xpos)
    cov_neg = funcs.covariance_matrix(xneg)
    pooled_covariance = cov_pos + cov_neg
    inverse_pooled_covariance = np.linalg.inv(pooled_covariance)
    classifier = inverse_pooled_covariance.dot(mean_diff)
    # 1-D projections of each class (used only for plotting).
    xpos_transform = xpos.dot(classifier)
    xneg_transform = xneg.dot(classifier)
    predictions = funcs.classify(xtest, classifier, thresholds[i], datasets[i])
    print('Results for ' + datasets[i])
    funcs.evaluate(predictions, ytest, datasets[i])
    print('\n\n\n')
    if plot:  # idiomatic truthiness check (was `plot == True`)
        funcs.plot_normal(xpos_transform, xneg_transform, datasets[i])