def main():
    """Evaluate the captcha CNN saved in 'model.pkl' on the eval set.

    Prints a running accuracy every 200 samples and returns the final
    accuracy as a float in [0, 1].
    """
    cnn = CNN()
    cnn.eval()
    cnn.load_state_dict(torch.load('model.pkl'))
    print("load cnn net.")
    eval_dataloader = dataset.get_eval_data_loader()
    correct = 0
    total = 0
    # fix: inference must not build autograd graphs
    with torch.no_grad():
        for i, (images, labels) in enumerate(eval_dataloader):
            vimage = Variable(images)
            output = cnn(vimage)
            # Decode the 4 captcha characters; each character owns a slice of
            # ALL_CHAR_SET_LEN logits (replaces four copy-pasted blocks).
            chars = []
            for k in range(4):
                seg = output[0, k * setting.ALL_CHAR_SET_LEN:
                             (k + 1) * setting.ALL_CHAR_SET_LEN]
                chars.append(setting.ALL_CHAR_SET[np.argmax(seg.data.numpy())])
            predict_label = ''.join(chars)
            true_label = encoding.decode(labels.numpy()[0])
            total += labels.size(0)
            if predict_label == true_label:
                correct += 1
            if total % 200 == 0:
                print('Test Accuracy of the model on the %d eval images: %f %%'
                      % (total, 100 * correct / total))
    print('Test Accuracy of the model on the %d eval images: %f %%'
          % (total, 100 * correct / total))
    return correct / total
def main():
    """Run the captcha CNN saved in 'model.pkl' over the predict set and
    print the decoded 4-character string for each sample."""
    cnn = CNN()
    cnn.eval()
    cnn.load_state_dict(torch.load('model.pkl'))
    print("load cnn net.")
    predict_dataloader = dataset.get_predict_data_loader()
    # fix: inference must not build autograd graphs
    with torch.no_grad():
        for i, (images, labels) in enumerate(predict_dataloader):
            vimage = Variable(images)
            output = cnn(vimage)
            # Decode the 4 captcha characters; each character owns a slice of
            # ALL_CHAR_SET_LEN logits (replaces four copy-pasted blocks).
            chars = []
            for k in range(4):
                seg = output[0, k * captcha_setting.ALL_CHAR_SET_LEN:
                             (k + 1) * captcha_setting.ALL_CHAR_SET_LEN]
                chars.append(captcha_setting.ALL_CHAR_SET[
                    np.argmax(seg.data.numpy())])
            print(''.join(chars))
def predict():
    """Average the predictions of the 5 cross-validation models over the test
    set and write them (with 1-based ids) to ./predictions.csv."""
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    df = pd.read_csv(config.TEST_PATH, header=None)
    dataset = PicDataset(df.loc[:, 1:])
    n = len(dataset)
    n_models = 5
    preds = np.zeros((n, 256))
    # fix: inference must not build autograd graphs
    with torch.no_grad():
        for i in range(n_models):
            model = CNN()
            model.load_state_dict(torch.load(f'./models/model_{i}.bin'))
            model.to(device)
            model.eval()
            # Accumulate directly into preds (the per-model temp buffer in the
            # original added nothing).
            for j in range(n):
                x, _ = dataset[j]
                x = x.to(device).unsqueeze(0)
                y = model(x)
                preds[j, :] += y.detach().cpu().numpy().reshape(-1)
    preds /= n_models
    # fix: id column now follows the dataset length instead of the
    # hard-coded 1..920 range (np.arange(1, 921)).
    ids = np.arange(1, n + 1).reshape(-1, 1)
    out = pd.DataFrame(np.concatenate([ids, preds], axis=1),
                       columns=np.arange(257))
    out[0] = out[0].astype('int')
    out.to_csv('./predictions.csv', index=False)
def display_dataset(model_path, num_feat):
    """Visualize test-set images, optionally overlaying predictions from a
    saved CNN checkpoint.

    model_path -- path to a state_dict, or None to show raw images only.
    num_feat   -- feature-count hyperparameter forwarded to CNN().
    """
    model = None
    if model_path is not None:
        model = CNN(kernel=3, num_features=num_feat)
        model.load_state_dict(torch.load(model_path))
        model.eval()
    loaders, datasets = initialize_loader(6, num_workers=1, shuffle=False)
    trainl, _, _ = loaders
    traind, testd = datasets
    testd.visualize_images(delay=1200, model=model, start=0, scale=2)
def predict(model: CNN, image: np.ndarray, device: torch.device):
    """Classify a single image with an MNIST-style model.

    The image is resized to 28x28, scaled to [0, 1], and fed as a
    (1, 1, 28, 28) batch. Returns the argmax class index as a tensor.
    """
    image_resized = cv.resize(image, (28, 28))
    image_tensor = torch.from_numpy(image_resized).to(device, torch.float)
    tensor_norm = image_tensor / 255  # scale pixel values to [0, 1]
    # fix: renamed `input` -> `batch` (was shadowing the builtin input())
    batch = tensor_norm.view((1, 1, 28, 28))
    with torch.no_grad():
        model.eval()
        model.to(device, torch.float)
        outputs = model(batch)
        _, pred = outputs.max(1)
        return pred
def main(args):
    """Encode each row of a yelp CSV with a trained text CNN and write one
    JSON record per line: {"label": ..., "avg_vec": [...]}.

    Expects args.model_path, args.input_yelp_file, args.output_json_file.
    """
    print("Start!")
    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM,
                DROPOUT, is_train=False)
    model.load_state_dict(torch.load(args.model_path, map_location=device))
    model = model.to(device)
    print("Load model success!")
    vocab, _, _ = data_loader(amazon_root, load_val=False, load_train=False)
    print("Load vocab success!")
    label_list = []
    context_list = []
    model.eval()

    def get_vec(sentence, min_len=5):
        # Tokenize, pad up to the widest conv filter, index, and run the CNN.
        tokenized = [tok.text for tok in spacy_en.tokenizer(sentence)]
        if len(tokenized) < min_len:
            tokenized += ['<pad>'] * (min_len - len(tokenized))
        indexed = [vocab.stoi[t] for t in tokenized]
        tensor = torch.LongTensor(indexed).to(device)
        tensor = tensor.unsqueeze(1)  # (seq_len,) -> (seq_len, 1) batch dim
        with torch.no_grad():
            res = model(tensor)
        return res

    print("Start transfer to vector!")
    with open(args.input_yelp_file, 'rt', encoding='utf-8') as fin:
        csv_header = csv.reader(fin, delimiter=',')
        for i, row in enumerate(csv_header):
            label_list.append(row[0])
            context_list.append(get_vec(row[1]).cpu())
    # Drop the CSV header row (it was encoded like any other row above).
    label_list = label_list[1:]
    context_list = context_list[1:]
    # fix: this was a bare string expression with no effect — now printed
    print("Start to write to json!")
    with open(args.output_json_file, 'wt') as fout:
        for i, context in enumerate(context_list):
            average_vec_dict = {}
            average_vec_dict['label'] = str(label_list[i])
            average_vec_dict['avg_vec'] = \
                context_list[i].squeeze(0).numpy().tolist()
            json.dump(average_vec_dict, fout)
            fout.write('\n')
def test(img_path, model_path, use_gpu=False):
    """Predict the captcha string in `img_path` using a saved model.

    (Original author's note: "!!! Useless !!!")
    Preprocessing is delegated to dataset.transforms; returns the decoded
    string over the 0-9/A-Z alphabet.
    """
    model = CNN()
    model.load(model_path)
    if use_gpu:
        model.cuda()
    char_table = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # fix: removed a torchvision T.Compose pipeline that was built here but
    # never applied — the actual preprocessing happens in dataset.transforms.
    data = dataset.transforms(img_path, pre=False).unsqueeze(dim=0)
    model.eval()  # switch to inference mode
    with torch.no_grad():
        if use_gpu:
            data = data.cuda()
        score = model(data)
        score = decode(score)
        score = ''.join(map(lambda i: char_table[i], score[0]))
        return score
class Classifier:
    """Train and evaluate a CNN on MNIST.

    Owns the MNIST datasets/loaders, the model, and checkpointing.
    A constructor `momentum` of 0.0 selects plain SGD in train();
    any other value enables the momentum/nesterov/weight-decay branch.
    """

    def __init__(self, ds_path, lr, iterations, batch_size, hidden_layers_out,
                 print_freq, save_dir, momentum, dropout):
        # Download (if needed) and load the MNIST splits as tensors.
        self.train_data = torchvision.datasets.MNIST(
            ds_path, train=True, transform=transforms.ToTensor(),
            download=True)
        self.test_data = torchvision.datasets.MNIST(
            ds_path, train=False, transform=transforms.ToTensor(),
            download=True)
        self.train_loader = torch.utils.data.DataLoader(
            self.train_data, batch_size=batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            self.test_data, batch_size=batch_size)
        self.save_dir = save_dir
        # momentum == 0.0 selects the plain-SGD optimizer branch in train().
        self.is_momentum = (momentum != 0.0)
        # Set Model Hyperparameters
        self.learning_rate = lr
        self.iterations = iterations  # number of epochs run by train()
        self.print_freq = print_freq  # log every print_freq batches
        self.model = CNN(hidden_layers_out, dropout=dropout)
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            # NOTE(review): the model is moved to CUDA but batches in
            # train()/test() are not — confirm this path works on GPU hosts.
            self.model = self.model.cuda()

    def train(self, momentum, nesterov, weight_decay):
        """Run `self.iterations` epochs of SGD.

        Returns four per-epoch history lists:
        (train_loss, train_acc, test_loss, test_acc).
        """
        train_loss_hist = []
        train_acc_hist = []
        test_loss_hist = []
        test_acc_hist = []
        best_pred = 0.0  # best validation accuracy seen so far
        end = time.time()
        for itr in range(self.iterations):
            self.model.train()
            # NOTE(review): the optimizer is re-created every epoch, which
            # resets its momentum buffers — confirm this is intentional.
            if self.is_momentum:
                optimizer = optim.SGD(self.model.parameters(),
                                      lr=self.learning_rate,
                                      momentum=momentum, nesterov=nesterov,
                                      weight_decay=weight_decay)
            else:
                optimizer = optim.SGD(self.model.parameters(),
                                      lr=self.learning_rate)
            losses = AverageMeter()
            batch_time = AverageMeter()
            top1 = AverageMeter()
            for i, (x_batch, y_batch) in enumerate(self.train_loader):
                # Compute output for example
                logits = self.model(x_batch)
                loss = self.model.loss(logits, y_batch)
                # Update Mean loss for current iteration
                losses.update(loss.item(), x_batch.size(0))
                prec1 = self.accuracy(logits.data, y_batch, k=1)
                top1.update(prec1.item(), x_batch.size(0))
                # compute gradient and do SGD step
                loss.backward()
                optimizer.step()
                # Set grads to zero for new iter
                optimizer.zero_grad()
                batch_time.update(time.time() - end)
                end = time.time()
                if i % self.print_freq == 0:
                    print('Epoch: [{0}][{1}/{2}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Acc {top1.val:.3f} ({top1.avg:.3f})'.format(
                              itr, i, len(self.train_loader),
                              batch_time=batch_time, loss=losses, top1=top1))
            # evaluate on validation set
            test_loss, test_prec1 = self.test(self.test_loader)
            train_loss_hist.append(losses.avg)
            train_acc_hist.append(top1.avg)
            test_loss_hist.append(test_loss)
            test_acc_hist.append(test_prec1)
            # Store best model
            is_best = best_pred < test_prec1
            if is_best:
                best_pred = test_prec1
            self.save_checkpoint(is_best, (itr + 1),
                                 self.model.state_dict(), self.save_dir)
        return (train_loss_hist, train_acc_hist, test_loss_hist,
                test_acc_hist)

    def test(self, batch_loader):
        """Evaluate on `batch_loader`; returns (mean loss, mean top-1 acc)."""
        self.model.eval()
        losses = AverageMeter()
        batch_time = AverageMeter()
        top1 = AverageMeter()
        end = time.time()
        for i, (x_batch, y_batch) in enumerate(batch_loader):
            with torch.no_grad():
                logits = self.model(x_batch)
                loss = self.model.loss(logits, y_batch)
                # Update Mean loss for current iteration
                losses.update(loss.item(), x_batch.size(0))
                prec1 = self.accuracy(logits.data, y_batch, k=1)
                top1.update(prec1.item(), x_batch.size(0))
                batch_time.update(time.time() - end)
                end = time.time()
                if i % self.print_freq == 0:
                    print('Epoch: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Acc {top1.val:.3f} ({top1.avg:.3f})'.format(
                              i, len(batch_loader), batch_time=batch_time,
                              loss=losses, top1=top1))
        print(' * Acc {top1.avg:.3f}'.format(top1=top1))
        return (losses.avg, top1.avg)

    def accuracy(self, output, y, k=1):
        """Computes the precision@k for the specified values of k"""
        # Rehape to [N, 1]
        target = y.view(-1, 1)
        _, pred = torch.topk(output, k, dim=1, largest=True, sorted=True)
        correct = torch.eq(pred, target)
        return torch.sum(correct).float() / y.size(0)

    def save_checkpoint(self, is_best, epoch, state, save_dir,
                        base_name="chkpt_plain"):
        """Saves checkpoint to disk"""
        directory = save_dir
        filename = base_name + ".pth.tar"
        if not os.path.exists(directory):
            os.makedirs(directory)
        # NOTE(review): plain string concatenation assumes save_dir ends with
        # a path separator — confirm against callers.
        filename = directory + filename
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            directory + base_name + '__model_best.pth.tar')
torch.load(os.path.join(MNIST_tran_ini, 'CNN={}.pth'.format('animal')))) optimizer = optim.SGD(network.parameters(), lr=1e-3, momentum=0.9, weight_decay=stat['weight_decay']) # In[11]: ###set up before transfer learning stat['T'] = int((len(stat['source']) / stat['bsize']) * stat['n_epochs']) stat['interval'] = int(stat['T'] / 50) stat['la'][0] = 0 ######### p_{w_0}( y| x ) on source task start = time.time() network.eval() dsize = stat['dsize'] with torch.no_grad(): #for data in testloader: k1 = 0 for isource, ds in enumerate(stat['svl']): k2 = 0 xs, ys = ds xs, ys = xs.to(stat['dev']).unsqueeze(1), ys.to( stat['dev']).unsqueeze(1) for itarget, dt in enumerate(stat['tvl']): xt, yt = dt xt, yt = xt.to(stat['dev']).unsqueeze(0), yt.to( stat['dev']).unsqueeze(0) xmix = (xs.repeat(1, dsize, 1, 1, 1)).mul(1 - stat['la'][0]) + ( xt.repeat(dsize, 1, 1, 1, 1)).mul(stat['la'][0])
model.train() # Make sure the model is in the correct state (for Dropout...) for data in train_loader: optimizer.zero_grad() # We should set zero to optimizer train_pred = model(data[0].cuda()) batch_loss = loss(train_pred, data[1].cuda()) # Be careful that pred and label must be on CPU or GPU simultaneously. batch_loss.backward() # Use back propagation to calculate gradient optimizer.step() # The function can be called once the gradients are computed using e.g. backward() train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy()) train_loss += batch_loss.item() train_loss = train_loss / train_set.__len__() * BATCH train_acc /= train_set.__len__() if sys.argv[3] == '1': model.eval() with torch.no_grad(): # To tell Pytorch not to trace gradient for data in val_loader: val_pred = model(data[0].cuda()) batch_loss = loss(val_pred, data[1].cuda()) val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy()) val_loss += batch_loss.item() val_loss = val_loss / val_set.__len__() * BATCH val_acc /= val_set.__len__() print("\nEpoch = %03d/%03d, loss = %3.6f, acc = %3.6f, val_loss = %3.6f, val_acc = %3.6f" % (epoch + 1, EPOCH, train_loss, train_acc, val_loss, val_acc), flush=True) if val_acc > best_acc: print("Saving model...") torch.save(model.state_dict(), "model.pkl") best_acc = val_acc
# --- Fragment: tail of a mini-batch generator whose `def` is above this
# view; yields one sorted batch of examples per iteration.
for i in trange(batch_num, desc='get mini_batch data'):
    indices = index_array[i * batch_size:(i + 1) * batch_size]
    examples = [data[idx] for idx in indices]
    # Sort by sentence length (descending), as packed-RNN-style consumers
    # conventionally expect.
    examples = sorted(examples, key=lambda x: len(x[1]), reverse=True)
    src_sents = [e for e in examples]
    yield src_sents


def set_seed():
    """Seed random, numpy and torch (CPU + CUDA) for reproducibility."""
    random.seed(3344)
    np.random.seed(3344)
    torch.manual_seed(3344)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(3344)


# Module-level setup: load the vocabulary, seed RNGs, and restore the
# trained text CNN in inference mode.
vocab = Vocab.load('./vocab.json')
label_map = vocab.labels
set_seed()
cnn_model = CNN(len(vocab.vocab), 300, 100, [2, 3, 4], len(label_map),
                dropout=0.2)
# map_location remaps a checkpoint saved on cuda:6 onto cuda:0.
cnn_model.load_state_dict(
    torch.load('classifa-best-CNN.th', map_location={'cuda:6': 'cuda:0'}))
cnn_model.to(device)
cnn_model.eval()

if __name__ == '__main__':
    pass
# --- Fragment: tail of a training loop plus the test pass; the enclosing
# epoch/batch loops and the optimizer/criterion setup are above this view.
graphs = Variable(graphs).cuda()
labels = Variable(labels).cuda()
optimizer.zero_grad()
outputs = cnn(graphs)
if torch.cuda.is_available():
    loss = criterion(outputs, labels.cuda())
else:
    loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
total_loss += loss.data
# Periodic progress log (every 10 epochs).
if epoch % 10 == 0:
    print("Epoch %i: Loss = %.2f" % (epoch, total_loss))
# Test the Model
cnn.eval()
correct = 0
total = 0
for graphs, labels in test_loader:
    graphs = Variable(graphs).cuda()
    outputs = cnn(graphs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    # Compare on whichever device the labels can reach.
    if torch.cuda.is_available():
        correct += (predicted == labels.cuda()).sum()
    else:
        correct += (predicted == labels).sum()
test_acc = (100 * correct / total)
test_accs.append(test_acc)
def train_test(Q, x_train, x_test, y_train, y_test, batch_size):
    """Train the graph-CNN binary classifier and evaluate it.

    Returns (TP, TN, FP, FN, precision, recall, l, s) where l is the array
    of true labels and s the positive-class scores for the test samples.
    """
    train_loader, test_loader = create_train_test_loaders(
        Q, x_train, x_test, y_train, y_test, batch_size)
    use_cuda = torch.cuda.is_available()
    cnn = CNN(input_size=num_filters, hidden_size=hidden_size,
              num_classes=np.unique(y).size, dim=dim,
              num_kernels=num_kernels,
              max_document_length=max_document_length)
    if use_cuda:
        cnn.cuda()
    criterion = nn.CrossEntropyLoss().cuda() if use_cuda \
        else nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        for i, (graphs, labels) in enumerate(train_loader):
            graphs = Variable(graphs)
            labels = Variable(labels)
            # fix: move inputs to the model's device — the original fed CPU
            # tensors to a (possibly) CUDA model.
            if use_cuda:
                graphs = graphs.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            outputs = cnn(graphs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    # Test the Model
    cnn.eval()
    correct = 0
    total = 0
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    predict = []
    label = []
    output = []
    with torch.no_grad():
        for graphs, labels in test_loader:
            graphs = Variable(graphs)
            # fix: the original called labels.cuda() unconditionally below,
            # crashing on CPU-only hosts; device handling is now consistent.
            if use_cuda:
                graphs = graphs.cuda()
                labels = labels.cuda()
            outputs = cnn(graphs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
            # Confusion-matrix counts via arithmetic on {0,1} labels:
            TP += (predicted + labels == 2).sum()        # pred 1, true 1
            FP += (predicted * 5 + labels == 5).sum()    # pred 1, true 0
            FN += (predicted + labels * 5 == 5).sum()    # pred 0, true 1
            TN += (predicted + labels == 0).sum()        # pred 0, true 0
            predict.append(predicted)
            label.append(labels)
            output.append(outputs.data)
    if TP + FP == 0:
        precision = 0
    else:
        precision = TP / (TP + FP)
    if TP + FN == 0:
        recall = 0
    else:
        recall = TP / (TP + FN)
    # NOTE(review): the flattening below indexes one sample per batch entry,
    # which assumes the test loader uses batch_size 1 — confirm with callers.
    l = np.zeros((len(label)))
    for i in range(len(label)):
        l[i] = int(label[i])
    s = np.zeros((len(output)))
    for i in range(len(output)):
        s[i] = output[i][0][1]
    return TP, TN, FP, FN, precision, recall, l, s
def train():
    """Train the captcha CNN with AdamW + ReduceLROnPlateau, evaluating on
    the test set and checkpointing after every epoch."""
    transforms_train = Compose(
        [ToTensor(), RandomAffine(10, translate=(0.02, 0.05))])
    train_dataset = CaptchaData('./set-train', transform=transforms_train)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                                   num_workers=0, shuffle=True,
                                   drop_last=True)
    test_data = CaptchaData('./set-test', transform=None)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=0, shuffle=True, drop_last=True)
    print('train set', len(train_dataset))
    print('test set', len(test_data))
    cnn = CNN()
    if torch.cuda.is_available():
        cnn.cuda()
    if restor:
        cnn.load_state_dict(torch.load(model_path))
    # fix: weight_decay was the boolean True, which AdamW coerces to a decay
    # of 1.0 — pathologically strong; use AdamW's documented default 0.01.
    optimizer = torch.optim.AdamW(cnn.parameters(), lr=base_lr,
                                  weight_decay=0.01)
    criterion = nn.MultiLabelSoftMarginLoss()
    sche = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=2, threshold=.002, verbose=True)
    for epoch in range(max_epoch):
        start_ = time.time()
        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            img = Variable(img)
            target = Variable(target)
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('train_loss: {:.4}|train_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        # Reset the meters for the evaluation pass.
        loss_history = []
        acc_history = []
        cnn.eval()
        with torch.no_grad():
            for img, target in test_data_loader:
                img = Variable(img)
                target = Variable(target)
                if torch.cuda.is_available():
                    img = img.cuda()
                    target = target.cuda()
                output = cnn(img)
                loss = criterion(output, target)
                acc = calculat_acc(output, target)
                acc_history.append(float(acc))
                loss_history.append(float(loss))
        test_loss = torch.mean(torch.Tensor(loss_history))
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            test_loss,
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
        torch.save(cnn.state_dict(), model_path % epoch)
        sche.step(test_loss)  # plateau scheduler tracks the test loss
def train():
    """Train the captcha CNN, validating every 10 iterations and saving one
    checkpoint per epoch under ./model/."""
    # Build the network
    cnn = CNN(IMAGE_WIDTH, IMAGE_HEIGHT, CAPTCHA_LEN)
    print(cnn)
    # Loss for multi-label (one-hot per character) captcha targets
    criterion = nn.MultiLabelSoftMarginLoss()
    # Optimizer
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LEARNING_RAGE)
    # Transforms shared by train/test datasets
    trams = transform.Compose([
        # Grayscale() would change the channel count to 1 and break the model
        # transform.Grayscale(),
        transform.ToTensor()
    ])
    # Training set
    train_dataset = Captcha_DataSets(TRAIN_IMAGE_FILE, trams)
    train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True,
                                  num_workers=0)
    # Test set
    test_dataset = Captcha_DataSets(TEST_IMAGE_FILE, trams)
    test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True,
                                 num_workers=0)
    for epoch in range(NUM_EPOCH):
        # Switch to training mode
        cnn.train(True)
        for iter_num, (images, labels) in enumerate(train_dataloader):
            images = Variable(images)
            labels = Variable(labels.float())
            predict_labels = cnn(images)
            loss = criterion(predict_labels, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if iter_num % 10 == 0:
                cnn.eval()
                # fix: accumulate a float via .item() instead of summing
                # tensors into test_loss
                test_loss = 0.0
                with torch.no_grad():
                    for i, (t_images, t_labels) in enumerate(test_dataloader):
                        t_images = Variable(t_images)
                        t_labels = Variable(t_labels.float())
                        t_predict = cnn(t_images)
                        test_loss += criterion(t_predict, t_labels).item()
                # fix: restore training mode — the original left the net in
                # eval() here, so the remaining iterations of this epoch
                # trained with dropout/batch-norm disabled.
                cnn.train(True)
                print("epoch:{},iter:{},train loss:{:.4f},val loss:{:.4f}".
                      format(epoch, iter_num, loss.item(), test_loss))
        # Save one checkpoint per epoch
        torch.save(cnn.state_dict(), "./model/{}.pth".format(epoch))
        print("save epoch :{} model".format(epoch))
def main(args):
    """Train a CNN (or CNN_fourclass) for args.epochs epochs, logging and
    recording per-epoch train/validation accuracy and loss, then plot the
    curves and save the final model.

    Expects args.debug, args.fourclass, args.batch_size, args.lr,
    args.epochs.
    """
    print('debug is {} and fourclass is {}'.format(args.debug,
                                                   args.fourclass))
    # Per-epoch history buffers.
    train_acc = np.zeros(args.epochs)
    train_loss = np.zeros(args.epochs)
    val_acc = np.zeros(args.epochs)
    val_loss = np.zeros(args.epochs)
    # Data processing — `stream` is hard-wired True, so the streaming loader
    # is always used and the else-branch is currently dead.
    stream = True
    if stream:
        train_loader, val_loader = stream_train_val_loader(
            args.batch_size, debug=args.debug, fourclass=args.fourclass)
    else:
        train_loader, val_loader = get_train_val_loader(
            args.batch_size, debug=args.debug, fourclass=args.fourclass)
    logging.info('loaders loaded')
    if not args.fourclass:
        model = CNN().to(device)
    else:
        model = CNN_fourclass().to(device)
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    logging.info('good sofar')
    start_time = time.time()
    for epoch in range(args.epochs):
        logging.info('epoch {} ...'.format(epoch + 1))
        accum_loss = 0.0
        num_trained = 0
        tot_corr = 0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            #print(inputs.get_device, labels.get_device)
            optimizer.zero_grad()
            predictions = model(inputs)
            #print(predictions.shape)
            loss = loss_fcn(predictions, labels.long())  # magic
            loss.backward()
            optimizer.step()
            # count correct predictions
            accum_loss += loss.item()
            _, predicted = torch.max(predictions, 1)
            corr = (predicted == labels.long()).sum().item()
            tot_corr += corr
            num_trained += len(labels)
        # Print statistics — model is flipped to eval for validation and
        # back to train afterwards.
        valid_acc, valid_loss = evaluate(model.eval(), val_loader, loss_fcn,
                                         epoch, fourclass=args.fourclass)
        model = model.train()
        train_acc[epoch] = tot_corr * 100 / num_trained
        # i is the last batch index of this epoch, so i + 1 == batch count.
        train_loss[epoch] = accum_loss / (i + 1)
        val_acc[epoch] = valid_acc * 100
        val_loss[epoch] = valid_loss
        # print('epoch: %d, loss: %f, training acc: %f%%, validation acc: %f%%' %
        #       (epoch + 1, train_loss[epoch], train_acc[epoch], val_acc[epoch]))
        logging.info(
            'epoch %d, loss: %f, training acc: %f%%, validation acc: %f%%' %
            (epoch + 1, train_loss[epoch], train_acc[epoch], val_acc[epoch]))
    print('Finished training:\n', 'train accuracy:', max(train_acc),
          'train loss:', min(train_loss), '\n', 'validation accuracy:',
          max(val_acc), 'validation loss:', min(val_loss), '\n')
    import re
    # Build a filesystem-safe timestamped name (':' -> '-') for the plot.
    localtime = time.asctime(time.localtime(time.time()))
    path = 'training_gpu_' + re.sub(r':', '-', localtime[11:19])
    time_elapsed = time.time() - start_time
    steps = np.arange(1, args.epochs + 1)
    plot_graph(path, steps, train_acc, val_acc)
    print("time elapsed:", time_elapsed)
    now = datetime.now()
    # Saves the whole model object (not just the state_dict).
    torch.save(
        model, 'model_{:02d}{:02d}_{:02d}{:02d}.pt'.format(
            now.month, now.day, now.hour, now.minute))
def train():
    """Train the captcha CNN for up to max_epoch epochs.

    After every epoch the model is checkpointed to MODEL_PATH; training
    stops early once the epoch's training loss drops below eps. Returns
    (train_acc_epoch, test_acc_epoch, loss_epoch) history lists.
    """
    train_data_loader, test_data_loader = load_data()
    net = CNN()
    gpu_available = torch.cuda.is_available()
    if gpu_available:
        net.cuda()
    if restor:
        net.load_state_dict(torch.load(MODEL_PATH))
    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)  # optimize
    criterion = nn.MultiLabelSoftMarginLoss()
    train_acc_epoch, test_acc_epoch, loss_epoch = [], [], []
    for epoch in range(max_epoch):
        tic = time.time()
        net.train()
        acc_history, loss_history = [], []
        # ---- training pass ----
        for batch_img, batch_target in train_data_loader:
            batch_img, batch_target = Variable(batch_img), Variable(batch_target)
            if gpu_available:
                batch_img, batch_target = batch_img.cuda(), batch_target.cuda()
            logits = net(batch_img)
            loss = criterion(logits, batch_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            acc_history.append(calculat_acc(logits, batch_target))
            loss_history.append(float(loss))
        train_acc, loss = cal_acc_loss(acc_history, loss_history)
        # ---- evaluation pass ----
        acc_history, loss_history = [], []
        net.eval()
        for batch_img, batch_target in test_data_loader:
            batch_img, batch_target = Variable(batch_img), Variable(batch_target)
            if gpu_available:
                batch_img, batch_target = batch_img.cuda(), batch_target.cuda()
            logits = net(batch_img)
            acc_history.append(calculat_acc(logits, batch_target))
        test_acc, nothing = cal_acc_loss(acc_history, loss_history)
        print('train loss:{:.5f}'.format(loss))
        print('train acc:{:.5f}'.format(train_acc))
        print('test acc: {:.5f}'.format(test_acc))
        print('epoch: {} , using time: {:.5}\n'.format(epoch + 1,
                                                       time.time() - tic))
        train_acc_epoch.append(train_acc)
        test_acc_epoch.append(test_acc)
        loss_epoch.append(loss)
        torch.save(net.state_dict(), MODEL_PATH)
        if loss < eps:
            break
    return train_acc_epoch, test_acc_epoch, loss_epoch
class DQN:
    """Deep Q-Network agent: epsilon-greedy action selection plus one-step
    TD learning against a frozen target network, with experience replay."""

    def __init__(self, screen_height=0, screen_width=0, n_actions=0,
                 gamma=0.999, epsilon_start=0.9, epsilon_end=0.05,
                 epsilon_decay=200, memory_capacity=10000, batch_size=128,
                 device="cpu"):
        self.actions_count = 0        # select_action calls so far (drives decay)
        self.n_actions = n_actions    # total number of discrete actions
        self.device = device          # "cpu" or a CUDA device
        self.gamma = gamma
        # epsilon-greedy exploration parameters
        self.epsilon = 0
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.policy_net = CNN(screen_height, screen_width,
                              n_actions).to(self.device)
        self.target_net = CNN(screen_height, screen_width,
                              n_actions).to(self.device)
        # target_net starts as an exact copy of policy_net's parameters
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()  # disable BatchNormalization and Dropout
        # (see the docs for the difference between parameters() and
        # state_dict(); the former yields tensors with requires_grad=True)
        self.optimizer = optim.RMSprop(self.policy_net.parameters())
        self.loss = 0
        self.memory = ReplayBuffer(memory_capacity)

    def select_action(self, state):
        '''Pick an action for `state` epsilon-greedily.

        Epsilon decays exponentially from epsilon_start to epsilon_end with
        the number of calls. Returns a 1x1 long tensor holding the action.
        '''
        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
            math.exp(-1. * self.actions_count / self.epsilon_decay)
        self.actions_count += 1
        if random.random() > self.epsilon:
            # Exploit: greedy action from the policy network.
            with torch.no_grad():
                # q_value e.g. tensor([[-0.2522, 0.3887]])
                q_value = self.policy_net(state)
                # tensor.max(1) returns each row's max and its index, e.g.
                # torch.return_types.max(values=tensor([10.3587]),
                #                        indices=tensor([0]))
                # so max(1)[1] is the index of the best action.
                action = q_value.max(1)[1].view(1, 1)  # a tensor like tensor([1])
            return action
        else:
            # Explore: uniform random action.
            return torch.tensor([[random.randrange(self.n_actions)]],
                                device=self.device, dtype=torch.long)

    def update(self):
        """One optimization step on a replayed minibatch (no-op until the
        buffer holds at least batch_size transitions)."""
        if len(self.memory) < self.batch_size:
            return
        transitions = self.memory.sample(self.batch_size)
        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043
        # for detailed explanation). This converts batch-array of Transitions
        # to Transition of batch-arrays.
        batch = self.memory.Transition(*zip(*transitions))
        # Compute a mask of non-final states and concatenate the batch
        # elements (a final state would've been the one after which
        # simulation ended)
        non_final_mask = torch.tensor(tuple(
            map(lambda s: s is not None, batch.next_state)),
            device=self.device, dtype=torch.bool)
        non_final_next_states = torch.cat(
            [s for s in batch.next_state if s is not None])
        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)  # tensor([1., 1.,...,])
        # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
        # columns of actions taken. These are the actions which would've
        # been taken for each batch state according to policy_net
        state_action_values = self.policy_net(state_batch).gather(
            1, action_batch)  # tensor([[ 1.1217],...,[ 0.8314]])
        # Compute V(s_{t+1}) for all next states.
        # Expected values of actions for non_final_next_states are computed
        # based on the "older" target_net; selecting their best reward with
        # max(1)[0]. This is merged based on the mask, such that we'll have
        # either the expected state value or 0 in case the state was final.
        next_state_values = torch.zeros(self.batch_size, device=self.device)
        next_state_values[non_final_mask] = self.target_net(
            non_final_next_states).max(1)[0].detach()
        # Compute the expected Q values
        expected_state_action_values = (
            next_state_values * self.gamma) + reward_batch
        # Compute Huber loss (.unsqueeze adds a dimension)
        self.loss = F.smooth_l1_loss(
            state_action_values, expected_state_action_values.unsqueeze(1))
        # Optimize the model.
        # zero_grad clears old gradients from the last step (otherwise you'd
        # just accumulate the gradients from all loss.backward() calls).
        self.optimizer.zero_grad()
        # loss.backward() computes the derivative of the loss w.r.t. the
        # parameters (or anything requiring gradients) using backpropagation.
        self.loss.backward()
        for param in self.policy_net.parameters():
            # clip gradients to [-1, 1] to prevent gradient explosion
            param.grad.data.clamp_(-1, 1)
        # Take a step based on the gradients of the parameters.
        self.optimizer.step()
for epoch in range(opt.n_epoch): cnn1.train() adjust_learning_rate(optimizer1, epoch, alpha_plan, beta_plan) cnn2.train() adjust_learning_rate(optimizer2, epoch, alpha_plan, beta_plan) if opt.loss in ['JoCoR', 'JoCoR_backward_only']: loss_t_total, correct_1, correct_2 = train_JoCoR( train_loader, epoch, cnn1, cnn2, optimizer1, optimizer2, rate_schedule, opt) else: loss_1_total, loss_2_total, correct_1, correct_2 = train_co_teaching( train_loader, epoch, cnn1, cnn2, optimizer1, optimizer2, rate_schedule) cnn1.eval() cnn2.eval() acc1_test, acc2_test = test(test_loader, cnn1, cnn2) if opt.loss in ['JoCoR', 'JoCoR_backward_only']: print('epoch', epoch, '|loss:' '%.4f' % loss_t_total, '|acc1:' '%.3f' % correct_1, '|acc2:' '%.3f' % correct_2, '|acc1_t:' '%.3f' % acc1_test, '|acc2_t:' '%.3f' % acc2_test) else: print('epoch', epoch, '|loss1:' '%.4f' % loss_1_total, '|loss2:' '%.4f' % loss_2_total, '|acc1:' '%.3f' % correct_1, '|acc2:'
from flask import Flask, send_from_directory from jinja2 import Template, Environment, FileSystemLoader import torch as th from model import CNN app = Flask(__name__, static_folder="static") # create an app for the website file_loader = FileSystemLoader('.') env = Environment(loader=file_loader) t = env.get_template('index.html') d = range(20) m = CNN() m.load_state_dict(th.load('cnn.pt')) m.eval() dataset = th.load("face_dataset.pt") # load face image dataset X = dataset["X"] @app.route("/") def result(): return t.render(face_ids=d) @app.route("/predict/<int:face_id>") def predict(face_id): if face_id % 2 == 0: i = face_id // 2 else: i = face_id // 2 + 10 x = X[i].reshape(1, 1, 64, 64) z = m(x) if z[0, 0] > 0: