if step % 2000 == 0: logger.model_param_histo_summary(model=net, step=step) if step % 500 == 0: logger.scalar_summary(tag='train/loss', value=loss, step=step) vis.line(torch.FloatTensor([loss.data[0]]), torch.FloatTensor([step]), win='train/loss', update='append' if step > 0 else None) # Save model occasionally if (step % cfg.TRAIN.SNAPSHOT_ITERS == 0) and step > 0: save_name = os.path.join( output_dir, '{}_{}.h5'.format(cfg.TRAIN.SNAPSHOT_PREFIX, step)) network.save_net(save_name, net) print('Saved model to {}'.format(save_name)) if step in lr_decay_steps: lr *= lr_decay optimizer = torch.optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay) if re_cnt: tp, tf, fg, bg = 0., 0., 0, 0 train_loss = 0 step_cnt = 0 t.tic() re_cnt = False
def train_fcn():
    """Train `fcn_net` on `train_set`, batching losses every
    `train_visualize_iter` steps, evaluating on a held-out split every
    `eval_visualize_iter` steps, and snapshotting weights every
    `snapshot_interval` global steps.

    Relies on module-level globals: fcn_net, train_set, train_num,
    epoch_num, optimizer, criterion, data_log1, train_visualize_iter,
    eval_visualize_iter, snapshot_interval, output_dir, network,
    fcn_data_loader, test_fcn_train, test_data.
    """
    print("Start training")
    fcn_net.train()
    # Accumulators for a mini-batch of per-frame outputs/labels; the loss is
    # computed and back-propagated only once every train_visualize_iter frames.
    label_batch = None
    output_batch = None
    correct = 0
    # Shuffle once, then split into train / validation partitions.
    np.random.shuffle(train_set['data'])
    train_data = train_set['data'][:train_num:]
    valid_data = train_set['data'][train_num::]
    # np.random.shuffle(valid_data)
    data_frames = fcn_data_loader(train_data)
    for epoch in range(epoch_num):
        np.random.shuffle(data_frames)
        for i, (frame, label) in enumerate(data_frames):
            # input = data_point['features']
            # label = data_point['class_num']
            global_step = i + epoch * len(data_frames)
            input_tensor = torch.from_numpy(frame)
            input_var = Variable(input_tensor).cuda()
            # FIX: np.long was removed in NumPy 1.24; np.int64 is the exact
            # equivalent and still yields a LongTensor via torch.from_numpy.
            label_np = np.asarray([label], dtype=np.int64)
            label_tensor = torch.from_numpy(label_np)
            label_var = Variable(label_tensor).cuda()
            # if input_var.data.shape[0] != 512:
            #     print("Shape is not correct")
            outputs = fcn_net(input_var)
            # argmax over the 51-way class scores; running accuracy counter.
            pred_num = outputs.data.max(0, keepdim=True)[1].cpu().numpy()[0]
            if pred_num == label:
                correct += 1
            # output_dict.shape = N*51
            if label_batch is None or output_batch is None:
                output_batch = outputs.view(1, 51)
                # label_batch = label_var.view(1,51)
                label_batch = label_var
            else:
                output_batch = torch.cat((output_batch, outputs.view(1, 51)), 0)
                # label_batch = torch.cat((label_batch, label_var.view(1,51)),0)
                label_batch = torch.cat((label_batch, label_var), 0)
            # train_visualize
            # FIX: was `is 0` / `is not 0` — identity comparison with int
            # literals only works by CPython small-int caching; use ==/!=.
            if i % train_visualize_iter == 0 and i != 0:
                optimizer.zero_grad()
                loss = criterion(output_batch, label_batch)
                # print("Iter: {}, loss: {}".format(i, loss))
                loss.backward()
                optimizer.step()
                # Reset the accumulated mini-batch and accuracy counter.
                label_batch = None
                output_batch = None
                train_ap = correct / train_visualize_iter
                correct = 0
                print("Train Loss: {}, train ap: {}".format(
                    loss.data.cpu().numpy()[0], train_ap * 100))
                data_log1.scalar_summary('train/acc', train_ap, global_step)
                data_log1.scalar_summary('train/loss', loss, global_step)
            # eval_visualize
            if i % eval_visualize_iter == 0 and i != 0:
                # test_fcn_train(data_frames[35000::], epoch, i)
                eval_loss, eval_ap = test_fcn_train(valid_data, epoch, i)
                data_log1.scalar_summary('eval/acc', eval_ap, global_step)
                data_log1.scalar_summary('eval/loss', eval_loss, global_step)
                if eval_ap > 30:
                    # NOTE(review): `test_data` is called with itself as its
                    # second argument — the function name shadows the data it
                    # is given. Almost certainly one of the two identifiers is
                    # wrong; confirm the intended callee/argument.
                    test_data(fcn_net, test_data)
                    break
            # Periodically snapshot the network weights.
            if (global_step % snapshot_interval == 0) and global_step > 0:
                save_name = os.path.join(
                    output_dir, '{}_{}.h5'.format("task1_1", global_step))
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                network.save_net(save_name, fcn_net)
                print('Saved model to {}'.format(save_name))