def get_model(args):
    '''Define the model.'''
    model = ConvNet(use_batch_norm=True, use_resnet=False)
    print('---Model Information---')
    print('Net:', model)
    print('Use GPU:', args.use_cuda)
    return model.to(args.device)
@dataclass
class ModelsHandler:
    input_shape: tuple
    num_actions: int
    lr: float = field(default=0.001)

    def __post_init__(self):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = ConvNet(self.input_shape, self.num_actions, self.lr).to(self.device)
        self.tgt_model = ConvNet(self.input_shape, self.num_actions, self.lr).to(self.device)
        self.model_update_count = 0
        self.current_loss = 0

    def train_step(self, rb: ReplayBuffer, sample_size=300):
        # loss calculation
        trans_sts = rb.sample(sample_size)
        states = torch.stack([trans.state_tensor for trans in trans_sts]).to(self.device)
        next_states = torch.stack(
            [trans.next_state_tensor for trans in trans_sts]).to(self.device)
        not_done = torch.Tensor([trans.not_done_tensor for trans in trans_sts]).to(self.device)
        actions = [trans.action for trans in trans_sts]
        rewards = torch.stack([trans.reward_tensor for trans in trans_sts]).to(self.device)

        # bootstrap target values from the (frozen) target network
        with torch.no_grad():
            qvals_predicted = self.tgt_model(next_states).max(-1)

        self.model.optimizer.zero_grad()
        qvals_current = self.model(states)
        one_hot_actions = torch.nn.functional.one_hot(
            torch.LongTensor(actions), self.num_actions).to(self.device)
        # squared TD error between the target and the Q-value of the taken action
        loss = ((rewards + (not_done * qvals_predicted.values)
                 - torch.sum(qvals_current * one_hot_actions, -1)) ** 2).mean()
        loss.backward()
        self.model.optimizer.step()
        return loss.detach().item()

    def update_target_model(self):
        state_dict = deepcopy(self.model.state_dict())
        self.tgt_model.load_state_dict(state_dict)
        self.model_update_count += 1

    def save_target_model(self):
        file_name = f"{datetime.now().strftime('%H:%M:%S')}.pth"
        temp_dir = os.environ.get('TMPDIR', '/tmp')
        file_name = os.path.join(temp_dir, file_name)
        torch.save(self.model, file_name)
        wandb.save(file_name)
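# --- Usage sketch (illustration only, not part of the original sources) ---
# A minimal, self-contained example of the transition interface that
# ModelsHandler.train_step expects. DummyTransition and DummyReplayBuffer are
# hypothetical stand-ins for the project's real Transition/ReplayBuffer types.
import random
from dataclasses import dataclass

@dataclass
class DummyTransition:
    state_tensor: torch.Tensor       # shape == input_shape
    next_state_tensor: torch.Tensor  # shape == input_shape
    reward_tensor: torch.Tensor      # 0-d reward tensor
    not_done_tensor: float           # 0.0 if the episode ended, else 1.0
    action: int

class DummyReplayBuffer:
    def __init__(self):
        self.buffer = []

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, n):
        return random.choices(self.buffer, k=n)

# handler = ModelsHandler(input_shape=(4, 84, 84), num_actions=6)
# ...fill a DummyReplayBuffer from environment rollouts, then:
# loss = handler.train_step(buffer, sample_size=300)
# handler.update_target_model()  # sync the target network periodically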
def example2():
    cm = ConfigManager('testset')
    imgs = DataLoader.get_images_objects(cm.get_dataset_path(),
                                         'processed_x.pt',
                                         'processed_y.pt',
                                         to_tensor=True)
    print(type(imgs))
    dm = DatasetsManager(cm, imgs)

    n_output = 2
    net = ConvNet(n_output)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    EPOCHS = 10
    BATCH_SIZE = 128
    print('Start training')
    for epoch in range(EPOCHS):
        for k in tqdm(range(0, len(dm.train), BATCH_SIZE)):
            batch_x = torch.cat(dm.train.get_x(start=k, end=k + BATCH_SIZE), dim=0)
            batch_y = torch.Tensor(dm.train.get_y(start=k, end=k + BATCH_SIZE))
            print(type(batch_x))
            net.zero_grad()
            out = net(batch_x)
            loss = loss_function(out, batch_y)
            loss.backward()
            optimizer.step()
        print(f'Epoch: {epoch}. Loss: {loss}')

    correct = 0
    total = 0
    # with torch.no_grad():
    #     for k in tqdm(range(len(x_test))):
    #         real_class = torch.argmax(y_test[k])
    #         net_out = net(x_test[k].view(-1, 1, IMG_SIZE, IMG_SIZE))[0]  # returns list
    #         predicted_class = torch.argmax(net_out)
    #         if predicted_class == real_class:
    #             correct += 1
    #         total += 1
    if total:  # guard: the evaluation loop above is commented out, so total may be 0
        print('Accuracy: ', round(correct / total, 3))
    torch.save(net, 'data/cnn_cats_dogs_model.pt')
def main():
    results = []
    # Modification starts
    sess = tf.Session()
    # if we don't have the trained model, simply do: Trainer(sess)
    # pass the session and the image to the find_circle function
    checkpoint_path = 'checkpoints/dump-63'
    inputs = tf.placeholder(tf.float32, shape=(None, 200, 200, 1))
    outputs = tf.placeholder(tf.float32, shape=(None, 3))
    predictions = ConvNet(inputs, outputs, mode='predict')
    saved_variables = tf.global_variables()
    saver = tf.train.Saver(saved_variables)
    saver.restore(sess, checkpoint_path)
    # End of modification
    for idx in range(1000):
        print('Inference on image: ' + str(idx))
        params, img = noisy_circle(200, 50, 2)
        detected = find_circle(img, sess, inputs, outputs, predictions)
        results.append(iou(params, detected))
    results = np.array(results)
    print((results > 0.7).mean())
    sess.close()
def get_model(args):
    # TODO
    model_type = args.model
    if model_type.lower() == 'convnet':
        return ConvNet()
    # the original constructed NotImplementedError() without raising it
    raise NotImplementedError('Unknown model type: {}'.format(model_type))
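# A short usage sketch (illustration only; assumes the surrounding script wires
# up argparse with a --model flag, which this snippet does not show):
# parser = argparse.ArgumentParser()
# parser.add_argument('--model', default='ConvNet')
# model = get_model(parser.parse_args())  # returns ConvNet(); raises otherwise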
def main():
    parser = get_command_line_parser()
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    use_gpu = torch.cuda.is_available()

    if use_gpu:
        print("Currently using GPU: {}".format(args.gpu))
        torch.backends.cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")

    trainloader, testloader = get_mnist_data(train_batch_size=args.batch_size,
                                             workers=args.workers)

    print("Creating model: {}".format(args.model))
    feature_extractor = ConvNet(depth=6, input_channel=1)
    model = BaseLine(feature_extractor=feature_extractor,
                     num_base_class=10,
                     embed_size=2)
    if use_gpu:
        model = model.cuda()

    # optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model,
    #                                   weight_decay=5e-04, momentum=0.9)
    optimizer_model = torch.optim.Adam(model.parameters(), lr=args.lr_model)
    if args.stepsize > 0:
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer_model,
                                                    step_size=args.stepsize,
                                                    gamma=args.gamma)

    start_time = time.time()
    for epoch in range(args.max_epoch):
        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))
        train(model, optimizer_model, trainloader, use_gpu, 10, epoch, args)

        if args.stepsize > 0:
            scheduler.step()

        if (args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0) or \
                (epoch + 1) == args.max_epoch:
            print("==> Test")
            acc, err = evaluate(model, testloader, use_gpu, 10, epoch, args=args)
            print("Accuracy (%): {}\t Error rate (%): {}".format(acc, err))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
def initialise():
    game_controller = GameController(game_cfg.start_bbox, game_cfg.end_bbox,
                                     game_cfg.start_thres)
    player_controller = PlayerController(general_cfg.app)
    rl_recorder = RlRecorder()  # TODO: read replay from disk
    player_controller.activate_chrome()  # switch to chrome
    timer = Timer(game_cfg.space_time_gap)
    performances = {'iter': [], 'score': []}

    if cnn_cfg.load_model and os.path.isfile(cnn_cfg.chkpnt_path):
        cnn = torch.load(cnn_cfg.chkpnt_path)
        cnn.cnn_cfg = cnn_cfg
        print("Load cnn model from ", cnn_cfg.chkpnt_path)
    else:
        cnn = ConvNet(cnn_cfg, num_classes=cnn_cfg.num_classes, lr=cnn_cfg.lr)
        print("Create new CNN done!")
    if torch.cuda.is_available():
        cnn = cnn.cuda()
        print("Cuda is available!")
    return game_controller, player_controller, rl_recorder, timer, performances, cnn
def demo_main(char_set, weight, name):
    _, valid_transform = get_transform()
    demo_data = DemoDataset('cleaned_data', name, valid_transform)
    test_loader = DataLoader(
        dataset=demo_data,
        batch_size=3,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
    )
    model = ConvNet(1, len(char_set))
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
    print('load weights from {}'.format(weight))
    model.load_state_dict(torch.load(weight))
    model.eval()

    def map_indexlist_char(ind_list, char_set):
        return ''.join([char_set[i] for i in ind_list])

    with torch.no_grad():
        for batch_idx, (x, imgpath) in enumerate(test_loader):
            if batch_idx > 0:
                break
            if use_cuda:  # guard: the original moved x to the GPU unconditionally
                x = x.cuda()
            out = model(x)
            _, pred_label = torch.max(out, 1)
            pred_name = map_indexlist_char(pred_label.tolist(), char_set)
            print('name {} pred name {}'.format(name, pred_name))

    def get_concat(im1, im2):
        dst = Image.new('RGB', (im1.width + im2.width, im1.height))
        dst.paste(im1, (0, 0))
        dst.paste(im2, (im1.width, 0))
        return dst

    concat_im = None
    for img in demo_data.images():
        im = Image.open(img)
        if concat_im is None:
            concat_im = im
        else:
            concat_im = get_concat(concat_im, im)
    # concat_im.show()
    concat_im.save('demo.jpg')
def get_model(args):
    '''Define the model.'''
    if args.fc:
        model = FCNet()
    else:
        model = ConvNet()
    if args.cuda:
        model.cuda()
    print('\n---Model Information---')
    print('Net:', model)
    print('Use GPU:', args.cuda)
    return model
def get_model(args):
    '''Define the model.'''
    if args.model == 'Net':
        model = Net()
    elif args.model == 'FCNet':
        model = FCNet()
    elif args.model == 'ConvNet':
        model = ConvNet()
    else:
        raise ValueError('The model is not defined!')
    print('---Model Information---')
    print('Net:', model)
    print('Use GPU:', args.use_cuda)
    return model.to(args.device)
def train():
    g = ConvNet(is_training=True)

    # Start training
    with tf.Session(graph=g.graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if tf.train.latest_checkpoint('checkpoint'):
            saver.restore(sess, tf.train.latest_checkpoint('checkpoint'))
            print("Loaded parameter from {}".format(
                tf.train.latest_checkpoint('checkpoint')))

        n_batches = g.mnist.train.num_examples // g.batch_size
        print("Start to train")
        for i in range(10):
            total_loss = 0
            for j in range(n_batches):
                X_batch, Y_batch = g.mnist.train.next_batch(g.batch_size)
                X_batch = np.reshape(X_batch, [-1, 28, 28, 1])
                _, loss_batch = sess.run([g.optimizer, g.loss],
                                         {g.X: X_batch, g.Y: Y_batch})
                total_loss += loss_batch
            print('Epoch {}: {}'.format(i + 1, total_loss / n_batches))

        X_test = np.reshape(g.mnist.test.images, [-1, 28, 28, 1])
        print('Accuracy:',
              sess.run(g.accuracy,
                       feed_dict={g.X: X_test, g.Y: g.mnist.test.labels}))
        saver.save(sess, 'checkpoint/model')
def train(opt, split):
    model = ConvNet(opt.nClasses, GAP=opt.GAP)
    optimizer = chainer.optimizers.NesterovAG(lr=opt.LR, momentum=opt.momentum)
    trainer = Trainer(model, optimizer, train_iter, val_iter, opt)
    log = {'train_acc': [], 'val_acc': [], 'lr': [], 'train_loss': []}

    if opt.testOnly:
        chainer.serializers.load_npz(
            os.path.join(opt.save, 'model_split{}.npz'.format(split)),
            trainer.model)
        val_top1 = trainer.val()
        print('| Val: top1 {:.2f}'.format(val_top1))
        return

    for epoch in range(1, opt.nEpochs + 1):
        train_loss, train_top1 = trainer.train(epoch)
        val_top1 = trainer.val()
        sys.stderr.write('\r\033[K')
        sys.stdout.write(
            '| Epoch: {}/{} | Train: LR {} Loss {:.3f} top1 {:.2f} | Val: top1 {:.2f}\n'.format(
                epoch, opt.nEpochs, trainer.optimizer.lr, train_loss,
                train_top1, val_top1))
        sys.stdout.flush()
        log['lr'].append(trainer.optimizer.lr)
        log['train_loss'].append(train_loss)
        log['train_acc'].append(train_top1)
        log['val_acc'].append(val_top1)

    if opt.save != 'None':
        # Save weights
        chainer.serializers.save_npz(
            os.path.join(opt.save, 'model_split{}.npz'.format(split)), model)
        # Save logs
        with open(os.path.join(opt.save, 'logger{}.txt'.format(split)), "w") as f:
            for k, v in log.items():
                f.write(str(k) + ': ' + str(v) + '\n')
        # Save parameters
        with open(os.path.join(opt.save, 'opt{}.pkl'.format(split)), "wb") as f:
            pickle.dump(opt, f)
def run_exp(args, update_lambda, fix_weight):
    if args.predata is False:
        X_elementary, Y_elementary, X_hyper, Y_hyper, X_valid, Y_valid, X_test, Y_test = \
            read_preprocess(params=args)
        np.savez(args.processedDataName,
                 X_elementary=X_elementary, Y_elementary=Y_elementary,
                 X_hyper=X_hyper, Y_hyper=Y_hyper,
                 X_v=X_valid, Y_v=Y_valid,
                 X_test=X_test, Y_test=Y_test)
    else:
        tmpload = np.load(args.processedDataName)
        X_elementary, Y_elementary, X_hyper, Y_hyper, X_valid, Y_valid, X_test, Y_test = \
            tmpload['X_elementary'], tmpload['Y_elementary'], tmpload['X_hyper'], tmpload['Y_hyper'], \
            tmpload['X_v'], tmpload['Y_v'], tmpload['X_test'], tmpload['Y_test']

    """ Build Theano functions """
    if args.model == 'convnet':
        x = T.ftensor4('x')
    elif args.model == 'mlp':
        x = T.matrix('x')
    else:
        raise AttributeError
    y = T.matrix('y')
    lr_ele = T.fscalar('lr_ele')
    lr_ele_true = np.array(args.lrEle, theano.config.floatX)
    mom = 0.95  # momentum
    lr_hyper = T.fscalar('lr_hyper')
    grad_valid_weight = T.tensor4('grad_valid_weight')

    if args.model == 'mlp':
        model = MLP(x=x, y=y, args=args)
    elif args.model == 'convnet':
        model = ConvNet(x=x, y=y, args=args)  # see model.params_theta
        if args.dataset == 'mnist':
            nc, nPlane = 1, 28
        else:
            nc, nPlane = 3, 32
        X_elementary = X_elementary.reshape(-1, nc, nPlane, nPlane)
        X_hyper = X_hyper.reshape(-1, nc, nPlane, nPlane)
        X_valid = X_valid.reshape(-1, nc, nPlane, nPlane)
        X_test = X_test.reshape(-1, nc, nPlane, nPlane)
    else:
        raise AttributeError

    # update() builds the update rules for the parameters;
    # these are update functions, not the new parameter values themselves
    update_ele, update_valid, output_valid_list, share_var_dloss_dweight = update(
        model.params_theta, model.params_lambda, model.params_weight,
        model.loss, model.penalty, model.lossWithPenalty,
        lr_ele, lr_hyper, mom)

    if update_lambda:
        for up, origin in zip(update_lambda, model.params_lambda):
            origin.set_value(np.array(up))

    if fix_weight:
        for fix, origin in zip(fix_weight, model.params_weight):
            origin.set_value(np.array(fix))
    else:
        fix_weight = [origin.get_value() for origin in model.params_weight]

    # Phase 1
    func_elementary = theano.function(
        inputs=[x, y, lr_ele],
        outputs=[model.lossWithPenalty, model.prediction],
        updates=update_ele,  # update_ele is the update rule, not the new parameters
        on_unused_input='ignore',
        allow_input_downcast=True)

    func_eval = theano.function(
        inputs=[x, y],
        outputs=[model.loss, model.prediction],
        on_unused_input='ignore',
        allow_input_downcast=True)

    # Phase 2 (actually used in the backward phase)
    func_hyper_valid = theano.function(
        inputs=[x, y],
        outputs=[model.loss, model.prediction] + output_valid_list,
        updates=update_valid,
        on_unused_input='ignore',
        allow_input_downcast=True)

    """ Phase 1: meta-forward """
    X_mix = np.concatenate((X_valid, X_test), axis=0)
    Y_mix = np.concatenate((Y_valid, Y_test), axis=0)
    print(X_valid.shape, X_mix.shape)
    X_valid, Y_valid = X_mix[:len(X_mix) // 2], Y_mix[:len(X_mix) // 2]
    X_test, Y_test = X_mix[len(X_mix) // 2:], Y_mix[len(X_mix) // 2:]
    n_ele, n_valid, n_test = X_elementary.shape[0], X_valid.shape[0], X_test.shape[0]

    # TODO: remove this override
    n_ele = 20000
    X_elementary, Y_elementary = X_elementary[:n_ele], Y_elementary[:n_ele]
    print("# of ele, valid, test: ", n_ele, n_valid, n_test)
    n_batch_ele = n_ele // args.batchSizeEle
    test_perm, ele_perm = range(0, n_test), range(0, n_ele)
    last_iter = args.maxEpoch * n_batch_ele - 1
    temp_err_ele = []
    temp_cost_ele = []
    eval_loss = 0.
    t_start = time()
    iter_index_cache = []

    # save the model parameters into theta_initial;
    # params_theta is the flat list of layer weights and biases, [W, b, W, b, ...]
    theta_initial = [w.get_value() for w in model.params_theta]

    # (disabled experiment) the original also carried a large commented-out block
    # here that picked two random scalar weights from model.params_theta and
    # recorded their values (w_1, w_2) at every step, to plot the training path.

    for i in range(0, args.maxEpoch * n_batch_ele):  # SGD steps
        curr_epoch = i // n_batch_ele
        curr_batch = i % n_batch_ele

        """ Learning rate and momentum schedules. """
        t = 1. * i // (args.maxEpoch * n_batch_ele)  # never used?

        """ Update """
        # batch indices / batch data
        sample_idx_ele = ele_perm[(curr_batch * args.batchSizeEle):
                                  ((curr_batch + 1) * args.batchSizeEle)]
        iter_index_cache.append(sample_idx_ele)
        batch_x, batch_y = X_elementary[sample_idx_ele], Y_elementary[sample_idx_ele]
        if i == 399:
            # debug: last elementary step before the hyperparameter update?
            print("399!!!!!!!!!!!", batch_y)

        # one-hot encode the labels: 10 classes, put a 1 at (row=idx, col=class)
        tmp_y = np.zeros((args.batchSizeEle, 10))
        for idx, element in enumerate(batch_y):
            tmp_y[idx][element] = 1
        batch_y = tmp_y

        # this is where the elementary parameters are updated
        res = func_elementary(batch_x, batch_y, lr_ele_true)
        cost_ele, pred_ele, debugs = res[0], res[1], res[2:]

        if i % 20 == 0:
            print(i)
        # temp_err_ele += [1. * sum(batch_y != pred_ele) / args.batchSizeEle]
        temp_cost_ele += [cost_ele]
        eval_error = 0.

        # if np.isnan(cost_ele):
        #     print('NANS', cost_ele)

        """ Evaluate """
        if args.verbose or (curr_batch == n_batch_ele - 1):
            n_eval = n_test if args.model == 'mlp' else 1000
            temp_idx = test_perm[:n_eval]
            batch_x, batch_y = X_test[temp_idx], Y_test[temp_idx]
            tmp_y = np.zeros((n_eval, 10))
            for idx, element in enumerate(batch_y):
                tmp_y[idx][element] = 1
            batch_y = tmp_y
            eval_loss, y_test = func_eval(batch_x, batch_y)
            wrong = sum(1 for e1, e2 in zip(y_test, Y_test[temp_idx]) if e1 != e2)
            eval_error = 100. * wrong / n_eval
            print("test sample", n_eval)
            print("Valid on Test Set: Epoch %d, batch %d, time = %ds, "
                  "eval_loss = %.4f, eval_error = %.4f" %
                  (curr_epoch, curr_batch + 1, time() - t_start, eval_loss, eval_error))

    # save the model parameters after T1 into theta_final
    theta_final = [w.get_value() for w in model.params_theta]

    """ Phase 2: Validation on Hyper set """
    n_hyper = X_hyper.shape[0]
    n_batch_hyper = n_hyper // args.batchSizeHyper
    hyper_perm = range(0, n_hyper)
    # np.random.shuffle(hyper_perm)

    err_valid = 0.
    cost_valid = 0.
    t_start = time()
    grad_l_theta = []
    for i in range(0, n_batch_hyper):
        sample_idx = hyper_perm[(i * args.batchSizeHyper):((i + 1) * args.batchSizeHyper)]
        # NOTE: indexes X_elementary here, not X_hyper, as in the original
        batch_x, batch_y = X_elementary[sample_idx], Y_elementary[sample_idx]

        # TODO: refactor, too slow
        tmp_y = np.zeros((args.batchSizeEle, 10))
        for idx, element in enumerate(batch_y):
            tmp_y[idx][element] = 1
        batch_y = tmp_y

        res = func_hyper_valid(batch_x, batch_y)
        valid_cost, pred_hyper, grad_temp = res[0], res[1], res[2:]
        err_tmp = 0.
        # err_tmp = 1. * sum(batch_y != pred_hyper) / args.batchSizeHyper
        err_valid += err_tmp
        cost_valid += valid_cost

        # accumulate the gradient, then take the average
        if i == 0:
            for grad in grad_temp:
                grad_l_theta.append(np.asarray(grad))
        else:
            for k, grad in enumerate(grad_temp):
                grad_l_theta[k] += grad

    err_valid /= n_batch_hyper
    cost_valid /= n_batch_hyper

    # average the gradient over all iterations on the validation set
    for i, grad in enumerate(grad_l_theta):
        print(grad.shape)
        grad_l_theta[i] = grad / (np.array(n_hyper * 1., dtype=theano.config.floatX))

    print("Valid on Hyper Set: time = %ds, valid_err = %.2f, valid_loss = %.4f" %
          (time() - t_start, err_valid * 100, cost_valid))

    """ Phase 3: meta-backward """
    # updates for phase 3
    update_hyper, output_hyper_list, phase_3_input = updates_hyper(
        model.params_lambda, model.params_weight, model.lossWithPenalty,
        grad_l_theta, output_valid_list)

    # Phase 3: this is the backwards approximating path
    # dloss_dpenalty = T.grad(model.loss, model.params_lambda)
    func_hyper = theano.function(
        inputs=[x, y],
        outputs=output_hyper_list + output_valid_list,
        updates=update_hyper,
        on_unused_input='ignore',
        allow_input_downcast=True)

    # init pseudo params
    pseudo_params = [v.get_value() for v in model.params_theta]

    def replace_pseudo_params(ratio):
        # interpolate linearly between the initial and final parameters
        for i, param in enumerate(model.params_theta):
            pseudo_params[i] = (1 - ratio) * theta_initial[i] + ratio * theta_final[i]
            param.set_value(pseudo_params[i])

    n_backward = len(iter_index_cache) // 10
    print("n_backward", n_backward)
    rho = np.linspace(0.001, 0.999, n_backward)

    # initialization
    up_lambda, up_v = [], []
    for param in model.params_lambda:
        up_lambda += [np.zeros_like(param.get_value(), dtype=theano.config.floatX)]
    for param in model.params_weight:
        up_v += [np.zeros_like(param.get_value(), dtype=theano.config.floatX)]
    up_theta = grad_l_theta

    iter_index_cache = iter_index_cache[:n_backward]
    for iteration in range(n_backward)[::-1]:
        # walk the backwards approximating path
        replace_pseudo_params(rho[iteration])  # line 4
        curr_epoch = iteration // n_batch_ele
        curr_batch = iteration % n_batch_ele
        if iteration % 40 == 0:
            print("Phase 3, ep{} iter{}, total{}".format(curr_epoch, curr_batch, iteration))
        sample_idx_ele = iter_index_cache[iteration]
        batch_x, batch_y = X_elementary[sample_idx_ele], Y_elementary[sample_idx_ele]
        if curr_batch == 399:
            print("399!!!!!!!!!!!", batch_y)
        tmp_y = np.zeros((args.batchSizeEle, 10))
        for idx, element in enumerate(batch_y):
            tmp_y[idx][element] = 1
        batch_y = tmp_y

        # (identical for the mlp and convnet branches in the original)
        for p3, p1, input_p in zip(up_v, up_theta, phase_3_input):
            p3 += lr_ele_true * p1
            input_p.set_value(p3)

        # Hessian-vector products
        HVP_value = func_hyper(batch_x, batch_y)
        HVP_weight_value = HVP_value[:4]
        HVP_lambda_value = HVP_value[4:8]
        debug_orz = HVP_value[8:]

        cnt = 0
        for p1, p2, p3, hvp1, hvp2 in zip(up_theta, up_lambda, up_v,
                                          HVP_weight_value, HVP_lambda_value):
            # monitor up_lambda
            if cnt == 3:
                if iteration % 40 == 0:
                    print("up_lambda", p2[3][0])
            else:
                cnt += 1
            p1 -= (1. - mom) * np.array(hvp1)
            p2 -= (1. - mom) * np.array(hvp2)
            p3 *= mom
        # print up_lambda[2][0][0]

    return model.params_lambda, up_lambda, fix_weight, eval_loss, eval_error
model = ConvNet(
    conv_params={
        'kernel': ((1, 16), (1, 8), (1, 8)),
        'num_filter': (16, 32, 64),
        'stride': ((1, 1), (1, 1), (1, 1)),
        'padding': ((0, 0), (0, 0), (0, 0)),
        'dilate': ((1, 1), (1, 1), (1, 1)),
    },
    act_params={'act_type': ('relu', 'relu', 'relu', 'relu')},
    pool_params={
        'pool_type': ('avg', 'avg', 'avg'),
        'kernel': ((1, 16), (1, 16), (1, 16)),
        'stride': ((1, 2), (1, 2), (1, 2)),
        'padding': ((0, 0), (0, 0), (0, 0)),
        'dilate': ((1, 1), (1, 1), (1, 1)),
    },
    fc_params={'hidden_dim': (64,)},
    drop_prob=0,
    # input_dim = (2, 1, 8192)
    input_dim=(1, 1, 8192))
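# Hedged sanity check (illustration, not from the original sources): trace the
# temporal length through the conv/pool stack above for input_dim=(1, 1, 8192),
# assuming the usual floor convention L_out = (L + 2p - d*(k - 1) - 1)//s + 1.
def out_len(L, k, s, p=0, d=1):
    return (L + 2 * p - d * (k - 1) - 1) // s + 1

L = 8192
for conv_k, pool_k in ((16, 16), (8, 16), (8, 16)):
    L = out_len(L, conv_k, 1)  # conv: stride 1, no padding
    L = out_len(L, pool_k, 2)  # pool: stride 2, no padding
    print(L)                   # -> 4081, 2030, 1004
# so the fc layer sees num_filter[-1] * 1 * 1004 flattened features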
def perform_experiments(n_runs=10, n_points=1000, n_epochs=200, run_best=False,
                        verbose=False):
    """
    Perform experiments for 5 different neural network architectures and losses.
    To run all experiments, call this function with default params.

    :param n_runs: number of runs for which the experiment should be repeated
    :param n_points: number of training and testing data points used in the experiments
    :param n_epochs: number of epochs every architecture should be trained for
    :param run_best: if True, only the best architecture (Siamese network with auxiliary loss) is trained
    :param verbose: if True, print training and validation loss every epoch
    :returns: dictionary containing the training history (training/validation loss and accuracy)
    """
    history_mlp_net = []
    history_conv_net = []
    history_conv_net_aux = []
    history_siamese = []
    history_siamese_aux = []

    for n_run in range(n_runs):
        data_set = generate_pair_sets(n_points)
        MAX_VAL = 255.0
        TRAIN_INPUT = Variable(data_set[0]) / MAX_VAL
        TRAIN_TARGET = Variable(data_set[1])
        TRAIN_CLASSES = Variable(data_set[2])
        TEST_INPUT = Variable(data_set[3]) / MAX_VAL
        TEST_TARGET = Variable(data_set[4])
        TEST_CLASSES = Variable(data_set[5])

        if not run_best:
            ##############################################################################
            # Create a multilayer perceptron network with ReLU activations
            mlp_net = MLPNet(in_features=392, out_features=2, n_layers=3, n_hidden=16)
            # Set train flag on (for dropouts)
            mlp_net.train()
            # Train the model and append the history
            history_mlp_net.append(
                train_model(mlp_net,
                            train_input=TRAIN_INPUT.view((n_points, -1)),
                            train_target=TRAIN_TARGET,
                            val_input=TEST_INPUT.view((n_points, -1)),
                            val_target=TEST_TARGET,
                            n_epochs=n_epochs,
                            verbose=verbose))
            # Set train flag to False for getting accuracies on validation data
            mlp_net.eval()
            acc = get_accuracy(mlp_net, TEST_INPUT.view((n_points, -1)),
                               TEST_TARGET) * 100.0
            print("Run: {}, Mlp_net Test Accuracy: {:.3f} %".format(n_run, acc))

            ##############################################################################
            # Create ConvNet without auxiliary outputs
            conv_net = ConvNet(n_classes=2, n_layers=3, n_features=16)
            conv_net.train()
            history_conv_net.append(
                train_model(conv_net,
                            train_input=TRAIN_INPUT,
                            train_target=TRAIN_TARGET,
                            val_input=TEST_INPUT,
                            val_target=TEST_TARGET,
                            n_epochs=n_epochs,
                            verbose=verbose))
            conv_net.eval()
            acc = get_accuracy(conv_net, TEST_INPUT, TEST_TARGET) * 100.0
            print("Run: {}, ConvNet Test Accuracy: {:.3f} %".format(n_run, acc))

            ##############################################################################
            # Create ConvNet with auxiliary outputs
            conv_net_aux = ConvNet(n_classes=22, n_layers=3, n_features=16)
            conv_net_aux.train()
            history_conv_net_aux.append(
                train_model(conv_net_aux,
                            train_input=TRAIN_INPUT,
                            train_target=TRAIN_TARGET,
                            aux_param=1.0,
                            train_classes=TRAIN_CLASSES,
                            val_input=TEST_INPUT,
                            val_target=TEST_TARGET,
                            val_classes=TEST_CLASSES,
                            n_epochs=n_epochs,
                            verbose=verbose))
            conv_net_aux.eval()
            acc = get_accuracy(conv_net_aux, TEST_INPUT, TEST_TARGET) * 100.0
            print("Run: {}, ConvNet Auxiliary Test Accuracy: {:.3f} %".format(n_run, acc))

            ##############################################################################
            # Create Siamese network without auxiliary outputs
            conv_net = BlockConvNet()
            conv_net_siamese = DeepSiameseNet(conv_net)
            conv_net.train()
            conv_net_siamese.train()
            history_siamese.append(
                train_model(conv_net_siamese,
                            train_input=TRAIN_INPUT,
                            train_target=TRAIN_TARGET,
                            val_input=TEST_INPUT,
                            val_target=TEST_TARGET,
                            n_epochs=n_epochs,
                            verbose=verbose))
            conv_net.eval()
            conv_net_siamese.eval()
            acc = get_accuracy(conv_net_siamese, TEST_INPUT, TEST_TARGET) * 100.0
            print("Run: {}, Siamese Test Accuracy: {:.3f} %".format(n_run, acc))

        ##############################################################################
        # Create Siamese network with auxiliary outputs
        # (runs on every pass; it is the "best" architecture run_best selects)
        conv_net = BlockConvNet()
        conv_net_siamese_aux = DeepSiameseNet(conv_net)
        conv_net.train()
        conv_net_siamese_aux.train()
        history_siamese_aux.append(
            train_model(conv_net_siamese_aux,
                        train_input=TRAIN_INPUT,
                        train_target=TRAIN_TARGET,
                        train_classes=TRAIN_CLASSES,
                        val_input=TEST_INPUT,
                        val_target=TEST_TARGET,
                        val_classes=TEST_CLASSES,
                        aux_param=3.0,
                        n_epochs=n_epochs,
                        verbose=verbose))
        conv_net.eval()
        conv_net_siamese_aux.eval()
        acc = get_accuracy(conv_net_siamese_aux, TEST_INPUT, TEST_TARGET) * 100.0
        print("Run: {}, Siamese Auxiliary Test Accuracy: {:.3f} %".format(n_run, acc))
        ##############################################################################

    return {
        'history_mlp_net': history_mlp_net,
        'history_conv_net': history_conv_net,
        'history_conv_net_aux': history_conv_net_aux,
        'history_siamese': history_siamese,
        'history_siamese_aux': history_siamese_aux,
    }
def baseline_fitness(state_dict, num_epochs=600):
    # Hyper Parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    # conv weights are 4-D, fc weights are 2-D
    num_cnn_layer = sum(int(len(v.size()) == 4) for d, v in state_dict.items())
    num_fc_layer = sum(int(len(v.size()) == 2) for d, v in state_dict.items())
    state_key = [k for k, v in state_dict.items()]

    # read the layer widths off the weight shapes
    cfg = []
    first = True
    for d, v in state_dict.items():
        if len(v.data.size()) == 4 or len(v.data.size()) == 2:
            if first:
                first = False
                cfg.append(v.data.size()[1])
            cfg.append(v.data.size()[0])
    assert num_cnn_layer + num_fc_layer == len(cfg) - 1

    net = ConvNet(cfg, num_cnn_layer)
    for i, p in enumerate(net.parameters()):
        p.data = state_dict[state_key[i]]
    # (disabled) the original also carried commented-out code here that built
    # pruning masks from |w| < 1e-4, mirroring retrain() below, ending with
    # net.set_masks(masks)

    ## Retraining
    loader_train, loader_test = load_dataset()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=param['learning_rate'],
                                    weight_decay=param['weight_decay'])

    test_acc_list = []
    for t in range(num_epochs):
        param['num_epochs'] = 10
        train(net, criterion, optimizer, param, loader_train)
        test_acc_list.append(test(net, loader_test))

    plt.plot(test_acc_list)
    with open('baseline_result.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for row in test_acc_list:
            writer.writerow([row])
def retrain(state_dict, part=1, num_epochs=5):
    # Hyper Parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    # conv weights are 4-D, fc weights are 2-D
    num_cnn_layer = sum(int(len(v.size()) == 4) for d, v in state_dict.items())
    num_fc_layer = sum(int(len(v.size()) == 2) for d, v in state_dict.items())
    state_key = [k for k, v in state_dict.items()]

    # read the layer widths off the weight shapes
    cfg = []
    first = True
    for d, v in state_dict.items():
        if len(v.data.size()) == 4 or len(v.data.size()) == 2:
            if first:
                first = False
                cfg.append(v.data.size()[1])
            cfg.append(v.data.size()[0])
    assert num_cnn_layer + num_fc_layer == len(cfg) - 1

    net = ConvNet(cfg, num_cnn_layer, part)

    # build pruning masks: zero out weights whose magnitude is (near) zero
    masks = []
    for i, p in enumerate(net.parameters()):
        p.data = state_dict[state_key[i]]
        if len(p.data.size()) == 4:
            p_np = p.data.cpu().numpy()
            masks.append(np.ones(p_np.shape).astype('float32'))
            value_this_layer = np.abs(p_np).sum(axis=(2, 3))
            for j in range(len(value_this_layer)):
                for k in range(len(value_this_layer[0])):
                    if abs(value_this_layer[j][k]) < 1e-4:
                        masks[-1][j][k] = 0.
        elif len(p.data.size()) == 2:
            p_np = p.data.cpu().numpy()
            masks.append(np.ones(p_np.shape).astype('float32'))
            value_this_layer = np.abs(p_np)
            for j in range(len(value_this_layer)):
                for k in range(len(value_this_layer[0])):
                    if abs(value_this_layer[j][k]) < 1e-4:
                        masks[-1][j][k] = 0.
    net.set_masks(masks)

    ## Retraining
    loader_train, loader_test = load_dataset()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=param['learning_rate'],
                                    weight_decay=param['weight_decay'])
    train(net, criterion, optimizer, param, loader_train)

    # write the retrained weights back into state_dict
    for i, p in enumerate(net.parameters()):
        state_dict[state_key[i]] = p.data
    return state_dict
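# Hedged sketch (illustration, not from the original sources): the masking rule
# used in retrain() above, vectorized with NumPy instead of the nested loops.
# Same semantics: zero out conv kernels whose spatial L1 mass is below the
# threshold, and individual fc weights below the same threshold; biases are
# skipped, so set_masks is assumed to expect one mask per conv/fc weight tensor.
def make_masks(parameters, threshold=1e-4):
    masks = []
    for p in parameters:
        p_np = p.data.cpu().numpy()
        if p_np.ndim == 4:      # conv weight: (out, in, kH, kW)
            mask = np.ones(p_np.shape, dtype='float32')
            dead = np.abs(p_np).sum(axis=(2, 3)) < threshold  # per-kernel L1 mass
            mask[dead] = 0.     # zeroes the whole (kH, kW) slice, as above
            masks.append(mask)
        elif p_np.ndim == 2:    # fc weight: (out, in)
            masks.append((np.abs(p_np) >= threshold).astype('float32'))
    return masks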
        else:
            batch_e_i = batch_s_i + batch_size
        # print("batch_s_i: ", batch_s_i)
        # print("batch_e_i: ", batch_e_i)
        x_batch = X[batch_s_i:batch_e_i]
        y_batch = Y[batch_s_i:batch_e_i]
        x_batch = np.concatenate(x_batch, axis=0)
        y_batch = np.concatenate(y_batch, axis=0)
        yield x_batch, y_batch


if __name__ == '__main__':
    from models import ConvNet

    game_index_now = 10
    replays_paths = 'replays'
    batch_size = 2
    epoch = 20
    cnn = ConvNet(num_classes=2, lr=1e-3)
    random_samples, step_size = load_replays(
        game_index_now,
        pos_sample_factor=1.0,
        max_N=None,
        valid_game_index_range=float('inf'))
    cnn_data_loader = data_loader(batch_size, random_samples, step_size)
    cnn.train_model(cnn_data_loader, epoch, step_size)
def Solver(train, test, Debug, batch_size, lr, smoothing_constant,
           num_fc1, num_fc2, num_outputs, epochs, SNR, sl,
           pool_type, pool_size, pool_stride,
           params_init=None, period=None):
    num_examples = train.shape[0]

    # convert the training set to NDArrays
    y = nd.array(~train.sigma.isnull() + 0)
    X = nd.array(Normolise(train.drop(
        ['mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf', 'SNR_mf0'],
        axis=1)))
    print('Label for training:', y.shape)
    print('Dataset for training:', X.shape, end='\n\n')
    dataset_train = gluon.data.ArrayDataset(X, y)
    train_data = gluon.data.DataLoader(dataset_train, batch_size,
                                       shuffle=True, last_batch='keep')

    y = nd.array(~test.sigma.isnull() + 0)
    X = nd.array(Normolise(test.drop(
        ['mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf', 'SNR_mf0'],
        axis=1)))
    print('Label for testing:', y.shape)
    print('Dataset for testing:', X.shape, end='\n\n')
    # read the test data through the data module (shuffled)
    dataset_test = gluon.data.ArrayDataset(X, y)
    test_data = gluon.data.DataLoader(dataset_test, batch_size,
                                      shuffle=True, last_batch='keep')

    # Train
    loss_history = []
    loss_v_history = []
    moving_loss_history = []
    test_accuracy_history = []
    train_accuracy_history = []
    # assert period >= batch_size and period % batch_size == 0

    # Initialize parameters
    if params_init:
        print('Loading params...')
        params = params_init
        [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6] = params
        # re-randomize the fc layers
        weight_scale = .01
        W7 = nd.random_normal(loc=0, scale=weight_scale, shape=(sl, num_fc1), ctx=ctx)
        W8 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc1, num_fc2), ctx=ctx)
        W9 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc2, num_outputs), ctx=ctx)
        b7 = nd.random_normal(shape=num_fc1, scale=weight_scale, ctx=ctx)
        b8 = nd.random_normal(shape=num_fc2, scale=weight_scale, ctx=ctx)
        b9 = nd.random_normal(shape=num_outputs, scale=weight_scale, ctx=ctx)
        params = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6]
        print('Random the FC1&2-layers...')
        vs = []
        sqrs = []
        for param in params:
            param.attach_grad()
            vs.append(param.zeros_like())
            sqrs.append(param.zeros_like())
    else:
        params, vs, sqrs = init_params(num_fc1=64, num_fc2=64, num_outputs=2, sl=sl)
        print('Initiate weights from random...')

    # Debug
    if Debug:
        print('Debuging...')
        if params_init:
            params = params_init
        else:
            params, vs, sqrs = init_params(num_fc1=64, num_fc2=64, num_outputs=2, sl=sl)
        for data, _ in train_data:
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            break
        print(pool_type, pool_size, pool_stride)
        _, _ = ConvNet(data, params, debug=Debug, pool_type=pool_type,
                       pool_size=pool_size, pool_stride=pool_stride)
        print()

    # total_loss = [Total_loss(train_data_10, params, batch_size, num_outputs)]
    t = 0  # epoch starts from 1
    print('pool_type: ', pool_type)
    print('pool_size: ', pool_size)
    print('pool_stride: ', pool_stride)
    print('sl: ', sl)

    for epoch in range(1, epochs + 1):
        Epoch_loss = []
        # learning-rate self-decay
        if epoch > 2:
            # lr *= 0.1
            lr /= (1 + 0.01 * epoch)
        for batch_i, ((data, label), (data_v, label_v)) in enumerate(zip(train_data, test_data)):
            data = data.as_in_context(ctx).reshape((data.shape[0], 1, 1, -1))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, num_outputs)
            with autograd.record():
                output, _ = ConvNet(data, params, pool_type=pool_type,
                                    pool_size=pool_size, pool_stride=pool_stride)
                loss = softmax_cross_entropy(output, label_one_hot)
            loss.backward()
            # params = sgd(params, lr, batch_size)
            # Increment t before invoking adam.
            t += 1
            params, vs, sqrs = adam(params, vs, sqrs, lr, batch_size, t)

            data_v = data_v.as_in_context(ctx).reshape((data_v.shape[0], 1, 1, -1))
            label_v = label_v.as_in_context(ctx)
            label_v_one_hot = nd.one_hot(label_v, num_outputs)
            output_v, _ = ConvNet(data_v, params, pool_type=pool_type,
                                  pool_size=pool_size, pool_stride=pool_stride)
            loss_v = softmax_cross_entropy(output_v, label_v_one_hot)

            #########################
            # Keep a moving average of the losses
            #########################
            curr_loss = nd.mean(loss).asscalar()
            curr_loss_v = nd.mean(loss_v).asscalar()
            moving_loss = (curr_loss if ((batch_i == 0) and (epoch - 1 == 0))
                           else (1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)

            loss_history.append(curr_loss)
            loss_v_history.append(curr_loss_v)
            moving_loss_history.append(moving_loss)
            Epoch_loss.append(curr_loss)

            print('Working on epoch %d. Curr_loss: %.5f (complete percent: %.2f/100'
                  % (epoch, curr_loss * 1.0,
                     1.0 * batch_i / (num_examples // batch_size) * 100) + ')', end='')
            sys.stdout.write("\r")

        train_accuracy = evaluate_accuracy(train_data, num_examples, batch_size,
                                           params, ConvNet, pool_type=pool_type,
                                           pool_size=pool_size, pool_stride=pool_stride)
        test_accuracy = evaluate_accuracy(test_data, num_examples, batch_size,
                                          params, ConvNet, pool_type=pool_type,
                                          pool_size=pool_size, pool_stride=pool_stride)
        test_accuracy_history.append(test_accuracy)
        train_accuracy_history.append(train_accuracy)
        print("Epoch %d, Moving_loss: %.6f, Epoch_loss(mean): %.6f, "
              "Train_acc %.4f, Test_acc %.4f"
              % (epoch, moving_loss, np.mean(Epoch_loss),
                 train_accuracy, test_accuracy))

        yield (params, loss_history, loss_v_history, moving_loss_history,
               test_accuracy_history, train_accuracy_history)
args = parser.parse_args()
data_dir, model_dir = get_data_and_model_dir(args.model)
json_path = os.path.join(model_dir, 'params.json')
params = utils.Params(json_path)
params.device = "cuda" if torch.cuda.is_available() else "cpu"
params.seed = args.seed
params.writer = SummaryWriter()

# set random seed for reproducibility
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if params.device == "cuda":
    torch.cuda.manual_seed(args.seed)

model_and_loss = {
    'cnn': (ConvNet(params), cnn_loss),
    'capsule': (CapsuleNet(params), capsule_loss),
    'darknet_d': (DarkNetD(params), dark_d_loss),
    'darknet_r': (DarkNetR(params), dark_r_loss),
    'darkcapsule': (DarkCapsuleNet(params), darkcapsule_loss),
}
model, loss_fn = model_and_loss[args.model]
if args.summary:
    summary(model, config.input_shape[args.model])

optimizer = Adam(model.parameters(), lr=args.lr)
if args.mode == 'train':
    train_and_evaluate(model, optimizer, loss_fn, params,
                       data_dir + '/train.p', data_dir + '/eval.p', model_dir)
                               download=False,
                               transform=transforms.ToTensor())
loader_train = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=param['batch_size'],
                                           shuffle=True)

test_dataset = datasets.MNIST(root='../data/',
                              train=False,
                              download=False,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

# Load the pretrained model
net = ConvNet()
net.load_state_dict(torch.load('models/convnet_pretrained1.pkl'))
# if torch.cuda.is_available():
#     print('CUDA enabled.')
#     net.cuda()

# Pretraining
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.RMSprop(net.parameters(), lr=param1['learning_rate'],
#                                 weight_decay=param['weight_decay'])
# train(net, criterion, optimizer, param1, loader_train)

# Save and load the entire model
# torch.save(net.state_dict(), 'models/convnet_pretrained1.pkl')
def example1():
    """
    Train convnet and then save the model
    """
    DATASETS_DICT = './data'
    IMG_SIZE = CONFIG['img_size']

    # earlier dataset variants:
    # x_train = DataLoader.load(os.path.join(DATASETS_DICT, 'x_train_cats_dogs.npy'))
    # y_train = DataLoader.load(os.path.join(DATASETS_DICT, 'y_train_cats_dogs.npy'))
    # x_train = DataLoader.load(os.path.join(DATASETS_DICT, 'x_cats_dogs_skimage.npy'))
    # y_train = DataLoader.load(os.path.join(DATASETS_DICT, 'y_cats_dogs_skimage.npy'))
    # x_train = DataLoader.load(os.path.join(DATASETS_DICT, 'x_rps_skimage.npy'))
    # y_train = DataLoader.load(os.path.join(DATASETS_DICT, 'y_rps_skimage.npy'))
    x_train = DataLoader.load_npy(CONFIG['data']['x_path'])
    y_train = DataLoader.load_npy(CONFIG['data']['y_path'])

    x_train = torch.Tensor(x_train).view(-1, IMG_SIZE, IMG_SIZE)
    y_train = torch.Tensor(y_train)

    N_TRAIN = CONFIG['n_train']
    N_EVAL = CONFIG['n_eval']
    N_TEST = CONFIG['n_test']
    if N_TRAIN + N_EVAL + N_TEST > len(x_train):
        raise Exception('Not enough data!')

    # resnet50 works with 224x224 input size
    n_output = 2
    net = ConvNet(n_output)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    # split data
    x_eval = x_train[:N_EVAL]
    y_eval = y_train[:N_EVAL]
    x_test = x_train[N_EVAL:N_EVAL + N_TEST]
    y_test = y_train[N_EVAL:N_EVAL + N_TEST]
    x_train = x_train[N_EVAL + N_TEST:N_EVAL + N_TEST + N_TRAIN]
    y_oracle = y_train[N_EVAL + N_TEST:N_EVAL + N_TEST + N_TRAIN]
    # show_grid_imgs(x_train[:16], y_oracle[:16], (4, 4))

    EPOCHS = 10
    BATCH_SIZE = 128
    print('Start training')
    for epoch in range(EPOCHS):
        for k in tqdm(range(0, len(x_train), BATCH_SIZE)):
            batch_x = x_train[k:k + BATCH_SIZE].view(-1, 1, IMG_SIZE, IMG_SIZE)
            batch_y = y_oracle[k:k + BATCH_SIZE]
            net.zero_grad()
            out = net(batch_x)
            loss = loss_function(out, batch_y)
            loss.backward()
            optimizer.step()
        print(f'Epoch: {epoch}. Loss: {loss}')

    correct = 0
    total = 0
    with torch.no_grad():
        for k in tqdm(range(len(x_test))):
            real_class = torch.argmax(y_test[k])
            net_out = net(x_test[k].view(-1, 1, IMG_SIZE, IMG_SIZE))[0]  # returns list
            predicted_class = torch.argmax(net_out)
            if predicted_class == real_class:
                correct += 1
            total += 1
    print('Accuracy: ', round(correct / total, 3))
    torch.save(net, f'{DATASETS_DICT}/cnn_rps_model.pt')
                               download=True,
                               transform=transforms.ToTensor())
loader_train = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=param['batch_size'],
                                           shuffle=True)

test_dataset = datasets.MNIST(root='../data/',
                              train=False,
                              download=True,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

# Load the pretrained model
net = ConvNet()
net.load_state_dict(torch.load('models/convnet_pretrained.pkl'))
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
test(net, loader_test)

# prune the weights
masks = filter_prune(net, param['pruning_perc'])
net.set_masks(masks)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
        for i in range(self.num_stacks):
            self.stack_frames(image_processed)
        return self.buffer.copy()

    def get_grid(self):
        stacked = np.expand_dims(self.buffer, 1)
        imgs_tensor = torch.tensor(stacked)
        grid_image = utils.make_grid(imgs_tensor, 1)
        return grid_image.numpy().transpose((1, 2, 0))


if __name__ == '__main__':
    save_images = True
    env = gym.make("Breakout-v0")
    obs = env.reset()
    f = Frame(640, 480, 4)
    for i in range(40):
        if i == 0:
            f.step(env, 1)
        obs, reward, done, info = f.step(env)
        if save_images and i % 4 == 0:
            cur_date = datetime.now().isoformat()
            # save in temp directory
            file_save_path = get_temp_dir("image {:0>2}.jpg".format(i))
            cv2.imwrite(file_save_path, f.get_grid())
            print("file saved at:", file_save_path)

    input_buffer = torch.unsqueeze(torch.Tensor(obs), dim=0)
    model = ConvNet(f.observation_shape, 4)
    print(model(input_buffer))
def train(name):
    record = pd.DataFrame(data=np.zeros((1, 4), dtype=np.float),
                          columns=['precision', 'accuracy', 'recall', 'F1'])
    for _ in range(opt.runs):
        seed = random.randint(1, 10000)
        print("Random Seed: ", seed)
        torch.manual_seed(seed)

        # mkdirs for checkpoints output
        os.makedirs(opt.checkpoints_folder, exist_ok=True)
        os.makedirs('%s/%s' % (opt.checkpoints_folder, name), exist_ok=True)
        os.makedirs('report_metrics', exist_ok=True)
        root_dir = 'report_metrics/%s_aug_%s_IMBA/%s' % (opt.model, str(opt.n_group), name)
        os.makedirs(root_dir, exist_ok=True)

        # load the dataset
        path = 'UCRArchive_2018/' + name + '/' + name + '_TRAIN.tsv'
        train_set, n_class = load_ucr(path)
        print('Balanced data augmentation enabled!')
        stratified_train_set = stratify_by_label(train_set)
        data_aug_set = data_aug_by_dft(stratified_train_set, opt.n_group)
        total_set = np.concatenate((train_set, data_aug_set))
        print('Shape of total set', total_set.shape)
        dataset = UcrDataset(total_set, channel_last=opt.channel_last)
        batch_size = int(min(len(dataset) / 10, 16))
        dataloader = UCR_dataloader(dataset, batch_size)

        # Common behavior
        seq_len = dataset.get_seq_len()  # sequence length

        # build the classifier, loss function and optimizer
        if opt.model == 'r':
            net = ResNet(n_in=seq_len, n_classes=n_class).to(device)
        if opt.model == 'f':
            net = ConvNet(n_in=seq_len, n_classes=n_class).to(device)
        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(net.parameters(), lr=opt.lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                         factor=0.5, patience=50,
                                                         min_lr=0.0001)
        min_loss = 10000
        print('############# Start to Train ###############')
        net.train()
        for epoch in range(opt.epochs):
            for i, (data, label) in enumerate(dataloader):
                data = data.float().to(device)
                label = label.long().to(device)
                optimizer.zero_grad()
                output = net(data)
                loss = criterion(output, label.view(label.size(0)))
                loss.backward()
                optimizer.step()
                scheduler.step(loss)
                # print('[%d/%d][%d/%d] Loss: %.8f ' % (epoch, opt.epochs, i + 1, len(dataloader), loss.item()))
                if loss < min_loss:
                    min_loss = loss
                    # save the best model so far
                    print('MinLoss: %.10f Saving the best epoch model.....' % min_loss)
                    torch.save(net, '%s/%s/%s_%s_best_IMBA.pth'
                               % (opt.checkpoints_folder, name, opt.model, str(opt.n_group)))

        net_path = '%s/%s/%s_%s_best_IMBA.pth' % (opt.checkpoints_folder, name,
                                                  opt.model, str(opt.n_group))
        one_record = eval_accuracy(net_path, name)
        print('The minimum loss is %.8f' % min_loss)
        record = record.append(one_record, ignore_index=True)

    record = record.drop(index=[0])
    record.loc['mean'] = record.mean()
    record.loc['std'] = record.std()
    record.to_csv(root_dir + '/metrics.csv')
    # all_reprot_metrics.loc[name, 'acc_mean'] = record.at['mean', 'accuracy']
    # all_reprot_metrics.loc[name, 'acc_std'] = record.at['std', 'accuracy']
    # all_reprot_metrics.loc[name, 'F1_mean'] = record.at['mean', 'F1']
    # all_reprot_metrics.loc[name, 'F1_std'] = record.at['std', 'F1']
    print('\n')
OURs_modified = ConvNet(
    conv_params={
        'kernel': ((1, 16), (1, 8), (1, 8)),
        'num_filter': (32, 64, 128),
        'stride': ((1, 1), (1, 1), (1, 1)),
        'padding': ((0, 0), (0, 0), (0, 0)),
        'dilate': ((1, 1), (1, 1), (1, 1)),
    },
    act_params={'act_type': ('elu', 'elu', 'elu', 'elu')},
    pool_params={
        'pool_type': ('max', 'max', 'max'),
        'kernel': ((1, 4), (1, 4), (1, 4)),
        'stride': ((1, 2), (1, 2), (1, 2)),
        'padding': ((0, 0), (0, 0), (0, 0)),
        'dilate': ((1, 1), (1, 1), (1, 1)),
    },
    fc_params={'hidden_dim': (128,)},
    drop_prob=0,
    # input_dim = (2, 1, 8192)
    input_dim=(1, 1, 8192))
print('num_layers:', num_layers)
param = nd.load(pretrained_add + param_add)
OURS_ori = ConvNet(
    conv_params={
        'kernel': ((1, 16),) + ((1, 8),) * (num_layers - 1),
        'num_filter': temp(1 + (num_layers - 1)),
        'stride': ((1, 1),) + ((1, 1),) * (num_layers - 1),
        'padding': ((0, 0),) + ((0, 0),) * (num_layers - 1),
        'dilate': ((1, 1),) + ((1, 1),) * (num_layers - 1),
    },
    act_params={
        'act_type': ('relu',) * 2 + ('relu',) * (num_layers - 1)
    },
    pool_params={
        'pool_type': ('avg',) + ('avg',) * (num_layers - 1),
        'kernel': ((1, 16),) + ((1, 16),) * (num_layers - 1),
        'stride': ((1, 2),) + ((1, 2),) * (num_layers - 1),
        'padding': ((0, 0),) + ((0, 0),) * (num_layers - 1),
        'dilate': ((1, 1),) + ((1, 1),) * (num_layers - 1),
    },
    fc_params={'hidden_dim': (64,)},
    drop_prob=0,
    params_inits=param,
    input_dim=(1, 1, 8192))

auc_list = []
snr_list = np.linspace(0.1, 1, 10)
j = 0
while True:
OURS_ori = ConvNet(
    conv_params={
        'kernel': ((1, 16), (1, 8), (1, 8)),
        'num_filter': (16, 32, 64),
        'stride': ((1, 1), (1, 1), (1, 1)),
        'padding': ((0, 0), (0, 0), (0, 0)),
        'dilate': ((1, 1), (1, 1), (1, 1)),
    },
    act_params={'act_type': ('relu', 'relu', 'relu', 'relu')},
    pool_params={
        'pool_type': ('avg', 'avg', 'avg'),
        'kernel': ((1, 16), (1, 16), (1, 16)),
        'stride': ((1, 2), (1, 2), (1, 2)),
        'padding': ((0, 0), (0, 0), (0, 0)),
        'dilate': ((1, 1), (1, 1), (1, 1)),
    },
    fc_params={'hidden_dim': (64,)},
    drop_prob=0,
    params_inits=param,
    input_dim=(1, 1, 8192))
args = parse_args()

# unpack args
device = args.device
epoch = 1
lmbda = args.lmbda
lr = args.lr
criterion = make_criterion(args)
train_loss_tracker, train_acc_tracker = [], []
test_loss_tracker, test_acc_tracker = [], []

# ADD FILENAMES FOR MODEL WEIGHTS TO QUANTIZE AND EVALUATE THEM
filenames = ['control']

experiment_net = ConvNet()
experiment_net = experiment_net.to(device)

base_accuracies = []
for h in range(len(filenames)):
    experiment_net.load_state_dict(torch.load(filenames[h] + '.pt'))
    print('Test Accuracy without Quantization for ' + filenames[h] + '.pt')
    acc = test(experiment_net, testloader, criterion, epoch, lmbda,
               test_loss_tracker, test_acc_tracker)
    base_accuracies.append(acc)

# CHANGE FOR LOOP RANGE TO QUANTIZE FOR DIFFERENT BITWIDTHS
for n_bits in range(4, 9):
    print('{} BITWIDTH'.format(n_bits))
    # L1 AND L2
    for n in range(len(filenames)):
        experiment_net.load_state_dict(torch.load(filenames[n] + '.pt'))
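# Hedged sketch (illustration only): the snippet cuts off before the actual
# quantization call, so here is a plain symmetric uniform quantizer to n_bits,
# a common baseline for this kind of bitwidth sweep. Not necessarily the
# scheme the original code applied.
def quantize_tensor(w, n_bits):
    # map weights onto 2**n_bits evenly spaced levels around zero
    qmax = 2 ** (n_bits - 1) - 1
    scale = w.abs().max() / qmax  # assumes a non-zero tensor
    return torch.round(w / scale).clamp(-qmax - 1, qmax) * scale

# e.g. quantize every weight tensor of the loaded model in place:
# with torch.no_grad():
#     for p in experiment_net.parameters():
#         p.copy_(quantize_tensor(p, n_bits))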