def load_data(args, data_split, data_dir):
    data_files = []
    # data_dir = '../process_data/reason_data/reason_data/RAVEN-10000/'
    # NOTE: this hard-coded path overrides the data_dir argument.
    data_dir = "C:/Users/Hertz/Documents/SJSU Coursework/MS Project_big files/RAVEN 1000/center_single"
    # Alternative layout with one sub-directory per configuration:
    # for subdir in os.listdir(data_dir):
    #     for filename in os.listdir(data_dir + subdir):
    #         if "npz" in filename:
    #             data_files.append(data_dir + subdir + "/" + filename)
    for filename in os.listdir(data_dir):
        if "npz" in filename:
            data_files.append(data_dir + "/" + filename)
    # Keep only the .npz files that belong to the requested split (train/val/test).
    df = [data_file for data_file in data_files
          if data_split in data_file and "npz" in data_file]
    print("Nums of " + data_split + " : ", len(df))
    loader = torch.utils.data.DataLoader(Dataset(args, df),
                                         batch_size=args.batch_size,
                                         num_workers=args.numwork)
    return loader
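# A quick smoke test for load_data (assumed, not part of the original file):
# `args` only needs the two fields the function reads, batch_size and numwork,
# and the directory path is illustrative.
import argparse

def _demo_load_data():
    ap = argparse.ArgumentParser()
    ap.add_argument('--batch_size', type=int, default=32)
    ap.add_argument('--numwork', type=int, default=1)
    args = ap.parse_args([])   # use the defaults, ignore the real command line
    val_loader = load_data(args, "val", "path/to/RAVEN/center_single")
    for x, y, style, me in val_loader:   # images, labels, style file names, meta targets
        print(x.shape)
        break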
def main():
    """Load data and train a model on it."""
    args = argument_parser().parse_args()
    random.seed(args.seed)
    args.checkpoint_dir = CHECKPOINT_DIR
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    data_source = OmniglotDataSource(data_dir=DATA_DIR)
    data_source.split_train_test(num_train=1200)
    train_set = Dataset(data_source, which_set='train', task_type='classification')
    test_set = Dataset(data_source, which_set='test', task_type='classification')
    model = OmniglotModel(args.classes, **model_kwargs(args))
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        if not args.pretrained:
            print('Training...')
            train(sess, model, train_set, test_set,
                  os.path.join(args.checkpoint_dir, args.checkpoint),
                  **train_kwargs(args))
        else:
            print('Restoring from checkpoint...')
            tf.train.Saver().restore(
                sess,
                tf.train.latest_checkpoint(os.path.join(args.checkpoint_dir, args.checkpoint)))
        print('Evaluating...')
        eval_kwargs = evaluate_kwargs(args)
        print('Train accuracy: ' + str(evaluate(sess, model, train_set, **eval_kwargs)))
        print('Test accuracy: ' + str(evaluate(sess, model, test_set, **eval_kwargs)))
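# `argument_parser()` comes from elsewhere in the repo; below is a minimal
# sketch covering only the flags this main() actually reads. The names are
# inferred from the usage above, the defaults are assumptions, and the real
# parser defines more options consumed by model_kwargs, train_kwargs and
# evaluate_kwargs.
import argparse

def argument_parser():
    parser = argparse.ArgumentParser(description='Omniglot meta-learning runner')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--gpus', type=str, default='0', help='value for CUDA_VISIBLE_DEVICES')
    parser.add_argument('--classes', type=int, default=5, help='classes per task (N-way)')
    parser.add_argument('--pretrained', action='store_true', help='restore a checkpoint instead of training')
    parser.add_argument('--checkpoint', type=str, default='model_checkpoint', help='checkpoint name under checkpoint_dir')
    return parser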
def main(args):
    # Step 1: init data folders
    '''if os.path.exists('save_state/'+args.regime+'/normalization_stats.pkl'):
        # to load raw data and preprocess it
        print('Loading normalization stats')
        x_mean, x_sd = misc.load_file('save_state/'+args.regime+'/normalization_stats.pkl')
    else:
        x_mean, x_sd = preprocess.save_normalization_stats(args.regime)
        print('x_mean: %.3f, x_sd: %.3f' % (x_mean, x_sd))'''
    val_loader = load_data(args, "val")   # load the already-preprocessed validation/test data
    # model_dir is the directory where model parameters, graphs, etc. are saved;
    # checkpoints can also be loaded back from it to continue training a
    # previously saved model.
    tb = TensorBoard(args.model_dir)

    # Step 2: init neural networks
    print("network is:", args.net)
    if args.net == 'Reab3p16':       # use the Reab3p16 model
        model = Reab3p16(args)
    elif args.net == 'RN_mlp':       # use the WildRelationNet model
        model = WildRelationNet()
    if args.gpunum > 1:
        # The nn package defines Modules: neural-network layers that produce
        # output from input and may hold trainable weights. With more than one
        # GPU, nn.DataParallel copies the model to each GPU, splits every batch
        # among them, and combines the individual outputs; its weights are
        # saved under a "module." prefix.
        model = nn.DataParallel(model, device_ids=range(args.gpunum))

    weights_path = args.path_weight + "/" + args.load_weight   # saved model weights
    if os.path.exists(weights_path) and args.restore:          # pretrained weights
        # A state_dict maps each layer to its parameter tensors; it is the
        # standard way to save and load models in PyTorch:
        # https://pytorch.org/tutorials/recipes/recipes/what_is_state_dict.html
        pretrained_dict = torch.load(weights_path)   # state dict of the pre-trained model
        model_dict = model.state_dict()
        pretrained_dict1 = {}
        for k, v in pretrained_dict.items():   # filter out unnecessary keys
            if k in model_dict:                # keep only matching keys (conv2d, ...)
                pretrained_dict1[k] = v
        model_dict.update(pretrained_dict1)    # overwrite entries in the existing state dict
        model.load_state_dict(model_dict)      # load the merged state dict (new weights)
        print('load weight')

    # Dictionary mapping RAVEN style codes (keys) to contiguous indices (values).
    style_raven = {65: 0, 129: 1, 257: 2, 66: 3, 132: 4, 36: 5, 258: 6, 136: 7,
                   264: 8, 72: 9, 130: 10, 260: 11, 40: 12, 34: 13, 49: 14,
                   18: 15, 20: 16, 24: 17}

    # model.cuda() sends the model to the current device. Move the model to the
    # GPU *before* constructing the optimizer: parameters after .cuda() are
    # different objects from those before the call.
    model.cuda()
    # SGD with momentum: instead of using only the current gradient to guide
    # the search, momentum also accumulates the gradients of past steps to
    # determine the direction. Adam often has convergence problems where
    # SGD + momentum converges better given longer training, which is why many
    # 2018/2019 papers still used SGD.
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mo,
                          weight_decay=5e-4)
    if args.gpunum > 1:
        optimizer = nn.DataParallel(optimizer, device_ids=range(args.gpunum))

    # Initialize the counters before training starts.
    iter_count = 1    # number of batches the algorithm has seen so far
    epoch_count = 1   # number of complete passes over the dataset
    # iter_epoch = int(len(train_files) / args.batch_size)
    print(time.strftime('%H:%M:%S', time.localtime(time.time())), 'training')
    style_raven_len = len(style_raven)   # size of the style_raven dict

    # Choose the reinforcement-learning model that steers training.
    if args.rl_style == "dqn":      # DQN variant
        dqn = DQN()
    elif args.rl_style == "ddpg":   # DDPG variant (the one used here)
        ram = MemoryBuffer(1000)
        # Trainer is defined in the rl folder (ddpg.py). The state is
        # style_raven_len*4+2 wide: per-style val accuracy, train accuracy,
        # action, and mean loss (4 vectors of style_raven_len), plus the
        # overall loss and the epoch count.
        ddpg = Trainer(style_raven_len * 4 + 2, style_raven_len, 1, ram)
    alpha_1 = 0.1
    if args.rl_style == "dqn":
        action_ = dqn.choose_action([0.5] * 3)  # TODO
    elif args.rl_style == "ddpg":
        # Exploration returns the initial action from an all-zero state.
        action_ = ddpg.get_exploration_action(
            np.zeros([style_raven_len * 4 + 2]).astype(np.float32), alpha_1)

    if args.type_loss:
        # Criterion measuring the binary cross entropy between target and output.
        loss_fn = nn.BCELoss()
    best_acc = 0.0   # best validation accuracy seen so far

    while True:   # train until interrupted
        since = time.time()
        print(action_)
        for i in range(style_raven_len):
            # Log the current per-style action, indexed by epoch.
            tb.scalar_summary("action/a" + str(i), action_[i], epoch_count)
        data_files = preprocess.provide_data(args.regime, style_raven_len,
                                             action_, style_raven)
        train_files = [data_file for data_file in data_files if 'train' in data_file]
        print("train_num:", len(train_files))
        # torch.utils.data.DataLoader supports map- and iterable-style datasets
        # with single- or multi-process loading; shuffle=True so the training
        # batches are drawn independently from the Dataset.
        train_loader = torch.utils.data.DataLoader(
            Dataset(args, train_files), batch_size=args.batch_size,
            shuffle=True, num_workers=args.numwork)
        model.train()   # switch to training mode
        iter_epoch = int(len(train_files) / args.batch_size)   # iterations per epoch

        # Per-style running totals: column 0 accumulates, column 1 counts.
        acc_part_train = np.zeros([style_raven_len, 2]).astype(np.float32)
        mean_loss_train = np.zeros([style_raven_len, 2]).astype(np.float32)
        loss_train = 0
        for x, y, style, me in train_loader:
            if x.shape[0] < 10:   # x.shape[0] is the batch size; skip undersized final batches
                print(x.shape[0])
                break
            # Wrap the tensors and move them to the GPU.
            x, y, meta = Variable(x).cuda(), Variable(y).cuda(), Variable(me).cuda()
            if args.gpunum > 1:
                optimizer.module.zero_grad()   # .module because of DataParallel
            else:
                optimizer.zero_grad()          # reset the parameter gradients
            if args.type_loss:
                pred_train, pred_meta = model(x)   # x: images, y: actual labels
            else:
                pred_train = model(x)
            # Per-sample training loss (reduce=False keeps one value per sample).
            # backward() needs a scalar, so take the mean (plus the weighted meta
            # loss when type_loss is set); using a non-scalar loss would error.
            loss_ = F.nll_loss(pred_train, y, reduce=False)
            loss = loss_.mean() if not args.type_loss \
                else loss_.mean() + 10 * loss_fn(pred_meta, meta)
            # backward() computes d(loss)/d(param) for every parameter with
            # requires_grad=True and stores it in param.grad; step() then
            # updates all parameters from param.grad.
            loss.backward()
            if args.gpunum > 1:
                optimizer.module.step()
            else:
                optimizer.step()
            iter_count += 1
            pred = pred_train.data.max(1)[1]
            correct = pred.eq(y.data).cpu()   # compare predicted and actual labels
            loss_train += loss.item()         # batch losses average to the epoch loss
            for num, style_pers in enumerate(style):
                # Style indices are encoded in the file name after the third underscore.
                style_pers = style_pers[:-4].split("/")[-1].split("_")[3:]
                for style_per in style_pers:
                    style_per = int(style_per)
                    if correct[num] == 1:
                        acc_part_train[style_per, 0] += 1
                    acc_part_train[style_per, 1] += 1
                    # mean_pred_train[style_per, 0] += pred_train[num, y[num].item()].data.cpu()
                    # mean_pred_train[style_per, 1] += 1
                    mean_loss_train[style_per, 0] += loss_[num].item()
                    mean_loss_train[style_per, 1] += 1
            accuracy_total = correct.sum() * 100.0 / len(y)
            if iter_count % 10 == 0:   # log every 10 iterations
                iter_c = iter_count % iter_epoch
                print(time.strftime('%H:%M:%S', time.localtime(time.time())),
                      ('train_epoch:%d,iter_count:%d/%d, loss:%.3f, acc:%.1f') % (
                          epoch_count, iter_c, iter_epoch, loss, accuracy_total))
                tb.scalar_summary("train_loss", loss, iter_count)
        loss_train = loss_train / len(train_files)   # estimate of the epoch loss
        # mean_pred_train = [x[0] / x[1] for x in mean_pred_train]
        mean_loss_train = [x[0] / x[1] for x in mean_loss_train]
        acc_part_train = [x[0] / x[1] if x[1] != 0 else 0 for x in acc_part_train]
        print(acc_part_train)
        if epoch_count % args.lr_step == 0:   # decay the learning rate every lr_step epochs
            print("change lr")
            adjust_learning_rate(optimizer, epoch_count, args.lr_step, args.gpunum)
        time_elapsed = time.time() - since
        print('train epoch in {:.0f}h {:.0f}m {:.0f}s'.format(
            time_elapsed // 3600, time_elapsed // 60 % 60, time_elapsed % 60))

        with torch.no_grad():
            model.eval()   # evaluate on the validation data
            accuracy_all = []
            iter_test = 0
            acc_part_val = np.zeros([style_raven_len, 2]).astype(np.float32)
            for x, y, style, me in val_loader:
                iter_test += 1
                x, y = Variable(x).cuda(), Variable(y).cuda()
                pred, _ = model(x)
                pred = pred.data.max(1)[1]
                correct = pred.eq(y.data).cpu().numpy()
                accuracy = correct.sum() * 100.0 / len(y)   # share of matching labels
                for num, style_pers in enumerate(style):
                    style_pers = style_pers[:-4].split("/")[-1].split("_")[3:]
                    for style_per in style_pers:
                        style_per = int(style_per)
                        if correct[num] == 1:
                            acc_part_val[style_per, 0] += 1
                        acc_part_val[style_per, 1] += 1
                accuracy_all.append(accuracy)   # append to the accuracy list
            accuracy_all = sum(accuracy_all) / len(accuracy_all)   # overall val accuracy
            acc_part_val = [x[0] / x[1] if x[1] != 0 else 0 for x in acc_part_val]

        baseline_rl = 70   # accuracy baseline for the reward
        reward = np.mean(acc_part_val) * 100 - baseline_rl   # reward from val accuracy
        tb.scalar_summary("valreward", reward, epoch_count)
        action_list = [x for x in action_]
        # Pack everything the RL agent sees into the current state.
        cur_state = np.array(acc_part_val + acc_part_train + action_list +
                             mean_loss_train + [loss_train] + [epoch_count]).astype(np.float32)
        if args.rl_style == "dqn":
            action_ = dqn.choose_action(cur_state)  # TODO
        elif args.rl_style == "ddpg":
            # Ask the RL model for the next action given the current state.
            action_ = ddpg.get_exploration_action(cur_state, alpha_1)
        if alpha_1 < 1:
            alpha_1 += 0.005
        if epoch_count > 1:
            # Store (last state, action, reward, current state) in memory and
            # run one optimization step of the RL model.
            if args.rl_style == "dqn":
                dqn.store_transition(last_state, action_, reward, cur_state)
                dqn.learn()
            elif args.rl_style == "ddpg":
                ram.add(last_state, action_, reward, cur_state)
                loss_actor, loss_critic = ddpg.optimize()
                tb.scalar_summary("loss_actor", loss_actor, epoch_count)
                tb.scalar_summary("loss_critic", loss_critic, epoch_count)
            print('------------------------------------')
            print('learn q learning')
            print('------------------------------------')
        last_state = cur_state

        time_elapsed = time.time() - since
        print('test epoch in {:.0f}h {:.0f}m {:.0f}s'.format(
            time_elapsed // 3600, time_elapsed // 60 % 60, time_elapsed % 60))
        print('------------------------------------')
        print(('epoch:%d, acc:%.1f') % (epoch_count, accuracy_all))
        print('------------------------------------')
        if accuracy_all > best_acc:   # new best validation accuracy
            best_acc = max(best_acc, accuracy_all)
            # ddpg.save_models(args.model_dir + '/', epoch_count)
            save_state(model.state_dict(), args.model_dir + "/epochbest")
        epoch_count += 1   # increase the epoch count by 1
        if epoch_count % 20 == 0:   # checkpoint every 20 epochs
            print("save weights")
            ddpg.save_models(args.model_dir + '/', epoch_count)
            save_state(model.state_dict(), args.model_dir + "/epoch" + str(epoch_count))
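# Neither adjust_learning_rate nor save_state is defined in this file. Below
# are minimal sketches matching how they are called above; the 0.1 decay
# factor is an assumption, and the real helpers may differ.
import torch

def adjust_learning_rate(optimizer, epoch, lr_step, gpunum):
    # Step decay: the caller already gates this on epoch % lr_step == 0,
    # so simply scale every parameter group's learning rate once.
    opt = optimizer.module if gpunum > 1 else optimizer   # unwrap nn.DataParallel
    for param_group in opt.param_groups:
        param_group['lr'] *= 0.1

def save_state(state_dict, path):
    # Persist a state_dict so it can later be restored via load_state_dict.
    torch.save(state_dict, path)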
def main(args):
    # Step 1: init data folders
    '''if os.path.exists('save_state/'+args.regime+'/normalization_stats.pkl'):
        print('Loading normalization stats')
        x_mean, x_sd = misc.load_file('save_state/'+args.regime+'/normalization_stats.pkl')
    else:
        x_mean, x_sd = preprocess.save_normalization_stats(args.regime)
        print('x_mean: %.3f, x_sd: %.3f' % (x_mean, x_sd))'''
    data_dir = args.datapath
    data_files = []
    for x in os.listdir(data_dir):
        for y in os.listdir(data_dir + x):
            data_files.append(data_dir + x + "/" + y)
    test_files = [data_file for data_file in data_files
                  if 'val' in data_file and 'npz' in data_file]
    train_files = [data_file for data_file in data_files
                   if 'train' in data_file and 'npz' in data_file]
    print("train_num:", len(train_files), "test_num:", len(test_files))
    train_loader = torch.utils.data.DataLoader(Dataset(args, train_files),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.numwork)
    # test_loader is consumed by the evaluation loop below, so it must be built here.
    test_loader = torch.utils.data.DataLoader(Dataset(args, test_files),
                                              batch_size=args.batch_size,
                                              num_workers=args.numwork)
    tb = TensorBoard(args.model_dir)

    # Step 2: init neural networks
    print("network is:", args.net)
    if args.net == 'Reab3p16':
        model = Reab3p16(args)
    if args.gpunum > 1:
        model = nn.DataParallel(model, device_ids=range(args.gpunum))

    weights_path = args.path_weight
    if os.path.exists(weights_path):
        pretrained_dict = torch.load(weights_path)
        model_dict = model.state_dict()
        pretrained_dict1 = {}
        for k, v in pretrained_dict.items():
            if k in model_dict:
                pretrained_dict1[k] = v
        model_dict.update(pretrained_dict1)
        model.load_state_dict(model_dict)
        print('load weight: ' + weights_path)

    model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mo,
                          weight_decay=5e-4)
    # optimizer = optim.Adam(model.parameters(), lr=args.lr)
    if args.gpunum > 1:
        optimizer = nn.DataParallel(optimizer, device_ids=range(args.gpunum))

    iter_count = 1
    epoch_count = 1
    print(time.strftime('%H:%M:%S', time.localtime(time.time())), 'training')
    while True:
        since = time.time()
        # Evaluate on the held-out split before each training epoch.
        with torch.no_grad():
            model.eval()
            accuracy_all = []
            for x, y, style, me in test_loader:
                x, y = Variable(x).cuda(), Variable(y).cuda()
                pred = model(x)
                pred = pred.data.max(1)[1]
                correct = pred.eq(y.data).cpu().numpy()
                accuracy = correct.sum() * 100.0 / len(y)
                accuracy_all.append(accuracy)
            accuracy_all = sum(accuracy_all) / len(accuracy_all)
        reward = accuracy_all * 100
        tb.scalar_summary("test_acc", reward, epoch_count)
        time_elapsed = time.time() - since
        print('test epoch in {:.0f}h {:.0f}m {:.0f}s'.format(
            time_elapsed // 3600, time_elapsed // 60 % 60, time_elapsed % 60))
        print('------------------------------------')
        print(('epoch:%d, acc:%.1f') % (epoch_count, accuracy_all))
        print('------------------------------------')

        model.train()
        iter_epoch = int(len(train_files) / args.batch_size)
        for x, y, style, me in train_loader:
            if x.shape[0] < 10:   # skip undersized final batches
                print(x.shape[0])
                break
            x, y = Variable(x).cuda(), Variable(y).cuda()
            if args.gpunum > 1:
                optimizer.module.zero_grad()
            else:
                optimizer.zero_grad()
            pred = model(x)
            loss = F.nll_loss(pred, y, reduce=False)
            loss = loss.mean()
            loss.backward()
            if args.gpunum > 1:
                optimizer.module.step()
            else:
                optimizer.step()
            iter_count += 1
            pred = pred.data.max(1)[1]
            correct = pred.eq(y.data).cpu()
            accuracy_total = correct.sum() * 100.0 / len(y)
            if iter_count % 100 == 0:
                iter_c = iter_count % iter_epoch
                print(time.strftime('%H:%M:%S', time.localtime(time.time())),
                      ('train_epoch:%d,iter_count:%d/%d, loss:%.3f, acc:%.1f') % (
                          epoch_count, iter_c, iter_epoch, loss, accuracy_total))
                tb.scalar_summary("train_loss", loss, iter_count)
        if epoch_count % args.lr_step == 0:
            print("change lr")
            adjust_learning_rate(optimizer, epoch_count, args.lr_step, args.gpunum)
        time_elapsed = time.time() - since
        print('train epoch in {:.0f}h {:.0f}m {:.0f}s'.format(
            time_elapsed // 3600, time_elapsed // 60 % 60, time_elapsed % 60))
        epoch_count += 1
        if epoch_count % 1 == 0:   # save after every epoch
            print("save weights")
            save_state(model.state_dict(),
                       args.model_dir + "/epoch" + str(epoch_count))
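# `TensorBoard` above is a project-local wrapper, not a library class. Below is
# a minimal sketch of the one method this code uses (scalar_summary), assuming
# the tensorboardX package; the real wrapper may be implemented differently.
from tensorboardX import SummaryWriter

class TensorBoard(object):
    def __init__(self, log_dir):
        self.writer = SummaryWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        # Log one scalar (e.g. train_loss) under `tag` at global step `step`.
        self.writer.add_scalar(tag, float(value), step)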
def run_inference(data_dir, data_split, filename):
    torch.multiprocessing.freeze_support()
    device = torch.device('cpu')
    # Earlier checkpoint (scored ~20 percent):
    # weights_path = "C:\\Users\\Hertz\\Documents\\SJSU Coursework\\MS Project_big files\\git\\distracting_feature\\distracting_feature\\epochs\\epoch860"
    weights_path = "C:\\Users\\Hertz\\Documents\\SJSU Coursework\\MS Project_big files\\git\\distracting_feature\\distracting_feature\\epochs\\epochbest(200K_79.2)"
    # model_path = "C:\\Users\\Hertz\\Documents\\SJSU Coursework\\MS Project_big files\\git\\distracting_feature\\distracting_feature\\epochs\\epoch860"  # got ~70 percent here
    # image_path = "C:/Users/Hertz/Documents/SJSU Coursework/MS Project_big files/RAVEN-10000-release/RAVEN-10000/center_single/RAVEN_10_train.npz"
    image_path = "C:/Users/Hertz/Documents/SJSU Coursework/MS Project_big files/RAVEN-10000-release/RAVEN-10000/"

    ap = argparse.ArgumentParser()
    ap.add_argument('--type_loss', type=bool, default=True)
    ap.add_argument('--image_path', type=str, default=image_path)
    ap.add_argument('--regime', type=str, default='all')
    ap.add_argument('--image_type', type=str, default='image')
    ap.add_argument('--batch_size', type=int, default=1)
    ap.add_argument('--numwork', dest='numwork', type=int, default=1)
    args = ap.parse_args()

    # A state_dict maps each layer to its parameter tensors:
    # https://pytorch.org/tutorials/recipes/recipes/what_is_state_dict.html
    pretrained_dict = torch.load(weights_path, map_location=device)
    r_model = m.Reab3p16(args)
    model_dict = r_model.state_dict()
    pretrained_dict1 = {}
    for k, v in pretrained_dict.items():
        # Strip the "module." prefix that nn.DataParallel adds when saving.
        if k[:7] == "module.":
            k = k[7:]
        if k in model_dict:   # keep only keys that match this model
            pretrained_dict1[k] = v
    model_dict.update(pretrained_dict1)   # overwrite entries in the existing state dict
    r_model.load_state_dict(model_dict)

    with torch.no_grad():
        r_model.eval()
        accuracy_all = []
        # load_data_for_inference returns a batch loader for the requested
        # split/file, so it is iterated directly.
        loader = load_data_for_inference(args, data_dir, data_split, filename)
        count = 0
        for x, y, style, me in loader:
            count = count + 1
            x, y = Variable(x), Variable(y)
            pred = r_model(x)
            pred = pred[0].data.max(1)[1]   # model returns (prediction, meta); keep the prediction
            correct = pred.eq(y.data).cpu().numpy()
            accuracy = correct.sum() * 100.0 / len(y)
            print("accuracy", accuracy)
            accuracy_all.append(accuracy)
        accuracy_all = sum(accuracy_all) / len(accuracy_all)
        print(accuracy_all)
        print("pred:", pred.data)
        print("y:", y.data)
        print("count:", count)
    return pred
def train(data, model, batch_size=32, n_epochs=100):
    # Signature reconstructed from the call below; the batch_size and n_epochs
    # defaults are assumptions.
    num_iters = data.num_iters(batch_size)
    loss = []
    avg_loss = 0.
    avg_acc = 0.
    for epoch in range(1, n_epochs + 1):
        for _ in range(num_iters):
            batch_x, batch_y = data.next_batch(batch_size)
            batch_y = to_categorical(batch_y)   # one-hot encode the labels
            avg_loss += model.fit(batch_x, batch_y)
            avg_acc += model.accuracy()
        if epoch % 10 == 0:
            n = 10. * num_iters   # iterations since the last report
            print("Epoch: {}; Loss: {}".format(epoch, avg_loss / n))
            print("Acc: {}".format(avg_acc / n))
            loss.append(avg_loss)
            avg_loss = 0.
            avg_acc = 0.
    test_x, test_y = data.test
    pred = model.predict(test_x)
    test_acc = np.mean(test_y == pred)
    print("Test accuracy: {}".format(test_acc))

if __name__ == '__main__':
    data = Dataset(DATA_DIR)
    inp_shape, num_classes = data.inp_shape(), data.num_classes()
    model = NeuralNetwork(inp_shape, num_classes, hidden_units=64)
    train(data, model)
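# `to_categorical` is not defined in this snippet. Below is a minimal one-hot
# encoder with the same behavior as the Keras utility of that name, provided
# as an assumed stand-in.
import numpy as np

def to_categorical(y, num_classes=None):
    # Convert integer class labels, e.g. [0, 2, 1], into one-hot rows.
    y = np.asarray(y, dtype=int)
    if num_classes is None:
        num_classes = int(y.max()) + 1
    one_hot = np.zeros((y.shape[0], num_classes), dtype=np.float32)
    one_hot[np.arange(y.shape[0]), y] = 1.0
    return one_hot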