def get_player(current_time, version, file_model, solver_version=None, sequence=1):
    """ Load the models of a specific player """

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        '..', 'saved_models', str(current_time))
    try:
        mod = os.listdir(path)
        models = list(filter(lambda model: (model.split('-')[0] == str(version)
                                            and file_model in model), mod))
        models.sort()
        if len(models) == 0:
            return False, version
    except FileNotFoundError:
        return False, version

    if file_model == "vae":
        model = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    elif file_model == "lstm":
        model = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                     NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    elif file_model == "controller":
        model = Controller(PARAMS_CONTROLLER, ACTION_SPACE).to(DEVICE)

    checkpoint = load_torch_models(path, model, models[0])
    if file_model == "controller":
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 '..', 'saved_models', current_time,
                                 "{}-solver.pkl".format(solver_version))
        solver = pickle.load(open(file_path, 'rb'))
        return checkpoint, model, solver
    return model, checkpoint
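# Hedged usage sketch (not part of the original file): get_player returns
# (model, checkpoint) for "vae"/"lstm", (checkpoint, model, solver) for
# "controller", and (False, version) when nothing matching is found on disk.
# The timestamp and version numbers below are placeholders.
res = get_player("1550000000", version=3, file_model="vae")
if res[0] is False:
    print("No saved VAE for that version")
else:
    vae_model, vae_checkpoint = res

ctrl_checkpoint, ctrl_model, cma_solver = get_player(
    "1550000000", version=3, file_model="controller", solver_version=2)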
class RolloutGenerator(object):
    def __init__(self, ga):
        # ga reference
        self.ga = ga
        # compressor model
        self.vae = ga.compressor
        # controller model; trained on the go
        self.controller = Controller(ga.input_size, ga.output_size).cuda()

    def get_action(self, obs, bodystate, brushstate, pulse):
        bodystate_comp = torch.cat(
            (bodystate, brushstate, pulse)) if self.ga.cpg_enabled else torch.cat(
            (bodystate, brushstate))
        latent_mu, _ = self.vae.cuda().encoder(obs.cuda())
        action = self.controller.cuda().forward(
            latent_mu.flatten(), bodystate_comp.cuda().flatten())
        return action.squeeze().cpu().numpy()

    def do_rollout(self, generation, id, early_termination=True):
        with torch.no_grad():
            client = Client(ClientType.ROLLOUT, self.ga.obs_size)
            client.start(generation, id, rollout=self)
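# Hedged usage sketch (not part of the original file): drive a single action
# from dummy inputs. The tensor shapes, the 64x64 observation size, and the
# `ga` object passed in are assumptions about this repo's interfaces, and a
# CUDA device is required because the models above are moved to .cuda().
import torch

rollout = RolloutGenerator(ga)          # `ga` is assumed to exist in scope
obs = torch.zeros(1, 3, 64, 64)         # assumed image observation shape
bodystate = torch.zeros(4)              # assumed proprioceptive state
brushstate = torch.zeros(1)
pulse = torch.zeros(1)                  # used only when ga.cpg_enabled is True
action = rollout.get_action(obs, bodystate, brushstate, pulse)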
def init_models(current_time, load_vae=False, load_lstm=False,
                load_controller=True, sequence=SEQUENCE):
    # checkpoint is initialized too so the function never returns an unbound name
    vae = lstm = best_controller = solver = checkpoint = None

    if load_vae:
        vae, checkpoint = load_model(current_time, -1, model="vae")
        if not vae:
            vae = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)

    if load_lstm:
        lstm, checkpoint = load_model(current_time, -1, model="lstm", sequence=sequence)
        if not lstm:
            lstm = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                        NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)

    if load_controller:
        res = load_model(current_time, -1, model="controller")
        checkpoint = res[0]
        if len(res) > 2:
            best_controller = res[1]
            solver = res[2]
            current_ctrl_version = checkpoint['version']
        else:
            best_controller = Controller(LATENT_VEC, PARAMS_FC1, ACTION_SPACE).to(DEVICE)
            solver = CMAES(PARAMS_FC1 + LATENT_VEC + 512,
                           sigma_init=SIGMA_INIT,
                           popsize=POPULATION)

    return vae, lstm, best_controller, solver, checkpoint
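# Hedged usage sketch (not part of the original file): load all three
# components the same way train_controller does further below; the
# current_time value is a placeholder directory name.
vae, lstm, best_controller, solver, checkpoint = init_models(
    "1550000000", load_vae=True, load_lstm=True, load_controller=True, sequence=1)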
def init(self):
    # get the models
    self.models = {
        json_model["name"]: Model(json_data=json_model)
        for json_model in self.get_data(self.models_endpoint)
    }
    log_str = "Loaded " + str(len(self.models)) + " models: " + str(
        [model.name for model in self.models.values()])
    self.logs.append({
        "ts": time.time(),
        "date": str(datetime.datetime.now()),
        "msg": log_str
    })

    # get the containers
    self.containers = [
        Container(json_data=json_container)
        for json_container in self.get_data(self.containers_endpoint)
    ]

    # group containers by nodes
    self.nodes = set(map(lambda c: c.node, self.containers))
    self.containers_on_node = {}
    for node in self.nodes:
        self.containers_on_node[node] = list(
            filter(lambda c: c.node == node, self.containers))
    log_str = "Containers by node: " + str([{
        node: [c.to_json() for c in self.containers_on_node[node]]
    } for node in self.containers_on_node])
    self.logs.append({
        "ts": time.time(),
        "date": str(datetime.datetime.now()),
        "msg": log_str
    })

    # init controllers
    self.controllers = []
    t = time.time()
    for container in list(
            filter(lambda c: c.device == Device.CPU and c.active,
                   self.containers)):
        c = Controller(container)
        c.next_action = t
        self.controllers.append(c)
    transforms.Resize((RED_SIZE, RED_SIZE)),
    transforms.ToTensor(),
])

trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))

model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(8))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(), lr=learning_rate, betas=(0.9, 0.999))

controller = Controller(LSIZE, 3)
controller = torch.nn.DataParallel(controller, device_ids=range(8))
controller = controller.cuda()
optimizer_a = optim.SGD(controller.parameters(), lr=learning_rate * 10)

# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')
current_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='current!', caption='current.'),
)
recon_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='Reconstruction!', caption='Reconstruction.'),
        A.RGBShift(p=1),
        A.RandomBrightness(p=1),
        A.RandomContrast(p=1)
    ], p=0.5),
    A.OneOf([A.ElasticTransform(p=1.0),
             A.IAAPiecewiseAffine(p=1.0)], p=0.5),
    A.Normalize(p=1.0),
    pytorch.ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(height=512, width=512, p=1.0),
    A.Normalize(p=1.0),
    pytorch.ToTensorV2(),
])

train_loader, val_loader = generate_train_validation_dataloader(
    train_df, val_df, config["train_parameters"]["batch_size"],
    "data/data/images/", train_transform, val_transform)

EF_Net = TL_EfficientNet(config["network_parameters"], True).to(device)
Optimizer = optim.Adam(EF_Net.parameters(),
                       lr=config["train_parameters"]["learning_rate"])
Loss = nn.CrossEntropyLoss()

Control = Controller(model=EF_Net,
                     optimizer=Optimizer,
                     loss=Loss,
                     train_data=train_loader,
                     val_data=val_loader,
                     epochs=config["train_parameters"]["epochs"],
                     device=device)
Control.train()
def main():
    global args
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if args.fixed_arc:
        sys.stdout = Logger(filename='logs/' + args.output_filename + '_fixed.log')
    else:
        sys.stdout = Logger(filename='logs/' + args.output_filename + '.log')

    print(args)

    data_loaders = load_datasets()

    controller = Controller(search_for=args.search_for,
                            search_whole_channels=True,
                            num_layers=args.child_num_layers,
                            num_branches=args.child_num_branches,
                            out_filters=args.child_out_filters,
                            lstm_size=args.controller_lstm_size,
                            lstm_num_layers=args.controller_lstm_num_layers,
                            tanh_constant=args.controller_tanh_constant,
                            temperature=None,
                            skip_target=args.controller_skip_target,
                            skip_weight=args.controller_skip_weight)
    controller = controller.cuda()

    shared_cnn = SharedCNN(num_layers=args.child_num_layers,
                           num_branches=args.child_num_branches,
                           out_filters=args.child_out_filters,
                           keep_prob=args.child_keep_prob)
    shared_cnn = shared_cnn.cuda()

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L218
    controller_optimizer = torch.optim.Adam(params=controller.parameters(),
                                            lr=args.controller_lr,
                                            betas=(0.0, 0.999),
                                            eps=1e-3)

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L213
    shared_cnn_optimizer = torch.optim.SGD(params=shared_cnn.parameters(),
                                           lr=args.child_lr_max,
                                           momentum=0.9,
                                           nesterov=True,
                                           weight_decay=args.child_l2_reg)

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L154
    shared_cnn_scheduler = CosineAnnealingLR(optimizer=shared_cnn_optimizer,
                                             T_max=args.child_lr_T,
                                             eta_min=args.child_lr_min)

    if args.resume:
        if os.path.isfile(args.resume):
            print("Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            # args = checkpoint['args']
            shared_cnn.load_state_dict(checkpoint['shared_cnn_state_dict'])
            controller.load_state_dict(checkpoint['controller_state_dict'])
            shared_cnn_optimizer.load_state_dict(checkpoint['shared_cnn_optimizer'])
            controller_optimizer.load_state_dict(checkpoint['controller_optimizer'])
            shared_cnn_scheduler.optimizer = shared_cnn_optimizer  # Not sure if this actually works
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError("No checkpoint found at '{}'".format(args.resume))
    else:
        start_epoch = 0

    if not args.fixed_arc:
        train_enas(start_epoch, controller, shared_cnn, data_loaders,
                   shared_cnn_optimizer, controller_optimizer, shared_cnn_scheduler)
    else:
        assert args.resume != '', 'A pretrained model should be used when training a fixed architecture.'
        train_fixed(start_epoch, controller, shared_cnn, data_loaders)
import numpy as np
from models.model import TL_ResNet50
from utils.utils import get_device, read_parameters, separate_train_val
from models.controller import Controller
from PIL import Image


def load_image(image_name, transformer=None):
    image = Image.open(image_name)
    if transformer is None:
        image = np.array(image).transpose(2, 0, 1) / 255
        image = image.astype(np.float32)
    else:
        image = transformer(image)
    return image


def evaluate(model, data):
    out = model(data)
    return out


if __name__ == "__main__":
    configs = read_parameters()
    device = get_device()  # call the helper; without the parentheses .to(device) would fail
    ResNet50 = TL_ResNet50(configs["network_parameters"], pretrained=True).to(device)
    Control = Controller(ResNet50)
    Control.load()
    ResNet50.eval()
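# Hedged usage sketch (not part of the original file): classify one image
# with the loaded network. "example.jpg" is a placeholder path, and the
# torch conversion plus batch dimension is an assumption about the model input.
import torch

img = load_image("example.jpg")                        # (3, H, W) float32 array
batch = torch.from_numpy(img).unsqueeze(0).to(device)  # add batch dimension
with torch.no_grad():
    prediction = evaluate(ResNet50, batch)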
trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))

model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(8))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(), lr=learning_rate, betas=(0.9, 0.999))

controller = Controller(LSIZE, 4)
controller = torch.nn.DataParallel(controller, device_ids=range(8))
controller = controller.cuda()
optimizer_a = optim.Adam(controller.parameters(), lr=learning_rate, betas=(0.9, 0.999))

# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')
current_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='current!', caption='current.'),
)
recon_window = vis.image(
validation_split = 0.1
shuffle_dataset = True
test_split = 0.1
random_seed = 42
class_num = 10
# LOSS = FocalLoss()
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

controller = Controller(search_for=args.search_for,
                        search_whole_channels=True,
                        num_layers=args.child_num_layers,
                        num_branches=args.child_num_branches,
                        out_filters=args.child_out_filters,
                        lstm_size=args.controller_lstm_size,
                        lstm_num_layers=args.controller_lstm_num_layers,
                        tanh_constant=args.controller_tanh_constant,
                        temperature=None,
                        skip_target=args.controller_skip_target,
                        skip_weight=args.controller_skip_weight)
controller = controller.cuda()

shared_cnn = SharedCNN(num_layers=args.child_num_layers,
                       num_branches=args.child_num_branches,
                       out_filters=args.child_out_filters,
                       keep_prob=args.child_keep_prob)
shared_cnn = shared_cnn.cuda()

controller.eval()
controller()
    if (seed >= 0):
        env.seed(seed)
    return env


transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# from https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py
if __name__ == "__main__":
    model = VAE(3, 64)
    model = torch.nn.DataParallel(model, device_ids=range(1))
    model.cuda()
    controller = Controller(64, 3)
    controller = torch.nn.DataParallel(controller, device_ids=range(1))
    controller = controller.cuda()
    state = torch.load('/home/ld/gym-car/log/vae/contorl_checkpoint_52.pkl')
    controller.load_state_dict(state['state_dict'])
    print('controller load success')
    state = torch.load('/home/ld/gym-car/log/vae/vae_checkpoint_52.pkl')
    model.load_state_dict(state['state_dict'])
    print('vae load success')
    model_p = VAE_a(7, 64)
    model_p = torch.nn.DataParallel(model_p, device_ids=range(1))
    model_p.cuda()
    state = torch.load('/home/ld/gym-car/log/vae/pre_checkpoint_52.pkl')
    model_p.load_state_dict(state['state_dict'])
    print('prediction load success')
def train_controller(current_time):
    """ Train the controllers by using the CMA-ES algorithm to improve
        candidate solutions by testing them in parallel using multiprocessing """

    current_time = str(current_time)
    number_generations = 1
    games = GAMES
    levels = LEVELS
    current_game = False
    result_queue = Queue()

    vae, lstm, best_controller, solver, checkpoint = init_models(
        current_time, sequence=1, load_vae=True,
        load_controller=True, load_lstm=True)
    if checkpoint:
        current_ctrl_version = checkpoint["version"]
        current_solver_version = checkpoint["solver_version"]
        new_results = solver.result()
        current_best = new_results[1]
    else:
        current_ctrl_version = 1
        current_solver_version = 1
        current_best = 0

    while True:
        solutions = solver.ask()
        fitlist = np.zeros(POPULATION)
        eval_left = 0

        ## Once a level is beaten, remove it from the training set of levels
        if current_best > SCORE_CAP or not current_game:
            if not current_game or len(levels[current_game]) == 0:
                current_game = games[0]
                games.remove(current_game)
                current_best = 0
            current_level = np.random.choice(levels[current_game])
            levels[current_game].remove(current_level)
            print("[CONTROLLER] Current game: %s and level is: %s"
                  % (current_game, current_level))

        while eval_left < POPULATION:
            jobs = []
            todo = PARALLEL if eval_left + PARALLEL <= POPULATION else \
                (eval_left + PARALLEL) % POPULATION

            ## Create the child processes to evaluate in parallel
            print("[CONTROLLER] Starting new batch")
            for job in range(todo):
                process_id = eval_left + job

                ## Assign new weights to the controller, given by the CMA
                controller = Controller(LATENT_VEC, PARAMS_FC1, ACTION_SPACE).to(DEVICE)
                init_controller(controller, solutions[process_id])

                ## Start the evaluation
                new_game = VAECGame(process_id, vae, lstm, controller,
                                    current_game, current_level, result_queue)
                new_game.start()
                jobs.append(new_game)

            ## Wait for the evaluation to be completed
            for p in jobs:
                p.join()
            eval_left = eval_left + todo
            print("[CONTROLLER] Done with batch")

        ## Get the results back from the processes
        times = create_results(result_queue, fitlist)

        ## For display
        current_score = np.max(fitlist)
        average_score = np.mean(fitlist)

        ## Update solver with results
        max_idx = np.argmax(fitlist)
        fitlist = rankmin(fitlist)
        solver.tell(fitlist)
        new_results = solver.result()

        ## Display
        print("[CONTROLLER] Total duration for generation: %.3f seconds, average duration:"
              " %.3f seconds per process, %.3f seconds per run"
              % (np.sum(times), np.mean(times), np.mean(times) / REPEAT_ROLLOUT))
        print("[CONTROLLER] Creating generation: {} ...".format(number_generations + 1))
        print("[CONTROLLER] Current best score: {}, new run best score: {}".format(
            current_best, current_score))
        print("[CONTROLLER] Best score ever: {}, current number of improvements: {}".format(
            current_best, current_ctrl_version))
        print("[CONTROLLER] Average score on all of the processes: {}\n".format(
            average_score))

        ## Save the new best controller
        if current_score > current_best:
            init_controller(best_controller, solutions[max_idx])
            state = {
                'version': current_ctrl_version,
                'solver_version': current_solver_version,
                'score': current_score,
                'level': current_level,
                'game': current_game,
                'generation': number_generations
            }
            save_checkpoint(best_controller, "controller", state, current_time)
            current_ctrl_version += 1
            current_best = current_score

        ## Save solver and change level to a random one
        if number_generations % SAVE_SOLVER_TICK == 0:
            dir_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                    'saved_models', current_time,
                                    "{}-solver.pkl".format(current_solver_version))
            pickle.dump(solver, open(dir_path, 'wb'))
            current_solver_version += 1
            current_level = np.random.choice(levels[current_game])

        number_generations += 1
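# Hedged sketch (not part of the original file) of what the init_controller
# helper used above presumably does: copy a flat CMA-ES solution vector into
# the controller's parameters in iteration order. The name and behaviour are
# assumptions; the real helper lives elsewhere in the repo.
import torch

def init_controller_sketch(controller, flat_solution):
    offset = 0
    with torch.no_grad():
        for param in controller.parameters():
            n = param.numel()
            chunk = torch.as_tensor(flat_solution[offset:offset + n],
                                    dtype=param.dtype,
                                    device=param.device).view_as(param)
            param.copy_(chunk)
            offset += n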
trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(7))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))

model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(7))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(), lr=learning_rate, betas=(0.9, 0.999))

controller = Controller(LSIZE, 3)
controller = torch.nn.DataParallel(controller, device_ids=range(7))
controller = controller.cuda()
optimizer_a = optim.Adam(controller.parameters(), lr=learning_rate, betas=(0.9, 0.999))

# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')
current_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='current!', caption='current.'),
)
recon_window = vis.image(
    A.Normalize(p=1.0),
    pytorch.ToTensorV2(),
])

KF = StratifiedKFold()
for train, val in KF.split(df["labels"].values, df["classes"].values):
    x_train, x_val = np.array(df["labels"].values)[train], np.array(df["labels"].values)[val]
    y_train, y_val = np.array(df["classes"].values)[train], np.array(df["classes"].values)[val]

    train_loader, val_loader = generate_train_validation_dataloader(
        x_train, y_train, transform, x_val, y_val, transform_val_test,
        configs["train_parameters"]["batch_size"], "data/images/")

    EFNet = EF_Net().to(device)
    Loss = nn.CrossEntropyLoss()
    Optimizer = optim.Adam(EFNet.parameters(),
                           lr=configs["train_parameters"]["learning_rate"])
    Stepper = optim.lr_scheduler.ReduceLROnPlateau(
        Optimizer, patience=configs["train_parameters"]["patience"])
    Metrics = Accuracy_Metric()

    Control = Controller(model=EFNet,
                         optimizer=Optimizer,
                         loss=Loss,
                         metric=Metrics,
                         train_data=train_loader,
                         validation_data=val_loader,
                         epochs=configs["train_parameters"]["epochs"],
                         device=device,
                         lr_scheduler=Stepper)
    Control.train()
def run(self, max_generations, folder, ga_id='', init_solution_id=''):
    if (ga_id == ''):
        ga_id = self.init_time

    # disk
    results_dir = os.path.join(folder, ga_id)
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    fitness_path = os.path.join(
        results_dir, 'fitness.txt')  # most important fitness results per run (for plotting)
    ind_fitness_path = os.path.join(
        results_dir, 'ind_fitness.txt')  # more detailed fitness results per individual
    solver_path = os.path.join(
        results_dir, "solver.pkl")  # contains the current population
    best_solver_path = os.path.join(
        results_dir, "best_solver.pkl")  # snapshot of the solver at the best generation so far
    init_solution_path = os.path.join(
        os.path.join(folder, init_solution_id), "solver.pkl")  # path to initial solution solver

    current_generation = 0
    P = self.P
    best_f = -sys.maxsize

    # initialize controller instance to be saved
    from models.controller import Controller
    best_controller = Controller(P[0].input_size, P[0].output_size)

    # initialize cma es (start from scratch or load previously saved solver/population)
    resume = False
    if os.path.exists(solver_path):
        resume = True
        self.solver = pickle.load(open(solver_path, 'rb'))
        new_results = self.solver.result()
        best_f = new_results[1]
        if os.path.exists(fitness_path):
            with open(fitness_path, 'r') as f:
                lines = f.read().splitlines()
                last_line = lines[-1]
                current_generation = int(last_line.split('/')[0])
    # start from scratch but with an initial solution param
    elif os.path.exists(init_solution_path):
        tmp_solver = pickle.load(open(init_solution_path, 'rb'))
        self.solver = CMAES(num_params=self.num_controller_params,
                            solution_init=tmp_solver.best_param(),
                            sigma_init=0.1,
                            popsize=self.pop_size)
    # completely start from scratch
    else:
        self.solver = CMAES(num_params=self.num_controller_params,
                            sigma_init=0.1,
                            popsize=self.pop_size)

    if not resume:
        with open(fitness_path, 'a') as file:
            file.write('gen/avg/cur/best\n')
        with open(ind_fitness_path, 'a') as file:
            file.write('gen/id/fitness/coverage/coverageReward/IC/PC/PCt0/PCt1\n')

    while current_generation < max_generations:
        fitness = np.zeros(self.pop_size)
        results_full = np.zeros(self.pop_size)

        print(f'Generation {current_generation}')
        print(f'Evaluating individuals: {len(P)}')

        # ask the ES to give us a set of candidate solutions
        solutions = self.solver.ask()

        # evaluate all candidates
        for i, s in enumerate(P):
            set_controller_weights(s.controller, solutions[i])
            s.run_solution(generation=current_generation, local_id=i)

        # request fitness from simulator
        results_full = Client(ClientType.REQUEST).start()
        fitness = results_full[:, 0]
        for i, s in enumerate(P):
            s.fitness = fitness[i]

        current_f = np.max(fitness)
        average_f = np.mean(fitness)
        print(f'Current best: {current_f}\nCurrent average: {average_f}\nAll-time best: {best_f}')

        # return rewards to ES for param update
        self.solver.tell(fitness)
        max_index = np.argmax(fitness)
        new_results = self.solver.result()

        # process results
        pickle.dump(self.solver, open(solver_path, 'wb'))
        if current_f > best_f:
            set_controller_weights(best_controller, solutions[max_index])
            torch.save(best_controller,
                       os.path.join(results_dir, 'best_controller.pth'))
            # Save solver and change level to a random one
            pickle.dump(self.solver, open(best_solver_path, 'wb'))
            best_f = current_f

        for i, s in enumerate(P):
            # fitness/coverage/coverageReward/IC/PC/PCt0/PCt1
            res = results_full[i, :]
            res_str = ('/'.join(['%.6f'] * len(res))) % tuple(res)
            with open(ind_fitness_path, 'a') as file:
                file.write('%d/%d/%s\n' % (current_generation, i, res_str))

        res_str = '%d/%f/%f/%f' % (current_generation, average_f, current_f, best_f)
        print(f'gen/avg/cur/best : {res_str}')
        with open(fitness_path, 'a') as file:
            file.write(f'{res_str}\n')

        if (i > max_generations):
            break

        gc.collect()
        current_generation += 1

    print('Finished')
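# Hedged sketch (not part of the original file): read back the fitness.txt
# written above and recover the per-generation statistics. The layout
# (slash-separated, 'gen/avg/cur/best' header) follows the format written in run().
def read_fitness_log(fitness_path):
    history = []
    with open(fitness_path, 'r') as f:
        next(f)  # skip the 'gen/avg/cur/best' header line
        for line in f:
            gen, avg, cur, best = line.strip().split('/')
            history.append((int(gen), float(avg), float(cur), float(best)))
    return history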
m_optimizer = torch.optim.RMSprop(m_model.parameters(), lr=1e-3, alpha=.9)  # pipaek: hardmaru uses lr=1e-4
m_scheduler = ReduceLROnPlateau(m_optimizer, 'min', factor=0.5, patience=5)
m_earlystopping = EarlyStopping('min', patience=30)  # patience 30 -> 5

# 3-3. Train the MDN-RNN model (M)
m_model_train_proc(rnn_dir, m_model, v_model, m_dataset_train, m_dataset_test,
                   m_optimizer, m_scheduler, m_earlystopping,
                   skip_train=True, max_train_epochs=30)

m_model_cell = get_mdrnn_cell(rnn_dir).to(device)

# 4-1. Create the Controller model (C)
controller = Controller(LSIZE, RSIZE, ASIZE).to(device)

# 4-2. Train the Controller model (C)
controller_train_proc(ctrl_dir, controller, v_model, m_model_cell, skip_train=False)

# 4-3. Demonstrate the Controller model (C) (optional)
controller_test_proc(controller, v_model, m_model_cell)