def distributions():
    """Render the distributions page with statistics for the default dice.

    Clears the module-level ``conditions`` list, collects statistics for one
    default action and stores its report as the only entry of the
    module-level ``dummystats`` list. Both lists are passed to the
    ``distributions.html`` template together with a placeholder image path.
    """
    conditions.clear()

    # Default dice pair handed to Diceset as a single (dice1, dice2) tuple.
    dice1 = 8
    dice2 = 6

    d20 = d20Set(1)
    diceset = Diceset([(dice1, dice2)])
    action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.5)

    stats = Statistics(action)
    stats.collect_statistics()

    # dummystats is shared module state: replace its content, not the list.
    dummystats.clear()
    dummystats.append(stats.report_statistics())

    return render_template('distributions.html',
                           imagepath='static/img/placeholder.png',
                           stats=dummystats,
                           conditions=conditions,
                           collecting=False)
def test_detection(self, dataloader, label_by_block=False, dataset_name=None):
    """Run the model over ``dataloader`` and evaluate a sweep of thresholds.

    For every sample the per-sample loss is computed with ``self.criterion``
    and passed to ``self.evaluate_threshold`` once per candidate threshold.
    The collected data is registered through ``self.env.add_test_result``
    and returned.

    Args:
        dataloader: Iterable yielding ``(inputs, outputs, labels)`` batches.
        label_by_block: Forwarded to ``evaluate_threshold`` and stored in
            the result.
        dataset_name: Name forwarded to ``self.env.add_test_result``.

    Returns:
        A dict with the keys ``stats`` (statistics for ``self.threshold``),
        ``label_by_block``, ``block_split`` (cumulative end index of every
        sample except the last), ``labels``, ``threshold``, ``thresholds``
        (list of ``(threshold, Statistics)`` pairs) and ``e`` (all losses).
    """
    stats = Statistics()
    detection_data = {
        'stats': stats,
        'label_by_block': label_by_block,
        'block_split': [],
        'labels': [],
        'threshold': self.threshold,
        'thresholds': [(t, Statistics())
                       for t in np.linspace(0, 1.5 * self.threshold, num=50)],
        'e': []
    }
    # The configured threshold shares its Statistics object with 'stats'.
    detection_data['thresholds'].append((self.threshold, stats))
    last_block_index = 0

    logger.info(f" Computing and comparing predictions ({'labeled by block' if label_by_block else 'labeled by sample'})")
    for inputs, outputs, labels in dataloader:
        inputs = inputs.to(device=self.device)
        outputs = outputs.to(device='cpu')
        labels = labels.to(device='cpu')
        results = self.predict(inputs).to(device='cpu')
        # Labels are only used by the cos loss in the labeled-anomaly
        # training data experiment.
        losses = self.criterion(results, outputs, labels)
        for i in range(losses.size()[0]):
            loss = losses[i].numpy()
            label = labels[i].numpy()
            for t, s in detection_data['thresholds']:
                self.evaluate_threshold(t, s, loss, label, label_by_block)
            last_block_index += len(loss)
            detection_data['block_split'].append(last_block_index)
            detection_data['labels'].extend(label)
            detection_data['e'].extend(loss)

    # The final cumulative index equals len(detection_data['e']) and is not
    # a split point, so it is dropped. Guarded so that a dataloader yielding
    # no samples returns empty results instead of raising IndexError.
    if detection_data['block_split']:
        detection_data['block_split'].pop()

    self.env.add_test_result(self.epoch, dataset_name, detection_data)
    return detection_data
def home():
    """Render the home page.

    Statistics for a default action are still collected on every call, but
    the result is not passed to the ``home.html`` template.
    """
    default_dice = (8, 6)
    action = Action(d20Set(1),
                    0,
                    Diceset([default_dice]),
                    crit_numbers=[],
                    fail_dmg_scale=0.5)
    Statistics(action).collect_statistics()
    return render_template('home.html')
def initialize_simulation(self):
    """Set up the simulator state before a simulation run.

    Required when the ``simulator.simulate`` function is called. Resets the
    event list, initializes the database and the people, creates a fresh
    ``Statistics`` object and the simulation clock, and then schedules the
    plan-day events, the virus-spread events and the initial infections.
    """
    self.events = []
    self.initialize_db()
    self.initialize_people()
    self.statistics = Statistics(simulator=self)

    # The clock starts at the initial date/time of end_time with zero offset.
    clock_origin = self.end_time.init_date_time
    self.clock = Time(delta_time=timedelta(0), init_date_time=clock_origin)

    self.initialize_plan_day_events(self.end_time)
    self.initialize_virus_spread_events(self.end_time, self.spread_period)
    self.initialize_infections(self.initialized_infected_ids)
def test(self, dataloader, threshold=0.5, label_by_block=False):
    """Classify every sample of ``dataloader`` over a sweep of thresholds.

    The model output of each sample is passed to
    ``LogAnomalyDetection.evaluate_threshold`` once per candidate threshold.

    Args:
        dataloader: Iterable yielding ``(inputs, outputs, labels)`` batches;
            ``outputs`` is not used here.
        threshold: Reference threshold; the sweep covers 50 values from 0 to
            ``1.5 * threshold`` plus ``threshold`` itself.
        label_by_block: Forwarded to ``evaluate_threshold`` and stored in
            the result.

    Returns:
        A dict with the keys ``stats`` (statistics for ``threshold``),
        ``label_by_block``, ``block_split`` (cumulative end index of every
        sample except the last), ``labels``, ``threshold``, ``thresholds``
        (list of ``(threshold, Statistics)`` pairs) and ``e`` (all model
        outputs).
    """
    stats = Statistics()
    detection_data = {
        'stats': stats,
        'label_by_block': label_by_block,
        'block_split': [],
        'labels': [],
        'threshold': threshold,
        'thresholds': [(t, Statistics())
                       for t in np.linspace(0, 1.5 * threshold, num=50)],
        'e': []
    }
    # The reference threshold shares its Statistics object with 'stats'.
    detection_data['thresholds'].append((threshold, stats))
    last_block_index = 0

    logger.info(f" Computing classification ({'labeled by block' if label_by_block else 'labeled by sample'})")
    for inputs, outputs, labels in dataloader:
        inputs = inputs.to(device=self.device)
        labels = labels.to(device='cpu')
        results = self.predict(inputs).to(device='cpu')
        for i in range(results.size()[0]):
            loss = results[i].numpy()
            label = labels[i].numpy()
            for t, s in detection_data['thresholds']:
                LogAnomalyDetection.evaluate_threshold(t, s, loss, label, label_by_block)
            last_block_index += len(loss)
            detection_data['block_split'].append(last_block_index)
            detection_data['labels'].extend(label)
            detection_data['e'].extend(loss)

    # The final cumulative index equals len(detection_data['e']) and is not
    # a split point, so it is dropped. Guarded so that a dataloader yielding
    # no samples returns empty results instead of raising IndexError.
    if detection_data['block_split']:
        detection_data['block_split'].pop()
    return detection_data
def run(args=None):
    """Train, test or certify a deepTrunk network, then exit the process.

    ``args`` is dereferenced immediately, so despite the default it must be
    a populated argument namespace. The model directory is derived from the
    dataset, experiment name/id, network, training epsilon and the current
    timestamp; the arguments and the run configuration are written there
    before the selected mode runs.

    The mode is chosen from ``args.train_mode``: any value containing
    ``'train'`` trains (and certifies afterwards when ``args.cert`` is set),
    ``'test'`` evaluates and ``'cert'`` certifies. Any other value fails an
    assertion. The function always ends with ``exit(0)``.
    """
    cuda_requested = torch.cuda.is_available() and (not args.no_cuda)
    device = 'cuda' if cuda_requested else 'cpu'

    (num_train, train_loader, test_loader,
     input_size, input_channel, n_class) = get_loaders(args)

    per_sample_loss = nn.CrossEntropyLoss(reduction='none')

    def predicted_class(x):
        # Index of the maximum along dim 1.
        return torch.max(x, dim=1)[1]

    def evaluation_loader():
        # Evaluate on the test split only when explicitly requested.
        return test_loader if args.test_set == "test" else train_loader

    ## initialize SpecNet
    dTNet = MyDeepTrunkNet.get_deepTrunk_net(args, device, per_sample_loss,
                                             predicted_class, input_size,
                                             input_channel, n_class)

    ## setup logging and checkpointing
    timestamp = int(time.time())
    signature_fields = (args.dataset, args.exp_name, args.exp_id, args.net,
                        args.train_eps, timestamp)
    model_signature = '%s/%s/%d/%s_%.5f/%d' % signature_fields
    model_dir = args.root_dir + 'models_new/%s' % (model_signature)
    args.model_dir = model_dir

    print("Saving model to: %s" % model_dir)
    count_vars(args, dTNet)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    tb_writer = SummaryWriter(model_dir)
    stats = Statistics(len(train_loader), tb_writer, model_dir)

    with open(os.path.join(model_dir, 'args.json'), 'w') as args_out:
        json.dump(vars(args), args_out, indent=4)
    write_config(args, os.path.join(model_dir, 'run_config.txt'))

    ## main part depending on training mode
    mode = args.train_mode
    if 'train' in mode:
        epoch = train_deepTrunk(dTNet, args, device, stats, train_loader,
                                test_loader)
        if args.cert:
            with torch.no_grad():
                cert_deepTrunk_net(dTNet, args, device, evaluation_loader(),
                                   stats, log_ind=True,
                                   break_on_failure=False, epoch=epoch)
    elif mode == 'test':
        with torch.no_grad():
            test_deepTrunk_net(dTNet, args, device, evaluation_loader(),
                               stats, log_ind=True)
    elif mode == "cert":
        with torch.no_grad():
            cert_deepTrunk_net(dTNet, args, device, evaluation_loader(),
                               stats, log_ind=True, break_on_failure=False)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    exit(0)
def evaluate_threshold(errors, labels, threshold):
    """Count confusion-matrix outcomes for one threshold.

    A sample is flagged when the maximum of its error values exceeds
    ``threshold``. Samples labeled ``1`` count as true positive when flagged
    and false negative otherwise; all other samples count as false positive
    when flagged and true negative otherwise.

    Args:
        errors: Iterable of per-sample error collections (each reduced with
            ``np.max``).
        labels: Iterable of per-sample labels, paired with ``errors``.
        threshold: Value the maximum error is compared against.

    Returns:
        The ``Statistics`` object holding the accumulated counts.
    """
    stats = Statistics()
    for error, label in zip(errors, labels):
        flagged = np.max(error) > threshold
        if label == 1:
            if flagged:
                stats.add_tp()
            else:
                stats.add_fn()
        else:
            if flagged:
                stats.add_fp()
            else:
                stats.add_tn()
    return stats
def evaluate(self, dataloader, threshold, label_by_block=False):
    """Compute the mean validation loss and threshold statistics.

    Args:
        dataloader: Iterable yielding ``(inputs, outputs, labels)`` batches.
        threshold: Threshold forwarded to
            ``LogAnomalyDetection.evaluate_threshold``.
        label_by_block: Forwarded to ``evaluate_threshold``.

    Returns:
        A ``(summary, data)`` tuple: ``summary`` is a formatted string and
        ``data`` is a dict with the keys ``stats`` and ``validation_loss``
        (sum of the batch losses divided by ``len(dataloader)``).
    """
    stats = Statistics()
    loss_sum = 0
    for inputs, outputs, labels in dataloader:
        inputs = inputs.to(device=self.device)
        outputs = outputs.to(device=self.device)
        labels = labels.to(device='cpu')

        predictions = self.predict(inputs)
        batch_loss = self.criterion(predictions, outputs)
        predictions = predictions.to(device='cpu')
        loss_sum += batch_loss.to(device='cpu').item()

        batch_size = predictions.size()[0]
        for row in range(batch_size):
            LogAnomalyDetection.evaluate_threshold(threshold, stats,
                                                   predictions[row],
                                                   labels[row],
                                                   label_by_block)

    validation_loss = loss_sum / len(dataloader)
    data = {'stats': stats, 'validation_loss': validation_loss}
    summary = "Validation loss: %.3f, %s" % (validation_loss, stats.as_string())
    return summary, data
def calculate():
    """Plot and report statistics for every queued condition.

    Deletes the files in ``static/img/temp/``, then for each entry of the
    module-level ``conditions`` list builds an action, collects its
    statistics and plots its histogram. The plot is saved under a name built
    from the last condition's details, ``conditions`` is emptied, and the
    distributions page is rendered with a copy of the processed conditions.

    With no queued conditions the page is rendered with an empty report
    list; previously this raised because the loop variable was never bound.
    """
    root = os.path.dirname(__file__)
    temp_dir = os.path.join(root, 'static/img/temp/')
    # Remove the plots left behind by earlier calls.
    for filename in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, filename))

    # The opacity passed to plot_histogram shrinks as conditions are added.
    alpha = 0.9**len(conditions)
    reports = []
    for index, details in enumerate(conditions):
        d20 = d20Set(1)
        diceset = Diceset([(details[0], details[1])], details[2], details[3],
                          details[4], details[5])
        action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.0)
        stats = Statistics(action)
        stats.collect_statistics()
        stats.plot_histogram(alpha, str(index))
        reports.append(stats.report_statistics())

    # The file name concatenates the details of the last condition; an empty
    # queue yields the bare '.png' name instead of an unbound-variable error.
    last_details = conditions[-1] if conditions else ()
    plotname = ('static/img/temp/'
                + ''.join(str(detail) for detail in last_details)
                + '.png')

    copyconditions = conditions.copy()
    conditions.clear()

    plt.savefig(os.path.join(root, plotname))
    plt.clf()
    return render_template('distributions.html',
                           imagepath=plotname,
                           stats=reports,
                           conditions=copyconditions,
                           collecting=False)
def run_experiment(maze_env,
                   trial_out_dir,
                   args=None,
                   n_generations=100,
                   save_results=False,
                   silent=False):
    """
    The function to run the experiment against hyper-parameters
    defined in the provided configuration file.
    The winner genome will be rendered as a graph as well as the
    important statistics of neuroevolution process execution.
    Arguments:
        maze_env:       The maze environment to use in simulation.
        trial_out_dir:  The directory to store outputs for this trial
        n_generations:  The number of generations to execute.
        save_results:   The flag to control if intermediate results will be saved.
        silent:         If True then no intermediary outputs will be
                        presented until solution is found.
        args:           The command line arguments holder. When None, the
                        drawing functions are called without explicit
                        width/height.
    Returns:
        True if experiment finished with successful solver found.
    """
    # set random seed
    seed = int(time.time())
    print("Random seed: %d" % seed)

    # Create Population of Robots and objective functions
    robot = create_robot(maze_env, seed=seed)
    obj_func = create_objective_fun(seed)

    # Run for up to N generations.
    start_time = time.time()
    best_robot_genome_ser = None
    best_robot_id = -1
    solution_found = False
    best_obj_func_coeffs = None
    best_solution_novelty = 0
    best_solution_distance = 0

    stats = Statistics()
    for generation in range(n_generations):
        print("\n****** Generation: %d ******\n" % generation)
        gen_time = time.time()

        # evaluate objective function population
        obj_func_coeffs, max_obj_func_fitness = evaluate_obj_functions(
            obj_func, generation)

        # evaluate robots population
        robot_genome, solution_found, robot_fitness, distances, \
            obj_coeffs, best_distance, best_novelty = evaluate_solutions(
                robot=robot, obj_func_coeffs=obj_func_coeffs,
                generation=generation)

        stats.post_evaluate(max_fitness=robot_fitness, errors=distances)

        # store the best genome (serialized to freeze its current state)
        if solution_found or robot.population.GetBestFitnessEver() < robot_fitness:
            best_robot_genome_ser = pickle.dumps(robot_genome)
            best_robot_id = robot_genome.GetID()
            best_obj_func_coeffs = obj_coeffs
            best_solution_novelty = best_novelty
            best_solution_distance = best_distance

        if solution_found:
            print(
                '\nSolution found at generation: %d, best fitness: %f, species count: %d\n'
                % (generation, robot_fitness, len(robot.population.Species)))
            break

        # advance to the next generation
        robot.population.Epoch()
        obj_func.population.Epoch()

        # print statistics
        gen_elapsed_time = time.time() - gen_time
        print("Generation fitness -> solution: %f, objective function: %f" %
              (robot_fitness, max_obj_func_fitness))
        print("Gen. species count -> solution: %d, objective function: %d" %
              (len(robot.population.Species), len(obj_func.population.Species)))
        print("Gen. archive size -> solution: %d, objective function: %d" %
              (robot.archive.size(), obj_func.archive.size()))
        print("Objective function coeffts: %s" % obj_coeffs)
        print("Gen. best solution genome ID: %d, distance to exit: %f, novelty: %f" %
              (robot_genome.GetID(), best_distance, best_novelty))
        print("->")
        print("Best fitness ever -> solution: %f, objective function: %f" %
              (robot.population.GetBestFitnessEver(),
               obj_func.population.GetBestFitnessEver()))
        print("Best ever solution genome ID: %d, distance to exit: %f, novelty: %f" %
              (best_robot_id, best_solution_distance, best_solution_novelty))
        print("------------------------------")
        print("Generation elapsed time: %.3f sec\n" % (gen_elapsed_time))

    elapsed_time = time.time() - start_time

    # Load serialized best robot genome
    best_robot_genome = pickle.loads(best_robot_genome_ser)

    # write best genome to the file
    best_genome_file = os.path.join(trial_out_dir, "best_robot_genome.pickle")
    with open(best_genome_file, 'wb') as genome_file:
        pickle.dump(best_robot_genome, genome_file)

    # write the record store data
    rs_file = os.path.join(trial_out_dir, "data.pickle")
    robot.record_store.dump(rs_file)

    print("==================================")
    print("Record store file: %s" % rs_file)
    print("Random seed: %d" % seed)
    print("............")
    print("Best solution fitness: %f, genome ID: %d" %
          (robot.population.GetBestFitnessEver(), best_robot_genome.GetID()))
    print("Best objective func coefficients: %s" % best_obj_func_coeffs)
    print("------------------------------")

    # Visualize the experiment results
    show_results = not silent
    if save_results or show_results:
        if args is None:
            visualize.draw_maze_records(maze_env,
                                        robot.record_store.records,
                                        view=show_results)
        else:
            visualize.draw_maze_records(maze_env,
                                        robot.record_store.records,
                                        view=show_results,
                                        width=args.width,
                                        height=args.height,
                                        filename=os.path.join(
                                            trial_out_dir, 'maze_records.svg'))
        # store NoveltyItems archive data
        robot.archive.write_to_file(
            path=os.path.join(trial_out_dir, 'ns_items_all.txt'))

        # create the best genome simulation path and render
        maze_env = copy.deepcopy(robot.orig_maze_environment)
        multi_net = NEAT.NeuralNetwork()
        best_robot_genome.BuildPhenotype(multi_net)

        # Use the depth of the best genome. The original read it from an
        # undefined name inside a bare ``except``, so the fallback of 8 was
        # always used. The fallback stays for genomes whose depth cannot be
        # calculated.
        depth = 8
        try:
            best_robot_genome.CalculateDepth()
            depth = best_robot_genome.GetDepth()
        except Exception:
            pass

        control_net = ANN(multi_net, depth=depth)
        path_points = []
        distance = maze.maze_simulation_evaluate(env=maze_env,
                                                 net=control_net,
                                                 time_steps=SOLVER_TIME_STEPS,
                                                 path_points=path_points)
        print("Best solution distance to maze exit: %.2f, novelty: %.2f" %
              (distance, best_solution_novelty))

        # Only pass width/height when command line arguments are available;
        # assumes draw_agent_path has defaults for them, as
        # draw_maze_records does above - TODO confirm.
        path_kwargs = {
            'view': show_results,
            'filename': os.path.join(trial_out_dir, 'best_solver_path.svg'),
        }
        if args is not None:
            path_kwargs['width'] = args.width
            path_kwargs['height'] = args.height
        visualize.draw_agent_path(robot.orig_maze_environment, path_points,
                                  best_robot_genome, **path_kwargs)

        # Draw the best agent phenotype ANN
        visualize.draw_net(multi_net,
                           view=show_results,
                           filename="best_solver_net",
                           directory=trial_out_dir)

        # Visualize statistics
        visualize.plot_stats(stats,
                             ylog=False,
                             view=show_results,
                             filename=os.path.join(trial_out_dir,
                                                   'avg_fitness.svg'))

    print("------------------------")
    print("Trial elapsed time: %.3f sec" % (elapsed_time))
    print("==================================")

    return solution_found
def main(args, local_rank):
    """Train a ``MatchingModel`` on the GPU given by ``local_rank``.

    Builds the source/target vocabularies, creates the model (from a
    checkpoint when ``args.resume_ckpt`` is set, otherwise from the
    hyper-parameters in ``args``) and runs the training loop until
    ``global_step`` exceeds ``args.total_train_steps``. Logging, validation
    and checkpoint saving happen only when ``args.world_size == 1`` or on
    rank 0.

    Args:
        args: Argument namespace with data paths and hyper-parameters.
        local_rank: CUDA device index used by this process.
    """
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)

    vocabs = dict()
    vocabs['src'] = Vocab(args.src_vocab, 0, [BOS, EOS])
    vocabs['tgt'] = Vocab(args.tgt_vocab, 0, [BOS, EOS])

    if args.world_size == 1 or (dist.get_rank() == 0):
        logger.info(args)
        for name in vocabs:
            logger.info("vocab %s, size %d, coverage %.3f", name,
                        vocabs[name].size, vocabs[name].coverage)

    # Same seed on every rank while the model is being created.
    set_seed(19940117)

    #device = torch.device('cpu')
    torch.cuda.set_device(local_rank)
    device = torch.device('cuda', local_rank)

    if args.resume_ckpt:
        model = MatchingModel.from_pretrained(vocabs, args.resume_ckpt)
    else:
        model = MatchingModel.from_params(vocabs, args.layers, args.embed_dim,
                                          args.ff_embed_dim, args.num_heads,
                                          args.dropout, args.output_dim,
                                          args.bow)

    # After model creation each rank switches to its own seed.
    if args.world_size > 1:
        set_seed(19940117 + dist.get_rank())

    model = model.to(device)

    # Report the accuracy of the restored checkpoint before training.
    if args.resume_ckpt:
        dev_data = DataLoader(vocabs,
                              args.dev_data,
                              args.dev_batch_size,
                              addition=args.additional_negs)
        acc = validate(model, dev_data, device)
        logger.info("initialize from %s, initial acc %.2f", args.resume_ckpt,
                    acc)

    optimizer = Adam(model.parameters(),
                     lr=args.lr,
                     betas=(0.9, 0.98),
                     eps=1e-9)
    lr_schedule = get_linear_schedule_with_warmup(optimizer,
                                                  args.warmup_steps,
                                                  args.total_train_steps)
    train_data = DataLoader(vocabs,
                            args.train_data,
                            args.per_gpu_train_batch_size,
                            worddrop=args.worddrop,
                            addition=args.additional_negs)

    # step counts batches; global_step counts optimizer updates.
    global_step, step, epoch = 0, 0, 0
    tr_stat = Statistics()
    logger.info("start training")
    model.train()
    while global_step <= args.total_train_steps:
        for batch in train_data:
            batch = move_to_device(batch, device)
            loss, acc, bsz = model(batch['src_tokens'], batch['tgt_tokens'],
                                   args.label_smoothing)
            # loss and acc are weighted by bsz so the sums can be divided by
            # 'nsamples' when they are logged.
            tr_stat.update({
                'loss': loss.item() * bsz,
                'nsamples': bsz,
                'acc': acc * bsz
            })
            tr_stat.step()
            loss.backward()

            step += 1
            # Gradient accumulation: -1 % n equals n - 1, so the optimizer
            # update below runs only on every n-th batch.
            if not (step % args.gradient_accumulation_steps
                    == -1 % args.gradient_accumulation_steps):
                continue

            if args.world_size > 1:
                average_gradients(model)

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            lr_schedule.step()
            optimizer.zero_grad()
            global_step += 1

            if args.world_size == 1 or (dist.get_rank() == 0):
                if global_step % args.print_every == -1 % args.print_every:
                    logger.info("epoch %d, step %d, loss %.3f, acc %.3f",
                                epoch, global_step,
                                tr_stat['loss'] / tr_stat['nsamples'],
                                tr_stat['acc'] / tr_stat['nsamples'])
                    # Start a fresh accumulation window after each report.
                    tr_stat = Statistics()
                # Validation and checkpointing start only after warm-up.
                if global_step > args.warmup_steps and global_step % args.eval_every == -1 % args.eval_every:
                    dev_data = DataLoader(vocabs,
                                          args.dev_data,
                                          args.dev_batch_size,
                                          addition=args.additional_negs)
                    acc = validate(model, dev_data, device)
                    logger.info("epoch %d, step %d, dev, dev acc %.2f", epoch,
                                global_step, acc)
                    save_path = '%s/epoch%d_batch%d_acc%.2f' % (
                        args.ckpt, epoch, global_step, acc)
                    model.save(args, save_path)
                    # Switch back to training mode after validation.
                    model.train()
            if global_step > args.total_train_steps:
                break
        epoch += 1
    logger.info('rank %d, finish training after %d steps', local_rank,
                global_step)
def run(args=None):
    """Build a network and train, print, test or certify it.

    ``args`` is dereferenced immediately, so despite the default it must be
    a populated argument namespace. The action is chosen from
    ``args.train_mode``:

    * any value containing ``'train'``: layer-wise training with periodic
      testing and checkpointing, followed by certification when
      ``args.cert`` is set;
    * ``'print'``: export the network with ``torch.onnx.export``;
    * ``'test'``: evaluate the network;
    * ``'cert'``: certify the network and then call ``exit(0)``.

    Any other value fails an assertion.

    Returns:
        ``(test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc)`` from
        the last test performed during training; each entry stays ``None``
        when no such test ran.
    """
    device = 'cuda' if torch.cuda.is_available() and (
        not args.no_cuda) else 'cpu'

    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(
        args)
    lossFn = nn.CrossEntropyLoss(reduction='none')
    # Index of the maximum along dim 1.
    evalFn = lambda x: torch.max(x, dim=1)[1]

    net = get_net(device,
                  args.dataset,
                  args.net,
                  input_size,
                  input_channel,
                  n_class,
                  load_model=args.load_model,
                  net_dim=args.cert_net_dim
                  )  #, feature_extract=args.core_feature_extract)

    # The model directory is unique per run because it ends in a timestamp.
    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name,
                                               args.exp_id, args.net,
                                               args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % (model_signature)
    args.model_dir = model_dir
    count_vars(args, net)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # No relaxed network (and no ReLU ids) is built for an UpscaleNet.
    if isinstance(net, UpscaleNet):
        relaxed_net = None
        relu_ids = None
    else:
        relaxed_net = RelaxedNetwork(net.blocks, args.n_rand_proj).to(device)
        relu_ids = relaxed_net.get_relu_ids()

    if "nat" in args.train_mode:
        cnet = CombinedNetwork(net,
                               relaxed_net,
                               lossFn=lossFn,
                               evalFn=evalFn,
                               device=device,
                               no_r_net=True).to(device)
    else:
        dummy_input = torch.rand((1, ) + net.dims[0],
                                 device=device,
                                 dtype=torch.float32)
        cnet = CombinedNetwork(net,
                               relaxed_net,
                               lossFn=lossFn,
                               evalFn=evalFn,
                               device=device,
                               dummy_input=dummy_input).to(device)

    n_epochs, test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = args.n_epochs, None, None, None, None

    if 'train' in args.train_mode:
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))

        eps = 0
        # epoch counts epochs across all training phases.
        epoch = 0
        lr = args.lr
        n_epochs = args.n_epochs

        # Select the layer schedule; some modes also override args fields.
        if "COLT" in args.train_mode:
            relu_stable = args.relu_stable
            # if args.layers is None:
            #     args.layers = [-2, -1] + relu_ids
            layers = get_layers(args.train_mode,
                                cnet,
                                n_attack_layers=args.n_attack_layers,
                                protected_layers=args.protected_layers)
        elif "adv" in args.train_mode:
            relu_stable = None
            layers = [-1, -1]
            args.mix = False
        elif "natural" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
            args.nat_factor = 1
            args.mix = False
        elif "diffAI" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
        else:
            assert False, "Unknown train mode %s" % args.train_mode

        print('Saving model to:', model_dir)
        print('Training layers: ', layers)

        # One training phase per consecutive pair in layers; each phase gets
        # a fresh optimizer and scheduler.
        for j in range(len(layers) - 1):
            opt, lr_scheduler = get_opt(cnet.net,
                                        args.opt,
                                        lr,
                                        args.lr_step,
                                        args.lr_factor,
                                        args.n_epochs,
                                        train_loader,
                                        args.lr_sched,
                                        fixup="fixup" in args.net)

            curr_layer_idx = layers[j + 1]
            eps_old = eps
            eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)

            kappa_sched = Scheduler(0.0 if args.mix else 1.0, 1.0,
                                    num_train * args.mix_epochs, 0)
            beta_sched = Scheduler(
                args.beta_start if args.mix else args.beta_end, args.beta_end,
                args.train_batch * len(train_loader) * args.mix_epochs, 0)
            # With annealing, eps starts from the previous phase's value.
            eps_sched = Scheduler(eps_old if args.anneal else eps, eps,
                                  num_train * args.anneal_epochs, 0)

            layer_dir = '{}/{}'.format(model_dir, curr_layer_idx)
            if not os.path.exists(layer_dir):
                os.makedirs(layer_dir)

            print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.
                  format(eps, lr, curr_layer_idx))

            for curr_epoch in range(n_epochs):
                train(device,
                      epoch,
                      args,
                      j + 1,
                      layers,
                      cnet,
                      eps_sched,
                      kappa_sched,
                      opt,
                      train_loader,
                      lr_scheduler,
                      relu_ids,
                      stats,
                      relu_stable,
                      relu_stable_protected=args.relu_stable_protected,
                      beta_sched=beta_sched)

                # A StepLR scheduler is stepped here only once the phase has
                # run for at least args.mix_epochs epochs.
                if isinstance(lr_scheduler, optim.lr_scheduler.StepLR
                              ) and curr_epoch >= args.mix_epochs:
                    lr_scheduler.step()

                if (epoch + 1) % args.test_freq == 0:
                    with torch.no_grad():
                        test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = test(
                            device,
                            args,
                            cnet,
                            test_loader
                            if args.test_set == "test" else train_loader,
                            [curr_layer_idx],
                            stats=stats,
                            log_ind=(epoch + 1) % n_epochs == 0)

                # Checkpoint at every test epoch and at the end of a phase.
                if (epoch + 1) % args.test_freq == 0 or (epoch +
                                                         1) % n_epochs == 0:
                    torch.save(
                        net.state_dict(),
                        os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                    torch.save(
                        opt.state_dict(),
                        os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))

                stats.update_tb(epoch)
                epoch += 1
            # Scale relu_stable and the learning rate for the next phase.
            relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
            lr = lr * args.lr_layer_dec
        if args.cert:
            with torch.no_grad():
                diffAI_cert(
                    device,
                    args,
                    cnet,
                    test_loader if args.test_set == "test" else train_loader,
                    stats=stats,
                    log_ind=True,
                    epoch=epoch,
                    domains=args.cert_domain)
    elif args.train_mode == 'print':
        print('printing network to:', args.out_net_file)
        # NOTE(review): the export input is created on 'cuda' regardless of
        # the device selected above - confirm this is intended.
        dummy_input = torch.randn(1,
                                  input_channel,
                                  input_size,
                                  input_size,
                                  device='cuda')
        net.skip_norm = True
        torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test(device,
                 args,
                 cnet,
                 test_loader if args.test_set == "test" else train_loader,
                 [-1],
                 log_ind=True)
    elif args.train_mode == "cert":
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))
        print('Saving results to:', model_dir)
        with torch.no_grad():
            diffAI_cert(
                device,
                args,
                cnet,
                test_loader if args.test_set == "test" else train_loader,
                stats=stats,
                log_ind=True,
                domains=args.cert_domain)
        # Certification mode terminates the process instead of returning.
        exit(0)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    return test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc
def run_experiment(params,
                   vd_environment,
                   trial_out_dir,
                   num_dimensions,
                   n_generations=100,
                   save_results=False,
                   silent=False,
                   args=None):
    """
    Run one trial of the experiment with the given NEAT parameters.
    The best genome found is written to ``best_genome.pickle`` in the trial
    directory; unless both ``save_results`` is False and ``silent`` is True,
    the CPPN graph, the activations for one randomly chosen visual field and
    the collected statistics are rendered as well.
    Arguments:
        params:         The NEAT parameters
        vd_environment: The environment to test visual discrimination
        trial_out_dir:  The directory to store outputs for this trial
        num_dimensions: The dimensionality of visual field
        n_generations:  The number of generations to execute.
        save_results:   The flag to control if intermediate results will be saved.
        silent:         If True then no intermediary outputs will be
                        presented until solution is found.
        args:           The command line arguments holder (not used here).
    Returns:
        True if experiment finished with successful solver found.
    """
    # The wall clock seeds both the population and its RNG.
    seed = int(time.time())

    substrate = create_substrate(num_dimensions)

    # Initial CPPN genome and the population grown from it
    seed_genome = NEAT.Genome(0,
                              substrate.GetMinCPPNInputs(),
                              0,
                              substrate.GetMinCPPNOutputs(),
                              False,
                              NEAT.ActivationFunction.UNSIGNED_SIGMOID,
                              NEAT.ActivationFunction.UNSIGNED_SIGMOID,
                              0,
                              params,
                              0)
    population = NEAT.Population(seed_genome, params, True, 1.0, seed)
    population.RNG.Seed(seed)

    trial_started = time.time()
    frozen_best_genome = None
    best_ever_goal_fitness = 0
    best_id = -1
    solution_found = False

    stats = Statistics()
    for generation in range(n_generations):
        print("\n****** Generation: %d ******\n" % generation)
        generation_started = time.time()

        # Evaluate the current genomes of the population
        current_genomes = NEAT.GetGenomeList(population)
        genome, fitness, distances = eval_genomes(current_genomes,
                                                  vd_environment=vd_environment,
                                                  substrate=substrate,
                                                  generation=generation)
        stats.post_evaluate(max_fitness=fitness, distances=distances)

        solution_found = fitness >= FITNESS_THRESHOLD
        # Keep a pickled snapshot of the best genome seen so far
        if solution_found or best_ever_goal_fitness < fitness:
            frozen_best_genome = pickle.dumps(genome)
            best_ever_goal_fitness = fitness
            best_id = genome.GetID()

        if solution_found:
            print('Solution found at generation: %d, best fitness: %f, species count: %d' %
                  (generation, fitness, len(population.Species)))
            break

        # Move on to the next generation
        population.Epoch()

        # Per-generation report
        generation_elapsed = time.time() - generation_started
        print("Best fitness: %f, genome ID: %d" % (fitness, best_id))
        print("Species count: %d" % len(population.Species))
        print("Generation elapsed time: %.3f sec" % (generation_elapsed))
        print("Best fitness ever: %f, genome ID: %d" %
              (best_ever_goal_fitness, best_id))

    trial_elapsed = time.time() - trial_started

    best_genome = pickle.loads(frozen_best_genome)

    # Persist the best genome
    with open(os.path.join(trial_out_dir, "best_genome.pickle"), 'wb') as genome_file:
        pickle.dump(best_genome, genome_file)

    # Trial summary
    print("\nBest ever fitness: %f, genome ID: %d" %
          (best_ever_goal_fitness, best_id))
    print("\nTrial elapsed time: %.3f sec" % (trial_elapsed))
    print("Random seed:", seed)

    show_results = not silent
    if not (save_results or show_results):
        return solution_found

    # CPPN network graph
    cppn_net = NEAT.NeuralNetwork()
    best_genome.BuildPhenotype(cppn_net)
    visualize.draw_net(cppn_net,
                       view=show_results,
                       node_names=None,
                       directory=trial_out_dir,
                       fmt='svg')
    print("\nCPPN nodes: %d, connections: %d" %
          (len(cppn_net.neurons), len(cppn_net.connections)))

    # Activations of the best genome for one randomly chosen visual field
    substrate_net = NEAT.NeuralNetwork()
    best_genome.BuildHyperNEATPhenotype(substrate_net, substrate)
    index = random.randint(0, len(vd_environment.data_set) - 1)
    print("\nRunning test evaluation against random visual field:", index)
    print("Substrate nodes: %d, connections: %d" %
          (len(substrate_net.neurons), len(substrate_net.connections)))
    visual_field = vd_environment.data_set[index]
    outputs, x, y = vd_environment.evaluate_net_vf(substrate_net, visual_field)
    visualize.draw_activations(outputs,
                               found_object=(x, y),
                               vf=visual_field,
                               dimns=num_dimensions,
                               view=show_results,
                               filename=os.path.join(trial_out_dir,
                                                     "best_activations.svg"))

    # Statistics collected over the generations
    visualize.plot_stats(stats,
                         ylog=False,
                         view=show_results,
                         filename=os.path.join(trial_out_dir,
                                               'avg_fitness.svg'))

    return solution_found
stats]) socket.recv_pyobj() if __name__ == '__main__': # Prepare zmq server. context = zmq.Context() socket = context.socket(zmq.REP) port = socket.bind_to_random_port(ZMQ_ADDRESS) # Start processes. processes = [multiprocessing.Process(target=environment_process, args=(i, port,), daemon=True) for i in range(NUM_ENVIRONMENTS)] for p in processes: p.start() steps_statistics = Statistics() total_reward_statistics = Statistics() collision_statistics = Statistics() num_nodes_statistics = Statistics() depth_statistics = Statistics() stats_statistics = [Statistics() for _ in range(5)] while True: # Read request and process. (steps, total_reward, collision, num_nodes, depth, stats) = socket.recv_pyobj() socket.send_pyobj(0) if args.task not in ['DriveHard']: if not collision: steps_statistics.append(steps) else:
hidden_ans_dim=args.hidden_ans_dim, hidden_hist_dim=args.hidden_hist_dim, hidden_cap_dim=args.hidden_cap_dim, hidden_img_dim=img_features_dim) # Multiple GPUs batch parallel if torch.cuda.is_available(): if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) model.cuda() else: sys.exit("Only GPU version is currently available.") #for n, p in model.named_parameters(): # print(n, p.numel()) print("Total params:", sum(p.numel() for p in model.parameters())) stats = Statistics(args) ndcg = NDCG() if args.submission: if args.model_pathname: args.mrr_pathname = os.path.join(args.model_pathname, 'best_model_mrr.pth.tar') args.ndcg_pathname = os.path.join(args.model_pathname, 'best_model_ndcg.pth.tar') else: args.mrr_pathname = os.path.join(dir_path(args), 'best_model_mrr.pth.tar') args.ndcg_pathname = os.path.join(dir_path(args), 'best_model_ndcg.pth.tar') print('Creating submissions') print("loading best MRR: {}".format(args.mrr_pathname))
socket = context.socket(zmq.REP) port = socket.bind_to_random_port(ZMQ_ADDRESS) # Start processes. processes = [ multiprocessing.Process(target=environment_process, args=( i, port, ), daemon=True) for i in range(NUM_ENVIRONMENTS) ] for p in processes: p.start() steps_statistics = Statistics() total_reward_statistics = Statistics() collision_statistics = Statistics() num_nodes_statistics = Statistics() depth_statistics = Statistics() stats_statistics = [Statistics() for _ in range(5)] planner_value_statistics = defaultdict(lambda: Statistics()) value_statistics = defaultdict(lambda: Statistics()) while len(total_reward_statistics) < 100 or total_reward_statistics.stderr( ) > TARGET_SE: # Read request and process. (steps, total_reward, collision, num_nodes, depth, stats) = socket.recv_pyobj() socket.send_pyobj(0)
def main(args, local_rank):
    """Train a generation model on the GPU given by ``local_rank``.

    Builds the vocabularies and the model selected by ``args.arch``
    (``'vanilla'``, ``'mem'`` or ``'rg'``), optionally restores the weights
    from ``args.resume_ckpt`` and trains until ``global_step`` exceeds
    ``args.total_train_steps``. Logging, evaluation and checkpoint saving
    happen only when ``args.world_size == 1`` or on rank 0.

    The step counter always starts at 0, also when resuming: the checkpoint
    read here only provides the ``'model'`` state. Previously the counter
    was left undefined when ``args.resume_ckpt`` was set, so resuming failed
    with an UnboundLocalError at the start of the training loop.

    Args:
        args: Argument namespace with data paths and hyper-parameters.
        local_rank: CUDA device index (and data shard rank) of this process.
    """
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)

    vocabs = dict()
    vocabs['src'] = Vocab(args.src_vocab, 0, [BOS, EOS])
    vocabs['tgt'] = Vocab(args.tgt_vocab, 0, [BOS, EOS])

    if args.world_size == 1 or (dist.get_rank() == 0):
        logger.info(args)
        for name in vocabs:
            logger.info("vocab %s, size %d, coverage %.3f", name,
                        vocabs[name].size, vocabs[name].coverage)

    # Same seed on every rank while the model is being created.
    set_seed(19940117)

    torch.cuda.set_device(local_rank)
    device = torch.device('cuda', local_rank)

    if args.arch == 'vanilla':
        model = Generator(vocabs, args.embed_dim, args.ff_embed_dim,
                          args.num_heads, args.dropout, args.enc_layers,
                          args.dec_layers, args.label_smoothing)
    elif args.arch == 'mem':
        model = MemGenerator(vocabs, args.embed_dim, args.ff_embed_dim,
                             args.num_heads, args.dropout, args.mem_dropout,
                             args.enc_layers, args.dec_layers,
                             args.mem_enc_layers, args.label_smoothing,
                             args.use_mem_score)
    elif args.arch == 'rg':
        logger.info("start building model")
        logger.info("building retriever")
        retriever = Retriever.from_pretrained(
            args.num_retriever_heads,
            vocabs,
            args.retriever,
            args.nprobe,
            args.topk,
            local_rank,
            use_response_encoder=(args.rebuild_every > 0))

        logger.info("building retriever + generator")
        model = RetrieverGenerator(vocabs, retriever, args.share_encoder,
                                   args.embed_dim, args.ff_embed_dim,
                                   args.num_heads, args.dropout,
                                   args.mem_dropout, args.enc_layers,
                                   args.dec_layers, args.mem_enc_layers,
                                   args.label_smoothing)

    if args.resume_ckpt:
        model.load_state_dict(torch.load(args.resume_ckpt)['model'])
    # Initialized unconditionally so that resumed runs can enter the loop.
    global_step = 0

    # After model creation each rank switches to its own seed.
    if args.world_size > 1:
        set_seed(19940117 + dist.get_rank())

    model = model.to(device)

    # Parameters under 'retriever.' get a 10x smaller learning rate.
    retriever_params = [
        v for k, v in model.named_parameters() if k.startswith('retriever.')
    ]
    other_params = [
        v for k, v in model.named_parameters()
        if not k.startswith('retriever.')
    ]

    optimizer = Adam([{
        'params': retriever_params,
        'lr': args.embed_dim**-0.5 * 0.1
    }, {
        'params': other_params,
        'lr': args.embed_dim**-0.5
    }],
                     betas=(0.9, 0.98),
                     eps=1e-9)
    lr_schedule = get_inverse_sqrt_schedule_with_warmup(
        optimizer, args.warmup_steps, args.total_train_steps)
    train_data = DataLoader(vocabs,
                            args.train_data,
                            args.per_gpu_train_batch_size,
                            for_train=True,
                            rank=local_rank,
                            num_replica=args.world_size)

    model.eval()
    # step counts batches; global_step counts optimizer updates.
    step, epoch = 0, 0
    tr_stat = Statistics()
    logger.info("start training")
    model.train()

    best_dev_bleu = 0.
    while global_step <= args.total_train_steps:
        for batch in train_data:
            batch = move_to_device(batch, device)
            if args.arch == 'rg':
                loss, acc = model(
                    batch,
                    update_mem_bias=(global_step >
                                     args.update_retriever_after))
            else:
                loss, acc = model(batch)

            # The loss is weighted by the token count so the sum can be
            # divided by 'tokens' when it is logged.
            tr_stat.update({
                'loss': loss.item() * batch['tgt_num_tokens'],
                'tokens': batch['tgt_num_tokens'],
                'acc': acc
            })
            tr_stat.step()
            loss.backward()

            step += 1
            # Gradient accumulation: -1 % n equals n - 1, so the optimizer
            # update below runs only on every n-th batch.
            if not (step % args.gradient_accumulation_steps
                    == -1 % args.gradient_accumulation_steps):
                continue

            if args.world_size > 1:
                average_gradients(model)

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            lr_schedule.step()
            optimizer.zero_grad()
            global_step += 1

            if args.world_size == 1 or (dist.get_rank() == 0):
                if global_step % args.print_every == -1 % args.print_every:
                    logger.info("epoch %d, step %d, loss %.3f, acc %.3f",
                                epoch, global_step,
                                tr_stat['loss'] / tr_stat['tokens'],
                                tr_stat['acc'] / tr_stat['tokens'])
                    # Start a fresh accumulation window after each report.
                    tr_stat = Statistics()

                if global_step % args.eval_every == -1 % args.eval_every:
                    model.eval()
                    # Short decoding limit until twice the warm-up steps.
                    max_time_step = 256 if global_step > 2 * args.warmup_steps else 5
                    bleus = []
                    for cur_dev_data in args.dev_data:
                        dev_data = DataLoader(vocabs,
                                              cur_dev_data,
                                              args.dev_batch_size,
                                              for_train=False)
                        bleu = validate(device,
                                        model,
                                        dev_data,
                                        beam_size=5,
                                        alpha=0.6,
                                        max_time_step=max_time_step)
                        bleus.append(bleu)
                    bleu = sum(bleus) / len(bleus)
                    logger.info("epoch %d, step %d, dev bleu %.2f", epoch,
                                global_step, bleu)

                    # The test sets are scored and checkpoints written only
                    # when the dev BLEU improves.
                    if bleu > best_dev_bleu:
                        testbleus = []
                        for cur_test_data in args.test_data:
                            test_data = DataLoader(vocabs,
                                                   cur_test_data,
                                                   args.dev_batch_size,
                                                   for_train=False)
                            testbleu = validate(device,
                                                model,
                                                test_data,
                                                beam_size=5,
                                                alpha=0.6,
                                                max_time_step=max_time_step)
                            testbleus.append(testbleu)
                        testbleu = sum(testbleus) / len(testbleus)
                        logger.info("epoch %d, step %d, test bleu %.2f",
                                    epoch, global_step, testbleu)
                        torch.save({
                            'args': args,
                            'model': model.state_dict()
                        }, '%s/best.pt' % (args.ckpt, ))
                        if not args.only_save_best:
                            torch.save(
                                {
                                    'args': args,
                                    'model': model.state_dict()
                                },
                                '%s/epoch%d_batch%d_devbleu%.2f_testbleu%.2f' %
                                (args.ckpt, epoch, global_step, bleu,
                                 testbleu))
                        best_dev_bleu = bleu
                    model.train()

            # Periodically rebuild the retrieval index: one rank rebuilds it,
            # the others wait at the barrier, then every rank loads it.
            if args.rebuild_every > 0 and (global_step % args.rebuild_every
                                           == -1 % args.rebuild_every):
                model.retriever.drop_index()
                torch.cuda.empty_cache()
                next_index_dir = '%s/batch%d' % (args.ckpt, global_step)
                if args.world_size == 1 or (dist.get_rank() == 0):
                    model.retriever.rebuild_index(next_index_dir)
                    dist.barrier()
                else:
                    dist.barrier()
                model.retriever.update_index(next_index_dir, args.nprobe)

            if global_step > args.total_train_steps:
                break
        epoch += 1
    logger.info('rank %d, finish training after %d steps', local_rank,
                global_step)
def run_experiment(params,
                   rt_environment,
                   trial_out_dir,
                   n_generations=100,
                   save_results=False,
                   silent=False,
                   args=None):
    """
    Run the experiment against the hyper-parameters defined in the provided
    configuration. The winner genome is rendered as a graph, together with
    the important statistics of the neuroevolution process execution.

    Arguments:
        params:         The NEAT parameters.
        rt_environment: The test environment for detector ANN evaluations.
        trial_out_dir:  The directory to store outputs for this trial.
        n_generations:  The number of generations to execute.
        save_results:   The flag to control if intermediate results will be
                        saved.
        silent:         If True then no intermediary outputs will be
                        presented until a solution is found.
        args:           The command line arguments holder (not used by this
                        function).
    Returns:
        True if the experiment finished with a successful solver found, i.e.
        a generation reached a fitness >= FITNESS_THRESHOLD.
    """
    # random seed
    seed = 1569777981  # int(time.time())

    # Create substrate
    substrate = create_substrate()

    # Create CPPN genome and population
    g = NEAT.Genome(
        0,
        substrate.GetMinCPPNInputs(),
        2,  # hidden units
        substrate.GetMinCPPNOutputs(),
        False,
        NEAT.ActivationFunction.TANH,
        NEAT.ActivationFunction.SIGNED_GAUSS,  # The initial activation type for hidden
        1,  # hidden layers seed
        params,
        1)  # one hidden layer
    pop = NEAT.Population(g, params, True, 1.0, seed)
    pop.RNG.Seed(seed)

    # Run for up to N generations.
    start_time = time.time()
    best_genome_ser = None
    best_ever_goal_fitness = 0
    best_id = -1
    solution_found = False

    stats = Statistics()
    for generation in range(n_generations):
        print("\n****** Generation: %d ******\n" % generation)
        gen_time = time.time()

        # get list of current genomes
        genomes = NEAT.GetGenomeList(pop)

        # evaluate genomes
        genome, fitness, errors = eval_genomes(genomes,
                                               rt_environment=rt_environment,
                                               substrate=substrate,
                                               params=params)
        stats.post_evaluate(max_fitness=fitness, errors=errors)
        solution_found = fitness >= FITNESS_THRESHOLD

        # store the best genome
        if solution_found or best_ever_goal_fitness < fitness:
            # dump to pickle to freeze the genome state
            best_genome_ser = pickle.dumps(genome)
            best_ever_goal_fitness = fitness
            best_id = genome.GetID()

        if solution_found:
            print(
                'Solution found at generation: %d, best fitness: %f, species count: %d'
                % (generation, fitness, len(pop.Species)))
            break

        # advance to the next generation
        pop.Epoch()

        # print statistics
        # NOTE(review): best_id is the ID of the best genome recorded so far,
        # which is not necessarily the genome that produced this generation's
        # `fitness` -- confirm this pairing is intended.
        gen_elapsed_time = time.time() - gen_time
        print("Best fitness: %f, genome ID: %d" % (fitness, best_id))
        print("Species count: %d" % len(pop.Species))
        print("Generation elapsed time: %.3f sec" % (gen_elapsed_time))
        print("Best fitness ever: %f, genome ID: %d" %
              (best_ever_goal_fitness, best_id))

    # Find the experiment elapsed time
    elapsed_time = time.time() - start_time

    # No genome is recorded when the loop never ran (n_generations <= 0) or
    # when no fitness ever exceeded the initial best of 0. Unpickling None
    # would raise a TypeError, so report and stop here instead.
    if best_genome_ser is None:
        print("\nNo best genome was recorded; nothing to save or visualize.")
        print("\nTrial elapsed time: %.3f sec" % (elapsed_time))
        print("Random seed:", seed)
        return solution_found

    # Restore the frozen best genome from pickle
    best_genome = pickle.loads(best_genome_ser)

    # write best genome to the file
    best_genome_file = os.path.join(trial_out_dir, "best_genome.pickle")
    with open(best_genome_file, 'wb') as genome_file:
        pickle.dump(best_genome, genome_file)

    # Print experiment statistics
    print("\nBest ever fitness: %f, genome ID: %d" %
          (best_ever_goal_fitness, best_id))
    print("\nTrial elapsed time: %.3f sec" % (elapsed_time))
    print("Random seed:", seed)

    # Visualize the experiment results
    show_results = not silent
    if save_results or show_results:
        # Draw CPPN network graph
        net = NEAT.NeuralNetwork()
        best_genome.BuildPhenotype(net)
        visualize.draw_net(net,
                           view=False,
                           node_names=None,
                           filename="cppn_graph.svg",
                           directory=trial_out_dir,
                           fmt='svg')
        print("\nCPPN nodes: %d, connections: %d" %
              (len(net.neurons), len(net.connections)))

        # Draw the substrate network graph
        net = NEAT.NeuralNetwork()
        best_genome.BuildESHyperNEATPhenotype(net, substrate, params)
        visualize.draw_net(net,
                           view=False,
                           node_names=None,
                           filename="substrate_graph.svg",
                           directory=trial_out_dir,
                           fmt='svg')
        print("\nSubstrate nodes: %d, connections: %d" %
              (len(net.neurons), len(net.connections)))
        n_inputs = net.NumInputs()
        n_outputs = net.NumOutputs()
        n_hidden = len(net.neurons) - n_inputs - n_outputs
        print("\n\tinputs: %d, outputs: %d, hidden: %d" %
              (n_inputs, n_outputs, n_hidden))

        # Test against random retina configuration
        # NOTE(review): the index range 0..15 presumably matches the number
        # of entries in rt_environment.visual_objects -- confirm.
        l_index = random.randint(0, 15)
        r_index = random.randint(0, 15)
        left = rt_environment.visual_objects[l_index]
        right = rt_environment.visual_objects[r_index]
        err, flags = rt_environment._evaluate(net, left, right, 3)
        print("Test evaluation error: %f" % err)
        print("Left flag: %f, pattern: %s" % (flags[0], left))
        print("Right flag: %f, pattern: %s" % (flags[1], right))

        # Test against all visual objects
        test_fitness, avg_error, total_count, false_detections = \
            rt_environment.evaluate_net(net, debug=True)
        print(
            "Test evaluation against full data set [%d], fitness: %f, average error: %f, false detections: %f"
            % (total_count, test_fitness, avg_error, false_detections))

        # Visualize statistics
        visualize.plot_stats(stats,
                             ylog=False,
                             view=show_results,
                             filename=os.path.join(trial_out_dir,
                                                   'avg_fitness.svg'))

    return solution_found
def main(args):
    """Train a DiscreteModel, test it, and compare its trajectories with the
    true RosslerMap trajectories.

    Steps performed:
      1. Build the W&B logger, a checkpoint callback that keeps the single
         checkpoint with the lowest ``val_loss``, and the data module.
      2. Fit and test the model with a ``Trainer``.
      3. Reload the best checkpoint, then for the train and test initial
         positions compute predicted/true trajectories, save them as ``.npy``
         files next to the checkpoint, and pass them to the ``Statistics`` and
         ``Dynamics`` calculators for plotting.

    Args:
        args: Parsed command line arguments. The attributes read here are
            ``project``, ``n_iter_train``, ``n_iter_valid``, ``n_iter_test``,
            ``init_pos_train``, ``init_pos_valid``, ``init_pos_test``,
            ``batch_size``, ``delta_t``, ``lr``, ``gpus`` and ``epochs``.
    """
    wandb_logger = WandbLogger(project=args.project)
    checkpoint_callback = ModelCheckpoint(
        save_top_k=1,
        verbose=True,
        monitor="val_loss",
        mode="min",
    )

    datamodule = RosslerAttractorDataModule(
        n_iter_train=args.n_iter_train,
        n_iter_valid=args.n_iter_valid,
        n_iter_test=args.n_iter_test,
        init_pos_train=args.init_pos_train,
        # The test split must start from the test initial position (the same
        # one used for the test trajectory below), not the training one.
        init_pos_test=args.init_pos_test,
        init_pos_valid=args.init_pos_valid,
        batch_size=args.batch_size,
        delta_t=args.delta_t,
    )
    datamodule.setup()

    criterion = nn.L1Loss(reduction="mean")
    # Alternative second criterion that was tried:
    #   SoftDTW(use_cuda=args.gpus is not None, gamma=0.1, normalize=True)
    criterion_2 = nn.MSELoss(reduction="mean")

    # To resume from an existing checkpoint instead of building a new model:
    #   model = DiscreteModel.load_from_checkpoint(
    #       checkpoint_path="Data/checkpoints/model_dtw.ckpt")
    #   model.hparams.criterion_2 = criterion_2
    #   model.configure_optimizers()
    #   model.hparams.lr = args.lr
    model = DiscreteModel(
        criterion=criterion,
        criterion_2=criterion_2,
        lr=args.lr,
        delta_t=args.delta_t,
        mean=datamodule.dataset_train.mean,
        std=datamodule.dataset_train.std,
        hidden_size=15,
    )

    trainer = Trainer(
        gpus=args.gpus,
        logger=wandb_logger,
        max_epochs=args.epochs,
        callbacks=[checkpoint_callback],
        # auto_lr_find=True,
    )
    # trainer.tune(model=model, datamodule=datamodule)
    trainer.fit(model=model, datamodule=datamodule)
    trainer.test(model=model, datamodule=datamodule)

    # Tests
    trajectory_duration = 1000
    nb_steps = int(trajectory_duration // args.delta_t)

    # Trajectory dumps are written into the directory of the best checkpoint.
    checkpoint_path = Path(checkpoint_callback.best_model_path)
    save_dir_path = checkpoint_path.parent

    trained_model = DiscreteModel.load_from_checkpoint(
        checkpoint_path=checkpoint_path)
    trained_model.normalize = False
    true_model = RosslerMap(delta_t=args.delta_t)

    statistics_calculator = Statistics(wandb_logger)
    dynamics_calculator = Dynamics(wandb_logger, true_model, trained_model,
                                   nb_steps)

    # The validation split is deliberately not evaluated here; add
    # ("valid", args.init_pos_valid) to the tuple below to include it.
    evaluated_splits = (
        ("train", args.init_pos_train),
        ("test", args.init_pos_test),
    )
    for split, init_pos in evaluated_splits:
        traj_pred, traj_true, time_list = compute_traj(trained_model,
                                                       true_model, init_pos,
                                                       nb_steps)

        np.save(os.path.join(save_dir_path, f"traj_pred_{split}.npy"),
                traj_pred)
        np.save(os.path.join(save_dir_path, f"traj_true_{split}.npy"),
                traj_true)
        np.save(os.path.join(save_dir_path, f"time_list_{split}.npy"),
                time_list)

        statistics_calculator.add_traj(traj_true,
                                       traj_pred,
                                       time_list,
                                       prefix=f"{split} ")
        statistics_calculator.plot_all()
        dynamics_calculator.add_traj(traj_true, traj_pred)
        dynamics_calculator.plot_all()