def __init__(self, eng_name, model):
    super(TFEngine, self).__init__()
    self.eng_name = eng_name
    self.model = model
    self.book = Book.load_GoGoD_book()
    self.last_move_probs = np.zeros((self.model.N, self.model.N))
    self.kibitz_mode = False

    # build the graph
    with tf.Graph().as_default():
        with tf.device('/cpu:0'):
            self.feature_planes = tf.placeholder(
                tf.float32,
                shape=[None, self.model.N, self.model.N, self.model.Nfeat],
                name='feature_planes')
            self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat)
            saver = tf.train.Saver(tf.trainable_variables())
            init = tf.initialize_all_variables()
            self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
            self.sess.run(init)
            checkpoint_dir = os.path.join(model.train_dir, 'checkpoints')
            Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir)
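# Checkpoint.restore_from_checkpoint is not defined in this snippet. A minimal
# sketch of what such a helper typically does with the TF 1.x API (an
# assumption for illustration, not the actual implementation; some call sites
# below also pass an explicit step):
def restore_from_checkpoint(sess, saver, checkpoint_dir):
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # checkpoint files are conventionally named model.ckpt-<global_step>
        return int(ckpt.model_checkpoint_path.split('-')[-1])
    raise IOError('no checkpoint found in %s' % checkpoint_dir)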
def execute(load=False, train_from_start=False):
    dataloader64, dataloader128 = prep_data()
    netG1, netG2, netD1, netD2 = create_nn()
    optimizerG1, optimizerG2, optimizerD1, optimizerD2 = \
        create_optimizers(netG1, netG2, netD1, netD2)

    img_list_64 = []
    img_list_128 = []
    G1_losses = []
    D1_losses = []
    G2_losses = []
    D2_losses = []
    lists = (img_list_64, img_list_128, D1_losses, G1_losses, G2_losses, D2_losses)

    fixed_noise = torch.randn(64, nz, 1, 1, device=device)
    checkpoint = Checkpoint(
        {
            'Generator_Stage_1': netG1,
            'Generator_Stage_2': netG2,
            'Discriminator_Stage_1': netD1,
            'Discriminator_Stage_2': netD2
        },
        {
            'optimizerG1': optimizerG1,
            'optimizerG2': optimizerG2,
            'optimizerD1': optimizerD1,
            'optimizerD2': optimizerD2
        },
        epoch=0,
        loss=criterion,
        script_name="metalGAN2",
        load=load)

    args_bundle = dataloader64, dataloader128, netG1, netG2, netD1, netD2, \
        optimizerG1, optimizerG2, optimizerD1, optimizerD2

    if train_from_start:
        checkpoint.epoch = 0
    start_at_epoch = checkpoint.epoch

    train_part_1(args_bundle, lists, checkpoint, fixed_noise, start_at_epoch)

    print("starting stage 2...")
    adjust_learning_rate(optimizerD1, lr)
    adjust_learning_rate(optimizerG1, lr)
    train_part_2(args_bundle, lists, checkpoint, fixed_noise, num_epochs, max(10, start_at_epoch))

    adjust_learning_rate(optimizerD1, lr / 2)
    adjust_learning_rate(optimizerG1, lr / 2)
    adjust_learning_rate(optimizerD2, lr)
    adjust_learning_rate(optimizerG2, lr)
    train_part_2(args_bundle, lists, checkpoint, fixed_noise, num_epochs * 2, max(20, start_at_epoch))

    checkpoint.save()
    show_off(lists, dataloader128)
    return 0
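# adjust_learning_rate is called above but not defined in this snippet. A
# common PyTorch implementation of such a helper (an assumption, not the
# original code):
def adjust_learning_rate(optimizer, new_lr):
    # every parameter group of the optimizer gets the new learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr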
def createCheckpointsFromExperiment():
    print "Setting up private mode baselines"
    for np, wl, mem, simpoint, fw in buildPossibleParams():
        if np != 4:
            curWorkload = workloads.getBms(wl, np, True)
            for bm in curWorkload:
                actualPath = checkpoints.getCheckpointDirectory(4, mem, bm, simpoint)
                checkPath = checkpoints.getCheckpointDirectory(np, mem, bm, simpoint)
                checkFile = checkPath + "/m5.cpt"
                if not os.path.exists(checkFile):
                    print "Linking", actualPath, checkPath
                    os.symlink(actualPath, checkPath)

    print
    print "Generating multi-core checkpoints"
    for np, wl, mem, simpoint, fw in buildPossibleParams():
        if Checkpoint.prerequisiteFilesExist(wl, np, mem, simpoint):
            printParameters(np, wl, mem, simpoint, fw)
            path = Checkpoint.generateCheckpoint(wl, np, fw, mem, simpoint)
            print "Generated checkpoint at " + path
        else:
            print "Files needed for np " + str(np) + ", workload " + wl + \
                ", memsys " + mem + " and simpoint " + str(simpoint) + " not found"
            print "Skipping..."
    return 0
def __init__(self, model):
    BaseEngine.__init__(self)
    self.model = model

    with tf.Graph().as_default():
        with tf.device('/cpu:0'):
            self.feature_planes = tf.placeholder(
                tf.float32,
                shape=[None, self.model.N, self.model.N, self.model.Nfeat],
                name='feature_planes')
            self.probs_op = model.inference(self.feature_planes, self.model.N, self.model.Nfeat)
            saver = tf.train.Saver(tf.trainable_variables())
            init = tf.initialize_all_variables()
            self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
            self.sess.run(init)
            checkpoint_dir = os.path.join(model.train_dir, 'checkpoints')
            Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir)
def load_and_compare():
    dataloader64, dataloader128 = prep_data()
    netG1, netG2, netD1, netD2 = create_nn()
    optimizerG1, optimizerG2, optimizerD1, optimizerD2 = \
        create_optimizers(netG1, netG2, netD1, netD2)
    fixed_noise = torch.randn(64, nz, 1, 1, device=device)
    checkpoint = Checkpoint(
        {
            'Generator_Stage_1': netG1,
            'Generator_Stage_2': netG2,
            'Discriminator_Stage_1': netD1,
            'Discriminator_Stage_2': netD2
        },
        {
            'optimizerG1': optimizerG1,
            'optimizerG2': optimizerG2,
            'optimizerD1': optimizerD1,
            'optimizerD2': optimizerD2
        },
        epoch=0,
        loss=criterion,
        script_name="metalGAN2",
        load=True)
    compare_with_real(device, dataloader128, img_list=None,
                      Generator=netG1, noise=fixed_noise)
def main():
    opts, args = parseAargs()

    print
    print "Automatic checkpoint generation for multiprogrammed workloads"
    print

    if opts.checkpointDestination:
        sys.exit(copyCheckpointFiles(opts.checkpointDestination, opts))
    if opts.fromExp:
        sys.exit(createCheckpointsFromExperiment())
    if opts.convertCheckpointFile != "":
        sys.exit(convertCheckpointFile(opts.convertCheckpointFile))
    if opts.test:
        sys.exit(testCheckpoints(opts.siminsts))

    simpoint = -1
    printParameters(opts.np, opts.workload, opts.memsys, simpoint, opts.fwinsts)
    chkptPath = Checkpoint.generateCheckpoint(opts.workload, opts.np, opts.fwinsts, opts.memsys, simpoint)

    print
    print "Generated checkpoint at " + chkptPath
    print
def __init__(self, eng_name, model, step=None):
    super(NNEngine, self).__init__(model.N)
    self.eng_name = eng_name
    self.model = model
    self.move_records = []
    self.move_probs = []

    # build the graph
    with tf.Graph().as_default():
        # with tf.device('/cpu:0'):
        self.feature_planes = tf.placeholder(
            tf.float32,
            shape=[None, self.model.N + 1, self.model.N + 1, self.model.Nfeat],
            name='feature_planes')
        self.logits = model.inference(self.feature_planes, self.model.N + 1, self.model.Nfeat)
        saver = tf.train.Saver(tf.trainable_variables())
        init = tf.global_variables_initializer()
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        self.sess.run(init)
        checkpoint_dir = os.path.join(model.train_dir)
        Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir, step)
def calc(self, atomlist, do_Qeq=False, charge=0, multiplicity=1):
    """do the actual calculation and parse the results"""
    self.atomlist = atomlist
    if self.embedding == "electrostatic" and not self.have_charges:
        do_Qeq = True
        self.have_charges = True
    if do_Qeq:
        # The charge equilibration with Qeq should be performed only once.
        # For repeated calculations (dynamics, optimization) the partial
        # charges are set to 0, and the electrostatic interaction is added
        # later.
        route = "#P UFF=Qeq Force NoSymm Geom=(Connectivity, NoCrowd)"
    else:
        route = "#P UFF Force NoSymm Geom=(Connectivity, NoCrowd)"
    write_input(self.com_file, self.atomlist,
                chk_file=self.chk_file, route=route,
                connectivity=self.connectivity,
                charge=charge, multiplicity=multiplicity,
                title="compute energies and gradients")
    # execute g09
    if self.verbose > 0:
        print "Gaussian 09 ..."
        print "  route: %s" % route
    # '> %s 2>&1' (in this order) sends both stdout and stderr to the log file
    ret = os.system("g09 < %s > %s 2>&1" % (self.com_file, self.log_file))
    error_msg = "G09 Calculation failed! Check the log-file %s" % self.log_file
    assert ret == 0, error_msg
    # create formatted checkpoint file
    ret = os.system("formchk %s > /dev/null 2>&1" % self.chk_file)
    assert ret == 0, "ERROR: formchk failed!"
    # parse the checkpoint file and extract:
    #  - the total energy
    #  - the forces
    #  - the partial MM charges from the charge equilibration
    Data = Checkpoint.parseCheckpointFile(self.fchk_file)
    # save results for later use
    self.mm_energy = Data["_Total_Energy"]
    # The gradient does not contain the contribution from electrostatics
    self.mm_gradient = Data["_Cartesian_Gradient"]
    if do_Qeq:
        # At the first step, when Qeq is performed, gradient and energy
        # already contain the electrostatic interactions
        self.mm_charges = Data["_MM_charges"]
    elif self.embedding == "electrostatic":
        # add the electrostatic energy
        enCoul, gradCoul = self._electrostatics()
        self.mm_energy += enCoul
        self.mm_gradient += gradCoul
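# A more robust way to invoke g09 than os.system, sketched with the standard
# subprocess module (an illustration under the same file-name assumptions as
# above, not part of the original code):
import subprocess

def run_g09(com_file, log_file):
    with open(com_file) as inp, open(log_file, 'w') as log:
        # stdout and stderr both go to the log file; the caller checks the return code
        return subprocess.call(["g09"], stdin=inp, stdout=log, stderr=subprocess.STDOUT)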
def train_model(model, N, Nfeat, build_feed_dict, normalization, loss_func,
                train_data_dir, val_data_dir, lr_base, lr_half_life, max_steps,
                just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        momentum_ph = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N, N, Nfeat])

        model_outputs = model.inference(feature_planes, N, Nfeat)
        outputs_ph, total_loss, accuracy = loss_func(model_outputs)
        train_op = train_step(total_loss, learning_rate_ph, momentum_ph)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=5,
                               keep_checkpoint_every_n_hours=2.0)
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(
            os.path.join(model.train_dir, 'summaries',
                         datetime.now().strftime('%Y%m%d-%H%M%S')),
            graph=sess.graph, flush_secs=5)

        accuracy_avg = MovingAverage('accuracy', time_constant=1000)
        total_loss_avg = MovingAverage('total_loss', time_constant=1000)

        def run_validation():
            # run the validation set
            val_loader = NPZ.Loader(val_data_dir)
            mean_loss = 0.0
            mean_accuracy = 0.0
            mb_num = 0
            print "Starting validation..."
            while val_loader.has_more():
                if mb_num % 100 == 0:
                    print "validation minibatch #%d" % mb_num
                feed_dict = build_feed_dict(val_loader, normalization, feature_planes, outputs_ph)
                loss_value, accuracy_value = sess.run([total_loss, accuracy], feed_dict=feed_dict)
                mean_loss += loss_value
                mean_accuracy += accuracy_value
                mb_num += 1
            mean_loss /= mb_num
            mean_accuracy /= mb_num
            print "Validation: mean loss = %.3f, mean accuracy = %.2f%%" % (mean_loss, 100 * mean_accuracy)
            summary_writer.add_summary(make_summary('validation_loss', mean_loss), step)
            summary_writer.add_summary(make_summary('validation_accuracy_percent', 100 * mean_accuracy), step)

        last_training_loss = None

        if just_validate:
            # Just run the validation set once
            step = Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation()
        else:
            # Run the training loop
            step = Checkpoint.optionally_restore_from_checkpoint(
                sess, saver, os.path.join(model.train_dir, 'checkpoints'))

            print "WARNING: WILL STOP AFTER %d STEPS" % max_steps
            print "WARNING: IGNORING lr.txt and momentum.txt"
            print "lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life)

            minibatch_size = 128
            batch_queue = EvalTraining.AsyncRandomBatchQueue(
                feature_planes, outputs_ph, train_data_dir, minibatch_size, normalization)
            # alternative loaders:
            # loader = NPZ.AsyncRandomizingLoader(train_data_dir, minibatch_size=128)
            # loader = NPZ.RandomizingLoader(train_data_dir, minibatch_size=128)
            # loader = NPZ.GroupingRandomizingLoader(train_data_dir, Ngroup=1)
            # loader = NPZ.SplittingRandomizingLoader(train_data_dir, Nsplit=2)

            last_step_ref_time = 0
            while True:
                if step % 10000 == 0 and step != 0:
                    run_validation()

                start_time = time.time()
                feed_dict = batch_queue.next_feed_dict()
                load_time = time.time() - start_time

                if step < 100:
                    learning_rate = 0.0003  # to stabilize initially
                else:
                    learning_rate = lr_base * 0.5 ** (float(step - 100) / lr_half_life)
                momentum = 0.9
                summary_writer.add_summary(make_summary('learningrate', learning_rate), step)
                summary_writer.add_summary(make_summary('momentum', momentum), step)
                feed_dict[learning_rate_ph] = learning_rate
                feed_dict[momentum_ph] = momentum

                start_time = time.time()
                _, loss_value, accuracy_value, outputs_value = sess.run(
                    [train_op, total_loss, accuracy, model_outputs], feed_dict=feed_dict)
                train_time = time.time() - start_time

                total_loss_avg.add(loss_value)
                accuracy_avg.add(100 * accuracy_value)

                if np.isnan(loss_value):
                    print "Model diverged with loss = NaN"
                    return
                if step >= max_steps:
                    return

                if step % 10 == 0:
                    total_loss_avg.write(summary_writer, step)
                    accuracy_avg.write(summary_writer, step)

                full_step_time = time.time() - last_step_ref_time
                last_step_ref_time = time.time()
                minibatch_size = feed_dict[feature_planes].shape[0]
                examples_per_sec = minibatch_size / full_step_time
                print "%s: step %d, lr=%.6f, mom=%.2f, loss = %.4f, accuracy = %.2f%% (mb_size=%d, %.1f examples/sec), (load=%.3f train=%.3f total=%0.3f sec/step)" % \
                    (datetime.now(), step, learning_rate, momentum, loss_value, 100 * accuracy_value,
                     minibatch_size, examples_per_sec, load_time, train_time, full_step_time)
                if step % 10 == 0:
                    summary_writer.add_summary(make_summary('examples/sec', examples_per_sec), step)
                    summary_writer.add_summary(make_summary('step', step), step)
                if step % 1000 == 0 and step != 0:
                    saver.save(sess, os.path.join(model.train_dir, "checkpoints", "model.ckpt"),
                               global_step=step)
                step += 1
def train(game_type, agent_type, annealer=None):
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    agent = agent_type(sess)
    agents = [agent] * game_type.get_num_agents()

    # hyperparameters
    gamma = 0.99
    gae_lambda = 0.95
    learning_rate = 0.0003
    # learning_rate = 0.0001
    epsilon = 0.1  # ppo clipping parameter TODO: fiddle with me
    value_loss_coef = 0.3
    examples_per_iteration = 1000
    minibatch_size = 100  # experiences in each training batch
    epochs = 3
    hyper_string = "ppo_lr{}_ep{}_vc{}_eit{}_mb{}_ep{}".format(
        learning_rate, epsilon, value_loss_coef,
        examples_per_iteration, minibatch_size, epochs)

    # set up the training operations in tensorflow
    advantage_ph = tf.placeholder(tf.float32, shape=[None], name='advantage')  # shape: (batch size,)
    old_log_p_chosen_action_ph = tf.placeholder(
        tf.float32, shape=[None], name='old_log_p_chosen_action')  # shape: (batch size,)
    log_p_chosen_action_op = agent.get_log_p_chosen_action_op()
    p_ratio = tf.exp(log_p_chosen_action_op - old_log_p_chosen_action_ph)
    clipped_p_ratio = tf.clip_by_value(p_ratio, 1.0 - epsilon, 1.0 + epsilon)
    policy_loss = -tf.reduce_sum(
        tf.minimum(advantage_ph * p_ratio, advantage_ph * clipped_p_ratio))

    # train value function by gradient descent on [(value est) - (cum future reward)] ** 2
    reward_ph = tf.placeholder(tf.float32, shape=[None], name='reward')  # shape: (batch size,)
    value_op = agent.get_value_op()
    value_mse = tf.reduce_sum(tf.square(reward_ph - value_op))
    value_mse_sum = tf.summary.scalar(
        "value_mse", tf.reduce_mean(tf.square(reward_ph - value_op)))

    # put policy and value loss together to get total loss;
    # could optionally add an entropy loss to encourage exploration
    total_loss = policy_loss + value_loss_coef * value_mse

    learning_rate_ph = tf.placeholder(tf.float32, name="learning_rate")
    train_op = tf.train.AdamOptimizer(learning_rate_ph).minimize(total_loss)

    sess.run(tf.global_variables_initializer())

    exp_buf = []
    rew_buf = []
    adv_buf = []
    prr = PeriodicReplayWriter(game_type=game_type, agents=agents, period=10,
                               outdir="/home/greg/coding/ML/rlgames/replays")

    merged_sum_op = tf.summary.merge_all()
    log_dir = os.path.join(
        "/home/greg/coding/ML/rlgames/logs",
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "_" + hyper_string)
    sum_wri = tf.summary.FileWriter(log_dir, graph=sess.graph, flush_secs=5)

    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=5,
                           keep_checkpoint_every_n_hours=0.5)
    ckpt_dir = os.path.join("/home/greg/coding/ML/rlgames", "checkpoints")
    steps_between_ckpts = 5000
    step = Checkpoint.optionally_restore_from_checkpoint(sess, saver, ckpt_dir)
    last_ckpt_step = step

    game_frames = 0
    train_frames = 0
    time_tracker = TimeTracker()
    iteration = 0
    while True:
        if annealer:
            annealer.frame(step)

        time_tracker.start("prr")
        prr.maybe_write(iteration)
        time_tracker.end("prr")

        sampler = Sampler()  # stores the examples we'll use in this iteration

        # play games until we have enough examples to do a round of optimization
        print "iteration {}: playing games...".format(iteration)
        while sampler.num_examples < examples_per_iteration:
            # Play a game and remember the experiences and rewards.
            # If there's more than one player, the same agent is used for all of them,
            # so the agent had better not be something with state like an RNN. Then
            # the experiences of all players are used for training.
            time_tracker.start("game")
            game = game_type()
            result = GameLoop.play_game(game, agents)
            time_tracker.end("game")
            game_frames += len(result.episodes[0].experiences)

            for ep in result.episodes:
                # remember each frame as an example to train on later
                ep_rewards = ep.compute_cum_discounted_future_rewards(gamma=gamma)
                ep_advs = ep.compute_generalized_advantage_ests(gamma, gae_lambda)
                ep_log_p_actions = np.array([exp.log_p_action for exp in ep.experiences])
                ep_feed_dict = {
                    reward_ph: ep_rewards,
                    advantage_ph: ep_advs,
                    old_log_p_chosen_action_ph: ep_log_p_actions
                }
                ep_feed_dict.update(agent.make_train_feed_dict(ep.experiences))
                sampler.add_examples(ep_feed_dict)

                # record some stats
                ep_undisc_rewards = ep.compute_cum_discounted_future_rewards(gamma=1.0)
                sum_wri.add_summary(make_summary("disc_rew", ep_rewards[0]),
                                    global_step=step)
                sum_wri.add_summary(make_summary("undisc_rew", ep_undisc_rewards[0]),
                                    global_step=step)
                sum_wri.add_summary(make_summary("init_value_est", ep.experiences[0].value_est),
                                    global_step=step)
                sum_wri.add_summary(make_summary("init_value_mse",
                                                 (ep.experiences[0].value_est - ep_rewards[0]) ** 2),
                                    global_step=step)
                sum_wri.add_summary(make_summary("game_length",
                                                 len(result.episodes[0].experiences)),
                                    global_step=step)
                sum_wri.add_summary(make_summary(
                    "total_undisc_rew",
                    sum(sum(exp.reward for exp in ep.experiences) for ep in result.episodes)),
                    global_step=step)

        # do a few epochs of optimization on the examples
        print "iteration {}: starting training...".format(iteration)
        time_tracker.start("train")
        for epoch in range(epochs):
            for mb_i, minibatch_fd in enumerate(sampler.get_minibatches(minibatch_size)):
                minibatch_fd[learning_rate_ph] = learning_rate
                [_, sums] = sess.run([train_op, merged_sum_op], feed_dict=minibatch_fd)
                sum_wri.add_summary(sums, global_step=step)
                step += minibatch_size
                train_frames += minibatch_size
        time_tracker.end("train")

        iteration += 1
        cur_time = time.time()
        print "iteration {}: finished training.".format(iteration)
        game_seconds = time_tracker.part_seconds["game"]
        train_seconds = time_tracker.part_seconds["train"]
        prr_seconds = time_tracker.part_seconds["prr"]
        total_seconds = time_tracker.get_total_seconds()
        other_seconds = total_seconds - train_seconds - game_seconds - prr_seconds
        print "game frames = {} game seconds = {:.1f}s game frames per second = {:.1f}".format(
            game_frames, game_seconds, game_frames / game_seconds)
        print "train frames = {} train seconds = {:.1f}s train frames per second = {:.1f}".format(
            train_frames, train_seconds, train_frames / train_seconds)
        print "total time = {:.1f}s game {:.1f}% train {:.1f}% prr {:.1f}% other {:.1f}%".format(
            total_seconds, 100 * game_seconds / total_seconds, 100 * train_seconds / total_seconds,
            100 * prr_seconds / total_seconds, 100 * other_seconds / total_seconds)

        if step - last_ckpt_step >= steps_between_ckpts:
            saver.save(sess, os.path.join(ckpt_dir, "model.ckpt"), global_step=step)
            last_ckpt_step = step
def game(level, screen):
    score = 0
    time_initial = pygame.time.get_ticks()
    clock = pygame.time.Clock()
    music_selection(level)

    def wait_for_ack():
        # block until the player acknowledges with Return/Enter/Backspace
        while True:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == pygame.KEYDOWN and event.key in [pygame.K_RETURN, pygame.K_KP_ENTER, pygame.K_BACKSPACE]:
                    return
            pygame.time.wait(20)

    # Level selection: checkpoint positions and intro screen per level
    if level.id == 0:
        point1 = Checkpoint(842, 50)
        point2 = Checkpoint(130, 480)
        screen.blit(intro1.image, intro1.rect)
    elif level.id == 1:
        point1 = Checkpoint(730, 30)
        point2 = Checkpoint(858, 500)
        screen.blit(intro2.image, intro2.rect)
    else:
        point1 = Checkpoint(20, 70)
        point2 = Checkpoint(900, 400)
        screen.blit(intro3.image, intro3.rect)
    pygame.display.update()
    wait_for_ack()

    level.mapa(screen)
    screen.blit(point1.image, point1.rect)
    screen.blit(point2.image, point2.rect)

    # Initialization variables
    player_x0, player_y0, player_angle = player_constants(level)
    heli_x0, heli_y0, heli_angle, patrol_radius = heli_constants(level)

    # Terrain parameters
    angle_step = 7.5
    terrain_factor = 1
    cont = 0
    Font = pygame.font.SysFont("arial", 20, True)
    txt_surf = Font.render("", True, WHITE)

    # Class initialization
    object_group = pygame.sprite.Group()
    player = Player(player_x0, player_y0, player_angle)
    heli = Heli(1, SCREENHEIGHT - 1, heli_angle)
    second_heli = Heli(heli_x0, heli_y0, heli_angle)
    capivara = Capivara()
    soldier = Soldier()
    object_group.add(heli)
    object_group.add(second_heli)
    object_group.add(capivara)
    object_group.draw(screen)
    screen.blit(player.image, player.rect)
    if soldier.state:
        screen.blit(soldier.image, soldier.rect)

    # Game over criteria
    game_over = GameOver()
    while not game_over.state:
        # get events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == KEYDOWN and event.key == K_p:
                pause(screen)
            player.handle_event(event)

        # handle terrain
        level.mapa(screen)
        terrain_factor = measure_terrain(player, level, screen)
        if cont == 0:
            Font = pygame.font.SysFont("arial", 20, True)
            txt_surf = Font.render("", True, WHITE)
        else:
            cont -= 1

        # update the score and check the stage flags of the game
        if level.verificarmissao(player, point1, point2):
            time_flag = pygame.time.get_ticks()
            Font = pygame.font.SysFont("arial", 24, True)
            txt_surf = Font.render("CHECKPOINT ACEITO", True, WHITE)
            cont = 45
            score += int(1000 - 5 * (time_flag / 1000 - time_initial / 1000))  # scoring model
        if level.vencedor():
            pygame.mixer.music.stop()
            arq = level.file()
            get_score(screen, arq, score)
            break

        # player movement
        player.move(terrain_factor, angle_step)

        # bot reaction
        '''bot_1.follow(player.x, player.y)
        bot_2.follow(player.x, player.y)'''
        capivara.time_counter(level, screen, SCREENHEIGHT)
        soldier.time_counter(level, screen, SCREENHEIGHT)
        player.update_pos(angle_step)
        heli.follow(player.x, player.y)
        second_heli.patrol(heli_x0, heli_y0, patrol_radius)
        heli.update_pos(player.x)
        second_heli.update_pos(player.x)

        level.mapa(screen)
        screen.blit(player.image, player.rect)
        screen.blit(point1.image, point1.rect)
        screen.blit(point2.image, point2.rect)
        screen.blit(txt_surf, (600, 450))
        object_group.draw(screen)
        if soldier.state:
            screen.blit(soldier.image, soldier.rect)

        # game over verification
        object_group.add(soldier)
        game_over.measure_state(player, object_group)
        object_group.remove(soldier)

        # screen update
        pygame.display.update()
        clock.tick(20)  # clock tick

    if game_over.state:
        pygame.mixer.music.stop()
        screen.blit(gameover.image, gameover.rect)
        pygame.display.update()
        game_over_sound()
        wait_for_ack()
        return
def train_model(model, N, Nfeat, lr_base, lr_half_life, max_steps,
                minibatch_size, engine_strength, just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        grad_cap = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N + 1, N + 1, Nfeat])
        model_outputs = model.inference(feature_planes, N + 1, Nfeat)
        label_op = tf.placeholder(tf.int64, shape=[None])
        outcome_op = tf.placeholder(tf.float32, shape=[None])
        cross_entropy_err = loss_func(model_outputs, label_op)
        train_op, grads_vars_op, dir_grads_vars_op = train_step(
            cross_entropy_err, learning_rate_ph, outcome_op, grad_cap)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=800,
                               keep_checkpoint_every_n_hours=0.5)
        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        last_training_loss = None

        if just_validate:
            # Just run the validation set once
            step = Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation(step, engine_strength)
        else:
            # Run the training loop
            step = Checkpoint.optionally_restore_from_checkpoint(
                sess, saver, os.path.join(model.train_dir))
            saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)
            print("WARNING: WILL STOP AFTER %d STEPS" % max_steps)
            print("lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life))

            batch_queue = SelfPlayGenerator.AsyncRandomGamePlayQueue()
            last_step_ref_time = 0
            while True:
                if step % (10 * minibatch_size) == 0 and step != 0:
                    win_rate = run_validation(step, engine_strength)
                    # weaken the reference engine once we beat it convincingly
                    if win_rate > 0.90 and engine_strength >= 0.05:
                        engine_strength = engine_strength - 0.05

                start_time = time.time()
                # checkpoint files are named model.ckpt-<version>
                checkpoint_paths = saver.last_checkpoints
                versions = [int(path.split('/')[-1].split('-')[-1]) for path in checkpoint_paths]
                batch_queue.start_gen_game_play(feature_planes, label_op, outcome_op,
                                                minibatch_size, versions, model)

                if step < 100:
                    learning_rate = 0.0003  # to stabilize initially
                else:
                    learning_rate = lr_base * 0.5 ** (float(step - 110000) / lr_half_life)

                mean_loss = 0.0
                counter = 0
                while True:
                    feed_dict = batch_queue.next_feed_dict()
                    if feed_dict is None:
                        break
                    feed_dict[learning_rate_ph] = learning_rate
                    feed_dict[grad_cap] = learning_rate * 1.0
                    _, loss_value = sess.run([train_op, cross_entropy_err], feed_dict=feed_dict)
                    mean_loss += loss_value
                    counter += 1
                mean_loss /= counter

                train_time = time.time() - start_time
                if step >= max_steps:
                    return

                full_step_time = time.time() - last_step_ref_time
                last_step_ref_time = time.time()
                if step % 10 == 0:
                    print("%s: step %d, lr=%f, loss = %.2f, (train=%.3f s/%d)" %
                          (datetime.now(), step, learning_rate, mean_loss, train_time, minibatch_size))
                step += minibatch_size
                if step % minibatch_size == 0 and step != 0:
                    saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)
def train_model(model, N, Nfeat, lr_base, lr_half_life, max_steps, just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N + 1, N + 1, Nfeat])
        model_outputs = model.inference(feature_planes, N + 1, Nfeat)
        true_outputs = tf.placeholder(tf.float32, shape=[None, (N + 1) * (N + 1) * 2])
        loss = loss_func(model_outputs, true_outputs)
        train_op = train_step(loss, learning_rate_ph)
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=20,
                               keep_checkpoint_every_n_hours=0.5)
        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        # initialize the generated-board queue
        batch_queue = SelfPlayGenerator.AsyncRandomBoardQueue(model.N, feature_planes, true_outputs)
        last_training_loss = None

        def run_validation(step, model, queue):
            # evaluate on 100 boards played against different levels of AI /
            # previous versions of the NN
            print("Run validation")
            num_board = 100
            total_err = 0
            for i in range(num_board):
                feed_dict = queue.next_feed_dict()
                loss_value, model_out, true_out = sess.run(
                    [loss, model_outputs, true_outputs], feed_dict=feed_dict)
                total_err += loss_value
                if i % 50 == 0:
                    # softmax over the raw model outputs, for inspection
                    e_x = np.exp(model_out - np.max(model_out))
                    e_x = e_x / e_x.sum()
                    print("Model out: \n", e_x)
                    print("True out: \n", true_out)
            total_err /= num_board
            print("Validation error: ", total_err)

        if just_validate:
            # Just run the validation set once
            step = Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation(step, model, batch_queue)
        else:
            # Run the training loop
            step = Checkpoint.optionally_restore_from_checkpoint(
                sess, saver, os.path.join(model.train_dir))
            saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)
            print("WARNING: WILL STOP AFTER %d STEPS" % max_steps)
            print("lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life))

            last_step_ref_time = 0
            test_size = 2000
            save_size = 500
            while True:
                if step % test_size == 0 and step != 0:
                    run_validation(step, model, batch_queue)

                if step < 100:
                    learning_rate = 0.0003  # to stabilize initially
                else:
                    learning_rate = lr_base * 0.5 ** (float(step - 100) / lr_half_life)

                start_time = time.time()
                feed_dict = batch_queue.next_feed_dict()
                feed_dict[learning_rate_ph] = learning_rate
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
                train_time = time.time() - start_time

                if step >= max_steps:
                    return
                if step % 100 == 0:
                    print("%s: step %d, lr=%.6f, loss = %.4f, (train=%.3f sec/step)" %
                          (datetime.now(), step, learning_rate, loss_value, train_time))
                step += 1
                if step % save_size == 0 and step != 0:
                    saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)
        except:
            return None
        row += 1
        if row == 9:
            return blocks
    return None


if len(sys.argv) > 1:
    for i in range(1, len(sys.argv)):
        try:
            file = open(sys.argv[i], 'r')
            read = read_file(file)
            file.close()
            if read is not None:
                checkpoint = Checkpoint(read)
                blocks = checkpoint.check()
                if blocks is not None:
                    row_print = ""
                    for block in blocks:
                        row_print += str(block.get_num())
                        if block.get_column() == 8:
                            row_print += "\n"
                    print(row_print)
                else:
                    print(sys.argv[i] + " is not a correct Sudoku")
            else:
                print(sys.argv[i] + " must have 9 lines, each line having 9 digits")
        except:
            print(sys.argv[i] + " cannot be opened")
def BatchNorm(Channels):
    # batch-norm layer initialized to the identity transform:
    # scale (weight) = 1, shift (bias) = 0
    RawBatchNorm = cp.CpBatchNorm2d(Channels)
    RawBatchNorm.weight.data.fill_(1)
    RawBatchNorm.bias.data.fill_(0)
    return RawBatchNorm
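# Example use of the helper above, assuming cp.CpBatchNorm2d exposes the same
# interface as torch.nn.BatchNorm2d (a sketch, not part of the original code):
import torch.nn as nn

def conv_bn_block(in_channels, out_channels):
    # a conv layer followed by an identity-initialized batch norm and a ReLU
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
        BatchNorm(out_channels),
        nn.ReLU(inplace=True))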