Example #1
    def __init__(self, eng_name, model):
        super(TFEngine, self).__init__()
        self.eng_name = eng_name
        self.model = model
        self.book = Book.load_GoGoD_book()

        self.last_move_probs = np.zeros((
            self.model.N,
            self.model.N,
        ))
        self.kibitz_mode = False

        # build the graph
        with tf.Graph().as_default():
            with tf.device('/cpu:0'):
                self.feature_planes = tf.placeholder(
                    tf.float32,
                    shape=[None, self.model.N, self.model.N, self.model.Nfeat],
                    name='feature_planes')
                self.logits = model.inference(self.feature_planes,
                                              self.model.N, self.model.Nfeat)
                saver = tf.train.Saver(tf.trainable_variables())
                init = tf.initialize_all_variables()
                self.sess = tf.Session(config=tf.ConfigProto(
                    log_device_placement=False))
                self.sess.run(init)
                checkpoint_dir = os.path.join(model.train_dir, 'checkpoints')
                Checkpoint.restore_from_checkpoint(self.sess, saver,
                                                   checkpoint_dir)
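
Note: every engine constructor above and below ends with Checkpoint.restore_from_checkpoint(sess, saver, checkpoint_dir), whose body is not shown in these examples. A minimal sketch of what such a helper plausibly does, assuming the standard TF1 tf.train.get_checkpoint_state / saver.restore pattern (the actual go-NN Checkpoint module may differ):

import tensorflow as tf

def restore_from_checkpoint(sess, saver, checkpoint_dir):
    # Look up the latest checkpoint recorded in the directory and restore
    # all saved variables into the running session.
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # Checkpoint paths conventionally end in "-<global_step>".
        return int(ckpt.model_checkpoint_path.split('-')[-1])
    raise RuntimeError("no checkpoint found in %s" % checkpoint_dir)
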
Example #2
def execute(load=False, train_from_start=False):
    dataloader64, dataloader128 = prep_data()
    netG1, netG2, netD1, netD2 = create_nn()
    optimizerG1, optimizerG2, optimizerD1, optimizerD2 = \
        create_optimizers(netG1, netG2, netD1, netD2)

    img_list_64 = []
    img_list_128 = []
    G1_losses = []
    D1_losses = []
    G2_losses = []
    D2_losses = []
    lists = (img_list_64, img_list_128, D1_losses, G1_losses, G2_losses,
             D2_losses)

    fixed_noise = torch.randn(64, nz, 1, 1, device=device)

    checkpoint = Checkpoint(
        {
            'Generator_Stage_1': netG1,
            'Generator_Stage_2': netG2,
            'Discriminator_Stage_1': netD1,
            'Discriminator_Stage_2': netD2
        }, {
            'optimizerG1': optimizerG1,
            'optimizerG2': optimizerG2,
            'optimizerD1': optimizerD1,
            'optimizerD2': optimizerD2
        },
        epoch=0,
        loss=criterion,
        script_name="metalGAN2",
        load=load)

    args_bundle = dataloader64, dataloader128, netG1, netG2, netD1, netD2, \
                 optimizerG1, optimizerG2, optimizerD1, optimizerD2

    if train_from_start:
        checkpoint.epoch = 0
    start_at_epoch = checkpoint.epoch

    train_part_1(args_bundle, lists, checkpoint, fixed_noise, start_at_epoch)
    print("starting stage 2...")
    adjust_learning_rate(optimizerD1, lr)
    adjust_learning_rate(optimizerG1, lr)

    train_part_2(args_bundle, lists, checkpoint, fixed_noise, num_epochs,
                 max(10, start_at_epoch))

    adjust_learning_rate(optimizerD1, lr / 2)
    adjust_learning_rate(optimizerG1, lr / 2)
    adjust_learning_rate(optimizerD2, lr)
    adjust_learning_rate(optimizerG2, lr)

    train_part_2(args_bundle, lists, checkpoint, fixed_noise, num_epochs * 2,
                 max(20, start_at_epoch))

    checkpoint.save()
    show_off(lists, dataloader128)
    return 0
Example #3
def createCheckpointsFromExperiment():

    print "Setting up private mode baselines"
    for np, wl, mem, simpoint, fw in buildPossibleParams():
        if np != 4:
            curWorkload = workloads.getBms(wl, np, True)
            for bm in curWorkload:
                actualPath = checkpoints.getCheckpointDirectory(
                    4, mem, bm, simpoint)
                checkPath = checkpoints.getCheckpointDirectory(
                    np, mem, bm, simpoint)
                checkFile = checkPath + "/m5.cpt"
                if not os.path.exists(checkFile):
                    print "Linking", actualPath, checkPath
                    os.symlink(actualPath, checkPath)

    print
    print "Generating multi-core checkpoints"
    for np, wl, mem, simpoint, fw in buildPossibleParams():
        if Checkpoint.prerequisiteFilesExist(wl, np, mem, simpoint):
            printParameters(np, wl, mem, simpoint, fw)
            path = Checkpoint.generateCheckpoint(wl, np, fw, mem, simpoint)
            print "Generated checkpoint at " + path
        else:
            print "Files needed for np " + str(
                np
            ) + ", workload " + wl + ", memsys " + mem + " and simpoint " + str(
                simpoint) + " not found"
            print "Skipping..."
    return 0
Example #4
 def __init__(self, model):
     BaseEngine.__init__(self) 
     self.model = model
     with tf.Graph().as_default():
         with tf.device('/cpu:0'):
             self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes')
             self.probs_op = model.inference(self.feature_planes, self.model.N, self.model.Nfeat)
             saver = tf.train.Saver(tf.trainable_variables())
             init = tf.initialize_all_variables()
             self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
             self.sess.run(init)
             checkpoint_dir = os.path.join(model.train_dir, 'checkpoints')
             Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir)
Example #5
def load_and_compare():
    dataloader64, dataloader128 = prep_data()
    netG1, netG2, netD1, netD2 = create_nn()
    optimizerG1, optimizerG2, optimizerD1, optimizerD2 = \
        create_optimizers(netG1, netG2, netD1, netD2)

    fixed_noise = torch.randn(64, nz, 1, 1, device=device)

    checkpoint = Checkpoint(
        {
            'Generator_Stage_1': netG1,
            'Generator_Stage_2': netG2,
            'Discriminator_Stage_1': netD1,
            'Discriminator_Stage_2': netD2
        }, {
            'optimizerG1': optimizerG1,
            'optimizerG2': optimizerG2,
            'optimizerD1': optimizerD1,
            'optimizerD2': optimizerD2
        },
        epoch=0,
        loss=criterion,
        script_name="metalGAN2",
        load=True)

    compare_with_real(device,
                      dataloader128,
                      img_list=None,
                      Generator=netG1,
                      noise=fixed_noise)
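
Note: examples #2 and #5 build a Checkpoint from dictionaries of models and optimizers and later call checkpoint.save() or pass load=True. A hedged sketch of such a wrapper, assuming it simply bundles the state_dict()s and the current epoch into a single torch.save file (the metalGAN2 project's actual class is not shown here):

import os
import torch

class Checkpoint:
    def __init__(self, models, optimizers, epoch=0, loss=None,
                 script_name="checkpoint", load=False):
        self.models = models            # dict of name -> nn.Module
        self.optimizers = optimizers    # dict of name -> torch.optim.Optimizer
        self.epoch = epoch
        self.loss = loss
        self.path = script_name + ".tar"
        if load and os.path.exists(self.path):
            state = torch.load(self.path)
            for name, m in self.models.items():
                m.load_state_dict(state['models'][name])
            for name, o in self.optimizers.items():
                o.load_state_dict(state['optimizers'][name])
            self.epoch = state['epoch']

    def save(self):
        # Persist everything needed to resume training from this point.
        torch.save({
            'models': {n: m.state_dict() for n, m in self.models.items()},
            'optimizers': {n: o.state_dict() for n, o in self.optimizers.items()},
            'epoch': self.epoch,
        }, self.path)
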
Example #6
def main():

    opts, args = parseAargs()

    print
    print "Automatic checkpoint generation for multiprogrammed workloads"
    print

    if opts.checkpointDestination:
        sys.exit(copyCheckpointFiles(opts.checkpointDestination, opts))

    if opts.fromExp:
        sys.exit(createCheckpointsFromExperiment())

    if opts.convertCheckpointFile != "":
        sys.exit(convertCheckpointFile(opts.convertCheckpointFile))

    if opts.test:
        sys.exit(testCheckpoints(opts.siminsts))

    simpoint = -1
    printParameters(opts.np, opts.workload, opts.memsys, simpoint,
                    opts.fwinsts)

    chkptPath = Checkpoint.generateCheckpoint(opts.workload, opts.np,
                                              opts.fwinsts, opts.memsys,
                                              simpoint)

    print
    print "Generated checkpoint at " + chkptPath
    print
Example #7
    def __init__(self, eng_name, model, step=None):
        super(NNEngine,self).__init__(model.N)
        self.eng_name = eng_name
        self.model = model
        self.move_records = []
        self.move_probs = []

        # build the graph
        with tf.Graph().as_default():
            #with tf.device('/cpu:0'):
                self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N + 1, self.model.N + 1, self.model.Nfeat], name='feature_planes')
                self.logits = model.inference(self.feature_planes, self.model.N + 1, self.model.Nfeat)
                saver = tf.train.Saver(tf.trainable_variables())
                init = tf.global_variables_initializer()
                self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
                self.sess.run(init)
                checkpoint_dir = os.path.join(model.train_dir)
                Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir, step)
Example #8
 def __init__(self, model):
     BaseEngine.__init__(self)
     self.model = model
     with tf.Graph().as_default():
         with tf.device('/cpu:0'):
             self.feature_planes = tf.placeholder(
                 tf.float32,
                 shape=[None, self.model.N, self.model.N, self.model.Nfeat],
                 name='feature_planes')
             self.probs_op = model.inference(self.feature_planes,
                                             self.model.N, self.model.Nfeat)
             saver = tf.train.Saver(tf.trainable_variables())
             init = tf.initialize_all_variables()
             self.sess = tf.Session(config=tf.ConfigProto(
                 log_device_placement=False))
             self.sess.run(init)
             checkpoint_dir = os.path.join(model.train_dir, 'checkpoints')
             Checkpoint.restore_from_checkpoint(self.sess, saver,
                                                checkpoint_dir)
Example #9
    def __init__(self, eng_name, model):
        super(TFEngine,self).__init__() 
        self.eng_name = eng_name
        self.model = model
        self.book = Book.load_GoGoD_book()

        self.last_move_probs = np.zeros((self.model.N, self.model.N,))
        self.kibitz_mode = False

        # build the graph
        with tf.Graph().as_default():
            with tf.device('/cpu:0'):
                self.feature_planes = tf.placeholder(tf.float32, shape=[None, self.model.N, self.model.N, self.model.Nfeat], name='feature_planes')
                self.logits = model.inference(self.feature_planes, self.model.N, self.model.Nfeat)
                saver = tf.train.Saver(tf.trainable_variables())
                init = tf.initialize_all_variables()
                self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
                self.sess.run(init)
                checkpoint_dir = os.path.join(model.train_dir, 'checkpoints')
                Checkpoint.restore_from_checkpoint(self.sess, saver, checkpoint_dir)
Example #10
 def calc(self, atomlist, do_Qeq=False, charge=0, multiplicity=1):
     """do the actual calculation and parse the results"""
     self.atomlist = atomlist
     if self.embedding == "electrostatic" and self.have_charges == False:
         do_Qeq = True
         self.have_charges = True
     if do_Qeq == True:
         # The charge equilibration with Qeq should be performed only once.
         # For repeated calculations (dynamics, optimization) the partial
         # charges are set to 0, and the electrostatic interaction is added
         # later.
         route = "#P UFF=Qeq Force NoSymm Geom=(Connectivity, NoCrowd)"
     else:
         route = "#P UFF Force NoSymm Geom=(Connectivity, NoCrowd)"
     write_input(self.com_file,
                 self.atomlist,
                 chk_file=self.chk_file,
                 route=route,
                 connectivity=self.connectivity,
                 charge=charge,
                 multiplicity=multiplicity,
                 title="compute energies and gradients")
     # execute g09
     if self.verbose > 0:
         print "Gaussian 09 ..."
         print "  route: %s" % route
     ret = os.system("g09 < %s 2>&1 > %s" % (self.com_file, self.log_file))
     error_msg = "G09 Calculation failed! Check the log-file %s" % self.log_file
     assert ret == 0, error_msg
     # create formatted checkpoint file
     ret = os.system("formchk %s 2>&1 > /dev/null" % self.chk_file)
     assert ret == 0, "ERROR: formchk failed!"
     # parse the checkpoint file and extract:
     #   - the total energy
     #   - the forces
     #   - the partial MM charges from the charge equilibration
     Data = Checkpoint.parseCheckpointFile(self.fchk_file)
     # save results for later use
     self.mm_energy = Data["_Total_Energy"]
     # The gradient does not contain the contribution from electrostatics
     self.mm_gradient = Data["_Cartesian_Gradient"]
     if do_Qeq == True:
         # At the first step, when Qeq is performed
         # gradient and energy contain electrostatic interactions, already
         self.mm_charges = Data["_MM_charges"]
     elif self.embedding == "electrostatic":
         # add the electrostatic energy
         enCoul, gradCoul = self._electrostatics()
         self.mm_energy += enCoul
         self.mm_gradient += gradCoul
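
Note: example #10 relies on Checkpoint.parseCheckpointFile(self.fchk_file) to pull the total energy, gradient and MM charges out of a Gaussian formatted checkpoint (.fchk) file. The project's parser is not shown; purely as an illustration, and assuming the usual .fchk layout where scalar fields sit on one line, a single scalar such as the total energy can be read like this:

def read_fchk_scalar(path, field="Total Energy"):
    # Scalar fields in a .fchk file look like "Total Energy   R   -7.66E+01";
    # scan for the field name and convert the last token to a float.
    with open(path) as f:
        for line in f:
            if line.startswith(field):
                return float(line.split()[-1])
    raise KeyError("field %r not found in %s" % (field, path))
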
Example #11
File: Train.py  Project: TheDuck314/go-NN
def train_model(model, N, Nfeat, build_feed_dict, normalization, loss_func, train_data_dir, val_data_dir, lr_base, lr_half_life, max_steps, just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        momentum_ph = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N, N, Nfeat])

        model_outputs = model.inference(feature_planes, N, Nfeat)
        outputs_ph, total_loss, accuracy = loss_func(model_outputs)
        train_op = train_step(total_loss, learning_rate_ph, momentum_ph)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=5, keep_checkpoint_every_n_hours=2.0)

        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(os.path.join(model.train_dir, 'summaries', datetime.now().strftime('%Y%m%d-%H%M%S')), graph=sess.graph, flush_secs=5)
        accuracy_avg = MovingAverage('accuracy', time_constant=1000)
        total_loss_avg = MovingAverage('total_loss', time_constant=1000)

        def run_validation(): # run the validation set
            val_loader = NPZ.Loader(val_data_dir)
            mean_loss = 0.0
            mean_accuracy = 0.0
            mb_num = 0
            print "Starting validation..."
            while val_loader.has_more():
                if mb_num % 100 == 0: print "validation minibatch #%d" % mb_num
                feed_dict = build_feed_dict(val_loader, normalization, feature_planes, outputs_ph)
                loss_value, accuracy_value = sess.run([total_loss, accuracy], feed_dict=feed_dict)
                mean_loss += loss_value
                mean_accuracy += accuracy_value
                mb_num += 1
            mean_loss /= mb_num
            mean_accuracy /= mb_num
            print "Validation: mean loss = %.3f, mean accuracy = %.2f%%" % (mean_loss, 100*mean_accuracy)
            summary_writer.add_summary(make_summary('validation_loss', mean_loss), step)
            summary_writer.add_summary(make_summary('validation_accuracy_percent', 100*mean_accuracy), step)
    
        last_training_loss = None

        if just_validate: # Just run the validation set once
            Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation()
        else: # Run the training loop
            #step = 0
            step = Checkpoint.optionally_restore_from_checkpoint(sess, saver, os.path.join(model.train_dir, 'checkpoints'))
            #step = optionally_restore_from_checkpoint(sess, saver, model.train_dir)
            #print "WARNING: CHECKPOINTS TURNED OFF!!"
            print "WARNING: WILL STOP AFTER %d STEPS" % max_steps
            print "WARNING: IGNORING lr.txt and momentum.txt"
            print "lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life)
            #loader = NPZ.AsyncRandomizingLoader(train_data_dir, minibatch_size=128)
            minibatch_size = 128
            batch_queue = EvalTraining.AsyncRandomBatchQueue(feature_planes, outputs_ph, train_data_dir, minibatch_size, normalization)
            #loader = NPZ.RandomizingLoader(train_data_dir, minibatch_size=128)
            #loader = NPZ.GroupingRandomizingLoader(train_data_dir, Ngroup=1)
            #loader = NPZ.SplittingRandomizingLoader(train_data_dir, Nsplit=2)
            last_step_ref_time = 0
            while True:
                if step % 10000 == 0 and step != 0: 
                    run_validation()

                start_time = time.time()
                #feed_dict = build_feed_dict(loader, normalization, feature_planes, outputs_ph)
                feed_dict = batch_queue.next_feed_dict()
                load_time = time.time() - start_time

                if step % 1 == 0:
                    #learning_rate = read_float_from_file('../work/lr.txt', default=0.1)
                    #momentum = read_float_from_file('../work/momentum.txt', default=0.9)
                    if step < 100:
                        learning_rate = 0.0003 # to stabilize initially
                    else:
                        learning_rate = lr_base * 0.5**(float(step-100)/lr_half_life)
                    momentum = 0.9
                    summary_writer.add_summary(make_summary('learningrate', learning_rate), step)
                    summary_writer.add_summary(make_summary('momentum', momentum), step)
                feed_dict[learning_rate_ph] = learning_rate
                feed_dict[momentum_ph] = momentum
    
                start_time = time.time()
                _, loss_value, accuracy_value, outputs_value = sess.run([train_op, total_loss, accuracy, model_outputs], feed_dict=feed_dict)
                train_time = time.time() - start_time

                total_loss_avg.add(loss_value)
                accuracy_avg.add(100 * accuracy_value)
                #print "outputs_value ="
                #print outputs_value.flatten()
                #print "feed_dict[outputs_ph] ="
                #print feed_dict[outputs_ph].flatten()

                if np.isnan(loss_value):
                    print "Model diverged with loss = Nan"
                    return
                #assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step >= max_steps: 
                    return

                if step % 10 == 0:
                    total_loss_avg.write(summary_writer, step)
                    accuracy_avg.write(summary_writer, step)

                full_step_time = time.time() - last_step_ref_time
                last_step_ref_time = time.time()

                if step % 1 == 0:
                    minibatch_size = feed_dict[feature_planes].shape[0]
                    examples_per_sec = minibatch_size / full_step_time
                    print "%s: step %d, lr=%.6f, mom=%.2f, loss = %.4f, accuracy = %.2f%% (mb_size=%d, %.1f examples/sec), (load=%.3f train=%.3f total=%0.3f sec/step)" % \
                            (datetime.now(), step, learning_rate, momentum, loss_value, 100*accuracy_value, minibatch_size, examples_per_sec, load_time, train_time, full_step_time)
                    if step % 10 == 0:
                        summary_writer.add_summary(make_summary('examples/sec', examples_per_sec), step)
                        summary_writer.add_summary(make_summary('step', step), step)
    
                if step % 1000 == 0 and step != 0:
                    #print "WARNING: CHECKPOINTS TURNED OFF!!"
                    saver.save(sess, os.path.join(model.train_dir, "checkpoints", "model.ckpt"), global_step=step)

                step += 1
Example #12
def train(game_type, agent_type, annealer=None):
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

    agent = agent_type(sess)
    agents = [agent] * game_type.get_num_agents()

    # hyperparameters
    gamma = 0.99
    gae_lambda = 0.95
    learning_rate = 0.0003
    #    learning_rate = 0.0001
    epsilon = 0.1  # ppo parameter TODO: fiddle with me
    value_loss_coef = 0.3
    examples_per_iteration = 1000
    minibatch_size = 100  # experiences in each training batch
    epochs = 3
    hyper_string = "ppo_lr{}_ep{}_vc{}_eit{}_mb{}_ep{}".format(
        learning_rate, epsilon, value_loss_coef, examples_per_iteration,
        minibatch_size, epochs)

    # set up the training operations in tensorflow
    advantage_ph = tf.placeholder(tf.float32, shape=[None],
                                  name='advantage')  # shape: (batch size,)
    old_log_p_chosen_action_ph = tf.placeholder(
        tf.float32, shape=[None],
        name='old_log_p_chosen_action')  # shape: (batch size,)
    log_p_chosen_action_op = agent.get_log_p_chosen_action_op()
    p_ratio = tf.exp(log_p_chosen_action_op - old_log_p_chosen_action_ph)
    clipped_p_ratio = tf.clip_by_value(p_ratio, 1.0 - epsilon, 1.0 + epsilon)
    policy_loss = -tf.reduce_sum(
        tf.minimum(advantage_ph * p_ratio, advantage_ph * clipped_p_ratio))
    # train value function by gradient descent on [(value est) - (cum future reward)] ** 2
    reward_ph = tf.placeholder(tf.float32, shape=[None],
                               name='reward')  # shape: (batch size,)
    value_op = agent.get_value_op()
    value_mse = tf.reduce_sum(tf.square(reward_ph - value_op))
    value_mse_sum = tf.summary.scalar(
        "value_mse", tf.reduce_mean(tf.square(reward_ph - value_op)))
    # put policy and value loss together to get total loss
    total_loss = policy_loss + value_loss_coef * value_mse  # could optionally add an entropy loss to encourage exploration
    learning_rate_ph = tf.placeholder(tf.float32, name="learning_rate")
    train_op = tf.train.AdamOptimizer(learning_rate_ph).minimize(total_loss)

    sess.run(tf.global_variables_initializer())

    exp_buf = []
    rew_buf = []
    adv_buf = []

    prr = PeriodicReplayWriter(game_type=game_type,
                               agents=agents,
                               period=10,
                               outdir="/home/greg/coding/ML/rlgames/replays")

    merged_sum_op = tf.summary.merge_all()
    log_dir = os.path.join(
        "/home/greg/coding/ML/rlgames/logs",
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "_" + hyper_string)
    sum_wri = tf.summary.FileWriter(log_dir, graph=sess.graph, flush_secs=5)

    saver = tf.train.Saver(tf.trainable_variables(),
                           max_to_keep=5,
                           keep_checkpoint_every_n_hours=0.5)
    ckpt_dir = os.path.join("/home/greg/coding/ML/rlgames", "checkpoints")
    steps_between_ckpts = 5000
    step = Checkpoint.optionally_restore_from_checkpoint(sess, saver, ckpt_dir)
    last_ckpt_step = step

    game_frames = 0
    train_frames = 0
    time_tracker = TimeTracker()

    iteration = 0
    while True:
        if annealer:
            annealer.frame(step)

        time_tracker.start("prr")
        prr.maybe_write(iteration)
        time_tracker.end("prr")

        sampler = Sampler()  # stores the examples we'll use in this iteration

        # play games until we have enough examples to do a round of optimization
        print "iteration {}: playing games...".format(iteration)
        while sampler.num_examples < examples_per_iteration:
            # play a game and remember the experiences and rewards
            # If there's more than one player, the same agent is used for all of them
            # so the agent had better not be something with state like an RNN. Then
            # the experiences of all players are used for training.
            time_tracker.start("game")
            game = game_type()
            result = GameLoop.play_game(game, agents)
            time_tracker.end("game")
            game_frames += len(result.episodes[0].experiences)

            #print result.episodes[0]
            for ep in result.episodes:
                # remember each frame as an example to train on later
                ep_rewards = ep.compute_cum_discounted_future_rewards(
                    gamma=gamma)
                ep_advs = ep.compute_generalized_advantage_ests(
                    gamma, gae_lambda)
                ep_log_p_actions = np.array(
                    [exp.log_p_action for exp in ep.experiences])
                ep_feed_dict = {
                    reward_ph: ep_rewards,
                    advantage_ph: ep_advs,
                    old_log_p_chosen_action_ph: ep_log_p_actions
                }
                ep_feed_dict.update(agent.make_train_feed_dict(ep.experiences))
                sampler.add_examples(ep_feed_dict)

                # record some stats
                ep_undisc_rewards = ep.compute_cum_discounted_future_rewards(
                    gamma=1.0)
                sum_wri.add_summary(make_summary("disc_rew", ep_rewards[0]),
                                    global_step=step)
                sum_wri.add_summary(make_summary("undisc_rew",
                                                 ep_undisc_rewards[0]),
                                    global_step=step)
                sum_wri.add_summary(make_summary("init_value_est",
                                                 ep.experiences[0].value_est),
                                    global_step=step)
                sum_wri.add_summary(make_summary("init_value_mse",
                                                 (ep.experiences[0].value_est -
                                                  ep_rewards[0])**2),
                                    global_step=step)
            sum_wri.add_summary(make_summary(
                "game_length", len(result.episodes[0].experiences)),
                                global_step=step)
            sum_wri.add_summary(make_summary(
                "total_undisc_rew",
                sum(
                    sum(exp.reward for exp in ep.experiences)
                    for ep in result.episodes)),
                                global_step=step)

        # do a few epochs of optimization on the examples
        print "iteration {}: starting training...".format(iteration)
        time_tracker.start("train")
        for epoch in range(epochs):
            for mb_i, minibatch_fd in enumerate(
                    sampler.get_minibatches(minibatch_size)):
                #print "begin iteration {} epoch {} minibatch {}".format(iteration, epoch, mb_i)
                minibatch_fd[learning_rate_ph] = learning_rate
                #print "minibatch_fd =\n{}".format(minibatch_fd)
                #print "debug before train step:"
                #agent.print_debug_info()
                [_, sums] = sess.run([train_op, merged_sum_op],
                                     feed_dict=minibatch_fd)
                #print "debug after train step:"
                #agent.print_debug_info()
                sum_wri.add_summary(sums, global_step=step)
                step += minibatch_size
                train_frames += minibatch_size
        time_tracker.end("train")

        iteration += 1
        cur_time = time.time()
        print "iteration {}: finished training.".format(iteration)
        game_seconds = time_tracker.part_seconds["game"]
        train_seconds = time_tracker.part_seconds["train"]
        prr_seconds = time_tracker.part_seconds["prr"]
        total_seconds = time_tracker.get_total_seconds()
        other_seconds = total_seconds - train_seconds - game_seconds - prr_seconds
        print "game  frames = {}  game  seconds = {:.1f}s  game  frames per second = {:.1f}".format(
            game_frames, game_seconds, game_frames / game_seconds)
        print "train frames = {}  train seconds = {:.1f}s  train frames per second = {:.1f}".format(
            train_frames, train_seconds, train_frames / train_seconds)
        print "total time = {:.1f}s  game {:.1f}% train {:.1f}% prr {:.1f}% other {:.1f}%".format(
            total_seconds, 100 * game_seconds / total_seconds,
            100 * train_seconds / total_seconds,
            100 * prr_seconds / total_seconds,
            100 * other_seconds / total_seconds)

        if step - last_ckpt_step >= steps_between_ckpts:
            saver.save(sess,
                       os.path.join(ckpt_dir, "model.ckpt"),
                       global_step=step)
            last_ckpt_step = step
Example #13
def game(level, screen):

    score = 0
    boolean = False
    time_initial = pygame.time.get_ticks()
    clock = pygame.time.Clock()
    music_selection(level)

    #Level selection
    if level.id == 0:
        point1 = Checkpoint(842, 50)
        point2 = Checkpoint(130, 480)
        screen.blit(intro1.image, intro1.rect)
        pygame.display.update()
        while True:  # wait for user to acknowledge and return
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == pygame.KEYDOWN and event.key in [pygame.K_RETURN, pygame.K_KP_ENTER,
                                                                  pygame.K_BACKSPACE]:
                    boolean = True
                    break
            if boolean:
                break
            pygame.time.wait(20)
    elif level.id == 1:
        point1 = Checkpoint(730, 30)
        point2 = Checkpoint(858, 500)
        screen.blit(intro2.image, intro2.rect)
        pygame.display.update()
        while True:  # wait for user to acknowledge and return
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == pygame.KEYDOWN and event.key in [pygame.K_RETURN, pygame.K_KP_ENTER,
                                                                  pygame.K_BACKSPACE]:
                    boolean = True
                    break
            if boolean:
                break
            pygame.time.wait(20)
    else:
        point1 = Checkpoint(20, 70)
        point2 = Checkpoint(900, 400)
        screen.blit(intro3.image, intro3.rect)
        pygame.display.update()
        while True:  # wait for user to acknowledge and return
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == pygame.KEYDOWN and event.key in [pygame.K_RETURN, pygame.K_KP_ENTER,
                                                                  pygame.K_BACKSPACE]:
                    boolean = True
                    break
            if boolean:
                break
            pygame.time.wait(20)

    level.mapa(screen)
    screen.blit(point1.image, point1.rect)
    screen.blit(point2.image, point2.rect)


    #Initialization variables
    player_x0, player_y0, player_angle = player_constants(level)
    heli_x0, heli_y0, heli_angle, patrol_radius = heli_constants(level)

    #Terrain parameters
    angle_step = 7.5
    terrain_factor = 1
    cont = 0
    Font = pygame.font.SysFont("arial", 20, True)
    txt_surf = Font.render("", True, WHITE)

    #Class initialization
    object_group = pygame.sprite.Group()
    player = Player(player_x0, player_y0, player_angle)
    heli = Heli(1, SCREENHEIGHT-1, heli_angle)
    second_heli = Heli(heli_x0, heli_y0, heli_angle)
    capivara = Capivara()
    soldier = Soldier()
    object_group.add(heli)
    object_group.add(second_heli)
    object_group.add(capivara)
    object_group.draw(screen)
    screen.blit(player.image, player.rect)
    if soldier.state:
        screen.blit(soldier.image, soldier.rect)

    #Game Over criteria
    game_over = GameOver()

    while not game_over.state:
        # GET EVENT
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == KEYDOWN and event.key == K_p:
                pause(screen)
            player.handle_event(event)
        #Handle terrain
        level.mapa(screen)
        terrain_factor = measure_terrain(player, level, screen)

        if cont == 0:
            Font = pygame.font.SysFont("arial", 20, True)
            txt_surf = Font.render("", True, WHITE)
        else:
            cont -= 1

        # Score update and checking of the game-stage flags
        if level.verificarmissao(player, point1, point2):
            time_flag = pygame.time.get_ticks()
            Font = pygame.font.SysFont("arial", 24, True)
            txt_surf = Font.render("CHECKPOINT ACEITO", True, WHITE)
            cont = 45
            score += int(1000 - 5 * (time_flag / 1000 - time_initial / 1000))  # scoring model

        if level.vencedor():
            pygame.mixer.music.stop()
            arq = level.file()
            get_score(screen, arq, score)
            break

        #Player movement
        player.move(terrain_factor, angle_step)

        #Bot reaction
        '''bot_1.follow(player.x, player.y)
        bot_2.follow(player.x, player.y)'''
        capivara.time_counter(level, screen, SCREENHEIGHT)
        soldier.time_counter(level, screen, SCREENHEIGHT)
        player.update_pos(angle_step)
        heli.follow(player.x, player.y)
        second_heli.patrol(heli_x0, heli_y0, patrol_radius)
        heli.update_pos(player.x)
        second_heli.update_pos(player.x)
        level.mapa(screen)
        screen.blit(player.image, player.rect)
        screen.blit(point1.image, point1.rect)
        screen.blit(point2.image, point2.rect)
        screen.blit(txt_surf, (600, 450))
        object_group.draw(screen)
        if soldier.state:
            screen.blit(soldier.image, soldier.rect)

        #Game over verification
        object_group.add(soldier)
        game_over.measure_state(player, object_group)
        object_group.remove(soldier)

        #Screen update
        pygame.display.update()
        clock.tick(20)  # clock tick (frame cap)

    if game_over.state:
        pygame.mixer.music.stop()
        screen.blit(gameover.image, gameover.rect)
        pygame.display.update()
        game_over_sound()
        while True:  # wait for user to acknowledge and return
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == pygame.KEYDOWN and event.key in [pygame.K_RETURN, pygame.K_KP_ENTER,
                                                                  pygame.K_BACKSPACE]:
                    return
            pygame.time.wait(20)
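
Note: in example #13, Checkpoint(x, y) is a pygame sprite: the game blits its .image at .rect, and level.verificarmissao() tests whether the player has reached it. A minimal sketch of such a sprite, assuming a plain colored square stands in for the real checkpoint artwork:

import pygame

class Checkpoint(pygame.sprite.Sprite):
    def __init__(self, x, y):
        # Small marker placed at (x, y); the rect is what the game uses for
        # drawing and for reach/collision checks.
        super(Checkpoint, self).__init__()
        self.image = pygame.Surface((32, 32))
        self.image.fill((255, 215, 0))
        self.rect = self.image.get_rect(topleft=(x, y))
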
Example #14
def train_model(model, N, Nfeat, lr_base,
                lr_half_life, max_steps, minibatch_size, engine_strength, just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        grad_cap = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N + 1, N + 1, Nfeat])

        model_outputs = model.inference(feature_planes, N + 1, Nfeat)
        label_op = tf.placeholder(tf.int64, shape=[None])
        outcome_op = tf.placeholder(tf.float32, shape=[None])
        cross_entropy_err = loss_func(model_outputs, label_op)
        train_op, grads_vars_op, dir_grads_vars_op = train_step(cross_entropy_err, learning_rate_ph, outcome_op, grad_cap)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=800, keep_checkpoint_every_n_hours=0.5)

        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)


        last_training_loss = None

        if just_validate:  # Just run the validation set once
            step = Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation(step, engine_strength)
        else:  # Run the training loop

            # step = 0
            step = Checkpoint.optionally_restore_from_checkpoint(sess, saver, os.path.join(model.train_dir))
            saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)


            print("WARNING: WILL STOP AFTER %d STEPS" % max_steps)
            print("lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life))
            # loader = NPZ.AsyncRandomizingLoader(train_data_dir, minibatch_size=128)

            batch_queue = SelfPlayGenerator.AsyncRandomGamePlayQueue()

            last_step_ref_time = 0
            while True:


                if step % (10 * minibatch_size) == 0 and step != 0:
                    win_rate = run_validation(step, engine_strength)
                    if (win_rate > 0.90) and (engine_strength >= 0.05):
                        engine_strength = engine_strength - 0.05

                start_time = time.time()

                checkpoint_paths = saver.last_checkpoints
                versions = [int(checkpoint_paths[ind].split('/')[-1].split('-')[-1]) for ind in
                            range(len(checkpoint_paths))]

                batch_queue.start_gen_game_play(feature_planes, label_op, outcome_op, minibatch_size, versions, model)


                if step < 100:
                    learning_rate = 0.0003  # to stabilize initially
                else:
                    learning_rate = lr_base * 0.5 ** (float(step - 110000) / lr_half_life)
                #summary_writer.add_summary(make_summary('learningrate', learning_rate), step)
                #summary_writer.add_summary(make_summary('momentum', momentum), step)

                mean_loss = 0.0
                counter = 0

                while True:
                    feed_dict = batch_queue.next_feed_dict()
                    if (feed_dict == None):
                        break
                    feed_dict[learning_rate_ph] = learning_rate
                    feed_dict[grad_cap] = learning_rate * 1.0
                    _, loss_value = sess.run(
                        [train_op, cross_entropy_err],
                        feed_dict=feed_dict)

                    # _, loss_value, model_out, label_out = sess.run(
                    #     [train_op, cross_entropy_err, model_outputs, label_op],
                    #     feed_dict=feed_dict)
                    #
                    # e_x = np.exp(model_out - np.max(model_out))
                    # e_x = e_x / e_x.sum()
                    # print("Model out: \n", e_x)
                    # print("True out: \n", label_out)
                    # print("Loss: ", loss_value)
                    #
                    # if (loss_value > 30):
                    #     return

                    mean_loss += loss_value
                    counter += 1

                mean_loss /= counter

                # _, loss_value, outputs_value, grads_vars, dir_grads_vars, outcome = sess.run([train_op, cross_entropy_err, model_outputs,
                #                                          grads_vars_op, dir_grads_vars_op, outcome_op],
                #                                                             feed_dict=feed_dict)
                train_time = time.time() - start_time

                if step >= max_steps:
                    return

                #if step % 10 == 0:
                #    total_loss_avg.write(summary_writer, step)
                #    accuracy_avg.write(summary_writer, step)

                full_step_time = time.time() - last_step_ref_time
                last_step_ref_time = time.time()

                if step % 10 == 0:
                    print("%s: step %d, lr=%f, loss = %.2f, (train=%.3f s/%d)" % \
                    (datetime.now(), step, learning_rate, mean_loss,
                        train_time, minibatch_size))

                step += minibatch_size

                if step % minibatch_size == 0 and step != 0:
                    saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)
Example #15
def train_model(model, N, Nfeat, lr_base,
                lr_half_life, max_steps, just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N + 1, N + 1, Nfeat])

        model_outputs = model.inference(feature_planes, N + 1, Nfeat)
        true_outputs = tf.placeholder(tf.float32, shape=[None, (N+1)*(N+1)*2])
        loss = loss_func(model_outputs, true_outputs)
        train_op = train_step(loss, learning_rate_ph)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=20, keep_checkpoint_every_n_hours=0.5)

        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        ## initialize gen board queue
        batch_queue = SelfPlayGenerator.AsyncRandomBoardQueue(model.N, feature_planes, true_outputs)

        last_training_loss = None

        def run_validation(step, model, queue):
            # play 100 games with different level of AI / previous version NN
            print("Run validation")
            num_board = 100
            total_err = 0

            for i in range(num_board):

                feed_dict = queue.next_feed_dict()
                loss_value, model_out, true_out = sess.run([loss, model_outputs, true_outputs], feed_dict=feed_dict)
                total_err += loss_value

                if (i % 50 == 0):
                    e_x = np.exp(model_out - np.max(model_out))
                    e_x = e_x / e_x.sum()
                    print("Model out: \n", e_x)
                    print("True out: \n", true_out)

            total_err /= num_board

            print("Validation error: ", total_err)

        if just_validate:  # Just run the validation set once
            step = Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation(step, model, batch_queue)
        else:  # Run the training loop

            step = Checkpoint.optionally_restore_from_checkpoint(sess, saver, os.path.join(model.train_dir))
            saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)


            print("WARNING: WILL STOP AFTER %d STEPS" % max_steps)
            print("lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life))

            last_step_ref_time = 0


            test_size = 2000
            save_size = 500

            while True:

                if step % test_size == 0 and step != 0:
                    run_validation(step, model, batch_queue)

                if step < 100:
                    learning_rate = 0.0003  # to stabilize initially
                else:
                    learning_rate = lr_base * 0.5 ** (float(step - 100) / lr_half_life)

                #if (step % 20 == 0):
                #    print("Step = ", step)

                start_time = time.time()

                feed_dict = batch_queue.next_feed_dict()
                feed_dict[learning_rate_ph] = learning_rate
                _, loss_value = sess.run(
                        [train_op, loss],
                        feed_dict=feed_dict)

                train_time = time.time() - start_time

                #print(model_out)
                #print(true_out)

                if step >= max_steps:
                    return

                if step % 100 == 0:
                    print("%s: step %d, lr=%.6f, loss = %.4f, (train=%.3f sec/ step)" % \
                    (datetime.now(), step, learning_rate, loss_value, train_time))

                step += 1

                if step % save_size == 0 and step != 0:
                    saver.save(sess, os.path.join(model.train_dir, "model.ckpt"), global_step=step)
Example #16
            except:
                return None
        row += 1
        if row == 9:
            return blocks
    return None


if len(sys.argv) > 1:
    for i in range(1, len(sys.argv)):
        try:
            file = open(sys.argv[i], 'r')
            read = read_file(file)
            file.close()
            if read is not None:
                checkpoint = Checkpoint(read)
                blocks = checkpoint.check()
                if blocks is not None:
                    row_print = ""
                    for block in blocks:
                        row_print += str(block.get_num())
                        if block.get_column() == 8:
                            row_print += "\n"
                    print(row_print)
                else:
                    print(sys.argv[i] + " is not a correct Sudoku")
            else:
                print(sys.argv[i] +
                      " must have 9 lines, each line having 9 digits")
        except:
            print(sys.argv[i] + " cannot be opened")
Example #17
def train_model(model, N, Nfeat, build_feed_dict, normalization, loss_func, train_data_dir, val_data_dir, lr_base,
                lr_half_life, max_steps, just_validate=False):
    with tf.Graph().as_default():
        # build the graph
        learning_rate_ph = tf.placeholder(tf.float32)
        momentum_ph = tf.placeholder(tf.float32)
        feature_planes = tf.placeholder(tf.float32, shape=[None, N, N, Nfeat])

        model_outputs = model.inference(feature_planes, N, Nfeat)
        outputs_ph, total_loss, accuracy = loss_func(model_outputs)
        train_op = train_step(total_loss, learning_rate_ph, momentum_ph)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=5, keep_checkpoint_every_n_hours=2.0)

        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(
            os.path.join(model.train_dir, 'summaries', datetime.now().strftime('%Y%m%d-%H%M%S')), graph=sess.graph,
            flush_secs=5)
        accuracy_avg = MovingAverage('accuracy', time_constant=1000)
        total_loss_avg = MovingAverage('total_loss', time_constant=1000)

        def run_validation():  # run the validation set
            val_loader = NPZ.Loader(val_data_dir)
            mean_loss = 0.0
            mean_accuracy = 0.0
            mb_num = 0
            print "Starting validation..."
            while val_loader.has_more():
                if mb_num % 100 == 0: print "validation minibatch #%d" % mb_num
                feed_dict = build_feed_dict(val_loader, normalization, feature_planes, outputs_ph)
                loss_value, accuracy_value = sess.run([total_loss, accuracy], feed_dict=feed_dict)
                mean_loss += loss_value
                mean_accuracy += accuracy_value
                mb_num += 1
            mean_loss /= mb_num
            mean_accuracy /= mb_num
            print "Validation: mean loss = %.3f, mean accuracy = %.2f%%" % (mean_loss, 100 * mean_accuracy)
            summary_writer.add_summary(make_summary('validation_loss', mean_loss), step)
            summary_writer.add_summary(make_summary('validation_accuracy_percent', 100 * mean_accuracy), step)

        last_training_loss = None

        if just_validate:  # Just run the validation set once
            Checkpoint.restore_from_checkpoint(sess, saver, model.train_dir)
            run_validation()
        else:  # Run the training loop
            # step = 0
            step = Checkpoint.optionally_restore_from_checkpoint(sess, saver,
                                                                 os.path.join(model.train_dir, 'checkpoints'))
            # step = optionally_restore_from_checkpoint(sess, saver, model.train_dir)
            # print "WARNING: CHECKPOINTS TURNED OFF!!"
            print "WARNING: WILL STOP AFTER %d STEPS" % max_steps
            print "WARNING: IGNORING lr.txt and momentum.txt"
            print "lr_base = %f, lr_half_life = %f" % (lr_base, lr_half_life)
            # loader = NPZ.AsyncRandomizingLoader(train_data_dir, minibatch_size=128)
            minibatch_size = 128
            batch_queue = EvalTraining.AsyncRandomBatchQueue(feature_planes, outputs_ph, train_data_dir, minibatch_size,
                                                             normalization)
            # loader = NPZ.RandomizingLoader(train_data_dir, minibatch_size=128)
            # loader = NPZ.GroupingRandomizingLoader(train_data_dir, Ngroup=1)
            # loader = NPZ.SplittingRandomizingLoader(train_data_dir, Nsplit=2)
            last_step_ref_time = 0
            while True:
                if step % 10000 == 0 and step != 0:
                    run_validation()

                start_time = time.time()
                # feed_dict = build_feed_dict(loader, normalization, feature_planes, outputs_ph)
                feed_dict = batch_queue.next_feed_dict()
                load_time = time.time() - start_time

                if step % 1 == 0:
                    # learning_rate = read_float_from_file('../work/lr.txt', default=0.1)
                    # momentum = read_float_from_file('../work/momentum.txt', default=0.9)
                    if step < 100:
                        learning_rate = 0.0003  # to stabilize initially
                    else:
                        learning_rate = lr_base * 0.5 ** (float(step - 100) / lr_half_life)
                    momentum = 0.9
                    summary_writer.add_summary(make_summary('learningrate', learning_rate), step)
                    summary_writer.add_summary(make_summary('momentum', momentum), step)
                feed_dict[learning_rate_ph] = learning_rate
                feed_dict[momentum_ph] = momentum

                start_time = time.time()
                _, loss_value, accuracy_value, outputs_value = sess.run([train_op, total_loss, accuracy, model_outputs],
                                                                        feed_dict=feed_dict)
                train_time = time.time() - start_time

                total_loss_avg.add(loss_value)
                accuracy_avg.add(100 * accuracy_value)
                # print "outputs_value ="
                # print outputs_value.flatten()
                # print "feed_dict[outputs_ph] ="
                # print feed_dict[outputs_ph].flatten()

                if np.isnan(loss_value):
                    print "Model diverged with loss = Nan"
                    return
                # assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step >= max_steps:
                    return

                if step % 10 == 0:
                    total_loss_avg.write(summary_writer, step)
                    accuracy_avg.write(summary_writer, step)

                full_step_time = time.time() - last_step_ref_time
                last_step_ref_time = time.time()

                if step % 1 == 0:
                    minibatch_size = feed_dict[feature_planes].shape[0]
                    examples_per_sec = minibatch_size / full_step_time
                    print "%s: step %d, lr=%.6f, mom=%.2f, loss = %.4f, accuracy = %.2f%% (mb_size=%d, %.1f examples/sec), (load=%.3f train=%.3f total=%0.3f sec/step)" % \
                          (datetime.now(), step, learning_rate, momentum, loss_value, 100 * accuracy_value,
                           minibatch_size, examples_per_sec, load_time, train_time, full_step_time)
                    if step % 10 == 0:
                        summary_writer.add_summary(make_summary('examples/sec', examples_per_sec), step)
                        summary_writer.add_summary(make_summary('step', step), step)

                if step % 1000 == 0 and step != 0:
                    # print "WARNING: CHECKPOINTS TURNED OFF!!"
                    saver.save(sess, os.path.join(model.train_dir, "checkpoints", "model.ckpt"), global_step=step)

                step += 1
Example #18
def BatchNorm(Channels):
    RawBatchNorm = cp.CpBatchNorm2d(Channels)
    RawBatchNorm.weight.data.fill_(1)
    RawBatchNorm.bias.data.fill_(0)
    return RawBatchNorm