Esempio n. 1
0
 def do_train(self):
     """Run a single training step and checkpoint on every 1000th step.

     Nothing happens until ``self.push_done`` has reached
     ``self.arg_dict['train_memory']``.  After that, each call feeds the
     queues named in ``self.train_ph_dict`` and the current values of the
     variables named in ``self.var_ph_dict`` into one ``self.sess.run``
     call, increments ``self.train_count`` and, whenever the count is a
     multiple of 1000, logs timing figures and saves the session to
     ``<output_path>/sess/<train_count>``.
     """
     if self.push_done < self.arg_dict['train_memory']:
         return

     # Placeholder -> value mapping: queued samples first, variables second.
     feed_dict = dict((self.train_ph_dict[name], self.queue[name])
                      for name in self.train_ph_dict)
     feed_dict.update(
         (self.var_ph_dict[name], self.var_dict[name].eval(self.sess))
         for name in self.var_ph_dict)

     fetches = [self.train, self.loss, self.score_diff]
     _, loss, score_diff = self.sess.run(fetches, feed_dict=feed_dict)
     logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))

     self.train_count += 1
     if self.train_count % 1000 == 0:
         output_file_name = os.path.join(self.arg_dict['output_path'],
                                         'sess', str(self.train_count))
         # The interval is measured from the previous checkpoint, the total
         # from self.timestamp; both are reported in milliseconds.
         timestamp_last = time.time()
         total_ms = int((time.time() - self.timestamp) * 1000)
         interval_ms = int((timestamp_last - self.timestamp_last) * 1000)
         message = 'CLPNAVGR save session: {}, loss: {}, time: {}, time_i: {}'
         logging.info(
             message.format(output_file_name, loss, total_ms, interval_ms))
         self.timestamp_last = timestamp_last

         checkpoint_dir = os.path.dirname(output_file_name)
         py23.makedirs(checkpoint_dir, exist_ok=True)
         self.saver.save(self.sess, output_file_name)
     logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
 def do_train(self):
     """Run a single training step and checkpoint on every 1000th step.

     The call is a no-op while ``self.queue['state_0']`` holds fewer than
     100 entries.  Otherwise a list copy of every queue named in
     ``self.train_ph_dict`` and the current value of every variable named
     in ``self.var_ph_dict`` are fed into one ``self.sess.run`` call,
     ``self.train_count`` is incremented and, whenever the count is a
     multiple of 1000, the session is saved to
     ``<output_path>/sess/<train_count>.ckpt``.
     """
     if len(self.queue['state_0']) < 100:
         return

     # Placeholder -> value mapping: queue snapshots first, variables second.
     feed_dict = dict((self.train_ph_dict[name], list(self.queue[name]))
                      for name in self.train_ph_dict)
     feed_dict.update(
         (self.var_ph_dict[name], self.var_dict[name].eval(self.sess))
         for name in self.var_ph_dict)

     fetches = [self.train, self.loss, self.score_diff]
     _, loss, score_diff = self.sess.run(fetches, feed_dict=feed_dict)
     logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))

     self.train_count += 1
     if self.train_count % 1000 == 0:
         output_file_name = "{}/sess/{}.ckpt".format(
             self.arg_dict['output_path'], self.train_count)
         checkpoint_dir = os.path.dirname(output_file_name)
         py23.makedirs(checkpoint_dir, exist_ok=True)
         self.saver.save(self.sess, output_file_name)

         # Milliseconds elapsed since self.timestamp.
         elapsed_ms = int((time.time() - self.timestamp) * 1000)
         message = 'CLPNAVGR save session: {}, loss: {}, time: {}'
         logging.info(message.format(output_file_name, loss, elapsed_ms))
     logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
 def do_train(self):
     """Run a single training step and checkpoint on every 1000th step.

     Nothing happens until ``self.push_done`` has reached
     ``self.arg_dict['train_memory']``.  After that, each call feeds the
     queues named in ``self.train_ph_dict`` and the current values of the
     variables named in ``self.var_ph_dict`` into one ``self.sess.run``
     call and increments ``self.train_count``.  Whenever the count is a
     multiple of 1000 the loss, ``-log(loss)`` and timing figures are
     logged and the session is saved to ``<output_path>/sess/<train_count>``.
     """
     if self.push_done < self.arg_dict['train_memory']:
         return
     feed_dict = {}
     for k in self.train_ph_dict:
         feed_dict[self.train_ph_dict[k]] = self.queue[k]
     for k in self.var_ph_dict:
         feed_dict[self.var_ph_dict[k]] = self.var_dict[k].eval(self.sess)
     _, loss, score_diff = self.sess.run(
         [self.train, self.loss, self.score_diff], feed_dict=feed_dict)
     logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))
     self.train_count += 1
     if self.train_count % 1000 == 0:
         output_file_name = os.path.join(self.arg_dict['output_path'],
                                         'sess', str(self.train_count))
         timestamp_last = time.time()
         # math.log() raises ValueError for a loss <= 0, which used to abort
         # training right before the checkpoint was written.  Report the
         # limit value (+inf) for a zero loss and NaN for anything that has
         # no logarithm (negative or NaN loss) instead of crashing.
         if loss > 0:
             neg_log_loss = -math.log(loss)
         elif loss == 0:
             neg_log_loss = float('inf')
         else:
             neg_log_loss = float('nan')
         logging.info(
             'CLPNAVGR save session: {}, loss: {}, -log(loss): {}, '
             'time: {}, time_i: {}'.format(
                 output_file_name, loss, neg_log_loss,
                 int((time.time() - self.timestamp) * 1000),
                 int((timestamp_last - self.timestamp_last) * 1000)))
         self.timestamp_last = timestamp_last
         py23.makedirs(os.path.dirname(output_file_name), exist_ok=True)
         self.saver.save(self.sess, output_file_name)
     logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
 def do_train(self):
     """Perform one training step; save the session every 1000 steps.

     Returns immediately while ``self.queue['state_0']`` has fewer than 100
     entries.  Otherwise the queues listed in ``self.train_ph_dict`` (copied
     to lists) and the evaluated variables listed in ``self.var_ph_dict``
     are fed to ``self.sess.run``, ``self.train_count`` grows by one and,
     on every multiple of 1000, a checkpoint is written to
     ``<output_path>/sess/<train_count>.ckpt`` and reported in the log.
     """
     enough_samples = not len(self.queue['state_0']) < 100
     if not enough_samples:
         return

     feed_dict = {}
     # Training inputs: a list snapshot of each queue.
     feed_dict.update((self.train_ph_dict[key], list(self.queue[key]))
                      for key in self.train_ph_dict)
     # Variable inputs: the value each variable currently has in the session.
     feed_dict.update(
         (self.var_ph_dict[key], self.var_dict[key].eval(self.sess))
         for key in self.var_ph_dict)

     run_result = self.sess.run(
         [self.train, self.loss, self.score_diff], feed_dict=feed_dict)
     _, loss, score_diff = run_result
     logging.debug('ZPDDPYFD loss ' + str(loss) + ' ' + str(score_diff))

     self.train_count += 1
     if self.train_count % 1000 == 0:
         output_file_name = "{}/sess/{}.ckpt".format(
             self.arg_dict['output_path'], self.train_count)
         py23.makedirs(os.path.dirname(output_file_name), exist_ok=True)
         self.saver.save(self.sess, output_file_name)

         # Time since self.timestamp, in milliseconds.
         elapsed_ms = int((time.time() - self.timestamp) * 1000)
         logging.info('CLPNAVGR save session: {}, loss: {}, time: {}'.format(
             output_file_name, loss, elapsed_ms))
     logging.debug('HZQQMSQT ' + MY_NAME + ' ' + str(self.train_count))
def main(_):
    """Parse the training options and run a self-play game with DeepLearn.

    Options are read with ``parse_known_args``, so unrecognised
    command-line flags are ignored rather than rejected.  When
    ``--output_path`` is not given it defaults to
    ``output/<MY_NAME>/deeplearn/<unix timestamp>``.  The resolved options
    are written to ``input_arg_dict.json`` inside the output directory
    before the game starts.

    Args:
        _: Unused positional argument.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev',
                           type=float,
                           help='random_stddev',
                           default=0.1)
    argparser.add_argument('--random_move_chance',
                           type=float,
                           help='RANDOM_MOVE_CHANCE',
                           default=0.05)
    argparser.add_argument('--train_beta',
                           type=float,
                           help='TRAIN_BETA',
                           default=0.99)
    argparser.add_argument('--turn_count',
                           type=int,
                           help='turn_count',
                           default=None)
    argparser.add_argument('--device', type=str, help='device', default=None)
    # Disabled options, kept for reference:
    #   --element_l2_factor (float, default 10.0)
    #   --l2_weight         (float, default 0.1)
    argparser.add_argument('--train_memory',
                           type=int,
                           help='TRAIN_MEMORY',
                           default=20000)
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    # None is a singleton: test identity, not equality.
    if arg_dict['output_path'] is None:
        timestamp = int(time.time())
        arg_dict['output_path'] = os.path.join('output', MY_NAME, 'deeplearn',
                                               str(timestamp))

    # Record the effective options next to the outputs they produce.
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
              'w') as out_file:
        json.dump(arg_dict, out_file)

    # Alternative kept from the original: tf.device('/gpu:0')
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both sides are played by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)

        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)

        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
Esempio n. 6
0
                                               str(timestamp), 'deeplearn')

    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
              'w') as out_file:
        json.dump(arg_dict, out_file)

    with tf.device(arg_dict['device']):
        #     with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)

        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)

        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)

        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)


if __name__ == '__main__':
    # One log file per launch: log/<unix seconds>-<MY_NAME>-deeplearn.log
    launch_stamp = str(int(time.time()))
    log_basename = '{}-{}-deeplearn.log'.format(launch_stamp, MY_NAME)
    log_filename = os.path.join('log', log_basename)

    # The log directory has to exist before basicConfig opens the file.
    log_dir = os.path.dirname(log_filename)
    py23.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(filename=log_filename, level=logging.INFO)

    # Start the TensorFlow app runner.
    tf.app.run()
def run_vs_dict(vs_dict, arg_dict):
    """Play one O-versus-X match-up and return the result of ``vs``.

    The default TensorFlow graph is reset first, then a player is created
    for each side from ``vs_dict['O']`` and ``vs_dict['X']``.  Both players
    are closed afterwards even when player creation or the match itself
    raises, so a failed match-up does not leak its players.

    Args:
        vs_dict: Mapping with the keys ``'O'`` and ``'X'``; each value is
            passed to ``create_player`` together with ``arg_dict``.
        arg_dict: Options forwarded unchanged to ``create_player``.

    Returns:
        Whatever ``vs(po, px, 1000)`` returns; it is also logged as JSON.
    """
    tf.reset_default_graph()
    po = px = None
    try:
        po = create_player(vs_dict['O'], arg_dict)
        px = create_player(vs_dict['X'], arg_dict)
        result = vs(po, px, 1000)
    finally:
        # Close in the original order (O first, then X), skipping a player
        # that was never created.
        for player in (po, px):
            if player is not None:
                player.close()
    logging.info(vs_dict)
    logging.info(json.dumps(result))
    return result

if __name__ == '__main__':
    timestamp = int(time.time())

    log_filename = os.path.join('log','{}-{}-dlcompare.log'.format(timestamp,MY_NAME))
    py23.makedirs(os.path.dirname(log_filename),exist_ok=True)
    logging.basicConfig(level=logging.INFO,filename=log_filename)

    import argparse

    parser = argparse.ArgumentParser(description='Compare trained model with random and perfect AI')
    parser.add_argument('--path', metavar='N', type=str, nargs='?', default=None, help='working path')
    parser.add_argument('--count', metavar='N', type=int, nargs='?', default=None, help='compare count')
    args = parser.parse_args()
    
    arg_dict = vars(args)

    if arg_dict['path'] == None:
        scan_dir = os.path.join('output',MY_NAME)
        filename_list = os.listdir(scan_dir)
        if len(filename_list) <= 0:
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
              'w') as out_file:
        json.dump(arg_dict, out_file)

    with tf.device(arg_dict['device']):
        #     with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)

        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)

        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)

        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)


if __name__ == '__main__':
    # One log file per launch: log/<MY_NAME>/<unix seconds>.log
    log_dir = os.path.join('log', MY_NAME)
    py23.makedirs(log_dir, exist_ok=True)

    log_basename = '{}.log'.format(str(int(time.time())))
    logging.basicConfig(filename=os.path.join(log_dir, log_basename),
                        level=logging.INFO)

    # Start the TensorFlow app runner.
    tf.app.run()
def main(_):
    """Parse the training options and run (or resume) a self-play game.

    Options are read with ``parse_known_args``, so unrecognised
    command-line flags are ignored rather than rejected.

    Output directory when ``--output_path`` is not given:
      * fresh run: ``output/<MY_NAME>/<unix timestamp>/deeplearn``;
      * ``--continue``: the ``deeplearn`` directory of the entry under
        ``output/<MY_NAME>`` with the largest integer name.

    With ``--continue`` the options stored in ``input_arg_dict.json`` of
    that directory replace the command-line options; otherwise the
    command-line options are written to that file.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: ``--continue`` was requested without ``--output_path``
            and no previous run could be found.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None
    )
    argparser.add_argument(
        '--random_stddev',
        type=float,
        help='random_stddev',
        default=0.1
    )
    argparser.add_argument(
        '--random_move_chance',
        type=float,
        help='RANDOM_MOVE_CHANCE',
        default=0.05
    )
    argparser.add_argument(
        '--train_beta',
        type=float,
        help='TRAIN_BETA',
        default=0.99
    )
    argparser.add_argument(
        '--turn_count',
        type=int,
        help='turn_count',
        default=None
    )
    argparser.add_argument(
        '--device',
        type=str,
        help='device',
        default=None
    )
    argparser.add_argument(
        '--continue',
        action='store_true',
        help='continue'
    )
    # Disabled options, kept for reference:
    #   --element_l2_factor (float, default 10.0)
    #   --l2_weight         (float, default 0.1)
    argparser.add_argument(
        '--train_memory',
        type=int,
        help='TRAIN_MEMORY',
        default=10000
    )
    argparser.add_argument(
        '--dropout_keep_prob',
        type=float,
        help='dropout_keep_prob',
        default=0.5
    )
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    # None is a singleton: test identity, not equality.
    if arg_dict['output_path'] is None:
        if not arg_dict['continue']:
            timestamp = int(time.time())
            arg_dict['output_path'] = os.path.join(
                'output', MY_NAME, str(timestamp), 'deeplearn')
        else:
            run_root = os.path.join('output', MY_NAME)
            filename_int_list = [
                util.to_int(filename, -1) for filename in os.listdir(run_root)
            ]
            # max() raises on an empty list, and a maximum of -1 would point
            # at a non-existent "-1" run (presumably to_int falls back to
            # the -1 passed here for non-numeric names -- confirm in util).
            # Fail with a clear message in both cases.
            latest = max(filename_int_list) if filename_int_list else -1
            if latest < 0:
                raise ValueError(
                    'no previous run found under {}; nothing to continue'
                    .format(run_root))
            arg_dict['output_path'] = os.path.join(
                'output', MY_NAME, str(latest), 'deeplearn')

    if arg_dict['continue']:
        # Resume with the options the original run was started with.
        with open(os.path.join(arg_dict['output_path'],
                               'input_arg_dict.json'),
                  'r') as deeplearn_arg_dict_file:
            deeplearn_arg_dict = json.load(deeplearn_arg_dict_file)
        arg_dict = deeplearn_arg_dict
        arg_dict['continue'] = True
    else:
        # Record the effective options next to the outputs they produce.
        py23.makedirs(arg_dict['output_path'], exist_ok=True)
        with open(os.path.join(arg_dict['output_path'],
                               'input_arg_dict.json'), 'w') as out_file:
            json.dump(arg_dict, out_file)

    # Alternative kept from the original: tf.device('/gpu:0')
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both sides are played by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)

        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)

        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
def main(_):
    """Parse the training options and run a self-play game with DeepLearn.

    Options are read with ``parse_known_args``, so unrecognised
    command-line flags are ignored rather than rejected.  When
    ``--output_path`` is not given it defaults to
    ``output/<MY_NAME>/deeplearn/<unix timestamp>``.  The resolved options
    are written to ``input_arg_dict.json`` inside the output directory
    before the game starts.

    Args:
        _: Unused positional argument.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None
    )
    argparser.add_argument(
        '--random_stddev',
        type=float,
        help='random_stddev',
        default=0.1
    )
    argparser.add_argument(
        '--random_move_chance',
        type=float,
        help='RANDOM_MOVE_CHANCE',
        default=0.05
    )
    argparser.add_argument(
        '--train_beta',
        type=float,
        help='TRAIN_BETA',
        default=0.99
    )
    argparser.add_argument(
        '--turn_count',
        type=int,
        help='turn_count',
        default=None
    )
    argparser.add_argument(
        '--device',
        type=str,
        help='device',
        default=None
    )
    # Disabled options, kept for reference:
    #   --element_l2_factor (float, default 10.0)
    #   --l2_weight         (float, default 0.1)
    argparser.add_argument(
        '--train_memory',
        type=int,
        help='TRAIN_MEMORY',
        default=20000
    )
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    # None is a singleton: test identity, not equality.
    if arg_dict['output_path'] is None:
        timestamp = int(time.time())
        arg_dict['output_path'] = 'output/{}/deeplearn/{}'.format(
            MY_NAME, timestamp)

    # Record the effective options next to the outputs they produce.
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    with open('{}/input_arg_dict.json'.format(arg_dict['output_path']),
              'w') as out_file:
        json.dump(arg_dict, out_file)

    # Alternative kept from the original: tf.device('/gpu:0')
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both sides are played by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)

        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)

        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
Esempio n. 11
0
def main(_):
    """Parse the training options and run (or resume) a self-play game.

    Options are read with ``parse_known_args``, so unrecognised
    command-line flags are ignored rather than rejected.

    Output directory when ``--output_path`` is not given:
      * fresh run: ``output/<MY_NAME>/<unix timestamp>/deeplearn``;
      * ``--continue``: the ``deeplearn`` directory of the entry under
        ``output/<MY_NAME>`` with the largest integer name.

    With ``--continue`` the options stored in ``input_arg_dict.json`` of
    that directory replace the command-line options; otherwise the
    command-line options are written to that file.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: ``--continue`` was requested without ``--output_path``
            and no previous run could be found.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev',
                           type=float,
                           help='random_stddev',
                           default=0.1)
    argparser.add_argument('--random_move_chance',
                           type=float,
                           help='RANDOM_MOVE_CHANCE',
                           default=0.05)
    argparser.add_argument('--train_beta',
                           type=float,
                           help='TRAIN_BETA',
                           default=0.99)
    argparser.add_argument('--turn_count',
                           type=int,
                           help='turn_count',
                           default=None)
    argparser.add_argument('--device', type=str, help='device', default=None)
    argparser.add_argument('--continue', action='store_true', help='continue')
    # Disabled options, kept for reference:
    #   --element_l2_factor (float, default 10.0)
    #   --l2_weight         (float, default 0.1)
    argparser.add_argument('--train_memory',
                           type=int,
                           help='TRAIN_MEMORY',
                           default=10000)
    argparser.add_argument('--starter_learning_rate',
                           type=float,
                           help='starter_learning_rate',
                           default=0.01)
    argparser.add_argument('--learning_rate_decay_steps',
                           type=int,
                           help='learning_rate_decay_steps',
                           default=1000000)
    argparser.add_argument('--learning_rate_decay_rate',
                           type=float,
                           help='learning_rate_decay_rate',
                           default=0.5)
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    # None is a singleton: test identity, not equality.
    if arg_dict['output_path'] is None:
        if not arg_dict['continue']:
            timestamp = int(time.time())
            arg_dict['output_path'] = os.path.join('output', MY_NAME,
                                                   str(timestamp), 'deeplearn')
        else:
            run_root = os.path.join('output', MY_NAME)
            filename_int_list = [
                util.to_int(filename, -1) for filename in os.listdir(run_root)
            ]
            # max() raises on an empty list, and a maximum of -1 would point
            # at a non-existent "-1" run (presumably to_int falls back to
            # the -1 passed here for non-numeric names -- confirm in util).
            # Fail with a clear message in both cases.
            latest = max(filename_int_list) if filename_int_list else -1
            if latest < 0:
                raise ValueError(
                    'no previous run found under {}; nothing to continue'
                    .format(run_root))
            arg_dict['output_path'] = os.path.join('output', MY_NAME,
                                                   str(latest), 'deeplearn')

    if arg_dict['continue']:
        # Resume with the options the original run was started with.
        with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
                  'r') as deeplearn_arg_dict_file:
            deeplearn_arg_dict = json.load(deeplearn_arg_dict_file)
        arg_dict = deeplearn_arg_dict
        arg_dict['continue'] = True
    else:
        # Record the effective options next to the outputs they produce.
        py23.makedirs(arg_dict['output_path'], exist_ok=True)
        with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
                  'w') as out_file:
            json.dump(arg_dict, out_file)

    # Alternative kept from the original: tf.device('/gpu:0')
    with tf.device(arg_dict['device']):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both sides are played by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)

        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)

        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))
    if(arg_dict['output_path']==None):
        timestamp = int(time.time())
        arg_dict['output_path'] = os.path.join('output',MY_NAME,'deeplearn',str(timestamp))
    
    py23.makedirs(arg_dict['output_path'],exist_ok=True)
    with open(os.path.join(arg_dict['output_path'],'input_arg_dict.json'),'w') as out_file:
        json.dump(arg_dict,out_file)

    with tf.device(arg_dict['device']):
#     with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)
        
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
    
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)
        
        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        
        game.run(turn_count=arg_dict['turn_count'],p=logging.debug)


if __name__ == '__main__':
    # One log file per launch: log/<MY_NAME>/<unix seconds>.log
    log_dir = os.path.join('log', MY_NAME)
    py23.makedirs(log_dir, exist_ok=True)

    launch_stamp = str(int(time.time()))
    log_path = os.path.join(log_dir, '{}.log'.format(launch_stamp))
    logging.basicConfig(filename=log_path, level=logging.INFO)

    # Start the TensorFlow app runner.
    tf.app.run()