# use_chance = min(use_chance,0.95) # use_chance = 0.95 # if random.random() > use_chance: # self.train_dict['choice_0'] = random.randrange(9) # print("KEKPIAXP random: "+str(self.train_dict['choice_0'])) return ACTION_MAP[self.train_dict['choice_0']] def update_status(self,status): pass def turn_end(self): pass def close(self): self.dl.close() if __name__ == '__main__': game = Game() dl = deeplearn.DeepLearn() po = DLPlayer(dl) po.set_side(tttl.Pid.O) px = DLPlayer(dl) px.set_side(tttl.Pid.X) game.setPlayer(tttl.Pid.O, po) game.setPlayer(tttl.Pid.X, px) game.run()
train_dict['cont'] = 1 train_dict['reward_1'] = REWARD_STEP self.dl.push_train_dict(train_dict) self.dl.do_train() self.legit_mask = None self.train_dict = None self.last_choice = None def close(self): self.legit_mask = None self.train_dict = None self.last_choice = None self.dl.close() if __name__ == '__main__': game = Game() dl = DeepLearn() po = DLPlayer(dl) po.set_side(tttl.Pid.O) px = DLPlayer(dl) px.set_side(tttl.Pid.X) game.setPlayer(tttl.Pid.O, po) game.setPlayer(tttl.Pid.X, px) game.run()
def main(_):
    """Parse command-line options and run a DLPlayer-vs-DLPlayer game.

    Both sides (O and X) are played by DLPlayer instances that share a single
    DeepLearn object. Before the game starts, the effective options are
    written to ``input_arg_dict.json`` inside the output directory.

    When ``--output_path`` is not given, a fresh directory
    ``output/<MY_NAME>/deeplearn/<unix timestamp>`` is used.

    Args:
        _: Not used by this function.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev',
                           type=float,
                           help='random_stddev',
                           default=0.1)
    argparser.add_argument('--random_move_chance',
                           type=float,
                           help='RANDOM_MOVE_CHANCE',
                           default=0.05)
    argparser.add_argument('--train_beta',
                           type=float,
                           help='TRAIN_BETA',
                           default=0.99)
    argparser.add_argument('--turn_count',
                           type=int,
                           help='turn_count',
                           default=None)
    argparser.add_argument('--device',
                           type=str,
                           help='device',
                           default=None)
    # argparser.add_argument(
    #     '--element_l2_factor',
    #     type=float,
    #     help='ELEMENT_L2_FACTOR',
    #     default=10.0
    # )
    # argparser.add_argument(
    #     '--l2_weight',
    #     type=float,
    #     help='L2_WEIGHT',
    #     default=0.1
    # )
    argparser.add_argument('--train_memory',
                           type=int,
                           help='TRAIN_MEMORY',
                           default=20000)

    # parse_known_args() tolerates options this parser does not declare.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    if arg_dict['output_path'] is None:
        # No explicit destination: create a new, timestamped run directory.
        timestamp = int(time.time())
        arg_dict['output_path'] = os.path.join('output', MY_NAME, 'deeplearn',
                                               str(timestamp))

    # Record the options used for this run next to its outputs.
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    arg_dict_path = os.path.join(arg_dict['output_path'],
                                 'input_arg_dict.json')
    with open(arg_dict_path, 'w') as out_file:
        json.dump(arg_dict, out_file)

    with tf.device(arg_dict['device']):
        # with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both players are backed by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
def main(_):
    """Parse command-line options and run a DLPlayer-vs-DLPlayer game.

    Both sides (O and X) are played by DLPlayer instances that share a single
    DeepLearn object.

    Output directory selection when ``--output_path`` is not given:

    * without ``--continue``: a fresh directory
      ``output/<MY_NAME>/<unix timestamp>/deeplearn`` is used;
    * with ``--continue``: the entry of ``output/<MY_NAME>`` with the largest
      integer name is used.

    With ``--continue`` the options stored in ``input_arg_dict.json`` of the
    output directory replace the ones parsed from the command line; otherwise
    the parsed options are written to that file.

    Args:
        _: Not used by this function.

    Raises:
        ValueError: If ``--continue`` is given without ``--output_path`` and
            no previous run directory can be found.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None
    )
    argparser.add_argument(
        '--random_stddev',
        type=float,
        help='random_stddev',
        default=0.1
    )
    argparser.add_argument(
        '--random_move_chance',
        type=float,
        help='RANDOM_MOVE_CHANCE',
        default=0.05
    )
    argparser.add_argument(
        '--train_beta',
        type=float,
        help='TRAIN_BETA',
        default=0.99
    )
    argparser.add_argument(
        '--turn_count',
        type=int,
        help='turn_count',
        default=None
    )
    argparser.add_argument(
        '--device',
        type=str,
        help='device',
        default=None
    )
    argparser.add_argument(
        '--continue',
        action='store_true',
        help='continue'
    )
    # argparser.add_argument(
    #     '--element_l2_factor',
    #     type=float,
    #     help='ELEMENT_L2_FACTOR',
    #     default=10.0
    # )
    # argparser.add_argument(
    #     '--l2_weight',
    #     type=float,
    #     help='L2_WEIGHT',
    #     default=0.1
    # )
    argparser.add_argument(
        '--train_memory',
        type=int,
        help='TRAIN_MEMORY',
        default=10000
    )
    argparser.add_argument(
        '--dropout_keep_prob',
        type=float,
        help='dropout_keep_prob',
        default=0.5
    )

    # parse_known_args() tolerates options this parser does not declare.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    if arg_dict['output_path'] is None:
        run_root = os.path.join('output', MY_NAME)
        if not arg_dict['continue']:
            # Fresh run: name the run directory after the current time.
            timestamp = int(time.time())
            arg_dict['output_path'] = os.path.join(run_root, str(timestamp), 'deeplearn')
        else:
            # Resume the newest run. Run directories are named by their
            # integer creation timestamp, so the largest name is the newest.
            # NOTE(review): util.to_int(name, -1) presumably yields -1 for
            # names that are not integers -- confirm against util.
            filename_list = os.listdir(run_root)
            filename_int_list = [util.to_int(filename, -1) for filename in filename_list]
            latest_timestamp = max(filename_int_list) if filename_int_list else -1
            if latest_timestamp < 0:
                # Fail with an actionable message instead of max() on an
                # empty list or a later open() on a bogus '-1' directory.
                raise ValueError(
                    '--continue was given but no previous run directory '
                    'was found under {}'.format(run_root))
            arg_dict['output_path'] = os.path.join(run_root, str(latest_timestamp), 'deeplearn')

    if arg_dict['continue']:
        # The stored options replace everything parsed above, so other
        # command-line values passed together with --continue are discarded.
        with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'), 'r') as deeplearn_arg_dict_file:
            deeplearn_arg_dict = json.load(deeplearn_arg_dict_file)
        arg_dict = deeplearn_arg_dict
        arg_dict['continue'] = True
    else:
        # Record the options used for this run next to its outputs.
        py23.makedirs(arg_dict['output_path'], exist_ok=True)
        with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'), 'w') as out_file:
            json.dump(arg_dict, out_file)

    with tf.device(arg_dict['device']):
        # with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both players are backed by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
def main(_):
    """Parse command-line options and run a DLPlayer-vs-DLPlayer game.

    Both sides (O and X) are played by DLPlayer instances that share a single
    DeepLearn object. Before the game starts, the effective options are
    written to ``input_arg_dict.json`` inside the output directory.

    When ``--output_path`` is not given, a fresh directory
    ``output/<MY_NAME>/deeplearn/<unix timestamp>`` is used.

    Args:
        _: Not used by this function.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None
    )
    argparser.add_argument(
        '--random_stddev',
        type=float,
        help='random_stddev',
        default=0.1
    )
    argparser.add_argument(
        '--random_move_chance',
        type=float,
        help='RANDOM_MOVE_CHANCE',
        default=0.05
    )
    argparser.add_argument(
        '--train_beta',
        type=float,
        help='TRAIN_BETA',
        default=0.99
    )
    argparser.add_argument(
        '--turn_count',
        type=int,
        help='turn_count',
        default=None
    )
    argparser.add_argument(
        '--device',
        type=str,
        help='device',
        default=None
    )
    # argparser.add_argument(
    #     '--element_l2_factor',
    #     type=float,
    #     help='ELEMENT_L2_FACTOR',
    #     default=10.0
    # )
    # argparser.add_argument(
    #     '--l2_weight',
    #     type=float,
    #     help='L2_WEIGHT',
    #     default=0.1
    # )
    argparser.add_argument(
        '--train_memory',
        type=int,
        help='TRAIN_MEMORY',
        default=20000
    )

    # parse_known_args() tolerates options this parser does not declare.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    if arg_dict['output_path'] is None:
        # No explicit destination: create a new, timestamped run directory.
        timestamp = int(time.time())
        arg_dict['output_path'] = 'output/{}/deeplearn/{}'.format(MY_NAME, timestamp)

    # Record the options used for this run next to its outputs.
    py23.makedirs(arg_dict['output_path'], exist_ok=True)
    with open('{}/input_arg_dict.json'.format(arg_dict['output_path']), 'w') as out_file:
        json.dump(arg_dict, out_file)

    with tf.device(arg_dict['device']):
        # with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both players are backed by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)
def main(_):
    """Parse command-line options and run a DLPlayer-vs-DLPlayer game.

    Both sides (O and X) are played by DLPlayer instances that share a single
    DeepLearn object.

    Output directory selection when ``--output_path`` is not given:

    * without ``--continue``: a fresh directory
      ``output/<MY_NAME>/<unix timestamp>/deeplearn`` is used;
    * with ``--continue``: the entry of ``output/<MY_NAME>`` with the largest
      integer name is used.

    With ``--continue`` the options stored in ``input_arg_dict.json`` of the
    output directory replace the ones parsed from the command line; otherwise
    the parsed options are written to that file.

    Args:
        _: Not used by this function.

    Raises:
        ValueError: If ``--continue`` is given without ``--output_path`` and
            no previous run directory can be found.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--output_path',
        type=str,
        help='The path to which checkpoints and other outputs '
        'should be saved. This can be either a local or GCS '
        'path.',
        default=None)
    argparser.add_argument('--random_stddev',
                           type=float,
                           help='random_stddev',
                           default=0.1)
    argparser.add_argument('--random_move_chance',
                           type=float,
                           help='RANDOM_MOVE_CHANCE',
                           default=0.05)
    argparser.add_argument('--train_beta',
                           type=float,
                           help='TRAIN_BETA',
                           default=0.99)
    argparser.add_argument('--turn_count',
                           type=int,
                           help='turn_count',
                           default=None)
    argparser.add_argument('--device',
                           type=str,
                           help='device',
                           default=None)
    argparser.add_argument('--continue',
                           action='store_true',
                           help='continue')
    # argparser.add_argument(
    #     '--element_l2_factor',
    #     type=float,
    #     help='ELEMENT_L2_FACTOR',
    #     default=10.0
    # )
    # argparser.add_argument(
    #     '--l2_weight',
    #     type=float,
    #     help='L2_WEIGHT',
    #     default=0.1
    # )
    argparser.add_argument('--train_memory',
                           type=int,
                           help='TRAIN_MEMORY',
                           default=10000)
    argparser.add_argument('--starter_learning_rate',
                           type=float,
                           help='starter_learning_rate',
                           default=0.01)
    argparser.add_argument('--learning_rate_decay_steps',
                           type=int,
                           help='learning_rate_decay_steps',
                           default=1000000)
    argparser.add_argument('--learning_rate_decay_rate',
                           type=float,
                           help='learning_rate_decay_rate',
                           default=0.5)

    # parse_known_args() tolerates options this parser does not declare.
    args, _ = argparser.parse_known_args()
    arg_dict = vars(args)
    logging.info('YGYMBFMN arg_dict {}'.format(json.dumps(arg_dict)))

    if arg_dict['output_path'] is None:
        run_root = os.path.join('output', MY_NAME)
        if not arg_dict['continue']:
            # Fresh run: name the run directory after the current time.
            timestamp = int(time.time())
            arg_dict['output_path'] = os.path.join(run_root, str(timestamp),
                                                   'deeplearn')
        else:
            # Resume the newest run. Run directories are named by their
            # integer creation timestamp, so the largest name is the newest.
            # NOTE(review): util.to_int(name, -1) presumably yields -1 for
            # names that are not integers -- confirm against util.
            filename_list = os.listdir(run_root)
            filename_int_list = [
                util.to_int(filename, -1) for filename in filename_list
            ]
            latest_timestamp = (max(filename_int_list)
                                if filename_int_list else -1)
            if latest_timestamp < 0:
                # Fail with an actionable message instead of max() on an
                # empty list or a later open() on a bogus '-1' directory.
                raise ValueError(
                    '--continue was given but no previous run directory '
                    'was found under {}'.format(run_root))
            arg_dict['output_path'] = os.path.join(run_root,
                                                   str(latest_timestamp),
                                                   'deeplearn')

    if arg_dict['continue']:
        # The stored options replace everything parsed above, so other
        # command-line values passed together with --continue are discarded.
        with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
                  'r') as deeplearn_arg_dict_file:
            deeplearn_arg_dict = json.load(deeplearn_arg_dict_file)
        arg_dict = deeplearn_arg_dict
        arg_dict['continue'] = True
    else:
        # Record the options used for this run next to its outputs.
        py23.makedirs(arg_dict['output_path'], exist_ok=True)
        with open(os.path.join(arg_dict['output_path'], 'input_arg_dict.json'),
                  'w') as out_file:
            json.dump(arg_dict, out_file)

    with tf.device(arg_dict['device']):
        # with tf.device('/gpu:0'):
        game = Game()
        dl = DeepLearn(arg_dict)

        # Both players are backed by the same DeepLearn instance.
        po = DLPlayer(dl)
        po.set_side(tttl.Pid.O)
        px = DLPlayer(dl)
        px.set_side(tttl.Pid.X)

        game.setPlayer(tttl.Pid.O, po)
        game.setPlayer(tttl.Pid.X, px)
        game.run(turn_count=arg_dict['turn_count'], p=logging.debug)