if checkpoint_file_prefix is not None:
  with open(os.path.join(os.path.dirname(checkpoint_file_prefix), "model.config.json")) as f:
    model_config = json.load(f)
elif saved_model_dir is not None:
  with open(os.path.join(saved_model_dir, "model.config.json")) as f:
    model_config = json.load(f)
else:
  assert False

pos_len = 19  # shouldn't matter, all we're doing is exporting weights that don't depend on this

if name_scope is not None:
  with tf.name_scope(name_scope):
    model = Model(model_config, pos_len, {})
else:
  model = Model(model_config, pos_len, {})

ModelUtils.print_trainable_variables(log)

# Testing ------------------------------------------------------------

print("Testing", flush=True)

saver = tf.train.Saver(
  max_to_keep=10000,
  save_relative_paths=True,
)

# Some tensorflow options
# tfconfig = tf.ConfigProto(log_device_placement=False, device_count={'GPU': 0})
tfconfig = tf.ConfigProto(log_device_placement=False)
# tfconfig.gpu_options.allow_growth = True
# tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.4
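# Illustrative sketch only (not the original continuation of this script): the Saver and
# tfconfig built above are typically used to restore the checkpoint into a session before
# the weights are read out.  `checkpoint_prefix` is a stand-in parameter here; in this
# script it corresponds to checkpoint_file_prefix, which may be None in the
# saved_model_dir case.
def _example_restore_and_read_weights(checkpoint_prefix):
  with tf.Session(config=tfconfig) as session:
    saver.restore(session, checkpoint_prefix)
    # Pull the numpy values of every trainable variable out of the restored graph.
    return session.run(tf.trainable_variables())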
def builder():
    if sys.argv[2] == '-h' or sys.argv[2] == '-help' or sys.argv[2] == '--help':
        builderHelp()
        return

    cwd = os.getcwd()
    cwdcat = cwd.partition('model')
    os.chdir(f'{cwdcat[0]}/model/')

    if len(sys.argv) < 5:
        logger.error(
            'Please follow format of modelBuilder.py [datasheet] -s [save_path] -k [k-neighbors] -t [Time] -h [hash] -d [DEBUGGING]'
        )
        sys.exit()

    if not os.path.isdir('dataset'):
        os.mkdir('dataset')

    if not os.path.isfile(sys.argv[2]):
        # Datasheet not present locally: fetch and download it.
        path = sys.argv[2].split('/')
        filename = path[len(path) - 1].split('.')[0]
        datasetConfig = ModelUtils.fetchDatasetConfig()
        r = requests.get(datasetConfig['url'], allow_redirects=True)
        with open(f'./dataset/{filename}.{datasetConfig["type"]}', 'wb') as f:
            f.write(r.content)
        logger.debug('Successfully downloaded data')

    # Create the build/bin output directory tree if it does not exist yet.
    for directory in [
        'build',
        'bin',
        'bin/currentModels',
        'bin/currentModels/fasttext',
        'bin/currentModels/knn',
        'bin/oldModels',
        'bin/oldModels/fasttext',
        'bin/oldModels/knn',
        'bin/newModels',
        'bin/newModels/fasttext',
        'bin/newModels/knn',
        'bin/newModels/injectModels',
        'bin/newModels/injectModels/fasttext',
        'bin/newModels/injectModels/knn',
    ]:
        if not os.path.isdir(directory):
            os.mkdir(directory)

    # Defaults for the optional flags.
    k: int = 10
    savePath: str = ''
    hash: str = ''
    d: bool = False
    time: int = 5400

    for index, item in enumerate(sys.argv, 0):
        if item == '-s' and index + 1 < len(sys.argv):
            savePath = f'{sys.argv[index + 1]}'
        if item == '-h' and index + 1 < len(sys.argv):
            hash = f'{sys.argv[index + 1]}'
        if item == '-k' and index + 1 < len(sys.argv):
            k = int(sys.argv[index + 1])
        if item == '-d':
            d = True
        if item == '-t' and index + 1 < len(sys.argv):
            time = int(sys.argv[index + 1])

    try:
        ModelBuilder.cleanFiles(hash)
        ModelBuilder.createModels(filePath=sys.argv[2],
                                  savePath=savePath,
                                  k=k,
                                  hash=hash,
                                  debug=d,
                                  time=time)
    except ModelException as e:
        logger.critical(str(e))
        print('Please check -h for help.')
    except Exception as e:
        logger.critical('Stack: %s', str(e))
        print('Please check -h for help.')
    finally:
        ModelBuilder.cleanFiles(hash)
        os.chdir(cwd)
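# Illustrative usage only (not part of the original module): builder() parses its options
# directly from sys.argv, with the datasheet path read from sys.argv[2], so a leading
# argument (e.g. a subcommand name) is expected at sys.argv[1].  The file name, save path,
# and hash below are made up for the example.
def _example_builder_invocation():
    sys.argv = ['modelBuilder.py', 'build', 'dataset/example.csv',
                '-s', './bin/newModels', '-k', '10', '-t', '5400', '-h', 'examplehash']
    builder()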
def model_fn(features, labels, mode, params):
  global printed_model_yet
  global initial_weights_already_loaded

  print_model = not printed_model_yet

  built = ModelUtils.build_model_from_tfrecords_features(
    features, mode, print_model, trainlog, model_config, pos_len,
    num_batches_per_epoch, lr_scale)

  if mode == tf.estimator.ModeKeys.PREDICT:
    model = built
    predictions = {}
    predictions["policy_output"] = model.policy_output
    predictions["value_output"] = model.value_output
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  if mode == tf.estimator.ModeKeys.EVAL:
    (model, target_vars, metrics) = built
    wsum = tf.Variable(
      0.0, dtype=tf.float32, name="wsum", trainable=False,
      collections=[tf.GraphKeys.LOCAL_VARIABLES, tf.GraphKeys.METRIC_VARIABLES],
      synchronization=tf.VariableSynchronization.ON_READ,
      aggregation=tf.VariableAggregation.SUM)
    wsum_op = tf.assign_add(wsum, target_vars.weight_sum)

    return tf.estimator.EstimatorSpec(
      mode,
      loss=target_vars.opt_loss / tf.constant(batch_size, dtype=tf.float32),
      eval_metric_ops={
        "wsum": (wsum.read_value(), wsum_op),
        "p0loss": tf.metrics.mean(target_vars.policy_loss_unreduced, weights=target_vars.target_weight_used),
        "p1loss": tf.metrics.mean(target_vars.policy1_loss_unreduced, weights=target_vars.target_weight_used),
        "vloss": tf.metrics.mean(target_vars.value_loss_unreduced, weights=target_vars.target_weight_used),
        "smloss": tf.metrics.mean(target_vars.scoremean_loss_unreduced, weights=target_vars.target_weight_used),
        "sbpdfloss": tf.metrics.mean(target_vars.scorebelief_pdf_loss_unreduced, weights=target_vars.target_weight_used),
        "sbcdfloss": tf.metrics.mean(target_vars.scorebelief_cdf_loss_unreduced, weights=target_vars.target_weight_used),
        "bbpdfloss": tf.metrics.mean(target_vars.bonusbelief_pdf_loss_unreduced, weights=target_vars.target_weight_used),
        "bbcdfloss": tf.metrics.mean(target_vars.bonusbelief_cdf_loss_unreduced, weights=target_vars.target_weight_used),
        "uvloss": tf.metrics.mean(target_vars.utilityvar_loss_unreduced, weights=target_vars.target_weight_used),
        "oloss": tf.metrics.mean(target_vars.ownership_loss_unreduced, weights=target_vars.target_weight_used),
        "rwlloss": tf.metrics.mean(target_vars.winloss_reg_loss_unreduced, weights=target_vars.target_weight_used),
        "rsmloss": tf.metrics.mean(target_vars.scoremean_reg_loss_unreduced, weights=target_vars.target_weight_used),
        "rsdloss": tf.metrics.mean(target_vars.scorestdev_reg_loss_unreduced, weights=target_vars.target_weight_used),
        "roloss": tf.metrics.mean(target_vars.ownership_reg_loss_unreduced, weights=target_vars.target_weight_used),
        "rloss": tf.metrics.mean(target_vars.reg_loss_per_weight, weights=target_vars.weight_sum),
        "rscloss": tf.metrics.mean(target_vars.scale_reg_loss_unreduced, weights=target_vars.target_weight_used),
        "pacc1": tf.metrics.mean(metrics.accuracy1_unreduced, weights=target_vars.target_weight_used),
        "ventr": tf.metrics.mean(metrics.value_entropy_unreduced, weights=target_vars.target_weight_used),
        "ptentr": tf.metrics.mean(metrics.policy_target_entropy_unreduced, weights=target_vars.target_weight_used)
      })

  if mode == tf.estimator.ModeKeys.TRAIN:
    (model, target_vars, metrics, global_step, global_step_float,
     per_sample_learning_rate, train_step) = built
    printed_model_yet = True

    def moving_mean(name, x, weights):
      sumwx = tf.reduce_sum(x * weights)
      sumw = tf.reduce_sum(weights, name="printstats/" + name)
      ema = tf.train.ExponentialMovingAverage(decay=0.999)
      op = ema.apply([sumwx, sumw])
      avg = ema.average(sumwx) / ema.average(sumw)
      return (avg, op)

    (p0loss, p0loss_op) = moving_mean("p0loss", target_vars.policy_loss_unreduced, weights=target_vars.target_weight_used)
    (p1loss, p1loss_op) = moving_mean("p1loss", target_vars.policy1_loss_unreduced, weights=target_vars.target_weight_used)
    (vloss, vloss_op) = moving_mean("vloss", target_vars.value_loss_unreduced, weights=target_vars.target_weight_used)
    (smloss, smloss_op) = moving_mean("smloss", target_vars.scoremean_loss_unreduced, weights=target_vars.target_weight_used)
    (sbpdfloss, sbpdfloss_op) = moving_mean("sbpdfloss", target_vars.scorebelief_pdf_loss_unreduced, weights=target_vars.target_weight_used)
    (sbcdfloss, sbcdfloss_op) = moving_mean("sbcdfloss", target_vars.scorebelief_cdf_loss_unreduced, weights=target_vars.target_weight_used)
    (bbpdfloss, bbpdfloss_op) = moving_mean("bbpdfloss", target_vars.bonusbelief_pdf_loss_unreduced, weights=target_vars.target_weight_used)
    (bbcdfloss, bbcdfloss_op) = moving_mean("bbcdfloss", target_vars.bonusbelief_cdf_loss_unreduced, weights=target_vars.target_weight_used)
    (uvloss, uvloss_op) = moving_mean("uvloss", target_vars.utilityvar_loss_unreduced, weights=target_vars.target_weight_used)
    (oloss, oloss_op) = moving_mean("oloss", target_vars.ownership_loss_unreduced, weights=target_vars.target_weight_used)
    (rwlloss, rwlloss_op) = moving_mean("rwlloss", target_vars.winloss_reg_loss_unreduced, weights=target_vars.target_weight_used)
    (rsmloss, rsmloss_op) = moving_mean("rsmloss", target_vars.scoremean_reg_loss_unreduced, weights=target_vars.target_weight_used)
    (rsdloss, rsdloss_op) = moving_mean("rsdloss", target_vars.scorestdev_reg_loss_unreduced, weights=target_vars.target_weight_used)
    (roloss, roloss_op) = moving_mean("roloss", target_vars.ownership_reg_loss_unreduced, weights=target_vars.target_weight_used)
    (rloss, rloss_op) = moving_mean("rloss", target_vars.reg_loss_per_weight, weights=target_vars.weight_sum)
    (rscloss, rscloss_op) = moving_mean("rscloss", target_vars.scale_reg_loss_unreduced, weights=target_vars.target_weight_used)
    (pacc1, pacc1_op) = moving_mean("pacc1", metrics.accuracy1_unreduced, weights=target_vars.target_weight_used)
    (ventr, ventr_op) = moving_mean("ventr", metrics.value_entropy_unreduced, weights=target_vars.target_weight_used)
    (ptentr, ptentr_op) = moving_mean("ptentr", metrics.policy_target_entropy_unreduced, weights=target_vars.target_weight_used)
    (wmean, wmean_op) = tf.metrics.mean(target_vars.weight_sum)

    print_train_loss_every_batches = 100

    logging_hook = tf.train.LoggingTensorHook({
      "nsamp": global_step * tf.constant(batch_size, dtype=tf.int64),
      "wsum": global_step_float * wmean,
      "p0loss": p0loss,
      "p1loss": p1loss,
      "vloss": vloss,
      "smloss": smloss,
      "sbpdfloss": sbpdfloss,
      "sbcdfloss": sbcdfloss,
      "bbpdfloss": bbpdfloss,
      "bbcdfloss": bbcdfloss,
      "uvloss": uvloss,
      "oloss": oloss,
      "rwlloss": rwlloss,
      "rsmloss": rsmloss,
      "rsdloss": rsdloss,
      "roloss": roloss,
      "rloss": rloss,
      "rscloss": rscloss,
      "pacc1": pacc1,
      "ventr": ventr,
      "ptentr": ptentr,
      "pslr": per_sample_learning_rate
    }, every_n_iter=print_train_loss_every_batches)

    printed_model_yet = True

    sys.stdout.flush()
    sys.stderr.flush()

    initial_weights_dir = os.path.join(traindir, "initial_weights")
    if os.path.exists(initial_weights_dir) and not initial_weights_already_loaded:
      print("Initial weights found at: " + initial_weights_dir)
      checkpoint_path = os.path.join(initial_weights_dir, "model")
      vars_in_checkpoint = tf.contrib.framework.list_variables(checkpoint_path)
      print("Checkpoint contains:")
      for var in vars_in_checkpoint:
        print(var)
      print("Modifying graph to load weights from checkpoint upon init...")
      sys.stdout.flush()
      sys.stderr.flush()

      variables_to_restore = tf.trainable_variables()
      assignment_mapping = {v.name.split(":")[0]: v for v in variables_to_restore}
      tf.train.init_from_checkpoint(checkpoint_path, assignment_mapping)
      initial_weights_already_loaded = True

    return tf.estimator.EstimatorSpec(
      mode,
      loss=(target_vars.opt_loss / tf.constant(batch_size, dtype=tf.float32)),
      train_op=tf.group(
        train_step, p0loss_op, p1loss_op, vloss_op, smloss_op, sbpdfloss_op,
        sbcdfloss_op, bbpdfloss_op, bbcdfloss_op, uvloss_op, oloss_op,
        rwlloss_op, rsmloss_op, rsdloss_op, roloss_op, rloss_op, rscloss_op,
        pacc1_op, ventr_op, ptentr_op, wmean_op),
      training_hooks=[logging_hook])
def model_fn(features, labels, mode, params):
  global printed_model_yet
  global initial_weights_already_loaded

  print_model = not printed_model_yet

  built = ModelUtils.build_model_from_tfrecords_features(
    features, mode, print_model, trainlog, model_config, pos_len, batch_size,
    lr_scale, gnorm_clip_scale, num_gpus_used)

  if mode == tf.estimator.ModeKeys.PREDICT:
    model = built
    predictions = {}
    predictions["policy_output"] = model.policy_output
    predictions["value_output"] = model.value_output
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  if mode == tf.estimator.ModeKeys.EVAL:
    (model, target_vars, metrics) = built
    wsum = tf.Variable(
      0.0, dtype=tf.float32, name="wsum", trainable=False,
      collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES, tf.compat.v1.GraphKeys.METRIC_VARIABLES],
      synchronization=tf.VariableSynchronization.ON_READ,
      aggregation=tf.VariableAggregation.SUM)
    wsum_op = tf.assign_add(wsum, target_vars.weight_sum)

    eval_metric_ops = {
      # "wsum": (wsum.read_value(), wsum_op),
      "p0loss": tf.compat.v1.metrics.mean(target_vars.policy_loss_unreduced, weights=target_vars.target_weight_used),
      "p1loss": tf.compat.v1.metrics.mean(target_vars.policy1_loss_unreduced, weights=target_vars.target_weight_used),
      "vloss": tf.compat.v1.metrics.mean(target_vars.value_loss_unreduced, weights=target_vars.target_weight_used),
      "tdvloss": tf.compat.v1.metrics.mean(target_vars.td_value_loss_unreduced, weights=target_vars.target_weight_used),
      "smloss": tf.compat.v1.metrics.mean(target_vars.scoremean_loss_unreduced, weights=target_vars.target_weight_used),
      "leadloss": tf.compat.v1.metrics.mean(target_vars.lead_loss_unreduced, weights=target_vars.target_weight_used),
      "vtimeloss": tf.compat.v1.metrics.mean(target_vars.variance_time_loss_unreduced, weights=target_vars.target_weight_used),
      "sbpdfloss": tf.compat.v1.metrics.mean(target_vars.scorebelief_pdf_loss_unreduced, weights=target_vars.target_weight_used),
      "sbcdfloss": tf.compat.v1.metrics.mean(target_vars.scorebelief_cdf_loss_unreduced, weights=target_vars.target_weight_used),
      "oloss": tf.compat.v1.metrics.mean(target_vars.ownership_loss_unreduced, weights=target_vars.target_weight_used),
      "sloss": tf.compat.v1.metrics.mean(target_vars.scoring_loss_unreduced, weights=target_vars.target_weight_used),
      "fploss": tf.compat.v1.metrics.mean(target_vars.futurepos_loss_unreduced, weights=target_vars.target_weight_used),
      "rsdloss": tf.compat.v1.metrics.mean(target_vars.scorestdev_reg_loss_unreduced, weights=target_vars.target_weight_used),
      "rloss": tf.compat.v1.metrics.mean(target_vars.reg_loss_per_weight, weights=target_vars.weight_sum),
      "rscloss": tf.compat.v1.metrics.mean(target_vars.scale_reg_loss_unreduced, weights=target_vars.target_weight_used),
      "pacc1": tf.compat.v1.metrics.mean(metrics.accuracy1_unreduced, weights=target_vars.target_weight_used),
      "ventr": tf.compat.v1.metrics.mean(metrics.value_entropy_unreduced, weights=target_vars.target_weight_used),
      "ptentr": tf.compat.v1.metrics.mean(metrics.policy_target_entropy_unreduced, weights=target_vars.target_weight_used)
    }
    if model.version >= 9:
      eval_metric_ops["evstloss"] = tf.compat.v1.metrics.mean(target_vars.shortterm_value_error_loss_unreduced, weights=target_vars.target_weight_used)
      eval_metric_ops["esstloss"] = tf.compat.v1.metrics.mean(target_vars.shortterm_score_error_loss_unreduced, weights=target_vars.target_weight_used)
    if model.version >= 10:
      eval_metric_ops["tdsloss"] = tf.compat.v1.metrics.mean(target_vars.td_score_loss_unreduced, weights=target_vars.target_weight_used)

    return tf.estimator.EstimatorSpec(
      mode,
      loss=target_vars.opt_loss / tf.constant(batch_size, dtype=tf.float32),
      eval_metric_ops=eval_metric_ops)

  if mode == tf.estimator.ModeKeys.TRAIN:
    (model, target_vars, metrics, global_step, global_step_float,
     per_sample_learning_rate, train_step) = built
    printed_model_yet = True

    def moving_mean(name, x, weights):
      sumwx = tf.reduce_sum(x * weights, name="printstats/wx/" + name)
      sumw = tf.reduce_sum(weights, name="printstats/w/" + name)
      moving_wx = tf.compat.v1.get_variable(initializer=tf.zeros([]), name=(name + "/moving_wx"), trainable=False)
      moving_w = tf.compat.v1.get_variable(initializer=tf.zeros([]), name=(name + "/moving_w"), trainable=False)

      decay = 0.999
      with tf.compat.v1.variable_scope(name):
        wx_op = tf.keras.backend.moving_average_update(moving_wx, sumwx, decay)
        w_op = tf.keras.backend.moving_average_update(moving_w, sumw, decay)
        op = tf.group(wx_op, w_op)

      avg = (moving_wx + sumwx * (1.0 - decay)) / (moving_w + sumw * (1.0 - decay))
      return (avg, op)

    (p0loss, p0loss_op) = moving_mean("p0loss", target_vars.policy_loss_unreduced, weights=target_vars.target_weight_used)
    (p1loss, p1loss_op) = moving_mean("p1loss", target_vars.policy1_loss_unreduced, weights=target_vars.target_weight_used)
    (vloss, vloss_op) = moving_mean("vloss", target_vars.value_loss_unreduced, weights=target_vars.target_weight_used)
    (tdvloss, tdvloss_op) = moving_mean("tdvloss", target_vars.td_value_loss_unreduced, weights=target_vars.target_weight_used)
    (smloss, smloss_op) = moving_mean("smloss", target_vars.scoremean_loss_unreduced, weights=target_vars.target_weight_used)
    (leadloss, leadloss_op) = moving_mean("leadloss", target_vars.lead_loss_unreduced, weights=target_vars.target_weight_used)
    (vtimeloss, vtimeloss_op) = moving_mean("vtimeloss", target_vars.variance_time_loss_unreduced, weights=target_vars.target_weight_used)
    (sbpdfloss, sbpdfloss_op) = moving_mean("sbpdfloss", target_vars.scorebelief_pdf_loss_unreduced, weights=target_vars.target_weight_used)
    (sbcdfloss, sbcdfloss_op) = moving_mean("sbcdfloss", target_vars.scorebelief_cdf_loss_unreduced, weights=target_vars.target_weight_used)
    (oloss, oloss_op) = moving_mean("oloss", target_vars.ownership_loss_unreduced, weights=target_vars.target_weight_used)
    (sloss, sloss_op) = moving_mean("sloss", target_vars.scoring_loss_unreduced, weights=target_vars.target_weight_used)
    (fploss, fploss_op) = moving_mean("fploss", target_vars.futurepos_loss_unreduced, weights=target_vars.target_weight_used)
    (skloss, skloss_op) = moving_mean("skloss", target_vars.seki_loss_unreduced, weights=target_vars.target_weight_used)
    (rsdloss, rsdloss_op) = moving_mean("rsdloss", target_vars.scorestdev_reg_loss_unreduced, weights=target_vars.target_weight_used)
    (rloss, rloss_op) = moving_mean("rloss", target_vars.reg_loss_per_weight, weights=target_vars.weight_sum)
    (rscloss, rscloss_op) = moving_mean("rscloss", target_vars.scale_reg_loss_unreduced, weights=target_vars.target_weight_used)
    if model.version >= 9:
      (evstloss, evstloss_op) = moving_mean("evstloss", target_vars.shortterm_value_error_loss_unreduced, weights=target_vars.target_weight_used)
      (esstloss, esstloss_op) = moving_mean("esstloss", target_vars.shortterm_score_error_loss_unreduced, weights=target_vars.target_weight_used)
      # (evstm, evstm_op) = moving_mean("evstm", metrics.shortterm_value_error_mean_unreduced, weights=target_vars.target_weight_used)
      # (evstv, evstv_op) = moving_mean("evstv", metrics.shortterm_value_error_var_unreduced, weights=target_vars.target_weight_used)
      # (esstm, esstm_op) = moving_mean("esstm", metrics.shortterm_score_error_mean_unreduced, weights=target_vars.target_weight_used)
      # (esstv, esstv_op) = moving_mean("esstv", metrics.shortterm_score_error_var_unreduced, weights=target_vars.target_weight_used)
    if model.version >= 10:
      (tdsloss, tdsloss_op) = moving_mean("tdsloss", target_vars.td_score_loss_unreduced, weights=target_vars.target_weight_used)
    (pacc1, pacc1_op) = moving_mean("pacc1", metrics.accuracy1_unreduced, weights=target_vars.target_weight_used)
    (ptentr, ptentr_op) = moving_mean("ptentr", metrics.policy_target_entropy_unreduced, weights=target_vars.target_weight_used)

    # NOTE: These two are going to be smaller if using more GPUs since it's the gradient norm
    # as measured on the instance batch rather than the global batch.
    # Also, somewhat awkwardly, we say the weight is 1.0 rather than 1.0/num_gpus_used because
    # tensorflow seems to have "meany" behavior where it updates sumw via the mean of the two
    # separate updates of the gpus rather than the sum.
    (gnorm, gnorm_op) = moving_mean("gnorm", metrics.gnorm, weights=1.0)
    (exgnorm, exgnorm_op) = moving_mean("excessgnorm", metrics.excess_gnorm, weights=1.0)

    (wmean, wmean_op) = tf.compat.v1.metrics.mean(target_vars.weight_sum)

    # print_op = tf.print(
    #   metrics.gnorm,
    #   target_vars.weight_sum,
    #   target_vars.opt_loss,
    #   metrics.tmp,
    #   foo[0],
    #   output_stream=sys.stdout
    # )

    print_train_loss_every_batches = 100

    logvars = {
      "nsamp": global_step * tf.constant(batch_size, dtype=tf.int64),
      "wsum": global_step_float * wmean * tf.constant(float(num_gpus_used)),
      "p0loss": p0loss,
      "p1loss": p1loss,
      "vloss": vloss,
      "tdvloss": tdvloss,
      "smloss": smloss,
      "leadloss": leadloss,
      "vtimeloss": vtimeloss,
      "sbpdfloss": sbpdfloss,
      "sbcdfloss": sbcdfloss,
      "oloss": oloss,
      "sloss": sloss,
      "fploss": fploss,
      "skloss": skloss,
      "skw": target_vars.seki_weight_scale,
      "rsdloss": rsdloss,
      "rloss": rloss,
      "rscloss": rscloss,
      "pacc1": pacc1,
      "ptentr": ptentr,
      "pslr": per_sample_learning_rate,
      "gnorm": gnorm,
      "exgnorm": exgnorm
    }
    if model.version >= 9:
      logvars["evstloss"] = evstloss
      logvars["esstloss"] = esstloss
      # logvars["evstm"] = evstm
      # logvars["evstv"] = evstv
      # logvars["esstm"] = esstm
      # logvars["esstv"] = esstv
    if model.version >= 10:
      logvars["tdsloss"] = tdsloss

    logging_hook = CustomLoggingHook(
      logvars,
      every_n_iter=print_train_loss_every_batches,
      handle_logging_values=update_global_latest_extra_stats)

    printed_model_yet = True

    sys.stdout.flush()
    sys.stderr.flush()

    initial_weights_dir = os.path.join(traindir, "initial_weights")
    if os.path.exists(initial_weights_dir) and not initial_weights_already_loaded:
      print("Initial weights dir found at: " + initial_weights_dir)
      checkpoint_path = None
      for initial_weights_file in os.listdir(initial_weights_dir):
        if initial_weights_file.startswith("model") and initial_weights_file.endswith(".index"):
          checkpoint_path = os.path.join(initial_weights_dir, initial_weights_file[0:len(initial_weights_file) - len(".index")])
          break
      if checkpoint_path is not None:
        print("Initial weights checkpoint to use found at: " + checkpoint_path)
        vars_in_checkpoint = tf.contrib.framework.list_variables(checkpoint_path)
        varname_in_checkpoint = {}
        print("Checkpoint contains:")
        for varandshape in vars_in_checkpoint:
          print(varandshape)
          varname_in_checkpoint[varandshape[0]] = True
        print("Modifying graph to load weights from checkpoint upon init...")
        sys.stdout.flush()
        sys.stderr.flush()

        variables_to_restore = tf.compat.v1.global_variables()
        assignment_mapping = {}
        for v in variables_to_restore:
          name = v.name.split(":")[0]  # drop the ":0" at the end of each var
          if name in varname_in_checkpoint:
            assignment_mapping[name] = v
        tf.compat.v1.train.init_from_checkpoint(checkpoint_path, assignment_mapping)
        initial_weights_already_loaded = True

    ops = [
      train_step, p0loss_op, p1loss_op, vloss_op, tdvloss_op, smloss_op,
      leadloss_op, vtimeloss_op, sbpdfloss_op, sbcdfloss_op, oloss_op,
      sloss_op, fploss_op, skloss_op, rsdloss_op, rloss_op, rscloss_op,
      pacc1_op, ptentr_op, wmean_op, gnorm_op, exgnorm_op
    ]
    if model.version >= 9:
      ops.append(evstloss_op)
      ops.append(esstloss_op)
      # ops.append(evstm_op)
      # ops.append(evstv_op)
      # ops.append(esstm_op)
      # ops.append(esstv_op)
    if model.version >= 10:
      ops.append(tdsloss_op)

    return tf.estimator.EstimatorSpec(
      mode,
      loss=(target_vars.opt_loss / tf.constant(batch_size, dtype=tf.float32)),
      train_op=tf.group(*ops),
      training_hooks=[logging_hook])
  dataset = dataset.map(parse_input)
  iterator = dataset.make_one_shot_iterator()
  features = iterator.get_next()
elif using_npz:
  features = tfrecordio.make_raw_input_feature_placeholders(model_config, pos_len, batch_size)

# Model ----------------------------------------------------------------
mode = tf.estimator.ModeKeys.EVAL
print_model = False

if name_scope is not None:
  with tf.compat.v1.variable_scope(name_scope):
    (model, target_vars, metrics) = ModelUtils.build_model_from_tfrecords_features(
      features, mode, print_model, log, model_config, pos_len, batch_size)
else:
  (model, target_vars, metrics) = ModelUtils.build_model_from_tfrecords_features(
    features, mode, print_model, log, model_config, pos_len, batch_size)

total_parameters = 0
for variable in tf.compat.v1.trainable_variables():
  shape = variable.get_shape()
  variable_parameters = 1
  for dim in shape:
    variable_parameters *= dim.value
  total_parameters += variable_parameters

log("Built model, %d total parameters" % total_parameters)
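# Illustrative sketch only (not from the original script): in the using_npz case the
# `features` placeholders above are fed one batch at a time through a feed_dict.  The
# `session` and `batch` arguments (a dict of numpy arrays keyed like `features`) are
# assumptions for the example.
def _example_eval_one_batch(session, batch):
  feed_dict = {features[key]: batch[key] for key in features}
  # Evaluate the optimization loss for this batch; any other tensor from target_vars
  # or metrics could be fetched the same way.
  return session.run(target_vars.opt_loss, feed_dict=feed_dict)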
"ptncm": tf.placeholder(tf.float32,[batch_size,NUM_POLICY_TARGETS,pos_len*pos_len+1]), "gtnc": tf.placeholder(tf.float32,[batch_size,NUM_GLOBAL_TARGETS]), "sdn": tf.placeholder(tf.float32,[batch_size,pos_len*pos_len*2+EXTRA_SCORE_DISTR_RADIUS*2]), "sbsn": tf.placeholder(tf.float32,[batch_size,BONUS_SCORE_RADIUS*2+1]), "vtnchw": tf.placeholder(tf.float32,[batch_size,NUM_VALUE_SPATIAL_TARGETS,pos_len,pos_len]) } # Model ---------------------------------------------------------------- mode = tf.estimator.ModeKeys.EVAL print_model = False num_batches_per_epoch = 1 #doesn't matter if name_scope is not None: with tf.name_scope(name_scope): (model,target_vars,metrics) = ModelUtils.build_model_from_tfrecords_features(features,mode,print_model,log,model_config,pos_len,num_batches_per_epoch) else: (model,target_vars,metrics) = ModelUtils.build_model_from_tfrecords_features(features,mode,print_model,log,model_config,pos_len,num_batches_per_epoch) total_parameters = 0 for variable in tf.trainable_variables(): shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value total_parameters += variable_parameters log("Built model, %d total parameters" % total_parameters) # Testing ------------------------------------------------------------