def run_evaluate(model, flags_obj, master, is_chief):
  utils_context.training = False

  # Evaluation ids come from a text file; shard them across workers when
  # running distributed.
  dataset = tf.data.TextLineDataset(flags_obj.id_file)
  if master:
    dataset = dataset.shard(len(flags_obj.worker_hosts),
                            flags_obj.task_index)
  dataset = dataset.map(
      lambda id_str: tf.string_to_number(id_str, out_type=tf.int64))
  dataset = dataset.batch(flags_obj.batch_size)
  source = dataset.make_one_shot_iterator().get_next()

  _, _, metric_name, metric = model(source)
  tf.train.get_or_create_global_step()

  hooks = []
  if master:
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))

  # `config` is a session config defined at module scope, shared by all of
  # these entry points.
  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=flags_obj.model_dir,
      save_checkpoint_secs=None,
      log_step_count_steps=None,
      hooks=hooks,
      config=config) as sess:
    while not sess.should_stop():
      metric_val = sess.run(metric)
      print('{}: {}'.format(metric_name, metric_val))
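# The run_* entry points here assume a callable model with the contract
# sketched below: model(source) -> (embedding, loss, metric_name, metric)
# for single-head models (the sim/cor variants return a pair of such tuples).
# This stub is a hypothetical illustration of that contract, not part of the
# repo.
class _ToyModelSketch(object):
  batch_size_ratio = 1  # run_train divides flags_obj.batch_size by this.

  def __call__(self, source):
    # Look up trainable embeddings for the sampled node ids and use their
    # mean squared norm as a dummy loss and metric.
    table = tf.get_variable('toy_emb', [100000, 16])
    embedding = tf.nn.embedding_lookup(table, source)
    loss = tf.reduce_mean(tf.square(embedding))
    return embedding, loss, 'dummy_mse', loss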
def run_train(model, flags_obj, master, is_chief):
  utils_context.training = True

  batch_size = flags_obj.batch_size // model.batch_size_ratio
  source = euler_ops.sample_node(count=batch_size,
                                 node_type=flags_obj.train_node_type)
  source.set_shape([batch_size])

  # This model variant returns two heads: similarity and correlation.
  sim_outputs, cor_outputs = model(source)
  _, sim_loss, metric_name, sim_metric = sim_outputs
  _, cor_loss, _, cor_metric = cor_outputs
  loss = sim_loss + cor_loss

  optimizer_class = optimizers.get(flags_obj.optimizer)
  optimizer = optimizer_class(flags_obj.learning_rate)
  global_step = tf.train.get_or_create_global_step()
  train_op = optimizer.minimize(loss, global_step=global_step)

  hooks = []
  tensor_to_log = {
      'step': global_step,
      'loss': loss,
      'sim_loss': sim_loss,
      'cor_loss': cor_loss,
      'sim_metric': sim_metric,
      'cor_metric': cor_metric
  }
  hooks.append(
      tf.train.LoggingTensorHook(tensor_to_log,
                                 every_n_iter=flags_obj.log_steps))

  # Steps per epoch are based on the ratio-adjusted batch_size, which is the
  # number of nodes actually sampled per step (matches the companion
  # run_train variant below).
  num_steps = int(
      (flags_obj.max_id + 1) // batch_size * flags_obj.num_epochs)
  hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

  extra_param_name = '_'.join(map(str, flags_obj.fanouts))
  output_dir = ckpt_dir = '{}/{}/{}_{}_{}/'.format(
      flags_obj.model_dir, flags_obj.model, extra_param_name,
      flags_obj.dim, flags_obj.embedding_dim)
  print('output dir: {}'.format(output_dir))

  # Profile when running locally, or on worker 1 when distributed.
  if len(flags_obj.worker_hosts) == 0 or flags_obj.task_index == 1:
    hooks.append(
        tf.train.ProfilerHook(save_secs=180, output_dir=output_dir))
  if len(flags_obj.worker_hosts):
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))
  if hasattr(model, 'make_session_run_hook'):
    hooks.append(model.make_session_run_hook())

  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=ckpt_dir,
      log_step_count_steps=None,
      hooks=hooks,
      config=config) as sess:
    while not sess.should_stop():
      sess.run(train_op)
def run_train(model, flags_obj, master, is_chief):
  utils_context.training = True

  batch_size = flags_obj.batch_size // model.batch_size_ratio
  # line and randomwalk models train over all node types; other models use
  # the designated training node type.
  if flags_obj.model in ('line', 'randomwalk'):
    source = euler_ops.sample_node(count=batch_size,
                                   node_type=flags_obj.all_node_type)
  else:
    source = euler_ops.sample_node(count=batch_size,
                                   node_type=flags_obj.train_node_type)
  source.set_shape([batch_size])

  # Alternative input pipeline: read training ids from a file instead of
  # sampling from the graph.
  # dataset = tf.data.TextLineDataset(flags_obj.id_file)
  # dataset = dataset.map(
  #     lambda id_str: tf.string_to_number(id_str, out_type=tf.int64))
  # dataset = dataset.shuffle(buffer_size=20000)
  # dataset = dataset.batch(batch_size)
  # dataset = dataset.repeat(flags_obj.num_epochs)
  # source = dataset.make_one_shot_iterator().get_next()

  _, loss, metric_name, metric = model(source)
  optimizer_class = optimizers.get(flags_obj.optimizer)
  optimizer = optimizer_class(learning_rate=flags_obj.learning_rate)
  global_step = tf.train.get_or_create_global_step()
  train_op = optimizer.minimize(loss, global_step=global_step)

  hooks = []
  tensor_to_log = {'step': global_step, 'loss': loss, metric_name: metric}
  hooks.append(
      tf.train.LoggingTensorHook(tensor_to_log,
                                 every_n_iter=flags_obj.log_steps))

  num_steps = int(
      (flags_obj.max_id + 1) // batch_size * flags_obj.num_epochs)
  hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

  # Profile when running locally, or on worker 1 when distributed.
  if len(flags_obj.worker_hosts) == 0 or flags_obj.task_index == 1:
    hooks.append(
        tf.train.ProfilerHook(save_secs=180,
                              output_dir=flags_obj.model_dir))
  if len(flags_obj.worker_hosts):
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))
  if hasattr(model, 'make_session_run_hook'):
    hooks.append(model.make_session_run_hook())

  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=flags_obj.model_dir,
      log_step_count_steps=None,
      hooks=hooks) as sess:
    while not sess.should_stop():
      sess.run(train_op)
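# utils_hooks.SyncExitHook is a repo helper whose implementation is not shown
# in this section. A minimal sketch of the behavior it plausibly provides --
# blocking each finished worker until all num_workers workers are done, via a
# shared counter variable -- purely as a hypothetical illustration:
import time

class _SyncExitHookSketch(tf.train.SessionRunHook):
  def __init__(self, num_workers):
    self._num_workers = num_workers

  def begin(self):
    # Shared variable incremented exactly once by every worker at exit.
    self._done_count = tf.get_variable(
        'sync_exit_done_count', shape=[], dtype=tf.int32,
        initializer=tf.zeros_initializer(), trainable=False)
    self._mark_done = tf.assign_add(self._done_count, 1, use_locking=True)

  def end(self, session):
    session.run(self._mark_done)
    # Poll until every worker has marked itself done, so no worker exits
    # while others still need the parameter servers.
    while session.run(self._done_count) < self._num_workers:
      time.sleep(1)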
def run_evaluate(model, flags_obj, master, is_chief):
  tf.random.set_random_seed(20191223)
  utils_context.training = False

  # Read ids from the id.txt written next to the checkpoint by
  # run_save_embedding.
  ckpt_dir = '{}/{}/{}_{}_{}/'.format(flags_obj.model_dir, flags_obj.model,
                                      '_'.join(map(str, flags_obj.fanouts)),
                                      flags_obj.dim, flags_obj.embedding_dim)
  id_file = ckpt_dir + '/id.txt'
  dataset = tf.data.TextLineDataset(id_file)
  if master:
    dataset = dataset.shard(len(flags_obj.worker_hosts),
                            flags_obj.task_index)
  dataset = dataset.map(
      lambda id_str: tf.string_to_number(id_str, out_type=tf.int64))
  dataset = dataset.batch(flags_obj.batch_size)
  source = dataset.make_one_shot_iterator().get_next()

  sim_outputs, cor_outputs = model(source)
  _, _, metric_name, sim_metric = sim_outputs
  _, _, _, cor_metric = cor_outputs
  tf.train.get_or_create_global_step()

  hooks = []
  num_steps = int(
      (flags_obj.max_id + 1) // flags_obj.batch_size * flags_obj.num_epochs)
  hooks.append(tf.train.StopAtStepHook(last_step=num_steps))
  if master:
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))

  batch_sim_metric_val, batch_cor_metric_val = [], []
  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=ckpt_dir,
      save_checkpoint_secs=None,
      log_step_count_steps=None,
      hooks=hooks,
      config=config) as sess:
    while not sess.should_stop():
      sim_metric_val, cor_metric_val = sess.run([sim_metric, cor_metric])
      batch_sim_metric_val.append(sim_metric_val)
      batch_cor_metric_val.append(cor_metric_val)

  # Average the per-batch metrics over the whole evaluation run.
  sim_metric_val = np.mean(batch_sim_metric_val)
  cor_metric_val = np.mean(batch_cor_metric_val)
  print('sim_{}: {}'.format(metric_name, sim_metric_val))
  print('cor_{}: {}'.format(metric_name, cor_metric_val))
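# Note that np.mean over per-batch metric values weights every batch equally,
# so a final batch smaller than flags_obj.batch_size is slightly
# over-weighted. If exact averages matter, weight by batch size instead; a
# sketch of such a helper (hypothetical, not part of the repo):
def _weighted_mean_sketch(batch_means, batch_sizes):
  batch_means = np.asarray(batch_means, dtype=np.float64)
  batch_sizes = np.asarray(batch_sizes, dtype=np.float64)
  # Weight each batch's mean by the number of examples it covered.
  return float((batch_means * batch_sizes).sum() / batch_sizes.sum())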
def run_save_embedding(model, flags_obj, master, is_chief):
  utils_context.training = False

  dataset = tf.data.Dataset.range(flags_obj.max_id + 1)
  if master:
    dataset = dataset.shard(len(flags_obj.worker_hosts),
                            flags_obj.task_index)
  dataset = dataset.batch(flags_obj.batch_size)
  source = dataset.make_one_shot_iterator().get_next()

  embedding, _, _, _ = model(source)
  tf.train.get_or_create_global_step()

  hooks = []
  if master:
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))

  ids = []
  embedding_vals = []
  shard_index = 0
  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=flags_obj.model_dir,
      save_checkpoint_secs=None,
      log_step_count_steps=None,
      hooks=hooks,
      config=config) as sess:
    while not sess.should_stop():
      try:
        id_, embedding_val = sess.run([source, embedding])
      except tf.errors.OutOfRangeError:
        break
      ids.append(id_)
      embedding_vals.append(embedding_val)
      # Flush a shard to disk every incremental_emb_size batches to bound
      # memory usage.
      if len(ids) == flags_obj.incremental_emb_size:
        utils_embedding.embedding_save(master, ids, embedding_vals,
                                       flags_obj.task_index, shard_index,
                                       flags_obj.model_dir)
        ids = []
        embedding_vals = []
        shard_index += 1
  # Flush whatever remains after the input is exhausted.
  utils_embedding.embedding_save(master, ids, embedding_vals,
                                 flags_obj.task_index, shard_index,
                                 flags_obj.model_dir)
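# utils_embedding.embedding_save is a repo helper whose implementation is not
# shown in this section. A plausible minimal sketch, assuming it concatenates
# the buffered batches and writes one embedding/id shard per
# (task_index, shard_index); hypothetical, for illustration only:
def _embedding_save_sketch(master, ids, embedding_vals, task_index,
                           shard_index, model_dir):
  if not ids:
    return
  id_arr = np.concatenate(ids)
  emb_arr = np.concatenate(embedding_vals)
  # Distributed runs tag files with the worker's task_index, mirroring the
  # naming used by the run_save_embedding variant below.
  if master:
    suffix = '_{}_{}'.format(task_index, shard_index)
  else:
    suffix = '_{}'.format(shard_index)
  with tf.gfile.GFile('{}/embedding{}.npy'.format(model_dir, suffix),
                      'w') as f:
    np.save(f, emb_arr)
  with tf.gfile.GFile('{}/id{}.txt'.format(model_dir, suffix), 'w') as f:
    f.write('\n'.join(map(str, id_arr)))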
def run_save_embedding(model, flags_obj, master, is_chief):
  utils_context.training = False

  dataset = tf.data.Dataset.range(flags_obj.max_id + 1)
  if master:
    dataset = dataset.shard(len(flags_obj.worker_hosts),
                            flags_obj.task_index)
  dataset = dataset.batch(flags_obj.batch_size)
  source = dataset.make_one_shot_iterator().get_next()

  embedding, _, _, _ = model(source)
  tf.train.get_or_create_global_step()

  hooks = []
  if master:
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))

  ids = []
  embedding_vals = []
  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=flags_obj.model_dir,
      save_checkpoint_secs=None,
      log_step_count_steps=None,
      hooks=hooks,
      config=config) as sess:
    while not sess.should_stop():
      id_, embedding_val = sess.run([source, embedding])
      ids.append(id_)
      embedding_vals.append(embedding_val)

  id_ = np.concatenate(ids)
  embedding_val = np.concatenate(embedding_vals)

  # Distributed workers write per-task shards; single-process runs write a
  # single embedding.npy / id.txt pair.
  if master:
    embedding_filename = 'embedding_{}.npy'.format(flags_obj.task_index)
    id_filename = 'id_{}.txt'.format(flags_obj.task_index)
  else:
    embedding_filename = 'embedding.npy'
    id_filename = 'id.txt'
  embedding_filename = flags_obj.model_dir + '/' + embedding_filename
  id_filename = flags_obj.model_dir + '/' + id_filename

  with tf.gfile.GFile(embedding_filename, 'w') as embedding_file:
    np.save(embedding_file, embedding_val)
  with tf.gfile.GFile(id_filename, 'w') as id_file:
    id_file.write('\n'.join(map(str, id_)))
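# When run distributed, the variant above leaves one embedding_{task}.npy /
# id_{task}.txt pair per worker. A hypothetical helper (not part of the repo)
# for merging the shards back into a single id-sorted table:
def _merge_embedding_shards_sketch(model_dir, num_workers):
  ids, embeddings = [], []
  for task_index in range(num_workers):
    with tf.gfile.GFile(
        '{}/embedding_{}.npy'.format(model_dir, task_index), 'rb') as f:
      embeddings.append(np.load(f))
    with tf.gfile.GFile('{}/id_{}.txt'.format(model_dir, task_index)) as f:
      ids.extend(int(line) for line in f.read().splitlines() if line)
  # Sort rows by node id so row i holds the embedding of the i-th id.
  order = np.argsort(ids)
  return np.asarray(ids)[order], np.concatenate(embeddings)[order]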
def run_save_embedding(model, flags_obj, master, is_chief):
  tf.random.set_random_seed(20191223)
  utils_context.training = False

  dataset = tf.data.Dataset.range(flags_obj.max_id + 1)
  if master:
    dataset = dataset.shard(len(flags_obj.worker_hosts),
                            flags_obj.task_index)
  dataset = dataset.batch(flags_obj.batch_size)
  source = dataset.make_one_shot_iterator().get_next()

  sim_outputs, cor_outputs = model(source)
  sim_embedding, _, _, sim_metric = sim_outputs
  cor_embedding, _, _, cor_metric = cor_outputs
  tf.train.get_or_create_global_step()

  hooks = []
  if master:
    hooks.append(utils_hooks.SyncExitHook(len(flags_obj.worker_hosts)))

  extra_param_name = '_'.join(map(str, flags_obj.fanouts))
  model_dir = '{}/{}/{}_{}_{}/'.format(flags_obj.model_dir, flags_obj.model,
                                       extra_param_name, flags_obj.dim,
                                       flags_obj.embedding_dim)

  ids = []
  sim_embedding_vals = []
  cor_embedding_vals = []
  sim_weight_vals = []
  cor_weight_vals = []
  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=is_chief,
      checkpoint_dir=model_dir,
      save_checkpoint_secs=None,
      log_step_count_steps=None,
      hooks=hooks,
      config=config) as sess:
    while not sess.should_stop():
      id_, sim_embedding_val, cor_embedding_val = sess.run(
          [source, sim_embedding, cor_embedding])
      ids.append(id_)
      # Each head yields an (embedding, weight) pair.
      sim_embedding_val, sim_weight_val = (sim_embedding_val[0],
                                           sim_embedding_val[1])
      cor_embedding_val, cor_weight_val = (cor_embedding_val[0],
                                           cor_embedding_val[1])
      sim_embedding_vals.append(sim_embedding_val)
      cor_embedding_vals.append(cor_embedding_val)
      sim_weight_vals.append(sim_weight_val)
      cor_weight_vals.append(cor_weight_val)

  id_ = np.concatenate(ids)
  sim_embedding_val = np.concatenate(sim_embedding_vals)
  cor_embedding_val = np.concatenate(cor_embedding_vals)
  sim_weight_val = np.concatenate(sim_weight_vals)
  cor_weight_val = np.concatenate(cor_weight_vals)

  # Distributed workers write per-task shards; single-process runs write one
  # file of each kind.
  if master:
    sim_embedding_filename = 'sim_embedding_{}.npy'.format(
        flags_obj.task_index)
    cor_embedding_filename = 'cor_embedding_{}.npy'.format(
        flags_obj.task_index)
    sim_weight_filename = 'sim_weight_{}.npy'.format(flags_obj.task_index)
    cor_weight_filename = 'cor_weight_{}.npy'.format(flags_obj.task_index)
    id_filename = 'id_{}.txt'.format(flags_obj.task_index)
  else:
    sim_embedding_filename = 'sim_embedding.npy'
    cor_embedding_filename = 'cor_embedding.npy'
    sim_weight_filename = 'sim_weight.npy'
    cor_weight_filename = 'cor_weight.npy'
    id_filename = 'id.txt'
  sim_embedding_filename = model_dir + '/' + sim_embedding_filename
  cor_embedding_filename = model_dir + '/' + cor_embedding_filename
  sim_weight_filename = model_dir + '/' + sim_weight_filename
  cor_weight_filename = model_dir + '/' + cor_weight_filename
  id_filename = model_dir + '/' + id_filename

  with tf.gfile.GFile(sim_embedding_filename, 'w') as sim_embedding_file:
    np.save(sim_embedding_file, sim_embedding_val)
  with tf.gfile.GFile(cor_embedding_filename, 'w') as cor_embedding_file:
    np.save(cor_embedding_file, cor_embedding_val)
  with tf.gfile.GFile(sim_weight_filename, 'w') as sim_weight_file:
    np.save(sim_weight_file, sim_weight_val)
  with tf.gfile.GFile(cor_weight_filename, 'w') as cor_weight_file:
    np.save(cor_weight_file, cor_weight_val)
  with tf.gfile.GFile(id_filename, 'w') as id_file:
    id_file.write('\n'.join(map(str, id_)))