Example #1
def train(dataset, vectors_path, lr_file, ckpt_dir, checkpoint, idx2vocab,
          vocab_unigrams, embedding_size, neg_sampled, distortion_power,
          batch_size, initial_learning_rate, decay_epochs, decay_rate,
          iter_epochs, allow_soft_placement, log_device_placement,
          gpu_memory_fraction, using_gpu, allow_growth, loss_interval,
          summary_steps, ckpt_interval, ckpt_epochs, summary_interval,
          decay_interval, train_workers):

    num_steps_per_epoch = int(dataset.num_examples / batch_size)
    iter_steps = iter_epochs * num_steps_per_epoch
    decay_steps = int(decay_epochs * num_steps_per_epoch)
    ckpt_steps = int(ckpt_epochs * num_steps_per_epoch)

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps)

    with tf.Graph().as_default(), tf.device(
            '/gpu:0' if using_gpu else '/cpu:0'):

        global_step = tf.Variable(0, trainable=False, name="global_step")

        inputs = tf.placeholder(tf.int32, shape=[batch_size], name='inputs')
        labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        model = Word2Vec(vocab_size=len(idx2vocab),
                         embedding_size=embedding_size,
                         vocab_unigrams=vocab_unigrams,
                         neg_sampled=neg_sampled,
                         distortion_power=distortion_power,
                         batch_size=batch_size)

        train_op, loss = model.train(inputs, labels, global_step,
                                     learning_rate)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(allow_soft_placement=allow_soft_placement,
                                log_device_placement=log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
        config.gpu_options.allow_growth = allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)
            elif checkpoint == '-1':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(
                        os.path.join(ckpt_dir,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess, os.path.join(ckpt_dir,
                                           'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning(
                        'checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            ## train
            executor_workers = train_workers - 1
            if executor_workers > 0:
                executor = ThreadPoolExecutor(max_workers=executor_workers)
                for _ in range(executor_workers):
                    executor.submit(_train_thread_body, dataset, batch_size,
                                    inputs, labels, sess, train_op, iter_steps,
                                    global_step, learning_rate, LR)

            last_loss_time = time.time() - loss_interval
            last_summary_time = time.time() - summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            while True:
                start_time = time.time()
                batch_data, batch_labels = dataset.next_batch(
                    batch_size, keep_strict_batching=True)
                feed_dict = {
                    inputs: batch_data,
                    labels: batch_labels,
                    learning_rate: LR.learning_rate
                }
                _, loss_value, cur_step = sess.run(
                    [train_op, loss, global_step], feed_dict=feed_dict)
                now = time.time()

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= loss_interval:
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%.6f, duration/step=%.4fs'
                    logger.info(format_str %
                                (time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(time.time())),
                                 cur_step, epoch_step, epoch, LR.learning_rate,
                                 loss_value, now - start_time))
                    last_loss_time = time.time()
                if now - last_summary_time >= summary_interval or cur_step - last_summary_step >= summary_steps or cur_step >= iter_steps:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step
                ckpted = False
                # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
                if now - last_checkpoint_time >= ckpt_interval or cur_step - last_checkpoint_step >= ckpt_steps or cur_step >= iter_steps:
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=cur_step)
                    # embedding_vectors = sess.run(model.vectors, feed_dict=feed_dict)
                    vecs, weights, biases = sess.run(
                        [model.vectors, model.context_weights,
                         model.context_biases],
                        feed_dict=feed_dict)
                    save_word2vec_format(vectors_path, vecs, idx2vocab)
                    np.savetxt(vectors_path + ".contexts", weights)
                    np.savetxt(vectors_path + ".context_biases", biases)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = cur_step
                    ckpted = True
                # update learning rate
                if ckpted or now - last_decay_time >= decay_interval or cur_step - last_decay_step >= decay_steps:
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) >= 1e-7:
                        decay_epochs = lr_info[1]
                        decay_steps = int(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) >= 1e-7:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[0] - initial_learning_rate) < 1e-7:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps)
                    else:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step

                if cur_step >= iter_steps:
                    break
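
A minimal sketch of the _train_thread_body worker submitted to the ThreadPoolExecutor above. The real function is not shown on this page; this version is inferred only from the call signature used in Example #1 (the Example #2 variant takes different arguments) and reuses the same feed_dict convention as the main loop.

def _train_thread_body(dataset, batch_size, inputs, labels, sess, train_op,
                       iter_steps, global_step, learning_rate, LR):
    # Sketch: pull batches and run the shared train_op until the shared
    # global_step reaches iter_steps; the main thread handles logging,
    # checkpoints and learning-rate updates.
    while True:
        batch_data, batch_labels = dataset.next_batch(
            batch_size, keep_strict_batching=True)
        feed_dict = {
            inputs: batch_data,
            labels: batch_labels,
            learning_rate: LR.learning_rate
        }
        _, cur_step = sess.run([train_op, global_step], feed_dict=feed_dict)
        if cur_step >= iter_steps:
            break
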
Example #2
def train(net, vectors_path, lr_file, ckpt_dir, checkpoint, embedding_size,
          neg_sampled, order, distortion_power, iter_epochs, batch_size,
          initial_learning_rate, decay_epochs, decay_interval, decay_rate,
          allow_soft_placement, log_device_placement, gpu_memory_fraction,
          using_gpu, allow_growth, loss_interval, summary_steps,
          summary_interval, ckpt_epochs, ckpt_interval, train_workers):
    edge_sampler = Edge_sampler(net, batch_size)
    edges_size = edge_sampler.edges_size
    nodes_size = net.get_nodes_size()
    num_steps_per_epoch = int(edges_size / batch_size)
    iter_steps = round(
        iter_epochs *
        num_steps_per_epoch)  # iter_epochs should be big enough to converge.
    decay_steps = round(decay_epochs * num_steps_per_epoch)
    ckpt_steps = round(ckpt_epochs * num_steps_per_epoch)

    nodes_degrees = [net.get_degrees(v) for v in range(nodes_size)]

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        iter_steps=iter_steps)

    with tf.Graph().as_default(), tf.device(
            '/gpu:0' if using_gpu else '/cpu:0'):

        inputs = tf.placeholder(tf.int32, shape=[batch_size], name='inputs')
        labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        model_list = []
        trains_list = []
        if order == "1":
            with tf.name_scope("1st_order"):
                model = SGNS(vocab_size=nodes_size,
                             embedding_size=embedding_size,
                             vocab_unigrams=nodes_degrees,
                             distortion_power=distortion_power,
                             neg_sampled=neg_sampled,
                             batch_size=batch_size,
                             order=1)
                global_step = tf.Variable(0,
                                          trainable=False,
                                          name="global_step")
            train_op, loss = model.train(inputs, labels, global_step,
                                         learning_rate)
            model_list.append(model)
            trains_list.append((train_op, loss, global_step))
        elif order == "2":
            with tf.name_scope("2st_order"):
                model = SGNS(vocab_size=nodes_size,
                             embedding_size=embedding_size,
                             vocab_unigrams=nodes_degrees,
                             distortion_power=distortion_power,
                             neg_sampled=neg_sampled,
                             batch_size=batch_size,
                             order=2)
                global_step = tf.Variable(0,
                                          trainable=False,
                                          name="global_step")
            train_op, loss = model.train(inputs, labels, global_step,
                                         learning_rate)
            model_list.append(model)
            trains_list.append((train_op, loss, global_step))
        elif order == "3":
            with tf.name_scope("1st_order"):
                model = SGNS(vocab_size=nodes_size,
                             embedding_size=embedding_size // 2,
                             vocab_unigrams=nodes_degrees,
                             distortion_power=distortion_power,
                             neg_sampled=neg_sampled,
                             batch_size=batch_size,
                             order=1)
                global_step = tf.Variable(0,
                                          trainable=False,
                                          name="global_step")
            train_op, loss = model.train(inputs, labels, global_step,
                                         learning_rate)
            model_list.append(model)
            trains_list.append((train_op, loss, global_step))
            with tf.name_scope("2st_order"):
                model = SGNS(vocab_size=nodes_size,
                             embedding_size=embedding_size // 2,
                             vocab_unigrams=nodes_degrees,
                             distortion_power=distortion_power,
                             neg_sampled=neg_sampled,
                             batch_size=batch_size,
                             order=2)
                global_step = tf.Variable(0,
                                          trainable=False,
                                          name="global_step")
            train_op, loss = model.train(inputs, labels, global_step,
                                         learning_rate)
            model_list.append(model)
            trains_list.append((train_op, loss, global_step))
        else:
            logger.error("unvalid order in LINE: '%s'. " % order)
            sys.exit()

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(allow_soft_placement=allow_soft_placement,
                                log_device_placement=log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
        config.gpu_options.allow_growth = allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)
            elif checkpoint == '-1':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(
                        os.path.join(ckpt_dir,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess, os.path.join(ckpt_dir,
                                           'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning(
                        'checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            ## train
            executor_workers = train_workers - 1
            if executor_workers > 0:
                futures = set()
                executor = ThreadPoolExecutor(max_workers=executor_workers)
                for _ in range(executor_workers):
                    future = executor.submit(_train_thread_body, edge_sampler,
                                             inputs, labels, sess, trains_list,
                                             learning_rate, LR)
                    logger.info("open a new training thread: %s" % future)
                    futures.add(future)
            last_loss_time = time.time() - loss_interval
            last_summary_time = time.time() - summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            while True:
                start_time = time.time()
                batch_data, batch_labels = edge_sampler.next_batch()
                feed_dict = {
                    inputs: batch_data,
                    labels: batch_labels,
                    learning_rate: LR.learning_rate
                }
                loss_value_list = []
                for train_op, loss, global_step in trains_list:
                    _, loss_value, cur_step = sess.run(
                        [train_op, loss, global_step], feed_dict=feed_dict)
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    loss_value_list.append(loss_value)
                now = time.time()

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= loss_interval:
                    if len(loss_value_list) == 1:
                        loss_str = "%.6f" % loss_value_list[0]
                    else:
                        loss_str = "[%.6f, %.6f]" % (loss_value_list[0],
                                                     loss_value_list[1])
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%s, duration/step=%.4fs'
                    logger.info(format_str %
                                (time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(time.time())),
                                 cur_step, epoch_step, epoch, LR.learning_rate,
                                 loss_str, now - start_time))
                    last_loss_time = time.time()
                if now - last_summary_time >= summary_interval or cur_step - last_summary_step >= summary_steps or cur_step >= iter_steps:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step
                ckpted = False
                # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
                if now - last_checkpoint_time >= ckpt_interval or cur_step - last_checkpoint_step >= ckpt_steps or cur_step >= iter_steps:
                    # embedding_vectors = sess.run(model.vectors, feed_dict=feed_dict)
                    vecs_list = []
                    for model in model_list:
                        vecs = sess.run(model.vectors, feed_dict=feed_dict)
                        vecs_list.append(vecs)
                    vecs = np.concatenate(vecs_list, axis=1)
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    utils.save_word2vec_format_and_ckpt(
                        vectors_path, vecs, checkpoint_path, sess, saver,
                        cur_step)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = cur_step
                    ckpted = True
                # update learning rate
                if ckpted or now - last_decay_time >= decay_interval or (
                        decay_steps > 0
                        and cur_step - last_decay_step >= decay_steps):
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) > 1e-6:
                        decay_epochs = lr_info[1]
                        decay_steps = round(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) > 1e-6:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[3] - iter_epochs) > 1e-6:
                        iter_epochs = lr_info[3]
                        iter_steps = round(iter_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[0] - initial_learning_rate) > 1e-6:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps,
                                 iter_steps=iter_steps)
                    else:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps,
                                             iter_steps=iter_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step
                if cur_step >= LR.iter_steps:
                    break

            summary_writer.close()
            if executor_workers > 0:
                logger.info("waiting the training threads finished:")
                try:
                    for future in as_completed(futures):
                        logger.info(future)
                except KeyboardInterrupt:
                    print("stopped by hand.")
Example #3
def train(dataset, lr_file, ckpt_dir, checkpoint, options):
    nodes_size = dataset._nodes_size
    num_steps_per_epoch = int(nodes_size / options.batch_size)
    iter_epochs = options.iter_epoches
    iter_steps = round(
        iter_epochs *
        num_steps_per_epoch)  # iter_epoches should be big enough to converge.
    decay_epochs = options.decay_epochs
    decay_steps = round(decay_epochs * num_steps_per_epoch)
    ckpt_steps = round(options.ckpt_epochs * num_steps_per_epoch)
    initial_learning_rate = options.learning_rate
    decay_rate = options.decay_rate

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        iter_steps=iter_steps)

    with tf.Graph().as_default(), tf.device(
            '/gpu:0' if options.using_gpu else '/cpu:0'):

        global_step = tf.Variable(0, trainable=False, name="global_step")
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        inputs = tf.placeholder(tf.float32,
                                shape=[None, options.feature_size],
                                name='inputs')
        laplacian = tf.placeholder(tf.float32, [None, None],
                                   name="laplacian_matrix")
        if options.using_label:
            labels = tf.placeholder(tf.int32,
                                    shape=[None, options.label_size],
                                    name='labels')
        else:
            labels = tf.placeholder(tf.int32,
                                    shape=[None, None],
                                    name='adjacency')

        model = GCN(dropout=options.dropout,
                    feature_size=options.feature_size,
                    using_label=options.using_label,
                    embedding_size=options.embedding_size,
                    hidden_size_list=options.hidden_size_list,
                    label_size=options.label_size,
                    weight_decay=options.weight_decay)
        train_op, loss = model.train(inputs, laplacian, labels, global_step,
                                     learning_rate)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=6)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(
            allow_soft_placement=options.allow_soft_placement,
            log_device_placement=options.log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = options.gpu_memory_fraction
        config.gpu_options.allow_growth = options.allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)

            elif checkpoint == '-1':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(
                        os.path.join(ckpt_dir,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess, os.path.join(ckpt_dir,
                                           'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning(
                        'checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            last_loss_time = time.time() - options.loss_interval
            last_summary_time = time.time() - options.summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            while True:
                start_time = time.time()
                batch_features, batch_adj, batch_labels = dataset.next_batch(
                    options.batch_size)
                feed_dict = {
                    inputs: batch_features,
                    laplacian: batch_adj,
                    labels: batch_labels,
                    learning_rate: LR.learning_rate
                }
                _, loss_value, cur_step = sess.run(
                    [train_op, loss, global_step], feed_dict=feed_dict)
                now = time.time()

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= options.loss_interval:
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%.6f, duration/step=%.4fs'
                    logger.info(format_str %
                                (time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(time.time())),
                                 cur_step, epoch_step, epoch, LR.learning_rate,
                                 loss_value, now - start_time))
                    last_loss_time = time.time()
                if now - last_summary_time >= options.summary_interval or cur_step - last_summary_step >= options.summary_steps or cur_step >= iter_steps:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step
                ckpted = False
                # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
                if now - last_checkpoint_time >= options.ckpt_interval or cur_step - last_checkpoint_step >= ckpt_steps or cur_step >= iter_steps:
                    if options.batch_size == nodes_size:
                        batch_features, batch_adj, batch_labels = dataset.get_full(
                        )
                        feed_dict = {
                            inputs: batch_features,
                            laplacian: batch_adj,
                            labels: batch_labels,
                            learning_rate: LR.learning_rate
                        }
                        vecs = sess.run(model.vectors, feed_dict=feed_dict)
                    else:
                        vecs = []
                        start = 0
                        while start < nodes_size:
                            end = min(nodes_size, start + options.batch_size)
                            index = np.arange(start, end)
                            start = end
                            batch_features, batch_adj, batch_labels = dataset.get_batch(
                                index)
                            feed_dict = {
                                inputs: batch_features,
                                laplacian: batch_adj,
                                labels: batch_labels,
                                learning_rate: LR.learning_rate
                            }
                            batch_embeddings = sess.run(model.vectors,
                                                        feed_dict=feed_dict)
                            vecs.append(batch_embeddings)
                        vecs = np.concatenate(vecs, axis=0)
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    utils.save_word2vec_format_and_ckpt(
                        options.vectors_path, vecs, checkpoint_path, sess,
                        saver, cur_step)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = cur_step
                    ckpted = True
                # update learning rate
                if ckpted or now - last_decay_time >= options.decay_interval or (
                        decay_steps > 0
                        and cur_step - last_decay_step >= decay_steps):
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) > 1e-6:
                        decay_epochs = lr_info[1]
                        decay_steps = round(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) > 1e-6:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[3] - iter_epochs) > 1e-6:
                        iter_epochs = lr_info[3]
                        iter_steps = round(iter_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[0] - initial_learning_rate) > 1e-6:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps,
                                 iter_steps=iter_steps)
                    else:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps,
                                             iter_steps=iter_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step
                if cur_step >= LR.iter_steps:
                    break
            summary_writer.close()
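
The lr_file polled inside each training loop is a plain-text file read with np.loadtxt and indexed as lr_info[0] = initial learning rate, lr_info[1] = decay_epochs, lr_info[2] = decay_rate and, in Examples #2-#5, lr_info[3] = iter_epochs, so the schedule can be edited while training runs. A short sketch of producing such a file (the path is hypothetical):

import numpy as np

# [learning_rate, decay_epochs, decay_rate, iter_epochs]
lr_info = np.array([0.025, 10.0, 0.1, 100.0])
np.savetxt("lr_info.txt", lr_info)  # train() reads it back via np.loadtxt(lr_file)
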
Example #4
def train(walker, lr_file, ckpt_dir, checkpoint, options):
    vocab_size = walker.nodes_size
    types_size = walker.node_types_size
    num_steps_per_epoch = int(
        vocab_size * options.train_workers /
        options.batch_size)  # a rough estimate of steps per epoch under RWR sampling
    iter_epochs = options.iter_epoches
    iter_steps = round(
        iter_epochs *
        num_steps_per_epoch)  # iter_epoches should be big enough to converge.
    decay_epochs = options.decay_epochs
    decay_steps = round(decay_epochs * num_steps_per_epoch)
    ckpt_steps = round(options.ckpt_epochs * num_steps_per_epoch)
    initial_learning_rate = options.learning_rate
    decay_rate = options.decay_rate

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        iter_steps=iter_steps)

    with tf.Graph().as_default(), tf.device(
            '/gpu:0' if options.using_gpu else '/cpu:0'):

        global_step = tf.Variable(0, trainable=False, name="global_step")
        # inputs(center_nodes), labels(context_nodes), labels_type(context_nodes_type), neg_labels(neg_nodes)
        inputs = tf.placeholder(tf.int32, name='inputs')  # center_nodes
        labels = [
            tf.placeholder(tf.int32,
                           shape=[None],
                           name='labels_T{}'.format(type_i))
            for type_i in range(types_size)
        ]
        labels_mask = [
            tf.placeholder(tf.float32, name='labels_mask_T{}'.format(type_i))
            for type_i in range(types_size)
        ]
        neg_labels = [
            tf.placeholder(tf.int32,
                           shape=[None],
                           name='neg_labels_T{}'.format(type_i))
            for type_i in range(types_size)
        ]
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        model = SGNS(vocab_size=vocab_size,
                     embedding_size=options.embedding_size,
                     type_size=types_size)

        train_op, loss = model.train(inputs, labels, labels_mask, neg_labels,
                                     global_step, learning_rate)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=6)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(
            allow_soft_placement=options.allow_soft_placement,
            log_device_placement=options.log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = options.gpu_memory_fraction
        config.gpu_options.allow_growth = options.allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)

            elif checkpoint == '-1':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(
                        os.path.join(ckpt_dir,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess, os.path.join(ckpt_dir,
                                           'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning(
                        'checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            last_loss_time = time.time() - options.loss_interval
            last_summary_time = time.time() - options.summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            rwrgenerator = RWRGenerator(walker=walker,
                                        walk_times=options.walk_times)
            while True:
                start_time = time.time()
                batch_inputs, batch_labels, batch_labels_mask, batch_neg_labels = rwrgenerator.next_batch(
                )
                feed_dict = {
                    inputs: batch_inputs,
                    learning_rate: LR.learning_rate
                }
                for type_i in range(types_size):
                    feed_dict[labels[type_i]] = batch_labels[type_i]
                    feed_dict[labels_mask[type_i]] = batch_labels_mask[type_i]
                    feed_dict[neg_labels[type_i]] = batch_neg_labels[type_i]
                _, loss_value, cur_step = sess.run(
                    [train_op, loss, global_step], feed_dict=feed_dict)
                now = time.time()

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= options.loss_interval:
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%.6f, duration/step=%.4fs'
                    logger.info(format_str %
                                (time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(time.time())),
                                 cur_step, epoch_step, epoch, LR.learning_rate,
                                 loss_value, now - start_time))
                    last_loss_time = time.time()
                if now - last_summary_time >= options.summary_interval or cur_step - last_summary_step >= options.summary_steps or cur_step >= iter_steps:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step
                ckpted = False
                # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
                if now - last_checkpoint_time >= options.ckpt_interval or cur_step - last_checkpoint_step >= ckpt_steps or cur_step >= iter_steps:
                    vecs, global_step_value = sess.run(
                        [model.vectors, global_step], feed_dict=feed_dict)
                    # vecs,weights,biases = sess.run([model.vectors,model.context_weights,model.context_biases],
                    #                              feed_dict=feed_dict)
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    utils.save_word2vec_format_and_ckpt(
                        options.vectors_path, vecs, checkpoint_path, sess,
                        saver, global_step_value, types_size)
                    # save_word2vec_format(vectors_path+".contexts", weights, walker.idx_nodes)
                    # save_word2vec_format(vectors_path+".context_biases", np.reshape(biases,[-1,1]), walker.idx_nodes)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = global_step_value
                    ckpted = True
                # update learning rate
                if ckpted or now - last_decay_time >= options.decay_interval or (
                        decay_steps > 0
                        and cur_step - last_decay_step >= decay_steps):
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) > 1e-6:
                        decay_epochs = lr_info[1]
                        decay_steps = round(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) > 1e-6:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[3] - iter_epochs) > 1e-6:
                        iter_epochs = lr_info[3]
                        iter_steps = round(iter_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[0] - initial_learning_rate) > 1e-6:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps,
                                 iter_steps=iter_steps)
                    else:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps,
                                             iter_steps=iter_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step
                if cur_step >= LR.iter_steps:
                    break

            summary_writer.close()
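
utils.save_word2vec_format_and_ckpt, called from the checkpoint branch of Examples #2-#5, is also not shown on this page. The sketch below is inferred from its call sites and from the plain word2vec text convention (a "count dimension" header line, then one id and its vector per line); the real helper may differ, and the extra types_size argument passed in Example #4 is accepted but unused here.

def save_word2vec_format_and_ckpt(vectors_path, vecs, checkpoint_path, sess,
                                  saver, global_step, types_size=None):
    # Sketch: dump embeddings in word2vec text format, then save a checkpoint
    # tagged with the current global step.
    with open(vectors_path, 'w') as fout:
        fout.write("%d %d\n" % (vecs.shape[0], vecs.shape[1]))
        for idx, vec in enumerate(vecs):
            fout.write("%d %s\n" % (idx, ' '.join("%f" % x for x in vec)))
    saver.save(sess, checkpoint_path, global_step=global_step)
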
Example #5
def train(dataset,
          vectors_path,
          lr_file,
          ckpt_dir,
          checkpoint,
          embedding_size,
          struct,
          alpha,
          beta,
          gamma,
          reg,
          sparse_dot,
          iter_epochs,
          batch_size,
          initial_learning_rate,
          decay_epochs,
          decay_interval,
          decay_rate,
          allow_soft_placement,
          log_device_placement,
          gpu_memory_fraction,
          using_gpu,
          allow_growth,
          loss_interval,
          summary_steps,
          summary_interval,
          ckpt_epochs,
          ckpt_interval,
          dbn_initial,
          dbn_epochs,
          dbn_batchsize,
          dbn_learning_rate,
          active_function="sigmoid"):
    actv_func = {
        'sigmoid': tf.sigmoid,
        'tanh': tf.tanh,
        'relu': tf.nn.relu,
        'leaky_relu': tf.nn.leaky_relu
    }[active_function]
    nodes_size = dataset.nodes_size
    num_steps_per_epoch = int(nodes_size / batch_size)
    iter_steps = round(
        iter_epochs *
        num_steps_per_epoch)  # iter_epochs should be big enough to converge.
    decay_steps = round(decay_epochs * num_steps_per_epoch)
    ckpt_steps = round(ckpt_epochs * num_steps_per_epoch)

    LR = utils.LearningRateGenerator(
        initial_learning_rate=initial_learning_rate,
        initial_steps=0,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        iter_steps=iter_steps)

    with tf.Graph().as_default(), tf.device(
            '/gpu:0' if using_gpu else '/cpu:0'):

        global_step = tf.Variable(0, trainable=False, name="global_step")
        adj_matrix = tf.placeholder(tf.float32, [None, None])
        if sparse_dot:
            inputs_sp_indices = tf.placeholder(tf.int64)
            inputs_sp_ids_val = tf.placeholder(tf.float32)
            inputs_sp_shape = tf.placeholder(tf.int64)
            inputs = tf.SparseTensor(inputs_sp_indices, inputs_sp_ids_val,
                                     inputs_sp_shape)
        else:
            inputs = tf.placeholder(tf.float32, [None, nodes_size])
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        model = SDNE(nodes_size=nodes_size,
                     struct=struct,
                     embedding_size=embedding_size,
                     alpha=alpha,
                     beta=beta,
                     gamma=gamma,
                     reg=reg,
                     sparse_dot=sparse_dot,
                     active_function=actv_func)

        train_op, loss, embeddings = model.train(inputs, adj_matrix,
                                                 global_step, learning_rate)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init_op = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(allow_soft_placement=allow_soft_placement,
                                log_device_placement=log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
        config.gpu_options.allow_growth = allow_growth
        # config.gpu_options.visible_device_list = visible_device_list

        with tf.Session(config=config) as sess:
            # first_step = 0
            if checkpoint == '0':  # new train
                sess.run(init_op)
                if dbn_initial:
                    time_start = time.time()
                    logger.info("DBN initial start ...")
                    RBMs = []
                    for i in range(len(model._struct) - 1):
                        RBM = rbm(model._struct[i],
                                  model._struct[i + 1],
                                  batchsize=dbn_batchsize,
                                  learning_rate=dbn_learning_rate,
                                  config=config)
                        logger.info("create rbm {}-{}".format(
                            model._struct[i], model._struct[i + 1]))
                        RBMs.append(RBM)
                        for epoch in range(dbn_epochs):
                            error = 0
                            for batch in range(0, nodes_size, batch_size):
                                # this line is unchanged
                                # does it iterate over all the nodes globally?
                                mini_batch, _ = dataset.next_batch(batch_size)
                                for k in range(len(RBMs) - 1):
                                    mini_batch = RBMs[k].getH(mini_batch)
                                error += RBM.fit(mini_batch)
                            logger.info("rbm_" + str(len(RBMs)) + " epochs:" +
                                        str(epoch) + " error: " + str(error))

                        W, bv, bh = RBM.getWb()
                        name = "encoder" + str(i)

                        def assign(a, b, session):
                            op = a.assign(b)
                            session.run(op)

                        assign(model._weights[name], W, sess)
                        assign(model._bias[name], bh, sess)

                        name = "decoder" + str(len(model._struct) - i - 2)
                        assign(model._weights[name], W.transpose(), sess)
                        assign(model._bias[name], bv, sess)
                    logger.info(
                        "dbn_init finished in {}s.".format(time.time() -
                                                           time_start))

                vecs = []
                start = 0
                while start < nodes_size:
                    end = min(nodes_size, start + batch_size)
                    index = np.arange(start, end)
                    start = end
                    batch_input, batch_adj = dataset.get_batch(index)
                    if sparse_dot:
                        batch_input_ind = np.vstack(
                            np.where(batch_input)).astype(np.int64).T
                        batch_input_shape = np.array(batch_input.shape).astype(
                            np.int64)
                        batch_input_val = batch_input[np.where(batch_input)]
                        feed_dict = {
                            inputs_sp_indices: batch_input_ind,
                            inputs_sp_shape: batch_input_shape,
                            inputs_sp_ids_val: batch_input_val,
                            adj_matrix: batch_adj,
                            learning_rate: LR.learning_rate
                        }
                    else:
                        feed_dict = {
                            inputs: batch_input,
                            adj_matrix: batch_adj,
                            learning_rate: LR.learning_rate
                        }
                    batch_embeddings = sess.run(embeddings,
                                                feed_dict=feed_dict)
                    vecs.append(batch_embeddings)
                vecs = np.concatenate(vecs, axis=0)
                checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                utils.save_word2vec_format_and_ckpt(vectors_path, vecs,
                                                    checkpoint_path, sess,
                                                    saver, 0)

            elif checkpoint == '-1':  # load the latest one
                ckpt = tf.train.get_checkpoint_state(ckpt_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # global_step_for_restore = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # first_step = int(global_step_for_restore) + 1
                else:
                    logger.warning('No checkpoint file found')
                    return
            else:
                if os.path.exists(
                        os.path.join(ckpt_dir,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(ckpt_dir, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess, os.path.join(ckpt_dir,
                                           'model.ckpt-' + checkpoint))
                    # first_step = int(checkpoint) + 1
                else:
                    logger.warning(
                        'checkpoint {} not found'.format(checkpoint))
                    return

            summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

            ## train
            last_loss_time = time.time() - loss_interval
            last_summary_time = time.time() - summary_interval
            last_decay_time = last_checkpoint_time = time.time()
            last_decay_step = last_summary_step = last_checkpoint_step = 0
            while True:
                start_time = time.time()
                batch_input, batch_adj = dataset.next_batch(
                    batch_size, keep_strict_batching=True)
                if sparse_dot:
                    batch_input_ind = np.vstack(np.where(batch_input)).astype(
                        np.int64).T
                    batch_input_shape = np.array(batch_input.shape).astype(
                        np.int64)
                    batch_input_val = batch_input[np.where(batch_input)]
                    feed_dict = {
                        inputs_sp_indices: batch_input_ind,
                        inputs_sp_shape: batch_input_shape,
                        inputs_sp_ids_val: batch_input_val,
                        adj_matrix: batch_adj,
                        learning_rate: LR.learning_rate
                    }
                else:
                    feed_dict = {
                        inputs: batch_input,
                        adj_matrix: batch_adj,
                        learning_rate: LR.learning_rate
                    }

                _, loss_value, cur_step = sess.run(
                    [train_op, loss, global_step], feed_dict=feed_dict)
                now = time.time()

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                epoch, epoch_step = divmod(cur_step, num_steps_per_epoch)

                if now - last_loss_time >= loss_interval:
                    format_str = '%s: step=%d(%d/%d), lr=%.6f, loss=%.6f, duration/step=%.4fs'
                    logger.info(format_str %
                                (time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(time.time())),
                                 cur_step, epoch_step, epoch, LR.learning_rate,
                                 loss_value, now - start_time))
                    last_loss_time = time.time()
                if now - last_summary_time >= summary_interval or cur_step - last_summary_step >= summary_steps or cur_step >= iter_steps:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, cur_step)
                    last_summary_time = time.time()
                    last_summary_step = cur_step
                ckpted = False
                # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
                if now - last_checkpoint_time >= ckpt_interval or cur_step - last_checkpoint_step >= ckpt_steps or cur_step >= iter_steps:
                    vecs = []
                    start = 0
                    while start < nodes_size:
                        end = min(nodes_size, start + batch_size)
                        index = np.arange(start, end)
                        start = end
                        batch_input, batch_adj = dataset.get_batch(index)
                        if sparse_dot:
                            batch_input_ind = np.vstack(
                                np.where(batch_input)).astype(np.int64).T
                            batch_input_shape = np.array(
                                batch_input.shape).astype(np.int64)
                            batch_input_val = batch_input[np.where(
                                batch_input)]
                            feed_dict = {
                                inputs_sp_indices: batch_input_ind,
                                inputs_sp_shape: batch_input_shape,
                                inputs_sp_ids_val: batch_input_val,
                                adj_matrix: batch_adj,
                                learning_rate: LR.learning_rate
                            }
                        else:
                            feed_dict = {
                                inputs: batch_input,
                                adj_matrix: batch_adj,
                                learning_rate: LR.learning_rate
                            }
                        batch_embeddings = sess.run(embeddings,
                                                    feed_dict=feed_dict)
                        vecs.append(batch_embeddings)
                    vecs = np.concatenate(vecs, axis=0)
                    checkpoint_path = os.path.join(ckpt_dir, 'model.ckpt')
                    utils.save_word2vec_format_and_ckpt(
                        vectors_path, vecs, checkpoint_path, sess, saver,
                        cur_step)
                    last_checkpoint_time = time.time()
                    last_checkpoint_step = cur_step
                    ckpted = True
                # update learning rate
                if ckpted or now - last_decay_time >= decay_interval or (
                        decay_steps > 0
                        and cur_step - last_decay_step >= decay_steps):
                    lr_info = np.loadtxt(lr_file, dtype=float)
                    if np.abs(lr_info[1] - decay_epochs) > 1e-6:
                        decay_epochs = lr_info[1]
                        decay_steps = round(decay_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[2] - decay_rate) > 1e-6:
                        decay_rate = lr_info[2]
                    if np.abs(lr_info[3] - iter_epochs) > 1e-6:
                        iter_epochs = lr_info[3]
                        iter_steps = round(iter_epochs * num_steps_per_epoch)
                    if np.abs(lr_info[0] - initial_learning_rate) > 1e-6:
                        initial_learning_rate = lr_info[0]
                        LR.reset(initial_learning_rate=initial_learning_rate,
                                 initial_steps=cur_step,
                                 decay_rate=decay_rate,
                                 decay_steps=decay_steps,
                                 iter_steps=iter_steps)
                    else:
                        LR.exponential_decay(cur_step,
                                             decay_rate=decay_rate,
                                             decay_steps=decay_steps,
                                             iter_steps=iter_steps)
                    last_decay_time = time.time()
                    last_decay_step = cur_step
                if cur_step >= LR.iter_steps:
                    break
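
In Example #5 the sparse feed for the tf.SparseTensor placeholders is built three times with identical code; a small hypothetical helper could factor it out:

def sparse_feed(batch_input):
    # Convert a dense batch into the (indices, values, shape) triple expected
    # by the inputs_sp_* placeholders defined above.
    batch_input_ind = np.vstack(np.where(batch_input)).astype(np.int64).T
    batch_input_shape = np.array(batch_input.shape).astype(np.int64)
    batch_input_val = batch_input[np.where(batch_input)]
    return batch_input_ind, batch_input_val, batch_input_shape

# usage:
# ind, val, shape = sparse_feed(batch_input)
# feed_dict = {inputs_sp_indices: ind, inputs_sp_ids_val: val,
#              inputs_sp_shape: shape, adj_matrix: batch_adj,
#              learning_rate: LR.learning_rate}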