Example #1
  def testPartitioners(self):
    partitioners = {
        "gamma": tf.fixed_size_partitioner(num_shards=2),
        "beta": tf.fixed_size_partitioner(num_shards=2),
    }

    inputs = tf.placeholder(tf.float32, shape=[None, 10])
    ln = snt.LayerNorm(partitioners=partitioners)
    self.assertEqual(ln.partitioners, partitioners)
    ln(inputs)

    self.assertEqual(type(ln.gamma), variables.PartitionedVariable)
    self.assertEqual(type(ln.beta), variables.PartitionedVariable)
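For comparison, a minimal standalone sketch (TF1-style graph-mode API; under TF 2.x the same symbols are available via tf.compat.v1) of what tf.fixed_size_partitioner(num_shards=2) does when attached to a variable scope: tf.get_variable then returns a PartitionedVariable whose shards split the first dimension.

import tensorflow as tf  # assumes TensorFlow 1.x (or tf.compat.v1 in TF 2.x)

partitioner = tf.fixed_size_partitioner(num_shards=2)
with tf.variable_scope("demo", partitioner=partitioner):
    w = tf.get_variable("w", shape=[10, 4], dtype=tf.float32)

# w is a PartitionedVariable; iterating over it yields the individual shards,
# each holding half of the rows.
print(type(w).__name__)                      # PartitionedVariable
print([part.shape.as_list() for part in w])  # [[5, 4], [5, 4]]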
Example #2
  def testRecoverPartitionedVariableMap(self):
    with tf.variable_scope("test"):
      partitioner = tf.fixed_size_partitioner(3)
      tf.get_variable(
          initializer=tf.ones([11, 5]),
          name="partitioned_variable",
          partitioner=partitioner)
      tf.get_variable(
          initializer=tf.ones([11, 5]),
          name="normal_variable")

    all_vars = tf.global_variables()
    all_vars_dict = {var.op.name[5:]: var for var in all_vars}
    self.assertEqual(set(all_vars_dict.keys()), set([
        "partitioned_variable/part_0",
        "partitioned_variable/part_1",
        "partitioned_variable/part_2",
        "normal_variable"]))

    self.assertEqual(len(all_vars_dict), 4)
    var_map = native_module.recover_partitioned_variable_map(all_vars_dict)
    self.assertEqual(set(var_map.keys()), set([
        "partitioned_variable", "normal_variable"]))

    # Verify order of the partitioned variable list
    self.assertAllEqual(
        [v.op.name for v in var_map["partitioned_variable"]],
        [
            "test/partitioned_variable/part_0",
            "test/partitioned_variable/part_1",
            "test/partitioned_variable/part_2",
        ])
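As an illustration only (not TF Hub's actual recover_partitioned_variable_map implementation), grouping the "<name>/part_N" shards checked above back into per-variable lists could look roughly like this:

import re
from collections import defaultdict

def group_partitioned_variables(name_to_var):
    """Groups 'foo/part_N' entries into ordered lists keyed by 'foo' (sketch only)."""
    sharded = defaultdict(dict)
    grouped = {}
    for name, var in name_to_var.items():
        match = re.match(r"(.*)/part_(\d+)$", name)
        if match:
            sharded[match.group(1)][int(match.group(2))] = var
        else:
            grouped[name] = var
    for base, parts in sharded.items():
        grouped[base] = [parts[i] for i in sorted(parts)]
    return grouped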
Example #3
  def TestSuccess(self, connectivity, partitioning, fused, use_resource):
    params = {
        'trainable': True,
        'normalizer_fn': layers.batch_norm,
        'normalizer_params': {
            'scale': True,
            'fused': fused
        }
    }

    partitioner = tf.fixed_size_partitioner(2) if partitioning else None
    with tf.variable_scope(
        tf.get_variable_scope(),
        partitioner=partitioner,
        use_resource=use_resource):
      with tf.contrib.framework.arg_scope(
          [layers.conv2d, layers.separable_conv2d], **params):
        build_model()

    sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(sess, os.path.join(FLAGS.test_tmpdir, CKPT_FILE_NAME))
    mapper = self.createMapper(connectivity)
    conv = get_op('conv1/Conv2D')
    sep_conv = get_op('sep_conv/separable_conv2d')
    with sess.as_default():
      self.assertAllClose(CONV1_GAMMA, mapper.get_gamma(conv).eval())
      self.assertAllClose(SEP_CONV_GAMMA, mapper.get_gamma(sep_conv).eval())
Example #4
 def testNoBatchNorm(self, connectivity, partitioning):
   partitioner = tf.fixed_size_partitioner(2) if partitioning else None
   with tf.variable_scope(
       tf.get_variable_scope(), partitioner=partitioner):
     build_model()
   mapper = self.createMapper(connectivity)
   conv = get_op('conv1/Conv2D')
   self.assertEqual(None, mapper.get_gamma(conv))
Example #5
  def testPartitioners(self):
    if tf.executing_eagerly():
      self.skipTest("Partitioned variables are not supported in eager mode.")

    inputs = tf.ones(
        dtype=tf.float32, shape=[self.batch_size, self.in_size])
    prev_state = tf.ones(
        dtype=tf.float32, shape=[self.batch_size, self.hidden_size])

    with self.assertRaisesRegexp(KeyError, "Invalid partitioner keys.*"):
      snt.VanillaRNN(name="rnn",
                     hidden_size=self.hidden_size,
                     partitioners={"invalid": None})

    err = "Partitioner for 'w' is not a callable function"
    with self.assertRaisesRegexp(TypeError, err):
      snt.VanillaRNN(name="rnn",
                     hidden_size=self.hidden_size,
                     partitioners={"in_to_hidden": {"w": tf.zeros([10, 10])}})

    # Nested partitioners.
    valid_partitioners = {
        "in_to_hidden": {
            "w": tf.fixed_size_partitioner(num_shards=2),
            "b": tf.fixed_size_partitioner(num_shards=2),
        },
        "hidden_to_hidden": {
            "w": tf.fixed_size_partitioner(num_shards=2),
            "b": tf.fixed_size_partitioner(num_shards=2),
        }
    }

    vanilla_rnn = snt.VanillaRNN(name="rnn",
                                 hidden_size=self.hidden_size,
                                 partitioners=valid_partitioners)

    vanilla_rnn(inputs, prev_state)

    self.assertEqual(type(vanilla_rnn.in_to_hidden_linear.w),
                     variables.PartitionedVariable)
    self.assertEqual(type(vanilla_rnn.in_to_hidden_linear.b),
                     variables.PartitionedVariable)
    self.assertEqual(type(vanilla_rnn.hidden_to_hidden_linear.w),
                     variables.PartitionedVariable)
    self.assertEqual(type(vanilla_rnn.hidden_to_hidden_linear.b),
                     variables.PartitionedVariable)
Example #6
  def testPartitioners(self, offset, scale):
    partitioners = {}

    if scale:
      partitioners["gamma"] = tf.fixed_size_partitioner(num_shards=2)
    if offset:
      partitioners["beta"] = tf.fixed_size_partitioner(num_shards=2)

    inputs_shape = [10, 10]
    inputs = tf.placeholder(tf.float32, shape=[None] + inputs_shape)
    bn = snt.BatchNorm(offset=offset, scale=scale, partitioners=partitioners)
    self.assertEqual(bn.partitioners, partitioners)
    bn(inputs, is_training=True)

    if scale:
      self.assertEqual(type(bn.gamma), variables.PartitionedVariable)
    if offset:
      self.assertEqual(type(bn.beta), variables.PartitionedVariable)
Example #7
 def testFixedSizePartitioner(self):
   with self.test_session():
     partitioner = tf.fixed_size_partitioner(5, axis=0)
     with tf.variable_scope("root", partitioner=partitioner):
       v0 = tf.get_variable("v0", dtype=tf.float32, shape=(10, 10))
       v0_list = v0._get_variable_list()
       v0_part = v0._get_partitions()
       self.assertEqual(len(v0_list), 5)
       self.assertAllEqual(v0_part, (5, 1))
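For context, a short sketch (TF1-style API; as_tensor() is a method of PartitionedVariable) showing that the five shards created above are [2, 10] slices that can be read back as a single [10, 10] tensor:

import tensorflow as tf  # assumes TensorFlow 1.x (or tf.compat.v1 in TF 2.x)

partitioner = tf.fixed_size_partitioner(5, axis=0)
with tf.variable_scope("sharded", partitioner=partitioner):
    v = tf.get_variable("v", dtype=tf.float32, shape=(10, 10))

print([part.shape.as_list() for part in v])  # five [2, 10] shards
full = v.as_tensor()  # concatenation of the shards along axis 0
print(full.shape)     # (10, 10)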
Example #8
 def module_fn():
   """A module summing one normal and one partitioned variable."""
   partitioner = tf.fixed_size_partitioner(partitions)
   var_1 = tf.get_variable(
       initializer=tf.ones(shape),
       name="partitioned_variable",
       partitioner=partitioner)
   var_2 = tf.get_variable(
       initializer=tf.ones(shape), name="normal_variable")
   hub.add_signature(outputs=var_1 + var_2)
Example #9
def module_with_variables():
  tf.get_variable(
      name="weights",
      shape=[3],
      initializer=tf.zeros_initializer())
  tf.get_variable(
      name="partition",
      shape=[4],
      initializer=tf.zeros_initializer(),
      partitioner=tf.fixed_size_partitioner(3))
Example #10
def module_with_variables():
  tf.get_variable(
      name="weights",
      shape=[3],
      initializer=tf.zeros_initializer())
  tf.get_variable(
      name="partition",
      shape=[4],
      initializer=tf.zeros_initializer(),
      partitioner=tf.fixed_size_partitioner(3))
  hub.add_signature(outputs=tf.constant(1.0))
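A module_fn like the one above is normally turned into a module spec and instantiated; a minimal sketch assuming the TF1-style tensorflow_hub API (hub.create_module_spec / hub.Module):

import tensorflow as tf          # assumes TensorFlow 1.x
import tensorflow_hub as hub     # assumes the TF1-style hub.Module API

spec = hub.create_module_spec(module_with_variables)
with tf.Graph().as_default():
    module = hub.Module(spec)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # The default signature added above returns the constant 1.0.
        print(sess.run(module()))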
Example #11
  def testPartitioners(self):
    partitioners = {
        "w": tf.fixed_size_partitioner(num_shards=2),
        "b": tf.fixed_size_partitioner(num_shards=2),
    }

    alex_net = snt.nets.AlexNetMini(
        partitioners=partitioners, name="alexnet1")

    input_shape = [alex_net._min_size, alex_net._min_size, 3]
    inputs = tf.placeholder(tf.float32, shape=[None] + input_shape)
    alex_net(inputs)

    for conv_module in alex_net.conv_modules:
      self.assertEqual(type(conv_module.w), variables.PartitionedVariable)
      self.assertEqual(type(conv_module.b), variables.PartitionedVariable)

    for linear_module in alex_net.linear_modules:
      self.assertEqual(type(linear_module.w), variables.PartitionedVariable)
      self.assertEqual(type(linear_module.b), variables.PartitionedVariable)
Example #12
  def testPartitioners(self, offset, scale):
    partitioners = {}

    if scale:
      partitioners["gamma"] = tf.fixed_size_partitioner(num_shards=2)
    if offset:
      partitioners["beta"] = tf.fixed_size_partitioner(num_shards=2)

    inputs_shape = [10, 10]
    inputs = tf.placeholder(tf.float32, shape=[None] + inputs_shape)
    bn = snt.BatchNormV2(
        offset=offset,
        scale=scale,
        partitioners=partitioners)
    self.assertEqual(bn.partitioners, partitioners)
    bn(inputs, is_training=True)

    if scale:
      self.assertLen(tf.global_variables("batch_norm/gamma"), 2)
    if offset:
      self.assertLen(tf.global_variables("batch_norm/beta"), 2)
Example #13
  def setUp(self):
    super(MLPTest, self).setUp()

    self.output_sizes = [11, 13, 17]
    self.batch_size = 5
    self.input_size = 7
    self.module_name = "mlp"
    self.initializers = {
        "w": tf.truncated_normal_initializer(stddev=1.0),
    }
    self.regularizers = {
        "w": tf.contrib.layers.l1_regularizer(scale=0.1),
    }
    self.partitioners = {
        "w": tf.fixed_size_partitioner(num_shards=2),
    }
Example #14
 def test_return_all_variables_from_checkpoint_with_partition(self):
   with tf.Graph().as_default():
     partitioner = tf.fixed_size_partitioner(2)
     variables = [
         tf.get_variable(
             name='weights', shape=(2, 2), partitioner=partitioner),
         tf.Variable([1.0, 2.0], name='biases')
     ]
     checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt')
     init_op = tf.global_variables_initializer()
     saver = tf.train.Saver(variables)
     with self.test_session() as sess:
       sess.run(init_op)
       saver.save(sess, checkpoint_path)
     out_variables = variables_helper.get_variables_available_in_checkpoint(
         variables, checkpoint_path)
   self.assertItemsEqual(out_variables, variables)
Example #15
  def testInvalidDicts(self):
    batch_size = 3
    # Mistake seen in the wild - https://github.com/deepmind/sonnet/issues/74
    # Should actually be {'hidden_to_hidden': {'w': some_initializers(), ...}}
    initializers = {"hidden_to_hidden": tf.truncated_normal_initializer(0, 1)}
    vanilla_rnn = snt.VanillaRNN(hidden_size=23, initializers=initializers)
    with self.assertRaisesRegexp(TypeError, "Expected a dict"):
      vanilla_rnn(tf.zeros([batch_size, 4], dtype=tf.float32),
                  vanilla_rnn.zero_state(batch_size, dtype=tf.float32))

    # Error: should be a dict mapping strings to partitioners/regularizers.
    partitioners = tf.fixed_size_partitioner(num_shards=16)
    with self.assertRaisesRegexp(TypeError, "Expected a dict"):
      snt.LSTM(hidden_size=42, partitioners=partitioners)

    regularizers = tf.contrib.layers.l1_regularizer(scale=0.5)
    with self.assertRaisesRegexp(TypeError, "Expected a dict"):
      snt.GRU(hidden_size=108, regularizers=regularizers)
Example #16
  def testConcatOpGetRegularizer(self, use_batch_norm, use_partitioner):
    sc = self._batch_norm_scope() if use_batch_norm else []
    partitioner = tf.fixed_size_partitioner(2) if use_partitioner else None
    with tf.contrib.framework.arg_scope(sc):
      with tf.variable_scope(tf.get_variable_scope(), partitioner=partitioner):
        final_op = op_regularizer_stub.build_model()
    op_reg_manager = orm.OpRegularizerManager([final_op],
                                              op_regularizer_stub.MOCK_REG_DICT)
    expected_alive = op_regularizer_stub.expected_alive()

    expected = np.logical_or(expected_alive['conv4'],
                             expected_alive['concat'])
    with self.test_session():
      conv_reg = op_reg_manager.get_regularizer(_get_op('conv4/Conv2D'))
      self.assertAllEqual(expected, conv_reg.alive_vector.eval())

      relu_reg = op_reg_manager.get_regularizer(_get_op('conv4/Relu'))
      self.assertAllEqual(expected, relu_reg.alive_vector.eval())
Example #17
  def testSimpleOpGetRegularizer(self, use_batch_norm, use_partitioner, scope):
    # Tests the alive pattern of the conv and relu ops.
    # use_batch_norm: A Boolean. Indicates if batch norm should be used.
    # use_partitioner: A Boolean. Indicates if a fixed_size_partitioner should
    #   be used.
    # scope: A String with the scope to test.
    sc = self._batch_norm_scope() if use_batch_norm else []
    partitioner = tf.fixed_size_partitioner(2) if use_partitioner else None
    with tf.contrib.framework.arg_scope(sc):
      with tf.variable_scope(tf.get_variable_scope(), partitioner=partitioner):
        final_op = op_regularizer_stub.build_model()

    op_reg_manager = orm.OpRegularizerManager([final_op],
                                              op_regularizer_stub.MOCK_REG_DICT)
    expected_alive = op_regularizer_stub.expected_alive()
    with self.test_session():
      conv_reg = op_reg_manager.get_regularizer(_get_op(scope + '/Conv2D'))
      self.assertAllEqual(expected_alive[scope],
                          conv_reg.alive_vector.eval())

      relu_reg = op_reg_manager.get_regularizer(_get_op(scope +  '/Relu'))
      self.assertAllEqual(expected_alive[scope],
                          relu_reg.alive_vector.eval())
Example #18
    def build_embedding(params, num_shards):
        feature_conf = params['feature_conf']
        feature_list_conf = params['feature_list']
        feature_list = [
            feature_list_conf[key]
            for key in sorted(feature_list_conf, reverse=False)
        ]
        model_conf = params['model_conf']
        vocabulary_conf = params['vocabulary_conf']
        embed_dim = model_conf['embed_dim']
        first_order = int(model_conf['first_order'])
        partitioner = tf.fixed_size_partitioner(
            num_shards) if num_shards > 1 else None

        table = OrderedDict()
        sparse = []
        deep = OrderedDict()
        multi = OrderedDict()
        model_struct = defaultdict(list)
        numeric = []
        dense = []
        dense_tag = []
        wide_dim, deep_dim, deep_num, cate_num = 0, 0, 0, 0
        con_num, all_num, con_deep_num, con_bias_num = 0, 0, 0, 0
        for feature in feature_list:
            if not feature in feature_conf:
                continue
            conf = feature_conf[feature]
            if conf['ignore']:
                continue
            f_type, f_tran, f_param = conf['type'], conf['transform'], conf[
                'parameter']
            if 'group' in conf:
                for struct in conf['group']:
                    model_struct[struct].append(feature)
            f_multi = conf['multi'] if 'multi' in conf else {
                'num': 1,
                'same': True,
                'combiner': 'none'
            }
            feature_name = f_param['name'] if 'name' in f_param else feature
            feature_embed_dim = f_param[
                'embed_dim'] if 'embed_dim' in f_param else embed_dim
            feature_scope = f_param[
                'scope'] if 'scope' in f_param else 'embedding'

            with tf.variable_scope(feature_scope,
                                   reuse=tf.AUTO_REUSE,
                                   partitioner=partitioner) as scope:
                if f_type == 'category':
                    f_num, combiner = f_multi['num'], f_multi['combiner']
                    default_value = f_param[
                        'default'] if 'default' in f_param else 0
                    if combiner != 'none' and f_num >= 1:
                        f_num = 1
                    if f_tran == 'vocabulary_list':
                        vocabulary = vocabulary_conf[feature]
                        vocabulary = ['DEFAULT'] + vocabulary
                        table.update({
                            feature:
                            lookup.index_table_from_tensor(
                                mapping=tf.constant(vocabulary),
                                default_value=default_value)
                        })
                        f_dim = len(vocabulary) * f_num
                        f_size = len(vocabulary)
                        fill_value = f_param[
                            'fill'] if 'fill' in f_param else ''  #'DEFAULT'
                    elif f_tran == 'tabled':
                        f_dim = f_param['size'] * f_num
                        f_size = f_param['size']
                        fill_value = f_param[
                            'fill'] if 'fill' in f_param else ''  #'0'
                    else:
                        assert False, 'only support category features with vocabulary or tabled'

                    if 'onehot' in conf['style']:
                        sparse.append(feature)
                        if f_num >= 1:
                            wide_dim += f_dim * f_num
                        else:
                            wide_dim += f_dim * (-f_num)
                    if 'embedding' in conf['style']:
                        deep.update({
                            feature:
                            tf.get_variable(
                                initializer=tf.random.normal(
                                    [f_size, feature_embed_dim + first_order],
                                    0.0, 0.1),
                                name='{}_embedding'.format(feature_name))
                        })

                        if f_num >= 1:
                            deep_dim += (feature_embed_dim +
                                         first_order) * f_num
                            deep_num += f_num
                        else:
                            deep_dim += (feature_embed_dim +
                                         first_order) * (-f_num)
                            deep_num += -f_num

                    f_multi['same'] = False
                    dense_tag += [0] * abs(f_num)
                    cate_num += abs(f_num)
                    all_num += abs(f_num)
                    tail_value = f_param[
                        'tail'] if 'tail' in f_param else f_size

                elif f_type == 'numeric':
                    f_size = 1
                    f_num = f_multi['num']
                    numeric.append(feature)
                    if 'value' in conf['style']:
                        dense.append(feature)
                    if 'embedding' in conf['style']:
                        if f_num >= 1:
                            if f_multi['combiner'] == 'none':
                                if f_multi['same']:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal(
                                                [1, embed_dim + first_order],
                                                0.0, 0.1),
                                            name='{}'.format(feature_name))
                                    })
                                else:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal([
                                                f_num, embed_dim + first_order
                                            ], 0.0, 0.1),
                                            name='{}'.format(feature_name))
                                    })
                                con_num += f_num
                                all_num += f_num
                                con_deep_num += f_num
                                deep_num += f_num
                                deep_dim += (embed_dim + first_order) * f_num
                                dense_tag += [1] * f_num
                            else:
                                con_num += 1
                                all_num += 1
                                if f_multi['same']:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal(
                                                [1, embed_dim + first_order],
                                                0.0, 0.1),
                                            name='{}'.format(feature_name))
                                    })
                                else:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal([
                                                f_num, embed_dim + first_order
                                            ], 0.0, 0.1),
                                            name='{}'.format(feature_name))
                                    })
                                con_deep_num += 1
                                deep_num += 1
                                deep_dim += embed_dim + first_order
                                dense_tag.append(1)

                        else:
                            if f_multi['combiner'] == 'none':
                                f_num = -f_num
                                if f_multi['same']:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal(
                                                [1, embed_dim + first_order],
                                                0.0, 0.1),
                                            name='{}_embedding'.format(
                                                feature_name))
                                    })
                                else:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal([
                                                f_num, embed_dim + first_order
                                            ], 0.0, 0.1),
                                            name='{}_embedding'.format(
                                                feature))
                                    })
                                con_num += f_num
                                all_num += f_num
                                con_deep_num += f_num
                                deep_num += f_num
                                deep_dim += (embed_dim + first_order) * f_num
                                dense_tag += [1] * f_num
                            else:
                                f_num -= f_num
                                con_num += 1
                                all_num += 1
                                if f_multi['same']:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal(
                                                [1, embed_dim + first_order],
                                                0.0, 0.1),
                                            name='{}_embedding'.format(
                                                feature))
                                    })
                                else:
                                    deep.update({
                                        feature:
                                        tf.get_variable(
                                            initializer=tf.random.normal([
                                                f_num, embed_dim + first_order
                                            ], 0.0, 0.1),
                                            name='{}_embedding'.format(
                                                feature))
                                    })
                                con_deep_num += 1
                                deep_num += 1
                                deep_dim += embed_dim + first_order
                                dense_tag.append(1)

                    f_dim = -1
                    default_value = 0
                    fill_value = f_param[
                        'fill'] if 'fill' in f_param else ''  #'0'
                    tail_value = f_param['tail'] if 'tail' in f_param else 0
                else:
                    assert False, "cant't handle this type now: {}".format(
                        f_type)
                multi.update({
                    feature:
                    (f_type, f_multi['num'], f_size, f_multi['combiner'],
                     f_multi['same'], default_value, fill_value, tail_value)
                })

        dims = {
            'deep_num': deep_num,
            'deep_dim': deep_dim,
            'wide_dim': wide_dim,
            'con_num': con_num,
            'cate_num': cate_num,
            's_embed_size': embed_dim,
            'cate_deep_num': deep_num - con_deep_num,
            'd_embed_size': embed_dim,
            'all_num': all_num,
            'dense_tag': dense_tag,
            'con_deep_num': con_deep_num
        }
        columns = {
            'table': table,
            'sparse': sparse,
            'deep': deep,
            'dense': dense,
            'numeric': numeric,
            'dense_tag': dense_tag,
            'multi': multi
        }
        # dense_tag = tf.constant(dense_tag)
        return model_struct, columns, dims
Example #19
def train():
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    print('PS hosts are: %s' % ps_hosts)
    print('Worker hosts are: %s' % worker_hosts)
    server = tf.train.Server({
        'ps': ps_hosts,
        'worker': worker_hosts
    },
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id)
    if FLAGS.job_name == 'ps':
        server.join()
    is_chief = (FLAGS.task_id == 0)
    if is_chief:
        if tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.DeleteRecursively(FLAGS.train_dir)
        tf.gfile.MakeDirs(FLAGS.train_dir)

    device_setter = tf.train.replica_device_setter(ps_tasks=len(ps_hosts))
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        partitioner = tf.fixed_size_partitioner(len(ps_hosts), axis=0)
        with tf.variable_scope('partitioned_space', partitioner=partitioner):
            with tf.device(device_setter):
                global_step = tf.Variable(0, trainable=False)
                decay_steps = 50000 * 350.0 / FLAGS.batch_size
                batch_size = tf.placeholder(dtype=tf.int32,
                                            shape=(),
                                            name='batch_size')
                images, labels = cifar10.distorted_inputs(batch_size)
                inputs = tf.reshape(images, [-1, _HEIGHT, _WIDTH, _DEPTH])
                labels = tf.one_hot(labels, 10, 1, 0)

                # network_fn = nets_factory.get_network_fn('alexnet_v2', num_classes=10)
                # (logits, _) = network_fn(inputs)
                # with slim.arg_scope(alexnet.alexnet_v2_arg_scope(weight_decay=0.0)):
                (logits, _) = alexnet.alexnet_v2(inputs,
                                                 num_classes=10,
                                                 is_training=True)

                cross_entropy = tf.losses.softmax_cross_entropy(
                    logits=logits, onehot_labels=labels)
                loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
                    [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

                # Decay the learning rate exponentially based on the number of steps.
                lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE *
                                                len(worker_hosts),
                                                global_step,
                                                decay_steps,
                                                LEARNING_RATE_DECAY_FACTOR,
                                                staircase=True)
                opt = tf.train.GradientDescentOptimizer(lr)
                # Track the moving averages of all trainable variables.
                exp_moving_averager = tf.train.ExponentialMovingAverage(
                    MOVING_AVERAGE_DECAY, global_step)
                variables_to_average = (tf.trainable_variables() +
                                        tf.moving_average_variables())
                opt = tf.train.SyncReplicasOptimizer(
                    opt,
                    replicas_to_aggregate=len(worker_hosts),
                    total_num_replicas=len(worker_hosts),
                    variable_averages=exp_moving_averager,
                    variables_to_average=variables_to_average)
                naive_grads = opt.compute_gradients(loss)
                grads = [(tf.scalar_mul(
                    tf.cast(batch_size / FLAGS.batch_size, tf.float32),
                    grad), var) for grad, var in naive_grads]
                apply_gradients_op = opt.apply_gradients(
                    grads, global_step=global_step)
                with tf.control_dependencies([apply_gradients_op]):
                    train_op = tf.identity(loss, name='train_op')

                chief_queue_runners = [opt.get_chief_queue_runner()]
                init_tokens_op = opt.get_init_tokens_op()
                saver = tf.train.Saver()
                sv = tf.train.Supervisor(is_chief=is_chief,
                                         logdir=FLAGS.train_dir,
                                         init_op=tf.group(
                                             tf.global_variables_initializer(),
                                             tf.local_variables_initializer()),
                                         summary_op=None,
                                         global_step=global_step,
                                         saver=saver,
                                         recovery_wait_secs=1,
                                         save_model_secs=60)

                tf.logging.info('%s Supervisor' % datetime.now())
                sess_config = tf.ConfigProto(
                    allow_soft_placement=True,
                    log_device_placement=FLAGS.log_device_placement)
                sess_config.gpu_options.allow_growth = True
                sess = sv.prepare_or_wait_for_session(server.target,
                                                      config=sess_config)
                queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
                sv.start_queue_runners(sess, queue_runners)

                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)
                """Train CIFAR-10 for a number of steps."""
                time0 = time.time()
                batch_size_num = FLAGS.batch_size
                for step in range(FLAGS.max_steps):
                    start_time = time.time()
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size_num
                    decay_steps_num = int(num_batches_per_epoch *
                                          NUM_EPOCHS_PER_DECAY)
                    _, loss_value, gs = sess.run(
                        [train_op, loss, global_step],
                        feed_dict={batch_size: batch_size_num},
                        options=run_options,
                        run_metadata=run_metadata)
                    b = time.time()

                    if step % 1 == 0:
                        duration = time.time() - start_time
                        num_examples_per_step = batch_size_num
                        examples_per_sec = num_examples_per_step / duration
                        sec_per_batch = float(duration)
                        format_str = (
                            "time: " + str(time.time()) +
                            '; %s: step %d (global_step %d), loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                        )
                        tf.logging.info(format_str %
                                        (datetime.now(), step, gs, loss_value,
                                         examples_per_sec, sec_per_batch))
Example #20
def model_fn(features, labels, mode, params, config):

    visit_items_index = features["visit_items_index"]    # num * 5
    continuous_features_value = features["continuous_features_value"]  # num * 16
    next_visit_item_index = labels    # num
    keep_prob = params["keep_prob"]
    embedding_size = params["embedding_size"]
    item_num = params["item_num"]
    learning_rate = params["learning_rate"]
    top_k = params["top_k"]

    # Initialize the item embeddings
    initializer = tf.initializers.random_uniform(minval=-0.5 / embedding_size, maxval=0.5 / embedding_size)
    partitioner = tf.fixed_size_partitioner(num_shards=embedding_size)
    item_embedding = tf.get_variable("item_embedding", [item_num, embedding_size],
                                     tf.float32, initializer=initializer, partitioner=partitioner)

    visit_items_embedding = tf.nn.embedding_lookup(item_embedding, visit_items_index)       # num * 5 * embedding_size
    visit_items_average_embedding = tf.reduce_mean(visit_items_embedding, axis=1)     # num * embedding_size
    input_embedding = tf.concat([visit_items_average_embedding, continuous_features_value], 1)   # num * (embedding_size + 16)
    kernel_initializer_1 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    bias_initializer_1 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    layer_1 = tf.layers.dense(input_embedding, 64, activation=tf.nn.relu,
                              kernel_initializer=kernel_initializer_1,
                              bias_initializer=bias_initializer_1, name="layer_1")
    layer_dropout_1 = tf.nn.dropout(layer_1, keep_prob=keep_prob, name="layer_dropout_1")
    kernel_initializer_2 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    bias_initializer_2 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    layer_2 = tf.layers.dense(layer_dropout_1, 32, activation=tf.nn.relu,
                              kernel_initializer=kernel_initializer_2,
                              bias_initializer=bias_initializer_2, name="layer_2")
    layer_dropout_2 = tf.nn.dropout(layer_2, keep_prob=keep_prob, name="layer_dropout_2")
    # user vector, num * embedding_size
    kernel_initializer_3 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    bias_initializer_3 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    user_vector = tf.layers.dense(layer_dropout_2, embedding_size, activation=tf.nn.relu,
                                  kernel_initializer=kernel_initializer_3,
                                  bias_initializer=bias_initializer_3, name="user_vector")

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Training
        output_embedding = tf.nn.embedding_lookup(item_embedding, next_visit_item_index)  # num * embedding_size
        logits = tf.matmul(user_vector, output_embedding, transpose_a=False, transpose_b=True)  # num * num
        yhat = tf.nn.softmax(logits)  # num * num
        cross_entropy = tf.reduce_mean(-tf.log(tf.matrix_diag_part(yhat) + 1e-16))
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train = optimizer.minimize(cross_entropy, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=cross_entropy, train_op=train)

    if mode == tf.estimator.ModeKeys.EVAL:
        # Evaluation
        output_embedding = tf.nn.embedding_lookup(item_embedding, next_visit_item_index)  # num * embedding_size
        logits = tf.matmul(user_vector, output_embedding, transpose_a=False, transpose_b=True)  # num * num
        yhat = tf.nn.softmax(logits)  # num * num
        cross_entropy = tf.reduce_mean(-tf.log(tf.matrix_diag_part(yhat) + 1e-16))
        return tf.estimator.EstimatorSpec(mode, loss=cross_entropy)

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits_predict = tf.matmul(user_vector, item_embedding, transpose_a=False, transpose_b=True)  # num *  item_num
        yhat_predict = tf.nn.softmax(logits_predict)  # num *  item_num
        _, indices = tf.nn.top_k(yhat_predict, k=top_k, sorted=True)
        index = tf.identity(indices, name="index")  # num * top_k
        # Prediction
        predictions = {
            "user_vector": user_vector,
            "index": index
        }
        export_outputs = {
            "prediction": tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)
Example #21
    def build_inference(self, x, flag="train"):
        # Set up the regularizers, one for each of the four parts of the network
        regularizer1 = self.param_dict[
            "regulerizer1"] if flag == "train" else None
        regularizer2 = self.param_dict[
            "regulerizer2"] if flag == "train" else None
        regularizer3 = self.param_dict[
            "regulerizer3"] if flag == "train" else None
        regularizer4 = self.param_dict[
            "regulerizer4"] if flag == "train" else None
        is_train = True if flag == "train" else False
        # First fetch the required parameters
        hash_size = self.param_dict['hash_size']
        no_hash = self.param_dict["no_hash"]
        embed_size = self.param_dict["embed_size"]
        # Get the activation function from the configuration
        act_fn = self.get_activation_func(is_train)
        # Whether to enable mini-batch aware regularization
        is_mba_reg = self.param_dict["is_mba_reg"]
        lambda_reg_mba = self.param_dict["lambda_reg_mba"]
        is_action_mba_reg = self.param_dict["is_action_mba_reg"]

        # Split the input
        x_feature = x[:, :-3]
        x_action_lists = x[:, -3:]

        # First convert the sparse features into indices
        x_sparse = []
        for i in range(len(hash_size)):
            if i in no_hash:
                # These features can be used directly as indices; no conversion needed
                x_i = tf.string_to_number(x_feature[:, i], tf.int32)
                x_sparse.append(x_i)
            else:
                # These features are converted into indices via a hash function
                x_i = tf.string_to_hash_bucket_strong(
                    input=x_feature[:, i],
                    num_buckets=hash_size[i],
                    key=[679362, 964545],
                    name="sparse_feature_{}".format(i))
                x_sparse.append(x_i)
        # Convert the sparse data into embedding vectors
        x_embed = []
        w_action_embed = []
        x_action = []
        indice_sku_cate_brand = []
        sku_cate_brand_index = self.param_dict["sku_cate_brand_index"]
        for i in range(len(embed_size)):
            if embed_size[i] != -1:
                with tf.variable_scope("embedding_{}".format(i)):
                    if hash_size[i] <= 500000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]], regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]))
                    elif hash_size[i] > 500000 and hash_size[i] <= 5000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(5, 0))
                    elif hash_size[i] > 5000000 and hash_size[i] <= 10000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(10, 0))
                    elif hash_size[i] > 10000000 and hash_size[i] <= 15000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(15, 0))
                    elif hash_size[i] > 15000000 and hash_size[i] <= 20000000:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(20, 0))
                    else:
                        weights = self.get_weight_variable(
                            [hash_size[i], embed_size[i]],
                            regularizer1,
                            self.param_dict["initializer_embedding_w"](
                                [hash_size[i], embed_size[i]]),
                            partitioner=tf.fixed_size_partitioner(30, 0))
                x_i = tf.nn.embedding_lookup(weights, x_sparse[i])

                if i in sku_cate_brand_index:  # embedding vectors for skuid, cateid, brandid
                    w_action_embed.append(weights)
                    x_action.append(x_i)
                    indice_sku_cate_brand.append(x_sparse[i])
                    if is_train and is_mba_reg and not is_action_mba_reg:
                        # Compute the mini-batch aware regularization
                        self.calculate_mini_batch_aware_reg(
                            weights, x_sparse[i], lambda_reg_mba)
                else:
                    if is_train and is_mba_reg:
                        # Compute the mini-batch aware regularization
                        self.calculate_mini_batch_aware_reg(
                            weights, x_sparse[i], lambda_reg_mba)

            else:
                x_i = tf.one_hot(x_sparse[i], depth=hash_size[i])

            x_embed.append(x_i)

            # if i in sku_cate_brand_index: # embedding vectors for skuid, cateid, brandid
            #     with tf.variable_scope("embedding_{}".format(i)):
            #         weights = self.get_weight_variable([hash_size[i], embed_size[i]], regularizer1,
            #                                             self.param_dict["initializer_embedding_w"]([hash_size[i], embed_size[i]]),
            #                                             partitioner=tf.fixed_size_partitioner(20, 0))
            #         w_action_embed.append(weights)
            #         x_i = tf.nn.embedding_lookup(weights, x_sparse[i])
            #         if is_train and is_mba_reg and not is_action_mba_reg:
            #             # Compute the mini-batch aware regularization
            #             self.calculate_mini_batch_aware_reg(weights, x_sparse[i], lambda_reg_mba)
            #
            #         indice_sku_cate_brand.append(x_sparse[i])
            #         x_embed.append(x_i)
            #         x_action.append(x_i)
            # else:
            #     if embed_size[i] != -1:
            #         with tf.variable_scope("embedding_{}".format(i)):
            #             if i == 0:
            #                 weights = self.get_weight_variable([hash_size[i], embed_size[i]], regularizer1,
            #                                                    self.param_dict["initializer_embedding_w"]([hash_size[i], embed_size[i]]),
            #                                                    partitioner=tf.fixed_size_partitioner(20, 0))
            #             else:
            #                 weights = self.get_weight_variable([hash_size[i], embed_size[i]], regularizer1,
            #                                                    self.param_dict["initializer_embedding_w"]([hash_size[i], embed_size[i]]))
            #             x_i = tf.nn.embedding_lookup(weights, x_sparse[i])
            #             if is_train and is_mba_reg:
            #                 # Compute the mini-batch aware regularization
            #                 self.calculate_mini_batch_aware_reg(weights, x_sparse[i], lambda_reg_mba)
            #
            #             x_embed.append(x_i)
            #     else:
            #         x_i = tf.one_hot(x_sparse[i], depth=hash_size[i])
            #         x_embed.append(x_i)
        x_embed = tf.concat(x_embed, 1)

        # Model the browsing behaviour: build the DIN
        with tf.name_scope("user_behaviours"):
            x_browse_skus_list = tf.reshape(x_action_lists[:, 0], [
                -1,
            ])
            x_browse_cates_list = tf.reshape(x_action_lists[:, 1], [
                -1,
            ])
            x_browse_brand_list = tf.reshape(x_action_lists[:, 2], [
                -1,
            ])
            browse_lists = [
                x_browse_skus_list, x_browse_cates_list, x_browse_brand_list
            ]
            browse_names = ['skus', 'cates', 'brands']
            browse_nums = self.param_dict["browse_nums"]
            x_action_list_embeds = []
            sum_poolings = []
            x_action_list_masks = []
            for i in range(len(browse_names)):
                # for i in [0]:
                with tf.name_scope("user_browse_{}_embedding".format(
                        browse_names[i])):
                    browse_w_embed = w_action_embed[i]
                    # x_ad_embedded = x_action[i]
                    x_browse_action = browse_lists[
                        i]  # shape of x_browse_action is [?,]
                    x_browse_action_list = tf.string_split(
                        x_browse_action, "#")
                    x_browse_action_list_indices = tf.sparse_to_dense(
                        x_browse_action_list.indices,
                        # x_browse_action_list.dense_shape,
                        [x_browse_action_list.dense_shape[0], browse_nums[i]],
                        tf.string_to_hash_bucket_strong(
                            x_browse_action_list.values,
                            num_buckets=browse_w_embed.get_shape()[0].value,
                            key=[679362, 964545],
                            name="sparse_user_browse_{}".format(
                                browse_names[i])),
                        -1)
                    indice_mask = tf.reshape(
                        tf.not_equal(x_browse_action_list_indices, -1),
                        [-1, browse_nums[i]])
                    x_action_list_masks.append(indice_mask)
                    x_action_list_embed = tf.reshape(
                        tf.nn.embedding_lookup(browse_w_embed,
                                               x_browse_action_list_indices),
                        [
                            -1, browse_nums[i],
                            browse_w_embed.get_shape()[1].value
                        ])
                    if is_train and is_action_mba_reg:
                        # Compute the mini-batch aware regularization
                        indice_action = tf.concat([
                            tf.string_to_hash_bucket_strong(
                                x_browse_action_list.values,
                                num_buckets=browse_w_embed.get_shape()
                                [0].value,
                                key=[679362, 964545]), indice_sku_cate_brand[i]
                        ], 0)
                        self.calculate_mini_batch_aware_reg(
                            browse_w_embed, indice_action, lambda_reg_mba)
                    x_action_list_embeds.append(x_action_list_embed)

            with tf.name_scope("activation_unit"):
                act_unit_hidden_layers = self.param_dict[
                    "act_unit_hidden_layers"]
                action_indexs = self.param_dict["action_indexs"]
                # for i in range(len(x_action_list_embeds)):
                for i in action_indexs:
                    x_action_list_embed = x_action_list_embeds[i]
                    x_ad_embedded = x_action[i]
                    indice_mask = x_action_list_masks[i]
                    # Outer product: flatten the Cartesian-product matrix into a vector
                    # out_product_list = tf.map_fn(lambda action_emb: tf.reshape(tf.matmul(tf.expand_dims(action_emb, 2), tf.expand_dims(x_ad_embedded, 1)), [-1, x_ad_embedded.shape[1].value ** 2]),
                    #                              tf.transpose(x_action_list_embed, [1, 0, 2]))

                    # Approximate outer product: subtract the vectors, then concat with the element-wise product

                    x_action_list_embed_new = tf.transpose(
                        x_action_list_embed, [1, 0, 2])

                    concat_list = [
                        tf.concat([
                            x_action_list_embed_new[ii],
                            x_action_list_embed_new[ii] - x_ad_embedded,
                            x_action_list_embed_new[ii] * x_ad_embedded,
                            x_ad_embedded
                        ], 1)
                        for ii in range(x_action_list_embed_new.shape[0].value)
                    ]

                    act_unit_in = concat_list[0].shape[1].value
                    act_in = concat_list
                    with tf.variable_scope("activation_unit_{}_list".format(
                            browse_names[i])):
                        for ii in range(len(act_unit_hidden_layers)):
                            weights_act_unit = self.get_weight_variable(
                                [act_unit_in, act_unit_hidden_layers[ii]],
                                regularizer3,
                                self.param_dict["initializer_act_unit_w"](
                                    [act_unit_in, act_unit_hidden_layers[ii]]),
                                name='_act_unit_w_{}'.format(ii))
                            biases_act_unit = tf.get_variable(
                                "biases_{}_act_unit".format(ii),
                                [act_unit_hidden_layers[ii]],
                                initializer=tf.constant_initializer(0.0),
                                dtype=tf.float32)

                            act_out = list(
                                map(
                                    lambda act_in_i: act_fn(
                                        tf.matmul(act_in_i[0], weights_act_unit
                                                  ) + biases_act_unit,
                                        name="act_func_{}_{}".format(
                                            ii, act_in_i[1])),
                                    zip(act_in, range(len(act_in)))))

                            # act_out = [tf.expand_dims(act_fn(tf.matmul(act_in[ii], weights_act_unit) + biases_act_unit, name="act_func_{}_{}".format(i, ii)), 0)
                            #                 for ii in range(act_in.shape[0].value)]
                            act_in = act_out
                            act_unit_in = act_in[0].shape[1].value
                        act_output_in = act_in
                        act_output_unit = act_unit_in
                        weights_act_unit_output = self.get_weight_variable(
                            [act_output_unit, 1],
                            regularizer3,
                            self.param_dict["initializer_act_unit_w"](
                                [act_output_unit, 1]),
                            name='_act_unit_output_w')
                        biases_act_unit_output = tf.get_variable(
                            "biases_act_unit_output", [1],
                            initializer=tf.constant_initializer(0.0),
                            dtype=tf.float32)

                        act_output_out = tf.concat(
                            list(
                                map(
                                    lambda act_output_i: tf.expand_dims(
                                        tf.matmul(act_output_i,
                                                  weights_act_unit_output) +
                                        biases_act_unit_output, 0),
                                    act_output_in)), 0)
                        # act_output_out = tf.concat([tf.expand_dims(tf.matmul(act_output_in[iii], weights_act_unit_output) + biases_act_unit_output, 0) for iii in range(act_output_in.shape[0].value)], 0)
                    active_weight_score = tf.transpose(act_output_out,
                                                       [1, 0, 2])
                    # Set the weights of missing actions to 0.0
                    padding = tf.zeros_like(active_weight_score)
                    active_weight_score_t = tf.where(
                        tf.expand_dims(indice_mask, 2), active_weight_score,
                        padding)
                    with tf.name_scope("weight_sum_pooling"):
                        sum_pooling = tf.reduce_sum(
                            x_action_list_embed * active_weight_score_t, 1)
                    sum_poolings.append(sum_pooling)
            x_deep_in = tf.concat([x_embed, tf.concat(sum_poolings, 1)], 1)

        # Build the deep module
        with tf.name_scope("deep_network"):
            deep_layers = self.param_dict["deep_layers"]
            for i in range(len(deep_layers)):
                with tf.variable_scope("dnn_layer_{}".format(i)):
                    weights = self.get_weight_variable(
                        [x_deep_in.shape[1].value, deep_layers[i]],
                        regularizer2, self.param_dict["initializer_dnn_w"](
                            [x_deep_in.shape[1].value, deep_layers[i]]))
                    biases = tf.get_variable(
                        "biases", [deep_layers[i]],
                        initializer=tf.constant_initializer(0.0),
                        dtype=tf.float32)
                    layer_i = act_fn(tf.matmul(x_deep_in, weights) + biases,
                                     name="deep_mlp_{}".format(i))
                    x_deep_in = layer_i

        # Build the fully connected output module
        x_fc_in = x_deep_in
        with tf.name_scope("fc_layers"):
            fc_layers = self.param_dict['fc_layers']
            for i in range(len(fc_layers)):
                with tf.variable_scope("fc_layers_{}".format(i)):
                    weights = self.get_weight_variable(
                        [x_fc_in.shape[1].value, fc_layers[i]], regularizer4,
                        self.param_dict["initializer_fc_w"](
                            [x_fc_in.shape[1].value, fc_layers[i]]))
                    biases = tf.get_variable(
                        "biases", [fc_layers[i]],
                        initializer=tf.constant_initializer(0.0),
                        dtype=tf.float32)
                    layer_i = tf.nn.sigmoid(
                        tf.matmul(x_fc_in, weights) + biases)
                    x_fc_in = layer_i
        logit = x_fc_in
        return logit
Example #22
def train():
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    print('PS hosts are: %s' % ps_hosts)
    print('Worker hosts are: %s' % worker_hosts)

    server = tf.train.Server({
        'ps': ps_hosts,
        'worker': worker_hosts
    },
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id)
    if FLAGS.job_name == 'ps':
        server.join()
    is_chief = (FLAGS.task_id == 0)
    if is_chief:
        if tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.DeleteRecursively(FLAGS.train_dir)
        tf.gfile.MakeDirs(FLAGS.train_dir)

    device_setter = tf.train.replica_device_setter(ps_tasks=len(ps_hosts))
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        partitioner = tf.fixed_size_partitioner(len(ps_hosts), axis=0)
        with tf.variable_scope('root', partitioner=partitioner):
            with tf.device(device_setter):
                global_step = tf.Variable(0, trainable=False)

                decay_steps = 50000 * 350.0 / FLAGS.batch_size
                batch_size = tf.placeholder(dtype=tf.int32,
                                            shape=(),
                                            name='batch_size')
                images, labels = cifar10.distorted_inputs(batch_size)
                re = tf.shape(images)[0]
                inputs = tf.reshape(images, [-1, _HEIGHT, _WIDTH, _DEPTH])
                labels = tf.one_hot(labels, 10, 1, 0)
                network_fn = nets_factory.get_network_fn('vgg_16',
                                                         num_classes=10)
                (logits, _) = network_fn(inputs)
                cross_entropy = tf.losses.softmax_cross_entropy(
                    logits=logits, onehot_labels=labels)

                loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
                    [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

                train_op = cifar10.train(loss, global_step)

                sv = tf.train.Supervisor(is_chief=is_chief,
                                         logdir=FLAGS.train_dir,
                                         init_op=tf.group(
                                             tf.global_variables_initializer(),
                                             tf.local_variables_initializer()),
                                         summary_op=None,
                                         global_step=global_step,
                                         saver=None,
                                         recovery_wait_secs=1,
                                         save_model_secs=60)

                tf.logging.info('%s Supervisor' % datetime.now())
                sess_config = tf.ConfigProto(
                    allow_soft_placement=True,
                    log_device_placement=FLAGS.log_device_placement)
                sess_config.gpu_options.allow_growth = True

                # Get a session.
                sess = sv.prepare_or_wait_for_session(server.target,
                                                      config=sess_config)

                # Start the queue runners.
                queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
                sv.start_queue_runners(sess, queue_runners)
                """Train CIFAR-10 for a number of steps."""
                batch_size_num = FLAGS.batch_size
                for step in range(FLAGS.max_steps):
                    start_time = time.time()
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size_num
                    decay_steps_num = int(num_batches_per_epoch *
                                          NUM_EPOCHS_PER_DECAY)
                    _, loss_value, gs = sess.run(
                        [train_op, loss, global_step],
                        feed_dict={batch_size: batch_size_num},
                        options=run_options,
                        run_metadata=run_metadata)
                    duration = time.time() - start_time
                    num_examples_per_step = batch_size_num
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    format_str = (
                        "time: " + str(time.time()) +
                        '; %s: step %d (global_step %d), loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                    )
                    tf.logging.info(format_str %
                                    (datetime.now(), step, gs, loss_value,
                                     examples_per_sec, sec_per_batch))
Beispiel #23
0
with tf.device(tf.train.replica_device_setter(ps_tasks=3)):
    # assign variables to PS tasks in round-robin fashion (the default strategy)
    W1 = tf.get_variable('weights_1', [784, 100])
    b1 = tf.get_variable('biases_1', [100])
    W2 = tf.get_variable('weights_2', [100, 10])
    b2 = tf.get_variable('biases_2', [10])

greedy = tf.contrib.training.GreedyLoadBalancingStrategy(
    num_tasks=3, load_fn=tf.contrib.training.byte_size_load_fn)
with tf.device(tf.train.replica_device_setter(ps_tasks=3, ps_strategy=greedy)):
    # assign variables to PS tasks greedily by byte size instead of round-robin
    W1 = tf.get_variable('weights_1', [784, 100])
    b1 = tf.get_variable('biases_1', [100])
    W2 = tf.get_variable('weights_2', [100, 10])
    b2 = tf.get_variable('biases_2', [10])

    embedding = tf.get_variable('embedding', [1000000000, 20],
                                partitioner=tf.fixed_size_partitioner(3))

saver = tf.train.Saver(sharded=True)
# each PS task writes its shard of the checkpoint in parallel; this is not the default

# distributed code for a worker task
cluster = tf.train.ClusterSpec({
    "worker": ["192.168.0.1:2222", ...],
    "ps": ["192.168.1.1:2222", ...]
})
# tasks are launched by a cluster manager (e.g. Borg)
server = tf.train.Server(cluster, job_name="worker", task_index=0)
# the server object represents this particular task
with tf.Session(server.target) as sess:
    ...
    if is_chief and step % 1000 == 0:
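A standalone sketch of the same idea end to end: under a replica_device_setter, the shards produced by tf.fixed_size_partitioner land on different PS tasks, which is what makes Saver(sharded=True) useful. Sizes and task count here are illustrative.

import tensorflow as tf

with tf.device(tf.train.replica_device_setter(ps_tasks=3)):
    embedding = tf.get_variable('embedding', [300, 20],
                                partitioner=tf.fixed_size_partitioner(3))

for v in tf.global_variables():
    # e.g. embedding/part_0 -> /job:ps/task:0, embedding/part_1 -> /job:ps/task:1, ...
    print(v.op.name, '->', v.device)

saver = tf.train.Saver(sharded=True)  # lets each PS task write its part in parallel
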
Beispiel #24
0
def bayesianesque_embeddings(features, labels, mode, params):
    """
    Note: Labels will be max lengths.
    """
    seq_len = features["lens"]
    raw_seqs = features["seqs"]

    pdrop = params["pdrop"] if mode == tf.estimator.ModeKeys.TRAIN else 0.0

    # TODO add partitioners to these
    mu_embed = tf.get_variable(
        "mean_embed",
        shape=[params["vocab_size"], params["embed_dim"]],
        dtype=tf.float32,
        partitioner=tf.fixed_size_partitioner(params["num_shards"]))
    mean_embedded_input = tf.nn.embedding_lookup(mu_embed,
                                                 raw_seqs,
                                                 partition_strategy="div")

    if not params["raw_word2vec"]:
        cov_embed = tf.get_variable(
            "cov_embed",
            shape=[
                params["vocab_size"], params["variance_size"],
                params["embed_dim"]
            ],
            partitioner=tf.fixed_size_partitioner(params["num_shards"]))
        cov_embed_input = tf.nn.embedding_lookup(cov_embed,
                                                 raw_seqs,
                                                 partition_strategy="div")

        transformer_in = add_timing_signal_1d(mean_embedded_input)
        h = block(transformer_in, params["n_heads"], seq_len, pdrop,
                  "trans_block")

        cov = mlp(h,
                  "mlp",
                  params["embed_dim"] * 2,
                  pdrop,
                  nx=params["variance_size"])  # batch, seq, cov_dim
        mean, variance = tf.nn.moments(cov, [0, 1])
        divergence_loss = tf.reduce_mean(tf.abs(mean) + tf.abs(variance - 1.0))
        # TODO minimise divergence

        embedding = mean_embedded_input + tf.reduce_sum(
            tf.expand_dims(cov, 3) * cov_embed_input,
            2)  # batch, seq_len, embed_dim
    else:
        embedding = mean_embedded_input
        divergence_loss = 0.0

    seq_len_with_pad = tf.shape(embedding)[1]
    embed_mask = tf.expand_dims(
        tf.sequence_mask(seq_len, seq_len_with_pad, dtype=tf.float32), -1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        embedding *= embed_mask
        embedding = tf.reduce_mean(embedding, axis=1)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=embedding)

    if params["reweight"]:
        freqs = tf.gather(params["frequencies"], raw_seqs)
        weights = tf.sqrt(1 / freqs)
        embedding = class_reweighting(tf.expand_dims(weights, -1))(embedding)

    output_embed = tf.get_variable(
        "output_embed",
        shape=[params["vocab_size"], params["embed_dim"]],
        dtype=tf.float32,
        partitioner=tf.fixed_size_partitioner(params["num_shards"]))
    output_bias = tf.get_variable("output_bias",
                                  shape=[params["vocab_size"]],
                                  dtype=tf.float32)

    if params["reconstruction_loss"]:
        reconstruction_loss = decoder(
            embedding, raw_seqs, tf.reduce_mean(embedding * embed_mask,
                                                axis=1), output_embed,
            output_bias, params["n_heads"], params["num_decoder_blocks"],
            pdrop, seq_len, params["vocab_size"],
            params["sampled_softmax_size"])

    else:
        reconstruction_loss = 0.0

    window = params["window_size"]
    pf = tf.pad(raw_seqs, [[0, 0], [window, window]],
                constant_values=params["pad_id"])
    targets = tf.map_fn(lambda i: tf.concat(
        (pf[:, i - window:i], pf[:, i + 1:i + 1 + window]), axis=-1),
                        tf.range(window, window + seq_len_with_pad),
                        dtype=tf.int32)
    embedding = tf.reshape(embedding, shape=[-1, params["embed_dim"]])
    mask = tf.reshape(
        tf.sequence_mask(seq_len, seq_len_with_pad, dtype=tf.float32), [-1])
    #    targets = tf.Print(targets, [tf.reshape(targets, shape=[-1, window * 2])[0], raw_seqs[0], mask], summarize=1000)

    loss = tf.nn.nce_loss(
        output_embed,
        output_bias,
        tf.reshape(targets, shape=[-1, window * 2]),
        embedding,
        params["sampled_softmax_size"],
        params["vocab_size"],
        num_true=window * 2,
        remove_accidental_hits=True,
        partition_strategy='div',
        name='nce_loss',
    )

    loss = tf.reduce_sum(loss * mask) / tf.reduce_sum(mask)

    tf.summary.scalar("Reconstruction", reconstruction_loss)
    tf.summary.scalar("MainLoss", loss)
    tf.summary.scalar("DivergenceLoss", divergence_loss)

    loss = loss + reconstruction_loss * params["reconstruction_weight"]

    if mode == tf.estimator.ModeKeys.TRAIN:
        lr = noam_lr(params.get("learning_rate", None), params["embed_dim"],
                     tf.train.get_or_create_global_step(),
                     params["warmup_steps"])
        optimizer = params.get("optimizer")
        train_op = tf.contrib.layers.optimize_loss(
            learning_rate=lr,
            loss=tf.reduce_mean(loss +
                                divergence_loss * params["divergence_weight"]),
            global_step=tf.train.get_global_step(),
            optimizer=optimizer,
            clip_gradients=1.0,
            summaries=[
                "learning_rate",
                "loss",
                "gradients",
                "gradient_norm",
                "global_gradient_norm",
            ])

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    assert mode == tf.estimator.ModeKeys.EVAL

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss)
Beispiel #25
0
def inception_v3(inputs,
                 dropout_keep_prob=0.8,
                 num_classes=1000,
                 is_training=True,
                 restore_logits=True,
                 scope=''):
  """Latest Inception from http://arxiv.org/abs/1512.00567.

    "Rethinking the Inception Architecture for Computer Vision"

    Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
    Zbigniew Wojna

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    dropout_keep_prob: dropout keep_prob.
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    restore_logits: whether or not the logits layers should be restored.
      Useful for fine-tuning a model with different num_classes.
    scope: Optional scope for name_scope.

  Returns:
    a list containing 'logits', 'aux_logits' Tensors.
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}
  partitioner = tf.fixed_size_partitioner(2, axis=0)
  with tf.name_scope(scope, 'inception_v3', [inputs]):
    with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
                          is_training=is_training):
      with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                            stride=1, padding='VALID'):
        # 299 x 299 x 3
        end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2,
                                         scope='conv0')
        # 149 x 149 x 32
        end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3],
                                         scope='conv1')
        # 147 x 147 x 32
        end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3],
                                         padding='SAME', scope='conv2')
        # 147 x 147 x 64
        end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
                                           stride=2, scope='pool1')
        # 73 x 73 x 64
        end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1],
                                         scope='conv3')
        # 73 x 73 x 80.
        end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3],
                                         scope='conv4')
        # 71 x 71 x 192.
        end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
                                           stride=2, scope='pool2')
        # 35 x 35 x 192.
        net = end_points['pool2']
      # Inception blocks
      with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
                            stride=1, padding='SAME'):
        # mixed: 35 x 35 x 256.
        with tf.variable_scope('mixed_35x35x256a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x256a'] = net
        # mixed_1: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x288a'] = net
        # mixed_2: 35 x 35 x 288.
        with tf.variable_scope('mixed_35x35x288b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 64, [1, 1])
          with tf.variable_scope('branch5x5'):
            branch5x5 = ops.conv2d(net, 48, [1, 1])
            branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
          end_points['mixed_35x35x288b'] = net
        # mixed_3: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768a'):
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID')
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 64, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3],
                                      stride=2, padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
          net = tf.concat(axis=3, values=[branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_17x17x768a'] = net
        # mixed4: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 128, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 128, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768b'] = net
        # mixed_5: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768c'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 160, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 160, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768c'] = net
        # mixed_6: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768d'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 160, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 160, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768d'] = net
        # mixed_7: 17 x 17 x 768.
        with tf.variable_scope('mixed_17x17x768e'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 192, [1, 1])
          with tf.variable_scope('branch7x7'):
            branch7x7 = ops.conv2d(net, 192, [1, 1])
            branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
            branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
          with tf.variable_scope('branch7x7dbl'):
            branch7x7dbl = ops.conv2d(net, 192, [1, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
            branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
          end_points['mixed_17x17x768e'] = net
        # Auxiliary Head logits
        aux_logits = tf.identity(end_points['mixed_17x17x768e'])
        with tf.variable_scope('aux_logits'):
          aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3,
                                    padding='VALID')
          aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj')
          # Shape of feature map before the final layer.
          shape = aux_logits.get_shape()
          aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01,
                                  padding='VALID')
          aux_logits = ops.flatten(aux_logits)
          aux_logits = ops.fc(aux_logits, num_classes, activation=None,
                              stddev=0.001, restore=restore_logits)
          end_points['aux_logits'] = aux_logits
        # mixed_8: 8 x 8 x 1280.
        # Note that the scope below is kept unchanged so as not to invalidate
        # previous checkpoints.
        # (TODO) Fix the scope when appropriate.
        with tf.variable_scope('mixed_17x17x1280a'):
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 192, [1, 1])
            branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2,
                                   padding='VALID')
          with tf.variable_scope('branch7x7x3'):
            branch7x7x3 = ops.conv2d(net, 192, [1, 1])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
            branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3],
                                     stride=2, padding='VALID')
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
          net = tf.concat(axis=3, values=[branch3x3, branch7x7x3, branch_pool])
          end_points['mixed_17x17x1280a'] = net
        # mixed_9: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048a'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
            branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]),
                                                  ops.conv2d(branch3x3, 384, [3, 1])])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
            branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]),
                                                     ops.conv2d(branch3x3dbl, 384, [3, 1])])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_8x8x2048a'] = net
        # mixed_10: 8 x 8 x 2048.
        with tf.variable_scope('mixed_8x8x2048b'):
          with tf.variable_scope('branch1x1'):
            branch1x1 = ops.conv2d(net, 320, [1, 1])
          with tf.variable_scope('branch3x3'):
            branch3x3 = ops.conv2d(net, 384, [1, 1])
            branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]),
                                                  ops.conv2d(branch3x3, 384, [3, 1])])
          with tf.variable_scope('branch3x3dbl'):
            branch3x3dbl = ops.conv2d(net, 448, [1, 1])
            branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
            branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]),
                                                     ops.conv2d(branch3x3dbl, 384, [3, 1])])
          with tf.variable_scope('branch_pool'):
            branch_pool = ops.avg_pool(net, [3, 3])
            branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
          net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool])
          end_points['mixed_8x8x2048b'] = net
        # Final pooling and prediction
        with tf.variable_scope('logits'):
          shape = net.get_shape()
          net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool')
          # 1 x 1 x 2048
          net = ops.dropout(net, dropout_keep_prob, scope='dropout')
          net = ops.flatten(net, scope='flatten')
          # 2048
          logits = ops.fc(net, num_classes, activation=None, scope='logits',
                          restore=restore_logits)
          # 1000
          end_points['logits'] = logits
          end_points['predictions'] = tf.nn.softmax(logits, name='predictions')
      return logits, end_points
Beispiel #26
0
def create_emb_for_encoder_and_decoder(share_vocab,
                                       src_vocab_size,
                                       tgt_vocab_size,
                                       src_embed_size,
                                       tgt_embed_size,
                                       dtype=tf.float32,
                                       num_enc_partitions=0,
                                       num_dec_partitions=0,
                                       src_vocab_file=None,
                                       tgt_vocab_file=None,
                                       src_embed_file=None,
                                       tgt_embed_file=None,
                                       use_char_encode=False,
                                       scope=None):
 
  if num_enc_partitions <= 1:
    enc_partitioner = None
  else:
   
    enc_partitioner = tf.fixed_size_partitioner(num_enc_partitions)

  if num_dec_partitions <= 1:
    dec_partitioner = None
  else:
  
    dec_partitioner = tf.fixed_size_partitioner(num_dec_partitions)

  if src_embed_file and enc_partitioner:
    raise ValueError(
        "Can't set num_enc_partitions > 1 when using pretrained encoder "
        "embedding")

  if tgt_embed_file and dec_partitioner:
    raise ValueError(
        "Can't set num_dec_partitions > 1 when using pretrained decdoer "
        "embedding")

  with tf.variable_scope(
      scope or "embeddings", dtype=dtype, partitioner=enc_partitioner) as scope:
    # Share embedding
    if share_vocab:
      if src_vocab_size != tgt_vocab_size:
        raise ValueError("Share embedding but different src/tgt vocab sizes"
                         " %d vs. %d" % (src_vocab_size, tgt_vocab_size))
      assert src_embed_size == tgt_embed_size
      utils.print_out("# Use the same embedding for source and target")
      vocab_file = src_vocab_file or tgt_vocab_file
      embed_file = src_embed_file or tgt_embed_file

      embedding_encoder = _create_or_load_embed(
          "embedding_share", vocab_file, embed_file,
          src_vocab_size, src_embed_size, dtype)
      embedding_decoder = embedding_encoder
    else:
      if not use_char_encode:
        with tf.variable_scope("encoder", partitioner=enc_partitioner):
          embedding_encoder = _create_or_load_embed(
              "embedding_encoder", src_vocab_file, src_embed_file,
              src_vocab_size, src_embed_size, dtype)
      else:
        embedding_encoder = None

      with tf.variable_scope("decoder", partitioner=dec_partitioner):
        embedding_decoder = _create_or_load_embed(
            "embedding_decoder", tgt_vocab_file, tgt_embed_file,
            tgt_vocab_size, tgt_embed_size, dtype)

  return embedding_encoder, embedding_decoder
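A minimal standalone sketch (not part of the NMT code above; sizes are illustrative) showing that an embedding created under a fixed_size_partitioner still behaves like a single variable for tf.nn.embedding_lookup:

import tensorflow as tf

with tf.variable_scope("embeddings",
                       partitioner=tf.fixed_size_partitioner(num_shards=4)):
    embedding_encoder = tf.get_variable("embedding_encoder", [1000, 32], tf.float32)

source_ids = tf.constant([[1, 5, 9]])
encoder_emb_inp = tf.nn.embedding_lookup(embedding_encoder, source_ids)  # [1, 3, 32]
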
Beispiel #27
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of LSTM.
        Args:
          inputs: input Tensor, 2D, batch x num_units.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
            tuple of state Tensors, both `2-D`, with column sizes `c_state` and
            `m_state`.
          scope: VariableScope for the created subgraph; defaults to "lstm_cell".
        Returns:
          A tuple containing:
          - A `2-D, [batch x output_dim]`, Tensor representing the output of the
            LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading `inputs` when
            the previous state was `state`.  Same type and shape(s) as `state`.
        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            (c_prev, h_prev) = state
        else:
            c_prev = tf.slice(state, begin=[0, 0], size=[-1, self._num_units])
            h_prev = tf.slice(state, begin=[0, self._num_units], size=[-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        with tf.variable_scope(scope or "lstm_cell", initializer=self._initializer,
                               reuse=self._reuse) as unit_scope:
            if self._num_unit_shards is not None:
                unit_scope.set_partitioner(
                    tf.fixed_size_partitioner(self._num_unit_shards))

            # i = input_gate, g = new_input, f = forget_gate, o = output_gate
            lstm_matrix = tf.contrib.rnn._linear([inputs, h_prev], 4 * self._num_units, bias=True)
            i, g, f, o = tf.split(value=lstm_matrix, num_or_size_splits=4, axis=1)

            # Diagonal connections
            if self._use_peepholes:
                # tf.variable_scope and tf.get_variable are meant to be used together
                with tf.variable_scope(unit_scope) as projection_scope:
                    if self._num_unit_shards is not None:
                        projection_scope.set_partitioner(None)
                    w_f_diag = tf.get_variable("w_f_diag", shape=[self._num_units], dtype=dtype)
                    w_i_diag = tf.get_variable("w_i_diag", shape=[self._num_units], dtype=dtype)
                    w_o_diag = tf.get_variable("w_o_diag", shape=[self._num_units], dtype=dtype)

            if self._use_peepholes:
                c = (tf.sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
                     tf.sigmoid(i + w_i_diag * c_prev) * tf.tanh(g))
            else:
                c = (tf.sigmoid(f + self._forget_bias) * c_prev +
                     tf.sigmoid(i) * tf.tanh(g))

            if self._cell_clip is not None:
                c = tf.clip_by_value(c, -self._cell_clip, self._cell_clip)

            if self._use_peepholes:
                h = tf.sigmoid(o + w_o_diag * c) * tf.tanh(c)
            else:
                h = tf.sigmoid(o) * tf.tanh(c)

            if self._num_proj is not None:
                with tf.variable_scope("projection") as proj_scope:
                    if self._num_proj_shards is not None:
                        proj_scope.set_partitioner(
                            tf.fixed_size_partitioner(self._num_proj_shards))
                    h = tf.contrib.rnn._linear(h, self._num_proj, bias=False)

                if self._proj_clip is not None:
                    h = tf.clip_by_value(h, -self._proj_clip, self._proj_clip)

        new_state = (LSTMStateTuple(c, h) if self._state_is_tuple else
                     tf.concat([c, h], 1))
        return h, new_state
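A short standalone sketch of the scope.set_partitioner pattern used above: partitioning is switched on for the large LSTM matrix and switched off again for small per-unit vectors such as the peephole weights (shapes are illustrative).

import tensorflow as tf

with tf.variable_scope("lstm_cell") as unit_scope:
    unit_scope.set_partitioner(tf.fixed_size_partitioner(2))
    kernel = tf.get_variable("kernel", [128, 4 * 128])   # sharded into 2 pieces
    unit_scope.set_partitioner(None)
    w_f_diag = tf.get_variable("w_f_diag", [128])        # left unsharded
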
Beispiel #28
0
def train_criteo(model, cluster, task_id, nrank, args):
    def get_current_shard(data):
        part_size = data.shape[0] // nrank
        start = part_size * task_id
        end = start + part_size if task_id != nrank - 1 else data.shape[0]
        return data[start:end]

    if args.all:
        from models.load_data import process_all_criteo_data
        dense, sparse, all_labels = process_all_criteo_data()
        dense_feature = get_current_shard(dense[0])
        sparse_feature = get_current_shard(sparse[0])
        labels = get_current_shard(all_labels[0])
        val_dense = get_current_shard(dense[1])
        val_sparse = get_current_shard(sparse[1])
        val_labels = get_current_shard(all_labels[1])
    else:
        from models.load_data import process_sampled_criteo_data
        dense_feature, sparse_feature, labels = process_sampled_criteo_data()
        dense_feature = get_current_shard(dense_feature)
        sparse_feature = get_current_shard(sparse_feature)
        labels = get_current_shard(labels)

    batch_size = 128
    worker_device = "/job:worker/task:%d/gpu:0" % (task_id)
    with tf.device(worker_device):
        dense_input = tf.compat.v1.placeholder(tf.float32, [batch_size, 13])
        sparse_input = tf.compat.v1.placeholder(tf.int32, [batch_size, 26])
        y_ = tf.compat.v1.placeholder(tf.float32, [batch_size, 1])

    with tf.device(tf.compat.v1.train.replica_device_setter(cluster=cluster)):
        server_num = len(cluster.as_dict()['ps'])
        # print('this is server num:', server_num)
        embed_partitioner = tf.fixed_size_partitioner(
            server_num, 0) if server_num > 1 else None
        loss, y, opt = model(dense_input, sparse_input, y_, embed_partitioner)
        train_op = opt.minimize(loss)

    server = tf.train.Server(cluster, job_name="worker", task_index=task_id)
    init = tf.compat.v1.global_variables_initializer()
    sv = tf.train.Supervisor(is_chief=(task_id == 0),
                             init_op=init,
                             recovery_wait_secs=1)
    sess_config = tf.compat.v1.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        device_filters=["/job:ps", "/job:worker/task:%d" % task_id])
    sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
    # sess.run(init)
    if task_id == 0:
        writer = tf.compat.v1.summary.FileWriter('logs/board', sess.graph)

    my_feed_dict = {
        dense_input: np.empty(shape=(batch_size, 13)),
        sparse_input: np.empty(shape=(batch_size, 26)),
        y_: np.empty(shape=(batch_size, 1)),
    }

    if args.all:
        raw_log_file = './logs/tf_dist_%s_%d.log' % (args.model, task_id)
        print('Processing all data, log to', raw_log_file)
        log_file = open(raw_log_file, 'w')
        iterations = dense_feature.shape[0] // batch_size
        total_epoch = 11
        start_index = 0
        for ep in range(total_epoch):
            # print("iters: %d" % (lp * 1000))
            print("epoch %d" % ep)
            st_time = time.time()
            train_loss, train_acc, train_auc = [], [], []
            for it in range(iterations // 10 + (ep % 10 == 9) *
                            (iterations % 10)):
                my_feed_dict[dense_input][:] = dense_feature[
                    start_index:start_index + batch_size]
                my_feed_dict[sparse_input][:] = sparse_feature[
                    start_index:start_index + batch_size]
                my_feed_dict[y_][:] = labels[start_index:start_index +
                                             batch_size]
                start_index += batch_size
                if start_index + batch_size > dense_feature.shape[0]:
                    start_index = 0
                loss_val = sess.run([loss, y, y_, train_op],
                                    feed_dict=my_feed_dict)
                pred_val = loss_val[1]
                true_val = loss_val[2]
                acc_val = np.equal(true_val, pred_val > 0.5)
                train_loss.append(loss_val[0])
                train_acc.append(acc_val)
                train_auc.append(metrics.roc_auc_score(true_val, pred_val))
            tra_accuracy = np.mean(train_acc)
            tra_loss = np.mean(train_loss)
            tra_auc = np.mean(train_auc)
            en_time = time.time()
            train_time = en_time - st_time

            if args.val:
                val_loss, val_acc, val_auc = [], [], []
                for it in range(val_dense.shape[0] // batch_size):
                    local_st = it * batch_size
                    my_feed_dict[dense_input][:] = val_dense[
                        local_st:local_st + batch_size]
                    my_feed_dict[sparse_input][:] = val_sparse[
                        local_st:local_st + batch_size]
                    my_feed_dict[y_][:] = val_labels[local_st:local_st +
                                                     batch_size]
                    loss_val = sess.run([loss, y, y_], feed_dict=my_feed_dict)
                    pred_val = loss_val[1]
                    true_val = loss_val[2]
                    acc_val = np.equal(true_val, pred_val > 0.5)
                    val_loss.append(loss_val[0])
                    val_acc.append(acc_val)
                    val_auc.append(metrics.roc_auc_score(true_val, pred_val))
                v_accuracy = np.mean(val_acc)
                v_loss = np.mean(val_loss)
                v_auc = np.mean(val_auc)
                printstr = "train_loss: %.4f, train_acc: %.4f, train_auc: %.4f, test_loss: %.4f, test_acc: %.4f, test_auc: %.4f, train_time: %.4f"\
                        % (tra_loss, tra_accuracy, tra_auc, v_loss, v_accuracy, v_auc, train_time)
            else:
                printstr = "train_loss: %.4f, train_acc: %.4f, train_auc: %.4f, train_time: %.4f"\
                        % (tra_loss, tra_accuracy, tra_auc, train_time)

            print(printstr)
            log_file.write(printstr + '\n')
            log_file.flush()
    else:
        # here no val
        iteration = dense_feature.shape[0] // batch_size

        epoch = 10
        for ep in range(epoch):
            print('epoch', ep)
            if ep == 5:
                start = time.time()
            ep_st = time.time()
            train_loss = []
            train_acc = []
            for idx in range(iteration):
                start_index = idx * batch_size
                my_feed_dict[dense_input][:] = dense_feature[
                    start_index:start_index + batch_size]
                my_feed_dict[sparse_input][:] = sparse_feature[
                    start_index:start_index + batch_size]
                my_feed_dict[y_][:] = labels[start_index:start_index +
                                             batch_size]

                loss_val = sess.run([loss, y, y_, train_op],
                                    feed_dict=my_feed_dict)
                pred_val = loss_val[1]
                true_val = loss_val[2]
                if pred_val.shape[1] == 1:  # for criteo case
                    acc_val = np.equal(true_val, pred_val > 0.5)
                else:
                    acc_val = np.equal(np.argmax(pred_val, 1),
                                       np.argmax(true_val, 1)).astype(np.float)
                train_loss.append(loss_val[0])
                train_acc.append(acc_val)
            tra_accuracy = np.mean(train_acc)
            tra_loss = np.mean(train_loss)
            ep_en = time.time()
            print("train_loss: %.4f, train_acc: %.4f, train_time: %.4f" %
                  (tra_loss, tra_accuracy, ep_en - ep_st))
        print("tensorflow: ", (time.time() - start))
Beispiel #29
0
    def build_model(self):
        self.X = tf.placeholder(tf.int64, [None, None], name='input')  # batch * sequence length
        self.Y = tf.placeholder(tf.int64, [None, None], name='output')  # batch * sequence length
        self.seq_len = tf.placeholder(tf.int64, [None], name="seq_len")
        self.drop_out = tf.placeholder(tf.float32, name="drop_out")
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.batch_size = tf.shape(self.X)[0]

        with tf.variable_scope('gru_layer'):
            sigma = self.sigma if self.sigma != 0 else np.sqrt(6.0 / (self.n_items + self.rnn_size))
            if self.init_as_normal:
                initializer = tf.random_normal_initializer(mean=0, stddev=sigma)
            else:
                initializer = tf.random_uniform_initializer(minval=-sigma, maxval=sigma)
            partitioner = tf.fixed_size_partitioner(num_shards=self.rnn_size)
            embedding = tf.get_variable('embedding', [self.n_items, self.rnn_size],
                                        tf.float32, initializer=initializer, partitioner=partitioner)
            softmax_W = tf.get_variable('softmax_w', [self.n_items, self.rnn_size],
                                        tf.float32, initializer=initializer, partitioner=partitioner)
            softmax_b = tf.get_variable('softmax_b', [self.n_items],
                                        tf.float32, initializer=tf.constant_initializer(0.0), partitioner=partitioner)

            cell = tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(self.rnn_size)
            initial_state = cell.zero_state(self.batch_size, dtype=tf.float32)
            drop_cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.drop_out)

            inputs = tf.nn.embedding_lookup(embedding, self.X)  # batch * sequence length * rnn_size
            output, state = tf.nn.dynamic_rnn(drop_cell, inputs,
                                              initial_state=initial_state,
                                              dtype=tf.float32,
                                              sequence_length=self.seq_len)
            self.output = output  # batch * sequence length * rnn_size
            self.state = state  # batch * rnn_size (state at the last time step)

        '''
        Training:
        use other examples of the minibatch as negative samples.
        '''
        self.sampled_W = tf.nn.embedding_lookup(softmax_W, self.Y, name='sampled_W')  # batch * sequence length * rnn_size
        self.sampled_b = tf.nn.embedding_lookup(softmax_b, self.Y, name='sampled_b')  # batch * sequence length
        # shape of logits_train: batch * sequence length * batch
        self.logits_train = tf.transpose(tf.matmul(tf.transpose(output, [1, 0, 2]), tf.transpose(self.sampled_W, [1, 2, 0])),
                              [1, 0, 2]) + tf.expand_dims(self.sampled_b, -1)
        self.yhat_train = self.final_activation(self.logits_train, "yhat_train")  # batch * sequence length * batch
        self.cost_train = self.loss_function(self.yhat_train)

        '''
        Prediction
        '''
        output_shape = tf.shape(output)         # output shape: batch * sequence length * rnn_size
        softmax_WT = tf.transpose(softmax_W)        # rnn_size * n_items
        swt_shape = tf.shape(softmax_WT)
        re_softmax = tf.reshape(tf.tile(softmax_WT, [output_shape[0], 1]),
                                [output_shape[0], swt_shape[0], swt_shape[1]])
        self.logits_predict = tf.matmul(output, re_softmax) + softmax_b        # batch * sequence length * n_items
        self.yhat_predict = self.final_activation(self.logits_predict, "yhat_predict")

        '''
        Learning rate
        '''
        self.lr = tf.maximum(1e-5, tf.train.exponential_decay(self.learning_rate, self.global_step, self.decay_steps,
                                                              self.decay_rate, staircase=True))

        '''
        Try different optimizers.
        '''
        # optimizer = tf.train.AdagradOptimizer(self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        # optimizer = tf.train.AdadeltaOptimizer(self.lr)
        # optimizer = tf.train.RMSPropOptimizer(self.lr)

        tvars = tf.trainable_variables()
        gvs = optimizer.compute_gradients(self.cost_train, tvars)
        if self.grad_cap > 0:
            capped_gvs = [(tf.clip_by_norm(grad, self.grad_cap), var) for (grad, var) in gvs]
        else:
            capped_gvs = gvs
        self.train_op = optimizer.apply_gradients(capped_gvs, global_step=self.global_step)
Beispiel #30
0
  def _make_model(target_words, context_words, mode):
    index_tensor = tf.constant(index)
    reverse_index = tf.contrib.lookup.HashTable(
        tf.contrib.lookup.KeyValueTensorInitializer(
            index_tensor, tf.constant(range(vocab_size - 1), dtype=tf.int64)
        ),
        vocab_size - 1
    )

    # tf.contrib.learn.Estimator.fit adds an additional dimension to input
    target_words_squeezed = tf.squeeze(target_words, squeeze_dims=[1])
    target_indices = reverse_index.lookup(target_words_squeezed)

    with tf.device(tf.train.replica_device_setter()):
      with tf.variable_scope('nce',
                             partitioner=tf.fixed_size_partitioner(
                                 num_partitions)):

        embeddings = tf.get_variable(
            'embeddings',
            shape=[vocab_size, embedding_size],
            dtype=tf.float32,
            initializer=tf.random_uniform_initializer(-1.0, 1.0)
        )
        if mode in [ModeKeys.TRAIN, ModeKeys.EVAL]:
          nce_weights = tf.get_variable(
              'nce_weights',
              shape=[vocab_size, embedding_size],
              dtype=tf.float32,
              initializer=tf.truncated_normal_initializer(
                  stddev=1.0 / math.sqrt(embedding_size)
              )
          )
          nce_biases = tf.get_variable(
              'nce_biases',
              initializer=tf.zeros_initializer([vocab_size]),
              dtype=tf.float32
          )

      prediction_dict, loss, train_op = ({}, None, None)

      if mode in [ModeKeys.TRAIN, ModeKeys.EVAL]:
        context_indices = tf.expand_dims(
            reverse_index.lookup(context_words), 1)
        embedded = tf.nn.embedding_lookup(embeddings, target_indices)

        sampled_words = tf.nn.fixed_unigram_candidate_sampler(
            true_classes=context_indices,
            num_true=1,
            num_sampled=num_sampled,
            unique=True,
            range_max=vocab_size,
            distortion=0.75,
            unigrams=vocab_counts + [1]
        )
        loss = tf.reduce_mean(tf.nn.nce_loss(
            nce_weights,
            nce_biases,
            embedded,
            context_indices,
            num_sampled,
            vocab_size,
            sampled_values=sampled_words
        ))
        tf.scalar_summary('loss', loss)

      if mode == ModeKeys.TRAIN:
        train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=tf.contrib.framework.get_global_step()
        )

      if mode in [ModeKeys.EVAL, ModeKeys.INFER]:
        # Compute the cosine similarity between examples and embeddings.
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(
            normalized_embeddings, target_indices)
        similarity = tf.matmul(
            valid_embeddings, normalized_embeddings, transpose_b=True)
        prediction_dict['values'], predictions = tf.nn.top_k(
            similarity, sorted=True, k=num_sim)
        index_tensor = tf.concat(0, [index_tensor, tf.constant(['UNK'])])
        prediction_dict['predictions'] = tf.gather(index_tensor, predictions)

      return prediction_dict, loss, train_op
Beispiel #31
0
  def _model_fn(inputs, context_words, mode):
    target_words = inputs['targets']
    index_tensor = inputs['index']
    reverse_index = tf.contrib.lookup.HashTable(
        tf.contrib.lookup.KeyValueTensorInitializer(
            index_tensor,
            tf.constant(range(1, args.vocab_size), dtype=tf.int64)
        ),
        0
    )

    # tf.contrib.learn.Estimator.fit adds an additional dimension to input
    target_words_squeezed = tf.squeeze(target_words, squeeze_dims=[1])
    target_indices = reverse_index.lookup(target_words_squeezed)

    with tf.device(tf.train.replica_device_setter()):
      with tf.variable_scope('nce',
                             partitioner=tf.fixed_size_partitioner(
                                 args.num_partitions)):

        embeddings = tf.get_variable(
            'embeddings',
            shape=[args.vocab_size, args.embedding_size],
            dtype=tf.float32,
            initializer=tf.random_uniform_initializer(-1.0, 1.0)
        )
        if mode in [ModeKeys.TRAIN, ModeKeys.EVAL]:
          nce_weights = tf.get_variable(
              'nce_weights',
              shape=[args.vocab_size, args.embedding_size],
              dtype=tf.float32,
              initializer=tf.truncated_normal_initializer(
                  stddev=1.0 / math.sqrt(args.embedding_size)
              )
          )
          nce_biases = tf.get_variable(
              'nce_biases',
              initializer=tf.zeros_initializer([args.vocab_size]),
              dtype=tf.float32
          )

      tensors, loss, train_op = ({}, None, None)

      if mode in [ModeKeys.TRAIN, ModeKeys.EVAL]:
        context_indices = tf.expand_dims(
            reverse_index.lookup(context_words), 1)
        embedded = tf.nn.embedding_lookup(embeddings, target_indices)
        loss = tf.reduce_mean(tf.nn.nce_loss(  # mirrors the completed _make_model above
            nce_weights, nce_biases, embedded, context_indices,
            args.num_sampled, args.vocab_size))  # assumes args.num_sampled exists
        tf.scalar_summary('loss', loss)
        tf.scalar_summary('training/hptuning/metric', loss)

      if mode == ModeKeys.TRAIN:
        train_op = tf.train.GradientDescentOptimizer(  # assumes args.learning_rate exists
            args.learning_rate).minimize(
                loss, global_step=tf.contrib.framework.get_global_step())

      if mode == ModeKeys.INFER:
        # Compute the cosine similarity between examples and embeddings
        # (mirrors the completed _make_model above).
        normalized = embeddings / tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        similarity = tf.matmul(tf.nn.embedding_lookup(normalized, target_indices),
                               normalized, transpose_b=True)
        tensors['values'], predictions = tf.nn.top_k(
            similarity, sorted=True, k=args.num_sim)
        index_tensor = tf.concat(0, [tf.constant(['UNK']), index_tensor])
        tensors['predictions'] = tf.gather(index_tensor, predictions)
Beispiel #32
0
def create_emb_for_encoder_and_decoder(share_vocab,
                                       src_vocab_size,
                                       tgt_vocab_size,
                                       src_embed_size,
                                       tgt_embed_size,
                                       word_embed,
                                       dtype=tf.float32,
                                       num_partitions=0,
                                       scope=None):
    """Create embedding matrix for both encoder and decoder.

  Args:
    share_vocab: A boolean. Whether to share embedding matrix for both
      encoder and decoder.
    src_vocab_size: An integer. The source vocab size.
    tgt_vocab_size: An integer. The target vocab size.
    src_embed_size: An integer. The embedding dimension for the encoder's
      embedding.
    tgt_embed_size: An integer. The embedding dimension for the decoder's
      embedding.
    word_embed: "None" for the default embedding, or a path prefix of a pretrained
      word2vec/glove pickle loaded via np.load(word_embed + ".pickle").
    dtype: dtype of the embedding matrix. Default to float32.
    num_partitions: number of partitions used for the embedding vars.
    scope: VariableScope for the created subgraph. Default to "embedding".

  Returns:
    embedding_encoder: Encoder's embedding matrix.
    embedding_decoder: Decoder's embedding matrix.

  Raises:
    ValueError: if use share_vocab but source and target have different vocab
      size.
  """

    if num_partitions <= 1:
        partitioner = None
    else:
        # Note: num_partitions > 1 is required for distributed training because
        # embedding_lookup tries to colocate an unpartitioned embedding variable
        # with its lookup ops, which may cause the embedding variable to be placed
        # on worker jobs.
        partitioner = tf.fixed_size_partitioner(num_partitions)

    with tf.variable_scope(scope or "embeddings",
                           dtype=dtype,
                           partitioner=partitioner) as scope:
        # Share embedding
        if share_vocab:
            if src_vocab_size != tgt_vocab_size:
                raise ValueError(
                    "Share embedding but different src/tgt vocab sizes"
                    " %d vs. %d" % (src_vocab_size, tgt_vocab_size))
            utils.print_out("# Use the same source embeddings for target")

            if word_embed == "None":
                utils.print_out(
                    "Using default word embedding. (one-hot based)",
                    new_line=True)
                embedding = tf.get_variable("embedding_share",
                                            [src_vocab_size, src_embed_size],
                                            dtype)
            elif "word2vec" in word_embed or "glove" in word_embed:
                utils.print_out("Loading word embedding: %s" % word_embed,
                                new_line=True)
                word2vec_emd = np.load(word_embed + ".pickle")
                embedding = tf.get_variable(
                    name="embedding_share",
                    shape=[src_vocab_size, src_embed_size],
                    dtype=dtype,
                    initializer=tf.constant_initializer(word2vec_emd),
                    trainable=True)
            else:
                embedding = None

            embedding_encoder = embedding
            embedding_decoder = embedding
        else:
            with tf.variable_scope("encoder", partitioner=partitioner):
                embedding_encoder = tf.get_variable(
                    "embedding_encoder", [src_vocab_size, src_embed_size],
                    dtype)

            with tf.variable_scope("decoder", partitioner=partitioner):
                embedding_decoder = tf.get_variable(
                    "embedding_decoder", [tgt_vocab_size, tgt_embed_size],
                    dtype)

    return embedding_encoder, embedding_decoder
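A standalone sketch (checkpoint path and sizes are illustrative) showing that embeddings partitioned this way round-trip through tf.train.Saver like ordinary variables; the shards are saved as slices of one logical tensor, so the number of partitions is a graph-time choice rather than part of the saved model.

import tensorflow as tf

with tf.variable_scope("embeddings",
                       partitioner=tf.fixed_size_partitioner(3)):
    embedding_encoder = tf.get_variable("embedding_encoder", [90, 16], tf.float32)

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = saver.save(sess, "/tmp/embeddings.ckpt")  # illustrative path
    saver.restore(sess, ckpt)
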
Beispiel #33
0
def train():
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    print('PS hosts are: %s' % ps_hosts)
    print('Worker hosts are: %s' % worker_hosts)
    configP = tf.ConfigProto()

    server = tf.train.Server({
        'ps': ps_hosts,
        'worker': worker_hosts
    },
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id,
                             config=configP)

    batchSizeManager = BatchSizeManager(FLAGS.batch_size, len(worker_hosts))

    if FLAGS.job_name == 'ps':
        rpcServer = batchSizeManager.create_rpc_server(
            ps_hosts[0].split(':')[0])
        rpcServer.serve()
        server.join()

    rpcClient = batchSizeManager.create_rpc_client(ps_hosts[0].split(':')[0])
    is_chief = (FLAGS.task_id == 0)
    if is_chief:
        if tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.DeleteRecursively(FLAGS.train_dir)
        tf.gfile.MakeDirs(FLAGS.train_dir)

    device_setter = tf.train.replica_device_setter(ps_tasks=len(ps_hosts))
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        with tf.device(device_setter):
            global_step = tf.Variable(0, trainable=False)

            decay_steps = 50000 * 350.0 / FLAGS.batch_size
            batch_size = tf.placeholder(dtype=tf.int32,
                                        shape=(),
                                        name='batch_size')
            images, labels = cifar10.distorted_inputs(batch_size)
            re = tf.shape(images)[0]
            with tf.variable_scope('root',
                                   partitioner=tf.fixed_size_partitioner(
                                       len(ps_hosts), axis=0)):
                network = resnet_model.cifar10_resnet_v2_generator(
                    FLAGS.resnet_size, _NUM_CLASSES)
            inputs = tf.reshape(images, [-1, _HEIGHT, _WIDTH, _DEPTH])
            #            labels = tf.reshape(labels, [-1, _NUM_CLASSES])
            print(labels.get_shape())
            labels = tf.one_hot(labels, 10, 1, 0)
            print(labels.get_shape())
            logits = network(inputs, True)
            print(logits.get_shape())
            cross_entropy = tf.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=labels)

            #            logits = cifar10.inference(images, batch_size)

            #            loss = cifar10.loss(logits, labels, batch_size)
            loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

            # Decay the learning rate exponentially based on the number of steps.
            lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                            global_step,
                                            decay_steps,
                                            LEARNING_RATE_DECAY_FACTOR,
                                            staircase=True)
            opt = tf.train.GradientDescentOptimizer(lr)

            # Track the moving averages of all trainable variables.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                MOVING_AVERAGE_DECAY, global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())

            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=len(worker_hosts),
                #                replica_id=FLAGS.task_id,
                total_num_replicas=len(worker_hosts),
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            grads0 = opt.compute_gradients(loss)
            grads = [(tf.scalar_mul(
                tf.cast(batch_size / FLAGS.batch_size, tf.float32), grad), var)
                     for grad, var in grads0]

            apply_gradients_op = opt.apply_gradients(grads,
                                                     global_step=global_step)

            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(loss, name='train_op')

            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()

            #            saver = tf.train.Saver()
            sv = tf.train.Supervisor(
                is_chief=is_chief,
                logdir=FLAGS.train_dir,
                init_op=tf.group(tf.global_variables_initializer(),
                                 tf.local_variables_initializer()),
                summary_op=None,
                global_step=global_step,
                #                                     saver=saver,
                saver=None,
                recovery_wait_secs=1,
                save_model_secs=60)

            tf.logging.info('%s Supervisor' % datetime.now())
            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                intra_op_parallelism_threads=1,
                inter_op_parallelism_threads=1,
                log_device_placement=FLAGS.log_device_placement)
            sess_config.gpu_options.allow_growth = True

            # Get a session.
            sess = sv.prepare_or_wait_for_session(server.target,
                                                  config=sess_config)
            #	    sess.run(tf.global_variables_initializer())

            # Start the queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)

            sv.start_queue_runners(sess, chief_queue_runners)
            sess.run(init_tokens_op)
            """Train CIFAR-10 for a number of steps."""

            time0 = time.time()
            batch_size_num = FLAGS.batch_size
            for step in range(FLAGS.max_steps):

                start_time = time.time()

                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                #                batch_size_num = updated_batch_size_num
                if step <= 5:
                    batch_size_num = FLAGS.batch_size
                if step >= 0:
                    batch_size_num = int(step / 5) % 512 + 1
                    batch_size_num = 128

                num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size_num
                decay_steps_num = int(num_batches_per_epoch *
                                      NUM_EPOCHS_PER_DECAY)

                #                mgrads, images_, train_val, real, loss_value, gs = sess.run([grads, images, train_op, re, loss, global_step], feed_dict={batch_size: batch_size_num},  options=run_options, run_metadata=run_metadata)
                _, loss_value, gs = sess.run(
                    [train_op, loss, global_step],
                    feed_dict={batch_size: batch_size_num},
                    options=run_options,
                    run_metadata=run_metadata)
                #                _, loss_value, gs = sess.run([train_op, loss, global_step], feed_dict={batch_size: batch_size_num})
                b = time.time()
                #    		tl = timeline.Timeline(run_metadata.step_stats)
                #		last_batch_time = tl.get_local_step_duration('sync_token_q_Dequeue')
                #		thread = threading2.Thread(target=get_computation_time, name="get_computation_time",args=(run_metadata.step_stats,step,))
                #		thread.start()

                c0 = time.time()

                #	        batch_size_num = rpcClient.update_batch_size(FLAGS.task_id, last_batch_time, available_cpu, available_memory, step, batch_size_num)
                batch_size_num = rpcClient.update_batch_size(
                    FLAGS.task_id, 0, 0, 0, step, batch_size_num)

                if step % 1 == 0:
                    duration = time.time() - start_time
                    num_examples_per_step = batch_size_num
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    c = time.time()
                    ##                    tf.logging.info("time statistics - batch_process_time: " + str( last_batch_time)  + " - train_time: " + str(b-start_time) + " - get_batch_time: " + str(c0-b) + " - get_bs_time:  " + str(c-c0) + " - accum_time: " + str(c-time0))

                    format_str = (
                        "time: " + str(time.time()) +
                        '; %s: step %d (global_step %d), loss = %.2f (%.1f examples/sec; %.3f sec/batch) - batch_size: '
                        + str(batch_size_num))
                    tf.logging.info(format_str %
                                    (datetime.now(), step, gs, loss_value,
                                     examples_per_sec, sec_per_batch))
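
Beispiel #33 combines tf.train.replica_device_setter with a fixed_size_partitioner sized to the number of PS hosts, so each shard of a large variable lands on a different parameter server. A reduced sketch of that interaction (assuming TensorFlow 1.x; no running cluster is needed just to inspect the device placement):

import tensorflow as tf  # assumes TensorFlow 1.x

ps_tasks = 2  # hypothetical cluster with two parameter servers
setter = tf.train.replica_device_setter(ps_tasks=ps_tasks)

with tf.device(setter):
    with tf.variable_scope("root",
                           partitioner=tf.fixed_size_partitioner(ps_tasks)):
        weights = tf.get_variable("weights", [1000, 256])

# The device setter assigns shards round-robin across /job:ps tasks, so the
# storage and update traffic for the variable is spread over the cluster.
for part in weights:
    print(part.op.name, "->", part.device)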
Beispiel #34
0
    def create_model(self, filename_queue):
        optimizer = FLAGS.optimizer
        hash_bucket_size = FLAGS.hash_bucket_size
        nloop = FLAGS.nloop
        with_dependency = FLAGS.with_dependency
        poisson = FLAGS.poisson
        if FLAGS.is_validation:
            poisson = 0.0

        ps_count = self.get_cluster_ps_count()

        with tf.variable_scope("ads_lr_input"):
            reader = tf.TFRecordReader()

        with tf.variable_scope("ads_lr_model"):
            hash_table = tf.get_variable("HashTable", [hash_bucket_size, 3],
                                         initializer=tf.zeros_initializer,
                                         dtype=tf.int32, trainable=False,
                                         partitioner=tf.fixed_size_partitioner(ps_count))
            W_weights = tf.get_variable("W_weights", [hash_bucket_size],
                                        initializer=tf.zeros_initializer, dtype=tf.float32,
                                        partitioner=tf.fixed_size_partitioner(ps_count))

            global_step = tf.Variable(0, name="global_step", trainable=False)

            # --------------------------------- debug
            hash_table_list = list(hash_table)
            for v in hash_table_list:
                print("name:", v.name, "device:", v.device, "shape:", v.get_shape())
            # --------------------------------- debug
            if optimizer == "AdaGrad":
                optimizer_op = tf.train.AdagradOptimizer(FLAGS.learning_rate)
            elif optimizer == "FTRL":
                optimizer_op = tf.train.FtrlOptimizer(learning_rate=FLAGS.learning_rate,
                                                      l1_regularization_strength=FLAGS.l1,
                                                      l2_regularization_strength=FLAGS.l2)
            else:
                raise ValueError("Unrecognized optimizer type" + optimizer)

            # patch optimizer _apply_sparse_duplicate_indices
            if FLAGS.disable_sparse_grad_unique:
                optimizer_op._apply_sparse_duplicate_indices = optimizer_op._apply_sparse
            def one_mini_batch(batch_index, dependencies):
                # read one minibatch data
                with ops.control_dependencies(dependencies):
                    _, batch_example = reader.read_up_to(filename_queue, FLAGS.batch_size)
                    sample_labels, sample_weights, sample_guids, feature_indices, feature_values, feature_shape = raw_key_ops.parse_lr_samples(batch_example)
                    sample_labels = tf.reshape(sample_labels, [-1, 1])
                    sample_weights = tf.reshape(sample_weights, [-1, 1])
                    sample_guids = tf.reshape(sample_guids, [-1, 1])

                with tf.device("/cpu:0"):
                    before_sigmoid = hash_embedding_ops.hash_embedding_lookup_sparse(hash_table, W_weights, feature_indices, feature_values, feature_shape, poisson=poisson)
                    before_sigmoid = tf.reshape(before_sigmoid, [-1, 1])

                    pred = tf.nn.sigmoid(before_sigmoid)

                    unweighted_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=before_sigmoid, labels=sample_labels)
                    final_loss = tf.multiply(sample_weights, unweighted_loss)

                    if FLAGS.use_reduce_sum:
                        loss_fn = tf.reduce_sum(final_loss)
                    else:
                        loss_fn = tf.reduce_mean(final_loss)

                    train_op = optimizer_op.minimize(loss_fn, global_step=global_step)
                    if with_dependency:
                        return [loss_fn], train_op, pred, loss_fn, sample_labels, sample_weights, sample_guids
                    else:
                        return [], train_op, pred, loss_fn, sample_labels, sample_weights, sample_guids

            dependencies = []
            train_op_array = []
            predictions = []
            loss_fns = []
            sample_labels_array = []
            sample_weights_array = []
            sample_guids_array = []
            for i in range(nloop):
                dependency, train_op, prediction, loss_fn, sample_labels, sample_weights, sample_guids = one_mini_batch(i, None if i == 0 or not with_dependency else dependencies[-1])
                dependencies.append(dependency)
                train_op_array.append(train_op)
                predictions.append(prediction)
                loss_fns.append(loss_fn)
                sample_labels_array.append(sample_labels)
                sample_weights_array.append(sample_weights)
                sample_guids_array.append(sample_guids)

            train_ops = tf.group(*train_op_array)

            weight_save_ops, weight_restore_ops = util.save_model_for_raw_key(
                    FLAGS.init_model_dir, FLAGS.model_dir, "lr_weights", hash_table, W_weights, optimizer_op, FLAGS.output_optimizer_slots)

            # define custom saver for raw key
            self._save_op = tf.tuple(weight_save_ops)
            self._restore_op = tf.tuple(weight_restore_ops) if weight_restore_ops else []

            self._labels = sample_labels_array
            self._weights = sample_weights_array
            self._train_op = train_ops
            self._predictions = predictions
            self._loss_fn = loss_fns
            self._global_step = global_step
            self._guids = sample_guids_array
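
create_model above depends on project-specific ops (raw_key_ops, hash_embedding_ops); the core pattern of a parameter-server-sharded linear model can be sketched with stock ops only (assuming TensorFlow 1.x; the sizes and feature ids are made up):

import tensorflow as tf  # assumes TensorFlow 1.x

ps_count = 4                # hypothetical number of parameter servers
hash_bucket_size = 1 << 20  # hypothetical feature-hash space

with tf.variable_scope("ads_lr_model_sketch"):
    # 1-D weight vector sharded across parameter servers, analogous to W_weights.
    w = tf.get_variable("W_weights", [hash_bucket_size],
                        initializer=tf.zeros_initializer(),
                        partitioner=tf.fixed_size_partitioner(ps_count))

# Sparse hashed feature ids for a toy batch of two samples.
feature_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                              values=tf.constant([17, 93, 5], dtype=tf.int64),
                              dense_shape=[2, 8])

# Sum the per-feature weights of each sample, i.e. the w^T x term of a hashed
# logistic-regression model; "div" matches the contiguous variable sharding.
logits = tf.nn.embedding_lookup_sparse(w, feature_ids, sp_weights=None,
                                       combiner="sum",
                                       partition_strategy="div")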
def train():
    """Train Inception on a dataset for a number of steps."""
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    tf.logging.info('PS hosts are: %s' % ps_hosts)
    tf.logging.info('Worker hosts are: %s' % worker_hosts)

    cluster_spec = tf.train.ClusterSpec({
        'ps': ps_hosts,
        'worker': worker_hosts
    })
    server = tf.train.Server({
        'ps': ps_hosts,
        'worker': worker_hosts
    },
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id,
                             protocol=FLAGS.protocol)
    batchSizeManager = BatchSizeManager(FLAGS.batch_size, len(worker_hosts))
    if FLAGS.job_name == 'ps':
        if FLAGS.task_id == 0:
            rpcServer = batchSizeManager.create_rpc_server(
                ps_hosts[0].split(':')[0])
            rpcServer.serve()
        server.join()

    dataset = ImagenetData(subset=FLAGS.subset)
    rpcClient = batchSizeManager.create_rpc_client(ps_hosts[0].split(':')[0])
    assert dataset.data_files()
    # Only the chief checks for or creates train_dir.
    if FLAGS.task_id == 0:
        if not tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.MakeDirs(FLAGS.train_dir)
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        ' num_workers and '
        'num_parameter_servers'
        ' must be > 0.')

    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    #batchSizeManager = BatchSizeManager(32, 4)

    # Ops are assigned to worker by default.
    tf.logging.info('cccc-num_parameter_servers:' + str(num_parameter_servers))
    partitioner = tf.fixed_size_partitioner(num_parameter_servers, 0)

    device_setter = tf.train.replica_device_setter(
        ps_tasks=num_parameter_servers)
    slim = tf.contrib.slim
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        with tf.variable_scope('root', partitioner=partitioner):
            # Variables and its related init/assign ops are assigned to ps.
            #    with slim.arg_scope(
            #        [slim.variables.variable, slim.variables.global_step],
            #        device=slim.variables.VariableDeviceChooser(num_parameter_servers)):
            with tf.device(device_setter):
                #	partitioner=partitioner):
                # Create a variable to count the number of train() calls. This equals the
                # number of updates applied to the variables.
                #      global_step = slim.variables.global_step()
                global_step = tf.Variable(0, trainable=False)

                # Calculate the learning rate schedule.

                batch_size = tf.placeholder(dtype=tf.int32,
                                            shape=(),
                                            name='batch_size')
                num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                         FLAGS.batch_size)
                # Decay steps need to be divided by the number of replicas to aggregate.
                decay_steps = int(num_batches_per_epoch *
                                  FLAGS.num_epochs_per_decay /
                                  num_replicas_to_aggregate)

                # Decay the learning rate exponentially based on the number of steps.
                lr = tf.train.exponential_decay(
                    FLAGS.initial_learning_rate,
                    global_step,
                    decay_steps,
                    FLAGS.learning_rate_decay_factor,
                    staircase=True)
                # Add a summary to track the learning rate.
                #      tf.summary.scalar('learning_rate', lr)

                # Create an optimizer that performs gradient descent.

                images, labels = image_processing.distorted_inputs(
                    dataset,
                    batch_size,
                    num_preprocess_threads=FLAGS.num_preprocess_threads)
                print(images.get_shape())
                print(labels.get_shape())

                # Number of classes in the Dataset label set plus 1.
                # Label 0 is reserved for an (unused) background class.
                #      num_classes = dataset.num_classes() + 1
                num_classes = dataset.num_classes()
                print(num_classes)
                #      logits = inception.inference(images, num_classes, for_training=True)
                network_fn = nets_factory.get_network_fn(
                    'inception_v3', num_classes=num_classes)
                (logits, _) = network_fn(images)
                print(logits.get_shape())
                # Add classification loss.
                #      inception.loss(logits, labels, batch_size)

                # Gather all of the losses including regularization losses.
                labels = tf.one_hot(labels, 1000, 1, 0)
                cross_entropy = tf.losses.softmax_cross_entropy(
                    logits=logits, onehot_labels=labels)
                #      losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
                #      losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                total_loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
                    [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

                #      total_loss = tf.add_n(losses, name='total_loss')

                loss_averages = tf.train.ExponentialMovingAverage(0.9,
                                                                  name='avg')
                loss_averages_op = loss_averages.apply(losses + [total_loss])

                with tf.control_dependencies([loss_averages_op]):
                    opt = tf.train.RMSPropOptimizer(lr,
                                                    RMSPROP_DECAY,
                                                    momentum=RMSPROP_MOMENTUM,
                                                    epsilon=RMSPROP_EPSILON)
                    grads0 = opt.compute_gradients(total_loss)
                    grads = [(tf.scalar_mul(
                        tf.cast(batch_size / FLAGS.batch_size, tf.float32),
                        grad), var) for grad, var in grads0]
                    total_loss = tf.identity(total_loss)

                exp_moving_averager = tf.train.ExponentialMovingAverage(
                    MOVING_AVERAGE_DECAY, global_step)
                variables_averages_op = exp_moving_averager.apply(
                    tf.trainable_variables())

                apply_gradients_op = opt.apply_gradients(
                    grads, global_step=global_step)

                with tf.control_dependencies(
                    [apply_gradients_op, variables_averages_op]):
                    train_op = tf.identity(total_loss, name='train_op')

                # Get chief queue_runners and init_tokens, which is used to synchronize
                # replicas. More details can be found in SyncReplicasOptimizer.
                #      chief_queue_runners = [opt.get_chief_queue_runner()]
                #      init_tokens_op = opt.get_init_tokens_op()

                # Create a saver.
                saver = tf.train.Saver()

                # Build the summary operation based on the TF collection of Summaries.
                #      summary_op = tf.summary.merge_all()

                # Build an initialization operation to run below.
                init_op = tf.global_variables_initializer()

                # We run the summaries in the same thread as the training operations by
                # passing in None for summary_op to avoid a summary_thread being started.
                # Running summaries and training operations in parallel could run out of
                # GPU memory.
                sv = tf.train.Supervisor(
                    is_chief=is_chief,
                    logdir=FLAGS.train_dir,
                    init_op=init_op,
                    summary_op=None,
                    global_step=global_step,
                    recovery_wait_secs=1,
                    saver=None,
                    save_model_secs=FLAGS.save_interval_secs)

                tf.logging.info('%s Supervisor' % datetime.now())

                sess_config = tf.ConfigProto(
                    allow_soft_placement=True,
                    log_device_placement=FLAGS.log_device_placement)

                # Get a session.
                sess = sv.prepare_or_wait_for_session(server.target,
                                                      config=sess_config)

                # Start the queue runners.
                queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
                sv.start_queue_runners(sess, queue_runners)
                tf.logging.info('Started %d queues for processing input data.',
                                len(queue_runners))

                #      if is_chief:
                #        sv.start_queue_runners(sess, chief_queue_runners)
                #        sess.run(init_tokens_op)

                # Train, checking for Nans. Concurrently run the summary operation at a
                # specified interval. Note that the summary_op and train_op never run
                # simultaneously in order to prevent running out of GPU memory.
                #      next_summary_time = time.time() + FLAGS.save_summaries_secs
                step = 0
                time0 = time.time()
                batch_size_num = FLAGS.batch_size
                while not sv.should_stop():
                    try:
                        start_time = time.time()

                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()

                        my_images, loss_value, step = sess.run(
                            [images, train_op, global_step],
                            feed_dict={batch_size: batch_size_num},
                            options=run_options,
                            run_metadata=run_metadata)
                        b = time.time()
                        #          assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                        if step > FLAGS.max_steps:
                            break
                        duration = time.time() - start_time
                        c0 = time.time()

                        # call rrsp mechanism to coordinate the synchronization order and update the batch size
                        batch_size_num = rpcClient.update_batch_size(
                            FLAGS.task_id, 0, 0, 0, step, batch_size_num)
                        #          ctf = tl.generate_chrome_trace_format()
                        #          with open("timeline.json", 'a') as f:
                        #            f.write(ctf)

                        if step % 1 == 0:
                            examples_per_sec = FLAGS.batch_size / float(
                                duration)
                            c = time.time()
                            tf.logging.info("time statistics" +
                                            " - train_time: " +
                                            str(b - start_time) +
                                            " - get_batch_time: " +
                                            str(c0 - b) + " - get_bs_time:  " +
                                            str(c - c0) + " - accum_time: " +
                                            str(c - time0) +
                                            " - batch_size: " +
                                            str(batch_size_num))
                            format_str = (
                                'Worker %d: %s: step %d, loss = %.2f'
                                '(%.1f examples/sec; %.3f  sec/batch)')
                            tf.logging.info(
                                format_str %
                                (FLAGS.task_id, datetime.now(), step,
                                 loss_value, examples_per_sec, duration))

                        # Determine if the summary_op should be run on the chief worker.
                        #          if is_chief and next_summary_time < time.time():
                        #            tf.logging.info('Running Summary operation on the chief.')
                        #            summary_str = sess.run(summary_op)
                        #            sv.summary_computed(sess, summary_str)
                        #            tf.logging.info('Finished running Summary operation.')

                        # Determine the next time for running the summary.
                        #            next_summary_time += FLAGS.save_summaries_secs
                    except:
                        if is_chief:
                            tf.logging.info(
                                'Chief got exception while running!')
                        raise

                # Stop the supervisor.  This also waits for service threads to finish.
                sv.stop()
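
Both training loops above rescale the gradients by the ratio of the dynamic batch size to the configured one before applying them. Here is that trick in isolation (a sketch, assuming TensorFlow 1.x; the tiny linear model only exists so there are gradients to scale):

import tensorflow as tf  # assumes TensorFlow 1.x

base_batch_size = 128  # plays the role of FLAGS.batch_size above
batch_size = tf.placeholder(tf.int32, shape=(), name="batch_size")

x = tf.placeholder(tf.float32, [None, 10])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.get_variable("w", [10, 1])
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss)

# Scale every gradient by (current batch size / base batch size) so a larger
# or smaller dynamic batch contributes proportionally to the update.
scale = tf.cast(batch_size, tf.float32) / float(base_batch_size)
scaled = [(tf.scalar_mul(scale, g), v)
          for g, v in grads_and_vars if g is not None]
train_op = opt.apply_gradients(scaled)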
def get_all_embeddings(params, dtype=tf.float32, scope=None):

    if params["lang1_partitions"] <= 1:
        lang1_partitioner = None
    else:
        lang1_partitioner = tf.fixed_size_partitioner(
            params["lang1_partitions"])

    if params["lang2_partitions"] <= 1:
        lang2_partitioner = None
    else:
        lang2_partitioner = tf.fixed_size_partitioner(
            params["lang2_partitions"])

    encoder_embeddings = {}
    decoder_embeddings = {}

    lang1_emb_np, lang2_emb_np = None, None
    if params["lang1_embed_file"] and params["lang2_embed_file"]:
        lang1_emb_np = _create_pretrained_emb_from_txt(
            params["lang1_vocab_file"], params["lang1_embed_file"])
        if params["lang1_embed_file"] == params["lang2_embed_file"]:
            lang2_emb_np = lang1_emb_np
        else:
            lang2_emb_np = _create_pretrained_emb_from_txt(
                params["lang2_vocab_file"], params["lang2_embed_file"])

    if params["share_decpro_emb"]:
        if params["share_lang_emb"]:
            assert params["share_output_emb"]
            share_bias = tf.get_variable('share_projection/bias', [
                params["lang1_vocab_size"],
            ],
                                         initializer=tf.zeros_initializer())
            pro_embs = {
                params["lang1"]: share_bias,
                params["lang2"]: share_bias
            }
        else:
            pro_embs = {
                params["lang1"]:
                tf.get_variable('bias', [
                    params["lang1_vocab_size"],
                ],
                                initializer=tf.zeros_initializer()),
                params["lang2"]:
                tf.get_variable('bias', [
                    params["lang2_vocab_size"],
                ],
                                initializer=tf.zeros_initializer())
            }
    else:
        if params["share_output_emb"]:
            assert params["share_lang_emb"]
            if params["pretrained_out"]:
                assert params["lang1_embed_file"] == params["lang2_embed_file"]
                misc_utils.print_out(
                    "# Using pre-trained embedding to initialize shared projection kernel."
                )
                share_proj_layer = tf.layers.Dense(
                    params["lang1_vocab_size"],
                    use_bias=True,
                    kernel_initializer=tf.constant_initializer(
                        lang1_emb_np.transpose()),
                    name="share_projection")
            else:
                share_proj_layer = tf.layers.Dense(params["lang1_vocab_size"],
                                                   use_bias=True,
                                                   name="share_projection")
            pro_embs = {
                params["lang1"]: share_proj_layer,
                params["lang2"]: share_proj_layer
            }
        else:
            if params["pretrained_out"]:
                misc_utils.print_out(
                    "# Using pre-trained embedding to initialize two projection kernels."
                )
                pro_embs = {
                    params["lang1"]:
                    tf.layers.Dense(params["lang1_vocab_size"],
                                    use_bias=True,
                                    kernel_initializer=tf.constant_initializer(
                                        lang1_emb_np.transpose()),
                                    name="%s_projection" % params["lang1"]),
                    params["lang2"]:
                    tf.layers.Dense(params["lang2_vocab_size"],
                                    use_bias=True,
                                    kernel_initializer=tf.constant_initializer(
                                        lang2_emb_np.transpose()),
                                    name="%s_projection" % params["lang2"])
                }
            else:
                pro_embs = {
                    params["lang1"]:
                    tf.layers.Dense(params["lang1_vocab_size"],
                                    use_bias=True,
                                    name="%s_projection" % params["lang1"]),
                    params["lang2"]:
                    tf.layers.Dense(params["lang2_vocab_size"],
                                    use_bias=True,
                                    name="%s_projection" % params["lang2"])
                }

    with tf.variable_scope(scope or "all_embeddings", dtype=dtype) as scope:

        # encoder embeddings
        with tf.variable_scope("encoder", partitioner=lang1_partitioner):
            lang = "share" if params["share_lang_emb"] else params["lang1"]
            lang1_enc_embedding = _create_embed("%s_embedding" % lang,
                                                params["lang1_vocab_size"],
                                                params["hidden_size"], dtype,
                                                lang1_emb_np)

        if params["share_lang_emb"]:
            if params["lang1_vocab_size"] != params["lang2_vocab_size"]:
                raise ValueError(
                    "Share embedding but different vocab sizes"
                    " %d vs. %d" %
                    (params["lang1_vocab_size"], params["lang2_vocab_size"]))
            assert params["lang1_vocab_size"] == params["lang2_vocab_size"]

            misc_utils.print_out(
                "# Use the same encoder embedding for both languages.")
            lang2_enc_embedding = lang1_enc_embedding

        else:
            with tf.variable_scope("encoder", partitioner=lang2_partitioner):
                lang2_enc_embedding = _create_embed(
                    "%s_embedding" % params["lang2"],
                    params["lang2_vocab_size"], params["hidden_size"], dtype,
                    lang2_emb_np)

        encoder_embeddings[params["lang1"]] = lang1_enc_embedding
        encoder_embeddings[params["lang2"]] = lang2_enc_embedding

        # decoder embeddings
        if params["share_encdec_emb"]:
            misc_utils.print_out(
                "# Use the same embedding for encoder and decoder of each language."
            )
            decoder_embeddings = encoder_embeddings

        else:
            with tf.variable_scope("decoder", partitioner=lang1_partitioner):
                lang = "share" if params["share_lang_emb"] else params["lang1"]
                lang1_dec_embedding = _create_embed("%s_embedding" % lang,
                                                    params["lang1_vocab_size"],
                                                    params["hidden_size"],
                                                    dtype, lang1_emb_np)

                if params["share_lang_emb"]:
                    misc_utils.print_out(
                        "# Use the same decoder embedding for both languages.")
                    lang2_dec_embedding = lang1_dec_embedding

                else:
                    lang2_dec_embedding = _create_embed(
                        "%s_embedding" % params["lang2"],
                        params["lang2_vocab_size"], params["hidden_size"],
                        dtype, lang2_emb_np)

                decoder_embeddings[params["lang1"]] = lang1_dec_embedding
                decoder_embeddings[params["lang2"]] = lang2_dec_embedding

    return encoder_embeddings, decoder_embeddings, pro_embs
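
get_all_embeddings only attaches a partitioner when more than one partition is requested. That guard can be factored into a small helper; maybe_fixed_size_partitioner below is a hypothetical name, not part of the original code (assuming TensorFlow 1.x):

import tensorflow as tf  # assumes TensorFlow 1.x

def maybe_fixed_size_partitioner(num_partitions):
    # Hypothetical helper: only shard when more than one partition is asked for.
    if num_partitions and num_partitions > 1:
        return tf.fixed_size_partitioner(num_partitions)
    return None

lang1_partitioner = maybe_fixed_size_partitioner(1)  # -> None, single partition
lang2_partitioner = maybe_fixed_size_partitioner(4)  # -> shards along axis 0

with tf.variable_scope("encoder_sketch", partitioner=lang2_partitioner):
    emb = tf.get_variable("share_embedding", [32000, 512])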
Beispiel #37
0
def model_fn(features, labels, mode, params):
    init_learning_rate = params['learning_rate']
    decay_steps = params['decay_steps']
    decay_rate = params['decay_rate']

    with tf.name_scope('user'):
        # shape: B (batch size)
        user_embedding = fc.input_layer(features, [user_id, age, gender])

    with tf.name_scope('item'):
        item_buckets = 100
        item_id = features['item_id']
        item_id = tf.reshape(item_id, [-1, 1])
        list_size = tf.shape(item_id)[0]
        item_id = tf.string_to_hash_bucket_fast(item_id, num_buckets=item_buckets)
        # if matrix is huge, it can be distributed
        # item_matrix = tf.get_variable(name='item_matrix',
        #                               shape=(100, 16),
        #                               initializer=tf.initializers.glorot_uniform())
        if mode != tf.estimator.ModeKeys.PREDICT:
            ps_num = len(params['tf_config']['cluster']['ps'])
            item_matrix = tf.get_variable(name='item_matrix',
                                          shape=(100, 16),
                                          initializer=tf.initializers.glorot_uniform(),
                                          partitioner=tf.fixed_size_partitioner(num_shards=ps_num)) #1
        else:
            item_matrix = tf.get_variable(name='item_matrix',
                                          shape=(100, 16),
                                          initializer=tf.initializers.glorot_uniform())

        item_embedding = tf.nn.embedding_lookup(item_matrix,
                                                item_id,
                                                name='item_embedding')
        item_embedding = tf.squeeze(item_embedding, axis=1)

    with tf.name_scope('history'):
        # shape: B * T (sequence length)
        clicked_items = features['clicked_items_15d']
        clicked_mask = tf.cast(tf.not_equal(clicked_items, '0'), tf.bool)
        clicked_items = tf.string_to_hash_bucket_fast(clicked_items, num_buckets=item_buckets)
        # shape: B * T * E
        clicked_embedding = tf.nn.embedding_lookup(item_matrix,
                                                   clicked_items,
                                                   name='clicked_embedding')

    if mode == tf.estimator.ModeKeys.PREDICT:
        user_embedding = tf.tile(user_embedding, [list_size, 1])
        clicked_embedding = tf.tile(clicked_embedding, [list_size, 1, 1])
        clicked_mask = tf.tile(clicked_mask, [list_size, 1])

    # shape: B * E
    clicked_attention = attention(clicked_embedding,
                                  item_embedding,
                                  clicked_mask,
                                  [16, 8],
                                  'clicked_attention')

    fc_inputs = tf.concat([user_embedding, item_embedding, clicked_attention], axis=-1, name='fc_inputs')

    with tf.name_scope('predictions'):
        logits = fc_layers(mode, net=fc_inputs, hidden_units=[64, 16, 1], dropout=0.3)
        predictions = tf.sigmoid(logits, name='predictions')

        if mode != tf.estimator.ModeKeys.PREDICT:
            labels = tf.reshape(labels, [-1, 1])
            loss = tf.losses.sigmoid_cross_entropy(labels, logits)
            if mode == tf.estimator.ModeKeys.EVAL:
                metrics = {
                    'auc': tf.metrics.auc(labels=labels,
                                          predictions=predictions,
                                          num_thresholds=500)
                }
                for metric_name, op in metrics.items():
                    tf.summary.scalar(metric_name, op[1])
                return tf.estimator.EstimatorSpec(mode, loss=loss,
                                                  eval_metric_ops=metrics)
            else:
                global_step = tf.train.get_global_step()
                learning_rate = exponential_decay(global_step, init_learning_rate, decay_steps, decay_rate)
                optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
                tf.summary.scalar('learning_rate', learning_rate)
                train_op = optimizer.minimize(loss=loss, global_step=global_step)
                return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
        else:
            predictions = {
                'probability': tf.reshape(predictions, [1, -1])
            }
            export_outputs = {
                'predictions': tf.estimator.export.PredictOutput(predictions)
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                              export_outputs=export_outputs)
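
Beispiel #37 partitions item_matrix only when the PS count is known from the cluster config passed in params['tf_config']. When running under tf.estimator, the same information is available from the standard TF_CONFIG environment variable; a sketch of that alternative (assuming TensorFlow 1.x; ps_count_from_env is a hypothetical helper):

import json
import os
import tensorflow as tf  # assumes TensorFlow 1.x

def ps_count_from_env(default=1):
    # Read the number of parameter servers from the standard TF_CONFIG variable.
    tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
    return len(tf_config.get("cluster", {}).get("ps", [])) or default

ps_num = ps_count_from_env()
partitioner = tf.fixed_size_partitioner(num_shards=ps_num) if ps_num > 1 else None
item_matrix = tf.get_variable("item_matrix_sketch", shape=(100, 16),
                              initializer=tf.glorot_uniform_initializer(),
                              partitioner=partitioner)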
Beispiel #38
0
  def _model_fn(inputs, context_indices, mode):
    if mode == ModeKeys.INFER:
      sparse_index_tensor = tf.string_split(
          [tf.read_file(vocab_file)],
          delimiter='\n'
      )
      index_tensor = tf.squeeze(tf.sparse_to_dense(
          sparse_index_tensor.indices,
          [1, vocab_size],
          sparse_index_tensor.values,
          default_value='UNK'
      ))
      reverse_index = tf.contrib.lookup.HashTable(
          tf.contrib.lookup.KeyValueTensorInitializer(
              index_tensor,
              tf.constant(range(vocab_size), dtype=tf.int64)
          ),
          0
      )
      target_indices = reverse_index.lookup(inputs)
    else:
      target_indices = inputs

    with tf.device(tf.train.replica_device_setter()):
      with tf.variable_scope('nce',
                             partitioner=tf.fixed_size_partitioner(
                                 num_partitions)):

        embeddings = tf.get_variable(
            'embeddings',
            shape=[vocab_size, embedding_size],
            dtype=tf.float32,
            initializer=tf.random_uniform_initializer(-1.0, 1.0)
        )
        if mode in [ModeKeys.TRAIN, ModeKeys.EVAL]:
          nce_weights = tf.get_variable(
              'nce_weights',
              shape=[vocab_size, embedding_size],
              dtype=tf.float32,
              initializer=tf.truncated_normal_initializer(
                  stddev=1.0 / math.sqrt(embedding_size)
              )
          )
          nce_biases = tf.get_variable(
              'nce_biases',
              shape=[vocab_size],
              initializer=tf.zeros_initializer(),
              dtype=tf.float32
          )

      tensors, loss, train_op = ({}, None, None)

      if mode in [ModeKeys.TRAIN, ModeKeys.EVAL]:
        embedded = tf.nn.embedding_lookup(embeddings, target_indices)

        loss = tf.reduce_mean(tf.nn.nce_loss(
            nce_weights,
            nce_biases,
            embedded,
            context_indices,
            num_sampled,
            vocab_size
        ))
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('training/hptuning/metric', loss)

        # Embedding Visualizer
        embedding_writer = tf.summary.FileWriter(output_path)
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = embeddings.name
        embedding.metadata_path = vocab_file
        projector.visualize_embeddings(embedding_writer, config)

      if mode == ModeKeys.TRAIN:
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate
        ).minimize(
            loss,
            global_step=tf.contrib.framework.get_or_create_global_step()
        )

      if mode == ModeKeys.INFER:
        # Compute the cosine similarity between examples and embeddings.
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(
            normalized_embeddings, tf.squeeze(target_indices))
        similarity = tf.matmul(
            valid_embeddings, normalized_embeddings, transpose_b=True)
        tensors['values'], predictions = tf.nn.top_k(
            similarity, sorted=True, k=num_sim)
        index_tensor = tf.concat([tf.constant(['UNK']), index_tensor], 0)
        tensors['predictions'] = tf.gather(index_tensor, predictions)

      return tensors, loss, train_op
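
tf.nn.nce_loss accepts partitioned weights and biases directly, which is what makes the scope-level partitioner in Beispiel #38 work. A condensed sketch of that part (assuming TensorFlow 1.x; the vocabulary and embedding sizes are made up):

import math
import tensorflow as tf  # assumes TensorFlow 1.x

vocab_size, embedding_size, num_sampled = 50000, 128, 64
num_partitions = 4  # hypothetical

with tf.variable_scope("nce_sketch",
                       partitioner=tf.fixed_size_partitioner(num_partitions)):
    embeddings = tf.get_variable(
        "embeddings", [vocab_size, embedding_size],
        initializer=tf.random_uniform_initializer(-1.0, 1.0))
    nce_weights = tf.get_variable(
        "nce_weights", [vocab_size, embedding_size],
        initializer=tf.truncated_normal_initializer(
            stddev=1.0 / math.sqrt(embedding_size)))
    nce_biases = tf.get_variable(
        "nce_biases", [vocab_size], initializer=tf.zeros_initializer())

target_ids = tf.placeholder(tf.int64, [None])      # center word ids
context_ids = tf.placeholder(tf.int64, [None, 1])  # context word ids (num_true=1)

embedded = tf.nn.embedding_lookup(embeddings, target_ids,
                                  partition_strategy="div")
# nce_loss handles the sharded weights/biases; "div" matches the variable layout.
loss = tf.reduce_mean(tf.nn.nce_loss(
    weights=nce_weights, biases=nce_biases, labels=context_ids,
    inputs=embedded, num_sampled=num_sampled, num_classes=vocab_size,
    partition_strategy="div"))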
Beispiel #39
0
    def build_inference(self, x, flag="train"):
        # Set up the regularizers, corresponding respectively to the four parts of the network
        regularizer1 = self.param_dict[
            "regulerizer1"] if flag == "train" else None
        regularizer2 = self.param_dict[
            "regulerizer2"] if flag == "train" else None
        regularizer3 = self.param_dict[
            "regulerizer3"] if flag == "train" else None
        regularizer4 = self.param_dict[
            "regulerizer4"] if flag == "train" else None
        is_train = True if flag == "train" else False
        # First fetch the parameters we need
        hash_size = self.param_dict['hash_size']
        no_hash = self.param_dict["no_hash"]
        embed_size = self.param_dict["embed_size"]
        # browse_nums = self.param_dict["browse_nums"] # browse_nums = [20, 10, 10]
        # Get the activation function according to the configuration
        act_fn = self.get_activation_func(is_train)
        # Whether to enable mini-batch aware regularization
        is_mba_reg = self.param_dict["is_mba_reg"]
        lambda_reg_mba = self.param_dict["lambda_reg_mba"]
        is_action_mba_reg = self.param_dict["is_action_mba_reg"]

        # Split the input into plain features and the action lists
        x_feature = x[:, :-3]
        x_action_lists = x[:, -3:]

        # First convert the sparse features into indices
        x_sparse = []
        for i in range(len(hash_size)):
            if i in no_hash:
                # These features can already be used as indices directly; no conversion needed
                x_i = tf.string_to_number(x_feature[:, i], tf.int32)
                x_sparse.append(x_i)
            else:
                # These features are converted into indices via a hash function
                x_i = tf.string_to_hash_bucket_strong(
                    input=x_feature[:, i],
                    num_buckets=hash_size[i],
                    key=[679362, 964545],
                    name="sparse_feature_{}".format(i))
                x_sparse.append(x_i)
        # Convert the sparse data into embedding vectors
        x_embed = []
        w_action_embed = []
        x_action = []
        indice_sku_cate_brand = []
        sku_cate_brand_index = self.param_dict["sku_cate_brand_index"]
        for i in range(len(embed_size)):
            if i in sku_cate_brand_index:  # embedding vectors for skuid, cateid and brandid
                with tf.variable_scope("embedding_{}".format(i)):
                    weights = self.get_weight_variable(
                        [hash_size[i], embed_size[i]],
                        regularizer1,
                        self.param_dict["initializer_embedding_w"](
                            [hash_size[i], embed_size[i]]),
                        partitioner=tf.fixed_size_partitioner(10, 0))
                    w_action_embed.append(weights)
                    x_i = tf.nn.embedding_lookup(weights, x_sparse[i])
                    if is_train and is_mba_reg and not is_action_mba_reg:
                        # Compute the mini-batch aware regularization term
                        self.calculate_mini_batch_aware_reg(
                            weights, x_sparse[i], lambda_reg_mba)

                    indice_sku_cate_brand.append(x_sparse[i])
                    x_embed.append(x_i)
                    x_action.append(x_i)
            else:
                if embed_size[i] != -1:
                    with tf.variable_scope("embedding_{}".format(i)):
                        if i == 0:
                            weights = self.get_weight_variable(
                                [hash_size[i], embed_size[i]],
                                regularizer1,
                                self.param_dict["initializer_embedding_w"](
                                    [hash_size[i], embed_size[i]]),
                                partitioner=tf.fixed_size_partitioner(10, 0))
                        else:
                            weights = self.get_weight_variable(
                                [hash_size[i], embed_size[i]], regularizer1,
                                self.param_dict["initializer_embedding_w"](
                                    [hash_size[i], embed_size[i]]))
                        x_i = tf.nn.embedding_lookup(weights, x_sparse[i])
                        if is_train and is_mba_reg:
                            # Compute the mini-batch aware regularization term
                            self.calculate_mini_batch_aware_reg(
                                weights, x_sparse[i], lambda_reg_mba)
                        x_embed.append(x_i)
                else:
                    x_i = tf.one_hot(x_sparse[i], depth=hash_size[i])
                    x_embed.append(x_i)
        x_embed = tf.concat(x_embed, 1)
        x_deep_in = x_embed
        is_usingg_user_act_feature = self.param_dict[
            "is_usingg_user_act_feature"]
        if is_usingg_user_act_feature:
            pooling_method = self.param_dict["pooling_method"]
            # Model the browsing behaviour and build the behaviour embedding vectors
            with tf.name_scope("user_behaviours"):
                x_browse_skus_list = tf.reshape(x_action_lists[:, 0], [
                    -1,
                ])
                x_browse_cates_list = tf.reshape(x_action_lists[:, 1], [
                    -1,
                ])
                x_browse_brand_list = tf.reshape(x_action_lists[:, 2], [
                    -1,
                ])
                browse_lists = [
                    x_browse_skus_list, x_browse_cates_list,
                    x_browse_brand_list
                ]
                browse_names = ['skus', 'cates', 'brands']
                x_action_list_embeds = []
                for i in range(len(browse_names)):
                    with tf.name_scope("user_browse_{}_embedding".format(
                            browse_names[i])):
                        browse_w_embed = w_action_embed[i]
                        # x_ad_embedded = x_action[i]
                        x_browse_action = browse_lists[
                            i]  # shape of x_browse_action is [?,]
                        x_browse_action_list = tf.string_split(
                            x_browse_action, "#")
                        x_browse_action_list_indices = tf.SparseTensor(
                            x_browse_action_list.indices,
                            tf.string_to_hash_bucket_strong(
                                x_browse_action_list.values,
                                num_buckets=browse_w_embed.get_shape()
                                [0].value,
                                key=[679362, 964545],
                                name="sparse_user_browse_{}".format(
                                    browse_names[i])),
                            x_browse_action_list.dense_shape,
                        )
                        x_action_list_embed = tf.nn.embedding_lookup_sparse(
                            browse_w_embed,
                            sp_ids=x_browse_action_list_indices,
                            sp_weights=None,
                            combiner=pooling_method)
                        if is_train and is_action_mba_reg:
                            # Compute the mini-batch aware regularization term
                            indice_action = tf.concat([
                                tf.string_to_hash_bucket_strong(
                                    x_browse_action_list.values,
                                    num_buckets=browse_w_embed.get_shape()
                                    [0].value,
                                    key=[679362, 964545]),
                                indice_sku_cate_brand[i]
                            ], 0)
                            self.calculate_mini_batch_aware_reg(
                                browse_w_embed, indice_action, lambda_reg_mba)
                        x_action_list_embeds.append(x_action_list_embed)
                x_deep_in = tf.concat(
                    [x_deep_in, tf.concat(x_action_list_embeds, 1)], 1)

        # Build the deep module
        with tf.name_scope("deep_network"):
            deep_layers = self.param_dict["deep_layers"]
            for i in range(len(deep_layers)):
                with tf.variable_scope("dnn_layer_{}".format(i)):
                    weights = self.get_weight_variable(
                        [x_deep_in.shape[1].value, deep_layers[i]],
                        regularizer2, self.param_dict["initializer_dnn_w"](
                            [x_deep_in.shape[1].value, deep_layers[i]]))
                    biases = tf.get_variable(
                        "biases", [deep_layers[i]],
                        initializer=tf.constant_initializer(0.0),
                        dtype=tf.float32)
                    layer_i = act_fn(tf.matmul(x_deep_in, weights) + biases,
                                     name="deep_mlp_{}".format(i))
                    x_deep_in = layer_i

        # Build the fully connected output module
        x_fc_in = x_deep_in
        with tf.name_scope("fc_layers"):
            fc_layers = self.param_dict['fc_layers']
            for i in range(len(fc_layers)):
                with tf.variable_scope("fc_layers_{}".format(i)):
                    weights = self.get_weight_variable(
                        [x_fc_in.shape[1].value, fc_layers[i]], regularizer4,
                        self.param_dict["initializer_fc_w"](
                            [x_fc_in.shape[1].value, fc_layers[i]]))
                    biases = tf.get_variable(
                        "biases", [fc_layers[i]],
                        initializer=tf.constant_initializer(0.0),
                        dtype=tf.float32)
                    layer_i = tf.nn.sigmoid(
                        tf.matmul(x_fc_in, weights) + biases)
                    x_fc_in = layer_i
        logit = x_fc_in
        return logit
Beispiel #40
0
    def calc_vectors(self):
        with tf.variable_scope("calc_vectors",
                               values=tuple(six.itervalues(self._features))):
            # in this simple case, create ClkI x itemId seq model
            ClkI_sparse = self._features["ClkI"]
            ClkI_seq_len = get_sequence_length(ClkI_sparse)
            itemId_dense = self._features["item_id"]
            itemId_dense = tf.reshape(itemId_dense, shape=[-1])
            print("itemId_dense: ", itemId_dense)

            # step-1, hash raw features
            itemId_hash = tf.string_to_hash_bucket_fast(
                itemId_dense,
                self._itemId_hash_bucket_size)
            ClkI_val_hash = tf.string_to_hash_bucket_fast(
                ClkI_sparse.values,
                self._itemId_hash_bucket_size)
            print("itemId_hash: ", itemId_hash)
            print("ClkI_val_hash: ", ClkI_val_hash)

            # step-1.1, sparse_to_dense
            ClkI_hash = tf.sparse_to_dense(sparse_indices=ClkI_sparse.indices,
                                           output_shape=ClkI_sparse.dense_shape,
                                           sparse_values=ClkI_val_hash)

            # step-2, embedding lookup
            with tf.variable_scope("embedding_tables", reuse=False):
                _itemId_emb_shape = [self._itemId_hash_bucket_size,
                                     self._itemId_embedding_size]
                self._itemId_emb_tab = tf.contrib.framework.model_variable(
                    name="item_id_embedding/weights",
                    shape=_itemId_emb_shape,
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(
                        mean=0.,
                        stddev=1./math.sqrt(self._itemId_hash_bucket_size)),
                    trainable=True,
                    collections=[self._root_scope],
                    partitioner=tf.fixed_size_partitioner(10, axis=0))

            with tf.name_scope("embedding_lookup",
                               values=(self._itemId_emb_tab, itemId_hash, ClkI_hash)):
                itemId_emb = tf.nn.embedding_lookup(self._itemId_emb_tab, itemId_hash)
                ClkI_emb = tf.nn.embedding_lookup(self._itemId_emb_tab, ClkI_hash)
                self._itemId_emb = itemId_emb

            # step-3, proccess sequence_to_vector
            def _atten_fn_mlp(a, b):
                _hidden_units = self._hidden_units
                with tf.variable_scope("atten_fn_linear",
                                       reuse=tf.AUTO_REUSE,
                                       values=(a,b)) as atten_fn_scope:
                    size_a = a.shape[1]
                    size_b = b.shape[1]
                    c = tf.matmul(
                        tf.reshape(a, shape=[-1, size_a, 1]),
                        tf.reshape(b, shape=[-1, 1, size_b]),
                        name="similarity")
                    c = tf.reshape(c, shape=[-1, size_a * size_b])
                    net = tf.concat([a,b,c], axis=1)
                    for layer_id, num_units in enumerate(_hidden_units):
                        with tf.variable_scope(
                                "hidden_layer_%d" % layer_id,
                                values=(net,)) as hidden_layer_scope:
                           net = tf.contrib.layers.fully_connected(
                               net, num_units,
                               activation_fn=tf.nn.relu,
                               variables_collections=[self._root_scope],
                               scope=hidden_layer_scope)
                    _output = tf.contrib.layers.fully_connected(
                        net, 1,
                        activation_fn=tf.exp,
                        variables_collections=[self._root_scope],
                        scope=atten_fn_scope)
                    return _output
            
            atten_params = {
                "proc_type": "atten",
                "target_values": self._itemId_emb,
                "atten_fn": _atten_fn_mlp,
                "normalize_weights": False,
            }
            outputs, ClkI_vec = sequence_to_vector(
                Seq(emb=ClkI_emb, seq_len=ClkI_seq_len), atten_params)
            self._clki_vec = ClkI_vec
            self._vectors = {
                "seq2vec_ClkI_vec": self._clki_vec,
                "seq2vec_itemId_vec": self._itemId_emb,
                }
            return self._vectors
def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16',
           fc_conv_padding='VALID',
           global_pool=False):
    """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer is
      omitted and the input features to the logits layer are returned instead.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      'VALID' padding.
    global_pool: Optional boolean flag. If True, the input to the classification
      layer is avgpooled to size 1x1, for any input size. (This is not part
      of the original VGG architecture.)

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the input to the logits layer (if num_classes is 0 or None).
    end_points: a dict of tensors with intermediate activations.
  """
    #with tf.variable_scope(scope, 'vgg_16', [inputs])
    partitioner = tf.fixed_size_partitioner(2, axis=0)
    #  with tf.variable_scope(scope, 'vgg_16', [inputs], partitioner=partitioner) as sc:
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')

            # Use conv2d instead of fully_connected layers. NOTE: this
            # hard-coded padding overrides the fc_conv_padding argument
            # documented above ('VALID' by default).
            fc_conv_padding = 'SAME'
            net = slim.conv2d(net,
                              4096, [7, 7],
                              padding=fc_conv_padding,
                              scope='fc6')
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if global_pool:
                net = tf.reduce_mean(net, [1, 2],
                                     keep_dims=True,
                                     name='global_pool')
                end_points['global_pool'] = net
            if num_classes:
                net = slim.dropout(net,
                                   dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout7')
                net = slim.conv2d(net,
                                  num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
                if spatial_squeeze:
                    net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
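
The example above prepares a 2-way partitioner but leaves the partitioned scope commented out. Below is a minimal sketch (not part of the original example) of how it could be applied, assuming TF 1.x graph mode and that the vgg_16 above and its slim import are in scope; the placeholder shape is illustrative.

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
# Opening an outer scope with a partitioner is enough: nested variable scopes
# inherit it, so the conv/fc kernels created through tf.get_variable should
# come back as PartitionedVariables split into two shards along axis 0.
with tf.variable_scope('partitioned',
                       partitioner=tf.fixed_size_partitioner(2, axis=0)):
    logits, end_points = vgg_16(images, num_classes=1000)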
Beispiel #42
0
def create_emb_for_encoder_and_decoder(share_vocab,
                                       src_vocab_size,
                                       tgt_vocab_size,
                                       src_embed_size,
                                       tgt_embed_size,
                                       dtype=tf.float32,
                                       num_partitions=0,
                                       src_vocab_file=None,
                                       tgt_vocab_file=None,
                                       src_embed_file=None,
                                       tgt_embed_file=None,
                                       scope=None):
    """Creating embedding matrix for both encoder and decoder
    
    Args:
        share_vocab: A boolean. Whether to share embedding matrix for 
            encoder and decoder.
        src_vocab_size: An integer. The source vocab size.
        tgt_vocab_size: An integer. The target vocab size.
        src_embed_size: An integer. The embedding dimension for encoder's 
            embedding.
        tgt_embed_size: 
        dtype: dtype fo the embedding matrix. Default to tf.flotat32
        num_partitions: #partitions used for embedding var.
        scope: VariableScope for created subgraph. Default to "embedding"
    
    Return:
        embedding_encoder: Encoder's embedding matrix
        embedding_decoder: Decoder's embedding matrix
    
    Raises:
        share_vocab=True, yet the src_vocab_size and tgt_vocab_size is different        
    
    """
    
    if num_partitions <= 1:
        partitioner = None
    else:
        partitioner = tf.fixed_size_partitioner(num_partitions)
    
    if (src_embed_file or tgt_embed_file) and partitioner:
        raise ValueError(
            "Can't set num_partitions > 1 when using pretrained embeddings")
    
    with tf.variable_scope(
            scope or "embedding", dtype=dtype, partitioner=partitioner) as scope:
        if share_vocab:
            if src_vocab_size != tgt_vocab_size:
                raise ValueError(
                    "Share embedding but different src/tgt vocab sizes %d vs. %d" %
                    (src_vocab_size, tgt_vocab_size))
                
            assert src_embed_size == tgt_embed_size
            utils.print_out("Share embedding")
            
            vocab_file = src_vocab_file or tgt_vocab_file
            embed_file = src_embed_file or tgt_embed_file
            
            embedding_encoder = _create_or_load_embed(
                    "embedding_share", vocab_file, embed_file,
                    src_vocab_size, src_embed_size, dtype)
            embedding_decoder = embedding_encoder
        
        else:            
            with tf.variable_scope("encoder", partitioner=partitioner):
                embedding_encoder = _create_or_load_embed(
                        "embedding_encoder", src_vocab_file,
                        src_embed_file, src_vocab_size, src_embed_size, dtype)
                
            with tf.variable_scope("decoder", partitioner=partitioner):
                embedding_decoder = _create_or_load_embed(
                        "embedding_decoder", tgt_vocab_file,
                        tgt_embed_file, tgt_vocab_size, tgt_embed_size, dtype)
    
    return embedding_encoder, embedding_decoder
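
A self-contained sketch (illustrative sizes, TF 1.x) of what the partitioner above buys: the embedding table is created as a sharded variable, and tf.nn.embedding_lookup accepts the resulting PartitionedVariable directly, so the whole table no longer has to be colocated with the lookup op.

import tensorflow as tf

with tf.variable_scope("embedding",
                       partitioner=tf.fixed_size_partitioner(4)):
    # A 10000 x 128 table split into 4 shards of 2500 rows each.
    embedding_encoder = tf.get_variable(
        "embedding_encoder", shape=[10000, 128], dtype=tf.float32)

source_ids = tf.placeholder(tf.int32, shape=[None])
encoder_inputs = tf.nn.embedding_lookup(embedding_encoder, source_ids)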
def train(target, dataset, cluster_spec):
  """Train Inception on a dataset for a number of steps."""
  # Number of workers and parameter servers are inferred from the workers and ps
  # hosts string.
  num_workers = len(cluster_spec.as_dict()['worker'])
  num_parameter_servers = len(cluster_spec.as_dict()['ps'])
  # If no value is given, num_replicas_to_aggregate defaults to be the number of
  # workers.
  if FLAGS.num_replicas_to_aggregate == -1:
    num_replicas_to_aggregate = num_workers
  else:
    num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

  # Both should be greater than 0 in a distributed training.
  assert num_workers > 0 and num_parameter_servers > 0, (' num_workers and '
                                                         'num_parameter_servers'
                                                         ' must be > 0.')

  # Choose worker 0 as the chief. Note that any worker could be the chief
  # but there should be only one chief.
  is_chief = (FLAGS.task_id == 0)

  #batchSizeManager = BatchSizeManager(32, 4)

  # Ops are assigned to worker by default.
  tf.logging.info('cccc-num_parameter_servers:'+str(num_parameter_servers))
  partitioner = tf.fixed_size_partitioner(num_parameter_servers, 0)  

  device_setter = tf.train.replica_device_setter(ps_tasks=num_parameter_servers)
  slim = tf.contrib.slim
  with tf.device('/job:worker/task:%d' % FLAGS.task_id):
   with tf.variable_scope('root', partitioner=partitioner):
    # Variables and its related init/assign ops are assigned to ps.
#    with slim.arg_scope(
#        [slim.variables.variable, slim.variables.global_step],
#        device=slim.variables.VariableDeviceChooser(num_parameter_servers)):
    with tf.device(device_setter):
#	partitioner=partitioner):
      # Create a variable to count the number of train() calls. This equals the
      # number of updates applied to the variables.
#      global_step = slim.variables.global_step()
      global_step = tf.Variable(0, trainable=False)

      # Calculate the learning rate schedule.

      batch_size = tf.placeholder(dtype=tf.int32, shape=(), name='batch_size')
      num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                               FLAGS.batch_size)
      # Decay steps need to be divided by the number of replicas to aggregate.
      decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay /
                        num_replicas_to_aggregate)

      # Decay the learning rate exponentially based on the number of steps.
      lr = tf.train.exponential_decay(FLAGS.initial_learning_rate*num_workers,
                                      global_step,
                                      decay_steps,
                                      FLAGS.learning_rate_decay_factor,
                                      staircase=True)
      # Add a summary to track the learning rate.
#      tf.summary.scalar('learning_rate', lr)

      # Create an optimizer that performs gradient descent.
      opt = tf.train.RMSPropOptimizer(lr,
                                      RMSPROP_DECAY,
                                      momentum=RMSPROP_MOMENTUM,
                                      epsilon=RMSPROP_EPSILON)

      images, labels = image_processing.distorted_inputs(
          dataset,
          batch_size,
          num_preprocess_threads=FLAGS.num_preprocess_threads)
      print(images.get_shape())
      print(labels.get_shape())

      # Number of classes in the Dataset label set plus 1.
      # Label 0 is reserved for an (unused) background class.
#      num_classes = dataset.num_classes() + 1
      num_classes = dataset.num_classes()
      print(num_classes)
#      logits = inception.inference(images, num_classes, for_training=True)
      network_fn = nets_factory.get_network_fn('inception_v3',num_classes=num_classes) 
      (logits,_) = network_fn(images)
      print(logits.get_shape())
      # Add classification loss.
#      inception.loss(logits, labels, batch_size)

      # Gather all of the losses including regularization losses.
      labels = tf.one_hot(labels, 1000, 1, 0)
      cross_entropy = tf.losses.softmax_cross_entropy(
          logits=logits, 
          onehot_labels=labels)
#      losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
#      losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
      losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
      total_loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
          [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

#      total_loss = tf.add_n(losses, name='total_loss')

      if is_chief:
        # Compute the moving average of all individual losses and the
        # total loss.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        # Attach a scalar summary to all individual losses and the total loss;
        # do the same for the averaged version of the losses.
#        for l in losses + [total_loss]:
#          loss_name = l.op.name
          # Name each loss as '(raw)' and name the moving average version of the
          # loss as the original loss name.
#          tf.summary.scalar(loss_name + ' (raw)', l)
#          tf.summary.scalar(loss_name, loss_averages.average(l))

        # Add dependency to compute loss_averages.
        with tf.control_dependencies([loss_averages_op]):
          total_loss = tf.identity(total_loss)

      # Track the moving averages of all trainable variables.
      # Note that we maintain a 'double-average' of the BatchNormalization
      # global statistics.
      # This is not needed when the number of replicas is small, but it is important
      # for synchronous distributed training with tens of workers/replicas.
      exp_moving_averager = tf.train.ExponentialMovingAverage(
          MOVING_AVERAGE_DECAY, global_step)

      variables_to_average = (
          tf.trainable_variables() + tf.moving_average_variables())

      # Add histograms for model variables.
#      for var in variables_to_average:
#        tf.summary.histogram(var.op.name, var)

      # Create synchronous replica optimizer.
      opt = tf.train.SyncReplicasOptimizer(
          opt,
          replicas_to_aggregate=num_replicas_to_aggregate,
          total_num_replicas=num_workers,
          variable_averages=exp_moving_averager,
          variables_to_average=variables_to_average)

#      batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)
#      assert batchnorm_updates, 'Batchnorm updates are missing'
#      batchnorm_updates_op = tf.group(*batchnorm_updates)
#      # Add dependency to compute batchnorm_updates.
#      with tf.control_dependencies([batchnorm_updates_op]):
#        total_loss = tf.identity(total_loss)

      # Compute gradients with respect to the loss.
      # grads = opt.compute_gradients(total_loss)
      grads0 = opt.compute_gradients(total_loss) 
      grads = [(tf.scalar_mul(tf.cast(batch_size/FLAGS.batch_size, tf.float32), grad), var) for grad, var in grads0]

      # Add histograms for gradients.
#      for grad, var in grads:
#        if grad is not None:
#          tf.summary.histogram(var.op.name + '/gradients', grad)

      apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)

      with tf.control_dependencies([apply_gradients_op]):
        train_op = tf.identity(total_loss, name='train_op')

      # Get chief queue_runners and init_tokens, which are used to synchronize
      # replicas. More details can be found in SyncReplicasOptimizer.
      chief_queue_runners = [opt.get_chief_queue_runner()]
      init_tokens_op = opt.get_init_tokens_op()

      # Create a saver.
      saver = tf.train.Saver()

      # Build the summary operation based on the TF collection of Summaries.
#      summary_op = tf.summary.merge_all()

      # Build an initialization operation to run below.
      init_op = tf.global_variables_initializer()

      # We run the summaries in the same thread as the training operations by
      # passing in None for summary_op to avoid a summary_thread being started.
      # Running summaries and training operations in parallel could run out of
      # GPU memory.
      sv = tf.train.Supervisor(is_chief=is_chief,
                               logdir=FLAGS.train_dir,
                               init_op=init_op,
                               summary_op=None,
                               global_step=global_step,
                               recovery_wait_secs=1,
                               saver=None,
                               save_model_secs=FLAGS.save_interval_secs)

      tf.logging.info('%s Supervisor' % datetime.now())

      sess_config = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=FLAGS.log_device_placement)

      # Get a session.
      sess = sv.prepare_or_wait_for_session(target, config=sess_config)

      # Start the queue runners.
      queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
      sv.start_queue_runners(sess, queue_runners)
      tf.logging.info('Started %d queues for processing input data.',
                      len(queue_runners))

      if is_chief:
        sv.start_queue_runners(sess, chief_queue_runners)
        sess.run(init_tokens_op)

      # Train, checking for Nans. Concurrently run the summary operation at a
      # specified interval. Note that the summary_op and train_op never run
      # simultaneously in order to prevent running out of GPU memory.
#      next_summary_time = time.time() + FLAGS.save_summaries_secs
      step = 0
      time0 = time.time()
      batch_size_num = 1
      while not sv.should_stop():
        try:
          start_time = time.time()

          batch_size_num = 32
          batch_size_num = 2 * int(step / 5) + 16
#	   batch_size_num = int((int(step)/3*10)) % 100000 + 1
#          if step < 5:
#            batch_size_num = 32 
#          batch_size_num = (batch_size_num ) % 64 + 1
#          else:
#            batch_size_num = 80

          run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
          run_metadata = tf.RunMetadata()

          my_images, loss_value, step = sess.run([images, train_op, global_step], feed_dict={batch_size: batch_size_num}, options=run_options, run_metadata=run_metadata)
          b = time.time()
#          assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
          if step > FLAGS.max_steps:
            break
          duration = time.time() - start_time
          thread = threading2.Thread(target=get_computation_time,
                                     name="get_computation_time",
                                     args=(run_metadata.step_stats, step,))
          thread.start()
#          tl = timeline.Timeline(run_metadata.step_stats)
#          last_batch_time = tl.get_local_step_duration('sync_token_q_Dequeue')
          c0 = time.time()
#          batch_size_num = batchSizeManager.dictate_new_batch_size(FLAGS.task_id, last_batch_time)
#          batch_size_num = rpcClient.update_batch_size(FLAGS.task_id, last_batch_time, available_cpu, available_memory, step, batch_size_num) 
#          ctf = tl.generate_chrome_trace_format()
#          with open("timeline.json", 'a') as f:
#            f.write(ctf)

          if step % 1 == 0:
            examples_per_sec = FLAGS.batch_size / float(duration)
            c = time.time()
            tf.logging.info("time statistics" + " - train_time: " + str(b-start_time) + " - get_batch_time: " + str(c0-b) + " - get_bs_time:  " + str(c-c0) + " - accum_time: " + str(c-time0) + " - batch_size: " + str(batch_size_num))
            format_str = ('Worker %d: %s: step %d, loss = %.2f'
                          '(%.1f examples/sec; %.3f  sec/batch)')
            tf.logging.info(format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))

          # Determine if the summary_op should be run on the chief worker.
#          if is_chief and next_summary_time < time.time():
#            tf.logging.info('Running Summary operation on the chief.')
#            summary_str = sess.run(summary_op)
#            sv.summary_computed(sess, summary_str)
#            tf.logging.info('Finished running Summary operation.')

            # Determine the next time for running the summary.
#            next_summary_time += FLAGS.save_summaries_secs
        except:
          if is_chief:
            tf.logging.info('Chief got exception while running!')
          raise

      # Stop the supervisor.  This also waits for service threads to finish.
      sv.stop()
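
A reduced sketch (hypothetical two-PS cluster, TF 1.x) of the placement pattern used in the training function above: replica_device_setter assigns variables to parameter servers round-robin, and the fixed-size partitioner splits large variables so their shards can land on different ps tasks.

import tensorflow as tf

cluster = tf.train.ClusterSpec({
    'ps': ['ps0:2222', 'ps1:2222'],   # hypothetical hosts
    'worker': ['worker0:2222'],
})
with tf.device(tf.train.replica_device_setter(cluster=cluster)):
    with tf.variable_scope('root',
                           partitioner=tf.fixed_size_partitioner(2, axis=0)):
        w = tf.get_variable('w', shape=[1024, 256])

# A PartitionedVariable is iterable over its shards; each shard carries the
# device chosen by the setter (e.g. /job:ps/task:0 and /job:ps/task:1).
for shard in w:
    print(shard.op.name, shard.device)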
Beispiel #44
0
    def _initialize_parameters(self, hparams, ppm):

        K = np.float32(self.K)

        su, tu, a, b, self.size_u = (hparams['su'], hparams['tu'], hparams['a'], hparams['b'], hparams['size_u'])
        si, ti, c, d, self.size_i = (hparams['si'], hparams['ti'], hparams['c'], hparams['d'], hparams['size_i'])

        with tf.name_scope("hparams"), tf.device(self.device):
            ## Hyperparameters
            self.lsu = tf.Variable(softplus_inverse(-hparams['su'] + 1.), dtype=tf.float32, name="lsu")
            self.su = -tf.nn.softplus(self.lsu) + 1.

            self.tu = tf.Variable(hparams['tu'], dtype=tf.float32, name="tu")

            self.a = tf.Variable(hparams['a'], dtype=tf.float32, name="a")
            self.b = tf.Variable(hparams['b'], dtype=tf.float32, name="b")

            self.lsi = tf.Variable(softplus_inverse(-hparams['si'] + 1.), dtype=tf.float32, name="lsi")
            self.si = -tf.nn.softplus(self.lsi) + 1.

            self.ti = tf.Variable(hparams['ti'], dtype=tf.float32, name="ti")

            self.c = tf.Variable(hparams['c'], dtype=tf.float32, name="c")
            self.d = tf.Variable(hparams['d'], dtype=tf.float32, name="d")

        e = np.sum(self.edge_vals_d, dtype=np.float32)

        # initial values for total user and total item masses of type K
        # set st \sum_k tim_k * tum_k = e (which is in fact a bit higher than it oughta be)
        # and using item_mass / user_mass ~ item_size / user_size (which is only kind of true)
        tum_init = np.sqrt(self.size_u / self.size_i * e / K)
        tim_init = np.sqrt(self.size_i / self.size_u * e / K)

        with tf.name_scope("user_params"), tf.device(self.device):
            # shape params are read off immediately from update equations
            # rate params set to be consistent w \gam_i ~ 1, \sum_j beta_jk beta_k ~ \sqrt(e/k) (which is self consistent)
            if ppm:
                # If creating the principled predictive model (ppm), the user_degree is
                # not available. Just create a random initialization for now; it is
                # overwritten with a default value later.
                self.gam_shp = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_shp")
                self.gam_rte = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_rte")
                self.theta_shp = tf.Variable(tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp")
                self.theta_rte = tf.Variable(tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed), name="theta_rte")
                self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g")
            else:
                user_degs = np.expand_dims(self.user_degree, axis=1)
                self.gam_shp = tf.Variable((user_degs - su), name="gam_shp")  # s^U
                self.gam_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.U, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="gam_rte")  # r^U
                init_gam_mean = self.gam_shp.initial_value / self.gam_rte.initial_value
                self.theta_shp = tf.Variable((a + user_degs/K) * tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp")  # kap^U
                self.theta_rte = tf.Variable((b + init_gam_mean * tim_init)*(0.9 + 0.1*tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed)), name="theta_rte")  # lam^U
                self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g")  # g


        with tf.name_scope("item_params"), tf.device(self.device):
            ## Items
            if ppm:
                self.omega_shp = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), name="omega_shp")  # s^I
                self.omega_rte = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="omega_rte")  # r^I
                self.beta_shp = tf.Variable(tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp")  # kap^I
                self.beta_rte = tf.Variable(tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed), name="beta_rte")  # lam^I
                self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w")  # w
            else:
                item_degs = np.expand_dims(self.item_degree, axis=1)
                self.omega_shp = tf.Variable((item_degs - si), name="omega_shp")  # s^I
                self.omega_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.I, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="omega_rte")  # r^I
                init_omega_mean = self.omega_shp.initial_value / self.omega_rte.initial_value
                self.beta_shp = tf.Variable((c + item_degs/K) * tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp")  # kap^I
                self.beta_rte = tf.Variable((d + init_omega_mean*tum_init) * (0.9 + 0.1*tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed)), name="beta_rte")  # lam^I
                self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w")  # w

        with tf.device('/cpu:0'):
            with tf.variable_scope("edge_params", reuse=None):
                ## Edges
                if self.simple_graph:
                    # set init value so there's approximately 1 expected edge between each pair... WARNING: this may be profoundly stupid
                    self.sg_edge_param = tf.get_variable(name="sg_edge_param", shape=[self.occupied_pairs, self.K], dtype=tf.float32,
                                    initializer=tf.random_normal_initializer(mean=-np.log(K), stddev=1. / K, seed=self.seed),
                                    partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0))
                else:
                    self.lphi = tf.get_variable(name="lphi", shape=[self.occupied_pairs, self.K], dtype=tf.float32,
                                    initializer=tf.random_normal_initializer(mean=0, stddev=1. / K, seed=self.seed),
                                    partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0))

        with tf.name_scope("variational_post"), tf.device(self.device):

            # Variational posterior distributions
            self.q_gam = Gamma(concentration=self.gam_shp, rate=self.gam_rte, name="q_gam")
            self.q_theta = Gamma(concentration=self.theta_shp, rate=self.theta_rte, name="q_theta")
            self.q_g = PointMass(self.g, name="q_g")

            self.q_omega = Gamma(concentration=self.omega_shp, rate=self.omega_rte, name="q_omega")
            self.q_beta = Gamma(concentration=self.beta_shp, rate=self.beta_rte, name="q_beta")
            self.q_w = PointMass(self.w, name="q_w")

            if self.simple_graph:
                self.q_e_aux_vals = tPoissonMulti(log_lams=self.sg_edge_param, name="q_e_aux_vals") # q_edges_aux_flat
            else:
                self.q_e_aux_vals = Multinomial(total_count=self.edge_vals, logits=self.lphi, name="q_e_aux_vals") # q_edges_aux_flat
                self.q_e_aux_vals_mean = self.q_e_aux_vals.mean()

        with tf.name_scope("degree_vars"):
            # create some structures to make it easy to work with the expected value (wrt q) of the edges

            # qm_du[u,k] is the expected weighted degree of user u counting only edges of type k
            # qm_du[u,k] = E_q[e^k_i.] in the language of the paper
            # Initialized arbitrarily; overridden at the end of init with the actual
            # expected values. We use a tf.Variable here to cache the
            # q_e_aux_vals.mean() value.
            self.qm_du = tf.Variable(tf.ones([self.U, self.K], dtype=tf.float32), name="qm_du")
            self.qm_di = tf.Variable(tf.ones([self.I, self.K], dtype=tf.float32), name="qm_di")

        # Total Item Mass:
        self.i_tot_mass_m = self.q_w.mean() + tf.matmul(self.q_beta.mean(), self.q_omega.mean(), transpose_a=True)
        # Total User Mass:
        self.u_tot_mass_m = self.q_g.mean() + tf.matmul(self.q_theta.mean(), self.q_gam.mean(), transpose_a=True)
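
A reduced, self-contained sketch (TF 1.x; occupied_pairs, K and edge_param_splits below are illustrative stand-ins for the attributes used above) of the "edge_params" block: a large [pairs, K] variable kept on the CPU, initialized with a normal initializer and split into a fixed number of row shards.

import tensorflow as tf

occupied_pairs, K, edge_param_splits = 100000, 8, 4
with tf.device('/cpu:0'), tf.variable_scope('edge_params'):
    lphi = tf.get_variable(
        'lphi', shape=[occupied_pairs, K], dtype=tf.float32,
        initializer=tf.random_normal_initializer(mean=0., stddev=1. / K),
        partitioner=tf.fixed_size_partitioner(edge_param_splits, 0))

print(len(list(lphi)))  # 4 shards, each of shape [25000, 8]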
Beispiel #45
0
def create_emb_for_encoder_and_decoder(share_vocab,
                                       src_vocab_size,
                                       tgt_vocab_size,
                                       src_embed_size,
                                       tgt_embed_size,
                                       dtype=tf.float32,
                                       num_partitions=0,
                                       src_vocab_file=None,
                                       tgt_vocab_file=None,
                                       src_embed_file=None,
                                       tgt_embed_file=None,
                                       scope=None):
  """Create embedding matrix for both encoder and decoder.

  Args:
    share_vocab: A boolean. Whether to share embedding matrix for both
      encoder and decoder.
    src_vocab_size: An integer. The source vocab size.
    tgt_vocab_size: An integer. The target vocab size.
    src_embed_size: An integer. The embedding dimension for the encoder's
      embedding.
    tgt_embed_size: An integer. The embedding dimension for the decoder's
      embedding.
    dtype: dtype of the embedding matrix. Default to float32.
    num_partitions: number of partitions used for the embedding vars.
    scope: VariableScope for the created subgraph. Default to "embedding".

  Returns:
    embedding_encoder: Encoder's embedding matrix.
    embedding_decoder: Decoder's embedding matrix.

  Raises:
    ValueError: if share_vocab is set but source and target have different
      vocab sizes.
  """

  if num_partitions <= 1:
    partitioner = None
  else:
    # Note: num_partitions > 1 is required for distributed training because
    # embedding_lookup tries to colocate a single, un-partitioned embedding
    # variable with the lookup ops, which may cause the embedding variables
    # to be placed on worker jobs.
    partitioner = tf.fixed_size_partitioner(num_partitions)

  if (src_embed_file or tgt_embed_file) and partitioner:
    raise ValueError(
        "Can't set num_partitions > 1 when using pretrained embedding")

  with tf.variable_scope(
      scope or "embeddings", dtype=dtype, partitioner=partitioner) as scope:
    # Share embedding
    if share_vocab:
      if src_vocab_size != tgt_vocab_size:
        raise ValueError("Share embedding but different src/tgt vocab sizes"
                         " %d vs. %d" % (src_vocab_size, tgt_vocab_size))
      assert src_embed_size == tgt_embed_size
      utils.print_out("# Use the same embedding for source and target")
      vocab_file = src_vocab_file or tgt_vocab_file
      embed_file = src_embed_file or tgt_embed_file

      embedding_encoder = _create_or_load_embed(
          "embedding_share", vocab_file, embed_file,
          src_vocab_size, src_embed_size, dtype)
      embedding_decoder = embedding_encoder
    else:
      with tf.variable_scope("encoder", partitioner=partitioner):
        embedding_encoder = _create_or_load_embed(
            "embedding_encoder", src_vocab_file, src_embed_file,
            src_vocab_size, src_embed_size, dtype)

      with tf.variable_scope("decoder", partitioner=partitioner):
        embedding_decoder = _create_or_load_embed(
            "embedding_decoder", tgt_vocab_file, tgt_embed_file,
            tgt_vocab_size, tgt_embed_size, dtype)

  return embedding_encoder, embedding_decoder
Beispiel #46
0
  def __init__(self, num_units, mem_input,
               use_peepholes=False, cell_clip=None,
               initializer=None, num_proj=None, proj_clip=None,
               num_unit_shards=None, num_proj_shards=None,
               forget_bias=1.0, state_is_tuple=True,
               activation=None, reuse=None, name=None, dtype=None,
               use_beam=False,
               hps=None):
    """Initialize the HyperLSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      mem_input: mem_input.
      use_peepholes: bool, use peephole connections or not.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      num_proj_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      forget_bias: float, The bias added to forget gates (see above).
        Must set to `0.0` manually when restoring from CudnnLSTM-trained
        checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: String, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.
      dtype: Default dtype of the layer (default of `None` means use the type
        of the first input). Required when `build` is called before `call`.
      use_beam: Use beam search or not.
      hps: hyperparameters.
    """

    super(HyperLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
    if not state_is_tuple:
      tf.logging.warn("%s: Using a concatenated state is slower and will soon "
                      "be deprecated.  Use state_is_tuple=True.", self)
    if num_unit_shards is not None or num_proj_shards is not None:
      tf.logging.warn(
          "%s: The num_unit_shards and proj_unit_shards parameters are "
          "deprecated and will be removed in Jan 2017.  "
          "Use a variable scope with a partitioner instead.", self)

    assert not use_peepholes, "currently not supporting peephole connections"
    assert hps is not None
    # Inputs must be 2-dimensional.
    self.input_spec = tf.layers.InputSpec(ndim=2)

    self._num_units = num_units
    self._rank = hps.rank
    assert self._rank == self._num_units or self._rank == 2 * self._num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or tf.tanh
    self._sigma_norm = hps.sigma_norm
    self._beam_width = hps.beam_width
    self._mem_input = mem_input
    self._use_beam = use_beam

    if num_proj:
      self._state_size = (
          tf.nn.rnn_cell.LSTMStateTuple(num_units, num_proj)
          if state_is_tuple else num_units + num_proj)
      self._output_size = num_proj
    else:
      self._state_size = (
          tf.nn.rnn_cell.LSTMStateTuple(num_units, num_units)
          if state_is_tuple else 2 * num_units)
      self._output_size = num_units

    input_depth = hps.emb_dim + hps.decoder_dim
    # if hps.encode_neighbor:
    #   input_depth += hps.decoder_dim
    h_depth = self._num_units if self._num_proj is None else self._num_proj

    maybe_partitioner = (
        tf.fixed_size_partitioner(self._num_unit_shards)
        if self._num_unit_shards is not None else None)

    # `u`s are matrices of [input_shape, rank], `v`s being [rank, hidden_size]
    # they are the collection of rank-1 parameter matrices.
    # The full parameter matrix is constructed by taking `U\sigma V`,
    # with diagonal matrix `\sigma` computed in the `self.initialize` function.

    redundant_rank = (self._rank > self._num_units)
    # `u`, `v` used to construct matrix from input `x` to input_gate `i`.
    u_xi, v_xi = self._orthogonal_init(
        shape=[input_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_xi = tf.get_variable(
        "u_xi/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_xi,
        partitioner=maybe_partitioner)
    self._v_xi = tf.get_variable(
        "v_xi/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_xi,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix that maps input `x` to cell_state `j`.
    u_xj, v_xj = self._orthogonal_init(
        shape=[input_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_xj = tf.get_variable(
        "u_xj/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_xj,
        partitioner=maybe_partitioner)
    self._v_xj = tf.get_variable(
        "v_xj/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_xj,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix
    # that maps input `x` to forget_gate `f`.
    u_xf, v_xf = self._orthogonal_init(
        shape=[input_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_xf = tf.get_variable(
        "u_xf/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_xf,
        partitioner=maybe_partitioner)
    self._v_xf = tf.get_variable(
        "v_xf/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_xf,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix
    # that maps input `x` to output_gate `o`.
    u_xo, v_xo = self._orthogonal_init(
        shape=[input_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_xo = tf.get_variable(
        "u_xo/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_xo,
        partitioner=maybe_partitioner)
    self._v_xo = tf.get_variable(
        "v_xo/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_xo,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix
    # that maps hid_state `h` to input_gate `i`.
    u_hi, v_hi = self._orthogonal_init(
        shape=[h_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_hi = tf.get_variable(
        "u_hi/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_hi,
        partitioner=maybe_partitioner)
    self._v_hi = tf.get_variable(
        "v_hi/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_hi,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix
    # that maps hid_state `h` to cell_state `j`.
    u_hj, v_hj = self._orthogonal_init(
        shape=[h_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_hj = tf.get_variable(
        "u_hj/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_hj,
        partitioner=maybe_partitioner)
    self._v_hj = tf.get_variable(
        "v_hj/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_hj,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix
    # that maps hid_state `h` to forget_gate `f`.
    u_hf, v_hf = self._orthogonal_init(
        shape=[h_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_hf = tf.get_variable(
        "u_hf/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_hf,
        partitioner=maybe_partitioner)
    self._v_hf = tf.get_variable(
        "v_hf/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_hf,
        partitioner=maybe_partitioner)

    # `u`, `v` used to construct matrix
    # that maps hid_state `h` to output_gate `o`.
    u_ho, v_ho = self._orthogonal_init(
        shape=[h_depth, self._num_units],
        initializer=initializer,
        redundant_rank=redundant_rank)
    self._u_ho = tf.get_variable(
        "u_ho/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=u_ho,
        partitioner=maybe_partitioner)
    self._v_ho = tf.get_variable(
        "v_ho/%s" % _WEIGHTS_VARIABLE_NAME,
        initializer=v_ho,
        partitioner=maybe_partitioner)

    self._c = tf.get_variable(
        "c/%s" % _WEIGHTS_VARIABLE_NAME,
        shape=[self._num_units, self._rank],
        initializer=tf.contrib.layers.xavier_initializer(),
        partitioner=maybe_partitioner)

    initializer = tf.zeros_initializer(dtype=tf.float32)
    self._b = tf.get_variable(
        "b/%s" % _BIAS_VARIABLE_NAME,
        shape=[4 * h_depth, self._rank],
        initializer=initializer)

    if self._num_proj is not None:
      if self._num_proj_shards is not None:
        maybe_proj_partitioner = tf.fixed_size_partitioner(
            self._num_proj_shards)
      else:
        maybe_proj_partitioner = None
      self._proj_kernel = self.add_variable(
          "projection/%s" % _WEIGHTS_VARIABLE_NAME,
          shape=[self._num_units, self._num_proj],
          initializer=tf.uniform_unit_scaling_initializer(),
          partitioner=maybe_proj_partitioner)
    self.initialize()
    self.built = True
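
A small NumPy sketch (shapes illustrative) of the low-rank parameterisation described in the comments above: each full weight matrix is assembled as U * diag(sigma) * V, with U of shape [input_depth, rank], V of shape [rank, num_units] and sigma a rank-length vector of scales; how sigma is actually derived from mem_input lives in self.initialize() and is not reproduced here.

import numpy as np

input_depth, rank, num_units = 12, 6, 6
u = np.random.randn(input_depth, rank).astype(np.float32)
v = np.random.randn(rank, num_units).astype(np.float32)
sigma = np.random.rand(rank).astype(np.float32)  # stand-in for the learned scales

w_full = u @ np.diag(sigma) @ v                  # [input_depth, num_units]
assert w_full.shape == (input_depth, num_units)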
Beispiel #47
0
def create_emb_for_encoder_and_decoder(share_vocab,
                                       src_vocab_size,
                                       tgt_vocab_size,
                                       src_embed_size,
                                       tgt_embed_size,
                                       dtype=tf.float32,
                                       num_partitions=0,
                                       src_vocab_file=None,
                                       tgt_vocab_file=None,
                                       src_embed_file=None,
                                       tgt_embed_file=None,
                                       scope=None):
    """Create embedding matrix for both encoder and decoder.

    Args:
      share_vocab: A boolean. Whether to share embedding matrix for both
        encoder and decoder.
      src_vocab_size: An integer. The source vocab size.
      tgt_vocab_size: An integer. The target vocab size.
      src_embed_size: An integer. The embedding dimension for the encoder's
        embedding.
      tgt_embed_size: An integer. The embedding dimension for the decoder's
        embedding.
      dtype: dtype of the embedding matrix. Default to float32.
      num_partitions: number of partitions used for the embedding vars.
      scope: VariableScope for the created subgraph. Default to "embedding".

    Returns:
      embedding_encoder: Encoder's embedding matrix.
      embedding_decoder: Decoder's embedding matrix.

    Raises:
      ValueError: if share_vocab is set but source and target have different
        vocab sizes.
    """

    if num_partitions <= 1:
        partitioner = None
    else:
        # Note: num_partitions > 1 is required for distributed training because
        # embedding_lookup tries to colocate a single, un-partitioned embedding
        # variable with the lookup ops, which may cause the embedding variables
        # to be placed on worker jobs.
        partitioner = tf.fixed_size_partitioner(num_partitions)

    if (src_embed_file or tgt_embed_file) and partitioner:
        raise ValueError(
            "Can't set num_partitions > 1 when using pretrained embedding")

    with tf.variable_scope(scope or "embeddings",
                           dtype=dtype,
                           partitioner=partitioner) as scope:
        # Share embedding
        if share_vocab:
            if src_vocab_size != tgt_vocab_size:
                raise ValueError(
                    "Share embedding but different src/tgt vocab sizes"
                    " %d vs. %d" % (src_vocab_size, tgt_vocab_size))
            assert src_embed_size == tgt_embed_size
            utils.print_out("# Use the same embedding for source and target")
            vocab_file = src_vocab_file or tgt_vocab_file
            embed_file = src_embed_file or tgt_embed_file

            embedding_encoder = _create_or_load_embed("embedding_share",
                                                      vocab_file, embed_file,
                                                      src_vocab_size,
                                                      src_embed_size, dtype)
            embedding_decoder = embedding_encoder
        else:
            with tf.variable_scope("encoder", partitioner=partitioner):
                embedding_encoder = _create_or_load_embed(
                    "embedding_encoder", src_vocab_file, src_embed_file,
                    src_vocab_size, src_embed_size, dtype)

            with tf.variable_scope("decoder", partitioner=partitioner):
                embedding_decoder = _create_or_load_embed(
                    "embedding_decoder", tgt_vocab_file, tgt_embed_file,
                    tgt_vocab_size, tgt_embed_size, dtype)

    return embedding_encoder, embedding_decoder
Beispiel #48
0
def train():
    global updated_batch_size_num
    global passed_info
    global shall_update
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    print('PS hosts are: %s' % ps_hosts)
    print('Worker hosts are: %s' % worker_hosts)

    server = tf.train.Server({
        'ps': ps_hosts,
        'worker': worker_hosts
    },
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id)

    sspManager = SspManager(len(worker_hosts), 5)

    if FLAGS.job_name == 'ps':
        if FLAGS.task_id == 0:
            rpcServer = sspManager.create_rpc_server(ps_hosts[0].split(':')[0])
            rpcServer.serve()
        server.join()

    time.sleep(5)
    is_chief = (FLAGS.task_id == 0)
    rpcClient = sspManager.create_rpc_client(ps_hosts[0].split(':')[0])
    if is_chief:
        if tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.DeleteRecursively(FLAGS.train_dir)
        tf.gfile.MakeDirs(FLAGS.train_dir)

    device_setter = tf.train.replica_device_setter(ps_tasks=len(ps_hosts))
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        partitioner = tf.fixed_size_partitioner(len(ps_hosts), axis=0)
        with tf.variable_scope('root', partitioner=partitioner):
            with tf.device(device_setter):
                global_step = tf.Variable(0, trainable=False)

                decay_steps = 50000 * 350.0 / FLAGS.batch_size
                batch_size = tf.placeholder(dtype=tf.int32,
                                            shape=(),
                                            name='batch_size')
                images, labels = cifar10.distorted_inputs(batch_size)
                #            print (str(tf.shape(images))+ str(tf.shape(labels)))
                re = tf.shape(images)[0]
                inputs = tf.reshape(images, [-1, _HEIGHT, _WIDTH, _DEPTH])
                #            labels = tf.reshape(labels, [-1, _NUM_CLASSES])
                print(labels.get_shape())
                labels = tf.one_hot(labels, 10, 1, 0)
                print(labels.get_shape())
                network_fn = nets_factory.get_network_fn('alexnet_v2',
                                                         num_classes=10)
                (logits, _) = network_fn(inputs)
                print(logits.get_shape())
                cross_entropy = tf.losses.softmax_cross_entropy(
                    logits=logits, onehot_labels=labels)

                #            logits = cifar10.inference(images, batch_size)

                #            loss = cifar10.loss(logits, labels, batch_size)
                loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
                    [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
                train_op = cifar10.train(loss, global_step)

                # Decay the learning rate exponentially based on the number of steps.
                sv = tf.train.Supervisor(
                    is_chief=is_chief,
                    logdir=FLAGS.train_dir,
                    init_op=tf.group(tf.global_variables_initializer(),
                                     tf.local_variables_initializer()),
                    summary_op=None,
                    global_step=global_step,
                    #                                     saver=saver,
                    saver=None,
                    recovery_wait_secs=1,
                    save_model_secs=60)

                tf.logging.info('%s Supervisor' % datetime.now())
                sess_config = tf.ConfigProto(
                    allow_soft_placement=True,
                    log_device_placement=FLAGS.log_device_placement)
                sess_config.gpu_options.allow_growth = True

                # Get a session.
                sess = sv.prepare_or_wait_for_session(server.target,
                                                      config=sess_config)
                #	    sess.run(tf.global_variables_initializer())

                # Start the queue runners.
                queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
                sv.start_queue_runners(sess, queue_runners)

                #            sv.start_queue_runners(sess, chief_queue_runners)
                #            sess.run(init_tokens_op)
                """Train CIFAR-10 for a number of steps."""
                #            available_cpu = psutil.cpu_percent(interval=None)

                #            thread = threading2.Thread(target = local_update_batch_size, name = "update_batch_size_thread", args = (rpcClient, FLAGS.task_id,))
                #            thread.start()

                time0 = time.time()
                batch_size_num = FLAGS.batch_size
                for step in range(FLAGS.max_steps):

                    start_time = time.time()

                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                    #                batch_size_num = updated_batch_size_num
                    if step <= 5:
                        batch_size_num = FLAGS.batch_size
                    if step >= 0:
                        batch_size_num = int(step / 5) % 512 + 1
                        batch_size_num = 128

                    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size_num
                    decay_steps_num = int(num_batches_per_epoch *
                                          NUM_EPOCHS_PER_DECAY)

                    #                mgrads, images_, train_val, real, loss_value, gs = sess.run([grads, images, train_op, re, loss, global_step], feed_dict={batch_size: batch_size_num},  options=run_options, run_metadata=run_metadata)
                    _, loss_value, gs = sess.run(
                        [train_op, loss, global_step],
                        feed_dict={batch_size: batch_size_num},
                        options=run_options,
                        run_metadata=run_metadata)
                    #                _, loss_value, gs = sess.run([train_op, loss, global_step], feed_dict={batch_size: batch_size_num})
                    b = time.time()
                    #    		tl = timeline.Timeline(run_metadata.step_stats)
                    ##	        ctf = tl.generate_chrome_trace_format()
                    #		last_batch_time = tl.get_local_step_duration('sync_token_q_Dequeue')
                    #		thread = threading2.Thread(target=get_computation_time, name="get_computation_time",args=(run_metadata.step_stats,step,))
                    #		thread.start()

                    #                available_cpu = 100-psutil.cpu_percent(interval=None)
                    #                available_memory = psutil.virtual_memory()[1]/1000000
                    c0 = time.time()

                    #	        batch_size_num = rpcClient.update_batch_size(FLAGS.task_id, last_batch_time, available_cpu, available_memory, step, batch_size_num)
                    #		if gs < 10:
                    #	            with open('timeline.json', 'w') as f:
                    #	                f.write(ctf)
                    #		    tf.logging.info('write json')

                    #	        batch_size_num = rpcClient.update_batch_size(FLAGS.task_id, 0,0,0, step, batch_size_num)

                    if step % 1 == 0:
                        duration = time.time() - start_time
                        num_examples_per_step = batch_size_num
                        examples_per_sec = num_examples_per_step / duration
                        sec_per_batch = float(duration)

                        c = time.time()
                        ##                    tf.logging.info("time statistics - batch_process_time: " + str( last_batch_time)  + " - train_time: " + str(b-start_time) + " - get_batch_time: " + str(c0-b) + " - get_bs_time:  " + str(c-c0) + " - accum_time: " + str(c-time0))

                        format_str = (
                            "time: " + str(time.time()) +
                            '; %s: step %d (global_step %d), loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                        )
                        tf.logging.info(format_str %
                                        (datetime.now(), step, gs, loss_value,
                                         examples_per_sec, sec_per_batch))
                        rpcClient.check_staleness(FLAGS.task_id, step)
Beispiel #49
0
def train_ncf(cluster, rank, nrank, args):
    def validate():
        # validate phase
        hits, ndcgs = [], []
        for idx in range(testData.shape[0]):
            start_index = idx * 100
            my_feed_dict = {
                user_input: testUserInput[start_index:start_index+100],
                item_input: testItemInput[start_index:start_index+100],
            }
            predictions = sess.run([y], feed_dict=my_feed_dict)
            map_item_score = {testItemInput[start_index+i]: predictions[0][i] for i in range(100)}
            
            # Evaluate top rank list
            ranklist = heapq.nlargest(topK, map_item_score, key=map_item_score.get)
            hr = getHitRatio(ranklist, testItemInput[start_index])
            ndcg = getNDCG(ranklist, testItemInput[start_index])
            hits.append(hr)
            ndcgs.append(ndcg)
        hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
        return hr, ndcg

    def get_current_shard(data):
        part_size = data.shape[0] // nrank
        start = part_size * rank
        end = start + part_size if rank != nrank - 1 else data.shape[0]
        return data[start:end]

    from movielens import getdata
    if args.all:
        trainData, testData = getdata('ml-25m', 'datasets')
        trainUsers = get_current_shard(trainData['user_input'])
        trainItems = get_current_shard(trainData['item_input'])
        trainLabels = get_current_shard(trainData['labels'])
        testData = get_current_shard(testData)
        testUserInput = np.repeat(np.arange(testData.shape[0], dtype=np.int32), 100)
        testItemInput = testData.reshape((-1,))
    else:
        trainData, testData = getdata('ml-25m', 'datasets')
        trainUsers = get_current_shard(trainData['user_input'][:1024000])
        trainItems = get_current_shard(trainData['item_input'][:1024000])
        trainLabels = get_current_shard(trainData['labels'][:1024000])
        testData = get_current_shard(testData[:1470])
        testUserInput = np.repeat(np.arange(testData.shape[0], dtype=np.int32), 100)
        testItemInput = testData.reshape((-1,))
        
    num_users, num_items = {
        'ml-1m': (6040, 3706),
        'ml-20m': (138493, 26744),
        'ml-25m': (162541, 59047),
    }['ml-25m']
    batch_size = 1024
    num_negatives = 4
    topK = 10

    worker_device = "/job:worker/task:%d/gpu:0" % (rank)
    with tf.device(worker_device):
        user_input = tf.compat.v1.placeholder(tf.int32, [None, ])
        item_input = tf.compat.v1.placeholder(tf.int32, [None, ])
        y_ = tf.compat.v1.placeholder(tf.float32, [None, ])

    with tf.device(tf.compat.v1.train.replica_device_setter(cluster=cluster)):
        server_num = len(cluster.as_dict()['ps'])
        embed_partitioner = tf.fixed_size_partitioner(server_num, 0) if server_num > 1 else None
        loss, y, opt = neural_mf(user_input, item_input, y_, num_users, num_items, embed_partitioner)
        train_op = opt.minimize(loss)

    server = tf.train.Server(
        cluster, job_name="worker", task_index=rank)
    init = tf.compat.v1.global_variables_initializer()
    sv = tf.train.Supervisor(
        is_chief=(rank == 0),
        init_op=init,
        recovery_wait_secs=1)
    sess_config = tf.compat.v1.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        device_filters=["/job:ps",
                        "/job:worker/task:%d" % rank])
    sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
    # sess.run(init)
    
    log = Logging(path='logs/tflog%d.txt' % rank)
    epoch = 7
    iterations = trainUsers.shape[0] // batch_size
    start = time.time()
    for ep in range(epoch):
        ep_st = time.time()
        log.write('epoch %d' % ep)
        train_loss = []
        for idx in tqdm(range(iterations)):
            start_index = idx * batch_size
            my_feed_dict = {
                user_input: trainUsers[start_index:start_index+batch_size],
                item_input: trainItems[start_index:start_index+batch_size],
                y_: trainLabels[start_index:start_index+batch_size],
            }

            loss_val = sess.run([loss, train_op], feed_dict=my_feed_dict)
            train_loss.append(loss_val[0])

            # if idx % 10000 == 0:
            #     hr, ndcg = validate()
            #     printstr = "HR: %.4f, NDCF: %.4f" % (hr, ndcg)
            #     log.write(printstr)

        tra_loss = np.mean(train_loss)
        ep_en = time.time()
        
        # validate phase
        if args.val:
            hr, ndcg = validate()
            printstr = "train_loss: %.4f, HR: %.4f, NDCF: %.4f, train_time: %.4f" % (tra_loss, hr, ndcg, ep_en - ep_st)
        else:
            printstr = "train_loss: %.4f, train_time: %.4f" % (tra_loss, ep_en - ep_st)
        log.write(printstr)
    log.write('all time: %f' % (time.time() - start))
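

# --- Hedged sketch, not part of the original example --------------------------
# neural_mf() is not shown in this snippet; the embedding construction inside
# it is assumed to look roughly like the helper below. This is where the
# embed_partitioner created with tf.fixed_size_partitioner takes effect:
# get_variable() shards the embedding table row-wise across the parameter
# servers, and tf.nn.embedding_lookup() gathers from the shards transparently.
# All names here (sketch_partitioned_embedding, embed_dim) are illustrative.
def sketch_partitioned_embedding(ids, vocab_size, embed_dim, partitioner, name):
    with tf.compat.v1.variable_scope(name, partitioner=partitioner):
        table = tf.compat.v1.get_variable(
            "embedding",
            shape=[vocab_size, embed_dim],
            initializer=tf.compat.v1.random_normal_initializer(stddev=0.01))
    # With a partitioner, `table` is a PartitionedVariable (a plain Variable
    # when partitioner is None); embedding_lookup handles both cases.
    return tf.nn.embedding_lookup(table, ids)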


def train():
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    print('PS hosts are: %s' % ps_hosts)
    print('Worker hosts are: %s' % worker_hosts)
    configP = tf.ConfigProto()
    server = tf.train.Server({
        'ps': ps_hosts,
        'worker': worker_hosts
    },
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id,
                             config=configP)

    if FLAGS.job_name == 'ps':
        server.join()

    is_chief = (FLAGS.task_id == 0)
    if is_chief:
        if tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.DeleteRecursively(FLAGS.train_dir)
        tf.gfile.MakeDirs(FLAGS.train_dir)

    device_setter = tf.train.replica_device_setter(ps_tasks=len(ps_hosts))
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        with tf.device(device_setter):
            """Prepare Input"""
            global_step = tf.Variable(0, trainable=False)
            decay_steps = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * NUM_EPOCHS_PER_DECAY / FLAGS.batch_size
            batch_size = tf.placeholder(dtype=tf.int32,
                                        shape=(),
                                        name='batch_size')
            with tf.device('/cpu:0'):
                images, labels = cifar10.distorted_inputs(batch_size)
            inputs = tf.reshape(images, [-1, _HEIGHT, _WIDTH, _DEPTH])
            """Inference"""
            with tf.variable_scope('root',
                                   partitioner=tf.fixed_size_partitioner(
                                       len(ps_hosts), axis=0)):
                network = resnet_model.cifar10_resnet_v2_generator(
                    FLAGS.resnet_size, _NUM_CLASSES)
            logits = network(inputs, True)
            labels = tf.cast(labels, tf.int64)
            correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
            correct_prediction = tf.cast(correct_prediction, tf.float32)
            accuracy_op = tf.reduce_mean(correct_prediction)
            """Loss"""
            labels = tf.one_hot(labels, _NUM_CLASSES, 1, 0)
            cross_entropy = tf.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=labels)
            loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
            """Define Optimization"""
            # Decay the learning rate exponentially based on the number of steps.
            lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE *
                                            len(worker_hosts),
                                            global_step,
                                            decay_steps,
                                            LEARNING_RATE_DECAY_FACTOR,
                                            staircase=True)
            opt = tf.train.GradientDescentOptimizer(lr)
            # Track the moving averages of all trainable variables.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                MOVING_AVERAGE_DECAY, global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
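            # SyncReplicasOptimizer makes training synchronous: gradients from
            # all workers are aggregated before a single update is applied, and
            # the exponential moving averages of the variables above are
            # refreshed after each aggregated step.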
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=len(worker_hosts),
                total_num_replicas=len(worker_hosts),
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)
            # Compute gradients with respect to the loss.
            grads = opt.compute_gradients(loss)
            apply_gradients_op = opt.apply_gradients(grads,
                                                     global_step=global_step)
            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(loss, name='train_op')
            """Sychronization Management"""
            if is_chief:
                chief_queue_runners = [opt.get_chief_queue_runner()]
                init_tokens_op = opt.get_init_tokens_op()
            saver = tf.train.Saver(max_to_keep=1)
            sv = tf.train.Supervisor(is_chief=is_chief,
                                     logdir=FLAGS.train_dir,
                                     init_op=tf.group(
                                         tf.global_variables_initializer(),
                                         tf.local_variables_initializer()),
                                     summary_op=None,
                                     global_step=global_step,
                                     saver=saver,
                                     recovery_wait_secs=1,
                                     save_model_secs=60)
            tf.logging.info('%s Supervisor' % datetime.now())
            """Train CIFAR-10 for a number of steps."""
            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)
            sess = sv.prepare_or_wait_for_session(server.target,
                                                  config=sess_config)
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)

            if is_chief:
                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            batch_size_num = FLAGS.batch_size
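            # init_global_step is not defined in this snippet; it is assumed to
            # come from earlier in the original script (e.g. 0 for a fresh run
            # or a restored global step).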
            for step in range(init_global_step, FLAGS.max_steps):
                step_start_time = time.time()
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size_num
                decay_steps_num = int(num_batches_per_epoch *
                                      NUM_EPOCHS_PER_DECAY)
                _, loss_value, gs = sess.run(
                    [train_op, loss, global_step],
                    feed_dict={batch_size: batch_size_num},
                    options=run_options,
                    run_metadata=run_metadata)

                duration = time.time() - step_start_time
                num_examples_per_step = batch_size_num
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    "time: " + str(time.time()) +
                    '; %s: step %d (gs %d), loss= %.2f (%.1f samples/s; %.3f s/batch)'
                )
                tf.logging.info(format_str %
                                (datetime.now(), step, gs, loss_value,
                                 examples_per_sec, sec_per_batch))
                """Do evaluation on accuracy (this is not testset evaluation)"""
                if step % 200 == 0:
                    accuracy = sess.run(accuracy_op,
                                        feed_dict={batch_size: 10000})
                    tf.logging.info('evaluation: step - ' + str(step) +
                                    '; accuracy: ' + str(accuracy))