def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2,
             l2_weight=0, random_seed=None):
    Model.__init__(self)
    # declare the parameters
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        # sparse placeholder for the input
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        # initialize the parameters
        self.vars = init_var_map(init_vars, init_path)

        w = self.vars['w']
        b = self.vars['b']
        # sigmoid(wx + b)
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \
                    l2_weight * tf.nn.l2_loss(xw)
        self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)

        # GPU settings
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        # initialize all variables in the graph
        tf.global_variables_initializer().run(session=self.sess)
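# A minimal training sketch for the logistic-regression snippet above. Two
# assumptions not shown in the snippet: the __init__ belongs to a class named
# `LR`, and `get_optimizer(opt_algo, lr, loss)` returns a train op that
# minimizes the loss. A tf.sparse_placeholder is fed with a
# tf.SparseTensorValue built from (indices, values, dense_shape).
import numpy as np
import tensorflow as tf

model = LR(input_dim=100, output_dim=1, learning_rate=1e-2)  # hypothetical class name
# one batch of 3 sparse rows: one non-zero entry per (row, col) pair
indices = np.array([[0, 2], [1, 50], [2, 99]], dtype=np.int64)
values = np.ones(3, dtype=np.float32)
sparse_x = tf.SparseTensorValue(indices, values, dense_shape=[3, 100])
labels = np.array([1, 0, 1], dtype=np.float32)
_, loss = model.sess.run([model.optimizer, model.loss],
                         feed_dict={model.X: sparse_x, model.y: labels})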
def __init__(self, field_sizes=None, embed_size=10, filter_sizes=None, layer_acts=None, drop_out=None,
             init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    init_vars.append(('f1', [embed_size, filter_sizes[0], 1, 2], 'xavier', dtype))
    init_vars.append(('f2', [embed_size, filter_sizes[1], 2, 2], 'xavier', dtype))
    init_vars.append(('w1', [2 * 3 * embed_size, 1], 'xavier', dtype))
    init_vars.append(('b1', [1], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1)
        l = xw
        l = tf.transpose(tf.reshape(l, [-1, num_inputs, embed_size, 1]), [0, 2, 1, 3])
        f1 = self.vars['f1']
        l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            max_pool_4d(
                tf.transpose(l, [0, 1, 3, 2]),
                int(num_inputs / 2)),
            [0, 1, 3, 2])
        f2 = self.vars['f2']
        l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            max_pool_4d(
                tf.transpose(l, [0, 1, 3, 2]),
                3),
            [0, 1, 3, 2])
        l = tf.nn.dropout(
            activate(
                tf.reshape(l, [-1, embed_size * 3 * 2]),
                layer_acts[0]),
            self.layer_keeps[0])
        w1 = self.vars['w1']
        b1 = self.vars['b1']
        l = tf.matmul(l, w1) + b1
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2,
             l2_weight=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)  # initialize variables w and b

        w = self.vars['w']
        b = self.vars['b']
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \
                    l2_weight * tf.nn.l2_loss(xw)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2,
             l2_weight=0, random_seed=None):
    init_vars = [('w', [input_dim, output_dim], 'tnormal', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)

        w = self.vars['w']
        b = self.vars['b']
        logits = tf.sparse_tensor_dense_matmul(self.X, w) + b
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                    l2_weight * tf.nn.l2_loss(w)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, layer_sizes=None, layer_acts=None, layer_keeps=None, layer_l2=None, init_path=None,
             opt_algo='gd', learning_rate=1e-2, random_seed=None):
    init_vars = []
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('w1', [num_inputs * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        l = tf.nn.dropout(
            utils.activate(
                tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) + b0[i]
                           for i in range(num_inputs)], 1),
                layer_acts[0]),
            layer_keeps[0])
        for i in range(1, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(
                    tf.matmul(l, wi) + bi,
                    layer_acts[i]),
                layer_keeps[i])
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            for i in range(num_inputs):
                self.loss += layer_l2[0] * tf.nn.l2_loss(w0[i])
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                # bi = self.vars['b%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_w=0, l2_v=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'tnormal', dtype),
                 ('v', [input_dim, factor_order], 'tnormal', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)

        w = self.vars['w']
        v = self.vars['v']
        b = self.vars['b']
        """
        SparseTensor(values=[1, 2], indices=[[0, 0], [1, 2]], shape=[3, 4])
        [[1, 0, 0, 0]
         [0, 0, 2, 0]
         [0, 0, 0, 0]]
        http://www.jianshu.com/p/c233e09d2f5f
        """
        # sparse tensor holding the element-wise square x * x
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), tf.to_int64(tf.shape(self.X)))
        xv = tf.square(tf.sparse_tensor_dense_matmul(self.X, v))
        p = 0.5 * tf.reshape(
            tf.reduce_sum(
                xv - tf.sparse_tensor_dense_matmul(X_square, tf.square(v)),
                1),
            [-1, output_dim])
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b + p, [-1])
        # predicted probability of the target
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                    l2_w * tf.nn.l2_loss(xw) + \
                    l2_v * tf.nn.l2_loss(xv)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
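# Runnable version of the SparseTensor example quoted in the docstring above:
# (indices, values, dense_shape) describe the 3x4 matrix shown there.
import tensorflow as tf

sp = tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
with tf.Session() as sess:
    dense = sess.run(tf.sparse_tensor_to_dense(sp))
# dense == [[1, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0]]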
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    print('num_inputs:{0}\t\tlayer_size:{1}'.format(num_inputs, layer_sizes))
    for i in range(num_inputs):
        # initialize an embed_size-dim (here 10) vector for every feature value
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    # embed each field into an embed_size-dim vector; with 16 fields and embed_size 10
    # this gives a 160-dim input, i.e. a [160, 500, 1] network
    node_in = num_inputs * embed_size
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    print('init_vars:', init_vars)
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        # concatenate the latent vectors of all fields to form the 160-dim network input
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        l = xw
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            print('hidden layer {0}: l.shape, wi.shape, bi.shape'.format(i), l.shape, wi.shape, bi.shape)
            l = tf.nn.dropout(
                utils.activate(
                    tf.matmul(l, wi) + bi,
                    layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)  # remove all size-1 dimensions from the tensor
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, feature_size, field_size, optimizer_type='gd', learning_rate=1e-2, l2_reg=0,
             verbose=False, random_seed=None, eval_metric=roc_auc_score, greater_is_better=True,
             epoch=10, batch_size=1024):
    Model.__init__(self, eval_metric, greater_is_better, epoch, batch_size, verbose)
    init_vars = [('w', [feature_size, 1], 'zero', tf.float32),
                 ('b', [1], 'zero', tf.float32)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.feat_index = tf.placeholder(tf.int32, shape=[None, None], name="feat_index")  # None * F
        self.feat_value = tf.placeholder(tf.float32, shape=[None, None], name="feat_value")  # None * F
        self.label = tf.placeholder(tf.float32, shape=[None, 1], name="label")  # None * 1
        self.vars = utils.init_var_map(init_vars)

        w = self.vars['w']
        b = self.vars['b']
        self.embeddings = tf.nn.embedding_lookup(w, self.feat_index)  # None * F * K
        feat_value = tf.reshape(self.feat_value, shape=[-1, field_size, 1])
        self.embeddings = tf.multiply(self.embeddings, feat_value)
        logits = tf.reduce_sum(self.embeddings, 1) + b
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label, logits=logits)) + \
                    l2_reg * tf.nn.l2_loss(self.embeddings)
        self.optimizer = utils.get_optimizer(optimizer_type, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
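# Sketch of the (feat_index, feat_value) input format the model above expects,
# inferred from the embedding_lookup/multiply pattern (an assumption, not part
# of the snippet): each row holds one feature id per field plus that field's
# value, typically 1.0 for one-hot categorical fields and the raw value for
# numeric fields. Both arrays have shape batch x field_size.
import numpy as np

feat_index = np.array([[3, 17, 42],
                       [5, 17, 40]], dtype=np.int32)       # ids into the global feature space
feat_value = np.array([[1.0, 1.0, 0.7],
                       [1.0, 1.0, 1.3]], dtype=np.float32)  # per-field values
# feed_dict = {model.feat_index: feat_index, model.feat_value: feat_value,
#              model.label: labels.reshape(-1, 1)}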
def __init__(self, input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_w=0, l2_v=0, random_seed=None):
    Model.__init__(self)
    # first-order weights, second-order (cross) factors, and the bias term
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('v', [input_dim, factor_order], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = init_var_map(init_vars, init_path)

        w = self.vars['w']
        v = self.vars['v']
        b = self.vars['b']
        # [(x1 + x2 + x3)^2 - (x1^2 + x2^2 + x3^2)] / 2
        # first compute the square of the sum (all cross terms plus squares),
        # then subtract the squared terms (each feature multiplied with itself)
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), tf.to_int64(tf.shape(self.X)))
        xv = tf.square(tf.sparse_tensor_dense_matmul(self.X, v))
        p = 0.5 * tf.reshape(
            tf.reduce_sum(
                xv - tf.sparse_tensor_dense_matmul(X_square, tf.square(v)),
                1),
            [-1, output_dim])
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b + p, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                    l2_w * tf.nn.l2_loss(xw) + \
                    l2_v * tf.nn.l2_loss(xv)
        self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)

        # GPU settings
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        # initialize all variables in the graph
        tf.global_variables_initializer().run(session=self.sess)
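# Quick NumPy check of the identity used for the FM second-order term above:
# sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [ (sum_i v_{if} x_i)^2 - sum_i v_{if}^2 x_i^2 ].
import numpy as np

rng = np.random.RandomState(0)
n, k = 5, 3
x = rng.randn(n)
v = rng.randn(n, k)
# brute force over all pairs i < j
brute = sum(np.dot(v[i], v[j]) * x[i] * x[j]
            for i in range(n) for j in range(i + 1, n))
# the O(nk) form the model computes: square-of-sum minus sum-of-squares
xv = x @ v                                    # shape [k]
fast = 0.5 * np.sum(xv ** 2 - (x ** 2) @ (v ** 2))
assert np.allclose(brute, fast)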
def __init__(self, net_type, net_argv, init_path, init_argv, dim_argv, batch_size, opt_argv):
    if net_type == "nn":
        self.graph = tf.Graph()
        nb_dim = dim_argv[0]
        depth, h_dims, act_func = net_argv
        with self.graph.as_default():
            var_init = []
            if not init_path:
                _j = 0
                for _i in range(depth - 1):
                    var_init.extend([
                        ("W{}".format(_i), [h_dims[_i], h_dims[_i + 1]],
                         init_argv[_j][0], init_argv[_j][1:]),
                        ("b{}".format(_i), [1, h_dims[_i + 1]],
                         init_argv[_j + 1][0], init_argv[_j + 1][1:])
                    ])
                    _j += 2
            var_map = init_var_map(init_path, var_init)
            self.W = [0] * (depth - 1)
            self.b = [0] * (depth - 1)
            for _i in range(depth - 1):
                self.W[_i] = tf.Variable(var_map["W{}".format(_i)])
                self.b[_i] = tf.Variable(var_map["b{}".format(_i)])
            self.x_vec = tf.placeholder(tf.float32, shape=[1, nb_dim])
            self.batch_x_vecs = tf.placeholder(tf.float32, shape=[batch_size, nb_dim])
            self.batch_value_labels = tf.placeholder(tf.float32, shape=[batch_size, 1])
            self.value_prediction = self.forward(net_type, depth, act_func, self.x_vec,
                                                 [self.W, self.b])
            self.batch_value_predictions = self.forward(net_type, depth, act_func, self.batch_x_vecs,
                                                        [self.W, self.b])
            square_loss_value = tf.square(self.batch_value_labels - self.batch_value_predictions)
            if opt_argv[-1] == "sum":
                self.loss_value = tf.reduce_sum(square_loss_value)
            elif opt_argv[-1] == "mean":
                self.loss_value = tf.reduce_mean(square_loss_value)
            self.opt_value = build_optimizer(opt_argv, self.loss_value)
            # self.init = tf.initialize_all_variables()
            self.init = tf.global_variables_initializer()
            self.log = "net_type={}\tnet_argv={}\tinit_path={}\tinit_argv={}\tdim_argv={}\tbatch_size={}\topt_argv={}" \
                .format(net_type, net_argv, init_path, init_argv, dim_argv, batch_size, opt_argv)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    node_in = num_inputs * embed_size
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        l = xw
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            print(l.shape, wi.shape, bi.shape)
            l = tf.nn.dropout(
                activate(
                    tf.matmul(l, wi) + bi,
                    layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, factor_dim=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_weight=0, l2_v=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('v', [input_dim, factor_dim], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)  # n * input_dim
        self.y = tf.placeholder(dtype)  # n * 1
        self.vars = utils.init_var_map(init_vars, init_path)
        w = self.vars['w']  # input_dim * output_dim
        v = self.vars['v']  # input_dim * factor_dim
        b = self.vars['b']  # output_dim
        # n * input_dim, values squared element-wise
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), tf.to_int64(tf.shape(self.X)))
        # (n * input_dim) x (input_dim * factor_dim) => n * factor_dim
        xv = tf.square(tf.sparse_tensor_dense_matmul(self.X, v))
        # second-order term: (n * factor_dim) minus (n * factor_dim), then summed
        # over factor_dim => n * output_dim
        p = 0.5 * tf.reshape(
            tf.reduce_sum(
                xv - tf.sparse_tensor_dense_matmul(X_square, tf.square(v)),
                1),
            [-1, output_dim])
        xw = tf.sparse_tensor_dense_matmul(self.X, w)  # n * output_dim
        l = tf.reshape(xw + b + p, [-1])  # n
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y)) \
            + l2_weight * tf.nn.l2_loss(xw) + l2_v * tf.nn.l2_loss(xv)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_w=0, l2_v=0, random_seed=None):
    init_vars = [('w', [input_dim, output_dim], 'tnormal', dtype),
                 ('v', [input_dim, factor_order], 'tnormal', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)

        w = self.vars['w']
        v = self.vars['v']
        b = self.vars['b']
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), self.X.dense_shape)
        p = 0.5 * tf.reshape(
            tf.reduce_sum(
                tf.square(tf.sparse_tensor_dense_matmul(self.X, v)) -
                tf.sparse_tensor_dense_matmul(X_square, tf.square(v)),
                1),
            [-1, output_dim])
        logits = tf.sparse_tensor_dense_matmul(self.X, w) + b + p
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                    l2_w * tf.nn.l2_loss(w) + \
                    l2_v * tf.nn.l2_loss(v)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2,
             l2_weight=0, sync=False, workers=20):
    Model.__init__(self)
    # self.graph = tf.Graph()
    # with self.graph.as_default():
    with tf.device('/cpu:0'):
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                     ('b', [output_dim], 'zero', dtype)]
        self.vars = utils.init_var_map(init_vars, init_path)

        w = self.vars['w']
        b = self.vars['b']
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \
                    l2_weight * tf.nn.l2_loss(xw)
        self.global_step = _variable_on_cpu('global_step', [],
                                            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
def __init__(self, input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_w=0, l2_v=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('v', [input_dim, factor_order], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)

        w = self.vars['w']
        v = self.vars['v']
        b = self.vars['b']
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), tf.to_int64(tf.shape(self.X)))
        xv = tf.square(tf.sparse_tensor_dense_matmul(self.X, v))
        p = 0.5 * tf.reshape(
            tf.reduce_sum(xv - tf.sparse_tensor_dense_matmul(X_square, tf.square(v)), 1),
            [-1, output_dim])
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b + p, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                    l2_w * tf.nn.l2_loss(xw) + \
                    l2_v * tf.nn.l2_loss(xv)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2,
             l2_weight=0, random_seed=None):
    Model.__init__(self)
    init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                 ('b', [output_dim], 'zero', dtype)]
    self.graph = tf.Graph()  # create a new graph and use it as the default graph
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)

        w = self.vars['w']
        b = self.vars['b']
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) \
            + l2_weight * tf.nn.l2_loss(xw)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        # allow GPU memory growth: instead of pre-allocating a fixed amount of GPU
        # memory, the allocator starts with a small block and grows it as needed
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)  # initialize the graph variables
def __init__(self, field_sizes=None, embed_size=10, filter_sizes=None, layer_acts=None, drop_out=None,
             init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    init_vars.append(('f1', [embed_size, filter_sizes[0], 1, 2], 'xavier', dtype))
    init_vars.append(('f2', [embed_size, filter_sizes[1], 2, 2], 'xavier', dtype))
    init_vars.append(('w1', [2 * 3 * embed_size, 1], 'xavier', dtype))
    init_vars.append(('b1', [1], 'zero', dtype))
    print('init_vars: ', init_vars)
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        l = xw
        # reshape into a 16 x 10 (num_inputs x embed_size) map per sample
        l = tf.transpose(tf.reshape(l, [-1, num_inputs, embed_size, 1]), [0, 2, 1, 3])
        f1 = self.vars['f1']
        l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            utils.max_pool_4d(
                tf.transpose(l, [0, 1, 3, 2]),
                int(num_inputs / 2)),
            [0, 1, 3, 2])
        f2 = self.vars['f2']
        l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            utils.max_pool_4d(
                tf.transpose(l, [0, 1, 3, 2]),
                3),
            [0, 1, 3, 2])
        l = tf.nn.dropout(
            utils.activate(
                tf.reshape(l, [-1, embed_size * 3 * 2]),
                layer_acts[0]),
            self.layer_keeps[0])
        w1 = self.vars['w1']
        b1 = self.vars['b1']
        l = tf.matmul(l, w1) + b1
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, field_size=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-3, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_size)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_size[i], embed_size], 'xavier', dtype))
    num_pairs = int(num_inputs * (num_inputs - 1) / 2)
    node_in = num_inputs * embed_size + num_pairs
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]  # [num_inputs, field_size[i], k]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)  # [batch, num_inputs * k]
        xw3d = tf.reshape(xw, [-1, num_inputs, embed_size])  # [batch, num_inputs, k]
        row = []  # num_pairs
        col = []  # num_pairs
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)
        p = tf.transpose(
            tf.gather(
                tf.transpose(xw3d, [1, 0, 2]),  # [num_inputs, batch, k]
                row),  # [num_pairs, batch, k]
            [1, 0, 2])  # [batch, num_pairs, k]
        q = tf.transpose(
            tf.gather(
                tf.transpose(xw3d, [1, 0, 2]),  # [num_inputs, batch, k]
                col),  # [num_pairs, batch, k]
            [1, 0, 2])  # [batch, num_pairs, k]
        p = tf.reshape(p, [-1, num_pairs, embed_size])  # [batch, num_pairs, k]
        q = tf.reshape(q, [-1, num_pairs, embed_size])  # [batch, num_pairs, k]
        ip = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])
        l = tf.concat([xw, ip], 1)  # [batch, num_inputs * k + num_pairs]
        for i in range(len(layer_sizes)):
            w = self.vars['w%d' % i]
            b = self.vars['b%d' % i]
            l = utils.activate(tf.matmul(l, w) + b, layer_acts[i])
            l = tf.nn.dropout(l, self.layer_keeps[i])
        print('l', l)
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)
        print('l', l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                w = self.vars['w%d' % i]
                self.loss += layer_l2 * tf.nn.l2_loss(w)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
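# Small, self-contained NumPy demo of the pair construction above: for
# num_inputs fields, (row, col) enumerate all num_inputs*(num_inputs-1)/2
# field pairs, and gathering along the field axis yields the two sides of
# each pair whose inner products form the product layer.
import numpy as np

num_inputs, embed_size, batch = 4, 3, 2
row, col = [], []
for i in range(num_inputs - 1):
    for j in range(i + 1, num_inputs):
        row.append(i)
        col.append(j)
num_pairs = len(row)                      # == num_inputs * (num_inputs - 1) / 2
xw3d = np.random.randn(batch, num_inputs, embed_size)
p = xw3d[:, row, :]                       # [batch, num_pairs, k], left field of each pair
q = xw3d[:, col, :]                       # [batch, num_pairs, k], right field of each pair
ip = np.sum(p * q, axis=-1)               # [batch, num_pairs] pairwise inner products
assert ip.shape == (batch, num_pairs)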
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    # for i in range(num_inputs):
    #     init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    node_in = num_inputs * embed_size
    print('node_in', node_in)
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        ##################################################
        # todo: restore w, v, b parameters from fm model
        feature_size = sum(field_sizes)
        init_vars.append(('w', [feature_size, 1], 'fm', dtype))
        init_vars.append(('v', [feature_size, embed_size], 'fm', dtype))
        init_vars.append(('b', [1], 'fm', dtype))
        self.vars = utils.init_var_map(init_vars, init_path)
        ##################################################
        # use the fm parameters to fit the original interface
        init_w0 = tf.concat([self.vars['w'], self.vars['v']], 1)
        lower, upper = 0, field_sizes[0]
        for i in range(num_inputs):
            if i != 0:
                lower, upper = upper, upper + field_sizes[i]
            self.vars['embed_%d' % i] = init_w0[lower:upper]
        ##################################################
        print('init_vars, init_path', init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        print('X[0].shape', self.X[0].shape)
        print('w0[0].shape', w0[0].shape)
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        ##################################################
        l = xw
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            print('l.shape', 'wi.shape', 'bi.shape', l.shape, wi.shape, bi.shape)
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2,
             random_seed=None, layer_norm=True):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    node_in = num_inputs * embed_size + embed_size * embed_size
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        z = tf.reduce_sum(tf.reshape(xw, [-1, num_inputs, embed_size]), 1)
        op = tf.reshape(
            tf.matmul(tf.reshape(z, [-1, embed_size, 1]),
                      tf.reshape(z, [-1, 1, embed_size])),
            [-1, embed_size * embed_size])
        if layer_norm:
            # x_mean, x_var = tf.nn.moments(xw, [1], keep_dims=True)
            # xw = (xw - x_mean) / tf.sqrt(x_var)
            # x_g = tf.Variable(tf.ones([num_inputs * embed_size]), name='x_g')
            # x_b = tf.Variable(tf.zeros([num_inputs * embed_size]), name='x_b')
            # x_g = tf.Print(x_g, [x_g[:10], x_b])
            # xw = xw * x_g + x_b
            p_mean, p_var = tf.nn.moments(op, [1], keep_dims=True)
            op = (op - p_mean) / tf.sqrt(p_var)
            p_g = tf.Variable(tf.ones([embed_size ** 2]), name='p_g')
            p_b = tf.Variable(tf.zeros([embed_size ** 2]), name='p_b')
            # p_g = tf.Print(p_g, [p_g[:10], p_b])
            op = op * p_g + p_b
        l = tf.concat([xw, op], 1)
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(tf.concat(w0, 0))
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
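# NumPy check of the outer-product construction above: the flattened
# matmul(z[:, :, None], z[:, None, :]) equals einsum('bi,bj->bij', z, z),
# i.e. op is the flattened outer product z z^T per sample.
import numpy as np

batch, embed_size = 2, 4
z = np.random.randn(batch, embed_size)
op = np.matmul(z[:, :, None], z[:, None, :]).reshape(batch, embed_size * embed_size)
ref = np.einsum('bi,bj->bij', z, z).reshape(batch, embed_size * embed_size)
assert np.allclose(op, ref)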
def __init__(self, data_dir=None, summary_dir=None, eval_dir=None, batch_size=None, input_dim=None,
             output_dim=1, layer_sizes=None, layer_acts=None, drop_out=None, layer_l2=None, kernel_l2=None,
             l2_w=0, init_path=None, opt_algo='gd', learning_rate=1e-2, sync=False, workers=20):
    Model.__init__(self)
    eprint("------- create graph ---------------")
    init_vars = []
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('w1', [num_inputs * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('k1', [num_inputs, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        xw = [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
        l = tf.nn.dropout(utils.activate(x, layer_acts[0]), self.layer_keeps[0])
        w1 = self.vars['w1']
        k1 = self.vars['k1']
        b1 = self.vars['b1']
        p = tf.reduce_sum(
            tf.reshape(
                tf.matmul(
                    tf.reshape(
                        tf.transpose(
                            tf.reshape(l, [-1, num_inputs, factor_order]),
                            [0, 2, 1]),
                        [-1, num_inputs]),
                    k1),
                [-1, factor_order, layer_sizes[2]]),
            1)
        l = tf.nn.dropout(
            utils.activate(tf.matmul(l, w1) + b1 + p, layer_acts[1]),
            self.layer_keeps[1])
        for i in range(2, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        # logits
        l = tf.reshape(l, [-1])
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        if kernel_l2 is not None:
            self.loss += kernel_l2 * tf.nn.l2_loss(k1)
        self.global_step = _variable_on_cpu('global_step', [],
                                            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()
def __init__(self, layer_sizes=None, layer_acts=None, layer_keeps=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None):
    init_vars = []
    num_inputs = len(layer_sizes[0])
    embedding_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = embedding_order
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('f1', [embedding_order, layer_sizes[2], 1, 2], 'tnormal', dtype))
    init_vars.append(('f2', [embedding_order, layer_sizes[3], 2, 2], 'tnormal', dtype))
    init_vars.append(('w1', [2 * 3 * embedding_order, 1], 'tnormal', dtype))
    init_vars.append(('b1', [1], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        l = tf.nn.dropout(
            utils.activate(
                tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) + b0[i]
                           for i in range(num_inputs)], 1),
                layer_acts[0]),
            layer_keeps[0])
        l = tf.transpose(
            tf.reshape(l, [-1, num_inputs, embedding_order, 1]),
            [0, 2, 1, 3])
        f1 = self.vars['f1']
        l = tf.nn.conv2d(l, f1, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            utils.max_pool_4d(tf.transpose(l, [0, 1, 3, 2]), int(num_inputs / 2)),
            [0, 1, 3, 2])
        f2 = self.vars['f2']
        l = tf.nn.conv2d(l, f2, [1, 1, 1, 1], 'SAME')
        l = tf.transpose(
            utils.max_pool_4d(tf.transpose(l, [0, 1, 3, 2]), 3),
            [0, 1, 3, 2])
        l = tf.nn.dropout(
            utils.activate(tf.reshape(l, [-1, embedding_order * 3 * 2]), layer_acts[1]),
            layer_keeps[1])
        w1 = self.vars['w1']
        b1 = self.vars['b1']
        l = tf.nn.dropout(
            utils.activate(tf.matmul(l, w1) + b1, layer_acts[2]),
            layer_keeps[2])
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    node_in = num_inputs * embed_size
    for i in range(len(layer_sizes) - 1):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    init_vars.append(('w_final', [2 * node_in, layer_sizes[-1]], 'xavier', dtype))
    init_vars.append(('b_final', [layer_sizes[-1]], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        # 38 fields drawn from one large sparse feature matrix (6086-dim): each field
        # is embedded separately by its own dimensionality, then the results are concatenated
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
                        for i in range(num_inputs)], 1)
        l = xw
        la = []
        for i in range(len(layer_sizes) - 2):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            print(l.shape, wi.shape, bi.shape)
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
            la.append(l)
        l_final = tf.nn.dropout(
            utils.activate(
                tf.matmul(l, self.vars['w%d' % (len(layer_sizes) - 2)]) +
                self.vars['b%d' % (len(layer_sizes) - 2)],
                layer_acts[len(layer_sizes) - 2]),
            self.layer_keeps[len(layer_sizes) - 2])
        la_new = tf.concat([x for x in la], 0)
        H = tf.reshape(la_new, [-1, len(layer_sizes) - 2, layer_sizes[0]])  # shape = [batch_size, 3, 128]
        H_T = tf.transpose(H, [0, 2, 1])  # shape = [batch_size, 128, 3]
        S_0 = tf.matmul(H, H_T)
        mask = [x for x in range(len(layer_sizes) - 2)]
        mask_zero = tf.ones([len(layer_sizes) - 2, len(layer_sizes) - 2]) - \
                    tf.one_hot(mask, len(layer_sizes) - 2)
        S = tf.multiply(S_0, mask_zero)
        print(S.shape)
        A = tf.nn.softmax(S, name='attention')  # shape = batch_size * 3 * 3
        G = tf.reduce_sum(tf.matmul(A, H), 1)  # shape = batch_size * 128
        print(G.shape)
        M = tf.concat([l_final, G], 1)
        w_final = self.vars['w_final']
        b_final = self.vars['b_final']
        l_final = tf.matmul(M, w_final) + b_final
        l_final = tf.squeeze(l_final)
        self.y_prob = tf.sigmoid(l_final)

        self.loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l_final, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
        self.saver = tf.train.Saver(max_to_keep=3)
def __init__(self, data_dir=None, eval_dir=None, summary_dir=None, num_epochs=1, batch_size=None,
             input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_w=0, l2_v=0, sync=False, workers=20):
    Model.__init__(self)
    data_file_list = tf.gfile.ListDirectory(data_dir)
    data_file_list = [x for x in data_file_list if '.tf' in x]
    data_file_list = [os.path.join(data_dir, x) for x in data_file_list]
    data_file_list.sort()
    eprint("input files:", data_file_list)
    input_files = data_file_list
    eprint("-------- create graph ----------")
    # self.graph = tf.Graph()
    # with self.graph.as_default():
    with tf.device('/cpu:0'):
        self.X = tf.sparse_placeholder(tf.float32, name='X')
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(tf.float32, shape=[None], name='y')
        init_vars = [('linear', [input_dim, output_dim], 'xavier', dtype),
                     ('V', [input_dim, factor_order], 'xavier', dtype),
                     ('bias', [output_dim], 'zero', dtype)]
        self.vars = utils.init_var_map(init_vars, None)
        w = self.vars['linear']
        V = self.vars['V']
        b = self.vars['bias']
        ## linear term
        Xw = tf.sparse_tensor_dense_matmul(self.B, w)
        ## cross term
        # X^2
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values), tf.to_int64(tf.shape(self.X)))
        # (XV)^2, shape: batch * k
        XV_square = tf.square(tf.sparse_tensor_dense_matmul(self.X, V))
        # X^2 * V^2, shape: batch * k
        X2V2 = tf.sparse_tensor_dense_matmul(X_square, tf.square(V))
        ## normalize by the per-row sum of X
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(self.X, 1), [-1, output_dim])
        # 1/2 * row_sum(XV_square - X2V2), shape: batch * 1
        p = 0.5 * Xnorm * tf.reshape(tf.reduce_sum(XV_square - X2V2, 1), [-1, output_dim])
        logits = tf.reshape(b + Xw + p, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                    l2_w * tf.nn.l2_loss(Xw)
        self.global_step = _variable_on_cpu('global_step', [],
                                            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()
def __init__(self, feature_size, field_size, embedding_size=8, optimizer_type='gd', learning_rate=1e-2,
             verbose=False, random_seed=None, eval_metric=roc_auc_score, greater_is_better=True,
             epoch=10, batch_size=1024, l2_reg=0, deep_layers=[32, 32], batch_norm=True,
             dropout_deep=[], cross_layer_num=3):
    Model.__init__(self, eval_metric, greater_is_better, epoch, batch_size, verbose,
                   batch_norm, dropout_deep)
    init_vars = [('weight', [feature_size, 1], 'uniform', tf.float32),
                 ('bias', [1], 'uniform', tf.float32),
                 ('feature_embed', [feature_size, embedding_size], 'normal', tf.float32)]
    node_in = embedding_size * field_size
    for i in range(len(deep_layers)):
        init_vars.extend([('layer_%d' % i, [node_in, deep_layers[i]], 'glorot_normal', tf.float32)])
        init_vars.extend([('bias_%d' % i, [1, deep_layers[i]], 'glorot_normal', tf.float32)])
        node_in = deep_layers[i]
    for i in range(cross_layer_num):
        init_vars.extend([('cross_layer_%d' % i, [1, embedding_size * field_size], 'glorot_normal', tf.float32)])
        init_vars.extend([('cross_bias_%d' % i, [1, embedding_size * field_size], 'glorot_normal', tf.float32)])
    node_in = embedding_size * field_size + deep_layers[-1]
    init_vars.extend([('concat_projection', [node_in, 1], 'glorot_normal', tf.float32)])
    init_vars.extend([('concat_bias', [1, 1], 'glorot_normal', tf.float32)])
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.feat_index = tf.placeholder(tf.int32, shape=[None, None], name="feat_index")  # None * F
        self.feat_value = tf.placeholder(tf.float32, shape=[None, None], name="feat_value")  # None * F
        self.label = tf.placeholder(tf.float32, shape=[None, 1], name="label")  # None * 1
        self.dropout_keep_deep = tf.placeholder(tf.float32, shape=[None], name="dropout_keep_deep")
        self.train_phase = tf.placeholder(tf.bool, name="train_phase")
        self.vars = utils.init_var_map(init_vars)
        self.embeddings = tf.nn.embedding_lookup(self.vars["feature_embed"], self.feat_index)  # None * F * K
        feat_value = tf.reshape(self.feat_value, shape=[-1, field_size, 1])
        self.embeddings = tf.reshape(tf.multiply(self.embeddings, feat_value),
                                     shape=[-1, embedding_size * field_size])
        # ---------- cross layer ----------
        self.deep_cross_input = tf.nn.dropout(self.embeddings, self.dropout_keep_deep[0])
        self.cross_layer_out = self.deep_cross_input
        for i in range(1, cross_layer_num):
            self.x0xiT = self.deep_cross_input * self.cross_layer_out
            self.x0xiT = tf.reduce_sum(self.x0xiT, 1, keep_dims=True)  # None * 1
            self.cross_layer_out = tf.add(tf.matmul(self.x0xiT, self.vars['cross_layer_%d' % i]),
                                          self.vars['cross_bias_%d' % i]) + self.cross_layer_out
        # ---------- deep component ----------
        self.y_deep = self.deep_cross_input
        for i in range(len(deep_layers)):
            self.y_deep = tf.add(tf.matmul(self.y_deep, self.vars['layer_%d' % i]),
                                 self.vars['bias_%d' % i])
            if self.batch_norm:
                self.y_deep = self.batch_norm_layer(self.y_deep, train_phase=self.train_phase,
                                                    scope_bn="bn_%d" % i)
            self.y_deep = tf.nn.dropout(utils.activate(self.y_deep, 'relu'),
                                        self.dropout_keep_deep[i + 1])
        concat_projection = self.vars['concat_projection']
        concat_bias = self.vars['concat_bias']
        self.out = tf.concat([self.y_deep, self.cross_layer_out], 1)
        self.out = tf.add(tf.matmul(self.out, concat_projection), concat_bias)
        self.y_prob = tf.sigmoid(self.out)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label, logits=self.out)) + \
                    tf.contrib.layers.l2_regularizer(l2_reg)(self.vars["concat_projection"])
        for i in range(len(deep_layers)):
            self.loss += tf.contrib.layers.l2_regularizer(l2_reg)(self.vars["layer_%d" % i])
        self.optimizer = utils.get_optimizer(optimizer_type, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
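# NumPy sketch of the cross-layer recurrence as implemented in the loop above.
# Note it is a scalar variant of the standard DCN cross layer: instead of
# x0 (x_l^T w), it uses the per-sample inner product <x0, x_l>, so
# x_{l+1} = <x0, x_l> * w_l + b_l + x_l. Shapes only; weights are random here.
import numpy as np

batch, dim, num_layers = 2, 6, 3
rng = np.random.RandomState(0)
x0 = rng.randn(batch, dim)
x = x0
for i in range(num_layers):
    w = rng.randn(1, dim)                             # plays the role of cross_layer_i
    b = rng.randn(1, dim)                             # plays the role of cross_bias_i
    x0xiT = np.sum(x0 * x, axis=1, keepdims=True)     # [batch, 1], <x0, x_i>
    x = x0xiT @ w + b + x                             # [batch, dim]
assert x.shape == (batch, dim)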
def __init__(self, field_sizes=None, embed_size=10, layer_size=None, layer_acts=None, drop_out=None,
             embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2,
             random_seed=None, layer_norm=True):
    Model.__init__(self)
    init_vars = []
    num_input = len(field_sizes)
    for i in range(num_input):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    num_pairs = int(num_input * (num_input - 1) / 2)  # number of field pairs
    node_in = num_input * embed_size + num_pairs
    # the kernel is the key design point here
    init_vars.append(('kernel', [embed_size, num_pairs, embed_size], 'xavier', dtype))
    for i in range(len(layer_size)):
        init_vars.append(('w%d' % i, [node_in, layer_size[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_size[i]], 'zero', dtype))
        node_in = layer_size[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_input)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w_0 = [self.vars['embed_%d' % i] for i in range(num_input)]
        # batch * (embed_size * num_field)
        xw = tf.concat([tf.sparse_tensor_dense_matmul(self.X[i], w_0[i])
                        for i in range(num_input)], 1)
        xw3d = tf.reshape(xw, [-1, num_input, embed_size])
        row = []
        col = []
        # build the pair indices
        for i in range(num_input - 1):
            for j in range(i + 1, num_input):
                row.append(i)
                col.append(j)
        # batch * pair * embed_size
        p = tf.transpose(
            # pair * batch * embed_size
            tf.gather(
                # num_field * batch * embed_size
                tf.transpose(xw3d, [1, 0, 2]),
                row),
            [1, 0, 2])
        q = tf.transpose(
            # pair * batch * embed_size
            tf.gather(
                # num_field * batch * embed_size
                tf.transpose(xw3d, [1, 0, 2]),
                col),
            [1, 0, 2])
        p = tf.reshape(p, [-1, num_pairs, embed_size])
        q = tf.reshape(q, [-1, num_pairs, embed_size])
        # embed_size * num_pairs * embed_size
        k = self.vars['kernel']
        p = tf.expand_dims(p, 1)  # add a dimension: batch * 1 * pair * embed_size
        # batch * num_pairs
        # temp = tf.multiply(p, k)
        # temp = tf.reduce_sum(temp, -1)
        # temp = tf.transpose(temp, [0, 2, 1])
        # temp = tf.multiply(temp, q)
        # temp = tf.reduce_sum(temp, -1)
        kp = tf.reduce_sum(
            # batch * num_pairs * embed_size
            tf.multiply(
                # transpose: batch * num_pairs * embed_size
                tf.transpose(
                    # sum over the last axis: batch * embed_size * num_pairs
                    tf.reduce_sum(
                        # element-wise product: batch * embed_size * num_pairs * embed_size
                        tf.multiply(p, k),
                        -1),
                    [0, 2, 1]),
                q),
            -1)
        l = tf.concat([xw, kp], 1)
        for i in range(len(layer_size)):
            w_i = self.vars['w%d' % i]
            b_i = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, w_i) + b_i, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_size)):
                w_i = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(w_i)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, layer_sizes=None, layer_acts=None, drop_out=None, layer_l2=None, kernel_l2=None,
             init_path=None, opt_algo='gd', learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('w1', [num_inputs * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('k1', [factor_order * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        xw = [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
        l = tf.nn.dropout(utils.activate(x, layer_acts[0]), self.layer_keeps[0])
        w1 = self.vars['w1']
        k1 = self.vars['k1']
        b1 = self.vars['b1']
        z = tf.reduce_sum(tf.reshape(l, [-1, num_inputs, factor_order]), 1)
        p = tf.reshape(
            tf.matmul(tf.reshape(z, [-1, factor_order, 1]),
                      tf.reshape(z, [-1, 1, factor_order])),
            [-1, factor_order * factor_order])
        l = tf.nn.dropout(
            utils.activate(
                tf.matmul(l, w1) + tf.matmul(p, k1) + b1,
                layer_acts[1]),
            self.layer_keeps[1])
        for i in range(2, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.reshape(l, [-1])
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            # for i in range(num_inputs):
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                # bi = self.vars['b%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        if kernel_l2 is not None:
            self.loss += kernel_l2 * tf.nn.l2_loss(k1)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None,
             drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None, layer_norm=True):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    num_pairs = int(num_inputs * (num_inputs - 1) / 2)
    node_in = num_inputs * embed_size + num_pairs
    init_vars.append(('kernel', [embed_size, num_pairs, embed_size], 'xavier', dtype))
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat(
            [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1)
        xw3d = tf.reshape(xw, [-1, num_inputs, embed_size])
        row = []
        col = []
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)
        # batch * pair * k
        p = tf.transpose(
            # pair * batch * k
            tf.gather(
                # num * batch * k
                tf.transpose(xw3d, [1, 0, 2]),
                row),
            [1, 0, 2])
        # batch * pair * k
        q = tf.transpose(
            tf.gather(
                tf.transpose(xw3d, [1, 0, 2]),
                col),
            [1, 0, 2])
        # b * p * k
        p = tf.reshape(p, [-1, num_pairs, embed_size])
        # b * p * k
        q = tf.reshape(q, [-1, num_pairs, embed_size])
        # k * p * k
        k = self.vars['kernel']
        # batch * 1 * pair * k
        p = tf.expand_dims(p, 1)
        # batch * pair
        kp = tf.reduce_sum(
            # batch * pair * k
            tf.multiply(
                # batch * pair * k
                tf.transpose(
                    # batch * k * pair
                    tf.reduce_sum(
                        # batch * k * pair * k
                        tf.multiply(p, k),
                        -1),
                    [0, 2, 1]),
                q),
            -1)
        # Disabled layer-norm experiment (note: `op` is not defined in this model):
        # if layer_norm:
        #     # x_mean, x_var = tf.nn.moments(xw, [1], keep_dims=True)
        #     # xw = (xw - x_mean) / tf.sqrt(x_var)
        #     # x_g = tf.Variable(tf.ones([num_inputs * embed_size]), name='x_g')
        #     # x_b = tf.Variable(tf.zeros([num_inputs * embed_size]), name='x_b')
        #     # x_g = tf.Print(x_g, [x_g[:10], x_b])
        #     # xw = xw * x_g + x_b
        #     p_mean, p_var = tf.nn.moments(op, [1], keep_dims=True)
        #     op = (op - p_mean) / tf.sqrt(p_var)
        #     p_g = tf.Variable(tf.ones([embed_size ** 2]), name='p_g')
        #     p_b = tf.Variable(tf.zeros([embed_size ** 2]), name='p_b')
        #     # p_g = tf.Print(p_g, [p_g[:10], p_b])
        #     op = op * p_g + p_b
        l = tf.concat([xw, kp], 1)
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)  # alternative: tf.nn.l2_loss(tf.concat(w0, 0))
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, layer_sizes=None, layer_acts=None, layer_keeps=None, layer_l2=None,
             kernel_l2=None, init_path=None, opt_algo='gd', learning_rate=1e-2,
             random_seed=None):
    """
    # Arguments:
        layer_sizes: [num_fields, factor_order, l_p size]
        layer_acts: ["tanh", "none"]
        layer_keeps: [1, 1]
        layer_l2: [0, 0]
        kernel_l2: 0
    """
    init_vars = []
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        # w0 stores the embeddings for all features.
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('w_l', [num_inputs * factor_order, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('w_p', [num_inputs * num_inputs, layer_sizes[2]], 'tnormal', dtype))
    # init_vars.append(('w1', [num_inputs * factor_order + num_inputs * num_inputs,
    #                          layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        # Multiply SparseTensor X[i] by dense matrix w0[i]
        xw = [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
        l = tf.nn.dropout(utils.activate(x, layer_acts[0]), layer_keeps[0])
        w_l = self.vars['w_l']
        w_p = self.vars['w_p']
        b1 = self.vars['b1']
        # This is where W_p . p happens; w_p plays the role of \theta,
        # the weight on each pairwise (field, field) interaction.
        p = tf.matmul(
            tf.reshape(l, [-1, num_inputs, factor_order]),
            tf.transpose(tf.reshape(l, [-1, num_inputs, factor_order]), [0, 2, 1]))
        p = tf.nn.dropout(
            utils.activate(
                tf.matmul(tf.reshape(p, [-1, num_inputs * num_inputs]), w_p), 'none'),
            1.0)
        l = tf.nn.dropout(
            utils.activate(tf.matmul(l, w_l) + b1 + p, layer_acts[1]),
            layer_keeps[1])
        for i in range(2, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                layer_keeps[i])
        l = tf.reshape(l, [-1])  # flatten logits to match the label shape, as in the sibling models
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                if i == 1:
                    self.loss += layer_l2[i] * tf.nn.l2_loss(w_l)
                    self.loss += layer_l2[i] * tf.nn.l2_loss(w_p)
                else:
                    wi = self.vars['w%d' % i]
                    self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        if kernel_l2 is not None:
            pass
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None,
             drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    num_pairs = int(num_inputs * (num_inputs - 1) / 2)
    node_in = num_inputs * embed_size + num_pairs
    # node_in = num_inputs * (embed_size + num_inputs)
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat(
            [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1)
        xw3d = tf.reshape(xw, [-1, num_inputs, embed_size])
        row = []
        col = []
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)
        # batch * pair * k
        p = tf.transpose(
            # pair * batch * k
            tf.gather(
                # num * batch * k
                tf.transpose(xw3d, [1, 0, 2]),
                row),
            [1, 0, 2])
        # batch * pair * k
        q = tf.transpose(
            tf.gather(
                tf.transpose(xw3d, [1, 0, 2]),
                col),
            [1, 0, 2])
        p = tf.reshape(p, [-1, num_pairs, embed_size])
        q = tf.reshape(q, [-1, num_pairs, embed_size])
        ip = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])
        # simple but redundant
        # batch * n * 1 * k, batch * 1 * n * k
        # ip = tf.reshape(
        #     tf.reduce_sum(
        #         tf.expand_dims(xw3d, 2) * tf.expand_dims(xw3d, 1),
        #         3),
        #     [-1, num_inputs ** 2])
        l = tf.concat([xw, ip], 1)
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
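The transpose/gather/transpose dance above only builds, for every field pair (i, j) with i < j, the tensors of left and right embeddings; the inner product then reduces them to one scalar per pair. A minimal NumPy sketch (illustrative; the names and sizes are made up) of the same construction:

import numpy as np

batch, num_inputs, embed_size = 3, 4, 5
xw3d = np.random.randn(batch, num_inputs, embed_size)
row, col = zip(*[(i, j) for i in range(num_inputs - 1) for j in range(i + 1, num_inputs)])
# row = (0, 0, 0, 1, 1, 2), col = (1, 2, 3, 2, 3, 3): all pairs with i < j
p = xw3d[:, list(row), :]  # batch * num_pairs * k
q = xw3d[:, list(col), :]  # batch * num_pairs * k
ip = (p * q).sum(-1)       # batch * num_pairs; ip[b, 0] == <field 0, field 1>
assert np.allclose(ip[0, 0], xw3d[0, 0] @ xw3d[0, 1])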
def __init__(self, data_dir=None, summary_dir=None, eval_dir=None, batch_size=None,
             input_dim=None, output_dim=1, factor_order=10, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_w=0, sync=False, workers=20):
    Model.__init__(self)
    eprint("-------- create graph ----------")
    with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
        self.X = tf.sparse_placeholder(tf.float32, name='X')
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(tf.float32, shape=[None], name='y')
    init_vars = [('linear', [input_dim, output_dim], 'xavier', dtype),
                 ('U', [input_dim, factor_order], 'xavier', dtype),
                 ('V', [input_dim, factor_order], 'xavier', dtype),
                 ('bias', [output_dim], 'zero', dtype)]
    self.vars = utils.init_var_map(init_vars, None)
    w = self.vars['linear']
    U = self.vars['U']
    V = self.vars['V']
    b = self.vars['bias']
    ## normalize
    Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(self.X, 1), [-1, output_dim])
    ## linear term
    Xw = tf.sparse_tensor_dense_matmul(self.B, w, name="Xw")
    ## cross term
    XU = tf.sparse_tensor_dense_matmul(self.X, U, name="XU")
    XV = tf.sparse_tensor_dense_matmul(self.X, V, name="XV")
    X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values),
                               tf.to_int64(tf.shape(self.X)))
    p = 0.5 * Xnorm * tf.reshape(
        tf.reduce_sum(XU * XV - tf.sparse_tensor_dense_matmul(X_square, U * V), 1),
        [-1, output_dim])
    logits = tf.reshape(b + Xw + p, [-1])
    self.y_prob = tf.sigmoid(logits)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
        l2_w * tf.nn.l2_loss(Xw)
    self.global_step = _variable_on_cpu(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)
    if sync:
        self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
    else:
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
    self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
    self.summary_op = tf.summary.merge_all()
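The cross term relies on the standard O(nk) factorization-machine identity, generalized here to two factor matrices U and V: summing (XU)*(XV) over factors counts every ordered pair (i, j) once, and subtracting X^2(U*V) removes the diagonal i = j. A small NumPy check (illustrative; a dense x stands in for one sparse row):

import numpy as np

n, k = 8, 4
x = np.random.randn(n)
U = np.random.randn(n, k)
V = np.random.randn(n, k)

# 0.5 * sum_f [(xU)_f (xV)_f - (x^2 (U*V))_f] == 0.5 * sum_{i != j} x_i x_j <U_i, V_j>
fast = 0.5 * ((x @ U) * (x @ V) - (x ** 2) @ (U * V)).sum()
slow = 0.5 * sum(x[i] * x[j] * (U[i] @ V[j]) for i in range(n) for j in range(n) if i != j)
assert np.allclose(fast, slow)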
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None,
             drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None, layer_norm=True):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    node_in = num_inputs * embed_size + embed_size * embed_size
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat(
            [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1)
        z = tf.reduce_sum(tf.reshape(xw, [-1, num_inputs, embed_size]), 1)
        op = tf.reshape(
            tf.matmul(tf.reshape(z, [-1, embed_size, 1]),
                      tf.reshape(z, [-1, 1, embed_size])),
            [-1, embed_size * embed_size])
        if layer_norm:
            # x_mean, x_var = tf.nn.moments(xw, [1], keep_dims=True)
            # xw = (xw - x_mean) / tf.sqrt(x_var)
            # x_g = tf.Variable(tf.ones([num_inputs * embed_size]), name='x_g')
            # x_b = tf.Variable(tf.zeros([num_inputs * embed_size]), name='x_b')
            # x_g = tf.Print(x_g, [x_g[:10], x_b])
            # xw = xw * x_g + x_b
            p_mean, p_var = tf.nn.moments(op, [1], keep_dims=True)
            op = (op - p_mean) / tf.sqrt(p_var)
            p_g = tf.Variable(tf.ones([embed_size ** 2]), name='p_g')
            p_b = tf.Variable(tf.zeros([embed_size ** 2]), name='p_b')
            # p_g = tf.Print(p_g, [p_g[:10], p_b])
            op = op * p_g + p_b
        l = tf.concat([xw, op], 1)
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(tf.concat(w0, 0))
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
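The quadratic feature `op` above is the flattened outer product z z^T of the summed field embeddings; that is exactly what the matmul of the two reshapes computes. A tiny NumPy sketch (illustrative sizes):

import numpy as np

batch, num_inputs, embed_size = 2, 3, 4
xw3d = np.random.randn(batch, num_inputs, embed_size)
z = xw3d.sum(1)                                        # batch * k, sum over fields
op = np.einsum('bi,bj->bij', z, z).reshape(batch, -1)  # batch * k^2, flattened z z^T
assert np.allclose(op[0], np.outer(z[0], z[0]).ravel())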
def __init__(self, data_dir=None, summary_dir=None, eval_dir=None, batch_size=None,
             input_dim=None, output_dim=1, layer_sizes=None, layer_acts=None, drop_out=None,
             init_path=None, opt_algo='gd', learning_rate=1e-2, l2_w=0, layer_l2=None,
             sync=False, workers=20):
    Model.__init__(self)
    eprint("-------- create graph ----------")
    init_vars = []
    # linear part
    init_vars.append(('linear', [input_dim, output_dim], 'xavier', dtype))
    init_vars.append(('bias', [output_dim], 'zero', dtype))
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        # layer_sizes[0][i] stores the number of features in the i-th field
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'xavier', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    # full connection
    node_in = num_inputs * factor_order
    init_vars.append(('w1', [node_in, layer_sizes[2]], 'xavier', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    # self.graph = tf.Graph()
    # with self.graph.as_default():
    # with tf.device('/cpu:0'):
    with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(dtype)
    self.keep_prob_train = 1 - np.array(drop_out)
    self.keep_prob_test = np.ones_like(drop_out)
    self.layer_keeps = tf.placeholder(dtype)
    self.vars = utils.init_var_map(init_vars, init_path)
    w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
    b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
    xw = [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)]
    x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
    ## normalize
    fmX = tf.sparse_add(self.X[0], self.X[1])
    for i in range(2, num_inputs):
        fmX = tf.sparse_add(fmX, self.X[i])
    Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(fmX, 1), [-1, output_dim])
    l = tf.nn.dropout(utils.activate(x, layer_acts[0]), self.layer_keeps[0])
    for i in range(1, len(layer_sizes) - 1):
        wi = self.vars['w%d' % i]
        bi = self.vars['b%d' % i]
        eprint(l.get_shape(), wi.get_shape(), bi.get_shape())
        l = tf.nn.dropout(
            utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
            self.layer_keeps[i])
    ## FM linear part
    fmb = self.vars['bias']
    fmw = self.vars['linear']
    Xw = tf.sparse_tensor_dense_matmul(self.B, fmw)
    ## cross term
    # XV, shape: batch * k
    fmXV = tf.add_n(xw)
    XV_square = tf.square(fmXV)
    eprint(XV_square.get_shape())
    # X^2 * V^2, shape: batch * k
    fmX2 = [tf.SparseTensor(self.X[i].indices, tf.square(self.X[i].values),
                            tf.to_int64(tf.shape(self.X[i])))
            for i in range(num_inputs)]
    fmV2 = [tf.square(w0[i]) for i in range(num_inputs)]
    fmX2V2 = [tf.sparse_tensor_dense_matmul(fmX2[i], fmV2[i]) for i in range(num_inputs)]
    X2V2 = tf.add_n(fmX2V2)
    eprint(X2V2.get_shape())
    # 1/2 * row_sum(XV_square - X2V2), shape: batch * 1
    p = 0.5 * Xnorm * tf.reshape(tf.reduce_sum(XV_square - X2V2, 1), [-1, output_dim])
    ## logits
    logits = tf.reshape(l + Xw + fmb + p, [-1])
    ## predict
    self.y_prob = tf.sigmoid(logits)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
        l2_w * tf.nn.l2_loss(Xw)
    if layer_l2 is not None:
        self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
        for i in range(1, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
    self.global_step = _variable_on_cpu(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)
    if sync:
        self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
    else:
        self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
    self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
    self.summary_op = tf.summary.merge_all()
def __init__(self, field_sizes=None, embed_size=10, layer_sizes=None, layer_acts=None,
             drop_out=None, embed_l2=None, layer_l2=None, init_path=None, opt_algo='gd',
             learning_rate=1e-2, random_seed=None):
    Model.__init__(self)
    init_vars = []
    num_inputs = len(field_sizes)
    for i in range(num_inputs):
        init_vars.append(('embed_%d' % i, [field_sizes[i], embed_size], 'xavier', dtype))
    num_pairs = int(num_inputs * (num_inputs - 1) / 2)
    node_in = num_inputs * embed_size + num_pairs
    # node_in = num_inputs * (embed_size + num_inputs)
    for i in range(len(layer_sizes)):
        init_vars.append(('w%d' % i, [node_in, layer_sizes[i]], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_sizes[i]], 'zero', dtype))
        node_in = layer_sizes[i]
    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = init_var_map(init_vars, init_path)
        w0 = [self.vars['embed_%d' % i] for i in range(num_inputs)]
        xw = tf.concat(
            [tf.sparse_tensor_dense_matmul(self.X[i], w0[i]) for i in range(num_inputs)], 1)
        xw3d = tf.reshape(xw, [-1, num_inputs, embed_size])
        row = []
        col = []
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)
        # batch * pair * k
        p = tf.transpose(
            # pair * batch * k
            tf.gather(
                # num * batch * k
                tf.transpose(xw3d, [1, 0, 2]),
                row),
            [1, 0, 2])
        # batch * pair * k
        q = tf.transpose(
            tf.gather(
                tf.transpose(xw3d, [1, 0, 2]),
                col),
            [1, 0, 2])
        p = tf.reshape(p, [-1, num_pairs, embed_size])
        q = tf.reshape(q, [-1, num_pairs, embed_size])
        ip = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])
        # simple but redundant
        # batch * n * 1 * k, batch * 1 * n * k
        # ip = tf.reshape(
        #     tf.reduce_sum(
        #         tf.expand_dims(xw3d, 2) * tf.expand_dims(xw3d, 1),
        #         3),
        #     [-1, num_inputs ** 2])
        l = tf.concat([xw, ip], 1)
        for i in range(len(layer_sizes)):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        l = tf.squeeze(l)
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += embed_l2 * tf.nn.l2_loss(xw)
            for i in range(len(layer_sizes)):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def __init__(self, feature2field, layer_sizes, layer_acts, drop_out, learning_rate=1e-2,
             l2_reg=0, feature_size=None, field_size=None, k=40, opt_algo='adam', train=True):
    # feature_size : number of features
    # field_size : number of fields
    # k : latent vector dimension
    Model.__init__(self)
    layers = [int(e) for e in layer_sizes.split(',')]
    dropout = [float(e) for e in drop_out.split(',')]
    layer_active_func = [e for e in layer_acts.split(',')]
    self.X = tf.placeholder(dtype=dtype, shape=[None, feature_size], name='input')
    self.y = tf.placeholder(dtype=dtype, shape=[None, ], name='label')
    self.keep_prob = tf.placeholder(dtype=dtype)
    with tf.variable_scope('linear_layer'):
        # w .* x + b
        self.b = tf.get_variable(name='bias', initializer=tf.constant(0.5),
                                 dtype=dtype)  # tf.zeros_initializer()
        self.w1 = tf.get_variable(
            name='w1', shape=[feature_size],
            initializer=tf.truncated_normal_initializer(mean=0, stddev=1e-2), dtype=dtype)
        self.linear_terms = tf.reduce_sum(tf.multiply(self.w1, self.X), 1) + self.b
    with tf.variable_scope('field_aware_interaction_layer'):
        # sum(<vi_fj, vj_fi> * x_i * x_j)
        self.nfk = tf.get_variable(
            'nfk', shape=[feature_size, field_size, k], dtype=dtype,
            initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
        self.field_aware_interaction_terms = tf.constant(0, dtype=dtype)
        for i in range(feature_size):
            for j in range(i + 1, feature_size):
                self.field_aware_interaction_terms += tf.multiply(
                    tf.reduce_sum(tf.multiply(self.nfk[i, feature2field[j]],
                                              self.nfk[j, feature2field[i]])),
                    tf.multiply(self.X[:, i], self.X[:, j]))
    init_deep_layer_vars = []
    input_dim = feature_size
    for i in range(len(layers)):
        output_dim = layers[i]
        init_deep_layer_vars.append(('deepW_%d' % i, [input_dim, output_dim], 'xavier', dtype))
        init_deep_layer_vars.append(('deepB_%d' % i, [output_dim], 'zero', dtype))
        input_dim = layers[i]
    init_deep_layer_vars.append(('outW', [layers[-1], 1], 'xavier', dtype))
    init_deep_layer_vars.append(('outB', [1], 'zero', dtype))
    self.deepVars = init_var_map(init_deep_layer_vars)
    with tf.variable_scope("Deep-part"):
        hidden = self.X
        for i in range(len(layers)):
            if train:
                hidden = tf.nn.dropout(
                    # h_i = W_i * x + b_i
                    activate(tf.matmul(hidden, self.deepVars['deepW_%d' % i]) +
                             self.deepVars['deepB_%d' % i], layer_active_func[i]),
                    dropout[i])
            else:
                hidden = activate(tf.matmul(hidden, self.deepVars['deepW_%d' % i]) +
                                  self.deepVars['deepB_%d' % i], layer_active_func[i])
        self.deepOut = tf.matmul(hidden, self.deepVars['outW']) + self.deepVars['outB']
        # tf.reshape() guards against odd values produced by implicit broadcasting
        self.deepOut = tf.reshape(self.deepOut, shape=[-1])
    with tf.variable_scope("DeepFM-out"):
        self.out_sum = self.linear_terms + self.field_aware_interaction_terms + self.deepOut
        self.pred_prob = tf.sigmoid(self.out_sum)
    # ------ build loss ------
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=self.out_sum, labels=self.y)) + \
        l2_reg * tf.nn.l2_loss(self.w1) + \
        l2_reg * tf.nn.l2_loss(self.nfk)
    # ------ build optimizer ------
    self.optimizer = get_optimizer(opt_algo, learning_rate, self.loss)
    # saver for the model parameters
    self.saver = tf.train.Saver(tf.global_variables())
    # GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
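For reference, the field-aware term built by the double loop above is, per example, the sum over i < j of <v_{i, f(j)}, v_{j, f(i)}> x_i x_j. Below is a NumPy sketch of the same sum (illustrative; the mapping and sizes are made up). Note that the TF version creates O(feature_size^2) graph nodes, which is only practical for small feature counts:

import numpy as np

feature_size, field_size, k = 5, 2, 3
feature2field = {0: 0, 1: 0, 2: 1, 3: 1, 4: 1}  # hypothetical feature -> field mapping
x = np.random.randn(feature_size)
nfk = np.random.randn(feature_size, field_size, k)  # one latent vector per (feature, field)

term = sum(
    (nfk[i, feature2field[j]] @ nfk[j, feature2field[i]]) * x[i] * x[j]
    for i in range(feature_size)
    for j in range(i + 1, feature_size))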