optimizer = tf.train.AdamOptimizer(learning_rate=0.02) train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) train_op = optimizer.minimize(loss=loss, var_list=train_var) gpu_options = tf.GPUOptions(allow_growth=True) config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True, log_device_placement=False) with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sess.run(tf.tables_initializer()) #sess.graph.finalize() for num in range(epoch_num): print(getNow(), 'now shuffle begin') train_data = dataClass.samplingagain() x_data = train_data[dataClass.col_names] y_data = train_data[['label']] train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, test_size=0.0005) print('alldata.shape:', train_data.shape) print('train_x.shape:', train_x.shape, 'train_y.shape:', train_y.shape) print('test _x.shape:', test_x.shape, 'test _y.shape:', test_y.shape) print(getNow(), 'now shuffle and split end') for i in range(int(train_x.shape[0] / batch_size)): input_x = train_x[i * batch_size:(i + 1) * batch_size] input_y = train_y[i * batch_size:(i + 1) * batch_size] sess.run(train_op, feed_dict={x: input_x, y: input_y}) if i % 100 == 0:
model_dir = model_dir, \ config = run_config, \ params = params) model_dir = '../model/' shutil.rmtree(model_dir) os.mkdir(model_dir) model = build_estimator(model_dir) train_input_fn = get_input_from_pd(train_x, train_y, shuffle=True) test_input_fn = get_input_from_pd(test_x, test_y, shuffle=False) print 'debug, train_x.shape:', train_x.shape, 'train_y.shape:', train_y.shape print 'debug, test_x.shape :', test_x.shape, 'test_y.shape :', test_y.shape epoch_per_eval = 1 for n in range(epoch_num): model.train(input_fn = train_input_fn, hooks=[]) res = model.evaluate(input_fn = test_input_fn) print getNow(), 'epoch:', n, 'step:', global_step, 'test accuracy:', res['accuracy'] #for key in sorted(res): print key, res[key] ''' notice: 1) hand code is faster than tf.estimator 2) batch_size need to set large enough is epanded GPU power 3) neg sampling need again to do each epoch, especial hand neg sampling 4) accuracy in tf.estimator could be tf.metrics.accuracy; bu in hand code as follow # correct_prediction = tf.equal( tf.argmax(y_, 1), tf.argmax(output, 1)) # accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32)) 5) '''
#correct_prediction = tf.equal(labels, plabel) #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) optimizer = tf.train.AdamOptimizer(learning_rate=0.002) train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) train_op = optimizer.minimize(loss=loss, var_list=train_var) gpu_options = tf.GPUOptions(allow_growth=True) config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True, log_device_placement=False) with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sess.run(tf.tables_initializer()) #sess.graph.finalize() for num in range(epoch_num): print getNow(), 'now shuffle begin' train_data = dataClass.shuffle() x_data = train_data[[ 'feat_' + str(i) for i in range(1 + seq_len * filed_len) ]] y_data = train_data[['label_' + str(i) for i in range(seq_len)]] #train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, test_size=0.0005) train_x, train_y = x_data, y_data #test_data = testClass.data_read #test_x = test_data[['feat_'+str(i) for i in range(1+seq_len*filed_len)]] #test_y = test_data[['label_'+str(i) for i in range(seq_len)]] print 'alldata.shape:', train_data.shape print 'train_x.shape:', train_x.shape, 'train_y.shape:', train_y.shape #print 'test _x.shape:', test_x.shape, 'test _y.shape:', test_y.shape #test_data_raw = testClassRaw.data_read #test_x_raw = test_data_raw[['feat_'+str(i) for i in range(1+seq_len*filed_len)]]
except: topK = 30 index = faiss.IndexFlatIP(d) index.add(id2emb) #quantizer = faiss.IndexFlatL2(d) #index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_INNER_PRODUCT) #assert not index.is_trained #index.train(np.ascontiguousarray(id2emb)) #index.train(id2emb) #index.add(id2emb) #assert index.is_trained import time if os.path.isfile(file_save) == True: os.rename(file_save, file_save + '.' + getNow()) handle = open(file_save, 'aw') for user, user_emb in user_emb_map.items(): #print 'user', user, 'user_emb', user_emb S, I = index.search(user_emb, topK) #print 'type(I)', I, I.shape Id = [str(i) for i in I[0]] Tag = [id2tag[i] for i in Id] Score = [str(i) for i in S[0]] res = [Tag[i] + '\2' + Score[i] for i in range(topK)] handle.write(user + '\t' + '\1'.join(res) + '\n') handle.close()