# Imports this client relies on (normally at the top of the file):
import grpc
import numpy as np
import tensorflow as tf

from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

from DataRead import DataReadAndNegSamp


def do_inference(hostport, work_dir, concurrency, num_tests):
  """Tests PredictionService with concurrent requests.

  Args:
    hostport: Host:port address of the PredictionService.
    work_dir: The full path of working directory for test data set.
    concurrency: Maximum number of concurrent requests.
    num_tests: Number of test examples to use.

  Returns:
    The classification accuracy on the test set.

  Raises:
    IOError: An error occurred processing test data set.
  """
  #test_data_set = mnist_input_data.read_data_sets(work_dir).test
  test_data_set = DataReadAndNegSamp(file_input='./20190623.ID.test')
  test_data_set = test_data_set.train_data.values
  print('test_data_set.shape:', test_data_set.shape, type(test_data_set))

  channel = grpc.insecure_channel(hostport)
  stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

  pred_res = [[], []]
  real_res = []
  for index in range(int(num_tests / concurrency)):
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'seq_model'
    request.model_spec.signature_name = 'predict'
    #request.model_spec.signature_name = 'pred_prob'  ## here used 'input' ##

    # column 0 is the label, the remaining 13 columns are the integer ID features
    cur_data = test_data_set[index * concurrency:(index + 1) * concurrency]
    cur_label = cur_data[:, 0]
    cur_feature = cur_data[:, 1:].reshape((-1, 13))
    #print('cur_data.shape:', cur_data.shape, type(cur_data))
    #print('cur_label.shape:', cur_label.shape, type(cur_label))
    #print('cur_feature.shape:', cur_feature.shape, type(cur_feature))

    #image, label = test_data_set[index]
    request.inputs['input'].CopyFrom(
        tf.contrib.util.make_tensor_proto(values=cur_feature, dtype=tf.int32,
                                          shape=cur_feature.shape))
    #tf.contrib.util.make_tensor_proto(image[0], shape=[1, image[0].size]))

    response = stub.Predict(request, 5.0)  # 5 seconds timeout
    #print('response:', response, type(response))
    results1 = tf.contrib.util.make_ndarray(response.outputs['predict_label'])
    results2 = tf.contrib.util.make_ndarray(response.outputs['predict_prob'])
    pred_res[0].append(list(results1))
    pred_res[1].append(list(results2))
    real_res.append(list(cur_label))

    '''
    ## here means the SavedModel signature def can carry multiple outputs ##
    results = {}
    for key in response.outputs:
      tensor_proto = response.outputs[key]
      nd_array = tf.contrib.util.make_ndarray(tensor_proto)
      results[key] = nd_array
    for key, values in results.items():
      print('in result:', key, values)
    '''

  pred_res1 = np.array(pred_res[0]).reshape((-1, 1))
  pred_res2 = np.array(pred_res[1]).reshape((-1, 1))
  real_res = np.array(real_res).reshape((-1, 1))
  #print('pred_res1.shape:', pred_res1.shape, 'pred_res2.shape:', pred_res2.shape, 'real_res.shape:', real_res.shape)
  res = np.concatenate((real_res, pred_res1, pred_res2), axis=1)
  print('real-label\t pred-label\tpred-probability\n', res)
  # fraction of rows where the predicted label matches the real label, i.e. accuracy
  return np.sum(np.equal(res[:, 0], res[:, 1])) / res.shape[0]
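# A minimal sketch of how this client might be driven from the command line,
# modeled on the standard TensorFlow Serving mnist_client example. The flag
# names (--server, --work_dir, --concurrency, --num_tests) and their default
# values are assumptions for illustration, not taken from the original script.
tf.app.flags.DEFINE_string('server', 'localhost:8500', 'PredictionService host:port')
tf.app.flags.DEFINE_string('work_dir', '/tmp', 'working directory for the test data set')
tf.app.flags.DEFINE_integer('concurrency', 1, 'maximum number of concurrent requests')
tf.app.flags.DEFINE_integer('num_tests', 100, 'number of test examples to use')
FLAGS = tf.app.flags.FLAGS


def main(_):
  if not FLAGS.server:
    print('please specify server host:port')
    return
  accuracy = do_inference(FLAGS.server, FLAGS.work_dir,
                          FLAGS.concurrency, FLAGS.num_tests)
  print('\nInference accuracy: %s%%' % (accuracy * 100))


if __name__ == '__main__':
  tf.app.run()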
4) add target-url similarity with history-urls as a feature
'''
import os
import sys
import time
#reload(sys)
#sys.setdefaultencoding('utf-8')

import numpy as np
import pandas as pd
import tensorflow as tf
from DataRead import DataReadAndNegSamp, getNow
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

dataClass = DataReadAndNegSamp(file_input='../data/20190702.ID.clean')
train_data = dataClass.train_data

hidden_dim = 128
batch_size = 1024 * 48
epoch_num = 15
seq_len = 12
row_num, col_num = train_data.shape

col_max = train_data[['user']].max()
max_id_user = col_max.max()   # for userID ==> userTagsID #
col_max = train_data[['seq' + str(i) for i in range(seq_len)]].max()
max_id_urls = col_max.max()   # for urlsID ==> urlsTagsID #
print('max_index, max_user:', max_id_user, 'max_urls:', max_id_urls)

# user ID -> 20 user-tag IDs, keyed by the user ID in column 0
col_names = ['user_tag_' + str(i) for i in range(20)]
user2tags = pd.read_csv('../data/key2id.user.tags.ID.only', sep='\t', names=col_names,
                        header=None, index_col=0, dtype=np.int32)
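# A minimal sketch of how the user2tags table loaded above might be joined onto the
# training data, mapping each row's user ID to its 20 tag IDs. The variable name
# user_tag_feats and the fillna(0) default for users missing from the table are
# assumptions for illustration; the original script may attach the tags differently.
user_tag_feats = (user2tags.reindex(train_data['user'].values)
                           .fillna(0)
                           .astype(np.int32)
                           .reset_index(drop=True))
train_data = pd.concat([train_data.reset_index(drop=True), user_tag_feats], axis=1)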
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os
import sys
import time
#reload(sys)
#sys.setdefaultencoding('utf-8')

import tensorflow as tf
from DataRead import DataReadAndNegSamp, getNow
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

dataClass = DataReadAndNegSamp(file_input='../data/20190623.ID')
train_data = dataClass.train_data

hidden_dim = 128
batch_size = 1024 * 48
epoch_num = 8
row_num, col_num = train_data.shape
col_max = train_data.max()
max_index = col_max.max()
print('max_index:', max_index)

with tf.device('/gpu:1'):
    with tf.variable_scope('weight'):
        # shared embedding table: one hidden_dim-sized vector per ID
        w = tf.get_variable(name='w', dtype=tf.float32, trainable=True,
                            shape=[max_index + 1, hidden_dim],
                            regularizer=tf.contrib.layers.l2_regularizer(0.01),
                            initializer=tf.contrib.layers.xavier_initializer())

    with tf.variable_scope('compute'):
        # each example is a row of 13 integer ID features
        x = tf.placeholder(name='x', shape=[None, 13], dtype=tf.int32)
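        # A minimal sketch of how the graph might continue from here: look up the
        # 13 ID features in the shared embedding table w and pool them into one
        # vector per example. The pooling choice (reduce_mean) and the names
        # emb / pooled are assumptions for illustration, not taken from the
        # original model definition.
        emb = tf.nn.embedding_lookup(w, x)    # shape: [batch, 13, hidden_dim]
        pooled = tf.reduce_mean(emb, axis=1)  # shape: [batch, hidden_dim]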
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os
import sys
#reload(sys)
#sys.setdefaultencoding('utf-8')

import tensorflow as tf
from DataRead import DataReadAndNegSamp, getNow
from sklearn.model_selection import train_test_split

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import shutil

tf.logging.set_verbosity(tf.logging.INFO)  ## hook-logging also prints ##

train_data = DataReadAndNegSamp(file_input='./user_click_urls.ID.small').train_data

hidden_dim = 128
batch_size = 1024 * 32
epoch_num = 10
row_num, col_num = train_data.shape
col_max = train_data.max()
max_index = col_max.max()

x_data = train_data[['user', 'url', 'tag1', 'tag2', 'tag3', 'tag4', 'tag5']]
y_data = train_data[['label']]
train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, test_size=0.1)


def get_input_from_pd(x, y, num_epochs=1, shuffle=True, batch_size=batch_size):
    # wrap the pandas frames as an estimator input_fn
    return tf.estimator.inputs.pandas_input_fn(
        x=x,
        y=y,
        num_epochs=num_epochs,
        shuffle=shuffle,
        batch_size=batch_size)
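# A minimal sketch of how this input_fn might feed a canned estimator. The use of
# DNNClassifier, the identity/embedding feature columns, the hidden_units, and the
# model_dir path are all assumptions for illustration; the original script may
# instead build a custom estimator via tf.estimator.Estimator(model_fn=...).
feature_columns = [
    tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_identity(col, num_buckets=max_index + 1),
        dimension=hidden_dim)
    for col in ['user', 'url', 'tag1', 'tag2', 'tag3', 'tag4', 'tag5']
]

estimator = tf.estimator.DNNClassifier(
    hidden_units=[hidden_dim, hidden_dim // 2],
    feature_columns=feature_columns,
    model_dir='./model_dir')

estimator.train(input_fn=get_input_from_pd(train_x, train_y['label'],
                                           num_epochs=epoch_num, shuffle=True))
print(estimator.evaluate(input_fn=get_input_from_pd(test_x, test_y['label'],
                                                    num_epochs=1, shuffle=False)))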