def load_test_data(test_file, labels, flag):
    # df = pd.read_csv(test_file, sep='|')
    logging.critical('{} test_file received'.format(test_file))
    select = ['Descript']
    if flag == 0:
        # test_file is a path to a CSV file
        df = pd.read_csv(test_file)
        df = df.dropna(axis=0, how='any', subset=select)
        test_examples = df[select[0]].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()
    else:
        # test_file is raw text; wrap it in a single-column DataFrame
        df = pd.Series(test_file)
        df = pd.DataFrame(df.values, columns=select)  # select is already a list of column names
        test_examples = df.iloc[0].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()
    logging.critical('{} df received'.format(df))

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if 'Category' in df.columns:
        select.append('Category')
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
def load_test_data(test_file, labels):
    df = pd.read_csv(test_file, sep=',', encoding="utf8", error_bad_lines=False)
    select = ['Descript']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()
    # logging.info(test_examples)

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if 'Category' in df.columns:
        select.append('Category')
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
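# A minimal usage sketch for the loader above. The CSV path and the labels file are
# assumptions for illustration, not taken from the original code; in the related
# prediction scripts, `labels` is typically read back from a labels.json saved at
# training time.
if __name__ == '__main__':
    labels = json.loads(open('./labels.json').read())                 # hypothetical file from training
    x_test, y_test, df = load_test_data('./data/test.csv', labels)    # hypothetical test CSV
    print('loaded {} examples'.format(len(x_test)))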
def load_test_data(sentence=None):
    if not sentence:
        sentence = raw_input("input >>> ")
    if sentence == 'exit':
        return -1
    test_examples = [data_helper.clean_str(sentence).split(' ')]
    return test_examples
def load_test_data(test_file, labels):
    df = pd.read_csv(test_file, encoding='utf-8')
    select = ['WYSText']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if 'category' in df.columns:
        select.append('category')
        # y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()
        y_ = []
        labels = label_dict.keys()
        for i in range(len(df)):
            if df.iloc[i, 0] in labels:
                y_.append(label_dict[df.iloc[i, 0]].tolist())
            else:
                y_.append(label_dict['<UNK>'].tolist())

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
def load_file(hotel_name, filepath):
    review = data_helper.clean_str(filepath[0])
    ratings = product_info.parse_rating(filepath[1])
    num_prod_mentions = []
    mention_count = product_info.count_product_mentions(hotel_name, review)
    num_prod_mentions.append(mention_count)
    return review, num_prod_mentions, ratings
def load_test_data(test_file, labels):
    df = pd.read_csv(test_file)
    df.loc[df.severity == "crit", "severity"] = 1
    df.loc[df.severity == "err", "severity"] = 1
    df.loc[df.severity != 1, "severity"] = 0
    # print(df)
    select = ['message']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if 'severity' in df.columns:
        select.append('severity')
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
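# Hedged note on the variant above: because severity is remapped to the integers
# 1 ("crit"/"err") and 0 (everything else), the `labels` argument is presumably the
# two-element list [0, 1], so that label_dict maps each remapped severity to a
# one-hot row. A tiny illustration (assumed label list, not from the source; numpy
# imported as np, as in the snippets above):
labels = [0, 1]
one_hot = np.zeros((2, 2), int)
np.fill_diagonal(one_hot, 1)
label_dict = dict(zip(labels, one_hot))
print(label_dict[1])   # -> array([0, 1])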
def load_test_data(test_file, labels):
    df = pd.read_csv(test_file, sep='|')
    select = ['Descript']
    # turn each description into a list of space-separated tokens
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()
    print("test example:", test_examples)

    # store every label as a one-hot vector
    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))
    # print("label_dict", label_dict)

    # look up the one-hot vector for each test example's label
    y_ = None
    if 'Category' in df.columns:
        select.append('Category')
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()
    # print("y_", y_)

    # drop the columns that were not selected
    not_select = list(set(df.columns) - set(select))
    print('not_select', not_select)
    df = df.drop(not_select, axis=1)
    print("df:", df)
    return test_examples, y_, df
def load_file(filename):
    hotel_name = product_info.parse_product_name(re.sub(r"_", " ", filename[2]).lower())
    ratings = product_info.parse_rating(filename[1])
    hotel_data = data_helper.clean_str(filename[0])
    num_prod_mentions = []
    mention_count = product_info.count_product_mentions(hotel_name, hotel_data)
    num_prod_mentions.append(mention_count)
    return hotel_name, hotel_data, ratings, num_prod_mentions
def predict_new_data():
    """Step 0: load trained model and parameters"""
    params = json.loads(open('./parameters.json').read())
    checkpoint_dir = sys.argv[1]
    if not checkpoint_dir.endswith('/'):
        checkpoint_dir += '/'
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
    logging.critical('Loaded the trained model: {}'.format(checkpoint_file))

    """Step 1: load data for prediction"""
    test_file = sys.argv[2]
    test_examples = json.loads(open(test_file).read())

    # labels.json was saved during training, and it has to be loaded during prediction
    labels = json.loads(open('./labels.json').read())
    one_hot = np.zeros((len(labels), len(labels)), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    x_raw = [example['consumer_complaint_narrative'] for example in test_examples]
    x_test = [data_helper.clean_str(x) for x in x_raw]
    logging.info('The number of x_test: {}'.format(len(x_test)))

    y_test = None
    if 'product' in test_examples[0]:
        y_raw = [example['product'] for example in test_examples]
        y_test = [label_dict[y] for y in y_raw]
        logging.info('The number of y_test: {}'.format(len(y_test)))

    vocab_path = os.path.join(checkpoint_dir, "vocab.pickle")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_test)))

    """Step 2: compute the predictions"""
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    if y_test is not None:
        y_test = np.argmax(y_test, axis=1)
        correct_predictions = sum(all_predictions == y_test)
        logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test))))
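# How the function above is presumably driven from the command line (the script name
# is an assumption; the argv[1]/argv[2] usage is taken from the code itself):
#
#   python predict.py ./trained_model_1234567890/ ./data/test_examples.json
#
# argv[1] is the directory holding the `checkpoints` sub-folder and vocab.pickle;
# argv[2] is a JSON list of objects with a 'consumer_complaint_narrative' field and,
# optionally, a 'product' field used to compute accuracy.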
def input_valid_data(input_file):
    df = pd.read_excel(input_file)
    contents = df['web_content'].values
    valid_data = []
    for content in contents:
        content = data_helper.clean_str(find_chinese(content))
        content = data_helper.seperate_line(content)
        valid_data.append(content)
    return valid_data
def load_test_data(test_file, labels):
    df = pd.read_csv(test_file, sep='|')
    select = ['Descript']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if 'Category' in df.columns:
        select.append('Category')
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
def predict_unseen_data(userMessage):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    # logging.getLogger().setLevel(logging.INFO)

    """Step 0: load trained model and parameters"""
    mainDir = "C:/Users/sid/Desktop/Darwin/DarwinBot/process_message/intentClassifier/"
    params = json.loads(open(mainDir + 'parameters.json').read())
    # checkpoint_dir = mainDir + "trained_model_1519274258/"
    checkpoint_dir = mainDir + "trained_model_1522994422"
    if not checkpoint_dir.endswith('/'):
        checkpoint_dir += '/'
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
    # logging.critical('Loaded the trained model: {}'.format(checkpoint_file))

    """Step 1: load data for prediction"""
    # labels.json was saved during training, and it has to be loaded during prediction
    labels = json.loads(open(mainDir + 'labels.json').read())
    one_hot = np.zeros((len(labels), len(labels)), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    x_raw = userMessage
    x_test = [data_helper.clean_str(x_raw)]
    # logging.info('The number of x_test: {}'.format(len(x_test)))

    vocab_path = os.path.join(checkpoint_dir, "vocab.pickle")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_test)))

    """Step 2: compute the predictions"""
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    all_predictions = all_predictions.tolist()
    return labels[int(all_predictions[0])]
def load_test_data(test_file, column):
    df = pd.read_csv(test_file, sep=',')
    select = ['content']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()

    y_ = None
    if column in df.columns:
        select.append(column)

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
def load_test_data(test_file, labels, column):
    df = pd.read_csv(test_file, sep=',')
    select = ['content']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if column in df.columns:
        select.append(column)
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return test_examples, y_, df
def load_test_data(test_file, labels): df = pd.read_csv(test_file, sep="|") select = ["Descript"] df = df.dropna(axis=0, how="any", subset=select) test_examples = (df[select[0]].apply( lambda x: data_helper.clean_str(x).split(" ")).tolist()) num_labels = len(labels) one_hot = np.zeros((num_labels, num_labels), int) np.fill_diagonal(one_hot, 1) label_dict = dict(zip(labels, one_hot)) y_ = None if "Category" in df.columns: select.append("Category") y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist() not_select = list(set(df.columns) - set(select)) df = df.drop(not_select, axis=1) return test_examples, y_, df
def load_test_data(test_file, labels):
    # df = pd.read_csv(test_file, sep='|')
    # select = ['Descript']
    # turn x into a list of space-separated tokens
    test_examples_test = []
    # df = df.dropna(axis=0, how='any', subset=select)
    test_str = sys.argv[1]
    # test_examples = df[select[0]].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()
    clean_str = list(data_helper.clean_str(test_str).split(' '))
    test_examples_test.append(clean_str)
    # print("test example:", test_examples)
    # print("test example type:", type(test_examples))
    # print('test example test:', test_examples_test)
    # print('test example test type:', type(test_examples_test))
    test_examples = test_examples_test

    # store every label as a one-hot vector
    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))
    # print("label_dict", label_dict)

    # look up the one-hot vector for each test example's label
    y_ = None
    # if 'Category' in df.columns:
    #     select.append('Category')
    #     y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()
    # print("y_", y_)

    # drop the columns that were not selected
    # not_select = list(set(df.columns) - set(select))
    # print('not_select', not_select)
    # df = df.drop(not_select, axis=1)
    # print("df:", df)
    df = pd.DataFrame()
    return test_examples, y_, df
def load_test_data(test_file, labels):
    df = pd.read_csv(test_file)
    select = ['Descript']
    df = df.dropna(axis=0, how='any', subset=select)
    test_examples = df[select[0]].apply(
        lambda x: data_helper.clean_str(x).split(' ')).tolist()

    vocabulary, vocabulary_inv = data_helper.build_vocab(test_examples)
    x = np.array([[vocabulary[word] for word in sentence] for sentence in test_examples])

    num_labels = len(labels)
    one_hot = np.zeros((num_labels, num_labels), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    y_ = None
    if 'Category' in df.columns:
        select.append('Category')
        y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

    not_select = list(set(df.columns) - set(select))
    df = df.drop(not_select, axis=1)
    return x, y_, df
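# data_helper.build_vocab is not shown in this corpus. A plausible sketch of what such
# a helper returns (a word -> index mapping plus its inverse list), purely for
# orientation; this is an assumption, not the actual data_helper implementation:
from collections import Counter

def build_vocab(sentences):
    # sentences: list of token lists, as produced by clean_str(...).split(' ')
    counts = Counter(word for sentence in sentences for word in sentence)
    vocabulary_inv = [word for word, _ in counts.most_common()]   # index -> word
    vocabulary = {word: i for i, word in enumerate(vocabulary_inv)}  # word -> index
    return vocabulary, vocabulary_inv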
df_sports = pd.read_json("Sports_and_Outdoors_5.json", lines=True)

# Concat the dataframes
df = pd.concat([df_clothing, df_sports])

# delete this later
df = df[:1000]

# Get the text from the dataframe
text = df["reviewText"].values

# Reduce memory
del df

# clean strings
text = [data_helper.clean_str(sent) for sent in text]

'''
if max sent length = 4

Vocab processor:    Our text                                    Our new data
                    ["wow what great stuff", "good stuff"]  --> [[1,2,3], [4,5]]
'''

# set max sentence length
max_sent_length = 100

# make vocab processor
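# A self-contained illustration of the word-id mapping the comment above describes
# (plain Python, not the actual TensorFlow VocabularyProcessor; id 0 is reserved for
# padding, as that processor does):
def to_word_ids(sentences, max_len):
    vocab = {}
    ids = []
    for sent in sentences:
        row = []
        for word in sent.split(' ')[:max_len]:
            row.append(vocab.setdefault(word, len(vocab) + 1))  # new words get the next id
        row += [0] * (max_len - len(row))                       # pad short sentences with 0
        ids.append(row)
    return ids, vocab

print(to_word_ids(["wow what great stuff", "good stuff"], 4)[0])
# -> [[1, 2, 3, 4], [5, 4, 0, 0]]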
def read_Surveycsv(filepath):
    """
    author: Pengfei
    date: 07/06/2017
    """
    df = pd.read_csv(filepath, usecols=['Q3_1', 'Q3_2', 'Q3_3', 'Q3_4', 'Q3_5', 'Q3_6',
                                        'Q3_7', 'Q3_8', 'Q3_9', 'Q3_10', 'Q4', 'Q5'])
    # print(df.describe())
    q4 = df['Q4'].astype('category')
    # print("\nCount summary for each question:")
    # print(q4.value_counts())
    q5 = df['Q5']

    def corpus_and_indices(answer):
        # For one Q4 answer: the row indices that chose it and the cleaned Q5 comments.
        mask = q4 == answer
        idx = [i for i, flag in enumerate(mask) if flag]
        corpus = [data_helper.clean_str(sent) for sent in q5[mask].tolist()]
        return [corpus, idx]

    # One [corpus, indices] pair per Q4 answer, in the order expected by the caller.
    return [
        corpus_and_indices("No improvement require"),
        corpus_and_indices("Arranging service appointment/visits to the dealership"),
        corpus_and_indices("Dealership opening/closing days and time"),
        corpus_and_indices("The explanations given by dealership staff during your service visit (e.g., helpful/detailed)"),
        corpus_and_indices("The condition and cleanliness of vehicle when you received your vehicle after servicing"),
        corpus_and_indices("Quality of the work performed on your vehicle (e.g., fixing of issues during this servicing visit)"),
        corpus_and_indices("The dealership location"),
        corpus_and_indices("The waiting area (e.g., comfort, cleanness, facilities)"),
        corpus_and_indices("The total time taken to complete the servicing of your vehicle"),
        corpus_and_indices("The charges you paid for the last service"),
        corpus_and_indices("The follow-up calls made by the dealership post servicing of your vehicle to check your service experience and car condition"),
    ]
data_frame_sports = pd.read_json("reviews_Sports_and_Outdoors_5.json", lines=True)
data_frame = pd.concat([data_frame_clothing, data_frame_sports])

# Reduce size temporarily while iterating
data_frame = data_frame[:1000]

# Retrieve text from the data frame
review_text = data_frame["reviewText"].values

# Tidy
del data_frame

# clean strings
review_text = [data_helper.clean_str(x) for x in review_text]

# Assign unique word ids to each word in a corpus
max_sentence_length = 100
vocab_processor = tflearn.data_utils.VocabularyProcessor(max_sentence_length)
word_ids = list(vocab_processor.fit_transform(review_text))

# Total number of unique words
vocabulary_size = len(vocab_processor.vocabulary_)

# Convert np arrays to lists to support popping of 0 values
word_ids = [i.tolist() for i in word_ids]

# Remove trailing 0s for sentences not 100 words long
# TIL: I tried benchmarking np.trim_zeros here to avoid the type conversion,
def demo_cnn_rnn(demo_model):
    # load training parameters
    params, words_index, labels, embedding_mat = load_trained_params(
        'data_path_save/cnn_rnn_' + demo_model + '/trained_results/')

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn_rnn = TextCNNRNN(embedding_mat=embedding_mat,
                                 non_static=params['non_static'],
                                 hidden_unit=params['hidden_unit'],
                                 sequence_length=params['sequence_length'],
                                 max_pool_size=params['max_pool_size'],
                                 filter_sizes=map(int, params['filter_sizes'].split(",")),
                                 num_filters=params['num_filters'],
                                 num_classes=len(labels),
                                 embedding_size=params['embedding_dim'],
                                 l2_reg_lambda=params['l2_reg_lambda'])

            def real_len(batches):
                return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size'])
                        for batch in batches]

            def predict_step(x_batch):
                feed_dict = {
                    cnn_rnn.input_x: x_batch,
                    cnn_rnn.dropout_keep_prob: 1.0,
                    cnn_rnn.batch_size: len(x_batch),
                    cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
                    cnn_rnn.real_len: real_len(x_batch)
                }
                predictions = sess.run([cnn_rnn.predictions], feed_dict=feed_dict)
                return predictions

            checkpoint_file = tf.train.latest_checkpoint(
                'data_path_save/cnn_rnn_' + demo_model + '/checkpoints/')
            saver = tf.train.Saver(tf.all_variables())
            saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            logging.critical('{} has been loaded'.format(checkpoint_file))

            while True:
                print('Please input your sentence:')
                input_sentence = input()
                if input_sentence == '' or input_sentence.isspace():
                    print('See you next time!')
                    break
                else:
                    x_ = data_helper.clean_str(input_sentence).split(' ')

                    # Prediction: cut off the sentence if it is longer than the sequence length
                    sequence_length = params['sequence_length']
                    num_padding = sequence_length - len(x_)
                    padded_sentence = []
                    if num_padding < 0:
                        logging.info('This sentence has to be cut off because it is longer than trained sequence length')
                        padded_sentence = x_[0:sequence_length]
                    else:
                        padded_sentence = x_ + ['<PAD/>'] * num_padding

                    # Get word index
                    temp = []
                    for word in padded_sentence:
                        if word in words_index:
                            temp.append(words_index[word])
                        else:
                            temp.append(0)
                    temp = np.asarray(temp)
                    x_test = np.expand_dims(temp, axis=0)

                    prediction = predict_step(x_test)[0][0]
                    predicted_label = labels[prediction]
                    print('\nDisease category: ' + predicted_label + '\n')
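# A quick illustration of the pad-or-truncate step used above (the sentence, vocabulary
# and sequence length are made up; '<PAD/>' and the unknown-word index 0 match the code):
words_index = {'fever': 1, 'and': 2, 'cough': 3}   # hypothetical vocabulary
sequence_length = 5
x_ = 'fever and dry cough'.split(' ')
padded = (x_ + ['<PAD/>'] * (sequence_length - len(x_)))[:sequence_length]
ids = [words_index.get(w, 0) for w in padded]
print(padded)  # ['fever', 'and', 'dry', 'cough', '<PAD/>']
print(ids)     # [1, 2, 0, 3, 0]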
def predict_unseen_data():
    x_test = []
    # y_test = []

    """Step 0: load trained model and parameters"""
    params = json.loads(open('./parameters.json').read())
    checkpoint_dir = sys.argv[1]
    if not checkpoint_dir.endswith('/'):
        checkpoint_dir += '/'
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
    logging.critical('Loaded the trained model: {}'.format(checkpoint_file))

    """Step 1: load data for prediction"""
    test_file = sys.argv[2]
    # test_examples = json.loads(open(test_file).read())
    test_examples = pd.read_csv(test_file, sep='\t')

    # labels.json was saved during training, and it has to be loaded during prediction
    labels = json.loads(open('./labels.json').read())
    one_hot = np.zeros((len(labels), len(labels)), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    # x_raw = [example['consumer_complaint_narrative'] for example in test_examples]
    for x in range(test_examples.consumer_complaint_narrative.shape[0]):
        x_raw = test_examples.consumer_complaint_narrative[x]
        # y_raw = test_examples.tag[x]
        # y_test.append(y_raw)
        temp_str = data_helper.clean_str(x_raw)
        x_test.append(temp_str)
    # x_test = [data_helper.clean_str(x) for x in x_raw]
    logging.info('The number of x_test: {}'.format(len(x_test)))

    # y_test = None
    # if 'product' in test_examples[0]:
    #     y_raw = [example['product'] for example in test_examples]
    #     y_test = [label_dict[y] for y in y_raw]
    #     logging.info('The number of y_test: {}'.format(len(y_test)))

    vocab_path = os.path.join(checkpoint_dir, "vocab.pickle")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_test)))

    """Step 2: compute the predictions"""
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)
            all_predictions = []
            nf = open("tags.txt", 'a+')
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                # print(batch_predictions)
                for x in batch_predictions:
                    tot = str(x) + "\n"
                    nf.write(tot)
                all_predictions = np.concatenate([all_predictions, batch_predictions])
            # print(all_predictions)
            # for x in all_predictions:
            #     tot = str(x) + "\n"
            #     nf.write(tot)
            nf.close()
def predict_unseen_data(): """Step 0: load trained model and parameters""" params = json.loads(open(root_path + 'parameters.json').read()) checkpoint_dir = root_path + "trained_model\\" # if not checkpoint_dir.endswith('/'): # checkpoint_dir += '/' checkpoint_file = tf.train.latest_checkpoint( "E:\工作文档\bs\textcnn-multi-class\textcnn-multi-class\trained_model_1558186060\checkpoints" ) logging.critical('Loaded the trained model: {}'.format(checkpoint_file)) """Step 1: load data for prediction""" # 自定义参数 text = sys.argv[2] text = processData(text) # test_df = pd.read_csv(sys.argv[1]) # test_examples = json.loads(open(test_file).read()) # labels.json was saved during training, and it has to be loaded during prediction labels = json.loads(open(root_path + 'labels.json').read()) one_hot = np.zeros((len(labels), len(labels)), int) np.fill_diagonal(one_hot, 1) label_dict = dict(zip(labels, one_hot)) # x_text = data_helper.clean_str(text) x_raw = [sys.argv[1]] x_test = [data_helper.clean_str(x) for x in x_raw] logging.info('The number of x_test: {}'.format(len(x_test))) # print(s_text) y_test = None # y_raw = test_df['category'] # y_test = [label_dict[y] for y in y_raw] # logging.info('The number of y_test: {}'.format(len(y_test))) vocab_path = os.path.join(checkpoint_dir, "vocab.pickle") vocab_processor = learn.preprocessing.VocabularyProcessor.restore( vocab_path) x_test = np.array(list(vocab_processor.transform(x_test))) """Step 2: compute the predictions""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) input_x = graph.get_operation_by_name("input_x").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] predictions = graph.get_operation_by_name( "output/predictions").outputs[0] batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, { input_x: x_test_batch, dropout_keep_prob: 1 }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) actual_labels = [labels[int(prediction)] for prediction in all_predictions] print(actual_labels[0])
def predict_unseen_data(): """Step 0: load trained model and parameters""" params = json.loads(open('./parameters.json').read()) checkpoint_dir = "trained_model_1516138925/" if not checkpoint_dir.endswith('/'): checkpoint_dir += '/' checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints') #logging.critical('Loaded the trained model: {}'.format(checkpoint_file)) """Step 1: load data for prediction""" data = {} data = sys.argv[1] message = ('[{"content": "%s"}]') % (data) test_examples = json.loads(message) # labels.json was saved during training, and it has to be loaded during prediction labels = json.loads(open('./labels.json').read()) one_hot = np.zeros((len(labels), len(labels)), int) np.fill_diagonal(one_hot, 1) label_dict = dict(zip(labels, one_hot)) x_raw = [example['content'] for example in test_examples] x_test = [data_helper.clean_str(x) for x in x_raw] logging.info('The number of x_test: {}'.format(len(x_test))) y_test = None if 'label' in test_examples[0]: y_raw = [example['label'] for example in test_examples] y_test = [label_dict[y] for y in y_raw] logging.info('The number of y_test: {}'.format(len(y_test))) vocab_path = os.path.join(checkpoint_dir, "vocab.pickle") vocab_processor = learn.preprocessing.VocabularyProcessor.restore( vocab_path) x_test = np.array(list(vocab_processor.transform(x_test))) """Step 2: compute the predictions""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) input_x = graph.get_operation_by_name("input_x").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] predictions = graph.get_operation_by_name( "output/predictions").outputs[0] batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, { input_x: x_test_batch, dropout_keep_prob: 1.0 }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) print("Predictions:") looper = 0 length = len(all_predictions) location_dataset = "data/Dataset.csv" while looper < length: label = get_label(all_predictions[looper]) #Change," Updated the rule file" print(label) print(data) dataset = "\n%s,\"%s\" " % (label, data) f = open(location_dataset, "a") f.write(dataset) f.close looper += 1 if y_test is not None: y_test = np.argmax(y_test, axis=1) correct_predictions = sum(all_predictions == y_test) logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test)))) logging.critical('The prediction is complete')
def predict_unseen_data(): """Step 0: load trained model and parameters""" params = json.loads( open('./parameters.json').read()) #load parameter dari parameters.json checkpoint_dir = sys.argv[1] #path/alamat ke checkpoint (pada arg1) if not checkpoint_dir.endswith('/'): #tambah / ke alamat checkpoint checkpoint_dir += '/' checkpoint_file = tf.train.latest_checkpoint( checkpoint_dir + 'checkpoints') #train checkpoint terakhir logging.critical( 'Loaded the trained model: {}'.format(checkpoint_file)) #log """Step 1: load data for prediction""" test_file = sys.argv[2] #lokasi test_file (pada arg2) test_examples = json.loads( open(test_file).read()) #load contoh test dari direktori test_file # labels.json was saved during training, and it has to be loaded during prediction labels = json.loads( open('./labels.json').read()) #label yang ditentukan dalam bentuk json one_hot = np.zeros((len(labels), len(labels)), int) #buat array isi nol dari panjang labels np.fill_diagonal(one_hot, 1) #isi diagonal dari array one_hot dengan angka 1 label_dict = dict(zip(labels, one_hot)) x_raw = [ example['consumer_complaint_narrative'] for example in test_examples ] x_test = [data_helper.clean_str(x) for x in x_raw] logging.info('The number of x_test: {}'.format(len(x_test))) y_test = None if 'product' in test_examples[0]: y_raw = [example['product'] for example in test_examples] y_test = [label_dict[y] for y in y_raw] logging.info('The number of y_test: {}'.format(len(y_test))) vocab_path = os.path.join(checkpoint_dir, "vocab.pickle") vocab_processor = learn.preprocessing.VocabularyProcessor.restore( vocab_path) x_test = np.array(list(vocab_processor.transform(x_test))) """Step 2: compute the predictions""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) input_x = graph.get_operation_by_name("input_x").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] predictions = graph.get_operation_by_name( "output/predictions").outputs[0] batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, { input_x: x_test_batch, dropout_keep_prob: 1.0 }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) if y_test is not None: y_test = np.argmax(y_test, axis=1) correct_predictions = sum(all_predictions == y_test) # Save the actual labels back to file actual_labels = [ labels[int(prediction)] for prediction in all_predictions ] for idx, example in enumerate(test_examples): example['new_prediction'] = actual_labels[idx] with open('./data/small_samples_prediction.json', 'w') as outfile: json.dump(test_examples, outfile, indent=4) logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test)))) logging.critical('The prediction is complete')
def predict_unseen_data(): """Step 0: load trained model and parameters""" params = { "num_epochs": 25, "batch_size": 30, "num_filters": 128, "filter_sizes": "3,4,5", "embedding_dim": 300, "l2_reg_lambda": 0.15, "evaluate_every": 200, "dropout_keep_prob": 0.5 } curr_checkpoint = 'trained_model_1589201852' checkpoint_dir = file_dir + '/' + curr_checkpoint + '/' if not checkpoint_dir.endswith('/'): checkpoint_dir += '/' checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints') logging.critical('Loaded the trained model: {}'.format(checkpoint_file)) """Step 1: load data for prediction""" test_file = file_dir + '/data/small_samples.json' test_examples = json.loads(open(test_file).read()) # labels.json was saved during training, and it has to be loaded during prediction labels = json.loads(open(file_dir+'/labels.json').read()) one_hot = np.zeros((len(labels), len(labels)), int) np.fill_diagonal(one_hot, 1) label_dict = dict(zip(labels, one_hot)) x_raw = [example['comments'] for example in test_examples] x_test = [data_helper.clean_str(x) for x in x_raw] logging.info('The number of x_test: {}'.format(len(x_test))) y_test = None if 'category' in test_examples[0]: y_raw = [example['category'] for example in test_examples] y_test = [label_dict[y] for y in y_raw] logging.info('The number of y_test: {}'.format(len(y_test))) vocab_path = os.path.join(checkpoint_dir, "vocab.pickle") vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path) x_test = np.array(list(vocab_processor.transform(x_test))) """Step 2: compute the predictions""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) input_x = graph.get_operation_by_name("input_x").outputs[0] dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] predictions = graph.get_operation_by_name("output/predictions").outputs[0] batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0}) all_predictions = np.concatenate([all_predictions, batch_predictions]) if y_test is not None: y_test = np.argmax(y_test, axis=1) correct_predictions = sum(all_predictions == y_test) # Save the actual labels back to file actual_labels = [labels[int(prediction)] for prediction in all_predictions] for idx, example in enumerate(test_examples): example['new_prediction'] = actual_labels[idx] with open(file_dir+'/data/small_samples_prediction.json', 'w') as outfile: json.dump(test_examples, outfile, indent=4) logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test)))) logging.critical('The prediction is complete')
def predict_unseen_data(): """Step 0: load trained model and parameters""" # print ("loading") params = json.loads(open('./parameters.json').read()) checkpoint_dir = sys.argv[1] if not checkpoint_dir.endswith('/'): checkpoint_dir += '/' checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints') # logging.critical('Loaded the trained model: {}'.format(checkpoint_file)) """Step 1: load data for prediction""" test_file = sys.argv[2] # test_examples = json.loads(open(test_file).read()) df = pd.read_csv(test_file, sep="|", encoding="unicode_escape") df.columns = ['label', 'text'] # labels.json was saved during training, and it has to be loaded during prediction labels = json.loads(open('./labels.json').read()) one_hot = np.zeros((len(labels), len(labels)), int) np.fill_diagonal(one_hot, 1) label_dict = dict(zip(labels, one_hot)) x_raw = df['text'].apply(lambda x: str(x)).tolist() x_test = [data_helper.clean_str(x) for x in x_raw] # logging.info('The number of x_test: {}'.format(len(x_test))) y_test = None # if 'label' in test_examples[0]: # y_raw = [example['label'] for example in df] y_raw = df['label'].tolist() # y_raw = df['label'].apply(lambda y: y)]).tolist() y_test = [label_dict[str(y)] for y in y_raw] # logging.info('The number of y_test: {}'.format(len(y_test))) vocab_path = os.path.join(checkpoint_dir, "vocab.pickle") vocab_processor = learn.preprocessing.VocabularyProcessor.restore( vocab_path) x_test = np.array(list(vocab_processor.transform(x_test))) # print ("computing") """Step 2: compute the predictions""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) input_x = graph.get_operation_by_name("input_x").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] predictions = graph.get_operation_by_name( "output/predictions").outputs[0] batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, { input_x: x_test_batch, dropout_keep_prob: 1.0 }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) # print ("done computing") if y_test is not None: y_test = np.argmax(y_test, axis=1) correct_predictions = sum(all_predictions == y_test) # Save the actual labels back to file actual_labels = [ labels[int(prediction)] for prediction in all_predictions ] # print (all_predictions) labels = [int(pred) for pred in all_predictions] # for idx, example in enumerate(df): # example['new_prediction'] = actual_labels[idx] # # with open('./data/small_samples_prediction.json', 'w') as outfile: # json.dump(df, outfile, indent=4) # logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test)))) # logging.critical('The prediction is complete') print("Accuracy: ", accuracy_score(y_raw, labels)) print(classification_report(y_raw, labels))
def nps_prediction():
    print("Sentiment being processed")

    publish_date = []
    first_name = []
    last_name = []
    activity = []
    channel_code = []
    customer_id = []
    response_date = []
    rating = []
    why = []
    what = []
    system_date = []
    sentiment_why = []
    sentiment_what = []
    sentiment = []
    non_digital_why = []
    non_digital_what = []
    non_digital = []
    id_val = []

    s = solr.Solr('http://localhost:8983/solr/' + table_name)
    url_call = 'http://localhost:8983/solr/' + table_name + '/select?indent=on&q=*:*&wt=python'
    conn = urlopen(url_call)
    resp = eval(conn.read())
    resp_rows = str(resp['response']['numFound'])
    print(resp_rows)  # How many records
    url_string = url_call + '&rows=' + resp_rows + '&start=0'
    print(url_string)
    conn = urlopen(url_string)
    resp = eval(conn.read())

    for doc1 in resp['response']['docs']:
        if 'why' in doc1.keys():
            why.append(doc1['why'][0])
        else:
            why.append('no comment')
        if 'what' in doc1.keys():
            what.append(doc1['what'][0])
        else:
            what.append('no comment')
        if 'last_name' in doc1.keys():
            last_name.append(doc1['last_name'][0])
        else:
            last_name.append('Business User')
        publish_date.append(doc1['publish_date'][0])
        first_name.append(doc1['first_name'][0])
        activity.append(doc1['activity'][0])
        channel_code.append(doc1['channel_code'][0])
        customer_id.append(doc1['customer_id'][0])
        response_date.append(doc1['response_date'][0])
        rating.append(doc1['rating'][0])
        system_date.append(doc1['system_date'][0])
        # Initial flags
        sentiment.append('no')
        non_digital.append('no')
        id_val.append(doc1['id_val'][0])

    for prediction in ['sentiment', 'digital']:
        if prediction == 'sentiment':
            params = json.loads(open('./parameters_sentiment.json').read())
            checkpoint_dir = "C:\\Code_Sketch\\NPS\\S3134076\\PycharmProjects\\nps\\trained_model_1509637563"
            labels = json.loads(open('./labels_sentiment.json').read())
            vocab_path = os.path.join(checkpoint_dir, "vocab_sentiment.pickle")
        elif prediction == 'digital':
            params = json.loads(open('./parameters_digital.json').read())
            checkpoint_dir = "C:\\Code_Sketch\\NPS\\S3134076\\PycharmProjects\\nps\\trained_model_1508867627"
            labels = json.loads(open('./labels_digital.json').read())
            vocab_path = os.path.join(checkpoint_dir, "vocab_digital.pickle")

        logging.getLogger().setLevel(logging.INFO)
        # if not checkpoint_dir.endswith('/'):
        #     checkpoint_dir += '/'
        checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + '\\checkpoints')
        logging.critical('Loaded the trained model: {}'.format(checkpoint_file))

        one_hot = np.zeros((len(labels), len(labels)), int)
        np.fill_diagonal(one_hot, 1)
        vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
        x_test_why = [data_helper.clean_str(x) for x in why]
        x_test_why = np.array(list(vocab_processor.transform(x_test_why)))
        x_text_what = [data_helper.clean_str(x) for x in what]
        x_test_what = np.array(list(vocab_processor.transform(x_text_what)))

        graph = tf.Graph()
        with graph.as_default():
            session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                input_x_why = graph.get_operation_by_name("input_x").outputs[0]
                dropout_keep_prob_why = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
                predictions_why = graph.get_operation_by_name("output/predictions").outputs[0]
                input_x_what = graph.get_operation_by_name("input_x").outputs[0]
                dropout_keep_prob_what = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
                predictions_what = graph.get_operation_by_name("output/predictions").outputs[0]

                batches_why = data_helper.batch_iter(list(x_test_why), params['batch_size'], 1, shuffle=False)
                batches_what = data_helper.batch_iter(list(x_test_what), params['batch_size'], 1, shuffle=False)
                all_predictions_why = []
                all_predictions_what = []

                # Giving prediction batch by batch
                for x_test_batch in batches_why:
                    batch_predictions_why = sess.run(predictions_why,
                                                     {input_x_why: x_test_batch, dropout_keep_prob_why: 1.0})
                    all_predictions_why = np.concatenate([all_predictions_why, batch_predictions_why])
                for x_test_batch in batches_what:
                    batch_predictions_what = sess.run(predictions_what,
                                                      {input_x_what: x_test_batch, dropout_keep_prob_what: 1.0})
                    all_predictions_what = np.concatenate([all_predictions_what, batch_predictions_what])

                for predict in all_predictions_why:
                    # print(labels[int(predict)])
                    if prediction == 'sentiment':
                        if labels[int(predict)] == 'positive':
                            sentiment_why.append("positive")
                        else:
                            sentiment_why.append("negative")
                    elif prediction == 'digital':
                        if labels[int(predict)] == 'non_digital':
                            non_digital_why.append("non_digital")
                        else:
                            non_digital_why.append("digital")

                for predict in all_predictions_what:
                    if prediction == 'sentiment':
                        if labels[int(predict)] == 'positive':
                            sentiment_what.append("positive")
                        else:
                            sentiment_what.append("negative")
                    elif prediction == 'digital':
                        if labels[int(predict)] == 'non_digital':
                            non_digital_what.append("non_digital")
                        else:
                            non_digital_what.append("digital")

    current_directory = os.getcwd()
    os.chdir(solr_path)
    os.system('solr delete -c ' + table_name)
    os.system('solr create -c ' + table_name)
    os.chdir(current_directory)
    doc = []
def main(_):
    print(time.ctime())
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Send request
    params = json.loads(open('./parameters.json').read())
    checkpoint_dir = './trained_model_1531212474/'
    if not checkpoint_dir.endswith('/'):
        checkpoint_dir += '/'

    """Step 1: load data for prediction"""
    test_file = './data/data.csv'
    # test_examples = json.loads(open(test_file).read())
    test_examples = pd.read_csv(test_file, dtype={'data': object})
    df = pd.read_csv(test_file, dtype={'data': object})
    # selected = ['text', 'title', 'style', 'structural', 'tag']
    selected = ['data', 'tag']
    non_selected = list(set(df.columns) - set(selected))
    df = df.drop(non_selected, axis=1)  # Drop non selected columns
    df = df.dropna(axis=0, how='any', subset=selected)  # Drop null rows
    df = df.reindex(np.random.permutation(df.index))  # Shuffle the dataframe

    # Map the actual labels to one hot labels
    labels = sorted(list(set(df[selected[1]].tolist())))
    one_hot = np.zeros((len(labels), len(labels)), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    vocab_path = os.path.join(checkpoint_dir, "vocab.pickle")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_raw = df[selected[0]].apply(lambda x: clean_str(x)).tolist()
    x_test = [data_helper.clean_str(x) for x in x_raw]
    x_test = np.array(list(vocab_processor.transform(x_test)))
    logging.info('The number of x_test: {}'.format(len(x_test)))

    y_test = None
    y_raw = df[selected[1]].apply(lambda y: label_dict[y]).tolist()
    y_test = np.array(y_raw)
    logging.info('The number of y_test: {}'.format(len(y_test)))

    batches = data_helper.batch_iter(list(zip(x_test, y_test)), params['batch_size'], 1, shuffle=False)
    all_predictions = []
    start = time.time()
    for test_batch in batches:
        x_test_batch, y_test_batch = zip(*test_batch)
        x_test_batch = np.array(x_test_batch)
        x_test_batch = numpy_array_int32(x_test_batch)
        y_test_batch = np.array(y_test_batch)
        y_test_batch = numpy_array_float(y_test_batch)
        print(type(y_test_batch))

        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'saved_model'
        request.model_spec.version.value = 1
        request.model_spec.signature_name = 'predict_label'
        request.inputs['input_x'].CopyFrom(tf.contrib.util.make_tensor_proto(x_test_batch))
        request.inputs['input_y'].CopyFrom(tf.contrib.util.make_tensor_proto(y_test_batch))
        print("lalalala")
        request.inputs['dropout_keep_prob'].CopyFrom(tf.contrib.util.make_tensor_proto(1.0))
        print("hahahahaha")

        result = stub.Predict(request, 10.0)  # 10 secs timeout
        print(result.outputs['accuracy/accuracy'])
        batch_predictions = (result.outputs['output/predictions'].int64_val)
        all_predictions = np.concatenate([all_predictions, batch_predictions])
        print(batch_predictions)
        print(result)
        print("batch_predictions", batch_predictions)
        print("all_predictions", all_predictions)
    print(time.time() - start)

    if y_test is not None:
        y_test = np.argmax(y_test, axis=1)
        correct_predictions = sum(all_predictions == y_test)
        print(correct_predictions)

        # Save the actual labels back to file
        for idx, example in enumerate(test_examples):
            print(idx, '\t', example)
            # example['tag'] = actual_labels[idx]
        with open('./data/small_samples_prediction.json', 'w') as outfile:
            json.dump(test_examples.to_json(), outfile, indent=4)

        logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test))))
    logging.critical('The prediction is complete')
def predict_unseen_data(): """Step 0: load trained model and parameters""" params = json.loads(open('./parameters.json').read()) checkpoint_dir = sys.argv[1] if not checkpoint_dir.endswith('/'): checkpoint_dir += '/' checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints') logging.critical('Loaded the trained model: {}'.format(checkpoint_file)) """Step 1: load data for prediction""" test_file = sys.argv[2] test_examples = json.loads(open(test_file).read()) # labels.json was saved during training, and it has to be loaded during prediction labels = json.loads(open('./labels.json').read()) one_hot = np.zeros((len(labels), len(labels)), int) np.fill_diagonal(one_hot, 1) label_dict = dict(zip(labels, one_hot)) x_raw = [example['consumer_complaint_narrative'] for example in test_examples] x_test = [data_helper.clean_str(x) for x in x_raw] logging.info('The number of x_test: {}'.format(len(x_test))) y_test = None if 'product' in test_examples[0]: y_raw = [example['product'] for example in test_examples] y_test = [label_dict[y] for y in y_raw] logging.info('The number of y_test: {}'.format(len(y_test))) vocab_path = os.path.join(checkpoint_dir, "vocab.pickle") vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path) x_test = np.array(list(vocab_processor.transform(x_test))) """Step 2: compute the predictions""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) input_x = graph.get_operation_by_name("input_x").outputs[0] dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] predictions = graph.get_operation_by_name("output/predictions").outputs[0] batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0}) all_predictions = np.concatenate([all_predictions, batch_predictions]) if y_test is not None: y_test = np.argmax(y_test, axis=1) correct_predictions = sum(all_predictions == y_test) # Save the actual labels back to file actual_labels = [labels[int(prediction)] for prediction in all_predictions] for idx, example in enumerate(test_examples): example['new_prediction'] = actual_labels[idx] with open('./data/small_samples_prediction.json', 'w') as outfile: json.dump(test_examples, outfile, indent=4) logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_test)))) logging.critical('The prediction is complete')
def console_predict():
    """Step 0: load trained model and parameters"""
    params = json.loads(open('./parameters.json').read())
    checkpoint_dir = sys.argv[1]
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
    logging.critical('Loaded the trained model: {}'.format(checkpoint_file))

    # labels.json was saved during training, and it has to be loaded during prediction
    labels = json.loads(open('./labels.json').read())
    one_hot = np.zeros((len(labels), len(labels)), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(labels, one_hot))

    ### For console input
    # text = input("Input : ")
    ### For C# inter-processing
    # text = sys.argv[2]
    ### For URL request
    text = flask.request.args['query']
    logging.info('ENCODING : {}'.format(flask.request.args['query']))
    li = [text]

    x_raw = li
    x_test = [data_helper.clean_str(x) for x in x_raw]
    logging.info('The number of x_test: {}'.format(len(x_test)))
    y_test = None

    vocab_path = os.path.join(checkpoint_dir, "vocab.pickle")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_test)))

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    # Save the actual labels back to file
    actual_labels = [labels[int(prediction)] for prediction in all_predictions]
    logging.info('RESULT : {}'.format(actual_labels))
    print(actual_labels[0])
    result = {'result': actual_labels[0]}
    return flask.jsonify(result)
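# The function above reads flask.request and returns flask.jsonify(...), so it is
# evidently meant to run inside a Flask request context. A minimal wiring sketch
# (the route name and port are assumptions, not from the original code):
import flask

app = flask.Flask(__name__)

@app.route('/predict')
def predict_route():
    # e.g. GET /predict?query=some+text
    return console_predict()

if __name__ == '__main__':
    app.run(port=5000)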