import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf  # the summary code below uses the TF 1.x API
from tqdm import trange

# mc (a working-directory helper) and review2pddataframe (folder of review
# text files -> DataFrame) are project-local helpers defined elsewhere in
# this repo, as are mlp, FLAGS and grads_and_vars used by the training code.


def gather_sentiment_data_set():
    """Load the labelled and unlabelled review sets from SentimentDataSet/.

    Both sets are shuffled and written to
    'all_labelled_data_before_processing.csv' and
    'all_unlabelled_data_before_processing.csv' in the data location.

    :return: labelled_data, unlabelled_data
    """
    mc.move_to_main_location()
    path_to_train_pos = os.path.abspath(os.path.join(os.path.curdir, "SentimentDataSet", "train", "pos"))
    path_to_train_neg = os.path.abspath(os.path.join(os.path.curdir, "SentimentDataSet", "train", "neg"))
    path_to_test_pos = os.path.abspath(os.path.join(os.path.curdir, "SentimentDataSet", "test", "pos"))
    path_to_test_neg = os.path.abspath(os.path.join(os.path.curdir, "SentimentDataSet", "test", "neg"))

    # Positive reviews are labelled 1, negative reviews 0.
    train_pos_df = review2pddataframe(path_to_train_pos, 1)
    train_neg_df = review2pddataframe(path_to_train_neg, 0)
    test_pos_df = review2pddataframe(path_to_test_pos, 1)
    test_neg_df = review2pddataframe(path_to_test_neg, 0)

    # Concatenate the four labelled sets and shuffle the rows.
    labelled_data = pd.concat([test_neg_df, train_neg_df, test_pos_df, train_pos_df])
    labelled_data = labelled_data.sample(frac=1).reset_index(drop=True)

    # Unsupervised reviews carry the sentinel label -1.
    path_to_unlabelled_data = os.path.abspath(os.path.join(os.path.curdir, "SentimentDataSet", "train", "unsup"))
    unlabelled_data = review2pddataframe(path_to_unlabelled_data, -1)
    unlabelled_data = unlabelled_data.sample(frac=1).reset_index(drop=True)

    mc.move_to_data_location()
    labelled_data.to_csv('all_labelled_data_before_processing.csv', encoding="utf-8", index=False)
    unlabelled_data.to_csv('all_unlabelled_data_before_processing.csv', encoding="utf-8", index=False)
    return labelled_data, unlabelled_data
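# A small convenience sketch, not part of the original module: reload the two
# CSVs written by gather_sentiment_data_set(). Only the file names and the mc
# helpers come from the code above; the function itself is hypothetical.
def load_gathered_data():
    mc.move_to_data_location()
    labelled = pd.read_csv('all_labelled_data_before_processing.csv', encoding="utf-8")
    unlabelled = pd.read_csv('all_unlabelled_data_before_processing.csv', encoding="utf-8")
    mc.move_to_main_location()
    return labelled, unlabelled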
def convert_pd_words_reviews_to_np_ids_matrix(reviews_df, maxSeqLength, wordsList, set_name):
    """Map tokenised reviews to a fixed-width matrix of vocabulary ids.

    Words missing from wordsList are mapped to the out-of-vocabulary id
    len(wordsList), which is also used to pad short reviews. The matrix and
    labels are saved as set_name + '_ids_matrix.npy' and
    set_name + '_sentiment_labels.npy' in the data location.

    :param reviews_df: DataFrame with 'review' (list of words) and 'sentiment' columns
    :param maxSeqLength: number of word ids kept per review
    :param wordsList: vocabulary list; a word's id is its index in this list
    :param set_name: prefix for the saved .npy files
    :return: ids_matrix, sentiment_set
    """
    number_of_rev = len(reviews_df)
    id_of_unknown = len(wordsList)
    # Pre-fill with the unknown/padding id so short reviews are padded automatically.
    ids_matrix = np.full((number_of_rev, maxSeqLength), id_of_unknown, dtype='int32')
    rev_set = reviews_df['review'].to_numpy()        # .as_matrix() was removed in pandas 1.0
    sentiment_set = reviews_df['sentiment'].to_numpy()

    for i in trange(number_of_rev, desc='processing review'):
        for word_counter, word in enumerate(rev_set[i]):
            if word_counter >= maxSeqLength:
                break  # truncate long reviews instead of overrunning the row
            try:
                ids_matrix[i, word_counter] = wordsList.index(word)  # O(len(wordsList)) per lookup
            except ValueError:
                ids_matrix[i, word_counter] = id_of_unknown

    mc.move_to_data_location()
    np.save(set_name + '_ids_matrix', ids_matrix)
    np.save(set_name + '_sentiment_labels', sentiment_set)
    mc.move_to_main_location()
    return ids_matrix, sentiment_set
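# Design note: wordsList.index(word) rescans the whole vocabulary for every
# word, making the loop above O(total_words * len(wordsList)). A sketch of the
# same conversion with a precomputed dict (hypothetical helper, not part of
# the original module; inputs and outputs match the function above):
def words_to_ids_fast(rev_set, maxSeqLength, wordsList):
    id_of_unknown = len(wordsList)
    word2id = {w: i for i, w in enumerate(wordsList)}  # one pass, then O(1) lookups
    ids_matrix = np.full((len(rev_set), maxSeqLength), id_of_unknown, dtype='int32')
    for i, review in enumerate(rev_set):
        for j, word in enumerate(review[:maxSeqLength]):
            ids_matrix[i, j] = word2id.get(word, id_of_unknown)
    return ids_matrix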
# Keep track of gradient values and sparsity (optional)
grad_summaries = []
for g, v in grads_and_vars:
    if g is not None:
        grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
        sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
        grad_summaries.append(grad_hist_summary)
        grad_summaries.append(sparsity_summary)
grad_summaries_merged = tf.summary.merge(grad_summaries)

# Output directory for models and summaries
timestamp = str(int(time.time()))
mc.move_to_main_location()
# !!! change if changing to another NN type !!!
model_name = ("_mlp_ls" + str(FLAGS.layers_sizes)
              + "_lr" + str(FLAGS.learning_rate)
              + "_drop" + str(FLAGS.dropout_keep_prob)
              + "_bs" + str(FLAGS.train_batch_size))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "Models", "MLP", "runs", timestamp + model_name))
print('Writing to {}\n'.format(out_dir))

# Summaries for loss and accuracy
loss_summary = tf.summary.scalar("loss", mlp.loss)
acc_summary = tf.summary.scalar("accuracy", mlp.accuracy)
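# A sketch of how such summaries are typically merged and written per split in
# the TF 1.x API used here. Assumptions: the directory layout under out_dir and
# the variable names train_summary_op/dev_summary_op are illustrative, and a
# session object `sess` is assumed to exist in the surrounding training script.
train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
train_summary_dir = os.path.join(out_dir, "summaries", "train")
train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

# Dev evaluation usually skips the gradient summaries, which only apply to training steps.
dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)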