Exemple #1
0
def gather_sentiment_data_set():
    """
    Build the labelled and unlabelled review data sets and dump both to CSV.

    After ``mc.move_to_main_location()`` the ``SentimentDataSet`` directory is
    resolved against the current working directory.  ``review2pddataframe`` is
    called on ``train/pos`` and ``test/pos`` with second argument 1, on
    ``train/neg`` and ``test/neg`` with 0 and on ``train/unsup`` with -1
    (presumably the sentiment label attached to every review -- confirm
    against ``review2pddataframe``).

    The four labelled frames are concatenated and their rows shuffled; the
    unlabelled frame is shuffled as well.  Both get a fresh 0..n-1 index.
    After ``mc.move_to_data_location()`` the frames are written to
    ``all_labelled_data_before_processing.csv`` and
    ``all_unlabelled_data_before_processing.csv``.

    :return: labelled_data, unlabelled_data
    """
    mc.move_to_main_location()

    def dataset_path(*parts):
        # Absolute path of a sub-directory of ./SentimentDataSet.
        return os.path.abspath(
            os.path.join(os.path.curdir, "SentimentDataSet", *parts))

    pos_train_dir = dataset_path("train", "pos")
    neg_train_dir = dataset_path("train", "neg")
    pos_test_dir = dataset_path("test", "pos")
    neg_test_dir = dataset_path("test", "neg")

    pos_train = review2pddataframe(pos_train_dir, 1)
    neg_train = review2pddataframe(neg_train_dir, 0)
    pos_test = review2pddataframe(pos_test_dir, 1)
    neg_test = review2pddataframe(neg_test_dir, 0)

    # Concatenation order matters for reproducibility when the random state
    # is seeded, so it is kept as: test/neg, train/neg, test/pos, train/pos.
    labelled_data = pd.concat([neg_test, neg_train, pos_test, pos_train])
    labelled_data = labelled_data.sample(frac=1).reset_index(drop=True)

    unlabelled_data = review2pddataframe(dataset_path("train", "unsup"), -1)
    unlabelled_data = unlabelled_data.sample(frac=1).reset_index(drop=True)

    mc.move_to_data_location()
    csv_targets = (
        (labelled_data, 'all_labelled_data_before_processing.csv'),
        (unlabelled_data, 'all_unlabelled_data_before_processing.csv'),
    )
    for frame, csv_name in csv_targets:
        frame.to_csv(csv_name, encoding="utf-8", index=False)

    return labelled_data, unlabelled_data
Exemple #2
0
def convert_pd_words_reviews_to_np_ids_matrix(reviews_df, maxSeqLength,
                                              wordsList, set_name):
    """
    Turn tokenised reviews into a fixed-width matrix of word ids and save it.

    Every review (an iterable of words taken from the ``'review'`` column)
    becomes one row of an ``int32`` matrix with ``maxSeqLength`` columns.
    Each word is replaced by the index of its first occurrence in
    ``wordsList``; words that are not in ``wordsList`` get the id
    ``len(wordsList)``.  Rows shorter than ``maxSeqLength`` are padded with
    that same "unknown" id and longer reviews are truncated to
    ``maxSeqLength`` words.

    After ``mc.move_to_data_location()`` the matrix and the ``'sentiment'``
    column are stored with ``np.save`` under ``set_name + '_ids_matrix'`` and
    ``set_name + '_sentiment_labels'`` (``np.save`` appends ``.npy``);
    ``mc.move_to_main_location()`` is then called even if saving fails.

    :param reviews_df: DataFrame with a 'review' and a 'sentiment' column
    :param maxSeqLength: number of columns of the ids matrix
    :param wordsList: sequence of known words (assumed hashable, e.g. str)
    :param set_name: prefix of the two saved file names
    :return: ids_matrix, sentiment_set
    """
    number_of_rev = len(reviews_df)
    id_of_unknown = len(wordsList)
    ids_matrix = np.full((number_of_rev, maxSeqLength),
                         id_of_unknown,
                         dtype='int32')
    # `.values` instead of `.as_matrix()`: the latter was removed in
    # pandas 1.0, while `.values` exists in old and new versions alike.
    rev_set = reviews_df['review'].values
    sentiment_set = reviews_df['sentiment'].values

    # One-off lookup table instead of a linear `wordsList.index` scan per
    # word.  `setdefault` keeps the first index of a duplicated word, which
    # is what `list.index` would have returned.
    word_to_id = {}
    for word_id, known_word in enumerate(wordsList):
        word_to_id.setdefault(known_word, word_id)

    for i in tnrange(len(rev_set), desc='processing review'):
        row = ids_matrix[i]
        # Zipping with the column range stops after maxSeqLength words, so
        # an over-long review is truncated instead of raising IndexError.
        for column, word in zip(range(maxSeqLength), rev_set[i]):
            row[column] = word_to_id.get(word, id_of_unknown)

    mc.move_to_data_location()
    try:
        np.save(set_name + '_ids_matrix', ids_matrix)
        np.save(set_name + '_sentiment_labels', sentiment_set)
    finally:
        # Always go back, so a failed save does not leave the process in
        # the data location.
        mc.move_to_main_location()
    return ids_matrix, sentiment_set
Exemple #3
0
            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            # grads_and_vars yields (gradient, variable) pairs; pairs whose
            # gradient is None are skipped.
            for g, v in grads_and_vars:
                if g is not None:
                    # Two summaries per variable, both named after v.name:
                    # a histogram of the gradient values ...
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    # ... and a scalar with the fraction of zero entries in
                    # the gradient.
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            # Single op that evaluates all gradient summaries collected above.
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            # Whole-second Unix time, used as prefix of the run directory name.
            timestamp = str(int(time.time()))
            # NOTE(review): presumably changes the working directory so that
            # the relative "Models/MLP/runs" path below resolves against the
            # project root -- confirm in mc.
            mc.move_to_main_location()

            # !!! change if changing to another NN type !!!
            # The directory suffix encodes the run's hyper-parameters: layer
            # sizes, learning rate, dropout keep probability and train batch
            # size, all read from FLAGS.
            model_name = "_mlp_ls" + str(FLAGS.layers_sizes) + "_lr" + str(
                FLAGS.learning_rate) + '_drop' + str(
                    FLAGS.dropout_keep_prob) + '_bs' + str(
                        FLAGS.train_batch_size)
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "Models", "MLP", "runs",
                             timestamp + model_name))
            print('Writing to {}\n'.format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", mlp.loss)
            acc_summary = tf.summary.scalar("accuracy", mlp.accuracy)