def load2_data_and_labels():
    """
    Load the MR polarity data and turn every sentence into an embedding matrix.

    Reads ``./rt-polaritydata/rt-polarity.pos`` and
    ``./rt-polaritydata/rt-polarity.neg``, strips and cleans every line with
    ``clean_str``, and converts each sentence with
    ``util.getSentence_matrix``, using the longest cleaned sentence
    (measured in space-separated tokens) as the document length.

    Returns:
        A two-element list ``[x, y]``:
        ``x`` is a float32 array of shape
        ``(n_sentences, max_document_length, word_embedding_size)``;
        ``y`` is a 1-D array with one label per sentence, ``0`` for every
        positive example followed by ``1`` for every negative example.
    """
    # Load data from files; the context managers make sure the handles are
    # closed instead of being left to the garbage collector.
    with open("./rt-polaritydata/rt-polarity.pos", "r") as pos_file:
        positive_examples = [s.strip() for s in pos_file]
    with open("./rt-polaritydata/rt-polarity.neg", "r") as neg_file:
        negative_examples = [s.strip() for s in neg_file]

    # Clean the sentences; positives come first so they line up with `y`.
    x_text = [clean_str(sent) for sent in positive_examples + negative_examples]
    max_document_length = max(len(sent.split(" ")) for sent in x_text)

    # Every row is overwritten below, so the uninitialised allocation is safe.
    x = np.ndarray(
        shape=(len(x_text), max_document_length, word_embedding_size),
        dtype=np.float32,
    )
    for i, sent in enumerate(x_text):
        # NOTE(review): getSentence_matrix presumably returns a
        # (max_document_length, word_embedding_size) matrix - confirm in util.
        x[i] = util.getSentence_matrix(sent, max_document_length)

    # Generate labels: 0 marks a positive example, 1 a negative one.
    positive_labels = [0 for _ in positive_examples]
    negative_labels = [1 for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x, y]
def load_test_data():
    """
    Build the feature and label arrays for the SemEval test set.

    Every record returned by ``SemEval_test_data()`` contributes one row:
    element 0 of the record is passed to ``util.getSentence_matrix`` and
    element 3 is passed to ``getLabelVector``.

    Returns:
        A ``(features, labels)`` tuple of float32 arrays with shapes
        ``(n_records, MAX_DOCUMENT_LENGTH, word_embedding_size)`` and
        ``(n_records, num_classes)``.
    """
    records = SemEval_test_data()
    record_count = len(records)

    # Both arrays start uninitialised; the loop below fills every row.
    features = numpy.ndarray(
        shape=(record_count, MAX_DOCUMENT_LENGTH, word_embedding_size),
        dtype=numpy.float32,
    )
    labels = numpy.ndarray(
        shape=(record_count, num_classes),
        dtype=numpy.float32,
    )

    for row, record in enumerate(records):
        text = record[0]
        features[row] = util.getSentence_matrix(text, MAX_DOCUMENT_LENGTH)
        labels[row] = getLabelVector(record[3], num_class=num_classes)

    return features, labels