Exemple #1
0
def load2_data_and_labels():
    """
    Load the MR polarity data and return embedded sentences with labels.

    Reads ``./rt-polaritydata/rt-polarity.pos`` and
    ``./rt-polaritydata/rt-polarity.neg`` line by line, strips each line,
    passes it through ``clean_str`` and converts every sentence to a matrix
    via ``util.getSentence_matrix``.

    Returns:
        A two-element list ``[x, y]``:
        * ``x`` -- float32 array of shape
          ``(num_sentences, max_document_length, word_embedding_size)``,
          where ``max_document_length`` is the largest number of
          space-separated tokens found in any cleaned sentence.
        * ``y`` -- 1-D array with label ``0`` for every positive example
          followed by label ``1`` for every negative example.

    Raises:
        OSError: if either data file cannot be opened.
        ValueError: if both files contain no lines (``max`` of an empty
            sequence).
    """
    # Load data from files. The context managers guarantee the handles are
    # closed even if reading fails, instead of leaking them.
    with open("./rt-polaritydata/rt-polarity.pos", "r") as pos_file:
        positive_examples = [line.strip() for line in pos_file]
    with open("./rt-polaritydata/rt-polarity.neg", "r") as neg_file:
        negative_examples = [line.strip() for line in neg_file]

    # Clean every sentence; positives come first so x lines up with y below.
    x_text = [clean_str(sent) for sent in positive_examples + negative_examples]

    # Longest sentence (in space-separated tokens) fixes the second axis.
    max_document_length = max(len(sent.split(" ")) for sent in x_text)

    # Uninitialised buffer: every row is overwritten in the loop below.
    x = np.empty(shape=(len(x_text), max_document_length,
                        word_embedding_size),
                 dtype=np.float32)
    for i, sent in enumerate(x_text):
        # NOTE(review): assumes getSentence_matrix returns something
        # assignable to a (max_document_length, word_embedding_size) row --
        # confirm against util.
        x[i] = util.getSentence_matrix(sent, max_document_length)

    # Generate labels: 0 marks a positive example, 1 a negative one.
    positive_labels = [0 for _ in positive_examples]
    negative_labels = [1 for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x, y]
Exemple #2
0
def load_test_data():
    """
    Build the data and label arrays for the records from SemEval_test_data().

    For each record, element 0 is handed to ``util.getSentence_matrix``
    (with ``MAX_DOCUMENT_LENGTH``) and element 3 to ``getLabelVector``
    (with ``num_class=num_classes``).

    Returns:
        A tuple ``(data, labels)`` of float32 arrays with shapes
        ``(n, MAX_DOCUMENT_LENGTH, word_embedding_size)`` and
        ``(n, num_classes)``, where ``n`` is ``len(SemEval_test_data())``.
    """
    records = SemEval_test_data()
    n_records = len(records)

    # Buffers start uninitialised; each row is filled in the loop below.
    data = numpy.ndarray(
        shape=(n_records, MAX_DOCUMENT_LENGTH, word_embedding_size),
        dtype=numpy.float32,
    )
    labels = numpy.ndarray(
        shape=(n_records, num_classes),
        dtype=numpy.float32,
    )

    for row, record in enumerate(records):
        data[row] = util.getSentence_matrix(record[0], MAX_DOCUMENT_LENGTH)
        labels[row] = getLabelVector(record[3], num_class=num_classes)

    return data, labels