Example #1
0
def get_data():
    """
    Load CSPubSumExt and flatten it into (sentence, features, label) tuples,
    truncating each sentence to at most MAX_SENT_LEN tokens.

    :return: list of (sentence_tokens, feature_vector, label) tuples where
             every sentence is at most MAX_SENT_LEN tokens long.
    """
    print("Loading Data...")
    t = time.time()
    data = useful_functions.load_cspubsumext()

    # Flatten into one (sentence, features, label) tuple per sentence.
    # Each "sentences" entry is (tokens, section, label); the section is unused.
    sentences_class = []
    for item in data:
        for (sent, _sec, y), feat in zip(item["sentences"], item["sentence_features"]):
            sentences_class.append((sent, feat, y))
    print("Done, took ", time.time() - t, " seconds")

    print("Processing Data...")

    # Truncate overly long sentences; slicing is a no-op for short ones,
    # so the explicit len() check from the original is unnecessary.
    new_data = [(sent[:MAX_SENT_LEN], feat, y) for sent, feat, y in sentences_class]

    print("Done")

    return new_data
Example #2
0
def get_data():
    """
    Loads the data from the data directory given above and puts it into the form required by the summarisers. In this
    summariser the data we require is: the raw sentences, the abstract and the features.
    :return: The data, but discarding the sentences longer than the maximum length.
    """

    print("Loading Data...")
    t = time.time()

    # The data is a pickled object; each item is a dict with keys
    # "sentences", "abstract_vec" and "sentence_features".
    data = useful_functions.load_cspubsumext()

    # Data list of (sentence, abstract_vector, features, label) tuples.
    sents_absvec_feats_class = []

    for item in data:
        abstract_vec = item["abstract_vec"]
        # Each "sentences" entry is (tokens, section, label); the section is unused.
        for (sent, _sec, y), feat in zip(item["sentences"], item["sentence_features"]):
            sents_absvec_feats_class.append((sent, abstract_vec, feat, y))

    print("Done, took ", time.time() - t, " seconds")

    print("Processing Data...")

    new_data = []
    for sent, abs_vec, feat, y in sents_absvec_feats_class:
        # Drop feature 0, keeping features 1-7 (the original spelled this
        # out element by element: feat[1] ... feat[7]).
        new_feat = tuple(feat[1:8])

        # Truncate sentences longer than MAX_SENT_LEN; slicing is a no-op
        # for short sentences, so no length check is needed.
        new_data.append((sent[:MAX_SENT_LEN], abs_vec, new_feat, y))

    return new_data
Example #3
0
def get_data():
    """
    Loads the data from the data directory given above and puts it into the form required by the summarisers. In this
    summariser the data we require is: the raw sentences, the abstract and the features.
    :return: The data, with each sentence truncated to at most MAX_SENT_LEN tokens.
    """

    print("Loading Data...")
    t = time.time()

    # The data is a pickled object
    data = useful_functions.load_cspubsumext()

    # Data list of (sentence, abstract_vector, features, label) tuples.
    sents_absvec_feats_class = []

    for item in data:
        abstract_vec = item["abstract_vec"]
        # Each "sentences" entry is (tokens, section, label); the section is unused.
        for (sent, _sec, y), feat in zip(item["sentences"], item["sentence_features"]):
            # BUG FIX: the original referenced undefined MAX_SEN_LEN (a
            # NameError at runtime); the constant used everywhere else in
            # this file is MAX_SENT_LEN. Truncating here makes the separate
            # post-processing pass (previously commented out) unnecessary.
            sents_absvec_feats_class.append(
                (sent[:MAX_SENT_LEN], abstract_vec, feat, y))

    print("Done, took ", time.time() - t, " seconds")

    return sents_absvec_feats_class
Example #4
0
def get_data():
    """
    Load CSPubSumExt and return its raw sentences with one-hot labels.

    :return: dict with keys "sentences" (token lists) and "labels"
             (one-hot vectors of length NUM_CLASSES, built via num2onehot).
    """
    print("Loading Data...")
    start = time.time()

    papers = useful_functions.load_cspubsumext()

    sentence_list = []
    label_list = []
    for paper in papers:
        # Each entry is (tokens, section, label); the section is unused.
        for tokens, _section, label in paper["sentences"]:
            sentence_list.append(tokens)
            label_list.append(num2onehot(label, NUM_CLASSES))

    print("Done, took ", time.time() - start, " seconds")

    return {"sentences": sentence_list, "labels": label_list}
# Index of the true class for each example (labels are assumed one-hot
# rows — TODO confirm against where `labels` is built).
correct_answers = tf.argmax(labels, axis=1)
# Classification accuracy: mean of the 0/1 prediction-matches-truth vector.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(pred_answers, correct_answers), tf.float32))

# NOTE(review): this `with` block appears to continue beyond the visible
# portion of the file (presumably a training loop follows) — only the
# setup and data-loading part is shown here.
with tf.Session() as sess:

    # Initialise all variables
    sess.run(tf.global_variables_initializer())

    # Saving object, used to checkpoint/restore model weights.
    saver = tf.train.Saver()

    # Data is 3D - of form [sentence_vector, classification, abstract_vector]. We will ignore the abstract vector here.
    print("Loading Data...")
    t = time.time()
    data = useful_functions.load_cspubsumext()
    sentence_class_abstract = []
    for item in data:
        sentence_vecs = item["sentence_vecs"]
        abstract_vec = item["abstract_vec"]
        # Each entry is (sentence_vector, section, label); the section is unused.
        for sent, sec, y in sentence_vecs:
            sentence_class_abstract.append((sent, y, abstract_vec))
    data = sentence_class_abstract
    print("Done, took ", time.time() - t, " seconds")

    # First third of the flattened data is held out for testing;
    # the remaining two thirds are used for training.
    test_len = int(len(data) * (1 / 3))
    test_data = data[0:test_len]
    train_data = data[test_len:]

    print("Length of Training Data: ", len(train_data))
    print("Length of Testing Data: ", len(test_data))