예제 #1
0
def get_sentences(number, class_id, annotations="intersected"):
    """Print up to `number` example relations whose class equals `class_id`.

    Useful if you need to get an overview over sentences with a certain
    temporal relation. At most one matching relation is printed per text
    file, so the examples come from different texts.

    Args:
        number (int): Maximum number of relations to print.
        class_id: Relation class to look for; compared with
            ``Feature(rel).get_class()``.
        annotations (str): "union" or "intersected" selects which relation
            set is computed for each text. Any other value uses
            ``txt.relations`` as they are after parsing.

    Returns:
        None. Everything is written to stdout.

    """
    tense_names = ("None", "Present", "Past", "Future")
    aspect_names = ("None", "Progressive", "Perfect", "Perfect Progressive")

    def report(kind, role, code, names):
        # Print the label whose index equals `code`; codes outside the
        # table print nothing.
        for index, name in enumerate(names):
            if code == index:
                print("Estimated %s for %s event: %s" % (kind, role, name))
                return

    data = parse_XML("fables-100-temporal-dependency.xml", "McIntyreLapata09Resources/fables")

    printed = 0

    for txt in data.textfiles:
        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)

            if f.get_class() != class_id:
                continue

            # Limit reached: nothing more can be printed, so stop entirely
            # instead of scanning the remaining text files.
            if printed >= number:
                return
            printed += 1

            source = rel.source
            target = rel.target

            # The single-argument print("...") form behaves the same as a
            # Python 2 print statement and as the Python 3 function;
            # print("") is used for blank lines for the same reason.
            print("---------------")
            if target.sentence == source.sentence:
                # Both events share a sentence, so show it only once.
                print("Source event: " + source.content)
                print("Target event: " + target.content)
                print(target.sentence)
                print("")
                print("Source Surrounding: " + source.surrounding)
                print("Target Surrounding: " + target.surrounding)
            else:
                print("Source event: " + source.content)
                print("Whole sentence: " + source.sentence)
                print("Surrounding: " + source.surrounding)
                print("")
                print("Target event: " + target.content)
                print("Whole sentence: " + target.sentence)
                print("Surrounding: " + target.surrounding)

            report("tense", "source", f.get_tense_source(), tense_names)
            report("tense", "target", f.get_tense_target(), tense_names)
            report("aspect", "source", f.get_aspect_source(), aspect_names)
            report("aspect", "target", f.get_aspect_target(), aspect_names)

            print("Distance between events: " + str(f.get_distance()))

            print("---------------")
            print("")

            # Get the next example from the next text file.
            break
def parse_Features(data, new=False, annotations="union", features=["pos", "stem", "aspect", "tense", "distance", "similarity", "polarity", "modality"], distance=False):
    """Extracts the features out of the dataset and returns a list of features with the corresponding classes.

    Args:
        data: The parsed data from fables-100-temporal-dependency.xml; its ``textfiles`` attribute is iterated.
        new (bool): With new=True a new calculation of Pos() and Stem() is enforced. Otherwise they are loaded from the cache file "save.p" when it exists.
        annotations (str): Looking on all relations ("union") or at all relations in common between the annotators ("intersected"). Any other value uses ``txt.relations`` unchanged.
        features (list): Determines which features should be activated. Possible values: "pos", "stem", "aspect", "tense", "distance", "similarity", "polarity", "modality". The default list is only read, never mutated.
        distance (bool): If set to True parse_Features() will also return distance information for the data (needed for evaluation).

    Returns:
        tuple: ``(X, y)``, or ``(X, y, distance_diff)`` when ``distance`` is True.
        ``X`` is a list with one feature vector per relation, ``y`` is an
        integer numpy array with the class of each relation, and
        ``distance_diff`` is a list of ``Feature.get_distance_diff()`` values.

    """
    cache_file = "save.p"

    # Pos and Stem are only built or loaded when one of their features is requested.
    if "pos" in features or "stem" in features:
        # The existence check must look at the same file that is written and read below.
        if new or not os.path.isfile(cache_file):
            pos = Pos(data, 6, annotations)
            stem = Stem(data, annotations)
            with open(cache_file, "wb") as cache:
                pickle.dump((pos, stem), cache)
        else:
            # NOTE(review): pickle.load can execute arbitrary code, so the cache
            # file must come from a trusted source. The cache is also not keyed
            # on `annotations`; pass new=True after switching annotation mode.
            with open(cache_file, "rb") as cache:
                pos, stem = pickle.load(cache)

    if distance:
        distance_diff = []

    X = []
    labels = []

    for txt in data.textfiles:
        # Union or intersected relations?
        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)

            feature = []

            # Make polarity feature
            if "polarity" in features:
                feature = np.concatenate((feature, [f.get_polarity()]))

            # Make distance feature (atleast_1d accepts a scalar as well as a sequence)
            if "distance" in features:
                feature = np.concatenate((feature, np.atleast_1d(f.get_distance())))

            # Make POS feature
            # NOTE(review): arguments are passed as (target, source) here but as
            # (source, target) to stem.transform below - confirm against Pos.transform.
            if "pos" in features:
                pos_feature = pos.transform(f.get_pos_target(), f.get_pos_source())
                pos_feature = pos_feature.toarray()[0]
                feature = np.concatenate((feature, pos_feature))

            # Make Stem feature
            if "stem" in features:
                stem_feature = stem.transform(f.get_stem_source(), f.get_stem_target())
                stem_feature = stem_feature[0]
                feature = np.concatenate((feature, stem_feature))

            # Make similarity feature
            if "similarity" in features:
                feature = np.concatenate((feature, [f.get_similarity_of_words()]))

            # Make modality feature
            if "modality" in features:
                feature = np.concatenate((feature, [f.get_modality()]))

            # Make aspect feature
            if "aspect" in features:
                feature = np.concatenate((feature, np.atleast_1d(f.get_aspect())))

            # Make tense feature
            if "tense" in features:
                feature = np.concatenate((feature, np.atleast_1d(f.get_tense())))

            # Append feature to X
            X.append(feature)
            labels.append(f.get_class())

            # Append distance information if needed
            if distance:
                distance_diff.append(f.get_distance_diff())

    # Build the label array once instead of re-allocating it per relation.
    y = np.array(labels, dtype=int)

    if distance:
        return (X, y, distance_diff)
    else:
        return (X, y)
def get_sentences(number, class_id, annotations="intersected"):
    """Print up to `number` example relations whose class equals `class_id`.

    Useful if you need to get an overview over sentences with a certain
    temporal relation. At most one matching relation is printed per text
    file, so the examples come from different texts.

    Args:
        number (int): Maximum number of relations to print.
        class_id: Relation class to look for; compared with
            ``Feature(rel).get_class()``.
        annotations (str): "union" or "intersected" selects which relation
            set is computed for each text. Any other value uses
            ``txt.relations`` as they are after parsing.

    Returns:
        None. Everything is written to stdout.

    """
    tense_names = ("None", "Present", "Past", "Future")
    aspect_names = ("None", "Progressive", "Perfect", "Perfect Progressive")

    def report(kind, role, code, names):
        # Print the label whose index equals `code`; codes outside the
        # table print nothing.
        for index, name in enumerate(names):
            if code == index:
                print("Estimated %s for %s event: %s" % (kind, role, name))
                return

    data = parse_XML("fables-100-temporal-dependency.xml",
                     "McIntyreLapata09Resources/fables")

    printed = 0

    for txt in data.textfiles:
        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)

            if f.get_class() != class_id:
                continue

            # Limit reached: nothing more can be printed, so stop entirely
            # instead of scanning the remaining text files.
            if printed >= number:
                return
            printed += 1

            source = rel.source
            target = rel.target

            # The single-argument print("...") form behaves the same as a
            # Python 2 print statement and as the Python 3 function;
            # print("") is used for blank lines for the same reason.
            print("---------------")
            if target.sentence == source.sentence:
                # Both events share a sentence, so show it only once.
                print("Source event: " + source.content)
                print("Target event: " + target.content)
                print(target.sentence)
                print("")
                print("Source Surrounding: " + source.surrounding)
                print("Target Surrounding: " + target.surrounding)
            else:
                print("Source event: " + source.content)
                print("Whole sentence: " + source.sentence)
                print("Surrounding: " + source.surrounding)
                print("")
                print("Target event: " + target.content)
                print("Whole sentence: " + target.sentence)
                print("Surrounding: " + target.surrounding)

            report("tense", "source", f.get_tense_source(), tense_names)
            report("tense", "target", f.get_tense_target(), tense_names)
            report("aspect", "source", f.get_aspect_source(), aspect_names)
            report("aspect", "target", f.get_aspect_target(), aspect_names)

            print("Distance between events: " + str(f.get_distance()))

            print("---------------")
            print("")

            # Get the next example from the next text file.
            break