def get_sentences(number, class_id, annotations="intersected"):
    """Print up to ``number`` example relations of type ``class_id``.

    Useful if you need to get an overview over sentences with a certain
    temporal relation. At most one matching relation is printed per text
    file, so the examples come from different texts. Nothing is returned;
    all output goes to stdout.

    NOTE(review): a function with the same name and signature is defined
    again further down in this file; at import time that later definition
    replaces this one.

    Args:
        number (int): Maximum number of relations to print.
        class_id: Relation class to look for; compared with ``==`` against
            ``Feature(rel).get_class()``.
        annotations (str): "union" calls ``compute_union_relations()`` on
            every text, "intersected" calls
            ``compute_intersection_relations()``. Any other value leaves
            ``txt.relations`` as delivered by the parser.
    """
    # Numeric codes returned by the Feature getters -> printed label.
    # Codes missing from a table print nothing.
    tense_labels = {0: "None", 1: "Present", 2: "Past", 3: "Future"}
    aspect_labels = {
        0: "None",
        1: "Progressive",
        2: "Perfect",
        3: "Perfect Progressive",
    }

    data = parse_XML("fables-100-temporal-dependency.xml", "McIntyreLapata09Resources/fables")

    # print(...) is always called with exactly one argument so that it
    # behaves the same as a Python 2 print statement and as the Python 3
    # function.
    printed = 0
    for txt in data.textfiles:
        # Stop touching further texts once enough relations were shown.
        if printed >= number:
            break

        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)
            if f.get_class() != class_id:
                continue

            printed += 1
            source = rel.source
            target = rel.target

            print("---------------")
            if target.sentence == source.sentence:
                # Both events share one sentence: print it only once.
                print("Source event: " + source.content)
                print("Target event: " + target.content)
                print(target.sentence)
                print("")
                print("Source Surrounding: " + source.surrounding)
                print("Target Surrounding: " + target.surrounding)
            else:
                print("Source event: " + source.content)
                print("Whole sentence: " + source.sentence)
                print("Surrounding: " + source.surrounding)
                print("")
                print("Target event: " + target.content)
                print("Whole sentence: " + target.sentence)
                print("Surrounding: " + target.surrounding)

            estimates = (
                ("tense", tense_labels,
                 (f.get_tense_source(), f.get_tense_target())),
                ("aspect", aspect_labels,
                 (f.get_aspect_source(), f.get_aspect_target())),
            )
            for kind, labels, values in estimates:
                for role, value in zip(("source", "target"), values):
                    if value in labels:
                        print("Estimated " + kind + " for " + role
                              + " event: " + labels[value])

            print("Distance between events: " + str(f.get_distance()))
            print("---------------")
            print("")

            # Only one example per text: continue with the next text file.
            break
def parse_Features(data, new=False, annotations="union", features=["pos", "stem", "aspect", "tense", "distance", "similarity", "polarity", "modality"], distance=False):
    """Extracts the features out of the dataset and returns a list of features with the corresponding classes.

    Args:
        data (list): The parsed data from fables-100-temporal-dependency.xml.
            Only its ``textfiles`` attribute is read here.
        new (bool): With new=True a new calculation of Pos() and Stem() can be
            enforced. Otherwise it will be loaded from the cache file
            "save.p" if that file exists.
        annotations (str): Looking on all relations ("union") or at all
            relations in common between the annotators ("intersected").
        features (list): Determines which features should be activated.
            Possible values: "pos", "stem", "aspect", "tense", "distance",
            "similarity", "polarity", "modality". The default list is only
            read, never modified.
        distance (bool): If set to True parse_Features() will return distance
            information for the data (needed for evaluation).

    Returns:
        tuple: ``(X, y)`` where ``X`` is a list with one feature vector per
        relation and ``y`` is a numpy array with the class of each relation.
        With ``distance=True`` the tuple is ``(X, y, distance_diff)`` where
        ``distance_diff`` is a list of ``Feature.get_distance_diff()`` values
        in the same order.
    """
    # The cache is checked, written and read under one single name. (The
    # existence check used to look for "set.p" while the data lived in
    # "save.p", so the cache was never found.)
    cache_path = "save.p"

    # Only compute pos and stem if new flag is set or no cache exists yet
    if "pos" in features or "stem" in features:
        if new or not os.path.isfile(cache_path):
            pos = Pos(data, 6, annotations)
            stem = Stem(data, annotations)
            with open(cache_path, "wb") as cache_file:
                pickle.dump((pos, stem), cache_file)
        else:
            # NOTE(review): the cache is not keyed by `annotations`; a cache
            # built with another setting is reused as-is -- pass new=True
            # after switching. Unpickling runs code stored in the file, so
            # the cache file must come from a trusted source.
            with open(cache_path, "rb") as cache_file:
                pos, stem = pickle.load(cache_file)

    if distance:
        distance_diff = []

    X = []
    y = np.array([], dtype=int)

    for txt in data.textfiles:
        # Union or intersected relations?
        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)
            feature = []

            # Make polarity feature
            if "polarity" in features:
                feature = np.concatenate((feature, [f.get_polarity()]))

            # Make distance feature
            if "distance" in features:
                feature = np.concatenate((feature, f.get_distance()))

            # Make POS feature
            if "pos" in features:
                # NOTE(review): target is passed before source here, while
                # the stem transform below gets source before target --
                # confirm against Pos.transform / Stem.transform.
                pos_feature = pos.transform(f.get_pos_target(), f.get_pos_source())
                pos_feature = pos_feature.toarray()[0]
                feature = np.concatenate((feature, pos_feature))

            # Make Stem feature
            if "stem" in features:
                stem_feature = stem.transform(f.get_stem_source(), f.get_stem_target())
                stem_feature = stem_feature[0]
                feature = np.concatenate((feature, stem_feature))

            # Make similarity feature
            if "similarity" in features:
                feature = np.concatenate((feature, [f.get_similarity_of_words()]))

            # Make modality feature
            if "modality" in features:
                feature = np.concatenate((feature, [f.get_modality()]))

            # Make aspect feature
            if "aspect" in features:
                feature = np.concatenate((feature, f.get_aspect()))

            # Make tense feature
            if "tense" in features:
                feature = np.concatenate((feature, f.get_tense()))

            # Append feature to X
            X.append(feature)
            y = np.append(y, [f.get_class()])

            # Append distance information if needed
            if distance:
                distance_diff.append(f.get_distance_diff())

    if distance:
        return (X, y, distance_diff)
    else:
        return (X, y)
def get_sentences(number, class_id, annotations="intersected"):
    """Print up to ``number`` example relations of type ``class_id``.

    Useful if you need to get an overview over sentences with a certain
    temporal relation. At most one matching relation is printed per text
    file, so the examples come from different texts. Nothing is returned;
    all output goes to stdout.

    NOTE(review): a function with the same name and signature is also
    defined earlier in this file; being the later definition, this is the
    one that is bound to the name after import.

    Args:
        number (int): Maximum number of relations to print.
        class_id: Relation class to look for; compared with ``==`` against
            ``Feature(rel).get_class()``.
        annotations (str): "union" calls ``compute_union_relations()`` on
            every text, "intersected" calls
            ``compute_intersection_relations()``. Any other value leaves
            ``txt.relations`` as delivered by the parser.
    """
    # Numeric codes returned by the Feature getters -> printed label.
    # Codes missing from a table print nothing.
    tense_labels = {0: "None", 1: "Present", 2: "Past", 3: "Future"}
    aspect_labels = {
        0: "None",
        1: "Progressive",
        2: "Perfect",
        3: "Perfect Progressive",
    }

    data = parse_XML("fables-100-temporal-dependency.xml", "McIntyreLapata09Resources/fables")

    # print(...) is always called with exactly one argument so that it
    # behaves the same as a Python 2 print statement and as the Python 3
    # function.
    printed = 0
    for txt in data.textfiles:
        # Stop touching further texts once enough relations were shown.
        if printed >= number:
            break

        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)
            if f.get_class() != class_id:
                continue

            printed += 1
            source = rel.source
            target = rel.target

            print("---------------")
            if target.sentence == source.sentence:
                # Both events share one sentence: print it only once.
                print("Source event: " + source.content)
                print("Target event: " + target.content)
                print(target.sentence)
                print("")
                print("Source Surrounding: " + source.surrounding)
                print("Target Surrounding: " + target.surrounding)
            else:
                print("Source event: " + source.content)
                print("Whole sentence: " + source.sentence)
                print("Surrounding: " + source.surrounding)
                print("")
                print("Target event: " + target.content)
                print("Whole sentence: " + target.sentence)
                print("Surrounding: " + target.surrounding)

            estimates = (
                ("tense", tense_labels,
                 (f.get_tense_source(), f.get_tense_target())),
                ("aspect", aspect_labels,
                 (f.get_aspect_source(), f.get_aspect_target())),
            )
            for kind, labels, values in estimates:
                for role, value in zip(("source", "target"), values):
                    if value in labels:
                        print("Estimated " + kind + " for " + role
                              + " event: " + labels[value])

            print("Distance between events: " + str(f.get_distance()))
            print("---------------")
            print("")

            # Only one example per text: continue with the next text file.
            break