Exemple #1
0
    def add_grammatical_relations(self, sentence, parsed_sentence,
                                  relationships, dependencies):
        """Attach the grammatical dependencies of a parsed sentence to it.

        For every entry of ``parsed_sentence["dependencies"]`` that does not
        involve ROOT, look up (or create) the ``GrammaticalRelationship`` and
        ``Dependency`` records, register the dependency on ``sentence`` via
        ``sentence.add_dependency`` and save it. The database session is
        committed once after all entries have been processed.

        Args:
            sentence: The sentence object the dependencies are added to; it
                must provide ``add_dependency`` and ``text``.
            parsed_sentence: Parser output. ``parsed_sentence["dependencies"]``
                is a sequence of entries indexed as ``[0]`` relationship
                name, ``[1]`` governor surface form, ``[2]`` governor index,
                ``[3]`` dependent surface form, ``[4]`` dependent index.
                Indices are 1-based; a value of 0 denotes ROOT.
                ``parsed_sentence["words"][i][1]`` is a mapping with the keys
                ``"PartOfSpeech"``, ``"Lemma"``, ``"CharacterOffsetBegin"``
                and ``"CharacterOffsetEnd"``.
                NOTE(review): this looks like Stanford CoreNLP wrapper
                output -- confirm against the caller.
            relationships: Cache mapping a relationship name to its
                ``GrammaticalRelationship``. Updated in place. May be None.
            dependencies: Cache mapping ``(relationship name, governor id,
                dependent id)`` to its ``Dependency``. Updated in place. May
                be None.

        Returns:
            None. If the governor or dependent ``Word`` cannot be found the
            method logs the problem and returns early, without reaching the
            final commit.
        """

        def describe_token(index, surface):
            """Return ``(surface, lemma, part_of_speech)`` for one token."""
            attributes = parsed_sentence["words"][index][1]
            part_of_speech = attributes["PartOfSpeech"]
            try:
                lemma = attributes["Lemma"].lower()
            except AttributeError:
                # this word wasn't recognized as a word by the parser,
                # it's probably a weird character or something; stand in a
                # run of asterisks as long as the token's character span
                lemma = "*" * (int(attributes["CharacterOffsetEnd"]) -
                               int(attributes["CharacterOffsetBegin"]))
                surface = lemma
            return surface, lemma, part_of_speech

        for parsed_dependency in parsed_sentence["dependencies"]:
            governor_index = int(parsed_dependency[2]) - 1
            dependent_index = int(parsed_dependency[4]) - 1

            # We don't want to make a dependency involving ROOT
            if governor_index < 0 or dependent_index < 0:
                continue

            governor_surface, governor_lemma, governor_pos = describe_token(
                governor_index, parsed_dependency[1])
            dependent_surface, dependent_lemma, dependent_pos = \
                describe_token(dependent_index, parsed_dependency[3])
            relationship_name = parsed_dependency[0]

            # Duplication handling needs both caches.
            if relationships is None or dependencies is None:
                # TODO: fill
                continue

            if relationship_name in relationships:
                relationship = relationships[relationship_name]
            else:
                try:
                    relationship = GrammaticalRelationship.query.filter_by(
                        name=relationship_name,
                        project=self.project).one()
                except MultipleResultsFound:
                    project_logger.error(
                        "duplicate records found "
                        "for: %s", str(relationship_name))
                    # There is no single record to use. Skip this entry
                    # instead of caching an unbound or stale relationship.
                    continue
                except NoResultFound:
                    relationship = GrammaticalRelationship(
                        name=relationship_name,
                        project=self.project)

                relationships[relationship_name] = relationship

            # Read the data for the governor, and find the
            # corresponding word
            governor_word = Word.query.filter_by(
                lemma=governor_lemma,
                surface=governor_surface.lower(),
                part_of_speech=governor_pos).first()

            # Same as above for the dependent in the relationship
            dependent_word = Word.query.filter_by(
                lemma=dependent_lemma,
                surface=dependent_surface.lower(),
                part_of_speech=dependent_pos).first()

            # ``first()`` yields None when no row matches.
            if governor_word is None or dependent_word is None:
                project_logger.error(
                    "Governor or dependent not "
                    "found; giving up on parse. This likely indicates "
                    "an error in the preprocessing; rerunning the "
                    "preprocessor is recommended.")
                project_logger.error("Failed to process : %s",
                                     sentence.text)

                return  # die

            key = (relationship.name, governor_word.id, dependent_word.id)

            if key in dependencies:
                dependency_record = dependencies[key]
            else:
                try:
                    dependency_record = Dependency.query.filter_by(
                        grammatical_relationship=relationship,
                        governor=governor_word,
                        dependent=dependent_word).one()
                except MultipleResultsFound:
                    project_logger.error(
                        "duplicate records found for: %s", str(key))
                    # Skip: otherwise the raw parser entry would be cached
                    # and handed to the sentence as if it were a Dependency.
                    continue
                except NoResultFound:
                    dependency_record = Dependency(
                        grammatical_relationship=relationship,
                        governor=governor_word,
                        dependent=dependent_word)

                dependencies[key] = dependency_record

            # Add the dependency to the sentence
            sentence.add_dependency(dependency=dependency_record,
                                    governor_index=governor_index,
                                    dependent_index=dependent_index,
                                    project=self.project,
                                    force=False)

            dependency_record.save(False)

        db.session.commit()