def add_grammatical_relations(self, sentence, parsed_sentence, relationships,
                              dependencies):
    """Record the grammatical dependencies of a parsed sentence.

    For every entry in ``parsed_sentence["dependencies"]`` that does not
    involve ROOT, look up (or create) the matching
    ``GrammaticalRelationship`` and ``Dependency`` records and attach the
    dependency to ``sentence`` via ``sentence.add_dependency``.

    Args:
        sentence: Object the dependencies are attached to. Only its
            ``add_dependency`` method and ``text`` attribute are used here.
        parsed_sentence: Mapping with a ``"dependencies"`` sequence and a
            ``"words"`` sequence. Each dependency is indexed as
            ``[0]`` relation name, ``[1]`` governor surface form,
            ``[2]`` governor position, ``[3]`` dependent surface form,
            ``[4]`` dependent position. Positions greater than zero are
            turned into indices into ``"words"`` by subtracting one.
            ``words[i][1]`` is a mapping with ``"PartOfSpeech"``,
            ``"Lemma"``, ``"CharacterOffsetBegin"`` and
            ``"CharacterOffsetEnd"`` entries.
            NOTE(review): this looks like Stanford CoreNLP output --
            confirm against the caller.
        relationships: Dict cache mapping relation name to its
            ``GrammaticalRelationship``; updated in place. May be ``None``.
        dependencies: Dict cache mapping
            ``(relationship name, governor id, dependent id)`` to its
            ``Dependency``; updated in place. May be ``None``.

    Returns:
        None. If either cache is ``None`` nothing is recorded (that path is
        not implemented yet). If a governor or dependent ``Word`` cannot be
        found, the errors are logged and the method returns immediately,
        without reaching the final ``db.session.commit()``.
    """
    # Duplicate handling needs both caches. ``is not None`` (rather than
    # truthiness) keeps empty dictionaries usable.
    use_caches = relationships is not None and dependencies is not None

    def lemma_and_surface(index, surface):
        """Return ``(lemma, surface)`` for the word at ``index``."""
        info = parsed_sentence["words"][index][1]
        try:
            return info["Lemma"].lower(), surface
        except AttributeError:
            # This word wasn't recognized as a word by the parser; it's
            # probably a weird character or something. Stand in a run of
            # asterisks as long as the word's character span for both the
            # lemma and the surface form.
            masked = "*" * (int(info["CharacterOffsetEnd"])
                            - int(info["CharacterOffsetBegin"]))
            return masked, masked

    for parsed_dependency in parsed_sentence["dependencies"]:
        # We don't want to make a dependency involving ROOT
        if int(parsed_dependency[2]) <= 0 or int(parsed_dependency[4]) <= 0:
            continue

        if not use_caches:
            # TODO: fill -- handling without the caches is not implemented.
            continue

        relationship_name = parsed_dependency[0]
        governor_index = int(parsed_dependency[2]) - 1
        dependent_index = int(parsed_dependency[4]) - 1

        governor_pos = parsed_sentence["words"][governor_index][1][
            "PartOfSpeech"]
        governor_lemma, governor_surface = lemma_and_surface(
            governor_index, parsed_dependency[1])

        dependent_pos = parsed_sentence["words"][dependent_index][1][
            "PartOfSpeech"]
        dependent_lemma, dependent_surface = lemma_and_surface(
            dependent_index, parsed_dependency[3])

        if relationship_name in relationships:
            relationship = relationships[relationship_name]
        else:
            try:
                relationship = GrammaticalRelationship.query.filter_by(
                    name=relationship_name,
                    project=self.project).one()
            except MultipleResultsFound:
                project_logger.error(
                    "duplicate records found "
                    "for: %s", str(relationship_name))
                # There is no unambiguous record to use. Skip this
                # dependency rather than caching an unbound or stale
                # relationship under this name.
                continue
            except NoResultFound:
                relationship = GrammaticalRelationship(
                    name=relationship_name,
                    project=self.project)

            relationships[relationship_name] = relationship

        # Read the data for the governor, and find the corresponding word
        governor_word = Word.query.filter_by(
            lemma=governor_lemma,
            surface=governor_surface.lower(),
            part_of_speech=governor_pos).first()

        # Same as above for the dependent in the relationship
        dependent_word = Word.query.filter_by(
            lemma=dependent_lemma,
            surface=dependent_surface.lower(),
            part_of_speech=dependent_pos).first()

        # ``first()`` yields None when no row matches.
        if governor_word is None or dependent_word is None:
            project_logger.error(
                "Governor or dependent not "
                "found; giving up on parse. This likely indicates "
                "an error in the preprocessing; rerunning the "
                "preprocessor is recommended.")
            project_logger.error("Failed to process : %s", sentence.text)
            return  # die

        key = (relationship.name, governor_word.id, dependent_word.id)

        if key in dependencies:
            dependency_record = dependencies[key]
        else:
            try:
                dependency_record = Dependency.query.filter_by(
                    grammatical_relationship=relationship,
                    governor=governor_word,
                    dependent=dependent_word).one()
            except MultipleResultsFound:
                project_logger.error(
                    "duplicate records found for: %s", str(key))
                # Skip instead of treating the raw parser record as a
                # Dependency model.
                continue
            except NoResultFound:
                dependency_record = Dependency(
                    grammatical_relationship=relationship,
                    governor=governor_word,
                    dependent=dependent_word)

            dependencies[key] = dependency_record

        # Add the dependency to the sentence
        sentence.add_dependency(
            dependency=dependency_record,
            governor_index=governor_index,
            dependent_index=dependent_index,
            project=self.project,
            force=False)

        # NOTE(review): presumably ``save(False)`` defers the commit to the
        # single ``db.session.commit()`` below -- confirm in the model.
        dependency_record.save(False)

    db.session.commit()