def specifier_before_cem_and_value_phrase(self):
    """Compose the 'root_phrase' pattern: specifier, then compound, then value.

    The element is the sum, in this order, of ``self.specifier_phrase``,
    a filler run, ``self.cem_phrase``, a second filler run, an optional
    ``self.prefix`` and ``self.value_phrase``. The sum is then called with
    the label ``'root_phrase'``.

    NOTE(review): presumably each filler run consumes (and hides) tokens
    that do not begin a compound, specifier or value -- confirm against
    the parser element library.
    """

    def filler():
        # A fresh filler element per call, exactly as the inline original
        # built it twice: OneOrMore(Not(<entity>) + Any().hide()).
        entity_start = (
            self.cem_phrase | self.specifier_phrase | self.value_phrase
        )
        return OneOrMore(Not(entity_start) + Any().hide())

    phrase = (
        self.specifier_phrase
        + filler()
        + self.cem_phrase
        + filler()
        + Optional(self.prefix)
        + self.value_phrase
    )
    return phrase('root_phrase')
def specifier_before_cem_and_value_phrase(self):
    """Compose the 'root_phrase' pattern: specifier, then compound, then value.

    Order of the summed elements: ``self.specifier_phrase``, a filler run,
    ``self.cem_phrase``, a second (stricter) filler run, an optional
    ``self.prefix`` and ``self.value_phrase``. The sum is called with the
    label ``'root_phrase'``.

    The second filler additionally excludes the two-element sequence
    ``I('dielectric') + I('constant')`` and ``I('not')``.

    NOTE(review): presumably the fillers skip hidden tokens that do not
    start one of the excluded elements -- confirm against the parser
    element library.
    """
    # Filler between the specifier and the compound.
    before_cem = OneOrMore(
        Not(self.cem_phrase | self.specifier_phrase | self.value_phrase)
        + Any().hide()
    )
    # Disabled term kept from the original source:
    # + self.connection + Optional(self.article))

    # Filler between the compound and the value. ``+`` binds tighter than
    # ``|``, so "dielectric constant" is one alternative; the parentheses
    # only make that grouping explicit.
    after_cem_stop = (
        self.cem_phrase
        | self.specifier_phrase
        | (I('dielectric') + I('constant'))
        | self.value_phrase
        | I('not')
    )
    after_cem = OneOrMore(Not(after_cem_stop) + Any().hide())

    phrase = (
        self.specifier_phrase
        + before_cem
        + self.cem_phrase
        + after_cem
        + Optional(self.prefix)
        + self.value_phrase
    )
    return phrase('root_phrase')
def multi_entity_phrase_2(self):
    """Single compound, single specifier, multiple transitions.

    e.g. BiFeO3 shows magnetic transitions at 1093 and 640 K

    Builds a ``Group`` labelled ``'multi_entity_phrase_2'`` from, in order:
    ``self.single_cem``, a filler run, ``self.specifier_phrase``, a second
    filler run, ``self.list_of_values``, an optional hidden ``delim`` and an
    optional ``I('respectively')``.
    """

    def filler():
        # Built anew at each use, as in the inline original.
        entity_start = (
            self.single_cem | self.specifier_phrase | self.value_phrase
        )
        return OneOrMore(Not(entity_start) + Any().hide())

    body = (
        self.single_cem
        + filler()
        + self.specifier_phrase
        + filler()
        + self.list_of_values
        + Optional(delim).hide()
        + Optional(I('respectively'))
    )
    return Group(body)('multi_entity_phrase_2')
def multi_entity_phrase_4a(self):
    """Multiple compounds, single specifier, single transition.

    e.g. TC of 640 K in BiFeO3, LaFeO3 and MnO

    Builds a ``Group`` labelled ``'multi_entity_phrase_4'`` from
    ``self.single_specifier_and_value``, a filler run and
    ``self.list_of_cems``.
    """
    specifier_and_value = self.single_specifier_and_value
    # Filler: OneOrMore(Not(<cem | specifier | value>) + Any().hide()).
    entity_start = self.single_cem | self.specifier_phrase | self.value_phrase
    filler = OneOrMore(Not(entity_start) + Any().hide())
    body = specifier_and_value + filler + self.list_of_cems
    return Group(body)('multi_entity_phrase_4')
def multi_entity_phrase_1(self):
    """Single compound, multiple specifiers and values.

    e.g. BiFeO3 has TC1 = 1093 K and Tc2 = 640 K

    Builds a ``Group`` labelled ``'multi_entity_phrase_1'`` from
    ``self.single_cem``, a filler run and ``self.list_of_properties``.
    """
    compound = self.single_cem
    # Filler: OneOrMore(Not(<cem | specifier | value>) + Any().hide()).
    entity_start = self.single_cem | self.specifier_phrase | self.value_phrase
    filler = OneOrMore(Not(entity_start) + Any().hide())
    body = compound + filler + self.list_of_properties
    return Group(body)('multi_entity_phrase_1')
def list_of_values(self):
    """List of values with either multiple units or one unit at the end.

    Returns ``single_unit_last | unit_on_each``; both alternatives carry the
    label ``'value_list'``. Only the first alternative is wrapped in
    ``Group``.
    """
    # Alternative 1: values with an optional unit, then a hidden
    # "and"/"or", then a final full value phrase (single unit at the end).
    single_unit_last = Group(
        self.value_with_optional_unit
        + Optional(OneOrMore(delim.hide() + self.value_with_optional_unit))
        + Optional(delim).hide()
        + (I('and') | I('or')).hide()
        + Optional(delim).hide()
        + self.value_phrase
    )('value_list')

    # Alternative 2: every item is a full value phrase (multiple units);
    # the last separator may be "and", "or" or a delimiter.
    unit_on_each = (
        self.value_phrase
        + Optional(OneOrMore(delim.hide() + self.value_phrase))
        + Optional(delim).hide()
        + (I('and') | I('or') | delim).hide()
        + self.value_phrase
    )('value_list')

    return single_unit_last | unit_on_each
def cem_before_specifier_and_value_phrase(self):
    """Compose the 'root_phrase' pattern with the compound mentioned first.

    Order of the summed elements: ``self.cem_phrase``, a filler run,
    ``self.specifier_phrase`` and ``self.specifier_and_value``. The sum is
    called with the label ``'root_phrase'``.

    NOTE(review): presumably the filler skips hidden tokens that do not
    start a compound, specifier or specifier-and-value -- confirm against
    the parser element library.
    """
    compound = self.cem_phrase
    entity_start = (
        self.cem_phrase | self.specifier_phrase | self.specifier_and_value
    )
    filler = OneOrMore(Not(entity_start) + Any().hide())
    # Disabled term kept from the original source:
    # + self.between_cem_specifier)
    phrase = (
        compound
        + filler
        + self.specifier_phrase
        + self.specifier_and_value
    )
    return phrase('root_phrase')
def multi_entity_phrase_3a(self):
    """Multiple compounds, single specifier, multiple transitions; cems first.

    e.g. TC in BiFeO3 and LaFeO3 of 640 and 750 K

    Builds a ``Group`` labelled ``'multi_entity_phrase_3'`` from, in order:
    an optional ``self.specifier_phrase``, an optional hidden "in"/"for",
    ``self.list_of_cems``, a filler run, ``self.prefix``,
    ``self.list_of_values`` and an optional hidden ``delim`` followed by
    "respectively".
    """
    opening = (
        Optional(self.specifier_phrase)
        + Optional(I('in') | I('for')).hide()
        + self.list_of_cems
    )
    # Filler: OneOrMore(Not(<cem | specifier | value>) + Any().hide()).
    entity_start = self.single_cem | self.specifier_phrase | self.value_phrase
    filler = OneOrMore(Not(entity_start) + Any().hide())
    closing = Optional(delim.hide() + I('respectively').hide())
    body = opening + filler + self.prefix + self.list_of_values + closing
    return Group(body)('multi_entity_phrase_3')
def multi_entity_phrase_4b(self):
    """Multiple compounds, single specifier and value; cems first.

    Builds a ``Group`` labelled ``'multi_entity_phrase_4'`` from
    ``self.list_of_cems``, a filler run and
    ``self.single_specifier_and_value``.
    """
    compounds = self.list_of_cems
    # Filler: OneOrMore(Not(<cem | specifier | value>) + Any().hide()).
    entity_start = self.single_cem | self.specifier_phrase | self.value_phrase
    filler = OneOrMore(Not(entity_start) + Any().hide())
    body = compounds + filler + self.single_specifier_and_value
    return Group(body)('multi_entity_phrase_4')
def cem_after_specifier_and_value_phrase(self):
    """Compose the 'root_phrase' pattern with the compound mentioned last.

    Order of the summed elements: ``self.specifier_and_value``, a filler
    run and ``self.cem_phrase``. The sum is called with the label
    ``'root_phrase'``.

    NOTE(review): presumably the filler skips hidden tokens that do not
    start a compound, specifier or value -- confirm against the parser
    element library.
    """
    specifier_and_value = self.specifier_and_value
    entity_start = self.cem_phrase | self.specifier_phrase | self.value_phrase
    filler = OneOrMore(Not(entity_start) + Any().hide())
    phrase = specifier_and_value + filler + self.cem_phrase
    return phrase('root_phrase')
# Add to the available models
Compound.curie_temperatures = ListType(ModelType(CurieTemperature))

# Define a very basic entity tagger.
# The regex literals below are raw strings: sequences such as "\." and "\d"
# are invalid escapes in a normal string literal (a warning today, an error
# in a future Python). The pattern text itself is unchanged.
specifier = (
    I('curie') + I('temperature') + Optional(lrb | delim)
    + Optional(R(r'^T(C|c)(urie)?')) + Optional(rrb)
    | R(r'^T(C|c)(urie)?')
)('specifier').add_action(join)
units = (R(r'^[CFK]\.?$'))('units').add_action(merge)
# NOTE(review): "\.\," requires a literal "." immediately followed by ",",
# so a plain decimal such as "640.5" does not match. A character class
# "[\.\,]" may have been intended -- confirm before changing, since
# existing expectations may depend on the current pattern.
value = (R(r'^\d+(\.\,\d+)?$'))('value')

# Let the entities be any combination of chemical names, specifier values and units
entities = (chemical_name | specifier | value + units)

# Now create a very generic parse phrase that will match any combination of these entities
curie_temperature_phrase = (entities + OneOrMore(entities | Any()))('curie_temperature')
curie_temp_entities = [chemical_name, specifier, value, units]

# Define the relationship and give it a name
curie_temp_relationship = ChemicalRelationship(
    curie_temp_entities,
    curie_temperature_phrase,
    name='curie_temperatures',
)

# class TestSnowball(unittest.TestCase):

#     maxDiff = None
#     training_corpus = 'tests/data/relex/curie_training/'
#     snowball_pkl = 'tests/data/relex/curie_temperatures.pkl'
#     snowball_pkl_py2 = 'tests/data/relex/curie_temperatures_py2.pkl'

#     def test_load_snowball(self):