def test_it_should_process_entities_with_variants(self):
    """Variant entities should be folded back into their root entity when
    converting to the snips dataset format."""
    result = parse("""
@[city]
  paris
  rouen
  ~[new york]
@[city#variant]
  one variant
  another one
~[new york]
  nyc
  the big apple
""")
    snips_data = snips(result)
    # Only the root `city` entity should remain after variant merging.
    expect(snips_data['entities']).to.have.length_of(1)
    expect(snips_data['entities']).to.have.key('city')
    entity = snips_data['entities']['city']
    # 2 root values + 1 synonym ref + 2 variant values.
    expect(entity['data']).to.have.length_of(5)
    expect(entity['use_synonyms']).to.be.true
    expect(entity['automatically_extensible']).to.be.true
    expect(entity['data'][2]['synonyms']).to.have.length_of(2)
    expect(entity['data'][2]['synonyms']).to.equal(
        ['nyc', 'the big apple'])
    data = [d.get('value') for d in entity['data']]
    expect(data).to.equal(
        ['paris', 'rouen', 'new york', 'one variant', 'another one'])
def fit_from_skill_data(self, skills=None):
    """Fit the interpreter with every training data registered in the system.

    Args:
      skills (list of str): Optional list of skill names from which we
        should retrieve training data.

    """
    filtered_module_trainings = get_training_data(self.lang)

    if skills:
        filtered_module_trainings = {
            k: v for (k, v) in filtered_module_trainings.items()
            if k in skills
        }

    # Use lazy %-style logger args so the message is only formatted when
    # the log level is actually enabled (instead of eager `%` formatting).
    self._logger.info('Merging skill training data from "%d" modules',
                      len(filtered_module_trainings))

    data = {}
    # Sort by module name so the merge order (and thus the final dataset)
    # is deterministic.
    sorted_trainings = sorted(filtered_module_trainings.items(),
                              key=lambda x: x[0])

    for (module, training_dsl) in sorted_trainings:
        if not training_dsl:
            self._logger.warning('No training data found for "%s"', module)
            continue

        try:
            data = deep_update(data, parse(training_dsl))
        except Exception as e:  # One broken DSL must not abort the whole fit
            self._logger.error('Could not parse "%s" training data: "%s"',
                               module, e)

    try:
        # Resolve the post-processor matching this interpreter's name.
        data = getattr(postprocessors, self.name)(data, language=self.lang)
    except AttributeError:
        return self._logger.critical(
            'No post-processors found on pychatl for this interpreter!')

    self.fit(data)
def test_it_should_parse_synonyms(self):
    """A synonym declaration should expose its props and its raw values."""
    result = parse("""
~[new york](some=prop, something=else)
  nyc
  the big apple""")
    expect(result['synonyms']).to.have.length_of(1)
    expect(result['synonyms']).to.have.key('new york')
    synonym = result['synonyms']['new york']
    expect(synonym['props']).to.have.length_of(2)
    expect(synonym['props']).to.have.key('some')
    expect(synonym['props']).to.have.key('something')
    expect(synonym['props']['some']).to.equal('prop')
    expect(synonym['props']['something']).to.equal('else')
    expect(synonym['data']).to.have.length_of(2)
    data = synonym['data']
    expect(data[0]['type']).to.equal('text')
    expect(data[0]['value']).to.equal('nyc')
    expect(data[1]['type']).to.equal('text')
    expect(data[1]['value']).to.equal('the big apple')
def test_it_should_parse_entities_variants(self):
    """Entity variants should be attached to the root entity and their
    props merged with the root entity props."""
    result = parse("""
@[city](some=prop, something=else)
  paris
  rouen
  ~[new york]
@[city#variant](var=prop)
  one variant
  another one
""")
    expect(result['entities']).to.have.length_of(1)
    entity = result['entities']['city']
    expect(entity['variants']).to.have.length_of(1)
    expect(entity['variants']).to.have.key('variant')
    # Root props + the variant's own prop are merged together.
    expect(entity['props']).to.have.length_of(3)
    expect(entity['props']).to.have.key('some')
    expect(entity['props']).to.have.key('something')
    expect(entity['props']).to.have.key('var')
    expect(entity['props']['some']).to.equal('prop')
    expect(entity['props']['something']).to.equal('else')
    expect(entity['props']['var']).to.equal('prop')
    variant = entity['variants']['variant']
    expect(variant).to.have.length_of(2)
    expect(variant[0]['type']).to.equal('text')
    expect(variant[0]['value']).to.equal('one variant')
    expect(variant[1]['type']).to.equal('text')
    expect(variant[1]['value']).to.equal('another one')
def main():  # pragma: no cover
    """Command-line entry point.

    Parses every DSL file given on the command line, merges the results
    into a single dataset, optionally runs a post-processing adapter and
    prints the dataset as JSON on stdout.
    """
    parser = argparse.ArgumentParser(description='Generates training dataset from a simple DSL.')
    parser.add_argument('--version', action='version', version='%(prog)s v' + __version__)
    parser.add_argument('files', type=str, nargs='+', help='One or more DSL files to process')
    parser.add_argument('-a', '--adapter', type=str, help='Adapter to use when post-processing outputed data')
    parser.add_argument('-o', '--options', type=str, help='Raw options to give to the post processor')
    parser.add_argument('--pretty', action='store_true', help='Pretty output')

    args = parser.parse_args(sys.argv[1:])

    # Merge each file into the dataset as it is parsed instead of keeping
    # an intermediate list of parsed results.
    data = {}
    for file in args.files:
        with open(file, encoding='utf-8') as f:
            data = deep_update(data, parse(f.read()))

    options = json.loads(args.options) if args.options else {}

    if args.adapter:
        data = getattr(postprocess, args.adapter)(data, **options)

    # `print (...)` -> `print(...)`: idiomatic call syntax (PEP 8).
    print(json.dumps(data, indent=2 if args.pretty else None))
def test_it_should_parse_entities(self):
    """An entity declaration should expose its props, its text values and
    any synonym reference it contains."""
    result = parse("""
@[city](some=prop, something=else)
  paris
  rouen
  ~[new york]
""")
    expect(result['entities']).to.have.length_of(1)
    expect(result['entities']).to.have.key('city')
    entity = result['entities']['city']
    expect(entity['props']).to.have.length_of(2)
    expect(entity['props']).to.have.key('some')
    expect(entity['props']).to.have.key('something')
    expect(entity['props']['some']).to.equal('prop')
    expect(entity['props']['something']).to.equal('else')
    data = entity['data']
    expect(data).to.have.length_of(3)
    expect(data[0]['type']).to.equal('text')
    expect(data[0]['value']).to.equal('paris')
    expect(data[1]['type']).to.equal('text')
    expect(data[1]['value']).to.equal('rouen')
    # The ~[new york] reference is kept as a synonym node, not plain text.
    expect(data[2]['type']).to.equal('synonym')
    expect(data[2]['value']).to.equal('new york')
def test_it_should_parse_comments(self):
    """Top-level comments in the DSL should be ignored by the parser while
    every real declaration is still picked up."""
    result = parse("""
# chatl is really easy to understand.
#
# You can defines:
#   - Intents
#   - Entities (with or without variants)
#   - Synonyms
#   - Comments (only at the top level)

# Inside an intent, you got training data.
# Training data can refer to one or more entities and/or synonyms, they will be used
# by generators to generate all possible permutations and training samples.
%[my_intent]
  ~[greet] some training data @[date]
  another training data that uses an @[entity] at @[date#with_variant]

~[greet]
  hi
  hello

# Entities contains available samples and could refer to a synonym.
@[entity]
  some value
  other value
  ~[a synonym]

# Synonyms contains only raw values
~[a synonym]
  possible synonym
  another one

# Entities and intents can define arbitrary properties that will be made available
# to generators.
# For snips, `type` and `extensible` are used for example.
@[date](type=snips/datetime)
  tomorrow
  today

# Variants is used only to generate training sample with specific values that should
# maps to the same entity name, here `date`. Props will be merged with the root entity.
@[date#with_variant]
  the end of the day
  nine o clock
  twenty past five
""")
    expect(result['intents']).to.have.length_of(1)
    # `entity` and `date` (with_variant is stored under its root `date`).
    expect(result['entities']).to.have.length_of(2)
    expect(result['synonyms']).to.have.length_of(2)
def test_it_should_process_entities(self):
    """Entity props should drive the snips flags (`use_synonyms`,
    `automatically_extensible`, `matching_strictness`) and a `type` prop
    should map the entity to a builtin snips kind."""
    result = parse("""
@[city]
  paris
  rouen
  ~[new york]
@[room](extensible=false, strictness=0.8)
  kitchen
  bedroom
@[date](type=datetime)
  tomorrow
  on tuesday
~[new york]
  nyc
  the big apple
""")
    snips_data = snips(result)
    expect(snips_data['entities']).to.have.length_of(3)
    expect(snips_data['entities']).to.have.key('city')
    entity = snips_data['entities']['city']
    # Defaults when no props are given.
    expect(entity['use_synonyms']).to.be.true
    expect(entity['automatically_extensible']).to.be.true
    expect(entity['matching_strictness']).to.equal(1.0)
    expect(entity['data']).to.have.length_of(3)
    expect(entity['data'][0]['value']).to.equal('paris')
    expect(entity['data'][1]['value']).to.equal('rouen')
    expect(entity['data'][2]['value']).to.equal('new york')
    expect(entity['data'][2]['synonyms']).to.have.length_of(2)
    expect(entity['data'][2]['synonyms']).to.equal(
        ['nyc', 'the big apple'])
    expect(snips_data['entities']).to.have.key('room')
    entity = snips_data['entities']['room']
    expect(entity['use_synonyms']).to.be.false
    expect(entity['automatically_extensible']).to.be.false
    expect(entity['matching_strictness']).to.equal(0.8)
    expect(entity['data']).to.have.length_of(2)
    # A typed entity is exported under the builtin snips key, empty.
    expect(snips_data['entities']).to_not.have.key('date')
    expect(snips_data['entities']).to.have.key('snips/datetime')
    expect(snips_data['entities']['snips/datetime']).to.be.empty
def test_process_options(self):
    """The snips adapter should honor the `language` option, defaulting
    to english."""
    result = parse("""
@[city]
  paris
  rouen
""")
    snips_data = snips(result)
    expect(snips_data.get('language')).to.equal('en')

    snips_data = snips(result, language='fr')
    expect(snips_data.get('language')).to.equal('fr')
def test_it_should_allow_obsolete_declaration_of_type_for_now(self):
    """The deprecated `snips:type` prop should still work but emit a
    deprecation warning."""
    with patch('logging.warning') as mlog:
        result = parse("""
@[date](snips:type=snips/datetime)
  tomorrow
""")
        snips_data = snips(result)
        expect(snips_data['entities']).to.have.key('snips/datetime')
        expect(snips_data['entities']['snips/datetime']).to.be.empty
        mlog.assert_called_once_with(
            'snips:type has been replaced by type. You should now leave the snips/ prefix away when using it'
        )
def test_it_should_parse_complex_properties(self):
    """Prop names and values may contain spaces, colons, digits and
    symbols; they should be parsed verbatim."""
    result = parse("""
@[an entity](with complex=property value, and:maybe=an0 ther @)
  a value
""")
    expect(result['entities']).to.have.length_of(1)
    expect(result['entities']).to.have.key('an entity')
    entity = result['entities']['an entity']
    expect(entity['props']).to.have.length_of(2)
    expect(entity['props']).to.have.key('with complex')
    expect(entity['props']).to.have.key('and:maybe')
    expect(entity['props']['with complex']).to.equal('property value')
    expect(entity['props']['and:maybe']).to.equal('an0 ther @')
def test_it_should_parse_empty_entities(self):
    """An entity with no values of its own (only props) should still be
    parsed, with an empty data list."""
    result = parse("""
@[room]
  kitchen
  bedroom
@[anotherRoom](type=room)
""")
    expect(result['entities']).to.have.length_of(2)
    expect(result['entities']).to.have.key('room')
    expect(result['entities']).to.have.key('anotherRoom')
    expect(result['entities']['anotherRoom']['props']).to.have.key('type')
    expect(result['entities']['anotherRoom']['props']['type']).to.equal(
        'room')
    expect(result['entities']['anotherRoom']['data']).to.be.empty
def test_it_should_parse_intents(self):
    """An intent should expose its props and, per training sentence, the
    ordered list of text / entity / synonym segments."""
    result = parse("""
%[get_forecast](some=prop, something=else)
  will it rain in @[city]
  ~[greet] what's the weather like in @[city#variant]
""")
    expect(result['intents']).to.have.length_of(1)
    expect(result['intents']).to.have.key('get_forecast')
    intent = result['intents']['get_forecast']
    expect(intent['props']).to.have.length_of(2)
    expect(intent['props']).to.have.key('some')
    expect(intent['props']).to.have.key('something')
    expect(intent['props']['some']).to.equal('prop')
    expect(intent['props']['something']).to.equal('else')
    expect(intent['data']).to.have.length_of(2)
    # First sentence: plain text followed by an entity reference.
    data = intent['data'][0]
    expect(data).to.have.length_of(2)
    expect(data[0]['type']).to.equal('text')
    expect(data[0]['value']).to.equal('will it rain in ')
    expect(data[1]['type']).to.equal('entity')
    expect(data[1]['value']).to.equal('city')
    expect(data[1]['variant']).to.be.none
    # Second sentence: synonym, text, then an entity with a variant.
    data = intent['data'][1]
    expect(data).to.have.length_of(3)
    expect(data[0]['type']).to.equal('synonym')
    expect(data[0]['value']).to.equal('greet')
    expect(data[1]['type']).to.equal('text')
    expect(data[1]['value']).to.equal(" what's the weather like in ")
    expect(data[2]['type']).to.equal('entity')
    expect(data[2]['value']).to.equal('city')
    expect(data[2]['variant']).to.equal('variant')
def main():  # pragma: no cover
    """Program entry point.

    Reads every DSL file given on the command line, merges the parsed
    results into one dataset, optionally runs an adapter over it and
    prints the result as JSON on stdout.
    """
    arg_parser = argparse.ArgumentParser(
        description='Generates training dataset from a simple DSL.')
    arg_parser.add_argument('--version', action='version',
                            version='%(prog)s v' + __version__)
    arg_parser.add_argument('files', type=str, nargs='+',
                            help='One or more DSL files to process')
    arg_parser.add_argument('-a', '--adapter', type=str,
                            help='Name of the adapter to use')
    arg_parser.add_argument('-m', '--merge', type=str,
                            help='Options file to merge with the final result')
    arg_parser.add_argument('--pretty', action='store_true', help='Pretty output')

    parsed_args = arg_parser.parse_args(sys.argv[1:])

    # Fold every parsed file into a single dataset.
    dataset = {}
    for path in parsed_args.files:
        with open(path, encoding='utf-8') as handle:
            dataset = merge(dataset, parse(handle.read()))

    options = json.loads(parsed_args.merge) if parsed_args.merge else {}

    if parsed_args.adapter:
        dataset = getattr(adapters, parsed_args.adapter)(dataset, **options)

    print(json.dumps(dataset, indent=2 if parsed_args.pretty else None))
def test_it_should_process_empty_entities_which_refer_to_another_one(self):
    """An empty entity whose `type` prop points to another entity should
    reuse that entity's values and snips key in generated utterances."""
    result = parse("""
%[my_intent]
  we should go from @[room] to @[anotherRoom]
@[room]
  kitchen
  bedroom
@[anotherRoom](type=room)
""")
    snips_data = snips(result)
    # Only the concrete `room` entity is exported.
    expect(snips_data['entities']).to.have.length_of(1)
    expect(snips_data['entities']).to.have.key('room')
    expect(snips_data['intents']).to.have.length_of(1)
    expect(snips_data['intents']).to.have.key('my_intent')
    intent = snips_data['intents']['my_intent']
    expect(intent).to.have.key('utterances')
    utterances = intent['utterances']
    expect(utterances).to.have.length_of(1)
    data = utterances[0].get('data')
    expect(data).to.have.length_of(4)
    expect(data[0].get('text')).to.equal('we should go from ')
    expect(data[1].get('slot_name')).to.equal('room')
    expect(data[1].get('entity')).to.equal('room')
    expect(data[1].get('text')).to.equal('kitchen')
    expect(data[2].get('text')).to.equal(' to ')
    # The referring slot keeps its own name but uses the target entity.
    expect(data[3].get('slot_name')).to.equal('anotherRoom')
    expect(data[3].get('entity')).to.equal('room')
    expect(data[3].get('text')).to.equal('bedroom')
def fit_from_skill_data(self, skills: List[str] = None) -> None:  # pylint: disable=inconsistent-return-statements
    """Fit the interpreter with every training data registered in the inner
    TrainingsStore.

    Args:
      skills (list of str): Optional list of skill names from which we should
        retrieve training data. Used to handle context understanding.

    """
    filtered_module_trainings = self._trainings.all(self.lang)

    if skills:
        # Keep only trainings that belong to the requested skills.
        filtered_module_trainings = {
            k: v for (k, v) in filtered_module_trainings.items() if k in skills}

    self._logger.info(
        'Merging skill training data from "%d" modules',
        len(filtered_module_trainings))

    data = {}
    # Sort by module name so the merge order is deterministic.
    sorted_trainings = sorted(filtered_module_trainings.items(),
                              key=lambda x: x[0])

    for (module, training_dsl) in sorted_trainings:
        if training_dsl:
            try:
                data = merge(data, parse(training_dsl))
            except Exception as err:  # pylint: disable=W0703
                # A broken DSL must not abort the whole fit.
                self._logger.error(
                    'Could not parse "%s" training data: "%s"', module, err)
        else:
            self._logger.warning('No training data found for "%s"', module)

    try:
        # Resolve the adapter matching this interpreter's name.
        data = getattr(adapters, self.name)(data, language=self.lang)
    except AttributeError:
        return self._logger.critical(
            'No post-processors found on pychatl for this interpreter!')

    self.fit(data)
def it_should_transform_to_snips_dataset(self, it, dsl, options, expected):
    """Parse the given DSL and check the snips adapter output matches
    the expected dataset."""
    parsed = parse(dsl)
    actual = snips(parsed, **options)
    expect(actual).to.equal(expected)
def it_should_correctly_parse_dsl(self, it, dsl, expected):
    """Check that parsing the given DSL yields the expected structure."""
    actual = parse(dsl)
    expect(actual).to.equal(expected)
def get_data():
    """Return a parsed sample dataset covering lights, vacuum cleaner and
    weather forecast intents with their entities and synonyms."""
    data = parse("""
%[lights_on]
  turn the @[room]'s lights on
  would you turn lights on in the @[room]
  lights on in @[room] please
  turn on the lights in @[room]
  turn the lights on in @[room]
  enlight me in @[room]
%[lights_off]
  turn the @[room]'s lights off
  would you turn lights off in the @[room]
  lights off in @[room] please
  lights off in @[room] and @[room]
  turn off the lights in @[room]
  turn the lights off in @[room]
%[start_vacuum_cleaner]
  start ~[vacuum_cleaner]
  would you like start ~[vacuum_cleaner] please
%[stop_vacuum_cleaner]
  stop ~[vacuum_cleaner]
  would you like stop ~[vacuum_cleaner] please
%[get_forecast]
  will it be sunny in @[location] at @[date#at]
  what's the weather like in @[location] on @[date#on]
  will it rain in @[location] @[date]
  can we expect a sunny day @[date] in @[location]
  should I take an umbrella in @[location] @[date]
  what kind of weather should I expect at @[date#at] in @[location]
  what will be the weather on @[date#on] in @[location]
  tell me if it is going to rain @[date] in @[location]
~[vacuum_cleaner]
  vacuum cleaner
  vacuum
  vacuuming
  hoover
  hoovering
  aspirator
~[basement]
  cellar
@[room](extensible=false)
  living room
  kitchen
  bedroom
  ~[basement]
@[location]
  los angeles
  paris
  rio de janeiro
  tokyo
  london
  tel aviv
  new york
  saint-étienne du rouvray
@[date](type=datetime)
  tomorrow
  today
  this evening
@[date#at]
  the end of the day
  nine o'clock
@[date#on]
  tuesday
  monday
""")
    return data
def test_it_should_process_intents(self):
    """Intent sentences should be expanded into snips utterances, one per
    entity/synonym value combination."""
    result = parse("""
%[get_forecast]
  will it rain in @[city] on @[date]
  ~[greet] what's the weather like in @[city#variant]
@[city]
  paris
  rouen
@[city#variant]
  new york
  los angeles
@[date](type=datetime)
  tomorrow
~[greet]
  hi
  hello
""")
    snips_data = snips(result)
    expect(snips_data['intents']).to.have.length_of(1)
    expect(snips_data['intents']).to.have.key('get_forecast')
    intent = snips_data['intents']['get_forecast']
    expect(intent).to.have.key('utterances')
    utterances = intent['utterances']
    # 1 for the first sentence + 2 (hi/hello) for the second one.
    expect(utterances).to.have.length_of(3)
    data = utterances[0].get('data')
    expect(data).to.have.length_of(4)
    expect(data[0].get('text')).to.equal('will it rain in ')
    expect(data[1].get('text')).to.equal('paris')
    expect(data[1].get('slot_name')).to.equal('city')
    expect(data[1].get('entity')).to.equal('city')
    expect(data[2].get('text')).to.equal(' on ')
    expect(data[3].get('text')).to.equal('tomorrow')
    expect(data[3].get('slot_name')).to.equal('date')
    # Typed entity maps to the builtin snips kind.
    expect(data[3].get('entity')).to.equal('snips/datetime')
    data = utterances[1].get('data')
    expect(data).to.have.length_of(3)
    expect(data[0].get('text')).to.equal('hi')
    expect(data[1].get('text')).to.equal(" what's the weather like in ")
    expect(data[2].get('text')).to.equal('new york')
    expect(data[2].get('slot_name')).to.equal('city')
    expect(data[2].get('entity')).to.equal('city')
    data = utterances[2].get('data')
    expect(data).to.have.length_of(3)
    expect(data[0].get('text')).to.equal('hello')
    expect(data[1].get('text')).to.equal(" what's the weather like in ")
    expect(data[2].get('text')).to.equal('los angeles')
    expect(data[2].get('slot_name')).to.equal('city')
    expect(data[2].get('entity')).to.equal('city')