def _load_stops_mapping(self):
    """Load the stop name mappings between the ALEX and the IDOS format.

    The ALEX format identifies a stop by a city and a pronounceable stop name,
    the IDOS format by an IDOS list and an IDOS stop name (including
    abbreviations). The conversion file is first passed to `online_update`
    (updating it from the server if needed) and then read as UTF-8 text with
    four tab-separated fields per line: city, stop, IDOS list, IDOS stop.

    Blank lines are skipped. Any other line that does not consist of exactly
    four fields raises a ValueError naming the file and the line number.

    @rtype: tuple
    @return: Mapping (city, stop) -> (idos_list, idos_stop),
        mapping (normalized idos_stop) -> (stop)
    @raise ValueError: If a non-blank line does not have exactly four fields.
    """
    # update the mapping file from the server
    # NOTE(review): the updated path is derived from this module's directory
    # while the file is read from self.file_dir -- presumably the same place;
    # confirm.
    online_update(to_project_path(os.path.join(os.path.dirname(__file__),
                                               self.CONVERSION_FNAME)))

    # load the mapping
    mapping = {}
    reverse_mapping = {}
    path = os.path.join(self.file_dir, self.CONVERSION_FNAME)
    with codecs.open(path, 'r', 'UTF-8') as fh:
        for line_no, line in enumerate(fh, 1):
            line = line.strip()
            if not line:
                # an empty line carries no mapping; do not crash on it
                continue
            fields = line.split("\t")
            if len(fields) != 4:
                raise ValueError('%s:%d: expected 4 tab-separated fields, found %d'
                                 % (path, line_no, len(fields)))
            city, stop, idos_list, idos_stop = fields
            mapping[(city, stop)] = (idos_list, idos_stop)
            # the reverse lookup is keyed by the normalized IDOS name
            reverse_mapping[self._normalize_idos_name(idos_stop)] = stop
    return mapping, reverse_mapping
def __init__(self, cases_list, strip_punct, lowercase_forms, personal_names):
    """Set up the expander: output postprocessing plus the morphological
    analyzer and generator.

    @param cases_list: List of cases (given as strings) to be used for
        generation (Czech numbers 1-7 are used)
    @param strip_punct: Strip all punctuation ?
    @param lowercase_forms: Lowercase all forms on the output?
    @param personal_names: Are we inflecting personal names?
    """
    self.stops = defaultdict(list)
    self.cases_list = cases_list
    self.personal_names = personal_names

    # choose the postprocessing step
    if strip_punct:
        def postprocess_func(text):
            # drop a whole run of space-separated punctuation tokens, keeping
            # only the space (or end of text) that terminates the run
            return re.sub(r' [\.,\-–\(\)\{\}\[\];\\\/+&](?: [\.,\-–\(\)\{\}\[\];])*( |$)', r'\1', text)
    else:
        def postprocess_func(text):
            # only remove the space in front of a period or a comma
            return re.sub(r' ([\.,])', r'\1', text)

    if lowercase_forms:
        def lowercased(text):
            # postprocess first, lowercase the result afterwards
            return postprocess_func(text).lower()

        self.__postprocess_func = lowercased
    else:
        self.__postprocess_func = postprocess_func

    # initialize morphology (model files go through online_update first)
    analyzer_model = online_update('applications/PublicTransportInfoCS/data/czech.tagger')
    generator_model = online_update('applications/PublicTransportInfoCS/data/czech.dict')
    self.__analyzer = Analyzer(analyzer_model)
    self.__generator = Generator(generator_model)
def __init__(self, ontology):
    """Sort the ontology slots into categories, pick up the value
    translations and create the morphological analyzer and generator.

    @param ontology: The ontology, handed over to the parent constructor; \
        its 'slot_attributes' and 'value_translation' entries are read here \
        (when present)
    """
    super(PTICSNLGPreprocessing, self).__init__(ontology)

    # slots holding relative / absolute times
    self.rel_time_slots = set()
    self.abs_time_slots = set()
    # slots holding temperatures / temperature intervals
    self.temp_slots = set()
    self.temp_int_slots = set()
    # slots with translated values, and the translations themselves
    self.translated_slots = set()
    self.translations = {}

    # a slot is filed under the first attribute (in this order) it carries
    slot_kinds = (
        ('relative_time', self.rel_time_slots),
        ('absolute_time', self.abs_time_slots),
        ('temperature', self.temp_slots),
        ('temperature_int', self.temp_int_slots),
    )
    if 'slot_attributes' in self.ontology:
        for slot in self.ontology['slot_attributes']:
            attributes = self.ontology['slot_attributes'][slot]
            for marker, slot_set in slot_kinds:
                if marker in attributes:
                    slot_set.add(slot)
                    break

    # load translations from the ontology
    if 'value_translation' in self.ontology:
        self.translations = self.ontology['value_translation']
        self.translated_slots.update(self.ontology['value_translation'])

    # morphology models go through online_update before being loaded
    tagger_path = online_update('applications/PublicTransportInfoCS/data/czech.tagger')
    dict_path = online_update('applications/PublicTransportInfoCS/data/czech.dict')
    self._analyzer = Analyzer(tagger_path)
    self._generator = Generator(dict_path)
def __init__(self, ontology):
    """Initialize the preprocessing: categorize the ontology slots, store the
    value translations and load the morphological analyzer and generator.

    @param ontology: The ontology, passed on to the parent constructor; \
        its 'slot_attributes' and 'value_translation' entries are read here \
        (when present)
    """
    super(PTICSNLGPreprocessing, self).__init__(ontology)

    # relative and absolute time slots
    self.rel_time_slots = set()
    self.abs_time_slots = set()
    # temperature and temperature interval slots
    self.temp_slots = set()
    self.temp_int_slots = set()
    # translated slots and their translations
    self.translated_slots = set()
    self.translations = {}

    # checked in this order; only the first matching attribute counts
    categories = (
        ('relative_time', self.rel_time_slots),
        ('absolute_time', self.abs_time_slots),
        ('temperature', self.temp_slots),
        ('temperature_int', self.temp_int_slots),
    )
    if 'slot_attributes' in self.ontology:
        for slot in self.ontology['slot_attributes']:
            slot_attrs = self.ontology['slot_attributes'][slot]
            for attr_name, target in categories:
                if attr_name in slot_attrs:
                    target.add(slot)
                    break

    # load translations from the ontology
    if 'value_translation' in self.ontology:
        self.translations = self.ontology['value_translation']
        self.translated_slots.update(self.ontology['value_translation'])

    # the model files are passed through online_update before loading
    analyzer_model = online_update(
        'applications/PublicTransportInfoCS/data/czech.tagger')
    generator_model = online_update(
        'applications/PublicTransportInfoCS/data/czech.dict')
    self._analyzer = Analyzer(analyzer_model)
    self._generator = Generator(generator_model)
def _load_stops_mapping(self):
    """Load the mapping from ALEX stops format (city + pronounceable name)
    into IDOS format (IDOS list + IDOS name including abbreviations), together
    with a reverse mapping of stop names so that pronounceable names can be
    returned.

    The conversion file is passed to `online_update` first (updating it from
    the server if needed). It is then read as UTF-8 text holding four
    tab-separated fields per line: city, stop, IDOS list, IDOS stop.

    Blank lines are skipped. Any other line that does not consist of exactly
    four fields raises a ValueError naming the file and the line number.

    @rtype: tuple
    @return: Mapping (city, stop) -> (idos_list, idos_stop),
        mapping (normalized idos_stop) -> (stop)
    @raise ValueError: If a non-blank line does not have exactly four fields.
    """
    # update the mapping file from the server
    # NOTE(review): the updated path is built from this module's directory,
    # the file is then read from self.file_dir -- presumably identical;
    # confirm.
    online_update(to_project_path(os.path.join(os.path.dirname(__file__),
                                               self.CONVERSION_FNAME)))

    # load the mapping
    mapping = {}
    reverse_mapping = {}
    path = os.path.join(self.file_dir, self.CONVERSION_FNAME)
    with codecs.open(path, 'r', 'UTF-8') as fh:
        for line_no, line in enumerate(fh, 1):
            line = line.strip()
            if not line:
                # nothing to map on an empty line; skip instead of crashing
                continue
            fields = line.split("\t")
            if len(fields) != 4:
                raise ValueError('%s:%d: expected 4 tab-separated fields, found %d'
                                 % (path, line_no, len(fields)))
            city, stop, idos_list, idos_stop = fields
            mapping[(city, stop)] = (idos_list, idos_stop)
            # reverse lookups use the normalized IDOS name as the key
            reverse_mapping[self._normalize_idos_name(idos_stop)] = stop
    return mapping, reverse_mapping
from database import database import codecs import os from alex.utils.config import online_update, to_project_path # tab-separated file containing street + city + lon|lat coordinates + slot_specification STREETS_TYPES_FNAME = 'streets.types.csv' # tab-separated file containing stop + city + lon|lat coordinates GENERAL_STOPS_LOCATIONS_FNAME = 'stops.locations.csv' BOROUGH_STOPS_LOCATIONS_FNAME = 'stops.borough.locations.csv' # tab-separated file containing city + state + lon|lat coordinates CITIES_LOCATIONS_FNAME = 'cities.locations.csv' # load new versions of the data files from the server online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), STREETS_TYPES_FNAME))) online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), GENERAL_STOPS_LOCATIONS_FNAME))) online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), BOROUGH_STOPS_LOCATIONS_FNAME))) online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_LOCATIONS_FNAME))) ontology = {
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoEN language model files to online_update
# when this file is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    # the files are processed in the listed order
    for lm_file in (
            'applications/PublicTransportInfoEN/lm/final.bg.arpa',
            'applications/PublicTransportInfoEN/lm/final.tg.arpa',
            'applications/PublicTransportInfoEN/lm/final.qg.arpa',
            'applications/PublicTransportInfoEN/lm/final.pg.arpa',
            'applications/PublicTransportInfoEN/lm/final.dict',
            'applications/PublicTransportInfoEN/lm/final.dict.sp_sil',
            'applications/PublicTransportInfoEN/lm/final.vocab'):
        online_update(lm_file)
] # name of the file with one stop per line, assumed to reside in the same # directory as this script # # The file is expected to have this format: # <value>; <phrase>; <phrase>; ... # where <value> is the value for a slot and <phrase> is its possible surface # form. STOPS_FNAME = "stops.expanded.txt" CITIES_FNAME = "cities.expanded.txt" TRAIN_NAMES_FNAME = "train_names.expanded.txt" # load new stops & cities list from the server if needed online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), STOPS_FNAME))) online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_FNAME))) online_update( to_project_path( os.path.join(os.path.dirname(os.path.abspath(__file__)), TRAIN_NAMES_FNAME))) def db_add(category_label, value, form): """A wrapper for adding a specified triple to the database.""" # category_label = category_label.strip() # value = value.strip()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoCS language model files to online_update
# when this file is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    # the files are processed in the listed order
    for lm_file in (
            'applications/PublicTransportInfoCS/lm/final.bg.arpa',
            'applications/PublicTransportInfoCS/lm/final.tg.arpa',
            'applications/PublicTransportInfoCS/lm/final.qg.arpa',
            'applications/PublicTransportInfoCS/lm/final.pg.arpa',
            'applications/PublicTransportInfoCS/lm/final.dict',
            'applications/PublicTransportInfoCS/lm/final.dict.sp_sil',
            'applications/PublicTransportInfoCS/lm/final.vocab'):
        online_update(lm_file)
from __future__ import unicode_literals from database import database import codecs import os from alex.utils.config import online_update, to_project_path # tab-separated file containing street + city + lon|lat coordinates + slot_specification STREETS_TYPES_FNAME = 'streets.types.csv' # tab-separated file containing stop + city + lon|lat coordinates GENERAL_STOPS_LOCATIONS_FNAME = 'stops.locations.csv' BOROUGH_STOPS_LOCATIONS_FNAME = 'stops.borough.locations.csv' # tab-separated file containing city + state + lon|lat coordinates CITIES_LOCATIONS_FNAME = 'cities.locations.csv' # load new versions of the data files from the server online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), STREETS_TYPES_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), GENERAL_STOPS_LOCATIONS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), BOROUGH_STOPS_LOCATIONS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_LOCATIONS_FNAME))) ontology = { 'slots': { 'silence': set([]), 'ludait': set([]), 'task': set(['find_connection', 'find_platform', 'weather']), 'from': set([]), 'to': set([]), 'via': set([]), 'in': set([]), 'stop': set([]), 'street': set([]),
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes three PublicTransportInfoCS language model files (pg.arpa, dict,
# vocab) to online_update when this file is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    for lm_file in (
            'applications/PublicTransportInfoCS/lm/final.pg.arpa',
            'applications/PublicTransportInfoCS/lm/final.dict',
            'applications/PublicTransportInfoCS/lm/final.vocab'):
        online_update(lm_file)
#!/usr/bin/env python
# encoding: utf-8
# Passes the voip_en Kaldi ASR resource files to online_update when this file
# is run as a script.
import autopath

from alex.utils.config import online_update

if __name__ == "__main__":
    # the files are processed in the listed order
    for resource in (
            # Description files
            'resources/asr/voip_en/kaldi/results.log',
            'resources/asr/voip_en/kaldi/experiment_bash_vars.log',
            'resources/asr/voip_en/kaldi/alex_gitlog.log',
            'resources/asr/voip_en/kaldi/alex_gitdiff.log',
            # Models
            'resources/asr/voip_en/kaldi/mfcc.conf',
            'resources/asr/voip_en/kaldi/tree',
            'resources/asr/voip_en/kaldi/silence.csl',
            'resources/asr/voip_en/kaldi/final.mat',
            'resources/asr/voip_en/kaldi/tri2a.mdl',
            'resources/asr/voip_en/kaldi/tri2b.mdl',
            'resources/asr/voip_en/kaldi/tri2b_mmi.mdl'):
        online_update(resource)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoCS data files to online_update when this file
# is run as a script.
#
# NOTE(review): the original layout was flattened; this assumes the
# online_update calls belonged to the __main__ guard -- confirm.

if __name__ == '__main__':
    # autopath has to be imported before anything from the alex package:
    # presumably it is what makes `alex` importable for script runs, so
    # importing it after `from alex...` would come too late -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    online_update('applications/PublicTransportInfoCS/data/czech.dict')
    online_update('applications/PublicTransportInfoCS/data/czech.tagger')

    # currently disabled:
    #online_update('applications/PublicTransportInfoCS/data/stops-idos.tsv')
    #online_update('applications/PublicTransportInfoCS/data/cities.txt')
    online_update('applications/PublicTransportInfoCS/data/stops.txt')

    online_update('applications/PublicTransportInfoCS/data/cities.expanded.txt')
    online_update('applications/PublicTransportInfoCS/data/stops.expanded.txt')

    online_update('applications/PublicTransportInfoCS/data/cities_stops.tsv')
    # currently disabled:
    #online_update('applications/PublicTransportInfoCS/data/cities_locations.tsv')
    online_update('applications/PublicTransportInfoCS/data/idos_map.tsv')
#!/usr/bin/env python
# encoding: utf-8
# Passes the voip_en Kaldi ASR resource files to online_update when this file
# is run as a script.
#
# NOTE(review): the original layout was flattened; this assumes the
# online_update calls belonged to the __main__ guard -- confirm.

if __name__ == "__main__":
    # autopath has to be imported before anything from the alex package:
    # presumably it is what makes `alex` importable for script runs, so
    # importing it after `from alex...` would come too late -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == "__main__":
    # Description files
    online_update('resources/asr/voip_en/kaldi/results.log')
    online_update('resources/asr/voip_en/kaldi/experiment_bash_vars.log')
    online_update('resources/asr/voip_en/kaldi/alex_gitlog.log')
    online_update('resources/asr/voip_en/kaldi/alex_gitdiff.log')

    # Models
    online_update('resources/asr/voip_en/kaldi/mfcc.conf')
    online_update('resources/asr/voip_en/kaldi/tree')
    online_update('resources/asr/voip_en/kaldi/silence.csl')
    online_update('resources/asr/voip_en/kaldi/final.mat')
    online_update('resources/asr/voip_en/kaldi/tri2a.mdl')
    online_update('resources/asr/voip_en/kaldi/tri2b.mdl')
    online_update('resources/asr/voip_en/kaldi/tri2b_mmi.mdl')
#!/usr/bin/env python
# encoding: utf-8
# Passes the voip_cs Kaldi ASR resource files to online_update when this file
# is run as a script.
import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    # the files are processed in the listed order
    for resource in (
            # Description files
            'resources/asr/voip_cs/kaldi/results.log',
            'resources/asr/voip_cs/kaldi/experiment_bash_vars.log',
            'resources/asr/voip_cs/kaldi/alex_gitlog.log',
            'resources/asr/voip_cs/kaldi/alex_gitdiff.log',
            # Models
            'resources/asr/voip_cs/kaldi/mfcc.conf',
            'resources/asr/voip_cs/kaldi/phones.txt',
            'resources/asr/voip_cs/kaldi/silence.csl',
            'resources/asr/voip_cs/kaldi/tri2a.mdl',
            'resources/asr/voip_cs/kaldi/tri2a.tree',
            'resources/asr/voip_cs/kaldi/tri2b.mdl',
            'resources/asr/voip_cs/kaldi/tri2b.tree',
            'resources/asr/voip_cs/kaldi/tri2b.mat',
            'resources/asr/voip_cs/kaldi/tri2b_bmmi.mdl',
            'resources/asr/voip_cs/kaldi/tri2b_bmmi.tree',
            'resources/asr/voip_cs/kaldi/tri2b_bmmi.mat'):
        online_update(resource)
#!/usr/bin/env python
# encoding: utf-8
# Passes the voip_cs Kaldi ASR resource files to online_update when this file
# is run as a script.
#
# NOTE(review): the original layout was flattened; this assumes the import
# and all online_update calls belonged to the single __main__ guard -- confirm.

if __name__ == '__main__':
    import autopath
    from alex.utils.config import online_update

    # the files are processed in the listed order
    for resource in (
            # Description files
            'resources/asr/voip_cs/kaldi/results.log',
            # disabled: 'resources/asr/voip_cs/kaldi/experiment_bash_vars.log'
            # disabled: 'resources/asr/voip_cs/kaldi/alex_gitlog.log'
            # disabled: 'resources/asr/voip_cs/kaldi/alex_gitdiff.log'
            # Models
            'resources/asr/voip_cs/kaldi/mfcc.conf',
            'resources/asr/voip_cs/kaldi/phones.txt',
            'resources/asr/voip_cs/kaldi/silence.csl',
            'resources/asr/voip_cs/kaldi/tri2b.mdl',
            'resources/asr/voip_cs/kaldi/tri2b.tree',
            'resources/asr/voip_cs/kaldi/tri2b.mat',
            'resources/asr/voip_cs/kaldi/tri2b_bmmi.mdl',
            'resources/asr/voip_cs/kaldi/tri2b_bmmi.tree',
            'resources/asr/voip_cs/kaldi/tri2b_bmmi.mat'):
        online_update(resource)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoCS dainn classifier model file to
# online_update when this file is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    model_file = "applications/PublicTransportInfoCS/slu/dainnclassifier/dainn.nbl.model.all"
    online_update(model_file)
# -*- coding: utf-8 -*- from __future__ import unicode_literals from database import database import codecs import os from alex.utils.config import online_update, to_project_path # tab-separated file containing city + stop in that city, one per line CITIES_STOPS_FNAME = 'cities_stops.tsv' # tab-separated file containing city + all locations of the city/cities with this name # (as pipe-separated longitude, latitude, district, region) CITIES_LOCATION_FNAME = 'cities_locations.tsv' # load new versions of the data files from the server online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_STOPS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_LOCATION_FNAME))) ontology = { 'slots': { 'silence': set([]), 'ludait': set([]), 'task': set(['find_connection', 'find_platform', 'weather']), 'from': set([]), 'to': set([]), 'via': set([]), 'in': set([]), 'stop': set([]), 'from_stop': set(['Zličín', 'Anděl', ]), 'to_stop': set(['Zličín', 'Anděl', ]), 'via_stop': set(['Zličín', 'Anděl', ]),
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoEN data files to online_update.
# There is no __main__ guard here: the calls run whenever this module is
# executed or imported.
import autopath

from alex.utils.config import online_update

# the files are processed in the listed order
for _data_file in (
        'applications/PublicTransportInfoEN/data/states.expanded.txt',
        'applications/PublicTransportInfoEN/data/cities.expanded.txt',
        'applications/PublicTransportInfoEN/data/stops.expanded.txt',
        'applications/PublicTransportInfoEN/data/streets.expanded.txt',
        'applications/PublicTransportInfoEN/data/boroughs.expanded.txt',
        'applications/PublicTransportInfoEN/data/cities.locations.csv',
        'applications/PublicTransportInfoEN/data/stops.locations.csv',
        'applications/PublicTransportInfoEN/data/stops.borough.locations.csv',
        'applications/PublicTransportInfoEN/data/streets.types.csv'):
    online_update(_data_file)
"osmnáctý", "devatenáctý", "dvacátý", "jednadvacátý", "dvaadvacátý", "třiadvacátý"] # name of the file with one stop per line, assumed to reside in the same # directory as this script # # The file is expected to have this format: # <value>; <phrase>; <phrase>; ... # where <value> is the value for a slot and <phrase> is its possible surface # form. STOPS_FNAME = "stops.expanded.txt" CITIES_FNAME = "cities.expanded.txt" TRAIN_NAMES_FNAME = "train_names.expanded.txt" # load new stops & cities list from the server if needed online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), STOPS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), TRAIN_NAMES_FNAME))) def db_add(category_label, value, form): """A wrapper for adding a specified triple to the database.""" # category_label = category_label.strip() # value = value.strip() # form = form.strip() if len(value) == 0 or len(form) == 0: return if value in database[category_label] and isinstance(database[category_label][value], list): database[category_label][value] = set(database[category_label][value])
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes three PublicTransportInfoEN language model files (pg.arpa, dict,
# vocab) to online_update when this file is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    for lm_file in (
            'applications/PublicTransportInfoEN/lm/final.pg.arpa',
            'applications/PublicTransportInfoEN/lm/final.dict',
            'applications/PublicTransportInfoEN/lm/final.vocab'):
        online_update(lm_file)
# name of the file with one stop per line, assumed to reside in the same # directory as this script # # The file is expected to have this format: # <value>; <phrase>; <phrase>; ... # where <value> is the value for a slot and <phrase> is its possible surface # form. STREETS_FNAME = "streets.expanded.txt" STOPS_FNAME = "stops.expanded.txt" BOROUGHS_FNAME = "boroughs.expanded.txt" CITIES_FNAME = "cities.expanded.txt" STATES_FNAME = "states.expanded.txt" # load new stops & cities list from the server if needed online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), STREETS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), STOPS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), BOROUGHS_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), CITIES_FNAME))) online_update(to_project_path(os.path.join(os.path.dirname(os.path.abspath(__file__)), STATES_FNAME))) def db_add(category_label, value, form): """A wrapper for adding a specified triple to the database.""" # category_label = category_label.strip() # value = value.strip() # form = form.strip() if len(value) == 0 or len(form) == 0: return
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoEN HCLG model files to online_update when
# this file is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    # the files are processed in the listed order
    for model_file in (
            'applications/PublicTransportInfoEN/hclg/models/mfcc.conf',
            'applications/PublicTransportInfoEN/hclg/models/tri2b_bmmi.mdl',
            'applications/PublicTransportInfoEN/hclg/models/tri2b_bmmi.mat',
            'applications/PublicTransportInfoEN/hclg/models/HCLG_tri2b_bmmi.fst',
            'applications/PublicTransportInfoEN/hclg/models/words.txt',
            'applications/PublicTransportInfoEN/hclg/models/silence.csl'):
        online_update(model_file)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoCS HCLG model files to online_update when
# this file is run as a script.
import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    # the files are processed in the listed order
    for model_file in (
            'applications/PublicTransportInfoCS/hclg/models/mfcc.conf',
            'applications/PublicTransportInfoCS/hclg/models/tri2b_bmmi.mdl',
            'applications/PublicTransportInfoCS/hclg/models/tri2b_bmmi.mat',
            'applications/PublicTransportInfoCS/hclg/models/HCLG_tri2b_bmmi.fst',
            'applications/PublicTransportInfoCS/hclg/models/words.txt',
            'applications/PublicTransportInfoCS/hclg/models/silence.csl'):
        online_update(model_file)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Passes the PublicTransportInfoCS data files to online_update when this file
# is run as a script.

if __name__ == '__main__':
    # presumably prepares the import path for script runs -- confirm
    import autopath

from alex.utils.config import online_update

if __name__ == '__main__':
    # the files are processed in the listed order
    for data_file in (
            'applications/PublicTransportInfoCS/data/czech.dict',
            'applications/PublicTransportInfoCS/data/czech.tagger',
            # disabled: 'applications/PublicTransportInfoCS/data/stops-idos.tsv'
            # disabled: 'applications/PublicTransportInfoCS/data/cities.txt'
            'applications/PublicTransportInfoCS/data/stops.txt',
            'applications/PublicTransportInfoCS/data/cities.expanded.txt',
            'applications/PublicTransportInfoCS/data/stops.expanded.txt',
            'applications/PublicTransportInfoCS/data/cities_stops.tsv',
            # disabled: 'applications/PublicTransportInfoCS/data/cities_locations.tsv'
            'applications/PublicTransportInfoCS/data/idos_map.tsv'):
        online_update(data_file)
#!/usr/bin/env python
# encoding: utf-8
# Passes the PublicTransportInfoEN language model term lists to online_update
# when this file is run as a script.
#
# NOTE(review): the original layout was flattened; this assumes the import
# and all online_update calls belonged to the single __main__ guard -- confirm.

if __name__ == '__main__':
    import autopath
    from alex.utils.config import online_update

    # terms
    for terms_file in (
            'applications/PublicTransportInfoEN/lm/terms/boroughs.txt',
            'applications/PublicTransportInfoEN/lm/terms/cities.txt',
            'applications/PublicTransportInfoEN/lm/terms/states.txt',
            'applications/PublicTransportInfoEN/lm/terms/stops.txt',
            'applications/PublicTransportInfoEN/lm/terms/streets.txt'):
        online_update(terms_file)