def concatenate_aiml(path='aiml-en-us-foundation-alice.v1-9.zip', outfile='aiml-en-us-foundation-alice.v1-9.aiml'):
    """Check every AIML file in a ZIP archive for a closing ``</aiml>`` tag.

    Each archive member whose name ends with ``.aiml`` (case-insensitive) is
    scanned line by line until a line that starts or ends with ``</aiml>``
    (case-insensitive, after stripping whitespace) is found. A message is
    printed for every member in which no such line exists.

    NOTE(review): despite the function name, nothing is concatenated or
    written yet; ``outfile`` is accepted but never used.

    Args:
        path: Name or path of the ZIP archive. It is first resolved with
            ``find_data_path``; if that returns a falsy value the argument
            is used as given.
        outfile: Currently unused.

    Returns:
        None.
    """
    path = find_data_path(path) or path
    closing_tag = '</aiml>'

    # The context manager guarantees the archive handle is released even if
    # reading one of the members raises.
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            if not name.lower().endswith('.aiml'):
                continue

            # Reset per member so an empty file reports its own (empty) state
            # instead of raising NameError or reusing the previous file's values.
            found_closing_tag = False
            i, line = None, ''

            with zf.open(name) as fin:
                for i, raw_line in enumerate(fin):
                    # Members are read as bytes; fall back to Latin-1, which
                    # can decode any byte sequence, when UTF-8 fails.
                    try:
                        line = raw_line.decode('utf-8').strip()
                    except UnicodeDecodeError:
                        line = raw_line.decode('ISO-8859-1').strip()

                    lowered = line.lower()
                    if lowered.startswith(closing_tag) or lowered.endswith(closing_tag):
                        found_closing_tag = True
                        break

            if not found_closing_tag:
                print(
                    'Invalid AIML format: {}\nLast line (line number {}) was: {}\nexpected "</aiml>"'
                    .format(name, i, line))
def extract_aiml(path='aiml-en-us-foundation-alice.v1-9'):
    """Return file paths for an AIML data set, extracting a ZIP if necessary.

    If the resolved ``path`` is a directory, its entries are listed and
    returned joined with the directory path. Otherwise ``path`` is opened as
    a ZIP archive and every member whose name does not contain ``'.hg/'`` is
    extracted into ``BIGDATA_PATH``.

    The returned list is not filtered by file extension, so it may contain
    entries that are not ``.aiml`` files.

    Args:
        path: Directory or ZIP archive name/path. It is first resolved with
            ``find_data_path``; if that returns a falsy value the argument
            is used as given.

    Returns:
        list: Paths of the directory entries, or of the extracted members.
    """
    path = find_data_path(path) or path

    if os.path.isdir(path):
        return [os.path.join(path, entry) for entry in os.listdir(path)]

    # Close the archive as soon as extraction finishes (or fails).
    with zipfile.ZipFile(path) as zf:
        return [
            zf.extract(name, path=BIGDATA_PATH)
            for name in zf.namelist()
            if '.hg/' not in name  # skip Mercurial repository metadata
        ]
def create_brain(path='aiml-en-us-foundation-alice.v1-9.zip'):
    """Create a ``Bot`` and teach it every AIML file found at ``path``.

    The paths returned by ``extract_aiml`` are filtered to those ending in
    ``.aiml`` (case-insensitive) and each one is passed to ``bot.learn``.
    A file that raises ``AimlParserError`` is logged and skipped; the
    remaining files are still processed.

    Args:
        path: AIML ZIP archive or directory name/path. It is first resolved
            with ``find_data_path``; if that returns a falsy value the
            argument is used as given.

    Returns:
        The ``Bot`` instance after all AIML files have been processed.
    """
    path = find_data_path(path) or path
    bot = Bot()

    # Only count/learn real AIML files so the summary below is accurate.
    aiml_paths = [p for p in extract_aiml(path=path) if p.lower().endswith('.aiml')]

    for aiml_path in aiml_paths:
        # Snapshot the count before each file so the logged number is the
        # delta contributed by this file alone.
        count_before = bot._brain.template_count
        try:
            bot.learn(aiml_path)
        except AimlParserError:
            logger.error(format_exc())
            logger.warning('AIML Parse Error: {}'.format(aiml_path))
        num_templates = bot._brain.template_count - count_before
        logger.info(
            'Loaded {} trigger-response pairs.\n'.format(num_templates))

    print('Loaded {} trigger-response pairs from {} AIML files.'.format(
        bot._brain.template_count, len(aiml_paths)))
    return bot