Beispiel #1
0
import os
import re

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

from keras import layers
from keras.models import Model
from keras import backend as K
import tensorflow as tf
import tensorflow_hub as hub

from nlpia_bot.constants import DATA_DIR
from nlpia_bot import spacy_language_model

# Load the 'en_core_web_lg' language model once, at import time.
nlp = spacy_language_model.load("en_core_web_lg")

# Labeled training file, located under the project's data directory.
file_name = os.path.join(DATA_DIR, 'trec', 'train_5500.label')

# Read the raw bytes and decode them explicitly; `txt` stays a bytes object.
with open(file_name, 'rb') as f:
    txt = f.read()

lines = txt.decode('latin').splitlines()

# One record per line: "<UPPERCASE label>:<lowercase label> <remaining text>".
# Compiled once here instead of being re-resolved on every loop iteration.
_label_line_pattern = re.compile(r'([A-Z]+):([a-z]+)[ ]+(.+)')

# List of (uppercase_label, lowercase_label, text) tuples, one per parsed line.
df = []
for idx, line in enumerate(lines):
    if not line.strip():
        # Blank lines carry no record; skipping them avoids a crash on
        # `None.groups()` for files with stray empty lines.
        continue
    match = _label_line_pattern.match(line)
    if match is None:
        # Fail loudly and point at the offending line rather than raising an
        # opaque AttributeError from `match.groups()`.
        raise ValueError(
            'Unable to parse line {} of {!r}: {!r}'.format(idx + 1, file_name, line))
    df.append(match.groups())
Beispiel #2
0
import time
import csv
import gzip

from tqdm import tqdm
import pandas as pd
from wikipediaapi import Wikipedia

from nlpia_bot import constants
from nlpia_bot.spacy_language_model import load
from nlpia_bot.etl.vectors import phrase_to_vec

import logging
# Module-level logger; falls back to the root logger name ('') if `__name__`
# is not present in the current namespace.
log = logging.getLogger(locals().get('__name__', ''))

# Language model loaded once at import time and shared by this module.
nlp = load('en_core_web_md')

# Page titles used by this module (presumably Wikipedia articles -- confirm
# against the code that consumes them).
TITLES = [
    'Chatbot',
    'ELIZA',
    'Turing_test',
    'AIML',
    'Chatterbot',
    'Loebner_prize',
    'Chinese_room',
]

# Section headings to leave out (presumably when extracting article text --
# confirm against the code that consumes them).
EXCLUDE_HEADINGS = [
    'See also',
    'References',
    'Bibliography',
    'External links',
]


class WikiIndex():
    """ Table of titles plus a title -> row-number lookup.

    The table is whatever `self.load()` returns (that method is not shown in
    this part of the file). By default it is asked to load the English
    Wikipedia namespace-0 title dump referenced by `_url`.
    """
    _url = 'https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-all-titles-in-ns0.gz'

    def __init__(self, url=None, refresh=False, **pd_kwargs):
        """ Load the titles table and build the `title_row` lookup.

        Args:
            url: source of the titles; a falsy value keeps the class default.
            refresh: passed straight through to `self.load()`.
            **pd_kwargs: extra keyword arguments passed through to `self.load()`
                (presumably pandas reader options -- confirm against `load`).
        """
        self._url = url or self._url
        self.df_titles = self.load(url=self._url, refresh=refresh, **pd_kwargs)
        # Disabled experiments kept for reference:
        # self.title_slug = self.df_titles.to_dict()
        # self.df_vectors = pd.DataFrame(nlp(s).vector for s in self.df_titles.index.values)
        # self.vectors = dict(zip(range(len(self.df_titles)), ))

        # Map every index label of the table to its positional row number.
        index_labels = self.df_titles.index.values
        self.title_row = {label: row for row, label in enumerate(index_labels)}
        # AttributeError: 'tuple' object has no attribute 'lower
Beispiel #3
0
""" Pattern and template based chatbot dialog engines """
import re

import pandas as pd

from nlpia_bot.etl import glossaries
from nlpia_bot import spacy_language_model

# Load the 'en_core_web_md' language model once at import time; Bot.__init__
# stores a reference to this shared object on each instance.
nlp = spacy_language_model.load('en_core_web_md')


class Bot:
    """ Bot that can reply with definitions from glossary yml files in data/faq/glossary-*.yml

    >>> bot = Bot()
    >>> bot.reply('allele')
    [(1.0, "I don't understand")]
    >>> bot.reply('What is a nucleotide?')
    [(1,
     'The basic building blocks of DNA and RNA...
    """
    def __init__(self, domains=('dsdh', )):
        global nlp
        self.nlp = nlp
        self.glossary = glossaries.load(domains=domains)
        self.glossary.fillna('', inplace=True)
        self.glossary.index = self.glossary['term'].str.lower().str.strip()
        self.vector = dict()
        self.vector['term'] = pd.DataFrame(
            {s: nlp(s or '').vector
             for s in self.glossary['term']})