コード例 #1
0
def predict():
    """Prediction test view: classify a submitted article on POST.

    GET renders the page with no result; POST classifies the posted
    'article' text and renders the predicted class alongside it.
    """
    if request.method != "POST":
        return render_template("predict.html", result=None)
    # Classify the single submitted article with the NLP model.
    article = request.form['article']
    predicted = Nlp().predict([article])
    payload = {'article': article, 'class': predicted[0][0]}
    return render_template("predict.html", result=payload)
コード例 #2
0
class Database:
    """Loads the CSV relevant to a natural-language question into an
    in-memory SQLite database and executes the generated SQL against it."""

    def __init__(self, data_dir, schema_dir):
        self.data_dir = data_dir
        self.schema_dir = schema_dir
        self.nlp = Nlp(data_dir, schema_dir)
        self.data_process = data_utils(data_dir, schema_dir)

    def query_db(self, question):
        """Answer *question*: pick the matching CSV, materialize it as a
        SQLite table with a typed schema, run the NL-generated SQL query,
        and return the fetched rows."""
        engine = create_engine('sqlite://', echo=False)
        csv = self.nlp.csv_select(question)
        data_frame = self.data_process.get_dataframe(csv)
        schema = self.data_process.get_schema_for_csv(csv)

        # Fill missing numeric values BEFORE casting to str: after
        # .astype(str), NaNs have already become the literal 'nan' and
        # mean() over string columns is meaningless (and raises on
        # recent pandas). The original code did the cast first.
        data_frame = data_frame.fillna(data_frame.mean(numeric_only=True))
        data_frame = data_frame.astype(str)
        sql_schema = {}
        for col in schema['columns']:
            colname = col['name']
            coltype = column_types.get(col['type']).sql_type
            if '(' in coltype:
                # e.g. "VARCHAR(100)" -> getattr(types, "VARCHAR")(100)
                coltype, arg = coltype.split('(')
                arg = '(' + arg[:-1] + ',)'
                coltype = getattr(types, coltype)(*(ast.literal_eval(arg)))
            else:
                coltype = getattr(types, coltype)()
            sql_schema[colname] = coltype
        data_frame.to_sql(schema['name'].lower(), con=engine, if_exists='replace', dtype=sql_schema)
        agent = Agent(self.data_dir, self.schema_dir)
        query = agent.get_query(question)
        return engine.execute(query).fetchall()
コード例 #3
0
    def get_query(self, question):
        """Translate a natural-language *question* into a SQL query string.

        Returns the SQL string, or None when no matching CSV is found or
        when translation fails for any reason.
        """
        try:
            data_process = data_utils(self.data_dir, self.schema_dir)
            data_process.create_vocab()
            nlp = Nlp(self.data_dir, self.schema_dir)
            csv = nlp.csv_select(question)
            if csv is None:
                # Bail out early: the original fell through here and then
                # called nlp.get_sql_query(None, ...) with a missing CSV.
                print("Sorry,didn't catch that")
                return None
            question, valmap = nlp.get_sql_query(csv, question)

            # Substitute placeholder tokens with their literal values.
            sql_query = question
            for k, v in valmap.items():
                sql_query = sql_query.replace(k, v)

            return sql_query
        except Exception as e:
            # Best-effort: report the failure and return None instead of
            # propagating to the caller.
            print(e)
            return None
コード例 #4
0
def index(label=None):
    """Show the article list, optionally filtered by a ?label= query arg,
    together with a predicted label for each article."""
    db.build_db()
    limit = 0
    requested = request.args.get('label')
    if requested:
        label = int(requested)
        labels = (label,)
    else:
        label = 'all'
        labels = (0, 1, 2)
    articles = db.get_articles(labels, limit)
    # Run the classifier over every article summary (column index 3).
    model = Nlp()
    summaries = [entry[3] for entry in articles]
    predict_labels = [scored[0] for scored in model.predict(summaries)]
    return render_template("index.html", articles=articles, predict_labels=predict_labels, label=label)
コード例 #5
0
 def transform(self, X, y=None):
     """Run each document in X through the Arabic cleanup pipeline:
     diacritics, punctuation, normalization, then digit-to-text."""
     nlp = Nlp()
     cleaned = []
     for doc in X:
         doc = nlp.remove_diacritics(doc)
         doc = nlp.remove_punctuations(doc)
         doc = nlp.normalize_arabic(doc)
         doc = nlp.num_to_text(doc)
         cleaned.append(doc)
     return cleaned
コード例 #6
0
ファイル: gui.py プロジェクト: amit-lab/virtual_pa
    def nlp_page(self):
        """Build the VPA command window: an info label, a search box, and a
        GO button that feeds the entered text to the NLP launchers."""
        engine = Nlp()
        window = Tk()

        def destroy_win():
            window.destroy()

        def launch():
            # Dispatch the typed command through every launcher.
            text = search_entry.get()
            engine.other_launcher(text)
            engine.gui_launcher(text, self)
            engine.query(text)

        def switch_to_gui():
            self.gui_page()
            window.destroy()

        def go_to_menu():
            # Re-initialize the app state before returning to the menu.
            self.__init__()
            self.main_page()
            window.destroy()

        window.title("VPA Command Window")
        window.config(padx=30, pady=30)
        info_label = Label(window,
                           text="Insert what you want in search box \n",
                           fg='green',
                           bg='white')
        info_label.grid(row=0, column=0, columnspan=2)
        search_entry = Entry(window, width=50)
        search_entry.grid(row=1, column=0, columnspan=2)
        go_button = Button(window,
                           text="GO",
                           padx=22,
                           bg='brown',
                           fg='white',
                           command=launch)
        go_button.grid(row=2, column=0, columnspan=2)
コード例 #7
0
    def transform(self, X, y=None):
        """Tokenize X, stem or lemmatize each token list (controlled by
        self.lemmatization), drop stop words, and return each document
        re-joined as a single space-separated string."""
        nlp = Nlp()
        # Tokenization
        tokens_per_doc = nlp.tokenize_doc(X)

        if self.lemmatization:
            # Lemmatization
            tokens_per_doc = [nlp.lemmatization_text(toks) for toks in tokens_per_doc]
        else:
            # Stemming
            tokens_per_doc = [nlp.stemmimg_text(toks) for toks in tokens_per_doc]

        # Stop words removal
        tokens_per_doc = [nlp.remove_stop_words(toks) for toks in tokens_per_doc]

        return [' '.join(str(tok) for tok in doc) for doc in tokens_per_doc]
コード例 #8
0
 def __init__(self):
     """Create the NLP, data, and training-model collaborators."""
     # Tuple assignment evaluates left-to-right, preserving the original
     # construction order Nlp -> Data -> Tmodel.
     self.nlp, self.data, self.tmodel = Nlp(), Data(), Tmodel()
コード例 #9
0
class Controller:
    """Trains an intent-classification model from pattern/label data."""

    def __init__(self):
        self.nlp = Nlp()
        self.data = Data()
        self.tmodel = Tmodel()

    def preprocessingData(self, data):
        """Normalize, tokenize, and stem every pattern in *data*.

        Returns a dict with:
          "df": DataFrame of (id, label, pattern_words) rows, where
                pattern_words is the stemmed tokens joined by spaces;
          "unique_word_list": sorted unique stemmed words over all patterns.
        """
        label_id = []
        label = []
        pattern_words = []
        all_word_list = []  # every stemmed word seen in any pattern

        for row in range(data.shape[0]):
            for pattern in data["patterns"][row]:
                normalized_pattern = self.nlp.normalizeSentence(pattern)
                words = self.nlp.tokenizeSentence(normalized_pattern)
                words = self.nlp.stemWords(words)
                all_word_list.extend(words)
                label_id.append(data["id"][row])
                label.append(data["label"][row])
                pattern_words.append(' '.join(words))

        return {
            "df": self.data.createDataFrame(label_id, label, pattern_words),
            "unique_word_list": sorted(set(all_word_list))
        }

    def create_x(self, processedObj: dict):
        """Build the bag-of-words feature matrix (one row per pattern,
        one column per vocabulary word)."""
        x = []
        for row in range(processedObj["df"].shape[0]):
            # Exact token membership. The original used substring search on
            # the joined string, so "cat" wrongly matched inside "catalog".
            row_tokens = set(processedObj["df"]["pattern_words"][row].split())
            bow = [1 if word in row_tokens else 0
                   for word in processedObj["unique_word_list"]]
            x.append(bow)

        return np.array(x)

    def create_y(self, data, processedObj: dict):
        """Build one-hot label targets aligned with the pattern rows."""
        # Map each distinct label to a dense class index 0..n-1. The
        # original stored the first ROW index of each label, which can be
        # >= the number of classes and makes to_categorical fail.
        labelDict = {}
        for row in range(data.shape[0]):
            if data["label"][row] not in labelDict:
                labelDict[data["label"][row]] = len(labelDict)

        labels = [labelDict[processedObj["df"]["label"][row]]
                  for row in range(processedObj["df"].shape[0])]

        return np.array(to_categorical(labels, num_classes=len(labelDict)))

    def model(self, x, y):
        """Train a small dense network on (x, y) and save it to model.h5."""
        model = Sequential()
        model.add(Dense(128, input_shape=(x.shape[1], ), activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(y.shape[1], activation='softmax'))

        # SGD = Stochastic gradient descent
        sgd = SGD(lr=1e-2, decay=1e-2, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        print(model.summary())

        model.fit(x, y, epochs=150, batch_size=5, verbose=1)
        # The original passed the History object as save()'s positional
        # `overwrite` argument by mistake; save takes only the path here.
        model.save('model.h5')

    def run(self):
        """End-to-end pipeline: preprocess, build features/targets, train."""
        data = self.data.provideData()

        print("Processing Data")
        processedObj = self.preprocessingData(data)

        print("Generating Trainingsset")
        x = self.create_x(processedObj)
        y = self.create_y(data, processedObj)

        print("Train Model")
        self.model(x, y)

        print("model created")
コード例 #10
0
def fit():
    """Train the article classifier on everything stored in the DB,
    then redirect back to the index page."""
    nlp = Nlp()
    rows = db.get_articles()
    # Convert positional DB rows into keyed dicts for the NLP pipeline.
    articles = []
    for row in rows:
        articles.append({'id': row[0], 'entry_id': row[1], 'link': row[2],
                         'summary': row[3], 'label': row[4]})
    nlp.build_dictionary(articles)
    dictionary = nlp.get_dictionary()
    nlp.build_articles(dictionary, articles)
    nlp.build_labels(articles)
    labels = nlp.get_labels()
    nlp.fit(dictionary, articles, labels)
    return redirect(url_for('index'))
コード例 #11
0
 def __init__(self, gui=None):
     """Set up speech-recognition plus NLP helpers; *gui* is optional."""
     self.prompt_limit = None
     # Recognizer is constructed before Microphone, as in the original.
     self.recognizer, self.microphone = sr.Recognizer(), sr.Microphone()
     self.nlp = Nlp()
     self.gui = gui
コード例 #12
0
class VoiceRecognition:
    """Continuously transcribes microphone speech and feeds each utterance
    to the NLP launchers, greeting the user once at startup."""

    def __init__(self, gui=None):
        self.prompt_limit = None
        self.recognizer = sr.Recognizer()
        self.microphone = sr.Microphone()
        self.nlp = Nlp()
        self.gui = gui

    def run(self):
        """Greet the user, then loop forever: listen, transcribe, dispatch."""
        self.wish_me()
        while True:
            response = self.recognize_speech_from_mic()
            print(response)
            if response['transcription'] is None:
                continue
            #self.nlp.gui_launcher(response['transcription'], self.gui)
            self.nlp.other_launcher(response['transcription'])
            self.nlp.query(response['transcription'])

    @staticmethod
    def _greeting_for_hour(hour):
        """Return the salutation for *hour* (0-23)."""
        if 0 <= hour < 12:
            return "Good Morning! "
        # BUG FIX: the original condition was `hour>=21 and hour<18`,
        # which can never be true, so the afternoon greeting was
        # unreachable; afternoon is 12:00-17:59.
        if 12 <= hour < 18:
            return "Good Afternoon! "
        return "Good Evening! "

    def wish_me(self):
        """Speak a greeting appropriate to the current time of day."""
        hour = int(datetime.datetime.now().hour)
        text = self._greeting_for_hour(hour)
        text += "sir, how may I help you?"
        self.speak(text)

    def speak(self, text):
        """Synthesize *text* with gTTS and play it back."""
        speech = gTTS(text=text)
        speech.save('./data/sound.mp3')
        playsound('./data/sound.mp3')

    def recognize_speech_from_mic(self):
        """Transcribe speech recorded from `self.microphone`.

        Returns a dictionary with three keys:
        "success": a boolean indicating whether or not the API request was
                   successful
        "error":   `None` if no error occurred, otherwise a string containing
                   an error message if the API could not be reached or
                   speech was unrecognizable
        "transcription": `None` if speech could not be transcribed,
                   otherwise a string containing the transcribed text
        """
        # check that self.recognizer and self.microphone are the right types
        if not isinstance(self.recognizer, sr.Recognizer):
            raise TypeError("`self.recognizer` must be `Recognizer` instance")

        if not isinstance(self.microphone, sr.Microphone):
            raise TypeError("`self.microphone` must be `Microphone` instance")

        # adjust recognizer sensitivity to ambient noise and record audio
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.listen(source)

        # set up the response object
        response = {
            "success": True,
            "error": None,
            "transcription": None
        }

        # if a RequestError or UnknownValueError exception is caught,
        # update the response object accordingly
        try:
            response["transcription"] = self.recognizer.recognize_google(audio, language="en-in")
        except sr.RequestError:
            # API was unreachable or unresponsive
            response["success"] = False
            response["error"] = "API unavailable"
        except sr.UnknownValueError:
            # speech was unintelligible
            response["error"] = "Unable to recognize speech"

        return response
コード例 #13
0
 def __init__(self, data_dir, schema_dir):
     """Remember the data/schema locations and build the NLP helpers."""
     self.data_dir, self.schema_dir = data_dir, schema_dir
     self.nlp = Nlp(data_dir, schema_dir)
     self.data_process = data_utils(data_dir, schema_dir)