Ejemplo n.º 1
0
    def get_class_incident_by_text(self, inc_text):

        logging.info('Зашли в функцию')
        inc_lemmatization = (ml.lemmatization(inc_text))
        logging.info('Сделали лемматизацию')
        inc_tokenize = ml.w2v_tokenize_text(inc_lemmatization)
        logging.info('Сделали токенизацию')
        inc_average_list = ml.word_averaging(self.kv_inc, inc_tokenize)
        logging.info('Вычислили вектор инцидента')
        predicted = self.logreg.predict([inc_average_list])
        return (predicted[0])
Ejemplo n.º 2
0
    def get_closed_similar_incident_by_text(self, inc_text):

        df_result = self.df_result
        X_full_word_average = self.X_full_word_average  #ml.word_averaging_list(self.kv_inc,df_result['full_tokenized'])
        inc_lemmatization = (ml.lemmatization(inc_text))
        inc_tokenize = ml.w2v_tokenize_text(inc_lemmatization)
        inc_average_list = ml.word_averaging(self.kv_inc, inc_tokenize)

        df_result['similar_index'] = ml.find_similar_index(
            inc_average_list, X_full_word_average.T)
        df_result = df_result.sort_values('similar_index')
        df_order = df_result[df_result['similar_index'] < 0.3].head(1000)
        return (list(
            zip(df_order['Number'], df_order['FixedDescription'],
                df_order['similar_index'], df_order['DateCreate'].astype(str),
                df_order['Status_New'])))
Ejemplo n.º 3
0
    def get_similar_incident(self, number):

        df_result = ServerJson.prepare_vector_matrix(self,
                                                     number)  #type DataFrame
        X_full_word_average = ml.word_averaging_list(
            self.kv_inc, df_result['full_tokenized'])
        inc_text = ServerJson.get_text_incident('', int(number))
        inc_lemmatization = (ml.lemmatization(inc_text))
        inc_tokenize = ml.w2v_tokenize_text(inc_lemmatization)
        inc_average_list = ml.word_averaging(self.kv_inc, inc_tokenize)

        df_result['similar_index'] = ml.find_similar_index(
            inc_average_list, X_full_word_average.T)
        df_order = df_result[df_result['similar_index'] < 0.3].head(1000)
        return (list(
            zip(df_order['Number'], df_order['FixedDescription'],
                df_order['similar_index'], df_order['DateCreate'].astype(str),
                df_order['Status_New'])))
Ejemplo n.º 4
0
 def prepare_vector_matrix(self, number):
     cnxn = pyodbc.connect(
         'DRIVER={SQL Server};SERVER=MyServer;DATABASE=MyDataBase;UID=MyUser;PWD=MyPassword'
     )
     cursor = cnxn.cursor()
     sql = '''
     SELECT Number, Description, ShortDescription, DateCreate, Status_New 
     FROM [SMP].[dbo].[OP_Request] (nolock)
     where RoleAssigned in ('IT Инж 2 ур 1С', 'IT Инж 2 ур 1С эксперт')
     and ShortDescription like '1C Retail%'
     and Status_New in ('Устранение инцидента', 'Ожидание')'''
     df_result = pd.read_sql_query(sql, cnxn)
     df_result['FixedDescription'] = df_result['Description']
     for index, row in df_result.iterrows():
         text = row['Description']
         text_new = ml.lemmatization(text)
         df_result.iat[index, 1] = text_new
     full_tokenized = df_result.apply(
         lambda r: ml.w2v_tokenize_text(r['Description']), axis=1).values
     df_result['full_tokenized'] = full_tokenized
     return (df_result)