class Dataset:
    """Load the chat corpus and expose its questions, answers, page-rank
    scores and the fitted tokenizer."""

    def __init__(self):
        # Shared pre-processing helper used by get_page_compute().
        self.pp = PreProcessing()

    def import_dataset(self):
        """Read the tab-separated corpus and normalize its column names.

        Returns a DataFrame with columns: msg_line, user_id, movie_id,
        msg, msg_pre_processed, msg_2, target.
        """
        frame = pd.read_csv(DATA_FILE,
                            delimiter="\t",
                            quoting=3,
                            encoding="ISO-8859-2")
        frame.columns = [
            'msg_line', 'user_id', 'movie_id', 'msg', 'msg_pre_processed',
            'msg_2', 'target'
        ]
        return frame

    def get_questions(self, messages):
        """Unique pre-processed messages labelled as questions (target == 1)."""
        questions = messages[messages["target"] == 1]
        return set(questions["msg_pre_processed"].astype(str))

    def get_answers(self, messages):
        """Unique pre-processed messages labelled as answers (target == 0)."""
        answers = messages[messages["target"] == 0]
        return set(answers["msg_pre_processed"].astype(str))

    def get_page_compute(self, qea=0):
        """Load page-rank scores and return them as a normalized dict.

        qea == 0 selects the answers file, anything else the questions file.
        """
        source = PAGE_RANK_ANSWERS if qea == 0 else PAGE_RANK_QUESTIONS
        ranks = self.pp.pre_processing_page_rank_file(source)
        return self.pp.normalize_dictionary(ranks)

    def load_tokenizer(self):
        """Unpickle and return the previously fitted tokenizer."""
        with open(TOKENIZER_FILE, "rb") as handle:
            return pickle.load(handle)
class Similarity:
    """Rank candidate replies for an incoming message using page-rank
    scores and bag-of-words cosine similarity."""

    def __init__(self, questions: set, answers: set, word_vectors=None):
        self.bow = CountVectorizer()
        self.questions = questions
        self.answers = answers
        self.word_vectors = word_vectors
        self.pp = PreProcessing()

    def get_the_next_conversation(self, conversations, df):
        """Return the reply (`msg_2`) paired with the first key of
        *conversations*; fall back to a canned message when it is empty.

        `conversations` is expected to be ordered best-first (see the
        return_conversation_by_* methods), so its first key is the best
        matching pre-processed message.
        """
        try:
            conversation = next(iter(conversations))
        except StopIteration as e:
            # Empty candidate dict: log it and answer with a generic reply
            # instead of crashing the chat loop.
            save_content_to_log(e)
            return naive_massage()
        return list(df[df['msg_pre_processed'] == conversation]['msg_2'])[0]

    def return_conversation_by_page_rank(self,
                                         msg,
                                         conversations,
                                         page_compute,
                                         reverse=True):
        """Combine normalized similarity scores with page-rank scores and
        return them sorted by highest combined score first (by default).

        NOTE(review): `page_compute[k]` raises KeyError for any
        conversation absent from the page-rank table — presumably callers
        guarantee full coverage; verify. `msg` is unused but kept for
        interface compatibility.
        """
        conversations = self.pp.normalize_dictionary(conversations)
        combined = {
            k: page_compute[k] + v
            for k, v in conversations.items()
        }
        return dict(
            sorted(combined.items(),
                   key=lambda item: item[1],
                   reverse=reverse))

    def return_conversation_by_cossine(self, msg, res):
        """Score *msg* against every question (res >= 0.5) or answer
        (res < 0.5) using bag-of-words cosine similarity.

        Returns a dict mapping each candidate message to its similarity;
        undefined similarities (NaN, e.g. zero vectors) become 0.0.
        """
        msg_list = self.questions if res >= 0.5 else self.answers
        similarity = []
        for candidate in msg_list:
            candidate = str(candidate)
            # Re-fit on just this pair so both vectors share one vocabulary.
            vector_bow = self.bow.fit_transform([msg, candidate])
            dense = vector_bow.todense()  # hoisted: was materialized twice
            msg_bow = dense[0]
            candidate_bow = dense[1]
            # A single shared token collapses the vectors to shape (1, 1);
            # cosine distance is degenerate there, so use euclidean instead.
            if msg_bow.shape == (1, 1) and candidate_bow.shape == (1, 1):
                d = 1 - distance.euclidean(msg_bow, candidate_bow)
            else:
                d = 1 - distance.cosine(msg_bow, candidate_bow)
            similarity.append(0.0 if math.isnan(float(d)) else d)
        return dict(zip(msg_list, similarity))