Beispiel #1
0
    def predict(self, input_data):
        """Run the matching network on a single pair of titles.

        Args:
            input_data: Mapping that supplies the two titles under the keys
                ``'title_1'`` and ``'title_2'``.

        Returns:
            On success ``{"status": 1, "is_match": int, "confidence": float}``,
            taken from the first prediction and the first probability returned
            by ``self.network.predict``. If any step raises, the exception is
            not propagated; ``{"status": 0, "message": str(err)}`` is returned
            instead.
        """
        try:
            title_1 = input_data['title_1']
            title_2 = input_data['title_2']

            # Each title becomes a batch of one token sequence. Both titles
            # are tokenised before either one is vectorised.
            token_batches = [
                np.array([
                    shutils.padd_fn(shutils.get_tokens(title),
                                    max_len=cnt.MAX_WORDS)
                ])
                for title in (title_1, title_2)
            ]

            # NOTE(review): presumably get_vectors maps tokens to embeddings
            # of size cnt.VECTOR_DIM using self.vector_model - confirm in
            # shutils.
            sentence_vectors = [
                shutils.get_vectors(self.vector_model, batch, cnt.VECTOR_DIM)
                for batch in token_batches
            ]

            labels, scores = self.network.predict(sentence_vectors,
                                                  return_probability=True)

            is_match = int(labels[0])
            confidence = float(scores[0])
            return {"status": 1, "is_match": is_match, "confidence": confidence}

        except Exception as err:
            # Service-style boundary: report the failure in the payload.
            return {"status": 0, "message": str(err)}
Beispiel #2
0
def get_all_tokens_for_vector(items, char_tokens=False):
    """Build one token sequence per record from each record's first field.

    Args:
        items: Iterable of indexable records; element ``0`` of every record
            is converted with ``str()`` and used as the text.
        char_tokens: Forwarded unchanged to ``shutils.get_tokens``.

    Returns:
        A list, in input order, holding the result of ``shutils.padd_fn``
        (called with ``max_len=cnt.MAX_WORDS``) for every record.
    """
    # Collect every text first, then tokenise, so a bad record fails before
    # any tokenisation work starts.
    texts = []
    for record in items:
        texts.append(str(record[0]))

    token_rows = []
    for text in texts:
        tokens = shutils.get_tokens(text, char_tokens=char_tokens)
        token_rows.append(shutils.padd_fn(tokens, max_len=cnt.MAX_WORDS))

    return token_rows
Beispiel #3
0
def get_all_tokens_for_vector(items, char_tokens=False):
    """Build token sequences from fields 3 and 5 of every record.

    Args:
        items: Indexable records; elements ``3`` and ``5`` of each record are
            converted with ``str()`` and used as titles. ``items`` is
            traversed twice, once per field.
        char_tokens: Forwarded unchanged to ``shutils.get_tokens``.

    Returns:
        A list with the ``shutils.padd_fn`` result (``max_len=cnt.MAX_WORDS``)
        for all field-3 titles in input order, followed by all field-5 titles
        in input order.
    """
    first_titles = []
    for record in items:
        first_titles.append(str(record[3]))

    second_titles = []
    for record in items:
        second_titles.append(str(record[5]))

    token_rows = []
    for title in [*first_titles, *second_titles]:
        tokens = shutils.get_tokens(title, char_tokens=char_tokens)
        token_rows.append(shutils.padd_fn(tokens, max_len=cnt.MAX_WORDS))

    return token_rows
Beispiel #4
0
def get_tokens_indices(items, indices):
    """Build word-level and character-level token data for selected records.

    Args:
        items: Indexable collection of records. For every selected record,
            element ``3`` and element ``5`` are used as the two titles and the
            last element is converted with ``int()`` to form the label.
        indices: Iterable of positions into ``items`` to process, in order.

    Returns:
        A list with one tuple per index:
        ``(words_3, words_5, chars_3, chars_5, label)``, where ``words_*`` is
        the word-level ``shutils.padd_fn`` result (``max_len=cnt.MAX_WORDS``)
        and ``chars_*`` is a list holding, for each entry of ``words_*``, its
        character-level ``shutils.padd_fn`` result
        (``max_len=cnt.MAX_CHARS``).

    Note:
        Elements ``4`` and ``6`` (descriptions, according to code that was
        previously disabled here) are not used.
    """

    def word_level(text):
        # Word tokens for one title.
        return shutils.padd_fn(shutils.get_tokens(text, char_tokens=False),
                               max_len=cnt.MAX_WORDS)

    def char_level(word_tokens):
        # Character tokens for every entry of an already-built word sequence.
        return [
            shutils.padd_fn(shutils.get_tokens(word, char_tokens=True),
                            max_len=cnt.MAX_CHARS)
            for word in word_tokens
        ]

    pairs = []
    for idx in indices:
        row = items[idx]
        title_a = str(row[3])
        title_b = str(row[5])

        words_a = word_level(title_a)
        words_b = word_level(title_b)

        chars_a = char_level(words_a)
        chars_b = char_level(words_b)

        label = int(row[-1])
        pairs.append((words_a, words_b, chars_a, chars_b, label))

    return pairs
Beispiel #5
0
    def predict_batch(self, multi_input_data):
        """Run the matching network on several title pairs in one call.

        Args:
            multi_input_data: Sized iterable of mappings, each supplying the
                two titles under the keys ``'title_1'`` and ``'title_2'``.

        Returns:
            On success ``{"status": 1, "response": [...]}``, where
            ``response`` holds one
            ``{"status": 1, "is_match": int, "confidence": float}`` entry per
            input, in input order. If any step raises, the exception is not
            propagated; ``{"status": 0, "message": str(err)}`` is returned
            for the whole batch.
        """
        try:
            first_rows = []
            second_rows = []

            for pair in multi_input_data:
                title_1 = pair['title_1']
                title_2 = pair['title_2']

                first_rows.append(
                    shutils.padd_fn(shutils.get_tokens(title_1),
                                    max_len=cnt.MAX_WORDS))
                second_rows.append(
                    shutils.padd_fn(shutils.get_tokens(title_2),
                                    max_len=cnt.MAX_WORDS))

            token_batches = (np.array(first_rows), np.array(second_rows))

            # NOTE(review): presumably get_vectors maps tokens to embeddings
            # of size cnt.VECTOR_DIM using self.vector_model - confirm in
            # shutils.
            sentence_vectors = [
                shutils.get_vectors(self.vector_model, batch, cnt.VECTOR_DIM)
                for batch in token_batches
            ]

            labels, scores = self.network.predict(sentence_vectors,
                                                  return_probability=True)

            # Index by input position so that a network output shorter than
            # the input surfaces as an error rather than a truncated response.
            results = []
            for idx in range(len(multi_input_data)):
                results.append({
                    "status": 1,
                    "is_match": int(labels[idx]),
                    "confidence": float(scores[idx])
                })

            return {"status": 1, "response": results}

        except Exception as err:
            # Service-style boundary: report the failure in the payload.
            return {"status": 0, "message": str(err)}