Esempio n. 1
0
    def transform(self, dirty_df: pd.DataFrame, col: str):
        """Featurize one column: char n-gram counts plus two regex-pattern
        count encodings, concatenated into a single dense tensor.

        Args:
            dirty_df: input frame (possibly containing erroneous values).
            col: name of the column to featurize.

        Returns:
            A one-element list holding a tensor of shape
            (n_rows, n_char_feats + n_regex_feats + n_regex2_feats).
        """
        # Hoist the column access; all three encoders read the same values.
        values = dirty_df[col].values

        char_features = self.char_counter.transform(values.tolist()).todense()

        # Per-character pattern abstraction of each value.
        regex_features = self.regex_counter.transform(
            [str2regex(val, match_whole_token=False) for val in values]
        ).todense()

        # Whole-token pattern abstraction (coarser than the per-char one).
        regex_features2 = self.regex_counter2.transform(
            [str2regex(val, match_whole_token=True) for val in values]
        ).todense()

        return [
            torch.tensor(
                np.concatenate(
                    [char_features, regex_features, regex_features2], axis=1))
        ]
Esempio n. 2
0
 def fit(self, dirty_df: pd.DataFrame, col: str):
     """Fit the char-level counter and both regex-pattern counters on
     the values of one column."""
     column_values = dirty_df[col].values
     self.char_counter.fit(column_values.tolist())

     per_char_patterns = [
         str2regex(v, match_whole_token=False) for v in column_values
     ]
     self.regex_counter.fit(per_char_patterns)

     whole_token_patterns = [
         str2regex(v, match_whole_token=True) for v in column_values
     ]
     self.regex_counter2.fit(whole_token_patterns)
Esempio n. 3
0
    def get_coexist_counts(self, values):
        """For every pair of values, count the indexed rows that contain
        both of their whole-token regex patterns.

        Args:
            values: iterable of raw cell values.

        Returns:
            Nested dict ``counts[val1][val2] -> int`` co-occurrence count
            (0 when either value has no hits in the index).
        """
        # Materialize as a list so query order and response order agree.
        unique_values = list(set(values))

        # msearch NDJSON: an empty header line ("{}") before each body.
        query = "{}\n" + "\n{}\n".join(
            json.dumps(
                {
                    "query": {
                        "term": {
                            "data": {
                                "value": str2regex(val, match_whole_token=True)
                            }
                        }
                    }
                }
            )
            for val in unique_values
        )
        mresult = self.es.msearch(query, index="n_reversed_indices")

        # BUG FIX: responses line up with `unique_values`, not with
        # `values` — the original indexed them by enumerate(values), which
        # mis-assigns hits whenever `values` contains duplicates or a
        # different ordering. Map value -> hit indices explicitly.
        hit_sets = {
            val: None if idx_list is None else set(idx_list)
            for val, idx_list in zip(
                unique_values,
                (ESQuery.get_results(res, "idx")
                 for res in mresult["responses"]),
            )
        }

        coexist_count = defaultdict(dict)
        for val1 in values:
            set1 = hit_sets[val1]  # hoisted: inner loop is quadratic
            for val2 in values:
                set2 = hit_sets[val2]
                if set1 is None or set2 is None:
                    coexist_count[val1][val2] = 0
                else:
                    # BUG FIX: the original stored the intersection *set*
                    # here while the miss branch stored the int 0; store a
                    # count consistently, as the method name promises.
                    coexist_count[val1][val2] = len(set1 & set2)

        return coexist_count
Esempio n. 4
0
    def transform(self, dirty_df: pd.DataFrame, col: str):
        """Return the TF-IDF features of one column as a single tensor.

        Args:
            dirty_df: input frame.
            col: column to featurize.

        Returns:
            One-element list holding a (n_rows, n_tfidf_features) tensor.
        """
        tfidf = self.tfidf.transform(dirty_df[col].values.tolist()).todense()

        # NOTE(review): the original also computed `sym_tfidf` features
        # (regex-pattern TF-IDF, fitted in `fit`) but never included them in
        # the returned tensor. That dead computation is dropped here; if the
        # pattern features were intended, concatenate them below and
        # re-check downstream feature dimensions.
        return [torch.tensor(np.concatenate([tfidf], axis=1))]
Esempio n. 5
0
    def fit(self, values):
        """Build frequency counters over raw values, their character-class
        patterns, and their (raw and symbolized) character trigrams, then
        wire each scoring function to its counter."""
        # Flatten per-value trigram lists into one stream of trigrams.
        all_trigrams = [
            "".join(gram)
            for val in values
            for gram in xngrams(val, 3)
        ]
        self.trigram_counter = Counter(all_trigrams)
        self.sym_trigram_counter = Counter(
            str2regex(gram, False) for gram in all_trigrams
        )

        self.val_counter = Counter(values)
        self.sym_val_counter = Counter(
            str2regex(val, False) for val in values
        )

        # Dispatch table: scoring function -> the counter it consults.
        self.func2counter = {
            val_trigrams: self.trigram_counter,
            sym_trigrams: self.sym_trigram_counter,
            value_freq: self.val_counter,
            sym_value_freq: self.sym_val_counter,
        }
Esempio n. 6
0
def sym_value_freq(values, counter):
    """Value-frequency features computed over whole-token regex patterns
    instead of the raw values."""
    whole_token_patterns = [str2regex(v, True) for v in values]
    return value_freq(whole_token_patterns, counter)
Esempio n. 7
0
def sym_trigrams(values, counter):
    """Trigram features computed over per-character regex patterns
    instead of the raw values."""
    return val_trigrams([str2regex(v, False) for v in values], counter)
Esempio n. 8
0
 def transform(self, dirty_df: pd.DataFrame, col):
     """Map each cell to the relative frequency of its whole-token regex
     pattern, as counted during `fit`.

     Returns:
         Array of per-row pattern frequencies in [0, 1].
     """
     n_rows = len(dirty_df)  # hoisted: invariant across rows
     # BUG FIX: `self.counter` is a plain dict (built via .to_dict() in
     # fit), so `self.counter[...]` raised KeyError for any pattern unseen
     # at fit time; unseen patterns now score 0.
     return (dirty_df[col].swifter.apply(
         lambda x: self.counter.get(str2regex(x, match_whole_token=True), 0)
         / n_rows).values)
Esempio n. 9
0
 def fit(self, dirty_df: pd.DataFrame, col):
     """Count the whole-token regex patterns of one column into a dict
     mapping pattern -> occurrence count."""
     pattern_series = dirty_df[col].swifter.apply(
         lambda value: str2regex(value, match_whole_token=True))
     self.counter = pattern_series.value_counts().to_dict()
Esempio n. 10
0
 def fit(self, dirty_df: pd.DataFrame, col: str):
     """Fit the raw-text and regex-pattern TF-IDF vectorizers on one
     column."""
     raw_values = dirty_df[col].values.tolist()
     self.tfidf.fit(raw_values)

     pattern_values = dirty_df[col].apply(
         lambda value: str2regex(value, match_whole_token=False)).values
     self.sym_tfidf.fit(pattern_values)
Esempio n. 11
0
def clean_str(x):
    """Strip whitespace, drop non-ASCII characters, and return the
    whole-token regex pattern of the cleaned string."""
    ascii_only = x.strip().encode("ascii", errors="ignore").decode("ascii")
    return str2regex(ascii_only, True)