Ejemplos de Tfidf en Python

Lenguaje de programación: Python

Namespace/Package Name: aristote.feature_extraction.tfidf

Clase / Tipo: Tfidf

Ejemplos en hotexamples.com: 6

Python Tfidf - 6 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de aristote.feature_extraction.tfidf.Tfidf extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

Tfidf(6)

fit_model(5)

transform(4)

Métodos usados con frecuencia

Tfidf (6)

fit_model (5)

transform (4)

Ejemplo n.º 1

Mostrar archivo

Archivo: test_feature_extraction.py Proyecto: Jwuthri/Aristote

    def test_get_features_name(self):
        """Fit a Tfidf on two short documents and check its feature names."""
        corpus = ['where you from', 'where are you']
        expected_names = ['are', 'from', 'where', 'you']

        vectorizer = Tfidf()
        vectorizer.fit_model(documents=corpus)

        # get_features_name is read as an attribute, not called.
        assert vectorizer.get_features_name == expected_names

Ejemplo n.º 2

Mostrar archivo

    def create_feature(self, data, feature):
        """Build a feature table from ``data[feature]`` via the sidebar choice.

        The user picks either 'Embedding' or 'TfIdf' in the Streamlit sidebar.
        For 'TfIdf' a second widget asks which n-grams to use.

        Args:
            data: Container indexed by ``feature`` whose result exposes
                ``.values`` (presumably a pandas DataFrame -- confirm).
            feature: Key of the text column to turn into features.

        Returns:
            A ``pd.DataFrame`` of generated features, or ``None`` when the
            TfIdf path is chosen but no n-gram option has been selected yet.
        """
        method = st.sidebar.selectbox("How to generate features?",
                                      ['Embedding', 'TfIdf'])

        if method == 'Embedding':
            vectors = self.embedding.predict_one(x=[data[feature].values])
            rows = []
            for vector in vectors:
                rows.append([float(value) for value in vector])
            return pd.DataFrame(rows)

        if method != "TfIdf":
            return None

        selected = st.sidebar.multiselect("Use bigrams? unigrams? or both?",
                                          ['unigrams', 'bigrams'])
        if not selected:
            st.sidebar.warning("Please select the ngrams.")
            return None

        use_bigrams = "bigrams" in selected
        # Unigrams are used when explicitly selected, and also as the
        # fallback whenever bigrams were not selected.
        use_unigrams = "unigrams" in selected or not use_bigrams
        tfidf = Tfidf(bigrams=use_bigrams, unigrams=use_unigrams)

        tfidf.fit_model(documents=data[feature].values)
        features = pd.DataFrame(tfidf.transform(document=data[feature].values))
        features.columns = tfidf.get_features_name
        return features

Ejemplo n.º 3

Mostrar archivo

Archivo: test_feature_extraction.py Proyecto: Jwuthri/Aristote

    def test_transform(self):
        """Check Tfidf.transform output for two documents, to 3 decimals."""
        corpus = ['where you from', 'where are you']
        reference = np.array([[0., 0.70490949, 0.50154891, 0.50154891],
                              [0.70490949, 0., 0.50154891, 0.50154891]])

        vectorizer = Tfidf()
        vectorizer.fit_model(documents=corpus)
        matrix = vectorizer.transform(document=corpus)

        # Round both sides to 3 decimals and compare as plain nested lists.
        expected = [[round(value, 3) for value in row] for row in reference]
        actual = [[round(value, 3) for value in row] for row in matrix]

        assert actual == expected

Ejemplo n.º 4

Mostrar archivo

Archivo: featuring.py Proyecto: Jwuthri/Aristote

 def main(self):
     """Read text from a Streamlit text area and display its features.

     Nothing is rendered until the user has entered some text. When
     ``self.model_name`` is "MultilangEmbedding" the first row returned by
     ``self.embedding.predict_one`` is shown as a one-row table. Otherwise
     a new Tfidf is fitted on the entered text alone and its transformed
     output is shown with the feature names as column headers.
     """
     text = st.text_area("Enter the text to normalize here:")
     if not text:
         return

     if self.model_name == "MultilangEmbedding":
         prediction = self.embedding.predict_one(x=text)
         vector = [float(value) for value in prediction[0]]
         st.write(pd.DataFrame([vector]))
         return

     use_bigrams = st.checkbox('Use bigrams?')
     # The instance-level Tfidf is replaced so it reflects the checkbox.
     self.tfidf = Tfidf(bigrams=use_bigrams)
     self.tfidf.fit_model(documents=[text])
     weights = self.tfidf.transform(document=[text])
     names = self.tfidf.get_features_name

     frame = pd.DataFrame(weights)
     frame.columns = names
     st.write(frame)

Ejemplo n.º 5

Mostrar archivo

Archivo: test_feature_extraction.py Proyecto: Jwuthri/Aristote

    def test_end_to_end(self):
        """Fit, transform and label the output, then compare to references.

        Checks the rounded values, the feature names, and the DataFrame
        built from both.
        """
        corpus = ['where you from', 'where are you']

        # Actual side: run the Tfidf and wrap the result in a DataFrame.
        vectorizer = Tfidf()
        vectorizer.fit_model(documents=corpus)
        raw_matrix = vectorizer.transform(document=corpus)
        names = vectorizer.get_features_name
        actual_rows = [[round(value, 3) for value in row]
                       for row in raw_matrix]
        actual_frame = pd.DataFrame(actual_rows)
        actual_frame.columns = names

        # Expected side: hard-coded reference values and column names.
        reference = np.array([[0., 0.70490949, 0.50154891, 0.50154891],
                              [0.70490949, 0., 0.50154891, 0.50154891]])
        expected_names = ['are', 'from', 'where', 'you']
        expected_rows = [[round(value, 3) for value in row]
                         for row in reference]
        expected_frame = pd.DataFrame(expected_rows)
        expected_frame.columns = expected_names

        assert actual_rows == expected_rows
        assert names == expected_names
        assert actual_frame.equals(expected_frame)

Ejemplo n.º 6

Mostrar archivo

Archivo: featuring.py Proyecto: Jwuthri/Aristote

 def __init__(self, model_name):
     """Store the model name and create the Tfidf and MUSE helper objects.

     Args:
         model_name: Identifier of the selected model, kept as given on
             ``self.model_name``.
     """
     self.model_name = model_name
     # Default-constructed Tfidf instance.
     self.tfidf = Tfidf()
     # MUSE instance stored as the embedding helper; what its constructor
     # does is not visible from here.
     self.embedding = MUSE()