Python Loader Exemples, utils.utils.Loader Python Exemples

Exemple #1

0

Afficher le fichier

            if x == -1 or x == 0:
                f.write(str(classes[0]))
            else:
                f.write(str(classes[1]))
            f.write("\n")
    return res


"""      
X_train,Y_train = Loader.load_pres(fname)
X_test, _ = Loader.load_pres(tname)

result = predict(X_train, Y_train, X_test, save = "auteurs.txt", classes = ["M","C"], post_processing=True)

fig,ax = plt.subplots(figsize=(35,100)) 
ax.imshow(result.reshape(54,-1),interpolation="nearest")
"""
# plt.tight_layout()

# Sentiment run on the movie data: load_movies yields a (samples, labels)
# pair, load_movies_test a single collection of samples to predict.
# NOTE(review): fname_2, tname_2, params_sentiments and predict are defined
# outside this excerpt.
X_train, Y_train = Loader.load_movies(fname_2)
X_test = Loader.load_movies_test(tname_2)

# Predictions are requested with save="sentiments.txt" and the two output
# labels "-1" / "1"; post-processing and "equilibrage" (French: balancing)
# are both switched off for this run.
# NOTE(review): exact meaning of each keyword depends on predict's
# signature, which is not fully visible here — confirm.
result_sent = predict(X_train,
                      Y_train,
                      X_test,
                      params=params_sentiments,
                      save="sentiments.txt",
                      classes=["-1", "1"],
                      post_processing=False,
                      equilibrage=False)

Exemple #2

0

Afficher le fichier

Fichier : test_solution.py Projet : LieceC/Projet-Rital-Luc-Dao

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn import linear_model as lin
from sklearn import svm
import sklearn.naive_bayes as nb

from wordcloud import WordCloud
from nltk.corpus import stopwords

import matplotlib.pyplot as plt
from time import time
import spacy
import numpy as np
import pickle
# Load the "learn" corpus; load_pres returns a pair that is unpacked here
# as texts and labels.
# NOTE(review): Loader is not imported in the visible part of this snippet
# (the other examples use `from utils.utils import Loader`) — confirm.
fname = "Data/AFDpresidentutf8/corpus.tache1.learn.utf8"
alltxts, alllabs = Loader.load_pres(fname)

params = {
    "lowercase": [False, True],
    "punct": [False, True],
    "marker": [False, True],
    "number": [False, True],
    "stemming": [False, Preprocessing.stem],
    "ligne": [None, -2, 0],
    "strip_accents": [False, True],
    "stopwords": [None, stop],  # set(STOPWORDS)],
    "Vectorizer": [CountVectorizer, TfidfVectorizer],
    "binary": [True, False],
    "class_weight": ["balanced", None],
    "max_features": [None, 10000, 7000],
    "ngram_range": [(1, 1), (1, 2)],

Exemple #3

0

Afficher le fichier

Fichier : vocabulary_extraction.py Projet : MrJuin/Projet-Rital-Luc-Dao

# -*- coding: utf-8 -*-
from utils.utils import Loader
from utils.preprocessing import Preprocessing
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from utils.oddsRatio import OddsRatioCloud
from time import time
import spacy
from nltk.corpus import stopwords

# Load the "learn" corpus as a (texts, labels) pair.
fname = "Data/AFDpresidentutf8/corpus.tache1.learn.utf8"
train_x,train_y = Loader.load_pres(fname)

# French stop-word list from NLTK, materialised as a list.
stop = list(stopwords.words('french')) # + ['cet', 'cette', 'là']
# Options handed to Preprocessing.preprocessing. Every boolean switch is off;
# only the "stemming" callable and the stop-word list are set.
# NOTE(review): Preprocessing.lem is presumably a lemmatiser and the flags
# presumably toggle the matching normalisation steps — confirm against
# utils.preprocessing.
params = {
    "lowercase":False,
    "punct":False,
    "marker":False,
    "number":False,
    "stemming": Preprocessing.lem, # Preprocessing.stem,
    "ligne": None,
    "strip_accents":False,
    "stopwords": stop # set(stop)
}
# Single-argument wrapper so the preprocessing (with the params above) can be
# plugged in as the vectorizer's `preprocessor` callable.
f = lambda x: Preprocessing.preprocessing(x,params)
#%%


# Count-based vectorizer that delegates text clean-up to `f`; its built-in
# lowercasing is disabled and tokenisation uses the project's own pattern.
vectorizer = CountVectorizer(preprocessor = f,lowercase=False,token_pattern = Preprocessing.token_pattern)

Exemple #4

0

Afficher le fichier

from utils.utils import Loader

# Load the "learn" corpus as a (texts, labels) pair.
fname = "Data/AFDpresidentutf8/corpus.tache1.learn.utf8"
alltxts,alllabs = Loader.load_pres(fname)


# `fname` is rebound to the "test" corpus and loaded with the same loader,
# again unpacking a (texts, labels) pair.
fname = "Data/AFDpresidentutf8/corpus.tache1.test.utf8"
alltxts_test,alllabs_test = Loader.load_pres(fname)

'''
print(len(alltxts),len(alllabs))
print(alltxts[0])
print(alllabs[0])
print(alltxts[-1])
print(alllabs[-1])

path = "Data/AFDmovies/movies1000/"
alltxts,alllabs = Loader.load_movies(path)
'''

Exemple #5

0

Afficher le fichier

Fichier : campaign.py Projet : MrJuin/Projet-Rital-Luc-Dao

from sklearn.linear_model import LogisticRegression

# Start from NLTK's English stop-word list, then take the negation words and
# negative contractions back out of it so they are NOT filtered as stop words
# (presumably because negation matters for the sentiment task — confirm).
stop = list(stopwords.words('english'))
stop = list(
    set(stop) - {
        # The comma after "nor" is required: without it Python concatenates
        # the adjacent literals into "norain", so neither "nor" nor "ain"
        # would be removed from the stop-word list.
        "no", "not", "nor",
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',
        "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn',
        "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't",
        'wouldn', "wouldn't", 'don', "don't", 'should', "should've"
    })

# Load the movie data as a (texts, labels) pair from the given directory path.
fname = "Data/AFDmovies/movies1000/"
alltxts, alllabs = Loader.load_movies(fname)
# Convert both to NumPy arrays (presumably to allow index-array selection
# later in the script — confirm against the code past this excerpt).
alltxts = np.array(alltxts)
alllabs = np.array(alllabs)

params = {
    # lowercase":[False,True],
    "punct": [False, True],
    # "marker":[False,True],
    # "number":[False,True],
    "stemming": [False, Preprocessing.stem_eng],  #,Preprocessing.stem],
    "ligne": [None, -2, 0],
    # "strip_accents":[False,True], #
    "stopwords": [None, stop],  # set(STOPWORDS)],
    "Vectorizer": [CountVectorizer, TfidfVectorizer],
    # "binary": [False,True],
    # "class_weight": [[0.1,1]],# ["balanced"],

Exemple #6

0

Afficher le fichier

# -*- coding: utf-8 -*-
from utils.utils import Loader
from utils.preprocessing import Preprocessing
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from utils.oddsRatio import OddsRatioCloud
from time import time
from nltk.corpus import stopwords
from utils.scoring import get_vectorizer

# Load the movie data; load_movies returns a pair unpacked here as training
# samples and their labels.
fname = "Data/AFDmovies/movies1000/"
train_x, train_y = Loader.load_movies(fname)

# Start from NLTK's English stop-word list, then take the negation words and
# negative contractions back out of it so they are NOT filtered as stop words
# (presumably because negation matters for the sentiment task — confirm).
stop = list(stopwords.words('english'))
stop = list(
    set(stop) - {
        # The comma after "nor" is required: without it Python concatenates
        # the adjacent literals into "norain", so neither "nor" nor "ain"
        # would be removed from the stop-word list.
        "no", "not", "nor",
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',
        "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn',
        "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't",
        'wouldn', "wouldn't", 'don', "don't", 'should', "should've"
    })

params = {
    "lowercase": False,
    "punct": False,
    # "marker":False,