def main(args):
    """Build the ``pubmed_rct20k`` dataset and hand its training split to ``Statistic``.

    Args:
        args: Not used by this function.

    Returns:
        Always ``0``.
    """
    dataset = Dataset('pubmed_rct20k')
    train_documents = dataset.documents_train
    train_labels = dataset.labels_train
    # The Statistic object itself is discarded; nothing is read back from it.
    Statistic(train_documents, train_labels)
    return 0
def main(args):
    """Run a classification example on the ``20newsgroups`` training split.

    The training documents and labels are passed through ``Preprocess``
    (with ``nb_sample=500``), a ``Classification`` object is built from the
    preprocessed data and processed, and the predictions for the first five
    preprocessed documents are printed.

    Args:
        args: Not used by this function.

    Returns:
        Always ``0``.
    """
    dataset = Dataset('20newsgroups')
    preprocessed = Preprocess(
        documents=dataset.documents_train,
        labels=dataset.labels_train,
        nb_sample=500,
    )
    sampled_documents = preprocessed.documents
    classifier = Classification(
        documents_train=sampled_documents,
        labels_train=preprocessed.labels,
    )
    # The classifier reuses the label list held by the preprocessing step.
    classifier.list_labels = preprocessed.list_labels
    classifier.process()
    predictions = classifier.predict(sampled_documents[:5])
    print(predictions)
    return 0
def main(args):
    """Run a text-generation example with a ``gpt2`` model on ``Short_Jokes``.

    A data loader is built from the first 10000 training documents
    (``batch_size=32``), the model is set up and fitted through a
    ``Generation`` wrapper, and the decoded output for one fixed prompt is
    printed.

    Args:
        args: Not used by this function.

    Returns:
        Always ``0``.
    """
    dataset = Dataset('Short_Jokes')
    model = Model(model_name='gpt2')
    loader = Create_DataLoader_generation(
        dataset.documents_train[:10000],
        batch_size=32,
    )

    # Model setup steps, in the order the example requires them.
    model.load_type()
    model.load_tokenizer()
    model.load_class()
    model.devices()
    model.configuration(loader)

    generation = Generation(model)
    # Fitting goes through the model attribute held by the Generation object.
    generation.model.fit_generation(loader)

    generated = model.predict_generation('What did you expect ?')
    print(decode_text(generated, model.tokenizer))
    return 0
from Manteia.Dataset import Dataset

# Example: preview the 'Amazon Review Full' dataset.
# test=True / desc=True are passed because the test split and the description
# are read below (presumably what enables them -- confirm against Dataset).
ds = Dataset('Amazon Review Full', test=True, desc=True)

# For each split, print a heading followed by the first five documents and labels.
for heading, attributes in (
    ('Train : ', ('documents_train', 'labels_train')),
    ('Test : ', ('documents_test', 'labels_test')),
):
    print(heading)
    for attribute in attributes:
        print(getattr(ds, attribute)[:5])

print('Description :')
print(ds.description)
from Manteia.Dataset import Dataset

# Example: preview the 'pubmed_rct20k' dataset.
ds = Dataset('pubmed_rct20k')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'Yelp Review Full' dataset.
# test=True / desc=True are passed because the test split and the description
# are read below (presumably what enables them -- confirm against Dataset).
ds = Dataset('Yelp Review Full', test=True, desc=True)

# For each split, print a heading followed by the first five documents and labels.
for heading, attributes in (
    ('Train : ', ('documents_train', 'labels_train')),
    ('Test : ', ('documents_test', 'labels_test')),
):
    print(heading)
    for attribute in attributes:
        print(getattr(ds, attribute)[:5])

print('Description :')
print(ds.description)
#modifier->parametre du notebook->GPU #import nltk #nltk.download('wordnet') from Manteia.Classification import Classification from Manteia.Model import * from Manteia.Dataset import Dataset from Manteia.Preprocess import list_labels from Manteia.Augmentation import * from sklearn.model_selection import train_test_split,KFold ds=Dataset('drugscom') ds.documents_train=np.array(ds.documents_train[:100]) ds.labels_train=np.array(ds.labels_train[:100]) model = Model(model_name ='bert',early_stopping=True) model.load_type() model.load_tokenizer() list_label=list_labels(ds.labels_train) print(list_label) model.num_labels=len(list_label) model.load_class() model.save('model_init') #validation croisée nb_pass=4 def coss_validation_idx(nb_pass,nb_docs): docs_idx = [idx for idx in range(nb_docs)] train_idx, test_idx = [], [] for pli in range(nb_pass):
from Manteia.Dataset import Dataset

# Example: preview the '20newsgroups' dataset.
ds = Dataset('20newsgroups')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'Yelp Review Polarity' dataset.
# test=True / desc=True are passed because the test split and the description
# are read below (presumably what enables them -- confirm against Dataset).
ds = Dataset('Yelp Review Polarity', test=True, desc=True)

# Only the 'Train : ' heading is printed; the test previews and the
# description follow directly, without headings of their own.
print('Train : ')
for attribute in (
    'documents_train',
    'labels_train',
    'documents_test',
    'labels_test',
):
    print(getattr(ds, attribute)[:5])

print(ds.description)
from Manteia.Dataset import Dataset

# Example: preview the 'Yahoo! Answers' dataset.
# test=True / desc=True are passed because the test split and the description
# are read below (presumably what enables them -- confirm against Dataset).
ds = Dataset('Yahoo! Answers', test=True, desc=True)

# For each split, print a heading followed by the first five documents and labels.
for heading, attributes in (
    ('Train : ', ('documents_train', 'labels_train')),
    ('Test : ', ('documents_test', 'labels_test')),
):
    print(heading)
    for attribute in attributes:
        print(getattr(ds, attribute)[:5])

# Whole-object fields: each is printed in full under its own heading.
for heading, attribute in (
    ('Description :', 'description'),
    ('List labels :', 'list_labels'),
):
    print(heading)
    print(getattr(ds, attribute))
from Manteia.Dataset import Dataset

# Example: preview the 'drugscom' dataset.
ds = Dataset('drugscom')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'Amazon Review Polarity' dataset.
# test=True / desc=True are passed because the test split and the description
# are read below (presumably what enables them -- confirm against Dataset).
ds = Dataset('Amazon Review Polarity', test=True, desc=True)

# Only the 'Train : ' heading is printed; the test previews and the
# description follow directly, without headings of their own.
print('Train : ')
for attribute in (
    'documents_train',
    'labels_train',
    'documents_test',
    'labels_test',
):
    print(getattr(ds, attribute)[:5])

print(ds.description)
from Manteia.Dataset import Dataset

# Example: preview the 'agnews' dataset.
ds = Dataset('agnews')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'DBPedia' dataset, using 'test/test/' as the path argument.
# test=True / desc=True / classe=True are passed because the test split, the
# description and the label list are read below (presumably what enables
# them -- confirm against Dataset).
ds = Dataset(
    'DBPedia',
    path='test/test/',
    test=True,
    desc=True,
    classe=True,
)

# For each split, print a heading followed by the first five documents and labels.
for heading, attributes in (
    ('Train : ', ('documents_train', 'labels_train')),
    ('Test : ', ('documents_test', 'labels_test')),
):
    print(heading)
    for attribute in attributes:
        print(getattr(ds, attribute)[:5])

# Whole-object fields: each is printed in full under its own heading.
for heading, attribute in (
    ('Description :', 'description'),
    ('List labels :', 'list_labels'),
):
    print(heading)
    print(getattr(ds, attribute))
from Manteia.Dataset import Dataset

# Example: preview the dev split of the 'SST-5' dataset.
# dev=True is passed because the dev split is read below
# (presumably what enables it -- confirm against Dataset).
ds = Dataset('SST-5', dev=True)

# Heading, then the first five dev documents and the first five dev labels.
print('Dev : ')
for attribute in ('documents_dev', 'labels_dev'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'Tweeter Airline Sentiment' dataset.
ds = Dataset('Tweeter Airline Sentiment')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'Short_Jokes' dataset.
ds = Dataset('Short_Jokes')

# Only documents are previewed here; no labels are read in this example.
print('Train : ')
train_preview = ds.documents_train[:5]
print(train_preview)
from Manteia.Dataset import Dataset

# Example: preview the 'SST-2' dataset.
ds = Dataset('SST-2')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])
from Manteia.Dataset import Dataset

# Example: preview the 'trec' dataset.
ds = Dataset('trec')

# Heading, then the first five training documents and the first five labels.
print('Train : ')
for attribute in ('documents_train', 'labels_train'):
    print(getattr(ds, attribute)[:5])