def __init__(self, inFileName, outFileName):
    """Set up the conjugation run: remember file names, then read and write.

    Reads the input file immediately and writes the results out, so
    constructing the object performs the whole pipeline.
    """
    self.fileName = inFileName
    self.outFileName = outFileName
    # Verb -> conjugation result mapping, populated by readFile().
    self.resultDict = {}
    # Italian-language conjugator from mlconjug3.
    self.conjugator = mlconjug3.Conjugator(language='it')
    # Run the pipeline: ingest first, then emit.
    self.readFile()
    self.outputToFile()
import mlconjug3

# Conjugate the Spanish verb "correr" and print its gerund ("corriendo").
default_conjugator = mlconjug3.Conjugator("es")
conjug_info = default_conjugator.conjugate("correr").conjug_info
gerund = conjug_info["Gerundio"]
print(gerund)
from allennlp.predictors.predictor import Predictor
import allennlp_models.classification
from core.settings import settings

# FIX: use logging.getLogger(), not logging.Logger().  Instantiating Logger
# directly creates a detached logger that is not registered with the logging
# manager, so it ignores handlers/levels set by the application's logging
# configuration and breaks the dotted-name hierarchy.
logger = logging.getLogger(__name__)

# Pretrained AllenNLP sentiment model (RoBERTa-large fine-tuned on SST).
allennlp_sentiment_model = "https://storage.googleapis.com/allennlp-public-models/sst-roberta-large-2020.06.08.tar.gz"

# NLTK resources for tokenisation, lemmatisation and POS tagging.
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

spacy_tokenizer = spacy.load('en_core_web_md')
lemmatizer = WordNetLemmatizer()
default_conjugator = mlconjug3.Conjugator(language='en')

# # Download sentiment analysis model
# try:
#     logger.info(f"Downloading sentiment analysis model from: {allennlp_sentiment_model}")
#     sentiment_analyser = Predictor.from_path(allennlp_sentiment_model)
# except Exception as es:
#     sentiment_analyser = None
#     print(es)


def load_mask_predictor(model_name='roberta-large'):
    """Download a RoBERTa masked-LM model from HuggingFace.

    :param model_name: HuggingFace model identifier (default 'roberta-large').
    """
    # Message has no placeholders, so a plain string literal suffices
    # (was an f-string with nothing interpolated).
    logger.info(
        "Downloading roBERTa model from huggingface for Masked Text Prediction "
    )
    model = RobertaForMaskedLM.from_pretrained(model_name)
    # NOTE(review): the snippet appears truncated here — presumably a
    # tokenizer is loaded and/or `model` is returned; confirm against the
    # full file.
# Feature reduction: L1-regularised linear SVC keeps only informative features.
feature_reductor = mlconjug3.SelectFromModel(
    mlconjug3.LinearSVC(penalty="l1", dual=False, verbose=0)
)

# Prediction Classifier
classifier = mlconjug3.SGDClassifier(verbose=0)

# Initialize Data Set from the Verbiste verb inventory for this language,
# holding out 5% for testing.
dataset = mlconjug3.DataSet(mlconjug3.Verbiste(language=lang).verbs)
dataset.split_data(proportion=0.95)

# Initialize Conjugator: assemble the pipeline and apply tuned hyperparameters.
model = mlconjug3.Model(vectorizer, feature_reductor, classifier)
model.pipeline.set_params(**best_params)
conjugator = mlconjug3.Conjugator(lang, model)

# Training and prediction — first on the train split, timed.
print('training {0} model on train set'.format(lang))
t0 = time()
conjugator.model.train(dataset.train_input, dataset.train_labels)
duration = round(time() - t0, 3)
print('{0} model trained on train set in {1} seconds.'.format(lang, duration))

predicted = conjugator.model.predict(dataset.test_input)
predicted2 = conjugator.model.predict(dataset.verbs_list)

# Retrain on the full data set for the final model, timed separately.
print('training {0} model on full data set'.format(lang))
t0 = time()
conjugator.model.train(dataset.verbs_list, dataset.templates_list)
duration2 = round(time() - t0, 3)
print('{0} model trained on full data set in {1} seconds.'.format(lang, duration2))
#!/usr/bin/env python # -*- coding: UTF-8 -*- import string from copy import deepcopy import mlconjug3 default_conjugator = mlconjug3.Conjugator(language="en") alpha = string.ascii_uppercase alpha_lower = string.ascii_lowercase translator = str.maketrans("", "", string.punctuation) PREP_DICT = {"where": "in", "when": "in", "how": "by", "why": "because"} AUX_DO = ["do", "does", "did"] VERB_DO = ["do", "does", "did", "done", "doing"] SUBJ_WH = ["what", "who", "which", "whom", "whose"] AUX_BE = ["is", "are", "was", "were", "been", "being", "be"] AUX_HAVE = ["has", "have", "had"] PREPS = ["TO", "IN", "RP"] AUX = AUX_BE + AUX_DO + AUX_HAVE TIME_WORDS = ["year", "month", "day", "hour", "decade", "century", "millenium"] DETS = ["the", "a", "an"] # with open("preps.txt", "r") as f: # common_preps = f.read().splitlines() common_preps = [
def __init__(self, ignore_vosotros=False):
    """Spanish-language conjugation helper.

    :param ignore_vosotros: when True, callers should skip the vosotros
        (second-person plural) forms.
    """
    super().__init__(language='es')
    self.ignore_vosotros = ignore_vosotros
    # Dedicated Spanish conjugator from mlconjug3.
    self.conjugator = mlconjug3.Conjugator(language='es')