Example #1
import mlconjug3


class VerbListConjugator:  # illustrative class name; the original snippet only shows __init__
    def __init__(self, inFileName, outFileName):
        self.fileName = inFileName
        self.outFileName = outFileName
        self.resultDict = {}
        # Italian conjugator from mlconjug3
        self.conjugator = mlconjug3.Conjugator(language='it')
        self.readFile()
        self.outputToFile()
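The snippet assumes readFile and outputToFile exist on the same class. A minimal hypothetical pair (assumed, not from the original source) could read one infinitive per line, conjugate it, and dump the collected forms as JSON (requires `import json` at module level):

    def readFile(self):
        # Hypothetical: one infinitive per line in the input file
        with open(self.fileName, encoding='utf-8') as f:
            for verb in (line.strip() for line in f if line.strip()):
                self.resultDict[verb] = self.conjugator.conjugate(verb).conjug_info

    def outputToFile(self):
        # Hypothetical: conjug_info is a nested dict of strings, so it serializes to JSON
        with open(self.outFileName, 'w', encoding='utf-8') as f:
            json.dump(self.resultDict, f, ensure_ascii=False, indent=2)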
Example #2
import mlconjug3

default_conjugator = mlconjug3.Conjugator(language="es")
# conjug_info maps each mood of the conjugated verb to its forms
conjug_info = default_conjugator.conjugate("correr").conjug_info
gerund = conjug_info["Gerundio"]
print(gerund)
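Since conjug_info is a nested dict keyed by mood and then by tense or person, a quick way to inspect every form the library returns (assuming the same default_conjugator as above):

for mood, forms in conjug_info.items():
    print(mood, forms)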
Example #3
import logging

import mlconjug3
import nltk
import spacy
from allennlp.predictors.predictor import Predictor
import allennlp_models.classification
from nltk.stem import WordNetLemmatizer
from transformers import RobertaForMaskedLM

from core.settings import settings

logger = logging.getLogger(__name__)

allennlp_sentiment_model = "https://storage.googleapis.com/allennlp-public-models/sst-roberta-large-2020.06.08.tar.gz"

# NLTK resources for tokenization, lemmatization and POS tagging
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

spacy_tokenizer = spacy.load('en_core_web_md')
lemmatizer = WordNetLemmatizer()
default_conjugator = mlconjug3.Conjugator(language='en')

# # Download sentiment analysis model
# try:
#     logger.info(f"Downloading  sentiment analysis model from: {allennlp_sentiment_model}")
#     sentiment_analyser = Predictor.from_path(allennlp_sentiment_model)
# except Exception as es:
#     sentiment_analyser = None
#     print(es)


def load_mask_predictor(model_name='roberta-large'):
    logger.info(
        "Downloading roBERTa model from huggingface for Masked Text Prediction"
    )
    model = RobertaForMaskedLM.from_pretrained(model_name)
    return model
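For context, a minimal sketch of using the returned masked-LM model with the matching tokenizer (assumed usage, not part of the original file; the prompt string is illustrative):

from transformers import RobertaTokenizer
import torch

tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
mask_model = load_mask_predictor()
inputs = tokenizer("The capital of France is <mask>.", return_tensors="pt")
with torch.no_grad():
    logits = mask_model(**inputs).logits
# Locate the <mask> position and take the highest-scoring token
mask_pos = (inputs.input_ids == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
print(tokenizer.decode(logits[0, mask_pos].argmax(-1)))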
Example #4
from functools import partial
from time import time

import mlconjug3

# Assumed preamble: the original snippet uses `lang` and `vectorizer` without
# defining them; the setup below follows mlconjug3's documented training example.
lang = 'en'
vectorizer = mlconjug3.CountVectorizer(
    analyzer=partial(mlconjug3.extract_verb_features, lang=lang, ngram_range=(2, 7)),
    binary=True)

# Feature reduction
feature_reductor = mlconjug3.SelectFromModel(mlconjug3.LinearSVC(penalty="l1",
                                                                 dual=False,
                                                                 verbose=0))

# Prediction Classifier
classifier = mlconjug3.SGDClassifier(verbose=0)

# Initialize Data Set
dataset = mlconjug3.DataSet(mlconjug3.Verbiste(language=lang).verbs)
dataset.split_data(proportion=0.95)

# Initialize Conjugator
model = mlconjug3.Model(vectorizer, feature_reductor, classifier)
model.pipeline.set_params(**best_params)  # best_params: tuned hyperparameters, not shown in this snippet
conjugator = mlconjug3.Conjugator(lang, model)

# Training and prediction
print('training {0} model on train set'.format(lang))
t0 = time()
conjugator.model.train(dataset.train_input, dataset.train_labels)
duration = round(time() - t0, 3)
print('{0} model trained on train set in {1} seconds.'.format(lang, duration))
predicted = conjugator.model.predict(dataset.test_input)
predicted2 = conjugator.model.predict(dataset.verbs_list)

print('training {0} model on full data set'.format(lang))
t0 = time()
conjugator.model.train(dataset.verbs_list, dataset.templates_list)
duration2 = round(time() - t0, 3)
print('{0} model trained on full data set in {1} seconds.'.format(lang, duration2))
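Once trained, the model can be persisted so it does not have to be retrained on every run; a minimal sketch using pickle (the file name is illustrative):

import pickle

with open('trained_model-{0}.pickle'.format(lang), 'wb') as f:
    pickle.dump(conjugator.model, f)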
Example #5
File: rule.py Project: ai-systems/poly-nlp
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import string
from copy import deepcopy

import mlconjug3

default_conjugator = mlconjug3.Conjugator(language="en")


alpha = string.ascii_uppercase
alpha_lower = string.ascii_lowercase

translator = str.maketrans("", "", string.punctuation)

PREP_DICT = {"where": "in", "when": "in", "how": "by", "why": "because"}
AUX_DO = ["do", "does", "did"]
VERB_DO = ["do", "does", "did", "done", "doing"]
SUBJ_WH = ["what", "who", "which", "whom", "whose"]
AUX_BE = ["is", "are", "was", "were", "been", "being", "be"]
AUX_HAVE = ["has", "have", "had"]
PREPS = ["TO", "IN", "RP"]
AUX = AUX_BE + AUX_DO + AUX_HAVE
TIME_WORDS = ["year", "month", "day", "hour", "decade", "century", "millennium"]
DETS = ["the", "a", "an"]

# with open("preps.txt", "r") as f:
#     common_preps = f.read().splitlines()

common_preps = [
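A short illustrative use of the helper tables above (assumed, not from rule.py): strip punctuation from a question, then look up the wh-word's preposition and check for a do-auxiliary.

question = "When did you arrive?"
tokens = question.lower().translate(translator).split()
prep = PREP_DICT.get(tokens[0])              # 'when' -> 'in'
has_do_aux = any(tok in AUX_DO for tok in tokens)
print(tokens, prep, has_do_aux)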
Example #6
import mlconjug3


class SpanishConjugator(mlconjug3.Conjugator):  # illustrative name; the base class is inferred from the super() call
    def __init__(self, ignore_vosotros=False):
        super().__init__(language='es')
        self.ignore_vosotros = ignore_vosotros
        self.conjugator = mlconjug3.Conjugator(language='es')
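Usage would then look like plain Conjugator usage, assuming the subclass sketched above (the verb is illustrative):

conjugator = SpanishConjugator(ignore_vosotros=True)
verb = conjugator.conjugate('hablar')
print(verb.conjug_info['Gerundio'])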