Esempio n. 1
0
    def train_classifier_model(self, raw_data, pretrained_model, directory):
        """Train a language model, fine-tune a classifier on top of it,
        export the classifier and report its precision on the test split.
        """
        self.logger.log_info("Prepare training and testing data")
        train_df, valid_df, test_df = self._prepare_train_data(raw_data)
        tok = ftext.Tokenizer(lang='xx')
        lm_bunch = ftext.TextLMDataBunch.from_df('',
                                                 tokenizer=tok,
                                                 bs=16,
                                                 train_df=train_df,
                                                 valid_df=valid_df,
                                                 text_cols=0,
                                                 label_delim=' ')

        self.logger.log_info("Start training language model")
        lm_learner = self._create_language_model_learner(
            lm_bunch, pretrained_model)
        encoder_name = self._train_language_model(lm_learner, directory)

        self.logger.log_info("Start training classifier")
        clf_learner = self._create_classifier_learner(
            lm_bunch, train_df, valid_df, tok, encoder_name)
        self._train_classifier(clf_learner)

        self.logger.log_info("Export model")
        output_path = os.path.join(directory, "model")
        clf_learner.export(output_path)
        # Reload the exported learner so evaluation uses exactly what was saved.
        reloaded = ftext.load_learner(output_path)
        label_precision = self._evaluate_model(test_df, reloaded)
        return label_precision, output_path
Esempio n. 2
0
def classify(qstr: Query):
    """Classify a query string, returning the raw prediction string,
    the per-label scores, and a dict of the labels that fired.
    """
    text = clean_text(qstr.query)

    learn = load_learner(".")
    raw = str(learn.predict(text))
    print(raw)
    # predict() is stringified and parsed: chunk [1] holds the 0/1 label
    # flags, chunk [2] the probabilities.
    prob_chunk = raw.split(", tensor")[2]
    flag_chunk = raw.split(", tensor")[1]
    flags = [int(tok) for tok in re.findall(r"\w+", flag_chunk)]
    print(flags)

    score = [float(tok)
             for tok in re.findall(r"[-+]?\d*\.\d+|\d+", prob_chunk)]
    label_cols = [
        "toxic",
        "severe_toxic",
        "obscene",
        "threat",
        "insult",
        "identity_hate",
    ]
    fired = {}
    fired_names = []
    for idx, label in enumerate(label_cols):
        if flags[idx] == 1:
            fired_names.append(label)
            fired[label] = score[idx]

    return {"result": raw, "list": score, "dict": fired}
Esempio n. 3
0
def predict(texto: str,
            temas: list,
            model_filename: str,
            temas_sub: list = None):
    """Score `texto` with the exported classifier and return the topic
    labels with rounded probabilities; errors come back as {"erro": ...}.
    """
    data_path = Config.data_path()
    tmp_dir = data_path / f'ptwiki/models/tmp/'
    tmp_dir.mkdir(exist_ok=True, parents=True)

    # NOTE(review): return value is discarded — presumably intended to force
    # CPU inference; confirm whether defaults.device should be set instead.
    torch.device('cpu')

    model_path = 'modelos'
    shutil.copy(model_path + '/spm.model', tmp_dir)

    model = load_learner(path=model_path, file=model_filename)
    _fix_sp_processor(learner=model,
                      sp_path=Path(model_path),
                      sp_model="spm.model",
                      sp_vocab="spm.vocab")

    try:
        probs = np.around(np.array(model.predict(texto)[2]), 3)
        probs = [float(p) for p in probs]
        if temas_sub is not None:
            return {"temas": temas, "p": probs, "temas_sub": temas_sub}
        return {"temas": temas, "p": probs}
    except Exception as e:
        return {"erro": str(e)}
Esempio n. 4
0
    def __init__(self, config):
        """Download the model pickle named by the s3:// URI in
        ``config["model"]`` and load it into a fastai predictor.
        """
        bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups()
        # Unsigned client: the bucket is expected to allow anonymous reads.
        s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
        # BUG FIX: os.mkdir raised FileExistsError when /tmp/model survived a
        # previous run (e.g. a warm serving container); makedirs is idempotent.
        os.makedirs("/tmp/model", exist_ok=True)
        s3.download_file(bucket, key, "/tmp/model/export.pkl")

        self.predictor = load_learner("/tmp/model")
Esempio n. 5
0
    def __init__(self):
        """Load the learner exported for the large dataset variant."""
        size = "large"
        print(f"Processing {size} dataset.", flush=True)
        model_dir = f"datasets/{size}"

        print("Loading the model", flush=True)
        self._learn = load_learner(path=model_dir)
        print("Done loading the model", flush=True)
 def from_export(cls, export_path:str, export_name:str = 'export.pkl'):
     """Alternate constructor: restore a learner from an export pickle.

     Args:
         export_path: Directory holding the exported learner.
         export_name: File name of the export pickle inside that directory.
     """
     return cls(load_learner(Path(export_path), fname=export_name))
Esempio n. 7
0
 def _load_model(self) -> None:
     """Restore the learner from ``self.path`` and warm up SentencePiece."""
     model_dir = self.path.parent
     model_name = self.path.name
     self.learner = load_learner(model_dir, model_name)
     self._fix_sp_processor(model_dir, 'spm.model', 'spm.vocab')
     # An empty prediction forces the SentencePiece files to load eagerly.
     self.learner.predict("")
Esempio n. 8
0
 def predict(self, data):
     """Return the arg-max class index for every text in ``data``."""
     model_path = Path(self.path_class)
     clf = load_learner(path=model_path.parent, file=model_path.name)
     clf.model.eval()
     # TODO: fastai very likely offers batched prediction that beats this
     #  per-text loop (e.g. a process pool mapping predict over the data).
     scores = [clf.predict(text)[2].numpy() for text in tqdm(data)]
     # TODO: return the soft prediction?
     return np.argmax(scores, axis=1)
Esempio n. 9
0
    def get(self):
        """Generate several continuations of a fixed prompt and join them
        with a long separator string.
        """
        learner = load_learner('language_model_learner')

        prompt = "My favorite part was when"
        # prompt = "The best scene was  "
        word_count = 50
        sentence_count = 6
        temperature = .75
        separator = ' ============================================================================XXXXX>>>>>>>>>'
        return separator.join(
            learner.predict(prompt, word_count, temperature=temperature)
            for _ in range(sentence_count))
Esempio n. 10
0
    def __init__(self):
        """Load Twitter credentials, both sentiment models, and the
        index-to-label mapping.
        """
        with open("./tweets_analysis/twitter_credentials.json", "r") as file:
            creds = json.load(file)

        self.tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'],
                              creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])

        self.senti_model1 = torch.load(
            './tweets_analysis/model_data/model1.pth', map_location=device)
        # BUG FIX: pickle.load(open(...)) leaked the file handle; use `with`
        # so the file is closed deterministically.
        with open('./tweets_analysis/model_data/word_to_idx.pkl', 'rb') as fh:
            self.word_to_idx = pickle.load(fh)
        self.senti_model2 = load_learner('./tweets_analysis/',
                                         'model_data/model2.pkl')
        self.idx_to_label = {0: 'negative', 1: 'neutral', 2: 'positive'}
Esempio n. 11
0
    def get(self):
        """Classify the ``review`` field of the request body, falling back
        to a canned review when the body cannot be parsed.
        """
        learner_clf = load_learner('language_classifier_learner')

        parser = reqparse.RequestParser()
        parser.add_argument('review')
        request_body = parser.parse_args()
        try:
            if isinstance(request_body, str):
                request_body = json.loads(request_body)
            text = request_body['review']
        # FIX: narrowed from bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; parse failures still use the fallback.
        except Exception:
            text = "what a wonderful film"

        output = learner_clf.predict(text)
        return str(output)
Esempio n. 12
0
def main():
    """Predict the isLesson flag for paragraphs fetched from Elasticsearch
    and merge the predictions into the sentences dataframe.
    """
    setup_mlflow()

    # Get dataset from Elasticsearch
    to_predict_par_df = get_for_predict_dataframe()

    ROOT_PATH = "./models"

    # Load saved model file
    lesson_learner = load_learner(Path(ROOT_PATH), args.model_filename)

    forecasts = []
    # BUG FIX: was `to_predict_pardf` (NameError). Kept for the (commented)
    # confusion-matrix plot below.
    actual = to_predict_par_df.isLesson.values

    for p in to_predict_par_df.paragraph:
        forecasts.append(try_int(lesson_learner.predict(p)[0]))
    ##plot_confusion_matrix(actual, forecasts)

    # Get sentences
    credentials = get_credentials(args.credentials)
    df2 = ef.getSentences(credentials)

    # Update isLessons in sentences
    to_predict_par_df2 = to_predict_par_df
    to_predict_par_df2.isLesson = forecasts

    # BUG FIX: was `.isLsson` (AttributeError). Map 1/0 -> True/False.
    to_predict_par_df2.isLesson = to_predict_par_df2.isLesson.replace(
        int(1), True).replace(int(0), False)

    df2.isLesson, df2.paragraph = to_predict_par_df2.isLesson, to_predict_par_df2.paragraph

    ##ef.updateSentences(credentials, df2)
    print(df2.head())
Esempio n. 13
0
    def get(self):
        """Generate continuations of ``start_txt`` from the request body,
        falling back to defaults when the body cannot be parsed.
        """
        learner = load_learner('language_model_learner')

        temperature = .75
        n_sentences = 1

        parser = reqparse.RequestParser()
        parser.add_argument('start_txt')
        parser.add_argument('n_words')
        request_body = parser.parse_args()
        try:
            if isinstance(request_body, str):
                request_body = json.loads(request_body)
            text = request_body['start_txt']
            n_words = int(request_body['n_words'])
        # FIX: narrowed from bare `except:`; missing/invalid fields still
        # fall back to the defaults below.
        except Exception:
            text = "the plot"
            n_words = 35

        return "\n".join(
            learner.predict(text, n_words, temperature=temperature)
            for _ in range(n_sentences))
Esempio n. 14
0
            required=True,
            description="input text",
            help="Cannot be blank.",
            example="5 mã trắng cửa bán rồi thì FLC tí nữa thôi là lại tím lịm"
        )
    })


#load model
# @np_func
def f1(inp, targ):
    """F1 score between ``targ`` and the arg-max predictions of ``inp``."""
    predictions = np.argmax(inp, axis=-1)
    return f1_score(targ, predictions)


# Load the exported sentiment model from the working directory; to_fp32
# converts half-precision weights back to 32-bit for CPU inference.
model_dir = os.getcwd()
learn = load_learner(model_dir, file='stock_sentiment_model.pkl')
learn.to_fp32()  # using with cpu


#
@name_space.route("/")
class MainClass(Resource):
    # REST resource that scores posted text with the module-level learner.
    @app.doc(responses={
        200: 'OK',
        400: 'Invalid Argument',
        500: 'Mapping Key Error'
    })
    @app.expect(body_require)
    def post(self):
        text = request.json['text']
        # Probability of class index 1 for the lower-cased input.
        # NOTE(review): no return statement is visible — the handler appears
        # truncated here; confirm the response is built further down.
        predicted_value = learn.predict(text.lower())[2][1].item()
Esempio n. 15
0
 def __init__(self, model_path):
     """Set up the text preprocessor and load the exported model."""
     self.preprocessor = TextPreprocessor()
     self.model = load_learner(model_path)
Esempio n. 16
0
if __name__ == "__main__":
    # CLI: main_path fasttext_path dataset_path out_folder
    main_path = sys.argv[1]
    fasttext_path = sys.argv[2]
    dataset_path = sys.argv[3]
    out_folder = sys.argv[4]

    loader = E2ENLGDataLoader(dataset_path,
                              "trainset.csv",
                              "devset.csv",
                              percentile=100)
    loader.setDataAndMaxSize(bs=32)
    data = loader.data  # kept for parity with the original script (unused below)

    # seq2seq model: load it, then attach the freshly built data bunch.
    learn = load_learner(os.path.join(main_path, "models"))
    learn.data = loader.data

    predictor = MrPredictor(os.path.join(main_path, "models",
                                         "classifier"), dataset_path,
                            "trainset.csv", "devset.csv", "testset_w_refs.csv")
    predict_utils = PredictUtils(learn)

    reranker = Reranker(predictor, predict_utils, k=20, p=0.2)

    rxs, rys, rzs, xs, ys, zs = predict_utils.preds_acts(
        ds_type=DatasetType.Valid)

    #reranker.write_inputs_candidates("candidates.csv",xs, ys, zs, rxs, rys, rzs)
Esempio n. 17
0
import logging

from fastai.text import load_learner
from flask import Flask
from flask import jsonify
from flask import request

# load model
learn = load_learner(".", "20191001.reducelabels.pkl")

# load web app
app = Flask(__name__)

# When imported by a server (not run directly, e.g. under gunicorn),
# forward app logging to gunicorn's error-log handlers and level.
if __name__ != "__main__":
    gunicorn_logger = logging.getLogger("gunicorn.error")
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)


@app.route("/healthz")
def healthz():
    """Liveness-probe endpoint; always answers with a dot."""
    return "."


@app.route('/predict/', methods=['POST'])
def predict():
    """Build the classifier input from the posted JSON's summary+description.

    NOTE(review): no response is returned in the visible lines — the handler
    appears truncated here.
    """
    data = request.get_json()
    summary = data.get("summary", "")
    # Presumably a sentinel token standing in for a missing description —
    # confirm against how the model was trained.
    description = data.get("description", "xyznodescriptionzyx")
    text = " ".join([summary, description])
Esempio n. 18
0
 def __init__(self, *args, **kwargs):
     """Load the toxicity classifier and convert it to 32-bit precision."""
     super().__init__(*args, **kwargs)
     model_dir = text.Path(ToxicityclassifierConfig.model_folder)
     self.learner = text.load_learner(model_dir, 'text_toxicity.pkl').to_fp32()
Esempio n. 19
0
import re, psycopg2, os
from fastai.text import load_learner
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()

# Model files are resolved relative to this source file's directory.
path = Path(__file__).parent
learn = load_learner("./", path / 'models/deep_poet')
# Database connection string, supplied via the environment (.env).
URI = os.getenv("URI")


def generate_poem(init, length, temp):
    """Continue ``init`` with ``length`` predicted words at temperature ``temp``."""
    continuation = learn.predict(init, n_words=length, temperature=temp)
    return continuation


def getMax(poem):
    """Return the highest repetition count among words longer than 3 chars.

    Newlines are stripped before splitting on single spaces; returns 0 when
    no word qualifies.
    """
    flattened = re.sub('\n', "", poem)
    counts = {}
    highest = 0  # renamed from `max`/`list`/`occ`: `max` and `list` shadowed builtins
    for word in flattened.split(" "):
        if len(word) > 3:
            counts[word] = counts.get(word, 0) + 1
            if counts[word] > highest:
                highest = counts[word]
    return highest


def savePoem(poem, init, length, temp, counter, repetition_tolerance):
    """Persist a generated poem to Postgres.

    NOTE(review): only the connection setup is visible here — the function
    appears truncated in this view.
    """
    connection = psycopg2.connect(URI)  #.env uri
    cursor = connection.cursor()
Esempio n. 20
0
# Files (the model export below) are resolved relative to this source file.
path = Path(__file__).parent


async def download_file(url, dest):
    """Fetch ``url`` into ``dest``, skipping the download if it already exists."""
    if dest.exists():
        return
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            body = await response.read()
            with open(dest, 'wb') as out:
                out.write(body)

# BUG FIX: download_file is a coroutine — calling it bare only created the
# coroutine object and never ran the download. Drive it to completion.
import asyncio
asyncio.run(download_file(export_file_url, path / export_file_name))


# Restore the exported learner from the app directory.
learn = ftxt.load_learner(path, file = export_file_name)



def generatequote():    
    """Sample quotes from the language model until one ends with a closing
    curly quote, then clean up spacing and capitalisation.

    NOTE(review): no return is visible in these lines — the function appears
    truncated here.
    """
    quotes = []
    while quotes==[]:
        # Sample 20 words after the beginning-of-sentence token.
        idea = learn.predict("xxbos", n_words=20, temperature=0.75)
        ideas = idea.split("xxbos")
        # Keep the first generated sentence only if it ends in a closing quote.
        if ideas[1][-1] == "”":
            quotes.append(ideas[1])
            
    message = quotes[0][:]
    # Drop the leading remnant, re-add the opening quote, fix spacing.
    message = '“'+message[3:].capitalize()
    message = message.replace(' . ','.')
    message = message.replace(' ,',',')
Esempio n. 21
0
 def load(self):
     """Load the exported model from the mounted volume, then flag readiness."""
     self.model = load_learner("/mnt/models")
     self.ready = True
Esempio n. 22
0
def init():
    """Initialise the module-level learner from the registered model."""
    global learn
    print("model initialized" + time.strftime("%H:%M:%S"))
    registered_path = Model.get_model_path(model_name='saps_classification')
    learn = load_learner(registered_path, '')
Esempio n. 23
0
# Demonstrate how to load a model and predict with it:

# load a model
from fastai.text import load_learner

dataset_size = "large"
checkpoint_key = "AWD_LSTM-clean-5_15"  # NOTE(review): unused in the visible lines
print(f"Processing {dataset_size} dataset.", flush=True)
path = f"datasets/{dataset_size}"

print("Loading the model", flush=True)
learn = load_learner(path=path)
print("Done loading the model", flush=True)
# predict() returns a tuple; element [2] holds the class probabilities.
probabilities = learn.predict("Hey, how is it going?")

print(f"P(is_bullying)={probabilities[2][1]}")
Esempio n. 24
0
from fastai.text import load_learner, defaults
import torch

from flask import Blueprint, jsonify, request

api = Blueprint('api', __name__)

# Force fastai inference onto the CPU.
defaults.device = torch.device('cpu')

model = load_learner(path='.', file='export_32.pkl')


@api.route('/test')
def test():
    """Smoke-test endpoint for the API blueprint."""
    return jsonify('api test')


@api.route('/predict', methods=['POST'])
def predict():
    """Score the posted JSON's ``text`` field with the fastai model."""
    payload = request.json  # renamed: `input` shadowed the builtin
    text = payload['text']

    category, tensor_idx, probabilities = model.predict(text)
    return jsonify({
        'label': str(category),
        'score': probabilities[tensor_idx].item(),
        'model': 'fastai'
    })
            f.write(raw[0] + '\t' + idx_to_label[out] + '\t' + raw[2] + '\n')

    macro_avg = classification_report(answer,
                                      baseline,
                                      target_names=label_names,
                                      output_dict=True)['macro avg']

    print('baseline score: ')
    print('\tprecision\trecall\t\tf1-score')
    print('\t%.4f\t\t%.4f\t\t%.4f' %
          (macro_avg['precision'], macro_avg['recall'], macro_avg['f1-score']))
    print()

    m1 = model('model1')
    m1.load_model()
    m2 = load_learner('./', 'model_data/model2.pkl')

    model1_out = []
    model2_out = []
    model3_out = []

    with multiprocessing.Pool(pool_num) as p:
        models_out = p.map(predict, test[1])

    model1_out = [out for out, _, _ in models_out]
    model2_out = [out for _, out, _ in models_out]
    model3_out = [out for _, _, out in models_out]

    with open('../output/model1_output.txt', 'w') as f:
        for raw, out in zip(raw_text, model1_out):
            f.write(raw[0] + '\t' + idx_to_label[out] + '\t' + raw[2] + '\n')
Esempio n. 26
0
 def load_learner(path='models', file='finalv1.model') -> "AI":
     # Load the persisted learner and rebrand its class as AI.
     # NOTE(review): the inner call resolves at runtime to the module-level
     # load_learner (fastai's), not to this method — confirm that import
     # exists, otherwise this recurses.
     nlp_model: RNNLearner = load_learner(path=path, file=file)
     nlp_model.__class__ = AI
     return nlp_model