def train_classifier_model(self, raw_data, pretrained_model, directory):
    """Train a ULMFiT-style language model + classifier and export the classifier.

    Args:
        raw_data: Raw labelled text data; split by ``_prepare_train_data``.
        pretrained_model: Base model handed to the language-model learner.
        directory: Output directory for the encoder and the exported model.

    Returns:
        Tuple of (label precision from ``_evaluate_model``, export path).
    """
    self.logger.log_info("Prepare training and testing data")
    df_train, df_val, df_test = self._prepare_train_data(raw_data)
    # 'xx' = fastai's multilingual tokenizer; text is column 0 and labels
    # are space-delimited (multi-label setup).
    tokenizer = ftext.Tokenizer(lang='xx')
    data_lm = ftext.TextLMDataBunch.from_df('', tokenizer=tokenizer, bs=16,
                                            train_df=df_train, valid_df=df_val,
                                            text_cols=0, label_delim=' ')
    self.logger.log_info("Start training language model")
    language_model_learner = self._create_language_model_learner(
        data_lm, pretrained_model)
    # Fine-tune the LM; its saved encoder is reused by the classifier stage.
    encoder_name = self._train_language_model(language_model_learner, directory)
    self.logger.log_info("Start training classifier")
    classifier_learner = self._create_classifier_learner(
        data_lm, df_train, df_val, tokenizer, encoder_name)
    self._train_classifier(classifier_learner)
    self.logger.log_info("Export model")
    output_path = os.path.join(directory, "model")
    classifier_learner.export(output_path)
    # Reload the exported learner so evaluation runs on exactly what
    # was persisted to disk.
    learner_new = ftext.load_learner(output_path)
    label_precision = self._evaluate_model(df_test, learner_new)
    return label_precision, output_path
def classify(qstr: Query):
    """Classify a query into toxicity labels using an exported fastai learner.

    Recovers the binary label tensor and the probability tensor by parsing
    the *string representation* of ``learn.predict(...)``, then maps both
    onto the fixed label columns.

    Returns:
        dict with the raw prediction repr ("result"), the probability list
        ("list"), and a {label: score} mapping for positive labels ("dict").
    """
    k = qstr.query
    k = clean_text(k)
    learn = load_learner(".")
    res = learn.predict(k)
    # predict() returns (MultiCategory, binary tensor, probability tensor);
    # splitting the repr on ", tensor" isolates the two tensor reprs.
    r = str(res)
    print(r)
    p = r.split(", tensor")[2]  # probability tensor repr
    i = r.split(", tensor")[1]  # binary 0/1 tensor repr
    i = re.findall(r"\w+", i)
    result = [int(x) for x in i]
    print(result)
    # Pull floats (or bare ints) out of the probability repr.
    k = re.findall(r"[-+]?\d*\.\d+|\d+", p)
    score = [float(x) for x in k]
    label_cols = [
        "toxic",
        "severe_toxic",
        "obscene",
        "threat",
        "insult",
        "identity_hate",
    ]
    d = {}
    l = []  # NOTE(review): collected but never returned — confirm intent
    for i in range(0, len(label_cols)):
        if result[i] == 1:
            l.append(label_cols[i])
            d[label_cols[i]] = score[i]
    final = {"result": r, "list": score, "dict": d}
    return final
def predict(texto: str, temas: list, model_filename: str, temas_sub: list = None):
    """Predict topic probabilities for a text with an exported fastai model.

    Args:
        texto: Input text to classify.
        temas: Topic names aligned with the model's output order.
        model_filename: Exported learner file inside the 'modelos' directory.
        temas_sub: Optional sub-topic names, echoed back when provided.

    Returns:
        {"temas": ..., "p": probs[, "temas_sub": ...]} on success,
        {"erro": message} on any prediction failure.
    """
    data_path = Config.data_path()
    name = f'ptwiki/models/tmp/'
    path_t = data_path / name
    path_t.mkdir(exist_ok=True, parents=True)
    # NOTE(review): torch.device('cpu') alone has no effect — the result is
    # discarded; presumably `defaults.device = torch.device('cpu')` was
    # intended. Confirm before changing.
    torch.device('cpu')
    model_path = 'modelos'
    # The SentencePiece model must be available next to the temp path.
    shutil.copy(model_path + '/spm.model', path_t)
    model = load_learner(path=model_path, file=model_filename)
    _fix_sp_processor(learner=model, sp_path=Path(model_path),
                      sp_model="spm.model", sp_vocab="spm.vocab")
    try:
        # predict()[2] is the probability tensor; round to 3 decimals.
        preds = np.around(np.array(model.predict(texto)[2]), 3)
        preds = [float(p) for p in preds]
        if temas_sub is not None:
            return {"temas": temas, "p": preds, "temas_sub": temas_sub}
        else:
            return {"temas": temas, "p": preds}
    except Exception as e:
        return {"erro": str(e)}
def __init__(self, config):
    """Download the exported fastai model from S3 and load it.

    Args:
        config: Mapping whose "model" key holds an s3://bucket/key URL.
    """
    # Parse bucket and key out of the s3:// URL.
    bucket, key = re.match("s3://(.+?)/(.+)", config["model"]).groups()
    # Unsigned client: the model bucket is expected to be publicly readable.
    s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
    # FIX: os.mkdir raised FileExistsError on a warm container or a second
    # instantiation; makedirs(exist_ok=True) is idempotent.
    os.makedirs("/tmp/model", exist_ok=True)
    s3.download_file(bucket, key, "/tmp/model/export.pkl")
    self.predictor = load_learner("/tmp/model")
def __init__(self):
    """Load the exported learner for the configured dataset size."""
    size = "large"
    print(f"Processing {size} dataset.", flush=True)
    print("Loading the model", flush=True)
    # Model artifacts live under datasets/<size>.
    self._learn = load_learner(path=f"datasets/{size}")
    print("Done loading the model", flush=True)
def from_export(cls, export_path:str, export_name:str = 'export.pkl'):
    """Build a `FastaiTextClassifier` from an exported learner pickle.

    Attributes:
        export_path: Path for the Learner's cache folder.
        export_name: Export file name (.pkl).
    """
    return cls(load_learner(Path(export_path), fname=export_name))
def _load_model(self) -> None:
    """Load the learner from ``self.path`` and prime its SentencePiece files."""
    parent, fname = self.path.parent, self.path.name
    self.learner = load_learner(parent, fname)
    self._fix_sp_processor(parent, 'spm.model', 'spm.vocab')
    # An empty prediction forces lazy loading of the SentencePiece files.
    self.learner.predict("")
def predict(self, data):
    """Return hard class predictions (argmax over soft scores) for `data`."""
    model_file = Path(self.path_class)
    learn_classifier = load_learner(path=model_file.parent, file=model_file.name)
    learn_classifier.model.eval()
    # TODO: for sure Fastai has a more efficient way to predict than this
    # something like:
    # with concurrent.futures.ProcessPoolExecutor() as executor:
    #     predictions = [i for i in executor.map(predict, x)]
    preds = [learn_classifier.predict(text)[2].numpy() for text in tqdm(data)]
    # TODO: return the soft prediction?
    return np.argmax(preds, axis=1)
def get(self):
    """Generate several text continuations and join them with a separator."""
    learner = load_learner('language_model_learner')
    prompt = "My favorite part was when"
    # prompt = "The best scene was "
    word_count, sentence_count, temperature = 50, 6, .75
    sep_string = ' ============================================================================XXXXX>>>>>>>>>'
    generated = (learner.predict(prompt, word_count, temperature=temperature)
                 for _ in range(sentence_count))
    return sep_string.join(generated)
def __init__(self):
    """Wire up the Twitter client and load both sentiment models."""
    # Twitter API credentials are kept out of the repo in a local JSON file.
    with open("./tweets_analysis/twitter_credentials.json", "r") as file:
        creds = json.load(file)
    self.tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'],
                          creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])
    # Model 1: a raw torch module plus its word-to-index vocabulary.
    self.senti_model1 = torch.load(
        './tweets_analysis/model_data/model1.pth', map_location=device)
    self.word_to_idx = pickle.load(
        open('./tweets_analysis/model_data/word_to_idx.pkl', 'rb'))
    # Model 2: an exported fastai learner.
    self.senti_model2 = load_learner('./tweets_analysis/',
                                     'model_data/model2.pkl')
    self.idx_to_label = {0: 'negative', 1: 'neutral', 2: 'positive'}
def get(self):
    """Classify the request's 'review' field; fall back to a sample text.

    Returns:
        String repr of the learner's prediction tuple.
    """
    learner_clf = load_learner('language_classifier_learner')
    parser = reqparse.RequestParser()
    parser.add_argument('review')
    request_body = parser.parse_args()
    try:
        if isinstance(request_body, str):
            request_body = json.loads(request_body)
        text = request_body['review']
    except Exception:
        # FIX: was a bare `except:` — still best-effort fallback, but no
        # longer swallows SystemExit/KeyboardInterrupt.
        text = "what a wonderful film"
    output = learner_clf.predict(text)
    return str(output)
def main():
    """Predict isLesson for paragraphs and push labels into the sentences df."""
    setup_mlflow()
    # Get dataset from Elasticsearch
    to_predict_par_df = get_for_predict_dataframe()
    #ROOT_PATH = r"C:\Users\Test Machine\Documents\ADB-CognitiveSearch-ML\pipeline\functions\models"
    ROOT_PATH = "./models"
    # Load saved model file
    #learn = load_learner(Path(ROOT_PATH), "lesson_classif-04-05-2020_11-05-30_PM.pkl")
    lesson_learner = load_learner(Path(ROOT_PATH), args.model_filename)
    forecasts = []
    # FIX: was `to_predict_pardf` (NameError) — the frame is to_predict_par_df.
    actual = to_predict_par_df.isLesson.values
    for p in to_predict_par_df.paragraph:
        ##print(learn_classif.predict(p))
        forecasts.append(try_int(lesson_learner.predict(p)[0]))
    ##plot_confusion_matrix(actual, forecasts)
    # Get sentences
    credentials = get_credentials(args.credentials)
    df2 = ef.getSentences(credentials)
    # Update isLessons in sentences
    to_predict_par_df2 = to_predict_par_df
    to_predict_par_df2.isLesson = forecasts
    # FIX: was `.isLsson` (AttributeError) — map 1/0 labels to booleans.
    to_predict_par_df2.isLesson = to_predict_par_df2.isLesson.replace(
        int(1), True).replace(int(0), False)
    df2.isLesson, df2.paragraph = to_predict_par_df2.isLesson, to_predict_par_df2.paragraph
    ##ef.updateSentences(credentials, df2)
    print(df2.head())
def get(self):
    """Generate text from 'start_txt' for 'n_words'; defaults on bad input."""
    learner = load_learner('language_model_learner')
    temp = .75
    n_sentences = 1
    parser = reqparse.RequestParser()
    parser.add_argument('start_txt')
    parser.add_argument('n_words')
    request_body = parser.parse_args()
    try:
        if isinstance(request_body, str):
            request_body = json.loads(request_body)
        text = request_body['start_txt']
        n_words = int(request_body['n_words'])
    except Exception:
        # FIX: was a bare `except:` — keep the fallback, but let
        # SystemExit/KeyboardInterrupt propagate.
        text = "the plot"
        n_words = 35
    output = ("\n".join(
        learner.predict(text, n_words, temperature=temp)
        for _ in range(n_sentences)))
    return output
required=True, description="input text", help="Cannot be blank.", example="5 mã trắng cửa bán rồi thì FLC tí nữa thôi là lại tím lịm" ) }) #load model # @np_func def f1(inp, targ): return f1_score(targ, np.argmax(inp, axis=-1)) model_dir = os.getcwd() learn = load_learner(model_dir, file='stock_sentiment_model.pkl') learn.to_fp32() # using with cpu # @name_space.route("/") class MainClass(Resource): @app.doc(responses={ 200: 'OK', 400: 'Invalid Argument', 500: 'Mapping Key Error' }) @app.expect(body_require) def post(self): text = request.json['text'] predicted_value = learn.predict(text.lower())[2][1].item()
def __init__(self, model_path):
    """Set up text preprocessing and load the exported learner."""
    self.preprocessor = TextPreprocessor()
    self.model = load_learner(model_path)
if __name__ == "__main__":
    # CLI: main_path fasttext_path dataset_path out_folder
    main_path = sys.argv[1]
    fasttext_path = sys.argv[2]
    dataset_path = sys.argv[3]
    out_folder = sys.argv[4]
    dl = E2ENLGDataLoader(dataset_path, "trainset.csv", "devset.csv",
                          percentile=100)
    dl.setDataAndMaxSize(bs=32)
    data = dl.data
    # seq2seq model
    learn = load_learner(os.path.join(main_path, "models"))
    #dl.load_data(os.path.join(main_path,"models"))
    # Swap in the freshly built DataBunch so predictions use current data.
    learn.data = dl.data
    predictor = MrPredictor(os.path.join(main_path, "models", "classifier"),
                            dataset_path, "trainset.csv", "devset.csv",
                            "testset_w_refs.csv")
    predict_utils = PredictUtils(learn)
    # Rerank top-k beam candidates with the MR classifier.
    reranker = Reranker(predictor, predict_utils, k=20, p=0.2)
    rxs, rys, rzs, xs, ys, zs = predict_utils.preds_acts(
        ds_type=DatasetType.Valid)
    #reranker.write_inputs_candidates("candidates.csv",xs, ys, zs, rxs, rys, rzs)
import logging
from fastai.text import load_learner
from flask import Flask
from flask import jsonify
from flask import request

# load model
learn = load_learner(".", "20191001.reducelabels.pkl")
# load web app
app = Flask(__name__)

# Under gunicorn the module is imported (not run as __main__): forward the
# Flask app's logging to gunicorn's error logger so handlers/levels match.
if __name__ != "__main__":
    gunicorn_logger = logging.getLogger("gunicorn.error")
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)

@app.route("/healthz")
def healthz():
    # Liveness-probe endpoint.
    return "."

@app.route('/predict/', methods=['POST'])
def predict():
    # NOTE(review): this function appears truncated in this chunk — the
    # prediction/return part is not visible here.
    data = request.get_json()
    summary = data.get("summary", "")
    # Sentinel placeholder keeps the model input non-empty when absent.
    description = data.get("description", "xyznodescriptionzyx")
    text = " ".join([summary, description])
def __init__(self, *args, **kwargs):
    """Load the toxicity learner, forced to fp32 for CPU inference."""
    super().__init__(*args, **kwargs)
    model_dir = text.Path(ToxicityclassifierConfig.model_folder)
    learner = text.load_learner(model_dir, 'text_toxicity.pkl')
    self.learner = learner.to_fp32()
import re, psycopg2, os
from fastai.text import load_learner
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()

path = Path(__file__).parent
learn = load_learner("./", path / 'models/deep_poet')
URI = os.getenv("URI")


def generate_poem(init, length, temp):
    """Generate a poem continuation of `length` words from seed `init`."""
    return learn.predict(init, n_words=length, temperature=temp)


def getMax(poem):
    """Return the highest repetition count among words longer than 3 chars.

    Used as a crude repetition metric for generated poems.
    """
    poem = re.sub('\n', "", poem)
    # FIX: previously shadowed the builtins `list` and `max`; behavior is
    # unchanged, only the local names differ.
    words = poem.split(" ")
    occ = {}
    highest = 0
    for word in words:
        if len(word) > 3:
            occ[word] = occ.get(word, 0) + 1
            if occ[word] > highest:
                highest = occ[word]
    return highest


def savePoem(poem, init, length, temp, counter, repetition_tolerance):
    """Persist a poem and its generation parameters to Postgres."""
    connection = psycopg2.connect(URI)  # .env uri
    cursor = connection.cursor()
path = Path(__file__).parent


async def download_file(url, dest):
    """Download `url` to `dest` unless the file already exists."""
    if dest.exists():
        return
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            data = await response.read()
            with open(dest, 'wb') as f:
                f.write(data)


# FIX: the coroutine was called without being awaited, so the download
# never actually ran; drive it to completion on the event loop before
# trying to load the (possibly missing) export file.
import asyncio
asyncio.get_event_loop().run_until_complete(
    download_file(export_file_url, path / export_file_name))

learn = ftxt.load_learner(path, file = export_file_name)


def generatequote():
    """Sample the LM until a generated chunk ends with a closing quote."""
    quotes = []
    while quotes == []:
        idea = learn.predict("xxbos", n_words=20, temperature=0.75)
        ideas = idea.split("xxbos")
        if ideas[1][-1] == "”":
            quotes.append(ideas[1])
    message = quotes[0][:]
    message = '“' + message[3:].capitalize()
    message = message.replace(' . ', '.')
    message = message.replace(' ,', ',')
def load(self):
    """Load the learner from the model mount and flag readiness."""
    learner = load_learner("/mnt/models")
    self.model = learner
    self.ready = True
def init():
    """Azure ML scoring entry point: load the registered classifier.

    Populates the module-level `learn` used by the scoring function.
    """
    global learn
    print("model initialized" + time.strftime("%H:%M:%S"))
    # Resolve the registered model's on-disk path inside the service.
    model_path = Model.get_model_path(model_name='saps_classification')
    # model = load(model_path)
    learn = load_learner(model_path, '')
# Demonstrate how to load a model and predict with it: # load a model from fastai.text import load_learner dataset_size = "large" checkpoint_key = "AWD_LSTM-clean-5_15" print(f"Processing {dataset_size} dataset.", flush=True) path = f"datasets/{dataset_size}" print("Loading the model", flush=True) learn = load_learner(path=path) print("Done loading the model", flush=True) probabilities = learn.predict("Hey, how is it going?") print(f"P(is_bullying)={probabilities[2][1]}")
from fastai.text import load_learner, defaults
import torch
from flask import Blueprint, jsonify, request

api = Blueprint('api', __name__)
# Force CPU inference for the serving process.
defaults.device = torch.device('cpu')
model = load_learner(path='.', file='export_32.pkl')


@api.route('/test')
def test():
    # Simple liveness check for the blueprint.
    return jsonify('api test')


@api.route('/predict', methods=['POST'])
def predict():
    # Expects JSON body {"text": ...}; returns label plus its probability.
    input = request.json
    text = input['text']
    # predict() -> (category, class-index tensor, probability tensor).
    cat, ten, score = model.predict(text)
    return jsonify({
        'label': cat.__str__(),
        'score': score[ten].item(),
        'model': 'fastai'
    })
f.write(raw[0] + '\t' + idx_to_label[out] + '\t' + raw[2] + '\n') macro_avg = classification_report(answer, baseline, target_names=label_names, output_dict=True)['macro avg'] print('baseline score: ') print('\tprecision\trecall\t\tf1-score') print('\t%.4f\t\t%.4f\t\t%.4f' % (macro_avg['precision'], macro_avg['recall'], macro_avg['f1-score'])) print() m1 = model('model1') m1.load_model() m2 = load_learner('./', 'model_data/model2.pkl') model1_out = [] model2_out = [] model3_out = [] with multiprocessing.Pool(pool_num) as p: models_out = p.map(predict, test[1]) model1_out = [out for out, _, _ in models_out] model2_out = [out for _, out, _ in models_out] model3_out = [out for _, _, out in models_out] with open('../output/model1_output.txt', 'w') as f: for raw, out in zip(raw_text, model1_out): f.write(raw[0] + '\t' + idx_to_label[out] + '\t' + raw[2] + '\n')
def load_learner(path='models', file='finalv1.model') -> "AI":
    """Load an exported fastai learner and rebrand it as an AI instance.

    Args:
        path: Directory containing the exported model.
        file: Exported model file name.

    Returns:
        The loaded learner with its class swapped to AI.
    """
    # FIX: this wrapper shadows fastai's load_learner, so the unqualified
    # call recursed into itself forever (RecursionError). Import the real
    # implementation under a private alias and call that instead.
    from fastai.text import load_learner as _fastai_load_learner
    nlp_model: RNNLearner = _fastai_load_learner(path=path, file=file)
    nlp_model.__class__ = AI
    return nlp_model