def test_bigru(self):
        """End-to-end smoke test of a 'bigru' text classifier trained with
        ktrain's autofit, covering training metrics, top losses, weight
        decay, model save/load, validation, and the Predictor round-trip.
        """
        trn, val, preproc = txt.texts_from_array(x_train=self.trn[0], 
                                                 y_train=self.trn[1],
                                                 x_test=self.val[0], 
                                                 y_test=self.val[1],
                                                 class_names=self.classes,
                                                 preprocess_mode='standard',
                                                 maxlen=350, 
                                                 max_features=35000,
                                                 ngram_range=1)
        model = txt.text_classifier('bigru', train_data=trn, preproc=preproc)
        learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=32, eval_batch_size=EVAL_BS)
        lr = 0.01
        hist = learner.autofit(lr, 1)

        # test training results: LR peaked at the requested rate and
        # momentum cycled between 0.85 and 0.95 during autofit
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(max(hist.history[VAL_ACC_NAME]), 0.89)
        self.assertAlmostEqual(max(hist.history['momentum']), 0.95)
        self.assertAlmostEqual(min(hist.history['momentum']), 0.85)


        # test top losses (val_data=None — presumably falls back to the
        # learner's own validation set; confirm against ktrain docs)
        obs = learner.top_losses(n=1, val_data=None)
        self.assertIn(obs[0][0], list(range(len(val[0]))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=None)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)


        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test validate: each row of the confusion matrix should be
        # dominated by its diagonal entry
        cm = learner.validate()
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip; in this fixture the class at
        # index 3 is 'soc.religion.christian' (pinned by the asserts below)
        p = ktrain.get_predictor(learner.model, preproc, batch_size=EVAL_BS)
        self.assertEqual(p.predict([TEST_DOC])[0], 'soc.religion.christian')
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor', batch_size=EVAL_BS)
        self.assertEqual(p.predict([TEST_DOC])[0], 'soc.religion.christian')
        self.assertEqual(p.predict(TEST_DOC), 'soc.religion.christian')
        self.assertEqual(np.argmax(p.predict_proba([TEST_DOC])[0]), 3)
        self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
Beispiel #2
0
    def test_fasttext_chinese(self):
        """Train a 'fasttext' classifier on pipe-separated Chinese hotel
        reviews and smoke-test the surrounding ktrain workflow (top losses,
        weight decay, save/load, validation, Predictor round-trip).
        """
        trn, val, preproc = txt.texts_from_csv(
            "./text_data/chinese_hotel_reviews.csv",
            "content",
            label_columns=["pos", "neg"],
            max_features=30000,
            maxlen=75,
            preprocess_mode="standard",
            sep="|",
        )
        model = txt.text_classifier("fasttext",
                                    train_data=trn,
                                    preproc=preproc)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=32)
        lr = 5e-3
        hist = learner.autofit(lr, 10)

        # test training results
        self.assertAlmostEqual(max(hist.history["lr"]), lr)
        self.assertGreater(max(hist.history[VAL_ACC_NAME]), 0.85)

        # test top losses
        obs = learner.top_losses(n=1, val_data=None)
        self.assertIn(obs[0][0], list(range(len(val[0]))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=None)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test validate: confusion matrix should be diagonal-dominant
        cm = learner.validate(class_names=preproc.get_classes())
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip
        p = ktrain.get_predictor(learner.model, preproc)
        self.assertEqual(p.predict([TEST_DOC])[0], "pos")
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor")
        self.assertEqual(p.predict(TEST_DOC), "pos")
        self.assertEqual(np.argmax(p.predict_proba([TEST_DOC])[0]), 0)
        self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
Beispiel #3
0
 def loadmodel(self, path=None):
     """Load a saved ktrain predictor into ``self.model``.

     Runs inside this instance's TF graph and session so the loaded
     weights are bound to the correct context.

     Args:
         path: directory of a previously saved predictor. When ``None``,
             no load is attempted — NOTE(review): the method still logs
             success and returns True in that case; confirm intended.

     Returns:
         True on success (or no-op), False if loading raised.
     """
     with self.graph.as_default():
         with self.session.as_default():
             try:
                 if path is not None:
                     # load the model
                     self.model = ktrain.load_predictor(path)
                 logging.info("Bert predictor loaded: ")
                 return True
             except Exception as e:
                 # logging.exception already records the message and full
                 # traceback; the previous extra print(e) duplicated it.
                 logging.exception(e)
                 return False
Beispiel #4
0
    def test_bert(self):
        """Train a BERT classifier for one one-cycle epoch and smoke-test
        the ktrain workflow.  The learner is built without val_data, so
        training history is checked via ACC_NAME and validation data is
        passed explicitly to top_losses/validate below.
        """
        trn, val, preproc = txt.texts_from_array(
            x_train=self.trn[0],
            y_train=self.trn[1],
            x_test=self.val[0],
            y_test=self.val[1],
            class_names=self.classes,
            preprocess_mode="bert",
            maxlen=350,
            max_features=35000,
        )
        model = txt.text_classifier("bert", train_data=trn, preproc=preproc)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     batch_size=6,
                                     eval_batch_size=EVAL_BS)
        lr = 2e-5
        hist = learner.fit_onecycle(lr, 1)

        # test training results
        self.assertAlmostEqual(max(hist.history["lr"]), lr)
        self.assertGreater(max(hist.history[ACC_NAME]), 0.7)

        # test top losses (BERT inputs are paired arrays, hence val[0][0])
        obs = learner.top_losses(n=1, val_data=val)
        self.assertIn(obs[0][0], list(range(len(val[0][0]))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=val)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test validate: confusion matrix should be diagonal-dominant
        cm = learner.validate(val_data=val)
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip; class index 3 is
        # 'soc.religion.christian' in this fixture
        p = ktrain.get_predictor(learner.model, preproc, batch_size=EVAL_BS)
        self.assertEqual(p.predict([TEST_DOC])[0], "soc.religion.christian")
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor", batch_size=EVAL_BS)
        self.assertEqual(p.predict(TEST_DOC), "soc.religion.christian")
        self.assertEqual(np.argmax(p.predict_proba([TEST_DOC])[0]), 3)
        self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
    def test_cora(self):
        """End-to-end test of GraphSAGE link prediction on the Cora
        citation graph: training metrics, top losses, weight decay,
        save/load, validation, and the Predictor round-trip.
        """
        (trn, val,
         preproc) = gr.graph_links_from_csv('graph_data/cora/cora.content',
                                            'graph_data/cora/cora.cites',
                                            sep='\t')

        learner = ktrain.get_learner(model=gr.graph_link_predictor(
            'graphsage', trn, preproc),
                                     train_data=trn,
                                     val_data=val)

        lr = 0.01
        hist = learner.fit_onecycle(lr, 5)

        # test training results
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(max(hist.history[VAL_ACC_NAME]), 0.78)

        # test top losses
        obs = learner.top_losses(n=1, val_data=val)
        self.assertIn(obs[0][0], list(range(val.targets.shape[0])))
        learner.view_top_losses(preproc=preproc, n=1, val_data=val)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test validate: confusion matrix should be diagonal-dominant.
        # (was called twice with the first result discarded — the
        # redundant extra validation pass has been removed)
        cm = learner.validate(val_data=val)
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip over the graph's edges
        p = ktrain.get_predictor(learner.model, preproc)
        self.assertIn(
            p.predict(preproc.G, list(preproc.G.edges()))[:5][0],
            preproc.get_classes())
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(
            p.predict(preproc.G, list(preproc.G.edges()))[:5][0],
            preproc.get_classes()[1])
Beispiel #6
0
    def test_fasttext_chinese(self):
        """Older variant of the Chinese fasttext test.

        NOTE(review): unlike the sibling version, text_classifier() is
        called without preproc, the history key is the literal 'val_acc'
        (pre-TF2 naming), and get_weight_decay() returns a 2-element list
        — this appears to target an older ktrain API; confirm the version.
        """
        trn, val, preproc = txt.texts_from_csv(
            './text_data/chinese_hotel_reviews.csv',
            'content',
            label_columns=["pos", "neg"],
            max_features=30000,
            maxlen=75,
            preprocess_mode='standard',
            sep='|')
        model = txt.text_classifier('fasttext', train_data=trn)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=32)
        lr = 5e-3
        hist = learner.autofit(lr, 10)

        # test training results
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(max(hist.history['val_acc']), 0.85)

        # test top losses
        obs = learner.top_losses(n=1, val_data=None)
        self.assertIn(obs[0][0], list(range(len(val[0]))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=None)

        # test weight decay: per-layer list, disabled by default
        self.assertEqual(len(learner.get_weight_decay()), 2)
        self.assertEqual(learner.get_weight_decay()[0], None)
        learner.set_weight_decay(1e-4)
        self.assertAlmostEqual(learner.get_weight_decay()[0], 1e-4)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test validate: confusion matrix should be diagonal-dominant
        cm = learner.validate()
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip
        p = ktrain.get_predictor(learner.model, preproc)
        self.assertEqual(p.predict([TEST_DOC])[0], 'pos')
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(p.predict(TEST_DOC), 'pos')
        self.assertEqual(np.argmax(p.predict_proba([TEST_DOC])[0]), 0)
        self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
Beispiel #7
0
    def test_classification(self):
        """Train an MLP tabular classifier on the Titanic data and
        smoke-test the ktrain workflow (top losses, weight decay,
        save/load, evaluation, Predictor round-trip).
        """
        train_df = pd.read_csv("tabular_data/train.csv", index_col=0)
        # Drop the free-text columns.  The old positional form
        # drop("Name", 1) relied on a positional `axis` argument that was
        # deprecated in pandas 1.0 and removed in pandas 2.0.
        train_df = train_df.drop(columns=["Name", "Ticket"])
        trn, val, preproc = tabular.tabular_from_df(train_df,
                                                    label_columns="Survived",
                                                    random_state=42)
        model = tabular.tabular_classifier("mlp", trn)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=32)

        lr = 0.001
        hist = learner.fit_onecycle(lr, 30)

        # test training results
        self.assertAlmostEqual(max(hist.history["lr"]), lr)
        self.assertGreater(max(hist.history[VAL_ACC_NAME]), 0.8)

        # test top losses
        obs = learner.top_losses(n=1, val_data=val)
        self.assertIn(obs[0][0], list(range(val.df.shape[0])))
        learner.view_top_losses(preproc=preproc, n=1, val_data=val)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test validate: confusion matrix should be diagonal-dominant
        cm = learner.evaluate(val)
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip
        p = ktrain.get_predictor(learner.model, preproc)

        predicted_label = p.predict(train_df)[0]
        self.assertIn(predicted_label, preproc.get_classes())
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor")
        self.assertEqual(p.predict(train_df)[0], predicted_label)
Beispiel #8
0
    def test_transformers_api_2(self):
        """Exercise the Transformer preprocessor API with DistilBERT:
        preprocess, train one one-cycle epoch, then smoke-test top losses,
        weight decay, save/load (via temp dirs), validation, and the
        Predictor round-trip.
        """
        MODEL_NAME = 'distilbert-base-uncased'
        preproc = txt.Transformer(MODEL_NAME, maxlen=500, classes=self.classes)
        trn = preproc.preprocess_train(self.trn[0], self.trn[1])
        val = preproc.preprocess_test(self.val[0], self.val[1])
        model = preproc.get_classifier()
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=6,
                                     eval_batch_size=EVAL_BS)
        lr = 5e-5
        hist = learner.fit_onecycle(lr, 1)

        # test training results
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(max(hist.history[VAL_ACC_NAME]), 0.9)

        # test top losses
        obs = learner.top_losses(n=1, val_data=None)
        self.assertIn(obs[0][0], list(range(len(val.x))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=None)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model (transformers save to a directory)
        tmp_folder = ktrain.imports.tempfile.mkdtemp()
        learner.save_model(tmp_folder)
        learner.load_model(tmp_folder)

        # test validate: confusion matrix should be diagonal-dominant
        cm = learner.validate()
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor save/load round-trip; class index 3 is
        # 'soc.religion.christian' in this fixture
        p = ktrain.get_predictor(learner.model, preproc, batch_size=EVAL_BS)
        self.assertEqual(p.predict([TEST_DOC])[0], 'soc.religion.christian')
        tmp_folder = ktrain.imports.tempfile.mkdtemp()
        p.save(tmp_folder)
        p = ktrain.load_predictor(tmp_folder, batch_size=EVAL_BS)
        self.assertEqual(p.predict(TEST_DOC), 'soc.religion.christian')
        self.assertEqual(np.argmax(p.predict_proba([TEST_DOC])[0]), 3)
        self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
Beispiel #9
0
    def test_regression(self):
        """Train an MLP tabular regressor to predict age from the adults
        dataset, checking MAE, top losses, weight decay, save/load, and
        the Predictor round-trip.
        """
        trn, val, preproc = tabular.tabular_from_csv(
            "tabular_data/adults.csv",
            label_columns=["age"],
            is_regression=True,
            random_state=42,
        )
        model = tabular.tabular_regression_model("mlp", trn)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=128)

        lr = 0.001
        hist = learner.autofit(lr, 5)

        # test training results
        self.assertAlmostEqual(max(hist.history["lr"]), lr)
        self.assertLess(min(hist.history["val_mae"]), 8.0)

        # test top losses
        obs = learner.top_losses(n=1, val_data=val)
        self.assertIn(obs[0][0], list(range(val.df.shape[0])))
        learner.view_top_losses(preproc=preproc, n=1, val_data=val)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test validate (smoke only: regression has no confusion matrix,
        # so the returned value is not inspected here)
        cm = learner.evaluate(val)

        # test predictor save/load round-trip: predictions must be
        # reproducible after reload
        p = ktrain.get_predictor(learner.model, preproc)

        train_df = pd.read_csv("tabular_data/adults.csv")
        age = p.predict(train_df)[0][0]
        self.assertLess(age, 100)
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor")
        self.assertAlmostEqual(p.predict(train_df)[0][0], age)
    def model_pred(self, message):
        """Classify *message* with the saved DistilBERT predictor.

        Returns:
            [confidence_percent, [predicted_label, 'ticket_gen']]

        NOTE(review): the predictor is reloaded from disk on every call,
        which is expensive — consider caching it on the instance.
        """
        print("loading model")
        reloaded_predictor = ktrain.load_predictor(
            'model/distilbert_model_40Epochs')
        print("predicting..")

        result = reloaded_predictor.predict(message)
        results = [result, 'ticket_gen']
        predicts = reloaded_predictor.predict_proba(message)
        print("prediction done")
        # results = self.decode(le,predicts)
        # highest class probability, scaled to a percentage.
        # NOTE(review): rounding happens before the *100 scaling, so cs can
        # carry more decimal places than expected — confirm intended.
        sp = np.max(predicts)
        cs = round(sp, 6) * 100
        print('Confidence Score : ' + str(cs) + "%")

        response = [cs, results]
        return response
    def test_linreg(self):
        """Train a linear text-regression model ('linreg') with trigram
        features, checking MAE, top losses, weight decay, save/load, and
        the Predictor round-trip (explain() is None for regression).
        """
        trn, val, preproc = txt.texts_from_array(x_train=self.trn[0],
                                                 y_train=self.trn[1],
                                                 x_test=self.val[0],
                                                 y_test=self.val[1],
                                                 preprocess_mode='standard',
                                                 ngram_range=3,
                                                 maxlen=200,
                                                 max_features=35000)
        model = txt.text_regression_model('linreg',
                                          train_data=trn,
                                          preproc=preproc)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=256)
        lr = 0.01
        hist = learner.fit_onecycle(lr, 10)

        # test training results
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertLess(min(hist.history['val_mae']), 12)

        # test top losses
        obs = learner.top_losses(n=1, val_data=None)
        self.assertIn(obs[0][0], list(range(len(val[0]))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=None)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test predictor save/load round-trip
        p = ktrain.get_predictor(learner.model, preproc)
        self.assertGreater(p.predict([TEST_DOC])[0], 100)
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertGreater(p.predict([TEST_DOC])[0], 100)
        self.assertIsNone(p.explain(TEST_DOC))
    def test_distilbert(self):
        """Train a DistilBERT text-regression model for one one-cycle
        epoch, checking MAE, top losses, weight decay, save/load (temp
        dirs), and the Predictor round-trip.
        """
        trn, val, preproc = txt.texts_from_array(x_train=self.trn[0],
                                                 y_train=self.trn[1],
                                                 x_test=self.val[0],
                                                 y_test=self.val[1],
                                                 preprocess_mode='distilbert',
                                                 maxlen=75)
        model = txt.text_regression_model('distilbert',
                                          train_data=trn,
                                          preproc=preproc)
        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=100)
        lr = 5e-5
        hist = learner.fit_onecycle(lr, 1)

        # test training results
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertLess(min(hist.history['val_mae']), 16)

        # test top losses
        obs = learner.top_losses(n=1, val_data=None)
        self.assertIn(obs[0][0], list(range(len(val.x))))
        learner.view_top_losses(preproc=preproc, n=1, val_data=None)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model (transformers need preproc on reload)
        tmp_folder = ktrain.imports.tempfile.mkdtemp()
        learner.save_model(tmp_folder)
        learner.load_model(tmp_folder, preproc=preproc)

        # test predictor save/load round-trip
        p = ktrain.get_predictor(learner.model, preproc, batch_size=64)
        self.assertGreater(p.predict([TEST_DOC])[0], 1)
        tmp_folder = ktrain.imports.tempfile.mkdtemp()
        p.save(tmp_folder)
        p = ktrain.load_predictor(tmp_folder, batch_size=64)
        self.assertGreater(p.predict([TEST_DOC])[0], 1)
        self.assertIsNone(p.explain(TEST_DOC))
Beispiel #13
0
    def test_ner(self):
        """Train a 'bilstm-bert' sequence tagger for one epoch and verify
        NER predictions ('John Smith' -> I-PER), including merged-token
        output with character offsets after a predictor reload.
        """
        model = txt.sequence_tagger('bilstm-bert',
                                    self.preproc,
                                    bert_model='bert-base-cased')
        learner = ktrain.get_learner(model,
                                     train_data=self.trn,
                                     val_data=self.val,
                                     batch_size=128)
        lr = 0.01
        hist = learner.fit(lr, 1)

        # test training results (LR assert disabled upstream; validate()
        # returns a scalar F1-style score for sequence taggers)
        #self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(learner.validate(), 0.79)

        # test top losses
        obs = learner.top_losses(n=1)
        self.assertIn(obs[0][0], list(range(len(self.val.x))))
        learner.view_top_losses(n=1)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test predictor: 'Smith' (second-to-last token before the period)
        # must be tagged I-PER, before and after the save/load round-trip
        SENT = 'There is a man named John Smith.'
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
        merged_prediction = p.predict(SENT,
                                      merge_tokens=True,
                                      return_offsets=True)
        self.assertEqual(merged_prediction[0][0], 'John Smith')
        self.assertEqual(merged_prediction[0][1], 'PER')
        self.assertEqual(merged_prediction[0][2], (21, 31))
    def load(path):
        """Reconstruct a DocumentClassifier previously saved to *path*.

        The directory must contain ``params.json``; depending on the saved
        classifier type it also holds either a ktrain ``predictor``
        directory (BERT) or a ``pipeline.pickle`` (sklearn-style).
        """
        assert os.path.isdir(path), "Path must be a directory to load"

        with open(os.path.join(path, 'params.json'), 'r') as fh:
            loaded_params = json.load(fh)

        classifier = DocumentClassifier(loaded_params)

        if loaded_params['clf'] == 'BERT':
            # ktrain is only needed for BERT-backed classifiers, so import
            # it lazily here.
            import ktrain
            classifier.predictor = ktrain.load_predictor(
                os.path.join(path, 'predictor'))
        else:
            with open(os.path.join(path, 'pipeline.pickle'), 'rb') as fh:
                classifier.pipeline = pickle.load(fh)

        classifier.fitted = True

        return classifier
Beispiel #15
0
    def test_ner(self):
        """Train a 'bilstm-crf' sequence tagger (fastText crawl vectors)
        for one epoch and verify NER predictions before and after a
        predictor save/load round-trip.
        """
        wv_url = (
            "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.vec.gz"
        )
        model = txt.sequence_tagger("bilstm-crf",
                                    self.preproc,
                                    wv_path_or_url=wv_url)
        learner = ktrain.get_learner(model,
                                     train_data=self.trn,
                                     val_data=self.val,
                                     batch_size=128)
        lr = 0.01
        hist = learner.fit(lr, 1)

        # test training results (LR assert disabled upstream; validate()
        # returns a scalar F1-style score for sequence taggers)
        # self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(learner.validate(), 0.65)

        # test top losses
        obs = learner.top_losses(n=1)
        self.assertIn(obs[0][0], list(range(len(self.val.x))))
        learner.view_top_losses(n=1)

        # test weight decay: disabled by default, then enabled
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test predictor: 'Smith' must be tagged I-PER before and after
        # the save/load round-trip
        SENT = "There is a man named John Smith."
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], "I-PER")
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor")
        self.assertEqual(p.predict(SENT)[-2][1], "I-PER")
Beispiel #16
0
# Import Required Modules
import requests
from bs4 import BeautifulSoup as bs
from googlesearch import search
from ktrain import load_predictor

# Flag signalling whether the BERT predictor is available (1 = loaded).
is_predictor = 1
print("Loading BERT Model...")
# Load BERT Model; fall back gracefully when the saved model is missing.
try:
    predictor = load_predictor('model/bert_model')
    print("Model Loaded Successfully")
except Exception:
    # Fixed: the flag was misspelled ('is_precictor'), so load failures
    # never actually cleared is_predictor.  Also narrowed the bare
    # 'except:' so Ctrl-C is not swallowed, and fixed the 'Loded' typo.
    is_predictor = 0
    print("Model not found")


# [ Function to get Code snippets from StackOverFlow ]
def get_stackoverflow_codes(link):
    res = requests.get(link)  # get HTML template
    soup = bs(res.text, "html.parser")
    alla = soup.select(".answer")

    # Function to get codes
    def get_answers(ans):
        fin_ans = []
        for i in range(len(ans)):
            user = ""
            code_section = ans[i].select(".js-post-body")[0]
            pres = code_section.select("pre")
            codes = []
Beispiel #17
0
from shapely.geometry import Polygon, MultiPolygon, Point
import numpy as np
import random

from geopy.distance import geodesic
import plotly.io as pio
from urllib.request import urlopen
import json
import os
# Pin CUDA device enumeration to PCI bus order and restrict TF to GPU 0.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Model directory: MODEL_FILEPATH env var, defaulting to <repo>/model/v2
# (two directory levels above this file).
model_path = os.getenv(
    "MODEL_FILEPATH",
    os.path.join(os.path.dirname(os.path.dirname(__file__)), "model/v2"))
import ktrain
predictor = ktrain.load_predictor(model_path)
pio.renderers.default = "browser"
import re
import dataset
import psycopg2
import os  # NOTE(review): 'os' is already imported above — redundant
# re-import, harmless but worth cleaning up.

GENERATE_PLOTS = False
TABLE = 'articles_v2'
# #
db_config = {
    "user": "******",
    "password": "******",
    "host": "127.0.0.1",
    "port": "5432",
    "database": "cvwire",
Beispiel #18
0
# due to hardware limitations

import ktrain
from ktrain import text
import glob

# Train a BERT classifier on the labeled IMDB data, then use it to
# pseudo-label the unsupervised reviews.
(x_train, y_train), (x_test, y_test), preproc = text.texts_from_folder(
    'aclImdb',
    maxlen=500,
    preprocess_mode='bert',
    train_test_names=['train', 'test'],
    classes=['pos', 'neg'])
model = text.text_classifier('bert', (x_train, y_train), preproc=preproc)
learner = ktrain.get_learner(model,
                             train_data=(x_train, y_train),
                             val_data=(x_test, y_test),
                             batch_size=6)
learner.fit_onecycle(2e-5, 2)  # train for 2 epochs
predictor = ktrain.get_predictor(model, preproc)
predictor.save('/models/predictor')

predictor = ktrain.load_predictor('/models/predictor')
dataset = 'aclImdb/train/unsup'
file_list = glob.glob(dataset + "/*.txt")
# Write one predicted label per review.  Context managers replace the old
# manual open()/close(), which leaked one file handle per review and left
# train_labels.txt open if a prediction raised.
with open("train_labels.txt", "w") as results:
    for file in file_list:
        with open(file, "r", encoding="utf-8") as review_file:
            review_text = review_file.readlines()[0]
        predict = predictor.predict(review_text)
        results.write(predict + '\n')
Beispiel #19
0
 def __init__(self):
     """Load the saved XLNet predictor and build a Transformer preprocessor.

     NOTE(review): MODEL_NAME comes from module scope and class_names is
     the binary label set [0, 1] — confirm both match the saved predictor.
     """
     self.predictor = ktrain.load_predictor(
         'gsa_server/resources/xlnet_6epoch_3e-5')
     self.t = text.Transformer(MODEL_NAME, maxlen=500, class_names=[0, 1])
Beispiel #20
0
"""

predictor = ktrain.get_predictor(learner.model, preproc=t)

#predictor.predict('F**k you.')

# predicted probability scores for each category
#predictor.predict_proba('Jesus Christ is the central figure of Christianity.')

#predictor.get_classes()
"""As expected, `soc.religion.christian` is assigned the highest probability.

Let's invoke the `explain` method to see which words contribute most to the classification.

We will need a forked version of the **eli5** library that supports TensorFlow Keras, so let's install it first.
"""

#!pip3 install -q git+https://github.com/amaiya/eli5@tfkeras_0_10_1

#predictor.explain('Jesus Christ is the central figure in Christianity.')
"""The words in the darkest shade of green contribute most to the classification and agree with what you would expect for this example.

We can save and reload our predictor for later deployment.
"""

predictor.save('./distilbert_predictor')

reloaded_predictor = ktrain.load_predictor('./distilbert_predictor')

print(reloaded_predictor.predict('My computer monitor is really blurry.'))
def loader_distilbert():
    """Load and return the saved DistilBERT category predictor."""
    predictor_dir = './models/distillbert/category_distilbert_predictor'
    return ktrain.load_predictor(predictor_dir)
        'what a beautiful movie. great plot. acting was good. will see it again']


predictor.predict(data)

#return_proba = True means it will give the prediction probabilty for each class

predictor.predict(data, return_proba=True)

#classes available

predictor.get_classes()

# !zip -r /content/bert.zip /content/bert

## Deploy Model

# #loading the model

predictor_load = ktrain.load_predictor('/content/drive/My Drive/ColabData/bert')

# #predicting the data

# predictor_load.predict(data)

## References

- [`ktrain` module](https://github.com/amaiya/ktrain)
- [Sentiment Classification Using Bert](https://kgptalkie.com/sentiment-classification-using-bert/)
- [當Bert遇上Keras:這可能是Bert最簡單的打開姿勢](http://www.ipshop.xyz/15376.html)
- [進擊的 BERT:NLP 界的巨人之力與遷移學習](https://leemeng.tw/attack_on_bert_transfer_learning_in_nlp.html)
from django.shortcuts import render, redirect
from django.contrib import messages
from django.http import JsonResponse
import praw
import ktrain
import os
THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))
my_file = os.path.join(THIS_FOLDER, 'distilbert_predictor_final')
predictor = ktrain.load_predictor(my_file)
# Create your views here.


def get_submission_from_url(submission_url):
    """Resolve a Reddit submission from a possibly scheme-less URL.

    Accepts URLs starting with 'www' or 'reddit' and normalizes them to
    https://; returns the praw Submission, or None when the URL cannot
    be normalized to an https:// address.
    """
    # NOTE(review): API credentials are hard-coded in source — they should
    # be rotated and moved to configuration/environment variables.
    reddit_client = praw.Reddit(client_id='2uReEcmijpNWnw',
                                client_secret='V0PCW7O1S6r3prN6ieRr4LVPGKo', user_agent='test reddit app')

    if submission_url.startswith('www'):
        submission_url = "https://" + submission_url
    elif submission_url.startswith('reddit'):
        submission_url = "https://www." + submission_url

    if not submission_url.startswith('https://'):
        return None
    return reddit_client.submission(url=submission_url)


def get_data_from_post(submission_url):

    submission = get_submission_from_url(submission_url)
    if submission is not None:
        full_text = submission.title + submission.selftext
Beispiel #24
0
from flask import Flask,render_template,url_for,request
import pandas as pd
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import joblib
import ktrain

# load the model from disk: 'bert' is a ktrain predictor directory, not a
# pickle (the older pickle/vectorizer loading is kept below for reference)
filename = 'bert'
#clf = pickle.load(open(filename, 'rb'))
#cv=pickle.load(open('tranform.pkl','rb'))
model = ktrain.load_predictor(filename)
app = Flask(__name__)

@app.route('/')
def home():
	# Serve the landing page template.
	return render_template('home.html')

@app.route('/predict',methods=['POST'])
def predict():
    """Handle POST /predict.

    NOTE(review): the body is truncated in this chunk — only the
    commented-out CountVectorizer/Naive-Bayes training pipeline is visible;
    the actual prediction code lies beyond the visible lines.
    """
#	df= pd.read_csv("spam.csv", encoding="latin-1")
#	df.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)
#	# Features and Labels
#	df['label'] = df['class'].map({'ham': 0, 'spam': 1})
#	X = df['message']
#	y = df['label']
#
#	# Extract Feature With CountVectorizer
#	cv = CountVectorizer()
#	X = cv.fit_transform(X) # Fit the Data
Beispiel #25
0
import ktrain
import os

# Hide all GPUs so TensorFlow/CUDA runs this predictor on CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'

# Load the serialized ktrain predictor once at module import.
predictor = ktrain.load_predictor('distilbert')


def get_prediction(x):
    """Run the module-level predictor on a single input and return its label."""
    return predictor.predict([x])[0]
Beispiel #26
0
# Shuffle dataset to get a good variation over the labels by first zipping
# the list together, then shuffle and unzip again
shuffle_zip = list(zip(text, labels))
np.random.shuffle(shuffle_zip)
text_shuffled, labels_shuffled = zip(*shuffle_zip)
text_shuffled = list(text_shuffled)
labels_shuffled = list(labels_shuffled)

# Take the last 20% of the shuffled data as the held-out test set.
# NOTE(review): the first 80% (train/validation) is not used in this chunk.
test_split = int(0.8 * len(text_shuffled))
test_text = text_shuffled[test_split:]
test_labels = labels_shuffled[test_split:]

# Load the BERT classifer from memory (autograph warnings silenced first)
tf.autograph.set_verbosity(0)
classifier = ktrain.load_predictor(
    os.path.join(model_path, model_name, "Final", "BERT_model"))

# Make predictions on the test set; column 1 is the positive-class probability
test_predicted_prob = classifier.predict_proba(test_text)[:, 1]
tf.keras.backend.clear_session(
)  # Clear session to prevent memory leak from TF

# Determine the final predicted class labels for this model by thresholding
test_predicted = np.where(test_predicted_prob < np.float64(threshold), 0, 1)

# Check if results folder exists for this case
if not os.path.exists(result_path):
    os.makedirs(result_path)

# Plot the ROC curve
fig, ax = plt.subplots()
Beispiel #27
0
def load_model_sentiment(model_path):
    """Load a persisted ktrain predictor from *model_path* and return it."""
    loaded_predictor = ktrain.load_predictor(model_path)
    return loaded_predictor
Beispiel #28
0
import ktrain

# Load the saved distilbert ktrain predictor once at import time.
predictor = ktrain.load_predictor("distilbert")


def get_prediction(x):
    """Return the module-level predictor's label for the single sample *x*."""
    (first,) = predictor.predict([x])
    return first
    def test_folder(self):
        """End-to-end check of the images_from_folder workflow.

        Trains a pretrained-ResNet50 classifier on a small cat/dog folder
        dataset, then exercises top_losses, weight decay, model save/load,
        validate, and the predictor's folder/filename prediction APIs.
        """
        (trn, val, preproc) = vis.images_from_folder(
            datadir='image_data/image_folder',
            data_aug=vis.get_data_aug(horizontal_flip=True),
            classes=['cat', 'dog'],
            train_test_names=['train', 'valid'])
        model = vis.image_classifier('pretrained_resnet50', trn, val)
        learner = ktrain.get_learner(model=model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=1)
        learner.freeze()
        hist = learner.autofit(1e-3, monitor='val_acc')

        # test train
        self.assertAlmostEqual(max(hist.history['lr']), 1e-3)
        # acc == 0.5 on a 2-class problem means the model never left chance
        # level, i.e. an unlucky initialization rather than a real failure.
        if max(hist.history['acc']) == 0.5:
            raise Exception('unlucky initialization: please run test again')
        self.assertGreater(max(hist.history['acc']), 0.8)

        # test top_losses
        obs = learner.top_losses(n=1, val_data=val)
        print(obs)
        if obs:
            self.assertIn(obs[0][0], list(range(U.nsamples_from_data(val))))
        else:
            # top_losses can be empty only if validation was perfect
            self.assertEqual(max(hist.history['val_acc']), 1)

        # test weight decay
        # NOTE(review): 54 is presumably the layer count of this ResNet50
        # variant — confirm if the backbone ever changes.
        self.assertEqual(len(learner.get_weight_decay()), 54)
        self.assertEqual(learner.get_weight_decay()[0], None)
        learner.set_weight_decay(1e-4)
        self.assertAlmostEqual(learner.get_weight_decay()[0], 1e-4)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test validate: the confusion matrix diagonal should dominate
        cm = learner.validate(val_data=val)
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor
        p = ktrain.get_predictor(learner.model, preproc)
        r = p.predict_folder('image_data/image_folder/train/')
        print(r)
        self.assertEqual(r[0][1], 'cat')
        r = p.predict_proba_folder('image_data/image_folder/train/')
        self.assertEqual(np.argmax(r[0][1]), 0)
        r = p.predict_filename(
            'image_data/image_folder/train/cat/cat.11737.jpg')
        self.assertEqual(r, ['cat'])
        r = p.predict_proba_filename(
            'image_data/image_folder/train/cat/cat.11737.jpg')
        self.assertEqual(np.argmax(r), 0)

        # predictor round-trip: save, reload, and predict again
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        r = p.predict_filename(
            'image_data/image_folder/train/cat/cat.11737.jpg')
        self.assertEqual(r, ['cat'])
Beispiel #30
0
import pandas as pd
import ktrain
import sys

# CLI usage: <script> <predictor_path> <csv_path>
# NOTE(review): argv is consumed unchecked — missing arguments raise IndexError.
args = sys.argv[1:]
path_model = args[0]
path_predict = args[1]
# assumes the CSV has a 'Reviews' column — TODO confirm against callers
X_predict = pd.read_csv(path_predict)
predictor = ktrain.load_predictor(path_model)
print(predictor.predict(list(X_predict['Reviews'])))