def test_is_quote_in_datasource(self):
        """Check that a randomly picked quote is one of the datasource entries.

        The datasource file ``quotes.txt`` is parsed as JSON and the value
        returned by ``quotes.random_quote`` must be contained in the parsed
        data.
        """

        datasource = "quotes.txt"
        quotes_file_provider = DataProvider(datasource)
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(datasource, "r") as datasource_file:
            data_list = json.load(datasource_file)
        self.assertIn(quotes.random_quote(quotes_file_provider), data_list)
Esempio n. 2
0
from os import environ
from src.data_provider import DataProvider
from src.bm_cat_bot import BMCatBot
from flask import Flask

# Runtime configuration, read from environment variables with fallbacks.
# PORT defaults to 5000 when the PORT variable is not set.
PORT = int(environ.get('PORT', '5000'))
# Host and port used to build the public webhook base URL below.
BMCAT_HOST = environ.get('BM_CAT_HOST', '')
BMCAT_PORT = int(environ.get('BM_CAT_PORT', '8443'))
# '0.0.0.0' is the wildcard address (all IPv4 interfaces).
WEBHOOK_LISTEN = '0.0.0.0'
# NOTE(review): with BM_CAT_HOST unset this becomes "https://:8443" - confirm
# that the host variable is always provided in deployment.
WEBHOOK_URLBASE = "https://%s:%s" % (BMCAT_HOST, BMCAT_PORT)

# API key handed to the bot; 'api_key_stab' is the placeholder fallback.
BMCAT_APIKEY = environ.get('BM_CAT_API_KEY', 'api_key_stab')
# The webhook path is the API key itself, prefixed with a slash.
WEBHOOK_URLPATH = "/%s" % BMCAT_APIKEY

# Paths to the SSL certificate and private key; empty string when unset.
BMCAT_SSLCERT_PATH = environ.get('BM_CAT_SSL_CERTIFICATE_PATH', '')
BMCAT_PRIVATEKEY_PATH = environ.get('BM_CAT_PRIVATE_KEY_PATH', '')

# Flask application object for this module.
app = Flask(__name__)

# Relative path to the quotes file.
# NOTE(review): presumably resolved against the process working directory
# inside DataProvider - confirm.
quotes = '../quotes.txt'
quotes_file_provider = DataProvider(quotes)

# Bot instance built from the API key and the quotes data provider.
bmcat_bot = BMCatBot(BMCAT_APIKEY, quotes_file_provider)
Esempio n. 3
0
from os.path import join
from time import time

from src.data_provider import DataProvider
from src.model_define import model_and_loss
from config import Configs
from model_evaluation.weight_evaluation import test_on_benchmark


if __name__ == '__main__':
    # Training entry point: build the model, start the background data
    # loader, then train on one mini batch per iteration.

    c = Configs()

    model = model_and_loss(training=True)

    data_provider = DataProvider(c.training_data_folder, c.batch_size, c.training_dim, stride=c.stride,
                                 side=c.side, mixing_train=c.mixing_train, model_code=c.model_code)
    data_provider.start_loading()

    start_time = time()

    for iteration in range(c.iterations):

        train_x, train_y = data_provider.get_batch()

        # print(...) with a single pre-formatted string produces the same
        # output on Python 2 and Python 3; the former print statements were a
        # SyntaxError on Python 3.
        print('start training on mini batch')
        training_loss = model.train_on_batch(train_x, train_y)

        # Offset by the iteration the run was resumed from so the reported
        # counter is 1-based and continues across restarts.
        total_iteration = c.iteration_to_load + (iteration + 1)
        print('iteration: %d, training loss: %s' % (total_iteration, training_loss))

        # save model
    def run_pipeline(self, datadir):
        """Run the series of Naive Bayes experiments on the data in *datadir*.

        Every experiment builds a ``NaiveBayesModel``, indexes the 'train'
        split, computes probabilities, runs inference on the 'test' split and
        prints the second value returned by ``inference``. Experiments 1-3
        additionally write the model and the results to ``<datadir>/out/``.

        Experiments:
            1. baseline (smoothing 0.5)
            2. stop words loaded from ``English_stop_word.txt`` one directory
               above this file
            3. word length filter (min 2, max 9)
            4. low-count cutoffs, then top-frequent-word fraction cutoffs
            5. smoothing sweep from 0 to 1.0

        Args:
            datadir: Data directory as a string; it is concatenated with
                '/out/...' to form the output paths.
        """

        def evaluate(model, tag=None):
            # Train and evaluate one model; persist model and results under
            # <datadir>/out/ only when a file-name tag is given.
            model.create_inverted_index(DataProvider(datadir, source='train'))
            model.calc_probability()
            if tag is not None:
                model.save_model_to_file(datadir + '/out/' + tag + '-model.txt')
            results, cm = model.inference(DataProvider(datadir, source='test'))
            print(cm)
            if tag is not None:
                model.save_results_to_file(results,
                                           datadir + '/out/' + tag + '-result.txt')

        print('Data', datadir)
        ##############################################################

        print('experiment 1 baseline')
        evaluate(NaiveBayesModel(smoothing=0.5), 'baseline')

        ##############################################################

        print('experiment 2 stop_words')
        stop_words_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            '..',
            'English_stop_word.txt',
        )
        with open(stop_words_path, 'r') as stop_words_file:
            # Iterate the file directly; one stripped entry per line.
            stop_words = [line.strip() for line in stop_words_file]
        evaluate(NaiveBayesModel(smoothing=0.5, stop_words=stop_words),
                 'stopword')

        ##############################################################

        print('experiment 3 length')
        evaluate(
            NaiveBayesModel(smoothing=0.5,
                            min_len_filter=2,
                            max_len_filter=9),
            'wordlength',
        )

        ##############################################################

        # The sweeps below only print their evaluation; nothing is saved.
        for low_count in [1, 5, 10, 15, 20]:
            print('experiment 4 low frequency_{}'.format(low_count))
            evaluate(NaiveBayesModel(smoothing=0.5,
                                     cutoff_low_count=low_count))

        for fraction in [0.05, 0.1, 0.15, 0.2, 0.25]:
            print('experiment 4 high frequency_{}'.format(fraction))
            evaluate(
                NaiveBayesModel(smoothing=0.5,
                                cutoff_top_frequent_words_fraction=fraction))

        ##############################################################

        for smoothing in [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            print('experiment 5 smoothed_{}'.format(smoothing))
            evaluate(NaiveBayesModel(smoothing=smoothing))
        # NOTE(review): the enclosing function header, and the definitions of
        # `date` and `covid_scraper`, are outside the visible source.
        print(date)

        # Pull each figure from the scraper and echo it for traceability.
        total_tests = covid_scraper.get_total_tests()
        print("total tests: " + str(total_tests))

        positive_cases = covid_scraper.get_positive_cases()
        print("positive cases: " + str(positive_cases))

        negative_cases = covid_scraper.get_negative_cases()
        print("negative cases: " + str(negative_cases))

        total_deaths = covid_scraper.get_total_deaths()
        print("total deaths: " + str(total_deaths))

        cases_by_healthboard = covid_scraper.get_health_board_cases()
        print(cases_by_healthboard)

        # Bundle the scraped figures into a single value object.
        scraped_data_set = ScrapedDataSet(total_tests, positive_cases,
                                          negative_cases, total_deaths,
                                          cases_by_healthboard)

        with DataProvider() as provider:
            # Daily deaths = scraped total minus the total the provider
            # returns for the previous day.
            # NOTE(review): assumes the provider returns a number for the
            # previous day - confirm behaviour when no record exists.
            daily_deaths = scraped_data_set.total_deaths - provider.get_total_deaths_for_date(
                date - timedelta(days=1))
            print("daily deaths: " + str(daily_deaths))
            calculated_data_set = CalculatedDataSet(daily_deaths)

            # Combine the date, scraped and calculated values, then upload.
            dataset = DataSet(date, scraped_data_set, calculated_data_set)

            provider.upload_data(dataset)
Esempio n. 6
0
 def test_is_quote_in_datasource(self):
     """A quote drawn through the cache must be among the cached 'quotes' values."""
     provider = DataProvider('storage.txt')
     quote_cache = Cache(provider)
     # Draw the quote first, then read the stored values, mirroring the
     # argument evaluation order of a single assertIn call.
     picked = quotes.get_random(quote_cache)
     stored_quotes = list(quote_cache.get('quotes').values())
     self.assertIn(picked, stored_quotes)
Esempio n. 7
0
def test_quote_should_be_none_for_empty_list():
    """random_quote must return None when the provider's get_all is empty."""
    provider = DataProvider('')
    # Replace get_all with a stub that returns an empty list.
    provider.get_all = MagicMock(return_value=[])
    assert quotes.random_quote(provider) is None
Esempio n. 8
0
def test_quote_should_be_in_array():
    """random_quote must return one of the quotes supplied by get_all."""
    candidates = ['quote' + str(index) for index in range(5)]
    provider = DataProvider('')
    # Replace get_all with a stub that returns the five known quotes.
    provider.get_all = MagicMock(return_value=candidates)
    assert quotes.random_quote(provider) in candidates
Esempio n. 9
0
from src.cache import Cache
from src.data_provider import DataProvider
from src.bm_cat_bot import BMCatBot
from flask import Flask

# NOTE(review): `environ` and `logging` are used below but are not imported
# in the visible portion of this module - confirm the imports exist above.

# Runtime configuration, read from environment variables with fallbacks.
# PORT defaults to 5000 when the PORT variable is not set.
PORT = int(environ.get('PORT', '5000'))
# Host and port used to build the public webhook base URL below.
BMCAT_HOST = environ.get('BM_CAT_HOST', '')
BMCAT_PORT = int(environ.get('BM_CAT_PORT', '8443'))
# '0.0.0.0' is the wildcard address (all IPv4 interfaces).
WEBHOOK_LISTEN = '0.0.0.0'
# NOTE(review): with BM_CAT_HOST unset this becomes "https://:8443" - confirm
# that the host variable is always provided in deployment.
WEBHOOK_URLBASE = "https://%s:%s" % (BMCAT_HOST, BMCAT_PORT)

# API key handed to the bot; 'api_key_stab' is the placeholder fallback.
BMCAT_APIKEY = environ.get('BM_CAT_API_KEY', 'api_key_stab')
# The webhook path is the API key itself, prefixed with a slash.
WEBHOOK_URLPATH = "/%s" % BMCAT_APIKEY

# Paths to the SSL certificate and private key; empty string when unset.
BMCAT_SSLCERT_PATH = environ.get('BM_CAT_SSL_CERTIFICATE_PATH', '')
BMCAT_PRIVATEKEY_PATH = environ.get('BM_CAT_PRIVATE_KEY_PATH', '')

# Log lines carry a timestamp, the level name and the message.
log_fmt = '%(asctime)-15s %(levelname)s: %(message)s'
logging.basicConfig(format=log_fmt)
logger = logging.getLogger(__name__)
# 20 is the numeric value of logging.INFO.
logger.setLevel(level=20)

# Flask application object for this module.
app = Flask(__name__)

# Relative path to the storage file.
# NOTE(review): presumably resolved against the process working directory
# inside DataProvider - confirm.
connection_string = '../storage.txt'
data_provider = DataProvider(connection_string)
# The cache wraps the data provider and is what the bot receives.
cache = Cache(data_provider)

# Bot instance built from the API key, the cache and the module logger.
bmcat_bot = BMCatBot(BMCAT_APIKEY, cache, logger)