def test_is_quote_in_datasource(self):
    """Is quote in datasource?"""
    datasource = "quotes.txt"
    quotes_file_provider = DataProvider(datasource)
    data_list = json.load(open(datasource, "r"))
    self.assertIn(quotes.random_quote(quotes_file_provider), data_list)
from os import environ

from src.data_provider import DataProvider
from src.bm_cat_bot import BMCatBot
from flask import Flask

PORT = int(environ.get('PORT', '5000'))
BMCAT_HOST = environ.get('BM_CAT_HOST', '')
BMCAT_PORT = int(environ.get('BM_CAT_PORT', '8443'))
WEBHOOK_LISTEN = '0.0.0.0'
WEBHOOK_URLBASE = "https://%s:%s" % (BMCAT_HOST, BMCAT_PORT)
BMCAT_APIKEY = environ.get('BM_CAT_API_KEY', 'api_key_stab')
WEBHOOK_URLPATH = "/%s" % BMCAT_APIKEY
BMCAT_SSLCERT_PATH = environ.get('BM_CAT_SSL_CERTIFICATE_PATH', '')
BMCAT_PRIVATEKEY_PATH = environ.get('BM_CAT_PRIVATE_KEY_PATH', '')

app = Flask(__name__)

quotes = '../quotes.txt'
quotes_file_provider = DataProvider(quotes)
bmcat_bot = BMCatBot(BMCAT_APIKEY, quotes_file_provider)
from os.path import join
from time import time

from src.data_provider import DataProvider
from src.model_define import model_and_loss
from config import Configs
from model_evaluation.weight_evaluation import test_on_benchmark

if __name__ == '__main__':
    c = Configs()
    model = model_and_loss(training=True)
    data_provider = DataProvider(c.training_data_folder, c.batch_size, c.training_dim,
                                 stride=c.stride, side=c.side,
                                 mixing_train=c.mixing_train, model_code=c.model_code)
    data_provider.start_loading()
    start_time = time()
    for iteration in range(c.iterations):
        train_x, train_y = data_provider.get_batch()
        print('start training on mini batch')
        training_loss = model.train_on_batch(train_x, train_y)
        total_iteration = c.iteration_to_load + (iteration + 1)
        print('iteration: %d, training loss:' % total_iteration, training_loss)
        # save model
def run_pipeline(self, datadir):
    print('Data', datadir)

    ##############################################################
    print('experiment 1 baseline')
    model = NaiveBayesModel(smoothing=0.5)
    model.create_inverted_index(DataProvider(datadir, source='train'))
    model.calc_probability()
    model.save_model_to_file(datadir + '/out/baseline-model.txt')
    results, cm = model.inference(DataProvider(datadir, source='test'))
    print(cm)
    model.save_results_to_file(results, datadir + '/out/baseline-result.txt')

    ##############################################################
    print('experiment 2 stop_words')
    with open(
        os.path.dirname(os.path.abspath(__file__)) + '/../English_stop_word.txt',
        'r',
    ) as f:
        stop_words = [l.strip() for l in f.readlines()]
    model = NaiveBayesModel(smoothing=0.5, stop_words=stop_words)
    model.create_inverted_index(DataProvider(datadir, source='train'))
    model.calc_probability()
    model.save_model_to_file(datadir + '/out/stopword-model.txt')
    results, cm = model.inference(DataProvider(datadir, source='test'))
    print(cm)
    model.save_results_to_file(results, datadir + '/out/stopword-result.txt')

    ##############################################################
    print('experiment 3 length')
    model = NaiveBayesModel(smoothing=0.5, min_len_filter=2, max_len_filter=9)
    model.create_inverted_index(DataProvider(datadir, source='train'))
    model.calc_probability()
    model.save_model_to_file(datadir + '/out/wordlength-model.txt')
    results, cm = model.inference(DataProvider(datadir, source='test'))
    print(cm)
    model.save_results_to_file(results, datadir + '/out/wordlength-result.txt')

    ##############################################################
    for f in [1, 5, 10, 15, 20]:
        print('experiment 4 low frequency_{}'.format(f))
        model = NaiveBayesModel(smoothing=0.5, cutoff_low_count=f)
        model.create_inverted_index(DataProvider(datadir, source='train'))
        model.calc_probability()
        results, cm = model.inference(DataProvider(datadir, source='test'))
        print(cm)
    for f in [0.05, 0.1, 0.15, 0.2, 0.25]:
        print('experiment 4 high frequency_{}'.format(f))
        model = NaiveBayesModel(smoothing=0.5, cutoff_top_frequent_words_fraction=f)
        model.create_inverted_index(DataProvider(datadir, source='train'))
        model.calc_probability()
        results, cm = model.inference(DataProvider(datadir, source='test'))
        print(cm)

    ##############################################################
    for s in [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
        print('experiment 5 smoothed_{}'.format(s))
        model = NaiveBayesModel(smoothing=s)
        model.create_inverted_index(DataProvider(datadir, source='train'))
        model.calc_probability()
        results, cm = model.inference(DataProvider(datadir, source='test'))
        print(cm)
print(date)
total_tests = covid_scraper.get_total_tests()
print("total tests: " + str(total_tests))
positive_cases = covid_scraper.get_positive_cases()
print("positive cases: " + str(positive_cases))
negative_cases = covid_scraper.get_negative_cases()
print("negative cases: " + str(negative_cases))
total_deaths = covid_scraper.get_total_deaths()
print("total deaths: " + str(total_deaths))
cases_by_healthboard = covid_scraper.get_health_board_cases()
print(cases_by_healthboard)

scraped_data_set = ScrapedDataSet(total_tests, positive_cases, negative_cases,
                                  total_deaths, cases_by_healthboard)

with DataProvider() as provider:
    daily_deaths = scraped_data_set.total_deaths - provider.get_total_deaths_for_date(
        date - timedelta(days=1))
    print("daily deaths: " + str(daily_deaths))
    calculated_data_set = CalculatedDataSet(daily_deaths)
    dataset = DataSet(date, scraped_data_set, calculated_data_set)
    provider.upload_data(dataset)
def test_is_quote_in_datasource(self):
    """Is quote in datasource?"""
    quotes_file_provider = DataProvider('storage.txt')
    cache = Cache(quotes_file_provider)
    self.assertIn(quotes.get_random(cache), list(cache.get('quotes').values()))
def test_quote_should_be_none_for_empty_list():
    empty_data = DataProvider('')
    empty_data.get_all = MagicMock(return_value=[])
    quote = quotes.random_quote(empty_data)
    assert quote is None
def test_quote_should_be_in_array():
    fake_data = DataProvider('')
    qts = ['quote' + str(x) for x in range(5)]
    fake_data.get_all = MagicMock(return_value=qts)
    quote = quotes.random_quote(fake_data)
    assert quote in qts
import logging
from os import environ

from src.cache import Cache
from src.data_provider import DataProvider
from src.bm_cat_bot import BMCatBot
from flask import Flask

PORT = int(environ.get('PORT', '5000'))
BMCAT_HOST = environ.get('BM_CAT_HOST', '')
BMCAT_PORT = int(environ.get('BM_CAT_PORT', '8443'))
WEBHOOK_LISTEN = '0.0.0.0'
WEBHOOK_URLBASE = "https://%s:%s" % (BMCAT_HOST, BMCAT_PORT)
BMCAT_APIKEY = environ.get('BM_CAT_API_KEY', 'api_key_stab')
WEBHOOK_URLPATH = "/%s" % BMCAT_APIKEY
BMCAT_SSLCERT_PATH = environ.get('BM_CAT_SSL_CERTIFICATE_PATH', '')
BMCAT_PRIVATEKEY_PATH = environ.get('BM_CAT_PRIVATE_KEY_PATH', '')

log_fmt = '%(asctime)-15s %(levelname)s: %(message)s'
logging.basicConfig(format=log_fmt)
logger = logging.getLogger(__name__)
logger.setLevel(level=20)

app = Flask(__name__)

connection_string = '../storage.txt'
data_provider = DataProvider(connection_string)
cache = Cache(data_provider)
bmcat_bot = BMCatBot(BMCAT_APIKEY, cache, logger)