def station_set_training(station_name: str, enabled: bool):
    """Switch the training flag of the station called ``station_name``.

    The station is looked up by name and its ``is_training`` attribute is set
    to ``enabled`` and saved. When the flag is being switched off, a model is
    trained for the station, serialized, stored on ``trained_model`` and the
    station is saved again.

    Raises:
        HTTPException: with status 404 when no station has that name.

    Returns:
        The dict ``{'status': 'ok'}``.
    """
    station = Station.get_or_none(Station.name == station_name)
    if station is None:
        raise HTTPException(404, 'station not found')

    log.info(f'Setting training enabled: {enabled} for {station}')
    station.is_training = enabled
    station.save()

    if enabled:
        # Training is still running; nothing to fit yet.
        return {'status': 'ok'}

    # Training was switched off: fit a model and persist its serialized form.
    station.trained_model = serialize_model(train_model(station))
    station.save()
    return {'status': 'ok'}
# Flattened script fragment: several statements were collapsed onto the single
# line below. Because that line starts with '#', Python currently reads the
# whole of it as one comment and executes none of it.
# Read as separate statements, in order, it would:
#   1. (commented out) add 360 to rows of `solcast` where Azimuth <= -1;
#   2. compute est_dhi = Ghi - Dni * cos(Zenith);
#   3. call ml.model_generation(solcast, target_output, batch_size=256,
#      sequence_length=24, training_len=0.75, validation_len=0.225) and unpack
#      eight return values (model, batch, validation data, callbacks, scaled
#      and unscaled test sets, y scaler);
#   4. call ml.train_model(..., epoch_size=25, epoch_steps=100), save the result
#      with ml.save_model to 'model.h5' and load it back with ml.load_models;
#   5. reload the `ml` module via importlib.reload;
#   6. set start_idx, length, target_names and begin an ml.plot_comparison(...)
#      call whose remaining arguments are cut off.
# NOTE(review): np.cos expects radians - confirm solcast.Zenith is not in degrees.
# NOTE(review): the trailing ml.plot_comparison( call is truncated; restore its
# missing arguments before splitting this back into real statements.
#solcast.loc[solcast.Azimuth<=-1] = solcast.loc[solcast.Azimuth<=-1] + 360 est_dhi = solcast.Ghi - solcast.Dni * np.cos(solcast.Zenith) model, batch, validation_data, callbacks, x_test_scaled, y_test_scaled, y_test, y_scaler = ml.model_generation( solcast, target_output, batch_size=256, sequence_length=24, training_len=0.75, validation_len=0.225) model = ml.train_model(model, batch, validation_data, x_test_scaled, y_test_scaled, callbacks, epoch_size=25, epoch_steps=100) ml.save_model(model, 'model.h5') output = ml.load_models('model.h5') import importlib importlib.reload(ml) start_idx = 0 length = 500 target_names = target_output y_pred, y_true, x = ml.plot_comparison(x_test_scaled, y_test,
def test_truth_table(self):
    """Retrain the matcher and check it end to end against known matches.

    Uses ``prob_thresh``, which is defined outside this method.

    Steps:
      1. Rebuild the training set and retrain the model.
      2. Load the projects and certificates recorded as ground-truth matches
         (closed projects, ``ground_truth=1``, ``multi_phase=0``,
         ``validate=0``), run ``match`` on them and require that every
         project is found with no more than 25% false positives.
      3. Match one hand-written project/certificate pair directly, then match
         the same project against certificates retrieved by date range, and
         require both probabilities to agree to two decimals.
      4. Run ``validate_model`` to make sure validation executes.
    """
    build_train_set()
    train_model(prob_thresh=prob_thresh)

    # Both queries must select exactly the same ground-truth rows, so the
    # FROM/JOIN/WHERE clause is written once and shared between them.
    ground_truth_rows = """
        FROM
            web_certificates
        LEFT JOIN
            attempted_matches
        ON
            web_certificates.cert_id = attempted_matches.cert_id
        LEFT JOIN
            company_projects
        ON
            attempted_matches.project_id = company_projects.project_id
        LEFT JOIN
            base_urls
        ON
            base_urls.source = web_certificates.source
        WHERE
            company_projects.closed=1
        AND
            attempted_matches.ground_truth=1
        AND
            attempted_matches.multi_phase=0
        AND
            attempted_matches.validate=0
    """
    match_query = (
        "SELECT company_projects.*, web_certificates.url_key"
        + ground_truth_rows
    )
    corr_web_certs_query = "SELECT web_certificates.*" + ground_truth_rows

    with create_connection() as conn:
        test_company_projects = pd.read_sql(match_query, conn)
        test_web_df = pd.read_sql(corr_web_certs_query, conn)
    test_web_df = wrangle(test_web_df)

    results = match(
        company_projects=test_company_projects,
        df_web=test_web_df,
        test=True,
        prob_thresh=prob_thresh,
        version="new",
    )

    # Confirm 100% recall.
    # NOTE(review): taking the square root presumably relies on `results`
    # holding one row per project x certificate pairing (n * n rows) - confirm.
    qty_actual_matches = int(len(results) ** 0.5)
    predicted_matches = results[results.pred_match == 1]
    qty_found_matches = predicted_matches.title.nunique()
    self.assertEqual(
        qty_found_matches,
        qty_actual_matches,
        msg=f"qty_found_matches({qty_found_matches}) not equal qty_actual_matches({qty_actual_matches})",
    )

    # Make sure there are not more than 25% false positives: any predicted
    # match beyond one per distinct title counts as a false positive.
    false_positives = len(predicted_matches) - qty_found_matches
    self.assertLessEqual(
        false_positives,
        round(qty_actual_matches * 0.25, 1),
        msg=f"found too many false positives ({false_positives}) out of total test projects ({qty_actual_matches})",
    )

    # Test a single hand-written sample.
    sample_company = pd.DataFrame(
        {
            "cert_id": "99999",
            "project_id": "99999",
            "job_number": "2387",
            "city": "Ottawa",
            "address": "2562 Del Zotto Ave., Ottawa, Ontario",
            "title": "DWS Building Expansion",
            "owner": "Douglas Stalker",
            "contractor": "GNC",
            "engineer": "Goodkey",
            "address_lat": 45.312234,
            "address_lng": -75.623789,
            "receiver_emails_dump": "{'alex': '*****@*****.**'}",
            "closed": "0",
        },
        index=range(1),
    )
    sample_web = pd.DataFrame(
        {
            "cert_id": "99998",
            "pub_date": "2019-03-06",
            "city": "Ottawa-Carleton",
            "address": "2562 Del Zotto Avenue, Gloucester, Ontario",
            "title": "Construct a 1 storey storage addition to a 2 storey office/industrial building",
            "owner": "Doug Stalker, DWS Roofing",
            "contractor": "GNC Constructors Inc.",
            "engineer": None,
            "address_lat": 45.312234,
            "address_lng": -75.623789,
            "url_key": "B0046A36-3F1C-11E9-9A87-005056AA6F02",
            "source": "dcn",
        },
        index=range(1),
    )
    is_match, prob = match(
        company_projects=sample_company,
        df_web=sample_web,
        test=True,
        version="new",
    ).iloc[0][["pred_match", "pred_prob"]]
    # `job_number` is a one-row Series; take its scalar so the failure
    # message shows the job number rather than the Series repr.
    self.assertTrue(
        is_match,
        msg=f"Project #{sample_company.job_number.iloc[0]} did not match successfully. Match probability returned was {prob}.",
    )

    # Test the same sample, but with certificates retrieved from the db.
    results = match(
        company_projects=sample_company,
        since="2019-03-05",
        until="2019-03-07",
        test=True,
        version="new",
    )
    # 'gnc' is what is returned from the wrangling funcs
    prob_from_db_cert = results[results.contractor == "gnc"].iloc[0].pred_prob
    self.assertEqual(round(prob, 2), round(prob_from_db_cert, 2))

    # Make sure validation runs.
    validate_model(prob_thresh=prob_thresh, test=True)