Esempio n. 1
0
def station_set_training(station_name: str, enabled: bool):
    """Switch the training flag of a station on or off.

    The station is looked up by name. Its ``is_training`` attribute is set
    to ``enabled`` and saved. When training is being switched off, a model
    is trained for the station, serialized into ``trained_model`` and the
    station is saved a second time.

    Args:
        station_name: Name used to look up the station.
        enabled: New value for the station's ``is_training`` flag.

    Returns:
        ``{'status': 'ok'}`` once the update has been saved.

    Raises:
        HTTPException: With status 404 when no station has that name.
    """
    record = Station.get_or_none(Station.name == station_name)
    if record is None:
        raise HTTPException(404, 'station not found')

    log.info(f'Setting training enabled: {enabled} for {record}')
    record.is_training = enabled
    record.save()

    # Turning training off is the trigger for fitting and storing the model.
    if not enabled:
        record.trained_model = serialize_model(train_model(record))
        record.save()

    return {'status': 'ok'}
Esempio n. 2
0
# Script: build, train, save and reload a model on the `solcast` data via the
# project's `ml` helper module. Statement order matters: each step consumes
# the names bound by the previous one.

# Disabled step: would add 360 to every row whose Azimuth is <= -1.
#solcast.loc[solcast.Azimuth<=-1] = solcast.loc[solcast.Azimuth<=-1] + 360

# Ghi minus Dni scaled by cos(Zenith); presumably an estimate of diffuse
# horizontal irradiance, going by the name.
# NOTE(review): np.cos takes radians -- confirm solcast.Zenith is not in degrees.
# NOTE(review): est_dhi is not referenced again in the visible code.
est_dhi = solcast.Ghi - solcast.Dni * np.cos(solcast.Zenith)

# Build the model together with its training batch, validation data,
# callbacks, scaled test arrays, unscaled test targets and the target scaler.
# NOTE(review): training_len / validation_len look like dataset fractions
# (0.75 + 0.225, remainder for testing) -- confirm against ml.model_generation.
model, batch, validation_data, callbacks, x_test_scaled, y_test_scaled, y_test, y_scaler = ml.model_generation(
    solcast,
    target_output,
    batch_size=256,
    sequence_length=24,
    training_len=0.75,
    validation_len=0.225)

# Train the freshly built model; the returned model replaces the untrained one.
model = ml.train_model(model,
                       batch,
                       validation_data,
                       x_test_scaled,
                       y_test_scaled,
                       callbacks,
                       epoch_size=25,
                       epoch_steps=100)

# Persist the trained model to 'model.h5'.
ml.save_model(model, 'model.h5')

# Load the file that was just written.
# NOTE(review): `output` is not used again in the visible code.
output = ml.load_models('model.h5')

# Re-execute the `ml` module so later calls use its current source
# (presumably for interactive/notebook-style editing of ml).
import importlib
importlib.reload(ml)
# Values set up ahead of the plot_comparison call that follows
# (presumably its window start, window length and target labels).
start_idx = 0
length = 500
target_names = target_output
y_pred, y_true, x = ml.plot_comparison(x_test_scaled,
                                       y_test,
Esempio n. 3
0
    def test_truth_table(self):
        """End-to-end check of the matching pipeline against ground-truth rows.

        Rebuilds the training set, retrains the model, and then verifies that:

        1. every ground-truth project is found (100% recall);
        2. false positives are at most 25% of the ground-truth count;
        3. a hand-written project/certificate pair is predicted as a match;
        4. matching the same project with ``since``/``until`` instead of an
           explicit ``df_web`` yields the same probability to two decimals;
        5. ``validate_model`` runs in test mode.

        ``prob_thresh`` is not defined in this method; it is read from an
        enclosing scope.
        """
        build_train_set()
        train_model(prob_thresh=prob_thresh)

        # Both queries read the same joined and filtered rows (closed
        # projects with ground_truth=1, multi_phase=0, validate=0) and differ
        # only in the selected columns, so the shared text is written once to
        # keep the two result sets aligned.
        ground_truth_rows = """
            FROM
                web_certificates
            LEFT JOIN
                attempted_matches
            ON
                web_certificates.cert_id = attempted_matches.cert_id
            LEFT JOIN
                company_projects
            ON
                attempted_matches.project_id = company_projects.project_id
            LEFT JOIN
                base_urls
            ON
                base_urls.source = web_certificates.source
            WHERE
                company_projects.closed=1
            AND
                attempted_matches.ground_truth=1
            AND
                attempted_matches.multi_phase=0
            AND
                attempted_matches.validate=0
        """
        match_query = """
            SELECT
                company_projects.*,
                web_certificates.url_key
        """ + ground_truth_rows
        corr_web_certs_query = """
            SELECT
                web_certificates.*
        """ + ground_truth_rows

        with create_connection() as conn:
            test_company_projects = pd.read_sql(match_query, conn)
            test_web_df = pd.read_sql(corr_web_certs_query, conn)
        test_web_df = wrangle(test_web_df)
        results = match(
            company_projects=test_company_projects,
            df_web=test_web_df,
            test=True,
            prob_thresh=prob_thresh,
            version="new",
        )

        # Confirm 100% recall.
        # NOTE(review): the square root presumably relies on `results` holding
        # every project x certificate pairing (n * n rows for n true pairs) --
        # confirm against match(test=True).
        qty_actual_matches = int(len(results)**0.5)
        qty_found_matches = results[results.pred_match == 1].title.nunique()
        self.assertEqual(
            qty_found_matches,
            qty_actual_matches,
            msg=
            f"qty_found_matches({qty_found_matches}) not equal qty_actual_matches({qty_actual_matches})",
        )

        # Make sure there are no more than 25% false positives: predicted
        # matches beyond the distinct titles that were found.
        false_positives = len(
            results[results.pred_match == 1]) - qty_found_matches
        self.assertLessEqual(
            false_positives,
            round(qty_actual_matches * 0.25, 1),
            msg=
            f"found too many false positives ({false_positives}) out of total test projects ({qty_actual_matches})",
        )

        # Test a single hand-written sample.
        sample_company = pd.DataFrame(
            {
                "cert_id": "99999",
                "project_id": "99999",
                "job_number": "2387",
                "city": "Ottawa",
                "address": "2562 Del Zotto Ave., Ottawa, Ontario",
                "title": "DWS Building Expansion",
                "owner": "Douglas Stalker",
                "contractor": "GNC",
                "engineer": "Goodkey",
                "address_lat": 45.312234,
                "address_lng": -75.623789,
                "receiver_emails_dump": "{'alex': '*****@*****.**'}",
                "closed": "0",
            },
            index=range(1),
        )
        sample_web = pd.DataFrame(
            {
                "cert_id": "99998",
                "pub_date": "2019-03-06",
                "city": "Ottawa-Carleton",
                "address": "2562 Del Zotto Avenue, Gloucester, Ontario",
                "title":
                "Construct a 1 storey storage addition to a 2 storey office/industrial building",
                "owner": "Doug Stalker, DWS Roofing",
                "contractor": "GNC Constructors Inc.",
                "engineer": None,
                "address_lat": 45.312234,
                "address_lng": -75.623789,
                "url_key": "B0046A36-3F1C-11E9-9A87-005056AA6F02",
                "source": "dcn",
            },
            index=range(1),
        )
        # NOTE(review): prob_thresh is not passed to this call or the next
        # one, unlike the call above -- confirm that is intended.
        is_match, prob = match(
            company_projects=sample_company,
            df_web=sample_web,
            test=True,
            version="new").iloc[0][["pred_match", "pred_prob"]]
        # Use the scalar job number; interpolating the column itself would
        # embed the whole Series repr in the failure message.
        job_number = sample_company.job_number.iloc[0]
        self.assertTrue(
            is_match,
            msg=
            f"Project #{job_number} did not match successfully. Match probability returned was {prob}.",
        )

        # Test the same sample, but let match() retrieve the certificates for
        # the date window (from the database, per the original comment).
        results = match(
            company_projects=sample_company,
            since="2019-03-05",
            until="2019-03-07",
            test=True,
            version="new",
        )
        prob_from_db_cert = (
            results[results.contractor == "gnc"].iloc[0].pred_prob
        )  # 'gnc' is what is returned from the wrangling funcs
        self.assertEqual(round(prob, 2), round(prob_from_db_cert, 2))

        # Make sure validation runs.
        validate_model(prob_thresh=prob_thresh, test=True)