コード例 #1
def test_upload_in_batches():
    """Verify that an upload larger than BigQuery's per-request row limit succeeds.

    BigQuery rejects a single insert request of more than 10,000 rows with
    "too many rows present in the request, limit: 10000"; the service is
    expected to split the payload into batches, so none of the returned
    per-row error lists should be truthy.
    """
    bq_service = BigQueryService(dataset_name="impeachment_test")

    # 35,999 rows — well past the 10,000-row single-request ceiling.
    lots_of_rows = [
        {"start_date": "2020-01-01", "user_id": user_id, "bot_probability": .99}
        for user_id in range(1, 36000)
    ]

    errors = bq_service.upload_daily_bot_probabilities(lots_of_rows)
    assert not any(errors)
コード例 #2
            title=
            f"Bot Probability Scores for Period '{date_range.start_date}' (excludes 0.5)"
        )
        storage.upload_bot_probabilities_histogram()

        # UPLOAD SELECTED ROWS TO BIG QUERY (IF POSSIBLE, OTHERWISE CAN ADD FROM GCS LATER)
        try:
            bots_df = clf.bot_probabilities_df[
                clf.bot_probabilities_df["bot_probability"] > 0.5]
            records = [{
                **{
                    "start_date": date_range.start_date
                },
                **record
            } for record in bots_df.to_dict("records")]
            print("UPLOADING", len(records), "BOT SCORES TO BQ...")
            bq_service.upload_daily_bot_probabilities(records)

            del bots_df
            del records
        except Exception as err:
            print("OOPS", err)

        del storage
        del clf
        gc.collect()
        print("\n\n\n\n")

    print("JOB COMPLETE!")
    server_sleep()