def test_upload_in_batches():
    bq_service = BigQueryService(dataset_name="impeachment_test")

    # when inserting more than 10,000 rows,
    # is able to overcome error "too many rows present in the request, limit: 10000":
    lots_of_rows = [{"start_date": "2020-01-01", "user_id": i, "bot_probability": .99} for i in range(1, 36000)]
    errors = bq_service.upload_daily_bot_probabilities(lots_of_rows)
    assert not any(errors)
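# NOTE: the batching logic this test exercises is not shown in the excerpt.
# Below is a minimal sketch of how upload_daily_bot_probabilities might chunk
# rows to stay under BigQuery's 10,000-rows-per-request streaming limit. The
# method body, table name, and BATCH_SIZE constant are assumptions for
# illustration, not the project's actual implementation.

from google.cloud import bigquery

BATCH_SIZE = 10_000  # BigQuery rejects streaming inserts larger than this

class BigQueryService:
    """Hypothetical sketch of the service class used in the test above."""

    def __init__(self, dataset_name):
        self.client = bigquery.Client()
        self.dataset_name = dataset_name

    def upload_daily_bot_probabilities(self, records):
        """Streams records in chunks of BATCH_SIZE, collecting per-row errors."""
        table = self.client.get_table(f"{self.dataset_name}.daily_bot_probabilities")
        errors = []
        for i in range(0, len(records), BATCH_SIZE):
            errors += self.client.insert_rows_json(table, records[i : i + BATCH_SIZE])
        return errors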
        title=f"Bot Probability Scores for Period '{date_range.start_date}' (excludes 0.5)"
    )
    storage.upload_bot_probabilities_histogram()

    # UPLOAD SELECTED ROWS TO BIG QUERY (IF POSSIBLE, OTHERWISE CAN ADD FROM GCS LATER)
    try:
        bots_df = clf.bot_probabilities_df[clf.bot_probabilities_df["bot_probability"] > 0.5]
        records = [{**{"start_date": date_range.start_date}, **record} for record in bots_df.to_dict("records")]
        print("UPLOADING", len(records), "BOT SCORES TO BQ...")
        bq_service.upload_daily_bot_probabilities(records)
        del bots_df
        del records
    except Exception as err:
        print("OOPS", err)

    del storage
    del clf
    gc.collect()

    print("\n\n\n\n")
    print("JOB COMPLETE!")
    server_sleep()
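# NOTE: server_sleep() is not defined in this excerpt. A plausible sketch,
# assuming it exists to idle the process after the job finishes so the hosting
# platform (e.g. a Heroku worker dyno) doesn't restart the script in a loop.
# The SERVER_SLEEP env var name and its default are assumptions.

import os
from time import sleep

def server_sleep(seconds=None):
    seconds = seconds or float(os.getenv("SERVER_SLEEP", "600"))  # assumed default
    print(f"SLEEPING FOR {seconds} SECONDS...")
    sleep(seconds)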