def perform(self):
    remaining_status_ids = self.fetch_remaining_status_ids()
    if any(remaining_status_ids):
        for batch_of_ids in split_into_batches(remaining_status_ids, batch_size=self.batch_size):
            self.process_batch(batch_of_ids)
    else:
        print("OH ALL DONE! SLEEPING...")
        server_sleep(10 * 60 * 60)
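# perform() leans on two helpers defined elsewhere in the project:
# split_into_batches() and server_sleep(). Minimal sketches of what they might
# look like follow; the bodies and default arguments are assumptions, not the
# project's actual implementations.
import time
from itertools import islice

def split_into_batches(ids, batch_size=1000):
    """Yields successive lists of at most batch_size items (assumed behavior)."""
    iterator = iter(ids)
    while True:
        batch = list(islice(iterator, batch_size))
        if not batch:
            break
        yield batch

def server_sleep(seconds=6 * 60 * 60):
    """Pauses the worker for the given number of seconds (assumed behavior)."""
    print(f"SLEEPING FOR {seconds} SECONDS...")
    time.sleep(seconds)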
from app.retweet_graphs_v2.retweet_grapher import RetweetGrapher
from app.retweet_graphs_v2.k_days.generator import DateRangeGenerator

if __name__ == "__main__":

    gen = DateRangeGenerator()
    bq_service = BigQueryService()

    for date_range in gen.date_ranges:
        storage_dirpath = f"retweet_graphs_v2/k_days/{gen.k_days}/{date_range.start_date}"

        grapher = RetweetGrapher(
            storage_dirpath=storage_dirpath,
            bq_service=bq_service,
            tweets_start_at=date_range.start_at,
            tweets_end_at=date_range.end_at
        )
        grapher.save_metadata()
        grapher.start()
        grapher.perform()
        grapher.end()
        grapher.report()
        grapher.save_results()
        grapher.save_graph()
        del grapher  # clearing graph from memory

    print("\n\n\n\n")
    print("JOB COMPLETE!")
    server_sleep()
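# BigQueryService and server_sleep come from elsewhere in the project (their
# imports are not shown above). The sketch below illustrates what the
# DateRangeGenerator interface implies, based only on the attributes used above
# (k_days, date_ranges, and each range's start_date / start_at / end_at); the
# constructor parameters and defaults are assumptions.
from datetime import datetime, timedelta

class DateRange:
    def __init__(self, start_at, end_at):
        self.start_at = start_at
        self.end_at = end_at

    @property
    def start_date(self):
        return self.start_at.strftime("%Y-%m-%d")  # e.g. "2020-01-23"

class DateRangeGenerator:
    def __init__(self, start_date="2020-01-01", k_days=3, n_periods=5):
        self.k_days = k_days
        self.date_ranges = []
        start_at = datetime.strptime(start_date, "%Y-%m-%d")
        for _ in range(n_periods):
            end_at = start_at + timedelta(days=k_days)
            self.date_ranges.append(DateRange(start_at=start_at, end_at=end_at))
            start_at = end_at

if __name__ == "__main__":
    gen = DateRangeGenerator(start_date="2020-01-01", k_days=3, n_periods=2)
    for dr in gen.date_ranges:
        print(dr.start_date, "->", dr.end_at.strftime("%Y-%m-%d"))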
# excerpt (likely from ToxicityScorerAsync.perform_async): `executor` is a
# concurrent.futures executor and `batches` holds the batches of texts to score
#lock = BoundedSemaphore()
futures = [executor.submit(self.process_batch_async, batch) for batch in batches]
print("BATCHES WILL PROCESS:", len(futures))
for future in as_completed(futures):
    #lock.acquire()
    future.result()
    #lock.release()

print("----------------")
print("ASYNC PERFORMANCE COMPLETE...")


if __name__ == "__main__":

    scorer = ToxicityScorerAsync()

    print("----------------")
    print("SCORES COUNT:", fmt_n(scorer.count_scores()))

    scorer.perform_async()

    print("----------------")
    print("JOB COMPLETE!")

    print("----------------")
    print("SCORES COUNT:", fmt_n(scorer.count_scores()))

    del scorer
    gc.collect()

    server_sleep(seconds=5*60)  # give the server a break before restarting
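# A self-contained illustration of the fan-out pattern used above: submit every
# batch to a thread pool, then iterate as_completed() so future.result()
# re-raises any exception from a worker thread. MAX_THREADS and score_batch()
# are placeholders for illustration, not the project's own names or values.
from concurrent.futures import ThreadPoolExecutor, as_completed

MAX_THREADS = 10  # assumed; tune to the workload and any API rate limits

def score_batch(batch):
    # stand-in for ToxicityScorerAsync.process_batch_async
    return [len(text) for text in batch]

if __name__ == "__main__":
    batches = [["hello", "world"], ["more", "texts", "here"]]
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        futures = [executor.submit(score_batch, batch) for batch in batches]
        print("BATCHES WILL PROCESS:", len(futures))
        for future in as_completed(futures):
            print(future.result())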