Ejemplo n.º 1
0
 def perform(self):
     """Process the remaining status ids in batches, or idle when none remain.

     Fetches the outstanding ids via ``fetch_remaining_status_ids``. When at
     least one of them is truthy, the ids are split with
     ``split_into_batches`` (using ``self.batch_size``) and every batch is
     handed to ``process_batch``. Otherwise a completion message is printed
     and ``server_sleep`` is called with ``10 * 60 * 60``.
     """
     pending_ids = self.fetch_remaining_status_ids()

     # NOTE(review): any() checks element truthiness, not mere non-emptiness,
     # so a collection holding only falsy ids takes the "all done" path.
     if not any(pending_ids):
         print("OH ALL DONE! SLEEPING...")
         # presumably seconds (i.e. ten hours) -- confirm against server_sleep
         server_sleep(10 * 60 * 60)
         return

     id_batches = split_into_batches(pending_ids, batch_size=self.batch_size)
     for id_batch in id_batches:
         self.process_batch(id_batch)
Ejemplo n.º 2
0
from app.retweet_graphs_v2.retweet_grapher import RetweetGrapher
from app.retweet_graphs_v2.k_days.generator import DateRangeGenerator


if __name__ == "__main__":
    # Build one retweet graph per generated date range, saving each graph's
    # metadata, results and graph under a directory keyed by k_days and the
    # range's start date.
    # NOTE(review): BigQueryService and server_sleep are not among the imports
    # visible next to this script -- confirm they are imported in this file.

    gen = DateRangeGenerator()
    bq_service = BigQueryService()  # a single instance is reused by every grapher

    for date_range in gen.date_ranges:
        graph_dirpath = f"retweet_graphs_v2/k_days/{gen.k_days}/{date_range.start_date}"
        grapher = RetweetGrapher(
            storage_dirpath=graph_dirpath,
            bq_service=bq_service,
            tweets_start_at=date_range.start_at,
            tweets_end_at=date_range.end_at,
        )

        # Run the grapher's steps in their required order.
        grapher.save_metadata()
        grapher.start()
        grapher.perform()
        grapher.end()
        grapher.report()
        grapher.save_results()
        grapher.save_graph()

        # Drop the reference so the finished graph can be freed before the
        # next date range is processed.
        del grapher
        print("\n\n\n\n")

    print("JOB COMPLETE!")

    server_sleep()
            #lock = BoundedSemaphore()
            futures = [executor.submit(self.process_batch_async, batch) for batch in batches]
            print("BATCHES WILL PROCESS:", len(futures))
            for future in as_completed(futures):
                #lock.acquire()
                future.result()
                #lock.release()

        print("----------------")
        print("ASYNC PERFORMANCE COMPLETE...")



if __name__ == "__main__":
    # Report the score count, run the asynchronous scoring job, report the
    # count again, then release the scorer and pause.

    divider = "----------------"
    scorer = ToxicityScorerAsync()

    print(divider)
    count_before = fmt_n(scorer.count_scores())
    print("SCORES COUNT:", count_before)

    scorer.perform_async()

    print(divider)
    print("JOB COMPLETE!")
    print(divider)
    count_after = fmt_n(scorer.count_scores())
    print("SCORES COUNT:", count_after)

    # Release the scorer and force a collection pass before idling.
    del scorer
    gc.collect()
    # give the server a break before restarting
    server_sleep(seconds=5 * 60)