def save_celebs_to_corpus(self, celeb):
    """[Add a celebrity to the corpora, saving them periodically.

    Before the new entry is stored, the time elapsed since
    self.last_time_record is compared with data_handler.thresh_save_corpus.
    Once that interval has passed, both corpora are handed to data_handler
    for saving, but only if the celebrity corpus has grown since the
    previous save.

    The celebrity string is then added to self.celebs_corpus, and every
    space-separated part longer than one character is added to
    self.names_corpus. Any failure while storing is printed and swallowed,
    so a single bad record never propagates to the caller.]

    Arguments:
        celeb {[str]} -- [The formatted celebrity string to be added to
            the corpus. A value that cannot be split like a string is
            reported and nothing is stored for it.]
    """
    elapsed = time.time() - self.last_time_record
    if elapsed > data_handler.thresh_save_corpus:
        # Save only if the celebs corpus has gained entries since last time.
        current_size = len(self.celebs_corpus)
        if current_size > self.last_celebs_corpus_size:
            print("SAVING TO CLOUD")
            data_handler.save_celebs_corpus(self.celebs_corpus)
            data_handler.save_names_corpus(self.names_corpus)
            self.last_celebs_corpus_size = len(self.celebs_corpus)
        # NOTE(review): the timer is restarted whenever the interval has
        # elapsed, even if nothing needed saving -- confirm this nesting
        # matches the intended behavior.
        self.last_time_record = time.time()

    try:
        # Split before storing: a value without string behavior (e.g. None)
        # must fail here, before it can end up in the celebs corpus and be
        # persisted by a later save.
        name_parts = celeb.split(" ")
        self.celebs_corpus.add(celeb)
        print(" ##### Saving celeb : {} #######".format(celeb))
        for part in name_parts:
            # Skips empty fragments (from repeated spaces) and initials.
            if len(part) > 1:
                self.names_corpus.add(part)
    except Exception as err:
        # Deliberately broad: this is a best-effort step and one malformed
        # record should be reported, not abort the caller.
        print(err)
def spider_closed(self):
    """[Hook to run when the spider is closed.

    Prints a notice, then passes self.celebs_corpus to
    data_handler.save_celebs_corpus and self.names_corpus to
    data_handler.save_names_corpus, in that order, so the latest corpora
    are saved before exit.]
    """
    print("SAVING BEFORE EXIT")

    # Celebrity corpus first, then the derived names corpus.
    store_celebs = data_handler.save_celebs_corpus
    store_celebs(self.celebs_corpus)

    store_names = data_handler.save_names_corpus
    store_names(self.names_corpus)
def spider_closed(self):
    """[Save both corpora one last time when the spider is closed.

    Prints a notice and then hands self.celebs_corpus and
    self.names_corpus, in that order, to the matching data_handler
    save functions.]
    """
    print("SAVING BEFORE EXIT")

    celebs_saver = data_handler.save_celebs_corpus
    celebs_saver(self.celebs_corpus)

    names_saver = data_handler.save_names_corpus
    names_saver(self.names_corpus)