def scrap(self):
    """Scrape this survey for its provider unless it is already recorded.

    Reads ``self.parent``, ``self.survey_id``, ``self.provider`` and
    ``self.base_url``. A ``Status`` record keyed by
    ``survey_id + provider`` marks the pair as handled.

    * Parent surveys (``self.parent == "true"``) are never scraped; a
      "success" status record is written if none exists yet.
    * Other surveys are skipped when a status record already exists;
      otherwise the provider-specific scraper runs and the status record
      is written afterwards.

    Failures inside the parent branch and inside the provider branches are
    printed (and, for zomato, appended to ``log.txt``) rather than raised.
    Returns ``None``.
    """
    if self.parent == "true":
        print(self.survey_id, "is a parent survey")
        try:
            # The key is built inside the try block so that a failing
            # concatenation is reported like any other error here.
            status_key = self.survey_id + self.provider
            # Count through the ``objects`` manager, as the non-parent branch
            # does. Calling ``count()`` on a new ``Status`` instance is
            # presumably not supported (NOTE(review): confirm against the
            # model), which made every run take the fallback path below and
            # save another record.
            if Status.objects(unique_identifier=status_key).count() == 0:
                Status(unique_identifier=status_key, scraped_status="success").save()
        except Exception as e:
            print("Exception", e, "for", self.survey_id)
            # Best effort: still try to mark the parent survey as handled.
            try:
                Status(
                    unique_identifier=self.survey_id + self.provider,
                    scraped_status="success",
                ).save()
            except Exception as retry_error:
                # Report instead of silently swallowing the second failure.
                print("Exception", retry_error, "for", self.survey_id)
        return

    status_key = self.survey_id + self.provider
    # Any existing record (not only exactly one) means the work is done.
    if Status.objects(unique_identifier=status_key).count() > 0:
        print(self.survey_id, "ignored for provider", self.provider)
        return

    if self.provider == "zomato":
        try:
            print("Scraping", self.survey_id)
            # NOTE: the Zomato scrape call is currently disabled; only the
            # status record is written.
            # Zomato(self.base_url, self.survey_id, self.provider).get_data()
            Status(unique_identifier=status_key, scraped_status="success").save()
        except Exception as e:
            print("Exception occured for ", e, "***********", self.survey_id, "provider", self.provider)
            with open("log.txt", "a") as f:
                f.write(str(e) + "****** \n")
    elif self.provider == "tripadvisor":
        try:
            TripAdvisor(self.base_url, self.survey_id, self.provider).get_data()
            Status(unique_identifier=status_key, scraped_status="success").save()
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C and
            # SystemExit are not swallowed mid-scrape.
            print("Exception occured for ", self.survey_id, "provider", self.provider)
    else:
        print("Bad Provider: ", self.provider)
def get_data(self):
    """Collect all review pages for ``self.sid`` and record completion.

    A list-valued ``self.sid`` is not handled: it is reported and skipped.
    Otherwise the method asks ``self.get_id()`` and ``self.get_total()``
    for the remote id and the review total, and stops early if a ``Record``
    for this survey/remote id already exists. If not, it maps
    ``self.sub_get`` over the page indices with a worker pool, then saves a
    ``Record`` and a "success" ``Status``.

    Returns ``None``.
    """
    if isinstance(self.sid, list):
        print("Zomato ignored", self.sid)
        return

    rid = self.get_id()
    total = self.get_total()
    # One ``sub_get`` call per index; the divisor 5 is presumably the number
    # of reviews per page -- TODO confirm.
    turn = int(total / 5) + 1
    print(turn)

    if len(Record.objects(survey_id=self.sid, rid=str(rid))) != 0:
        print("Already Review Collected")
        return

    ids = list(range(turn))
    print(ids)

    pool = Pool()
    try:
        pool.map(self.sub_get, ids)
    finally:
        # Release the workers even if a page fails; the pool was previously
        # never closed.
        pool.close()
        pool.join()

    Record(provider="zomato", survey_id=self.sid, rid=str(rid)).save()
    # The bare name ``provider`` is not defined in this function; the
    # provider is read from ``self.p``, the attribute the earlier draft of
    # this method used -- NOTE(review): confirm the attribute name.
    Status(unique_identifier=self.sid + self.p, scraped_status="success").save()