コード例 #1
0
    def scrap(self):
        """Scrape this survey for its provider unless it is already marked.

        A ``Status`` document whose ``unique_identifier`` is
        ``survey_id + provider`` acts as the "already handled" marker.

        * Parent surveys (``self.parent == "true"``) are never scraped; the
          marker is only created when none exists yet.
        * Other surveys are skipped when a marker exists. Otherwise the
          provider-specific scraper runs and the marker is saved afterwards.

        Errors are reported with ``print`` (and appended to ``log.txt`` for
        the zomato branch) rather than raised. Returns ``None``.
        """
        status_key = self.survey_id + self.provider

        if self.parent == "true":
            print(self.survey_id, "is a parent survey")
            try:
                # Query through the manager: a Status *instance* cannot be
                # counted, only the Status.objects(...) query can.
                if Status.objects(unique_identifier=status_key).count() == 0:
                    Status(unique_identifier=status_key,
                           scraped_status="success").save()
            except Exception as e:
                print("Exception", e, "for", self.survey_id)
                # Best effort: still try to record the marker, but report a
                # second failure instead of hiding it.
                try:
                    Status(unique_identifier=status_key,
                           scraped_status="success").save()
                except Exception as save_error:
                    print("Exception", save_error, "for", self.survey_id)
            return

        # Any existing marker means "done"; comparing with exactly 1 would
        # re-scrape surveys that ended up with duplicate markers.
        if Status.objects(unique_identifier=status_key).count() > 0:
            print(self.survey_id, "ignored for provider", self.provider)
            return

        if self.provider == "zomato":
            try:
                print("Scraping", self.survey_id)
                # NOTE(review): no Zomato scraper is invoked here, so the
                # survey is marked "success" without being scraped --
                # confirm this is intended.
                Status(unique_identifier=status_key,
                       scraped_status="success").save()
            except Exception as e:
                print("Exception occured for ", e, "***********",
                      self.survey_id, "provider", self.provider)
                with open("log.txt", "a") as f:
                    f.write(str(e) + "****** \n")
        elif self.provider == "tripadvisor":
            try:
                TripAdvisor(self.base_url, self.survey_id,
                            self.provider).get_data()
                Status(unique_identifier=status_key,
                       scraped_status="success").save()
            except Exception as e:
                # Catch Exception (not a bare except) so Ctrl-C and
                # SystemExit still stop the run; include the error detail.
                print("Exception occured for ", self.survey_id,
                      "provider", self.provider, e)
        else:
            print("Bad Provider: ", self.provider)
コード例 #2
0
    def get_data(self):
        """Collect review pages for ``self.sid`` and record completion.

        When ``self.sid`` is a list, nothing is collected. Otherwise the id
        from ``self.get_id()`` and the count from ``self.get_total()`` are
        fetched, and, unless a ``Record`` for this survey/id pair already
        exists, ``self.sub_get`` is mapped over every page index using a
        worker pool. On completion a ``Record`` and a ``Status`` marker are
        saved. Returns ``None``.
        """
        if isinstance(self.sid, list):
            print("Zomato ignored", self.sid)
            return

        rid = self.get_id()
        total = self.get_total()
        # Number of sub_get calls; presumably 5 reviews per page -- TODO
        # confirm. Note this requests one extra page when total is an exact
        # multiple of 5.
        turn = int(total / 5) + 1
        print(turn)

        # count() asks the database for the number of matches instead of
        # loading every matching document just to measure the result.
        if Record.objects(survey_id=self.sid, rid=str(rid)).count() != 0:
            print("Already Review Collected")
            return

        ids = list(range(turn))
        print(ids)
        # The context manager releases the worker pool once map() has
        # returned, so workers are not leaked on every call.
        with Pool() as pool:
            pool.map(self.sub_get, ids)

        Record(provider="zomato", survey_id=self.sid,
               rid=str(rid)).save()
        # The marker key is survey id + provider name. No local name
        # `provider` is bound in this method, so use the same literal as
        # the Record above.
        Status(unique_identifier=self.sid + "zomato",
               scraped_status="success").save()