def analyze(self, tag_weights=None):
    """Pick the best email, build the domain document and derive its tags.

    Every entry of ``self.emails`` is passed to
    ``self.emailranker.rank_email``; each one ranked ``1`` is stored in
    ``self.best_email``, so the last such entry wins.  ``self.best_email``
    is not assigned here when no entry ranks ``1``.

    Args:
        tag_weights: Forwarded unchanged as the second argument of
            ``build_tags``.

    Returns:
        The tuple ``(self.domain, self.best_email, self.emails, self.tags)``.
    """
    for candidate in self.emails:
        if self.emailranker.rank_email(candidate) != 1:
            continue
        self.best_email = candidate

    self.domain_doc = build_doc(self.responses)
    self.tags = build_tags(self.domain_doc, tag_weights)

    summary = (self.domain, self.emails, self.tags)
    log.info(summary)

    return (self.domain, self.best_email, self.emails, self.tags)
def get_url_response(url, timeout=3):
    """Issue an HTTP GET for *url* and return the response, or None on failure.

    Args:
        url: Address to request.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument.  Defaults to 3, the value that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        Whatever ``requests.get`` returned, or ``None`` when the call raised
        ``requests.exceptions.RequestException`` or ``UnicodeError``.
    """
    # Pass url as a logger argument instead of pre-formatting with ``%``:
    # the rendered message is the same, but a tuple-valued url no longer
    # breaks the formatting.
    log.info("Processing %s", url)
    try:
        response = requests.get(url, timeout=timeout)
    except (requests.exceptions.RequestException, UnicodeError) as e:
        # Best-effort fetch: failures are only logged and reported as None.
        log.debug("{} failed: {}".format(url, str(e)))
        response = None
    log.debug("done processing")
    return response
def write(self):
    """Save every item in ``self.data``, then call ``self.empty_data()``.

    A failing ``save()`` is logged and does not stop the remaining items
    from being saved.
    """
    log.info("WRITING BATCH TO DB!!!!")
    failure_template = "FAILED TO WRITE RECORD: {} WITH EXCEPTION: {}"
    for record in self.data:
        try:
            record.save()
        except Exception as err:
            # Deliberately broad: one bad record must not abort the batch.
            log.info(failure_template.format(record, err))
    self.empty_data()
def add_or_update_seed(seed):
    """Save *seed*, or refresh the stored seeds that share its URL.

    When ``Seed.objects.filter(url=seed.url)`` yields nothing, *seed* itself
    is saved.  Otherwise each matching seed gets its ``modified_count``
    incremented through an ``F`` expression, takes over ``crawled``,
    ``weighted_terms`` and ``search_term`` from *seed*, and is saved; *seed*
    itself is not saved in that case.

    Args:
        seed: Object providing ``url``, ``crawled``, ``weighted_terms``,
            ``search_term`` and ``save()``.
    """
    matches = Seed.objects.filter(url=seed.url)

    if not matches:
        log.info("new Seed {} being saved to DB".format(seed))
        seed.save()
        return

    log.info("Seed or Seeds {} found, updating....".format(seed.url))
    for stored in matches:
        stored.modified_count = F('modified_count') + 1
        stored.crawled = seed.crawled
        stored.weighted_terms = seed.weighted_terms
        stored.search_term = seed.search_term
        stored.save()
def add_email(self, email, url, seed=None):
    """Queue a new Email record, or bump the counter of an existing one.

    Nothing happens when ``self.blacklist.is_blacklisted(email)`` is true.
    Otherwise the address is looked up in lower-cased form:

    * exactly one match: its ``modified_count`` is incremented through an
      ``F`` expression and the record is saved;
    * several matches: the situation is logged and nothing is changed;
    * no match: a new ``Email`` is built and handed to the writer via
      ``add_data``.

    Args:
        email: Address to record; it is stored lower-cased.
        url: Stored on a new record as ``from_url``.
        seed: Stored on a new record as ``seed_url``.
    """
    if self.blacklist.is_blacklisted(email):
        return

    # New records are stored lower-cased (see the Email(...) call below), so
    # the lookup must use the same form; otherwise mixed-case input never
    # matches its own stored record and is queued again as a duplicate.
    address = email.lower()

    try:
        existing = Email.objects.get(email_address=address)
    except Email.DoesNotExist:
        existing = None
    except Email.MultipleObjectsReturned:
        # Same policy as add_blogger: duplicates already stored count as
        # "found" instead of propagating out of this method.
        log.info("Email {} found multiple times, skipping....".format(email))
        return

    if existing is not None:
        log.info("Email {} found, updating....".format(email))
        existing.modified_count = F('modified_count') + 1
        existing.save()
        return

    email_model = Email(
        seed_url=seed,
        email_address=address,
        from_url=url,
        tier=self.get_email_tier(email),
    )
    self.__writer.add_data(email_model)
def add_blogger(self, blogger):
    """Save *blogger*, or bump the counter of the one already stored.

    The lookup is ``Blogger.objects.get`` on ``blogger.email_address``:

    * one match: its ``modified_count`` is incremented through an ``F``
      expression and it is saved;
    * no match: *blogger* itself is saved;
    * several matches: only a log line is written.

    Args:
        blogger: Object providing ``email_address`` and ``save()``.
    """
    try:
        stored = Blogger.objects.get(
            email_address=blogger.email_address,
        )
        log.info("blogger {} already found. proceeding...".format(blogger))
        stored.modified_count = F('modified_count') + 1
        stored.save()
    except Blogger.DoesNotExist:
        log.info("new blogger {} being saved to DB".format(blogger))
        blogger.save()
    except Blogger.MultipleObjectsReturned:
        # Several stored matches: log only, change nothing.
        log.info("blogger {} already found. proceeding...".format(blogger))
def add_data(self, model_data):
    """Add *model_data* to ``self.data`` and call ``self.check_should_write()``.

    Args:
        model_data: Item handed to ``self.data.add``.
    """
    message = "adding %s to persistence" % model_data
    log.info(message)
    self.data.add(model_data)
    self.check_should_write()