def _find_keyword_in_url(self, links, domain):
    """Return the normalized links that contain a keyword outside the domain name.

    Args:
        links: Link elements to inspect; they are passed through
            ``self._normalize_elems`` before being examined.
        domain: Domain handed to ``Utility.find_domain_name``; the name it
            returns is removed from each link before the keyword check.

    Returns:
        A list of the normalized links for which ``self._is_contain_keyword``
        is truthy after the domain name has been stripped from the link.
    """
    # Get all urls and normalize them.
    normalized_links = self._normalize_elems(links)

    # The domain name does not depend on the link, so resolve it once
    # instead of once per candidate.
    domain_name = Utility.find_domain_name(domain)

    # Keep only urls that still contain a keyword once the domain name is
    # removed (presumably so a keyword inside the domain itself is ignored).
    # A list comprehension yields a real list on both Python 2 and Python 3,
    # whereas filter() is a lazy one-shot iterator on Python 3.
    return [
        link for link in normalized_links
        if self._is_contain_keyword(link.replace(domain_name, ''))
    ]
def sort_email(self, emails, domain):
    """Rank candidate e-mail addresses for ``domain`` and keep the best ones.

    Args:
        emails: Iterable of candidate e-mail address strings.
        domain: Domain the candidates were collected for.

    Returns:
        A list of at most ``cfg.max_email`` addresses, ordered from the
        highest ``self.email_scoring`` score to the lowest. Candidates with
        equal scores keep their original relative order.
    """
    # If the target domain is not itself a '.go.id' domain, do not keep any
    # candidate whose address contains '.go.id'. The check must be made on
    # each address string, not on the list of addresses.
    if '.go.id' not in domain:
        emails = [email for email in emails if '.go.id' not in email]

    domain_name = Utility.find_domain_name(domain)

    # email_scoring expects an (email, domain_name) pair. Sorting with a key
    # function scores each candidate exactly once, gives a well-defined
    # descending order, and works on Python 2 and 3 alike (list.sort(cmp=...)
    # was removed in Python 3).
    ranked = sorted(
        emails,
        key=lambda email: self.email_scoring((email, domain_name)),
        reverse=True
    )
    return ranked[:cfg.max_email]