Example #1
0
    def contacts(self):
        """Build one contact record per element matching the contact-group layout.

        The e-mail and phone elements already stored on the instance
        (``self.email_els`` / ``self.phone_els``) are grouped with
        ``utils.group_by_common_parents``.  The resulting group parents are
        handed to ``utils.find_similar_selector_paths``, and every element that
        ``utils.get_elements_for_path`` returns for those paths is scanned for
        e-mails, phones, addresses and links.

        Returns:
            list of dict: one dict per scanned element, with the keys

            * ``'emails'`` / ``'phones'`` -- second item returned by
              ``self.emails`` / ``self.phones`` for that element;
            * ``'addresses'`` -- dicts with ``'city'``, ``'state'``, ``'zip'``
              built from ``validators.address_re.findall`` matches;
            * ``'urls'`` -- de-duplicated (unordered) hrefs accepted by
              ``validators.url_no_path_re``;
            * ``'social_urls'`` -- dicts with ``'type'`` and ``'url'`` for
              hrefs accepted by ``validators.social_url_re``.

            Elements for which every value is empty are left out.
        """
        group_result = utils.group_by_common_parents(self.email_els, self.phone_els)
        doc = self.body.document()
        seed_parents = [group_parent for _email, _phone, group_parent in group_result]

        results = []

        for path, _els in utils.find_similar_selector_paths(seed_parents):
            for group_parent in utils.get_elements_for_path(doc, path):
                # Only the extracted values are needed here, not the elements.
                _, emails = self.emails(group_parent)
                _, phones = self.phones(group_parent)

                all_links = utils.traverse(group_parent, match_el=lambda el: el.tagName() == 'A')

                # Single pass over the links: the href is read once and each
                # regex is evaluated once per link.
                unique_urls = set()
                social_urls = []
                for a in all_links:
                    href = a.attribute('href')
                    if validators.url_no_path_re.match(href):
                        unique_urls.add(unicode(href))

                    social_match = validators.social_url_re.match(href)
                    if social_match:
                        # NOTE(review): assumes social_url_re always captures the
                        # network name in at least one non-empty group; otherwise
                        # names[0] raises IndexError -- confirm in validators.
                        names = [unicode(name) for name in social_match.groups() if name]
                        social_urls.append({
                            'type': names[0],
                            'url': unicode(href),
                        })

                addresses = []
                for _el, matches in utils.traverse_extract(
                        group_parent,
                        match_text=lambda s: validators.address_re.findall(s)):
                    # Each match is expected to be a (city, state, zip) triple.
                    for city, state, zip_code in matches:
                        addresses.append({'city': city.strip(), 'state': state, 'zip': zip_code})

                result = {
                    'emails': emails,
                    'phones': phones,
                    'addresses': addresses,
                    'urls': list(unique_urls),
                    'social_urls': social_urls,
                }
                # Keep the record only if at least one field found something.
                if any(result.values()):
                    results.append(result)
        return results
Example #2
0
 def phones(self, parent=None):
     """Find US phone numbers beneath ``parent`` (``self.body`` if it is falsy).

     Elements are selected by ``utils.traverse`` using a text matcher built
     on ``phonenumbers.PhoneNumberMatcher`` with the ``'US'`` region.

     Returns:
         tuple: ``(phone_els, phones)`` where ``phone_els`` is whatever
         ``utils.traverse`` returned and ``phones`` is a list of dicts with
         the keys ``'type'``, ``'raw_number'`` and ``'number'``, one per
         match found in each element's plain text.
     """
     def find_numbers(text):
         return list(phonenumbers.PhoneNumberMatcher(text, 'US'))

     root = parent or self.body
     phone_els = utils.traverse(root, match_text=find_numbers)

     phones = []
     for el in phone_els:
         # Convert the element text once and reuse it for every match.
         text = unicode(el.toPlainText())
         for match in phonenumbers.PhoneNumberMatcher(text, 'US'):
             raw = match.raw_string
             entry = {
                 'type': utils.number_type(text, raw),
                 'raw_number': raw,
                 'number': utils.format_us_phone_number(raw),
             }
             phones.append(entry)
     return phone_els, phones
Example #3
0
 def emails(self, parent=None):
     """Find e-mail addresses beneath ``parent`` (``self.body`` if it is falsy).

     Elements are selected by ``utils.traverse``, matching on
     ``utils.find_emails`` applied to the ``href`` attribute and to text,
     with an empty ``ignore_tags`` list.

     Returns:
         tuple: ``(email_els, emails)`` where ``email_els`` is whatever
         ``utils.traverse`` returned and ``emails`` is a de-duplicated list
         (arbitrary order) of the addresses found in each element's ``href``
         and plain text.
     """
     root = parent or self.body
     email_els = utils.traverse(
         root,
         match_el=lambda el: utils.find_emails(el.attribute('href')),
         match_text=lambda s: utils.find_emails(s),
         ignore_tags=[])

     found = set()
     for el in email_els:
         # href addresses first, then text addresses, for each element in turn.
         from_href = utils.find_emails(el.attribute('href'))
         from_text = utils.find_emails(unicode(el.toPlainText()))
         found.update(from_href + from_text)
     return email_els, list(found)