Example #1
0
from hatchbuck import Hatchbuck
import pprint
import sys
import logging

# Log everything from DEBUG upwards, prefixed with timestamp, logger name
# and level.
LOGFORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.DEBUG, format=LOGFORMAT)
pp = pprint.PrettyPrinter()

# Command line: the first argument is handed to the Hatchbuck client
# (presumably the API key -- confirm against the Hatchbuck class), the
# second is the email address of the contact to look up.
# NOTE(review): noop=False is passed explicitly; this looks like it allows
# real changes to be written -- confirm before running against live data.
hatchbuck = Hatchbuck(sys.argv[1], noop=False)
profile = hatchbuck.search_email(sys.argv[2])
# Print the phone numbers before and after the cleanup so the effect of
# clean_all_phone_numbers() can be compared by eye.
pp.pprint(profile["phones"])
profile = hatchbuck.clean_all_phone_numbers(profile)
pp.pprint(profile["phones"])
Example #2
0
class HatchbuckParser:
    """
    An object that does all the parsing for/with Hatchbuck.
    """
    def __init__(self, args):
        """
        Store the arguments; the Hatchbuck client is not created yet.

        :param args: object whose attributes (``hatchbuck``, ``noop``,
            ``file``, ``dir``, ...) are read by the other methods
        """
        # the API client is set up later by init_hatchbuck()
        self.hatchbuck = None
        # counters collected while parsing a file
        self.stats = dict()
        self.args = args

    def main(self):
        """Entry point: set up the Hatchbuck client, then parse the input"""
        logging.debug("starting with arguments: %s", self.args)
        # the client must exist before any file is parsed, so order matters
        for step in (self.init_hatchbuck, self.parse_files):
            step()

    def show_summary(self):
        """Write the collected statistics to the log at INFO level"""
        summary = self.stats
        logging.info(summary)

    def init_hatchbuck(self):
        """
        Create the Hatchbuck API client from the configured key.

        Logs an error and exits the process with status 1 when no key was
        supplied in the arguments.
        """
        api_key = self.args.hatchbuck
        if api_key:
            self.hatchbuck = Hatchbuck(api_key, noop=self.args.noop)
            return

        logging.error("No hatchbuck_key found.")
        sys.exit(1)

    def parse_files(self):
        """Start parsing files"""
        if self.args.file:
            for file in self.args.file:
                logging.debug("parsing file %s", file)
                self.parse_file(file)
        elif self.args.dir:
            for direc in self.args.dir:
                logging.debug("using directory %s", direc)
                for file in os.listdir(direc):
                    if file.endswith(".vcf"):
                        file_path = os.path.join(direc, file)
                        logging.info("parsing file %s", file_path)
                        try:
                            self.parse_file(file_path)
                        except binascii.Error as error:
                            logging.error("error parsing: %s", error)
        else:
            logging.info("Nothing to do.")

    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    def parse_file(self, file):
        """
        Parse a single address book file and sync its cards to Hatchbuck.

        ``self.stats`` is reset and then filled with counters for this file.
        Every vCard that has a name and a plausible first email address is
        looked up in Hatchbuck by its email addresses. When nothing is found
        a new contact is created; every contact that is found gets its
        missing details filled in from the card. When one card matches
        several distinct Hatchbuck contacts a notification is sent.

        :param file: path of the vCard file to read
        """
        prin = pprint.PrettyPrinter()
        self.stats = {}

        # The context manager closes the handle even when parsing or one of
        # the Hatchbuck calls raises.
        with open(file) as handle:
            for vob in vobject.readComponents(handle):
                content = vob.contents
                if self.args.verbose:
                    logging.debug("parsing %s:", file)
                    prin.pprint(content)
                self._sync_card(content, file)

    def _count(self, key):
        """Increase the statistics counter ``key`` by one."""
        self.stats[key] = self.stats.get(key, 0) + 1

    @staticmethod
    def _entry_kind(entry):
        """Translate a vCard TYPE parameter into Work, Home or Other."""
        try:
            types = entry.type_paramlist
        except AttributeError:
            # the entry carries no TYPE parameter at all
            return "Other"
        if "WORK" in types:
            return "Work"
        if "HOME" in types:
            return "Home"
        return "Other"

    @staticmethod
    def _usable_emails(content):
        """Return the card's email entries that look like valid addresses."""
        return [
            email for email in content.get("email", [])
            if re.match(r"^[^@äöü]+@[^@]+\.[^@]+$", email.value)
        ]

    def _sync_card(self, content, file):
        """Create or update the Hatchbuck contact(s) for one vCard."""
        # A card that cannot be used is only counted and skipped; the
        # remaining cards of the same file are still processed.
        if "n" not in content:
            self._count("noname")
            return
        if "email" not in content or not re.match(
                r"^[^@]+@[^@]+\.[^@]+$", content["email"][0].value):
            self._count("noemail")
            return
        self._count("valid")

        # aggregate stats what kind of fields we have available
        for field in content:
            self._count(field)

        profile_list = []
        for email in self._usable_emails(content):
            profile = self.hatchbuck.search_email(email.value)
            if profile:
                profile_list.append(profile)

        if not profile_list:
            self._create_contact(content)

        for profile in profile_list:
            self._update_contact(profile, content)

        self._report_duplicates(profile_list, file)

    def _create_contact(self, content):
        """Create a new Hatchbuck contact (status Lead) from a vCard."""
        profile = {
            "firstName": content["n"][0].value.given,
            "lastName": content["n"][0].value.family,
        }
        if "title" in content:
            profile["title"] = content["title"][0].value
        if "org" in content:
            profile["company"] = content["org"][0].value

        profile["subscribed"] = True
        profile["status"] = {"name": "Lead"}

        if self.args.source:
            profile["source"] = {"id": self.args.source}

        # override hatchbuck sales rep username if set
        # (default: api key owner)
        if self.args.user:
            profile["salesRep"] = {"username": self.args.user}

        profile["emails"] = [
            {"address": email.value, "type": self._entry_kind(email)}
            for email in self._usable_emails(content)
        ]

        profile = self.hatchbuck.create(profile)
        logging.info("added contact: %s", profile)

    def _update_contact(self, profile, content):
        """Fill in details of an existing Hatchbuck contact from a vCard."""
        # a name that is empty or looks like an email address is replaced
        if profile["firstName"] == "" or "@" in profile["firstName"]:
            profile = self.hatchbuck.profile_add(
                profile, "firstName", None, content["n"][0].value.given)

        if profile["lastName"] == "" or "@" in profile["lastName"]:
            profile = self.hatchbuck.profile_add(
                profile, "lastName", None, content["n"][0].value.family)

        if "title" in content and profile.get("title", "") == "":
            profile = self.hatchbuck.profile_add(
                profile, "title", None, content["title"][0].value)

        if "company" in profile:
            if "org" in content and profile.get("company", "") == "":
                profile = self.hatchbuck.profile_add(
                    profile, "company", None, content["org"][0].value)

            # Flag company names that need manual cleanup. re.search is
            # required here: re.match only looks at the start of the string
            # and would miss a trailing ";" or a "|" inside the name.
            if re.search(r";$", profile["company"]):
                logging.warning("found unclean company name: %s",
                                profile["company"])

            if re.search(r"\|", profile["company"]):
                logging.warning("found unclean company name: %s",
                                profile["company"])

        for addr in content.get("adr", []):
            address = {
                "street": addr.value.street,
                "zip_code": addr.value.code,
                "city": addr.value.city,
                "country": addr.value.country,
            }
            logging.debug("adding address %s %s", address, profile)
            profile = self.hatchbuck.profile_add_address(
                profile, address, self._entry_kind(addr))

        for telefon in content.get("tel", []):
            profile = self._add_phone(profile, telefon)
        # clean & deduplicate all phone numbers
        profile = self.hatchbuck.clean_all_phone_numbers(profile)

        messengers = (
            ("x-skype", "Skype"),
            ("x-msn", "Messenger"),
            ("x-msnim", "Messenger"),
        )
        for field, kind in messengers:
            for entry in content.get(field, []):
                profile = self.hatchbuck.profile_add(
                    profile, "instantMessaging", "address", entry.value,
                    {"type": kind})

        for twitter in content.get("x-twitter", []):
            if "twitter.com" in twitter.value:
                value = twitter.value
            else:
                # only a handle was stored: turn it into a profile URL
                value = "http://twitter.com/" + twitter.value.replace(
                    "@", "")
            profile = self.hatchbuck.profile_add(
                profile, "socialNetworks", "address", value,
                {"type": "Twitter"})

        for url in content.get("url", []) + content.get(
                "x-socialprofile", []):
            value = url.value
            if not value.startswith("http"):
                value = "http://" + value
            if "facebook.com" in value:
                profile = self.hatchbuck.profile_add(
                    profile, "socialNetworks", "address", value,
                    {"type": "Facebook"})
            elif "twitter.com" in value:
                profile = self.hatchbuck.profile_add(
                    profile, "socialNetworks", "address", value,
                    {"type": "Twitter"})
            else:
                profile = self.hatchbuck.profile_add(
                    profile, "website", "websiteUrl", value)

        for bday in content.get("bday", []):
            # slices assume a YYYY-MM-DD style value -- TODO confirm
            date = {
                "year": bday.value[0:4],
                "month": bday.value[5:7],
                "day": bday.value[8:10],
            }
            profile = self.hatchbuck.profile_add_birthday(profile, date)

        if self.args.tag:
            if not self.hatchbuck.profile_contains(
                    profile, "tags", "name", self.args.tag):
                self.hatchbuck.add_tag(profile["contactId"], self.args.tag)

    @staticmethod
    def _format_international(number, region):
        """
        Parse ``number`` and return it in international format.

        :param region: country code used for numbers without a prefix, or
            None when the number must carry its own country code
        :raises phonenumbers.phonenumberutil.NumberParseException: when the
            number cannot be parsed
        """
        parsed = phonenumbers.parse(number, region)
        return phonenumbers.format_number(
            parsed, phonenumbers.PhoneNumberFormat.INTERNATIONAL)

    def _add_phone(self, profile, telefon):
        """
        Add one vCard phone entry to the profile and return the profile.

        Numbers are stored in international format when they can be parsed,
        either on their own or with the country of the contact's only
        address. Anything else is stored as cleaned-up text unless a stored
        number already ends with it.
        """
        number = telefon.value
        for rep in "()-\xa0":
            # clean up number
            number = number.replace(rep, "")
        number = number.replace("+00", "+").replace("+0", "+")
        kind = self._entry_kind(telefon)

        redundant = False
        try:
            pformatted = self._format_international(number, None)
        except phonenumbers.phonenumberutil.NumberParseException:
            # number could not be parsed, e.g. because it is a
            # local number without country code
            logging.warning(
                "could not parse number %s as %s in %s, "
                "trying to guess country from address",
                telefon.value,
                number,
                self.hatchbuck.short_contact(profile),
            )
            pformatted = self._guess_international(profile, telefon, number)
            if pformatted is None:
                # still only a local number: keep the cleaned text, unless
                # the same number is already stored in its long form
                pformatted = number
                redundant = self._is_suffix_of_existing(profile, number)

        # This has to run for numbers that parsed at the first attempt as
        # well, otherwise complete international numbers would never be
        # added to the profile.
        if redundant:
            return profile
        return self.hatchbuck.profile_add(
            profile, "phones", "number", pformatted, {"type": kind})

    def _guess_international(self, profile, telefon, number):
        """
        Format a local number using the country of the contact's addresses.

        Returns the international form, or None when the addresses do not
        name exactly one country or the number still cannot be parsed.
        """
        countries_found = []
        for addr in profile.get("addresses", []):
            country = addr.get("country", False)
            if country and country not in countries_found:
                countries_found.append(country)
        logging.debug("countries found %s", countries_found)

        if len(countries_found) != 1:
            return None

        # lets try to parse the number with the country
        countrycode = countries.lookup(countries_found[0]).alpha_2
        logging.debug("countrycode %s", countrycode)
        try:
            pformatted = self._format_international(number, countrycode)
        except phonenumbers.phonenumberutil.NumberParseException:
            logging.warning(
                "could not parse number %s as %s using country %s in %s",
                telefon.value,
                number,
                countrycode,
                self.hatchbuck.short_contact(profile),
            )
            return None
        logging.debug("guess %s", pformatted)
        return pformatted

    def _is_suffix_of_existing(self, profile, number):
        """
        Tell whether a stored phone number already ends with ``number``.

        e.g. +41 76 4000 464 compared to 0764000464
        """
        # skip the 0 in front
        num = number.replace(" ", "")[1:]
        # .get(): a profile without any phones has nothing to compare with
        for tel2 in profile.get("phones", []):
            # check for suffix match
            if tel2["number"].replace(" ", "").endswith(num):
                logging.warning(
                    "not adding number %s from %s because it "
                    "is a suffix of existing %s",
                    num,
                    self.hatchbuck.short_contact(profile),
                    tel2["number"],
                )
                return True
        return False

    def _report_duplicates(self, profile_list, file):
        """
        Send a notification when one card matched several contacts.

        The contacts are compared by their ``contactId``; the message lists
        name, all emails, all phone numbers and the URL of each of them.
        """
        profile_contactids = []
        descriptions = []
        for profile in profile_list:
            if profile["contactId"] in profile_contactids:
                continue
            profile_contactids.append(profile["contactId"])

            # list every address/number, not just the last one
            email_profile = " ".join(
                email_add["address"]
                for email_add in profile.get("emails", []))
            number_profile = " ".join(
                phone_number["number"]
                for phone_number in profile.get("phones", []))

            descriptions.append("{0} {1} ({2}, {3}, {4})".format(
                profile["firstName"],
                profile["lastName"],
                email_profile,
                number_profile,
                profile["contactUrl"],
            ))

        if len(profile_contactids) > 1:
            # there are duplicates
            NotificationService().send_message(
                "Duplicates: %s from file: %s" % (
                    ", ".join(descriptions), file))
from hatchbuck import Hatchbuck
import pprint
import sys

pp = pprint.PrettyPrinter()
# The first command line argument is handed to the Hatchbuck client
# (presumably the API key -- confirm against the Hatchbuck class).
hatchbuck = Hatchbuck(sys.argv[1])
# Look up a single contact by its (redacted) email address and dump
# whatever the search returns.
profile = hatchbuck.search_email("*****@*****.**")
pp.pprint(profile)