Ejemplo n.º 1
0
class HubSpotWriter:
    """Posts queued contact predictions to the HubSpot contact API.

    Every call reads a batch of contacts from the Redis queue named by the
    ``CONTATCS_FOR_UPDATE_QUEUE`` environment variable, posts each distinct
    contact to HubSpot and, if at least one contact was handled, asks the
    reader to remove the batch it handed out.
    """

    def __init__(self):
        self.redis_client = RedisConnectionManager().get_client()
        self.queue_to_read = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        buffer_size = os.environ["MAX_BUFFER_SENDER"]
        self.redis_reader = RedisReader(self.queue_to_read, buffer_size)
        self.redis_writer = RedisWriter()
        self.contact_post_api_url = os.environ["CONTACT_POST_API_URL"]
        # Batch currently being sent, as returned by the reader.
        self.contacts_for_update = []
        # contact_id -> True for every contact already posted in this batch.
        self.processed_contacts = {}

    def __call__(self):
        """Read one batch, send it, and drop it from the queue if handled."""
        self.contacts_for_update = self.redis_reader()
        self._send_contacts()

        if not self.processed_contacts:
            return
        self.redis_reader.remove_items()

    @try_again
    def _send_contacts(self):
        """Post the ``prediction`` of each distinct contact as property ``ML``.

        A contact id that appears more than once in the batch is posted only
        the first time. When HubSpot answers 429 the contact is written back
        to the queue it was read from; it is still recorded as processed.
        """
        self.processed_contacts = {}
        for contact in self.contacts_for_update:
            contact_id = contact["contact_id"]
            if contact_id in self.processed_contacts:
                # Duplicate within the same batch: already posted.
                continue

            properties = [{"property": "ML", "value": contact["prediction"]}]
            contact_payload = json.dumps({"properties": properties})
            url = f"{self.contact_post_api_url}vid/{contact_id}/profile?"
            # The token is re-read from Redis for every request.
            raw_token = self.redis_client.get("access_token")
            access_token = raw_token.decode("utf-8")
            headers = {"Authorization": f"Bearer {access_token}"}
            res = requests.post(url=url, headers=headers, data=contact_payload)
            logger.info(
                msg=f"status_code: {res.status_code}",
                extra={"full_msg": res.text},
            )

            rate_limited = res.status_code == 429
            if rate_limited:
                logger.error(
                    msg="429 - Ten sencondly rolling limit reached",
                    extra={"full_msg": res.text},
                )
                # Put the contact back on the queue so it is retried later.
                self.redis_writer({self.queue_to_read: [contact]})
            self.processed_contacts[contact_id] = True
Ejemplo n.º 2
0
class Predictor:
    """Scores contacts read from Redis with the trained model.

    Each call reads a batch of contacts from the prediction queue, encodes
    the categorical columns with the reference dictionaries produced by
    ``ModelTrainer``, generates a prediction per contact and writes
    ``{"contact_id", "prediction"}`` records to the update queue.
    """

    def __init__(self):
        self.queue_to_read = os.environ["CONTATCS_FOR_PREDICTION_QUEUE"]
        self.contacts_for_update = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        self.redis_reader = RedisReader(self.queue_to_read,
                                        os.environ["MAX_BUFFER_MODEL"])
        self.redis_writer = RedisWriter()
        # The variable is evaluated twice, i.e. it is expected to hold a
        # quoted literal that itself evaluates to the column collection.
        self.relevant_columns = ast.literal_eval(
            ast.literal_eval(os.environ["DATA_TRAIN_COLUMNS"]))
        train_objects = ModelTrainer()()
        self.model = train_objects["model"]
        self.reference_dicts = train_objects["reference_dicts"]
        # Output buffer: update-queue name -> list of contact records.
        self.contacts = {self.contacts_for_update: []}
        self.dataframe = pd.DataFrame()
        # Batch currently being scored, as returned by the reader.
        self.contacts_for_prediction = []

    def __call__(self):
        """Read one batch, score it and forward the results.

        Does nothing when the reader returns no contacts. When the
        categorical encoding fails, every contact in the batch is forwarded
        with the fallback prediction ``"400"`` instead of a score.
        """
        self.contacts_for_prediction = self.redis_reader()
        if not self.contacts_for_prediction:
            return

        logger.info(msg="Data extracted from redis")
        self.dataframe = pd.DataFrame(self.contacts_for_prediction)

        if self._convert_strings_to_int() is None:
            self._build_contacts_for_update()
        else:
            self._check_for_strings_in_dataframe()
            predictions = self._generate_predictions()
            self._build_contacts_for_update(predictions=predictions)

        self.redis_writer(self.contacts)
        self.redis_reader.remove_items()
        self.contacts[self.contacts_for_update].clear()

    def _convert_estunidos_to_eua(self):
        """Replace "Estados Unidos" with "EUA" in "País de interesse"."""
        column = "País de interesse"
        self.dataframe.loc[self.dataframe[column] == "Estados Unidos",
                           column] = "EUA"

    def _convert_strings_to_int(self):
        """Replace known labels with their codes from ``reference_dicts``.

        Returns:
            True on success, None when a referenced column is missing from
            the dataframe (the error is logged).
        """
        try:
            for column, codes in self.reference_dicts.items():
                for label, code in codes.items():
                    self.dataframe.loc[self.dataframe[column] == label,
                                       column] = code
            return True
        except KeyError as err:
            logger.error(
                msg="Unknown column. It's not possible to generate predictions",
                extra={"Full msg error": err},
            )
            return None

    def _check_for_strings_in_dataframe(self):
        """Encode string values that are still present after the conversion.

        Any remaining string in a column that has a reference dictionary is
        replaced with one code above the highest known code. Columns without
        a reference dictionary are left untouched and logged.
        """
        for column in self.dataframe.columns:
            is_string = self.dataframe[column].apply(
                lambda value: isinstance(value, str))
            if not is_string.any():
                continue

            try:
                reference = self.reference_dicts[column]
            except KeyError:
                logger.info(
                    msg=f"Column {column} not found in the references")
                continue
            self.dataframe.loc[is_string,
                               column] = self._find_highest_code(reference)

    def _find_highest_code(self, reference_dict):
        """Return one more than the highest code in ``reference_dict``.

        The search starts at 0, so an empty dictionary (or one holding only
        negative codes) yields 1.
        """
        return max([0, *reference_dict.values()]) + 1

    def _generate_predictions(self):
        """Run the model on the relevant columns of the current dataframe.

        "Ainda não sei" durations are turned into NaN before the empty fields
        are filled by ``ModelTrainer.fill_empty_fields``.

        Returns:
            The output of ``model.predict_proba`` multiplied by 100.
        """
        duration = "Qual a duração do seu intercâmbio?"
        self.dataframe.loc[self.dataframe[duration] == "Ainda não sei",
                           duration] = np.nan
        features = self.dataframe.loc[:, self.relevant_columns]
        features = ModelTrainer.fill_empty_fields(features)
        predictions = self.model.predict_proba(features) * 100
        logger.info(msg="Predicitions generated")
        return predictions

    def _build_contacts_for_update(self, predictions=None):
        """Append one update record per contact to the output buffer.

        Args:
            predictions: Row-aligned prediction matrix; the first column,
                rounded to two decimals, is stored as a string. When None,
                every contact gets the fallback prediction ``"400"``.
        """
        records = self.contacts[self.contacts_for_update]
        for row, contact_id in enumerate(self.dataframe["contact_id"]):
            if predictions is None:
                prediction = "400"
            else:
                prediction = str(round(predictions[row][0], 2))
            # The key must be spelled "prediction": that is what the
            # downstream writer reads from each record.
            records.append({"contact_id": contact_id,
                            "prediction": prediction})
Ejemplo n.º 3
0
class HubSpotFetcher:
    """Fetches deals and contact information in Hubspot"""

    def __init__(self, queue_to_read=None):
        """Set up the Redis helpers and read the configuration.

        Args:
            queue_to_read: Name of the Redis queue holding subscription
                items. Defaults to the ``QUEUE_TO_READ`` environment
                variable.
        """
        self.redis_client = RedisConnectionManager().get_client()
        self.deal_api_url = os.environ["DEAL_API_URL"]
        self.contact_api_url = os.environ["CONTACT_GET_API_URL"]
        self.contact_creation_subscription = os.environ[
            "CONTACT_CREATION_SUBSCRIPTION"]
        self.predictable_contacts = os.environ["PREDICTABLE_CONTACTS_QUEUE"]
        self.contacts_for_update = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]

        if queue_to_read is None:
            self.queue_to_read = os.environ["QUEUE_TO_READ"]
        else:
            self.queue_to_read = queue_to_read

        max_buffer = os.environ["MAX_BUFFER_COLLETOR"]
        self.redis_reader = RedisReader(self.queue_to_read, max_buffer)
        self.redis_writer = RedisWriter()
        # Contact ids collected so far and not yet requested from HubSpot.
        self.contacts_id_to_fetch = []
        # Output buffer: queue name -> items to write to that queue.
        self.contacts = {
            self.predictable_contacts: [],
            self.contacts_for_update: [],
        }
        # objectId -> True for every subscription item already handled.
        self.processed_items = {}

    def __call__(self):
        """Process one batch of subscription items from the input queue."""
        subscription_items = self.redis_reader()
        self.contacts[self.predictable_contacts].clear()
        self.contacts[self.contacts_for_update].clear()
        self.contacts_id_to_fetch.clear()
        self.processed_items = {}
        self._fetch_from_hubspot(subscription_items)
        if self.contacts_id_to_fetch:
            self._fetch_contact_info()

        self.redis_writer(self.contacts)
        if self.processed_items:
            self.redis_reader.remove_items()

    def _fetch_from_hubspot(self, subscription_items):
        """Fetches deals and contacts information from HubSpot database"""
        for item in subscription_items:
            subscription_type = item["subscriptionType"]
            object_name = subscription_type.split(".")[0]
            object_id = item["objectId"]

            if object_id in self.processed_items:
                # The same object may show up several times in one batch.
                continue

            if subscription_type == self.contact_creation_subscription:
                self._format_new_contact(object_id)
            elif object_name == "deal":
                self._fetch_deal_info(object_id)
            elif object_name == "contact":
                self.contacts_id_to_fetch.append(object_id)

            # Flush once 100 or more ids are pending.
            if len(self.contacts_id_to_fetch) > 99:
                self._fetch_contact_info()

            self.processed_items[object_id] = True

    def _format_new_contact(self, contact_id):
        """Assigns prediction equals to 0 in contacts recently created"""
        new_contact = {"contact_id": contact_id, "prediction": "0"}
        self.contacts[self.contacts_for_update].append(new_contact)

    def _fetch_deal_info(self, deal_id):
        """Fetches a deal information from Hubspot database

        The contact ids associated with the deal are added to the pending
        list. A failed request (``None``) or a deal without associations is
        logged and otherwise ignored.
        """
        url = self._build_url(self.deal_api_url, deal_id)
        deal = self._request(url)
        try:
            contacts_id = deal["associations"]["associatedVids"]
            if contacts_id:
                self.contacts_id_to_fetch = (self.contacts_id_to_fetch +
                                             contacts_id)
        except (KeyError, TypeError) as err:
            logger.error(
                msg="Deal with no contact associated",
                extra={"full_msg_error": err},
            )

    def _fetch_contact_info(self):
        """Fetches a contact information from Hubspot database

        All pending contact ids are requested in a single call. On success
        the response is queued for the predictable-contacts queue and the
        pending list is emptied; on failure the pending list is kept.
        """
        # Take the count before the list is cleared so the log is accurate.
        requested = len(self.contacts_id_to_fetch)
        url = self._build_url(self.contact_api_url, *self.contacts_id_to_fetch)
        contacts_fetched = self._request(url)
        if contacts_fetched:
            self.contacts[self.predictable_contacts].append(contacts_fetched)
            self.contacts_id_to_fetch.clear()
            logger.info(msg=f"Fetched {requested} contact's info")

    @try_again
    def _request(self, url):
        """Sends a http request to HubSpot

        Returns:
            The decoded JSON body when the status code is 200, otherwise
            None. On a 429 the objects named in ``url`` are written back to
            the input queue before returning.
        """
        access_token = self.redis_client.get("access_token").decode("utf-8")
        headers = {"Authorization": f"Bearer {access_token}"}
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            return json.loads(response.text)
        # Add contacts to redis again
        if response.status_code == 429:
            logger.error(
                msg="429 - Ten sencondly rolling limit reached",
                extra={"full_msg": response.text},
            )
            self._add_items_to_redis_again(url)

        return None

    def _add_items_to_redis_again(self, url):
        """Add objects that got a 429 request error to redis again"""
        if "contact" in url:
            subscription_type = "contact.Error429"
        else:
            subscription_type = "deal.Error429"

        # The first number found in the URL is skipped; presumably it belongs
        # to the base URL (e.g. an API version) -- TODO confirm.
        objects_id = re.findall(r"\d+", url)[1:]
        objects = [
            {"objectId": int(object_id), "subscriptionType": subscription_type}
            for object_id in objects_id
        ]
        self.redis_writer({self.queue_to_read: objects})

    @staticmethod
    def _build_url(base_url, *ids_to_fetch):
        """Builds urls to fetch both deal and contact information

        The first id is appended directly to ``base_url``; every further id
        is appended as an ``&vid=<id>`` parameter. At least one id is
        required.
        """
        remaining = iter(ids_to_fetch)
        url = f"{base_url}{next(remaining)}"
        return url + "".join(f"&vid={extra_id}" for extra_id in remaining)
Ejemplo n.º 4
0
class Transformer:
    """Turns raw HubSpot contacts into model-ready records.

    Each call reads a batch from the predictable-contacts queue. Contacts
    with all required properties go to the prediction queue; the others go
    to the update queue with the prediction label ``"404"``.
    """

    def __init__(self):
        self.contacts_for_prediction = os.environ[
            "CONTATCS_FOR_PREDICTION_QUEUE"]
        self.contacts_for_update = os.environ["CONTATCS_FOR_UPDATE_QUEUE"]
        self.redis_reader = RedisReader(
            os.environ["PREDICTABLE_CONTACTS_QUEUE"],
            os.environ["MAX_BUFFER_PREPROCESSOR"],
        )
        self.redis_writer = RedisWriter()
        # Output buffer: queue name -> records to write to that queue.
        self.contacts = {
            self.contacts_for_prediction: [],
            self.contacts_for_update: []
        }

    def __call__(self):
        """Format one batch and forward it to the output queues.

        The batch is removed from the input queue only when at least one
        record was produced.
        """
        predictable_contacts = self.redis_reader()
        self.contacts[self.contacts_for_prediction].clear()
        self.contacts[self.contacts_for_update].clear()
        self._format_predictable_contacts(predictable_contacts)
        self.redis_writer(self.contacts)
        if (self.contacts[self.contacts_for_prediction]
                or self.contacts[self.contacts_for_update]):
            self.redis_reader.remove_items()

    def _format_predictable_contacts(self, predictable_contacts):
        """Formats contacts to be predict by the Machine Learning model

        Args:
            predictable_contacts: Iterable of mappings from contact id to a
                payload that holds the contact's ``"properties"``. A contact
                id seen more than once is formatted only the first time.
        """
        processed_contacts = set()
        for contact in predictable_contacts:
            for contact_id, payload in contact.items():
                if contact_id in processed_contacts:
                    continue

                contact_info = self._extract_contact_main_info(
                    contact_id, payload["properties"])
                if contact_info:
                    self.contacts[self.contacts_for_prediction].append(
                        contact_info)
                else:
                    self.contacts[self.contacts_for_update].append(
                        self._format_non_predictable_contact(contact_id))
                processed_contacts.add(contact_id)

    def _extract_contact_main_info(self, contact_id, contact_properties):
        """Build the model input record for one contact.

        Returns:
            The record, or None when a required property is missing, has no
            usable structure, or holds a value that cannot be converted to
            an integer (the problem is logged).
        """
        try:
            contact = {
                "contact_id":
                contact_id,
                "País de interesse":
                contact_properties["country_of_interest"]["value"],
                "Qual a duração do seu intercâmbio?":
                contact_properties["program_duration"]["value"],
                "Idade":
                int(contact_properties["idade"]["value"]),
                "Associated Deals":
                int(contact_properties["num_associated_deals"]["value"]),
                "Number of times contacted":
                int(contact_properties["num_contacted_notes"]["value"]),
                "Number of Sales Activities":
                int(contact_properties["num_notes"]["value"]),
                "Number of Unique Forms Submitted":
                int(contact_properties["num_unique_conversion_events"]
                    ["value"]),
                "Number of Form Submissions":
                int(contact_properties["num_conversion_events"]["value"]),
            }
            self._convert_program_duration_to_int(contact)
            return contact
        # ValueError covers empty or non-numeric values passed to int();
        # without it one bad contact would abort the whole batch.
        except (KeyError, TypeError, ValueError) as err:
            logger.error(
                msg="Contact is not predictable due to misinformation",
                extra={"Full msg error": err},
            )
            return None

    @staticmethod
    def _format_non_predictable_contact(contact_id):
        """Assigns the '404' label to contacts that does not have enough information """
        return {"contact_id": contact_id, "prediction": "404"}

    @staticmethod
    def _convert_program_duration_to_int(contact):
        """Replace the program duration text with its first number, if any.

        The record is modified in place; a duration without digits is left
        unchanged.
        """
        duration_key = "Qual a duração do seu intercâmbio?"
        numbers = re.findall(r"\d+", contact[duration_key])
        if numbers:
            contact[duration_key] = int(numbers[0])