Ejemplo n.º 1
0
 def remove_items(self):
     """Removes the leading items from the Redis queue.

     Trims ``self.queue_to_read`` so that only the elements from index
     ``int(self.max_buffer) + 1`` through the end of the list remain, then
     logs that items were deleted.
     """
     first_kept_index = int(self.max_buffer) + 1
     # A stop index of -1 means "through the last element". Using a separately
     # fetched LLEN as the stop would drop anything appended between the LLEN
     # and the LTRIM calls, and costs an extra round trip.
     self.redis_client.ltrim(self.queue_to_read, first_kept_index, -1)
     logger.info(
         msg=f"Items were deleted from {self.queue_to_read} redis queue")
Ejemplo n.º 2
0
    def _send_contacts(self):
        """POST each contact's prediction to the contact API, once per contact id.

        Resets ``self.processed_contacts`` and walks ``self.contacts_for_update``.
        For every contact id not yet seen in this run, the contact's
        ``"prediction"`` is sent as the ``ML`` property, authorised with the
        bearer token stored under ``access_token`` in Redis. A 429 response is
        logged as an error and the contact is handed back to
        ``self.redis_writer`` under ``self.queue_to_read``.

        Raises:
            KeyError: if a contact lacks ``"contact_id"`` or ``"prediction"``.
        """
        self.processed_contacts = {}
        for contact in self.contacts_for_update:
            contact_id = contact["contact_id"]
            # Explicit membership test: a try/except KeyError around the lookup
            # would also swallow a missing "contact_id" key and treat the
            # malformed contact as "not processed yet".
            if contact_id in self.processed_contacts:
                continue

            contact_payload = json.dumps(
                {"properties": [{"property": "ML", "value": contact["prediction"]}]}
            )
            url = f"{self.contact_post_api_url}vid/{contact_id}/profile?"
            # The token is re-read from Redis for every request.
            access_token = self.redis_client.get("access_token").decode("utf-8")
            headers = {"Authorization": f"Bearer {access_token}"}
            res = requests.post(url=url, headers=headers, data=contact_payload)
            logger.info(
                msg=f"status_code: {res.status_code}", extra={"full_msg": res.text}
            )

            if res.status_code == 429:
                logger.error(
                    msg="429 - Ten sencondly rolling limit reached",
                    extra={"full_msg": res.text},
                )
                # Hand the contact back to the queue so it is not lost.
                contact_wrapper = {self.queue_to_read: [contact]}
                self.redis_writer(contact_wrapper)
            # Marked as processed even after a 429: the re-queued copy is what
            # carries the contact forward, not a second attempt in this loop.
            self.processed_contacts[contact_id] = True
Ejemplo n.º 3
0
 def _fetch_contact_info(self):
     """Fetches contact information from the Hubspot database.

     Builds a URL from ``self.contact_api_url`` and the ids in
     ``self.contacts_id_to_fetch`` and requests it. When the response is
     truthy it is appended to ``self.contacts[self.predictable_contacts]``,
     the pending ids are cleared, and the number of ids is logged.
     """
     url = self._build_url(self.contact_api_url, *self.contacts_id_to_fetch)
     contacts_fetched = self._request(url)
     if not contacts_fetched:
         return

     self.contacts[self.predictable_contacts].append(contacts_fetched)
     # Take the count before clearing; measuring the list afterwards would
     # always report 0.
     fetched_count = len(self.contacts_id_to_fetch)
     self.contacts_id_to_fetch.clear()
     logger.info(msg=f"Fetched {fetched_count} contact's info")
Ejemplo n.º 4
0
 def _insert_to_redis(self, contacts):
     """Pushes every contact onto the Redis list named after its key.

     ``contacts`` maps a queue name to a collection of contacts. Each contact
     is serialised as the ``str`` of a one-element list and appended with
     RPUSH. Keys whose value is falsy are skipped; one info line is logged
     for every other key.
     """
     for queue_name, queued in contacts.items():
         if not queued:
             continue
         for entry in queued:
             self.redis_client.rpush(queue_name, str([entry]))
         logger.info(
             msg=f"{len(queued)} contacts were inserted into {queue_name} queue."
         )
Ejemplo n.º 5
0
 def _generate_predictions(self):
     """Returns the model's class probabilities scaled to percentages.

     Replaces "Ainda não sei" answers in the duration column of
     ``self.dataframe`` with NaN (in place), selects
     ``self.relevant_columns``, passes the selection through
     ``ModelTrainer.fill_empty_fields`` and returns
     ``self.model.predict_proba(...) * 100``.
     """
     duration_col = "Qual a duração do seu intercâmbio?"
     undecided = self.dataframe[duration_col] == "Ainda não sei"
     self.dataframe.loc[undecided, duration_col] = np.nan

     features = self.dataframe.loc[:, self.relevant_columns]
     features = ModelTrainer.fill_empty_fields(features)

     predictions = self.model.predict_proba(features) * 100
     logger.info(msg="Predicitions generated")
     return predictions
Ejemplo n.º 6
0
    def _convert_strings_to_int(self):
        """Label-encodes every column that holds at least one ``str`` value.

        For each such column, ``self.reference_dicts[column]`` is reset to an
        empty dict, ``self._make_word_dict(column)`` is called, and the column
        is overwritten with ``self.label_encoder.fit_transform`` applied to
        its values cast to ``str``. Columns without any ``str`` value are left
        untouched. Returns ``None``.
        """
        for column in self.dataframe.columns:
            holds_text = self.dataframe[column].apply(
                lambda value: isinstance(value, str))
            # ``.any()`` answers "is there at least one string?" directly,
            # without building a filtered frame just to measure its length.
            if not holds_text.any():
                continue

            self.reference_dicts[column] = {}
            self._make_word_dict(column)
            self.dataframe.loc[:, column] = self.label_encoder.fit_transform(
                self.dataframe[column].astype("str"))
        logger.info(msg="Strings were converted to int")
Ejemplo n.º 7
0
    def _check_for_strings_in_dataframe(self):
        """Overwrites remaining ``str`` cells with the column's highest code.

        For every column that still holds ``str`` values,
        ``self.reference_dicts[column]`` is passed to
        ``self._find_highest_code`` and the result is written into exactly
        those cells. A ``KeyError`` raised while doing so is logged as the
        column not being found in the references, and the loop moves on.
        """
        for column in self.dataframe.columns:
            # Computed once per column and reused for both the emptiness test
            # and the assignment below.
            is_text = self.dataframe[column].apply(
                lambda value: isinstance(value, str))
            if not is_text.any():
                continue

            try:
                highest_code = self._find_highest_code(
                    self.reference_dicts[column])
                self.dataframe.loc[is_text, column] = highest_code
            except KeyError:
                logger.info(
                    msg=f"Column {column} not found in the references")
Ejemplo n.º 8
0
    def __call__(self):
        """Runs one read / predict / write cycle.

        Reads contacts through ``self.redis_reader``; when nothing comes back
        the call ends immediately. Otherwise the contacts are loaded into
        ``self.dataframe``, the contacts for update are built (with or without
        predictions), ``self.contacts`` is written through
        ``self.redis_writer``, the consumed items are removed from the reader
        and ``self.contacts[self.contacts_for_update]`` is cleared.
        """
        self.contacts_for_prediction = self.redis_reader()
        if not self.contacts_for_prediction:
            return

        logger.info(msg="Data extracted from redis")
        self.dataframe = pd.DataFrame(self.contacts_for_prediction)

        # NOTE(review): predictions are only generated when
        # _convert_strings_to_int returns something other than None --
        # confirm that helper's return contract.
        conversion_result = self._convert_strings_to_int()
        if conversion_result is not None:
            self._check_for_strings_in_dataframe()
            predictions = self._generate_predictions()
            self._build_contacts_for_update(predictions=predictions)
        else:
            self._build_contacts_for_update()

        self.redis_writer(self.contacts)
        self.redis_reader.remove_items()
        self.contacts[self.contacts_for_update].clear()
Ejemplo n.º 9
0
    def _prepare_data(self):
        """Cleans the duration and age columns of ``self.dataframe`` in place.

        Duration column ("Qual a duração do seu intercâmbio?"):
        "Ainda não sei" answers become NaN, then every ``str`` value is
        replaced by the first run of digits it contains, as an ``int``. Rows
        whose value is not a ``str`` end up as NaN, because only the ``str``
        rows are present in the series assigned back to the column.

        Age column ("Idade"): values below 0 or above 90 become NaN, then all
        NaN values are filled with the column mean.

        Raises:
            IndexError: if a ``str`` duration contains no digits.
        """
        duration_col = "Qual a duração do seu intercâmbio?"

        self.dataframe[duration_col] = self.dataframe[duration_col].replace(
            "Ainda não sei", np.nan)
        is_text = self.dataframe[duration_col].apply(
            lambda x: isinstance(x, str))
        self.dataframe[duration_col] = self.dataframe.loc[
            is_text, duration_col].apply(
                lambda x: int(re.findall(r"\d+", x)[0]))

        self.dataframe.loc[self.dataframe.Idade < 0, "Idade"] = np.nan
        self.dataframe.loc[self.dataframe.Idade > 90, "Idade"] = np.nan
        # Assign the filled series back instead of calling fillna(inplace=True)
        # on the selected column: the chained in-place form operates on a
        # temporary and leaves the frame unchanged under pandas Copy-on-Write.
        self.dataframe["Idade"] = self.dataframe["Idade"].fillna(
            self.dataframe["Idade"].mean())
        logger.info(msg="Preparing data for training")
Ejemplo n.º 10
0
    def _read_from_redis(self):
        """Reads items from the Redis queue and flattens them into one list.

        Fetches the elements of ``self.queue_to_read`` from index 0 through
        ``self.max_buffer`` with LRANGE, decodes each one as UTF-8 and parses
        it with ``ast.literal_eval``. A parsed list contributes its elements;
        any other parsed value is added as a single element.

        Returns:
            The flattened list of parsed items (empty when nothing was read).
        """
        raw_items = self.redis_client.lrange(self.queue_to_read, 0,
                                             self.max_buffer)

        redis_items = []
        for raw in raw_items:
            parsed_item = ast.literal_eval(raw.decode("utf-8"))
            # Grow the accumulator in place; ``list + list`` would copy it on
            # every iteration. Only real lists are spliced in, which is the
            # same set of values for which ``list + value`` succeeds.
            if isinstance(parsed_item, list):
                redis_items.extend(parsed_item)
            else:
                redis_items.append(parsed_item)

        if redis_items:
            logger.info(
                msg=
                f"{len(redis_items)} items were read from redis {self.queue_to_read}"
            )
        return redis_items
Ejemplo n.º 11
0
    def _fit_model(self):
        """Fits ``self.model`` on ``self.dataframe`` against ``self.dataframe_sold``."""
        features, target = self.dataframe, self.dataframe_sold
        self.model.fit(features, target)
        logger.info(msg="Model was trained")