def remove_items(self):
    """Remove the head of the Redis queue.

    Drops the first ``int(self.max_buffer) + 1`` entries of the list stored
    at ``self.queue_to_read`` and keeps every entry after them.
    """
    first_kept_index = int(self.max_buffer) + 1
    # An end index of -1 means "through the last element". Asking Redis for
    # the list length first and trimming up to that value could discard
    # entries pushed by another client between the two commands, and cost
    # an extra round trip.
    self.redis_client.ltrim(self.queue_to_read, first_kept_index, -1)
    logger.info(
        msg=f"Items were deleted from {self.queue_to_read} redis queue")
def _send_contacts(self):
    """Post the prediction of every contact in ``self.contacts_for_update``.

    Each distinct ``contact_id`` is posted at most once per call; the ids
    already handled are recorded in ``self.processed_contacts``, which is
    reset at the start of the call. A contact whose request is answered
    with HTTP 429 is passed to ``self.redis_writer`` under the
    ``self.queue_to_read`` key (presumably so it is retried later --
    confirm against the writer).
    """
    self.processed_contacts = {}
    for contact in self.contacts_for_update:
        contact_id = contact["contact_id"]
        if contact_id in self.processed_contacts:
            # Duplicate entry for a contact that was already posted.
            continue

        contact_payload = json.dumps(
            {"properties": [{"property": "ML", "value": contact["prediction"]}]}
        )
        url = f"{self.contact_post_api_url}vid/{contact_id}/profile?"
        # The token is read from Redis for every request rather than once
        # per call. NOTE(review): presumably because it can be refreshed
        # while the loop is running -- confirm before hoisting.
        access_token = self.redis_client.get("access_token").decode("utf-8")
        headers = {"Authorization": f"Bearer {access_token}"}
        res = requests.post(url=url, headers=headers, data=contact_payload)
        logger.info(
            msg=f"status_code: {res.status_code}", extra={"full_msg": res.text}
        )
        if res.status_code == 429:
            logger.error(
                msg="429 - Ten sencondly rolling limit reached",
                extra={"full_msg": res.text},
            )
            contact_wrapper = {self.queue_to_read: [contact]}
            self.redis_writer(contact_wrapper)
        self.processed_contacts[contact_id] = True
def _fetch_contact_info(self):
    """Fetch the pending contacts' information from Hubspot.

    Builds a URL from ``self.contact_api_url`` and the ids in
    ``self.contacts_id_to_fetch``, requests it, appends a truthy response
    to ``self.contacts[self.predictable_contacts]`` and then empties the
    pending-id list.
    """
    url = self._build_url(self.contact_api_url, *self.contacts_id_to_fetch)
    contacts_fetched = self._request(url)
    # Take the count before the id list is cleared: measuring it afterwards
    # made the log line report 0 on every call.
    fetched_count = len(self.contacts_id_to_fetch) if contacts_fetched else 0
    if contacts_fetched:
        self.contacts[self.predictable_contacts].append(contacts_fetched)
    self.contacts_id_to_fetch.clear()
    logger.info(msg=f"Fetched {fetched_count} contact's info")
def _insert_to_redis(self, contacts):
    """Push every contact onto the Redis list named after its key.

    ``contacts`` maps a queue name to a collection of contacts. Each
    contact is pushed on its own, wrapped in a one-element list and
    serialised with ``str``. Keys whose collection is falsy are skipped.
    """
    for queue_name, queued_contacts in contacts.items():
        if not queued_contacts:
            continue
        for contact in queued_contacts:
            self.redis_client.rpush(queue_name, str([contact]))
        logger.info(
            msg=f"{len(queued_contacts)} contacts were inserted into {queue_name} queue."
        )
def _generate_predictions(self):
    """Return the model's class probabilities scaled by 100.

    Cells of the duration column equal to "Ainda não sei" are set to NaN in
    ``self.dataframe``; the columns listed in ``self.relevant_columns`` are
    then passed through ``ModelTrainer.fill_empty_fields`` and fed to
    ``self.model.predict_proba``.
    """
    duration_column = "Qual a duração do seu intercâmbio?"
    is_undecided = self.dataframe[duration_column] == "Ainda não sei"
    self.dataframe.loc[is_undecided, duration_column] = np.nan

    features = ModelTrainer.fill_empty_fields(
        self.dataframe.loc[:, self.relevant_columns])
    scaled_probabilities = self.model.predict_proba(features) * 100
    logger.info(msg="Predicitions generated")
    return scaled_probabilities
def _convert_strings_to_int(self):
    """Label-encode every column that holds at least one string value.

    For each such column a fresh empty entry is created in
    ``self.reference_dicts``, ``self._make_word_dict`` is called for the
    column, and the column is overwritten with the output of
    ``self.label_encoder.fit_transform`` applied to its values cast to
    ``str``. Columns without any string value are left untouched.
    """
    for column in self.dataframe.columns:
        is_string = self.dataframe[column].apply(
            lambda value: isinstance(value, str))
        # ``any()`` answers "is there at least one string?" directly, without
        # building a filtered copy of the frame just to measure its length.
        if not is_string.any():
            continue
        self.reference_dicts[column] = {}
        self._make_word_dict(column)
        self.dataframe.loc[:, column] = self.label_encoder.fit_transform(
            self.dataframe[column].astype("str"))
    logger.info(msg="Strings were converted to int")
def _check_for_strings_in_dataframe(self):
    """Overwrite string cells with a code derived from the reference dicts.

    For every column that contains at least one string value, the string
    cells are replaced by the value returned from
    ``self._find_highest_code(self.reference_dicts[column])``. When that
    lookup raises ``KeyError`` the column is left unchanged and a message
    is logged instead.
    """
    for column in self.dataframe.columns:
        # Computed once and reused for both the existence check and the
        # assignment; the frame is not modified in between.
        is_string = self.dataframe[column].apply(
            lambda value: isinstance(value, str))
        if not is_string.any():
            continue
        try:
            highest_code = self._find_highest_code(
                self.reference_dicts[column])
        except KeyError:
            logger.info(
                msg=f"Column {column} not found in the references")
        else:
            self.dataframe.loc[is_string, column] = highest_code
def __call__(self):
    """Run one read / predict / write cycle.

    Reads contacts through ``self.redis_reader``; when nothing is returned
    the call ends immediately. Otherwise the contacts are loaded into
    ``self.dataframe``, the contacts for update are built (with predictions
    when ``self._convert_strings_to_int`` returns something other than
    ``None``), ``self.contacts`` is handed to ``self.redis_writer``, the
    reader's ``remove_items`` is called and the update list is cleared.
    """
    self.contacts_for_prediction = self.redis_reader()
    if not self.contacts_for_prediction:
        return

    logger.info(msg="Data extracted from redis")
    self.dataframe = pd.DataFrame(self.contacts_for_prediction)

    conversion_result = self._convert_strings_to_int()
    if conversion_result is not None:
        self._check_for_strings_in_dataframe()
        predictions = self._generate_predictions()
        self._build_contacts_for_update(predictions=predictions)
    else:
        self._build_contacts_for_update()

    self.redis_writer(self.contacts)
    self.redis_reader.remove_items()
    self.contacts[self.contacts_for_update].clear()
def _prepare_data(self):
    """Clean the duration and age columns of ``self.dataframe`` in place.

    * Duration column: "Ainda não sei" becomes NaN, and every remaining
      string value is replaced by the first run of digits it contains,
      converted to ``int``.
    * ``Idade``: values below 0 or above 90 become NaN, then all NaN values
      are filled with the column mean.

    Raises:
        IndexError: if a duration string contains no digits.
    """
    duration_column = "Qual a duração do seu intercâmbio?"
    self.dataframe[duration_column] = self.dataframe[duration_column].replace(
        "Ainda não sei", np.nan)

    is_text = self.dataframe[duration_column].apply(
        lambda value: isinstance(value, str))
    # NOTE(review): only the string rows are parsed, and the result is
    # assigned back by index alignment, so every row that was not a string
    # ends up as NaN. Confirm this is intended if the column can already
    # hold numeric values.
    self.dataframe[duration_column] = self.dataframe.loc[
        is_text, duration_column
    ].apply(lambda text: int(re.findall(r"\d+", text)[0]))

    self.dataframe.loc[self.dataframe.Idade < 0, "Idade"] = np.nan
    self.dataframe.loc[self.dataframe.Idade > 90, "Idade"] = np.nan
    # Assign the filled column back explicitly: an in-place ``fillna`` on
    # ``self.dataframe["Idade"]`` acts on an intermediate Series and does
    # not update the frame when pandas Copy-on-Write is active.
    self.dataframe["Idade"] = self.dataframe["Idade"].fillna(
        self.dataframe["Idade"].mean())
    logger.info(msg="Preparing data for training")
def _read_from_redis(self):
    """Read items from the Redis queue and flatten them into one list.

    Fetches the entries at indices ``0`` through ``self.max_buffer`` of the
    list stored at ``self.queue_to_read``. Each entry is decoded as UTF-8
    and parsed with ``ast.literal_eval``; a parsed list contributes its
    elements, any other parsed value is added as a single element.

    Returns:
        list: the collected items, empty when nothing was read.
    """
    redis_items = []
    raw_items = self.redis_client.lrange(
        self.queue_to_read, 0, self.max_buffer)
    for raw_item in raw_items:
        parsed_item = ast.literal_eval(raw_item.decode("utf-8"))
        # Growing the list in place keeps the loop linear; the explicit type
        # check matches what ``list + value`` accepted (lists only) without
        # using TypeError for control flow.
        if isinstance(parsed_item, list):
            redis_items.extend(parsed_item)
        else:
            redis_items.append(parsed_item)
    if redis_items:
        logger.info(
            msg=f"{len(redis_items)} items were read from redis {self.queue_to_read}"
        )
    return redis_items
def _fit_model(self):
    """Fit ``self.model`` on ``self.dataframe`` against ``self.dataframe_sold``."""
    features, target = self.dataframe, self.dataframe_sold
    self.model.fit(features, target)
    logger.info(msg="Model was trained")