def __init__(self, **kwargs):
    """Set up the connectors backing the Sources-Opportunity relation.

    One connection goes to the CRM (raw opportunity/lead data), the
    other to Galileo (preprocessed owners tables).
    """
    super().__init__(**kwargs)
    self.crm_connector, self.galileo_connector = (
        DBConnect(CRM_CREDENTIALS),
        DBConnect(GALILEO_DB_CREDENTIALS),
    )
def __init__(self, **kwargs):
    """Load the active users / roles data set straight from the CRM."""
    super().__init__(**kwargs)
    connector = DBConnect(CRM_CREDENTIALS)
    self.crm_connector = connector
    self.data_set = connector.custom_query(self.query_users)
def __init__(self, rfc: str, interval_unit: str, ids: tuple, path: str = None):
    """Prepare the manager and eagerly fetch the matching declarations.

    :param rfc: tax id whose declarations are requested.
    :param interval_unit: declaration interval to filter by.
    :param ids: declaration ids to fetch; empty tuple yields an empty frame.
    :param path: output directory for pdf files; defaults to the current
        working directory at call time.
    """
    # Resolve the default per call: a def-time ``os.getcwd()`` default
    # freezes the working directory at import time, which is almost never
    # what the caller expects.
    if path is None:
        path = os.getcwd()
    self.rfc = rfc
    self.interval_unit = interval_unit
    self.path = path
    self.ids = ids
    os.makedirs(path, exist_ok=True)
    self.capone_connector = DBConnect(CAPONE_CREDENTIALS)
    self.declarations_df = self.get_all_declarations_for_given_rfc()
def get_declarations_ids(rfc: str, interval_unit: str, dec_type: str) -> set:
    """Return the ids of every stored declaration matching the filters.

    :param rfc: tax id to filter by.
    :param interval_unit: declaration interval (e.g. monthly/yearly key).
    :param dec_type: declaration type.
    :return: set of matching declaration ids (empty when nothing matches).
    """
    capone_connector = DBConnect(CAPONE_CREDENTIALS)
    # Parameterized query instead of f-string interpolation: the previous
    # version injected the arguments straight into the SQL text (same
    # params= pattern DeclarationsManager already uses).
    query = '''
        SELECT id
        FROM declarations
        WHERE rfc = %(rfc)s
          AND type = %(dec_type)s
          AND interval_unit = %(interval_unit)s;
    '''
    params = {'rfc': rfc, 'dec_type': dec_type, 'interval_unit': interval_unit}
    all_ids = capone_connector.custom_query(query, params=params)
    return set(all_ids['id'])
def get_update_table_data(self):
    """Build the last-extraction summary table for ``self.rfc``.

    Fetches every extraction row for the rfc and keeps, per
    (client_rfc, data_type) pair, the row selected by ``self.max_selector``.

    :return: deduplicated DataFrame with one row per client_rfc/data_type.
    """
    mysql_capone = DBConnect(CAPONE_CREDENTIALS)
    # Parameterized to avoid interpolating self.rfc into the SQL text.
    extract_query = """
        SELECT credential_rfc client_rfc,
               handler data_type,
               state,
               finished_at
        FROM extractions
        WHERE credential_rfc = %(rfc)s;
    """
    extract_table = mysql_capone.custom_query(extract_query,
                                              params={'rfc': self.rfc})
    update_table = (extract_table
                    .groupby(['client_rfc', 'data_type'])
                    .apply(self.max_selector))
    update_table.reset_index(inplace=True, drop=True)
    return update_table
def get_all_types_of_declarations_df():
    """Return one row per distinct kind of declaration Capone has found.

    Groups every stored declaration by (interval_unit, type, complementary)
    and counts how many declarations fall in each bucket.

    :return: DataFrame with interval_unit, type, complementary and an
        ``ind`` column holding the per-bucket count.
    """
    connector = DBConnect(CAPONE_CREDENTIALS)
    frame = connector.custom_query(
        'SELECT interval_unit, type, complementary FROM declarations')
    # Helper column so the groupby-count produces a named result column.
    frame['ind'] = 1
    grouped = frame.groupby(['interval_unit', 'type', 'complementary'])
    return grouped['ind'].count().reset_index()
def __init__(self, credentials=MARKETING_CREDENTIALS):
    """Open all connections and cache the ordered funnel stage names.

    :param credentials: credentials for the marketing (target) database.
    """
    super().__init__(credentials=credentials)
    self.aurum_conn = DBConnect(CRM_CREDENTIALS)
    # Work on a copy: mutating the shared GALILEO_DB_CREDENTIALS dict in
    # place leaked database='marketing' into every other consumer of those
    # credentials (preload_transactions already copies for this reason).
    galileo_credentials = GALILEO_DB_CREDENTIALS.copy()
    galileo_credentials['database'] = 'marketing'
    self.galileo_conn = DBConnect(galileo_credentials)
    self.marketing_utf_conn = create_engine(str(self.db_uri) + "?charset=utf8",
                                            encoding="utf-8")
    # Pull the funnel stages once, sorted by the third column (priority),
    # so downstream code can add one indicator column per stage.
    conn = self.aurum_conn.conn.connect()
    try:
        result = conn.execute('SELECT * FROM oro_enum_opportunity_status;')
        stages = tuple(sorted(result.fetchall(), key=lambda item: item[2]))
    finally:
        # Close even if the query fails so the CRM connection is not leaked.
        conn.close()
    self.SALES_FUNNEL_ORDER_ESP = [stage[1] for stage in stages]
def __init__(self, **kwargs):
    """Build the opportunities/tasks data set and derive owner features."""
    super().__init__(**kwargs)
    self.crm_connector = DBConnect(CRM_CREDENTIALS)
    self.data_set = self.crm_connector.custom_query(self.query_tasks_by_opp)
    # self.detect_closed_unsuccessful_tasks() -- this has to be implemented
    # in the notebook instead.
    self.get_last_filter()
    self.get_last_closer()
    # The *_record columns only exist to feed the feature extraction above.
    auxiliar_columns = [
        "owner_names_record",
        "role_names_record",
        "not_closed_tasks_due_dates",
        "not_closed_tasks_created_at",
    ]
    self.drop_auxiliar_columns(auxiliar_columns)
def __init__(self, **kwargs):
    """Load the tasks audit trail and derive status/due-date features."""
    # Audit records only exist from this date onwards.
    print("This is only auditable from: 2017-06-02 11:19:00")
    super().__init__(**kwargs)
    self.crm_connector = DBConnect(CRM_CREDENTIALS)
    self.data_set = self.crm_connector.custom_query(
        self.query_not_closed_tasks_audit)
    # Feature extraction must run before the raw *_record columns go away.
    for derive in (self.get_first_open_status_at,
                   self.get_last_closed_status_at,
                   self.get_current_due_date,
                   self.detect_closed_successful_tasks):
        derive()
    self.drop_auxiliar_columns(
        ["task_status_record", "task_status_at_record", "due_date_record"])
def preload_transactions(rfc):
    """Fetch, normalize and pickle every transaction for *rfc*.

    Parses dates, collapses unknown currencies / payment methods onto a
    catch-all value, and uploads the result as ``transactions-<rfc>.pkl.xz``
    under the ``pickles`` base dir.

    :param rfc: client tax id whose transactions are preloaded.
    """
    # Copy so the shared credentials dict is not mutated for other callers.
    credentials = GALILEO_DB_CREDENTIALS.copy()
    credentials['database'] = 'capone'
    galileo_conn = DBConnect(credentials)
    # Parameterized instead of f-string interpolation (SQL injection risk).
    transactions_query = "SELECT * FROM transactions WHERE client_rfc = %(rfc)s"
    transactions = galileo_conn.custom_query(transactions_query,
                                             params={'rfc': rfc})
    transactions['fecha'] = pd.to_datetime(transactions['fecha'])
    # Anything outside these whitelists is mapped to the catch-all value.
    valid_currencies = ['MXN', 'USD', 'CAD', 'EUR', 'GBP']
    valid_methods = ['PPD', 'PUE']
    catchall_mask(transactions, 'moneda', 'moneda_adjusted', valid_currencies, 'MXN')
    catchall_mask(transactions, 'pay_method', 'pay_method_adjusted', valid_methods, 'PUE')
    pickle_name = f'transactions-{rfc}.pkl.xz'
    upload_df(transactions, pickle_name, base_dir='pickles')
def get_razon_social(self):
    """Resolve the business name (razon social) for ``self.rfc``.

    Looks the rfc up among both invoice issuers and receivers; issuer
    matches take precedence.

    :return: the matching name, or 'N/A' when no row matches.
    """
    mysql_capone = DBConnect(CAPONE_CREDENTIALS)
    print("Updating rfc table for {}".format(self.rfc))
    df = mysql_capone.custom_query(self.razon_social_query)
    df = df.applymap(column_format)
    razon_social = 'N/A'
    issuer_mask = df['issuer_rfc'] == self.rfc
    receiver_mask = df['receiver_rfc'] == self.rfc
    # Positional .iloc[0] instead of label .loc[0]: after masking, label 0
    # only survives when the very first row matched, so the old code raised
    # KeyError for any match further down the frame.
    if issuer_mask.any():
        razon_social = df.loc[issuer_mask, 'issuer_name'].iloc[0]
    elif receiver_mask.any():
        razon_social = df.loc[receiver_mask, 'receiver_name'].iloc[0]
    return razon_social
def __init__(self, **kwargs):
    """Pivot on the call creation date (fecha de creacion de la llamada)."""
    super().__init__(**kwargs)
    self.crm_connector = DBConnect(CRM_CREDENTIALS)
    self.data_set = self.crm_connector.custom_query(self.query)
    # Derive the call features, then discard the helper *_record columns.
    self.extract_first_filter_data()
    self.get_contacted_at()
    self.detect_first_call()
    self.get_first_asigned_owner_data()
    self.drop_auxiliar_columns([
        "status_at_record",
        "status_record",
        "owner_at_record",
        "role_names_record",
        "owner_names_record",
    ])
def run(self):
    """Collect a sample of declarations for every declaration kind.

    For each (interval_unit, type, complementary) combination Capone has
    detected, fetches up to 20 declarations and tags them with a
    ``declaration_key`` built by concatenating the three fields.

    :return: DataFrame with rfc, interval_unit, type, complementary,
        presentation_at, pdf and declaration_key columns; the pdf column
        contains the binary used to generate each declaration's pdf.
    """
    self.check_for_new_declarations()
    kinds = self.get_all_types_of_declarations_df()
    connector = DBConnect(CAPONE_CREDENTIALS)
    kinds['declaration_key'] = (kinds['interval_unit']
                                + kinds['type']
                                + kinds['complementary'])
    samples = []
    for _, kind in kinds.iterrows():
        sample_query = self.type_of_declaration_sample_query(
            kind['interval_unit'], kind['type'], kind['complementary'])
        sample = connector.custom_query(sample_query)
        sample['declaration_key'] = kind['declaration_key']
        samples.append(sample)
    return pd.concat(samples)
class DeclarationsManager:
    """Fetch a given RFC's declarations and materialize their pdfs on disk.

    Obtains all requested declarations for an RFC, writes each pdf into the
    user-supplied path (created if missing) and exposes the metadata as a
    DataFrame with a ``file_name`` column pointing at each pdf. If the RFC
    isn't found, the DataFrame is empty. ``remove_pdfs_from_container``
    deletes the generated files again.
    """

    def __init__(self, rfc: str, interval_unit: str, ids: tuple, path: str = None):
        # Resolve the default per call; a def-time os.getcwd() default
        # would freeze the working directory at import time.
        if path is None:
            path = os.getcwd()
        self.rfc = rfc
        self.interval_unit = interval_unit
        self.path = path
        self.ids = ids
        os.makedirs(path, exist_ok=True)
        self.capone_connector = DBConnect(CAPONE_CREDENTIALS)
        self.declarations_df = self.get_all_declarations_for_given_rfc()

    def get_all_declarations_for_given_rfc(self) -> pd.DataFrame:
        """Fetch the requested declarations and write each pdf to self.path.

        :return: DataFrame with one row per declaration plus a file_name
            column; empty when ``self.ids`` is empty or nothing matches.
        """
        if len(self.ids) == 0:
            return pd.DataFrame()
        params = {'id': self.ids, 'rfc': self.rfc, 'interval': self.interval_unit}
        query = """SELECT rfc, interval_unit, type, complementary, presentation_at, id, period, pdf
                   FROM declarations
                   WHERE rfc = %(rfc)s AND interval_unit = %(interval)s AND id in %(id)s;"""
        declarations_frame = self.capone_connector.custom_query(query, params=params)
        if declarations_frame.empty:
            return declarations_frame
        rows = []
        for _, row in declarations_frame.iterrows():
            pdf_binary = row['pdf']
            # mkstemp instead of NamedTemporaryFile: the old code used the
            # temp file only to steal a unique name, wrote the pdf next to
            # it as "<name>.pdf", and leaked the original empty temp file.
            fd, pdf_path = mkstemp(suffix='.pdf', dir=self.path)
            with os.fdopen(fd, 'wb') as f:
                f.write(pdf_binary)
            # DataFrame.append was removed in pandas 2.0; collect Series and
            # build the result frame in one shot instead.
            row = row.drop(labels=['pdf'])
            row['file_name'] = pdf_path
            rows.append(row)
        declarations_df = pd.DataFrame(rows)
        return declarations_df

    def remove_pdfs_from_container(self):
        """Delete every pdf previously written by this manager."""
        for file in self.declarations_df['file_name']:
            # file_name is absolute, so the join simply yields it back.
            os.remove(os.path.join(self.path, file))
class VisitorData(DBPreprocess):
    """Left-join of site visits with visitor phone numbers, loaded batch by
    batch into ``marketing.visits_vpn_left_join``."""

    TABLE_ALIAS = 'visits_vpn_left_join'
    DB_ALIAS = 'marketing'

    # $condition is substituted per batch with a date window on v.created_at.
    join_call = Template("""
    SELECT v.visitor_id AS tracking_id, vpn.source, v.created_at as created_at, visit_data, visit_date
    FROM visits v
    LEFT JOIN visitor_phone_numbers vpn ON vpn.visitor_id = v.visitor_id
    WHERE $condition;
    """)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.sherlock_conn = DBConnect(SHERLOCK_CREDENTIALS)

    def call_batch(self, batch_query):
        """Run one batched join query and drop in-batch duplicates."""
        result = self.sherlock_conn.custom_query(batch_query)
        return result.drop_duplicates(subset=['tracking_id', 'visit_date'])

    def start_process(self, start_date, end_date, freq='30D'):
        """Pull visits in ``freq``-sized windows and append each to the DB."""
        dates = batch_generator(start=start_date, end=end_date, freq=freq)
        batches = get_substitutions(self.join_call, 'date', 'v.created_at', dates)
        for index, batch in enumerate(batches):
            print(f"Calling batch [{index}]")
            self.data_set = self.call_batch(batch)
            print(f"Sending batch [{index}]")
            self.send_to_db(if_exist="append")
        print("Done.")

    def deduplicate(self):
        """Rebuild the table keeping one row per (tracking_id, visit_date).

        Cross-batch duplicates survive the per-batch dedup, so the table is
        rebuilt through a temporary table and renamed back.
        """
        statements = (
            f"CREATE TABLE IF NOT EXISTS {self.DB_ALIAS}.tmp LIKE {self.DB_ALIAS}.visits_vpn_left_join;",
            f""" INSERT INTO {self.DB_ALIAS}.tmp (SELECT * FROM {self.DB_ALIAS}.visits_vpn_left_join GROUP BY tracking_id, visit_date);""",
            f"DROP TABLE {self.DB_ALIAS}.visits_vpn_left_join;",
            f"RENAME TABLE {self.DB_ALIAS}.tmp TO {self.DB_ALIAS}.visits_vpn_left_join;",
        )
        connection = self.db_engine.connect()
        for statement in statements:
            connection.execute(statement)
        connection.close()
class InvestorsPreprocessPortfolio(DBPreprocess):
    """Monthly outstanding principal across the whole loan portfolio."""

    DB_ALIAS = "investors"
    TABLE_ALIAS = "outstanding_principal"
    # Conversion factor captured from the rate available at import time.
    MXN_TO_USD = 1 / usd_mxn_rate

    # Balance records joined to their loan so missing payment dates fall
    # back to the loan's start date.
    principal_balances_query = """
    SELECT coalesce(nullif(loanrecord.payment_date, NULL ), loan.start_date) payment_date,
           loanrecord.principal_balance,
           loanrecord.loan_id
    FROM loans_loanrecord loanrecord
    LEFT JOIN loans_loan loan ON loanrecord.loan_id = loan.id;
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.loany_connector = DBConnect(LOANY_CREDENTIALS, "postgresql")

    def start_process(self):
        self.data_set = self.get_raw_data()

    def get_raw_data(self):
        """Aggregate per-loan balances into one monthly portfolio series.

        :return: DataFrame with payment_date, principal_balance_mxn and
            principal_balance_usd columns, one row per month.
        """
        balances = self.loany_connector.custom_query(
            self.principal_balances_query)
        balances['payment_date'] = pd.to_datetime(balances['payment_date'])
        # Minimum balance per loan per month; forward-fill across months so
        # loans without a record in a month keep their last known balance.
        monthly = (balances
                   .set_index('payment_date')
                   .groupby([pd.Grouper(freq='M'),
                             'loan_id'])['principal_balance']
                   .min()
                   .unstack())
        monthly.ffill(inplace=True)
        portfolio = monthly.stack().reset_index()
        portfolio.rename(columns={0: 'principal_balance_mxn'}, inplace=True)
        # Sum across loans to get the portfolio-level monthly balance.
        portfolio = portfolio.groupby(
            'payment_date')['principal_balance_mxn'].sum()
        portfolio = portfolio.reset_index()
        portfolio['principal_balance_usd'] = (
            portfolio['principal_balance_mxn'] * self.MXN_TO_USD)
        return portfolio
class UsersPreprocess(DBPreprocess):
    """Currently-enabled CRM users holding a Sales Filter or Closer role."""

    DB_ALIAS = "owners"
    TABLE_ALIAS = "active_users"

    # Users joined to their access roles, restricted to the two roles the
    # sales dashboards care about and to enabled accounts only.
    query_users = '''
    SELECT concat(u.first_name, ' ', u.last_name) name,
           u_role_name.label role,
           u.enabled enabled
    FROM oro_user u
    LEFT JOIN oro_user_access_role u_role_id ON u_role_id.user_id=u.id
    LEFT JOIN oro_access_role u_role_name ON u_role_name.id=u_role_id.role_id
    WHERE (u_role_name.label='Sales Filter' OR u_role_name.label='Closer')
      AND enabled=1
    '''

    def __init__(self, **kwargs):
        """Fetch the active users / roles data set from the CRM."""
        super().__init__(**kwargs)
        connector = DBConnect(CRM_CREDENTIALS)
        self.crm_connector = connector
        self.data_set = connector.custom_query(self.query_users)
class Funnel(DBPreprocess):
    """Opportunity status/owner change history ("funnel") pulled from the
    CRM audit log, with stage labels normalized to a canonical set."""

    DB_ALIAS = 'aurum_basetable'
    TABLE_ALIAS = 'funnel'

    # Raw audit values -> canonical stage names. No target value appears as
    # a key, so a single replace pass is equivalent to the old sequence of
    # per-value mask assignments.
    FIELD_VALUE_MAP = {
        'Avalúo': 'Avalúo pagado',
        'Oferta final enviada': 'Oferta final',
        'Oferta Final Enviada': 'Oferta final',
        'Espera de firma': 'En espera de firma',
        'Closed Won': 'Ganado',
        'Closed Lost': 'Perdido',
        'Entrevista Agendada': 'Cita agendada',
        'Oportunidad Nueva': 'Nuevo',
        'Oferta Final Aceptada': 'Oferta final',
    }

    def __init__(self, **kwargs):
        # utf8 charset: stage names carry accented characters.
        super().__init__(**kwargs, charset='utf8')
        self.aurum_db = DBConnect(CRM_CREDENTIALS)

    def start_process(self):
        self.data_set = self.get_funnel()

    def add_indexes(self):
        """Add the configured indexes on the funnel table."""
        for index in opportunities_funnel_d['index_column']:
            if index == 'opportunity_id':
                self.db_engine.execute(
                    f"ALTER TABLE `{self.DB_ALIAS}`.`{self.TABLE_ALIAS}` ADD INDEX ({index});")
            if index == 'field_value':
                # Prefix index: only the first 20 characters are indexed.
                self.db_engine.execute(
                    f"ALTER TABLE `{self.DB_ALIAS}`.`{self.TABLE_ALIAS}` ADD INDEX ({index} (20));")

    def process_columns(self, funnel_df):
        """Map raw audit stage labels onto canonical stage names.

        Uses a single ``.replace`` instead of the previous chained
        ``funnel_df['field_value'][mask] = ...`` assignments, which raise
        SettingWithCopyWarning and silently stop writing through under
        pandas copy-on-write.

        :param funnel_df: frame with a ``field_value`` column.
        :return: the same frame with ``field_value`` normalized.
        """
        funnel_df['field_value'] = funnel_df['field_value'].replace(
            self.FIELD_VALUE_MAP)
        return funnel_df

    def get_funnel(self):
        """Pull the per-opportunity status/owner audit trail, normalized."""
        funnel_df = self.aurum_db.custom_query(
            '''
            SELECT o.id opportunity_id,
                   o.status_id AS last_status,
                   o.created_at AS opportunity_created_at,
                   af.field AS field_type,
                   a.user_id AS status_owner_id,
                   CONCAT(ou.first_name, ' ', ou.last_name) AS status_owner_name,
                   af.new_text AS field_value,
                   a.logged_at AS field_value_at
            FROM orocrm_sales_opportunity o
            JOIN oro_audit a ON a.object_id=o.id
            JOIN oro_audit_field af ON af.audit_id = a.id
            JOIN oro_user ou ON ou.id = a.user_id
            WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
              AND af.field IN ('status', 'owner')
            ORDER BY o.id, field_value_at;
            '''
        )
        funnel_df = self.process_columns(funnel_df)
        return funnel_df
class TasksAuditPreprocess(DBPreprocess):
    """Tasks audit preprocess.

    Builds one row per opportunity-linked task with its audited status
    history, then derives open/closed timestamps and a success flag.
    Only auditable from: 2017-06-02 11:19:00.

    Example downstream filters:
    - user created tasks: ``(task_owner_name == user_name)``
    - closed tasks: ``(current_task_status == 'Cerrado')``
    - not closed tasks: ``(current_task_status != 'Cerrado')``
    """

    # One row per task linked to an opportunity; the audited status history
    # is aggregated into comma-separated *_record columns (same shape the
    # extract_feature_relation helper expects). Times are shifted to
    # America/Mexico_City.
    query_not_closed_tasks_audit = '''
    SELECT ot.task_id,
    convert_tz(t.createdAt,'UTC','America/Mexico_City') task_created_at,
    ts.name current_task_status,
    ot.opportunity_id opp_id,
    t.owner_id,
    u.enabled is_owner_enabled,
    concat(u.first_name, " ", u.last_name) task_owner_name,
    u_role_name.label task_owner_role,
    group_concat(af.new_text) task_status_record,
    group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City')) task_status_at_record,
    group_concat(CONVERT_TZ(t.due_date,'UTC','America/Mexico_City')) due_date_record
    FROM oro_rel_f24c741b5154c0033bfb48 ot
    LEFT JOIN oro_audit a ON ot.task_id=a.object_id
    LEFT JOIN oro_audit_field af ON a.id = af.audit_id
    LEFT JOIN orocrm_task t ON ot.task_id = t.id
    LEFT JOIN oro_user u ON t.owner_id = u.id
    LEFT JOIN oro_user_access_role u_role ON u.id = u_role.user_id
    LEFT JOIN oro_access_role u_role_name ON u_role_name.id=u_role.role_id
    LEFT JOIN oro_enum_task_status ts ON t.status_id = ts.id
    WHERE a.object_class="Oro\\\Bundle\\\TaskBundle\\\Entity\\\Task"
    AND af.field = "status"
    GROUP BY ot.task_id
    '''
    DB_ALIAS = "operational"
    TABLE_ALIAS = "tasks_audit"

    def __init__(self, **kwargs):
        """Load the audit data set and derive all task features."""
        print("This is only auditable from: 2017-06-02 11:19:00")
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(
            self.query_not_closed_tasks_audit)
        # Derive features before the raw *_record columns are dropped.
        self.get_first_open_status_at()
        self.get_last_closed_status_at()
        self.get_current_due_date()
        self.detect_closed_successful_tasks()
        self.drop_auxiliar_columns(
            ["task_status_record", "task_status_at_record", "due_date_record"])

    def get_first_open_status_at(self):
        """Timestamp of the first time each task entered 'Abierto'."""
        self.extract_feature_relation(list_column="task_status_record",
                                      target_columns=["task_status_at_record"],
                                      search_indexer="Abierto",
                                      first=True,
                                      column_names=["first_open_status_at"])

    def get_last_closed_status_at(self):
        """Timestamp of the last time each task entered 'Cerrado'."""
        self.extract_feature_relation(list_column="task_status_record",
                                      target_columns=["task_status_at_record"],
                                      search_indexer="Cerrado",
                                      first=False,
                                      column_names=["last_closed_status_at"])

    def get_current_due_date(self):
        """Take the last entry of the comma-separated due-date history."""
        due_dates_list = self.data_set["due_date_record"].fillna("").str.split(
            ",")
        self.data_set["current_due_date"] = due_dates_list.apply(
            lambda l: l[-1])

    def detect_closed_successful_tasks(self):
        """Flag closed tasks that were closed before their due date.

        Only rows with current_task_status == 'Cerrado' get a 0/1 flag; all
        other rows are left NaN by the index-aligned assignment.
        """
        closed = self.data_set.query("current_task_status == 'Cerrado'")
        is_successful_closed = pd.to_datetime(
            closed.current_due_date) > pd.to_datetime(
                closed.last_closed_status_at)
        is_successful_closed = is_successful_closed.map({True: 1, False: 0})
        self.data_set["is_successful_closed"] = is_successful_closed
"""Environment-level constants controlling dashboard behavior.

Defines the canonical order of the sales funnel stages (pulled from the CRM
enum table and sorted by priority, so dashboards are not default-ordered by
count) plus the supported payment frequencies.
"""
from ds_dependencies.base_preprocess import DBConnect
from ds_dependencies.credentials import CRM_CREDENTIALS

# NOTE(review): this module runs a CRM query at import time, so importing it
# requires database connectivity.
crm_connector = DBConnect(CRM_CREDENTIALS)
query = 'SELECT * FROM oro_enum_opportunity_status;'
sf_stages = crm_connector.custom_query(query)
# 'priority' defines the canonical funnel ordering.
sf_stages.sort_values(by='priority', inplace=True)
SALES_FUNNEL_ORDER = sf_stages['id'].tolist()  # stage ids, in funnel order
SALES_FUNNEL_ORDER_ESP = sf_stages['name'].tolist()  # display names, same order
SALES_FUNNEL_MAPPING = sf_stages[['id', 'name']].set_index('id')['name']  # id -> name
frequencies = ['Diario', 'Semanal', 'Quincenal', 'Mensual']
def __init__(self, **kwargs):
    # utf8 charset: the funnel stage names carry accented characters.
    super().__init__(**kwargs, charset='utf8')
    self.aurum_db = DBConnect(CRM_CREDENTIALS)
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    # Sherlock holds the raw visits / phone-number tables this class reads.
    self.sherlock_conn = DBConnect(SHERLOCK_CREDENTIALS)
class OppTasksPreprocess(DBPreprocess):
    """Opportunities-tasks relation.

    Example downstream filters on the resulting table:
    - opps without tasks:
      ``(not_closed_tasks_count == 0) & (open_tasks_count == 0)``
    - opps with open tasks: ``open_tasks_count != 0``
    - opps with tasks without due date:
      ``(open_tasks_count != 0) & (due_dates_count < open_tasks_count)``
    - opps with unsuccessful open tasks:
      ``not_closed_tasks_count > 0`` and dues < created
    """

    # One row per opportunity. The correlated subqueries aggregate that
    # opportunity's non-closed / open tasks (ids, timestamps, counts) into
    # comma-separated *_record columns; the owner-change audit rows provide
    # the owner/role history. Times are shifted to America/Mexico_City.
    query_tasks_by_opp = '''
    SELECT o.id opp_id,
    concat(u.first_name, " ", u.last_name) current_owner,
    group_concat(o_u.first_name, " ", o_u.last_name) owner_names_record,
    group_concat(u_role_name.label) role_names_record,
    ( SELECT group_concat(t.id) FROM oro_rel_f24c741b5154c0033bfb48 r LEFT JOIN orocrm_task t ON r.task_id = t.id WHERE 1 AND r.opportunity_id = o.id AND t.status_id != 'closed' ) AS not_closed_tasks_id_record,
    ( SELECT group_concat(CONVERT_TZ(t.createdAt, 'UTC','America/Mexico_City')) FROM oro_rel_f24c741b5154c0033bfb48 r LEFT JOIN orocrm_task t ON r.task_id = t.id WHERE 1 AND r.opportunity_id = o.id AND t.status_id != 'closed' ) AS not_closed_tasks_created_at,
    ( SELECT count(t.id) FROM oro_rel_f24c741b5154c0033bfb48 r LEFT JOIN orocrm_task t ON r.task_id = t.id WHERE 1 AND r.opportunity_id = o.id AND t.status_id != 'closed' ) AS not_closed_tasks_count,
    ( SELECT group_concat(CONVERT_TZ(t.due_date,'UTC','America/Mexico_City')) FROM oro_rel_f24c741b5154c0033bfb48 r LEFT JOIN orocrm_task t ON t.id = r.task_id WHERE 1 AND r.opportunity_id = o.id AND t.status_id != 'closed' ) AS not_closed_tasks_due_dates,
    ( SELECT count(t.due_date) FROM oro_rel_f24c741b5154c0033bfb48 r LEFT JOIN orocrm_task t ON t.id = r.task_id WHERE 1 AND r.opportunity_id = o.id AND t.status_id != 'closed' ) AS due_dates_count,
    ( SELECT count(t.id) FROM oro_rel_f24c741b5154c0033bfb48 r LEFT JOIN orocrm_task t ON t.id = r.task_id WHERE 1 AND r.opportunity_id = o.id AND t.status_id = 'open' ) open_tasks_count
    FROM orocrm_sales_opportunity o
    LEFT JOIN oro_user u ON o.user_owner_id = u.id
    LEFT JOIN oro_audit a ON a.object_id=o.id
    LEFT JOIN oro_audit_field af ON af.audit_id=a.id
    LEFT JOIN oro_user o_u ON o_u.username=af.new_text
    LEFT JOIN oro_user_access_role u_role_id ON u_role_id.user_id=o_u.id
    LEFT JOIN oro_access_role u_role_name ON u_role_name.id=u_role_id.role_id
    WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
    AND af.field='owner'
    GROUP BY o.id
    '''
    DB_ALIAS = "operational"
    TABLE_ALIAS = "tasks_by_opp"

    def __init__(self, **kwargs):
        """Build the opps/tasks data set and derive the owner features."""
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(
            self.query_tasks_by_opp)
        # self.detect_closed_unsuccessful_tasks() -- this has to be
        # implemented in the notebook instead.
        self.get_last_filter()
        self.get_last_closer()
        # The *_record columns only exist to feed the feature extraction.
        self.drop_auxiliar_columns([
            "owner_names_record", "role_names_record",
            "not_closed_tasks_due_dates", "not_closed_tasks_created_at"
        ])

    def get_last_filter(self):
        """Name of the last owner that held the 'Sales Filter' role."""
        self.extract_feature_relation(list_column="role_names_record",
                                      target_columns=["owner_names_record"],
                                      search_indexer="Sales Filter",
                                      first=False,
                                      column_names=["last_filter_name"])

    def get_last_closer(self):
        """Name of the last owner that held the 'Closer' role."""
        self.extract_feature_relation(list_column="role_names_record",
                                      target_columns=["owner_names_record"],
                                      search_indexer="Closer",
                                      first=False,
                                      column_names=["last_closer_name"])
def init_data_set(self):
    """Populate ``self.data_set`` with the owners query result from the CRM."""
    connector = DBConnect(CRM_CREDENTIALS)
    self.crm_connector = connector
    self.data_set = connector.custom_query(self.query_owners)
class AurumToSherlock(DBPreprocess):
    """Join CRM opportunity history (offline) with site-visit data (online)
    into the ``minimalist.aurum_to_sherlock`` attribution table."""

    TABLE_ALIAS = 'aurum_to_sherlock'
    DB_ALIAS = 'minimalist'
    # Campaign columns condensed into a single 'aggregated_campaign' column.
    CAMPAIGN_COLUMNS = ['ga_campaign', 'campaign']

    # Per-opportunity status history; $condition is substituted per batch
    # of tracking ids.
    opp_stage_info_query = Template("""
    SELECT o.id opportunity_id, group_concat(af.new_text) status,
           MAX(a.logged_at) last_status_at, MAX(o.created_at) opp_created_at,
           leads_table.tracking_id AS tracking_id,
           o.created_at AS time_in_crm,
           o.budget_amount_value AS budget_amount_value,
           o.collateral_value, o.notes
    FROM orocrm_sales_opportunity o
    LEFT JOIN oro_audit a ON a.object_id=o.id
    LEFT JOIN oro_audit_field af ON af.audit_id=a.id
    LEFT JOIN orocrm_sales_lead leads_table ON leads_table.id = o.lead_id
    WHERE $condition AND a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity' AND af.field='status'
    GROUP BY o.id;
    """)

    # Site visits; $condition is substituted per visit_date window.
    online_info_query = Template("""
    SELECT tracking_id, source, visit_data, visit_date
    FROM visits_vpn_left_join
    WHERE $condition;
    """)

    def __init__(self, credentials=MARKETING_CREDENTIALS):
        """Open all connections and cache the ordered funnel stage names."""
        super().__init__(credentials=credentials)
        self.aurum_conn = DBConnect(CRM_CREDENTIALS)
        # Work on a copy: mutating the shared GALILEO_DB_CREDENTIALS dict in
        # place leaked database='marketing' into every other consumer of
        # those credentials.
        galileo_credentials = GALILEO_DB_CREDENTIALS.copy()
        galileo_credentials['database'] = 'marketing'
        self.galileo_conn = DBConnect(galileo_credentials)
        self.marketing_utf_conn = create_engine(str(self.db_uri) + "?charset=utf8",
                                                encoding="utf-8")
        # Funnel stages sorted by the third column (priority); one
        # indicator column per stage is added in get_offline_info.
        conn = self.aurum_conn.conn.connect()
        try:
            result = conn.execute('SELECT * FROM oro_enum_opportunity_status;')
            stages = tuple(sorted(result.fetchall(), key=lambda item: item[2]))
        finally:
            conn.close()
        self.SALES_FUNNEL_ORDER_ESP = [stage[1] for stage in stages]

    def get_offline_info(self, query):
        """Fetch opportunity history and expand stages into 0/1 columns."""
        opp_stage_info = self.aurum_conn.custom_query(query)
        opp_stage_info['status'] = opp_stage_info['status'].str.split(',')
        for stage in self.SALES_FUNNEL_ORDER_ESP:
            opp_stage_info[stage] = opp_stage_info['status'].apply(
                lambda x: int(stage in x))
        opp_stage_info = opp_stage_info.drop(['status'], axis=1, inplace=False)
        return opp_stage_info

    def get_online_data(self, query):
        """Fetch site visits, expand visit_data JSON and flag paid traffic."""
        print('importing online data...')
        online_info = self.galileo_conn.custom_query(query)
        online_info = minimalist_utils.online_info_parse(online_info)
        expanded_visit_data = online_info['visit_data'].apply(
            lambda x: pd.Series(_json_parser(x)))
        del online_info['visit_data']
        online_info = pd.concat([online_info, expanded_visit_data], axis=1)
        online_info.replace('(not set)', np.nan, inplace=True)
        online_info = minimalist_utils.column_condenser(
            online_info, self.CAMPAIGN_COLUMNS, 'aggregated_campaign')
        online_info.drop(self.CAMPAIGN_COLUMNS, axis=1, inplace=True)
        # paid = 1 whenever an aggregated campaign is present.
        online_info['paid'] = pd.isnull(
            online_info['aggregated_campaign']).apply(lambda x: int(not x))
        online_info = online_info.drop(['glcid', 'source'], axis=1,
                                       inplace=False)
        return online_info

    def merge_aurum_to_sherlock(self, offline_info, online_info):
        """Left-join online visits with offline opportunity history."""
        print('Merging online and offline data tables for Sherlock <- Aurum...')
        aurum_to_sherlock = online_info.merge(offline_info, on='tracking_id',
                                              how='left')
        aurum_to_sherlock.drop_duplicates(inplace=True)
        aurum_to_sherlock.sort_values(by=['visit_date'], inplace=True)
        # Attribution columns filled by a later step.
        aurum_to_sherlock['weight'] = 0.0
        aurum_to_sherlock['amount_times_weight'] = 0.0
        return aurum_to_sherlock

    def format_final_table(self):
        """Deduplicate the final table and add an indexed row counter."""
        print('Removing tracking id-site visit duplicates')
        table_name = f"{self.DB_ALIAS}.{self.TABLE_ALIAS}"
        # Rebuild through a temp table keeping one row per
        # (tracking_id, visit_date).
        dedup_query_1 = f"CREATE TABLE IF NOT EXISTS {self.DB_ALIAS}.tmp LIKE {table_name};"
        dedup_query_2 = f""" INSERT INTO {self.DB_ALIAS}.tmp (SELECT * FROM {table_name} GROUP BY tracking_id, visit_date);"""
        dedup_query_3 = f"DROP TABLE {table_name};"
        dedup_query_4 = f"RENAME TABLE {self.DB_ALIAS}.tmp TO {table_name};"
        conn = self.db_engine.connect()
        conn.execute(dedup_query_1)
        conn.execute(dedup_query_2)
        conn.execute(dedup_query_3)
        conn.execute(dedup_query_4)
        conn.close()
        print('Adding row index...')
        # MySQL user-variable trick to number the rows sequentially.
        row_index_query_1 = f"""ALTER TABLE {table_name} ADD row_index INT(11) DEFAULT '0' NOT NULL FIRST;"""
        row_index_query_2 = """SELECT @n:=0;"""
        row_index_query_3 = f"""UPDATE {table_name} SET row_index = @n := @n + 1;"""
        conn = self.db_engine.connect()
        conn.execute(row_index_query_1)
        conn.execute(row_index_query_2)
        conn.execute(row_index_query_3)
        conn.close()
        print('Indexing row index...')
        indexer_query = f"ALTER TABLE {table_name} ADD INDEX (row_index);"
        conn = self.db_engine.connect()
        conn.execute(indexer_query)
        conn.close()

    def start_process(self, start_date, end_date, freq):
        """Process visit-date batches; per batch, fetch matching CRM rows in
        sub-batches of 4000 tracking ids, merge and append to the DB."""
        dates = batch_generator(start=start_date, end=end_date, freq=freq)
        online_data_batches = get_substitutions(self.online_info_query, 'date',
                                                'visit_date', dates)
        for idx, batch in enumerate(online_data_batches):
            print(f"Batch [{idx}]")
            online_info = self.get_online_data(batch)
            tracking_ids = list(online_info['tracking_id'].unique())
            searches = batch_generator(on=tracking_ids, freq=4000)
            offline_info_batches = get_substitutions(self.opp_stage_info_query,
                                                     'search',
                                                     'leads_table.tracking_id',
                                                     searches)
            offline_info = []
            for sub_idx, offline_batch in enumerate(offline_info_batches):
                print(f"sub-batch [{sub_idx}]")
                offline_info.append(self.get_offline_info(offline_batch))
            offline_info = pd.concat(offline_info)
            aurum_to_sherlock = self.merge_aurum_to_sherlock(offline_info,
                                                             online_info)
            self.data_set = aurum_to_sherlock
            print("Sending to Marketing")
            self.send_to_db(if_exist="append")
def get_data(self):
    """Pull one row per opportunity with its latest submitted document.

    Joins customer-journey requirements with opportunity, lead, customer
    and contact data, restricted to 'Document' requirements under the
    ACCREDITED role, keeping only the last (max resource_id) document of
    ``self.document_type_id`` per opportunity.

    :return: DataFrame with opportunity, contact and document columns.
    """
    aurum_db = DBConnect(CRM_CREDENTIALS)
    # NOTE(review): self.document_type_id is interpolated via %s string
    # formatting; it appears to be an internally configured id, but a
    # parameterized query would be safer -- confirm custom_query's params
    # support before changing.
    aurum_df = aurum_db.custom_query(
        """SELECT cjr.opportunity_id, cjr.resource_id, 1 as has_questionnaire_flag,
        cjr.role_type_id, sl.source_id, so.created_at as opportunity_created_at,
        so.closed_at as opportunity_closed_at, so.data_channel_id,
        so.customer_association_id, b2bc.employees,
        oct.first_name AS contact_first_name, oct.last_name AS contact_last_name,
        oct_2.contact_phone_number, oct_3.contact_email, cjr.person_type_id,
        so.collateral_value, so.collateral_usage, so.collateral_usage_type_id,
        so.collateral_location, so.collateral_neighborhood,
        so.collateral_municipality, so.collateral_reg_combined_code,
        so.collateral_inegi_code, so.property_tax_account, so.collateral_zip_code,
        so.lead_id, so.contact_id, so.user_owner_id, so.status_id,
        so.income_type_id, so.budget_amount_currency, so.monthly_sales,
        so.budget_amount_value, so.monthly_profits, so.monthly_formal_income,
        so.loan_purpose, so.primary_clients, so.product_id, so.cj_score,
        so.risk_level_score, so.risk_level_id, so.collateral_type_id,
        so.owner_experience_years, so.debt_service_coverage_ratio,
        docs.document_type_id, docs.document, cjr.resource_subtype as document_name
        FROM cj_requirements cjr
        LEFT JOIN (SELECT cjd.id AS resource_id, cjd.document_type_id,
                   cjd.DATA AS 'document'
                   FROM oro_crm.cj_documents cjd
                   LEFT JOIN oro_crm.cj_document_types cjdt
                   ON cjdt.id = cjd.document_type_id
                   WHERE cjd.document_type_id = %s) docs
        ON docs.resource_id = cjr.resource_id
        LEFT JOIN orocrm_sales_opportunity so ON so.id = cjr.opportunity_id
        LEFT JOIN orocrm_sales_lead sl ON sl.id = so.lead_id
        LEFT JOIN orocrm_sales_customer c ON c.id = so.customer_association_id
        LEFT JOIN orocrm_sales_b2bcustomer b2bc ON b2bc.id = c.b2b_customer_188b774c_id
        LEFT JOIN orocrm_contact oct ON oct.id = so.contact_id
        LEFT JOIN (SELECT owner_id AS contact_id,
                   GROUP_CONCAT(phone SEPARATOR ' / ') AS contact_phone_number
                   FROM orocrm_contact_phone GROUP BY owner_id) oct_2
        ON oct_2.contact_id = so.contact_id
        LEFT JOIN (SELECT owner_id AS contact_id,
                   GROUP_CONCAT(email SEPARATOR ' / ') AS contact_email
                   FROM orocrm_contact_email GROUP BY owner_id) oct_3
        ON oct_3.contact_id = so.contact_id
        -- only extract last document submitted
        JOIN (SELECT cjr.opportunity_id, MAX(cjr.resource_id) AS resource_id
              FROM cj_requirements cjr
              JOIN orocrm_sales_opportunity so ON so.id = cjr.opportunity_id
              JOIN (SELECT cjd.id AS resource_id, cjd.document_type_id,
                    cjd.DATA AS 'document'
                    FROM oro_crm.cj_documents cjd
                    JOIN oro_crm.cj_document_types cjdt
                    ON cjdt.id = cjd.document_type_id
                    WHERE cjd.document_type_id = %s) docs
              ON docs.resource_id = cjr.resource_id
              WHERE cjr.resource_type = 'Document'
              AND cjr.role_type_id = 'ACCREDITED'
              AND cjr.resource_id IS NOT NULL
              AND cjr.opportunity_id IS NOT NULL
              GROUP BY cjr.opportunity_id) t
        ON t.resource_id = cjr.resource_id
        WHERE cjr.resource_type = 'Document'
        AND cjr.role_type_id = 'ACCREDITED'
        AND cjr.resource_id IS NOT NULL
        AND cjr.opportunity_id IS NOT NULL
        AND docs.document IS NOT NULL;""" % (self.document_type_id,
                                             self.document_type_id))
    return aurum_df
class SourcesPreprocess(DBPreprocess):
    """Sources-Opportunity relation.

    Joins each opportunity with its lead/campaign source data and its
    audited status history, enriches it with owner names from the Galileo
    ``owners.user_opportunities`` table, and derives per-stage transition
    timestamps.
    """

    DB_ALIAS = "marketing"
    TABLE_ALIAS = "sources"

    # One row per opportunity; the subquery aggregates the audited status
    # changes into comma-separated *_record columns. Times are shifted to
    # America/Mexico_City.
    query_sources = '''
    SELECT l.id lead_id,
    o.id opp_id,
    o.name opp_name,
    c.name campaign_id,
    l.source_id,
    l.medium_id,
    o.budget_amount_value,
    o.close_revenue_value,
    o.loan_duration,
    o.loan_interest_rate,
    l.status_id lead_status_id,
    status_table.status_record,
    status_table.status_at_record,
    CONVERT_TZ(o.created_at,'UTC','America/Mexico_City') opp_created_at
    FROM orocrm_sales_opportunity o
    LEFT JOIN orocrm_sales_lead l ON o.lead_id = l.id
    LEFT JOIN orocrm_campaign c ON l.campaign_id = c.id
    LEFT JOIN ( SELECT group_concat(af.new_text) status_record, group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City')) status_at_record, o.id opp_id FROM orocrm_sales_opportunity o LEFT JOIN oro_audit a ON a.object_id = o.id LEFT JOIN oro_audit_field af ON af.audit_id = a.id WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity' AND af.field='status' GROUP BY o.id ) AS status_table
    ON status_table.opp_id = o.id;
    '''
    # Owner names previously derived by the owners preprocess.
    query_owners = '''
    SELECT ow.opp_id,
    ow.last_filter_name,
    ow.last_closer_name,
    ow.current_owner_name
    FROM owners.user_opportunities ow
    '''

    def __init__(self, **kwargs):
        """Sources-Opportunity relation: open the CRM and Galileo connectors."""
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.galileo_connector = DBConnect(GALILEO_DB_CREDENTIALS)

    def start_process(self):
        """Build and clean the data set, then normalize datetime columns."""
        self.data_set = self.get_data_set()
        self.fix_rates()
        self.get_current_status_info()
        self.get_stage_transition_columns()
        self.drop_auxiliar_columns(["status_at_record", "status_record"])
        # By convention every timestamp column name contains "_at".
        datetime_cols = [
            column for column in self.data_set.columns if "_at" in column
        ]
        self.ensure_datetime_type(datetime_cols)

    def get_data_set(self):
        """Merge CRM source rows with owner names, aligned on opp_id."""
        data_sources = self.crm_connector.custom_query(self.query_sources)
        data_owners = self.galileo_connector.custom_query(self.query_owners)
        data_sources.set_index("opp_id", inplace=True)
        data_owners.set_index("opp_id", inplace=True)
        # Index-aligned assignment: only opportunities present in the
        # owners table get their names filled in.
        data_sources.loc[data_owners.index,
                         "last_closer_name"] = data_owners["last_closer_name"]
        data_sources.loc[data_owners.index,
                         "last_filter_name"] = data_owners["last_filter_name"]
        data_sources.loc[
            data_owners.index,
            "current_owner_name"] = data_owners["current_owner_name"]
        return data_sources.reset_index()

    def fix_rates(self):
        """Scale fraction-style rates (< 0.01) up to percentage values."""
        self.data_set['loan_interest_rate'] = self.data_set[
            'loan_interest_rate'].apply(lambda rate: rate * 100
                                        if rate < 0.01 else rate)

    def get_current_status_info(self):
        """Take the chronologically last status name and timestamp."""
        self.sort_column_by_another('status_record', 'status_at_record')
        self.data_set['current_status_name'] = self.data_set[
            'status_record'].str[-1]
        self.data_set['current_status_at'] = self.data_set[
            'status_at_record'].str[-1]

    def get_stage_transition_columns(self):
        """Add one ``last_<stage>_at`` column per funnel stage.

        Warning: this works on the last occurrence of each status found!
        :return:
        """
        for status in constants.SALES_FUNNEL_ORDER_ESP:
            self.extract_feature_relation(
                list_column="status_record",
                target_columns=["status_at_record"],
                search_indexer=status,
                first=False,
                column_names=[
                    "last_{}_at".format(status.lower().replace(" ", "_").replace(
                        "ú", "u"))
                ])
class AurumBasetable(DBPreprocess):
    """Builds the ``aurum_basetable.basetable`` table.

    One row per opportunity, combining questionnaire documents (per valid
    document type), opportunities without questionnaires, legal-guardian
    data, loan identifiers/interest rates, and derived time-delta features.
    """

    DB_ALIAS = 'aurum_basetable'
    TABLE_ALIAS = 'basetable'

    def __init__(self, valid_document_types_id=(1, 2, 6, 7, 12), **kwargs):
        """
        :param valid_document_types_id: iterable of document-type ids whose
            questionnaires are processed. The default used to be a mutable
            list literal shared across all instances; an immutable tuple
            avoids that aliasing while remaining backward compatible.
        """
        super().__init__(**kwargs, charset='utf8')
        # Copy into a fresh list so a caller-supplied sequence cannot be
        # mutated from outside (and the attribute keeps its list type).
        self.valid_document_types_id = list(valid_document_types_id)
        self.aurum_db = DBConnect(CRM_CREDENTIALS)

    def get_aurum_basetable(self):
        """Assemble the full basetable: documents + joins + extra features."""
        # process all documents
        df = self.get_df()
        # join extra tables
        legal_guardian_df = self.get_legal_guardian_df()
        loany_df = self.get_interest_rate_and_loan_unique_id()
        df = pd.merge(df, legal_guardian_df, how='left', on='opportunity_id')
        df = pd.merge(df, loany_df, how='left', on='opportunity_id')
        # add extra columns (features)
        df = self.get_extra_columns(df)
        return df

    def get_df(self):
        """Load one frame per valid document type, append opportunities
        without questionnaires and drop opportunities that end up with more
        than one row."""
        df = [
            AurumBasetableHelper(i).run() for i in self.valid_document_types_id
        ]
        df = pd.concat(df, ignore_index=True, axis=0)
        # add opportunities without questionnaires
        df_wq = self.get_opportunities_without_questionnaires(df)
        # join data frames
        df = pd.concat([df, df_wq], ignore_index=True, axis=0, sort=True)
        # find and drop duplicated opportunity ids (keep none of the copies,
        # since we cannot tell which row is the right one)
        df_agg = df.groupby('opportunity_id').agg({'opportunity_id': 'count'})
        duplicated_opp_id = df_agg[df_agg['opportunity_id'] > 1].index.tolist()
        if duplicated_opp_id:
            df = df[~df['opportunity_id'].isin(duplicated_opp_id)]
        return df

    def get_opportunities_without_questionnaires_query(self):
        """Return the parameterized SQL for accredited opportunities that
        have a requirement but no questionnaire document, excluding ids
        already present (bound via ``%(opportunity_ids)s``)."""
        query = """
        SELECT cjr.opportunity_id,
               NULL AS resource_id,
               0 as has_questionnaire_flag,
               cjr.role_type_id,
               sl.source_id,
               so.created_at AS opportunity_created_at,
               so.closed_at AS opportunity_closed_at,
               so.data_channel_id,
               so.customer_association_id,
               b2bc.employees,
               oct.first_name AS contact_first_name,
               oct.last_name AS contact_last_name,
               oct_2.contact_phone_number,
               oct_3.contact_email,
               cjr.person_type_id,
               so.collateral_value,
               so.collateral_usage,
               so.collateral_usage_type_id,
               so.collateral_location,
               so.collateral_neighborhood,
               so.collateral_municipality,
               so.collateral_reg_combined_code,
               so.collateral_inegi_code,
               so.property_tax_account,
               so.collateral_zip_code,
               so.lead_id,
               so.contact_id,
               so.user_owner_id,
               so.status_id,
               so.income_type_id,
               so.budget_amount_currency,
               so.monthly_sales,
               so.budget_amount_value,
               so.monthly_profits,
               so.monthly_formal_income,
               so.loan_purpose,
               so.primary_clients,
               so.product_id,
               so.cj_score,
               so.risk_level_score,
               so.risk_level_id,
               so.collateral_type_id,
               so.owner_experience_years,
               so.debt_service_coverage_ratio,
               NULL AS document_type_id,
               NULL AS document,
               NULL AS document_name
        FROM cj_requirements cjr
        LEFT JOIN orocrm_sales_opportunity so ON so.id = cjr.opportunity_id
        LEFT JOIN orocrm_sales_lead sl ON sl.id = so.lead_id
        LEFT JOIN orocrm_sales_customer c ON c.id = so.customer_association_id
        LEFT JOIN orocrm_sales_b2bcustomer b2bc ON b2bc.id = c.b2b_customer_188b774c_id
        LEFT JOIN orocrm_contact oct ON oct.id = so.contact_id
        LEFT JOIN (SELECT owner_id AS contact_id,
                          GROUP_CONCAT(phone SEPARATOR ' / ') AS contact_phone_number
                   FROM orocrm_contact_phone
                   GROUP BY owner_id) oct_2 ON oct_2.contact_id = so.contact_id
        LEFT JOIN (SELECT owner_id AS contact_id,
                          GROUP_CONCAT(email SEPARATOR ' / ') AS contact_email
                   FROM orocrm_contact_email
                   GROUP BY owner_id) oct_3 ON oct_3.contact_id = so.contact_id
        -- extract only the last requirement for each opportunity id to avoid repeated opportunities
        JOIN (SELECT cjr.opportunity_id, MAX(cjr.id) AS cjr_id
              FROM cj_requirements cjr
              WHERE cjr.role_type_id = 'ACCREDITED'
                AND cjr.resource_id IS NOT NULL
                AND cjr.person_type_id IS NOT NULL
                AND cjr.opportunity_id IS NOT NULL
              GROUP BY cjr.opportunity_id) t ON t.cjr_id = cjr.id
        WHERE cjr.role_type_id = 'ACCREDITED'
          AND cjr.resource_id IS NOT NULL
          AND cjr.opportunity_id IS NOT NULL
          AND cjr.person_type_id IS NOT NULL
          AND cjr.opportunity_id NOT IN %(opportunity_ids)s;"""
        return query

    def get_opportunities_without_questionnaires(self, df):
        """Fetch opportunities absent from *df* (no questionnaire rows)."""
        query = self.get_opportunities_without_questionnaires_query()
        df_wq = pd.read_sql(
            sql=query,
            con=self.aurum_db.conn,
            # Bind the already-seen ids so the query only returns new ones.
            params={'opportunity_ids': df['opportunity_id'].unique().tolist()})
        return df_wq

    def get_extra_columns(self, df):
        """Append derived feature columns to *df*."""
        df = self.get_time_delta_columns(df)
        return df

    def get_time_delta(self, future_event, past_event, time='days'):
        """Elementwise difference between two datetime Series.

        Missing deltas (either endpoint NaT) count as zero.

        :param time: 'days' for whole days, 'years' for floor(days / 365).
        :return: integer Series (days or years).
        """
        days_delta = future_event - past_event
        # Fill with an explicit zero Timedelta: filling a timedelta series
        # with the int 0 relied on implicit coercion that newer pandas
        # versions reject. .dt.days replaces the per-element lambda.
        days_delta = days_delta.fillna(pd.Timedelta(0))
        days_delta = days_delta.dt.days
        if time == 'days':
            return days_delta
        if time == 'years':
            years_delta = np.floor(days_delta / 365).astype(int)
            return years_delta

    def get_time_delta_columns(self, df):
        """Add client/company age and opportunity-lifetime delta columns."""
        # cast the raw date columns before any arithmetic
        df['fecha_nacimiento'] = pd.to_datetime(df['fecha_nacimiento'])
        df['fecha_constitucion'] = pd.to_datetime(df['fecha_constitucion'])
        df['opportunity_created_at'] = pd.to_datetime(
            df['opportunity_created_at'])
        df['opportunity_closed_at'] = pd.to_datetime(
            df['opportunity_closed_at'])
        # compute
        df['client_age'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['fecha_nacimiento'],
            time='years')
        df['company_age'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['fecha_constitucion'],
            time='years')
        df['days_delta_today_and_opportunity_created_at'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['opportunity_created_at'],
            time='days')
        df['days_delta_today_and_opportunity_closed_at'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['opportunity_closed_at'],
            time='days')
        df['days_delta_opportunity_closed_at_and_created_at'] = self.get_time_delta(
            future_event=df['opportunity_closed_at'],
            past_event=df['opportunity_created_at'],
            time='days')
        return df

    def get_legal_guardian_df(self):
        """Legal-guardian name/gender (extracted from the questionnaire
        JSON) for won opportunities, one row per opportunity (latest
        resource id wins)."""
        legal_guardian_df = self.aurum_db.custom_query('''
        SELECT cjr.opportunity_id,
               json_unquote(json_extract(cjd.data, '$.nombre_completo.nombre')) AS legal_guardian_first_name,
               json_unquote(json_extract(cjd.data, '$.nombre_completo.apellido_paterno')) AS legal_guardian_paternal_last_name,
               json_unquote(json_extract(cjd.data, '$.nombre_completo.apellido_materno')) AS legal_guardian_maternal_last_name,
               json_unquote(json_extract(cjd.data, '$.genero')) AS legal_guardian_gender
        FROM oro_crm.cj_documents AS cjd
        JOIN oro_crm.cj_requirements AS cjr ON cjd.id = cjr.resource_id
        JOIN orocrm_sales_opportunity AS so ON cjr.opportunity_id = so.id
        JOIN (SELECT cjr.opportunity_id, MAX(cjr.resource_id) AS resource_id
              FROM oro_crm.cj_requirements cjr
              WHERE role_type_id = 'LEGAL_GUARDIAN'
                AND resource_type = 'Document'
                AND cjr.opportunity_id IS NOT NULL
                AND cjr.resource_id IS NOT NULL
              GROUP BY cjr.opportunity_id) tt ON tt.resource_id = cjr.resource_id
        WHERE cjr.role_type_id = 'LEGAL_GUARDIAN'
          AND cjr.opportunity_id IS NOT NULL
          AND cjr.resource_id IS NOT NULL
          AND so.status_id = 'won';
        ''')
        return legal_guardian_df

    def get_interest_rate_and_loan_unique_id(self):
        """Loan unique identifier (loany box uid when present) and base
        interest rate per opportunity."""
        loany_df = self.aurum_db.custom_query('''
        SELECT IFNULL(o.loany_loan_box_uid, o.loan_unique_identifier) loan_unique_identifier,
               o.id as opportunity_id,
               o.loan_interest_rate as base_interest_rate
        FROM oro_crm.orocrm_sales_opportunity as o;
        ''')
        return loany_df

    def start_process(self):
        """Entry point used by the DBPreprocess pipeline."""
        self.data_set = self.get_aurum_basetable()

    def add_indexes(self):
        """Add the opportunity_id index to the destination table."""
        for index in aurum_basetable_d['index_column']:
            if index == 'opportunity_id':
                self.db_engine.execute(
                    f"ALTER TABLE `{self.DB_ALIAS}`.`{self.TABLE_ALIAS}` ADD INDEX ({index});"
                )
class OwnersPreprocess(DBPreprocess):
    """Builds the ``owners.user_opportunities`` table.

    One row per opportunity with its owner-change and status-change audit
    trails, from which current/first/last filter and closer names plus
    stage-transition timings are derived.
    """

    DB_ALIAS = "owners"
    TABLE_ALIAS = "user_opportunities"

    # GROUP_CONCATs every owner change (name, role, timestamp) and every
    # status change per opportunity from the Oro audit tables; timestamps
    # are converted from UTC to Mexico City time.
    query_owners = '''
    SELECT id opp_id,
           created_at,
           owner_names_record,
           role_names_record,
           owner_at_record,
           status_record,
           status_at_record,
           budget_amount_value,
           close_revenue_value
    FROM (
        SELECT o.id,
               CONVERT_TZ(o.created_at,'UTC','America/Mexico_City') created_at,
               group_concat(CONCAT(u.first_name,' ', u.last_name) )owner_names_record,
               group_concat(u_role_name.label )role_names_record,
               group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City') )owner_at_record,
               status_table.status_record,
               status_table.status_at_record,
               o.budget_amount_value,
               o.close_revenue_value
        FROM oro_crm.orocrm_sales_opportunity o
        LEFT JOIN oro_crm.oro_audit a ON a.object_id=o.id
        LEFT JOIN oro_crm.oro_audit_field af ON af.audit_id=a.id
        LEFT JOIN oro_crm.oro_user u ON u.username=af.new_text
            OR CONCAT(u.first_name,' ', u.last_name)=af.new_text
            OR u.email=af.new_text
        LEFT JOIN oro_crm.oro_user_access_role u_role_id ON u_role_id.user_id=u.id
        LEFT JOIN oro_crm.oro_access_role u_role_name ON u_role_name.id=u_role_id.role_id
        LEFT JOIN (
            SELECT group_concat(af.new_text) status_record,
                   group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City')) status_at_record,
                   o.id opp_id
            FROM oro_crm.orocrm_sales_opportunity o
            LEFT JOIN oro_crm.oro_audit a ON a.object_id=o.id
            LEFT JOIN oro_crm.oro_audit_field af ON af.audit_id=a.id
            WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
            AND af.field='status'
            GROUP BY o.id
        ) AS status_table ON status_table.opp_id=o.id
        WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
        AND af.field='owner'
        GROUP BY o.id
    ) AS audit_owner;
    '''

    def __init__(self, **kwargs):
        """ Opportunity-Owners relation """
        super().__init__(**kwargs)

    def init_data_set(self):
        """Connect to the CRM DB and load the raw audit-trail frame."""
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(self.query_owners)

    def start_process(self):
        """Full pipeline.

        Order matters: the *_record columns are sorted by their timestamp
        companions before first/last extraction, and the auxiliary record
        columns (plus per-stage helper columns collected in
        ``self.cols_to_drop``) are dropped at the end.
        """
        self.init_data_set()
        self.get_current_owner_info()
        self.get_current_status_info()
        self.sort_column_by_another('role_names_record', 'owner_at_record')
        self.get_last_filter_data()
        self.get_last_closer_data()
        self.get_first_filter_data()
        self.get_first_closer_data()
        self.get_time_to_first_filter()
        self.get_time_to_first_closer()
        self.get_stage_transition_columns()
        self.drop_auxiliar_columns([
            "status_at_record", "status_record", "owner_at_record",
            "role_names_record", "owner_names_record"
        ] + self.cols_to_drop)
        # Convention in this file: every timestamp column name contains "_at".
        datetime_cols = [
            column for column in self.data_set.columns if "_at" in column
        ]
        self.ensure_datetime_type(datetime_cols)

    def get_current_owner_info(self):
        """Derive current owner name/timestamp from the last entry of the
        chronologically sorted owner record lists."""
        self.sort_column_by_another('owner_names_record', 'owner_at_record')
        self.data_set["current_owner_name"] = self.data_set[
            'owner_names_record'].str[-1]
        self.data_set["current_owner_at"] = self.data_set[
            "owner_at_record"].str[-1]

    def get_current_status_info(self):
        """Derive current status name/timestamp from the last entry of the
        chronologically sorted status record lists."""
        self.sort_column_by_another('status_record', 'status_at_record')
        self.data_set["current_status_name"] = self.data_set[
            "status_record"].str[-1]
        self.data_set["current_status_at"] = self.data_set[
            "status_at_record"].str[-1]

    def get_last_filter_data(self):
        """Most recent owner whose role is "Sales Filter"."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Sales Filter",
            first=False,
            column_names=["last_filter_name", "last_filter_at"])

    def get_last_closer_data(self):
        """Most recent owner whose role is "Closer"."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Closer",
            first=False,
            column_names=["last_closer_name", "last_closer_at"])

    def get_first_filter_data(self):
        """Earliest owner whose role is "Sales Filter"."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Sales Filter",
            first=True,
            column_names=["first_filter_name", "first_filter_at"])

    def get_first_closer_data(self):
        """Earliest owner whose role is "Closer"."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Closer",
            first=True,
            column_names=["first_closer_name", "first_closer_at"])

    def get_time_to_first_filter(self):
        """
        In minutes

        Tip: for not to have noise in the analysis drop
        data_set.time_to_first_filter < pd.to_timedelta(0) in the dashboard
        backend
        :return:
        """
        delta = pd.to_datetime(
            self.data_set.first_filter_at) - self.data_set.created_at
        minutes = delta.dt.total_seconds() / 60
        minutes = minutes.round(2)
        self.data_set["time_to_first_filter"] = minutes

    def get_time_to_first_closer(self):
        """
        In minutes

        Tip: for not to have noise in the analysis drop
        data_set.time_to_first_closer < pd.to_timedelta(0) in the dashboard
        backend
        :return:
        """
        delta = pd.to_datetime(self.data_set.first_closer_at) - pd.to_datetime(
            self.data_set.first_filter_at)
        minutes = delta.dt.total_seconds() / 60
        minutes = minutes.round(2)
        self.data_set["time_to_first_closer"] = minutes

    def get_stage_transition_columns(self):
        """
        In hours, from opportunity creation to each last status

        Warning: This works on the last status founded!
        :return:
        """
        self.sort_column_by_another('status_record', 'status_at_record')
        for status in constants.SALES_FUNNEL_ORDER_ESP:
            self.extract_feature_relation(
                list_column="status_record",
                target_columns=["status_at_record"],
                search_indexer=status,
                first=False,
                column_names=[
                    # e.g. "Reunión" -> "last_reunion_at"
                    "last_{}_at".format(status.lower().replace(" ", "_").replace(
                        "ú", "u"))
                ])
        self._get_stage_transition_times()

    def _get_stage_transition_times(self):
        """Convert each last_<stage>_at column into an hours-since-creation
        ``<stage>_delta`` column, marking the helper columns for removal."""
        self.cols_to_drop = []
        for status in constants.SALES_FUNNEL_ORDER_ESP:
            print("from: stage transition times", status)
            column_name = "last_{}_at".format(status.lower().replace(
                " ", "_").replace("ú", "u"))
            self.cols_to_drop.append(column_name)
            delta = pd.to_datetime(
                self.data_set[column_name]) - self.data_set['created_at']
            hours = delta.dt.total_seconds() / 3600
            hours = hours.round(2)
            self.data_set["{}_delta".format(status.lower().replace(
                " ", "_").replace("ú", "u"))] = hours
def __init__(self, valid_document_types_id=(1, 2, 6, 7, 12), **kwargs):
    """
    :param valid_document_types_id: iterable of document-type ids whose
        questionnaires are processed. A mutable list default is shared
        across all instances of the class; an immutable tuple default
        avoids that aliasing while remaining backward compatible.
    """
    super().__init__(**kwargs, charset='utf8')
    # Copy into a fresh list so a caller-supplied sequence cannot be
    # mutated from outside (and the attribute keeps its list type).
    self.valid_document_types_id = list(valid_document_types_id)
    self.aurum_db = DBConnect(CRM_CREDENTIALS)