Beispiel #1
0
 def __init__(self, **kwargs):
     """
     Sources-Opportunity relation

     Forwards ``**kwargs`` to the parent initialiser, then opens one
     connector per database (CRM and Galileo).
     """
     super().__init__(**kwargs)
     # Two independent connections: CRM data and Galileo data.
     self.crm_connector = DBConnect(CRM_CREDENTIALS)
     self.galileo_connector = DBConnect(GALILEO_DB_CREDENTIALS)
Beispiel #2
0
 def __init__(self, **kwargs):
     """
     Active users and roles

     Forwards ``**kwargs`` to the parent initialiser, opens a CRM
     connector and eagerly loads the users/roles query result into
     ``self.data_set``.
     """
     super().__init__(**kwargs)
     self.crm_connector = DBConnect(CRM_CREDENTIALS)
     self.data_set = self.crm_connector.custom_query(self.query_users)
Beispiel #3
0
 def __init__(self, rfc: str, interval_unit: str, ids: tuple, path: "str | None" = None):
     """
     Prepare a declarations lookup for one RFC.

     :param rfc: taxpayer RFC whose declarations will be fetched
     :param interval_unit: declaration interval filter
     :param ids: declaration ids to restrict the lookup to
     :param path: output directory, created if missing; defaults to the
         current working directory *at call time*. (The previous
         ``path: str = os.getcwd()`` default was evaluated once, at
         function definition time, so a later ``os.chdir`` was ignored.)
     """
     self.rfc = rfc
     self.interval_unit = interval_unit
     # Resolve the default lazily so the cwd is taken per call.
     self.path = path if path is not None else os.getcwd()
     self.ids = ids
     os.makedirs(self.path, exist_ok=True)
     self.capone_connector = DBConnect(CAPONE_CREDENTIALS)
     self.declarations_df = self.get_all_declarations_for_given_rfc()
Beispiel #4
0
def get_declarations_ids(rfc: str, interval_unit: str, dec_type: str) -> set:
    """Return the set of declaration ids matching (rfc, type, interval_unit).

    The filter values are bound as query parameters instead of being
    interpolated into the SQL text with an f-string — this avoids SQL
    injection and broken quoting for values containing quotes.
    """
    capone_connector = DBConnect(CAPONE_CREDENTIALS)
    query = '''
            SELECT id
            FROM declarations
            WHERE rfc = %(rfc)s
            AND type = %(type)s
            AND interval_unit = %(interval_unit)s;
            '''
    params = {'rfc': rfc, 'type': dec_type, 'interval_unit': interval_unit}
    all_ids = capone_connector.custom_query(query, params=params)
    return set(all_ids['id'])
Beispiel #5
0
 def get_update_table_data(self):
     """Build the per-(client_rfc, data_type) update table for ``self.rfc``.

     Fetches all extraction rows for the RFC and keeps, per
     (client_rfc, data_type) group, whatever ``self.max_selector``
     chooses from the group.
     """
     mysql_capone = DBConnect(CAPONE_CREDENTIALS)
     # Bind the RFC as a query parameter rather than str.format-ing it
     # into the SQL text (injection-safe, quote-safe).
     extract_query = """
     SELECT credential_rfc client_rfc, handler data_type, state, finished_at
     FROM extractions
     WHERE credential_rfc = %(rfc)s;
     """
     extract_table = mysql_capone.custom_query(extract_query, params={'rfc': self.rfc})
     update_table = extract_table.groupby(['client_rfc', 'data_type']).apply(lambda x: self.max_selector(x))
     update_table.reset_index(inplace=True, drop=True)
     return update_table
Beispiel #6
0
 @staticmethod
 def get_all_types_of_declarations_df():
     '''
     Return a DataFrame of every distinct kind of declaration Capone found,
     grouped by (interval_unit, type, complementary) with a count per group.

     Declared ``@staticmethod``: the original bare ``def`` took no ``self``
     yet is invoked as ``self.get_all_types_of_declarations_df()``, which
     raises TypeError on an undecorated instance call.

     :return: a DataFrame with all the types of declarations capone found
     '''
     capone_connector = DBConnect(CAPONE_CREDENTIALS)
     query = 'SELECT interval_unit, type, complementary FROM declarations'
     declarations_frame = capone_connector.custom_query(query)
     # Helper column so count() yields the group sizes.
     declarations_frame['ind'] = 1
     tipos_declaraciones = declarations_frame.groupby(['interval_unit', 'type', 'complementary'])[
         'ind'].count().reset_index()
     return tipos_declaraciones
Beispiel #7
0
    def __init__(self, credentials=MARKETING_CREDENTIALS):
        """Open all connections and cache the sales-funnel stage order.

        :param credentials: credentials forwarded to the parent preprocess
        """
        super().__init__(credentials=credentials)
        self.aurum_conn = DBConnect(CRM_CREDENTIALS)
        # Work on a copy: mutating the shared GALILEO_DB_CREDENTIALS dict
        # in place leaks 'marketing' into every other consumer of that
        # module-level constant (the copy pattern used elsewhere in this
        # codebase, e.g. preload_transactions).
        galileo_credentials = GALILEO_DB_CREDENTIALS.copy()
        galileo_credentials['database'] = 'marketing'
        self.galileo_conn = DBConnect(galileo_credentials)
        self.marketing_utf_conn = create_engine(str(self.db_uri) + "?charset=utf8", encoding="utf-8")

        # Fetch the opportunity-status enum, sorted by column index 2
        # (presumably the priority column — confirm against the table
        # schema), keeping the human-readable label at index 1.
        conn = self.aurum_conn.conn.connect()
        result = conn.execute('SELECT * FROM oro_enum_opportunity_status;')
        _tmp = result.fetchall()
        _tmp = tuple(sorted(_tmp, key=lambda item: item[2]))
        conn.close()
        self.SALES_FUNNEL_ORDER_ESP = [x[1] for x in _tmp]
Beispiel #8
0
    def __init__(self, **kwargs):
        """Load the opportunity/tasks relation and derive owner features."""
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(
            self.query_tasks_by_opp)
        # self.detect_closed_unsuccessful_tasks()  # TODO: this has to be implemented in the notebook

        self.get_last_filter()
        self.get_last_closer()
        # The group_concat record columns are only inputs to the feature
        # extraction above; drop them from the final data set.
        self.drop_auxiliar_columns([
            "owner_names_record", "role_names_record",
            "not_closed_tasks_due_dates", "not_closed_tasks_created_at"
        ])
Beispiel #9
0
    def __init__(self, **kwargs):
        """Load the task audit trail and derive status/due-date features.

        Audit records only exist from 2017-06-02 11:19:00 onward, hence
        the warning printed below.
        """
        print("This is only auditable from: 2017-06-02 11:19:00")
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(
            self.query_not_closed_tasks_audit)

        self.get_first_open_status_at()
        self.get_last_closed_status_at()
        self.get_current_due_date()
        self.detect_closed_successful_tasks()

        # The raw group_concat record columns are only needed to compute
        # the features above; drop them from the final data set.
        self.drop_auxiliar_columns(
            ["task_status_record", "task_status_at_record", "due_date_record"])
Beispiel #10
0
def preload_transactions(rfc):
    """Fetch every transaction for *rfc* from the capone database,
    normalise the currency / payment-method columns, and upload a
    pickled, xz-compressed copy to the 'pickles' directory."""
    # Copy the shared credentials before pointing them at capone.
    credentials = GALILEO_DB_CREDENTIALS.copy()
    credentials['database'] = 'capone'
    capone_conn = DBConnect(credentials)

    transactions = capone_conn.custom_query(
        f"SELECT * FROM transactions WHERE client_rfc = '{rfc}'")
    transactions['fecha'] = pd.to_datetime(transactions['fecha'])

    # Fold any value outside the accepted vocabulary into a default.
    catchall_mask(transactions, 'moneda', 'moneda_adjusted',
                  ['MXN', 'USD', 'CAD', 'EUR', 'GBP'], 'MXN')
    catchall_mask(transactions, 'pay_method', 'pay_method_adjusted',
                  ['PPD', 'PUE'], 'PUE')

    upload_df(transactions, f'transactions-{rfc}.pkl.xz', base_dir='pickles')
Beispiel #11
0
    def get_razon_social(self):
        """Return the business name (razón social) registered for ``self.rfc``.

        Looks the RFC up first among issuers, then among receivers;
        returns ``'N/A'`` when it appears in neither column.
        """
        mysql_capone = DBConnect(CAPONE_CREDENTIALS)
        print("Updating rfc table for {}".format(self.rfc))
        df = mysql_capone.custom_query(self.razon_social_query)

        df = df.applymap(column_format)

        issuer_mask = df['issuer_rfc'] == self.rfc
        receiver_mask = df['receiver_rfc'] == self.rfc
        # .iloc[0] takes the first matching row positionally. The previous
        # `df[mask].loc[0, col]` raised KeyError whenever the row with
        # index label 0 was filtered out by the mask.
        if issuer_mask.any():
            return df.loc[issuer_mask, 'issuer_name'].iloc[0]
        if receiver_mask.any():
            return df.loc[receiver_mask, 'receiver_name'].iloc[0]
        return 'N/A'
Beispiel #12
0
    def __init__(self, **kwargs):
        """
        Pivot: call creation date ("Fecha de creacion de la llamada").

        Loads the call query, then runs the first-filter / contacted-at /
        first-call / first-owner extractors before dropping the helper
        record columns.
        """
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(self.query)
        self.extract_first_filter_data()
        self.get_contacted_at()
        self.detect_first_call()
        self.get_first_asigned_owner_data()

        # Record columns are only inputs to the extractors above.
        self.drop_auxiliar_columns([
            "status_at_record", "status_record", "owner_at_record",
            "role_names_record", "owner_names_record"
        ])
Beispiel #13
0
 def run(self):
     '''
     Collect a sample of declarations (20 or fewer, when fewer exist) for
     every kind of declaration Capone detected, keyed by interval_unit,
     type and complementary.

     :return: DataFrame with rfc, interval_unit, type, complementary,
         presentation_at, pdf and declaration_key columns; the pdf column
         holds the binary used to generate each declaration's pdf.
     '''
     self.check_for_new_declarations()
     kinds = self.get_all_types_of_declarations_df()
     connector = DBConnect(CAPONE_CREDENTIALS)
     # Composite key identifying each kind of declaration.
     kinds['declaration_key'] = (kinds['interval_unit'] + kinds['type']
                                 + kinds['complementary'])
     samples = []
     for _, kind in kinds.iterrows():
         sample_query = self.type_of_declaration_sample_query(
             kind['interval_unit'], kind['type'], kind['complementary'])
         sample = connector.custom_query(sample_query)
         sample['declaration_key'] = kind['declaration_key']
         samples.append(sample)
     return pd.concat(samples)
Beispiel #14
0
class DeclarationsManager:
    """
    'DeclarationsManager' class that allows the user to obtain all available declarations for a given RFC in pdf as
    tempfiles. It has also got a method that erases all pdf files generated in the container.
    If the RFC isn't found, it returns an empty dataframe. The declarations PDFs will be located in the path specified
    by the user and if it doesn't exist, it will be created.
    """

    def __init__(self, rfc: str, interval_unit: str, ids: tuple, path: "str | None" = None):
        """
        :param rfc: taxpayer RFC to fetch declarations for
        :param interval_unit: declaration interval filter
        :param ids: declaration ids to restrict the query to
        :param path: directory for the generated pdf files, created if
            missing; defaults to the current working directory *at call
            time* (the previous ``os.getcwd()`` default was evaluated
            once, at definition time)
        """
        self.rfc = rfc
        self.interval_unit = interval_unit
        self.path = path if path is not None else os.getcwd()
        self.ids = ids
        os.makedirs(self.path, exist_ok=True)
        self.capone_connector = DBConnect(CAPONE_CREDENTIALS)
        self.declarations_df = self.get_all_declarations_for_given_rfc()

    def get_all_declarations_for_given_rfc(self) -> pd.DataFrame:
        """Fetch the selected declarations and write each pdf to disk.

        :return: one row per declaration with a ``file_name`` column
            pointing at the pdf written under ``self.path`` (the binary
            ``pdf`` column is dropped); an empty DataFrame when
            ``self.ids`` is empty or nothing matched.
        """
        if len(self.ids) == 0:
            return pd.DataFrame()
        # Filter values are bound as query parameters (injection-safe).
        params = {'id': self.ids, 'rfc': self.rfc, 'interval': self.interval_unit}
        query = """SELECT 
                   rfc,
                   interval_unit,
                   type, 
                   complementary,
                   presentation_at,
                   id,
                   period,
                   pdf
                   FROM declarations
                   WHERE rfc = %(rfc)s 
                   AND interval_unit = %(interval)s 
                   AND id in %(id)s;"""
        declarations_frame = self.capone_connector.custom_query(query, params=params)
        if declarations_frame.empty:
            return declarations_frame
        dfs_list = []
        for index, row in declarations_frame.iterrows():
            # DataFrame.append was removed in pandas 2.0; build the
            # one-row frame from the Series directly (index preserved).
            temp_df = row.to_frame().T
            pdf_binary = temp_df['pdf'][index]
            # NamedTemporaryFile is only used to obtain a unique name
            # inside self.path; the pdf bytes go to "<name>.pdf".
            t = NamedTemporaryFile(dir=self.path)
            with open(f"{t.name}.pdf", 'wb') as f:
                f.write(pdf_binary)
            temp_df['file_name'] = f.name
            temp_df = temp_df.drop(columns=['pdf'])
            dfs_list.append(temp_df)
        declarations_df = pd.concat(dfs_list)
        return declarations_df

    def remove_pdfs_from_container(self):
        """Delete every pdf recorded in ``self.declarations_df``."""
        # file_name entries are absolute paths, so os.path.join returns
        # them unchanged on POSIX; kept for backward compatibility.
        file_list = self.declarations_df['file_name']
        for file in file_list:
            os.remove(os.path.join(self.path, file))
0
class VisitorData(DBPreprocess):
    """Replicate the Sherlock visits / phone-numbers left join into the
    marketing database, loading it one date batch at a time and offering
    a dedup pass over the resulting append-only table."""

    TABLE_ALIAS = 'visits_vpn_left_join'
    DB_ALIAS = 'marketing'

    join_call = Template("""
      SELECT
        v.visitor_id AS tracking_id,
        vpn.source,
        v.created_at as created_at,
        visit_data,
        visit_date
      FROM
        visits v
        LEFT JOIN visitor_phone_numbers vpn
        ON vpn.visitor_id = v.visitor_id
      WHERE $condition;
    """)

    def __init__(self, **kwargs):
        """Open the Sherlock connection on top of the parent preprocess."""
        super().__init__(**kwargs)
        self.sherlock_conn = DBConnect(SHERLOCK_CREDENTIALS)

    def call_batch(self, batch_query):
        """Run one batched join query, deduplicated on (tracking_id, visit_date)."""
        raw_batch = self.sherlock_conn.custom_query(batch_query)
        return raw_batch.drop_duplicates(subset=['tracking_id', 'visit_date'])

    def start_process(self, start_date, end_date, freq='30D'):
        """Fetch the join in date batches and append each batch to the DB."""
        date_ranges = batch_generator(start=start_date, end=end_date, freq=freq)
        batch_queries = get_substitutions(self.join_call, 'date', 'v.created_at', date_ranges)

        for index, batch in enumerate(batch_queries):
            print(f"Calling batch [{index}]")
            self.data_set = self.call_batch(batch)
            print(f"Sending batch [{index}]")
            self.send_to_db(if_exist="append")
        print("Done.")

    def deduplicate(self):
        """Collapse duplicate (tracking_id, visit_date) rows left behind by
        the append-only batch loads, via a temporary-table swap."""
        dedup_query_1 = f"CREATE TABLE IF NOT EXISTS {self.DB_ALIAS}.tmp LIKE {self.DB_ALIAS}.visits_vpn_left_join;"

        dedup_query_2 = f"""
                INSERT INTO {self.DB_ALIAS}.tmp (SELECT * FROM {self.DB_ALIAS}.visits_vpn_left_join
                GROUP BY tracking_id, visit_date);"""
        dedup_query_3 = f"DROP TABLE {self.DB_ALIAS}.visits_vpn_left_join;"
        dedup_query_4 = f"RENAME TABLE {self.DB_ALIAS}.tmp TO {self.DB_ALIAS}.visits_vpn_left_join;"

        # Execute the four statements in order over a single connection.
        connection = self.db_engine.connect()
        for statement in (dedup_query_1, dedup_query_2, dedup_query_3, dedup_query_4):
            connection.execute(statement)
        connection.close()
Beispiel #16
0
class InvestorsPreprocessPortfolio(DBPreprocess):
    """Monthly outstanding-principal totals (MXN and USD) for investors."""

    DB_ALIAS = "investors"
    TABLE_ALIAS = "outstanding_principal"
    MXN_TO_USD = 1 / usd_mxn_rate

    principal_balances_query = """
        SELECT
          coalesce(nullif(loanrecord.payment_date, NULL ), loan.start_date) payment_date,
          loanrecord.principal_balance,
          loanrecord.loan_id
        FROM loans_loanrecord loanrecord
          LEFT JOIN loans_loan loan ON loanrecord.loan_id = loan.id;
    """

    def __init__(self, **kwargs):
        """Open the Loany connection (PostgreSQL, per the connector arg)."""
        super().__init__(**kwargs)
        self.loany_connector = DBConnect(LOANY_CREDENTIALS, "postgresql")

    def start_process(self):
        """Materialise the processed table into ``self.data_set``."""
        self.data_set = self.get_raw_data()

    def get_raw_data(self):
        """Aggregate per-loan balances into a monthly portfolio total.

        Per month and loan the minimum recorded balance is kept; months
        with no record for a loan are forward-filled from the previous
        month before summing across loans and converting to USD.
        """
        balances = self.loany_connector.custom_query(
            self.principal_balances_query)
        balances['payment_date'] = pd.to_datetime(balances['payment_date'])
        monthly = (balances
                   .set_index('payment_date')
                   .groupby([pd.Grouper(freq='M'), 'loan_id'])['principal_balance']
                   .min()
                   .unstack())
        monthly.ffill(inplace=True)
        totals = monthly.stack().reset_index()
        totals.rename(columns={0: 'principal_balance_mxn'}, inplace=True)
        totals = (totals
                  .groupby('payment_date')['principal_balance_mxn']
                  .sum()
                  .reset_index())
        totals['principal_balance_usd'] = (
            totals['principal_balance_mxn'] * self.MXN_TO_USD)
        return totals
Beispiel #17
0
class UsersPreprocess(DBPreprocess):
    """Materialise the enabled 'Sales Filter' / 'Closer' users and roles."""

    DB_ALIAS = "owners"
    TABLE_ALIAS = "active_users"

    query_users = '''
    SELECT
      concat(u.first_name, ' ', u.last_name) name,
      u_role_name.label role,
      u.enabled enabled
    FROM oro_user u
      LEFT JOIN oro_user_access_role u_role_id ON u_role_id.user_id=u.id
      LEFT JOIN oro_access_role u_role_name ON u_role_name.id=u_role_id.role_id
    WHERE (u_role_name.label='Sales Filter' OR u_role_name.label='Closer')
    AND enabled=1
    '''

    def __init__(self, **kwargs):
        """
        Active users and roles: fetch the user/role listing eagerly.
        """
        super().__init__(**kwargs)
        connector = DBConnect(CRM_CREDENTIALS)
        self.crm_connector = connector
        self.data_set = connector.custom_query(self.query_users)
Beispiel #18
0
class Funnel(DBPreprocess):
    """Build the sales-funnel table from the CRM opportunity audit log."""

    DB_ALIAS = 'aurum_basetable'
    TABLE_ALIAS = 'funnel'

    # Canonical stage labels for the raw CRM field values. A single dict
    # replaces the original chain of mask-and-assign blocks; none of the
    # mapped-to values appears as a key, so one simultaneous mapping is
    # equivalent to the sequential replacements.
    FIELD_VALUE_MAP = {
        'Avalúo': 'Avalúo pagado',
        'Oferta final enviada': 'Oferta final',
        'Oferta Final Enviada': 'Oferta final',
        'Espera de firma': 'En espera de firma',
        'Closed Won': 'Ganado',
        'Closed Lost': 'Perdido',
        'Entrevista Agendada': 'Cita agendada',
        'Oportunidad Nueva': 'Nuevo',
        'Oferta Final Aceptada': 'Oferta final',
    }

    def __init__(self, **kwargs):
        """Open the Aurum (CRM) connection; pass charset='utf8' upward."""
        super().__init__(**kwargs, charset='utf8')
        self.aurum_db = DBConnect(CRM_CREDENTIALS)

    def start_process(self):
        """Fetch and normalise the funnel data set."""
        self.data_set = self.get_funnel()

    def add_indexes(self):
        """Add the indexes declared in opportunities_funnel_d to the table."""
        for index in opportunities_funnel_d['index_column']:
            if index == 'opportunity_id':
                self.db_engine.execute(
                    f"ALTER TABLE `{self.DB_ALIAS}`.`{self.TABLE_ALIAS}` ADD INDEX ({index});")
            if index == 'field_value':
                # Text column: index only its 20-character prefix.
                self.db_engine.execute(
                    f"ALTER TABLE `{self.DB_ALIAS}`.`{self.TABLE_ALIAS}` ADD INDEX ({index} (20));")

    def process_columns(self, funnel_df):
        """Map raw CRM stage names onto their canonical labels.

        Uses .loc-based assignment: the original chained indexing
        (``funnel_df['field_value'][mask] = ...``) raises
        SettingWithCopyWarning and is unreliable under pandas
        copy-on-write semantics.
        """
        mask = funnel_df['field_value'].isin(self.FIELD_VALUE_MAP)
        funnel_df.loc[mask, 'field_value'] = (
            funnel_df.loc[mask, 'field_value'].map(self.FIELD_VALUE_MAP))
        return funnel_df

    def get_funnel(self):
        """Query the opportunity status/owner audit trail and normalise it."""
        funnel_df = self.aurum_db.custom_query(
            '''
                SELECT
                        o.id opportunity_id,
                        o.status_id AS last_status,
                        o.created_at AS opportunity_created_at,                        
                        af.field AS field_type,            
                        a.user_id AS status_owner_id,
                        CONCAT(ou.first_name, ' ', ou.last_name) AS status_owner_name,
                        af.new_text AS field_value,
                        a.logged_at AS field_value_at     
                FROM orocrm_sales_opportunity o
                JOIN oro_audit a ON a.object_id=o.id
                JOIN oro_audit_field af ON af.audit_id = a.id
                JOIN oro_user ou ON ou.id = a.user_id
                WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity' AND af.field IN ('status', 'owner')
                ORDER BY o.id, field_value_at;
            '''
        )
        funnel_df = self.process_columns(funnel_df)
        return funnel_df
Beispiel #19
0
class TasksAuditPreprocess(DBPreprocess):
    """
    Tasks audit
    This is only auditable from: 2017-06-02 11:19:00

    > For user
    created tasks: (task_owner_name == user_name).shape[0]
    closed tasks: (current_task_status == 'Cerrado').shape[0]
    not closed tasks: (current_task_status != 'Cerrado').shape[0]
    closed sucessful tasks:
    """
    # One row per task: current status plus group_concat'd histories of
    # every audited status change, its timestamp, and the due dates.
    query_not_closed_tasks_audit = '''
        SELECT
          ot.task_id,
          convert_tz(t.createdAt,'UTC','America/Mexico_City') task_created_at,
          ts.name current_task_status,
          ot.opportunity_id opp_id,
          t.owner_id,
          u.enabled is_owner_enabled,
          concat(u.first_name, " ", u.last_name) task_owner_name,
          u_role_name.label task_owner_role,
          group_concat(af.new_text) task_status_record,
          group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City')) task_status_at_record,
          group_concat(CONVERT_TZ(t.due_date,'UTC','America/Mexico_City')) due_date_record
        FROM oro_rel_f24c741b5154c0033bfb48 ot
          LEFT JOIN oro_audit a ON ot.task_id=a.object_id
          LEFT JOIN oro_audit_field af ON a.id = af.audit_id
          LEFT JOIN orocrm_task t ON ot.task_id = t.id
          LEFT JOIN oro_user u ON t.owner_id = u.id
          LEFT JOIN oro_user_access_role u_role ON u.id = u_role.user_id
          LEFT JOIN oro_access_role u_role_name ON u_role_name.id=u_role.role_id
          LEFT JOIN oro_enum_task_status ts ON t.status_id = ts.id
        WHERE a.object_class="Oro\\\Bundle\\\TaskBundle\\\Entity\\\Task"
        AND
        af.field = "status"
        GROUP BY ot.task_id
        '''

    DB_ALIAS = "operational"
    TABLE_ALIAS = "tasks_audit"

    def __init__(self, **kwargs):
        """Load the task audit data set and derive status/due-date features."""
        print("This is only auditable from: 2017-06-02 11:19:00")
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(
            self.query_not_closed_tasks_audit)

        self.get_first_open_status_at()
        self.get_last_closed_status_at()
        self.get_current_due_date()
        self.detect_closed_successful_tasks()

        # Raw record columns are only inputs to the features above.
        self.drop_auxiliar_columns(
            ["task_status_record", "task_status_at_record", "due_date_record"])

    def get_first_open_status_at(self):
        """Record when each task first entered the 'Abierto' status."""
        self.extract_feature_relation(list_column="task_status_record",
                                      target_columns=["task_status_at_record"],
                                      search_indexer="Abierto",
                                      first=True,
                                      column_names=["first_open_status_at"])

    def get_last_closed_status_at(self):
        """Record when each task last entered the 'Cerrado' status."""
        self.extract_feature_relation(list_column="task_status_record",
                                      target_columns=["task_status_at_record"],
                                      search_indexer="Cerrado",
                                      first=False,
                                      column_names=["last_closed_status_at"])

    def get_current_due_date(self):
        """Take the newest entry of the due-date record as the current one.

        Tasks with no due-date history get "" (fillna before splitting).
        """
        due_dates_list = self.data_set["due_date_record"].fillna("").str.split(
            ",")
        self.data_set["current_due_date"] = due_dates_list.apply(
            lambda l: l[-1])

    def detect_closed_successful_tasks(self):
        """Flag currently-'Cerrado' tasks closed before their due date (1/0).

        Only the 'Cerrado' subset is assigned, so rows for tasks in any
        other status end up NaN after index alignment.
        """
        closed = self.data_set.query("current_task_status == 'Cerrado'")
        is_successful_closed = pd.to_datetime(
            closed.current_due_date) > pd.to_datetime(
                closed.last_closed_status_at)
        is_successful_closed = is_successful_closed.map({True: 1, False: 0})
        self.data_set["is_successful_closed"] = is_successful_closed
Beispiel #20
0
"""The purpose of this document is to define certain environmental constants that
help to control the behavior of the dashboards, e.g. the order of sales
funnel stages so that they are no default-ordered by count
"""

from ds_dependencies.base_preprocess import DBConnect
from ds_dependencies.credentials import CRM_CREDENTIALS

crm_connector = DBConnect(CRM_CREDENTIALS)

query = 'SELECT * FROM oro_enum_opportunity_status;'
sf_stages = crm_connector.custom_query(query)
sf_stages.sort_values(by='priority', inplace=True)

SALES_FUNNEL_ORDER = sf_stages['id'].tolist()

SALES_FUNNEL_ORDER_ESP = sf_stages['name'].tolist()

SALES_FUNNEL_MAPPING = sf_stages[['id', 'name']].set_index('id')['name']

frequencies = ['Diario', 'Semanal', 'Quincenal', 'Mensual']
Beispiel #21
0
 def __init__(self, **kwargs):
     """Open the Aurum (CRM) connection; pass charset='utf8' to the parent."""
     super().__init__(**kwargs, charset='utf8')
     self.aurum_db = DBConnect(CRM_CREDENTIALS)
Beispiel #22
0
 def __init__(self, **kwargs):
     """Open the Sherlock connection on top of the parent preprocess."""
     super().__init__(**kwargs)
     self.sherlock_conn = DBConnect(SHERLOCK_CREDENTIALS)
Beispiel #23
0
class OppTasksPreprocess(DBPreprocess):
    """
    Opps-tasks relation
    opps without tasks: '(not_closed_tasks_count == 0) & (open_tasks_count == 0)'
    opps with open tasks: 'open_tasks_count != 0'
    opps with tasks without due date: '(open_tasks_count != 0) & (due_dates_count < open_tasks_count)'
    opps with unsuccessful open tasks : 'not_closed_tasks_count > 0' and dues < created
    """
    # One row per opportunity; task details arrive as correlated-subquery
    # group_concat/count columns over the opportunity-task relation table.
    query_tasks_by_opp = '''
        SELECT
          o.id opp_id,
          concat(u.first_name, " ", u.last_name) current_owner,
          group_concat(o_u.first_name, " ", o_u.last_name) owner_names_record,
          group_concat(u_role_name.label) role_names_record,
          (
            SELECT
                group_concat(t.id)
            FROM
                oro_rel_f24c741b5154c0033bfb48 r
                LEFT JOIN orocrm_task t ON r.task_id = t.id
            WHERE 1
                AND r.opportunity_id = o.id
                AND t.status_id != 'closed'
          ) AS not_closed_tasks_id_record,
          (
            SELECT
                group_concat(CONVERT_TZ(t.createdAt, 'UTC','America/Mexico_City'))
            FROM
                oro_rel_f24c741b5154c0033bfb48 r
                LEFT JOIN orocrm_task t ON r.task_id = t.id
            WHERE 1
                AND r.opportunity_id = o.id
                AND t.status_id != 'closed'
          ) AS not_closed_tasks_created_at,
          (
            SELECT
                count(t.id)
            FROM
                oro_rel_f24c741b5154c0033bfb48 r
                LEFT JOIN orocrm_task t ON r.task_id = t.id
            WHERE 1
                AND r.opportunity_id = o.id
                AND t.status_id != 'closed'
          ) AS not_closed_tasks_count,
          (
            SELECT
              group_concat(CONVERT_TZ(t.due_date,'UTC','America/Mexico_City'))
          FROM
              oro_rel_f24c741b5154c0033bfb48 r
              LEFT JOIN orocrm_task t ON t.id = r.task_id
          WHERE 1
              AND r.opportunity_id = o.id
              AND t.status_id != 'closed'
          ) AS not_closed_tasks_due_dates,
          (
            SELECT
              count(t.due_date)
          FROM
              oro_rel_f24c741b5154c0033bfb48 r
              LEFT JOIN orocrm_task t ON t.id = r.task_id
          WHERE 1
              AND r.opportunity_id = o.id
              AND t.status_id != 'closed'
          ) AS due_dates_count,
          (
           SELECT
              count(t.id)
            FROM
                oro_rel_f24c741b5154c0033bfb48 r
                LEFT JOIN orocrm_task t ON t.id = r.task_id
            WHERE 1
                AND r.opportunity_id = o.id
                AND t.status_id = 'open'
          ) open_tasks_count

        FROM orocrm_sales_opportunity o
            LEFT JOIN oro_user u ON o.user_owner_id = u.id
            LEFT JOIN oro_audit a ON a.object_id=o.id
            LEFT JOIN oro_audit_field af ON af.audit_id=a.id
            LEFT JOIN oro_user o_u ON o_u.username=af.new_text
            LEFT JOIN oro_user_access_role u_role_id ON u_role_id.user_id=o_u.id
            LEFT JOIN oro_access_role u_role_name ON u_role_name.id=u_role_id.role_id
        WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
          AND af.field='owner'

        GROUP BY o.id
    '''

    DB_ALIAS = "operational"
    TABLE_ALIAS = "tasks_by_opp"

    def __init__(self, **kwargs):
        """Load the opps-tasks relation and derive last filter/closer names."""
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(
            self.query_tasks_by_opp)
        # self.detect_closed_unsuccessful_tasks()  # TODO: this has to be implemented in the notebook

        self.get_last_filter()
        self.get_last_closer()
        # Record columns are only inputs to the extractors above.
        self.drop_auxiliar_columns([
            "owner_names_record", "role_names_record",
            "not_closed_tasks_due_dates", "not_closed_tasks_created_at"
        ])

    def get_last_filter(self):
        """Extract the name of the last owner with the 'Sales Filter' role."""
        self.extract_feature_relation(list_column="role_names_record",
                                      target_columns=["owner_names_record"],
                                      search_indexer="Sales Filter",
                                      first=False,
                                      column_names=["last_filter_name"])

    def get_last_closer(self):
        """Extract the name of the last owner with the 'Closer' role."""
        self.extract_feature_relation(list_column="role_names_record",
                                      target_columns=["owner_names_record"],
                                      search_indexer="Closer",
                                      first=False,
                                      column_names=["last_closer_name"])
Beispiel #24
0
 def init_data_set(self):
     """Connect to the CRM and load the owners query into ``self.data_set``."""
     self.crm_connector = DBConnect(CRM_CREDENTIALS)
     self.data_set = self.crm_connector.custom_query(self.query_owners)
Beispiel #25
0
class AurumToSherlock(DBPreprocess):
    """ETL that fills ``minimalist.aurum_to_sherlock``.

    Joins offline CRM opportunity/stage data (Aurum) with online site-visit
    data (Galileo), processed in date batches and appended to the target
    table via :meth:`start_process`.
    """

    TABLE_ALIAS = 'aurum_to_sherlock'
    DB_ALIAS = 'minimalist'

    # Source columns condensed into the single ``aggregated_campaign`` column.
    CAMPAIGN_COLUMNS = ['ga_campaign', 'campaign']

    opp_stage_info_query = Template("""
        SELECT
          o.id opportunity_id,
          group_concat(af.new_text) status,
          MAX(a.logged_at) last_status_at,
          MAX(o.created_at) opp_created_at,
        
        
          leads_table.tracking_id AS tracking_id,
          o.created_at AS time_in_crm,
          o.budget_amount_value AS budget_amount_value,
          o.collateral_value,
          o.notes
        
        
        FROM orocrm_sales_opportunity o
          LEFT JOIN oro_audit a ON a.object_id=o.id
          LEFT JOIN oro_audit_field af ON af.audit_id=a.id
          LEFT JOIN orocrm_sales_lead leads_table ON leads_table.id = o.lead_id
        WHERE $condition
          AND a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
          AND af.field='status'
        GROUP BY o.id;
    """)

    online_info_query = Template("""
      SELECT
        tracking_id,
        source,
        visit_data,
        visit_date
      FROM visits_vpn_left_join
      WHERE $condition;
    """)

    def __init__(self, credentials=MARKETING_CREDENTIALS):
        super().__init__(credentials=credentials)
        self.aurum_conn = DBConnect(CRM_CREDENTIALS)
        # Work on a shallow copy: mutating the module-level
        # GALILEO_DB_CREDENTIALS dict in place leaks the 'marketing' database
        # override into every other class that reuses the same dict.
        galileo_credentials = dict(GALILEO_DB_CREDENTIALS)
        galileo_credentials['database'] = 'marketing'
        self.galileo_conn = DBConnect(galileo_credentials)
        self.marketing_utf_conn = create_engine(str(self.db_uri) + "?charset=utf8", encoding="utf-8")

        # Build the ordered list of sales-funnel stage labels from the CRM
        # enum table; close the connection even if the query raises.
        conn = self.aurum_conn.conn.connect()
        try:
            result = conn.execute('SELECT * FROM oro_enum_opportunity_status;')
            _tmp = result.fetchall()
        finally:
            conn.close()
        # Rows sorted by the third column (stage priority); second column is
        # the human-readable stage label.
        _tmp = tuple(sorted(_tmp, key=lambda item: item[2]))
        self.SALES_FUNNEL_ORDER_ESP = [x[1] for x in _tmp]

    def get_offline_info(self, query):
        """Run an opportunity/stage query and one-hot encode funnel stages.

        The ``status`` column arrives as a comma-separated audit trail;
        each known funnel stage becomes a 0/1 column flagging whether the
        opportunity ever reached that stage.
        """
        opp_stage_info = self.aurum_conn.custom_query(query)
        opp_stage_info['status'] = opp_stage_info['status'].str.split(',')
        for stage in self.SALES_FUNNEL_ORDER_ESP:
            opp_stage_info[stage] = opp_stage_info['status'].apply(lambda x: int(stage in x))
        opp_stage_info = opp_stage_info.drop(['status'], axis=1, inplace=False)
        return opp_stage_info

    def get_online_data(self, query):
        """Fetch and flatten one batch of online visit data.

        Expands the JSON ``visit_data`` payload into columns, condenses the
        campaign columns into ``aggregated_campaign`` and derives the binary
        ``paid`` flag (1 when a campaign is present).
        """
        print('importing online data...')

        online_info = self.galileo_conn.custom_query(query)
        online_info = minimalist_utils.online_info_parse(online_info)

        # One row per visit: parse the JSON blob into its own columns.
        expanded_visit_data = online_info['visit_data'].apply(lambda x: pd.Series(_json_parser(x)))
        del online_info['visit_data']
        online_info = pd.concat([online_info, expanded_visit_data], axis=1)

        # Google Analytics uses '(not set)' as its null marker.
        online_info.replace('(not set)', np.nan, inplace=True)
        online_info = minimalist_utils.column_condenser(online_info, self.CAMPAIGN_COLUMNS, 'aggregated_campaign')
        online_info.drop(self.CAMPAIGN_COLUMNS, axis=1, inplace=True)

        online_info['paid'] = pd.isnull(online_info['aggregated_campaign']).apply(lambda x: int(not x))

        online_info = online_info.drop(['glcid', 'source'], axis=1, inplace=False)
        return online_info

    def merge_aurum_to_sherlock(self, offline_info, online_info):
        """Left-join offline info onto online visits and add weight columns."""
        print('Merging online and offline data tables for Sherlock <- Aurum...')
        aurum_to_sherlock = online_info.merge(offline_info, on='tracking_id', how='left')
        aurum_to_sherlock.drop_duplicates(inplace=True)
        aurum_to_sherlock.sort_values(by=['visit_date'], inplace=True)
        # Attribution weights are filled in by a later process.
        aurum_to_sherlock['weight'] = 0.0
        aurum_to_sherlock['amount_times_weight'] = 0.0
        return aurum_to_sherlock

    def format_final_table(self):
        """Deduplicate the target table and add an indexed row counter.

        Dedup works by rebuilding the table through a temporary copy grouped
        on (tracking_id, visit_date); the row index is assigned with a MySQL
        session variable.
        """
        print('Removing tracking id-site visit duplicates')
        table_name = f"{self.DB_ALIAS}.{self.TABLE_ALIAS}"
        dedup_query_1 = f"CREATE TABLE IF NOT EXISTS {self.DB_ALIAS}.tmp LIKE {table_name};"
        dedup_query_2 = f"""
        INSERT INTO {self.DB_ALIAS}.tmp (SELECT * FROM {table_name}
        GROUP BY tracking_id, visit_date);"""
        dedup_query_3 = f"DROP TABLE {table_name};"
        dedup_query_4 = f"RENAME TABLE {self.DB_ALIAS}.tmp TO {table_name};"

        # Context managers guarantee the connection is released on error.
        with self.db_engine.connect() as conn:
            conn.execute(dedup_query_1)
            conn.execute(dedup_query_2)
            conn.execute(dedup_query_3)
            conn.execute(dedup_query_4)

        print('Adding row index...')
        row_index_query_1 = f"""ALTER TABLE {table_name} ADD row_index INT(11) DEFAULT '0' NOT NULL FIRST;"""
        row_index_query_2 = """SELECT @n:=0;"""
        row_index_query_3 = f"""UPDATE {table_name} SET row_index = @n := @n + 1;"""
        with self.db_engine.connect() as conn:
            conn.execute(row_index_query_1)
            conn.execute(row_index_query_2)
            conn.execute(row_index_query_3)

        print('Indexing row index...')
        indexer_query = f"ALTER TABLE {table_name} ADD INDEX (row_index);"
        with self.db_engine.connect() as conn:
            conn.execute(indexer_query)

    def start_process(self, start_date, end_date, freq):
        """Run the batched ETL between two dates.

        Online visits are pulled per date batch; the matching offline CRM
        rows are fetched in sub-batches of up to 4000 tracking ids, merged,
        and appended to the target table.
        """
        dates = batch_generator(start=start_date, end=end_date, freq=freq)
        online_data_batches = get_substitutions(self.online_info_query, 'date', 'visit_date', dates)

        for idx, batch in enumerate(online_data_batches):
            print(f"Batch [{idx}]")
            online_info = self.get_online_data(batch)
            tracking_ids = list(online_info['tracking_id'].unique())
            searches = batch_generator(on=tracking_ids, freq=4000)
            offline_info_batches = get_substitutions(self.opp_stage_info_query, 'search', 'leads_table.tracking_id',
                                                     searches)
            offline_info = []
            for sub_idx, offline_batch in enumerate(offline_info_batches):
                print(f"sub-batch [{sub_idx}]")
                offline_info.append(self.get_offline_info(offline_batch))
            offline_info = pd.concat(offline_info)
            aurum_to_sherlock = self.merge_aurum_to_sherlock(offline_info, online_info)
            self.data_set = aurum_to_sherlock
            print("Sending to Marketing")
            self.send_to_db(if_exist="append")
Beispiel #26
0
 def get_data(self):
     """Fetch one row per opportunity holding its latest ACCREDITED document.

     Joins cj_requirements with opportunity, lead, customer and contact
     tables, restricted to the document type in ``self.document_type_id``,
     keeping only the most recent (MAX resource_id) document per
     opportunity.

     :return: DataFrame with opportunity, contact and document columns.
     """
     aurum_db = DBConnect(CRM_CREDENTIALS)
     # NOTE(review): the %s placeholders below are filled via Python
     # %-formatting (see the trailing `% (...)`), not driver-side query
     # parameters — acceptable only because self.document_type_id is an
     # internal value, never user input. Confirm before reusing elsewhere.
     aurum_df = aurum_db.custom_query(
         """SELECT
                                             cjr.opportunity_id,
                                             cjr.resource_id,
                                             1 as has_questionnaire_flag,
                                             cjr.role_type_id,
                                             sl.source_id,
                                             so.created_at as opportunity_created_at,
                                             so.closed_at as opportunity_closed_at,
                                             so.data_channel_id,
                                             so.customer_association_id,    
                                             b2bc.employees,
                                             oct.first_name AS contact_first_name,
                                             oct.last_name AS contact_last_name,                                                
                                             oct_2.contact_phone_number,
                                             oct_3.contact_email,
                                             cjr.person_type_id,
                                             so.collateral_value,
                                             so.collateral_usage,
                                             so.collateral_usage_type_id,
                                             so.collateral_location,
                                             so.collateral_neighborhood,
                                             so.collateral_municipality,
                                             so.collateral_reg_combined_code,
                                             so.collateral_inegi_code,
                                             so.property_tax_account,
                                             so.collateral_zip_code, 
                                             so.lead_id,
                                             so.contact_id,
                                             so.user_owner_id,
                                             so.status_id,
                                             so.income_type_id,
                                             so.budget_amount_currency,    
                                             so.monthly_sales,
                                             so.budget_amount_value,        
                                             so.monthly_profits,
                                             so.monthly_formal_income,
                                             so.loan_purpose,
                                             so.primary_clients,
                                             so.product_id,
                                             so.cj_score,
                                             so.risk_level_score,
                                             so.risk_level_id,        
                                             so.collateral_type_id,
                                             so.owner_experience_years,
                                             so.debt_service_coverage_ratio,                    
                                             docs.document_type_id,
                                             docs.document,
                                             cjr.resource_subtype as document_name
                                         FROM cj_requirements cjr
                                         LEFT JOIN
                                         (SELECT
                                             cjd.id AS resource_id,
                                             cjd.document_type_id,
                                             cjd.DATA AS 'document'
                                         FROM oro_crm.cj_documents cjd
                                         LEFT JOIN oro_crm.cj_document_types cjdt ON cjdt.id = cjd.document_type_id
                                         WHERE cjd.document_type_id = %s) docs ON docs.resource_id = cjr.resource_id
                                         LEFT JOIN orocrm_sales_opportunity so ON so.id = cjr.opportunity_id
                                         LEFT JOIN orocrm_sales_lead sl ON sl.id = so.lead_id
                                         LEFT JOIN orocrm_sales_customer c ON c.id = so.customer_association_id
                                         LEFT JOIN orocrm_sales_b2bcustomer b2bc ON b2bc.id = c.b2b_customer_188b774c_id
                                         LEFT JOIN orocrm_contact oct ON oct.id = so.contact_id
                                         LEFT JOIN (SELECT owner_id AS contact_id, GROUP_CONCAT(phone SEPARATOR ' / ') AS contact_phone_number FROM orocrm_contact_phone GROUP BY owner_id) oct_2 ON oct_2.contact_id = so.contact_id
                                         LEFT JOIN (SELECT owner_id AS contact_id, GROUP_CONCAT(email SEPARATOR ' / ') AS contact_email FROM orocrm_contact_email GROUP BY owner_id) oct_3 ON oct_3.contact_id = so.contact_id
                                         -- only extract last document submitted
                                         JOIN (SELECT 
                                                 cjr.opportunity_id,
                                                 MAX(cjr.resource_id) AS resource_id
                                             FROM cj_requirements cjr
                                             JOIN orocrm_sales_opportunity so ON so.id = cjr.opportunity_id
                                             JOIN
                                                 (SELECT
                                                     cjd.id AS resource_id,
                                                     cjd.document_type_id,
                                                     cjd.DATA AS 'document'
                                                 FROM oro_crm.cj_documents cjd
                                                 JOIN oro_crm.cj_document_types cjdt ON cjdt.id = cjd.document_type_id
                                                 WHERE cjd.document_type_id = %s) docs ON docs.resource_id = cjr.resource_id  
                                             WHERE 
                                                 cjr.resource_type = 'Document'
                                                 AND cjr.role_type_id = 'ACCREDITED'
                                                 AND cjr.resource_id IS NOT NULL
                                                 AND cjr.opportunity_id IS NOT NULL
                                             GROUP BY cjr.opportunity_id) t ON t.resource_id = cjr.resource_id
                                         WHERE 
                                             cjr.resource_type = 'Document'
                                             AND cjr.role_type_id = 'ACCREDITED'
                                             AND cjr.resource_id IS NOT NULL
                                             AND cjr.opportunity_id IS NOT NULL
                                             AND docs.document IS NOT NULL;"""
         % (self.document_type_id, self.document_type_id))
     return aurum_df
Beispiel #27
0
class SourcesPreprocess(DBPreprocess):
    """Builds the ``marketing.sources`` table: one row per opportunity with
    its lead source/medium, funnel-status history and owner names.
    """

    DB_ALIAS = "marketing"
    TABLE_ALIAS = "sources"
    query_sources = '''
        SELECT
            l.id lead_id,
            o.id opp_id,
            o.name opp_name,
            c.name campaign_id,
            l.source_id,
            l.medium_id,
            o.budget_amount_value,
            o.close_revenue_value,
            o.loan_duration,
            o.loan_interest_rate,
            l.status_id lead_status_id,
            status_table.status_record,
            status_table.status_at_record,
            CONVERT_TZ(o.created_at,'UTC','America/Mexico_City') opp_created_at
        FROM orocrm_sales_opportunity o
        LEFT JOIN orocrm_sales_lead l ON o.lead_id = l.id
        LEFT JOIN orocrm_campaign c ON l.campaign_id = c.id
        LEFT JOIN (

        SELECT
          group_concat(af.new_text) status_record,
          group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City')) status_at_record,
          o.id opp_id
        FROM orocrm_sales_opportunity o
          LEFT JOIN oro_audit a ON a.object_id = o.id
          LEFT JOIN oro_audit_field af ON af.audit_id = a.id
        WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
          AND af.field='status'
        GROUP BY o.id

        ) AS status_table ON status_table.opp_id = o.id;
    '''

    query_owners = '''
        SELECT
          ow.opp_id,
          ow.last_filter_name,
          ow.last_closer_name,
          ow.current_owner_name
        FROM owners.user_opportunities ow
    '''

    def __init__(self, **kwargs):
        """Open one connector per source: CRM (sources) and Galileo (owners)."""
        super().__init__(**kwargs)
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.galileo_connector = DBConnect(GALILEO_DB_CREDENTIALS)

    def start_process(self):
        """Build, enrich and clean the sources data set."""
        self.data_set = self.get_data_set()

        self.fix_rates()
        self.get_current_status_info()
        self.get_stage_transition_columns()

        self.drop_auxiliar_columns(["status_at_record", "status_record"])
        # Normalize every timestamp-like ("*_at") column to datetime dtype.
        self.ensure_datetime_type(
            [column for column in self.data_set.columns if "_at" in column])

    def get_data_set(self):
        """Join the CRM sources query with owner names from Galileo.

        Both frames are aligned on opp_id; the owner columns are copied over
        per opportunity id.
        """
        sources = self.crm_connector.custom_query(self.query_sources)
        owners = self.galileo_connector.custom_query(self.query_owners)
        sources.set_index("opp_id", inplace=True)
        owners.set_index("opp_id", inplace=True)
        for column in ("last_closer_name", "last_filter_name",
                       "current_owner_name"):
            sources.loc[owners.index, column] = owners[column]
        return sources.reset_index()

    def fix_rates(self):
        """Rescale interest rates stored as fractions (< 0.01) to percentages."""
        def _as_percentage(rate):
            return rate * 100 if rate < 0.01 else rate

        self.data_set['loan_interest_rate'] = (
            self.data_set['loan_interest_rate'].apply(_as_percentage))

    def get_current_status_info(self):
        """Take the chronologically last status name/date from the records."""
        self.sort_column_by_another('status_record', 'status_at_record')
        self.data_set['current_status_name'] = (
            self.data_set['status_record'].str[-1])
        self.data_set['current_status_at'] = (
            self.data_set['status_at_record'].str[-1])

    def get_stage_transition_columns(self):
        """Add a ``last_<stage>_at`` column per funnel stage.

        NOTE: only the last occurrence of each status in the record is used.
        """
        for status in constants.SALES_FUNNEL_ORDER_ESP:
            # ASCII/snake_case slug for the column name.
            slug = status.lower().replace(" ", "_").replace("ú", "u")
            self.extract_feature_relation(
                list_column="status_record",
                target_columns=["status_at_record"],
                search_indexer=status,
                first=False,
                column_names=[f"last_{slug}_at"])
Beispiel #28
0
class AurumBasetable(DBPreprocess):
    """Builds the ``aurum_basetable.basetable`` data set: one row per
    opportunity, combining questionnaire documents, legal-guardian info,
    loan identifiers and derived time-delta features.
    """

    DB_ALIAS = 'aurum_basetable'
    TABLE_ALIAS = 'basetable'

    # Tuple (not list) default: a mutable default argument would be shared
    # across every call of __init__.
    def __init__(self, valid_document_types_id=(1, 2, 6, 7, 12), **kwargs):
        """
        :param valid_document_types_id: document type ids whose
            questionnaires are processed into the basetable.
        """
        super().__init__(**kwargs, charset='utf8')
        # Normalize to a list so the attribute keeps its historical type.
        self.valid_document_types_id = list(valid_document_types_id)
        self.aurum_db = DBConnect(CRM_CREDENTIALS)

    def get_aurum_basetable(self):
        """Assemble the full basetable: documents + extra joins + features."""
        # process all documents
        df = self.get_df()
        # join extra tables
        legal_guardian_df = self.get_legal_guardian_df()
        loany_df = self.get_interest_rate_and_loan_unique_id()
        df = pd.merge(df, legal_guardian_df, how='left', on='opportunity_id')
        df = pd.merge(df, loany_df, how='left', on='opportunity_id')
        # add extra columns (features)
        df = self.get_extra_columns(df)
        return df

    def get_df(self):
        """Collect per-document-type frames, add opportunities without
        questionnaires, and drop opportunities that appear more than once.
        """
        df = [
            AurumBasetableHelper(i).run() for i in self.valid_document_types_id
        ]
        df = pd.concat(df, ignore_index=True, axis=0)
        # add opportunities without questionnaires
        df_wq = self.get_opportunities_without_questionnaires(df)
        # join data frames
        df = pd.concat([df, df_wq], ignore_index=True, axis=0, sort=True)
        # find and drop duplicates (an opportunity_id appearing twice is
        # ambiguous, so all its rows are removed)
        df_agg = df.groupby('opportunity_id').agg({'opportunity_id': 'count'})
        duplicated_opp_id = df_agg[df_agg['opportunity_id'] > 1].index.tolist()
        if len(duplicated_opp_id) > 0:
            df = df[~df['opportunity_id'].isin(duplicated_opp_id)]
        return df

    def get_opportunities_without_questionnaires_query(self):
        """Return the SQL (with a %(opportunity_ids)s parameter) selecting
        ACCREDITED opportunities that have no questionnaire document yet.
        """
        query = """ SELECT
                        cjr.opportunity_id,
                        NULL AS resource_id,
                        0 as has_questionnaire_flag,
                        cjr.role_type_id,
                        sl.source_id,
                        so.created_at AS opportunity_created_at,
                        so.closed_at AS opportunity_closed_at,
                        so.data_channel_id,
                        so.customer_association_id,    
                        b2bc.employees,
                        oct.first_name AS contact_first_name,
                        oct.last_name AS contact_last_name,                                                
                        oct_2.contact_phone_number,
                        oct_3.contact_email,
                        cjr.person_type_id,
                        so.collateral_value,
                        so.collateral_usage,
                        so.collateral_usage_type_id,
                        so.collateral_location,
                        so.collateral_neighborhood,
                        so.collateral_municipality,
                        so.collateral_reg_combined_code,
                        so.collateral_inegi_code,
                        so.property_tax_account,
                        so.collateral_zip_code, 
                        so.lead_id,
                        so.contact_id,
                        so.user_owner_id,
                        so.status_id,
                        so.income_type_id,
                        so.budget_amount_currency,    
                        so.monthly_sales,
                        so.budget_amount_value,        
                        so.monthly_profits,
                        so.monthly_formal_income,
                        so.loan_purpose,
                        so.primary_clients,
                        so.product_id,
                        so.cj_score,
                        so.risk_level_score,
                        so.risk_level_id,        
                        so.collateral_type_id,
                        so.owner_experience_years,
                        so.debt_service_coverage_ratio,
                        NULL AS document_type_id,
                        NULL AS document,
                        NULL AS document_name
                    FROM cj_requirements cjr
                    LEFT JOIN orocrm_sales_opportunity so ON so.id = cjr.opportunity_id
                    LEFT JOIN orocrm_sales_lead sl ON sl.id = so.lead_id
                    LEFT JOIN orocrm_sales_customer c ON c.id = so.customer_association_id
                    LEFT JOIN orocrm_sales_b2bcustomer b2bc ON b2bc.id = c.b2b_customer_188b774c_id
                    LEFT JOIN orocrm_contact oct ON oct.id = so.contact_id
                    LEFT JOIN (SELECT owner_id AS contact_id, GROUP_CONCAT(phone SEPARATOR ' / ') AS contact_phone_number FROM orocrm_contact_phone GROUP BY owner_id) oct_2 ON oct_2.contact_id = so.contact_id
                    LEFT JOIN (SELECT owner_id AS contact_id, GROUP_CONCAT(email SEPARATOR ' / ') AS contact_email FROM orocrm_contact_email GROUP BY owner_id) oct_3 ON oct_3.contact_id = so.contact_id
                    -- extract only the last requirement for each opportunity id to avoid repeated opportunities
                    JOIN (SELECT
                              cjr.opportunity_id,
                              MAX(cjr.id) AS cjr_id
                          FROM cj_requirements cjr 
                          WHERE
                              cjr.role_type_id = 'ACCREDITED'
                              AND cjr.resource_id IS NOT NULL
                              AND cjr.person_type_id IS NOT NULL
                              AND cjr.opportunity_id IS NOT NULL
                          GROUP BY cjr.opportunity_id) t ON t.cjr_id = cjr.id
                    WHERE
                        cjr.role_type_id = 'ACCREDITED'
                        AND cjr.resource_id IS NOT NULL
                        AND cjr.opportunity_id IS NOT NULL
                        AND cjr.person_type_id IS NOT NULL
                        AND cjr.opportunity_id NOT IN %(opportunity_ids)s;"""
        return query

    def get_opportunities_without_questionnaires(self, df):
        """Fetch opportunities not already present in *df* (parameterized
        through pd.read_sql, not string formatting).
        """
        query = self.get_opportunities_without_questionnaires_query()
        df_wq = pd.read_sql(
            sql=query,
            con=self.aurum_db.conn,
            params={'opportunity_ids': df['opportunity_id'].unique().tolist()})
        return df_wq

    def get_extra_columns(self, df):
        """Add derived feature columns to *df*."""
        df = self.get_time_delta_columns(df)
        return df

    def get_time_delta(self, future_event, past_event, time='days'):
        """Return the elementwise delta between two datetime Series.

        :param future_event: minuend (Series of datetimes or a scalar
            Timestamp broadcast by pandas).
        :param past_event: subtrahend Series.
        :param time: 'days' for a day count, 'years' for floor(days / 365).
        :raises ValueError: if ``time`` is not a supported unit.
        """
        days_delta = future_event - past_event
        # NaT deltas (missing dates) count as zero elapsed time.
        days_delta = days_delta.fillna(0)
        days_delta = days_delta.apply(lambda x: x.days)
        if time == 'days':
            return days_delta
        if time == 'years':
            years_delta = np.floor(days_delta / 365).astype(int)
            return years_delta
        # Previously an unknown unit fell through and returned None silently.
        raise ValueError(f"unsupported time unit: {time!r}")

    def get_time_delta_columns(self, df):
        """Cast the date columns and add age / elapsed-days features."""
        # cast
        df['fecha_nacimiento'] = pd.to_datetime(df['fecha_nacimiento'])
        df['fecha_constitucion'] = pd.to_datetime(df['fecha_constitucion'])
        df['opportunity_created_at'] = pd.to_datetime(
            df['opportunity_created_at'])
        df['opportunity_closed_at'] = pd.to_datetime(
            df['opportunity_closed_at'])
        # compute
        df['client_age'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['fecha_nacimiento'],
            time='years')
        df['company_age'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['fecha_constitucion'],
            time='years')
        df['days_delta_today_and_opportunity_created_at'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['opportunity_created_at'],
            time='days')
        df['days_delta_today_and_opportunity_closed_at'] = self.get_time_delta(
            future_event=pd.to_datetime('today'),
            past_event=df['opportunity_closed_at'],
            time='days')
        df['days_delta_opportunity_closed_at_and_created_at'] = self.get_time_delta(
            future_event=df['opportunity_closed_at'],
            past_event=df['opportunity_created_at'],
            time='days')
        return df

    def get_legal_guardian_df(self):
        """Fetch legal-guardian name/gender (JSON fields) for won
        opportunities, keeping only the latest document per opportunity.
        """
        legal_guardian_df = self.aurum_db.custom_query('''
            SELECT
               cjr.opportunity_id,
               json_unquote(json_extract(cjd.data, '$.nombre_completo.nombre')) AS legal_guardian_first_name,
               json_unquote(json_extract(cjd.data, '$.nombre_completo.apellido_paterno')) AS legal_guardian_paternal_last_name,
               json_unquote(json_extract(cjd.data, '$.nombre_completo.apellido_materno')) AS legal_guardian_maternal_last_name,
               json_unquote(json_extract(cjd.data, '$.genero')) AS legal_guardian_gender
            FROM oro_crm.cj_documents AS cjd
            JOIN oro_crm.cj_requirements AS cjr ON cjd.id = cjr.resource_id
            JOIN orocrm_sales_opportunity AS so ON cjr.opportunity_id = so.id
            JOIN (SELECT
              cjr.opportunity_id,
              MAX(cjr.resource_id) AS resource_id
            FROM oro_crm.cj_requirements cjr
            WHERE role_type_id = 'LEGAL_GUARDIAN'
            AND resource_type = 'Document'
            AND cjr.opportunity_id IS NOT NULL
            AND cjr.resource_id IS NOT NULL
            GROUP BY cjr.opportunity_id) tt ON tt.resource_id = cjr.resource_id
            WHERE cjr.role_type_id = 'LEGAL_GUARDIAN'
                  AND cjr.opportunity_id IS NOT NULL
                  AND cjr.resource_id IS NOT NULL
            AND so.status_id = 'won';
            ''')
        return legal_guardian_df

    def get_interest_rate_and_loan_unique_id(self):
        """Fetch loan unique identifier (loany uid preferred) and base rate
        per opportunity.
        """
        loany_df = self.aurum_db.custom_query('''
            SELECT
                IFNULL(o.loany_loan_box_uid, o.loan_unique_identifier) loan_unique_identifier,
                o.id as opportunity_id,
                o.loan_interest_rate as base_interest_rate
            FROM oro_crm.orocrm_sales_opportunity as o;
            ''')
        return loany_df

    def start_process(self):
        """Entry point: build the basetable into ``self.data_set``."""
        self.data_set = self.get_aurum_basetable()

    def add_indexes(self):
        """Add a DB index on opportunity_id, per the basetable index spec."""
        for index in aurum_basetable_d['index_column']:
            if index == 'opportunity_id':
                self.db_engine.execute(
                    f"ALTER TABLE `{self.DB_ALIAS}`.`{self.TABLE_ALIAS}` ADD INDEX ({index});"
                )
Beispiel #29
0
class OwnersPreprocess(DBPreprocess):
    """
    Opportunity-Owners preprocessing.

    Loads the owner/status audit trail of every sales opportunity from the
    CRM database and derives per-opportunity features: the current owner and
    status, the first/last "Sales Filter" and "Closer" assignments, the time
    elapsed to each of them, and the time from creation to each sales-funnel
    stage.
    """

    DB_ALIAS = "owners"
    TABLE_ALIAS = "user_opportunities"

    # Audit-trail query: one row per opportunity, with GROUP_CONCAT'ed
    # histories of owner names / roles / timestamps and status changes.
    # Timestamps are converted from UTC to America/Mexico_City in-query.
    query_owners = '''
    SELECT
      id opp_id,
      created_at,
      owner_names_record,
      role_names_record,
      owner_at_record,
      status_record,
      status_at_record,
      budget_amount_value,
      close_revenue_value
    FROM (
    SELECT
        o.id,
        CONVERT_TZ(o.created_at,'UTC','America/Mexico_City') created_at,
        group_concat(CONCAT(u.first_name,' ', u.last_name) )owner_names_record,
        group_concat(u_role_name.label )role_names_record,
        group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City') )owner_at_record,
        status_table.status_record,
        status_table.status_at_record,
        o.budget_amount_value,
        o.close_revenue_value
      FROM oro_crm.orocrm_sales_opportunity o
        LEFT JOIN oro_crm.oro_audit a ON a.object_id=o.id
        LEFT JOIN oro_crm.oro_audit_field af ON af.audit_id=a.id
    
        LEFT JOIN oro_crm.oro_user u ON u.username=af.new_text OR CONCAT(u.first_name,' ', u.last_name)=af.new_text OR u.email=af.new_text
    
        LEFT JOIN oro_crm.oro_user_access_role u_role_id ON u_role_id.user_id=u.id
        LEFT JOIN oro_crm.oro_access_role u_role_name ON u_role_name.id=u_role_id.role_id
          LEFT JOIN (
    
            SELECT
              group_concat(af.new_text) status_record,
              group_concat(CONVERT_TZ(a.logged_at,'UTC','America/Mexico_City')) status_at_record,
              o.id opp_id
            FROM oro_crm.orocrm_sales_opportunity o
              LEFT JOIN oro_crm.oro_audit a ON a.object_id=o.id
              LEFT JOIN oro_crm.oro_audit_field af ON af.audit_id=a.id
            WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
              AND af.field='status'
            GROUP BY o.id
    
            ) AS status_table ON status_table.opp_id=o.id
          WHERE a.object_class='Oro\\\Bundle\\\SalesBundle\\\Entity\\\Opportunity'
            AND af.field='owner'
          GROUP BY o.id
         ) AS audit_owner;
        '''

    def __init__(self, **kwargs):
        """
        Opportunity-Owners relation; all setup is delegated to DBPreprocess.
        """
        super().__init__(**kwargs)

    def init_data_set(self):
        """Connect to the CRM database and load the raw owner/status audit records."""
        self.crm_connector = DBConnect(CRM_CREDENTIALS)
        self.data_set = self.crm_connector.custom_query(self.query_owners)

    def start_process(self):
        """
        Run the full feature-extraction pipeline over the audit records,
        then drop the auxiliary history columns and normalize datetimes.
        """
        self.init_data_set()

        self.get_current_owner_info()
        self.get_current_status_info()

        self.sort_column_by_another('role_names_record', 'owner_at_record')
        self.get_last_filter_data()
        self.get_last_closer_data()
        self.get_first_filter_data()
        self.get_first_closer_data()
        self.get_time_to_first_filter()
        self.get_time_to_first_closer()
        self.get_stage_transition_columns()

        # The *_record history columns (and the per-stage helper columns
        # collected in cols_to_drop) are only needed during extraction.
        self.drop_auxiliar_columns([
            "status_at_record", "status_record", "owner_at_record",
            "role_names_record", "owner_names_record"
        ] + self.cols_to_drop)

        # Every derived column whose name contains "_at" holds a timestamp.
        datetime_cols = [
            column for column in self.data_set.columns if "_at" in column
        ]
        self.ensure_datetime_type(datetime_cols)

    def get_current_owner_info(self):
        """Take the chronologically last owner name/timestamp from the history."""
        self.sort_column_by_another('owner_names_record', 'owner_at_record')
        self.data_set["current_owner_name"] = self.data_set[
            'owner_names_record'].str[-1]
        self.data_set["current_owner_at"] = self.data_set[
            "owner_at_record"].str[-1]

    def get_current_status_info(self):
        """Take the chronologically last status name/timestamp from the history."""
        self.sort_column_by_another('status_record', 'status_at_record')
        self.data_set["current_status_name"] = self.data_set[
            "status_record"].str[-1]
        self.data_set["current_status_at"] = self.data_set[
            "status_at_record"].str[-1]

    def get_last_filter_data(self):
        """Extract name/timestamp of the last owner with the "Sales Filter" role."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Sales Filter",
            first=False,
            column_names=["last_filter_name", "last_filter_at"])

    def get_last_closer_data(self):
        """Extract name/timestamp of the last owner with the "Closer" role."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Closer",
            first=False,
            column_names=["last_closer_name", "last_closer_at"])

    def get_first_filter_data(self):
        """Extract name/timestamp of the first owner with the "Sales Filter" role."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Sales Filter",
            first=True,
            column_names=["first_filter_name", "first_filter_at"])

    def get_first_closer_data(self):
        """Extract name/timestamp of the first owner with the "Closer" role."""
        self.extract_feature_relation(
            list_column="role_names_record",
            target_columns=["owner_names_record", "owner_at_record"],
            search_indexer="Closer",
            first=True,
            column_names=["first_closer_name", "first_closer_at"])

    def get_time_to_first_filter(self):
        """
        Minutes from opportunity creation to the first Sales Filter assignment.
        Tip: for not to have noise in the analysis drop data_set.time_to_first_filter < pd.to_timedelta(0)
        in the dashboard backend
        :return:
        """
        # created_at is assumed already datetime-typed here (it comes straight
        # from the SQL CONVERT_TZ) — TODO confirm; only first_filter_at is coerced.
        delta = pd.to_datetime(
            self.data_set.first_filter_at) - self.data_set.created_at
        minutes = delta.dt.total_seconds() / 60
        minutes = minutes.round(2)
        self.data_set["time_to_first_filter"] = minutes

    def get_time_to_first_closer(self):
        """
        Minutes from the first Sales Filter assignment to the first Closer assignment.
        Tip: for not to have noise in the analysis drop data_set.time_to_first_closer < pd.to_timedelta(0)
        in the dashboard backend
        :return:
        """
        delta = pd.to_datetime(self.data_set.first_closer_at) - pd.to_datetime(
            self.data_set.first_filter_at)
        minutes = delta.dt.total_seconds() / 60
        minutes = minutes.round(2)
        self.data_set["time_to_first_closer"] = minutes

    @staticmethod
    def _status_slug(status):
        """Normalize a Spanish funnel-stage label into a column-name token
        (lowercase, spaces to underscores, 'ú' to 'u')."""
        return status.lower().replace(" ", "_").replace("ú", "u")

    def get_stage_transition_columns(self):
        """
        In hours, from opportunity creation to each last status
        Warning: This works on the last status founded!
        :return:
        """
        self.sort_column_by_another('status_record', 'status_at_record')
        for status in constants.SALES_FUNNEL_ORDER_ESP:
            self.extract_feature_relation(
                list_column="status_record",
                target_columns=["status_at_record"],
                search_indexer=status,
                first=False,
                column_names=["last_{}_at".format(self._status_slug(status))])
        self._get_stage_transition_times()

    def _get_stage_transition_times(self):
        """
        For each funnel stage, compute hours from opportunity creation to the
        stage's last timestamp, and mark the helper `last_*_at` column for
        later dropping.
        """
        self.cols_to_drop = []
        for status in constants.SALES_FUNNEL_ORDER_ESP:
            slug = self._status_slug(status)
            column_name = "last_{}_at".format(slug)
            self.cols_to_drop.append(column_name)
            delta = pd.to_datetime(
                self.data_set[column_name]) - self.data_set['created_at']
            hours = delta.dt.total_seconds() / 3600
            hours = hours.round(2)
            self.data_set["{}_delta".format(slug)] = hours
# Beispiel #30
# 0
 def __init__(self, valid_document_types_id=[1, 2, 6, 7, 12], **kwargs):
     super().__init__(**kwargs, charset='utf8')
     self.valid_document_types_id = valid_document_types_id
     self.aurum_db = DBConnect(CRM_CREDENTIALS)