Example #1
def generate_person_id_construction(schema):

    # extract the distinct (BirthDate, LastName) combinations used to build person IDs
    query = """
    select distinct concat("BirthDate","LastName") as "combinaison"
    from "CUSTOMERS"."MASTER_ID"
    where "GoodCombinaison" = 1
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    data = pd.read_sql(query, con=engine)
    engine.close()

    query = """
    
    CREATE TABLE "{}"."ID_PERSON"(
    
        "PERSON_ID" SERIAL,
        "combinaison" TEXT
    )
    
    """.format(schema)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()


    InsertTableIntoDatabase(data, TlbName="ID_PERSON", Schema=schema,
                            database_name="Creacard_Calypso",
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)


    query = """
    
    update "CUSTOMERS"."MASTER_ID"
    set "PERSON_ID" = T1."PERSON_ID"
    from "CUSTOMERS"."ID_PERSON" as T1
    where concat("CUSTOMERS"."MASTER_ID"."BirthDate", "CUSTOMERS"."MASTER_ID"."LastName") = T1."combinaison"
    
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    update "CUSTOMERS"."MASTER_ID"
    set "PERSON_ID" = concat("USER_ID",'_',"MOBILE_ID")
    where "GoodCombinaison" = 0 and "PERSON_ID" is null 
    
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()
def update_output_div(input_value):

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

    query = """

    SELECT "table_name"
      FROM "information_schema"."tables"
     WHERE table_schema IN ('{}')

    """.format(input_value)

    data = pd.read_sql(query, con=engine)
    tmp_list = data["table_name"].tolist()
    del data

    if not tmp_list:
        val = ""
    else:
        val = tmp_list[0]

    options = [{'label': i, 'value': i} for i in tmp_list]

    engine.close()

    return options, val
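This function has the shape of a Dash callback that fills a dropdown with the tables of the selected schema. A minimal wiring sketch, assuming the app defined later in these examples and hypothetical component ids (schema-dropdown, table-dropdown):

from dash.dependencies import Input, Output

# Hypothetical wiring sketch: the component ids are assumptions, not part of
# the original layout.
app.callback(
    [Output("table-dropdown", "options"), Output("table-dropdown", "value")],
    [Input("schema-dropdown", "value")],
)(update_output_div)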
Example #3
def add_fees_others_transactions(database_type, database_name, _year, _month, _day, **kwargs):

    _tlbname = kwargs.get('tlbname', "FEES_TRANSACTIONS")
    _schema = kwargs.get('schema', "TRANSACTIONS")

    date_start = datetime.datetime(_year, _month, _day)
    date_start_cond = str(date_start)[0:10]
    end_date = date_start + datetime.timedelta(days=1)
    end_date = str(end_date)[0:10]

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # check if the date has already been treated
    query = """
       select count(*)
       from "{}"."{}"
       where "TransactionTime" >= '{}' and "TransactionTime" < '{}'
       """.format(_schema,_tlbname, date_start_cond, end_date)

    data = pd.read_sql(query, con=engine)

    if data.iloc[0, 0] == 0:

        querytmp = """

                SELECT "CardHolderID","MCC","Fee","Surcharge","TransactionTP","TransactionTime","Currency",
                "CardVPUType", "MerchantAddress", "MerchantCity", "MerchantCountry", "MerchantID", "TransactionID"
                FROM "TRANSACTIONS_MONTHLY"."MONTHLY_TRANSACTIONS_{}"
                where "DebitCredit" IN ('Debit') and "TransactionTP" ~* 'fee' and "TransactionTP" !~* 'reversal'
                and "TransactionResult" = 'APPROVED' and "TransactionTime" >= '{}' and "TransactionTime" < '{}'

           """.format(str(date_start.year) + str(date_start.month), date_start_cond, end_date)

        query = """
           insert into "{}"."{}"
           {}
           """.format(_schema, _tlbname, querytmp)

        engine.execute(query)

        query_update = """
            update "TRANSACTIONS"."FEES_TRANSACTIONS"
            set "Surcharge" = ABS("Surcharge")
            where "TransactionTP" = 'FX Fee' and "Surcharge" < 0
        """

        engine.execute(query_update)


        engine.close()

    else:
        print("this data had been already treated")
Example #4
def create_master_id(schema):

    query = """
    
    CREATE TABLE "{}"."MASTER_ID" as 
    select *, null::integer as "MOBILE_ID", null::bigint as "USER_ID", null::integer as "CONTACT_ID", null::text as "PERSON_ID",  null::bigint as "MOVIDON_ID"
    from "{}"."TMP_USER_ID"
    
    """.format(schema, schema)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    ALTER TABLE "CUSTOMERS"."MASTER_ID"
    ALTER COLUMN "CONTACT_ID" TYPE VARCHAR(50)
    
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()
Example #5
def add_new_atm_transactions(database_type, database_name, _year, _month, _day, **kwargs):

    _tlbname = kwargs.get('tlbname', "ATM_TRANSACTIONS")
    _schema = kwargs.get('schema', "TRANSACTIONS")


    date_start = datetime.datetime(_year, _month, _day)
    date_start_cond = str(date_start)[0:10]
    end_date = date_start + datetime.timedelta(days=1)
    end_date = str(end_date)[0:10]

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # check if the date has already been treated
    query = """
       select count(*)
       from "{}"."{}"
       where "TransactionTime" >= '{}' and "TransactionTime" < '{}'
       """.format(_schema,_tlbname, date_start_cond, end_date)

    data = pd.read_sql(query, con=engine)

    if data.iloc[0, 0] == 0:

        querytmp = """

                    SELECT "CardHolderID","MCC","Amount","MerchantName","TransactionTime","Currency",
                    "CardVPUType", "MerchantAddress", "MerchantCity", "MerchantCountry", "MerchantID", "TransactionID",
                    CASE WHEN "TransactionTP" in ('ATM International') then 1
                    else 0
                    end as "IsInternational","TransactionTP"
                    FROM "TRANSACTIONS_MONTHLY"."MONTHLY_TRANSACTIONS_{}"
                    where "TransactionTP" IN ('ATM Domestic','ATM International') 
                    and "DebitCredit" IN ('Debit') 
                    and "TransactionResult" = 'APPROVED' and "TransactionTime" >= '{}' and "TransactionTime" < '{}'

           """.format(str(date_start.year) + str(date_start.month), date_start_cond, end_date)

        query = """
           insert into "{}"."{}"
           {}
           """.format(_schema,_tlbname, querytmp)

        engine.execute(query)
        engine.close()

    else:
        print("this data had been already treated")
Example #6
def add_new_loads_transactions(database_type, database_name, _year, _month, _day, **kwargs):

    _tlbname = kwargs.get('tlbname', "LOADS_TRANSACTIONS")
    _schema = kwargs.get('schema', "TRANSACTIONS")

    date_start = datetime.datetime(_year, _month, _day)
    date_start_cond = str(date_start)[0:10]
    end_date = date_start + datetime.timedelta(days=1)
    end_date = str(end_date)[0:10]

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # check if the date has already been treated
    query = """
       select count(*)
       from "{}"."{}"
       where "TransactionTime" >= '{}' and "TransactionTime" < '{}'
       """.format(_schema,_tlbname, date_start_cond, end_date)

    data = pd.read_sql(query, con=engine)

    if data.iloc[0, 0] == 0:

        querytmp = """

                SELECT "CardHolderID","MCC","Amount","TransactionTP","TransactionTime","Currency",
                "CardVPUType", "MerchantAddress", "MerchantCity", "MerchantCountry", "MerchantID", "TransactionID"
                FROM "TRANSACTIONS_MONTHLY"."MONTHLY_TRANSACTIONS_{}"
                WHERE "DebitCredit" IN ('Credit') and "TransactionResult" = 'APPROVED' 
                AND "TransactionTP" IN ('Voucher load','Terminal Load','Sepa Incoming Payment','Card to Card In','INTERNET DEBIT/CREDIT')
                and "TransactionTime" >= '{}' and "TransactionTime" < '{}'

           """.format(str(date_start.year) + str(date_start.month), date_start_cond, end_date)

        query = """
           insert into "{}"."{}"
           {}
           """.format(_schema, _tlbname, querytmp)

        engine.execute(query)
        engine.close()

    else:
        print("this data had been already treated")
Example #7
def copy_to_csv(query, local_filename, local_folder, connexion_name, **kwargs):
    tic = time.time()

    to_compress = kwargs.get('compression', False)
    _remove_csv_file = kwargs.get('remove_csv_file', False)
    _delimiter =  kwargs.get('delimiter', ';')

    local_filename = local_filename + "_" + str(datetime.datetime.now())[0:10].replace("-", "").replace(":",
                                                                                                        "").replace(
        ".", "").replace(" ", "") + ".csv"

    local_path = local_folder + local_filename

    # extract the query results to a csv file using Postgres' built-in COPY
    # command, passing the query and the destination file path

    cmd = """

    COPY ({}) TO '{}' WITH CSV HEADER DELIMITER '{}';

    """.format(query, local_path, _delimiter)

    engine = connect_to_database("Postgres", connexion_name).CreateEngine()

    engine.execute(cmd)

    engine.close()

    if to_compress:
        cmd = "Compress-Archive " + local_path + " " + local_folder + local_filename.replace(".csv", "") + ".zip"
        p = subprocess.Popen(['powershell.exe', cmd])
        # wait until the process finishes
        p.wait()
        print("compression of the file is finished")

        local_path = local_folder + local_filename.replace(".csv", "") + ".zip"

    if _remove_csv_file:
        os.remove(local_folder + local_filename)

    print("file extraction from postgres took {} seconds".format(time.time() - tic))

    return local_path, local_filename
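A usage sketch, assuming the "Creacard_Calypso" connection used in the other examples; note that COPY writes the file on the machine hosting the Postgres server, and the compression step relies on PowerShell (paths below are illustrative only):

# Export a table to a dated csv, zip it and remove the plain csv; the
# function returns the path and name of the produced file.
query = 'select * from "TRANSACTIONS"."FEES_TRANSACTIONS"'
local_path, local_filename = copy_to_csv(query,
                                         local_filename="fees_transactions",
                                         local_folder="C:/exports/",
                                         connexion_name="Creacard_Calypso",
                                         compression=True,
                                         remove_csv_file=True)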
Example #8
def insert_into_postgres_copyfrom(df, database_type, database_name, schema, TlbName, **kwargs):

    _encod = kwargs.get('encoding', 'utf-8')

    # data formatting before insert

    for date_var in df.dtypes[df.dtypes == "datetime64[ns]"].index:
        df[date_var] = df[date_var].astype(str).replace("NaT", "None")

    for var_name in df.dtypes[df.dtypes == "object"].index:
        df[var_name] = df[var_name].str.replace(",", "")
        df[var_name] = df[var_name].astype('category')

    output = StringIO.StringIO()
    df.reset_index(drop=True).to_csv(output, header=None, index=False, sep=",", na_rep=None, encoding=_encod)
    output.seek(0)

    con = connect_to_database(database_type, database_name).CreateEngine()
    cur = con.connection.cursor()
    cur.copy_from(output, '"{}"."{}"'.format(schema, TlbName), sep=",", null="None")
    con.connection.commit()
    con.close()
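A sketch of how this COPY FROM helper might be called, assuming a target table whose columns match the DataFrame (the table name below is hypothetical):

import pandas as pd

# Hypothetical frame; its columns must match the target table definition.
df = pd.DataFrame({"CardHolderID": ["111", "222"],
                   "Amount": [10.5, None]})

# Stream the frame into the table through psycopg2's copy_from; missing
# values are written as the string "None" and stored as NULL thanks to the
# null="None" argument used above.
insert_into_postgres_copyfrom(df, "Postgres", "Creacard_Calypso",
                              schema="TRANSACTIONS", TlbName="EXAMPLE_TABLE")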
def update_output_div(schema,tlb_name):

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

    query = """

    select column_name,data_type 
    from information_schema.columns 
    where table_name = '{}'
    and table_schema IN ('{}')


    """.format(schema,tlb_name)
    data = pd.read_sql(query, con=engine)

    engine.close()

    col_name = list({"name": i, "id": i} for i in data.columns)
    datas = data.to_dict('records')


    return col_name, datas
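This variant looks like the callback behind a dash_table.DataTable listing the columns of the selected table; a wiring sketch with hypothetical component ids:

from dash.dependencies import Input, Output

# Hypothetical wiring sketch: the component ids are assumptions.
app.callback(
    [Output("columns-table", "columns"), Output("columns-table", "data")],
    [Input("schema-dropdown", "value"), Input("table-dropdown", "value")],
)(update_output_div)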
Example #10
def calypso_ids_production(schema_main, connexion_postgres):
    """Compute new ids and ids that changed overtime

        Parameters
        -----------
        schema_main: str
            schema where ids are stored
        connexion_postgres: str
            name of the postgres connection (as referenced in the conf_python file)

    """

    # extract the exclusion conditions stored on the computer (conf_python json file)
    if sys.platform == "win32":
        folder_json = os.path.expanduser(
            '~') + "\\conf_python\\unique_id_conditions.json"
    else:
        folder_json = os.environ[
            'HOME'] + "/conf_python/unique_id_conditions.json"
    with open(folder_json, 'r') as JSON:
        conditions = json.load(JSON)

    condition = conditions["exclusion_cartes"]["request"]
    condition_on_email = conditions["condition_email"]["dataframe"]
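    # Based on the keys read above, the json is expected to look roughly like
    # (values are illustrative only):
    # {
    #     "exclusion_cartes": {"request": "<SQL exclusion condition>"},
    #     "condition_email": {"dataframe": "<regex of e-mails to flag>"},
    #     "condition_combinaison": {"LastName": "<regex of placeholder last names>",
    #                               "BirthDate": "<comma-separated dates to exclude>"}
    # }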

    # generate new ids and handle customer information that changed

    # Step 0:
    # - extract new cards
    # - extract cards already associated to a user id but whose information changed
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    query = """

    select T1."CardHolderID", T1."NoMobile", lower(T1."Email") as "Email",
    T1."FirstName", T1."LastName", T1."BirthDate", T1."PostCode", T1."Address1", T1."Address2",
    T1."ActivationDate"
    from "CARD_STATUS"."STATUS_CARTES" as T1
    left join "{}"."MASTER_ID" as T2
    on T1."CardHolderID" = T2."CardHolderID"
    where (T1."NoMobile" is not null) and (T1."Email" !~* '.*creacard.*|.*prepaidfinancial.*|.*financial.*')
    and T2."USER_ID" is null

    UNION ALL


    select T1."CardHolderID", T1."NoMobile", lower(T1."Email") as "Email",
    T1."FirstName", T1."LastName", T1."BirthDate", T1."PostCode", T1."Address1", T1."Address2",
    T1."ActivationDate"
    from "CARD_STATUS"."STATUS_CARTES" as T1
    Join(
       select "CardHolderID"
       from "CARD_STATUS"."CHANGE_CUSTOMERS_CARTES"
       where "dt_change" >= date(now() - INTERVAL '1 DAY')::timestamp and
       ("Is_ch_BirthDate" = 1 or "Is_ch_Email" = 1 or "Is_ch_LastName" = 1 or "Is_ch_NoMobile" = 1) and 
       "NoMobile" is not null and "Email" !~* '.*creacard.*|.*prepaidfinancial.*|.*financial.*'
    ) as T2
    on T1."CardHolderID" = T2."CardHolderID"

    """.format(schema_main)

    data = pd.read_sql(query, con=engine)

    engine.close()

    # associate new ids to the identified cards
    if not data.empty:

        for var in [
                "FirstName", "LastName", "Address1", "Address2", "PostCode",
                "Email"
        ]:
            data[var] = data[var].str.encode('utf-8').astype(str)
            data.loc[data[var].isnull(), var] = ""
            data[var] = data[var].str.strip(" ")
            data[var] = data[var].str.replace(" ", "")
            data[var] = data[var].str.lower()

        data = data[~data["Email"].str.contains(
            '.*creacard.*|.*prepaidfinancial.*|.*financial.*', regex=True)]

        data["GoodEmail"] = 1
        data.loc[data["Email"].str.contains(condition_on_email, regex=True),
                 "GoodEmail"] = 0

        data["GoodCombinaison"] = 1
        data.loc[(data["LastName"].str.contains(
            conditions["condition_combinaison"]["LastName"], regex=True)) |
                 (data["BirthDate"].isnull()) |
                 (data["BirthDate"].isin(conditions["condition_combinaison"]
                                         ["BirthDate"].split(","))),
                 "GoodCombinaison"] = 0

        # Delete leading "00" at the start of string.

        data["NoMobile"] = data["NoMobile"].str.replace("^00", "", regex=True)

        # remove a trailing '.0'

        data["NoMobile"] = data["NoMobile"].str.replace("\.0$", "", regex=True)

        # remove literal '|' characters from the string

        data["NoMobile"] = data["NoMobile"].str.replace("\|", "", regex=True)

        query = """

           DROP TABLE IF EXISTS "{}"."TMP_USER_ID" CASCADE

           """.format(schema_main)

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

        query = """


           CREATE TABLE "{}"."TMP_USER_ID"(

               "CardHolderID" VARCHAR(50),
               "NoMobile" TEXT,
               "Email" TEXT,
               "FirstName" TEXT,
               "LastName" TEXT,
               "BirthDate" TEXT,
               "PostCode" TEXT,
               "Address1" TEXT,
               "Address2" TEXT,
               "ActivationDate" timestamp without time zone,
               "GoodEmail" INTEGER,
               "GoodCombinaison" INTEGER,
               "MOBILE_ID" INTEGER,
               "USER_ID" BIGINT,
               "CONTACT_ID" VARCHAR(50),
               "PERSON_ID" TEXT,
               "MOVIDON_ID" BIGINT
           )

           """.format(schema_main)

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

        data = data[~data["NoMobile"].isnull()]
        data["MOBILE_ID"] = None
        data["USER_ID"] = None
        data["CONTACT_ID"] = None
        data["PERSON_ID"] = None
        data["MOVIDON_ID"] = None

        InsertTableIntoDatabase(data,
                                TlbName="TMP_USER_ID",
                                Schema=schema_main,
                                database_name=connexion_postgres,
                                database_type="Postgres",
                                DropTable=False,
                                InstertInParrell=False)

        # STEP 1 : handle new id mobile

        query = """


           update "{}"."TMP_USER_ID"
           set "MOBILE_ID" = T1."MOBILE_ID"
           from "{}"."ID_MOBILE" as T1
           where "{}"."TMP_USER_ID"."NoMobile" = T1."NoMobile"

           """.format(schema_main, schema_main, schema_main)

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)

        query = """

           select "NoMobile", count(*) as "NUM_CARTES"
           from "CUSTOMERS"."TMP_USER_ID"
           where "MOBILE_ID" is null
           group by "NoMobile"

           """

        data = pd.read_sql(query, con=engine)

        engine.close()

        if not data.empty:
            # since SERIAL was used to create the table, the ID does not need to
            # be specified from Python: Postgres automatically assigns an
            # incremental MOBILE_ID to each newly inserted row
            InsertTableIntoDatabase(data,
                                    TlbName="ID_MOBILE",
                                    Schema='CUSTOMERS',
                                    database_name=connexion_postgres,
                                    database_type="Postgres",
                                    DropTable=False,
                                    InstertInParrell=False)

            query = """

               update "CUSTOMERS"."TMP_USER_ID"
               set "MOBILE_ID" = T1."MOBILE_ID"
               from "CUSTOMERS"."ID_MOBILE" as T1
               where "CUSTOMERS"."TMP_USER_ID"."NoMobile" = T1."NoMobile" and "CUSTOMERS"."TMP_USER_ID"."MOBILE_ID" is null


               """

            engine = connect_to_database("Postgres",
                                         connexion_postgres).CreateEngine()
            engine.execute(query)
            engine.close()

            # STEP 2 : handle new CONTACT_ID

        query = """
           update "CUSTOMERS"."TMP_USER_ID"
           set "CONTACT_ID" = "CardHolderID"
           """

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

        # STEP 3 : handle new PERSON_ID

        query = """


           update "CUSTOMERS"."TMP_USER_ID"
           set "PERSON_ID" = T1."PERSON_ID"
           from "CUSTOMERS"."ID_PERSON" as T1
           where concat("CUSTOMERS"."TMP_USER_ID"."BirthDate", "CUSTOMERS"."TMP_USER_ID"."LastName") = T1."combinaison"
           and "CUSTOMERS"."TMP_USER_ID"."GoodCombinaison" = 1

           """

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)

        query = """

           select distinct concat("BirthDate", "LastName") as "combinaison"
           from "CUSTOMERS"."TMP_USER_ID"
           where "PERSON_ID" is null and "GoodCombinaison" = 1


           """

        data = pd.read_sql(query, con=engine)

        engine.close()

    if not data.empty:
        # since SERIAL was used to create the table, the ID does not need to
        # be specified from Python: Postgres automatically assigns an
        # incremental PERSON_ID to each newly inserted row
        InsertTableIntoDatabase(data,
                                TlbName="ID_PERSON",
                                Schema='CUSTOMERS',
                                database_name=connexion_postgres,
                                database_type="Postgres",
                                DropTable=False,
                                InstertInParrell=False)

        query = """

           update "CUSTOMERS"."TMP_USER_ID"
           set "PERSON_ID" = T1."PERSON_ID"
           from "CUSTOMERS"."ID_PERSON" as T1
           where concat("CUSTOMERS"."TMP_USER_ID"."BirthDate", "CUSTOMERS"."TMP_USER_ID"."LastName") = T1."combinaison"
           and "CUSTOMERS"."TMP_USER_ID"."GoodCombinaison" = 1 and "CUSTOMERS"."TMP_USER_ID"."PERSON_ID" is null

           """

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

    # STEP 4.1: make sure we keep the max USER_ID
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    query = """

    select max("USER_ID") as "user_id_max"
    from "CUSTOMERS"."MASTER_ID"

    """

    id_max = pd.read_sql(query, con=engine)
    id_max = int(id_max.loc[0, "user_id_max"])

    engine.close()

    # STEP 4.2: identify cards that can be associated to a known USER_ID
    query = """
       UPDATE "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from 
       (select "CardHolderID", "USER_ID"::integer
       from "CUSTOMERS"."MASTER_ID") as T1
       where "CUSTOMERS"."TMP_USER_ID"."CardHolderID" = T1."CardHolderID"
       """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

       update "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from "CUSTOMERS"."ID_USER" as T1
       where "CUSTOMERS"."TMP_USER_ID"."NoMobile" = T1."NoMobile" and
       "CUSTOMERS"."TMP_USER_ID"."USER_ID" is null

       """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

       update "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from "CUSTOMERS"."ID_USER" as T1
       where "CUSTOMERS"."TMP_USER_ID"."GoodEmail" = 1 and "CUSTOMERS"."TMP_USER_ID"."Email" = T1."Email" and
       "CUSTOMERS"."TMP_USER_ID"."USER_ID" is null

       """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

       update "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from "CUSTOMERS"."ID_USER" as T1
       where concat("CUSTOMERS"."TMP_USER_ID"."BirthDate", "CUSTOMERS"."TMP_USER_ID"."LastName") = T1."combinaison"
       and "CUSTOMERS"."TMP_USER_ID"."GoodCombinaison" = 1 and "CUSTOMERS"."TMP_USER_ID"."CONTACT_ID" is null

       """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)

    # STEP 4.4: Extract all cards (new cards and cards already associated to a USER_ID)
    # in order to re-associate USER_IDs with the sorting algorithm below, so that
    # the homogeneity and uniqueness of the USER_ID is always preserved
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    query = """

       select *
       from "CUSTOMERS"."TMP_USER_ID"


       """

    data = pd.read_sql(query, con=engine)
    data["combinaison"] = data["BirthDate"] + data["LastName"]
    data = data[[
        "NoMobile", "Email", "combinaison", "GoodEmail", "GoodCombinaison",
        "USER_ID"
    ]]

    query = """

    select "NoMobile", "Email", "combinaison", "GoodEmail", "GoodCombinaison", "USER_ID"
    from "CUSTOMERS"."ID_USER"


    """

    data_bis = pd.read_sql(query, con=engine)
    data_bis["USER_ID"] = data_bis["USER_ID"].astype(float)

    data = pd.concat([data, data_bis], axis=0)

    user_id = data[~data.duplicated(keep='first')]

    tic = time.time()
    sorted = False
    while sorted is False:

        tmp_user_id = user_id.groupby(
            "NoMobile")["USER_ID"].min().reset_index()
        tmp_user_id.columns = ["NoMobile", "TMP_USER_ID"]
        user_id = pd.merge(user_id, tmp_user_id, on="NoMobile", how="inner")
        user_id["USER_ID"] = user_id["TMP_USER_ID"]
        user_id = user_id.drop(columns='TMP_USER_ID', axis=1)

        tmp_user_id = user_id[user_id["GoodEmail"] == 1].groupby(
            "Email")["USER_ID"].min().reset_index()
        tmp_user_id.columns = ["Email", "TMP_USER_ID"]
        user_id = pd.merge(user_id, tmp_user_id, on="Email", how="left")
        user_id.loc[~user_id["TMP_USER_ID"].isnull(),
                    "USER_ID"] = user_id["TMP_USER_ID"]
        user_id = user_id.drop(columns='TMP_USER_ID', axis=1)

        tmp_user_id = user_id[user_id["GoodCombinaison"] == 1].groupby(
            "combinaison")["USER_ID"].min().reset_index()
        tmp_user_id.columns = ["combinaison", "TMP_USER_ID"]
        user_id = pd.merge(user_id, tmp_user_id, on="combinaison", how="left")
        user_id.loc[~user_id["TMP_USER_ID"].isnull(),
                    "USER_ID"] = user_id["TMP_USER_ID"]
        user_id = user_id.drop(columns='TMP_USER_ID', axis=1)

        non_unique_num = user_id.groupby(
            "NoMobile")["USER_ID"].nunique().sort_values().reset_index()
        non_unique_num = non_unique_num.loc[non_unique_num["USER_ID"] > 1,
                                            "NoMobile"]

        non_unique_email = user_id[user_id["GoodEmail"] == 1].groupby(
            "Email")["USER_ID"].nunique().sort_values().reset_index()
        non_unique_email = non_unique_email.loc[
            non_unique_email["USER_ID"] > 1, "Email"]

        non_unique_combi = user_id[user_id["GoodCombinaison"] == 1].groupby(
            "combinaison")["USER_ID"].nunique().sort_values().reset_index()
        non_unique_combi = non_unique_combi.loc[
            non_unique_combi["USER_ID"] > 1, "combinaison"]

        if (len(non_unique_num) > 0) or (len(non_unique_email) >
                                         0) or (len(non_unique_combi) > 0):
            sorted = False
        else:
            sorted = True

    toc = time.time() - tic

    tmp_use_id = user_id[~user_id["USER_ID"].isnull()]

    # STEP 4.5: associate a new USER_ID to cards that do not have one yet
    user_id = user_id[user_id["USER_ID"].isnull()]
    user_id = compute_user_id(user_id, last_user_id=id_max)

    # STEP 4.6: replace the whole ID_USER table with the new ids
    user_id = pd.concat([user_id, tmp_use_id], axis=0)
    user_id = user_id.reset_index(drop=True)

    query = """
    delete from "CUSTOMERS"."ID_USER"
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    engine.execute(query)
    engine.close()

    InsertTableIntoDatabase(user_id,
                            TlbName="ID_USER",
                            Schema='CUSTOMERS',
                            database_name=connexion_postgres,
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)

    # STEP 5: keep track of cards for which the user id changed

    # STEP 5.1: keep track of cards for which the user id changed

    query = """
    CREATE TABLE "CUSTOMERS"."TMP_MASTER_ID" as 
    SELECT "CardHolderID", "USER_ID"
    from "CUSTOMERS"."MASTER_ID"

    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.1.1: delete cards already identified from MASTER_ID
    query = """
        delete from "CUSTOMERS"."MASTER_ID"
        where "CardHolderID" in (select T1."CardHolderID"
        from "CUSTOMERS"."TMP_USER_ID" as T1
        inner join "CUSTOMERS"."MASTER_ID" as T2
        ON T1."CardHolderID" = T2."CardHolderID")
        """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    engine.execute(query)
    engine.close()

    # STEP 5.2: add new cards to master ID
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    query = """
    select *
    from "CUSTOMERS"."TMP_USER_ID"

    """
    data = pd.read_sql(query, con=engine)
    engine.close()

    data["PERSON_ID"] = data["PERSON_ID"].astype(str)
    data["PERSON_ID"] = data["PERSON_ID"].str.replace("\.0$", "", regex=True)

    InsertTableIntoDatabase(data,
                            TlbName="MASTER_ID",
                            Schema='CUSTOMERS',
                            database_name=connexion_postgres,
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)

    # STEP 5.3: update new user id
    query = """

    UPDATE "CUSTOMERS"."MASTER_ID"
    SET "USER_ID" = NULL
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
        update "CUSTOMERS"."MASTER_ID"
        set "USER_ID" = T1."USER_ID"
        from(
        select distinct "NoMobile", "USER_ID"
        from "CUSTOMERS"."ID_USER") as T1
        where "CUSTOMERS"."MASTER_ID"."NoMobile" = T1."NoMobile"
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.4: store user id that changed
    query = """
    select T3.*
    from(

        select T1.*, T2."USER_ID" as "oth_user_id"
        from "CUSTOMERS"."MASTER_ID" as T1
        INNER JOIN "CUSTOMERS"."TMP_MASTER_ID" as T2
        on T1."CardHolderID" = T2."CardHolderID"

        ) as T3
    where T3."USER_ID" <> T3."oth_user_id"

    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    data = pd.read_sql(query, con=engine)
    engine.close()

    data["USER_ID"] = data["oth_user_id"]

    data = data.drop(columns=["oth_user_id"], axis=1)

    data["dt_change"] = datetime.datetime.now() - datetime.timedelta(days=1)

    data["PERSON_ID"] = data["PERSON_ID"].astype(str)
    data["PERSON_ID"] = data["PERSON_ID"].str.replace("\.0$", "", regex=True)

    # insert these cards into the table that tracks ID changes over time
    InsertTableIntoDatabase(data,
                            TlbName="CHANGE_IDS",
                            Schema='CUSTOMERS',
                            database_name=connexion_postgres,
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)

    query = """
    drop table "CUSTOMERS"."TMP_MASTER_ID" cascade
    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.5: make sure that all changes are taken into account

    query = """
    
    drop table if exists "CUSTOMERS"."TMP_CHANGES_IDS" cascade;

    create table "CUSTOMERS"."TMP_CHANGES_IDS" as 
    select T2.*, now() as "dt_change"
    from(
            select distinct T1."USER_ID" as "user_id_change", 
            T2."USER_ID" as "user_id_current"
            from "CUSTOMERS"."CHANGE_IDS" as T1
            inner join "CUSTOMERS"."MASTER_ID" as T2
            on T1."CardHolderID" = T2."CardHolderID"
            where T1."dt_change" >= date(now()-interval '1 days')
            ) as T3
    inner join "CUSTOMERS"."MASTER_ID" as T2
    on T2."USER_ID" = T3."user_id_change";
    
    
    
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
        update "CUSTOMERS"."ID_USER"
    set "USER_ID" = T3."user_id_current"
    from(
        select distinct T1."USER_ID" as "user_id_change", 
        T2."USER_ID" as "user_id_current"
        from "CUSTOMERS"."CHANGE_IDS" as T1
        inner join "CUSTOMERS"."MASTER_ID" as T2
        on T1."CardHolderID" = T2."CardHolderID"
        where T1."dt_change" >= date(now()-interval '1 days')
        ) as T3
    where "CUSTOMERS"."ID_USER"."USER_ID" = T3."user_id_change"
        
    
    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    insert into "CUSTOMERS"."CHANGE_IDS"
    select * from "CUSTOMERS"."TMP_CHANGES_IDS"
    
    
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

    drop table if exists "CUSTOMERS"."TMP_CHANGES_IDS" cascade


    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

    UPDATE "CUSTOMERS"."MASTER_ID"
    SET "USER_ID" = NULL
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
        update "CUSTOMERS"."MASTER_ID"
        set "USER_ID" = T1."USER_ID"
        from(
        select distinct "NoMobile", "USER_ID"
        from "CUSTOMERS"."ID_USER") as T1
        where "CUSTOMERS"."MASTER_ID"."NoMobile" = T1."NoMobile"
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.6: associate a PERSON_ID to cards which do not have one
    query = """
        update "CUSTOMERS"."MASTER_ID"
        set "PERSON_ID" = concat("USER_ID",'_',"MOBILE_ID")
        where "GoodCombinaison" = 0

    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()
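The while loop in step 4.4 above keeps propagating the minimum USER_ID across the three keys (mobile number, good e-mail, good BirthDate+LastName combination) until every key maps to a single id. A toy, self-contained sketch of the same idea with hypothetical data; the real function also merges with the stored ID_USER table and only uses the "good" keys:

import pandas as pd

# Rows 0 and 1 share a mobile number, rows 1 and 2 share an e-mail, so all
# three cards should end up with the same (minimum) USER_ID.
user_id = pd.DataFrame({
    "NoMobile": ["32470000001", "32470000001", "32470000002"],
    "Email":    ["a@mail.com",  "b@mail.com",  "b@mail.com"],
    "USER_ID":  [3.0, 1.0, 2.0],
})

stable = False
while not stable:
    before = user_id["USER_ID"].copy()
    # propagate the minimum id over each mobile number ...
    user_id["USER_ID"] = user_id.groupby("NoMobile")["USER_ID"].transform("min")
    # ... then over each e-mail address
    user_id["USER_ID"] = user_id.groupby("Email")["USER_ID"].transform("min")
    stable = before.equals(user_id["USER_ID"])

print(user_id["USER_ID"].tolist())  # [1.0, 1.0, 1.0]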
Example #11
def CsvToDataBase(FilePath,TlbName,Schema,ingestion_params, **kwargs):
    """Insert a csv using pandas into a database table

        Parameters
        -----------
        FilePath : str
            Path of the .csv file to insert
        engine : sqlalchemy create_engine object
            Engine object & connection to the database from sqlalchemy
        TlbName : str
            Name of the targeted table in the database
        Schema: str
            Indicates the schema where the table is stored in the database
        PreprocessingCsv: dict - optional parameters
            Dictionary with a function that transforms a pandas DataFrame
            and a set of optional arguments for this function.
            Dictionary args:
                - 'function' = function object
                - 'KeyWords' = dict -- with optional args for the function

    """

    InsertInTheSameTable = kwargs.get('InsertInTheSameTable', None)
    PreprocessingCsv = kwargs.get('PreprocessingCsv', None)
    _use_credentials = kwargs.get('_use_credentials', None)
    _use_conf = kwargs.get('_use_conf', None)
    engine = kwargs.get('use_engine', None)
    logger = kwargs.get('logger', None)
    SizeChunck = kwargs.get('SizeChunck', None)
    database_type = kwargs.get('database_type', None)
    database_name = kwargs.get('database_name', None)

    _was_engine = True

    _num_lines_csv = None

    if engine is None:
        try:
            engine = connect_to_database(database_type, database_name,
                                         _use_credentials=_use_credentials,
                                         _use_conf=_use_conf).CreateEngine()
            _was_engine = False
        except:
            raise

    try:
        if InsertInTheSameTable:
            if PreprocessingCsv is not None:
                F = PreprocessingCsv['function']
                if PreprocessingCsv['KeyWords'] is not None:

                    # read the data using the specified protocol
                    Data = read_csv_protocole(ingestion_params["protocole_type"],
                                       ingestion_params["protocole_name"],
                                       FilePath, ingestion_params["csv_params"],
                                       copy_to_local=ingestion_params["copy_to_filesystem"])

                    Data = F(Data, FilePath, PreprocessingCsv['KeyWords'])
                    _num_lines_csv = Data.shape[0]
                else:

                    # read the data using the specified protocol
                    Data = read_csv_protocole(ingestion_params["protocole_type"],
                                              ingestion_params["protocole_name"],
                                              FilePath, ingestion_params["csv_params"],
                                              copy_to_local=ingestion_params["copy_to_filesystem"])

                    Data = F(Data, FilePath)
                    _num_lines_csv = Data.shape[0]
                    InsertToPostgre(Data,
                                TlbName=TlbName,
                                engine=engine,
                                schema=Schema,
                                DropTable=False, SizeChunck=SizeChunck)
                print("file {} was succesfully inserted".format(FilePath))
            else:

                # read the data using the specified protocol
                Data = read_csv_protocole(ingestion_params["protocole_type"],
                                          ingestion_params["protocole_name"],
                                          FilePath, ingestion_params["csv_params"],
                                          copy_to_local=ingestion_params["copy_to_filesystem"])

                _num_lines_csv = Data.shape[0]
                InsertToPostgre(Data,
                                TlbName=TlbName,
                                engine=engine,
                                schema=Schema,
                                DropTable=False, SizeChunck=SizeChunck)
                print("file {} was succesfully inserted".format(FilePath))
        else:

            if TlbName is None:
                TlbName = FilePath.split('/')[-1].replace(".csv", "")
            if PreprocessingCsv is not None:
                F = PreprocessingCsv['function']
                if PreprocessingCsv['KeyWords'] is not None:

                    # read the data using the specified protocol
                    Data = read_csv_protocole(ingestion_params["protocole_type"],
                                              ingestion_params["protocole_name"],
                                              FilePath, ingestion_params["csv_params"],
                                              copy_to_local=ingestion_params["copy_to_filesystem"])

                    Data = F(Data, FilePath, PreprocessingCsv['KeyWords'])
                    _num_lines_csv = Data.shape[0]
                else:

                    # read the data using the specified protocol
                    Data = read_csv_protocole(ingestion_params["protocole_type"],
                                              ingestion_params["protocole_name"],
                                              FilePath, ingestion_params["csv_params"],
                                              copy_to_local=ingestion_params["copy_to_filesystem"])

                    Data = F(Data, FilePath)
                    _num_lines_csv = Data.shape[0]
                    InsertToPostgre(Data,
                                TlbName=TlbName,
                                engine=engine,
                                schema=Schema,
                                DropTable=True, SizeChunck=SizeChunck)
                print("file {} was succesfully inserted".format(FilePath))
            else:
                Data = pd.read_csv(FilePath)
                _num_lines_csv = Data.shape[0]
                InsertToPostgre(Data, engine, TlbName, schema=Schema, DropTable=True, SizeChunck=SizeChunck)
                print("file {} was succesfully inserted".format(FilePath))
    except Exception as e:
        if logger is not None:
            logger.error(e, exc_info=True)
        else:
            print(e)

    if not _was_engine:
        engine.close()

    _outputs = [FilePath.split("/")[-1], _num_lines_csv]

    return _outputs
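A usage sketch for the PreprocessingCsv hook described in the docstring, with a hypothetical cleaning function and local ingestion parameters (file path and parameter values are illustrative only):

# Hypothetical cleaning step applied to the csv before insertion; it has the
# (Data, FilePath) signature expected when 'KeyWords' is None.
def drop_empty_card_holders(Data, FilePath):
    return Data[~Data["CardHolderID"].isnull()]

ingestion_params = {"protocole_type": "LOCAL",
                    "protocole_name": "",
                    "csv_params": {"sep": ","},
                    "copy_to_filesystem": None}

CsvToDataBase("/data/cards-2019-05-01.csv",
              TlbName="STATUS_CARTES",
              Schema="CARD_STATUS",
              ingestion_params=ingestion_params,
              database_type="Postgres",
              database_name="Creacard_Calypso",
              PreprocessingCsv={"function": drop_empty_card_holders,
                                "KeyWords": None},
              InsertInTheSameTable=True)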
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_table
from creacard_connectors.database_connector import connect_to_database
import pandas as pd

# extract the available schemas

engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()


query = """
    select "schema_name"
    from information_schema.schemata
    where "schema_name" !~* '^pg*.' and "schema_name" not in ('information_schema')
"""
data = pd.read_sql(query, con=engine)
list_schema = data["schema_name"].tolist()
del data

engine.close()

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__)



app.layout = html.Div([
def daily_card_status2(Data, filepath, **kwargs):

    logger = kwargs.get('logger', None)

    # Table parameter for the temporary table
    TableParameter = {}
    TableParameter["ActivationDate"] = "timestamp without time zone"
    TableParameter["Address1"] = "TEXT"
    TableParameter["Address2"] = "TEXT"
    TableParameter["ApplicationName"] = "VARCHAR (50)"
    TableParameter["AvailableBalance"] = "double precision"
    TableParameter["BirthDate"] = "timestamp without time zone"
    TableParameter["CardHolderID"] = "VARCHAR (50)"
    TableParameter["CardStatus"] = "VARCHAR (100)"
    TableParameter["City"] = "VARCHAR (100)"
    TableParameter["Country"] = "VARCHAR (50)"
    TableParameter["CreationDate"] = "timestamp without time zone"
    TableParameter["DistributorCode"] = "INTEGER"
    TableParameter["Email"] = "TEXT"
    TableParameter["ExpirationDate"] = "timestamp without time zone"
    TableParameter["FirstName"] = "TEXT"
    TableParameter["IBAN"] = "TEXT"
    TableParameter["IsExcludedAddress"] = "INTEGER"
    TableParameter["IsRenewal"] = "INTEGER"
    TableParameter["KYC_Status"] = "VARCHAR (50)"
    TableParameter["LastName"] = "TEXT"
    TableParameter["LastChangeDate"] = "timestamp without time zone"
    TableParameter["LastAddressDate"] = "timestamp without time zone"
    TableParameter["LastCustomerDate"] = "timestamp without time zone"
    TableParameter["NoMobile"] = "TEXT"
    TableParameter["PostCode"] = "VARCHAR (50)"
    TableParameter["Programme"] = "VARCHAR (50)"
    TableParameter["RenewalDate"] = "timestamp without time zone"
    TableParameter["UpdateDate"] = "timestamp without time zone"

    keepcol = [
        "CardHolderID", "Email", "FirstName", "LastName", "City", "Country",
        "Card Status", "DistributorCode", "ApplicationName", "Date of Birth",
        "IBAN", "CreatedDate", "UpdatedDate", "Address1", "Address2",
        "PostCode", "KYC Status", "expirydate", "AvailableBalance", "NoMobile",
        "Programme"
    ]

    #### Step 1: Extract the data from the file and keep only updated data
    # extract the file date
    FileName = filepath.split('/')[-1].replace(".csv", "")

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3])

    # based on the file date, identify the appropriate names of columns
    if DateFile > pd.to_datetime('2019-03-12'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "Programme", "VPVR"
        ]

    elif DateFile < pd.to_datetime('2019-01-16'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate"
        ]

    else:

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "UDF3", "VPVR"
        ]

    # add the names of columns to the dataframe
    Data.columns = col_names

    # store the missing columns
    missing_columns = list(set(keepcol).difference(col_names))

    if missing_columns:  # if the list is not empty, add the missing columns to the dataframe
        for col in missing_columns:
            Data[col] = None

    # Store the current values in order to detect changes

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    query = """
           select distinct "CardHolderID","CardStatus","KYC_Status" from "CARD_STATUS"."STATUS_CARTES"
       """
    data_current = pd.read_sql(query, con=engine)

    data_current["CardHolderID"] = data_current["CardHolderID"].astype(str)
    data_current["KYC_Status"] = data_current["KYC_Status"].astype(str)
    data_current["CardStatus"] = data_current["CardStatus"].astype(str)

    #### Step 2: Transform the data

    # transform date columns to pd.datetime format in order to have a
    # consistent date format across the database
    Data["UpdatedDate"] = pd.to_datetime(Data["UpdatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["CreatedDate"] = pd.to_datetime(Data["CreatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["Date of Birth"] = pd.to_datetime(Data["Date of Birth"],
                                           format="%b %d %Y %I:%M%p",
                                           errors='coerce')

    # transform expirydate
    Data["expirydate"] = Data["expirydate"].astype(str)
    Data["expirydate"] = "20" + Data["expirydate"].str[0:2] + "-" + Data[
        "expirydate"].str[2:] + "-01"
    Data["expirydate"] = pd.to_datetime(Data["expirydate"],
                                        format='%Y-%m-%d',
                                        errors='coerce')

    Data = Data[keepcol]

    # addresses to exclude (used to flag IsExcludedAddress)
    AddressToRemove = [
        "77 OXFORD STREET LONDON", "17 RUE D ORLEANS", "TSA 51760",
        "77 Oxford Street London", "36 CARNABY STREET",
        "36 CARNABY STREET LONDON", "36 CARNABY STREET LONDON", "ADDRESS",
        "17 RUE D ORLEANS PARIS", "CreaCard Espana S L  Paseo de Gracia 59",
        "36 Carnaby Street London",
        "CREACARD SA Pl  Marcel Broodthaers 8 Box 5", "17 Rue D Orleans Paris",
        "CREACARD ESPANA S L  PASEO DE GRACIA 59", "CreaCard 17 rue d Orleans",
        "CREACARD SA PL  MARCEL BROODTHAERS 8 BOX 75",
        "CREACARD SA PL  MARCEL BROODTHAERS 8 BOX 75", "36 Carnaby Street",
        "77 OXFORD STREET"
    ]

    Data["IsExcludedAddress"] = (
        Data.Address1.isin(AddressToRemove)).astype(int)

    Data["ActivationDate"] = pd.NaT
    Data["IsRenewal"] = 0
    Data["RenewalDate"] = pd.NaT
    Data["LastChangeDate"] = pd.NaT
    Data["LastAddressDate"] = pd.NaT
    Data["LastCustomerDate"] = pd.NaT

    Data = Data[sorted(Data.columns)]

    colnames = [
        "ActivationDate", "Address1", "Address2", "ApplicationName",
        "AvailableBalance", "CardStatus", "CardHolderID", "City", "Country",
        "CreationDate", "BirthDate", "DistributorCode", "Email", "FirstName",
        "IBAN", "IsExcludedAddress", "IsRenewal", "KYC_Status",
        "LastAddressDate", "LastChangeDate", "LastCustomerDate", "LastName",
        "NoMobile", "PostCode", "Programme", "RenewalDate", "UpdateDate",
        "ExpirationDate"
    ]

    Data.columns = colnames

    Data = Data[sorted(Data.columns)]

    Data.loc[(Data["KYC_Status"] == '0') | (Data["KYC_Status"] == '0.0') |
             (Data["KYC_Status"] == 0), "KYC_Status"] = 'Anonyme'
    Data.loc[(Data["KYC_Status"] == '1') | (Data["KYC_Status"] == '1.0') |
             (Data["KYC_Status"] == 1), "KYC_Status"] = 'SDD'
    Data.loc[(Data["KYC_Status"] == '2') | (Data["KYC_Status"] == '2.0') |
             (Data["KYC_Status"] == 2), "KYC_Status"] = 'KYC'
    Data.loc[(Data["KYC_Status"] == '3') | (Data["KYC_Status"] == '3.0') |
             (Data["KYC_Status"] == 3), "KYC_Status"] = 'KYC LITE'

    Data["DistributorCode"] = Data["DistributorCode"].fillna(-1)
    Data["DistributorCode"] = Data["DistributorCode"].astype(int)

    Data["CardHolderID"] = Data["CardHolderID"].astype(str)
    Data["KYC_Status"] = Data["KYC_Status"].astype(str)
    Data["CardStatus"] = Data["CardStatus"].astype(str)

    Data.loc[Data["DistributorCode"].isin(["203", "914", "915"]),
             "IsRenewal"] = 1

    Data = Data[sorted(Data.columns)]

    # Delete leading "00" at the start of string.

    Data["NoMobile"] = Data["NoMobile"].str.replace("^00", "", regex=True)

    # remove a trailing '.0'

    Data["NoMobile"] = Data["NoMobile"].str.replace("\.0$", "", regex=True)

    # remove literal '|' characters from the string

    Data["NoMobile"] = Data["NoMobile"].str.replace("\|", "", regex=True)

    # Step 1: identify new or changed (CardStatus / KYC_Status) rows

    data_new = Data[["CardHolderID", "CardStatus", "KYC_Status"]]
    outer_join = data_current.merge(data_new, how='outer', indicator=True)
    outer_join = outer_join[outer_join["_merge"] == "right_only"]

    # Step 2: identify new cardholder IDs
    new_card_holder_id = set(outer_join["CardHolderID"].unique()).difference(
        data_current["CardHolderID"].unique())

    ### Step 3: insert the old values into the changes table
    data_to_change = data_current[data_current["CardHolderID"].isin(
        set(outer_join.loc[
            ~outer_join["CardHolderID"].isin(new_card_holder_id),
            "CardHolderID"]))]

    FileName = filepath.split('/')[-1].replace(".csv", "")
    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3]) - datetime.timedelta(days=1)

    data_to_change["dt_change"] = DateFile

    InsertTableIntoDatabase(data_to_change,
                            "CHANGE_STATUS_CARTES",
                            "CARD_STATUS",
                            "Postgres",
                            "Creacard_Calypso",
                            DropTable=False)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

    TlbName = "STATUS_CARTES"
    schema = "CARD_STATUS"
    database_type = "Postgres"
    database_name = "Creacard_Calypso"

    query_delete = """
    
    delete from "CARD_STATUS"."STATUS_CARTES"
    
    """
    tic = time.time()
    engine.execute(query_delete)
    print("delete took the data {} seconds".format(time.time() - tic))

    engine.close()

    data = splitDataFrameIntoSmaller(Data, chunkSize=100000)

    num_process = int(multiprocessing.cpu_count() / 4)
    tic = time.time()
    pool = Pool(num_process)
    pool.map(
        partial(insert_into_postgres_copyfrom,
                database_type=database_type,
                database_name=database_name,
                schema=schema,
                TlbName=TlbName), data)

    pool.close()
    pool.terminate()
    pool.join()
    toc = time.time() - tic

    print("ingestion was done in {} seconds ".format(toc))

    ### update the LastChangeDate columns (KYC & card status)

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query = """

       UPDATE "CARD_STATUS"."STATUS_CARTES"
       SET "LastChangeDate" = T1."max_date"
       FROM( 
       SELECT max("dt_change") as "max_date", "CardHolderID"
       FROM "CARD_STATUS"."CHANGE_STATUS_CARTES"
       GROUP BY "CardHolderID"
       ) as T1
       WHERE "CARD_STATUS"."STATUS_CARTES"."CardHolderID" = T1."CardHolderID"

       """

    con_postgres.execute(query)
    con_postgres.close()

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query = """

       update "CARD_STATUS"."STATUS_CARTES" as T1
       SET "ActivationDate" = "ActivationTime"
       FROM "CARD_STATUS"."ACTIVATION_REPORT" as T2
       WHERE 
       T1."CardHolderID" = T2."CardHolderID" and 
       "ActivationDate" is null 

       """

    con_postgres.execute(query)
    con_postgres.close()
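daily_card_status2 has the (Data, filepath, **kwargs) signature expected by the PreprocessingCsv hook of the ingestion helpers; a direct call might look like this sketch (the path is hypothetical, and the file name must carry the date as <prefix>-YYYY-MM-DD.csv because the function parses it):

import pandas as pd

# Raw daily extract read without a header: the function assigns the column
# names itself, based on the file date.
filepath = "/data/cardstatus-2019-05-01.csv"
Data = pd.read_csv(filepath, header=None)

# Clean the extract, archive status/KYC changes and reload
# "CARD_STATUS"."STATUS_CARTES" in parallel chunks.
daily_card_status2(Data, filepath)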
Example #14
def FromCsvToDataBase(ListOfPath, database_type,database_name, Schema, ingestion_params, **kwargs):

    """Insert all .csv from a folder

        Required Parameters
        -----------
        ListOfPath: list
            List of paths of the .csv files to insert
        database_type : str
            Type of the database (as defined in your configuration files)
        database_name: str
            name of the database configuration file (ex: Postgres_calypso)
        schema: str
            name of the schema where the table must be written
        ingestion_params: dict
            standardized dictionary with parameters for the ingestion

            ex:
                params_ingestion = dict()
                params_ingestion["protocole_type"] = "LOCAL" -- FTP or SFTP protocole type where .csv are located
                params_ingestion["protocole_name"] = "" protocole name where .csv are located i.e in configurations files
                params_ingestion["csv_params"] = csv_params -- reading csv paramaters (ex: csv_params = {'sep': ","})
                params_ingestion["copy_to_filesystem"] = destination_copy (dict) -- dictionnary with two fileds:
                    ex: destination_copy = dict()
                        destination_copy["destination_folder"] = folder_2 -- path of teh filesystem where the data mst be duplicated
                        destination_copy["csv_destination_params"] = {'sep': ",", 'index':False} -- writting parameters

        Optional Parameters (**kwargs)
        -----------
        TlbName : str
            Name of the targeted table into the database
        InsertInParrell : Boolean -- default value False
            True if the insertion has to be done in parallel
        InsertInTheSameTable : Boolean -- default value False
            True if the pandas dataframe has to be inserted into the same
            table at each loop
        PreprocessingCsv: dict - optional parameters
            Dictionary with a function that transforms a pandas DataFrame
            and a set of optional arguments for this function.
            Dictionary args:
                - 'function' = function object
                - 'KeyWords' = dict -- with optional args for the function
        logger: logger object
            logger to get logs from the running function in case
            of errors
        TableDict: dict
            Dictionary with the postgres types associated to the variables
            ingested from the DataFrame

     """

    TlbName              = kwargs.get('TlbName', None)
    InsertInParrell      = kwargs.get('InsertInParrell', False)
    InsertInTheSameTable = kwargs.get('InsertInTheSameTable', False)
    PreprocessingCsv     = kwargs.get('PreprocessingCsv', None)
    logger               = kwargs.get('logger', None)
    TableDict            = kwargs.get('TableDict', None)
    SizeChunck           = kwargs.get('SizeChunck', 10000)
    NumWorkers           = kwargs.get('NumWorkers', 3)
    _use_credentials     = kwargs.get('_use_credentials', None)
    _use_conf = kwargs.get('_use_conf', None)

    engine = connect_to_database(database_type, database_name,
                                 _use_credentials=_use_credentials,
                                 _use_conf=_use_conf).CreateEngine()

    # Test if the targeted schema exists
    if isinstance(Schema, str):
        if not db.IsSchemaExist(engine, Schema):
            db.CreateSchema(engine, Schema)

    if InsertInTheSameTable is not None:
        if TableDict is not None:
            try:
                # Check if the table exists; the function will
                # automatically create it if it doesn't exist
                if not db.table_exists(engine, TlbName, Schema):
                    db.CreateTable(engine, TlbName, Schema, TableDict)
            except Exception as e:
                if logger is not None:
                    logger.error(e, exc_info=True)
                else:
                    print(e)
        else:
            raise ValueError("You must provide a TableDict dictionary to insert into the same table")

        # The .csv files are ingested in parallel if the user
        # chose parallel insertion
        if InsertInParrell:

            del engine

            if NumWorkers is None:
                # Store the number of available workers
                NbWorkers = multiprocessing.cpu_count() - 1
            else:
                NbWorkers = NumWorkers
            print(".csv simultaneous ingestion of {} files using {} Workers is launched".format(len(ListOfPath), NbWorkers))

            _lines_file = []
            tic = time.time()
            p = ThreadPool(NbWorkers)
            _lines_file.append(p.map(partial(db.CsvToDataBase,
                                             TlbName=TlbName,
                                             Schema=Schema,
                                             ingestion_params=ingestion_params,
                                             logger=logger,
                                             SizeChunck=SizeChunck,
                                             PreprocessingCsv=PreprocessingCsv,
                                             InsertInTheSameTable=True,
                                             database_type=database_type,
                                             database_name=database_name), ListOfPath))
            toc = time.time() - tic
            print(".csv files were succesfully ingested in parallel into the table {} in {} seconds".format(TlbName, toc))

        else:
            _lines_file = []
            tic = time.time()
            for i in ListOfPath:
                _lines_file.append(db.CsvToDataBase(i,
                                                    TlbName=TlbName,
                                                    Schema=Schema,
                                                    SizeChunck=SizeChunck,
                                                    database_type=database_type,
                                                    database_name=database_name,
                                                    ingestion_params=ingestion_params,
                                                    PreprocessingCsv=PreprocessingCsv,
                                                    use_engine=engine,
                                                    InsertInTheSameTable=True))
                print("{} was succesfully ingested".format(i))
            toc = time.time() - tic
            print(".csv files were succesfully ingested into the table {} in {} seconds".format(TlbName, toc))

    else:

        # The .csv files are ingested in parallel if the user
        # chose parallel insertion
        if InsertInParrell:

            del engine

            if NumWorkers is None:
                # Store the number of available workers
                NbWorkers = multiprocessing.cpu_count() - 1
            else:
                NbWorkers = NumWorkers
            print(".csv simultaneous ingestion of {} files using {} Workers is launched".format(len(ListOfPath),
                                                                                                NbWorkers))
            _lines_file = []
            tic = time.time()
            p = ThreadPool(NbWorkers)
            _lines_file.append(p.map(partial(db.CsvToDataBase,
                                             TlbName=TlbName,
                                             Schema=Schema,
                                             logger=logger,
                                             SizeChunck=SizeChunck,
                                             ingestion_params=ingestion_params,
                                             PreprocessingCsv=PreprocessingCsv,
                                             InsertInTheSameTable=False,
                                             database_type=database_type,
                                             database_name=database_name,
                                             use_engine=None), ListOfPath))
            toc = time.time() - tic
            print(".csv files were successfully ingested in parallel into the table {} in {} seconds".format(TlbName, toc))

        else:
            _lines_file = []
            tic = time.time()
            for i in ListOfPath:
                _lines_file.append(db.CsvToDataBase(i,
                                                    TlbName=TlbName,
                                                    Schema=Schema,
                                                    SizeChunck=SizeChunck,
                                                    PreprocessingCsv=PreprocessingCsv,
                                                    ingestion_params=ingestion_params,
                                                    use_engine=engine,
                                                    database_type=database_type,
                                                    database_name=database_name,
                                                    InsertInTheSameTable=False))
                print("{} was succesfully ingested".format(i))
            toc = time.time() - tic
            print(".csv files were succesfully ingested into the table {} in {} seconds".format(TlbName, toc))

        engine.close()

    return _lines_file
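# A minimal usage sketch for FromCsvToDataBase (the file paths, table name and column
# types below are illustrative assumptions, not values taken from this project):
def _example_from_csv_to_database():
    params_ingestion = dict()
    params_ingestion["protocole_type"] = "LOCAL"
    params_ingestion["protocole_name"] = ""
    params_ingestion["csv_params"] = {'sep': ","}

    # assumed list of local .csv exports to ingest
    files = ["/tmp/daily_exports/export_1.csv", "/tmp/daily_exports/export_2.csv"]

    return FromCsvToDataBase(files, "Postgres", "Creacard_Calypso", "TRANSACTIONS",
                             params_ingestion,
                             TlbName="DAILY_IMPORT",
                             InsertInTheSameTable=True,
                             TableDict={"CardHolderID": "VARCHAR (50)",
                                        "Amount": "double precision"})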
Exemple #15
0
def InsertTableIntoDatabase(Data, TlbName, Schema, database_type, database_name, **kwargs):
    """Insert a pandas Dataframe
        Requiered Parameters
        -----------
        engine : sqlalchmey create_engine object
            Engin object  & connection to the database from sqlalchemy
        TlbName : str
            Name of the targeted table into the database
        Schema: str
            Indicate the schema where the table is stores into the database

        Optional Parameters (**kwargs)
        -----------
        logger: logger object
            logger to get logs from the running function in case
            of errors
        TableDict: dict
            Dictionnary with the postgres types associated to the variables
            ingested from the DataFrame
        DropTable : Boolean -- default value False
            True if the table has to be dropped before ingestion
        InsertInParrell : Boolean -- default value False
            True if the insertion has to be done in parallel

    """

    InsertInParrell = kwargs.get('InsertInParrell', False)
    SizeParrell     = kwargs.get('SizeParrell', 10000)
    logger          = kwargs.get('logger', None)
    TableDict       = kwargs.get('TableDict', None)
    DropTable       = kwargs.get('DropTable', False)
    SizeChunck      = kwargs.get('SizeChunck', 10000)
    NumWorkers      = kwargs.get('NumWorkers', 3)
    _use_credentials = kwargs.get('_use_credentials', None)
    _use_conf        = kwargs.get('_use_conf', None)
    engine = kwargs.get('use_engine', None)

    if engine is None:
        try:
            engine = connect_to_database(database_type, database_name,
                                                  _use_credentials=_use_credentials,
                                                  _use_conf=_use_conf).CreateEngine()
        except:
            raise

    # Test if the targeted schema exists
    try:
        if isinstance(Schema, str):
            if not db.IsSchemaExist(engine, Schema):
                db.CreateSchema(engine, Schema)
        else:
            raise ValueError("'Schema' must have a string format")
    except:
        raise



    # Variable types in postgres
    if TableDict is not None:
        try:
            # Check if the table exists
            if not db.table_exists(engine, TlbName, Schema):
                db.CreateTable(engine, TlbName, Schema, TableDict)
            else:
                if DropTable:
                    metadata = MetaData()
                    TlbObject = Table(TlbName, metadata, schema=Schema)
                    TlbObject.drop(engine)
                    db.CreateTable(engine, TlbName, Schema, TableDict)
        except Exception as e:
            if logger is not None:
                logger.error(e, exc_info=True)
            else:
                print(e)
    else:
        TableDict = CreateDictionnaryType(Data)
        try:
            # Check if the table exists
            if not db.table_exists(engine, TlbName, Schema):
                db.CreateTable(engine, TlbName, Schema, TableDict)
            else:
                if DropTable:
                    metadata = MetaData()
                    TlbObject = Table(TlbName, metadata, schema=Schema)
                    TlbObject.drop(engine)
                    db.CreateTable(engine, TlbName, Schema, TableDict)
        except Exception as e:
            if logger is not None:
                logger.error(e, exc_info=True)
            else:
                print(e)

    # Insert in parallel or not
    if InsertInParrell:

        del engine

        if NumWorkers is None:
            # Store the number of available workers
            NbWorkers = multiprocessing.cpu_count() - 1
        else:
            NbWorkers = NumWorkers
        # Split the data into chunks of SizeParrell rows
        DataSplitted = splitDataFrameIntoSmaller(Data, chunkSize=SizeParrell)
        print("Launch multi-insertion of sample {} rows on {} Workers".format(SizeParrell,NbWorkers))

        tic = time.time()
        p = ThreadPool(NbWorkers)
        p.map(partial(db.InsertToPostgre,
                      engine=None,
                      TlbName=TlbName,
                      schema=Schema,
                      DropTable=False,
                      database_type=database_type,
                      database_name=database_name,
                      SizeChunck = SizeChunck,
                      _use_credentials=_use_credentials,
                      _use_conf=_use_conf), DataSplitted)
        p.close()
        p.join()
        toc = time.time() - tic
        print("The DataFrame was succesfully ingested in parallel into the table {} in {} seconds".format(TlbName, toc))

    else:
        tic = time.time()
        db.InsertToPostgre(Data,
                           engine=engine,
                           TlbName=TlbName,
                           schema=Schema,
                           DropTable=False,
                           SizeChunck=SizeChunck)
        toc = time.time() - tic
        print("The DataFrame was succesfully ingested into the table {} in {} seconds".format(TlbName, toc))

    return
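# A minimal usage sketch for InsertTableIntoDatabase (the DataFrame content and table
# name below are illustrative assumptions):
def _example_insert_table_into_database():
    df = pd.DataFrame({"CardHolderID": ["123", "456"],
                       "AvailableBalance": [10.5, 0.0]})
    table_types = {"CardHolderID": "VARCHAR (50)",
                   "AvailableBalance": "double precision"}
    InsertTableIntoDatabase(df,
                            TlbName="EXAMPLE_BALANCES",
                            Schema="TMP_UPDATE",
                            database_type="Postgres",
                            database_name="Creacard_Calypso",
                            DropTable=True,
                            TableDict=table_types)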
def fill_univers_sous_univers(database_type, database_name, schema, TlbName):

    # Refresh the categorisation dictionaries
    create_update_dictionnaries_categorisation(database_type, database_name)

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # Step 1.1 - MCC code note 1 -- update categories and subcategories
    query = """
    UPDATE "{}"."{}"
    SET "UNIVERS" = T4."UNIVERS_DATABASE",
        "SOUS_UNIVERS" = T4."SOUS_UNIVERS_DATABASE"
    from(
        -- Join MCC categories, univers database & MCC database 
        select T3."UNIVERS_DATABASE",T3."SOUS_UNIVERS_DATABASE",T2."MCC" as "MCC_DATABASE" 
        from "REFERENTIEL"."MCC_CATEGORIES" as T1 
        INNER JOIN "REFERENTIEL"."MCC_CODE_LINK" as T2 
        ON T1."MCC_CODE" = T2."MCC_CODE"
        INNER JOIN "REFERENTIEL"."UNIVERS_DESCRIPTION" as T3
        ON T1."UNIVERS" = T3."UNIVERS" and 
        T1."SOUS_UNIVERS" = T3."SOUS_UNIVERS"
        WHERE T1."NOTE" in ('1')
        ) as T4
    WHERE T4."MCC_DATABASE" = "MCC"
    """.format(schema, TlbName)

    tic = time.time()
    engine.execute(query)
    print("update of exclusion was done in {} seconds".format(time.time() -
                                                              tic))

    # Step 1.2 - MCC code note 1 -- Regex excluded
    query = """
        select T3."UNIVERS_DATABASE",T3."SOUS_UNIVERS_DATABASE",T2."MCC" as "MCC_DATABASE",T1."NEW_REGEX" as "REGEX"
        from "REFERENTIEL"."REGEX_EXCLUDED" as T1 
        INNER JOIN "REFERENTIEL"."MCC_CODE_LINK" as T2 
        ON T1."MCC_CODE" = T2."MCC_CODE"
        INNER JOIN "REFERENTIEL"."UNIVERS_DESCRIPTION" as T3
        ON T1."UNIVERS" = T3."UNIVERS" and 
        T1."SOUS_UNIVERS" = T3."SOUS_UNIVERS"
        INNER JOIN "REFERENTIEL"."MCC_CATEGORIES" as T4
        ON T1."MCC_CODE" = T4."MCC_CODE"
        WHERE T4."NOTE" = 1  
    """

    tic = time.time()
    DataRegex = pd.read_sql(query, con=engine)
    print("regex exclusion rules (note 1) were fetched in {} seconds".format(time.time() - tic))

    tic = time.time()
    for i in range(0, len(DataRegex)):
        ExcludedRegex(NumRow=i,
                      DataRegex=DataRegex,
                      engine=engine,
                      TlbName=TlbName,
                      schema=schema)

    print("update of exclusion was done in {} seconds".format(time.time() -
                                                              tic))

    # Step 2.1 - MCC code notes 0 and 2 -- update categories and subcategories
    query = """
    UPDATE "{}"."{}"
    SET "UNIVERS" = T4."UNIVERS_DATABASE",
        "SOUS_UNIVERS" = T4."SOUS_UNIVERS_DATABASE"
    from(
        -- Join MCC categories, univers database & MCC database 
        select T3."UNIVERS_DATABASE",T3."SOUS_UNIVERS_DATABASE",T2."MCC" as "MCC_DATABASE" 
        from "REFERENTIEL"."MCC_CATEGORIES" as T1 
        INNER JOIN "REFERENTIEL"."MCC_CODE_LINK" as T2 
        ON T1."MCC_CODE" = T2."MCC_CODE"
        INNER JOIN "REFERENTIEL"."UNIVERS_DESCRIPTION" as T3
        ON T1."UNIVERS" = T3."UNIVERS" and 
        T1."SOUS_UNIVERS" = T3."SOUS_UNIVERS"
        WHERE T1."NOTE" in ('2','0')
        ) as T4
    WHERE T4."MCC_DATABASE" = "MCC"
    """.format(schema, TlbName)

    tic = time.time()
    engine.execute(query)
    print("update of exclusion was done in {} seconds".format(time.time() -
                                                              tic))

    # Step 2.2 - MCC code notes 0 and 2 -- Regex excluded
    query = """
        select T3."UNIVERS_DATABASE",T3."SOUS_UNIVERS_DATABASE",T2."MCC" as "MCC_DATABASE",T1."NEW_REGEX" as "REGEX"
        from "REFERENTIEL"."REGEX_EXCLUDED" as T1 
        INNER JOIN "REFERENTIEL"."MCC_CODE_LINK" as T2 
        ON T1."MCC_CODE" = T2."MCC_CODE"
        INNER JOIN "REFERENTIEL"."UNIVERS_DESCRIPTION" as T3
        ON T1."UNIVERS" = T3."UNIVERS" and 
        T1."SOUS_UNIVERS" = T3."SOUS_UNIVERS"
        INNER JOIN "REFERENTIEL"."MCC_CATEGORIES" as T4
        ON T1."MCC_CODE" = T4."MCC_CODE"
        WHERE T4."NOTE" in ('0','2')
    """

    DataRegex = pd.read_sql(query, con=engine)

    tic = time.time()
    for i in range(0, len(DataRegex)):
        ExcludedRegex(NumRow=i,
                      DataRegex=DataRegex,
                      engine=engine,
                      TlbName=TlbName,
                      schema=schema)

    print("update of exclusion was done in {} seconds".format(time.time() -
                                                              tic))

    # Step 3 - regex inclusions

    # Step 3.1 -- Regex included
    query = """
      select T3."UNIVERS_DATABASE",T3."SOUS_UNIVERS_DATABASE",
        T1."NEW_REGEX" as "REGEX"
        from "REFERENTIEL"."REGEX_INCLUDED" as T1 
        INNER JOIN "REFERENTIEL"."UNIVERS_DESCRIPTION" as T3
        ON T1."UNIVERS" = T3."UNIVERS" and 
        T1."SOUS_UNIVERS" = T3."SOUS_UNIVERS" 
    """

    DataRegex = pd.read_sql(query, con=engine)

    tic = time.time()
    for i in range(0, len(DataRegex)):
        IncludedRegex(NumRow=i,
                      DataRegex=DataRegex,
                      engine=engine,
                      TlbName=TlbName,
                      schema=schema)

    print("update of exclusion was done in {} seconds".format(time.time() -
                                                              tic))

    engine.close()
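# Usage sketch: refresh the "REFERENTIEL" dictionaries and (re)categorise a transactions
# table in place (the schema and table names below are illustrative assumptions):
def _example_fill_univers_sous_univers():
    fill_univers_sous_univers("Postgres", "Creacard_Calypso",
                              schema="TRANSACTIONS", TlbName="POS_TRANSACTIONS")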
Exemple #17
0
def create_update_dictionnaries_categorisation(database_type,database_name):



    engine = connect_to_database(database_type, database_name).CreateEngine()

    CreateSchema(engine, "REFERENTIEL")

    # Conf files location
    if sys.platform == "win32":
        Folder = os.path.expanduser('~') + "\\conf_python\\categorisation_univers\\"
    else:
        Folder = os.environ['HOME'] + "/conf_python/categorisation_univers/"

    # Referentials table
    FileDescription = "description_univers.xlsx"

    DataDescription = pd.read_excel(Folder + FileDescription)

    TableParameter = {}
    TableParameter["UNIVERS_DATABASE"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS_DATABASE"] = "VARCHAR (100)"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["DESCRIPTION"] = "TEXT"

    InsertTableIntoDatabase(DataDescription,
                            "UNIVERS_DESCRIPTION",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del DataDescription
    del FileDescription

    # link between MCC and MCC_CODE
    FileMCC = "mcc_code_link.xlsx"

    DataFileMCC = pd.read_excel(Folder + FileMCC, dtype={'MCC_CODE': str, 'MCC': object})

    TableParameter = {}
    TableParameter["MCC_CODE"] = "VARCHAR (20)"
    TableParameter["MCC"] = "TEXT"

    InsertTableIntoDatabase(DataFileMCC,
                            "MCC_CODE_LINK",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileMCC
    del DataFileMCC

    # Ingest MCC_CATEGORIES
    FileMCCCat = "mcc_categories.xlsx"

    DataMCCCat = pd.read_excel(Folder + FileMCCCat)

    TableParameter = {}
    TableParameter["MCC_NAME"] = "TEXT"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["MCC_CODE"] = "VARCHAR (20)"
    TableParameter["NOTE"] = "INTEGER"

    InsertTableIntoDatabase(DataMCCCat,
                            "MCC_CATEGORIES",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileMCCCat
    del DataMCCCat

    # Regex exclusion
    FileRegexExclu = "regex_merchant.xlsx"

    Data1 = pd.read_excel(Folder + FileRegexExclu, sheet_name='Regex exclu')
    Data1 = Data1[~Data1.UNIVERS.isna()]
    TableParameter = {}
    TableParameter["MCC"] = "TEXT"
    TableParameter["Regex"] = "TEXT"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["NEW_REGEX"] = "TEXT"
    TableParameter["MCC_CODE"] = "VARCHAR (20)"

    InsertTableIntoDatabase(Data1,
                            "REGEX_EXCLUDED",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileRegexExclu
    del Data1

    # Update MCC code for Regex excluded

    query = """
    UPDATE "REFERENTIEL"."REGEX_EXCLUDED"
    set "MCC_CODE" = T2."MCC_CODE"
    FROM "REFERENTIEL"."MCC_CATEGORIES" AS T2
    where "MCC" = T2."MCC_NAME"
    """

    engine.execute(query)

    # Regex inclusion
    FileRegexExclu = "regex_ajout.xlsx"

    Data1 = pd.read_excel(Folder + FileRegexExclu, sheet_name='Regex ajout')
    Data1 = Data1[~Data1.UNIVERS.isna()]
    TableParameter = {}
    TableParameter["Regex"] = "TEXT"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["NEW_REGEX"] = "TEXT"

    InsertTableIntoDatabase(Data1,
                            "REGEX_INCLUDED",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileRegexExclu
    del Data1

    engine.close()
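# The function above reads the following Excel files from ~/conf_python/categorisation_univers/
# and loads them into the "REFERENTIEL" schema:
#   description_univers.xlsx                  -> "UNIVERS_DESCRIPTION"
#   mcc_code_link.xlsx                        -> "MCC_CODE_LINK"
#   mcc_categories.xlsx                       -> "MCC_CATEGORIES"
#   regex_merchant.xlsx (sheet 'Regex exclu') -> "REGEX_EXCLUDED"
#   regex_ajout.xlsx (sheet 'Regex ajout')    -> "REGEX_INCLUDED"
# Usage sketch (assuming that folder is populated):
def _example_refresh_referentiel():
    create_update_dictionnaries_categorisation("Postgres", "Creacard_Calypso")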
Exemple #18
0
def InsertToPostgre(Data, TlbName, schema, engine,  **kwargs):
    """Insert a pandas DataFrame into a database table

        Requiered Parameters
        -----------
        engine : sqlalchmey create_engine object
            Engine object  & connection to the database from sqlalchemy
        TlbName : str
            Name of the targeted table into the database
        Schema: str
            Indicate the schema where the table is stores into the database

        Optional Parameters (**kwargs)
        -----------
        logger: logger object
            logger to get logs from the running function in case
            of errors
        DropTable : Boolean -- default value False
            True if the table has to be dropped before ingestion
        SizeChunck : Integer -- default value 10000


    """

    # store the value of each optional argument
    # if an argument is missing, the default value
    # is none
    logger          = kwargs.get('logger', None)
    DropTable       = kwargs.get('DropTable', False)
    SizeChunck      = kwargs.get('SizeChunck', None)
    TableDict       = kwargs.get('TableDict', None)
    _use_credentials     = kwargs.get('_use_credentials', None)
    _use_conf = kwargs.get('_use_conf', None)
    database_type = kwargs.get('database_type', None)
    database_name = kwargs.get('database_name', None)


    _was_engine = True

    if engine is None:
        try:
            engine = connect_to_database(database_type, database_name,
                                                  _use_credentials=_use_credentials,
                                                  _use_conf=_use_conf).CreateEngine()
            _was_engine = False
        except:
            raise

    # Manage the target table (create it, or drop and recreate it) before insertion
    if DropTable:
        try:

            # Variables type in postgres
            if TableDict is not None:
                # Check if the table exists
                if not table_exists(engine, TlbName, schema):
                    CreateTable(engine, TlbName, schema, TableDict)

                else:
                    if DropTable:
                        metadata = MetaData()
                        TlbObject = Table(TlbName, metadata, schema=schema)
                        TlbObject.drop(engine)
                        CreateTable(engine, TlbName, schema, TableDict)
            else:
                TableDict = CreateDictionnaryType(Data)

                # Check if the table exists
                if not table_exists(engine, TlbName, schema):
                    CreateTable(engine, TlbName, schema, TableDict)
                else:
                    if DropTable:
                        metadata = MetaData()
                        TlbObject = Table(TlbName, metadata, schema=schema)
                        TlbObject.drop(engine)
                        CreateTable(engine, TlbName, schema, TableDict)

            # Insert into the table
            if SizeChunck is None:
                Data.to_sql(TlbName, con=engine, if_exists='append', schema=schema, index=False)
            else:
                Data.to_sql(TlbName, con=engine, if_exists='append', schema=schema, index=False, chunksize=SizeChunck)

        except Exception as e:
            if logger is not None:
                logger.error(e, exc_info=True)
            else:
                print(e)
    else:
        try:
            if SizeChunck is None:
                Data.to_sql(TlbName, con=engine, if_exists='append', schema=schema, index=False)
            else:
                Data.to_sql(TlbName, con=engine, if_exists='append', schema=schema, index=False, chunksize=SizeChunck)
        except Exception as e:
            if logger is not None:
                logger.error(e, exc_info=True)
            else:
                print(e)

    if not _was_engine:
        engine.close()
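# Usage sketch for InsertToPostgre with an already open engine (the table and schema names
# are illustrative assumptions); when an engine is passed in, the caller stays responsible
# for closing it:
def _example_insert_to_postgre():
    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    df = pd.DataFrame({"TransactionID": ["T1", "T2"], "Fee": [1.5, 2.0]})
    InsertToPostgre(df, TlbName="EXAMPLE_FEES", schema="TMP_UPDATE",
                    engine=engine, DropTable=True, SizeChunck=10000)
    engine.close()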
Exemple #19
0
def add_new_pos_transactions(database_type, database_name, _year, _month, _day, **kwargs):



    _tlbname = kwargs.get('tlbname', "POS_TRANSACTIONS")
    _schema = kwargs.get('schema', "TRANSACTIONS")

    date_start = datetime.datetime(_year, _month, _day)
    date_start_cond = str(date_start)[0:10]
    end_date = date_start + datetime.timedelta(days=1)
    end_date = str(end_date)[0:10]

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # check whether this date has already been processed
    query = """
    select count(*)
    from "{}"."{}"
    where "TransactionTime" >= '{}' and "TransactionTime" < '{}'
    """.format(_schema,_tlbname,str(date_start), str(end_date))

    data = pd.read_sql(query, con=engine)

    if data.iloc[0, 0] == 0:

        query = """
                        SELECT "CardHolderID","MCC","Amount","MerchantName","TransactionTime","Currency",
                        "CardVPUType", "MerchantAddress", "MerchantCity", "MerchantCountry", "MerchantID", "TransactionID",
                        CASE WHEN "TransactionTP" in ('POS International') then 1 
                        else 0
                        end as "IsPOSInternational","TransactionTP",
                        '' as "UNIVERS", '' as "SOUS_UNIVERS"
                        FROM "TRANSACTIONS_MONTHLY"."MONTHLY_TRANSACTIONS_{}"
                        where "TransactionTP" IN ('POS International','POS Domestic') 
                        and "DebitCredit" IN ('Debit') 
                        and "TransactionResult" = 'APPROVED' and "TransactionTime" >= '{}' and "TransactionTime" < '{}'
        
        """.format(str(date_start.year) + str(date_start.month), date_start_cond, end_date)

        data = pd.read_sql(query, con=engine)

        if not data.empty:

            # Get the type of each variable
            columns_type = create_dictionnary_type_from_table(engine,"POS_TRANSACTIONS")
            # Create the TMP table for POS TRANSACTIONS
            # Drop the table
            query = """
            DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_POS_TRANSACTIONS"
            """

            engine.execute(query)

            CreateTable(engine, "TMP_POS_TRANSACTIONS", "TMP_UPDATE", columns_type,keep_order=True)

            # Insert into table
            InsertTableIntoDatabase(data,
                                    TlbName="TMP_POS_TRANSACTIONS",
                                    Schema="TMP_UPDATE",
                                    database_type=database_type,
                                    database_name=database_name,
                                    DropTable=False)

            tic = time.time()

            fill_univers_sous_univers(database_type, database_name, "TMP_UPDATE", "TMP_POS_TRANSACTIONS")

            print("categorisation was done in {} seconds".format(time.time() - tic))

            engine = connect_to_database(database_type, database_name).CreateEngine()

            query = """
            insert into "{}"."{}"
            select * from "TMP_UPDATE"."TMP_POS_TRANSACTIONS"
            """.format(_schema, _tlbname)

            engine.execute(query)

            # Drop the table
            query = """
            DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_POS_TRANSACTIONS"
            """

            engine.execute(query)

            engine.close()
        else:
            print("Any data for this date")

    else:
        print("this data had been already treated")
Exemple #20
0
def add_new_others_transactions(database_type, database_name, _year, _month, _day, **kwargs):

    _tlbname = kwargs.get('tlbname', "OTHER_TRANSACTIONS")
    _schema = kwargs.get('schema', "TRANSACTIONS")


    date_start = datetime.datetime(_year, _month, _day)
    date_start_cond = str(date_start)[0:10]
    end_date = date_start + datetime.timedelta(days=1)
    end_date = str(end_date)[0:10]

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # check whether this date has already been processed
    query = """
       select count(*)
       from "{}"."{}"
       where "TransactionTime" >= '{}' and "TransactionTime" < '{}'
       """.format(_schema,_tlbname, date_start_cond, end_date)

    data = pd.read_sql(query, con=engine)

    if data.iloc[0, 0] == 0:

        querytmp = """

               select "CardHolderID","MCC","Amount","MerchantName","TransactionTP","TransactionTime","Currency",
               "CardVPUType", "MerchantAddress", "MerchantCity", "MerchantCountry", "MerchantID", "TransactionID",
               "DebitCredit",
               CASE 
                   WHEN "TransactionTP" ~* ('reversal') THEN 1
                   ELSE 0
               END AS "IsReversal",

               CASE 
                   WHEN "TransactionTP" ~* ('fee') THEN 1
                   ELSE 0
               END AS "IsFee"

               from "TRANSACTIONS_MONTHLY"."MONTHLY_TRANSACTIONS_{}"

               WHERE "TransactionTP" NOT IN (
               'ATM Domestic','ATM Domestic Fee','ATM International','ATM International Fee','BalanceInquiry fee','FX Fee',
               'Bank Payment fee','Bank Transfer Fee','Batch Load Fee','Card Fee','Card Load Fee','Card Load at Payzone Fee',
               'Card To Card Transfer Fee','Card to Card In','Cash Advance Fee','Decline Fee','Deposit To Card API Fee',
               'INTERNET DEBIT/CREDIT','IVR Fee','InternetDrCrFee','KYC Card Upgrade Fee','Monthly Fee','POS Domestic',
               'POS International','POS International Fee','Paytrail Load Fee','Post Office Fee','RefundFee','Replacement Card Fee',
               'Replacement Card In','SEPA Outgoing Payment Fee','SMS Balance Inquiry fee','SMS Fee','SMS Lock UnLock Fee',
               'Sepa Credit Fee','Sepa Incoming Payment','Sepa Incoming Payment Fee','Terminal Load','Terminal load fee',
               'Upgrade to Physical Fee','Voucher load','Voucher load fee')

               AND "TransactionTP" !~* ('auth')
               and "TransactionTime" >= '{}' and "TransactionTime" < '{}'

           """.format(str(date_start.year) + str(date_start.month), date_start_cond, end_date)

        query = """
           insert into "{}"."{}"
           {}
           """.format(_schema,_tlbname, querytmp)

        engine.execute(query)
        engine.close()

    else:
        print("this data had been already treated")
def daily_card_status2(Data, filepath, database_type, database_name):
    #### constant variables

    # Table parameter for the temporary table
    TableParameter = {}
    TableParameter["ActivationDate"] = "timestamp without time zone"
    TableParameter["Address1"] = "TEXT"
    TableParameter["Address2"] = "TEXT"
    TableParameter["ApplicationName"] = "VARCHAR (50)"
    TableParameter["AvailableBalance"] = "double precision"
    TableParameter["BirthDate"] = "timestamp without time zone"
    TableParameter["CardHolderID"] = "VARCHAR (50)"
    TableParameter["CardStatus"] = "VARCHAR (100)"
    TableParameter["City"] = "VARCHAR (100)"
    TableParameter["Country"] = "VARCHAR (50)"
    TableParameter["CreationDate"] = "timestamp without time zone"
    TableParameter["DistributorCode"] = "INTEGER"
    TableParameter["Email"] = "TEXT"
    TableParameter["ExpirationDate"] = "timestamp without time zone"
    TableParameter["FirstName"] = "TEXT"
    TableParameter["IBAN"] = "TEXT"
    TableParameter["IsExcludedAddress"] = "INTEGER"
    TableParameter["IsRenewal"] = "INTEGER"
    TableParameter["KYC_Status"] = "VARCHAR (50)"
    TableParameter["LastName"] = "TEXT"
    TableParameter["NoMobile"] = "TEXT"
    TableParameter["PostCode"] = "VARCHAR (50)"
    TableParameter["Programme"] = "VARCHAR (50)"
    TableParameter["RenewalDate"] = "timestamp without time zone"
    TableParameter["UpdateBalanceDate"] = "timestamp without time zone"
    TableParameter["UpdateDate"] = "timestamp without time zone"

    keepcol = [
        "CardHolderID", "Email", "FirstName", "LastName", "City", "Country",
        "Card Status", "DistributorCode", "ApplicationName", "Date of Birth",
        "IBAN", "CreatedDate", "UpdatedDate", "Address1", "Address2",
        "PostCode", "KYC Status", "expirydate", "AvailableBalance", "NoMobile",
        "Programme"
    ]

    #### Step 1: Extract the data from the file and keep only updated data
    # extract filedate
    FileName = filepath.split('/')[-1].replace(".csv", "")

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3])

    # based on the file date, identify the appropriate names of columns
    if DateFile > pd.to_datetime('2019-03-12'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "Programme", "VPVR"
        ]

    elif DateFile < pd.to_datetime('2019-01-16'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate"
        ]

    else:

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "UDF3", "VPVR"
        ]

    # add the names of columns to the dataframe
    Data.columns = col_names

    # store the missing columns
    missing_columns = list(set(keepcol).difference(col_names))

    if missing_columns:  # if columns are missing, add them to the dataframe with None values
        for col in missing_columns:
            Data[col] = None

    # keep track of available balance
    tmp_available_balance = Data[["CardHolderID", "AvailableBalance"]]
    tmp_available_balance["UpdateBalanceDate"] = datetime.datetime.now()

    # Fetch the current status values in order to detect changes

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    query = """
    select distinct "CardHolderID","CardStatus","KYC_Status" from "CARD_STATUS"."STATUS_CARTES"
    """
    data_current = pd.read_sql(query, con=engine)

    data_current["CardHolderID"] = data_current["CardHolderID"].astype(str)
    data_current["KYC_Status"] = data_current["KYC_Status"].astype(str)
    data_current["CardStatus"] = data_current["CardStatus"].astype(str)

    #### Step 2: Transform the data

    # transform date columns to pd.datetime format in order to have a consistent format
    # of date over the database
    # Only transform updated date
    Data["UpdatedDate"] = pd.to_datetime(Data["UpdatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["CreatedDate"] = pd.to_datetime(Data["CreatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["Date of Birth"] = pd.to_datetime(Data["Date of Birth"],
                                           format="%b %d %Y %I:%M%p",
                                           errors='coerce')

    # transform expirydate
    Data["expirydate"] = Data["expirydate"].astype(str)
    Data["expirydate"] = "20" + Data["expirydate"].str[0:2] + "-" + Data[
        "expirydate"].str[2:] + "-01"
    Data["expirydate"] = pd.to_datetime(Data["expirydate"],
                                        format='%Y-%m-%d',
                                        errors='coerce')
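    # Illustration: a raw value of "2303" (YYMM) becomes "20" + "23" + "-" + "03" + "-01",
    # i.e. the timestamp 2023-03-01, after the two steps above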

    Data = Data[keepcol]

    # generic addresses (e.g. CreaCard offices) flagged as excluded
    AddressToRemove = [
        "77 OXFORD STREET LONDON", "17 RUE D ORLEANS", "TSA 51760",
        "77 Oxford Street London", "36 CARNABY STREET",
        "36 CARNABY STREET LONDON", "36 CARNABY STREET LONDON", "ADDRESS",
        "17 RUE D ORLEANS PARIS", "CreaCard Espana S L  Paseo de Gracia 59",
        "36 Carnaby Street London",
        "CREACARD SA Pl  Marcel Broodthaers 8 Box 5", "17 Rue D Orleans Paris",
        "CREACARD ESPANA S L  PASEO DE GRACIA 59", "CreaCard 17 rue d Orleans",
        "CREACARD SA PL  MARCEL BROODTHAERS 8 BOX 75",
        "CREACARD SA PL  MARCEL BROODTHAERS 8 BOX 75", "36 Carnaby Street",
        "77 OXFORD STREET"
    ]

    Data["IsExcludedAddress"] = (
        Data.Address1.isin(AddressToRemove)).astype(int)

    Data["ActivationDate"] = pd.NaT
    Data["IsRenewal"] = 0
    Data["RenewalDate"] = pd.NaT

    Data = Data[sorted(Data.columns)]

    colnames = [
        "ActivationDate", "Address1", "Address2", "ApplicationName",
        "AvailableBalance", "CardStatus", "CardHolderID", "City", "Country",
        "CreationDate", "BirthDate", "DistributorCode", "Email", "FirstName",
        "IBAN", "IsExcludedAddress", "IsRenewal", "KYC_Status", "LastName",
        "NoMobile", "PostCode", "Programme", "RenewalDate", "UpdateDate",
        "ExpirationDate"
    ]

    Data.columns = colnames

    Data["UpdateBalanceDate"] = datetime.datetime.now()

    Data = Data[sorted(Data.columns)]

    Data.loc[(Data["KYC_Status"] == '0') | (Data["KYC_Status"] == '0.0') |
             (Data["KYC_Status"] == 0), "KYC_Status"] = 'Anonyme'
    Data.loc[(Data["KYC_Status"] == '1') | (Data["KYC_Status"] == '1.0') |
             (Data["KYC_Status"] == 1), "KYC_Status"] = 'SDD'
    Data.loc[(Data["KYC_Status"] == '2') | (Data["KYC_Status"] == '2.0') |
             (Data["KYC_Status"] == 2), "KYC_Status"] = 'KYC'
    Data.loc[(Data["KYC_Status"] == '3') | (Data["KYC_Status"] == '3.0') |
             (Data["KYC_Status"] == 3), "KYC_Status"] = 'KYC LITE'

    Data["DistributorCode"] = Data["DistributorCode"].fillna(-1)
    Data["DistributorCode"] = Data["DistributorCode"].astype(int)

    Data["CardHolderID"] = Data["CardHolderID"].astype(str)
    Data["KYC_Status"] = Data["KYC_Status"].astype(str)
    Data["CardStatus"] = Data["CardStatus"].astype(str)

    # Set 1: identify rows whose CardStatus or KYC_Status changed

    data_new = Data[["CardHolderID", "CardStatus", "KYC_Status"]]
    outer_join = data_current.merge(data_new, how='outer', indicator=True)
    outer_join = outer_join[outer_join["_merge"] == "right_only"]

    # set 2 : identify new cardholder ID
    new_card_holder_id = set(outer_join["CardHolderID"].unique()).difference(
        data_current["CardHolderID"].unique())

    ### set 3 : insert old values into changes table
    data_to_change = data_current[data_current["CardHolderID"].isin(
        set(outer_join.loc[
            ~outer_join["CardHolderID"].isin(new_card_holder_id),
            "CardHolderID"]))]

    FileName = filepath.split('/')[-1].replace(".csv", "")
    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3]) - datetime.timedelta(days=1)

    data_to_change["dt_change"] = DateFile

    InsertTableIntoDatabase(data_to_change,
                            "CHANGE_STATUS_CARTES",
                            "CARD_STATUS",
                            "Postgres",
                            "Creacard_Calypso",
                            DropTable=False)

    # find cardholders updated on the file date plus those whose status changed

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3])
    update_set = Data[(Data["UpdateDate"] >= DateFile)
                      & (Data["UpdateDate"] < DateFile + pd.Timedelta(days=1))]
    update_set = update_set.reset_index(drop=True)

    import numpy as np
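    # Union of cardholders whose status/KYC changed (outer_join) and cardholders whose
    # "UpdateDate" falls on the file date (update_set); the placeholder id '0' is discarded below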
    kk = pd.DataFrame(
        np.concatenate((outer_join["CardHolderID"].unique(),
                        update_set["CardHolderID"].unique()),
                       axis=0))
    kk.columns = ["CardHolderID"]
    kk = kk[kk["CardHolderID"] != '0']

    to_update = Data[Data["CardHolderID"].isin(kk["CardHolderID"].unique())]

    #### Step 3: Load these data into a temporary table

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
    DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_STATUS_CARTES"
    """
    con_postgres.execute(query)
    con_postgres.close()

    InsertTableIntoDatabase(to_update,
                            "TMP_STATUS_CARTES",
                            "TMP_UPDATE",
                            database_type,
                            database_name,
                            DropTable=True,
                            TableDict=TableParameter,
                            SizeChunck=10000)

    #### Step 5: Update new values

    query_delete = """

       DELETE FROM "CARD_STATUS"."STATUS_CARTES"
       USING "TMP_UPDATE"."TMP_STATUS_CARTES"
       WHERE 
       "CARD_STATUS"."STATUS_CARTES"."CardHolderID" = "TMP_UPDATE"."TMP_STATUS_CARTES"."CardHolderID"

       """

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    con_postgres.execute(query_delete)
    con_postgres.close()

    query_update = """
    
    UPDATE "TMP_UPDATE"."TMP_STATUS_CARTES" 
    SET "IsRenewal" = CASE WHEN "DistributorCode" in ('203','914','915') then 1
    else 0
    end 
    """

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    con_postgres.execute(query_update)
    con_postgres.close()

    query = """

           INSERT INTO "CARD_STATUS"."STATUS_CARTES"
           SELECT *
           FROM "TMP_UPDATE"."TMP_STATUS_CARTES" 

          """

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    con_postgres.execute(query)
    con_postgres.close()

    # drop the temporary table
    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
          DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_STATUS_CARTES"
          """
    con_postgres.execute(query)
    con_postgres.close()

    #### Step 6: Update available balance for all CHID

    tlb_param_balance = dict()
    tlb_param_balance["AvailableBalance"] = "double precision"
    tlb_param_balance["CardHolderID"] = "VARCHAR (50)"
    tlb_param_balance["UpdateBalanceDate"] = "timestamp without time zone"

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
       DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_AVAILABLE_BALANCE"
       """
    con_postgres.execute(query)
    con_postgres.close()

    InsertTableIntoDatabase(tmp_available_balance,
                            "TMP_AVAILABLE_BALANCE",
                            "TMP_UPDATE",
                            database_type,
                            database_name,
                            DropTable=True,
                            TableDict=tlb_param_balance,
                            SizeChunck=10000)

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query_balance = """

    UPDATE "CARD_STATUS"."STATUS_CARTES"
    SET "AvailableBalance" = T1."AvailableBalance",
    "UpdateBalanceDate" = T1."UpdateBalanceDate"
    from "TMP_UPDATE"."TMP_AVAILABLE_BALANCE" as T1
    WHERE 
    "CARD_STATUS"."STATUS_CARTES"."CardHolderID" = T1."CardHolderID"

    """
    con_postgres.execute(query_balance)
    con_postgres.close()

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
       DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_AVAILABLE_BALANCE"
       """
    con_postgres.execute(query)
    con_postgres.close()

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query = """

    update "CARD_STATUS"."STATUS_CARTES" as T1
    SET "ActivationDate" = "ActivationTime"
    FROM "CARD_STATUS"."ACTIVATION_REPORT" as T2
    WHERE 
    T1."CardHolderID" = T2."CardHolderID" and 
    "ActivationDate" is null 

    """

    con_postgres.execute(query)
    con_postgres.close()
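
# Usage sketch for daily_card_status2 (the path and read_csv options are illustrative
# assumptions; the file name must end with "-YYYY-MM-DD.csv" because the date is parsed from it):
def _example_daily_card_status2():
    filepath = "/tmp/card_status/cardstatus-2019-03-15.csv"
    # assumed: the raw export has no header row; column names are assigned inside the function
    Data = pd.read_csv(filepath, sep=",", header=None, dtype=str)
    daily_card_status2(Data, filepath, "Postgres", "Creacard_Calypso")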
def create_tmp_id(schema, tlb, schema_main):

    if sys.platform == "win32":
        folder_json = os.path.expanduser('~') + "\\conf_python\\unique_id_conditions.json"
    else:
        folder_json = os.environ['HOME'] + "/conf_python/unique_id_conditions.json"
    with open(folder_json, 'r') as JSON:
        conditions = json.load(JSON)

    condition = conditions["exclusion_cartes"]["request"]
    condition_on_email = conditions["condition_email"]["dataframe"]
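    # Assumed shape of unique_id_conditions.json (illustrative values, inferred from the
    # keys read in this function):
    # {
    #     "exclusion_cartes":      {"request": "\"CardStatus\" not in ('Closed')"},
    #     "condition_email":       {"dataframe": ".*test.*"},
    #     "condition_combinaison": {"LastName": ".*test.*", "BirthDate": "1900-01-01,1901-01-01"}
    # }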

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

    CreateSchema(engine, schema_main)

    query = """

      select "CardHolderID", "NoMobile", lower("Email") as "Email", "FirstName", "LastName", "BirthDate", "PostCode", "Address1", "Address2",
      "ActivationDate"
      from "{}"."{}"
      where {}

      """.format(schema, tlb, condition)

    data = pd.read_sql(query, con=engine)

    engine.close()
    for var in ["FirstName", "LastName", "Address1", "Address2", "PostCode", "Email"]:
        data[var] = data[var].str.encode('utf-8').astype(str)
        data.loc[data[var].isnull(), var] = ""
        data[var] = data[var].str.strip(" ")
        data[var] = data[var].str.replace(" ", "")
        data[var] = data[var].str.lower()

    data = data[~data["Email"].str.contains('.*creacard.*|.*prepaidfinancial.*|.*financial.*', regex=True)]

    data["GoodEmail"] = 1
    data.loc[data["Email"].str.contains(condition_on_email, regex=True), "GoodEmail"] = 0

    data["GoodCombinaison"] = 1
    data.loc[(data["LastName"].str.contains(conditions["condition_combinaison"]["LastName"], regex=True)) |
             (data["BirthDate"].isnull()) | (data["BirthDate"].isin(conditions["condition_combinaison"]["BirthDate"].split(","))), "GoodCombinaison"] = 0

    # Remove a leading "00" at the start of the string

    data["NoMobile"] = data["NoMobile"].str.replace("^00", "", regex=True)

    # Remove a trailing ".0"

    data["NoMobile"] = data["NoMobile"].str.replace(r"\.0$", "", regex=True)

    # Remove literal '|' characters

    data["NoMobile"] = data["NoMobile"].str.replace(r"\|", "", regex=True)

    query = """
 
    DROP TABLE IF EXISTS "CUSTOMERS"."TMP_USER_ID" CASCADE

    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    
    CREATE TABLE "{}"."TMP_USER_ID"(
    
        "CardHolderID" VARCHAR(50),
        "NoMobile" TEXT,
        "Email" TEXT,
        "FirstName" TEXT,
        "LastName" TEXT,
        "BirthDate" TEXT,
        "PostCode" TEXT,
        "Address1" TEXT, 
        "Address2" TEXT,
        "ActivationDate" timestamp without time zone,
        "GoodEmail" INTEGER,
        "GoodCombinaison" INTEGER
    )
    
    """.format(schema_main)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    data = data[~data["NoMobile"].isnull()]

    InsertTableIntoDatabase(data, TlbName="TMP_USER_ID", Schema=schema_main,
                            database_name="Creacard_Calypso",
                            database_type="Postgres",
                            DropTable=False,
                            InsertInParrell=False)