예제 #1
0
파일: utils.py 프로젝트: srijagurijala/dbms
def load_order(conn, filename):
    # this function writes to the `account_order` table
    # "order_id";"account_id";"bank_to";"account_to";"amount";"k_symbol"
    """
CREATE TABLE account_order (
    order_id number NOT NULL,
    account_id number NOT NULL,
    bank_to char(2) NOT NULL,
    account_to number NOT NULL,
    amount decimal(10,2) NOT NULL,
    category char(3) NOT NULL,
PRIMARY KEY (order_id),
FOREIGN KEY(account_id) REFERENCES account (account_id),
FOREIGN KEY(account_to) REFERENCES account (account_id)
);
"""
    df = pd.read_csv(filename, sep=';', low_memory=False, nrows=NROWS)
    cols = {'k_symbol': 'category'}
    df.rename(columns=cols, inplace=True)
    log.info("account_order columns: {}".format(df.columns))

    # Apply english appreviations
    #   'POJISTNE'  - INS => stands for Insurance Payment
    #   'SIPO'      - HSE => stands for Household Payment
    #   'LEASING'   - LSE => stands for Leasing Payment
    #   'UVER'      - LOA => stands for Loan Payment
    #   ' '         - UNK => stands for Unknown
    df['category'].replace({'POJISTNE': 'INS'}, inplace=True)
    df['category'].replace({'SIPO': 'HSE'}, inplace=True)
    df['category'].replace({'LEASING': 'LSN'}, inplace=True)
    df['category'].replace({'UVER': 'LOA'}, inplace=True)
    df['category'].replace({' ': 'UNK'}, inplace=True)

    pprint(df)
    log.info('Starting data import for: {} ({} rows)'.format(
        filename, len(df)))

    # dtype : dict of column name to SQL type, default None
    dtype = {
        'order_id': sqlt.Integer,
        'account_id': sqlt.Integer,
        'bank_to': sqlt.NCHAR(2),
        'account_to': sqlt.Integer,
        'amount': sqlt.Numeric,
        'category': sqlt.NCHAR(2),
    }
    df.to_sql('account_order',
              con=conn,
              if_exists='append',
              index=False,
              dtype=dtype)
    log.info('Finished data import for: {}'.format(filename))
예제 #2
0
    def test_reflect_nvarchar(self):
        metadata = self.metadata
        Table(
            "tnv",
            metadata,
            Column("nv_data", sqltypes.NVARCHAR(255)),
            Column("c_data", sqltypes.NCHAR(20)),
        )
        metadata.create_all()
        m2 = MetaData(testing.db)
        t2 = Table("tnv", m2, autoload=True)
        assert isinstance(t2.c.nv_data.type, sqltypes.NVARCHAR)
        assert isinstance(t2.c.c_data.type, sqltypes.NCHAR)

        if testing.against("oracle+cx_oracle"):
            assert isinstance(
                t2.c.nv_data.type.dialect_impl(testing.db.dialect),
                cx_oracle._OracleUnicodeStringNCHAR,
            )

            assert isinstance(
                t2.c.c_data.type.dialect_impl(testing.db.dialect),
                cx_oracle._OracleNChar,
            )

        data = u("m’a réveillé.")
        with testing.db.connect() as conn:
            conn.execute(t2.insert(), dict(nv_data=data, c_data=data))
            nv_data, c_data = conn.execute(t2.select()).first()
            eq_(nv_data, data)
            eq_(c_data, data + (" " * 7))  # char is space padded
            assert isinstance(nv_data, util.text_type)
            assert isinstance(c_data, util.text_type)
예제 #3
0
    def test_reflect_nvarchar(self, metadata, connection):
        Table(
            "tnv",
            metadata,
            Column("nv_data", sqltypes.NVARCHAR(255)),
            Column("c_data", sqltypes.NCHAR(20)),
        )
        metadata.create_all(connection)
        m2 = MetaData()
        t2 = Table("tnv", m2, autoload_with=connection)
        assert isinstance(t2.c.nv_data.type, sqltypes.NVARCHAR)
        assert isinstance(t2.c.c_data.type, sqltypes.NCHAR)

        if testing.against("oracle+cx_oracle"):
            assert isinstance(
                t2.c.nv_data.type.dialect_impl(connection.dialect),
                cx_oracle._OracleUnicodeStringNCHAR,
            )

            assert isinstance(
                t2.c.c_data.type.dialect_impl(connection.dialect),
                cx_oracle._OracleNChar,
            )

        data = "m’a réveillé."
        connection.execute(t2.insert(), dict(nv_data=data, c_data=data))
        nv_data, c_data = connection.execute(t2.select()).first()
        eq_(nv_data, data)
        eq_(c_data, data + (" " * 7))  # char is space padded
        assert isinstance(nv_data, str)
        assert isinstance(c_data, str)
예제 #4
0
파일: utils.py 프로젝트: srijagurijala/dbms
def load_account(conn, filename):
    df = pd.read_csv(filename, sep=';', low_memory=False, nrows=NROWS)
    df['date'] = pd.to_datetime(df['date'], format="%y%m%d")
    df.rename(columns={"date": "created_date"}, inplace=True)
    df.sort_values(by=['account_id'], inplace=True)

    log.info("columns: {}".format(df.columns))
    """
    Replace `frequency` strings with abbreviations

    - POPLATEK MESICNE changed to MONTHLY ISSUANCE (MO)
    - POPLATEK TYDNE changed to WEEKLY ISSUANCE (WE)
    - POPLATEK PO OBRATU change to ISSUANCE AFTER TRANSACTION (AT)
    """

    df['frequency'].replace({'POPLATEK MESICNE': 'MO'}, inplace=True)
    df['frequency'].replace({'POPLATEK TYDNE': 'WE'}, inplace=True)
    df['frequency'].replace({'POPLATEK PO OBRATU': 'AT'}, inplace=True)

    log.info('Starting data import for: {} ({} rows)'.format(
        filename, len(df)))
    """
CREATE TABLE account (
    account_id number NOT NULL,
    district_id number NOT NULL,
    frequency char(2) NOT NULL,
    created_date date NOT NULL,
PRIMARY KEY (account_id),
FOREIGN KEY(district_id) REFERENCES district (district_id) INITIALLY DEFERRED DEFERRABLE
) ;
CREATE INDEX created_date ON account(created_date);
    """
    dtype = {
        'account_id': sqlt.Integer,
        'district_id': sqlt.Integer,
        'frequency': sqlt.NCHAR(2),
        'created_date': sqlt.Date,
    }
    df.to_sql('account',
              con=conn,
              if_exists='append',
              index=False,
              dtype=dtype)
    # df.to_sql('account', con=conn, if_exists='append', index=False)
    log.info('Finished data import for: {}'.format(filename))

    data = conn.execute("SELECT * FROM account WHERE ROWNUM <= 10").fetchall()
    pprint(data)
예제 #5
0
파일: utils.py 프로젝트: srijagurijala/dbms
def load_loan(conn, filename):
    """
CREATE TABLE loan (
    loan_id    number         NOT NULL,
    account_id number         NOT NULL,
    loan_date  date           NOT NULL,
    amount    decimal(10, 2) NOT NULL,
    duration   number,
    payments   decimal(10, 2) NOT NULL,
    status     char(1)        NOT NULL,
PRIMARY KEY (loan_id),
FOREIGN KEY(account_id) REFERENCES account (account_id)
);
"""
    df = pd.read_csv(filename, sep=';', low_memory=False, nrows=NROWS)
    print("== loan df size: {}".format(len(df)))

    cols = {
        'date': 'loan_date',
    }
    df.rename(columns=cols, inplace=True)
    log.info("{} columns: {}".format(filename, df.columns))
    df['loan_date'] = pd.to_datetime(df['loan_date'], format="%y%m%d")

    dtype = {
        'loan_id': sqlt.Integer,
        'account_id': sqlt.Integer,
        'loan_date': sqlt.Date,
        'amount': sqlt.Numeric(10, 2),
        'duration': sqlt.Integer,
        'payments': sqlt.Numeric(10, 2),
        'status': sqlt.NCHAR(1),
    }

    log.info('Starting data import for: {} ({} rows)'.format(
        filename, len(df)))
    df.to_sql('loan', con=conn, if_exists='append', index=False, dtype=dtype)
    log.info('Finished data import for: {}'.format(filename))
예제 #6
0
파일: utils.py 프로젝트: srijagurijala/dbms
def load_transaction(conn, filename):
    """
"trans_id";"account_id";"date";"type";"operation";"amount";"balance";"k_symbol";"bank";"account"

CREATE TABLE transaction (
    trans_id number NOT NULL,
    account_id number NOT NULL,
    created_date date NOT NULL,
    trans_type char(2) NOT NULL, -- CR+=credit, DB=-debit
    operation char(3) NOT NULL,
    amount decimal(10,2) NOT NULL,
    balance decimal(10,2) NOT NULL,
    category char(3) NOT NULL,
    bank_to char(2),
    account_to number,
PRIMARY KEY (trans_id),
FOREIGN KEY(account_id) REFERENCES account (account_id)
);

"""
    # df = pd.read_csv(filename, sep=';', low_memory=False, nrows=NROWS)
    df = pd.read_csv(filename, sep=';', low_memory=False)
    print("== df size: {}".format(len(df)))
    cols = {
        'date': 'created_date',
        'type': 'trans_type',
        'k_symbol': 'category',
        'bank': 'bank_to',
        'account': 'account_to',
    }

    df.rename(columns=cols, inplace=True)
    log.info("{} columns: {}".format(filename, df.columns))

    df['created_date'] = pd.to_datetime(df['created_date'], format="%y%m%d")
    df['trans_type'].replace({'PRIJEM': 'CR'}, inplace=True)  # credit
    df['trans_type'].replace({'VYDAJ': 'DB'}, inplace=True)  # debit
    df['trans_type'].replace({'VYBER': 'DB'}, inplace=True)  # debit

    # operation
    #   PREVOD NA UCET   208283 - remittance to another bank (REM)
    #   PREVOD Z UCTU     65226 - collection from another bank (COL)
    #   VKLAD            156743 - credit in cash (CRE)
    #   VYBER            434918 - withdrawal in cash (WCA)
    #   VYBER KARTOU       8036 - credit card withdrawal (WCC)
    df['operation'].replace({'PREVOD NA UCET': 'REM'}, inplace=True)
    df['operation'].replace({'PREVOD Z UCTU': 'COL'}, inplace=True)
    df['operation'].replace({'VKLAD': 'CRE'}, inplace=True)
    df['operation'].replace({'VYBER': 'WCA'}, inplace=True)
    df['operation'].replace({'VYBER KARTOU': 'WCC'}, inplace=True)

    # category
    #  DUCHOD         30338 - old-age pension
    #  POJISTNE       18500 - insurance payment
    #  SANKC. UROK     1577 - sanction interest if negative balance
    #  SIPO          118065 - household
    #  SLUZBY        155832 - payment for statement
    #  UROK          183114 - interest credited
    #  UVER           13580 -  loan payment
    df['category'].replace({'DUCHOD': 'PEN'}, inplace=True)  # pension
    df['category'].replace({'POJISTNE': 'INS'}, inplace=True)  # insurance
    df['category'].replace({'SANKC. UROK': 'INB'},
                           inplace=True)  # interest negative balance
    df['category'].replace({'SIPO': 'HSE'}, inplace=True)  # household
    df['category'].replace({'SLUZBY': 'PST'},
                           inplace=True)  # payment for statement
    df['category'].replace({'UROK': 'INC'}, inplace=True)  # interest credited
    df['category'].replace({'UVER': 'LOA'}, inplace=True)  # loan payment

    # print("operation: {}".format(df.groupby('operation').size()))
    # print("category: {}".format(df.groupby('category').size()))

    df['category'] = df['category'].replace(np.nan, 'UNK')
    df['category'] = df['category'].replace(' ', 'UNK')
    df['category'] = df['category'].replace('', 'UNK')
    df['operation'] = df['operation'].replace(np.nan, 'UNK')
    df['operation'] = df['operation'].replace(' ', 'UNK')
    df['operation'] = df['operation'].replace('', 'UNK')

    print("trans_type: {}".format(df.groupby('trans_type').size()))
    print("operation: {}".format(df.groupby('operation').size()))
    print("category: {}".format(df.groupby('category').size()))

    dtype = {
        'trans_id': sqlt.Integer,
        'account_id': sqlt.Integer,
        'create_date': sqlt.Date,
        'trans_type': sqlt.NCHAR(2),
        'operation': sqlt.NCHAR(3),
        'amount': sqlt.Numeric(10, 2),
        'balance': sqlt.Numeric(10, 2),
        'category': sqlt.NCHAR(3),
        'bank_to': sqlt.NCHAR(2),
        'account_to': sqlt.Integer,
    }

    log.info('Starting data import for: {} ({} rows)'.format(
        filename, len(df)))
    # df2 = df.iloc[269000:270000]
    # print("== df2 size: {}".format(len(df2)))
    # df2.to_sql('transaction2', con=conn, if_exists='append', index=False, dtype=dtype, chunksize=200)
    df.to_sql('transaction',
              con=conn,
              if_exists='append',
              index=False,
              dtype=dtype,
              chunksize=200)
    log.info('Finished data import for: {}'.format(filename))