Example #1
    def get_data(self, q=None, nrow=0, table=True, **kwargs):
        '''
        Note: q can be a table name or an SQL query.
        Ref:
           http://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-sql
        Options:
          **kwargs can carry the pandas options:
          parse_dates=['Date'] - explicitly force columns to be parsed as dates
          parse_dates={'Date': '%Y-%m-%d'}
          index_col='id' - the name of the column to use as the DataFrame index
          columns=['Col_1', 'Col_2'] - specify a subset of columns to be read
        '''
        if q is None:
            if self.cursorisopen:
                res = self.get_data_from_cursor(nrow=nrow)
            else:
                print('WARNING: Cursor is not open and no query is passed.')
                res = []
        else:
            # get data by querying the database
            if table:
                # q names a table
                res = psql.read_sql_table(q, self.engine, **kwargs)
            else:
                # q is an SQL query (read_sql also accepts a table name)
                res = psql.read_sql(q, self.engine, **kwargs)
            #cur = self.get_cursor(q)
            #res = self.get_data_from_cursor(cur=cur, nrow=nrow)
            #cur.close()

        return res
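The options listed in this docstring are simply forwarded to pandas. A minimal usage sketch, assuming an instance `db` of this class backed by an open SQLAlchemy engine; the names 'orders', 'id', 'Date', and 'total' are illustrative only and do not appear anywhere in this code:

    # Hypothetical table and column names, used only to illustrate the options.
    df = db.get_data('orders',
                     index_col='id',                     # use 'id' as the DataFrame index
                     parse_dates={'Date': '%Y-%m-%d'},   # force 'Date' to be parsed as dates
                     columns=['Date', 'total'])          # read only a subset of columns

    # Pass an SQL query instead of a table name.
    df_recent = db.get_data("SELECT * FROM orders WHERE total > 100", table=False)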
Example #2
    def test_default_date_load(self):
        df = sql.read_sql_table("types_test_data", self.conn)

        # IMPORTANT - sqlite has no native date type, so the column shouldn't be
        # parsed, but MySQL dates SHOULD be converted.
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "DateCol loaded with incorrect type")
Example #4
    def test_read_table_columns(self):
        # test columns argument in read_table
        sql.to_sql(self.test_frame1, 'test_frame', self.conn)

        cols = ['A', 'B']
        result = sql.read_sql_table('test_frame', self.conn, columns=cols)
        self.assertEqual(result.columns.tolist(), cols,
                         "Columns not correctly selected")
Example #6
    def test_date_parsing(self):
        # No Parsing
        df = sql.read_sql_table("types_test_data", self.conn)

        df = sql.read_sql_table("types_test_data", self.conn,
                                parse_dates=['DateCol'])
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "DateCol loaded with incorrect type")

        df = sql.read_sql_table("types_test_data", self.conn,
                                parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "DateCol loaded with incorrect type")

        df = sql.read_sql_table("types_test_data", self.conn, parse_dates={
                            'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}})
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_table(
            "types_test_data", self.conn, parse_dates=['IntDateCol'])
        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_table(
            "types_test_data", self.conn, parse_dates={'IntDateCol': 's'})
        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_table(
            "types_test_data", self.conn, parse_dates={'IntDateCol': {'unit': 's'}})
        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")
Example #7
    def test_read_table_index_col(self):
        # test index_col argument in read_table
        sql.to_sql(self.test_frame1, 'test_frame', self.conn)

        result = sql.read_sql_table('test_frame', self.conn, index_col="index")
        self.assertEqual(result.index.names, ["index"],
                         "index_col not correctly set")

        result = sql.read_sql_table('test_frame', self.conn, index_col=["A", "B"])
        self.assertEqual(result.index.names, ["A", "B"],
                         "index_col not correctly set")

        result = sql.read_sql_table('test_frame', self.conn, index_col=["A", "B"],
                                columns=["C", "D"])
        self.assertEqual(result.index.names, ["A", "B"],
                         "index_col not correctly set")
        self.assertEqual(result.columns.tolist(), ["C", "D"],
                         "columns not set correctly whith index_col")
Example #8
    def test_read_sql_delegate(self):
        iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
        iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
        tm.assert_frame_equal(
            iris_frame1, iris_frame2,
            "read_sql and read_sql_query have not the same"
            " result with a query")

        iris_frame1 = sql.read_sql_table('iris', self.conn)
        iris_frame2 = sql.read_sql('iris', self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)
Example #9
    def test_mixed_dtype_insert(self):
        # see GH6509
        s1 = Series(2**25 + 1, dtype=np.int32)
        s2 = Series(0.0, dtype=np.float32)
        df = DataFrame({'s1': s1, 's2': s2})

        # write and read again
        df.to_sql("test_read_write", self.conn, index=False)
        df2 = sql.read_sql_table("test_read_write", self.conn)

        tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
Example #11
    def test_read_sql_delegate(self):
        iris_frame1 = sql.read_sql_query(
            "SELECT * FROM iris", self.conn)
        iris_frame2 = sql.read_sql(
            "SELECT * FROM iris", self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2,
                              "read_sql and read_sql_query have not the same"
                              " result with a query")

        iris_frame1 = sql.read_sql_table('iris', self.conn)
        iris_frame2 = sql.read_sql('iris', self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)
Example #12
    def test_read_table_index_col(self):
        # test index_col argument in read_table
        sql.to_sql(self.test_frame1, 'test_frame', self.conn)

        result = sql.read_sql_table('test_frame', self.conn, index_col="index")
        self.assertEqual(result.index.names, ["index"],
                         "index_col not correctly set")

        result = sql.read_sql_table('test_frame',
                                    self.conn,
                                    index_col=["A", "B"])
        self.assertEqual(result.index.names, ["A", "B"],
                         "index_col not correctly set")

        result = sql.read_sql_table('test_frame',
                                    self.conn,
                                    index_col=["A", "B"],
                                    columns=["C", "D"])
        self.assertEqual(result.index.names, ["A", "B"],
                         "index_col not correctly set")
        self.assertEqual(result.columns.tolist(), ["C", "D"],
                         "columns not set correctly whith index_col")
Example #13
    def test_default_type_conversion(self):
        df = sql.read_sql_table("types_test_data", self.conn)

        self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
                        "FloatCol loaded with incorrect type")
        self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
                        "IntCol loaded with incorrect type")
        self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_),
                        "BoolCol loaded with incorrect type")

        # Int column with NA values stays as float
        self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
                        "IntColWithNull loaded with incorrect type")
        # Bool column with NA values becomes object
        self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object_),
                        "BoolColWithNull loaded with incorrect type")
Example #15
    def test_default_type_conversion(self):
        df = sql.read_sql_table("types_test_data", self.conn)

        self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
                        "FloatCol loaded with incorrect type")
        self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
                        "IntCol loaded with incorrect type")
        # MySQL has no real BOOL type (it's an alias for TINYINT)
        self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer),
                        "BoolCol loaded with incorrect type")

        # Int column with NA values stays as float
        self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
                        "IntColWithNull loaded with incorrect type")
        # Bool column with NA = int column with NA values => becomes float
        self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
                        "BoolColWithNull loaded with incorrect type")
Example #17
    def test_default_type_conversion(self):
        df = sql.read_sql_table("types_test_data", self.conn)

        self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
                        "FloatCol loaded with incorrect type")
        self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
                        "IntCol loaded with incorrect type")
        # sqlite has no boolean type, so integer type is returned
        self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer),
                        "BoolCol loaded with incorrect type")

        # Int column with NA values stays as float
        self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
                        "IntColWithNull loaded with incorrect type")
        # Non-native Bool column with NA values stays as float
        self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
                        "BoolColWithNull loaded with incorrect type")
Example #19
    def test_date_parsing(self):
        # No Parsing
        df = sql.read_sql_table("types_test_data", self.conn)

        df = sql.read_sql_table("types_test_data",
                                self.conn,
                                parse_dates=['DateCol'])
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "DateCol loaded with incorrect type")

        df = sql.read_sql_table("types_test_data",
                                self.conn,
                                parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "DateCol loaded with incorrect type")

        df = sql.read_sql_table(
            "types_test_data",
            self.conn,
            parse_dates={'DateCol': {
                'format': '%Y-%m-%d %H:%M:%S'
            }})
        self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_table("types_test_data",
                                self.conn,
                                parse_dates=['IntDateCol'])
        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_table("types_test_data",
                                self.conn,
                                parse_dates={'IntDateCol': 's'})
        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_table("types_test_data",
                                self.conn,
                                parse_dates={'IntDateCol': {
                                    'unit': 's'
                                }})
        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")
Example #20
    def migrate_db_to_rds(self, tb_name):
        src_conn = self.engine_transform_data.connect()

        dst_conn = self.engine_source.connect()
        try:
            df = sqlio.read_sql_table(tb_name, src_conn, schema='public')

            text_field = self.check_type(tb_name)

            df.to_sql(tb_name,
                      dst_conn,
                      schema='public',
                      index=False,
                      if_exists='append',
                      method='multi',
                      dtype=text_field)

        except psycopg2.OperationalError:
            print('OperationalError!!!!')

        finally:
            self.engine_transform_data.dispose()
            self.engine_source.dispose()
Example #21
def encrypt_sql_database_values(base_db_name='', base_db_user='', base_db_password='',
                                enc_db_name='', enc_db_user='', enc_db_password=''):
    """
    Encrypts database values so the database can be posted and used on an AWS server
    for a personal portfolio. This protects Hyundai's private emission report data
    while still allowing the demo to visualize similar trends in the data.
    Two types of encryption:
        - Encrypt text fields by creating a dictionary that randomly shuffles letters/numbers for each value.
        - Encrypt numbers by applying a random factor selected per vehicle name.
    :param base_db_name: name of the database to be encrypted
    :param base_db_user: name of the user for the database
    :param base_db_password: password for the database user
    :param enc_db_name: name of the new database containing encrypted values
    :param enc_db_user: name of the user for the encrypted database
    :param enc_db_password: password for the encrypted database user
    :return: None
    """

    # Step 0: Import required libraries
    import psycopg2
    import sqlalchemy
    import numpy as np
    import pandas as pd
    import pandas.io.sql as pdsql
    from random import uniform as rand_unf

    # Step 1: Connect to database.
    connection_str = "{0}:{1}@localhost/{2}".format(base_db_user, base_db_password, base_db_name.replace("\\",""))
    engine = sqlalchemy.create_engine("postgresql+psycopg2://"+connection_str)

    # Step 2: Load in data as pandas dataframes to minimize calls to SQL Database
    bag_em = pdsql.read_sql_table('bag_em', engine)
    dynamometer = pdsql.read_sql_table('dynamometer', engine)
    fuel = pdsql.read_sql_table('fuel', engine)
    fuel_economy = pdsql.read_sql_table('fuel_economy', engine)
    general = pdsql.read_sql_table('general', engine)
    pm = pdsql.read_sql_table('pm', engine)
    test_info = pdsql.read_sql_table('test_info', engine)
    time_info = pdsql.read_sql_table('time_info', engine)
    vehicle = pdsql.read_sql_table('vehicle', engine)
    j2951 = pdsql.read_sql_table('j2951', engine)

    # Step 3: Create link of test_id to vehicle
    id_veh_map = {}
    for _, row in vehicle.iterrows():
        id_veh_map[row['_id']] = row['vin']

    # Step 4: Loop through bag emissions and create list of factors for each
    # print bag_em.head()
    veh_em_offset = {}
    for veh_name in id_veh_map.values():
        if veh_name not in veh_em_offset:
            veh_em_offset[veh_name] = rand_unf(0.6, 1.4)
    for col_name in bag_em.columns:
        if col_name not in ['_id', 'phase', 'emission_type']:
            bag_em[col_name] = bag_em[col_name] * \
                bag_em['_id'].apply(lambda x: veh_em_offset[id_veh_map[x]])
    # print bag_em.head()

    # Step 5: Create new random numbers for dynamometer coefficients
    # (diff random number for inertia and other coefficients)
    # print dynamometer.head()
    veh_dyno_offset = {}
    for key in veh_em_offset:
        veh_dyno_offset[key] = {}
        veh_dyno_offset[key]['inertia'] = rand_unf(0.6, 1.4)
        veh_dyno_offset[key]['coef'] = rand_unf(0.7, 1.3)
    for col_name in dynamometer.columns:
        if col_name == 'inertia':
            dynamometer[col_name] = dynamometer[col_name] * \
                dynamometer['_id'].apply(lambda x: veh_dyno_offset[id_veh_map[x]]['inertia'])
        elif col_name not in ['_id', 'inertia']:
            dynamometer[col_name] = dynamometer[col_name] * \
                dynamometer['_id'].apply(lambda x: veh_dyno_offset[id_veh_map[x]]['coef'])
    # print dynamometer.head()


    # Step 6: Randomize fuel numbers (multiply each numeric column by its own random factor)
    # print fuel.head()
    fuel_name_map = {}
    i = 1
    for fuel_name in fuel['fuel'].unique():
        fuel_name_map[fuel_name] = 'Fuel #' + str(i)
        i += 1
    # print fuel_name_map
    fuel['fuel'] = fuel['fuel'].apply(lambda x: fuel_name_map[x])
    for col_name in fuel.columns:
        if col_name not in ['_id', 'fuel', 'r_f']:
            fuel_factor = rand_unf(0.7, 1.4)
            fuel[col_name] = fuel[col_name] * fuel_factor
    # print fuel.head()


    # Step 7: Randomize general info:
    #   - modify nmog_ratio, hcho_ratio, and driver/operator/requestor names
    #   - remove test_options, shift_list, event_list, and remarks
    # print general.head()

    general['nmog_ratio'] = general['nmog_ratio'] * rand_unf(0.7, 1.3)
    general['hcho_ratio'] = general['hcho_ratio'] * rand_unf(0.7, 1.3)
    driver_map = {}
    requestor_map = {}
    i = 1
    for driver in sorted(general['driver'].unique()):
        driver_map[driver] = 'Technician #' + str(i)
        i += 1
    for operator in sorted(general['operator'].unique()):
        if operator not in driver_map:
            driver_map[operator] = 'Technician #' + str(i)
            i += 1
    # print driver_map
    i = 1
    for requestor in general['requestor'].unique():
        requestor_map[requestor] = 'Engineer #' + str(i)
        i += 1
    # print requestor_map
    general['driver'] = general['driver'].apply(lambda x: driver_map[x])
    general['operator'] = general['operator'].apply(lambda x: driver_map[x])
    general['requestor'] = general['requestor'].apply(lambda x: requestor_map[x])

    general['test_description'] = ''
    general['event_list'] = ''
    general['remarks_pre'] = ''
    general['shift_list'] = ''
    general['remarks_post'] = ''
    general['test_options'] = ''

    # print general.head()


    # Step 8: Randomize pm data (mass_emission and mass_per_distance - select random factor by vehicle)
    for col_name in pm.columns:
        if col_name not in ['_id', 'phase']:
            pm[col_name] = pm[col_name] * \
                           pm['_id'].apply(lambda x: veh_em_offset[id_veh_map[x]])


    # Step 9: Randomize vehicle info (all of it, except for Odometer and tire pressure)
    vehicle['axle_ratio'] = np.nan
    vehicle['curb_weight'] = dynamometer['inertia']
    vehicle['engine_catalyst'] = None
    vehicle['engine_code'] = None
    engine_desc_map = {}
    i = 1
    for desc in vehicle['engine_description'].unique():
        engine_desc_map[desc] = "Engine Type #" + str(i)
        i += 1
    vehicle['engine_description'] = vehicle['engine_description'].apply(lambda x: engine_desc_map[x])
    rand_factor = rand_unf(0.7, 1.3)
    vehicle['engine_displacement'] = vehicle['engine_displacement'].apply(lambda x: round(x * rand_factor, 1))
    vehicle['gross_weight'] = dynamometer['inertia']
    vehicle['shift_point'] = None

    test_group_map = {}
    i = 1
    for group in vehicle['test_group'].unique():
        if group is None:
            test_group_map[group] = None
        else:
            test_group_map[group] = "Test Group #" + str(i)
            i += 1
    vehicle['test_group'] = vehicle['test_group'].apply(lambda x: test_group_map[x])

    transmission_map = {}
    i = 1
    for trans in vehicle['transmission'].unique():
        transmission_map[trans] = "T" + str(i)
        i += 1
    vehicle['transmission'] = vehicle['transmission'].apply(lambda x: transmission_map[x])

    vehicle['trim_level'] = None

    vin_map = {}
    for vin in vehicle['vin'].unique():
        vin_map[vin] = id_generator(size=8)
    vehicle['vin'] = vehicle['vin'].apply(lambda x: vin_map[x])

    veh_map = {}
    i = 1
    for model in vehicle['vehicle_model'].unique():
        veh_map[model] = "Vehicle" + str(i)
        i += 1
    vehicle['vehicle_model'] = vehicle['vehicle_model'].apply(lambda x: veh_map[x])
    vehicle['tire_size'] = None

    # Step 10: Randomize j2951 data
    for col_name in j2951.columns:
        if col_name not in ['_id', 'cycle', 'cycle_id']:
            j2951[col_name] = j2951[col_name] * rand_unf(0.7, 1.3)

    #  Step 11: Randomize _id names among all tables
    for key in id_veh_map:
        id_veh_map[key] = id_generator(size=8) + '_' + id_generator(size=8) + '_' + id_generator(size=3)

    bag_em['_id'] = bag_em['_id'].apply(lambda x: id_veh_map[x])
    dynamometer['_id'] = dynamometer['_id'].apply(lambda x: id_veh_map[x])
    fuel['_id'] = fuel['_id'].apply(lambda x: id_veh_map[x])
    fuel_economy['_id'] = fuel_economy['_id'].apply(lambda x: id_veh_map[x])
    general['_id'] = general['_id'].apply(lambda x: id_veh_map[x])
    pm['_id'] = pm['_id'].apply(lambda x: id_veh_map[x])
    test_info['_id'] = test_info['_id'].apply(lambda x: id_veh_map[x])
    time_info['_id'] = time_info['_id'].apply(lambda x: id_veh_map[x])
    vehicle['_id'] = vehicle['_id'].apply(lambda x: id_veh_map[x])
    j2951['_id'] = j2951['_id'].apply(lambda x: id_veh_map[x])

    # Write new tables to the new database (enc_db_name/user/password)
    ## Reset the destination tables first
    table_names = pdsql.read_sql_query(
        "select table_name from information_schema.tables where table_schema='public'",
        engine)
    reset_query = ""
    for table in table_names['table_name']:
        reset_query += "DELETE FROM " + table + "; "

    conn = psycopg2.connect(host="postgres-instance.cm2vo1ykiz5s.us-east-2.rds.amazonaws.com",
                            port=5432,
                            dbname=enc_db_name,
                            user=enc_db_user,
                            password=enc_db_password)
    cur = conn.cursor()
    cur.execute(reset_query)
    conn.commit()
    cur.close()
    conn.close()

    connection_str = "postgresql://{0}:{1}@postgres-instance.cm2vo1ykiz5s.us-east-2.rds.amazonaws.com:5432/{2}".format(enc_db_user,
                                                    enc_db_password,
                                                    enc_db_name.replace("\\", ""))

    engine_aws = sqlalchemy.create_engine(connection_str)

    print('general...')
    general.to_sql('general', engine_aws, if_exists='append', index=False)
    print('dynamometer...')
    dynamometer.to_sql('dynamometer', engine_aws, if_exists='append', index=False)
    print('fuel...')
    fuel.to_sql('fuel', engine_aws, if_exists='append', index=False)
    print('fuel_economy...')
    fuel_economy.to_sql('fuel_economy', engine_aws, if_exists='append', index=False)
    print('pm...')
    pm.to_sql('pm', engine_aws, if_exists='append', index=False)
    print('test_info...')
    test_info.to_sql('test_info', engine_aws, if_exists='append', index=False)
    print('time_info...')
    time_info.to_sql('time_info', engine_aws, if_exists='append', index=False)
    print('vehicle...')
    vehicle.to_sql('vehicle', engine_aws, if_exists='append', index=False)
    print('j2951...')
    j2951.to_sql('j2951', engine_aws, if_exists='append', index=False)
    print('bag_em...')
    bag_em.to_sql('bag_em', engine_aws, if_exists='append', index=False)
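The function above calls an `id_generator` helper that is never defined in this snippet. A minimal sketch of what it might look like, assuming the common random-string recipe over uppercase letters and digits; the original implementation may differ:

    import random
    import string

    def id_generator(size=8, chars=string.ascii_uppercase + string.digits):
        # Sketch of the undefined helper used above: return a random string of
        # `size` characters drawn from `chars`.
        return ''.join(random.choice(chars) for _ in range(size))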
Example #22
import pandas as pd
from sqlalchemy import create_engine
from pandas.io import sql

def to_xml(df, filename=None, mode='w', keys=None):
    def row_to_xml(row):
        xml = ['<item>']
        for field in row.index:
            xml.append('<field name="{0}"><![CDATA[{1}]]></field>'.format(field, row[field]))
        xml.append('</item>')
        return '\n'.join(xml)
    if keys is not None:
        # restrict the output to the requested columns, in order
        df = df[keys]
    res = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n<data>\n'
           + '\n'.join(df.apply(row_to_xml, axis=1)) + '\n</data>')
    if filename is None:
        return res
    with open(filename, mode) as f:
        f.write(res)
pd.DataFrame.to_xml = to_xml

engine = create_engine('mysql://*****:*****@localhost/alterrefront', echo=True)
#cnx = engine.connect()
#query = "SELECT url, title, description, media, price, period, category, subcategory FROM ads LIMIT 100"
#results   = cnx.execute(query)
#fetchall  = results.fetchall()
df = sql.read_sql_table("ads", con=engine)
dataframe = df[df['location'].str.contains('rouen')]
#cnx.close()
keys = ["url", "title", "description", "media", "price", "period", "category", "subcategory"]
dataframe.to_xml('flux.xml', keys=keys)
Example #23
    def test_read_table(self):
        iris_frame = sql.read_sql_table("iris", con=self.conn)
        self._check_iris_loaded_frame(iris_frame)
Example #24
    def test_read_table_columns(self):
        iris_frame = sql.read_sql_table(
            "iris", con=self.conn, columns=['SepalLength', 'SepalLength'])
        tm.equalContents(
            iris_frame.columns.values, ['SepalLength', 'SepalLength'])
Example #26
    def test_read_table_columns(self):
        iris_frame = sql.read_sql_table("iris",
                                        con=self.conn,
                                        columns=['SepalLength', 'SepalLength'])
        tm.equalContents(iris_frame.columns.values,
                         ['SepalLength', 'SepalLength'])