def fast_postgresql_to_df(table, schema):
    """Load a PostgreSQL table (or selectable) into a DataFrame via COPY.

    Streams the data out as CSV with psycopg2's ``copy_expert`` (much faster
    than row-by-row fetching), then parses it with CsvDataStore and converts
    the schema's datetime columns.

    Args:
        table: a SQLAlchemy Table — or other selectable — bound to an engine.
        schema: project schema object describing the expected columns.

    Returns:
        pandas.DataFrame with datetime / big_dt columns parsed.
    """
    engine = table.bind
    conn = engine.raw_connection()
    with conn.cursor() as cur:
        with io.StringIO() as f:
            table_name = str(table)
            if not isinstance(table, Table):
                # Subqueries must be parenthesized inside COPY.
                table_name = '({})'.format(table_name)
            sql = "COPY {table_name} TO STDOUT WITH (FORMAT CSV, HEADER TRUE)".format(
                table_name=table_name)
            cur.copy_expert(sql, f)
            f.seek(0)

            # reading csv
            csv_loader = CsvDataStore(schema, f, with_header=True)
            df = csv_loader.load()

            for col in schema.cols:
                if isinstance(col, dt):
                    # converting datetime column.
                    # BUG FIX: the coerce=True kwarg was removed from pandas
                    # in 0.17; errors='coerce' is the supported spelling and
                    # turns unparseable values into NaT.
                    df[col.name] = pandas.to_datetime(
                        df[col.name], format="%Y-%m-%d %H:%M:%S", errors='coerce')
                if isinstance(col, big_dt):
                    # converting big_dt column element-wise — presumably these
                    # timestamps can fall outside pandas' Timestamp range,
                    # so plain python datetimes are used (TODO confirm).
                    strptime = datetime.datetime.strptime
                    parse_func = (lambda x: strptime(x, "%Y-%m-%d %H:%M:%S"))
                    df[col.name] = df[col.name].map(parse_func, na_action='ignore')
            return df
def fast_postgresql_to_df(table, schema):
    """Load a PostgreSQL table (or selectable) into a DataFrame via COPY.

    Duplicate definition of the function above; kept with the same fix so the
    file behaves identically whichever definition wins.

    Args:
        table: a SQLAlchemy Table — or other selectable — bound to an engine.
        schema: project schema object describing the expected columns.

    Returns:
        pandas.DataFrame with datetime / big_dt columns parsed.
    """
    engine = table.bind
    conn = engine.raw_connection()
    with conn.cursor() as cur:
        with io.StringIO() as f:
            table_name = str(table)
            if not isinstance(table, Table):
                # Subqueries must be parenthesized inside COPY.
                table_name = '({})'.format(table_name)
            sql = "COPY {table_name} TO STDOUT WITH (FORMAT CSV, HEADER TRUE)".format(
                table_name=table_name)
            cur.copy_expert(sql, f)
            f.seek(0)

            # reading csv
            csv_loader = CsvDataStore(schema, f, with_header=True)
            df = csv_loader.load()

            for col in schema.cols:
                if isinstance(col, dt):
                    # converting datetime column.
                    # BUG FIX: coerce=True was removed from pandas in 0.17;
                    # errors='coerce' turns unparseable values into NaT.
                    df[col.name] = pandas.to_datetime(
                        df[col.name], format="%Y-%m-%d %H:%M:%S", errors='coerce')
                if isinstance(col, big_dt):
                    # converting big_dt column element-wise with plain python
                    # datetimes; NaNs are passed through untouched.
                    strptime = datetime.datetime.strptime
                    parse_func = (lambda x: strptime(x, "%Y-%m-%d %H:%M:%S"))
                    df[col.name] = df[col.name].map(parse_func, na_action='ignore')
            return df
def df_to_csv(file_name, df):
    """Store *df* as a CSV in the local storage dir and return a download link.

    Args:
        file_name: target file name; a '.csv' extension is appended if missing.
        df: the DataFrame to store.

    Returns:
        IPython FileLink pointing at the written file (relative path).
    """
    if not os.path.splitext(file_name)[1] == '.csv':
        # BUG FIX: os.path.join(file_name, '.csv') built the path
        # 'file_name/.csv' (a directory component), not an extension.
        # Append the suffix with plain concatenation instead.
        file_name = file_name + '.csv'
    # Schema name is the file name without its extension — TODO confirm
    # that's what PartialSchema expects.
    sp = os.path.splitext(file_name)[0]
    local_storage_dir = mimic_login.get_local_storage_dir()
    file_path = os.path.join(local_storage_dir, file_name)
    store = CsvDataStore(PartialSchema(sp), file_path)
    store.store(df)
    return FileLink(os.path.relpath(file_path), result_html_prefix='Right-click and save: ')
def fast_mysql_to_df(table, schema):
    """Load a MySQL table (or compiled selectable) into a DataFrame.

    Dumps the query server-side with SELECT ... INTO OUTFILE (much faster
    than fetching rows through the driver), then parses the resulting CSV
    with CsvDataStore.

    Args:
        table: a SQLAlchemy Table or other selectable bound to an engine.
        schema: project schema object describing the expected columns.

    Returns:
        pandas.DataFrame

    NOTE(review): INTO OUTFILE requires the MySQL server to share a
    filesystem with this process and to hold the FILE privilege — confirm
    for the deployment environment.
    """
    from chatto_transform.config import config

    # Only a unique path is needed; close (and thereby delete) the temp file
    # so the server can create it — MySQL refuses to overwrite an existing
    # OUTFILE target.
    f = tempfile.NamedTemporaryFile('w', suffix='.csv', dir=config.data_dir + 'tmp')
    try:
        f.close()
        if not isinstance(table, Table):
            compiled = table.compile()
            table_name = '({})'.format(str(compiled))
            params = [compiled.params[k] for k in compiled.positiontup]
        else:
            table_name = str(table)
            params = []

        # converting to csv
        # BUG FIX: the OUTFILE path placeholder was missing from the SQL
        # template, so the 'filename' format argument was silently unused and
        # the dump went to a literal bogus path; restore '{filename}'.
        sql = """SELECT {cols} FROM {table} AS t INTO OUTFILE '{filename}'
            FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\\\'
            LINES TERMINATED BY '\n'""".format(
            cols=', '.join('`' + colname + '`' for colname in schema.col_names()),
            filename=f.name,
            table=table_name)
        table.bind.execute(sql, *params)

        # reading csv; MySQL writes NULL as \N
        csv_loader = CsvDataStore(schema, f.name, with_header=False, na_values=['\\N'])
        df = csv_loader.load()
    finally:
        os.remove(f.name)
    return df
def fast_mysql_to_df(table, schema):
    """Load a MySQL table (or compiled selectable) into a DataFrame.

    Duplicate definition of the function above; kept with the same fix so the
    file behaves identically whichever definition wins.

    Args:
        table: a SQLAlchemy Table or other selectable bound to an engine.
        schema: project schema object describing the expected columns.

    Returns:
        pandas.DataFrame
    """
    from chatto_transform.config import config

    # Only a unique path is needed; close (and thereby delete) the temp file
    # so the server can create it — MySQL refuses to overwrite an existing
    # OUTFILE target.
    f = tempfile.NamedTemporaryFile('w', suffix='.csv', dir=config.data_dir + 'tmp')
    try:
        f.close()
        if not isinstance(table, Table):
            compiled = table.compile()
            table_name = '({})'.format(str(compiled))
            params = [compiled.params[k] for k in compiled.positiontup]
        else:
            table_name = str(table)
            params = []

        # converting to csv
        # BUG FIX: the OUTFILE path placeholder was missing from the SQL
        # template, so the 'filename' format argument was silently unused;
        # restore '{filename}'.
        sql = """SELECT {cols} FROM {table} AS t INTO OUTFILE '{filename}'
            FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\\\'
            LINES TERMINATED BY '\n'""".format(
            cols=', '.join('`' + colname + '`' for colname in schema.col_names()),
            filename=f.name,
            table=table_name)
        table.bind.execute(sql, *params)

        # reading csv; MySQL writes NULL as \N
        csv_loader = CsvDataStore(schema, f.name, with_header=False, na_values=['\\N'])
        df = csv_loader.load()
    finally:
        os.remove(f.name)
    return df
def load_csv(file_path, schema):
    """Read the CSV at *file_path* into a DataFrame using *schema*."""
    return CsvDataStore(schema, file_path).load()
def df_to_csv(file_path, df, schema):
    """Write *df* to *file_path* as CSV and return an IPython download link."""
    CsvDataStore(schema, file_path).store(df)
    return FileLink(file_path, result_html_prefix='Right-click and save: ')
def df_to_csv(file_path, df, schema):
    """Write *df* to *file_path* as CSV (no link returned)."""
    CsvDataStore(schema, file_path).store(df)