def append_csv_to_sql_table(tbl, csv, bind=None, **kwargs):
    bind = getbind(tbl, bind)
    dialect = bind.dialect.name

    # move things to a temporary S3 bucket if we're using redshift and we
    # aren't already in S3
    if dialect == 'redshift' and not isinstance(csv, S3(CSV)):
        csv = convert(Temp(S3(CSV)), csv, **kwargs)
    elif dialect != 'redshift' and isinstance(csv, S3(CSV)):
        csv = convert(Temp(CSV), csv, has_header=csv.has_header, **kwargs)
    elif dialect == 'hive':
        from .ssh import SSH
        return append(tbl, convert(Temp(SSH(CSV)), csv, **kwargs), **kwargs)

    kwargs = merge(csv.dialect, kwargs)
    stmt = CopyFromCSV(tbl, csv, bind=bind, **kwargs)
    if dialect == 'postgresql':
        # stream the local file over the connection with psycopg2's
        # copy_expert rather than asking the server to read a file path
        with bind.begin() as c:
            with csv.open() as f:
                c.connection.cursor().copy_expert(literal_compile(stmt), f)
    else:
        with bind.begin() as conn:
            conn.execute(stmt)
    return tbl
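# Hedged usage sketch (not part of the original source): assumes a postgres
# database with an existing `events` table and a local events.csv file, and
# that odo's CSV resource type is importable from odo.backends.csv.
import sqlalchemy as sa
from odo.backends.csv import CSV  # assumed import path

engine = sa.create_engine('postgresql://localhost/mydb')
events = sa.Table('events', sa.MetaData(), autoload=True, autoload_with=engine)

# on postgres this streams the file via psycopg2's copy_expert
append_csv_to_sql_table(events, CSV('events.csv', has_header=True), bind=engine)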
def append_csv_to_sql_table(tbl, csv, **kwargs):
    dialect = tbl.bind.dialect.name

    # move things to a temporary S3 bucket if we're using redshift and we
    # aren't already in S3
    if dialect == 'redshift' and not isinstance(csv, S3(CSV)):
        csv = convert(Temp(S3(CSV)), csv, **kwargs)
    elif dialect != 'redshift' and isinstance(csv, S3(CSV)):
        csv = convert(Temp(CSV), csv, has_header=csv.has_header, **kwargs)
    elif dialect == 'hive':
        from .ssh import SSH
        return append(tbl, convert(Temp(SSH(CSV)), csv, **kwargs), **kwargs)

    statement = copy_command(dialect, tbl, csv, **kwargs)
    execute_copy(dialect, tbl.bind, statement)
    return tbl
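# `copy_command` and `execute_copy` are not shown in this excerpt.  Below is a
# rough sketch of what they could look like, pieced together from the other
# versions of append_csv_to_sql_table here: `copy_command` builds the
# dialect-specific bulk-load statement (the redshift case presumably delegates
# to copy_redshift below, which shares the same signature), and `execute_copy`
# runs it in a transaction.  These are assumptions, not the real helpers.
def copy_command(dialect, tbl, csv, **kwargs):
    # generic fallback: merge the sniffed CSV dialect into the keyword
    # arguments and build a CopyFromCSV construct, as the other versions do
    kwargs = merge(csv.dialect, kwargs)
    return CopyFromCSV(tbl, csv, **kwargs)


def execute_copy(dialect, bind, statement):
    # execute the compiled COPY/LOAD statement inside a transaction
    with bind.begin() as conn:
        conn.execute(statement)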
def copy_redshift(dialect, tbl, csv, schema_name=None, **kwargs):
    assert isinstance(csv, S3(CSV))
    assert csv.path.startswith('s3://')

    cfg = boto.Config()

    aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    options = dict(
        delimiter=kwargs.get('delimiter', csv.dialect.get('delimiter', ',')),
        ignore_header=int(kwargs.get('has_header', csv.has_header)),
        empty_as_null=True,
        blanks_as_null=False,
        compression=kwargs.get('compression', ''),
    )
    if schema_name is None:
        # 'public' by default, this is a postgres convention
        schema_name = tbl.schema or sa.inspect(tbl.bind).default_schema_name
    cmd = CopyCommand(schema_name=schema_name,
                      table_name=tbl.name,
                      data_location=csv.path,
                      access_key=aws_access_key_id,
                      secret_key=aws_secret_access_key,
                      options=options,
                      format='CSV')
    return re.sub(r'\s+(;)', r'\1', re.sub(r'\s+', ' ', str(cmd))).strip()
def compile_from_csv_redshift(element, compiler, **kwargs):
    assert isinstance(element.csv, S3(CSV))
    assert element.csv.path.startswith('s3://')

    cfg = boto.Config()

    aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    options = dict(
        delimiter=element.delimiter,
        ignore_header=int(element.header),
        empty_as_null=True,
        blanks_as_null=False,
        compression=getattr(element, 'compression', ''),
    )
    # bind schema_name up front so it is always defined when the element
    # already carries an explicit schema name
    schema_name = getattr(element, 'schema_name', None)
    if schema_name is None:
        # 'public' by default, this is a postgres convention
        schema_name = (element.element.schema or
                       sa.inspect(element.bind).default_schema_name)
    cmd = CopyCommand(schema_name=schema_name,
                      table_name=element.element.name,
                      data_location=element.csv.path,
                      access_key=aws_access_key_id,
                      secret_key=aws_secret_access_key,
                      options=options,
                      format='CSV')
    return re.sub(r'\s+(;)', r'\1', re.sub(r'\s+', ' ', str(cmd))).strip()
def append_csv_to_sql_table(tbl, csv, **kwargs):
    dialect = tbl.bind.dialect.name

    # move things to a temporary S3 bucket if we're using redshift and we
    # aren't already in S3
    if dialect == 'redshift' and not isinstance(csv, S3(CSV)):
        csv = convert(Temp(S3(CSV)), csv, **kwargs)
    elif dialect != 'redshift' and isinstance(csv, S3(CSV)):
        csv = convert(Temp(CSV), csv, has_header=csv.has_header, **kwargs)
    elif dialect == 'hive':
        from .ssh import SSH
        return append(tbl, convert(Temp(SSH(CSV)), csv, **kwargs), **kwargs)

    kwargs = merge(csv.dialect, kwargs)
    stmt = CopyFromCSV(tbl, csv, **kwargs)
    with tbl.bind.begin() as conn:
        conn.execute(stmt)
    return tbl
def compile_from_csv_redshift(element, compiler, **kwargs):
    assert isinstance(element.csv, S3(CSV))
    assert element.csv.path.startswith('s3://')

    cfg = boto.Config()

    aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    compression = getattr(element, 'compression', '').upper() or None
    cmd = CopyCommand(table=element.element,
                      data_location=element.csv.path,
                      access_key_id=aws_access_key_id,
                      secret_access_key=aws_secret_access_key,
                      format='CSV',
                      delimiter=element.delimiter,
                      ignore_header=int(element.header),
                      empty_as_null=True,
                      blanks_as_null=False,
                      compression=compression)
    return compiler.process(cmd)
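# A sketch (not taken from this excerpt) of how a compiler function with the
# (element, compiler, **kwargs) signature above is typically registered:
# SQLAlchemy's @compiles decorator binds it to the CopyFromCSV construct for
# the 'redshift' dialect, so executing a CopyFromCSV statement against a
# redshift engine renders it through compile_from_csv_redshift.
from sqlalchemy.ext.compiler import compiles

compiles(CopyFromCSV, 'redshift')(compile_from_csv_redshift)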