Esempio n. 1
0
def append_csv_to_sql_table(tbl, csv, bind=None, **kwargs):
    bind = getbind(tbl, bind)
    dialect = bind.dialect.name

    # move things to a temporary S3 bucket if we're using redshift and we
    # aren't already in S3
    if dialect == 'redshift' and not isinstance(csv, S3(CSV)):
        csv = convert(Temp(S3(CSV)), csv, **kwargs)
    elif dialect != 'redshift' and isinstance(csv, S3(CSV)):
        csv = convert(Temp(CSV), csv, has_header=csv.has_header, **kwargs)
    elif dialect == 'hive':
        from .ssh import SSH
        return append(tbl, convert(Temp(SSH(CSV)), csv, **kwargs), **kwargs)

    kwargs = merge(csv.dialect, kwargs)
    stmt = CopyFromCSV(tbl, csv, bind=bind, **kwargs)

    if dialect == 'postgresql':
        with bind.begin() as c:
            with csv.open() as f:
                c.connection.cursor().copy_expert(literal_compile(stmt), f)
    else:
        with bind.begin() as conn:
            conn.execute(stmt)
    return tbl
Esempio n. 2
0
def append_csv_to_sql_table(tbl, csv, **kwargs):
    dialect = tbl.bind.dialect.name

    # move things to a temporary S3 bucket if we're using redshift and we
    # aren't already in S3
    if dialect == 'redshift' and not isinstance(csv, S3(CSV)):
        csv = convert(Temp(S3(CSV)), csv, **kwargs)
    elif dialect != 'redshift' and isinstance(csv, S3(CSV)):
        csv = convert(Temp(CSV), csv, has_header=csv.has_header, **kwargs)
    elif dialect == 'hive':
        from .ssh import SSH
        return append(tbl, convert(Temp(SSH(CSV)), csv, **kwargs), **kwargs)

    statement = copy_command(dialect, tbl, csv, **kwargs)
    execute_copy(dialect, tbl.bind, statement)
    return tbl
Esempio n. 3
0
    def copy_redshift(dialect, tbl, csv, schema_name=None, **kwargs):
        assert isinstance(csv, S3(CSV))
        assert csv.path.startswith('s3://')

        cfg = boto.Config()

        aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
        aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

        options = dict(delimiter=kwargs.get('delimiter',
                                            csv.dialect.get('delimiter', ',')),
                       ignore_header=int(kwargs.get('has_header',
                                                    csv.has_header)),
                       empty_as_null=True,
                       blanks_as_null=False,
                       compression=kwargs.get('compression', ''))

        if schema_name is None:
            # 'public' by default, this is a postgres convention
            schema_name = (tbl.schema or
                           sa.inspect(tbl.bind).default_schema_name)
        cmd = CopyCommand(schema_name=schema_name,
                          table_name=tbl.name,
                          data_location=csv.path,
                          access_key=aws_access_key_id,
                          secret_key=aws_secret_access_key,
                          options=options,
                          format='CSV')
        return re.sub(r'\s+(;)', r'\1', re.sub(r'\s+', ' ', str(cmd))).strip()
Esempio n. 4
0
    def compile_from_csv_redshift(element, compiler, **kwargs):
        assert isinstance(element.csv, S3(CSV))
        assert element.csv.path.startswith('s3://')

        cfg = boto.Config()

        aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
        aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

        options = dict(delimiter=element.delimiter,
                       ignore_header=int(element.header),
                       empty_as_null=True,
                       blanks_as_null=False,
                       compression=getattr(element, 'compression', ''))

        if getattr(element, 'schema_name', None) is None:
            # 'public' by default, this is a postgres convention
            schema_name = (element.element.schema
                           or sa.inspect(element.bind).default_schema_name)
        cmd = CopyCommand(schema_name=schema_name,
                          table_name=element.element.name,
                          data_location=element.csv.path,
                          access_key=aws_access_key_id,
                          secret_key=aws_secret_access_key,
                          options=options,
                          format='CSV')
        return re.sub(r'\s+(;)', r'\1', re.sub(r'\s+', ' ', str(cmd))).strip()
Esempio n. 5
0
def append_csv_to_sql_table(tbl, csv, **kwargs):
    dialect = tbl.bind.dialect.name

    # move things to a temporary S3 bucket if we're using redshift and we
    # aren't already in S3
    if dialect == 'redshift' and not isinstance(csv, S3(CSV)):
        csv = convert(Temp(S3(CSV)), csv, **kwargs)
    elif dialect != 'redshift' and isinstance(csv, S3(CSV)):
        csv = convert(Temp(CSV), csv, has_header=csv.has_header, **kwargs)
    elif dialect == 'hive':
        from .ssh import SSH
        return append(tbl, convert(Temp(SSH(CSV)), csv, **kwargs), **kwargs)

    kwargs = merge(csv.dialect, kwargs)
    stmt = CopyFromCSV(tbl, csv, **kwargs)
    with tbl.bind.begin() as conn:
        conn.execute(stmt)
    return tbl
Esempio n. 6
0
    def compile_from_csv_redshift(element, compiler, **kwargs):
        assert isinstance(element.csv, S3(CSV))
        assert element.csv.path.startswith('s3://')

        cfg = boto.Config()

        aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
        aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

        compression = getattr(element, 'compression', '').upper() or None
        cmd = CopyCommand(table=element.element,
                          data_location=element.csv.path,
                          access_key_id=aws_access_key_id,
                          secret_access_key=aws_secret_access_key,
                          format='CSV',
                          delimiter=element.delimiter,
                          ignore_header=int(element.header),
                          empty_as_null=True,
                          blanks_as_null=False,
                          compression=compression)
        return compiler.process(cmd)