def copy_redshift(dialect, tbl, csv, schema_name=None, **kwargs):
    assert isinstance(csv, S3(CSV))
    assert csv.path.startswith('s3://')

    cfg = boto.Config()

    aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    options = dict(delimiter=kwargs.get('delimiter',
                                        csv.dialect.get('delimiter', ',')),
                   ignore_header=int(kwargs.get('has_header', csv.has_header)),
                   empty_as_null=True,
                   blanks_as_null=False,
                   compression=kwargs.get('compression', ''))

    if schema_name is None:
        # 'public' by default, this is a postgres convention
        schema_name = (tbl.schema or
                       sa.inspect(tbl.bind).default_schema_name)

    cmd = CopyCommand(schema_name=schema_name,
                      table_name=tbl.name,
                      data_location=csv.path,
                      access_key=aws_access_key_id,
                      secret_key=aws_secret_access_key,
                      options=options,
                      format='CSV')

    # collapse whitespace and tighten the trailing semicolon so the
    # generated COPY statement renders on a single line
    return re.sub(r'\s+(;)', r'\1', re.sub(r'\s+', ' ', str(cmd))).strip()
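# A minimal usage sketch for copy_redshift, assuming an odo-style S3(CSV)
# resource and a SQLAlchemy table bound to a Redshift engine; the engine,
# table name, and bucket path are illustrative placeholders, not names from
# the original source.
def load_users_from_s3(engine):
    users = sa.Table('users', sa.MetaData(bind=engine), autoload=True)
    csv = resource('s3://example-bucket/users.csv')  # hypothetical odo resource
    sql = copy_redshift('redshift', users, csv, has_header=True)
    engine.execute(sql)  # run the generated COPY statement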
def get_s3_connection(aws_access_key_id=None, aws_secret_access_key=None,
                      anon=False, profile_name=None, **kwargs):
    import boto

    if profile_name:
        return boto.connect_s3(profile_name=profile_name)

    cfg = boto.Config()

    if aws_access_key_id is None:
        aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')

    if aws_access_key_id is None:
        aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')

    if aws_secret_access_key is None:
        aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    if aws_secret_access_key is None:
        aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

    # anon is False but we didn't provide any credentials so try anonymously
    anon = (not anon and
            aws_access_key_id is None and
            aws_secret_access_key is None)

    return boto.connect_s3(aws_access_key_id, aws_secret_access_key,
                           anon=anon)
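# A minimal sketch of the credential fallback in get_s3_connection: explicit
# arguments win, then the boto config file, then environment variables, and
# an anonymous connection only when nothing was found. The bucket name and
# prefix below are illustrative placeholders.
def list_log_keys():
    conn = get_s3_connection()                  # falls through the chain above
    bucket = conn.get_bucket('example-bucket')  # boto bucket lookup
    return [key.name for key in bucket.list(prefix='logs/')]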
def compile_from_csv_redshift(element, compiler, **kwargs):
    assert isinstance(element.csv, S3(CSV))
    assert element.csv.path.startswith('s3://')

    cfg = boto.Config()

    aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    options = dict(delimiter=element.delimiter,
                   ignore_header=int(element.header),
                   empty_as_null=True,
                   blanks_as_null=False,
                   compression=getattr(element, 'compression', ''))

    # bind schema_name up front so it is defined even when the element
    # already carries one (the original only assigned it in the None branch)
    schema_name = getattr(element, 'schema_name', None)
    if schema_name is None:
        # 'public' by default, this is a postgres convention
        schema_name = (element.element.schema or
                       sa.inspect(element.bind).default_schema_name)

    cmd = CopyCommand(schema_name=schema_name,
                      table_name=element.element.name,
                      data_location=element.csv.path,
                      access_key=aws_access_key_id,
                      secret_key=aws_secret_access_key,
                      options=options,
                      format='CSV')

    return re.sub(r'\s+(;)', r'\1', re.sub(r'\s+', ' ', str(cmd))).strip()
def get_provider():
    global boto_config
    global cred_provider

    # lazily construct the default credential provider and cache it in the
    # module-level global so repeated calls reuse the same instance
    if not cred_provider:
        if boto_config:
            boto.provider.config = boto.Config(boto_config)
        cred_provider = boto.provider.get_default()

    return cred_provider
def __init__(self, volume_owner, volume_replica, sx, volume_size=None,
             s3_context=None, volume_prefix=None, subdir=None,
             volume_meta=None, worker_num=1, stream_type=ChunkedStream):
    self.volume_size = volume_size
    self.volume_owner = volume_owner
    self.volume_replica = volume_replica
    self.volume_meta = volume_meta
    self.sx = sx
    self.stream_type = stream_type

    if not isinstance(worker_num, (int, long)) or worker_num <= 0:
        raise S3ImportException(
            'Number of workers must be a positive integer')
    self.worker_num = worker_num

    self.volume_prefix = volume_prefix
    self.subdir = subdir

    if s3_context is not None:
        _config_backup = boto.config
        boto.config = boto.Config(do_load=False)
        s3 = boto.connect_s3(
            aws_access_key_id=s3_context.access_key_id,
            aws_secret_access_key=s3_context.secret_access_key,
            host=s3_context.host,
            port=s3_context.port,
            is_secure=s3_context.is_secure,
            validate_certs=s3_context.validate_certs,
            calling_format=CaseSensitiveCallingFormat())
        boto.config = _config_backup
    else:
        s3 = boto.connect_s3(calling_format=CaseSensitiveCallingFormat())
    self.s3 = s3

    # In spite of what is written in boto's documentation, SSL certificates
    # aren't verified by default in boto. The following check fixes this
    # behaviour.
    if self.s3.https_validate_certificates is None:
        self.s3.https_validate_certificates = True

    self._keyiter = iter(())
    self._iter_lock = threading.Lock()
    self._stopping_event = threading.Event()
    self._exception_queue = Queue.Queue()
    self._event_timeout = 60
    self._join_timeout = 1
def reboot_aws_frontends(config_path):
    import boto.ec2

    cfg = boto.Config()
    cfg.load_from_path(os.path.abspath(config_path))

    instances_id = cfg.get('SHOWCASE', 'frontend_instances_id')
    access_key = cfg.get('SHOWCASE', 'aws_access_key_id')
    secret_key = cfg.get('SHOWCASE', 'aws_secret_access_key')
    region = cfg.get('AWS', 'region')

    conn = boto.ec2.connect_to_region(region,
                                      aws_access_key_id=access_key,
                                      aws_secret_access_key=secret_key)

    reservations = conn.get_all_instances(filters={"tag:Name": instances_id})
    instances = [i for r in reservations for i in r.instances]
    for instance in instances:
        instance.reboot()

    # wait 5 minutes after the reboot so the instances become available again
    time.sleep(5 * 60)
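# The config file loaded by reboot_aws_frontends is a boto-style INI file;
# the layout below is inferred from the cfg.get() calls above, with
# placeholder values.
#
#   [SHOWCASE]
#   frontend_instances_id = showcase-frontend
#   aws_access_key_id = AKIA...
#   aws_secret_access_key = ...
#
#   [AWS]
#   region = eu-west-1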
def compile_from_csv_redshift(element, compiler, **kwargs):
    assert isinstance(element.csv, S3(CSV))
    assert element.csv.path.startswith('s3://')

    cfg = boto.Config()

    aws_access_key_id = cfg.get('Credentials', 'aws_access_key_id')
    aws_secret_access_key = cfg.get('Credentials', 'aws_secret_access_key')

    # normalize the compression flag: an empty string means "no compression"
    compression = getattr(element, 'compression', '').upper() or None

    # unlike the variant above, this CopyCommand takes its options as
    # keyword arguments and is rendered through the SQLAlchemy compiler
    cmd = CopyCommand(table=element.element,
                      data_location=element.csv.path,
                      access_key_id=aws_access_key_id,
                      secret_access_key=aws_secret_access_key,
                      format='CSV',
                      delimiter=element.delimiter,
                      ignore_header=int(element.header),
                      empty_as_null=True,
                      blanks_as_null=False,
                      compression=compression)

    return compiler.process(cmd)
def read_config(config_file):
    cfg = boto.Config()
    cfg.load_from_path(os.path.abspath(config_file))
    return cfg
def getConn(section='Credentials'):
    from boto.s3.connection import S3Connection

    boto_config = boto.Config()
    return S3Connection(boto_config.get(section, 'aws_access_key_id'),
                        boto_config.get(section, 'aws_secret_access_key'))
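# getConn reads the standard [Credentials] section of a boto config file
# (e.g. ~/.boto or /etc/boto.cfg):
#
#   [Credentials]
#   aws_access_key_id = AKIA...
#   aws_secret_access_key = ...
#
# A minimal usage sketch; the bucket name is an illustrative placeholder.
def print_bucket_contents():
    conn = getConn()
    for key in conn.get_bucket('example-bucket').list():
        print(key.name)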