def __init__(self, uri=getenv('DATABASE_URL'), table=getenv('STASH_TABLE_NAME'), chuncksize=20, encryptkey=getenv('DAS_ENCRYPT_KEY', get_default_data_key()),  # TODO (steven_c) consider removing
             index=False, indexcolumns=None, **xargs):
    """
    Connect to the database at `uri` and bind this object to the stash table.

    uri          - database URL handed to dataset.connect(). Defaults to the
                   DATABASE_URL environment variable; the default is read once,
                   when this method is defined, not on every call.
    table        - name of the table to load or create. Defaults to the
                   STASH_TABLE_NAME environment variable (also read once).
    chuncksize   - stored on the instance as self.chuncksize; not used here.
    encryptkey   - accepted but not referenced anywhere in this method.
    index        - when truthy (together with indexcolumns) indexes are created.
    indexcolumns - either a list of column names (one index over those columns)
                   or a list of lists of column names (one index per inner list).
    xargs        - extra keyword arguments; accepted and ignored here.

    The table is loaded if it exists; otherwise it is created with a primary
    key column 'id' of type 'String(36)'.
    """
    self.uri = uri
    self.chuncksize = chuncksize
    self.table = table
    self.index = index
    self.indexcolumns = indexcolumns
    # Engine options are passed straight through to dataset.connect().
    self.conn = dataset.connect(uri, reflect_metadata=False, engine_kwargs={'pool_recycle': 3600, 'convert_unicode': True, 'encoding': 'utf-8', })
    # A transaction is opened here; nothing in this method commits or rolls it back.
    self.conn.begin()
    try:
        self.tbl = self.conn.load_table(self.table)
    except NoSuchTableError:
        # Table is missing: create it with an explicit string primary key.
        # presumably the 36-character ids are UUID strings - verify against callers
        self.tbl = self.conn.get_table(self.table, primary_id='id', primary_type='String(36)')
    # NOTE(review): the original layout was collapsed onto one line, so it is not
    # certain whether this index block belongs after the try/except (as laid out
    # here) or inside the `except NoSuchTableError` branch - confirm.
    if bool(self.index) and bool(self.indexcolumns):
        # The type of the first element decides how indexcolumns is interpreted.
        if isinstance(self.indexcolumns[0], basestring):
            # Flat list of column names: a single index over all of them.
            self.tbl.create_index(columns=self.indexcolumns)
        elif isinstance(self.indexcolumns[0], list):
            # List of lists: one index per inner list of columns.
            for ic in self.indexcolumns:
                self.tbl.create_index(columns=ic)
    # Starts out empty; filled elsewhere in the class.
    self.stack = []
def __init__(self, parenturi, encrypt=False, removeExtIfEncrypt=True,
             encryptkey=getenv('DAS_ENCRYPT_KEY', get_default_data_key()),
             vcores=1, threads=50, batch=200, **otherArgsForS3):
    """
    Set up a stash that posts objects through _s3_stash_objects_parallel.

    parenturi          - URI of the bucket (and 'folder'); its path component
                         becomes self.parentpath and ParseUri(parenturi)
                         supplies the bucket id.
    encrypt            - when truthy, self.envelope wraps encrypt_it with
                         `encryptkey`; otherwise self.envelope is pass_through.
    removeExtIfEncrypt - stored on the instance; not used in this method.
    encryptkey         - key bound into the envelope when encrypting. The
                         default is computed once, at definition time.
    vcores, threads    - stored on the instance and bound into self.post_it.
    batch              - stored on the instance; not used in this method.
    otherArgsForS3     - extra keyword arguments bound into self.post_it.
    """
    # Plain settings kept for later use.
    self.vcores, self.threads, self.batch = vcores, threads, batch
    self.encrypt, self.removeExtIfEncrypt = encrypt, removeExtIfEncrypt

    # Location derived from the parent URI.
    self.parentpath = Path(urlsplit(parenturi).path)
    self.parsedParentUri = ParseUri(parenturi)
    bucket_name = str(self.parsedParentUri.bucket_id)

    # Pre-bind everything that stays constant for this stash.
    self.post_it = partial(
        _s3_stash_objects_parallel,
        vcores=vcores,
        threads=threads,
        bucket=bucket_name,
        encrypt=encrypt,
        **otherArgsForS3
    )

    # Payload wrapper: encrypt with the given key, or hand through untouched.
    self.envelope = partial(encrypt_it, key=encryptkey) if encrypt else pass_through

    # Both stacks start out empty; they are filled elsewhere in the class.
    self.innerstack, self.outerstack = [], []
def __init__(self, parenturi, encrypt=False, removeExtIfEncrypt=True,
             encryptkey=getenv('DAS_ENCRYPT_KEY', get_default_data_key()),
             **xargs):
    """
    Set up a stash rooted at the directory named by `parenturi`.

    parenturi          - URI whose path component is the target directory;
                         the directory (and any missing parents) is created
                         if it does not exist yet.
    encrypt            - when truthy, self.envelope wraps encrypt_it with
                         `encryptkey`; otherwise self.envelope is pass_through.
    removeExtIfEncrypt - stored on the instance; not used in this method.
    encryptkey         - key bound into the envelope when encrypting. The
                         default is computed once, at definition time.
    xargs              - extra keyword arguments; accepted and ignored here.

    Raises OSError if the directory cannot be created and still is not a
    directory afterwards (e.g. the path exists as a regular file).
    """
    self.parentpath = Path(urlsplit(parenturi).path)
    self.encrypt = encrypt
    self.removeExtIfEncrypt = removeExtIfEncrypt

    if not self.parentpath.is_dir():
        try:
            self.parentpath.mkdir(parents=True)
        except OSError:
            # The directory may have been created by someone else between
            # the is_dir() check and mkdir(); that is fine. Anything else
            # (permissions, path is a file, ...) is a real error.
            if not self.parentpath.is_dir():
                raise

    if encrypt:
        self.envelope = partial(encrypt_it, key=encryptkey)
    else:
        self.envelope = pass_through
def __init__(self, parenturi, encrypt=False, removeExtIfEncrypt=True,
             encryptkey=getenv('DAS_ENCRYPT_KEY', get_default_data_key()),
             **otherArgsForS3):
    """
    Set up a stash that posts objects one at a time through s3_stash_object.

    parenturi          - URI of the bucket (and 'folder'); its path component
                         becomes self.parentpath and ParseUri(parenturi)
                         supplies the bucket id.
    encrypt            - when truthy, self.envelope wraps encrypt_it with
                         `encryptkey`; otherwise self.envelope is pass_through.
    removeExtIfEncrypt - stored on the instance; not used in this method.
    encryptkey         - key bound into the envelope when encrypting. The
                         default is computed once, at definition time.
    otherArgsForS3     - extra keyword arguments bound into self.post_it.
    """
    # Location derived from the parent URI.
    self.parentpath = Path(urlsplit(parenturi).path)
    self.encrypt, self.removeExtIfEncrypt = encrypt, removeExtIfEncrypt
    self.parsedParentUri = ParseUri(parenturi)

    # One S3 client per stash, configured with signature_version='s3v4'.
    signing_config = Config(signature_version='s3v4')
    self.client = boto3.client('s3', config=signing_config)

    # Pre-bind client, bucket and encryption flag for every later post.
    bucket_name = str(self.parsedParentUri.bucket_id)
    self.post_it = partial(
        s3_stash_object,
        client=self.client,
        bucket=bucket_name,
        encrypt=encrypt,
        **otherArgsForS3
    )

    # Payload wrapper: encrypt with the given key, or hand through untouched.
    self.envelope = partial(encrypt_it, key=encryptkey) if encrypt else pass_through
def file_stash(parenturi, encrypt=False, pool=False,
               encryptkey=getenv('DAS_ENCRYPT_KEY', get_default_data_key()),
               **xargs):
    """
    Build the stash object that matches the scheme of `parenturi`.

    parenturi  - the directory or bucket (and 'folder') where the files
                 will be stored.
    encrypt    - forwarded to the stash constructor.
    pool       - only consulted for S3 URIs: truthy selects S3FileStashPool,
                 falsy selects S3FileStash.
    encryptkey - forwarded to the stash constructor. The default is computed
                 once, at definition time.
    xargs      - any further keyword arguments, forwarded unchanged.

    Returns a LocalFileStash for 'file' URIs, an S3 stash for 's3' / 's3n'
    URIs, and None for any other scheme.
    """
    local_schemes = {'file'}
    s3_schemes = {'s3', 's3n'}

    scheme = ParseUri(parenturi).scheme

    if scheme in local_schemes:
        return LocalFileStash(parenturi, encrypt=encrypt,
                              encryptkey=encryptkey, **xargs)

    if scheme not in s3_schemes:
        # Unsupported scheme: nothing to build.
        return None

    if pool:
        return S3FileStashPool(parenturi=parenturi, encrypt=encrypt,
                               encryptkey=encryptkey, **xargs)
    return S3FileStash(parenturi, encrypt=encrypt,
                       encryptkey=encryptkey, **xargs)