records_batch = list() n = 0 for n, row in enumerate(rows): if use_fast_batch_method: current_record = {k: v for k, v in zip(columns, row)} records_batch.append(current_record) elif skip_errors: try: cur.execute(query, row) except TypeError or IndexError as e: # TypeError: not all arguments converted during string formatting self.log('Error line: {}'.format(str(row)), level=LoggingLevel.Debug, verbose=verbose) self.log('{}: {}'.format(e.__class__.__name__, e), level=LoggingLevel.Error) if (n + 1) % step == 0: if use_fast_batch_method: self.execute_batch(query, records_batch, step, cursor=cur) records_batch = list() if not progress.get_position(): progress.update(0) conn.commit() progress.update(n) gc.collect() if use_fast_batch_method: self.execute_batch(query, records_batch, step, cursor=cur) conn.commit() progress.finish(n) if return_count: return n ConnType.add_classes(PostgresDatabase)
# NOTE(review): this chunk begins mid-signature -- the opening of
# LocalFile.to_stream() (self and its earlier parameters such as data,
# name, stream_type, ex) is outside the visible range.
            step: AutoCount = AUTO,
            **kwargs
    ) -> Stream:
        # Route conversion through to_stream_type(); the ex-argument of the
        # base interface is deliberately unsupported here.
        if Auto.is_defined(data):
            kwargs['data'] = data
        # Fall back to the file's own stream type when none was given.
        stream_type = Auto.delayed_acquire(stream_type, self.get_stream_type)
        assert not ex, 'ex-argument for LocalFile.to_stream() not supported (got {})'.format(ex)
        return self.to_stream_type(stream_type=stream_type, step=step, **kwargs)

    @classmethod
    def get_default_folder(cls) -> Connector:
        """Return the class-level default parent folder for new LocalFile objects."""
        return cls._default_folder

    @classmethod
    def set_default_folder(cls, folder: ConnectorInterface) -> None:
        """Set the class-level default parent folder shared by all LocalFile objects."""
        cls._default_folder = folder

    def _get_field_getter(self, field: UniKey, item_type: Union[ItemType, Auto] = AUTO, default=None):
        # With a known struct, resolve field name(s) to positional index(es)
        # once and return a fast positional getter; otherwise defer to the
        # generic implementation in the parent class.
        if self.get_struct():
            if isinstance(field, ARRAY_TYPES):
                # Several fields requested: getter returns a tuple of values.
                fields_positions = self.get_fields_positions(field)
                return lambda i: tuple([i[p] for p in fields_positions])
            else:
                field_position = self.get_field_position(field)
                return lambda i: i[field_position]
        else:
            return super()._get_field_getter(field, item_type=item_type, default=default)


ConnType.add_classes(LocalFile)
elif create_if_not_yet: return SingletonLogger() @staticmethod def get_default_child_type() -> ConnType: return ConnType.LocalFolder @classmethod def get_default_child_class(cls) -> Class: child_class = cls.get_default_child_type().get_class if not arg.is_defined(child_class): child_class = cls.get_default_child_obj_class() return child_class def get_folders(self) -> Iterable: for name, folder in self.get_children(): yield folder def folder(self, name, **kwargs) -> ConnectorInterface: return self.child(name, parent=self, **kwargs) def get_path_delimiter(self) -> str: return self._path_delimiter @staticmethod def get_full_path() -> str: return os.getcwd() ConnType.add_classes(LocalStorage)
# NOTE(review): fragment -- this chunk starts inside a bucket-accessor
# method of S3Storage; the def line and the branch providing `bucket` and
# `name` are outside the visible range.
            if arg.is_defined(access_key) and hasattr(bucket, 'set_access_key'):
                bucket.set_access_key(access_key)
            if arg.is_defined(secret_key) and hasattr(bucket, 'set_secret_key'):
                bucket.set_secret_key(secret_key)
        else:
            # Bucket not registered yet: build it with explicitly given or
            # storage-level default credentials.
            bucket_class = self.get_default_child_obj_class()
            bucket = bucket_class(
                name=name,
                storage=self,
                access_key=arg.delayed_acquire(access_key, self.get_access_key),
                secret_key=arg.delayed_acquire(secret_key, self.get_secret_key),
            )
        return bucket

    def get_resource_properties(self) -> dict:
        """Return kwargs for constructing the S3 resource/client (service name, endpoint)."""
        return dict(
            service_name=self.get_service_name(),
            endpoint_url=self.get_endpoint_url(),
        )

    @staticmethod
    def _get_covert_props() -> tuple:
        # Property names whose values must be masked in logs/repr (credentials).
        return COVERT_PROPS


ConnType.add_classes(S3Storage)
# NOTE(review): fragment -- this chunk begins inside the signature of an
# S3Object upload method (from_stream-like); its earlier parameters (self,
# stream, ...) are outside the visible range.
            storage_class=DEFAULT_STORAGE_CLASS,
            encoding='utf8',
            verbose: bool = True):
        # Serialize the stream's lines into one newline-joined byte blob
        # and upload it as the object body.
        lines = self._get_lines_from_stream(stream)
        data = bytes('\n'.join(lines), encoding=encoding)
        response = self.put_object(data=data, storage_class=storage_class)
        # assumes put_object() returns a boto3-style response dict with
        # ResponseMetadata -- TODO confirm
        is_done = response.get('ResponseMetadata').get(
            'HTTPStatusCode') == HTTP_OK
        if is_done:
            return self
        else:
            raise ValueError(response)

    def to_stream(self, stream_type: Union[StreamType, str, Auto] = AUTO, **kwargs) -> Stream:
        """Build a stream of the given type over this object's downloaded data."""
        stream_class = StreamType(stream_type).get_class()
        return stream_class(self.get_data(), **kwargs)

    def get_expected_count(self) -> Optional[int]:
        # Externally provided count, if any; S3 does not report item counts.
        return self._count

    def get_count(self) -> Optional[int]:
        return None  # not available property

    # Annotation widened to Optional[bool]: None means "unknown", since
    # emptiness cannot be determined without downloading the object.
    def is_empty(self) -> Optional[bool]:
        return None  # not available property


ConnType.add_classes(S3Object)
# Tail of LocalFolder: discovery and connection of existing files on disk.
# The class header is outside the visible range.
    def get_existing_file_names(self) -> Iterable:
        # Yield names from the folder listing that are regular files on
        # disk (directories and other entries are skipped).
        for name in self.list_existing_names():
            path = self.get_file_path(name)
            if os.path.isfile(path):
                yield name

    def list_existing_file_names(self) -> Iterable:
        return list(self.get_existing_file_names())

    def all_existing_files(self, **kwargs) -> Iterable:
        # Yield a connector per existing file: reuse an already registered
        # child when present, otherwise create one via file().
        for name in self.list_existing_file_names():
            children = self.get_children()
            if name in children:
                yield children[name]
            else:
                yield self.file(name, **kwargs)

    def connect_all(self, inplace: bool = True, **kwargs) -> Union[list, Native]:
        # Materializing the generator registers every existing file as a child.
        files = list(self.all_existing_files(**kwargs))
        # NOTE(review): returning the list when inplace=True (and self when
        # inplace=False) looks inverted relative to the usual inplace
        # convention -- confirm against callers before changing.
        if inplace:
            return files
        else:
            return self

    @staticmethod
    def _assume_native(obj) -> Native:
        # Static-typing helper: assert to the checker that obj is Native.
        return obj


ConnType.add_classes(LocalFolder)
def from_stream(self, stream: Stream, verbose: bool = True) -> Native: partition = self.get_partition() assert partition, 'suffix and partition must be defined' partition = partition.from_stream(stream, verbose=verbose) self.set_partition(partition, inplace=True) return self def to_stream( self, data: Union[Iterable, Auto] = AUTO, name: AutoName = AUTO, stream_type: Union[StreamType, Auto] = AUTO, ex: OptionalFields = None, **kwargs ) -> Stream: partition = self.get_partition() assert partition, 'suffix and partition must be defined' return partition.to_stream(data=data, name=name, stream_type=stream_type, ex=ex, **kwargs) FolderType.set_dict_classes( { FolderType.LocalFolder: LocalFolder, FolderType.LocalMask: LocalMask, FolderType.PartitionedLocalFile: PartitionedLocalFile, } ) ConnType.add_classes( LocalFolder, LocalMask, PartitionedLocalFile, )
# NOTE(review): fragment -- this chunk opens with the tail (`return
# self.get_name()`) of a LocalMask method whose def line is outside the view.
        return self.get_name()

    def get_folder(self, skip_missing: bool = False) -> HierarchicFolder:
        # Parent connector, checked to be a folder unless skip_missing is set.
        parent = self.get_parent()
        if not skip_missing:
            assert isinstance(parent, HierarchicFolder)
        return parent

    def get_folder_path(self) -> str:
        return self.get_folder().get_path()

    def get_mask_path(self) -> str:
        # Full path template: <folder path><delimiter><glob mask>.
        return self.get_folder_path() + self.get_path_delimiter() + self.get_mask()

    def get_path(self, with_mask: bool = True) -> str:
        if with_mask:
            return self.get_mask_path()
        else:
            return self.get_folder_path()

    def yield_existing_names(self) -> Iterable:
        # Filter the parent folder's listing by the glob-style mask.
        for name in self.get_folder().list_existing_names():
            if fnmatch.fnmatch(name, self.get_mask()):
                yield name

    def list_existing_names(self) -> list:
        return list(self.yield_existing_names())


ConnType.add_classes(LocalMask)
# NOTE(review): fragment -- this chunk starts inside an S3Folder method
# (apparently get_existing_object_props); its def line is outside the view.
        bucket = self.get_bucket()
        # Duck-typed check instead of isinstance to avoid importing S3Bucket.
        if hasattr(
                bucket, 'get_existing_object_props'):  # isinstance(bucket, S3Bucket)
            return bucket.get_existing_object_props(
                prefix=self.get_path_in_bucket())
        else:
            raise TypeError(
                'Expected parent bucket as S3Bucket, got {}'.format(bucket))

    def get_existing_object_names(self) -> Generator:
        # Yield object keys found under this folder's prefix.
        for object_props in self.get_existing_object_props():
            name = object_props.get('Key')
            if name:
                yield name

    def list_existing_names(self) -> list:
        return list(self.get_existing_object_names())

    def get_existing_folder_names(self) -> Generator:
        # Yield sub-folder prefixes reported by the listing.
        for prefix_props in self.get_existing_prefixes():
            name = prefix_props.get('Prefix')
            if name:
                yield name

    def list_existing_folder_names(self) -> list:
        return list(self.get_existing_folder_names())


ConnType.add_classes(S3Folder)
# NOTE(review): fragment -- interior of a Table example-building method;
# the lines defining str_filters, example_str_len, stream_example and the
# outer "example found" condition are outside this view, so the nesting of
# the first branches below is reconstructed -- verify against the original.
            if str_filters:
                message = 'Example with filters: {}'.format(str_filters)
            else:
                message = 'Example without any filters:'
        else:
            message = '[EXAMPLE_NOT_FOUND] Example with this filters not found: {}'.format(
                str_filters)
            stream_example = None
        item_example = self.get_one_item()
        if item_example:
            if example_str_len:
                # Truncate long values, appending CONTINUE_SYMBOL so the
                # cut is visible in the printed example.
                for k, v in item_example.items():
                    v = str(v)
                    if len(v) > example_str_len:
                        fixed_len = example_str_len - len(CONTINUE_SYMBOL)
                        if fixed_len < 0:
                            # Budget smaller than the symbol itself: show a
                            # clipped continuation symbol only.
                            fixed_len = 0
                            continue_symbol = CONTINUE_SYMBOL[:example_str_len]
                        else:
                            continue_symbol = CONTINUE_SYMBOL
                        item_example[k] = str(v)[:fixed_len] + continue_symbol
        else:
            item_example = dict()
            stream_example = None
            message = '[EMPTY_DATA] There are no valid data in {}'.format(
                self.__repr__())
        return item_example, stream_example, message


ConnType.add_classes(Table)
# NOTE(review): fragment -- this chunk starts inside an S3Bucket listing
# method (its def line, providing `prefix` and `kwargs`, is outside the
# view). Presumably wraps the client's list_objects call -- note that
# list_objects (v1) truncates at 1000 keys per response; confirm whether
# pagination is handled by callers.
        kwargs['Prefix'] = prefix
        return self.get_client().list_objects(**kwargs)

    def get_existing_object_names(self, prefix: Optional[str] = None) -> Generator:
        # Yield object keys from the 'Contents' section of the listing
        # response (absent key defaults to an empty list).
        for object_props in self.get_existing_object_props(prefix=prefix).get(
                'Contents', []):
            name = object_props.get('Key')
            if name:
                yield name

    def list_existing_names(self, prefix: Optional[str] = None) -> list:
        return list(self.get_existing_object_names(prefix=prefix))

    def get_existing_folder_names(self, prefix: Optional[str] = None) -> Generator:
        # Yield folder-like prefixes reported by the bucket listing.
        for prefix_props in self.get_existing_prefixes(prefix=prefix):
            name = prefix_props.get('Prefix')
            if name:
                yield name

    def list_existing_folder_names(self, prefix: Optional[str] = None) -> list:
        return list(self.get_existing_folder_names(prefix=prefix))

    @staticmethod
    def _get_covert_props() -> tuple:
        # Property names whose values must be masked in logs/repr (credentials).
        return COVERT_PROPS


ConnType.add_classes(S3Bucket)