def get_child_class_by_type(type_name: Union[ConnType, FolderType, str]) -> Class:
    """Resolve a connector class from a type name or enum member.

    :param type_name: a ConnType/FolderType member, or its string value.
    :return: the connector class registered for that type.
    :raises ValueError: if a string matches neither ConnType nor FolderType.
    """
    if isinstance(type_name, str):
        try:
            conn_type = ConnType(type_name)
        except ValueError:
            # not a connection type; fall back to folder types
            conn_type = FolderType(type_name)
    else:
        # fix: original left conn_type unbound (NameError) for enum inputs,
        # although the annotation explicitly allows ConnType/FolderType members
        conn_type = type_name
    return conn_type.get_class()
def get_child_class_by_name_and_type(self, name: str, filetype: Union[ConnType, ContentType, Auto] = AUTO) -> Class:
    """Pick a child connector class from an explicit type or, failing that, from the name.

    Returns None implicitly when no type can be detected from the name.
    """
    if arg.is_defined(filetype):
        return ConnType(filetype).get_class()
    detected = self.get_type_by_name(name)
    if detected:
        return detected.get_class()
def get_class(conn_type: Union[ConnType, Type, str]) -> Type:
    """Return the connector class for *conn_type*.

    Accepts an already-resolved connector class (returned unchanged),
    a ConnType string value, or a ConnType member.

    :raises TypeError: if the argument cannot be resolved to a ConnType.
    """
    if conn_type in CONN_CLASSES:
        # already a registered class - nothing to resolve
        return conn_type
    elif isinstance(conn_type, str):
        conn_type = ConnType(conn_type)
    if not isinstance(conn_type, ConnType):
        # fix: was `assert isinstance(...), TypeError(...)` - an assert is
        # stripped under `python -O` and would raise AssertionError, not the
        # TypeError the author clearly intended (a TypeError instance was
        # used as the assert message)
        message = 'conn_type must be an instance of ConnType (but {} as type {} received)'
        raise TypeError(message.format(conn_type, type(conn_type)))
    return conn_type.get_class()
def folder(self, name: str, folder_type: Union[ConnType, FolderType, Auto] = AUTO, **kwargs) -> ConnectorInterface:
    """Create a child folder connector, register it on self and return it."""
    if not arg.is_defined(folder_type):
        folder_type = self.get_type_by_name(name)
    # a name detected as a local file still yields a folder connector here
    if folder_type == ConnType.LocalFile:
        folder_type = ConnType.LocalFolder
    obj = ConnType(folder_type).get_class()(name, parent=self, **kwargs)
    self.add_folder(obj)
    return obj
def get_dialect_type(database_type) -> Optional[DialectType]:
    """Map a database connector type to its SQL dialect, or None if unknown."""
    key = ConnType(database_type).get_value()
    return DICT_DB_TO_DIALECT_TYPE.get(key)
# Names of all file connector classes.
FILE_CLASS_NAMES = tuple([c.__name__ for c in FILE_CLASSES])
# Maps a default file extension to its concrete file connector class.
# NOTE(review): `c.get_default_file_extension.__get__(c)` invokes the
# descriptor protocol on the class attribute; this yields the extension
# string only if get_default_file_extension is a descriptor/property.
# If it is a plain method, the keys are bound methods and lookups by
# extension string (see get_type_by_ext) can never match - confirm.
DICT_EXT_TO_CLASS = {
    c.get_default_file_extension.__get__(c): c
    for c in CONN_CLASSES
    if c in FILE_CLASSES and not c.__name__.startswith('Abstract')
}
# Database connector class name -> SQL dialect member.
DICT_DB_TO_DIALECT_TYPE = {
    PostgresDatabase.__name__: DialectType.Postgres,
    ClickhouseDatabase.__name__: DialectType.Clickhouse,
}
# NOTE(review): this is a live dict view, not a tuple like FILE_CLASS_NAMES.
DB_CLASS_NAMES = DICT_DB_TO_DIALECT_TYPE.keys()

# Module-level singletons, expected to be populated elsewhere.
_context: Context = None
_local_storage: Optional[LocalStorage] = None
# NOTE(review): assigns the *current* value of _context (None at import
# time), not a live reference - confirm this is intended.
PostgresDatabase.cx = _context

# Register connector classes on the ConnType enum and declare the allowed
# parent/child relationships of the connector hierarchy.
ConnType.set_dict_classes(DICT_CONN_CLASSES, skip_missing=True)
AbstractStorage.set_parent_obj_classes([ContextInterface])
AbstractDatabase.set_child_obj_classes([Table])
Table.set_parent_obj_classes([AbstractDatabase, PostgresDatabase, ClickhouseDatabase])
LocalStorage.set_child_obj_classes([LocalFolder, LocalMask, LocalFile, PartitionedLocalFile])
LocalFolder.set_parent_obj_classes([LocalStorage, LocalFolder])
LocalFolder.set_child_obj_classes([LocalFile, PartitionedLocalFile, LocalMask, LocalFolder, LocalStorage])
S3Storage.set_child_obj_classes([S3Bucket])
S3Bucket.set_parent_obj_classes([S3Storage])
S3Bucket.set_child_obj_classes([S3Folder, S3Object])
S3Folder.set_parent_obj_classes([S3Bucket, S3Folder])
S3Folder.set_child_obj_classes([S3Folder, S3Object])
records_batch = list() n = 0 for n, row in enumerate(rows): if use_fast_batch_method: current_record = {k: v for k, v in zip(columns, row)} records_batch.append(current_record) elif skip_errors: try: cur.execute(query, row) except TypeError or IndexError as e: # TypeError: not all arguments converted during string formatting self.log('Error line: {}'.format(str(row)), level=LoggingLevel.Debug, verbose=verbose) self.log('{}: {}'.format(e.__class__.__name__, e), level=LoggingLevel.Error) if (n + 1) % step == 0: if use_fast_batch_method: self.execute_batch(query, records_batch, step, cursor=cur) records_batch = list() if not progress.get_position(): progress.update(0) conn.commit() progress.update(n) gc.collect() if use_fast_batch_method: self.execute_batch(query, records_batch, step, cursor=cur) conn.commit() progress.finish(n) if return_count: return n ConnType.add_classes(PostgresDatabase)
def get_conn_type(self) -> ConnType:
    """Detect this connector's ConnType; returns None when detection fails."""
    detected = ConnType.detect(self)
    if isinstance(detected, ConnType):
        return detected
# --- fragment: tail of a bucket-accessor method (its def and the opening
# guard are not in view; the `else` below most plausibly belongs to an
# unseen `if bucket:` - nesting reconstructed, confirm against the repo) ---
if arg.is_defined(access_key) and hasattr(bucket, 'set_access_key'):
    bucket.set_access_key(access_key)
    if arg.is_defined(secret_key) and hasattr(bucket, 'set_secret_key'):
        bucket.set_secret_key(secret_key)
else:
    # no usable bucket yet: build one with explicit or lazily-acquired keys
    bucket_class = self.get_default_child_obj_class()
    bucket = bucket_class(
        name=name,
        storage=self,
        access_key=arg.delayed_acquire(access_key, self.get_access_key),
        secret_key=arg.delayed_acquire(secret_key, self.get_secret_key),
    )
return bucket

def get_resource_properties(self) -> dict:
    # keyword arguments used to construct the S3 resource/client
    return dict(
        service_name=self.get_service_name(),
        endpoint_url=self.get_endpoint_url(),
    )

@staticmethod
def _get_covert_props() -> tuple:
    # names of properties whose values must be masked in repr/logs
    return COVERT_PROPS


ConnType.add_classes(S3Storage)
elif create_if_not_yet: return SingletonLogger() @staticmethod def get_default_child_type() -> ConnType: return ConnType.LocalFolder @classmethod def get_default_child_class(cls) -> Class: child_class = cls.get_default_child_type().get_class if not arg.is_defined(child_class): child_class = cls.get_default_child_obj_class() return child_class def get_folders(self) -> Iterable: for name, folder in self.get_children(): yield folder def folder(self, name, **kwargs) -> ConnectorInterface: return self.child(name, parent=self, **kwargs) def get_path_delimiter(self) -> str: return self._path_delimiter @staticmethod def get_full_path() -> str: return os.getcwd() ConnType.add_classes(LocalStorage)
# --- fragment: interior of an example/description builder (its def and the
# opening `if <example found>:` guard are not in view; the second `else`
# below belongs to that unseen guard - nesting reconstructed, confirm) ---
    if str_filters:
        message = 'Example with filters: {}'.format(str_filters)
    else:
        message = 'Example without any filters:'
else:
    message = '[EXAMPLE_NOT_FOUND] Example with this filters not found: {}'.format(str_filters)
    stream_example = None
item_example = self.get_one_item()
if item_example:
    if example_str_len:
        # truncate long values for display, appending a continuation mark
        for k, v in item_example.items():
            v = str(v)
            if len(v) > example_str_len:
                fixed_len = example_str_len - len(CONTINUE_SYMBOL)
                if fixed_len < 0:
                    # the mark itself is longer than the limit: shorten it too
                    fixed_len = 0
                    continue_symbol = CONTINUE_SYMBOL[:example_str_len]
                else:
                    continue_symbol = CONTINUE_SYMBOL
                item_example[k] = str(v)[:fixed_len] + continue_symbol
else:
    # no data at all: return an empty example with a diagnostic message
    item_example = dict()
    stream_example = None
    message = '[EMPTY_DATA] There are no valid data in {}'.format(self.__repr__())
return item_example, stream_example, message


ConnType.add_classes(Table)
def get_type_by_ext(ext, default: ConnType = ConnType.TextFile) -> ConnType:
    """Look up the connector type registered for a file extension.

    Falls back to *default* when the extension is unknown.
    """
    found_class = DICT_EXT_TO_CLASS.get(ext)
    return ConnType(found_class.__name__) if found_class else default
def get_existing_file_names(self) -> Iterable:
    """Yield listed names that are regular files on disk."""
    for name in self.list_existing_names():
        path = self.get_file_path(name)
        if os.path.isfile(path):
            yield name

def list_existing_file_names(self) -> Iterable:
    return list(self.get_existing_file_names())

def all_existing_files(self, **kwargs) -> Iterable:
    """Yield a connector for every file on disk, reusing registered children."""
    for name in self.list_existing_file_names():
        children = self.get_children()
        if name in children:
            yield children[name]
        else:
            # not registered yet: create a child file connector on the fly
            yield self.file(name, **kwargs)

def connect_all(self, inplace: bool = True, **kwargs) -> Union[list, Native]:
    # NOTE(review): returns the file list when inplace=True and self
    # otherwise, which looks inverted relative to the usual inplace
    # convention - confirm against callers before changing.
    files = list(self.all_existing_files(**kwargs))
    if inplace:
        return files
    else:
        return self

@staticmethod
def _assume_native(obj) -> Native:
    # type-narrowing helper: tells the checker obj is Native
    return obj


ConnType.add_classes(LocalFolder)
# --- fragment: tail of an upload-from-stream method signature (its def is
# not in view) ---
        storage_class=DEFAULT_STORAGE_CLASS,
        encoding='utf8',
        verbose: bool = True):
    lines = self._get_lines_from_stream(stream)
    data = bytes('\n'.join(lines), encoding=encoding)
    response = self.put_object(data=data, storage_class=storage_class)
    # boto3-style response: HTTP 200 in ResponseMetadata means success
    is_done = response.get('ResponseMetadata').get('HTTPStatusCode') == HTTP_OK
    if is_done:
        return self
    else:
        raise ValueError(response)

def to_stream(self, stream_type: Union[StreamType, str, Auto] = AUTO, **kwargs) -> Stream:
    """Wrap this object's data into a stream of the requested type."""
    stream_class = StreamType(stream_type).get_class()
    return stream_class(self.get_data(), **kwargs)

def get_expected_count(self) -> Optional[int]:
    # declared/cached count, if known
    return self._count

def get_count(self) -> Optional[int]:
    return None  # not available property

def is_empty(self) -> bool:
    # NOTE(review): returns None despite the bool annotation
    return None  # not available property


ConnType.add_classes(S3Object)
def from_stream(self, stream: Stream, verbose: bool = True) -> Native:
    """Write the stream into the current partition file and return self."""
    partition = self.get_partition()
    assert partition, 'suffix and partition must be defined'
    partition = partition.from_stream(stream, verbose=verbose)
    self.set_partition(partition, inplace=True)
    return self

def to_stream(
        self,
        data: Union[Iterable, Auto] = AUTO,
        name: AutoName = AUTO,
        stream_type: Union[StreamType, Auto] = AUTO,
        ex: OptionalFields = None,
        **kwargs
) -> Stream:
    """Delegate stream creation to the current partition file."""
    partition = self.get_partition()
    assert partition, 'suffix and partition must be defined'
    return partition.to_stream(data=data, name=name, stream_type=stream_type, ex=ex, **kwargs)


# Register the folder-like connector classes on both type enums.
FolderType.set_dict_classes(
    {
        FolderType.LocalFolder: LocalFolder,
        FolderType.LocalMask: LocalMask,
        FolderType.PartitionedLocalFile: PartitionedLocalFile,
    }
)
ConnType.add_classes(
    LocalFolder,
    LocalMask,
    PartitionedLocalFile,
)
# --- fragment: tail of a method (its def is not in view) ---
    return self.get_name()

def get_folder(self, skip_missing: bool = False) -> HierarchicFolder:
    """Return the parent folder; skip_missing disables the type assertion."""
    parent = self.get_parent()
    if not skip_missing:
        assert isinstance(parent, HierarchicFolder)
    return parent

def get_folder_path(self) -> str:
    return self.get_folder().get_path()

def get_mask_path(self) -> str:
    # <folder path><delimiter><mask pattern>
    return self.get_folder_path() + self.get_path_delimiter() + self.get_mask()

def get_path(self, with_mask: bool = True) -> str:
    if with_mask:
        return self.get_mask_path()
    else:
        return self.get_folder_path()

def yield_existing_names(self) -> Iterable:
    """Yield names in the parent folder matching this mask (fnmatch rules)."""
    for name in self.get_folder().list_existing_names():
        if fnmatch.fnmatch(name, self.get_mask()):
            yield name

def list_existing_names(self) -> list:
    return list(self.yield_existing_names())


ConnType.add_classes(LocalMask)
# --- fragment: interior of an object-listing method (its def is not in view) ---
bucket = self.get_bucket()
if hasattr(bucket, 'get_existing_object_props'):  # isinstance(bucket, S3Bucket)
    # list only keys under this folder's prefix inside the bucket
    return bucket.get_existing_object_props(prefix=self.get_path_in_bucket())
else:
    raise TypeError('Expected parent bucket as S3Bucket, got {}'.format(bucket))

def get_existing_object_names(self) -> Generator:
    """Yield S3 object keys from the listing response."""
    for object_props in self.get_existing_object_props():
        name = object_props.get('Key')
        if name:
            yield name

def list_existing_names(self) -> list:
    return list(self.get_existing_object_names())

def get_existing_folder_names(self) -> Generator:
    """Yield common prefixes ("folders") from the listing response."""
    for prefix_props in self.get_existing_prefixes():
        name = prefix_props.get('Prefix')
        if name:
            yield name

def list_existing_folder_names(self) -> list:
    return list(self.get_existing_folder_names())


ConnType.add_classes(S3Folder)
# --- fragment: tail of a to_stream-like signature (its def is not in view) ---
        step: AutoCount = AUTO,
        **kwargs
) -> Stream:
    if Auto.is_defined(data):
        kwargs['data'] = data
    stream_type = Auto.delayed_acquire(stream_type, self.get_stream_type)
    # ex-argument exists only for interface compatibility and must stay empty
    assert not ex, 'ex-argument for LocalFile.to_stream() not supported (got {})'.format(ex)
    return self.to_stream_type(stream_type=stream_type, step=step, **kwargs)

@classmethod
def get_default_folder(cls) -> Connector:
    # class-level default parent folder for files created without one
    return cls._default_folder

@classmethod
def set_default_folder(cls, folder: ConnectorInterface) -> None:
    cls._default_folder = folder

def _get_field_getter(self, field: UniKey, item_type: Union[ItemType, Auto] = AUTO, default=None):
    """Build a fast positional getter for field(s) when a struct is known."""
    if self.get_struct():
        if isinstance(field, ARRAY_TYPES):
            # several fields requested: return a tuple of positional values
            fields_positions = self.get_fields_positions(field)
            return lambda i: tuple([i[p] for p in fields_positions])
        else:
            field_position = self.get_field_position(field)
            return lambda i: i[field_position]
    else:
        # no struct known: fall back to the generic implementation
        return super()._get_field_getter(field, item_type=item_type, default=default)


ConnType.add_classes(LocalFile)
# --- fragment: interior of a listing method (its def is not in view) ---
    kwargs['Prefix'] = prefix
return self.get_client().list_objects(**kwargs)

def get_existing_object_names(self, prefix: Optional[str] = None) -> Generator:
    """Yield object keys from the bucket listing ('Contents' section)."""
    for object_props in self.get_existing_object_props(prefix=prefix).get('Contents', []):
        name = object_props.get('Key')
        if name:
            yield name

def list_existing_names(self, prefix: Optional[str] = None) -> list:
    return list(self.get_existing_object_names(prefix=prefix))

def get_existing_folder_names(self, prefix: Optional[str] = None) -> Generator:
    """Yield common prefixes ("folders") from the bucket listing."""
    for prefix_props in self.get_existing_prefixes(prefix=prefix):
        name = prefix_props.get('Prefix')
        if name:
            yield name

def list_existing_folder_names(self, prefix: Optional[str] = None) -> list:
    return list(self.get_existing_folder_names(prefix=prefix))

@staticmethod
def _get_covert_props() -> tuple:
    # names of properties whose values must be masked in repr/logs
    return COVERT_PROPS


ConnType.add_classes(S3Bucket)