예제 #1
0
 def get_child_class_by_type(type_name: Union[ConnType, FolderType, str]) -> Class:
     """Resolve the class registered for a type given as an enum member or a string.

     A string is first tried as a ConnType value and, if ConnType rejects it with
     ValueError, as a FolderType value. Any non-string argument is used as is and
     is expected to provide get_class().
     """
     if isinstance(type_name, str):
         try:
             conn_type = ConnType(type_name)
         except ValueError:
             conn_type = FolderType(type_name)
     else:
         # Without this branch conn_type stayed unbound for enum members (which the
         # annotation allows), and the call below raised UnboundLocalError.
         conn_type = type_name
     return conn_type.get_class()
예제 #2
0
 def get_child_class_by_name_and_type(self, name: str, filetype: Union[ConnType, ContentType, Auto] = AUTO) -> Class:
     """Pick the child class from an explicit filetype or, failing that, from the name.

     A defined filetype is converted with ConnType and its class is returned.
     Otherwise the type is guessed with self.get_type_by_name(name); when nothing
     is guessed the method returns None.
     """
     if not arg.is_defined(filetype):
         guessed_type = self.get_type_by_name(name)
         return guessed_type.get_class() if guessed_type else None
     explicit_type = ConnType(filetype)
     return explicit_type.get_class()
예제 #3
0
def get_class(conn_type: Union[ConnType, Type, str]) -> Type:
    """Return the connector class for conn_type.

    A value found in CONN_CLASSES is returned unchanged. A string is converted to
    ConnType first. Anything that is not a ConnType at that point fails the
    assertion (the assertion payload is a TypeError describing the value).
    """
    if conn_type in CONN_CLASSES:
        return conn_type
    if isinstance(conn_type, str):
        conn_type = ConnType(conn_type)
    template = 'conn_type must be an instance of ConnType (but {} as type {} received)'
    assert isinstance(conn_type, ConnType), TypeError(template.format(conn_type, type(conn_type)))
    return conn_type.get_class()
예제 #4
0
 def folder(self, name: str, folder_type: Union[ConnType, FolderType, Auto] = AUTO, **kwargs) -> ConnectorInterface:
     """Create a child folder connector called name, pass it to add_folder and return it.

     An undefined folder_type is derived from the name with get_type_by_name;
     a ConnType.LocalFile guess is replaced with ConnType.LocalFolder.
     """
     if not arg.is_defined(folder_type):
         guessed = self.get_type_by_name(name)
         folder_type = ConnType.LocalFolder if guessed == ConnType.LocalFile else guessed
     new_folder = ConnType(folder_type).get_class()(name, parent=self, **kwargs)
     self.add_folder(new_folder)
     return new_folder
예제 #5
0
def get_dialect_type(database_type) -> Optional[DialectType]:
    """Look up the dialect type mapped to a database connector type.

    database_type is converted with ConnType; the result of its get_value() is the
    key into DICT_DB_TO_DIALECT_TYPE. Returns None when the key is absent.
    """
    conn_type = ConnType(database_type)
    return DICT_DB_TO_DIALECT_TYPE.get(conn_type.get_value())
예제 #6
0
# Class names of every entry in FILE_CLASSES, frozen into a tuple.
FILE_CLASS_NAMES = tuple([c.__name__ for c in FILE_CLASSES])
# Lookup from a default file extension to its file class. Only classes that are in
# both CONN_CLASSES and FILE_CLASSES and whose name does not start with 'Abstract'
# are included.
# NOTE(review): `.__get__(c)` invokes the descriptor protocol with the class itself as
# the instance; presumably get_default_file_extension is a property, so this evaluates
# to the extension value rather than a bound method - confirm.
DICT_EXT_TO_CLASS = {
    c.get_default_file_extension.__get__(c): c
    for c in CONN_CLASSES
    if c in FILE_CLASSES and not c.__name__.startswith('Abstract')
}
# Lookup from a database class name to its dialect type.
DICT_DB_TO_DIALECT_TYPE = {
    PostgresDatabase.__name__: DialectType.Postgres,
    ClickhouseDatabase.__name__: DialectType.Clickhouse,
}
# A dict keys view (not a copy): it reflects later changes to DICT_DB_TO_DIALECT_TYPE.
DB_CLASS_NAMES = DICT_DB_TO_DIALECT_TYPE.keys()

# Module-level holders, both unset (None) at this point.
_context: Optional[Context] = None
_local_storage: Optional[LocalStorage] = None
# Assigns the current value of _context (None here) to the class attribute cx.
PostgresDatabase.cx = _context
ConnType.set_dict_classes(DICT_CONN_CLASSES, skip_missing=True)

# Declare which classes each connector class accepts as parent and as child objects.
AbstractStorage.set_parent_obj_classes([ContextInterface])
AbstractDatabase.set_child_obj_classes([Table])
Table.set_parent_obj_classes(
    [AbstractDatabase, PostgresDatabase, ClickhouseDatabase])
LocalStorage.set_child_obj_classes(
    [LocalFolder, LocalMask, LocalFile, PartitionedLocalFile])
LocalFolder.set_parent_obj_classes([LocalStorage, LocalFolder])
LocalFolder.set_child_obj_classes(
    [LocalFile, PartitionedLocalFile, LocalMask, LocalFolder, LocalStorage])
S3Storage.set_child_obj_classes([S3Bucket])
S3Bucket.set_parent_obj_classes([S3Storage])
S3Bucket.set_child_obj_classes([S3Folder, S3Object])
S3Folder.set_parent_obj_classes([S3Bucket, S3Folder])
S3Folder.set_child_obj_classes([S3Folder, S3Object])
예제 #7
0
        records_batch = list()
        n = 0
        for n, row in enumerate(rows):
            if use_fast_batch_method:
                current_record = {k: v for k, v in zip(columns, row)}
                records_batch.append(current_record)
            elif skip_errors:
                try:
                    cur.execute(query, row)
                except TypeError or IndexError as e:  # TypeError: not all arguments converted during string formatting
                    self.log('Error line: {}'.format(str(row)), level=LoggingLevel.Debug, verbose=verbose)
                    self.log('{}: {}'.format(e.__class__.__name__, e), level=LoggingLevel.Error)
            if (n + 1) % step == 0:
                if use_fast_batch_method:
                    self.execute_batch(query, records_batch, step, cursor=cur)
                    records_batch = list()
                if not progress.get_position():
                    progress.update(0)
                conn.commit()
                progress.update(n)
                gc.collect()
        if use_fast_batch_method:
            self.execute_batch(query, records_batch, step, cursor=cur)
        conn.commit()
        progress.finish(n)
        if return_count:
            return n


# Module-level registration: pass PostgresDatabase to ConnType.add_classes.
ConnType.add_classes(PostgresDatabase)
예제 #8
0
 def get_conn_type(self) -> ConnType:
     """Return the result of ConnType.detect(self) if it is a ConnType, otherwise None."""
     detected = ConnType.detect(self)
     return detected if isinstance(detected, ConnType) else None
예제 #9
0
파일: s3_storage.py 프로젝트: az365/snakee
            if arg.is_defined(access_key) and hasattr(bucket,
                                                      'set_access_key'):
                bucket.set_access_key(access_key)
            if arg.is_defined(secret_key) and hasattr(bucket,
                                                      'set_secret_key'):
                bucket.set_secret_key(secret_key)
        else:
            bucket_class = self.get_default_child_obj_class()
            bucket = bucket_class(
                name=name,
                storage=self,
                access_key=arg.delayed_acquire(access_key,
                                               self.get_access_key),
                secret_key=arg.delayed_acquire(secret_key,
                                               self.get_secret_key),
            )
        return bucket

    def get_resource_properties(self) -> dict:
        """Collect the service name and the endpoint URL into a keyword dictionary."""
        service_name = self.get_service_name()
        endpoint_url = self.get_endpoint_url()
        return {'service_name': service_name, 'endpoint_url': endpoint_url}

    @staticmethod
    def _get_covert_props() -> tuple:
        """Return the module-level COVERT_PROPS value."""
        return COVERT_PROPS


# Module-level registration: pass S3Storage to ConnType.add_classes.
ConnType.add_classes(S3Storage)
예제 #10
0
        elif create_if_not_yet:
            return SingletonLogger()

    @staticmethod
    def get_default_child_type() -> ConnType:
        """Return ConnType.LocalFolder, the default type for children."""
        return ConnType.LocalFolder

    @classmethod
    def get_default_child_class(cls) -> Class:
        """Return the class used for children created without an explicit type.

        The class obtained from get_default_child_type().get_class() is preferred;
        when arg.is_defined() rejects it, get_default_child_obj_class() is used.
        """
        # get_class has to be called: without the parentheses child_class was the
        # bound method itself, so a method object was returned instead of a class.
        child_class = cls.get_default_child_type().get_class()
        if not arg.is_defined(child_class):
            child_class = cls.get_default_child_obj_class()
        return child_class

    def get_folders(self) -> Iterable:
        """Yield every child folder object.

        get_children() may return either a name -> folder mapping or an iterable of
        (name, folder) pairs; both shapes are supported.
        """
        children = self.get_children()
        # Iterating a mapping directly yields only its keys, so unpacking
        # (name, folder) from it fails; go through items() in that case.
        pairs = children.items() if hasattr(children, 'items') else children
        for _name, folder in pairs:
            yield folder

    def folder(self, name, **kwargs) -> ConnectorInterface:
        """Return self.child(name, ...) with this object passed as parent plus any extra keyword arguments."""
        return self.child(name, parent=self, **kwargs)

    def get_path_delimiter(self) -> str:
        """Return the value stored in self._path_delimiter."""
        return self._path_delimiter

    @staticmethod
    def get_full_path() -> str:
        """Return the current working directory of the process (os.getcwd())."""
        return os.getcwd()


# Module-level registration: pass LocalStorage to ConnType.add_classes.
ConnType.add_classes(LocalStorage)
예제 #11
0
파일: table.py 프로젝트: az365/snakee
            if str_filters:
                message = 'Example with filters: {}'.format(str_filters)
            else:
                message = 'Example without any filters:'
        else:
            message = '[EXAMPLE_NOT_FOUND] Example with this filters not found: {}'.format(
                str_filters)
            stream_example = None
            item_example = self.get_one_item()
        if item_example:
            if example_str_len:
                for k, v in item_example.items():
                    v = str(v)
                    if len(v) > example_str_len:
                        fixed_len = example_str_len - len(CONTINUE_SYMBOL)
                        if fixed_len < 0:
                            fixed_len = 0
                            continue_symbol = CONTINUE_SYMBOL[:example_str_len]
                        else:
                            continue_symbol = CONTINUE_SYMBOL
                        item_example[k] = str(v)[:fixed_len] + continue_symbol
        else:
            item_example = dict()
            stream_example = None
            message = '[EMPTY_DATA] There are no valid data in {}'.format(
                self.__repr__())
        return item_example, stream_example, message


# Module-level registration: pass Table to ConnType.add_classes.
ConnType.add_classes(Table)
예제 #12
0
def get_type_by_ext(ext, default: ConnType = ConnType.TextFile) -> ConnType:
    """Map a file extension to a ConnType, falling back to default.

    The extension is looked up in DICT_EXT_TO_CLASS; a hit is converted to ConnType
    from the class name. A missing or falsy entry yields default.
    """
    matched_class = DICT_EXT_TO_CLASS.get(ext)
    if not matched_class:
        return default
    return ConnType(matched_class.__name__)
예제 #13
0
    def get_existing_file_names(self) -> Iterable:
        """Yield each existing name whose path (from get_file_path) is a regular file."""
        for candidate in self.list_existing_names():
            if not os.path.isfile(self.get_file_path(candidate)):
                continue
            yield candidate

    def list_existing_file_names(self) -> Iterable:
        """Return the names produced by get_existing_file_names() as a list."""
        return [*self.get_existing_file_names()]

    def all_existing_files(self, **kwargs) -> Iterable:
        """Yield one object per existing file name.

        A name already present in get_children() yields that child; any other name
        is passed to self.file(name, **kwargs) and its result is yielded.
        """
        for file_name in self.list_existing_file_names():
            known = self.get_children()
            yield known[file_name] if file_name in known else self.file(file_name, **kwargs)

    def connect_all(self, inplace: bool = True, **kwargs) -> Union[list, Native]:
        """Materialise all_existing_files(**kwargs) into a list.

        Returns that list when inplace is truthy, otherwise returns self.
        """
        connected = list(self.all_existing_files(**kwargs))
        return connected if inplace else self

    @staticmethod
    def _assume_native(obj) -> Native:
        """Return obj unchanged; only the annotated return type differs from the input."""
        return obj


# Module-level registration: pass LocalFolder to ConnType.add_classes.
ConnType.add_classes(LocalFolder)
예제 #14
0
                    storage_class=DEFAULT_STORAGE_CLASS,
                    encoding='utf8',
                    verbose: bool = True):
        lines = self._get_lines_from_stream(stream)
        data = bytes('\n'.join(lines), encoding=encoding)
        response = self.put_object(data=data, storage_class=storage_class)
        is_done = response.get('ResponseMetadata').get(
            'HTTPStatusCode') == HTTP_OK
        if is_done:
            return self
        else:
            raise ValueError(response)

    def to_stream(self,
                  stream_type: Union[StreamType, str, Auto] = AUTO,
                  **kwargs) -> Stream:
        """Build a stream of the requested type from self.get_data(), forwarding kwargs."""
        resolved_type = StreamType(stream_type)
        stream_class = resolved_type.get_class()
        data = self.get_data()
        return stream_class(data, **kwargs)

    def get_expected_count(self) -> Optional[int]:
        """Return the value stored in self._count."""
        return self._count

    def get_count(self) -> Optional[int]:
        """Always return None: the count is not available for this object."""
        return None  # not available property

    def is_empty(self) -> Optional[bool]:
        """Always return None: emptiness is not available for this object."""
        return None  # not available property


# Module-level registration: pass S3Object to ConnType.add_classes.
ConnType.add_classes(S3Object)
예제 #15
0
    def from_stream(self, stream: Stream, verbose: bool = True) -> Native:
        """Feed the stream to the current partition and store the returned partition on self.

        Asserts that a partition is set. Returns self.
        """
        current = self.get_partition()
        assert current, 'suffix and partition must be defined'
        self.set_partition(current.from_stream(stream, verbose=verbose), inplace=True)
        return self

    def to_stream(
            self,
            data: Union[Iterable, Auto] = AUTO, name: AutoName = AUTO,
            stream_type: Union[StreamType, Auto] = AUTO, ex: OptionalFields = None,
            **kwargs
    ) -> Stream:
        """Delegate stream creation to the current partition, forwarding every argument.

        Asserts that a partition is set.
        """
        current = self.get_partition()
        assert current, 'suffix and partition must be defined'
        forwarded = dict(data=data, name=name, stream_type=stream_type, ex=ex, **kwargs)
        return current.to_stream(**forwarded)


# Give FolderType the class that corresponds to each of its three members.
FolderType.set_dict_classes(
    {
        FolderType.LocalFolder: LocalFolder,
        FolderType.LocalMask: LocalMask,
        FolderType.PartitionedLocalFile: PartitionedLocalFile,
    }
)
# Pass the same three classes to ConnType.add_classes as well.
ConnType.add_classes(
    LocalFolder,
    LocalMask,
    PartitionedLocalFile,
)
예제 #16
0
파일: local_mask.py 프로젝트: az365/snakee
        return self.get_name()

    def get_folder(self, skip_missing: bool = False) -> HierarchicFolder:
        """Return the parent object; unless skip_missing, assert it is a HierarchicFolder."""
        folder = self.get_parent()
        if skip_missing:
            return folder
        assert isinstance(folder, HierarchicFolder)
        return folder

    def get_folder_path(self) -> str:
        """Return get_path() of the folder returned by get_folder()."""
        folder = self.get_folder()
        return folder.get_path()

    def get_mask_path(self) -> str:
        """Return folder path + path delimiter + mask, concatenated in that order."""
        folder_path = self.get_folder_path()
        delimiter = self.get_path_delimiter()
        return folder_path + delimiter + self.get_mask()

    def get_path(self, with_mask: bool = True) -> str:
        """Return the mask path when with_mask is truthy, otherwise the folder path."""
        return self.get_mask_path() if with_mask else self.get_folder_path()

    def yield_existing_names(self) -> Iterable:
        """Yield the folder's existing names that match this object's mask (fnmatch rules)."""
        for candidate in self.get_folder().list_existing_names():
            if not fnmatch.fnmatch(candidate, self.get_mask()):
                continue
            yield candidate

    def list_existing_names(self) -> list:
        """Return the names produced by yield_existing_names() as a list."""
        return [*self.yield_existing_names()]


# Module-level registration: pass LocalMask to ConnType.add_classes.
ConnType.add_classes(LocalMask)
예제 #17
0
파일: s3_folder.py 프로젝트: az365/snakee
        bucket = self.get_bucket()
        if hasattr(
                bucket,
                'get_existing_object_props'):  # isinstance(bucket, S3Bucket)
            return bucket.get_existing_object_props(
                prefix=self.get_path_in_bucket())
        else:
            raise TypeError(
                'Expected parent bucket as S3Bucket, got {}'.format(bucket))

    def get_existing_object_names(self) -> Generator:
        """Yield the non-empty 'Key' of every object listed for this folder.

        get_existing_object_props() may return the raw listing response (a dict
        whose 'Contents' entry holds the per-object dicts) or an iterable of
        per-object dicts; both are accepted.
        """
        listing = self.get_existing_object_props()
        if isinstance(listing, dict):
            # Iterating the response dict itself would walk its top-level keys
            # (plain strings), so take the object entries out of 'Contents' first.
            listing = listing.get('Contents', [])
        for object_props in listing:
            name = object_props.get('Key')
            if name:
                yield name

    def list_existing_names(self) -> list:
        """Return the names produced by get_existing_object_names() as a list."""
        return [*self.get_existing_object_names()]

    def get_existing_folder_names(self) -> Generator:
        """Yield the non-empty 'Prefix' value of each entry from get_existing_prefixes()."""
        for prefix_props in self.get_existing_prefixes():
            folder_name = prefix_props.get('Prefix')
            if not folder_name:
                continue
            yield folder_name

    def list_existing_folder_names(self) -> list:
        """Return the names produced by get_existing_folder_names() as a list."""
        return [*self.get_existing_folder_names()]


# Module-level registration: pass S3Folder to ConnType.add_classes.
ConnType.add_classes(S3Folder)
예제 #18
0
            step: AutoCount = AUTO,
            **kwargs
    ) -> Stream:
        if Auto.is_defined(data):
            kwargs['data'] = data
        stream_type = Auto.delayed_acquire(stream_type, self.get_stream_type)
        assert not ex, 'ex-argument for LocalFile.to_stream() not supported (got {})'.format(ex)
        return self.to_stream_type(stream_type=stream_type, step=step, **kwargs)

    @classmethod
    def get_default_folder(cls) -> Connector:
        """Return the class-level attribute _default_folder."""
        return cls._default_folder

    @classmethod
    def set_default_folder(cls, folder: ConnectorInterface) -> None:
        """Store folder in the class-level attribute _default_folder."""
        cls._default_folder = folder

    def _get_field_getter(self, field: UniKey, item_type: Union[ItemType, Auto] = AUTO, default=None):
        """Return a callable that extracts field from an item.

        Without a struct the parent implementation is used. With a struct, items are
        indexed by position: a field of one of ARRAY_TYPES gives a getter returning a
        tuple of values, any other field gives a getter returning a single value.
        """
        if not self.get_struct():
            return super()._get_field_getter(field, item_type=item_type, default=default)
        if isinstance(field, ARRAY_TYPES):
            positions = self.get_fields_positions(field)
            return lambda i: tuple(i[pos] for pos in positions)
        position = self.get_field_position(field)
        return lambda i: i[position]


# Module-level registration: pass LocalFile to ConnType.add_classes.
ConnType.add_classes(LocalFile)
예제 #19
0
파일: s3_bucket.py 프로젝트: az365/snakee
            kwargs['Prefix'] = prefix
        return self.get_client().list_objects(**kwargs)

    def get_existing_object_names(self,
                                  prefix: Optional[str] = None) -> Generator:
        """Yield the non-empty 'Key' of each entry under 'Contents' in the listing for prefix.

        A listing without 'Contents' yields nothing.
        """
        response = self.get_existing_object_props(prefix=prefix)
        for object_props in response.get('Contents', []):
            key = object_props.get('Key')
            if not key:
                continue
            yield key

    def list_existing_names(self, prefix: Optional[str] = None) -> list:
        """Return the names produced by get_existing_object_names(prefix=prefix) as a list."""
        return [*self.get_existing_object_names(prefix=prefix)]

    def get_existing_folder_names(self,
                                  prefix: Optional[str] = None) -> Generator:
        """Yield the non-empty 'Prefix' value of each entry from get_existing_prefixes(prefix=prefix)."""
        for prefix_props in self.get_existing_prefixes(prefix=prefix):
            folder_name = prefix_props.get('Prefix')
            if not folder_name:
                continue
            yield folder_name

    def list_existing_folder_names(self, prefix: Optional[str] = None) -> list:
        """Return the names produced by get_existing_folder_names(prefix=prefix) as a list."""
        return [*self.get_existing_folder_names(prefix=prefix)]

    @staticmethod
    def _get_covert_props() -> tuple:
        """Return the module-level COVERT_PROPS value."""
        return COVERT_PROPS


# Module-level registration: pass S3Bucket to ConnType.add_classes.
ConnType.add_classes(S3Bucket)