def __init__(self, config, plugin_config):
    """Configure the list connector from the DSS connection settings."""
    Connector.__init__(self, config, plugin_config)
    self.sharepoint_list_title = self.config.get("sharepoint_list_title")
    self.auth_type = config.get('auth_type')
    logger.info(
        'init:sharepoint_list_title={}, auth_type={}'.format(
            self.sharepoint_list_title, self.auth_type
        )
    )
    # Column lookup tables, filled lazily by get_read_schema()
    self.column_ids = {}
    self.column_names = {}
    self.expand_lookup = config.get("expand_lookup", False)
    self.column_to_expand = {}
    self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
    self.display_metadata = bool(self.metadata_to_retrieve)
    self.client = SharePointClient(config)
def __init__(self, root, config, plugin_config):
    """
    :param root: the root path for this provider
    :param config: the dict of the configuration of the object
    :param plugin_config: contains the plugin settings
    """
    # Root is stored without its single leading slash, if any.
    self.root = root[1:] if root.startswith('/') else root
    self.provider_root = "/"
    logger.info('init:root={}'.format(self.root))
    self.client = SharePointClient(config)
def __init__(self, config, plugin_config):
    """Set up the list connector, honouring advanced parameters when enabled."""
    Connector.__init__(self, config, plugin_config)
    logger.info('SharePoint Online plugin connector v1.0.8')
    self.sharepoint_list_title = self.config.get("sharepoint_list_title")
    self.auth_type = config.get('auth_type')
    logger.info('init:sharepoint_list_title={}, auth_type={}'.format(
        self.sharepoint_list_title, self.auth_type))
    # Schema lookup tables, populated by get_read_schema()
    self.column_ids = {}
    self.column_names = {}
    self.column_entity_property_name = {}
    self.columns_to_format = []
    self.dss_column_name = {}
    self.column_sharepoint_type = {}
    self.expand_lookup = config.get("expand_lookup", False)
    self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
    advanced_parameters = config.get("advanced_parameters", False)
    self.write_mode = "create"
    if advanced_parameters:
        self.max_workers = config.get("max_workers", 1)
        self.batch_size = config.get("batch_size", 100)
        self.sharepoint_list_view_title = config.get(
            "sharepoint_list_view_title", "")
    else:
        # no multithread per default
        self.max_workers = 1
        self.batch_size = 100
        self.sharepoint_list_view_title = ""
    logger.info(
        "init:advanced_parameters={}, max_workers={}, batch_size={}".format(
            advanced_parameters, self.max_workers, self.batch_size))
    self.metadata_to_retrieve.append("Title")
    self.display_metadata = len(self.metadata_to_retrieve) > 0
    self.client = SharePointClient(config)
    self.sharepoint_list_view_id = None
    if self.sharepoint_list_view_title:
        self.sharepoint_list_view_id = self.get_view_id(
            self.sharepoint_list_title, self.sharepoint_list_view_title)
class SharePointFSProvider(FSProvider):
    """DSS filesystem provider backed by a SharePoint document library.

    Paths handed to the public methods are relative to ``self.root``;
    ``get_full_path`` turns them into site-absolute SharePoint paths.
    """

    def __init__(self, root, config, plugin_config):
        """
        :param root: the root path for this provider
        :param config: the dict of the configuration of the object
        :param plugin_config: contains the plugin settings
        """
        # Store the root without its single leading slash, if any.
        if len(root) > 0 and root[0] == '/':
            root = root[1:]
        self.root = root
        self.provider_root = "/"
        logger.info('init:root={}'.format(self.root))
        self.client = SharePointClient(config)

    # util methods
    def get_full_path(self, path):
        """Join provider root, configured root and *path*, skipping empty parts."""
        path_elts = [
            self.provider_root,
            get_rel_path(self.root),
            get_rel_path(path)
        ]
        path_elts = [e for e in path_elts if len(e) > 0]
        return os.path.join(*path_elts)

    def close(self):
        logger.info('close')

    def stat(self, path):
        """Return a DSS stat dict for *path*, or None if nothing exists there.

        A path that contains files or folders is reported as a directory;
        otherwise the parent folder is listed to decide between file,
        (empty) folder and absent.
        """
        full_path = get_lnt_path(self.get_full_path(path))
        logger.info('stat:path="{}", full_path="{}"'.format(path, full_path))
        files = self.client.get_files(full_path)
        folders = self.client.get_folders(full_path)
        if has_sharepoint_items(files) or has_sharepoint_items(folders):
            return {
                DSSConstants.PATH: get_lnt_path(path),
                DSSConstants.SIZE: 0,
                DSSConstants.IS_DIRECTORY: True
            }
        path_to_item, item_name = os.path.split(full_path)
        files = self.client.get_files(path_to_item)
        folders = self.client.get_folders(path_to_item)
        file = extract_item_from(item_name, files)
        folder = extract_item_from(item_name, folders)
        if folder is not None:
            return {
                DSSConstants.PATH: get_lnt_path(path),
                DSSConstants.SIZE: 0,
                DSSConstants.LAST_MODIFIED: get_last_modified(folder),
                DSSConstants.IS_DIRECTORY: True
            }
        if file is not None:
            return {
                DSSConstants.PATH: get_lnt_path(path),
                DSSConstants.SIZE: get_size(file),
                DSSConstants.LAST_MODIFIED: get_last_modified(file),
                DSSConstants.IS_DIRECTORY: False
            }
        return None

    def set_last_modified(self, path, last_modified):
        # Not supported on SharePoint: always reports the timestamp as unchanged.
        full_path = self.get_full_path(path)
        logger.info('set_last_modified: path="{}", full_path="{}"'.format(
            path, full_path))
        return False

    def browse(self, path):
        """Return the DSS browse dict for *path*: its children when it is a
        folder, its own metadata when it is a file, EXISTS=False otherwise."""
        path = get_rel_path(path)
        full_path = get_lnt_path(self.get_full_path(path))
        logger.info('browse:path="{}", full_path="{}"'.format(path, full_path))
        folders = self.client.get_folders(full_path)
        files = self.client.get_files(full_path)
        children = []
        for file in loop_sharepoint_items(files):
            children.append({
                DSSConstants.FULL_PATH: get_lnt_path(
                    os.path.join(path, get_name(file))),
                DSSConstants.EXISTS: True,
                DSSConstants.DIRECTORY: False,
                DSSConstants.SIZE: get_size(file),
                DSSConstants.LAST_MODIFIED: get_last_modified(file)
            })
        for folder in loop_sharepoint_items(folders):
            children.append({
                DSSConstants.FULL_PATH: get_lnt_path(
                    os.path.join(path, get_name(folder))),
                DSSConstants.EXISTS: True,
                DSSConstants.DIRECTORY: True,
                DSSConstants.SIZE: 0,
                DSSConstants.LAST_MODIFIED: get_last_modified(folder)
            })
        if len(children) > 0:
            return {
                DSSConstants.FULL_PATH: get_lnt_path(path),
                DSSConstants.EXISTS: True,
                DSSConstants.DIRECTORY: True,
                DSSConstants.CHILDREN: children
            }
        # No children: check whether the path itself is a file ...
        path_to_file, file_name = os.path.split(full_path)
        files = self.client.get_files(path_to_file)
        for file in loop_sharepoint_items(files):
            if get_name(file) == file_name:
                return {
                    DSSConstants.FULL_PATH: get_lnt_path(path),
                    DSSConstants.EXISTS: True,
                    DSSConstants.SIZE: get_size(file),
                    DSSConstants.LAST_MODIFIED: get_last_modified(file),
                    DSSConstants.DIRECTORY: False
                }
        # ... or an empty folder.
        parent_path, item_name = os.path.split(full_path)
        folders = self.client.get_folders(parent_path)
        folder = extract_item_from(item_name, folders)
        if folder is None:
            ret = {DSSConstants.FULL_PATH: None, DSSConstants.EXISTS: False}
        else:
            ret = {
                DSSConstants.FULL_PATH: get_lnt_path(path),
                DSSConstants.EXISTS: True,
                DSSConstants.DIRECTORY: True,
                DSSConstants.SIZE: 0
            }
        return ret

    def enumerate(self, path, first_non_empty):
        """List all files below *path*, recursively."""
        path = get_rel_path(path)
        full_path = get_lnt_path(self.get_full_path(path))
        logger.info('enumerate:path={},fullpath={}'.format(path, full_path))
        # Fixed: dropped an unused os.path.split() of full_path.
        return self.list_recursive(path, full_path, first_non_empty)

    def list_recursive(self, path, full_path, first_non_empty):
        """Depth-first walk collecting one dict per file under *full_path*.

        NOTE(review): first_non_empty is accepted for interface compatibility
        but does not short-circuit the walk — confirm whether early exit on
        the first hit is wanted.
        """
        paths = []
        folders = self.client.get_folders(full_path)
        for folder in loop_sharepoint_items(folders):
            paths.extend(
                self.list_recursive(
                    get_lnt_path(os.path.join(path, get_name(folder))),
                    get_lnt_path(os.path.join(full_path, get_name(folder))),
                    first_non_empty))
        files = self.client.get_files(full_path)
        for file in loop_sharepoint_items(files):
            paths.append({
                DSSConstants.PATH: get_lnt_path(
                    os.path.join(path, get_name(file))),
                DSSConstants.LAST_MODIFIED: get_last_modified(file),
                DSSConstants.SIZE: get_size(file)
            })
        # Fixed: removed a dead "if first_non_empty: return paths" that was
        # immediately followed by the identical unconditional return.
        return paths

    def delete_recursive(self, path):
        """Delete the file or folder at *path*; return the number of items
        removed (0 when nothing matched). Refuses to delete the root."""
        full_path = self.get_full_path(path)
        logger.info('delete_recursive:path={},fullpath={}'.format(
            path, full_path))
        assert_path_is_not_root(full_path)
        path_to_item, item_name = os.path.split(full_path.rstrip("/"))
        files = self.client.get_files(path_to_item)
        folders = self.client.get_folders(path_to_item)
        file = extract_item_from(item_name, files)
        folder = extract_item_from(item_name, folders)
        if file is not None and folder is not None:
            raise Exception(
                "Ambiguous naming with file / folder {}".format(item_name))
        if file is not None:
            self.client.delete_file(get_lnt_path(full_path))
            return 1
        if folder is not None:
            self.client.delete_folder(get_lnt_path(full_path))
            return 1
        return 0

    def move(self, from_path, to_path):
        """Move a file; return True when SharePoint confirms the move."""
        full_from_path = self.get_full_path(from_path)
        full_to_path = self.get_full_path(to_path)
        logger.info('move:from={},to={}'.format(full_from_path, full_to_path))
        response = self.client.move_file(full_from_path, full_to_path)
        return SharePointConstants.RESULTS_CONTAINER_V2 in response \
            and SharePointConstants.MOVE_TO in response[
                SharePointConstants.RESULTS_CONTAINER_V2]

    def read(self, path, stream, limit):
        """Copy the remote file's content into *stream*.

        NOTE(review): limit is currently ignored — the whole file is
        downloaded regardless; confirm whether partial reads are required.
        """
        full_path = self.get_full_path(path)
        logger.info('read:full_path={}'.format(full_path))
        response = self.client.get_file_content(full_path)
        bio = BytesIO(response.content)
        shutil.copyfileobj(bio, stream)

    def write(self, path, stream):
        """Upload *stream*'s content to *path*, creating parent folders."""
        full_path = self.get_full_path(path)
        logger.info('write:path="{}", full_path="{}"'.format(path, full_path))
        bio = BytesIO()
        shutil.copyfileobj(stream, bio)
        # getvalue() replaces the previous seek(0)/read() pair.
        data = bio.getvalue()
        create_path(self.client, full_path)
        response = self.client.write_file_content(full_path, data)
        logger.info("write:response={}".format(response))
class SharePointListsConnector(Connector):
    """DSS connector reading and writing SharePoint Online lists.

    The read path is: get_read_schema() fills the column lookup tables,
    then generate_rows() pages through the list items and maps SharePoint
    internal column names back to DSS column names.
    """

    def __init__(self, config, plugin_config):
        Connector.__init__(self, config, plugin_config)
        logger.info('SharePoint Online plugin connector v1.0.8')
        self.sharepoint_list_title = self.config.get("sharepoint_list_title")
        self.auth_type = config.get('auth_type')
        logger.info('init:sharepoint_list_title={}, auth_type={}'.format(
            self.sharepoint_list_title, self.auth_type))
        # Schema lookup tables, populated by get_read_schema()
        self.column_ids = {}
        self.column_names = {}
        self.column_entity_property_name = {}
        self.columns_to_format = []
        self.dss_column_name = {}
        self.column_sharepoint_type = {}
        self.expand_lookup = config.get("expand_lookup", False)
        self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
        advanced_parameters = config.get("advanced_parameters", False)
        self.write_mode = "create"
        if not advanced_parameters:
            self.max_workers = 1  # no multithread per default
            self.batch_size = 100
            self.sharepoint_list_view_title = ""
        else:
            self.max_workers = config.get("max_workers", 1)
            self.batch_size = config.get("batch_size", 100)
            self.sharepoint_list_view_title = config.get(
                "sharepoint_list_view_title", "")
        logger.info(
            "init:advanced_parameters={}, max_workers={}, batch_size={}".format(
                advanced_parameters, self.max_workers, self.batch_size))
        self.metadata_to_retrieve.append("Title")
        self.display_metadata = len(self.metadata_to_retrieve) > 0
        self.client = SharePointClient(config)
        self.sharepoint_list_view_id = None
        if self.sharepoint_list_view_title:
            self.sharepoint_list_view_id = self.get_view_id(
                self.sharepoint_list_title, self.sharepoint_list_view_title)

    def get_view_id(self, list_title, view_title):
        """Return the id of the view *view_title* of list *list_title*.

        Returns None when no list title is configured; raises ValueError
        when the view does not exist in the list.
        """
        if not list_title:
            return None
        views = self.client.get_list_views(list_title)
        for view in views:
            if view.get("Title") == view_title:
                return view.get("Id")
        raise ValueError("View '{}' does not exist in list '{}'.".format(
            view_title, list_title))

    def get_read_schema(self):
        """Derive the DSS schema from the SharePoint list fields and fill
        the internal name-mapping tables used by generate_rows()."""
        logger.info('get_read_schema')
        sharepoint_columns = self.client.get_list_fields(
            self.sharepoint_list_title)
        dss_columns = []
        self.column_ids = {}
        self.column_names = {}
        self.column_entity_property_name = {}
        self.columns_to_format = []
        for column in sharepoint_columns:
            logger.info("get_read_schema:{}/{}/{}/{}/{}/{}".format(
                column[SharePointConstants.TITLE_COLUMN],
                column[SharePointConstants.TYPE_AS_STRING],
                column[SharePointConstants.STATIC_NAME],
                column[SharePointConstants.INTERNAL_NAME],
                column[SharePointConstants.ENTITY_PROPERTY_NAME],
                self.is_column_displayable(column)))
            if self.is_column_displayable(column):
                sharepoint_type = get_dss_type(
                    column[SharePointConstants.TYPE_AS_STRING])
                self.column_sharepoint_type[column[
                    SharePointConstants.STATIC_NAME]] = column[
                        SharePointConstants.TYPE_AS_STRING]
                if sharepoint_type is not None:
                    dss_columns.append({
                        SharePointConstants.NAME_COLUMN:
                            column[SharePointConstants.TITLE_COLUMN],
                        SharePointConstants.TYPE_COLUMN: sharepoint_type
                    })
                    self.column_ids[column[
                        SharePointConstants.STATIC_NAME]] = sharepoint_type
                    self.column_names[column[
                        SharePointConstants.STATIC_NAME]] = column[
                            SharePointConstants.TITLE_COLUMN]
                    self.column_entity_property_name[column[
                        SharePointConstants.STATIC_NAME]] = column[
                            SharePointConstants.ENTITY_PROPERTY_NAME]
                    # Both the static name and the entity property name map
                    # back to the DSS column title.
                    self.dss_column_name[column[
                        SharePointConstants.STATIC_NAME]] = column[
                            SharePointConstants.TITLE_COLUMN]
                    self.dss_column_name[column[
                        SharePointConstants.ENTITY_PROPERTY_NAME]] = column[
                            SharePointConstants.TITLE_COLUMN]
                    if sharepoint_type == "date":
                        self.columns_to_format.append(
                            (column[SharePointConstants.STATIC_NAME],
                             sharepoint_type))
        logger.info(
            "get_read_schema: Schema updated with {}".format(dss_columns))
        return {SharePointConstants.COLUMNS: dss_columns}

    @staticmethod
    def get_column_lookup_field(column_static_name):
        """Return the expandable lookup field for a static name, or None."""
        if column_static_name in SharePointConstants.EXPENDABLES_FIELDS:
            return SharePointConstants.EXPENDABLES_FIELDS.get(
                column_static_name)
        return None

    def is_column_displayable(self, column):
        """A column is shown when explicitly requested as metadata,
        or when SharePoint does not flag it as hidden."""
        if self.display_metadata and (column['StaticName'] in self.metadata_to_retrieve):
            return True
        return (not column[SharePointConstants.HIDDEN_COLUMN])

    @staticmethod
    def must_column_display_be_forced(column):
        return column[SharePointConstants.TYPE_AS_STRING] in ["Calculated"]

    @staticmethod
    def is_column_expendable(column):
        return (not column[SharePointConstants.HIDDEN_COLUMN]) \
            and (not column[SharePointConstants.READ_ONLY_FIELD])

    def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
                      partition_id=None, records_limit=-1):
        """Yield list items as DSS rows, paging until the last page or until
        records_limit is reached (checked at page granularity)."""
        if not self.column_ids:
            self.get_read_schema()
        logger.info(
            'generate_row:dataset_schema={}, dataset_partitioning={}, partition_id={}, records_limit={}'
            .format(dataset_schema, dataset_partitioning, partition_id,
                    records_limit))
        page = {}
        record_count = 0
        is_first_run = True
        is_record_limit = records_limit > 0
        while is_first_run or self.is_not_last_page(page):
            is_first_run = False
            page = self.client.get_list_items(
                self.sharepoint_list_title,
                params=self.get_requests_params(page))
            rows = self.get_page_rows(page)
            for row in rows:
                row = self.format_row(row)
                yield column_ids_to_names(self.dss_column_name, row)
            record_count += len(rows)
            # Limit is enforced per page: the current page is always fully
            # yielded before stopping.
            if is_record_limit and record_count >= records_limit:
                break

    @staticmethod
    def is_not_last_page(page):
        # A page with rows and a NextHref link has at least one more page.
        return "Row" in page and "NextHref" in page

    def get_requests_params(self, page):
        """Build the query parameters for the next page request, carrying
        over the server-provided NextHref cursor and the selected view."""
        next_page_query_string = page.get("NextHref", "")
        next_page_requests_params = parse_query_string_to_dict(
            next_page_query_string)
        if self.sharepoint_list_view_id:
            next_page_requests_params.update(
                {"View": self.sharepoint_list_view_id})
        return next_page_requests_params

    @staticmethod
    def get_page_rows(page):
        # Fixed: default to an empty list (was "") so the no-rows case
        # yields a sequence of the same type as the normal case.
        return page.get("Row", [])

    def format_row(self, row):
        """Convert the date columns of *row* in place to DSS date format."""
        for column_to_format, _ in self.columns_to_format:
            value = row.get(column_to_format)
            if value:
                row[column_to_format] = sharepoint_to_dss_date(value)
        return row

    def get_writer(self, dataset_schema=None, dataset_partitioning=None,
                   partition_id=None):
        assert_list_title(self.sharepoint_list_title)
        return SharePointListWriter(
            self.config,
            self,
            dataset_schema,
            dataset_partitioning,
            partition_id,
            max_workers=self.max_workers,
            batch_size=self.batch_size,
            write_mode=self.write_mode)

    def get_partitioning(self):
        logger.info('get_partitioning')
        raise Exception("Unimplemented")

    def list_partitions(self, partitioning):
        logger.info('list_partitions:partitioning={}'.format(partitioning))
        return []

    def partition_exists(self, partitioning, partition_id):
        logger.info('partition_exists:partitioning={}, partition_id={}'.format(
            partitioning, partition_id))
        raise Exception("unimplemented")

    def get_records_count(self, partitioning=None, partition_id=None):
        logger.info(
            'get_records_count:partitioning={}, partition_id={}'.format(
                partitioning, partition_id))
        raise Exception("unimplemented")
class SharePointListsConnector(Connector):
    # DSS connector reading and writing a SharePoint Online list.
    # NOTE(review): this appears to be an earlier revision of the connector
    # that also exists elsewhere in this file — confirm which one is live.

    def __init__(self, config, plugin_config):
        Connector.__init__(self, config, plugin_config)
        self.sharepoint_list_title = self.config.get("sharepoint_list_title")
        self.auth_type = config.get('auth_type')
        logger.info('init:sharepoint_list_title={}, auth_type={}'.format(self.sharepoint_list_title, self.auth_type))
        # Lookup tables filled by get_read_schema()
        self.column_ids = {}
        self.column_names = {}
        self.expand_lookup = config.get("expand_lookup", False)
        # Maps column static name -> lookup field to expand, when expand_lookup is on
        self.column_to_expand = {}
        self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
        self.display_metadata = len(self.metadata_to_retrieve) > 0
        self.client = SharePointClient(config)

    def get_read_schema(self):
        """Build the DSS schema from the list's fields and fill the
        column_ids / column_names / column_to_expand lookup tables.

        Returns None when the response is empty or contains no fields.
        """
        logger.info('get_read_schema')
        response = self.client.get_list_fields(self.sharepoint_list_title)
        if is_response_empty(response) or len(response[SharePointConstants.RESULTS_CONTAINER_V2][SharePointConstants.RESULTS]) < 1:
            return None
        columns = []
        self.column_ids = {}
        self.column_names = {}
        has_expandable_columns = False
        for column in extract_results(response):
            if self.is_column_displayable(column):
                sharepoint_type = get_dss_type(column[SharePointConstants.TYPE_AS_STRING])
                if sharepoint_type is not None:
                    columns.append({
                        SharePointConstants.NAME_COLUMN: column[SharePointConstants.TITLE_COLUMN],
                        SharePointConstants.TYPE_COLUMN: sharepoint_type
                    })
                    # Items are keyed by entity property name in the REST payload
                    self.column_ids[column[SharePointConstants.ENTITY_PROPERTY_NAME]] = sharepoint_type
                    self.column_names[column[SharePointConstants.ENTITY_PROPERTY_NAME]] = column[SharePointConstants.TITLE_COLUMN]
                if self.expand_lookup:
                    if column[SharePointConstants.TYPE_AS_STRING] == "Lookup":
                        self.column_to_expand.update({column[SharePointConstants.STATIC_NAME]: column[SharePointConstants.LOOKUP_FIELD]})
                        has_expandable_columns = True
                    else:
                        # Non-Lookup columns may still have a well-known expandable field
                        self.column_to_expand.update({
                            column[SharePointConstants.STATIC_NAME]: self.get_column_lookup_field(column[SharePointConstants.STATIC_NAME])
                        })
        # Without at least one true Lookup column, expansion is disabled entirely
        if not has_expandable_columns:
            self.column_to_expand = {}
        return {
            SharePointConstants.COLUMNS: columns
        }

    @staticmethod
    def get_column_lookup_field(column_static_name):
        # Returns the expandable field mapped to this static name, or None.
        if column_static_name in SharePointConstants.EXPENDABLES_FIELDS:
            return SharePointConstants.EXPENDABLES_FIELDS.get(column_static_name)
        return None

    def is_column_displayable(self, column):
        # Explicitly requested metadata columns are always shown; otherwise
        # only columns that are neither hidden nor read-only.
        if self.display_metadata and (column['StaticName'] in self.metadata_to_retrieve):
            return True
        return (not column[SharePointConstants.HIDDEN_COLUMN]) \
            and (not column[SharePointConstants.READ_ONLY_FIELD])

    def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1):
        """Yield every list item as a DSS row.

        NOTE(review): records_limit is accepted but not applied — all
        items are fetched in one call; confirm whether this is intended.
        """
        if self.column_ids == {}:
            self.get_read_schema()
        logger.info('generate_row:dataset_schema={}, dataset_partitioning={}, partition_id={}'.format(
            dataset_schema, dataset_partitioning, partition_id
        ))
        response = self.client.get_list_all_items(self.sharepoint_list_title, self.column_to_expand)
        if is_response_empty(response):
            if is_error(response):
                raise Exception("Error: {}".format(response[SharePointConstants.ERROR_CONTAINER][SharePointConstants.MESSAGE][SharePointConstants.VALUE]))
            else:
                raise Exception("Error when interacting with SharePoint")
        if self.column_to_expand == {}:
            for item in extract_results(response):
                yield matched_item(self.column_ids, self.column_names, item)
        else:
            for item in extract_results(response):
                yield expand_matched_item(self.column_ids, self.column_names, item, column_to_expand=self.column_to_expand)

    def get_writer(self, dataset_schema=None, dataset_partitioning=None, partition_id=None):
        # Writing requires a configured list title.
        assert_list_title(self.sharepoint_list_title)
        return SharePointListWriter(self.config, self, dataset_schema, dataset_partitioning, partition_id)

    def get_partitioning(self):
        logger.info('get_partitioning')
        raise Exception("Unimplemented")

    def list_partitions(self, partitioning):
        # Partitioning is not supported: always an empty partition list.
        logger.info('list_partitions:partitioning={}'.format(partitioning))
        return []

    def partition_exists(self, partitioning, partition_id):
        logger.info('partition_exists:partitioning={}, partition_id={}'.format(partitioning, partition_id))
        raise Exception("unimplemented")

    def get_records_count(self, partitioning=None, partition_id=None):
        logger.info('get_records_count:partitioning={}, partition_id={}'.format(partitioning, partition_id))
        raise Exception("unimplemented")