class SharePointListsConnector(Connector):
    """DSS connector exposing a SharePoint Online list as a dataset.

    Rows are read page by page through ``SharePointClient.get_list_items``
    and written through ``SharePointListWriter``. The column look-up tables
    kept on the instance are (re)built by :meth:`get_read_schema`.
    """

    def __init__(self, config, plugin_config):
        """Read the connector settings and create the SharePoint client.

        :param config: dataset settings; the keys read here are
            ``sharepoint_list_title``, ``auth_type``, ``expand_lookup``,
            ``metadata_to_retrieve``, ``advanced_parameters`` and, when
            advanced parameters are enabled, ``max_workers``, ``batch_size``
            and ``sharepoint_list_view_title``.
        :param plugin_config: forwarded unchanged to ``Connector.__init__``.
        :raises ValueError: when a view title is configured but no view of
            that title exists in the list (see :meth:`get_view_id`).
        """
        Connector.__init__(self, config, plugin_config)
        logger.info('SharePoint Online plugin connector v1.0.8')
        self.sharepoint_list_title = self.config.get("sharepoint_list_title")
        self.auth_type = config.get('auth_type')
        logger.info('init:sharepoint_list_title={}, auth_type={}'.format(
            self.sharepoint_list_title, self.auth_type))

        # Look-up tables keyed by SharePoint column identifiers; filled in
        # by get_read_schema().
        self.column_ids = {}
        self.column_names = {}
        self.column_entity_property_name = {}
        self.columns_to_format = []
        self.dss_column_name = {}
        self.column_sharepoint_type = {}

        self.expand_lookup = config.get("expand_lookup", False)
        # Work on a copy: "Title" is appended below and the caller's config
        # list must not be modified as a side effect. ``or []`` also covers
        # an explicit None value.
        self.metadata_to_retrieve = list(
            config.get("metadata_to_retrieve") or [])

        advanced_parameters = config.get("advanced_parameters", False)
        self.write_mode = "create"
        if not advanced_parameters:
            self.max_workers = 1  # no multithread per default
            self.batch_size = 100
            self.sharepoint_list_view_title = ""
        else:
            self.max_workers = config.get("max_workers", 1)
            self.batch_size = config.get("batch_size", 100)
            self.sharepoint_list_view_title = config.get(
                "sharepoint_list_view_title", "")
        logger.info(
            "init:advanced_parameters={}, max_workers={}, batch_size={}".format(
                advanced_parameters, self.max_workers, self.batch_size))

        # "Title" is always requested, so display_metadata is always True.
        self.metadata_to_retrieve.append("Title")
        self.display_metadata = len(self.metadata_to_retrieve) > 0

        self.client = SharePointClient(config)
        self.sharepoint_list_view_id = None
        if self.sharepoint_list_view_title:
            self.sharepoint_list_view_id = self.get_view_id(
                self.sharepoint_list_title, self.sharepoint_list_view_title)

    def get_view_id(self, list_title, view_title):
        """Return the ``Id`` of the view named *view_title* in *list_title*.

        :returns: the view id, or ``None`` when *list_title* is empty.
        :raises ValueError: when no view carries that title.
        """
        if not list_title:
            return None
        for view in self.client.get_list_views(list_title):
            if view.get("Title") == view_title:
                return view.get("Id")
        raise ValueError("View '{}' does not exist in list '{}'.".format(
            view_title, list_title))

    def get_read_schema(self):
        """Build the DSS schema from the fields of the SharePoint list.

        Also rebuilds the column look-up tables used when reading rows.

        :returns: a dict holding the list of DSS column descriptors under
            ``SharePointConstants.COLUMNS``.
        """
        logger.info('get_read_schema')
        sharepoint_columns = self.client.get_list_fields(
            self.sharepoint_list_title)
        dss_columns = []
        self.column_ids = {}
        self.column_names = {}
        self.column_entity_property_name = {}
        self.columns_to_format = []
        for column in sharepoint_columns:
            title = column[SharePointConstants.TITLE_COLUMN]
            type_as_string = column[SharePointConstants.TYPE_AS_STRING]
            static_name = column[SharePointConstants.STATIC_NAME]
            entity_property_name = column[
                SharePointConstants.ENTITY_PROPERTY_NAME]
            # Evaluated once and reused for both the log line and the test.
            is_displayable = self.is_column_displayable(column)
            logger.info("get_read_schema:{}/{}/{}/{}/{}/{}".format(
                title,
                type_as_string,
                static_name,
                column[SharePointConstants.INTERNAL_NAME],
                entity_property_name,
                is_displayable))
            if not is_displayable:
                continue
            sharepoint_type = get_dss_type(type_as_string)
            # Recorded even when the type has no DSS equivalent.
            self.column_sharepoint_type[static_name] = type_as_string
            if sharepoint_type is not None:
                dss_columns.append({
                    SharePointConstants.NAME_COLUMN: title,
                    SharePointConstants.TYPE_COLUMN: sharepoint_type
                })
                self.column_ids[static_name] = sharepoint_type
                self.column_names[static_name] = title
                self.column_entity_property_name[static_name] = \
                    entity_property_name
                # Rows may be keyed by either identifier, so both map to
                # the DSS column name.
                self.dss_column_name[static_name] = title
                self.dss_column_name[entity_property_name] = title
            if sharepoint_type == "date":
                self.columns_to_format.append((static_name, sharepoint_type))
        logger.info(
            "get_read_schema: Schema updated with {}".format(dss_columns))
        return {SharePointConstants.COLUMNS: dss_columns}

    @staticmethod
    def get_column_lookup_field(column_static_name):
        """Return the entry of ``SharePointConstants.EXPENDABLES_FIELDS``
        for *column_static_name*, or ``None`` when there is none."""
        if column_static_name in SharePointConstants.EXPENDABLES_FIELDS:
            return SharePointConstants.EXPENDABLES_FIELDS.get(
                column_static_name)
        return None

    def is_column_displayable(self, column):
        """Tell whether *column* belongs in the schema.

        A column is kept when its ``StaticName`` is one of the requested
        metadata fields, or when it is not flagged as hidden.
        """
        if self.display_metadata and (
                column['StaticName'] in self.metadata_to_retrieve):
            return True
        return not column[SharePointConstants.HIDDEN_COLUMN]

    @staticmethod
    def must_column_display_be_forced(column):
        """Return True when the column type string is ``"Calculated"``."""
        return column[SharePointConstants.TYPE_AS_STRING] in ["Calculated"]

    @staticmethod
    def is_column_expendable(column):
        """Return True when the column is neither hidden nor read-only."""
        return (not column[SharePointConstants.HIDDEN_COLUMN]) \
            and (not column[SharePointConstants.READ_ONLY_FIELD])

    def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
                      partition_id=None, records_limit=-1):
        """Yield the rows of the list, fetching it one page at a time.

        :param dataset_schema: only logged.
        :param dataset_partitioning: only logged.
        :param partition_id: only logged.
        :param records_limit: maximum number of rows to yield; values that
            are not strictly positive mean "no limit".
        :returns: a generator of rows as produced by
            ``column_ids_to_names``.
        """
        if self.column_ids == {}:
            self.get_read_schema()
        logger.info(
            'generate_row:dataset_schema={}, dataset_partitioning={}, partition_id={}, records_limit={}'
            .format(dataset_schema, dataset_partitioning, partition_id,
                    records_limit))
        page = {}
        record_count = 0
        is_first_run = True
        is_record_limit = records_limit is not None and records_limit > 0
        while is_first_run or self.is_not_last_page(page):
            is_first_run = False
            page = self.client.get_list_items(
                self.sharepoint_list_title,
                params=self.get_requests_params(page))
            for row in self.get_page_rows(page):
                row = self.format_row(row)
                yield column_ids_to_names(self.dss_column_name, row)
                record_count += 1
                # Stop exactly at the limit rather than at the end of the
                # page, so no extra rows are emitted and no further page
                # is requested.
                if is_record_limit and record_count >= records_limit:
                    return

    @staticmethod
    def is_not_last_page(page):
        """Return True when *page* has both a ``Row`` and a ``NextHref``
        key, i.e. when another page can be requested."""
        return "Row" in page and "NextHref" in page

    def get_requests_params(self, page):
        """Build the request parameters for the page following *page*.

        The ``NextHref`` query string of *page* (empty for the first
        request) is parsed into a dict, and the configured view id, if any,
        is added under ``View``.
        """
        next_page_query_string = page.get("NextHref", "")
        next_page_requests_params = parse_query_string_to_dict(
            next_page_query_string)
        if self.sharepoint_list_view_id:
            next_page_requests_params.update(
                {"View": self.sharepoint_list_view_id})
        return next_page_requests_params

    @staticmethod
    def get_page_rows(page):
        """Return the rows stored under ``Row`` in *page*, or an empty
        list when the key is absent."""
        return page.get("Row", [])

    def format_row(self, row):
        """Convert, in place, the non-empty values of the date columns of
        *row* with ``sharepoint_to_dss_date`` and return *row*."""
        for column_to_format, _ in self.columns_to_format:
            value = row.get(column_to_format)
            if value:
                row[column_to_format] = sharepoint_to_dss_date(value)
        return row

    def get_writer(self, dataset_schema=None, dataset_partitioning=None,
                   partition_id=None):
        """Return a ``SharePointListWriter`` bound to this connector.

        The list title is first checked with ``assert_list_title``
        (presumably raising when it is missing - confirm against its
        definition).
        """
        assert_list_title(self.sharepoint_list_title)
        return SharePointListWriter(self.config,
                                    self,
                                    dataset_schema,
                                    dataset_partitioning,
                                    partition_id,
                                    max_workers=self.max_workers,
                                    batch_size=self.batch_size,
                                    write_mode=self.write_mode)

    def get_partitioning(self):
        """Partitioning is not supported: always raises ``Exception``."""
        logger.info('get_partitioning')
        raise Exception("Unimplemented")

    def list_partitions(self, partitioning):
        """Return an empty list: the connector defines no partition."""
        logger.info('list_partitions:partitioning={}'.format(partitioning))
        return []

    def partition_exists(self, partitioning, partition_id):
        """Partitioning is not supported: always raises ``Exception``."""
        logger.info('partition_exists:partitioning={}, partition_id={}'.format(
            partitioning, partition_id))
        raise Exception("unimplemented")

    def get_records_count(self, partitioning=None, partition_id=None):
        """Record counting is not supported: always raises ``Exception``."""
        logger.info(
            'get_records_count:partitioning={}, partition_id={}'.format(
                partitioning, partition_id))
        raise Exception("unimplemented")
class SharePointListsConnector(Connector):
    """DSS connector exposing a SharePoint Online list as a dataset.

    All items are fetched through ``SharePointClient.get_list_all_items``
    and mapped to DSS rows with ``matched_item`` / ``expand_matched_item``;
    writing is delegated to ``SharePointListWriter``.
    """

    def __init__(self, config, plugin_config):
        """Read the connector settings and create the SharePoint client.

        :param config: dataset settings; the keys read here are
            ``sharepoint_list_title``, ``auth_type``, ``expand_lookup`` and
            ``metadata_to_retrieve``.
        :param plugin_config: forwarded unchanged to ``Connector.__init__``.
        """
        Connector.__init__(self, config, plugin_config)
        self.sharepoint_list_title = self.config.get("sharepoint_list_title")
        self.auth_type = config.get('auth_type')
        logger.info('init:sharepoint_list_title={}, auth_type={}'.format(
            self.sharepoint_list_title, self.auth_type))
        # Look-up tables filled in by get_read_schema().
        self.column_ids = {}
        self.column_names = {}
        self.expand_lookup = config.get("expand_lookup", False)
        self.column_to_expand = {}
        self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
        self.display_metadata = len(self.metadata_to_retrieve) > 0
        self.client = SharePointClient(config)

    def get_read_schema(self):
        """Build the DSS schema from the fields of the SharePoint list.

        Also rebuilds ``column_ids``, ``column_names`` and
        ``column_to_expand``.

        :returns: a dict holding the list of DSS column descriptors under
            ``SharePointConstants.COLUMNS``, or ``None`` when the response
            is empty or lists no field.
        """
        logger.info('get_read_schema')
        response = self.client.get_list_fields(self.sharepoint_list_title)
        if is_response_empty(response) or len(
                response[SharePointConstants.RESULTS_CONTAINER_V2][
                    SharePointConstants.RESULTS]) < 1:
            return None
        columns = []
        self.column_ids = {}
        self.column_names = {}
        # Reset together with the other tables so that entries from a
        # previous call cannot survive a change of the list fields.
        self.column_to_expand = {}
        has_expandable_columns = False
        for column in extract_results(response):
            if not self.is_column_displayable(column):
                continue
            sharepoint_type = get_dss_type(
                column[SharePointConstants.TYPE_AS_STRING])
            if sharepoint_type is None:
                continue
            entity_property_name = column[
                SharePointConstants.ENTITY_PROPERTY_NAME]
            title = column[SharePointConstants.TITLE_COLUMN]
            columns.append({
                SharePointConstants.NAME_COLUMN: title,
                SharePointConstants.TYPE_COLUMN: sharepoint_type
            })
            self.column_ids[entity_property_name] = sharepoint_type
            self.column_names[entity_property_name] = title
            if self.expand_lookup:
                static_name = column[SharePointConstants.STATIC_NAME]
                if column[SharePointConstants.TYPE_AS_STRING] == "Lookup":
                    self.column_to_expand[static_name] = column[
                        SharePointConstants.LOOKUP_FIELD]
                    has_expandable_columns = True
                else:
                    # May store None when the column has no known
                    # expandable field.
                    self.column_to_expand[static_name] = \
                        self.get_column_lookup_field(static_name)
        # Without any "Lookup" column nothing is expanded.
        if not has_expandable_columns:
            self.column_to_expand = {}
        return {
            SharePointConstants.COLUMNS: columns
        }

    @staticmethod
    def get_column_lookup_field(column_static_name):
        """Return the entry of ``SharePointConstants.EXPENDABLES_FIELDS``
        for *column_static_name*, or ``None`` when there is none."""
        return SharePointConstants.EXPENDABLES_FIELDS.get(column_static_name)

    def is_column_displayable(self, column):
        """Tell whether *column* belongs in the schema.

        A column is kept when its ``StaticName`` is one of the requested
        metadata fields, or when it is neither hidden nor read-only.
        """
        if self.display_metadata and (
                column['StaticName'] in self.metadata_to_retrieve):
            return True
        return (not column[SharePointConstants.HIDDEN_COLUMN]) \
            and (not column[SharePointConstants.READ_ONLY_FIELD])

    def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
                      partition_id=None, records_limit=-1):
        """Yield the items of the list as DSS rows.

        :param dataset_schema: only logged.
        :param dataset_partitioning: only logged.
        :param partition_id: only logged.
        :param records_limit: maximum number of rows to yield; values that
            are not strictly positive mean "no limit".
        :returns: a generator of rows as produced by ``matched_item`` or,
            when columns have to be expanded, ``expand_matched_item``.
        :raises Exception: when the response is empty, with the SharePoint
            error message when the response carries one.
        """
        if self.column_ids == {}:
            self.get_read_schema()

        logger.info('generate_row:dataset_schema={}, dataset_partitioning={}, partition_id={}'.format(
            dataset_schema, dataset_partitioning, partition_id
        ))

        response = self.client.get_list_all_items(
            self.sharepoint_list_title, self.column_to_expand)
        if is_response_empty(response):
            if is_error(response):
                raise Exception("Error: {}".format(
                    response[SharePointConstants.ERROR_CONTAINER][
                        SharePointConstants.MESSAGE][
                            SharePointConstants.VALUE]))
            else:
                raise Exception("Error when interacting with SharePoint")

        is_record_limit = records_limit is not None and records_limit > 0
        must_expand = self.column_to_expand != {}
        record_count = 0
        for item in extract_results(response):
            if must_expand:
                yield expand_matched_item(
                    self.column_ids, self.column_names, item,
                    column_to_expand=self.column_to_expand)
            else:
                yield matched_item(self.column_ids, self.column_names, item)
            record_count += 1
            # Honour the limit requested by the caller.
            if is_record_limit and record_count >= records_limit:
                return

    def get_writer(self, dataset_schema=None, dataset_partitioning=None,
                   partition_id=None):
        """Return a ``SharePointListWriter`` bound to this connector.

        The list title is first checked with ``assert_list_title``
        (presumably raising when it is missing - confirm against its
        definition).
        """
        assert_list_title(self.sharepoint_list_title)
        return SharePointListWriter(self.config, self, dataset_schema,
                                    dataset_partitioning, partition_id)

    def get_partitioning(self):
        """Partitioning is not supported: always raises ``Exception``."""
        logger.info('get_partitioning')
        raise Exception("Unimplemented")

    def list_partitions(self, partitioning):
        """Return an empty list: the connector defines no partition."""
        logger.info('list_partitions:partitioning={}'.format(partitioning))
        return []

    def partition_exists(self, partitioning, partition_id):
        """Partitioning is not supported: always raises ``Exception``."""
        logger.info('partition_exists:partitioning={}, partition_id={}'.format(
            partitioning, partition_id))
        raise Exception("unimplemented")

    def get_records_count(self, partitioning=None, partition_id=None):
        """Record counting is not supported: always raises ``Exception``."""
        logger.info('get_records_count:partitioning={}, partition_id={}'.format(
            partitioning, partition_id))
        raise Exception("unimplemented")