def get_window(self, last_time_epoch):
    """Return the (start, end) epoch pair for the next collection window."""
    window_start = last_time_epoch + self.MOVING_WINDOW_DELTA
    window_end = get_current_timestamp() - self.collection_config['END_TIME_EPOCH_OFFSET_SECONDS']
    # Initially last_time_epoch equals the current timestamp, which makes the
    # offset-adjusted end fall before the start; fall back to "now" in that case.
    if window_end < window_start:
        window_end = get_current_timestamp()
    return window_start, window_end
def get_window(self, last_time_epoch):
    """Return a (start, end) epoch pair, blocking until the window is wide enough.

    Sleeps in MIN_REQUEST_WINDOW_LENGTH increments until the end of the
    window is more than MIN_REQUEST_WINDOW_LENGTH seconds past the start.
    """
    MIN_REQUEST_WINDOW_LENGTH = 60
    end_offset = self.collection_config['END_TIME_EPOCH_OFFSET_SECONDS']
    start_time_epoch = last_time_epoch + self.MOVING_WINDOW_DELTA
    end_time_epoch = get_current_timestamp() - end_offset
    # Initially last_time_epoch matches the current timestamp, so the end falls
    # at or before the start; wait until the window grows past the minimum.
    while end_time_epoch - start_time_epoch <= MIN_REQUEST_WINDOW_LENGTH:
        time.sleep(MIN_REQUEST_WINDOW_LENGTH)
        end_time_epoch = get_current_timestamp() - end_offset
    return start_time_epoch, end_time_epoch
def get_state(self):
    """Fetch the persisted cursor for this collector, seeding it on first run."""
    key = self.get_key()
    if not self.kvstore.has_key(key):
        # First run: initialize the cursor at the latest edge of the window.
        _, latest = self.get_window(get_current_timestamp())
        self.save_state({"fetch_before": latest})
    return self.kvstore.get(key)
def __init__(self):
    """Set up configuration, the pooled HTTP session and the backfill start epoch.

    Raises:
        Exception: when BACKFILL_DAYS in the collection config exceeds 90.
    """
    self.project_dir = self.get_current_dir()
    super(NetskopeCollector, self).__init__(self.project_dir)
    self.api_config = self.config['Netskope']
    collection_cfg = self.collection_config
    self.netskope_conn = SessionPool(collection_cfg['MAX_RETRY'],
                                     collection_cfg['BACKOFF_FACTOR'],
                                     logger=self.log)
    self.netskope_session = self.netskope_conn.get_request_session()
    backfill_days = collection_cfg['BACKFILL_DAYS']
    if backfill_days > 90:
        raise Exception('BACKFILL_DAYS cannot be more than 90 days')
    # Start pulling data BACKFILL_DAYS days back from "now" (86400 s per day).
    self.DEFAULT_START_TIME_EPOCH = get_current_timestamp() - backfill_days * 24 * 60 * 60
def save_state(self, cursor, users):
    """Persist the collection cursor and per-user sync metadata.

    Args:
        cursor: opaque cursor object stored under this collector's key.
        users: iterable of user dicts, each carrying "id", "updated" and
            "name". May be empty.
    """
    self.kvstore.set(self.get_key(), cursor)
    # Record, per user, the last seen "updated" value and when we last sent
    # the record, keyed by user id. The redundant `if len(users) > 0` guard
    # was removed: a for-loop over an empty iterable is already a no-op.
    for user_data in users:
        self.kvstore.set(
            user_data["id"],
            {
                "updated": user_data["updated"],
                "lastSent": get_current_timestamp(),
                "user_name": user_data["name"],
            })
def get_state(self):
    """Fetch the persisted cursor, seeding it and the access-log watermark on first run."""
    key = self.get_key()
    if not self.kvstore.has_key(key):
        # First run: initialize the cursor at the latest edge of the window.
        _, latest = self.get_window(get_current_timestamp())
        self.save_state({"fetch_before": latest})
    state = self.kvstore.get(key)
    # Seed the access-log "previous before" watermark from the backfill start
    # the first time through (0 doubles as the "never set" sentinel).
    if self.kvstore.get("Access_logs_Previous_before_time", 0) == 0:
        self.kvstore.set("Access_logs_Previous_before_time", self.DEFAULT_START_TIME_EPOCH)
    return state
def put_channels_data(self, channels, number, key, cursor, channels_to_be_sent):
    """Split channel ids into chunks and persist each chunk as a numbered page.

    Also keeps "<key>channel_page_number" pointed at the most recently
    written page.
    """
    chunks = self.batchsize_chunking(channels, channels_to_be_sent)
    page = number
    for chunk in chunks:
        payload = {
            "ids": chunk,
            "last_fetched": get_current_timestamp(),
            "cursor": cursor
        }
        self.kvstore.set(self.get_key() + key + str(page), payload)
        # Track the highest page written so far.
        self.kvstore.set(key + "channel_page_number", page)
        page += 1
def _get_process_names(self):
    """Return (process_ids, hostnames), refreshing the cached set when stale or empty."""
    if not self.kvstore.has_key('processes'):
        self._set_processes()
    now_ms = get_current_timestamp(milliseconds=True)
    processes = self.kvstore.get('processes')
    is_stale = now_ms - processes['last_set_date'] > self.DATA_REFRESH_TIME
    if is_stale or len(processes['process_ids']) == 0:
        # Cache expired (or came back empty) — rebuild and re-read it.
        self._set_processes()
        processes = self.kvstore.get('processes')
    return processes['process_ids'], processes['hostnames']
def _get_database_names(self):
    """Return the cached database-name list, refreshing it when stale or empty."""
    if not self.kvstore.has_key('database_names'):
        ids, _ = self._get_process_names()
        self._set_database_names(ids)
    now_ms = get_current_timestamp(milliseconds=True)
    databases = self.kvstore.get('database_names')
    is_stale = now_ms - databases['last_set_date'] > self.DATA_REFRESH_TIME
    if is_stale or len(databases['values']) == 0:
        # Cache expired (or came back empty) — rebuild from current processes.
        ids, _ = self._get_process_names()
        self._set_database_names(ids)
    return self.kvstore.get('database_names')['values']
def save_state(self, cursor, data):
    """Persist channel pages: classify each channel in *data* as frequent or
    in-frequent and append it to the current page of the matching bucket.

    cursor: opaque cursor stored alongside every page.
    data: iterable of dicts with "channel_id" and "channel_name", or None
        to just rewrite the current pages.
    """
    # Get frequent channels current page
    frequent_channel_page_number = self.kvstore.get(
        "frequent_channel_page_number")
    frequent_channel_page_number = 1 if frequent_channel_page_number is None else frequent_channel_page_number
    frequent_channels = self.kvstore.get(self.get_key() + self.frequent + str(frequent_channel_page_number))
    frequent_channels = [] if frequent_channels is None or frequent_channels["ids"] is None \
        else frequent_channels["ids"]
    # Get in-frequent channels current page
    in_frequent_channel_page_number = self.kvstore.get(
        "in_frequent_channel_page_number")
    in_frequent_channel_page_number = 1 if in_frequent_channel_page_number is None \
        else in_frequent_channel_page_number
    infrequent_channels = self.kvstore.get(
        self.get_key() + self.in_frequent + str(in_frequent_channel_page_number))
    infrequent_channels = [] if infrequent_channels is None or infrequent_channels["ids"] is None \
        else infrequent_channels["ids"]
    # Update the frequent and infrequent list as per threshold provided by user
    if data is not None:
        for channel in data:
            channel_id = channel["channel_id"]
            channel_name = channel["channel_name"]
            messages_details = self.kvstore.get(channel_id)
            # Demote to the in-frequent bucket when the feature is enabled,
            # per-channel state exists, and the channel's "fetch_oldest"
            # timestamp is older than the configured threshold.
            if self.enable_infrequent_channels \
                    and messages_details is not None \
                    and "fetch_oldest" in messages_details \
                    and get_current_timestamp() - messages_details.get("fetch_oldest") > \
                    self.infrequent_channel_threshold:
                infrequent_channels.append(channel_id + "#" + channel_name)
            else:
                frequent_channels.append(channel_id + "#" + channel_name)
    # segregate list into chunks of User provided chunks and save them in database.
    self.put_channels_data(frequent_channels, frequent_channel_page_number,
                           self.frequent, cursor, self.frequent_channels_to_be_sent)
    self.put_channels_data(infrequent_channels, in_frequent_channel_page_number,
                           self.in_frequent, cursor, self.infrequent_channels_to_be_sent)
def _transform_user_data(self, user_data):
    """Build a "UserLog" record from a raw user payload.

    Returns None when the user record is unchanged and was sent within the
    refresh window; otherwise returns the flattened dict to send.
    """
    user_id = user_data["id"]
    email = "-"
    if "profile" in user_data and "email" in user_data["profile"]:
        email = user_data["profile"]["email"]
    # check if the data is present in key value store and send only if there
    # is any change in user data.
    last_updated = None
    last_sent = None
    if self.kvstore.has_key(user_id):
        cached = self.kvstore.get(user_id)
        last_updated = cached["updated"]
        last_sent = cached["lastSent"]
    # Send user data every 24 hours and meanwhile if updated send it
    is_unchanged = (last_updated == user_data["updated"]
                    and get_current_timestamp() - last_sent < self.data_refresh_time)
    if is_unchanged:
        self.log.debug("user already present")
        return None
    return {
        "id": user_data.get("id"),
        "name": user_data.get("name"),
        "deleted": user_data.get("deleted", False),
        "real_name": user_data.get("real_name", "-"),
        "tz": user_data.get("tz", "-"),
        "tz_label": user_data.get("tz_label", "-"),
        "is_admin": user_data.get("is_admin", False),
        "is_owner": user_data.get("is_owner", False),
        "is_primary_owner": user_data.get("is_primary_owner", False),
        "is_restricted": user_data.get("is_restricted", False),
        "is_ultra_restricted": user_data.get("is_ultra_restricted", False),
        "is_bot": user_data.get("is_bot", False),
        "is_app_user": user_data.get("is_app_user", False),
        "updated": user_data.get("updated"),
        "has_2fa": user_data.get("has_2fa", False),
        "teamName": self.team_name,
        "email": email,
        "billable": self._billing_info(user_id),
        "logType": "UserLog"
    }
def set_new_end_epoch_time(self, event_type, start_time_epoch):
    """Probe for the newest event of *event_type* since *start_time_epoch*.

    Returns the fetch-state object anchored at the newest event's timestamp,
    or None when the probe fails or no events are available.
    """
    url = self.get_endpoint_url(event_type)
    params = {
        'token': self.api_config['TOKEN'],
        'limit': 1,
        'starttime': start_time_epoch,
        'endtime': get_current_timestamp(),
        'skip': 0,
        'type': event_type
    }
    success, respjson = ClientMixin.make_request(
        url,
        method=self.api_config['FETCH_METHOD'],
        session=self.netskope_session,
        params=params,
        logger=self.log,
        TIMEOUT=self.collection_config['TIMEOUT'],
        MAX_RETRY=self.collection_config['MAX_RETRY'],
        BACKOFF_FACTOR=self.collection_config['BACKOFF_FACTOR'])
    start_date = convert_epoch_to_utc_date(params['starttime'])
    end_date = convert_epoch_to_utc_date(params['endtime'])
    has_events = success and respjson["status"] == "success" and len(respjson["data"]) > 0
    if not has_events:
        self.log.info(f'''No events are available for {event_type} from {start_date} to {end_date}''')
        return None
    # Anchor the fetch window at the newest event returned by the probe.
    newest_timestamp = respjson["data"][0]["timestamp"]
    obj = self.set_fetch_state(event_type, start_time_epoch, newest_timestamp, newest_timestamp)
    self.log.info(f'''Creating task for {event_type} from {start_date} to {end_date}''')
    return obj
def build_task_params(self):
    """Assemble the list of API task objects for this collection cycle.

    Ordered tasks (user logs, access-log pages) go into `tasks`; channel and
    audit tasks are shuffled before being appended, so no single channel
    starves the others. Returns the combined task list.
    """
    self.log.info("Building task Parameters............")
    tasks = []
    shuffle_tasks = []
    self._set_basic_data()
    if 'LOG_TYPES' in self.api_config:
        # ************** USER LOGS PROCESS **************
        if "USER_LOGS" in self.api_config['LOG_TYPES']:
            tasks.append(
                UsersDataAPI(self.kvstore, self.config, self.team_name,
                             self.user_logs_data_refresh_time))
        # ************** CHANNEL LOGS PROCESS **************
        # Get frequent and infrequent channel list. Call infrequent channels
        # based on last call time.
        call_in_frequent_channels = False
        # check if infrequent channels need to be called
        if self.enable_infrequent_channels and \
                get_current_timestamp() - self.kvstore.get("in_frequent_channel_last_call_time", 0) \
                > self.infrequent_channel_messages_fetch_time:
            self.log.info("Infrequent channels will be sent")
            call_in_frequent_channels = True
        if call_in_frequent_channels:
            channels = self._get_channel_ids("in_frequent_")
            # Only stamp the last-call time once the final in-frequent page
            # has been reached.
            if self.kvstore.get("in_frequent_channel_page_current_index") \
                    == self.kvstore.get("in_frequent_channel_page_number"):
                self.kvstore.set("in_frequent_channel_last_call_time",
                                 get_current_timestamp())
        else:
            channels = self._get_channel_ids("frequent_")
        if "CHANNELS_MESSAGES_LOGS" in self.api_config['LOG_TYPES']:
            # Append all channels to shuffle tasks
            if channels is not None and "ids" in channels:
                channels_ids = channels["ids"]
                for channels_id in channels_ids:
                    # Stored entries are "<channel_id>#<channel_name>".
                    channel = channels_id.split("#")
                    shuffle_tasks.append(
                        ChannelsMessagesAPI(self.kvstore, self.config,
                                            channel[0], channel[1],
                                            self.team_name))
        # ************** ACCESS LOGS PROCESS **************
        if "ACCESS_LOGS" in self.api_config['LOG_TYPES']:
            page = self.kvstore.get("Access_logs_page_index", 1)
            next_page = page + self.PAGE_COUNTER
            max_page = min(
                self.kvstore.get("Access_logs_max_page", next_page),
                self.MAX_PAGE)
            if page >= max_page:
                # All pages consumed: rewind to page 1, advance the
                # "previous before" watermark and clear the run state.
                self.kvstore.set("Access_logs_page_index", 1)
                self.kvstore.set(
                    "Access_logs_Previous_before_time",
                    self.kvstore.get("AccessLogs").get("fetch_before"))
                self.kvstore.delete("AccessLogs")
                self.kvstore.delete("Access_logs_max_page")
            else:
                # Queue the next PAGE_COUNTER pages and move the index forward.
                for page_number in range(page, next_page):
                    tasks.append(
                        AccessLogsAPI(self.kvstore, self.config, page_number,
                                      self.team_name))
                self.kvstore.set("Access_logs_page_index", next_page)
        # ************** AUDIT LOGS PROCESS **************
        if "AUDIT_LOGS" in self.api_config[
                'LOG_TYPES'] and "AUDIT_LOG_URL" in self.api_config:
            self._get_audit_actions(self.api_config["AUDIT_LOG_URL"])
            shuffle_tasks.append(
                AuditLogsAPI(self.kvstore, self.config,
                             self.api_config["AUDIT_LOG_URL"], self.team_name,
                             self.WorkspaceAuditActions, self.UserAuditActions,
                             self.ChannelAuditActions, self.FileAuditActions,
                             self.AppAuditActions, self.OtherAuditActions))
    # Randomize channel/audit task order before appending to the fixed tasks.
    shuffle(shuffle_tasks)
    tasks.extend(shuffle_tasks)
    self.log.info("Building task Parameters Done.")
    return tasks
def _set_disk_names(self, process_ids):
    """Cache the disk names discovered on the given processes' hosts."""
    disk_values = self._get_all_disks_from_host(process_ids)
    payload = {
        "last_set_date": get_current_timestamp(milliseconds=True),
        "values": disk_values,
    }
    self.kvstore.set("disk_names", payload)
def _set_processes(self):
    """Refresh the cached process/hostname lists and the cluster mapping."""
    process_ids, hostnames, cluster_mapping = self._get_all_processes_from_project()
    process_payload = {
        "last_set_date": get_current_timestamp(milliseconds=True),
        "process_ids": process_ids,
        "hostnames": hostnames,
    }
    self.kvstore.set("processes", process_payload)
    mapping_payload = {
        "last_set_date": get_current_timestamp(milliseconds=True),
        "values": cluster_mapping,
    }
    self.kvstore.set("cluster_mapping", mapping_payload)
def _set_database_names(self, process_ids):
    """Cache the database names visible to the given processes."""
    names = self._get_all_databases(process_ids)
    payload = {
        "last_set_date": get_current_timestamp(milliseconds=True),
        "values": names,
    }
    self.kvstore.set("database_names", payload)