def _query_events(self, start, stop, visibility=None, forExport=False,
                  time_range=None, index_name=INTROSPECTION_INDEX_NAME):
    '''
    Query telemetry events from the given index between two dates.

    :param start: datetime.date
    :param stop: datetime.date, can be the same as start
    :param visibility: list of visibility values (defaults to empty list)
    :param forExport: True if this is for export; forces visibility values
                      onto the visibility field of the returned events
    :param time_range: {start, stop} the timecode range to limit event _time
    :param index_name: specifies which index to query for telemetry events
                       (default: _introspection)
    :return: list of transformed event dicts
    :raises TypeError: if start or stop is a datetime (dates are required)
    '''
    # Avoid the mutable-default-argument pitfall: a shared [] default would
    # leak state between calls.
    if visibility is None:
        visibility = []
    if isinstance(start, datetime) or isinstance(stop, datetime):
        # The old code raised a bare string, which itself surfaces as a
        # TypeError in Python 3 ("exceptions must derive from
        # BaseException").  Raise an explicit TypeError so the message is
        # meaningful while the exception type callers see is unchanged.
        raise TypeError("Requires_date_not_datetime")
    instrumentation_index = InstrumentationIndex(splunkrc=self._splunkrc,
                                                 index_name=index_name)
    result = []

    def process_events(events):
        # Transform each raw event in place, then accumulate it.
        for data in events:
            self._transform_data(data)
            result.append(data)

    profile = report.start_profiling()
    instrumentation_index.process_new_events(
        start, stop, process_events,
        visibility=visibility, time_range=time_range)
    report.report("query_telemetry", {"count": len(result)}, profile)
    if forExport:
        result = self._mark_visibility(result, visibility, 'manual')
    return result
def package_send(self, dateRange):
    """Auto send and log data.

    First we look at our index and check the start, stop, and visibility
    Next we query based on that, and send it.
    """
    visibility = self.instance_profile.visibility
    # A literal False means the deployment has not opted in at all.
    if visibility is False:
        return False

    time_range = {"start": INST_EXECUTION_START_TIME, "stop": utcNow()}
    start, stop = dateRange['start'], dateRange['stop']
    events = self._query_events(start, stop, visibility, False,
                                time_range=time_range)
    if not events:
        # Nothing to send for this range; record the cancellation.
        report.report('send-canceled', True)
        return False
    return self._send_package(events, start, stop, time_range=time_range)
def should_input_run(telemetry_conf_service):
    '''
    Compares current time with the scheduledDay and scheduledHour to determine
    whether Input should execute or not

    :param telemetry_conf_service: Service for telemetry.conf
    :return: True if current time matched scheduling in telemetry.conf
    '''
    conf = telemetry_conf_service.content
    scheduled_day = conf.get('scheduledDay')
    scheduled_hour = conf.get('scheduledHour')

    # Compare day and hour to time now ('*' matches any weekday).
    now = datetime.datetime.now()
    day_now = str(now.weekday())
    hour_now = str(now.hour)
    should_run = (scheduled_day in ('*', day_now)
                  and scheduled_hour == hour_now)

    report.report(
        'schedule-data',
        {
            'schedule': {
                'day': scheduled_day,
                'hour': scheduled_hour
            },
            'now': {
                'day': day_now,
                'hour': hour_now
            },
            'should_run': should_run
        })
    return should_run
def prepare_collection(self):
    '''
    Fetch the KV store configuration and report when our collection is absent.
    '''
    payload = self.service.request(
        KV_STORE_ENDPOINT["config"],
        method="GET",
        headers=self.headers,
        owner=self.splunkrc['owner'],
        app=self.splunkrc['app'])
    existing_collections = self.parse_collection(payload)
    if COLLECTION_NAME not in existing_collections:
        report.report('noKVStore', True)
def collect(self, dateRange):
    '''
    collects events based on schema and indexes through EventIndexer
    :return:
    '''
    indexer = EventIndexer()
    self.event_indexer = indexer
    self._run_collection(dateRange, self.append_via_socket)
    report.report("events_indexed", indexer.count())
    # Give the indexing pipeline a moment to flush before returning.
    time.sleep(2)
def phase_1(self, dateRange, index_name=INTROSPECTION_INDEX_NAME):
    '''
    phase 1 does not check visibility

    phase 1 runs data collection for data points marked for phase 1 in schema
    and indexes through EventIndexer

    :param dateRange:
    :param index_name: collected data is indexed in the index_name that is
                       provided (default _introspection)
    :return:
    '''
    indexer = EventIndexer(index_name)
    self.event_indexer = indexer
    self._run_collection(dateRange, self.append_via_socket, 1)
    report.report("events_indexed", indexer.count())
    # Give the indexing pipeline a moment to flush before returning.
    time.sleep(2)
def send_data(self, data, sleep=6):
    '''
    Send events in chunks of 100, profiling each chunk and the whole batch.

    :param data: list of events to send
    :param sleep: seconds to pause before transmitting
    '''
    time.sleep(sleep)
    chunk_size = 100
    profile = report.start_profiling()
    for offset in range(0, len(data), chunk_size):
        chunk = data[offset:offset + chunk_size]
        chunk_profile = report.start_profiling()
        sent_length = self.send_events(chunk)
        report.report("SendData.log[]", {
            "count": len(chunk),
            "length": sent_length
        }, chunk_profile)
    report.report("SendData.count", {"count": len(data)}, profile)
def process_input_params(telemetry_conf_service, args):
    '''
    Processes Input date range params and sets reportStartDate in telemetery.conf
    :param telemetry_conf_service: Service for telemetry.conf
    :param args: List of arguments passed to Scripted input
    :return:
    '''
    previous_start = telemetry_conf_service.content.get('reportStartDate')
    report.report('reportStartDate', previous_start)

    normalize_date_range_params(args, previous_start)
    validate_date_range(args)

    # Advance reportStartDate so the next run resumes where this one stops.
    telemetry_conf_service.update({'reportStartDate': args.stop_date})
def eval_instance(self):
    # Classify this instance's deployment topology by matching its role set
    # against an ordered list of requirement profiles; the first match is
    # reported and its "result" flag returned.  A '!' prefix on a
    # requirement means the role must be absent.
    req_list = [
        {
            "requirements": [
                'indexer', '!search_peer', '!cluster_slave', '!shc_member',
                '!cluster_master', '!shc_captain', '!cluster_search_head'
            ],
            "label": "Single",
            "result": True
        },
        {
            "requirements": ['cluster_master'],
            "label": "Cluster Master",
            "result": True
        },
        {
            "requirements": ['!cluster_master', 'in_cluster'],
            "label": "Cluster Member not Cluster Master",
            "result": False
        },
        # assume we are already not a cluster member from the above requirements
        {
            "requirements": ['shc_captain'],
            "label": "Search Captain in a non cluster",
            "result": True
        },
        {
            "requirements": [
                '!cluster_master', 'search_head', '!search_peer',
                '!in_cluster', '!cluster_slave', '!shc_member'
            ],
            "label": "Single Search Head",
            "result": True
        },
    ]
    for req in req_list:
        result = evaluate_roles(self.roles, req["requirements"])
        if result:
            report.report("instance.type", req["label"])
            return req["result"]
    else:
        # NOTE(review): reconstructed from flattened source as a for/else,
        # i.e. report None once when no profile matched (the loop never
        # breaks, so this always runs on fall-through).  Confirm the original
        # indentation did not intend a per-iteration if/else instead.
        report.report("instance.type", None)
def run_phase_1_for_all_nodes(dateRange, schema_file):
    '''
    phase 1 runs by all nodes to collect role based data and index to data to _introspection
    phase 1 does not check opt in options
    :param dateRange
    :param schema_file
    :return: None
    '''
    report.report('Running_Phase[]', 1)
    # '*' loads every data point regardless of visibility settings.
    schema = load_schema(schema_file, '*')
    manager = ScheduleManager(schema, dataPointFactory)
    # run phase 1 and ignore visibility
    manager.phase_1(dateRange, INTROSPECTION_INDEX_NAME)
def run_input(dateRange):
    '''
    Run the scripted input: phase 1 on every node, then phase 2 when this
    node qualifies as the lead.  Phase failures are reported, not raised.
    '''
    def _report_errors(action):
        # Execute *action*, recording any failure instead of propagating it.
        try:
            action()
        except Exception as ex:
            report.report('input.error', str(ex))

    profile = get_instance_profile()
    pre_run(profile)
    logging.info("INST Started")

    _report_errors(lambda: run_phase_1_for_all_nodes(dateRange, INST_SCHEMA_FILE))

    if can_run_phase2(profile):
        def _phase_2_and_send():
            run_phase_2(profile, dateRange, INST_SCHEMA_FILE)
            report.send()
        _report_errors(_phase_2_and_send)

    logging.info("INST Done")
def run_phase_2(profile, dateRange, schema_file):
    '''
    phase 2 runs by lead node only and only runs when a deployment is opted in.

    sm.phase_2() does the following:
    - collects and indexes data points marked as phase = 2
    - query data collected by phase = 1 and phase = 2 and send the data to splunkx

    :param profile
    :param dateRange
    :param schema_file
    :return: None
    '''
    report.report('Running_Phase[]', 2)
    schema = load_schema(schema_file, profile.visibility)
    manager = ScheduleManager(schema, dataPointFactory)
    # Brief pause so phase 1 data is settled before phase 2 queries it.
    sleep(5)
    manager.phase_2(dateRange, INTROSPECTION_INDEX_NAME)
def validate_date_range(args):
    # SPL-153360 This can happen when the user has gone from no opt-in to some
    # opt-in on the same day of the scheduled collection, before the script has
    # run.  This is due to the TelemetryHandler.cpp file, which detects the
    # switch from no opt-in to some opt-in and sets the reportStartDate to
    # today.
    #
    # When the script finally runs, it has an default stop date of yesterday,
    # but reportStartDate sets the lower bound, which is today in that case.
    # We do not want to generate alarming error messages, so just log the
    # occurrence and exit gracefully.
    if args.stop_date >= args.start_date:
        return
    report.report(
        'collection-canceled',
        {
            'reason': 'Start date is after stop date. No data to collect.',
            'start_date': args.start_date,
            'stop_date': args.stop_date
        })
    exit(0)
def pre_run(profile):
    '''
    Do some work to keep the environment healthy
    - sync deployment id from CM to current node
    - sync salt from CM to current node
    - retry transaction if retryTransaction in telemtry.conf is not empty
    :param profile
    :return: None
    '''
    profile.sync_deployment_id()
    profile.sync_salt()

    # TelemetryHandler.cpp SHOULD sync telemetry.conf to the Cluster Master
    # whenever any value is changed; retrying here handles the case where
    # that failed.  Only a single (non-SHC) search head or an SHC captain
    # performs the retry.
    roles = profile.roles
    standalone_search_head = (roles.get('search_head')
                              and not roles.get('shc_member'))
    if standalone_search_head or roles.get('sh_captain'):
        report.report("profile.retry_transaction", True)
        profile.retry_transaction()
def _collect_class_data(self, classDef, dateRange, callback=None): ''' run data collections and call callbacks on it. ''' try: if not isinstance(dateRange, dict): dateRange = {"start": dateRange} dateRange["stop"] = dateRange.get("stop") or dateRange.get("start") if isinstance(dateRange["start"], datetime) or isinstance( dateRange["stop"], datetime): raise "Requires_date_not_datetime" dataPoints = classDef.getDataPoints() for dataPoint in dataPoints: report.start_profiling() dataPointResult = self.collect_data_point(dataPoint, dateRange) if hasattr(dataPointResult, 'job'): try: report.report( 'components[]', { "component": classDef.component, "runDuration": float(dataPointResult.job["runDuration"]), "scanCount": int(dataPointResult.job["scanCount"]), "resultCount": int(dataPointResult.job["resultCount"]), "isFailed": dataPointResult.job["isFailed"], "searchProviders": len(dataPointResult.job["searchProviders"]), "sid": dataPointResult.job["sid"] }) except: report.report( 'components[]', { "component": classDef.component, "error": "could not log report" }) dataPointResult = [ self.data_point_results_transform(classDef, event, dateRange) for event in dataPointResult ] callback(dataPointResult) except Exception as e: report.report('exceptions[]', str(e))
def _send_package(
        self, events, start, stop, method='auto', visibility=None,
        time_range=None):
    """Sending package and log it.

    If offline (or quickdraw not available), log failed to the index.

    events = events from index
    start = from datetime picker
    stop = from datetime picker
    method = ['auto', 'manual']
    visibility = [anonymous, license]
    """
    visibility = visibility or self._get_visibility(events)
    count = len(events)
    self.sl.send_attempted(start, stop, visibility=visibility,
                           time_range=time_range, method=method, count=count)
    try:
        if self.deliverySchema.url:
            events = self._mark_visibility(events, visibility, method)
            self.sd.send_data(events)
            self.sl.send_completed(start, stop, visibility=visibility,
                                   time_range=time_range, method=method,
                                   count=count)
        else:
            raise Exception('Quickdraw is not available')
    except binding.HTTPError as e:
        logger.error(e)
        self.sl.send_failed(start, stop, visibility=visibility,
                            time_range=time_range, method=method, count=None)
        report.report("send_failed", True)
        raise
    except Exception as e:
        # NOTE: a trailing `except Exception:` "Unknown Error" handler was
        # unreachable (this clause already catches every Exception) and has
        # been removed.
        logger.error(e)
        self.sl.send_failed(start, stop, visibility=visibility,
                            time_range=time_range, method=method, count=None)
        report.report("send_failed", True)
        raise
def can_run_phase2(profile):
    '''
    determine if current node can run phase 2
    the requirement is that the current node needs to be the lead node
    and that the deployment is opted-in (profile.visibility is not empty)
    :param profile
    :return: Boolean
    '''
    # Only the lead node of a deployment sends data.
    if is_lead_node(profile.roles) is False:
        report.report("lead node", False)
        return False
    report.report("lead node", True)
    report.report("profile.visibility", profile.visibility)
    if not profile.visibility:
        # Empty visibility means the deployment has not opted in.
        report.report("not-opted-in", True)
        return False
    if not profile.opt_in_is_up_to_date():
        # Logged only; an out-of-date opt-in does not block phase 2.
        report.report("opt-in-out-of-date-license-only", True)
    report.report("profile.cluster_mode", profile.profile.get('cluster_mode'))
    report.report("profile.roles", profile.roles)
    if profile.server_info.get('product_type') == "splunk":
        # NOTE(review): product_type "splunk" is reported as 'Cloud' and
        # blocks phase 2 — the label looks inconsistent with the value;
        # confirm the intended product_type for cloud deployments.
        report.report("instance.type", 'Cloud')
        return False
    return True
import logging
import sys

from splunk_instrumentation.report import report
from time import sleep
from splunk_instrumentation.schedule_manager import ScheduleManager
from splunk_instrumentation.dataPoints.data_point import dataPointFactory
from splunk_instrumentation.metrics.metrics_schema import load_schema
from splunk_instrumentation.metrics.instance_profile import get_instance_profile, is_lead_node
# NOTE(review): INST_DEBUG_LEVEL, INST_EXECUTION_ID and INST_SCHEMA_FILE were
# referenced in this module without any visible import (NameError at import
# time), and `sys` was used without `import sys`.  Assuming the INST_*
# constants live in splunk_instrumentation.constants — confirm.
from splunk_instrumentation.constants import (
    INTROSPECTION_INDEX_NAME,
    INST_DEBUG_LEVEL,
    INST_EXECUTION_ID,
    INST_SCHEMA_FILE,
)

# Route root logging to stderr so scripted-input output on stdout stays clean.
logging.root.setLevel(INST_DEBUG_LEVEL)
formatter = logging.Formatter('%(levelname)s %(message)s')
handler = logging.StreamHandler(stream=sys.stderr)
handler.setFormatter(formatter)
logging.root.addHandler(handler)

# Tag every report emitted during this run with a unique execution id.
report.report('executionID', INST_EXECUTION_ID)


def pre_run(profile):
    '''
    Do some work to keep the environment healthy
    - sync deployment id from CM to current node
    - sync salt from CM to current node
    - retry transaction if retryTransaction in telemtry.conf is not empty
    :param profile
    :return: None
    '''
    profile.sync_deployment_id()
    profile.sync_salt()
def _send_package(self, events, start, stop, method='auto', visibility=None,
                  time_range=None):
    """Sending package and log it.

    If offline (or quickdraw not available), log failed to the index.
    If on cloud, log events to splunk_instrumentation_cloud.log, instead of
    sending to quickdraw

    events = events from index
    start = from datetime picker
    stop = from datetime picker
    method = ['auto', 'manual']
    visibility = [anonymous, license]
    """
    visibility = visibility or self._get_visibility(events)
    count = len(events)
    self.sl.send_attempted(start, stop, visibility=visibility,
                           time_range=time_range, method=method, count=count)
    try:
        events = self._mark_visibility(events, visibility, method)
        if self.is_cloud:
            # On cloud, bundle the events and write them to the local
            # instrumentation log instead of sending them out.
            self.sd.bundle_DTOs(events)
            for event in events:
                dataLogger.info(json.dumps(event, default=json_serial))
        else:
            if self.deliverySchema.url:
                self.sd.send_data(events)
            else:
                raise Exception('Quickdraw is not available')
        self.sl.send_completed(start, stop, visibility=visibility,
                               time_range=time_range, method=method,
                               count=count)
    except binding.HTTPError as e:
        logger.error(e)
        self.sl.send_failed(start, stop, visibility=visibility,
                            time_range=time_range, method=method, count=None)
        report.report("send_failed", True)
        raise
    except Exception as e:
        # NOTE: a trailing `except Exception:` "Unknown Error" handler was
        # unreachable (this clause already catches every Exception) and has
        # been removed.
        logger.error(e)
        self.sl.send_failed(start, stop, visibility=visibility,
                            time_range=time_range, method=method, count=None)
        report.report("send_failed", True)
        raise