Example 1
class snowIncidentCommand(GeneratingCommand):

    assigned = Option(require=True, validate=validators.List())
    assigned_by = Option(require=False)
    daysAgo = Option(require=False, validate=validators.Integer(0))
    active = Option(require=True, validate=validators.Boolean())
    limit = Option(require=False, validate=validators.Integer(0))
    env = Option(require=False)

    def generate(self):
        self.logger.debug('snowIncidentCommand: %s', self)
        searchinfo = self.metadata.searchinfo
        app = AppConf(searchinfo.splunkd_uri, searchinfo.session_key)
        env = self.env.lower() if self.env else 'production'
        conf = app.get_config('getsnow')[env]
        assigned_by = 'assignment_group' if self.assigned_by == 'group' else 'assigned_to'
        assignment = {'table': 'sys_user_group', 'field': 'name'} if self.assigned_by == 'group' else {'table': 'sys_user', 'field': 'user_name'}
        limit = self.limit if self.limit else 10000
        snowincident = snow(conf['url'], conf['user'], conf['password'])
        sids = snowincident.getsysid(assignment['table'], assignment['field'], self.assigned)
        filters = snowincident.filterbuilder(assigned_by, sids)
        glide = 'sys_created_on>=javascript:gs.daysAgo({})'.format(self.daysAgo) if self.daysAgo else ''
        url = snowincident.reqencode(filters, table='incident', glide_system=glide, active=self.active, sysparm_limit=limit)
        for record in snowincident.getrecords(url):
            record = snowincident.updatevalue(record, sourcetype='snow:incident')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
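These snippets omit the module scaffolding that splunklib expects: a @Configuration() decorator on the command class and a dispatch() call at module level. A minimal sketch of the surrounding file, assuming the app-specific helpers (snow, AppConf, dictexpand) live next to the script:

import json
import sys

from splunklib.searchcommands import (
    dispatch, GeneratingCommand, Configuration, Option, validators)


@Configuration()
class snowIncidentCommand(GeneratingCommand):
    # ... options and generate() as above ...
    pass


if __name__ == '__main__':
    # dispatch() wires the class into Splunk's search pipeline protocol.
    dispatch(snowIncidentCommand, sys.argv, sys.stdin, sys.stdout, __name__)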
Example 2
class UDPCommand(GeneratingCommand):
    port = Option(require=True, validate=validators.Integer())
    message = Option(require=True)
    ip = Option(require=True)

    def generate(self):
        ipaddr = self.ip
        portnum = self.port
        # Sockets transmit bytes; encode the message string.
        packetdata = self.message.encode("utf-8")

        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, 0)
        s.settimeout(5)
        try:
            # "Connect" the UDP socket so send()/recv() target the address.
            s.connect((ipaddr, portnum))
            # Send the command.
            s.send(packetdata)
            # Receive the UDP response (raises socket.timeout after 5s).
            data = s.recv(4096).decode("utf-8")
            yield {'_time': time.time(), '_raw': data}
        finally:
            # Close the socket.
            s.close()
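A quick way to exercise the command locally is a one-shot UDP echo peer; run it, then invoke the command against 127.0.0.1 (the SPL name of the command depends on its commands.conf stanza and is not shown in the snippet):

import socket

# One-shot UDP echo server: waits for the command's packet on port 9999
# and sends the same bytes back as the "response".
srv = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
srv.bind(("127.0.0.1", 9999))
payload, addr = srv.recvfrom(4096)
srv.sendto(payload, addr)
srv.close()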
Example 3
class functCommand(StreamingCommand):

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the match count''',
        require=True, validate=validators.Fieldname())

    char_limit = Option(
        doc='''
        **Syntax:** **char_limit=***<positive int>*
        **Description:** Determines how many characters in a field to process. Default is 150''',
        require=False, validate=validators.Integer(maximum=10000), default=150)

    def stream(self, records):
        self.logger.debug('functCommand: %s', self)  # logs command line
        for record in records:
            for fieldname in self.fieldnames:
                # Only process up to char_limit characters of the field.
                x = record[fieldname][0:self.char_limit]
                if re.search(r'\W', record[fieldname]):
                    # Non-word characters present: strip word characters
                    # and mark whitespace with underscores.
                    x = re.sub(r'\w', "", x)
                    x = re.sub(r'\s', "_", x)
                else:
                    # Collapse letters to A/a, digits to 9 and whitespace
                    # to w, producing a pattern signature of the value.
                    x = re.sub(r'[B-Z]', "A", x)
                    x = re.sub(r'[b-z]', "a", x)
                    x = re.sub(r'[0-8]', "9", x)
                    x = re.sub(r'\s', "w", x)
                record[self.fieldname] = x
            yield record
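To illustrate what the masking above produces, a standalone re-implementation of the same regex cascade (sample values are made up):

import re

def mask(value, char_limit=150):
    x = value[:char_limit]
    if re.search(r'\W', value):
        # Non-word characters present: strip word chars, mark whitespace.
        x = re.sub(r'\w', '', x)
        return re.sub(r'\s', '_', x)
    # Pattern signature: letters to A/a, digits to 9, whitespace to w.
    x = re.sub(r'[B-Z]', 'A', x)
    x = re.sub(r'[b-z]', 'a', x)
    x = re.sub(r'[0-8]', '9', x)
    return re.sub(r'\s', 'w', x)

print(mask('Error42'))  # -> 'Aaaaa99' (word characters only)
print(mask('a b;c'))    # -> '_;' (word chars stripped, space becomes '_')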
Example 4
class gentestCommand(GeneratingCommand):
    count = Option(require=True, validate=validators.Integer())

    def generate(self):
        for i in range(self.count):
            # Build a fresh dict per event; mutating and re-yielding one
            # shared dict hands every consumer the same object.
            yield {'_time': time.time(), '_raw': "HELLO WORLD!"}
Example 5
class GenerateTextCommand(GeneratingCommand):

    count = Option(require=True, validate=validators.Integer(0))
    text = Option(require=True)

    def generate(self):
        text = self.text
        self.logger.debug("Generating %d events with text %s" % (self.count, self.text))
        for i in range(1, self.count + 1):
            yield {'_serial': i, '_time': time.time(), '_raw': six.text_type(i) + '. ' + text}
Example 6
class StubbedReportingCommand(ReportingCommand):
    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=False, validate=validators.Boolean())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File(mode='r'))

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** One of the values foo, bar, or test''',
        validate=validators.Set("foo", "bar", "test"))

    @Configuration()
    def map(self, records):
        pass

    def reduce(self, records):
        pass
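These validators convert the raw string argument at parse time and raise ValueError on bad input, which Splunk surfaces as a search error. A small sketch of that behaviour, run standalone outside a command:

from splunklib.searchcommands import validators

integer = validators.Integer(minimum=0)
print(integer('42'))          # -> 42, converted to int
try:
    integer('-1')             # below the minimum
except ValueError as error:
    print(error)

choices = validators.Set('foo', 'bar', 'test')
print(choices('bar'))         # -> 'bar'; anything else raises ValueError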
Example 7
    def __init__(self, **kwargs):
        super(ValidateTestSize, self).__init__()
        int_args = {
            'minimum': kwargs.get('int_minimum'),
            'maximum': kwargs.get('int_maximum'),
        }
        float_args = {
            'minimum': kwargs.get('float_minimum'),
            'maximum': kwargs.get('float_maximum'),
        }
        self.validate_int = validators.Integer(**int_args)
        self.validate_float = ValidateFloat(**float_args)
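The fragment above only shows the constructor of what is presumably a composite validator; a splunklib validator also implements __call__ and format. A hypothetical completion (the dispatch rule between int and float is an assumption, and ValidateFloat is the app's own class):

from splunklib.searchcommands import validators


class ValidateTestSize(validators.Validator):
    # ... __init__ as above ...

    def __call__(self, value):
        if value is None:
            return None
        # Hypothetical rule: values containing '.' validate as floats,
        # everything else as integers.
        if '.' in str(value):
            return self.validate_float(value)
        return self.validate_int(value)

    def format(self, value):
        return None if value is None else str(value)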
Example 8
class SleepCommand(StreamingCommand):
    time = Option(doc='''
        **Syntax:** **time=***<seconds>*
        **Description:** Time to sleep, in seconds''',
                  require=True,
                  validate=validators.Integer())

    def stream(self, records):

        self.logging_level = "INFO"
        # time is a required option, so it is always set; fall back to one
        # second defensively anyway.
        sleeptime = self.time if self.time is not None else 1

        time.sleep(sleeptime)

        for record in records:

            yield record
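Note that the sleep runs once per chunk of records, before anything is yielded. If the intent is a per-record delay instead, the loop would look like this (a sketch):

    def stream(self, records):
        for record in records:
            time.sleep(self.time)  # delay each record, not each chunk
            yield record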
Example 9
class snowUserCommand(GeneratingCommand):

    user_name = Option(require=True, validate=validators.List())
    daysAgo = Option(require=False, validate=validators.Integer(0))
    env = Option(require=False)

    def generate(self):
        self.logger.debug('snowuser: %s', self)
        searchinfo = self.metadata.searchinfo
        app = AppConf(searchinfo.splunkd_uri, searchinfo.session_key)
        env = self.env.lower() if self.env else 'production'
        conf = app.get_config('getsnow')[env]
        snowuser = snow(conf['url'], conf['user'], conf['password'])
        filters = snowuser.filterbuilder('user_name', self.user_name)
        query_string = snowuser.reqencode(filters, 'sys_user')
        user_sid = []
        for record in snowuser.getrecords(query_string):
            user_sid.append(record['sys_id'])
            record = snowuser.updatevalue(record, sourcetype='snow:user')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
        filters = snowuser.filterbuilder('assigned_to', user_sid)
        url = snowuser.reqencode(filters, table='alm_asset')
        for record in snowuser.getrecords(url):
            record = snowuser.updatevalue(record, sourcetype='snow:asset')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
        filters = snowuser.filterbuilder('opened_by', user_sid)
        glide = 'sys_created_on>=javascript:gs.daysAgo({})'.format(
            self.daysAgo) if self.daysAgo else ''
        url = snowuser.reqencode(filters, table='incident', glide_system=glide)
        for record in snowuser.getrecords(url):
            record = snowuser.updatevalue(record, sourcetype='snow:incident')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
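For context, filterbuilder() and reqencode() are app-specific helpers, but the requests they assemble presumably target the ServiceNow Table API, which joins filter terms with ^OR inside sysparm_query. A sketch with a made-up instance URL:

from urllib.parse import urlencode

base = 'https://example.service-now.com/api/now/table/sys_user'
filters = '^OR'.join('user_name=%s' % u for u in ('alice', 'bob'))
print(base + '?' + urlencode({'sysparm_query': filters}))
# .../table/sys_user?sysparm_query=user_name%3Dalice%5EORuser_name%3Dbob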
Example 10
class DatabricksJobCommand(GeneratingCommand):
    """Custom Command of databricksjob."""

    # Take input from user using parameters
    job_id = Option(require=True, validate=validators.Integer(0))
    notebook_params = Option(require=False)

    def generate(self):
        """Generating custom command."""
        _LOGGER.info("Initiating databricksjob command")
        kv_log_info = {
            "user": self._metadata.searchinfo.username,
            "created_time": time.time(),
            "param": self._metadata.searchinfo.args,
            "run_id": "-",
            "output_url": "-",
            "result_url": "-",
            "command_status": "Failed",
            "error": "-",
        }

        session_key = self._metadata.searchinfo.session_key

        try:
            # Get job details
            client = com.DatabricksClient(session_key)

            payload = {
                "job_id": self.job_id,
            }

            _LOGGER.info(
                "Fetching job details before submitting the execution.")
            response = client.databricks_api("get",
                                             const.GET_JOB_ENDPOINT,
                                             args=payload)

            job_settings = response["settings"]
            tasks_list = list(set(job_settings.keys()))

            if "notebook_task" not in tasks_list:
                raise Exception(
                    "Given job does not contains the notebook task. Hence terminating the execution."
                )
            if ("spark_jar_task" in tasks_list
                    or "spark_python_task" in tasks_list
                    or "spark_submit_task" in tasks_list):
                raise Exception(
                    "Given job contains one of the following task in addition to the notebook task. "
                    "(spark_jar_task, spark_python_task and spark_submit_task) "
                    "Hence terminating the execution.")

            # Request for executing the job
            _LOGGER.info("Preparing request body for execution.")
            payload["notebook_params"] = utils.format_to_json_parameters(
                self.notebook_params)

            _LOGGER.info("Submitting job for execution.")
            response = client.databricks_api("post",
                                             const.EXECUTE_JOB_ENDPOINT,
                                             data=payload)

            kv_log_info.update(response)
            run_id = response["run_id"]
            _LOGGER.info("Successfully executed the job with ID: {}.".format(
                self.job_id))

            # Request to get the run_id details
            _LOGGER.info("Fetching details for run ID: {}.".format(run_id))
            args = {"run_id": run_id}
            response = client.databricks_api("get",
                                             const.GET_RUN_ENDPOINT,
                                             args=args)

            output_url = response.get("run_page_url")
            if output_url:
                result_url = output_url.rstrip("/") + "/resultsOnly"
                kv_log_info["output_url"] = output_url
                kv_log_info["result_url"] = result_url
                kv_log_info["command_status"] = "Success"
                _LOGGER.info("Output url returned: {}".format(output_url))

        except Exception as e:
            _LOGGER.error(e)
            _LOGGER.error(traceback.format_exc())
            kv_log_info["error"] = str(e)
            self.write_error(str(e))
            exit(1)

        finally:
            updated_kv_info = utils.update_kv_store_collection(
                self._metadata.searchinfo.splunkd_uri,
                const.KV_COLLECTION_NAME_EXECUTE_JOB,
                session_key,
                kv_log_info,
            )

        yield updated_kv_info
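For reference, GET_JOB_ENDPOINT and EXECUTE_JOB_ENDPOINT are app-specific constants, but the calls match the Databricks Jobs REST API. A sketch of the run-now exchange the code drives (IDs are made up):

# POST /api/2.0/jobs/run-now
payload = {
    "job_id": 42,                               # the job_id option
    "notebook_params": {"date": "2021-01-01"},  # forwarded to the notebook
}
# response: {"run_id": 123}
# GET /api/2.0/jobs/runs/get?run_id=123 then returns run_page_url, from
# which the command derives output_url and result_url.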
Example 11
class GenerateTextCommand(GeneratingCommand):

    account = Option(require=True)
    opt = Option(require=True, validate=validators.Integer(0))

    def jira_url(self, url, endpoint):
        # For Splunk Cloud vetting, the URL must start with https://
        if not url.startswith("https://"):
            return 'https://%s/rest/api/latest/%s' % (url, endpoint)

        else:
            return '%s/rest/api/latest/%s' % (url, endpoint)

    def get_jira_info(self, jira_headers, url, ssl_verify, proxy_dict,
                      endpoint):
        response = requests.get(url=self.jira_url(url, endpoint),
                                headers=jira_headers,
                                verify=ssl_verify,
                                proxies=proxy_dict)
        return response.json()

    def generate(self):
        storage_passwords = self.service.storage_passwords

        # global configuration
        conf_file = "ta_service_desk_simple_addon_settings"
        confs = self.service.confs[str(conf_file)]
        jira_passthrough_mode = None
        proxy_enabled = "0"
        proxy_url = None
        proxy_dict = None
        proxy_username = None
        for stanza in confs:
            if stanza.name == "advanced_configuration":
                for key, value in stanza.content.items():
                    if key == "jira_passthrough_mode":
                        jira_passthrough_mode = value
            if stanza.name == "proxy":
                for key, value in stanza.content.items():
                    if key == "proxy_enabled":
                        proxy_enabled = value
                    if key == "proxy_port":
                        proxy_port = value
                    if key == "proxy_rdns":
                        proxy_rdns = value
                    if key == "proxy_type":
                        proxy_type = value
                    if key == "proxy_url":
                        proxy_url = value
                    if key == "proxy_username":
                        proxy_username = value

        if proxy_enabled == "1":

            # get proxy password
            if proxy_username:
                proxy_password = None

                # get proxy password, if any
                credential_realm = '__REST_CREDENTIAL__#TA-jira-service-desk-simple-addon#configs/conf-ta_service_desk_simple_addon_settings'
                for credential in storage_passwords:
                    if credential.content.get('realm') == str(credential_realm) \
                        and credential.content.get('clear_password').find('proxy_password') > 0:
                        proxy_password = json.loads(
                            credential.content.get('clear_password')).get(
                                'proxy_password')
                        break

                if proxy_type == 'http':
                    proxy_dict = {
                        "http":
                        "http://" + proxy_username + ":" + proxy_password +
                        "@" + proxy_url + ":" + proxy_port,
                        "https":
                        "https://" + proxy_username + ":" + proxy_password +
                        "@" + proxy_url + ":" + proxy_port
                    }
                else:
                    proxy_dict = {
                        "http":
                        str(proxy_type) + "://" + proxy_username + ":" +
                        proxy_password + "@" + proxy_url + ":" + proxy_port,
                        "https":
                        str(proxy_type) + "://" + proxy_username + ":" +
                        proxy_password + "@" + proxy_url + ":" + proxy_port
                    }

            else:
                proxy_dict = {
                    "http": proxy_url + ":" + proxy_port,
                    "https": proxy_url + ":" + proxy_port
                }

        # get all accounts
        accounts = []
        conf_file = "ta_service_desk_simple_addon_account"
        confs = self.service.confs[str(conf_file)]
        for stanza in confs:
            accounts.append(stanza.name)

        # run
        if self.account == '_all':

            for account in accounts:

                # account configuration
                jira_ssl_certificate_validation = None
                jira_ssl_certificate_path = None
                username = None
                password = None

                conf_file = "ta_service_desk_simple_addon_account"
                confs = self.service.confs[str(conf_file)]
                for stanza in confs:

                    if stanza.name == str(account):
                        for key, value in stanza.content.items():
                            if key == "jira_url":
                                jira_url = value
                            if key == "jira_ssl_certificate_validation":
                                jira_ssl_certificate_validation = value
                            if key == "jira_ssl_certificate_path":
                                jira_ssl_certificate_path = value
                            if key == 'auth_type':
                                auth_type = value
                            if key == 'jira_auth_mode':
                                jira_auth_mode = value
                            if key == 'username':
                                username = value

                # end of get configuration

                credential_username = str(account) + '``splunk_cred_sep``1'
                credential_realm = '__REST_CREDENTIAL__#TA-jira-service-desk-simple-addon#configs/conf-ta_service_desk_simple_addon_account'
                for credential in storage_passwords:
                    if credential.content.get('username') == str(credential_username) \
                        and credential.content.get('realm') == str(credential_realm) \
                        and credential.content.get('clear_password').find('password') > 0:
                        password = json.loads(
                            credential.content.get('clear_password')).get(
                                'password')
                        break

                # Build the authentication header for JIRA
                if str(jira_auth_mode) == 'basic':
                    authorization = username + ':' + password
                    b64_auth = base64.b64encode(
                        authorization.encode()).decode()
                    jira_headers = {
                        'Authorization': 'Basic %s' % b64_auth,
                        'Content-Type': 'application/json',
                    }
                elif str(jira_auth_mode) == 'pat':
                    jira_headers = {
                        'Authorization': 'Bearer %s' % str(password),
                        'Content-Type': 'application/json',
                    }

                # Default to verifying TLS, then apply the account setting;
                # leaving ssl_verify unset would raise NameError below.
                ssl_verify = True
                if jira_ssl_certificate_validation == '0':
                    ssl_verify = False
                elif jira_ssl_certificate_validation == '1' and jira_ssl_certificate_path and os.path.isfile(
                        jira_ssl_certificate_path):
                    ssl_verify = str(jira_ssl_certificate_path)

                if self.opt == 1:
                    for project in self.get_jira_info(jira_headers, jira_url,
                                                      ssl_verify, proxy_dict,
                                                      'project'):
                        usercreds = {
                            '_time':
                            time.time(),
                            'account':
                            str(account),
                            'key':
                            project.get('key'),
                            'key_projects':
                            project.get('key') + " - " + project.get('name')
                        }
                        yield usercreds

                if self.opt == 2:
                    for issue in self.get_jira_info(jira_headers, jira_url,
                                                    ssl_verify, proxy_dict,
                                                    'issuetype'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(account),
                            'issues': issue.get('name')
                        }
                        yield usercreds

                if self.opt == 3:
                    for priority in self.get_jira_info(jira_headers, jira_url,
                                                       ssl_verify, proxy_dict,
                                                       'priority'):
                        usercreds = {
                            '_time': time.time(),
                            'account': str(account),
                            'priorities': priority.get('name')
                        }
                        yield usercreds

                if self.opt == 4:
                    for status in self.get_jira_info(jira_headers, jira_url,
                                                     ssl_verify, proxy_dict,
                                                     'status'):
                        result = {
                            '_time':
                            time.time(),
                            'account':
                            str(account),
                            'status':
                            status.get('name'),
                            'statusCategory':
                            status.get('statusCategory').get('name')
                        }
                        yield result

        else:

            # account configuration
            isfound = False
            jira_ssl_certificate_validation = None
            jira_ssl_certificate_path = None
            username = None
            password = None

            conf_file = "ta_service_desk_simple_addon_account"
            confs = self.service.confs[str(conf_file)]
            for stanza in confs:

                if stanza.name == str(self.account):
                    isfound = True
                    for key, value in stanza.content.items():
                        if key == "jira_url":
                            jira_url = value
                        if key == "jira_ssl_certificate_validation":
                            jira_ssl_certificate_validation = value
                        if key == "jira_ssl_certificate_path":
                            jira_ssl_certificate_path = value
                        if key == 'auth_type':
                            auth_type = value
                        if key == 'jira_auth_mode':
                            jira_auth_mode = value
                        if key == 'username':
                            username = value

            # end of get configuration

            # Stop here if we cannot find the submitted account
            if not isfound:
                self.logger.fatal(
                    'This account has not been configured on this instance, cannot proceed!: %s',
                    self)

            # else get the password
            else:
                credential_username = str(
                    self.account) + '``splunk_cred_sep``1'
                credential_realm = '__REST_CREDENTIAL__#TA-jira-service-desk-simple-addon#configs/conf-ta_service_desk_simple_addon_account'
                for credential in storage_passwords:
                    if credential.content.get('username') == str(credential_username) \
                        and credential.content.get('realm') == str(credential_realm) \
                        and credential.content.get('clear_password').find('password') > 0:
                        password = json.loads(
                            credential.content.get('clear_password')).get(
                                'password')
                        break

            # Build the authentication header for JIRA
            if str(jira_auth_mode) == 'basic':
                authorization = username + ':' + password
                b64_auth = base64.b64encode(authorization.encode()).decode()
                jira_headers = {
                    'Authorization': 'Basic %s' % b64_auth,
                    'Content-Type': 'application/json',
                }
            elif str(jira_auth_mode) == 'pat':
                jira_headers = {
                    'Authorization': 'Bearer %s' % str(password),
                    'Content-Type': 'application/json',
                }

            # Default to verifying TLS, then apply the account setting;
            # leaving ssl_verify unset would raise NameError below.
            ssl_verify = True
            if jira_ssl_certificate_validation == '0':
                ssl_verify = False
            elif jira_ssl_certificate_validation == '1' and jira_ssl_certificate_path and os.path.isfile(
                    jira_ssl_certificate_path):
                ssl_verify = str(jira_ssl_certificate_path)

            if self.opt == 1:
                for project in self.get_jira_info(jira_headers, jira_url,
                                                  ssl_verify, proxy_dict,
                                                  'project'):
                    usercreds = {
                        '_time':
                        time.time(),
                        'account':
                        str(self.account),
                        'key':
                        project.get('key'),
                        'key_projects':
                        project.get('key') + " - " + project.get('name')
                    }
                    yield usercreds

            if self.opt == 2:
                for issue in self.get_jira_info(jira_headers, jira_url,
                                                ssl_verify, proxy_dict,
                                                'issuetype'):
                    usercreds = {
                        '_time': time.time(),
                        'account': str(self.account),
                        'issues': issue.get('name')
                    }
                    yield usercreds

            if self.opt == 3:
                for priority in self.get_jira_info(jira_headers, jira_url,
                                                   ssl_verify, proxy_dict,
                                                   'priority'):
                    usercreds = {
                        '_time': time.time(),
                        'account': str(self.account),
                        'priorities': priority.get('name')
                    }
                    yield usercreds

            if self.opt == 4:
                for status in self.get_jira_info(jira_headers, jira_url,
                                                 ssl_verify, proxy_dict,
                                                 'status'):
                    result = {
                        '_time': time.time(),
                        'account': str(self.account),
                        'status': status.get('name'),
                        'statusCategory':
                        status.get('statusCategory').get('name')
                    }
                    yield result
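The _all branch above duplicates the single-account branch almost line for line. A sketch of how the per-account work could be factored out (both helper names are hypothetical):

    def generate(self):
        accounts = self.list_accounts()  # hypothetical: reads account conf
        targets = accounts if self.account == '_all' else [self.account]
        for account in targets:
            # hypothetical: builds headers/ssl/proxy and yields results
            for event in self.events_for_account(account, self.opt):
                yield event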
Example 12
class DatabricksQueryCommand(GeneratingCommand):
    """Custom Command of databricksquery."""

    # Take input from user using parameters
    cluster = Option(require=False)
    query = Option(require=True)
    command_timeout = Option(require=False, validate=validators.Integer(minimum=1))

    def generate(self):
        """Generating custom command."""
        _LOGGER.info("Initiating databricksquery command")
        command_timeout_in_seconds = self.command_timeout or const.COMMAND_TIMEOUT_IN_SECONDS
        _LOGGER.info("Setting command timeout to {} seconds.".format(command_timeout_in_seconds))

        # Get session key
        session_key = self._metadata.searchinfo.session_key

        try:
            # Fetching cluster name
            self.cluster = self.cluster or utils.get_databricks_configs().get("cluster_name")
            if not self.cluster:
                raise Exception(
                    "Databricks cluster is required to execute this custom command. "
                    "Provide a cluster parameter or configure the cluster in the TA's configuration page."
                )

            # Request to get cluster ID
            _LOGGER.info("Requesting cluster ID for cluster: {}.".format(self.cluster))
            cluster_id = com.get_cluster_id(session_key, self.cluster)
            _LOGGER.info("Cluster ID received: {}.".format(cluster_id))

            # Request to create context
            _LOGGER.info("Creating Context in cluster.")
            payload = {"language": "sql", "clusterId": cluster_id}
            response = com.databricks_api(
                "post", const.CONTEXT_ENDPOINT, session_key, data=payload
            )

            context_id = response.get("id")
            _LOGGER.info("Context created: {}.".format(context_id))

            # Request to execute command
            _LOGGER.info("Submitting SQL query for execution.")
            payload["contextId"] = context_id
            payload["command"] = self.query
            response = com.databricks_api(
                "post", const.COMMAND_ENDPOINT, session_key, data=payload
            )

            command_id = response.get("id")
            _LOGGER.info("Query submitted, command id: {}.".format(command_id))

            # polling mechanism
            _LOGGER.info("Fetching query execution status.")
            status = None
            args = {
                "clusterId": cluster_id,
                "contextId": context_id,
                "commandId": command_id,
            }

            total_wait_time = 0
            while total_wait_time <= command_timeout_in_seconds:
                response = com.databricks_api(
                    "get", const.STATUS_ENDPOINT, session_key, args=args
                )

                status = response.get("status")
                _LOGGER.info("Query execution status: {}.".format(status))

                if status in ("Cancelled", "Error"):
                    raise Exception(
                        "Could not complete the query execution. Status: {}.".format(status)
                    )

                elif status == "Finished":
                    if response["results"]["resultType"] == "error":
                        msg = response["results"].get("summary", "Error encountered while executing query.")
                        raise Exception(str(msg))

                    if response["results"]["resultType"] != "table":
                        raise Exception("Encountered unknown result type, terminating the execution.")

                    if response["results"].get("truncated", True):
                        self.write_warning("Results are truncated due to Databricks API limitations.")

                    _LOGGER.info("Query execution successful. Preparing data.")

                    # Prepare list of Headers
                    headers = response["results"]["schema"]
                    schema = []
                    for header in headers:
                        field = header.get("name")
                        schema.append(field)

                    # Fetch Data
                    data = response["results"]["data"]

                    for d in data:
                        yield dict(zip(schema, d))

                    _LOGGER.info("Data parsed successfully.")
                    break

                seconds_to_timeout = command_timeout_in_seconds - total_wait_time

                if seconds_to_timeout < const.COMMAND_SLEEP_INTERVAL_IN_SECONDS:

                    if not seconds_to_timeout:
                        total_wait_time += 1
                        continue

                    _LOGGER.info(
                        "Query execution in progress, will retry after {} seconds.".format(
                            str(seconds_to_timeout)))
                    time.sleep(seconds_to_timeout)
                    total_wait_time += seconds_to_timeout
                    continue

                _LOGGER.info(
                    "Query execution in progress, will retry after {} seconds.".format(
                        str(const.COMMAND_SLEEP_INTERVAL_IN_SECONDS)))
                time.sleep(const.COMMAND_SLEEP_INTERVAL_IN_SECONDS)
                total_wait_time += const.COMMAND_SLEEP_INTERVAL_IN_SECONDS
            else:
                # Timeout scenario
                msg = "Command execution timed out. Last status: {}.".format(status)
                _LOGGER.info(msg)
                self.write_error(msg)

            # Destroy the context to free-up space in Databricks
            if context_id:
                _LOGGER.info("Deleting context.")
                payload = {"contextId": context_id, "clusterId": cluster_id}
                _ = com.databricks_api(
                    "post", const.CONTEXT_DESTROY_ENDPOINT, session_key, data=payload
                )
                _LOGGER.info("Context deleted successfully.")

        except Exception as e:
            _LOGGER.error(e)
            _LOGGER.error(traceback.format_exc())
            self.write_error(str(e))
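For reference, the const.* endpoints here match the Databricks Command Execution API (1.2); the exchange the polling loop drives looks like this (a sketch, IDs abbreviated):

# POST /api/1.2/contexts/create   {"language": "sql", "clusterId": "<id>"}
#   -> {"id": "<contextId>"}
# POST /api/1.2/commands/execute  {..., "contextId": "<ctx>", "command": "SELECT 1"}
#   -> {"id": "<commandId>"}
# GET  /api/1.2/commands/status?clusterId=...&contextId=...&commandId=...
#   -> {"status": "Finished", "results": {"resultType": "table",
#       "schema": [{"name": "col"}], "data": [["val"]]}}
# POST /api/1.2/contexts/destroy  {"contextId": "<ctx>", "clusterId": "<id>"}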
Example 13
class MongoConnectCommand(GeneratingCommand):
    """ %(synopsis)

    ##Syntax

    %(syntax)

    ##Description

    %(description)

    ##TODO:

    """

    s = Option(require=False)
    db = Option(require=False, default='test')
    col = Option(require=False, default='tweets')
    earliest = Option(require=False,
                      default=(datetime.now() -
                               timedelta(hours=4)).strftime("%x %X"))
    latest = Option(require=False, default=datetime.now().strftime("%x %X"))
    limit = Option(require=False, default=10, validate=validators.Integer())

    _mongo_conf = configparser.ConfigParser()
    _mongo_conf.read(os.path.dirname(__file__) + '/../default/mongo.conf')
    _props_conf = configparser.ConfigParser()
    _props_conf.read(os.path.dirname(__file__) + '/../default/props.conf')
    _transforms_conf = configparser.ConfigParser()
    _transforms_conf.read(
        os.path.dirname(__file__) + '/../default/transforms.conf')

    _client = MongoClient(host='127.0.0.1',
                          port=27017,
                          username='******',
                          password='******',
                          authSource='admin')

    kv = re.compile(r"\b(\w+)\s*?=\s*([^=]*)(?=\s+\w+\s*=|$)")
    re_alias = re.compile(r"(\w+) as (\w+)")

    # Add more strings that confuse the parser in the list
    UNINTERESTING = set(
        chain(dateutil.parser.parserinfo.JUMP,
              dateutil.parser.parserinfo.PERTAIN, ['a']))

    _extracts = {}
    _transforms = {}
    _aliases = {}

    def _get_date(self, tokens):
        for end in range(len(tokens), 0, -1):
            region = tokens[:end]
            if all(token.isspace() or token in self.UNINTERESTING
                   for token in region):
                continue
            text = ''.join(region)
            try:
                date = dateutil.parser.parse(text)
                return end, date
            except ValueError:
                pass

    def find_dates(self, text, max_tokens=50, allow_overlapping=False):
        # Materialize the token list so it can be sliced and len()'d
        # (filter() returns a lazy iterator on Python 3).
        tokens = [token for token in re.split(r'(\S+|\W+)', text) if token]
        skip_dates_ending_before = 0
        for start in range(len(tokens)):
            region = tokens[start:start + max_tokens]
            result = self._get_date(region)
            if result is not None:
                end, date = result
                if allow_overlapping or end > skip_dates_ending_before:
                    skip_dates_ending_before = end
                    yield date

    def init(self):
        # Initialize sourcetypes, props, aliases and transforms
        for sourcetype in self._props_conf:
            for key, value in self._props_conf[sourcetype].items():
                if key.startswith('extract-'):
                    if not sourcetype in self._extracts:
                        self._extracts[sourcetype] = []
                    self._extracts[sourcetype].append(
                        re.compile(value.replace('?<', '?P<')))
                if key.startswith('report-'):
                    if not sourcetype in self._transforms:
                        self._transforms[sourcetype] = []
                    if value in self._transforms_conf:
                        delim = self._transforms_conf[value]['DELIMS'].replace(
                            '"', '')
                        fields = self._transforms_conf[value][
                            'FIELDS'].replace('"', '').split(',')
                        transform = {}
                        transform['delim'] = delim
                        transform['fields'] = fields
                        self._transforms[sourcetype].append(transform)
                if key.startswith('fieldalias-'):
                    if not sourcetype in self._aliases:
                        self._aliases[sourcetype] = {}
                    match = self.re_alias.match(value)
                    if match:
                        field, alias = match.groups()
                        self._aliases[sourcetype][field] = alias

        # Initialize database
        self.database = self._client[self.db]
        #self.collection = self.database[self.col]

    def flatten(self, _dict, key=""):
        if key != "":
            key = key + '_'
        for k, v in _dict.items():
            if isinstance(v, list):
                for elt in v:
                    if isinstance(elt, dict):
                        for k2, v2 in self.flatten(elt, key + k):
                            yield k2, v2
                    else:
                        yield key + k, elt
            elif isinstance(v, dict):
                for k2, v2 in self.flatten(v, key + k):
                    yield k2, v2
            else:
                if k != 'id' and k != 'id_str':
                    yield key + k, v

    def generate(self):
        self.init()

        rets = []
        fields = {}

        q = {}
        if self.s:
            q = {'$text': {'$search': self.s}}
        q['_time'] = {}
        q['_time']['$gte'] = dateutil.parser.parse(self.earliest)
        q['_time']['$lte'] = dateutil.parser.parse(self.latest)
        s = [('_time', -1)]
        collections = self.col.split(',')
        for collection in collections:
            for doc in self.database[collection].find(q).sort(s).limit(
                    self.limit):
                ret = {}
                try:
                    try:
                        if '_time' in doc:
                            ret['_time'] = doc['_time'].strftime("%s.%f")
                            del doc['_time']
                        else:
                            for datefield in self._mongo_conf['fields'][
                                    'DateFields'].split(','):
                                if datefield in doc:
                                    for date in self.find_dates(
                                            doc[datefield],
                                            allow_overlapping=False):
                                        ret['_time'] = date.strftime("%s.%f")
                                        break
                                    if '_time' in ret:
                                        break
                    except Exception as e:
                        #print("ERROR: ", str(e))
                        ret['_raw'] = "Error: %s." % str(e)
                    if not '_time' in ret:
                        ret['_time'] = time.time()
                    #print(ret['_time'])
                    ret['_raw'] = str(
                        doc['message']) if 'message' in doc else dumps(doc)
                    if 'source' in doc:
                        del doc['source']
                    ret['source'] = doc[
                        '_source'] if '_source' in doc else self.db
                    if 'sourcetype' in doc:
                        del doc['sourcetype']
                    ret['sourcetype'] = doc[
                        '_sourcetype'] if '_sourcetype' in doc else self.col
                    sourcetype = ret['sourcetype']
                    #for field in doc:
                    #    ret[field] = doc[field]
                    for field, value in self.flatten(doc):
                        #print("KV: ", field, value)
                        ret[field] = value  #doc[field]
                    for (field, value) in self.kv.findall(ret['_raw']):
                        ret[field] = value.replace('"', '')
                    if sourcetype in self._extracts:
                        for extract in self._extracts[sourcetype]:
                            match = extract.search(ret['_raw'])
                            if match:
                                for field, value in match.groupdict().items():
                                    ret[field] = value
                    if sourcetype in self._transforms:
                        for transform in self._transforms[sourcetype]:
                            f = 0
                            for value in (list(
                                    reader([ret['_raw']],
                                           delimiter=str(
                                               transform['delim'])))[0]):
                                if f >= len(transform['fields']):
                                    break
                                if transform['fields'][f] != '':
                                    ret[transform['fields'][f]] = value
                                f = f + 1
                    if sourcetype in self._aliases:
                        for field, value in ret.items():
                            if field in self._aliases[sourcetype]:
                                ret[self._aliases[sourcetype]
                                    [field]] = ret[field]
                    for field in ret:
                        if not field in fields:
                            fields[field] = 1
                except Exception as e:
                    ret['_raw'] = "Error: %s." % str(e)
                rets.append(ret)
        for ret in rets:
            for field in fields:
                if not field in ret:
                    ret[field] = ''
            yield ret
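To illustrate the flattening scheme used above, a standalone copy of flatten() applied to a nested document (sample data made up):

def flatten(_dict, key=""):
    # Underscore-joined key paths; list elements expanded; 'id'/'id_str'
    # scalars dropped, mirroring MongoConnectCommand.flatten.
    if key != "":
        key = key + '_'
    for k, v in _dict.items():
        if isinstance(v, list):
            for elt in v:
                if isinstance(elt, dict):
                    yield from flatten(elt, key + k)
                else:
                    yield key + k, elt
        elif isinstance(v, dict):
            yield from flatten(v, key + k)
        elif k not in ('id', 'id_str'):
            yield key + k, v

print(dict(flatten({'user': {'name': 'ada', 'tags': ['x', {'k': 'v'}]}})))
# -> {'user_name': 'ada', 'user_tags': 'x', 'user_tags_k': 'v'}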
Example 14
class BlaggertCommand(StreamingCommand):

    opt_token = Option(doc='''
        **Syntax:** **token=***<fieldname>*
        **Description:** HEC token to use.
        **Default:** None''',
                       name='token',
                       require=True,
                       validate=validators.Fieldname())

    opt_server = Option(doc='''
        **Syntax:** **server=***<fieldname>*
        **Description:** Server to send the payload to.
        **Default:** localhost''',
                        name='server',
                        require=False,
                        default='localhost',
                        validate=validators.Fieldname())

    opt_port = Option(doc='''
        **Syntax:** **port=***<fieldname>*
        **Description:** HEC Port, not fortified red wine.
        **Default:** 8088''',
                      name='port',
                      require=False,
                      default=8088,
                      validate=validators.Integer())

    def __init__(self):
        super(BlaggertCommand, self).__init__()

    def prepare(self):
        return

    def stream(self, records):

        # Put your event transformation code here
        url = "https://{}:{}/services/collector/event".format(
            self.opt_server, self.opt_port)
        headers = {"Authorization": "Splunk {}".format(self.opt_token)}
        for record in records:
            self.logger.info('Record {0}'.format(record))

            payload = {"event": {"event_id": str(uuid.uuid4())}}
            # dict.iteritems() is Python 2 only; items() works everywhere.
            for k, v in record.items():
                payload["event"][k] = v

            payload_str = json.dumps(payload)
            self.logger.info('send to HEC url={} - payload={}'.format(
                url, payload_str))
            try:
                res = requests.post(url,
                                    data=payload_str,
                                    headers=headers,
                                    verify=False)
                res.raise_for_status()
                self.logger.info("Sweet as {} {}".format(
                    res.status_code, res.text))
                record["blaggert_says"] = "Done it"
            except Exception as e:
                self.logger.error('Send HEC Caught exception: {}'.format(e))
                record["blaggert_says"] = "Buggered it {}".format(e)

            yield record
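Each record currently opens a fresh HTTPS connection to the collector. A sketch of reusing one connection via requests.Session, set up once in prepare() (which splunklib calls after option parsing):

    def prepare(self):
        self.session = requests.Session()
        self.session.headers.update(
            {"Authorization": "Splunk {}".format(self.opt_token)})
        self.session.verify = False  # matches the original; prefer a CA bundle

    # then inside stream():
    #     res = self.session.post(url, data=payload_str)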
Example 15
class Outliers(OptionRemoteStreamingCommand):
    threshold = Option(require=False,
                       default=0.01,
                       validate=FloatValidator(minimum=0, maximum=1))

    # One-Class SVM arguments
    kernel = Option(require=False, default='rbf')
    degree = Option(require=False,
                    default=3,
                    validate=validators.Integer(minimum=1))
    gamma = Option(require=False,
                   default=0.1,
                   validate=FloatValidator(minimum=0, maximum=1))
    coef0 = Option(require=False, default=0.0, validate=FloatValidator())

    # Covariance Estimator arguments
    support_fraction = Option(require=False,
                              validate=FloatValidator(minimum=0, maximum=1))
    showmah = Option(require=False,
                     default=False,
                     validate=validators.Boolean())

    classifier = Option(require=False, default='one_class_svm')

    code = """
import os, sys, numbers, math
import numpy as np
import scipy.sparse as sp
from scipy import stats

from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.feature_extraction.text import HashingVectorizer

if __name__ == '__channelexec__':
	args = channel.receive()

	fraction = 1 - args['threshold']
	fields = args.get('fieldnames') or ['_raw']
	by_fields = None
	try:
		by_index = fields.index("by")
		by_fields = fields[(by_index+1):]
		fields = fields[:by_index]
	except:
		pass
	classifier = args['classifier']

	svm_args = {
		'nu': 0.95 * fraction + 0.05,
		'kernel': args['kernel'],
		'degree': args['degree'],
		'gamma': args['gamma'],
		'coef0': args['coef0']
	}

	rc_args = {
		'contamination': args['threshold'],
		'support_fraction': args['support_fraction']
	}

	classifiers = {
		'one_class_svm': svm.OneClassSVM(**svm_args),
		'covariance_estimator': EllipticEnvelope(**rc_args)
	}

	records = []
	for record in channel:
		if not record:
			break
		records.append(record)

	if records:
		vectorizer = HashingVectorizer(ngram_range=(1,3), n_features=int(math.sqrt(len(records))))
		X = sp.lil_matrix((len(records),vectorizer.n_features))

		for i, record in enumerate(records):
			nums = []
			strs = []
			for field in fields:
				if isinstance(record.get(field), numbers.Number):
					nums.append(record[field])
				else:
					strs.append(str(record.get(field) or ""))
			if nums:
				X[i] = np.array(nums, dtype=np.float64)
			elif strs:
				X[i] = vectorizer.transform([" ".join(strs)])

		X = X.toarray()
		y_pred = None
		mah = None

		clf = classifiers.get(classifier)
		if clf:
			try:
				clf.fit(X)
				y = clf.decision_function(X).ravel()
				threshold = stats.scoreatpercentile(y, 100 * fraction)
				y_pred = y > threshold
				if classifier == 'covariance_estimator' and args['showmah']:
					mah = clf.mahalanobis(X)
			except ValueError:
				y_pred = np.zeros((X.shape[0]))

			for i, y in enumerate(y_pred):
				if y:
					record = records[i]
					if mah is not None:
						record['mahalanobis'] = mah[i].item()
					channel.send(record)
		else:
			channel.send({ "error": "Incorrect classifier specified %s" % classifier })
"""

    def __dir__(self):
        return [
            'threshold', 'kernel', 'degree', 'gamma', 'coef0',
            'support_fraction', 'showmah', 'classifier'
        ]
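OptionRemoteStreamingCommand is app-specific, but the `__channelexec__` / channel.send() / channel.receive() protocol in the code string matches execnet's remote_exec model. A minimal sketch of driving that payload locally (the args dict mirrors the options above; records is an assumed iterable of dicts):

import execnet

gw = execnet.makegateway()  # local worker process
channel = gw.remote_exec(Outliers.code)
channel.send({'threshold': 0.01, 'classifier': 'one_class_svm',
              'kernel': 'rbf', 'degree': 3, 'gamma': 0.1, 'coef0': 0.0,
              'support_fraction': None, 'showmah': False,
              'fieldnames': ['_raw']})
for record in records:
    channel.send(record)
channel.send(None)          # sentinel: the payload breaks on a falsy record
for outlier in channel:     # records flagged as outliers come back
    print(outlier)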
Example 16
class EsCommand(GeneratingCommand):
    """ Generates events that are the result of a query against Elasticsearch

  ##Syntax

  .. code-block::
      es index=<string> q=<string> fields=<string> oldest=<string> earl=<string> limit=<int>

  ##Description

  The :code:`es` command issues a query to Elasticsearch, where the
  query is specified in :code:`q`.

  ##Example

  .. code-block::
      | es oldest=now-100d earl=now q="some text" index=nagios* limit=1000 fields=message

  This example generates events drawn from the result of the query.

  """

    index = Option(doc='', require=False, default="*")

    q = Option(doc='', require=True)

    fields = Option(doc='', require=False, default="message")

    oldest = Option(doc='', require=False, default="now")

    earl = Option(doc='', require=False, default="now-1d")

    limit = Option(doc='',
                   require=False,
                   validate=validators.Integer(),
                   default=100)

    def generate(self):

        config = self.get_configuration()

        self.logger.debug('Setup ES')
        es = Elasticsearch()
        # self.limit, self.q and self.index are the command's options; the
        # bare names used originally would raise NameError at runtime.
        body = {
            "size": self.limit,
            "query": {
                "filtered": {
                    "query": {
                        "query_string": {
                            "query": self.q
                        }
                    }
                }
            }
        }
        res = es.search(index=self.index, body=body)

        # if response.status_code != 200:
        #   yield {'ERROR': results['error']['text']}
        #   return

        # date_time = '2014-12-21T16:11:18.419Z'
        # pattern = '%Y-%m-%dT%H:%M:%S.%fZ'

        for hit in res['hits']['hits']:
            yield self.getEvent(hit)

    def getEvent(self, result):

        # hit["_source"][defaultField] = hit["_source"][defaultField].replace('"',' ');
        # epochTimestamp = hit['_source']['@timestamp'];
        # hit['_source']['_epoch'] = int(time.mktime(time.strptime(epochTimestamp, pattern)))
        # hit['_source']["_raw"]=hit['_source'][defaultField]

        event = {
            '_time': time.time(),
            '_index': result['_index'],
            '_type': result['_type'],
            '_id': result['_id'],
            '_score': result['_score']
        }

        event["_raw"] = json.dumps(result)

        return event

    def get_configuration(self):
        sourcePath = os.path.dirname(os.path.abspath(__file__))
        config_file = open(sourcePath + '/config.json')
        return json.load(config_file)

    def __init__(self):
        # super() must reference the subclass, not the base class.
        super(EsCommand, self).__init__()
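Note that the filtered query used above only exists on Elasticsearch 1.x/2.x; it was removed in 5.0. On current clusters the equivalent request body wraps the query in bool (a sketch):

body = {
    "size": 100,
    "query": {
        "bool": {
            "must": {"query_string": {"query": "some text"}}
            # a range filter on a timestamp field would go under "filter"
        }
    }
}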