Example #1
0
    def predict_splunk_search(self, search_string, X_fields, Y_field,
                              output_field):
        '''
        Build a Splunk search string that writes a predicted class into
        output_field for the events selected by search_string.

        search_string -- base search, interpolated after the 'search ' keyword
        X_fields      -- feature fields, one-hot encoded using
                         self.feature_onehot_mapping and self.onehot_ordering
        Y_field       -- accepted but not read by this method
        output_field  -- field name handed to sm.case_mapping as the target

        Returns the assembled search string.
        '''
        query_prefix = 'search %s | ' % search_string

        # one-hot feature vector and the stored class log probabilities
        onehot_features = sm.to_one_hot(
            X_fields,
            onehot_mapping=self.feature_onehot_mapping,
            ordering=self.onehot_ordering,
        )
        log_likelihoods = sm.array(self.log_prob_suff_stats)

        # score = features . log_likelihoods^T + log priors
        likelihood_scores = sm.dot(onehot_features, log_likelihoods.T())
        log_priors = sm.array(self.log_prob_priors)
        class_scores = sm.add(likelihood_scores, log_priors)

        # after the rename, field argmax_prob_0_0 holds the index of the
        # largest entry of class_scores
        best_class = sm.argmax(class_scores)
        best_class.rename('argmax_prob')
        index_to_label = sm.case_mapping(self.class_mapping,
                                         'argmax_prob_0_0', output_field)

        return query_prefix + splunk_concat(best_class.string, index_to_label)
Example #2
0
	def predict_splunk_search(self, search_string, feature_fields, class_field, output_field):
		'''
		Build a Splunk search string that populates the events selected by
		search_string with a new prediction field.

		search_string  -- base search, interpolated after the 'search ' keyword
		feature_fields -- names of the feature fields; their count sets the
		                  exponent of the pi term
		class_field    -- accepted but not read by this method
		output_field   -- field name handed to sm.case_mapping as the target

		Returns the assembled search string.
		'''
		# step 1: restrict to the requested events
		query_prefix = 'search %s | ' % search_string

		# step 2: feature vector minus the stored sufficient statistics
		# TODO: passing self.sufficient_statistics in as a vector, rather than
		# first going through a splunk string, would be better
		feature_vec = sm.array(feature_fields)
		class_stats = sm.array(self.sufficient_statistics)
		centered = sm.sub(feature_vec, class_stats)

		# step 3: exponent of the multivariate gaussian pdf,
		# centered . inv_cov . centered^T
		#   (c x n) . (n x n) -> (c x n), then (c x n) . (n x c) -> (c x c)
		inv_cov = sm.array(self.inv_cov_matrix)
		centered_times_inv_cov = sm.dot(centered, inv_cov)
		quadratic_form = sm.dot(centered_times_inv_cov, centered.T())
		# only the diagonal entries are wanted; they are then scaled by -0.5
		diagonal_terms = sm.diag(quadratic_form)
		scaled_exponents = sm.mul(diagonal_terms, -0.5)

		# step 4: log of 1 / (cov_det_root * pi^(k/2)), k = number of features
		# NOTE(review): the textbook gaussian normaliser uses (2*pi)^(k/2) and
		# is added to the log prior, whereas here pi^(k/2) is used and the term
		# is subtracted. Harmless for the argmax if the term is the same for
		# every class -- confirm before relying on the 'prob' values.
		pi_power = np.pi ** (len(feature_fields) / float(2))
		log_normaliser = sm.ln(sm.array(1 / (self.cov_det_root * pi_power)))
		log_priors = sm.array(self.log_prob_priors)

		# splunk vector broadcasting takes care of combining the three pieces
		prior_minus_normaliser = sm.sub(log_priors, log_normaliser)
		class_scores = sm.add(prior_minus_normaliser, scaled_exponents)
		class_scores.rename('prob')

		# step 5: after the rename, field argmax_prob_0_0 holds the index of
		# the largest entry of class_scores; map that index to output_field
		best_class = sm.argmax(class_scores)
		best_class.rename('argmax_prob')
		index_to_label = sm.case_mapping(self.class_mapping, 'argmax_prob_0_0', output_field)

		return query_prefix + splunk_concat(best_class.string, index_to_label)
Example #3
0
	def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
		'''
		Build a Splunk search string that uses splunkmath to compute a
		prediction for every event selected by search_string.

		search_string -- base search, interpolated after the 'search ' keyword
		X_fields      -- list of feature field names; 'bias' is appended to it
		Y_field       -- accepted but not read by this method
		output_field  -- name given to element (0, 0) of the dot product

		Returns the assembled search string.
		'''
		# every event gets a constant field bias=1 (presumably matched by an
		# intercept weight in self.theta -- confirm against the training code)
		base_query = 'search %s | eval bias=1' % search_string

		# splunkmath arrays for the inputs (features plus bias) and the weights
		features = sm.array(X_fields + ['bias'])
		weights = sm.array(self.theta)

		# theta . x^T -- x is transposed to follow the existing convention
		features_t = sm.transpose(features)
		prediction = sm.dot(weights, features_t)
		prediction.rename_elem(0, 0, output_field)

		return splunk_concat(base_query, prediction.string)
Example #4
0
	def project_splunk_search(self, search_string, X_fields, output_field):
		'''
		Build a Splunk search string that uses splunkmath to project every
		event selected by search_string onto self.components.

		search_string -- base search, interpolated after the 'search ' keyword
		X_fields      -- names of the fields that make up the input vector
		output_field  -- name given to the projection; the projected field
		                 names will be output_field_i_j

		Returns the assembled search string.
		'''
		# the template has to be filled in with the caller's search; left
		# uninterpolated, the emitted query would contain a literal '%s'
		splunk_search = 'search %s' % search_string

		# splunkmath arrays for the input vector and the components
		x = sm.array(X_fields)
		components = sm.array(self.components)

		# the projection is the dot product of the two
		projection = sm.dot(x, components)
		projection.rename(output_field)

		return splunk_concat(splunk_search, projection.string)
Example #5
0
    def predict_splunk_search(self, search_string, X_fields, Y_field,
                              output_field):
        '''
        Build a Splunk search string in which splunkmath computes a
        prediction for each event selected by search_string.

        search_string -- base search, interpolated after the 'search ' keyword
        X_fields      -- list of feature field names; 'bias' is appended to it
        Y_field       -- accepted but not read by this method
        output_field  -- name given to element (0, 0) of the dot product

        Returns the assembled search string.
        '''
        # the search itself adds a constant bias=1 field to every event
        query_with_bias = 'search %s | eval bias=1' % search_string

        # inputs (features followed by the bias field) and model weights
        inputs = sm.array(X_fields + ['bias'])
        theta_arr = sm.array(self.theta)

        # theta . x^T; the transpose of x follows the existing convention
        inputs_t = sm.transpose(inputs)
        hypothesis = sm.dot(theta_arr, inputs_t)
        hypothesis.rename_elem(0, 0, output_field)

        return splunk_concat(query_with_bias, hypothesis.string)
Example #6
0
	def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
		'''
		Build a Splunk search string that writes a predicted class into
		output_field for the events selected by search_string.

		search_string -- base search, interpolated after the 'search ' keyword
		X_fields      -- feature fields, one-hot encoded using
		                 self.feature_onehot_mapping and self.onehot_ordering
		Y_field       -- accepted but not read by this method
		output_field  -- field name handed to sm.case_mapping as the target

		Returns the assembled search string.
		'''
		query_prefix = 'search %s | ' % search_string

		# one-hot feature vector and the stored class log probabilities
		encoded = sm.to_one_hot(
			X_fields,
			onehot_mapping=self.feature_onehot_mapping,
			ordering=self.onehot_ordering,
		)
		suff_stats = sm.array(self.log_prob_suff_stats)

		# score = encoded . suff_stats^T + log priors
		stats_t = suff_stats.T()
		weighted = sm.dot(encoded, stats_t)
		log_priors = sm.array(self.log_prob_priors)
		scores = sm.add(weighted, log_priors)

		# after the rename, field argmax_prob_0_0 holds the index of the
		# largest entry of scores
		winner = sm.argmax(scores)
		winner.rename('argmax_prob')
		label_case = sm.case_mapping(self.class_mapping, 'argmax_prob_0_0', output_field)

		return query_prefix + splunk_concat(winner.string, label_case)