Exemple #1
0
    def predict_splunk_search(self, search_string, X_fields, Y_field,
                              output_field):
        '''
        Assemble a Splunk query string that labels events with a class.

        The returned string is 'search <search_string> | ' followed by
        whatever splunk_concat produces from the argmax array's string and
        the case-mapping string built for output_field.

        search_string -- base search placed after the 'search' keyword
        X_fields      -- feature fields, encoded through sm.to_one_hot
        Y_field       -- accepted but not used by this method
        output_field  -- field name handed to sm.case_mapping
        '''
        query_prefix = 'search %s | ' % search_string

        # encode the features with the mapping and ordering stored on self
        onehot_features = sm.to_one_hot(
            X_fields,
            onehot_mapping=self.feature_onehot_mapping,
            ordering=self.onehot_ordering)
        log_prob_table = sm.array(self.log_prob_suff_stats)

        # per-class score: encoded features dotted with the transposed
        # log-probability table, plus the log priors
        likelihood_scores = sm.dot(onehot_features, log_prob_table.T())
        log_priors = sm.array(self.log_prob_priors)
        posterior_scores = sm.add(likelihood_scores, log_priors)

        best_class = sm.argmax(posterior_scores)
        best_class.rename('argmax_prob')
        # the case mapping reads the winning index from 'argmax_prob_0_0'
        index_to_class = sm.case_mapping(self.class_mapping,
                                         'argmax_prob_0_0', output_field)

        return query_prefix + splunk_concat(best_class.string,
                                            index_to_class)
Exemple #2
0
	def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
		'''
		Build a Splunk search that scores each event per class with splunkmath
		and writes the winning class index into output_field.

		search_string -- base search placed after the 'search' keyword
		X_fields      -- feature fields wrapped with sm.array
		Y_field       -- accepted but not used by this method
		output_field  -- field assigned by the trailing eval/case clause
		'''
		base_search = 'search %s' % search_string

		features = sm.array(X_fields)
		means = sm.array(self.feature_averages)
		variances = sm.array(self.feature_variances)
		log_priors = sm.array(self.log_prob_priors)

		# exponent part: -0.5 * (x - mean)^2 / variance
		centered = sm.sub(features, means)
		squared = sm.pow(centered, 2)
		scaled = sm.div(squared, variances)
		exponent_terms = sm.mul(scaled, -.5)

		# normalisation part: -0.5 * ln(variance * 2 * pi)
		doubled_variances = sm.mul(variances, 2)
		two_pi_variances = sm.mul(doubled_variances, np.pi)
		log_norm_terms = sm.mul(sm.ln(two_pi_variances), -.5)

		per_feature_scores = sm.add(log_norm_terms, exponent_terms)

		# collapse the per-feature terms along axis 1, then add the priors
		per_class_scores = sm.sum(per_feature_scores, axis=1)
		posterior_scores = sm.add(log_priors, per_class_scores)
		posterior_scores.rename('final_probabilities')
		argmax_clause = sm.argmax(posterior_scores)

		query = splunk_concat(base_search, posterior_scores.string)
		query = splunk_concat(query, argmax_clause)

		# The case() below reads 'argmax_final_probabilities' (presumably the
		# field emitted by the argmax clause -- confirm against splunkmath) and
		# maps each index 0..num_classes-1 to itself.
		argmax_field = 'argmax_final_probabilities'
		case_clauses = []
		for class_index in range(self.num_classes):
			case_clauses.append('%s=%s, %s' % (argmax_field, class_index, class_index))
		class_eval = 'eval %s = case(%s)' % (output_field, ','.join(case_clauses))
		return splunk_concat(query, class_eval)
Exemple #3
0
	def predict_splunk_search(self, search_string, feature_fields, class_field, output_field):
		'''
		Build a search string that adds a prediction field to matching events.

		The string is 'search <search_string> | ' followed by the result of
		splunk_concat on the argmax array's string and the case-mapping string.

		search_string  -- base search placed after the 'search' keyword
		feature_fields -- feature fields; their count sets the pi exponent
		class_field    -- accepted but not used by this method
		output_field   -- field name handed to sm.case_mapping
		'''
		query_prefix = 'search %s | ' % search_string

		# TODO: passing self.sufficient_statistics in as a vector, instead of
		# going through a splunk string first, would be better.
		x = sm.array(feature_fields)
		stats = sm.array(self.sufficient_statistics)
		deviation = sm.sub(x, stats)

		# quadratic form: deviation . inv_cov . deviation^T, of which only the
		# diagonal is kept and then scaled by -0.5
		inverse_cov = sm.array(self.inv_cov_matrix)
		left_product = sm.dot(deviation, inverse_cov)
		quadratic = sm.dot(left_product, deviation.T())
		quadratic_diag = sm.diag(quadratic)
		exponent_terms = sm.mul(quadratic_diag, -.5)

		# ln(1 / (cov_det_root * pi^(n/2))), with n = number of feature fields
		pi_factor = np.pi ** (len(feature_fields) / float(2))
		log_norm = sm.ln(sm.array((1 / (self.cov_det_root * pi_factor))))
		log_priors = sm.array(self.log_prob_priors)

		# NOTE(review): log_norm is subtracted from the priors here, and the
		# factor uses pi rather than 2*pi. Both only matter if the term varies
		# between classes -- confirm the intended sign and constant.
		priors_minus_norm = sm.sub(log_priors, log_norm)
		class_scores = sm.add(priors_minus_norm, exponent_terms)
		class_scores.rename('prob')

		best_class = sm.argmax(class_scores)
		best_class.rename('argmax_prob')
		# the case mapping reads the winning index from 'argmax_prob_0_0'
		index_to_class = sm.case_mapping(self.class_mapping, 'argmax_prob_0_0', output_field)

		return query_prefix + splunk_concat(best_class.string, index_to_class)
Exemple #4
0
	def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
		'''
		Return a Splunk query string that fills output_field with a class.

		The query is 'search <search_string> | ' plus the splunk_concat of the
		argmax array's string and the case-mapping string.

		search_string -- base search placed after the 'search' keyword
		X_fields      -- feature fields, encoded through sm.to_one_hot
		Y_field       -- accepted but not used by this method
		output_field  -- field name handed to sm.case_mapping
		'''
		head = 'search %s | ' % search_string

		encoded = sm.to_one_hot(
			X_fields,
			onehot_mapping=self.feature_onehot_mapping,
			ordering=self.onehot_ordering,
		)
		log_probs = sm.array(self.log_prob_suff_stats)

		# encoded features dotted with the transposed log-probability table
		transposed_log_probs = log_probs.T()
		scores = sm.dot(encoded, transposed_log_probs)

		# then the log priors are added on top
		log_priors = sm.array(self.log_prob_priors)
		scores_with_priors = sm.add(scores, log_priors)

		winner = sm.argmax(scores_with_priors)
		winner.rename('argmax_prob')
		# 'argmax_prob_0_0' is the field the case mapping switches on
		class_lookup = sm.case_mapping(self.class_mapping, 'argmax_prob_0_0', output_field)

		tail = splunk_concat(winner.string, class_lookup)
		return head + tail
Exemple #5
0
    def predict_splunk_search(self, search_string, X_fields, Y_field,
                              output_field):
        '''
        Use splunkmath to build a search that predicts a class index for
        every event and stores it in output_field.

        search_string -- base search placed after the 'search' keyword
        X_fields      -- feature fields wrapped with sm.array
        Y_field       -- accepted but not used by this method
        output_field  -- field assigned by the trailing eval/case clause
        '''
        base_search = 'search %s' % search_string

        observed = sm.array(X_fields)
        averages = sm.array(self.feature_averages)
        variances = sm.array(self.feature_variances)
        log_priors = sm.array(self.log_prob_priors)

        # -0.5 * (x - average)^2 / variance
        squared_diff = sm.pow(sm.sub(observed, averages), 2)
        exponent_terms = sm.mul(sm.div(squared_diff, variances), -.5)

        # -0.5 * ln(variance * 2 * pi)
        variance_two_pi = sm.mul(sm.mul(variances, 2), np.pi)
        log_norm_terms = sm.mul(sm.ln(variance_two_pi), -.5)

        # sum the per-feature terms along axis 1 and add the priors
        feature_scores = sm.add(log_norm_terms, exponent_terms)
        class_scores = sm.sum(feature_scores, axis=1)
        total_scores = sm.add(log_priors, class_scores)
        total_scores.rename('final_probabilities')
        argmax_clause = sm.argmax(total_scores)

        pipeline = splunk_concat(base_search, total_scores.string)
        pipeline = splunk_concat(pipeline, argmax_clause)

        # The eval below switches on 'argmax_final_probabilities' (presumably
        # produced by the argmax clause -- confirm against splunkmath) and
        # maps every index in range(num_classes) to itself.
        argmax_field = 'argmax_final_probabilities'
        case_body = ','.join(
            '%s=%s, %s' % (argmax_field, index, index)
            for index in range(self.num_classes))
        class_eval = 'eval %s = case(%s)' % (output_field, case_body)
        return splunk_concat(pipeline, class_eval)