コード例 #1
0
    def onevsall2(anomalies, observed):
        """
        Score one observation against every known anomaly class.

        This is not a true one-vs-all implementation: each anomaly class is
        used as its own one-class classifier and the observation is tested
        against each of them in turn. It is kept because it is much faster
        than one-vs-all, and it is the method the Java code used.

        anomalies maps a class key to the object handed to SVMCalc.test();
        that object must provide get_features().
        observed is read as observed[0][i], where i is the position of a
        feature in the get_features() list of the first class (in sorted
        key order).

        Returns a string that starts with a single space, followed by one
        "<key> : <scores>" line per class in sorted key order.
        """
        class_keys = sorted(anomalies.keys())
        report = [" "]
        observed_h = None

        # The observation histogram is built exactly once, from the feature
        # list of the first class, and then reused for every class.
        if class_keys:
            observed_h = Histograms(0, 0)
            first_features = anomalies[class_keys[0]].get_features()
            for idx, feature in enumerate(first_features):
                observed_h.insert_one(feature,
                                      0,
                                      value=observed[0][idx],
                                      use_internal_time=True)

        # Original author's note: somehow this generates the same scores
        # for training data.
        for key in class_keys:
            scores = SVMCalc.test(anomalies[key], observed_h)
            report.append(str(key) + " : " + np.array_str(scores) + "\n")

        return "".join(report)
コード例 #2
0
ファイル: anomalyIO.py プロジェクト: igoryok/essence
    def getFakeAnomalies():
        """
        Build a synthetic set of anomaly histograms.

        Returns a dict with the keys (0,0), (1,1), (2,2) and (3,3). Each
        value is a Histograms(2, 1) filled with 500 rows; every row holds
        one value for each of the hosts 10.0.0.1, 10.0.0.2 and 10.0.0.3,
        computed as a per-class base value plus a random integer jitter.
        """
        hosts = ("10.0.0.1", "10.0.0.2", "10.0.0.3")

        # (key, base value per host in `hosts` order, upper bound of jitter)
        profiles = (
            ((0, 0), (210, 810, 210), 200),
            ((1, 1), (0, 900, 0), 200),
            ((2, 2), (0, 0, 0), 100),
            ((3, 3), (1000, 11200, 1000), 200),
        )

        fake = dict()
        for label, bases, jitter in profiles:
            hist = Histograms(2, 1)
            fake[label] = hist
            for tick in range(500):
                # Hosts are visited in a fixed order so the sequence of
                # random draws is the same for a given seed.
                for host, base in zip(hosts, bases):
                    hist.insert_one(host, tick,
                                    value=base + random.randint(0, jitter),
                                    use_internal_time=True)
                hist.next_row()
        return fake
コード例 #3
0
ファイル: cassandraIO.py プロジェクト: igoryok/essence
    def get_histogram(self, sample_window_sec, slide_window_sec, filter_name,
                      filter_value, features_keep):
        """
        Build histograms from the rows of self._table that match one filter.

        filter_name and filter_value define the one thing we are making
        histograms for. For example, source_addr = 10.0.0.1 builds
        histograms for all packets originating from 10.0.0.1, using
        whatever features are requested.

        features_keep is a comma-separated string of field names (white
        space around each name is ignored). Each name is looked up first
        among the row's columns and then inside the row's "text_values"
        map, so the resulting key space is flat. We assume the same name
        does not appear in both places.

        sample_window_sec and slide_window_sec are passed straight to the
        Histograms constructor.

        Returns the populated Histograms object. Every matching row adds
        one entry whose key is the comma-joined string of its feature
        values and whose time is derived from the row's time_stamp.

        Raises Exception if a requested field is found neither among the
        columns nor in text_values of a matching row.
        """
        ret = Histograms(sample_window_sec, slide_window_sec)

        # NOTE: a table name cannot be bound as a query parameter, so
        # self._table must come from trusted configuration, never from
        # user input. Rows are filtered client side, i.e. this is a full
        # table scan.
        self._result = self._session.execute("SELECT * FROM " + self._table)

        # Parse the requested field names once instead of once per row.
        wanted = [name.strip() for name in features_keep.split(",")]

        for res in self._result:
            res_dict = res._asdict()

            if res_dict[filter_name] != filter_value:
                continue

            # A missing or null text_values column is treated as an empty
            # map so an unknown field yields the explicit error below.
            text_values = res_dict.get("text_values") or {}

            features = []
            for f in wanted:
                if f in res_dict:
                    features.append(res_dict[f])
                elif f in text_values:
                    features.append(text_values[f])
                else:
                    raise Exception("Could not find field " + f)

            # NOTE(review): mktime() interprets the timetuple as local
            # time - confirm that this matches how time_stamp is stored.
            sec = time.mktime(res.time_stamp.timetuple())

            ret.insert_one(','.join(map(str, features)), sec)
        return ret
コード例 #4
0
def main():
    """
    Run the full update pipeline from the command line.

    Command line flags:
      --ucr   also update the ER status in the UCR records first
      --hist  plot new histograms after the totals have been written

    Steps, in order: optional ER update, adversity scores, database
    merge, imputation, summary, totals (written as XLSX), optional
    histograms. The total runtime is printed at the end.
    """
    start = datetime.now()

    # The text must be passed as `description`; the first positional
    # parameter of ArgumentParser is `prog`, which would turn the sentence
    # into the program name shown in the usage line.
    parser = ArgumentParser(description="Updates all relevant files.")
    parser.add_argument("--ucr",
                        action="store_true",
                        help="Add ER column to updated UCR file.")
    parser.add_argument("--hist",
                        action="store_true",
                        help="Plot new histograms.")
    args = parser.parse_args()

    if args.ucr:
        print("\n\tUpdating ER status in UCR records...")
        er = ERupdate()
        er.getERstatus()

    print("\n\tCalculating adversity scores in UPDB records...")
    a = Adversity()
    a.getAdversityScores()

    print("\n\tMerging UPDB and UCR records...")
    merger = DatabaseMerger()
    merger.merge()

    print("\n\tImputing missing data...")
    i = Impute()
    i.imputeRecords()

    # Get summaries
    summarize(args.ucr)

    print("\n\tCalculating totals from merged records...")
    c = Counter()
    c.writeXLSX()
    c.printComplete()

    if args.hist:
        Histograms(c)

    print(("\tTotal runtime: {}\n").format(datetime.now() - start))
コード例 #5
0
ファイル: daemon_service.py プロジェクト: igoryok/essence
def getfakedata():
    """
    Register the fake training and test histograms and describe them.

    Both histograms are stored in the global hist_dict under consecutive
    ids taken from the global next_id counter (training first), and each
    one is printed via print_histograms().

    Returns a plain-text Response with, for each dataset, a
    "Dataset ID: <id>" line followed by one line per feature.
    """
    global next_id
    global hist_dict

    train_h = Histograms.get_fake_histogram_train()
    test_h = Histograms.get_fake_histogram_test()

    chunks = []
    for dataset in (train_h, test_h):
        hist_dict[next_id] = dataset
        chunks.append("Dataset ID: " + str(next_id) + "\n")
        for feature in dataset.get_features():
            chunks.append(feature + "\n")
        # The id is consumed before the histogram is printed.
        next_id += 1
        dataset.print_histograms()

    return Response("".join(chunks), mimetype='text/plain')
コード例 #6
0
ファイル: daemon_service.py プロジェクト: dpinney/essence
def getfakedata():
    """
    Create the fake train/test histograms, register them, list their features.

    The training histogram is stored in the global hist_dict under the
    current value of the global next_id, the test histogram under the
    following id; next_id ends up advanced by two. Each histogram is also
    printed with print_histograms().

    Returns a Response of mimetype text/plain containing a
    "Dataset ID: <id>" header and one feature per line for each dataset.
    """
    global next_id
    global hist_dict

    train_h = Histograms.get_fake_histogram_train()
    test_h = Histograms.get_fake_histogram_test()

    # Training dataset
    hist_dict[next_id] = train_h
    output = "Dataset ID: " + str(next_id) + "\n"
    output += "".join(name + "\n" for name in train_h.get_features())
    next_id += 1
    train_h.print_histograms()

    # Test dataset
    hist_dict[next_id] = test_h
    output += "Dataset ID: " + str(next_id) + "\n"
    output += "".join(name + "\n" for name in test_h.get_features())
    next_id += 1
    test_h.print_histograms()

    return Response(output, mimetype='text/plain')
コード例 #7
0
						print "[setup_limits] WARNING : Didn't find tree {} in input file, but did find {}. Changing the tree name, but try to fix this.".format(tree_name, backup_tree_name)
						tree_name = backup_tree_name
					else:
						print "[setup_limits] ERROR : Didn't find tree {} in input file, nor {}. Quitting!".format(tree_name, backup_tree_name)
						sys.exit(1)
				# Check that the "NEvents" histogram is present
				h_NEvents = f.Get("NEvents")
				if not h_NEvents:
					if "data" in sample:
						print "[setup_limits] ERROR : NEvents histogram in not in this file! It is probably corrupt. This is data, so this problem is fatal."
						sys.exit(1)
					else:
						print "[setup_limits] WARNING : NEvents histogram in not in this file! It is probably corrupt. This is MC, so I am skipping the file. But, you probably want to remove from the input list."
						sample_files[sample].remove(filename)
				
			limit_histogrammer = Histograms(sample, tree_name=tree_name, jet_type=args.jet_type)
			if args.label:
				extra_tag = "_" + args.label
			else:
				extra_tag = ""
			output_file_basename ="histograms_{}_{}_{}{}.root".format(sample, args.jet_type, args.year, extra_tag) 
			if args.output_folder:
				limit_histogrammer.set_output_path("{}/{}".format(args.output_folder, output_file_basename))
			else:
				limit_histogrammer.set_output_path("/uscms/home/dryu/DAZSLE/data/histograms/tmp/{}".format(output_file_basename))
			for filename in files_to_run:
				print "Input file {}".format(filename)
				limit_histogrammer.add_file(filename)
			#limit_histogrammer.set_jet_type(args.jet_type)
			if "JetHT" in sample or "SingleMu" in sample:
				limit_histogrammer.set_data_source("data")
コード例 #8
0
 def task_handler(doc_id, user_id, task_id, data, g, cmd):
     """
     Run the task selected by task_id and show its result.

     doc_id / user_id -- document and user the task refers to (may be None)
     task_id          -- one of "2a", "2b", "3a", "3b", "4", "5a" ... "5e"
     data             -- the data set handed to the TaskManager helpers
     g                -- GUI object with canvas, toolbar and listbox
                         attributes, or None
     cmd              -- truthy for command line mode: results are printed
                         instead of being loaded into the GUI

     An unknown task_id prints "Invalid Task" in command line mode and is
     silently ignored otherwise.
     """
     # Remove the widgets left over from a previous task.
     if g is not None:
         if g.canvas is not None:
             g.canvas.get_tk_widget().destroy()
         if g.toolbar is not None:
             g.toolbar.destroy()
             g.toolbar = None
         if g.listbox is not None:
             g.listbox.destroy()

     def show_histogram(counts, title):
         # The histogram is always built; it is only plotted in GUI mode.
         histogram = Histograms(counts, title, cmd)
         if not cmd:
             TaskManager.plot_figure_gui(g, histogram)

     def show_list(items):
         if cmd:
             print(items)
         else:
             TaskManager.load_list(g, items)

     if task_id == "2a" or task_id == "2b":
         # The membership test only runs in command line mode; a None
         # doc_id is rejected in either mode.
         invalid = (cmd and doc_id not in TaskManager.get_all_documents(data)
                    ) or doc_id is None
         if invalid:
             print("Please Provide a Valid Document ID")
         elif task_id == "2a":
             show_histogram(
                 TaskManager.get_countries(
                     doc_id,
                     TaskManager.filter_data(data, "subject_doc_id", doc_id)),
                 "Task 2A")
         else:
             show_histogram(
                 TaskManager.get_continents(
                     doc_id,
                     TaskManager.filter_data(data, "subject_doc_id", doc_id)),
                 "Task 2B")
     elif task_id == "3a":
         show_histogram(TaskManager.simple_get_all_browser(data), "Task 3A")
     elif task_id == "3b":
         show_histogram(TaskManager.get_all_browser(data), "Task 3B")
     elif task_id == "4":
         show_list(TaskManager.get_top_10(data))
     elif task_id == "5a":
         show_list(TaskManager.get_all_users_by_doc(doc_id, data))
     elif task_id == "5b":
         show_list(TaskManager.get_all_documents_by_user(user_id, data))
     elif task_id == "5c":
         show_list(TaskManager.task5(data, doc_id, user_id, None))
     elif task_id == "5d":
         show_list(
             TaskManager.task5(data, doc_id, user_id,
                               TaskManager.sort_by_readership))
     elif task_id == "5e":
         show_list(
             TaskManager.task5(data, doc_id, user_id,
                               TaskManager.sort_by_number))
     elif cmd:
         print("Invalid Task")
コード例 #9
0
ファイル: anomalyIO.py プロジェクト: igoryok/essence
    def getAnomalies(self, testStart=None, testEnd=None,
                     trainStart=None, trainEnd=None,
                     filterValue=None, targetType=None,
                     algorithm=None, userState=None,
                     userCause=None):
        """
        Query the anomaly service and group the results by user tagging.

        Every argument is optional; each one that is not None adds one
        query parameter to the request:
          testStart / testEnd   -> detectionTimeWindowStart / ...End
          trainStart / trainEnd -> trainingTimeWindowStart / ...End
          filterValue           -> sourceValue
          targetType, algorithm, userCause, userState -> same name
        The four time window values are converted with str(); the other
        values are concatenated as given and must therefore be strings.
        Values are not URL-encoded.

        Returns a dictionary where each key is a tuple of (cause, state)
        and each value is a Histograms object of observed anomalous data
        tagged by a user as having that cause/state. An item without a
        cause or state id uses -1 for the missing part.

        Raises ApiError if the service does not answer with status 200.
        """
        arg = ""
        if testStart is not None:
            arg += "&detectionTimeWindowStart=" + str(testStart)
        if testEnd is not None:
            arg += "&detectionTimeWindowEnd=" + str(testEnd)
        if trainStart is not None:
            arg += "&trainingTimeWindowStart=" + str(trainStart)
        if trainEnd is not None:
            arg += "&trainingTimeWindowEnd=" + str(trainEnd)
        if filterValue is not None:
            # Was "+ sourceValue", an undefined name (NameError).
            arg += "&sourceValue=" + filterValue
        if targetType is not None:
            arg += "&targetType=" + targetType
        if algorithm is not None:
            arg += "&algorithm=" + algorithm
        if userCause is not None:
            arg += "&userCause=" + userCause
        if userState is not None:
            arg += "&userState=" + userState

        # Single-argument form prints the same text on Python 2 and 3
        # (the old print statement put a separator space after "arg is ").
        print("arg is  " + arg)

        resp = requests.get('http://' + self.host +
                            '/essence-services/anomaly/query/?' + arg)
        if resp.status_code != 200:
            # This means something went wrong.
            raise ApiError('GET /essence-services/anomaly/query/ {}'.format(
                resp.status_code))

        ret = dict()
        for i in resp.json():
            # Reset per item so an untagged anomaly does not inherit the
            # cause/state of the item processed before it.
            cause = -1
            state = -1

            # A missing or null entry is treated like an empty one.
            user_cause = i.get('userCause') or {}
            if 'id' in user_cause:
                cause = user_cause['id']
            # The state is read from userState (it used to be read from
            # userCause by mistake).
            user_state = i.get('userState') or {}
            if 'id' in user_state:
                state = user_state['id']

            if (cause, state) not in ret:
                # Histograms(-1, -1) creates an empty container that is
                # filled row by row below, instead of building a matrix
                # first and passing it to the constructor.
                ret[(cause, state)] = Histograms(-1, -1)

            hist = ret[(cause, state)]
            if 'anomalyEntries' not in i:
                continue
            for entries in i['anomalyEntries']:
                hist.insert_one(entries['sequenceNumber'], -1,
                                use_internal_time=True)
            hist.next_row()

        return ret