예제 #1
0
    def test_compOutlier(self):
        """Check the outlier counts for the passing and the failing fixture.

        For each pickled fixture a fresh ``outlier.Outlier("test.cfg")`` runs
        ``compOutlier`` for function id 0, and the rows whose flag from
        ``getOutlier()`` equals -1 are counted.  Exactly one such row is
        expected for ``passFunTime.pickle`` and none for
        ``failFunTime.pickle``.
        """

        def flagged_row_count(fixture_path):
            # The fixtures are local test files; pickle must never be pointed
            # at untrusted data.
            with open(fixture_path, 'rb') as stream:
                samples = pickle.load(stream)

            detector = outlier.Outlier("test.cfg")
            detector.compOutlier(samples, 0)
            flags = np.array(detector.getOutlier())
            # Select column 6 of every flagged row (presumably an id column --
            # TODO confirm) and count the selected entries.
            return len(samples[flags == -1][:, 6])

        self.assertEqual(flagged_row_count("passFunTime.pickle"), 1)
        self.assertEqual(flagged_row_count("failFunTime.pickle"), 0)
 def generate_outlier(self, X_test, y_test, dist, percent):
     """Build noisy trajectories for every label and stack them in label order.

     One ``outlier.Outlier`` is created per column index of ``self.y`` and
     asked for its noise trajectories via ``get_noise_trajectory(dist,
     percent)``.  The per-label results are concatenated in increasing
     label order.

     Returns:
         A ``(not_normalized, normalized)`` pair.  Both entries are ``None``
         when ``self.y`` has no columns.
     """
     raw_stack, norm_stack = None, None
     label_count = self.y.shape[1]
     for label in range(label_count):
         generator = outlier.Outlier(label, X_test, y_test, self.s,
                                     self.size_trajectory)
         noisy_norm, noisy_raw = generator.get_noise_trajectory(dist, percent)
         if label > 0:
             # Append this label's trajectories behind those gathered so far.
             noisy_raw = np.concatenate([raw_stack, noisy_raw])
             noisy_norm = np.concatenate([norm_stack, noisy_norm])
         raw_stack, norm_stack = noisy_raw, noisy_norm
     return raw_stack, norm_stack
예제 #3
0
def outlier_test(wsize, factor, low, high):
    """Return how often ``Outlier.check`` agrees with a NumPy reference rule.

    1000 random integers drawn from ``[low, high)`` are fed to an
    ``outlier.Outlier(wsize, factor, max_drift=0.001)``.  Once its ``rs``
    window reports ready, every further sample triggers one trial: a random
    probe is drawn and ``check(probe)`` is compared with
    ``probe >= factor * std(window) + mean(window)``.

    Returns:
        Fraction of trials where both answers match.

    Raises:
        ZeroDivisionError: if the window never becomes ready, so no trial ran.
    """
    stream = np.random.randint(low, high, 1000)
    detector = outlier.Outlier(wsize, factor, max_drift=0.001)
    agreements = 0
    trials = 0

    for sample in stream:
        detector.add(sample)
        if detector.rs.ready:
            window = np.array(list(detector.rs))
            spread = np.std(window)
            center = np.mean(window)

            # NOTE(review): precedence makes the lower bound low + (high // 2);
            # confirm that (low + high) // 2 was not intended.
            probe = np.random.randint(low + high // 2, high * factor)
            expected = probe >= (factor * spread + center)

            if detector.check(probe) == expected:
                agreements += 1
            trials += 1

    return agreements / trials
예제 #4
0
def data_file():
    """Ask the user for a JSON file and load it into an ``outlier.Outlier``.

    Opens a file-selection dialog restricted to ``.json`` files.  When a
    non-empty file name comes back, a new ``outlier.Outlier`` built from it
    is stored on the function attribute ``data_file.outlier``.  A timestamped
    line containing the returned file name is then appended to ``scr2``,
    whether or not a file was chosen.
    """
    fname = tk.filedialog.askopenfilename(filetypes=[("JSON", ".json")], defaultextension='.json')
    # Compare by value: an identity test (`is not`) against a string literal
    # depends on interning and triggers a SyntaxWarning on Python 3.8+.
    if fname != '':
        data_file.outlier = outlier.Outlier(fname)
    scr2.insert(tk.END, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n打开文件 ' + fname + '\n\n')
예제 #5
0
# Set up the event, outlier and visualizer objects for the selected parse
# method. Every object is constructed from the first command-line argument.
if parseMethod == "BP":
    # Determine event types: copy the mapping returned by the parser into a
    # list, looking entries up by the integer keys 0..numEventTypes-1.
    eventTypeDict = prs.getEventType()
    numEventTypes = len(eventTypeDict)
    eventTypeList = [None] * numEventTypes
    # NOTE(review): assert statements are stripped under `python -O`, so this
    # guard disappears in optimized runs.
    assert (numEventTypes > 0), "No event types detected (Assertion)...\n"
    for i in range(0, numEventTypes):
        eventTypeList[i] = eventTypeDict[i]

    # Initialize event class
    evn = event.Event(sys.argv[1])
    evn.setEventType(eventTypeList)

    # Initialize outlier class
    otl = outlier.Outlier(sys.argv[1])

    # Initialize visualizer class
    # maxDepth is read from the 'Visualizer' section of the config; it is not
    # referenced again within this fragment.
    maxDepth = int(config['Visualizer']['MaxFunDepth'])
    viz = visualizer.Visualizer(sys.argv[1])

    # Reset visualization server
    viz.sendReset()

    # In nonstreaming mode send function map and event type to the visualization server
    viz.sendEventType(eventTypeList, 0)
    funMap = prs.getFunMap()
    viz.sendFunMap(list(funMap.values()), 0)
else:
    # Initialize event object
    # For any other parse method only the event object is created in the
    # code visible here.
    evn = event.Event(sys.argv[1])
예제 #6
0
def populate_initial_data(wsize):
    """Return an ``outlier.Outlier(wsize, 3)`` pre-fed with ``wsize`` samples.

    Each sample is a random integer drawn from the inclusive range 0..100.
    """
    detector = outlier.Outlier(wsize, 3)
    # Lazy generator: each value is drawn right before it is added.
    samples = (random.randint(0, 100) for _ in range(wsize))
    for sample in samples:
        detector.add(sample)
    return detector
예제 #7
0
    """train_df = pd.read_csv("../datasets/ecg_prep/train1.csv", sep=";", squeeze=True, index_col=0).sort_index()
    test_df = pd.read_csv("../datasets/ecg_prep/test1.csv", sep=";", squeeze=True, index_col=0).sort_index()
    outl_df = pd.read_csv("../datasets/ecg_prep/val1.csv", sep=";", squeeze=True, index_col=0).sort_index()"""
    train_df = pd.read_csv("../datasets/monthly_sunspots/train.csv",
                           sep=";",
                           squeeze=True)
    test_df = pd.read_csv("../datasets/monthly_sunspots/test.csv",
                          sep=";",
                          squeeze=True)
    outl_df = pd.read_csv("../datasets/monthly_sunspots/outl.csv",
                          sep=";",
                          squeeze=True)

    N_outl_ts = 10
    for i in range(N_outl_ts):
        outl = outlier.Outlier(type="contextual", n_outliers=1, size=25)

        outl_data, outl_idxs = outl.generate(data=outl_df.values.copy(),
                                             finetuning=[1],
                                             constant=False)
        labels = np.zeros(len(outl_df))
        plt.plot(outl_data, label="Original")
        for o in outl_idxs:
            plt.plot(o, outl_data[o], label="Outlier")
            labels[o] = True
        plt.legend(loc="upper right")

        name_ds = "data" + str(i) + "/"
        os.mkdir(PREFIX + name_ds)
        np.save(Path(PREFIX + name_ds + "train"), train_df.values)
        np.save(Path(PREFIX + name_ds + "test"), test_df.values)
예제 #8
0
#%%
# Tokenize the complete data set; the second return value is kept as key_set.
X_tokens, key_set = get_uber_tokens(X)

# Split features and labels into train / validation / test partitions.
X_train, y_train, X_val, y_val, X_test, y_test = get_splitted_data(X, y)

# Tokenize each partition separately; the second return value is discarded.
X_tokens_train, _ = get_uber_tokens(X_train)

X_tokens_val, _ = get_uber_tokens(X_val)

X_tokens_test, _ = get_uber_tokens(X_test)

#%%
# Generate anomalies from the test data, one label at a time, and stack them
# in label order (e.g. 0, 1, ...).
for i in range(y.shape[1]):
    out = outlier.Outlier(i, X_test, y_test, s)
    traj_anom_norm, traj_anom = out.get_noise_trajectory(1., 0.3)
    # The first label seeds the stack; every later label is appended to it.
    traj_anom_test = (traj_anom if i == 0
                      else np.concatenate([traj_anom_test, traj_anom]))

# Bare comparison whose result is not stored; presumably an interactive
# sanity check for this cell -- confirm before removing.
traj_anom_test[0:1, :, 1] == X_test[148:149, :, 1]


#%%
def get_all_tokens(old_list, new_list):
    """Extend ``old_list`` in place with the tokens of ``new_list`` it lacks.

    Every token of ``new_list`` that is not already in ``old_list`` is
    appended exactly once, in the order of its first appearance in
    ``new_list``.  Appending in input order (instead of set-iteration order)
    keeps the resulting token order reproducible between interpreter runs.

    Args:
        old_list: List of hashable tokens; mutated in place.
        new_list: Iterable of hashable tokens to merge in.

    Returns:
        None. The result is the mutation of ``old_list``.
    """
    known = set(old_list)
    fresh = []
    for token in new_list:
        if token not in known:
            # Record the token at once so repeats in new_list are skipped.
            known.add(token)
            fresh.append(token)
    old_list += fresh
예제 #9
0
def test_no_outlier():
    """Feed 100 uniform random samples and expect none to be flagged.

    The samples come from ``np.random.random`` and are passed one by one to
    ``add_and_check`` of an ``outlier.Outlier(100, 5)``; every call must
    return a falsy value.
    """
    samples = np.random.random(100)
    detector = outlier.Outlier(100, 5)

    for value in samples:
        flagged = detector.add_and_check(value)
        assert not flagged