def datasets():
    """Build one AnomalyDataset per node, each holding 3 random samples per anomaly class.

    Every sample is a RawData with a 50x10 random feature array and the
    anomaly's index as its label.  Returns the list of datasets.
    """
    anomaly_names = ["bandwidth", "download", "packet_loss"]
    node_names = ["cassandra", "bono", "sprout"]
    result = []
    for node in node_names:
        # Same sample order as a nested loop: anomalies outer, 3 repeats inner.
        samples = [
            RawData(x=np.random.rand(50, 10), y=label, headers=[],
                    node_name=node, anomaly_name=anomaly)
            for label, anomaly in enumerate(anomaly_names)
            for _ in range(3)
        ]
        ds = AnomalyDataset(node, anomaly_file_paths={}, ref_header=["blocker"])
        ds.data = samples
        result.append(ds)
    return result
def to_tensor(data):
    """Return a copy of *data* with x/y converted to torch tensors on self.device.

    Features become a float tensor; the scalar label is wrapped in a
    length-1 long tensor.  NOTE(review): relies on a free `self` from the
    enclosing scope — presumably a transform object exposing `.device`.
    """
    features = torch.from_numpy(data.x).float().to(self.device)
    label = torch.from_numpy(np.array([data.y])).long().to(self.device)
    return RawData.create_from_ref(data, x=features, y=label)
def arr_norm(data):
    """Min-max rescale data.x into [target_min, target_max] using cached stats.

    Uses `self.arr_min` and `self.denom` (presumably arr_max - arr_min,
    precomputed on the training set — confirm against the owning class).
    """
    spread = self.target_max - self.target_min
    scaled = (data.x - self.arr_min) * spread / self.denom
    return RawData.create_from_ref(data, x=self.target_min + scaled)
def kth_difference(data):
    """Replace features with the mean of the 1st..k-th order lag differences.

    Each lag-i difference x[i:] - x[:-i] is trimmed at the tail so every
    row has the common length len(x) - k before stacking; the stack is then
    averaged along the lag axis.  When self.append is set, the original
    features (truncated to the same row count) are concatenated column-wise.
    """
    x = data.x
    diffs = []
    for lag in range(1, self.k + 1):
        d = x[lag:] - x[:-lag]
        # Smaller lags yield longer diffs; drop trailing rows to align all
        # entries at len(x) - k rows.
        if self.k > 1 and lag < self.k:
            d = d[:-(self.k - lag)]
        diffs.append(d)
    x = np.stack(diffs).mean(0)
    if self.append:
        rows = min(x.shape[0], data.x.shape[0])
        x = np.concatenate([x[:rows], data.x[:rows]], axis=1)
    return RawData.create_from_ref(data, x=x)
def train_list():
    """Return a single-element training list: one RawData with random 200x10 features, label 1."""
    return [RawData(x=np.random.rand(200, 10), y=1)]
def tensor_whiten(data):
    """Whiten the features by right-multiplying with the cached Sigma^(-1/2) matrix.

    NOTE(review): `self.sigma_neg_sqrt` is presumably the inverse square
    root of a covariance matrix computed elsewhere — confirm in the owner.
    """
    projected = np.dot(data.x, self.sigma_neg_sqrt)
    return RawData.create_from_ref(data, x=projected)
def log_scale(data):
    """Log-transform the features via self.log_func, shifting by 1 first.

    The +1 shift keeps zero entries finite (log(0) would be -inf).
    """
    shifted = data.x + 1  # avoid -inf on zero-valued entries
    return RawData.create_from_ref(data, x=self.log_func(shifted))
def arr_stand(data):
    """Standardize the features (z-score) with the cached mean and std.

    `self.arr_mean` / `self.arr_std` are presumably fit on training data
    elsewhere — confirm against the owning transform class.
    """
    z_scores = (data.x - self.arr_mean) / self.arr_std
    return RawData.create_from_ref(data, x=z_scores)