def trainModelPipe(self, loss_type):
        """Build an encoder-decoder GRU forecaster and train it end to end.

        Parameters
        ----------
        loss_type : forwarded to ``self.train_model`` to select the loss.

        Returns
        -------
        The trained ``Net_GRU`` model, already on ``self.device``.
        """
        # Encoder ingests the multivariate input series.
        enc = EncoderRNN(input_size=self.n_features,
                         hidden_size=self.hidden_size,
                         num_grulstm_layers=self.num_grulstm_layers,
                         batch_size=self.batch_size)
        # Decoder produces a univariate output one step at a time.
        dec = DecoderRNN(input_size=1,
                         hidden_size=self.hidden_size,
                         num_grulstm_layers=self.num_grulstm_layers,
                         fc_units=16,
                         output_size=1)

        model = Net_GRU(enc.to(self.device), dec.to(self.device),
                        self.N_output, self.device).to(self.device)

        # Fixed training schedule: 500 epochs, lr=1e-3, periodic reporting.
        self.train_model(model,
                         batch_size=self.batch_size,
                         loss_type=loss_type,
                         learning_rate=0.001,
                         epochs=500,
                         gamma=self.gamma,
                         print_every=50,
                         eval_every=50,
                         verbose=1,
                         alpha=self.alpha)

        return model
# Esempio n. 2
# 0
def run_seq2seq():
    """Train a seq2seq autoencoder on HDFS log sequences, embed the test set
    with the encoder, and cluster the embeddings with DBSCAN to flag
    anomalies (DBSCAN noise label -1 -> anomaly 1). Prints clustering and
    classification metrics against the ground-truth labels.
    """
    (x_train, y_train), (x_test, y_test) = dataloader.load_HDFS(
        'data/HDFS_100k.log_structured.csv',
        label_file='data/anomaly_label.csv',
        train_ratio=0.8)

    tknzr = Tokenizer(lower=True, split=" ")
    tknzr.fit_on_texts(x_train)

    # making sequences:
    X_train = tknzr.texts_to_sequences(x_train)
    X_test = tknzr.texts_to_sequences(x_test)

    # Autoencoder setup: the target sequence is the input sequence itself.
    X_train = [(x, x) for x in X_train]
    X_test = [(x, x) for x in X_test]

    print(tknzr.word_index.keys())
    print(X_test)

    hidden_size = 32
    # +3 presumably reserves ids for special tokens (SOS/EOS/padding) beyond
    # the tokenizer vocabulary -- TODO confirm against EncoderRNN's embedding.
    vocab_size = len(tknzr.word_index) + 3
    encoder1 = EncoderRNN(vocab_size, hidden_size).to(device)
    decoder1 = DecoderRNN(hidden_size, vocab_size).to(device)

    trainIters(encoder1, decoder1, X_train, 5000, print_every=100)

    testing_pairs = [tensorsFromPair(i) for i in X_test]

    y_pred_outputs = []

    for input_tensor, _target_tensor in testing_pairs:
        # Bug fix: the original created the hidden state ONCE before the
        # loop, leaking encoder state from one test sequence into the next.
        # Reset it per sequence so each embedding is independent.
        encoder_hidden = encoder1.initHidden()
        encoder_output = None

        # Feed the sequence token by token; only the final encoder output is
        # kept as the sequence embedding. The original also filled a
        # MAX_LENGTH-sized buffer it never read, which raised IndexError for
        # sequences longer than MAX_LENGTH -- dropped here.
        for ei in range(input_tensor.size(0)):
            encoder_output, encoder_hidden = encoder1(input_tensor[ei],
                                                      encoder_hidden)

        if encoder_output is None:
            # Empty sequence: the original appended a stale (or unbound)
            # output here; skip instead. NOTE(review): skipping can shorten
            # y_pred relative to y_test -- lengths are printed below.
            continue
        y_pred_outputs.append(encoder_output.cpu().data.numpy().flatten())

    dbscan = DBSCAN(eps=0.075, min_samples=100, metric="cosine")
    y_pred = dbscan.fit_predict(y_pred_outputs).tolist()

    # DBSCAN labels noise points -1; treat those as anomalies (1), the rest
    # as normal (0).
    y_pred = np.array([1 if i == -1 else 0 for i in y_pred])

    print(len(y_pred))
    print(len(y_test))

    print(y_pred)
    print(y_test)

    print(Counter(y_pred))
    print(Counter(y_test))

    print("Homogeneity Score: %s" % str(homogeneity_score(y_test, y_pred)))
    print("completeness_score: %s" % str(completeness_score(y_test, y_pred)))
    print("v_measure_score: %s" % str(v_measure_score(y_test, y_pred)))
    print("F1 score %s" % str(f1_score(y_test, y_pred)))
    print("Precision score %s" % str(precision_score(y_test, y_pred)))
    print("Recall score %s" % str(recall_score(y_test, y_pred)))
# Esempio n. 3
# 0
    print(' Eval mse= ',
          np.array(losses_mse).mean(), ' dtw= ',
          np.array(losses_dtw).mean(), ' tdi= ',
          np.array(losses_tdi).mean())


print(f"batch_size: {batch_size}")

## TODO run with dtw implementation
# Build the seq2seq forecaster: the encoder consumes 3 input features, the
# decoder emits a single output series one step at a time.
encoder = EncoderRNN(input_size=3,
                     hidden_size=128,
                     num_grulstm_layers=2,
                     batch_size=batch_size).to(device)
decoder = DecoderRNN(input_size=1,
                     hidden_size=128,
                     num_grulstm_layers=2,
                     fc_units=16,
                     output_size=1).to(device)
# Full encoder-decoder model producing N_output forecast steps on `device`.
net_gru_dtw = Net_GRU(encoder, decoder, N_output, device).to(device)

train_model(net_gru_dtw,
            batch_size=batch_size,
            loss_type='dtw',
            learning_rate=0.001,
            epochs=500,
            gamma=gamma,
            print_every=50,
            eval_every=50,
            verbose=1,
            alpha=alpha,
            target_mean=target_log_mean,
    y_train = torch.from_numpy(y_train).contiguous()

    x_val = torch.from_numpy(x_val).contiguous()
    y_val = torch.from_numpy(y_val).contiguous()

    targets_train = y_train[:, :, :, [0]]
    features_train = y_train[:, :, :, 1:]

    targets_val = y_val[:, :, :, [0]]
    features_val = y_val[:, :, :, 1:]

    targets_test = y_test[:, :, :, [0]]
    features_test = y_test[:, :, :, 1:]

    encoder = EncoderRNN(input_size, hidden_size, n_layers, dropout)
    decoder = DecoderRNN(input_size, hidden_size, output_size, n_layers,
                         dropout)

    if os.path.isfile(encoder_checkpoint):
        print("Loading encoder checkpoint...")
        encoder.load_state_dict(torch.load(encoder_checkpoint))

    if os.path.isfile(decoder_checkpoint):
        print("Loading decoder checkpoint...")
        decoder.load_state_dict(torch.load(decoder_checkpoint))

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr)

    if use_cuda:
        encoder.cuda()
        decoder.cuda()