Example #1
from CF import *
from DataLoader import *
import numpy as np
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    filePath = "./data/ml-20m/ratings.csv"

    print("Loading data:")
    dataLoader = DataLoader(filePath)
    Y_data = dataLoader.readFile()
    print("Done!\n--------------------")

    y = np.zeros(Y_data.shape[0])
    rate_train, rate_test, y_train, y_test = train_test_split(Y_data,
                                                              y,
                                                              test_size=0.2)

    print("User-user Collaborative Filtering: ")
    cf = CF(rate_train, 5)
    cf.fit()
    # cf.print_recommendation()
    print("-------------------------")

    print("\nTESTING: \nComputing RMSE: ")
    cf.RMSE(rate_test)
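The CF class itself is not part of this snippet, so the RMSE call above is opaque here. Below is a minimal, illustrative sketch of the usual computation, assuming the model exposes a per-pair prediction method cf.pred(user, item) and that each row of rate_test is [user_id, item_id, rating]; both of those are assumptions, not details taken from the original code.

import numpy as np

def rmse(cf_model, rate_test):
    # Root mean squared error over held-out (user, item, rating) triples.
    # cf_model.pred(user, item) is an assumed prediction method.
    n_tests = rate_test.shape[0]
    squared_error = 0.0
    for n in range(n_tests):
        user, item, rating = rate_test[n, 0], rate_test[n, 1], rate_test[n, 2]
        squared_error += (cf_model.pred(user, item) - rating) ** 2
    return np.sqrt(squared_error / n_tests)

print("RMSE =", rmse(cf, rate_test))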
Example #2
"""
@author: ivis
"""

from ELM import *
from DataLoader import *
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torchvision import transforms
import time
from Time import *

##data loader
transformations = transforms.Compose([transforms.ToTensor()])
train_np = DataLoader('data/', 'train', transformations)
test_np = DataLoader('data/', 'test', transformations)

##ELM parameters
num_images_train = 28099
num_images_test = 7025
input_size = (train_np[0][0].shape[0] * train_np[0][0].shape[1]
              * train_np[0][0].shape[2])
output_size = 5
hidden_size = 1000

##process data
"""
train_data = np.zeros((num_images_train, input_size))
train_label = np.zeros(num_images_train)
for i in range(num_images_train):
Example #3
import heapq  # used below for nlargest
import time

# Local helper modules assumed from their usage in this snippet; the exact
# module names are not shown in the original.
import DataLoader as dl
import KinoNetwork as kn

start_time = time.time()


def list_gen(inp_data):
    input_data = []
    for i in range(0, 80):
        input_data.append([0])
    for key in inp_data:
        input_data[key] = [1]
    return input_data


n = kn.KinoNetwork([80, 100, 200, 400, 200, 100, 80])

dataloader = dl.DataLoader()

data = dataloader.returnData('2015-01-02 10:00', '2015-01-05 21:30')

counter = [0, 0, 0, 0, 0, 0]

for i in range(0, len(data) - 1):
    input_data = list_gen(data[i])

    output = n.feedforward(input_data).tolist()
    output2 = heapq.nlargest(20, output)
    output3 = []

    for key in output2:
        output3.append(output.index(key) + 1)
    chance = 0
Example #4
def run():
    df = pd.read_csv(
        CONFIG.input_path).sample(frac=1).reset_index(drop=True).fillna("")
    print('------- [INFO] TOKENIZING -------\n')

    train_data = df[df['is_duplicate'] == 1]

    val_data = df[:10000]

    train_data = DataLoader.DataLoader(train_data)
    val_data = DataLoader.DataLoader(val_data)

    # Cache the vocabulary mappings so they can be reused at inference time
    if not os.path.exists('input/word_to_idx.pickle') or not os.path.exists(
            'input/idx_to_word.pickle'):
        pickle.dump(train_data.vocab.word_to_idx,
                    open('input/word_to_idx.pickle', 'wb'))
        pickle.dump(train_data.vocab.idx_to_word,
                    open('input/idx_to_word.pickle', 'wb'))

    pad_idx = train_data.vocab.word_to_idx['<PAD>']

    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=CONFIG.Batch_Size,
        num_workers=2,
        pin_memory=True,
        collate_fn=DataLoader.MyCollate(pad_idx))

    val_loader = torch.utils.data.DataLoader(
        val_data,
        num_workers=2,
        batch_size=CONFIG.Batch_Size,
        pin_memory=True,
        collate_fn=DataLoader.MyCollate(pad_idx))

    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'

    device = torch.device(accelerator)

    model = DocSimModel.DocSimModel(voacb_size=len(
        train_data.vocab.word_to_idx),
                                    embed_dims=CONFIG.embed_dims,
                                    hidden_dims=CONFIG.hidden_dims,
                                    num_layers=CONFIG.num_layers,
                                    bidirectional=CONFIG.bidirectional,
                                    dropout=CONFIG.dropout,
                                    out_dims=CONFIG.out_dims)

    model = model.to(device)
    optimizer = transformers.AdamW(model.parameters(),
                                   lr=CONFIG.LR,
                                   weight_decay=1e-2)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        threshold=CONFIG.scheduler_threshold,
        mode='min',
        patience=CONFIG.scheduler_patience,
        factor=CONFIG.scheduler_decay_factor)

    if os.path.exists(CONFIG.CHECKPOINT):
        checkpoint = torch.load(CONFIG.CHECKPOINT)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        checkpointed_epoch = checkpoint['epoch']
        print(
            f'\n-------------- [INFO] LOADING CHECKPOINT | EPOCH -> {checkpoint["epoch"]} | LOSS = {checkpoint["loss"]}--------'
        )
    else:
        checkpointed_epoch = 0

    best_auc_roc = -1e4
    print(
        '\n------------------------------ [INFO] STARTING TRAINING --------------------------------\n'
    )
    for epoch in range(checkpointed_epoch, CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, scheduler,
                                     device)
        val_auc_roc, val_loss = engine.eval_fn(model, val_loader, device)
        print(
            f'EPOCH -> {epoch+1}/ {CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL AUC SCORE = {val_auc_roc} | VAL LOSS = {val_loss} | LR = {optimizer.param_groups[0]["lr"]}\n'
        )
        scheduler.step(val_auc_roc)
        if best_auc_roc < val_auc_roc:
            best_auc_roc = val_auc_roc
            best_model = model.state_dict()
            torch.save(best_model, CONFIG.MODEL_PATH)

        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': val_loss,
                'auc_roc': val_auc_roc
            }, CONFIG.CHECKPOINT)
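The collate_fn=DataLoader.MyCollate(pad_idx) arguments above rely on a custom collate object that pads variable-length token sequences within a batch; the project's MyCollate is not shown. The following is a minimal sketch of what such a callable typically looks like, assuming each dataset item is a pair of token-index tensors plus a float label (an assumption about the dataset layout).

import torch
from torch.nn.utils.rnn import pad_sequence


class MyCollate:
    """Hypothetical collate: pad both question sequences to the batch maximum."""

    def __init__(self, pad_idx):
        self.pad_idx = pad_idx

    def __call__(self, batch):
        # batch is assumed to be a list of (question1, question2, label) samples
        q1 = [item[0] for item in batch]
        q2 = [item[1] for item in batch]
        labels = torch.tensor([item[2] for item in batch], dtype=torch.float)
        q1 = pad_sequence(q1, batch_first=True, padding_value=self.pad_idx)
        q2 = pad_sequence(q2, batch_first=True, padding_value=self.pad_idx)
        return q1, q2, labels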
Example #5
import os
from DataLoader import *

# hyper parameter
T = 5  # must be the same as during training
outclass = 21  # must be the same as during training
epoch = 0  # the epoch number of the model to load
save_dir = './predictions/'
data_dir = './test_data/'
model_dir = './ckpt/'
model_dir = os.path.join(model_dir, 'lstm_pm_epoch{}.ckpt'.format(epoch))
batch_size = 1

# load data
dataset = Feeder(data_dir=data_dir, train=False, temporal=T, joints=outclass)
dl = DataLoader(dataset, batch_size, shuffle=False)
print('Dataset Loaded')

if not os.path.exists(save_dir):
    os.mkdir(save_dir)

# **************************************** test all images ****************************************

# print('********* test data *********')

#placeholder for the image
image = tf.placeholder(tf.float32,
Example #6
from Classifier import WebClassifier
import WebScrapper
import DataLoader

if __name__ == "__main__":
    # init data loader
    loader = DataLoader.DataLoader(verbose=True)

    # try to load previous repository
    loader.loadRepoFromJSON('repo.json')

    # load links and categories from Excel
    loader.loadClassesAndCategoriesFromExcel(r'Categories.xlsx')

    # if site was not present in loader's WordRepository object, pull it here
    loader.scrapMissingSites()

    # get data for classifier
    pages, classes, images = loader.getPagesClassesAndImagesCount()

    # save repo for the next time
    loader.saveToJSON('repo.json')

    clf = WebClassifier()
    clf.loadData(pages, classes, list(images.values()))
    clf.saveToDataToFile('wyniki.txt')

    site = 'https://likegeeks.com/python-gui-examples-tkinter-tutorial/'
    print('predicting category for ', site, '...')
    data = WebScrapper.Scrapper().scrapPage(site)
    clf.predict(data[0], data[1])
Example #7
    start = time.time()

    #####################
    # SETUP
    #####################

    # data = DataPreprocess('./toy_preprocessed')
    # data.process_train_val_data('./toy_data', 2, 2)
    # data.process_challenge_data('./challenge_data')
    # Run script 'trainingValidationSplit.py'

    BATCH_SIZE = 50
    EPOCH = 1

    dataset = DataLoader('./toy_preprocessed/id_dicts')
    training_set = dataset.get_traing_set('./toy_train', BATCH_SIZE, 123)
    validation_sets = dataset.get_validation_sets('./toy_val')
    challenge_sets = dataset.get_challenge_sets(
        './toy_preprocessed/challenge_data')

    model = DAE(BATCH_SIZE)
    opt = keras.optimizers.Adam()

    #####################
    # TRAIN MODEL
    #####################

    # print("Initial Training")

    # count = 0
Example #8
def run():
    mean = (0.5, 0.5, 0.5)
    std = (0.5, 0.5, 0.5)
    transforms = alb.Compose([
        alb.Normalize(mean, std, always_apply=True),
        alb.Resize(50, 200, always_apply=True)
    ])

    dataset = DataLoader.DataLoader(transforms)
    pickle.dump(dataset.vocab.word_to_idx,
                open('input/word_to_idx.pickle', 'wb'))
    pickle.dump(dataset.vocab.idx_to_word,
                open('input/idx_to_word.pickle', 'wb'))

    dataset_size = len(dataset)
    indexes = list(range(dataset_size))
    val_split = int(CONFIG.val_size * dataset_size)
    train_index, val_index = indexes[val_split:], indexes[:val_split]
    # SubsetRandomSampler draws only from the given index lists, keeping the
    # train and validation subsets disjoint.
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_index)
    val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_index)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=CONFIG.Batch_Size,
                                               num_workers=4,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=CONFIG.Batch_Size,
                                             num_workers=4,
                                             pin_memory=True,
                                             sampler=val_sampler)

    if torch.cuda.is_available():
        accelerator = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        accelerator = 'cpu'

    device = torch.device(accelerator)

    num_classes = len(dataset.vocab.word_to_idx) + 1

    model = CaptchaModel.CaptchaModel(input_channels=CONFIG.input_channels,
                                      out_channels=CONFIG.out_channels,
                                      kernel_size=CONFIG.kernel_size,
                                      conv_dropout=CONFIG.conv_dropout,
                                      max_pool_size=CONFIG.max_pool_size,
                                      num_conv_layers=CONFIG.num_conv_layers,
                                      input_dims=CONFIG.input_dims,
                                      hidden_dims=CONFIG.hidden_dims,
                                      num_layers=CONFIG.num_layers,
                                      rnn_dropout=CONFIG.rnn_dropout,
                                      num_classes=num_classes)

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           threshold=0.01,
                                                           mode='min')
    blank = num_classes - 1

    best_loss = 1e4

    print('------ [INFO] STARTING TRAINING ------')
    for epoch in range(CONFIG.Epochs):
        train_loss = engine.train_fn(model, train_loader, optimizer, blank,
                                     device)
        val_loss = engine.eval_fn(model, val_loader, blank, device)
        scheduler.step(val_loss)
        print(
            f'EPOCH -> {epoch}/{CONFIG.Epochs} | TRAIN LOSS = {train_loss} | VAL LOSS = {val_loss}'
        )
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model.state_dict()
            predict.predict('input/captcha_images_v2/8y6b3.png')
    torch.save(best_model, CONFIG.MODEL_PATH)
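engine.train_fn and engine.eval_fn receive the index of the CTC blank token but are not included in the snippet. A minimal sketch of such a training step with torch.nn.CTCLoss is shown below; the batch layout (images, targets, target_lengths) and the model output shape (batch, time, classes) are assumptions.

import torch
import torch.nn as nn


def train_fn(model, loader, optimizer, blank, device):
    # Hypothetical CTC training epoch matching the call signature above.
    model.train()
    ctc = nn.CTCLoss(blank=blank, zero_infinity=True)
    total_loss = 0.0
    for images, targets, target_lengths in loader:  # assumed batch layout
        images, targets = images.to(device), targets.to(device)
        logits = model(images)  # (batch, time, classes) assumed
        log_probs = logits.log_softmax(2).permute(1, 0, 2)  # CTC expects (time, batch, classes)
        input_lengths = torch.full((images.size(0),), log_probs.size(0),
                                   dtype=torch.long)
        loss = ctc(log_probs, targets, input_lengths, target_lengths)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)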
Example #9
    def __init__(self):
        # Input shape
        self.img_rows = 128
        self.img_cols = 128
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        # Configure data loader
        self.dataset_name = 'FaceToSticker'
        # Use the DataLoader object to import a preprocessed dataset
        self.data_loader = DataLoader.DataLoader(
            dataset_name=self.dataset_name,
            img_res=(self.img_rows, self.img_cols))

        # Calculate output shape of D (PatchGAN)
        patch = int(self.img_rows / 2**4)
        self.disc_patch = (patch, patch, 1)

        # Number of filters in the first layer of G and D
        self.gf = 32
        self.df = 64

        # Loss weights
        self.lambda_cycle = 10.0  # Cycle-consistency loss
        self.lambda_id = 0.9 * self.lambda_cycle  # Identity loss

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminators
        self.d_A = self.build_discriminator()
        self.d_B = self.build_discriminator()
        self.d_A.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
        self.d_B.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

        # -------------------------
        # Construct Computational
        #   Graph of Generators
        # -------------------------

        # Build the generators
        self.g_AB = self.build_generator()
        self.g_BA = self.build_generator()

        # Input images from both domains
        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # Translate images to the other domain
        fake_B = self.g_AB(img_A)
        fake_A = self.g_BA(img_B)
        # Translate images back to original domain
        reconstr_A = self.g_BA(fake_B)
        reconstr_B = self.g_AB(fake_A)
        # Identity mapping of images
        img_A_id = self.g_BA(img_A)
        img_B_id = self.g_AB(img_B)

        # For the combined model we will only train the generators
        self.d_A.trainable = False
        self.d_B.trainable = False
        self.d_A.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
        self.d_B.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

        # Discriminators determine the validity of translated images
        valid_A = self.d_A(fake_A)
        valid_B = self.d_B(fake_B)

        # Combined model trains generators to fool discriminators
        self.combined = Model(inputs=[img_A, img_B],
                              outputs=[
                                  valid_A, valid_B, reconstr_A, reconstr_B,
                                  img_A_id, img_B_id
                              ])
        self.combined.compile(loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
                              loss_weights=[
                                  1, 1, self.lambda_cycle, self.lambda_cycle,
                                  self.lambda_id, self.lambda_id
                              ],
                              optimizer=optimizer)
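The constructor above only wires up the models; the training loop is not part of the snippet. As a hedged illustration, one step of generator training against the combined model usually looks like the sketch below, where data_loader.load_batch is an assumed generator yielding paired image batches.

import numpy as np


def train(self, epochs, batch_size=1):
    # Hypothetical excerpt of a training loop for the class above.
    valid = np.ones((batch_size,) + self.disc_patch)  # PatchGAN "real" labels

    for epoch in range(epochs):
        for imgs_A, imgs_B in self.data_loader.load_batch(batch_size):
            # Train the generators: fool both discriminators, reconstruct the
            # cycled images, and preserve the identity mappings.
            g_loss = self.combined.train_on_batch(
                [imgs_A, imgs_B],
                [valid, valid, imgs_A, imgs_B, imgs_A, imgs_B])
            print('g_loss:', g_loss)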
Example #10
    def __init__(self):
        # Input shape
        self.channels = 3
        self.lr_height = 128  # Low resolution height
        self.lr_width = 128  # Low resolution width
        self.lr_shape = (self.lr_height, self.lr_width, self.channels)
        self.hr_height = 128  # High resolution height
        self.hr_width = 128  # High resolution width
        self.hr_shape = (self.hr_height, self.hr_width, self.channels)

        # Number of residual blocks in the generator
        self.n_residual_blocks = 8

        # Following parameter and optimizer set as recommended in paper
        self.n_critic = 5
        self.clip_value = 0.01
        optimizer = RMSprop(lr=0.00005)

        # optimizer = Adam(0.0002, 0.5)
        # optimizer1 = RMSprop(lr=0.0001)

        # We use a pre-trained VGG19 model to extract image features from the high resolution
        # and the generated high resolution images and minimize the mse between them
        self.vgg = self.build_vgg()
        self.vgg.trainable = False
        self.vgg.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

        # Configure data loader
        self.dataset_name = 'random_dataset'
        self.predict_dir = 'predict'
        self.data_loader = DataLoader(dataset_name=self.dataset_name,
                                      img_res=(self.hr_width, self.hr_height))

        # Calculate output shape of D (PatchGAN)
        patch = int(self.hr_height / 2**4)
        self.disc_patch = (2, 1)

        # Number of filters in the first layer of G and D
        self.gf = 64
        self.df = 64

        # Build and compile the discriminator
        # self.discriminator = self.build_discriminator()
        # self.discriminator.summary()
        # self.discriminator.compile(loss='mse',
        #     optimizer=optimizer,
        #     metrics=['accuracy'])

        # Build and compile the critic
        self.discriminator = self.build_critic()
        self.discriminator.compile(loss=self.wasserstein_loss,
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Build the generator
        self.generator = self.dense_gener()
        self.generator.summary()
        # High res. and low res. images
        img_hr = Input(shape=self.hr_shape)
        img_lr = Input(shape=self.lr_shape)

        # Generate high res. version from low res.
        fake_hr = self.generator(img_lr)

        # Extract image features of the generated img
        fake_features = self.vgg(fake_hr)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # Discriminator determines validity of generated high res. images
        validity = self.discriminator(fake_hr)

        self.combined = Model([img_lr, img_hr], [validity, fake_features])

        self.combined.compile(loss=[self.wasserstein_loss, 'mse'],
                              loss_weights=[1e-3, 1],
                              optimizer=optimizer)
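self.wasserstein_loss, build_critic and dense_gener are referenced but not included in the snippet. The critic loss used with weight clipping is conventionally the mean of label times prediction; a sketch, assuming the Keras backend is available as K:

from keras import backend as K


def wasserstein_loss(self, y_true, y_pred):
    # Labels are +1 for real and -1 for fake samples, so minimizing the mean
    # product approximates the Wasserstein distance estimate used by WGAN.
    return K.mean(y_true * y_pred)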
Example #11
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import DataLoader as D
import Utils as U
from VRAE_tf import VRAE_tf

data_loader = D.DataLoader(D.Vocab('europarl_tvae_tf', D.Level.WORD))

flags = tf.app.flags

flags.DEFINE_string('model_name', 'VRAE_tf', '')
flags.DEFINE_string('ckpt_path', './results/VRAE_tf/ckpt/', '')
flags.DEFINE_string('logs_path', './results/VRAE_tf/logs/', '')
flags.DEFINE_integer('batch_size', 32, '')
flags.DEFINE_integer(
    'steps', U.epoch_to_step(10, data_loader.train_size, batch_size=32), '')
flags.DEFINE_float('lr', 0.001, 'learning rate')
flags.DEFINE_integer('z_size', 32, '')
flags.DEFINE_integer('max_seq_len', 15, '')
flags.DEFINE_integer('n_layers', 1, '')
flags.DEFINE_integer('embed_size', 512, '')
flags.DEFINE_integer('vocab_size', data_loader.vocab_size, '')
flags.DEFINE_integer('hidden_size', 512, '')
flags.DEFINE_bool('kl_anealing', True, 'whether to use the KL annealing trick')
flags.DEFINE_float('beta', 1.0, 'kl_loss coef')
flags.DEFINE_float('gamma', 5, '')
Example #12
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import DataLoader as dl
from sklearn.linear_model import LinearRegression
from datetime import datetime

dLoad = dl.DataLoader()
df = dLoad.PrepareDataSet()
print(df)

data = df.sort_index(ascending=True, axis=0)
new_data = pd.DataFrame(index=range(0, len(df)), columns=['Date', 'Close'])

for i in range(0, len(data)):
    new_data.loc[i, 'Date'] = data['Date'][i].strftime('%Y%m%d')
    new_data.loc[i, 'Close'] = data['Close'][i]

train = new_data[:770]
valid = new_data[770:]

x_train = train.drop('Close', axis=1)
y_train = train['Close']
x_valid = valid.drop('Close', axis=1)
y_valid = valid['Close']

from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
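The snippet stops right after creating the scaler. A plausible continuation (not part of the original code) would scale the features, grid-search a k-nearest-neighbours regressor, and report the validation RMSE:

x_train_scaled = scaler.fit_transform(x_train.astype(float))
x_valid_scaled = scaler.transform(x_valid.astype(float))

params = {'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9]}
knn = neighbors.KNeighborsRegressor()
model = GridSearchCV(knn, params, cv=5)
model.fit(x_train_scaled, y_train.astype(float))

preds = model.predict(x_valid_scaled)
rmse = np.sqrt(np.mean((np.array(y_valid, dtype=float) - preds) ** 2))
print('Validation RMSE:', rmse)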
Example #13
def main(cfg: DictConfig) -> None:
    # set up mlflow experiment id
    mlflow.set_tracking_uri(f"file://{to_absolute_path(cfg.path_to_mlflow)}")
    experiment = mlflow.get_experiment_by_name(cfg.experiment_name)

    if experiment is not None:
        run_kwargs = {'experiment_id': experiment.experiment_id}
        if cfg["pretrained"] is not None:  # initialise with pretrained run, otherwise create a new run
            run_kwargs['run_id'] = cfg["pretrained"]["run_id"]
    else:  # create new experiment
        experiment_id = mlflow.create_experiment(cfg.experiment_name)
        run_kwargs = {'experiment_id': experiment_id}

    # run the training with mlflow tracking
    with mlflow.start_run(**run_kwargs) as main_run:
        if cfg["pretrained"] is not None:
            mlflow.start_run(experiment_id=run_kwargs['experiment_id'],
                             nested=True)
        active_run = mlflow.active_run()
        run_id = active_run.info.run_id

        setup_gpu(cfg.gpu_cfg)
        training_cfg = OmegaConf.to_object(
            cfg.training_cfg)  # convert to python dictionary
        scaling_cfg = to_absolute_path(cfg.scaling_cfg)
        dataloader = DataLoader.DataLoader(training_cfg, scaling_cfg)
        setup = dataloader.config["SetupNN"]
        TauLosses.SetSFs(*setup["TauLossesSFs"])
        print("loss consts:", TauLosses.Le_sf, TauLosses.Lmu_sf,
              TauLosses.Ltau_sf, TauLosses.Ljet_sf)

        if setup["using_new_loss"]: tf.config.run_functions_eagerly(True)
        netConf_full = dataloader.get_net_config()

        if dataloader.input_type == "Adversarial":
            model = create_model(
                netConf_full,
                dataloader.model_name,
                loss=setup["loss"],
                use_newloss=setup["using_new_loss"],
                use_AdvDataset=True,
                adv_param=dataloader.adversarial_parameter,
                n_adv_tau=dataloader.adv_batch_size,
                adv_learning_rate=dataloader.adv_learning_rate)
        else:
            model = create_model(netConf_full,
                                 dataloader.model_name,
                                 loss=setup["loss"],
                                 use_newloss=setup["using_new_loss"])

        if cfg.pretrained is None:
            print(
                "Warning: no pretrained NN -> training will be started from scratch"
            )
            old_opt = None
        else:
            print("Warning: training will be started from pretrained model.")
            print(
                f"Model: run_id={cfg.pretrained.run_id}, experiment_id={cfg.pretrained.experiment_id}, model={cfg.pretrained.starting_model}"
            )

            path_to_pretrain = to_absolute_path(
                f'{cfg.path_to_mlflow}/{cfg.pretrained.experiment_id}/{cfg.pretrained.run_id}/artifacts/'
            )
            old_model = load_model(
                path_to_pretrain +
                f"/model_checkpoints/{cfg.pretrained.starting_model}",
                compile=False,
                custom_objects=None)
            for layer in model.layers:
                weights_found = False
                for old_layer in old_model.layers:
                    if layer.name == old_layer.name:
                        layer.set_weights(old_layer.get_weights())
                        weights_found = True
                        break
                if not weights_found:
                    print(f"Weights for layer '{layer.name}' not found.")
            old_opt = old_model.optimizer
            old_vars = [var.name for var in old_model.trainable_variables]

        compile_model(model, setup["optimizer_name"], setup["learning_rate"],
                      setup["metrics"], setup["schedule_decay"])
        fit_hist = run_training(model,
                                dataloader,
                                False,
                                cfg.log_suffix,
                                setup["using_new_loss"],
                                old_opt=old_opt)

        # log NN params
        for net_type in [
                'tau_net', 'comp_net', 'comp_merge_net', 'conv_2d_net',
                'dense_net'
        ]:
            mlflow.log_params({
                f'{net_type}_{k}': v
                for k, v in cfg.training_cfg.SetupNN[net_type].items()
            })
        mlflow.log_params({
            f'TauLossesSFs_{i}': v
            for i, v in enumerate(cfg.training_cfg.SetupNN.TauLossesSFs)
        })
        with open(
                to_absolute_path(
                    f'{cfg.path_to_mlflow}/{run_kwargs["experiment_id"]}/{run_id}/artifacts/model_summary.txt'
                )) as f:
            for l in f:
                if (s := 'Trainable params: ') in l:
                    mlflow.log_param('n_train_params',
                                     int(l.split(s)[-1].replace(',', '')))

        # log training related files
        mlflow.log_dict(training_cfg, 'input_cfg/training_cfg.yaml')
        mlflow.log_artifact(scaling_cfg, 'input_cfg')
        mlflow.log_artifact(to_absolute_path("Training_CNN.py"), 'input_cfg')
        mlflow.log_artifact(to_absolute_path("common.py"), 'input_cfg')

        # log hydra files
        mlflow.log_artifacts('.hydra', 'input_cfg/hydra')
        mlflow.log_artifact('Training_CNN.log', 'input_cfg/hydra')

        # log misc. info
        mlflow.log_param('run_id', run_id)
        mlflow.log_param('git_commit', _get_git_commit(to_absolute_path('.')))
        print(
            f'\nTraining has finished! Corresponding MLflow experiment name (ID): {cfg.experiment_name}({run_kwargs["experiment_id"]}), and run ID: {run_id}\n'
        )
        mlflow.end_run()

        # Temporary workaround to kill additional subprocesses that have not exited correctly
        try:
            current_process = psutil.Process()
            children = current_process.children(recursive=True)
            for child in children:
                child.kill()
        except Exception:
            pass
Example #14
                    help="Training config")
args = parser.parse_args()

save_path = args.save_path  # "/home/russell/tfdata/testing"
scaling_cfg = args.scaling_cfg  #"../../configs/ShuffleMergeSpectral_trainingSamples-2_files_0_50.json"
training_cfg_path = args.training_cfg  #../../configs/training_v1.yaml

with open(training_cfg_path) as file:
    training_cfg = yaml.full_load(file)
    print("Training Config Loaded")

training_cfg["SetupNN"]["n_batches"] = args.n_batches
training_cfg["SetupNN"][
    "n_batches_val"] = 0  # only generate training data as train/val split done later in training
training_cfg["SetupNN"]["validation_split"] = 0
training_cfg["Setup"]["input_type"] = "ROOT"  # make ROOT so generator loads

dataloader = DataLoader.DataLoader(training_cfg, scaling_cfg)
print("DataLoader Created")
gen_train = dataloader.get_generator(primary_set=True,
                                     return_weights=dataloader.use_weights,
                                     show_progress=True)
print("Generator Loaded")
input_shape, input_types = dataloader.get_input_config()
print("Input shapes and Types acquired")
data_train = tf.data.Dataset.from_generator(
    gen_train, output_types=input_types,
    output_shapes=input_shape).prefetch(tf.data.AUTOTUNE)
print("Dataset extracted from DataLoader")
tf.data.experimental.save(data_train, save_path, compression="GZIP")
print("Conversion Complete")
Example #15
        self.model.fit(X_train, y_train)

        # save model
        self.save_model()

        val_predictions = self.model.predict(X_val)

        # save the validation result
        val_accuracy = accuracy_score(y_val, val_predictions) * 100
        self.save_result(val_accuracy)

        print("Validation accuracy - {}: {} %".format(self.model_name, val_accuracy))


if __name__ == '__main__':
    # get training data
    data_loader = DataLoader('./data/train/train.txt', 'latin-1')
    file_content = data_loader.read_file()
    X_train_raw, y_train_raw = data_loader.get_data(file_content)

    # preprocessing training data
    nlp = NLP()
    X_train_preprocessed = nlp.preprocessing(X_train_raw)

    # transform text data to vector
    transform = FeatureExtraction(X_train_preprocessed)

    print("1. Using count vectorizer: ")
    X_train_vector = transform.count_vect()

    X_train, y_train = X_train_vector, y_train_raw
    # training model
Example #16
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
from keras import utils
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import DataLoader
import os
import json

train_dl = DataLoader.DataLoader(DataLoader.TRAINING_DATA_DIR)
dev_dl = DataLoader.DataLoader(DataLoader.DEV_DATA_DIR)
test_dl = DataLoader.DataLoader(DataLoader.TEST_DATA_DIR)


print("Train samples: ", train_dl.samples)
print("Dev samples: ", dev_dl.samples)
print("Test samples: ", test_dl.samples)

input = Input(shape=(DataLoader.SENT_FEATURES, ))
dense0 = Dense(512, activation="relu")(input)
dropout = Dropout(0.5)(dense0)
dense1 = Dense(256, activation="relu")(dropout)
dropout = Dropout(0.5)(dense1)
dense2 = Dense(3, activation="softmax")(dropout)

model = Model(inputs=input, outputs=dense2)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["categorical_accuracy"])
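EarlyStopping and ModelCheckpoint are imported but the snippet is cut off before the training call. A possible continuation is sketched below; the attribute names train_dl.x / train_dl.y (and the analogous dev attributes) are assumptions about the DataLoader class, which is not shown.

callbacks = [
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
    ModelCheckpoint("best_model.h5", monitor="val_loss", save_best_only=True),
]

history = model.fit(train_dl.x, utils.to_categorical(train_dl.y, 3),
                    validation_data=(dev_dl.x, utils.to_categorical(dev_dl.y, 3)),
                    epochs=50, batch_size=32, callbacks=callbacks)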
Example #17
"""
so it is like resnet18 + fc (customized)
"""

if __name__ == '__main__':
    data_dir = r'F:\img_training\data'
    data_transform = {
        'train': transforms.Compose([
            DataLoader.Rescale(256),
            DataLoader.RandomCrop(224),
            # transforms.Resize(224, 224),
            DataLoader.ToTensor(),
            # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            # transforms.ToPILImage
        ])}
    image_dataset = DataLoader.img_dataset(root_dir=data_dir, transform=data_transform['train'])
    dataloader = DataLoader.DataLoader(image_dataset, batch_size=4, shuffle=True, num_workers=4)

    dataset_sizes = len(image_dataset)
    model_conv = models.resnet18(pretrained=True)
    for param in model_conv.parameters():
        param.requires_grad = False

    # Parameters of newly constructed modules have requires_grad=True by default
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 12)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_conv = model_conv.to(device)

    # Observe that only parameters of final layer are being optimized as
    # opposed to before.
    optimizer_conv = torch.optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
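The snippet ends at the optimizer; a minimal training-loop sketch for this frozen-backbone setup follows. The batch keys 'image' and 'label' are assumptions about what img_dataset yields (it is not shown), and nn/torch come from the module's own imports.

criterion = nn.CrossEntropyLoss()

model_conv.train()
for epoch in range(5):
    running_loss = 0.0
    for batch in dataloader:
        inputs = batch['image'].to(device).float()
        labels = batch['label'].to(device).long()

        optimizer_conv.zero_grad()
        outputs = model_conv(inputs)
        loss = criterion(outputs, labels)
        loss.backward()  # only the new fc layer receives gradients
        optimizer_conv.step()
        running_loss += loss.item()
    print('epoch {}: loss {:.4f}'.format(epoch, running_loss / len(dataloader)))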
Example #18
from datetime import datetime as dt
import dash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc

import plotly.graph_objects as go
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import MinMaxScaler
import DataLoader

dataLoader = DataLoader.DataLoader("./Sensor_Weather_Data_Challenge.csv")
df = dataLoader.getDf()
scaler = MinMaxScaler()

clusterDf = df.iloc[:, 0:14].copy()
clusterDf["maxValue"] = clusterDf.iloc[:, 0:13].max(axis=1)
clusterDf.drop(columns=[
    "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12",
    "d13"
],
               inplace=True)
x_scaled = scaler.fit_transform(clusterDf)
clusterDf = pd.DataFrame(data=x_scaled, index=clusterDf.index)
cDf = clusterDf.sample(frac=1)
nObs = len(cDf)
splitNo = round(0.7 * nObs)
cDf = cDf.head(splitNo)
Example #19
import DataLoader

data_loader = DataLoader.DataLoader()
# data_loader.load_places_for_city(city_id=777934, use_last_cursor=True)
DataLoader.DataLoader.search_places_for_city('5b01ac9aff93a20480b397a9', use_last_cursor=True)
Example #20
def main():
    args = parse_arguments()

    lr_disc = args.lr_disc
    lr_gen = args.lr_gen
    num_epochs = args.num_epochs
    data_dir = args.data_dir
    save_interval = args.save_interval
    wt_recon = args.wt_recon
    wt_KL = args.wt_KL

    dataset = KITTIDataset(folder_name=data_dir,
                           transform=transforms.Compose([
                               RandomVerticalFlip(),
                               RandomHorizontalFlip(),
                               RandomCrop([320, 896]),
                               Normalize(),
                               ToTensor()
                           ]))

    dataloader = DataLoader(dataset,
                            batch_size=64,
                            shuffle=True,
                            num_workers=4)

    # create required directories
    results_dir = os.path.join(os.getcwd(), "results")
    # models_dir = os.path.join(os.getcwd(), "saved_models")

    timestamp = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    curr_dir = os.path.join(results_dir, timestamp)

    disc_save_path = os.path.join(curr_dir, "disc_lrd_{}".format(lr_disc))
    gen_save_path = os.path.join(curr_dir, "gen_lrg_{}".format(lr_gen))

    make_dirs([results_dir, curr_dir])

    ## create generator and discriminator instances
    model_gen = gen().to(DEVICE)
    model_disc = disc().to(DEVICE)

    RCLoss = nn.L1Loss()
    # criterion = nn.BCELoss()

    losses_GG = []
    losses_DD = []
    losses_RR = []
    mean_fake_probs_arr = []
    std_fake_probs_arr = []
    # train the GAN model
    for epoch in range(num_epochs):
        losses_D = []
        losses_G = []
        losses_Rec = []
        fake_probs = []

        for batch_ndx, frames in enumerate(dataloader):

            # my data
            # frames =  np.random.randint(0, high=1, size=(4,2,320,896))
            # frames =  torch.tensor(frames).to(DEVICE, dtype=torch.float)
            frames = frames.to(DEVICE).float()
            frames1 = frames[:, 0:1, :, :]
            frames2 = frames[:, 1:2, :, :]
            # train discriminator
            with torch.no_grad():
                optical_flow, mean, logvar = model_gen(frames)
                frame2_fake = warp(frames1, optical_flow)

            outDis_real = model_disc(frames1)
            lossD_real = torch.log(outDis_real)

            outDis_fake = model_disc(frame2_fake)

            lossD_fake = torch.log(1.0 - outDis_fake)
            loss_dis = lossD_real + lossD_fake
            loss_dis = -0.5 * loss_dis.mean()

            # calculate customized GAN loss for discriminator

            model_disc.optimizer.zero_grad()
            loss_dis.backward()
            model_disc.optimizer.step()

            losses_D.append(loss_dis.item())

            # train generator
            optical_flow, mean, logvar = model_gen(frames)
            frame2_fake = warp(frames1, optical_flow)

            model_disc.optimizer.zero_grad()

            outDis_fake = model_disc(frame2_fake)

            loss_KLD = -0.5 * torch.sum(1 + logvar - mean * mean -
                                        torch.exp(logvar))
            loss_gen = -torch.log(outDis_fake)
            loss_gen = loss_gen.mean()

            loss_recons = RCLoss(frame2_fake, frames2)

            total_gen_loss = loss_gen + wt_recon * loss_recons + wt_KL * loss_KLD

            model_gen.optimizer.zero_grad()
            total_gen_loss.backward()
            model_gen.optimizer.step()

            losses_G.append(loss_gen.item())
            losses_Rec.append(loss_recons.item())
            fake_probs.extend(outDis_fake.clone().detach().cpu().numpy())

            print(
                "Epoch: [{}/{}], Batch_num: {}, Discriminator loss: {:.4f}, Generator loss: {:.4f}, Recons_Loss: {:.4f}, fake_prob: {:.4f}"
                .format(epoch, num_epochs, batch_ndx, losses_D[-1],
                        losses_G[-1], loss_recons, np.mean(fake_probs)))

        losses_GG.append(np.mean(losses_G))
        losses_DD.append(np.mean(losses_D))
        losses_RR.append(np.mean(losses_Rec))
        mean_fake_probs_arr.append(np.mean(fake_probs))
        std_fake_probs_arr.append(np.std(fake_probs))

        print(
            "Epoch: [{}/{}], Discriminator loss: {:.4f}, Generator loss: {:.4f}, recons_loss: {:.4f} fake_prob: {:.4f}"
            .format(epoch + 1, num_epochs, losses_DD[-1], losses_GG[-1],
                    losses_RR[-1], mean_fake_probs_arr[-1]))

        if (epoch + 1) % save_interval == 0:
            save_model(model_disc, epoch, model_disc.optimizer,
                       disc_save_path + "epoch_{}.pth".format(epoch))
            save_model(model_gen, epoch, model_gen.optimizer,
                       gen_save_path + "epoch_{}.pth".format(epoch))

    plot_props([losses_GG, losses_DD, losses_RR, mean_fake_probs_arr], [
        "Generator_loss", "Discriminator_loss", "Reconstruction_loss",
        "disc_fake_prob"
    ], curr_dir)
Example #21
    return answers


def get_the_answer_unclassified(print_answers, best_sentence, best_score,
                                question):
    import csv
    answers = ""
    with open('resources/Single_FaQ.csv') as csvfile:
        csv_content = csv.reader(csvfile, delimiter='\t')
        for row in csv_content:
            if row[1] == best_sentence:
                answers = row[2]
    if print_answers:
        print_question_answer(question, answers, best_score)
    return answers


if __name__ == '__main__':
    print(dl)
    loader = dl.DataLoader('resources/Single_FaQ.csv')
    feature_set = loader.get_feature_set()
    train_set_length = round(len(feature_set) / 2)
    train_set = feature_set[:train_set_length]
    test_set = feature_set[train_set_length:]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    print('Actual Value: ' + test_set[0][1])
    print('Predicted Value: ' + classifier.classify(test_set[0][0]))
    # print("Classifier accuracy percent:",(nltk.classify.accuracy(classifier, test_set))*100)
    # print(classifier.show_most_informative_features(15))
    get_questions_from_user(True)
Example #22
def main(args=None):
    log = open('/home/binhnguyen/PycharmProjects/Sentence_Compression/ver_1.0/log.txt','w')

    BATCH = 64
    SENTENCE_LEN = 50
    glove_model = load_glove()
    train = SC("train", SENTENCE_LEN, glove_model)
    test = SC("test", SENTENCE_LEN, glove_model)
    valid = SC("valid", SENTENCE_LEN, glove_model)
    train_data = DataLoader(train, batch_size=BATCH, shuffle=True)
    test_data = DataLoader(test, batch_size=BATCH, shuffle=False)
    valid_data = DataLoader(valid, batch_size=BATCH, shuffle=False)

    model = LSTMnet(input_dim= 100, hidden_dim= 128 )
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
    print("Load model done!!")

    for epoch in range(20):
        train_loss = 0.0
        for data in train_data:
            feature, label = data
            label = label.view(-1)
            feature, label = Variable(feature.float()), Variable(label.squeeze())
            y_hat = model(feature)
            loss = criterion(y_hat, label)
            train_loss += loss.data.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        correct = 0
        total = 0
        test_loss = 0
        with torch.no_grad():
            for data in test_data:
                feature, label = data
                label = label.view(-1)
                feature, label = Variable(feature.float()), Variable(label.squeeze())
                outputs = model(feature)
                _, predicted = torch.max(outputs.data, 1)
                total += label.size(0)
                correct += (predicted == label).sum().item()
                test_loss += criterion(outputs , label).data.item()

        test_accuracy = correct * 100.0 / total

        correct = 0
        total = 0
        valid_loss = 0
        with torch.no_grad():
            for data in valid_data:
                feature, label = data
                label = label.view(-1)
                feature, label = Variable(feature.float()), Variable(label.squeeze())
                outputs = model(feature)
                _, predicted = torch.max(outputs.data, 1)
                total += label.size(0)
                correct += (predicted == label).sum().item()
                valid_loss += criterion(outputs, label).data.item()
        valid_accuracy = correct * 100.0 / total

        print('Epoch {}'.format(epoch), file=log)
        print('Train Loss = %0.2f' % train_loss, file=log)
        print('Test Loss = %0.2f .. Accuracy Test = %0.2f' % (test_loss, test_accuracy), file=log)
        print('Valid Loss = %0.2f .. Accuracy Valid = %0.2f' % (valid_loss, valid_accuracy), file=log)
        print('--------------------', file=log)

    log.close()
Example #23
    data_list = [  # First data set (Positional Gestures)
        image_val_data_location_1_1, image_val_data_location_2_1,
        image_val_data_location_3_1, image_val_data_location_4_1,
        image_val_data_location_5_1, image_val_data_location_6_1,
        image_val_data_location_7_1, image_val_data_location_8_1,
        image_val_data_location_9_1, image_val_data_location_10_1]


    # if training mode is false, then data list can just be an empty array
    if not train_mode:
        data_list = []

    # create data loader
    # if data_list is an empty array, calling set_elements_to_train would raise an error
    data = DataLoader.DataLoader(data_paths=data_list, size_x=res_x,
                                 size_y=res_y, num_inputs=raw_input_size,
                                 num_outputs=raw_output_size, black_white=True)


    # # just for testing purpose
    # raw_RGB = data.load_image(real_time_path)  # loader raw image
    # raw_RGB = np.array(raw_RGB, dtype=np.float32)
    #
    # pre = net.predict(np.array([raw_RGB]))
    # for p in pre:
    #     for i in p:
    #         print(i*100, end="  ")
    # exit(0)
    # # testing ends here

    # if in training mode
Example #24
def main():
    args = parse_arguments()

    lr_disc = args.lr_disc
    lr_gen = args.lr_gen
    num_epochs = args.num_epochs
    data_dir = args.data_dir
    save_interval = args.save_interval
    wt_recon = args.wt_recon
    wt_KL = args.wt_KL

    dataset = KITTIDataset(folder_name=data_dir,
                           transform=transforms.Compose([
                               RandomVerticalFlip(),
                               RandomHorizontalFlip(),
                               RandomCrop([320, 896]),
                               Normalize(),
                               ToTensor()
                           ]))

    dataloader = DataLoader(dataset,
                            batch_size=64,
                            shuffle=True,
                            num_workers=4)

    # create required directories
    results_dir = os.path.join(os.getcwd(), "results")
    # models_dir = os.path.join(os.getcwd(), "saved_models")

    timestamp = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    curr_dir = os.path.join(results_dir, timestamp)

    # disc_save_path = os.path.join(curr_dir, "disc_lrd_{}".format(lr_disc))
    gen_save_path = os.path.join(curr_dir, "gen_lrg_{}".format(lr_gen))

    make_dirs([results_dir, curr_dir])

    ## create generator and discriminator instances
    model_gen = gen().to(DEVICE)
    # model_disc = disc().to(DEVICE)

    RCLoss = nn.L1Loss()
    # criterion = nn.BCELoss()

    losses_GG = []
    losses_DD = []
    losses_RR = []
    mean_fake_probs_arr = []
    std_fake_probs_arr = []
    # train the GAN model

    save_sample_flag = False
    for epoch in range(num_epochs):
        losses_D = []
        losses_G = []
        losses_Rec = []
        fake_probs = []
        if (epoch % 2 == 0):
            save_sample_flag = True
        for batch_ndx, frames in enumerate(dataloader):

            # process data
            ##########################################
            frames = frames.to(DEVICE).float()
            frames1 = frames[:, 0:1, :, :]
            frames2 = frames[:, 1:2, :, :]

            # train generator
            #########################################
            optical_flow, frame2_fake, total_gen_loss, loss_recons = train_vae(
                frames, frames1, frames2, RCLoss, wt_recon, wt_KL, model_gen)
            losses_G.append(total_gen_loss * 1.0)
            losses_Rec.append(loss_recons * 1.0)

            # save images, and flow
            ##########################################
            if (save_sample_flag):
                save_samples(frame2_fake.clone().detach().cpu().numpy(),
                             curr_dir, epoch, "predicted")
                save_samples(frames1.cpu().numpy(), curr_dir, epoch,
                             "actual_frame1")
                save_samples(frames2.cpu().numpy(), curr_dir, epoch,
                             "actual_frame2")
                save_flow(optical_flow.clone().detach().cpu().numpy(),
                          curr_dir, epoch, "flow")
                save_sample_flag = False

            print(
                "Epoch: [{}/{}], Batch_num: {}, Generator loss: {:.4f}, Recons_Loss: {:.4f}"
                .format(epoch, num_epochs, batch_ndx, losses_G[-1],
                        loss_recons))

        losses_GG.append(np.mean(losses_G))
        losses_RR.append(np.mean(losses_Rec))

        print("Epoch: [{}/{}], Generator loss: {:.4f}, recons_loss: {:.4f}".
              format(epoch + 1, num_epochs, losses_GG[-1], losses_RR[-1]))

        # save model
        ##################################################
        if (epoch + 1) % save_interval == 0:
            save_model(model_gen, epoch, model_gen.optimizer,
                       gen_save_path + "epoch_{}.pth".format(epoch))

    plot_props([losses_GG, losses_RR],
               ["Generator_loss", "Reconstruction_loss"], curr_dir)
Example #25
from todloop.routines import DataLoader
from todloop.tod import TODLoader
from todloop.base import TODLoop
from reduction_routines import TimeSeries, PlotGlitches, Energy, SaveEvents, NPixelStudy, EnergyStudy, CRCorrelationFilter
from calibration.routines import FixOpticalSign, CalibrateTOD


"""
INITIALIZE TODLoop
"""
loop = TODLoop()
todid = raw_input("Enter TOD id:") 
tod_id = int(todid)
loop.add_tod_list("../data/covered_tods.txt")
loop.add_routine(DataLoader(input_dir="../outputs/covered_tods_cosig/", output_key="cuts"))

"""
LOAD TOD DATA
"""
loop.add_routine(TODLoader(output_key="tod_data"))
loop.add_routine(FixOpticalSign(input_key="tod_data", output_key="tod_data"))
loop.add_routine(CalibrateTOD(input_key="tod_data",output_key="tod_data"))

"""
ROUTINES
"""
loop.add_routine(TimeSeries(tod_key="tod_data",output_key="timeseries"))
loop.add_routine(Energy(timeseries_key="timeseries",output_key="energy_calculator"))
loop.add_routine(CRCorrelationFilter(timeseries_key="timeseries",cosig_key = "cuts",tod_key="tod_data", output_key= "cr_cuts"))
loop.add_routine(PlotGlitches(tag=tod_id,cosig_key="cr_cuts",tod_key="tod_data",timeseries_key = "timeseries"))
#loop.add_routine(SaveEvents(tag=tod_id,cosig_key ="cr_cuts",tod_key="tod_data",energy_key="energy_calculator",output_key="events"))
Example #26
def main(argv):

    #load configuration
    parameters = load_configuration()

    #load parameters

    #dataset
    path_to_dataset = parameters['path_to_dataset']
    load_size = parameters['load_size']

    #SAX
    alphabet_size = parameters['alphabet_size']
    paa_size = parameters['paa_size']
    window_size = parameters['window_size']
    step = parameters['step']
    substring_size = parameters['substring_size']

    #smoothing
    threshold_freq = parameters['threshold_freq']

    #projections
    prj_size = parameters['prj_size']
    prj_iterations = parameters['prj_iterations']
    anomaly_threshold = parameters['anomaly_threshold']

    #loading data
    loader = DataLoader.DataLoader(path_to_dataset)
    data = DataTypes.Data()

    #loader.load_all(data,200)
    loader.load_subset(data, load_size, 100)

    #period from which extract anomalies
    begin_date = datetime.datetime.fromtimestamp(data.index_to_time[0])
    end_date = datetime.datetime.fromtimestamp(data.index_to_time[load_size -
                                                                  1])

    if parameters['power_type'] == -1:
        tank = parameters['tank']
        sensor_type = parameters['sensor_type']
        #print(data.measures[0])
        print("Loading of %i tank %i  data from %s to %s " %
              (sensor_type, tank, begin_date, end_date))
        s_values = [
            data.measures[i][0][tank][sensor_type]
            for i in range(0, len(data.measures))
        ]
    else:
        power_type = parameters['power_type']
        print("Loading measures of power %i from %s to %s " %
              (power_type, begin_date, end_date))
        s_values = [
            data.measures[i][1][power_type]
            for i in range(0, len(data.measures))
        ]

    len_serie = len(s_values)
    hash_table_substrings = {}

    #getting first n alphabet letters
    alphabet = get_alphabet_letters(alphabet_size)
    #creating hash table indexed by all of substrings of length k
    hash_table_substrings = get_hash_table(alphabet, prj_size)

    #list containg score for each window
    anomalies_score = []

    for index in range(0, len_serie, step):
        begin = index
        end = begin + window_size

        if end < len_serie:
            window_values = s_values[begin:end]
            window_znorm = znorm(window_values)  # normalize the current window
            window_paa = paa(window_znorm, paa_size)
            window_string = ts_to_string(window_paa,
                                         cuts_for_asize(alphabet_size))

            #each character of the string corresponds to k values of the series
            k = window_size // paa_size

            #get smoothed string
            window_smoothed = smoothing(window_string, threshold_freq)

            #fill hash table by applying random projection
            hash_table_substrings = put_in_bucket(hash_table_substrings,
                                                  window_smoothed, begin,
                                                  prj_iterations, prj_size,
                                                  substring_size, k)

            total = 0
            for key, values in hash_table_substrings.items():
                total = total + len(values)

            buckets_with_anomalies, bucket_freq = analyzed_bucket(
                hash_table_substrings, total, anomaly_threshold)
            #number of bucket with anomalies
            n_buckets_anomalies = len(buckets_with_anomalies.keys())

            #getting score for current window
            avg_window_score = getting_score(hash_table_substrings,
                                             buckets_with_anomalies,
                                             n_buckets_anomalies)
            anomalies_score.append(avg_window_score)

            #reset table
            hash_table_substrings = get_hash_table(alphabet, prj_size)

        else:
            break

    print(anomalies_score)
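The script ends by printing the raw per-window scores. As a small illustrative addition (not in the original), the scores can be inspected visually with matplotlib:

import matplotlib.pyplot as plt

plt.plot(range(len(anomalies_score)), anomalies_score)
plt.xlabel('window index (step = {} samples)'.format(step))
plt.ylabel('average projection score')
plt.title('Random-projection anomaly scores per window')
plt.show()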
Example #27
k = 5
SOURCE_L = "/home/pf/pfstaff/projects/ruzicka/TiledDataset_256x256_32ov/2012_strip"+str(k)+"_256x256_over32_png/"
SOURCE_R = "/home/pf/pfstaff/projects/ruzicka/TiledDataset_256x256_32ov/2015_strip"+str(k)+"_256x256_over32_png/"
SOURCE_Y = "/home/pf/pfstaff/projects/ruzicka/CleanedVectors_manually_256x256_32over/vector_strip"+str(k)+"_256x256_over32/"

import numpy as np

import Settings
import mock
args = mock.Mock()
args.name = "test"
settings = Settings.Settings(args)
import DataLoader, DataPreprocesser, Debugger
import DatasetInstance_OurAerial
dataLoader = DataLoader.DataLoader(settings)
datasetInstance = DatasetInstance_OurAerial.DatasetInstance_OurAerial(settings, dataLoader, "256_cleanManual")

#"""
paths_2012 = [SOURCE_L]
paths_2015 = [SOURCE_R]
paths_vectors = [SOURCE_Y]

files_paths_2012 = datasetInstance.load_path_lists(paths_2012)
all_2012_png_paths, edge_tile_2012, total_tiles_2012 = datasetInstance.process_path_lists(files_paths_2012, paths_2012)
files_paths_2015 = datasetInstance.load_path_lists(paths_2015)
all_2015_png_paths, _, _ = datasetInstance.process_path_lists(files_paths_2015, paths_2015)

files_vectors = datasetInstance.load_path_lists(paths_vectors)
all_vector_paths = datasetInstance.process_path_lists_for_vectors(files_vectors, paths_vectors, edge_tile_2012, total_tiles_2012)

Example #28
def load_data():
    retval = DataTypes.Data()
    loader = DataLoader.DataLoader("../dataset/")
    #loader.load_subset(retval, 5000)
    loader.load_all(retval)
    return retval