Example #1
def main():

    # Set the log file for debugging use
    utils.set_logger(os.path.join(os.getcwd(), 'train.log'))

    logging.info('Loading datasets...')

    data_loader = DataLoader(DATA_PATH)

    X_train, Y_train, X_val, Y_val = data_loader.get_train_data()
    X_test, Y_test = data_loader.get_test_data()

    logging.info('Building the model...')
    my_model = seq2class()  # TODO: pass the required model parameters here

    print("Here is our model: ")
    print(my_model.model.summary())

    logging.info('Training....')
    history = my_model.model.fit(X_train, Y_train, epochs=EPOCHS, verbose=1, batch_size=BATCH_SIZE, validation_data=(X_val, Y_val))

    logging.info(f"train loss: {history.history['loss']}")
    logging.info(f"val loss: {history.history['val_loss']}")

    logging.info(f"train accuracy: {history.history['acc']}")
    logging.info(f"val accuracy: {history.history['val_acc']}")
    # Plotting the loss history #
    plot = utils.Plotting(history)
    # plot.plot_loss()
    # plot.plot_accuracy()

    print('Testing...')
    loss, accuracy = my_model.model.evaluate(X_test, Y_test)
    logging.info(f"Test loss: {loss}")
    logging.info(f"Test accuracy: {accuracy}")
Example #2

def evaluate_from_workspace(workspace_dir):
    """
    Evaluate the model on the test set.
    """
    global args, data_loader
    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_from_dir(['test'], params.data_dir)
    test_data = data['test']

    # specify the test set size
    params.test_size = test_data['size']
    test_data_iterator = data_loader.data_iterator(test_data, params)

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    num_steps = (params.test_size + 1) // params.batch_size
    test_metrics = evaluate(model, loss_fn, test_data_iterator, metrics,
                            params, num_steps)
    save_path = os.path.join(params.model_dir,
                             "metrics_test_{}.json".format(args.restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
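
utils.load_checkpoint is called above but not shown; a minimal sketch of such a helper, assuming the checkpoint layout used elsewhere in these examples (a dict with a 'state_dict' key, plus an assumed 'optim_dict' key for the optimizer):

import os
import torch

def load_checkpoint(checkpoint, model, optimizer=None):
    # Sketch of a hypothetical helper: restore model weights and, optionally, optimizer state
    if not os.path.isfile(checkpoint):
        raise FileNotFoundError("No checkpoint found at {}".format(checkpoint))
    state = torch.load(checkpoint, map_location='cpu')
    model.load_state_dict(state['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(state['optim_dict'])  # 'optim_dict' key is an assumption
    return state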
Example #3
    def setUp(self):
        self.files = list(Path("../data").iterdir())
        self.files = sorted(
            self.files,
            key=lambda x: int(
                str(x).split("/")[-1].split(".")[0].split("_")[-1]))

        self.dataloader = DataLoader(files=self.files)
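
An equivalent sort key using pathlib's stem (a sketch; it behaves the same as long as each file name has a single extension and ends in _<number>):

from pathlib import Path

files = sorted(Path("../data").iterdir(),
               key=lambda p: int(p.stem.split("_")[-1]))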
Example #4
import torch
from torch import device as dev

# Assumed imports for this snippet (same project layout as the other examples in this collection)
import shap
import utils
import model.net as net
from model.data_loader import DataLoader

MODEL_DIR = 'experiments/base_model/'
DATA_DIR = 'data/'
params = utils.Params(MODEL_DIR + 'params.json')
params.vocab_size = 25
params.number_of_classes = 10
params.cuda = torch.cuda.is_available()

weights = MODEL_DIR + 'best.pth'

model = net.Net(params).cuda() if params.cuda else net.Net(params)
checkpoint = torch.load(weights, map_location=dev('cpu'))
model.load_state_dict(checkpoint['state_dict'])

data_loader = DataLoader(DATA_DIR, params)
data = data_loader.load_data(['train', 'val'], DATA_DIR)
train_data = data['train']
train_data_iterator = data_loader.data_iterator(train_data,
                                                params,
                                                shuffle=True)
train_batch, _ = next(train_data_iterator)

val_data = data['val']
val_data_iterator = data_loader.data_iterator(val_data, params, shuffle=False)
val_batch, _ = next(val_data_iterator)
explainer = shap.KernelExplainer(model.forward, train_batch[:1])
vals = train_batch[:10]

shap_values = explainer.shap_values(train_batch[:10])
shap.force_plot(explainer.expected_value[0], shap_values[0][0],
                vals[0])  # final argument assumed; the original snippet is truncated here
Example #5
import test_context
import re

from model.data_loader import DataLoader

dl = DataLoader('data/xml/CoreOnly.xml')

for mon in dl.monsters:
    for atype in [mon.traits, mon.actions, mon.reactions, mon.legendaries]:
        for act in atype:
            t = ''  # reset per action so stale text from a previous iteration can't leak through
            if isinstance(act.text, list):
                for at in act.text:
                    if at is not None:
                        t += at
            elif act.text is not None:
                t = act.text
            if t:
                m = re.search(r'[0-9]+d[0-9]+', t)
                if m and act.attack is None:
                    print('{}: {}'.format(mon.name, act.name))
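
As a quick illustration, the dice-notation pattern used above matches the first NdM expression in a string (hypothetical input):

import re

m = re.search(r'[0-9]+d[0-9]+', 'Bite. Hit: 2d6+3 piercing damage.')
print(m.group(0) if m else None)  # -> 2d6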
Example #6
    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    # torch.manual_seed(230)
    # if params.cuda:
    #     torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(params)
    data = data_loader.load_data(['train', 'dev'])
    train_data = data['train']
    val_data = data['dev']

    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']

    logging.info("- done.")

    # Define the model and optimizer
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
Example #7
    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    # torch.manual_seed(230)
    # if params.cuda: torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(params)
    train_data_path = os.path.join(args.data_dir, 'train', 'train_data.json')
    data_loader.load_data(train_data_path,
                          split='train',
                          size_limit=args.dataset_size_limit)
    # data_loader.split_data(split_ratio=params.split_ratio)
    val_data_path = os.path.join(args.data_dir, 'val', 'val_data.json')
    data_loader.load_data(val_data_path,
                          'val',
                          size_limit=args.dataset_size_limit)

    # specify the train and val dataset sizes (appended in data_loader)
    params.train_size = data_loader.get_dataset_size('train')
    params.val_size = data_loader.get_dataset_size('val')

    logging.info("- done.")
Example #8
    params = utils.Params(json_path)

    params.cuda = torch.cuda.is_available()

    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader('data/', params)
    data = data_loader.load_data(['train', 'val'], 'data/')
    train_data = data['train']
    val_data = data['val']

    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']

    logging.info('- done.')

    # Define the model and optimizer
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
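
The snippet is cut off at this point; the continuation, as it appears in the sibling training scripts in this collection (e.g. Example #16), would be along these lines:

    loss_fn = net.loss_fn
    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(model, train_data, val_data, optimizer, loss_fn,
                       metrics, params, args.model_dir, args.restore_file)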
Example #9
params = utils.Params(json_path)


# In[3]:


# use GPU if available
params.cuda = torch.cuda.is_available()
params.dict  # display the loaded parameters (this bare expression renders in a notebook cell)


# In[4]:


# load data
data_loader = DataLoader(data_dir, params)
data = data_loader.load_data(['train', 'val', 'test'])
train_data = data['train']
val_data = data['val']
test_data = data['test']

# specify the train and val dataset sizes
params.train_size = train_data['size']
params.val_size = val_data['size']
params.test_size = test_data['size']

params.pad_tag_ind = data_loader.tag_map[params.pad_tag]


# In[5]:

Example #10

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader('data/', params)
    data = data_loader.load_data(['test'], 'data/')
    test_data = data['test']

    # specify the test set size
    params.test_size = test_data['size']
    test_data_iterator = data_loader.data_iterator(test_data, params)

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    loss_fn = net.loss_fn
    metrics = net.metrics

Example #11

params.number_of_classes = 10
params.cuda = torch.cuda.is_available()

# weights = MODEL_DIR + 'best.pth'
classes = [
    'Extracellular', 'Plastid', 'Cytoplasm', 'Mitochondrion', 'Nucleus',
    'Endoplasmic.reticulum', 'Golgi.apparatus', 'Cell.membrane',
    'Lysosome/Vacuole', 'Peroxisome'
]

# model = net.Net(params).cuda() if params.cuda else net.Net(params)
# checkpoint = torch.load(weights, map_location=dev('cpu'))
# model.load_state_dict(checkpoint['state_dict'])
# model.eval()

loader = DataLoader(DATA_DIR, params)
# data = loader.load_data(['train', 'val'], DATA_DIR)
# train_data = data['train']
# train_data_iterator = loader.data_iterator(train_data, params, shuffle=False)
# train_batch, label_batch = next(train_data_iterator)
# sentences_file = DATA_DIR + 'train/sentences.txt'
# labels_file = DATA_DIR + 'train/labels.txt'
# sentences = []
# with open(sentences_file) as f:
#     for sent in f.read().splitlines():
#         sentences.append(sent)
# labels = []
# with open(labels_file) as f:
#     for lab in f.read().splitlines():
#         labels.append(lab)
from captum.attr import TokenReferenceBase  # assumed import; TokenReferenceBase comes from Captum

token_reference = TokenReferenceBase(reference_token_idx=loader.pad_ind)
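
A minimal sketch of how a token reference like this is typically consumed with Captum's LayerIntegratedGradients; the embedding-layer attribute, sequence length, and vocabulary size below are placeholders, not taken from this project:

import torch
from captum.attr import LayerIntegratedGradients, TokenReferenceBase

lig = LayerIntegratedGradients(model, model.embedding)  # model.embedding is an assumed attribute
token_reference = TokenReferenceBase(reference_token_idx=loader.pad_ind)

seq_len = 50                                        # placeholder sequence length
input_indices = torch.randint(0, 25, (1, seq_len))  # stand-in batch of token indices
reference_indices = token_reference.generate_reference(seq_len, device='cpu').unsqueeze(0)

# Attribute the class-0 score back to the input tokens against the all-PAD baseline
attributions, delta = lig.attribute(input_indices, reference_indices,
                                    target=0, return_convergence_delta=True)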
Example #12
    @staticmethod
    def exact_match_score(outputs, data_batch):
        preds = outputs_to_preds(outputs)
        preds = preds_to_answers(preds, data_batch['tkd_c'])
        scores = [
            metric_max_over_ground_truths(exact_match_score, pred,
                                          data_batch['gts'][i])
            for i, pred in enumerate(preds)
        ]
        return np.mean(scores)

    @staticmethod
    def f1_score(outputs, data_batch):
        preds = outputs_to_preds(outputs)
        preds = preds_to_answers(preds, data_batch['tkd_c'])
        scores = [
            metric_max_over_ground_truths(f1_score, pred, data_batch['gts'][i])
            for i, pred in enumerate(preds)
        ]
        return np.mean(scores)


if __name__ == '__main__':
    from utils.model_utils import Params
    params = Params('../model_dir/configs.json')
    params.update('../data/dataset_configs.json')
    from model.data_loader import DataLoader
    data_loader = DataLoader(params)
    _model = Model(params)
    print(_model)
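
metric_max_over_ground_truths is not shown in this snippet; in the standard SQuAD evaluation scripts it simply takes the best score over all reference answers, along these lines (a sketch assuming that convention):

def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    # Score the prediction against every ground-truth answer and keep the maximum
    return max(metric_fn(prediction, gt) for gt in ground_truths)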
Example #13
    job_name = "emb{}_lr{}_k{}_bs{}_f{}_dr{}_{}_{}".format(
        params.emb, params.learning_rate, params.kernels, params.batch_size,
        params.filters, params.dropout, params.model, params.task)
    # Create a new folder in parent_dir with unique_name "job_name"
    model_dir = os.path.join(params.save_path, 'fold' + str(params.fold),
                             job_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Set the logger
    utils.set_logger(os.path.join(model_dir, 'train.log'))

    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(params.local_data, params)

    data = data_loader.load_data(['train', 'val', 'test'], params.local_data)
    train_data = data['train']
    val_data = data['val']
    test_data = data['test']

    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']

    logging.info("- done.")

    # Define the model and optimizer
    if not params.cuda:
        sys.exit("gpu unavailable")
    device = torch.device("cuda:0")
Example #14
    # Set the logger
    job_name = "emb{}_lr{}_k{}_bs{}_f{}_dr{}_{}_{}".format(
        params.emb, params.learning_rate, params.kernels, params.batch_size,
        params.filters, params.dropout, params.model, params.task)
    # Create a new folder in parent_dir with unique_name "job_name"
    model_dir = os.path.join(params.save_path, 'fold' + str(params.fold),
                             job_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    utils.set_logger(os.path.join(model_dir, 'eval.log'))

    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(params.data_dir, params)

    data = data_loader.load_data(['test'], params.data_dir)
    test_data = data['test']

    # specify the test set size
    params.test_size = test_data['size']

    logging.info("- done.")

    # Define the model and optimizer
    if not params.cuda:
        sys.exit("gpu unavailable")
    device = torch.device("cuda:0")
    if params.model == "cnn_text":
        if params.emb == 'w2v':
            model = models.CNN_Text(data_loader.weights_w2v, params)
Example #15
def predict_from_workspace(workspace_dir, input_data):
    """
        Evaluate the model on the test set.
    """
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters
    args = parser.parse_args()
    trgt_json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        trgt_json_path), "No json configuration file found at {}".format(
            trgt_json_path)

    params = utils.Params(trgt_json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_for_predict(input_data)
    batch_sentences = data["predict"]["data"]

    # compute length of longest sentence in batch
    batch_max_len = max([len(s) for s in batch_sentences])

    # prepare a numpy array for the batch, initialising every position with pad_ind
    batch_data = data_loader.pad_ind * np.ones(
        (len(batch_sentences), batch_max_len))

    # copy the data to the numpy array
    for j in range(len(batch_sentences)):
        cur_len = len(batch_sentences[j])
        batch_data[j][:cur_len] = batch_sentences[j]

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    logging.info("Starting prediction")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    results = predict(model, batch_data)

    return results
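
The manual padding loop above can also be written with torch.nn.utils.rnn.pad_sequence; a minimal equivalent sketch, assuming the sentences are lists of token indices as in this function:

import torch
from torch.nn.utils.rnn import pad_sequence

batch_sentences = [[4, 8, 15], [16, 23], [42]]  # stand-in token-index sentences
pad_ind = 0                                     # stand-in padding index

batch_data = pad_sequence([torch.tensor(s) for s in batch_sentences],
                          batch_first=True, padding_value=pad_ind)
print(batch_data.shape)  # torch.Size([3, 3])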
Example #16
def train_from_workspace(workspace_dir):
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters from json file
    args = parser.parse_args()
    src_json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(src_json_path), "No json configuration file found at {}".format(src_json_path)

    trgt_json_path = os.path.join(model_dir, 'params.json')
    if not os.path.exists(model_dir):
        print("Workspace Model Directory does not exist! Making directory {}".format(model_dir))
        os.mkdir(model_dir)
    else:
        print("Workspace Model Directory exists! ")

    shutil.copyfile(src_json_path, trgt_json_path)

    params = utils.Params(trgt_json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(params.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_from_dir(['train', 'val'], params.data_dir)
    train_data = data['train']
    val_data = data['val']

    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']

    logging.info("- done.")

    # Define the model and optimizer
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
    loss_fn = net.loss_fn
    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    best_eval_acc = train_and_evaluate(model, train_data, val_data, optimizer,
                                       loss_fn, metrics, params,
                                       params.model_dir, args.restore_file)

    return best_eval_acc
Example #17
from model.analize import Analize
from model.data_loader import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import itertools as it
from pandas import DataFrame

X_trained = ['Milk', 'Frozen']
label = ['Customer']
from model.preprocess import Preprocess

data = DataLoader('./data.csv', ';', ',')
file = data.read_file()

prepared = Preprocess().prepare_input(X_trained, file)

model = Analize().fit(prepared)

rs = file['Milk']
rs = rs.to_frame(name='Milk')

rs2 = file['Frozen']
rs2 = rs2.to_frame(name='Frozen')

lab = file['Customer ']
lab = lab.to_frame(name='Customer')

rs3 = pd.DataFrame(model)
rs3.columns = ['Cluster']
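
A plausible follow-up (not part of the original snippet) is to join the cluster assignments back onto the inspected columns:

result = pd.concat([lab, rs, rs2, rs3], axis=1)  # hypothetical next step
print(result.head())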
Example #18
    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)
        
    # Get the logger
    utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader(args.data_dir, params)
    data = data_loader.load_data(['test'], args.data_dir)
    test_data = data['test']

    # specify the test set size
    params.test_size = test_data['size']
    test_data_iterator = data_loader.data_iterator(test_data, params)

    logging.info("- done.")

    # Load embeddings
    gen_emb = np.load(os.path.join(args.emb_dir, 'gen.npy'))
    domain_emb = np.load(os.path.join(args.emb_dir, 'domain.npy'))

    # Define the model
    model = net.Net(params, gen_emb, domain_emb).cuda() if params.cuda else net.Net(params, gen_emb, domain_emb)

Example #19

import test_context

from model.data_loader import DataLoader

dl = DataLoader('data/xml/CoreOnly.xml')
dl.print_stats()

Example #20

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # load data
    data_loader = DataLoader(args.data_dir, params)
    data = data_loader.load_data(['train', 'val'], args.data_dir)
    train_data = data['train']
    val_data = data['val']

    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']

    logging.info("- done.")

    # Define the model and optimizer
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    torch.save(model, 'model_for_visualization.pth')
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
Example #21
    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    print(params)
    data_loader = DataLoader(params)
    data = data_loader.load_data(args.test_data_path)

    # specify the test set size
    params.test_size = data_loader.get_dataset_size('all')
    test_data_iterator = data_loader.data_iterator(split='all', batch_size=params.batch_size)

    logging.info("- done.")

    # Define the model
    model = net.Model(params).cuda() if params.cuda else net.Model(params)

    loss_fn = model.loss_fn
    metrics = {
        'EM': model.exact_match_score,
        'f1': model.f1_score
    }