Example No. 1
def test_qm9():
    dataset = datasets.QM9(amount=100)
    dl = DisjointLoader(dataset, batch_size=batch_size)
    dl.__next__()

    bl = BatchLoader(dataset, batch_size=batch_size)
    bl.__next__()
def train_test_val_data(dataset, epochs=400, batch_size=1, path="../data/"):
    # dataset = CircuitDataset(path=path, transforms=transforms)
    # # Parameters
    # F = dataset.n_node_features  # Dimension of node features
    # n_out = dataset.n_labels  # Dimension of the target

    # Train/valid/test split
    idxs = np.random.permutation(len(dataset))
    split_va, split_te = int(0.6 * len(dataset)), int(0.8 * len(dataset))
    idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
    print(idx_tr, idx_va, idx_te)

    dataset_tr = dataset[idx_tr]
    dataset_va = dataset[idx_va]
    dataset_te = dataset[idx_te]

    loader_tr = DisjointLoader(dataset_tr,
                               batch_size=batch_size,
                               epochs=epochs,
                               node_level=True)
    loader_va = DisjointLoader(dataset_va,
                               batch_size=batch_size,
                               node_level=True)
    loader_te = DisjointLoader(dataset_te,
                               batch_size=batch_size,
                               node_level=True)
    return loader_tr, loader_va, loader_te
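A hedged usage sketch for the helper above (not part of the original code): `dataset` is assumed to be an already-built Spektral Dataset and `model` a compiled node-level Keras model. Spektral loaders expose load() and steps_per_epoch, so the returned loaders plug straight into Keras' fit() and evaluate():

loader_tr, loader_va, loader_te = train_test_val_data(dataset, epochs=400, batch_size=1)

model.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
    epochs=400,
)
model.evaluate(loader_te.load(), steps=loader_te.steps_per_epoch)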
Example No. 3
def test_qm7():
    dataset = datasets.QM7()
    dl = DisjointLoader(dataset, batch_size=batch_size)
    dl.__next__()

    bl = BatchLoader(dataset, batch_size=batch_size)
    bl.__next__()
Example No. 4
def test_disjoint():
    data = TestDataset()
    loader = DisjointLoader(data,
                            batch_size=batch_size,
                            epochs=1,
                            shuffle=False)
    batches = list(loader)

    (x, a, e, i), y = batches[-1]
    n = sum(ns[-graphs_in_batch:])
    assert x.shape == (n, f)
    assert a.shape == (n, n)
    assert len(e.shape) == 2 and e.shape[1] == s  # e is 2-D with s features per edge (edge count not checked)
    assert i.shape == (n, )
    assert y.shape == (graphs_in_batch, 2)
    assert loader.steps_per_epoch == np.ceil(len(data) / batch_size)
    signature = loader.tf_signature()
    assert len(signature[0]) == 4
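To make the shape assertions above concrete, here is a small hedged sketch (the TwoGraphs dataset is invented for illustration) of what a disjoint batch contains: the node features of all graphs stacked, one block-diagonal sparse adjacency, and segment ids mapping each node to its graph:

import numpy as np
import scipy.sparse as sp
from spektral.data import Dataset, Graph
from spektral.data.loaders import DisjointLoader

class TwoGraphs(Dataset):
    """Toy dataset with two graphs of 3 and 2 nodes (illustration only)."""
    def read(self):
        return [
            Graph(x=np.ones((3, 4)), a=sp.eye(3).tocsr(), y=np.array([1.0, 0.0])),
            Graph(x=np.ones((2, 4)), a=sp.eye(2).tocsr(), y=np.array([0.0, 1.0])),
        ]

loader = DisjointLoader(TwoGraphs(), batch_size=2, epochs=1, shuffle=False)
(x, a, i), y = loader.__next__()  # no edge features here, so no `e` in the inputs
print(x.shape)  # (5, 4): 3 + 2 node-feature rows stacked
print(a.shape)  # (5, 5): block-diagonal (sparse) adjacency
print(i)        # [0 0 0 1 1]: node-to-graph ids used by pooling layers
print(y.shape)  # (2, 2): one label row per graph (node_level=False)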
Example No. 5
def test_tud():
    # Edge labels + edge attributes
    dataset = datasets.TUDataset('BZR_MD', clean=False)
    dl = DisjointLoader(dataset, batch_size=batch_size)
    dl.__next__()

    bl = BatchLoader(dataset, batch_size=batch_size)
    bl.__next__()

    # Node labels + node attributes + clean version
    dataset = datasets.TUDataset('ENZYMES', clean=True)
    dl = DisjointLoader(dataset, batch_size=batch_size)
    dl.__next__()

    bl = BatchLoader(dataset, batch_size=batch_size)
    bl.__next__()
Example No. 6
def test_disjoint_node():
    data = TestDatasetDsjNode()
    loader = DisjointLoader(
        data, node_level=True, batch_size=batch_size, epochs=1, shuffle=False
    )
    batches = [b for b in loader]

    (x, a, e, i), y = batches[-1]
    n = sum(ns[-graphs_in_batch:])
    assert x.shape == (n, f)
    assert a.shape == (n, n)
    assert len(e.shape) == 2 and e.shape[1] == s  # e is 2-D with s features per edge (edge count not checked)
    assert i.shape == (n,)
    assert y.shape == (n, 2)
    assert loader.steps_per_epoch == np.ceil(len(data) / batch_size)
Example No. 7
def test_disjoint():
    data = TestDataset()
    loader = DisjointLoader(data,
                            batch_size=batch_size,
                            epochs=1,
                            shuffle=False)
    batches = [b for b in loader]

    (x, a, e, i), y = batches[-1]
    n = sum(ns[-graphs_in_batch:])
    assert x.shape == (n, f)
    assert a.shape == (n, n)
    assert len(e.shape) == 2 and e.shape[1] == s  # e is 2-D with s features per edge (edge count not checked)
    assert i.shape == (n, )
    assert y.shape == (graphs_in_batch, 2)
Example No. 8
def save_checkpoint(name, model):
    os.makedirs(f'{logdir}/{name}', exist_ok=True)
    loader = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=1)
    all_pred_types = []
    all_actual_types = []
    print('>>> saving checkpoint <<<')
    for batch in loader:
        nodes, adj, edges = batch[0]
        actions, targets, mask = forward(model, *batch, training=False)
        pred_types, actual_types = log_prediction(nodes, targets, actions, mask)
        print('pred_types:', pred_types)
        print('actual_types:', actual_types)

        all_pred_types.extend(pred_types)
        all_actual_types.extend(actual_types)

    unique, counts = np.unique(all_actual_types, return_counts=True)
    label_dist = dict(zip(unique, counts))

    # confusion matrix
    import pandas as pd
    import seaborn as sn
    from matplotlib import pyplot as plt

    all_possible_types = [ i + 1 for i in range(max(*all_actual_types, *all_pred_types)) ]
    actual_df = pd.Categorical(all_actual_types, categories=all_possible_types)
    predicted_df = pd.Categorical(all_pred_types, categories=[*all_possible_types, 'Totals'])
    cm = pd.crosstab(actual_df, predicted_df, rownames=['Actual'], colnames=['Predicted'])

    for idx in all_actual_types:
        if idx not in all_pred_types:
            cm[idx] = 0

    totals = [ sum(row) for (_, row) in cm.iterrows() ]
    cm['Totals'] = totals
    sorted_cols = sorted([ c for c in cm.columns if type(c) is int ])
    sorted_cols.append('Totals')
    cm = cm.reindex(sorted_cols, axis=1)

    sn.heatmap(cm, annot=True)
    plt.title(f'confusion matrix ({name})')
    plt.savefig(f'{logdir}/{name}/confusion_matrix.png')
    plt.clf()

    # save the model(s)
    model.save(f'{logdir}/{name}/model')
Example No. 9
# Best config
batch_size = 32
learning_rate = 0.01
epochs = 400

# Read data
data = TUDataset('PROTEINS')

# Train/test split
np.random.shuffle(data)
split = int(0.8 * len(data))
data_tr, data_te = data[:split], data[split:]

# Data loader
loader_tr = DisjointLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_te = DisjointLoader(data_te, batch_size=batch_size)

# Create model
model = GeneralGNN(data.n_labels, activation='softmax')
optimizer = Adam(learning_rate)
model.compile(optimizer, 'categorical_crossentropy', metrics=['categorical_accuracy'])


# Evaluation function
def evaluate(loader):
    step = 0
    results = []
    for batch in loader:
        step += 1
        loss, acc = model.test_on_batch(*batch)
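        # Hedged sketch of how such a loop is typically finished (the excerpt is
        # truncated here): collect the per-batch results and return their mean
        # after one full pass over the loader.
        results.append((loss, acc))
        if step == loader.steps_per_epoch:
            return np.mean(results, 0)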
Example No. 10
def test_data(construct_dict):
    """
    Build the datasets and loaders from a construction dictionary and check that data generation works
    """

    # Setup Log
    wandblog = construct_dict["wandblog"]
    if wandblog:
        import wandb
        run = wandb.init(project='datagen',
                         entity="chri862z",
                         group=construct_dict["group"],
                         config=construct_dict,
                         reinit=True,
                         settings=wandb.Settings(start_method="fork"))
        wandb.run.name = construct_dict['model_name'] + '_' + construct_dict[
            'experiment_name'] + '_' + str(wandb.run.id)

    import dev.datawhere as dl
    graph_data = dl.graph_data
    dataset_train = graph_data(**construct_dict['data_params'],
                               traintest='train',
                               i_train=construct_dict['data_params']['n_steps']
                               - 1)
    dataset_test = graph_data(**construct_dict['data_params'],
                              traintest='test',
                              i_test=construct_dict['data_params']['n_steps'] -
                              1)
    dataset_val = dataset_test
    batch_size = 512

    print('Loaded datasets')

    loader_train = DisjointLoader(dataset_train,
                                  epochs=1,
                                  batch_size=batch_size)
    loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

    # Define training function
    @tf.function(input_signature=loader_train.tf_signature(),
                 experimental_relax_shapes=True)
    def train_step(inputs, targets):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            targets = tf.cast(targets, tf.float32)
            loss = loss_func(predictions, targets)
            loss += sum(model.losses)

        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    @tf.function(input_signature=loader_test.tf_signature(),
                 experimental_relax_shapes=True)
    def test_step(inputs, targets):
        predictions = model(inputs, training=False)
        targets = tf.cast(targets, tf.float32)
        out = loss_func(predictions, targets)

        return predictions, targets, out

    print("Data generated, everything looks good!")
    return 1
Example No. 11
################################################################################
dataset = QM9(amount=1000)  # Set amount=None to train on whole dataset

# Parameters
F = dataset.n_node_features  # Dimension of node features
S = dataset.n_edge_features  # Dimension of edge features
n_out = dataset.n_labels  # Dimension of the target

# Train/test split
idxs = np.random.permutation(len(dataset))
split = int(0.9 * len(dataset))
idx_tr, idx_te = np.split(idxs, [split])
dataset_tr, dataset_te = dataset[idx_tr], dataset[idx_te]

loader_tr = DisjointLoader(dataset_tr,
                           batch_size=batch_size,
                           epochs=epochs,
                           node_level=False)
loader_te = DisjointLoader(dataset_te,
                           batch_size=batch_size,
                           epochs=1,
                           node_level=False)  # load() output:  X, A, E, I

################################################################################
# BUILD MODEL
################################################################################
X_in = Input(shape=(F, ), name='X_in')
A_in = Input(shape=(None, ), sparse=True, name='A_in')
E_in = Input(shape=(S, ), name='E_in')
I_in = Input(shape=(), name='segment_ids_in', dtype=tf.int32)

X_1 = ECCConv(32, activation='relu')([X_in, A_in, E_in])
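# Hedged continuation sketch (not part of the original excerpt): a common way to
# finish this disjoint-mode model is to pool node features per graph with the
# segment ids I_in and add a dense head, so the loader output (X, A, E, I) maps
# directly onto the model inputs.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from spektral.layers import GlobalSumPool

X_2 = ECCConv(32, activation='relu')([X_1, A_in, E_in])
X_3 = GlobalSumPool()([X_2, I_in])
output = Dense(n_out)(X_3)
model = Model(inputs=[X_in, A_in, E_in, I_in], outputs=output)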
Example No. 12
dataset = MyDataset(1000, transforms=NormalizeAdj())

# Parameters
F = dataset.n_node_features  # Dimension of node features
n_out = dataset.n_labels  # Dimension of the target

# Train/valid/test split
idxs = np.random.permutation(len(dataset))
split_va, split_te = int(0.8 * len(dataset)), int(0.9 * len(dataset))
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
dataset_tr = dataset[idx_tr]
dataset_va = dataset[idx_va]
dataset_te = dataset[idx_te]

loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs)
loader_va = DisjointLoader(dataset_va, batch_size=batch_size)
loader_te = DisjointLoader(dataset_te, batch_size=batch_size)

################################################################################
# BUILD (unnecessarily big) MODEL
################################################################################
X_in = Input(shape=(F, ), name="X_in")
A_in = Input(shape=(None, ), sparse=True)
I_in = Input(shape=(), name="segment_ids_in", dtype=tf.int32)

X_1 = GCSConv(32, activation="relu")([X_in, A_in])
X_1, A_1, I_1 = TopKPool(ratio=0.5)([X_1, A_in, I_in])
X_2 = GCSConv(32, activation="relu")([X_1, A_1])
X_2, A_2, I_2 = TopKPool(ratio=0.5)([X_2, A_1, I_1])
X_3 = GCSConv(32, activation="relu")([X_2, A_2])
Example No. 13
def analyze_train(construct_dict):
    """
    Train a model given a construction dictionary
    """

    # Setup Log
    wandblog = construct_dict["wandblog"]
    if wandblog:
        import wandb
        run = wandb.init(project=construct_dict["experiment"],
                         entity="chri862z",
                         group=construct_dict["group"],
                         config=construct_dict,
                         reinit=True,
                         settings=wandb.Settings(start_method="fork"))
        wandb.run.name = construct_dict['model_name'] + '_' + construct_dict[
            'experiment_name'] + '_' + str(wandb.run.id)

    ################################################
    #   Load dataset                              #
    ################################################
    from dev.data_load import graph_data
    #load dataset
    epochs = int(construct_dict['run_params']['epochs'])
    batch_size = int(construct_dict['run_params']['batch_size'])

    dataset = graph_data(**construct_dict['data_params'])

    idx_lists = dataset.index_lists
    # Split data
    dataset_train = dataset[idx_lists[0]]
    dataset_val = dataset[idx_lists[1]]
    dataset_test = dataset[idx_lists[2]]

    loader_train = DisjointLoader(dataset_train,
                                  epochs=epochs,
                                  batch_size=batch_size)
    loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

    ###############################################
    #   Setup other run params                     #
    ################################################

    early_stop = construct_dict['run_params']['early_stop']
    patience = construct_dict['run_params']['patience']
    val_epoch = construct_dict['run_params']['val_epoch']

    print('check')
    ################################################
    #   Setup model, loss, lr schedule and metrics #
    ################################################

    # Get model, metrics, lr_schedule and loss function
    model, model_path = setup_model(construct_dict)
    loss_func = get_loss_func(construct_dict['run_params']['loss_func'])
    metrics = get_metrics(construct_dict['run_params']['metrics'])
    performance_plot = get_performance(
        construct_dict['run_params']['performance_plot'])
    lr_schedule = get_lr_schedule(construct_dict)
    save_path = osp.join(model_path, wandb.run.name)

    if not osp.isdir(save_path):
        os.makedirs(save_path)
        print('New folder for saving run made')

    # Learning rate and optimizer
    learning_rate = next(lr_schedule)
    opt = Adam(learning_rate)

    ################################################
    #   Set up TF functions and validation step   #
    ################################################

    # Define training function
    @tf.function(input_signature=loader_train.tf_signature(),
                 experimental_relax_shapes=True)
    def train_step(inputs, targets):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            targets = tf.cast(targets, tf.float32)
            loss = loss_func(predictions, targets)
            loss += sum(model.losses)

        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    @tf.function(input_signature=loader_test.tf_signature(),
                 experimental_relax_shapes=True)
    def test_step(inputs, targets):
        predictions = model(inputs, training=False)
        targets = tf.cast(targets, tf.float32)
        out = loss_func(predictions, targets)

        return predictions, targets, out

    def validation(loader):
        loss = 0
        prediction_list, target_list = [], []
        for batch in loader:
            inputs, targets = batch
            predictions, targets, out = test_step(inputs, targets)
            loss += out

            prediction_list.append(predictions)
            target_list.append(targets)

        y_reco = tf.concat(prediction_list, axis=0)
        y_true = tf.concat(target_list, axis=0)
        y_true = tf.cast(y_true, tf.float32)

        loss, loss_from = loss_func(y_reco, y_true, re=True)

        energy, e_old, alpha, zeni, azi = metrics(y_reco, y_true)

        return loss, loss_from, [energy, e_old, alpha, zeni, azi]

    @tf.function(experimental_relax_shapes=True)
    def gradient_importance(inputs, targets, j):
        with tf.GradientTape() as tape:
            tape.watch(inputs[0])
            predictions = model(
                inputs, training=False
            )[:, j]  # needs to be under the gradient tape to be tracked

        grads = tape.gradient(predictions, inputs[0])
        grads = tf.where(tf.math.is_nan(grads), tf.zeros_like(grads), grads)

        grads = tf.math.segment_mean(tf.math.abs(grads), inputs[2], name=None)
        return grads

    ################################################
    #  Train Model                                 #
    ################################################
    options = tf.profiler.experimental.ProfilerOptions(host_tracer_level=3,
                                                       python_tracer_level=1,
                                                       device_tracer_level=1)
    log_dir = 'tmp/board/' + wandb.run.name
    tf.profiler.experimental.start(log_dir, options=options)
    tot_time = 0
    current_batch = 0
    current_epoch = 1
    loss = 0
    lowest_loss = np.inf
    early_stop = 1
    early_stop_counter = 0
    pbar = tqdm(total=loader_train.steps_per_epoch, position=0, leave=True)
    start_time = time.time()
    summarylist = []
    for batch in loader_train:
        inputs, targets = batch
        out = train_step(inputs, targets)
        loss += out
        if current_epoch == 1 and current_batch == 0:
            model.summary()
            if wandblog:
                summary = model.summary(print_fn=summarylist.append)
                table = wandb.Table(columns=["Layers"])
                for s in summarylist:
                    table.add_data(s)
                wandb.log({'Model summary': table})
        current_batch += 1
        pbar.update(1)
        pbar.set_description(
            f"Epoch {current_epoch} / {epochs}; Avg_loss: {loss / current_batch:.6f}"
        )

        if current_batch == loader_train.steps_per_epoch:
            t = time.time() - start_time
            tot_time += t
            print(
                f"Epoch {current_epoch} of {epochs} done in {t:.2f} seconds using learning rate: {learning_rate:.2E}"
            )
            print(
                f"Avg loss of train: {loss / loader_train.steps_per_epoch:.6f}"
            )

            loader_val = DisjointLoader(dataset_val,
                                        epochs=1,
                                        batch_size=batch_size)
            val_loss, val_loss_from, val_metric = validation(loader_val)
            ##################
            ## TensorBoard ###
            ##################

            # tb_callback=tensorflow.keras.callbacks.TensorBoard(log_dir = log_dir,
            #                                      histogram_freq = 1,
            #                                      profile_batch = '500,520')
            # consider starting profiling after a few training steps so that initialization overhead is not included

            #-------------------------#

            # tb_callback.set_model(model)
            if wandblog:
                wandb.log({
                    "Train Loss": loss / loader_train.steps_per_epoch,
                    "Validation Loss": val_loss,
                    "w(log(E))": val_metric[1],
                    "Energy bias": val_metric[0][1],
                    "Energy sig-1": val_metric[0][0],
                    "Energy sig+1": val_metric[0][2],
                    "Solid angle 68th": val_metric[2][3],
                    "Angle bias": val_metric[2][1],
                    "Angle sig-1": val_metric[2][0],
                    "Angle sig+1": val_metric[2][2],
                    "zenith 68th": val_metric[3][3],
                    "zenith bias": val_metric[3][1],
                    "zenith sig-1": val_metric[3][0],
                    "zenith sig+1": val_metric[3][2],
                    "azimuth 68th": val_metric[4][3],
                    "azimuth bias": val_metric[4][1],
                    "azimuth sig-1": val_metric[4][0],
                    "azimuth sig+1": val_metric[4][2],
                    "Learning rate": learning_rate
                })
            # Gradient-importance tracker (could probably be implemented more cleanly)
            grad_dict = {
                'energy': {
                    'dom_x': 1,
                    'dom_y': 1,
                    'dom_z': 1,
                    'time': 1,
                    'logcharge': 1,
                    'SRT': 1
                },
                'zenith': {
                    'dom_x': 1,
                    'dom_y': 1,
                    'dom_z': 1,
                    'time': 1,
                    'logcharge': 1,
                    'SRT': 1
                },
                'azimuth': {
                    'dom_x': 1,
                    'dom_y': 1,
                    'dom_z': 1,
                    'time': 1,
                    'logcharge': 1,
                    'SRT': 1
                },
                'sig_zeni': {
                    'dom_x': 1,
                    'dom_y': 1,
                    'dom_z': 1,
                    'time': 1,
                    'logcharge': 1,
                    'SRT': 1
                },
                'sig_azi': {
                    'dom_x': 1,
                    'dom_y': 1,
                    'dom_z': 1,
                    'time': 1,
                    'logcharge': 1,
                    'SRT': 1
                }
            }

            keys = list(grad_dict.keys())
            feats = list(grad_dict[keys[0]].keys())
            for j in range(len(keys)):
                grads = gradient_importance(inputs, targets, j)
                grads_av = tf.reduce_mean(grads, axis=0)
                grads_av = grads_av / tf.reduce_sum(grads_av)  # normalize so the importances sum to 1
                for i, feat in enumerate(feats):
                    grad_dict[keys[j]][feat] = grads_av[i]
            if wandblog:
                wandb.log(grad_dict)
            print("\n")
            if not construct_dict['run_params']['zeniazi_metric']:
                print(f"Avg loss of validation: {val_loss:.6f}")
                print(
                    f"Loss from:  Energy: {val_loss_from[0]:.6f} \t Angle: {val_loss_from[1]:.6f} "
                )
                print(
                    f"Energy: bias = {val_metric[0][1]:.6f} sig_range = {val_metric[0][0]:.6f}<->{val_metric[0][2]:.6f}, old metric {val_metric[1]:.6f}\
                    \n Angle: bias = {val_metric[2][1]:.6f} sig_range = {val_metric[2][0]:.6f}<->{val_metric[2][2]:.6f}, old metric {val_metric[2][3]:.6f}"
                )
            else:
                print(f"Avg loss of validation: {val_loss:.6f}")
                print(
                    f"Loss from:  Energy: {val_loss_from[0]:.6f} \t Angle: {val_loss_from[1]:.6f} "
                )
                print(
                    f"Energy: bias = {val_metric[0][1]:.6f} sig_range = {val_metric[0][0]:.6f}<->{val_metric[0][2]:.6f}, old metric {val_metric[1]:.6f}\
                    \n Angle: bias = {val_metric[2][1]:.6f} sig_range = {val_metric[2][0]:.6f}<->{val_metric[2][2]:.6f}, old metric {val_metric[2][3]:.6f}\
                    \n Zenith: bias = {val_metric[3][1]:.6f} sig_range = {val_metric[3][0]:.6f}<->{val_metric[3][2]:.6f}, old metric {val_metric[3][3]:.6f}\
                    \n Azimuth: bias = {val_metric[4][1]:.6f} sig_range = {val_metric[4][0]:.6f}<->{val_metric[4][2]:.6f}, old metric {val_metric[4][3]:.6f}"
                )

            if val_loss < lowest_loss:
                early_stop_counter = 0
                lowest_loss = val_loss
            else:
                early_stop_counter += 1
            print(
                f'Early stop counter: {early_stop_counter}/{patience}, lowest val loss was {lowest_loss:.6f}'
            )
            if early_stop and (early_stop_counter >= patience):
                model.save(save_path)
                print(
                    f"Stopped training. No improvement was seen in {patience} epochs"
                )
                return current_epoch

            if current_epoch != epochs:
                pbar = tqdm(total=loader_train.steps_per_epoch,
                            position=0,
                            leave=True)

            learning_rate = next(lr_schedule)
            opt.learning_rate.assign(learning_rate)

            time_avg = tot_time / current_epoch
            if current_epoch % val_epoch == 0:
                model.save(save_path)
                print("Model saved")
                if wandblog:
                    loader_test = DisjointLoader(dataset_test,
                                                 batch_size=batch_size,
                                                 epochs=1)
                    fig, _ = performance_plot(loader_test,
                                              test_step,
                                              metrics,
                                              save=True,
                                              save_path=save_path)
                    title = "performanceplot_" + str(current_epoch)
                    wandb.log({title: [wandb.Image(fig, caption=title)]})

            loss = 0
            start_time = time.time()
            current_epoch += 1
            current_batch = 0
    tf.profiler.experimental.stop()
    if wandblog:
        run.finish()
    return current_epoch
Example No. 14
model = tf.keras.models.load_model(
    f'../from_config/trained_models/IceCube/{args.run}')
model.compile()
batch_size = 512
# Just point this at the same database you would normally run on
dataset = graph_data(n_data=100000, skip=0, restart=0, transform=True,
                     transform_path='../db_files/muongun/transformers.pkl',
                     db_path='../db_files/muongun/rasmus_classification_muon_3neutrino_3mio.db')

#../../../../pcs557/databases/dev_lvl7_mu_nu_e_classification_v003----IC8611_oscNext_003_final/data/meta/transformers.pkl
#../../../../pcs557/databases/dev_lvl7_mu_nu_e_classification_v003---IC8611_oscNext_003_final/data/IC8611_oscNext_003_final.db

## Pull out the relevant pieces
train, val, test = dataset.index_lists
dataset_test = dataset[test]
loader = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)
df_event = dataset.df_event

### define func


@tf.function(input_signature=loader.tf_signature(),
             experimental_relax_shapes=True)
def test_step(inputs, targets):
    predictions = model(inputs, training=False)
    targets = tf.cast(targets, tf.float32)

    return predictions, targets


## def predict func
Example No. 15
def train_model(construct_dict):
    """
    Train a model given a construction dictionary
    """

    # Setup Log
    wandblog = construct_dict["wandblog"]
    if wandblog:
        import wandb
        run = wandb.init(project=construct_dict["experiment"],
                         entity="chri862z",
                         group=construct_dict["group"],
                         config=construct_dict)
        wandb.run.name = construct_dict['model_name'] + wandb.run.id

    ################################################
    #   Load dataset                              #
    ################################################
    from dev.data_load import graph_data
    #load dataset
    epochs = construct_dict['run_params']['epochs']
    batch_size = construct_dict['run_params']['batch_size']

    dataset = graph_data(**construct_dict['data_params'])

    idx_lists = dataset.index_lists
    # Split data
    dataset_train = dataset[idx_lists[0]]
    dataset_val = dataset[idx_lists[1]]
    dataset_test = dataset[idx_lists[2]]

    loader_train = DisjointLoader(dataset_train,
                                  epochs=epochs,
                                  batch_size=batch_size)
    loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

    ###############################################
    #   Setup other run params                     #
    ################################################

    early_stop = construct_dict['run_params']['early_stop']
    patience = construct_dict['run_params']['patience']
    val_epoch = construct_dict['run_params']['val_epoch']

    print('check')
    ################################################
    #   Setup model, loss, lr schedule and metrics #
    ################################################

    # Get model, metrics, lr_schedule and loss function
    model, model_path = setup_model(construct_dict)
    loss_func = get_loss_func(construct_dict['run_params']['loss_func'])
    metrics = get_metrics(construct_dict['run_params']['metrics'])
    performance_plot = get_performance(
        construct_dict['run_params']['performance_plot'])
    lr_schedule = get_lr_schedule(construct_dict)
    save_path = model_path + wandb.run.name

    if not osp.isdir(save_path):
        os.makedirs(save_path)
        print('New folder for saving run made')

    # Learning rate and optimizer
    learning_rate = next(lr_schedule)
    opt = Adam(learning_rate)

    ################################################
    #   Set up TF functions and validation step   #
    ################################################

    # Define training function
    @tf.function(input_signature=loader_train.tf_signature(),
                 experimental_relax_shapes=True)
    def train_step(inputs, targets):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            targets = tf.cast(targets, tf.float32)
            loss = loss_func(predictions, targets)
            loss += sum(model.losses)

        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    @tf.function(input_signature=loader_test.tf_signature(),
                 experimental_relax_shapes=True)
    def test_step(inputs, targets):
        predictions = model(inputs, training=False)
        targets = tf.cast(targets, tf.float32)
        out = loss_func(predictions, targets)

        return predictions, targets, out

    def validation(loader):
        loss = 0
        prediction_list, target_list = [], []
        for batch in loader:
            inputs, targets = batch
            predictions, targets, out = test_step(inputs, targets)
            loss += out

            prediction_list.append(predictions)
            target_list.append(targets)

        y_reco = tf.concat(prediction_list, axis=0)
        y_true = tf.concat(target_list, axis=0)
        y_true = tf.cast(y_true, tf.float32)

        loss, loss_from = loss_func(y_reco, y_true, re=True)

        energy, angle, old = metrics(y_reco, y_true)

        return loss, loss_from, [energy, angle, old]

    ################################################
    #  Train Model                                 #
    ################################################

    tot_time = 0
    current_batch = 0
    current_epoch = 1
    loss = 0
    lowest_loss = np.inf
    early_stop = 1
    early_stop_counter = 0
    pbar = tqdm(total=loader_train.steps_per_epoch, position=0, leave=True)
    start_time = time.time()
    summarylist = []
    for batch in loader_train:
        inputs, targets = batch
        out = train_step(inputs, targets)
        loss += out
        if current_epoch == 1 and current_batch == 0:
            model.summary()
            if wandblog:
                summary = model.summary(print_fn=summarylist.append)
                table = wandb.Table(columns=["Layers"])
                for s in summarylist:
                    table.add_data(s)
                wandb.log({'Model summary': table})
        current_batch += 1
        pbar.update(1)
        pbar.set_description(
            f"Epoch {current_epoch} / {epochs}; Avg_loss: {loss / current_batch:.6f}"
        )

        if current_batch == loader_train.steps_per_epoch:
            t = time.time() - start_time
            tot_time += t
            print(
                f"Epoch {current_epoch} of {epochs} done in {t:.2f} seconds using learning rate: {learning_rate:.2E}"
            )
            print(
                f"Avg loss of train: {loss / loader_train.steps_per_epoch:.6f}"
            )

            loader_val = DisjointLoader(dataset_val,
                                        epochs=1,
                                        batch_size=batch_size)
            val_loss, val_loss_from, val_metric = validation(loader_val)
            if wandblog:
                wandb.log({
                    "Train Loss": loss / loader_train.steps_per_epoch,
                    "Validation Loss": val_loss,
                    "Energy metric": val_metric[2][0],
                    "Energy bias": val_metric[0][1],
                    "Energy sig-1": val_metric[0][0],
                    "Energy sig+1": val_metric[0][2],
                    "Angle metric": val_metric[2][1],
                    "Angle bias": val_metric[1][1],
                    "Angle sig-1": val_metric[1][0],
                    "Angle sig+1": val_metric[1][2],
                    "Learning rate": learning_rate
                })

            print(f"Avg loss of validation: {val_loss:.6f}")
            print(
                f"Loss from:  Energy: {val_loss_from[0]:.6f} \t Angle: {val_loss_from[1]:.6f} "
            )
            print(
                f"Energy: bias = {val_metric[0][1]:.6f} sig_range = {val_metric[0][0]:.6f}<->{val_metric[0][2]:.6f}, old metric {val_metric[2][0]:.6f}\
                \n Angle: bias = {val_metric[1][1]:.6f} sig_range = {val_metric[1][0]:.6f}<->{val_metric[1][2]:.6f}, old metric {val_metric[2][1]:.6f}"
            )

            if val_loss < lowest_loss:
                early_stop_counter = 0
                lowest_loss = val_loss
            else:
                early_stop_counter += 1
            print(
                f'Early stop counter: {early_stop_counter}/{patience}, lowest val loss was {lowest_loss:.6f}'
            )
            if early_stop and (early_stop_counter >= patience):
                model.save(save_path)
                print(
                    f"Stopped training. No improvement was seen in {patience} epochs"
                )
                return current_epoch

            if current_epoch != epochs:
                pbar = tqdm(total=loader_train.steps_per_epoch,
                            position=0,
                            leave=True)

            learning_rate = next(lr_schedule)
            opt.learning_rate.assign(learning_rate)

            time_avg = tot_time / current_epoch
            if current_epoch % val_epoch == 0:
                model.save(save_path)
                print("Model saved")
                if wandblog:
                    loader_test = DisjointLoader(dataset_test,
                                                 batch_size=batch_size,
                                                 epochs=1)
                    fig, ax = performance_plot(loader_test,
                                               test_step,
                                               metrics,
                                               save=True,
                                               save_path=save_path)
                    title = "performanceplot_" + str(current_epoch)
                    wandb.log({title: [wandb.Image(fig, caption=title)]})

            loss = 0
            start_time = time.time()
            current_epoch += 1
            current_batch = 0
    if wandblog:
        run.finish()
    return current_epoch
Example No. 16
def train_model(construct_dict):
    """
    Train a model given a construction dictionary
    """

    # Setup Log
    wandblog = construct_dict["wandblog"]
    if wandblog:
        print('Logging to wandb')
        import wandb
        run = wandb.init(project=construct_dict["experiment"],
                         entity="chri862z",
                         group=construct_dict["group"],
                         config=construct_dict,
                         reinit=True,
                         settings=wandb.Settings(start_method="fork"))
        wandb.run.name = construct_dict['model_name'] + '_' + construct_dict[
            'experiment_name'] + '_' + str(wandb.run.id)

    ################################################
    #   Load dataset                              #
    ################################################
    # import dev.submit_traindata as dl
    # # reload(dl)
    # dataset_train=dl.graph_data(**construct_dict['data_params'])

    import dev.datawhere as dl
    graph_data = dl.graph_data
    dataset_test = graph_data(**construct_dict['data_params'],
                              traintest='test')

    graph_data.traintest = 'train'
    epochs = int(construct_dict['run_params']['epochs'])
    batch_size = int(construct_dict['run_params']['batch_size'])

    print('Loaded datasets')

    loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)
    dataset_val = dataset_test

    ###############################################
    #   Setup other run params                     #
    ################################################

    early_stop = construct_dict['run_params']['early_stop']
    patience = construct_dict['run_params']['patience']
    val_epoch = construct_dict['run_params']['val_epoch']

    print('check')
    ################################################
    #   Setup model, loss, lr schedule and metrics #
    ################################################

    # Get model, metrics, lr_schedule and loss function
    if construct_dict['run_params']['retrain_model'] == False:
        model, model_path = setup_model(construct_dict)
    else:
        model_path = osp.join(cwd, "trained_models/IceCube_neutrino",
                              construct_dict['run_params']['retrain_model'])
        model = tf.keras.models.load_model(model_path)
        model.compile()

    loss_func = get_loss_func(construct_dict['run_params']['loss_func'])
    metrics = get_metrics(construct_dict['run_params']['metrics'])
    performance_plot = get_performance(
        construct_dict['run_params']['performance_plot'])
    lr_schedule = get_lr_schedule(construct_dict)
    save_path = osp.join(model_path, wandb.run.name)

    if not osp.isdir(save_path):
        os.makedirs(save_path)
        print('New folder for saving run made')

    # Learning rate and optimizer
    learning_rate = next(lr_schedule)
    opt = Adam(learning_rate)

    ################################################
    #   Set up TF functions and validation step   #
    ################################################

    # Define training function
    @tf.function(input_signature=loader_test.tf_signature(),
                 experimental_relax_shapes=True)
    def train_step(inputs, targets):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            targets = tf.cast(targets, tf.float32)
            loss = loss_func(predictions, targets)
            loss += sum(model.losses)

        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    @tf.function(input_signature=loader_test.tf_signature(),
                 experimental_relax_shapes=True)
    def test_step(inputs, targets):
        predictions = model(inputs, training=False)
        targets = tf.cast(targets, tf.float32)
        out = loss_func(predictions, targets)

        return predictions, targets, out

    def validation(loader):
        loss = 0
        prediction_list, target_list = [], []
        for batch in loader:
            inputs, targets = batch
            predictions, targets, out = test_step(inputs, targets)
            loss += out

            prediction_list.append(predictions)
            target_list.append(targets)

        y_reco = tf.concat(prediction_list, axis=0)
        y_true = tf.concat(target_list, axis=0)
        y_true = tf.cast(y_true, tf.float32)

        loss, loss_from = loss_func(y_reco, y_true, re=True)

        energy, e_old, alpha, zeni, azi = metrics(y_reco, y_true)

        return loss, loss_from, [energy, e_old, alpha, zeni, azi]

    ################################################
    #  Train Model                                 #
    ################################################
    n_steps = construct_dict['data_params']['n_steps']
    dataset_train = graph_data(**construct_dict['data_params'],
                               traintest='train')
    loader_train = DisjointLoader(dataset_train,
                                  epochs=1,
                                  batch_size=batch_size)
    steps_per_epoch = loader_train.steps_per_epoch
    tot_time = 0
    current_batch = 0
    current_epoch = 1
    loss = 0
    lowest_loss = np.inf
    early_stop = 1
    early_stop_counter = 0
    pbar = tqdm(total=steps_per_epoch * n_steps, position=0, leave=True)
    start_time = time.time()
    summarylist = []
    for j in range(epochs):
        for i in range(n_steps):
            dataset_train = graph_data(**construct_dict['data_params'],
                                       traintest='train',
                                       i_train=i)
            loader_train = DisjointLoader(dataset_train,
                                          epochs=1,
                                          batch_size=batch_size)
            for batch in loader_train:
                inputs, targets = batch
                out = train_step(inputs, targets)
                loss += out
                if current_epoch == 1 and current_batch == 0:
                    model.summary()
                    if wandblog:
                        summary = model.summary(print_fn=summarylist.append)
                        table = wandb.Table(columns=["Layers"])
                        for s in summarylist:
                            table.add_data(s)
                        wandb.log({'Model summary': table})
                current_batch += 1
                pbar.update(1)
                pbar.set_description(
                    f"Epoch {current_epoch} / {epochs}; Avg_loss: {loss / current_batch:.6f}"
                )

                if current_batch == steps_per_epoch * n_steps:
                    # if current_batch == :
                    t = time.time() - start_time
                    tot_time += t
                    print(
                        f"Epoch {current_epoch} of {epochs} done in {t:.2f} seconds using learning rate: {learning_rate:.2E}"
                    )
                    print(
                        f"Avg loss of train: {loss / (steps_per_epoch*n_steps):.6f}"
                    )

                    loader_val = DisjointLoader(dataset_val,
                                                epochs=1,
                                                batch_size=batch_size)
                    val_loss, val_loss_from, val_metric = validation(
                        loader_val)
                    if wandblog:
                        wandb.log({
                            "Train Loss": loss / (steps_per_epoch * n_steps),
                            "Validation Loss": val_loss,
                            "w(log(E))": val_metric[1],
                            "Energy bias": val_metric[0][1],
                            "Energy sig-1": val_metric[0][0],
                            "Energy sig+1": val_metric[0][2],
                            "Solid angle 68th": val_metric[2][3],
                            "Angle bias": val_metric[2][1],
                            "Angle sig-1": val_metric[2][0],
                            "Angle sig+1": val_metric[2][2],
                            "zenith 68th": val_metric[3][3],
                            "zenith bias": val_metric[3][1],
                            "zenith sig-1": val_metric[3][0],
                            "zenith sig+1": val_metric[3][2],
                            "azimuth 68th": val_metric[4][3],
                            "azimuth bias": val_metric[4][1],
                            "azimuth sig-1": val_metric[4][0],
                            "azimuth sig+1": val_metric[4][2],
                            "Learning rate": learning_rate
                        })
                    print("\n")

                    print(f"Avg loss of validation: {val_loss:.6f}")
                    print(
                        f"Loss from:  Energy: {val_loss_from[0]:.6f} \t Zenith: {val_loss_from[1]:.6f} \t Azimuth {val_loss_from[2]:.6f}"
                    )
                    print(
                        f"Energy: bias = {val_metric[0][1]:.6f} sig_range = {val_metric[0][0]:.6f}<->{val_metric[0][2]:.6f}, old metric {val_metric[1]:.6f}\
                        \n Angle: bias = {val_metric[2][1]:.6f} sig_range = {val_metric[2][0]:.6f}<->{val_metric[2][2]:.6f}, old metric {val_metric[2][3]:.6f}\
                        \n Zenith: bias = {val_metric[3][1]:.6f} sig_range = {val_metric[3][0]:.6f}<->{val_metric[3][2]:.6f}, old metric {val_metric[3][3]:.6f}\
                        \n Azimuth: bias = {val_metric[4][1]:.6f} sig_range = {val_metric[4][0]:.6f}<->{val_metric[4][2]:.6f}, old metric {val_metric[4][3]:.6f}"
                    )

                    if val_loss < lowest_loss:
                        early_stop_counter = 0
                        lowest_loss = val_loss
                    else:
                        early_stop_counter += 1
                    print(
                        f'Early stop counter: {early_stop_counter}/{patience}, lowest val loss was {lowest_loss:.6f}'
                    )
                    if early_stop and (early_stop_counter >= patience):
                        model.save(save_path)
                        print(
                            f"Stopped training. No improvement was seen in {patience} epochs"
                        )
                        return current_epoch

                    if current_epoch != epochs:
                        pbar = tqdm(total=steps_per_epoch * n_steps,
                                    position=0,
                                    leave=True)

                    learning_rate = next(lr_schedule)
                    opt.learning_rate.assign(learning_rate)

                    time_avg = tot_time / current_epoch
                    if current_epoch % val_epoch == 0:
                        model.save(save_path)
                        print("Model saved")
                        if wandblog:
                            loader_test = DisjointLoader(dataset_test,
                                                         batch_size=batch_size,
                                                         epochs=1)
                            fig, _ = performance_plot(loader_test,
                                                      test_step,
                                                      metrics,
                                                      bins=20,
                                                      save=True,
                                                      save_path=save_path)
                            title = "performanceplot_" + str(current_epoch)
                            wandb.log(
                                {title: [wandb.Image(fig, caption=title)]})

                    loss = 0
                    start_time = time.time()
                    current_epoch += 1
                    current_batch = 0
    if wandblog:
        run.finish()
    return current_epoch
Example No. 17
def pipeline():
    featurearr, simarr, labelarr = load_data()
    xarr, yarr, aarr, edge_attrarr = graphdatageneration(featurearr, simarr, labelarr)

    dataset = MyDataset(xarr, yarr, aarr, edge_attrarr)

    np.random.seed(10)
    # Train/test split
    idxs = np.random.permutation(len(dataset))
    split = int(0.8 * len(dataset))
    idx_tr, idx_te = np.split(idxs, [split])
    dataset_tr, dataset_te = dataset[idx_tr], dataset[idx_te]
    loader_tr = DisjointLoader(dataset_tr, batch_size=32, epochs=30, shuffle=True)
    loader_te = DisjointLoader(dataset_te, batch_size=32, epochs=1, shuffle=True)

    model = buildmodel(dataset)

    opt = optimizers.Adam(learning_rate=learning_rate)
    loss_fn = losses.MeanSquaredError()


    @tf.function(input_signature=loader_tr.tf_signature(), experimental_relax_shapes=True)
    def train_step(inputs, target):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            loss = loss_fn(target, predictions)
            mae = losses.MeanAbsoluteError()(target, predictions)
            mape = losses.MeanAbsolutePercentageError()(target, predictions)

            loss += sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))
        return loss, mae, mape

    print("training")
    current_batch = 0
    model_loss = 0
    total_mape = 0
    total_mae = 0
    for batch in loader_tr:
        outs, mae, mape = train_step(*batch)

        model_loss += outs
        total_mae += mae
        total_mape += mape
        current_batch += 1
        if current_batch == loader_tr.steps_per_epoch:
            print("MSE: {}".format(model_loss / loader_tr.steps_per_epoch),
                  "MAE: {}".format(total_mae/ loader_tr.steps_per_epoch),
                  "MAPE: {}".format(total_mape/ loader_tr.steps_per_epoch))
            model_loss = 0
            total_mae = 0
            total_mape = 0
            current_batch = 0


    print("testing")
    model_loss = 0
    model_mae = 0
    model_mape = 0
    for batch in loader_te:
        inputs, target = batch
        predictions = model(inputs, training=False)
        model_loss += loss_fn(target, predictions)
        model_mae += losses.MeanAbsoluteError()(target, predictions)
    model_mape += losses.MeanAbsolutePercentageError()(target, predictions)

    model_loss /= loader_te.steps_per_epoch
    model_mae /= loader_te.steps_per_epoch
    model_mape /= loader_te.steps_per_epoch
    print("Done. Test MSE: {}".format(model_loss),
          "Test MAE: {}".format(model_mae),
          "Test MAPE: {}".format(model_mape))
    model.save('/home/som/lab/seed-yzj/newpaper4/laboratory/model/fusion.hdf5')
Example No. 18
# Load data
dataset = datasets.omitted_with_actions(exp_config.files, shuffle=False)
#dataset = dataset[0:2]
#np.set_printoptions(threshold=100000)

# Train/valid/test split
idxs = np.random.permutation(len(dataset))
split_va, split_te = int(0.8 * len(dataset)), int(0.9 * len(dataset))
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
dataset_tr = dataset[idx_tr]
dataset_va = dataset[idx_va]
dataset_te = dataset[idx_te]

print('dataset size:', len(dataset))
dataset_tr = dataset  # FIXME: Using "entire" dataset for now
loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs)
loader_va = DisjointLoader(dataset_va, batch_size=batch_size)
loader_te = DisjointLoader(dataset_te, batch_size=batch_size)

# Parameters
channels = 8            # Number of channel in each head of the first GAT layer
n_attn_heads = 8        # Number of attention heads in first GAT layer
F = dataset.n_node_features
dropout = 0.6           # Dropout rate for the features and adjacency matrix
dropout = 0.  # FIXME: remove
l2_reg = 5e-6           # L2 regularization rate
learning_rate = exp_config.lr
epochs = exp_config.epochs
es_patience = 100       # Patience for early stopping

# Model definition
Example No. 19
ogb_dataset = GraphPropPredDataset(name=dataset_name)
dataset = OGB(ogb_dataset)

# Parameters
F = dataset.n_node_features  # Dimension of node features
S = dataset.n_edge_features  # Dimension of edge features
n_out = dataset.n_labels  # Dimension of the target

# Train/test split
idx = ogb_dataset.get_idx_split()
idx_tr, idx_va, idx_te = idx["train"], idx["valid"], idx["test"]
dataset_tr = dataset[idx_tr]
dataset_va = dataset[idx_va]
dataset_te = dataset[idx_te]

loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs)
loader_te = DisjointLoader(dataset_te, batch_size=batch_size, epochs=1)

################################################################################
# Build model
################################################################################
X_in = Input(shape=(F,))
A_in = Input(shape=(None,), sparse=True)
E_in = Input(shape=(S,))
I_in = Input(shape=(), dtype=tf.int64)

X_1 = ECCConv(32, activation="relu")([X_in, A_in, E_in])
X_2 = ECCConv(32, activation="relu")([X_1, A_in, E_in])
X_3 = GlobalSumPool()([X_2, I_in])
output = Dense(n_out, activation="sigmoid")(X_3)
Example No. 20
        "restart":   False,
        "transform_path": "../db_files/dev_lvl7/transformers.pkl",
        "db_path": "../db_files/dev_lvl7/dev_lvl7_mu_nu_e_classification_v003.db",
        "features":   ["dom_x", "dom_y", "dom_z", "dom_time", "charge_log10", "width", "rqe"],
        "targets":    ["energy_log10", "zenith","azimuth","event_no"],
        "database": "submit"
    }
import dev.testtraindata as dl
reload(dl)
graph_data = dl.graph_data
dataset_train = graph_data(**data_params, traintest='train')
dataset_test = graph_data(**data_params, traintest='mix')

dataset_val = dataset_test

loader_train = DisjointLoader(dataset_train, epochs=epochs, batch_size=batch_size)  # NB: DisjointLoader and BatchLoader produce very different batch structures; see the sketch at the end of this example
loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

loss_func             = get_loss_func(loss_method)
metrics               = get_metrics('energy_angle_zeniazi')
performance_plot      = get_performance("performance_vM2D")
import dev.lr_schedules as lr_module

lr_generator = getattr(lr_module, 'classic')

lr_schedule  = lr_generator(1e-5, 0, 0.95)()


if wandblog:
    import wandb
    run = wandb.init(project = 'IceCube_neutrino', entity = "chri862z", group='new_loss', reinit=True, settings=wandb.Settings(start_method="fork"))
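As the comment on loader_train warns, the two loader types produce very different batch structures. A hedged comparison sketch (not from the original code; whether an `e` tensor appears depends on graph_data providing edge features):

from spektral.data import BatchLoader, DisjointLoader

disjoint = DisjointLoader(dataset_train, batch_size=batch_size, epochs=1)
inputs, y = disjoint.__next__()
# Disjoint mode: inputs is (x, a, i) or (x, a, e, i); the node features of all
# graphs in the batch are stacked, `a` is one block-diagonal sparse adjacency,
# and `i` holds the segment ids mapping each node to its graph.

batched = BatchLoader(dataset_train, batch_size=batch_size, epochs=1)
inputs, y = batched.__next__()
# Batch mode: inputs is (x, a) or (x, a, e); every graph is zero-padded to the
# size of the largest graph in the batch, giving dense tensors with a leading
# batch dimension.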