Code Example #1
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
# ragged_tensor_from_nested_numpy and make_inorp are project helpers
# (presumably from kgcnn); they are assumed importable in this context.

labels = np.expand_dims(labels, axis=-1)
graph_state = np.array([len(x) for x in nodes])  # Graph-level feature: node count per graph

# Make train test split
labels_train, labels_test, nodes_train, nodes_test, edges_train, edges_test, edge_indices_train, edge_indices_test, graph_state_train, graph_state_test = train_test_split(
    labels,
    nodes,
    edges,
    edge_indices,
    graph_state,
    test_size=0.10,
    random_state=42)

# Convert to tf.RaggedTensor or tf.Tensor.
# A copy of the data is generated by ragged_tensor_from_nested_numpy().
nodes_train = ragged_tensor_from_nested_numpy(nodes_train)
edges_train = ragged_tensor_from_nested_numpy(edges_train)
edge_indices_train = ragged_tensor_from_nested_numpy(edge_indices_train)
graph_state_train = tf.constant(graph_state_train)

nodes_test = ragged_tensor_from_nested_numpy(nodes_test)
edges_test = ragged_tensor_from_nested_numpy(edges_test)
edge_indices_test = ragged_tensor_from_nested_numpy(edge_indices_test)
graph_state_test = tf.constant(graph_state_test)

xtrain = nodes_train, edges_train, edge_indices_train, graph_state_train
xtest = nodes_test, edges_test, edge_indices_test, graph_state_test
ytrain = labels_train
ytest = labels_test

model = make_inorp(
    input_node_shape=[None],
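The snippet above is cut off inside the make_inorp() call. For context, below is a minimal sketch of what ragged_tensor_from_nested_numpy() presumably does: it stacks a list of per-graph numpy arrays into one tf.RaggedTensor whose ragged dimension separates the graphs. The behavior is inferred from its usage above; this re-implementation is illustrative, not the library's actual code.

import numpy as np
import tensorflow as tf

def ragged_from_numpy_list(arrays):
    # Flatten all graphs along the item axis and remember each graph's
    # length, so the ragged row partition can split them apart again.
    flat = np.concatenate(arrays, axis=0)
    row_lengths = np.array([len(a) for a in arrays], dtype=np.int64)
    return tf.RaggedTensor.from_row_lengths(flat, row_lengths)

# Two graphs with 3 and 2 nodes of 4 features each -> shape (2, None, 4).
example_nodes = [np.random.rand(3, 4), np.random.rand(2, 4)]
print(ragged_from_numpy_list(example_nodes).shape)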
Code Example #2
labels = np.array(labels == np.arange(7), dtype="float32")  # One-hot encode the 7 classes (assumes labels has shape (N, 1))

# Make train/test split.
# Since there is only one graph in the dataset,
# use a mask to hide the test nodes' labels.
inds = np.arange(len(labels))
ind_train, ind_val = train_test_split(inds, test_size=0.10, random_state=0)
val_mask = np.zeros_like(inds)
train_mask = np.zeros_like(inds)
val_mask[ind_val] = 1
train_mask[ind_train] = 1
val_mask = np.expand_dims(val_mask, axis=0)  # One graph in batch
train_mask = np.expand_dims(train_mask, axis=0)  # One graph in batch

# Make ragged graph tensors with 1 graph in batch
nodes = ragged_tensor_from_nested_numpy([nodes])
edges = ragged_tensor_from_nested_numpy([edge_weight])
edge_indices = ragged_tensor_from_nested_numpy([edge_index])

# Set training data. Note this requires the mask and a batch dimension of 1.
xtrain = nodes, edges, edge_indices
ytrain = np.expand_dims(labels, axis=0)  # One graph in batch

model = make_gat(
    input_node_shape=[None, 1433],
    input_edge_shape=[None, 1],
    # Output
    output_embedd={"output_mode": 'node'},
    output_mlp={
        "use_bias": [True, True, False],
        "units": [64, 32, 7],
Code Example #3
labels, nodes, edge_indices, edges, atoms = mutagenicity_graph()
for i in range(len(labels)):
    # edge_indices[i], edges[i] = add_self_loops_to_edge_indices(edge_indices[i], np.expand_dims(edges[i],axis=-1))
    edges[i] = np.expand_dims(edges[i], axis=-1).astype(np.float32)  # Make edge feature dimension
for i in range(len(labels)):
    nodes[i] = np.array(
        np.expand_dims(nodes[i], axis=-1) == np.array([[1, 3, 6, 7, 8, 9, 11, 15, 16, 17, 19, 20, 35, 53]]),
        dtype="int32")  # One-hot encode atomic numbers against the element list

# Train Test split
labels_train, labels_test, nodes_train, nodes_test, edges_train, edges_test, edge_indices_train, edge_indices_test = train_test_split(
    labels, nodes, edges, edge_indices, train_size=0.8, random_state=1)

# Convert to tf.RaggedTensor or tf.Tensor.
# A copy of the data is generated by ragged_tensor_from_nested_numpy().
nodes_train = ragged_tensor_from_nested_numpy(nodes_train)
edges_train = ragged_tensor_from_nested_numpy(edges_train)
edge_indices_train = ragged_tensor_from_nested_numpy(edge_indices_train)

nodes_test = ragged_tensor_from_nested_numpy(nodes_test)
edges_test = ragged_tensor_from_nested_numpy(edges_test)
edge_indices_test = ragged_tensor_from_nested_numpy(edge_indices_test)

xtrain = nodes_train, edges_train, edge_indices_train
xtest = nodes_test, edges_test, edge_indices_test
ytrain = np.expand_dims(labels_train, axis=-1)
ytest = np.expand_dims(labels_test, axis=-1)

model_args = {"input_node_shape": [None, 14],
              "input_edge_shape": [None, 1],
              "input_embedd": {'input_node_vocab': 55, "input_node_embedd": 64},
              # Output
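The model_args dictionary above is truncated. Since Mutagenicity is a binary graph-classification task and ytrain has a single label column, a plausible continuation compiles with a binary cross-entropy loss. The constructor name make_model below is a hypothetical placeholder, and the hyper-parameters are illustrative assumptions, not from the source.

model = make_model(**model_args)  # hypothetical constructor name
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.fit(xtrain, ytrain,
          validation_data=(xtest, ytest),
          batch_size=32, epochs=100, verbose=2)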
Code Example #4
def train_model_energy(i=0, out_dir=None, mode='training'):
    r"""Train an energy model. Uses precomputed feature. Always require scaler.

    Args:
        i (int, optional): Model index. The default is 0.
        out_dir (str, optional): Directory for this training. The default is None.
        mode (str, optional): Fit-mode to take from hyper-parameters. The default is 'training'.

    Raises:
        ValueError: Wrong input shape.

    Returns:
        error_val (float): Validation mean absolute error of the energy.
    """
    i = int(i)
    np_eps = np.finfo(float).eps

    # Load everything from folder
    training_config = load_json_file(
        os.path.join(out_dir, mode + "_config.json"))
    model_config = load_json_file(os.path.join(out_dir, "model_config.json"))
    i_train = np.load(os.path.join(out_dir, "train_index.npy"))
    i_val = np.load(os.path.join(out_dir, "test_index.npy"))
    scaler_config = load_json_file(os.path.join(out_dir, "scaler_config.json"))

    # training parameters
    unit_label_energy = training_config['unit_energy']
    epo = training_config['epo']
    batch_size = training_config['batch_size']
    epostep = training_config['epostep']
    initialize_weights = training_config['initialize_weights']
    learning_rate = training_config['learning_rate']
    use_callbacks = training_config['callbacks']
    range_dist = model_config["config"]["schnet_kwargs"]["gauss_args"][
        "distance"]

    # Load data.
    data_dir = os.path.dirname(out_dir)
    xyz = read_xyz_file(os.path.join(data_dir, "geometries.xyz"))
    coords = [np.array(x[1]) for x in xyz]
    atoms = [np.array([global_proton_dict[at] for at in x[0]]) for x in xyz]
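    # Build neighbor lists: edges connect atoms within the SchNet
    # interaction cutoff range_dist taken from the model config.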
    range_indices = [
        define_adjacency_from_distance(coordinates_to_distancematrix(x),
                                       max_distance=range_dist)[1]
        for x in coords
    ]
    y = load_json_file(os.path.join(data_dir, "energies.json"))
    y = np.array(y)

    # Fit stats dir
    dir_save = os.path.join(out_dir, "fit_stats")
    os.makedirs(dir_save, exist_ok=True)

    # Callbacks, e.g. a learning-rate schedule
    cbks = []
    for x in use_callbacks:
        if isinstance(x, dict):
            # tf.keras.utils.get_registered_object()
            cb = tf.keras.utils.deserialize_keras_object(x)
            cbks.append(cb)

    # Make Model
    # Only works for Energy model here
    assert model_config["class_name"] == "SchnetEnergy", \
        "Training script only for SchnetEnergy"
    out_model = SchnetEnergy(**model_config["config"])

    # Look for loading weights
    if not initialize_weights:
        out_model.load_weights(os.path.join(out_dir, "model_weights.h5"))
        print("Info: Load old weights at:",
              os.path.join(out_dir, "model_weights.h5"))
    else:
        print("Info: Making new initialized weights.")

    # Recalculate standardization
    scaler = EnergyStandardScaler(**scaler_config["config"])
    scaler.fit(x=None, y=y[i_train])
    _, y1 = scaler.transform(x=None, y=y)

    # Train Test split
    xtrain = [
        ragged_tensor_from_nested_numpy([atoms[i] for i in i_train]),
        ragged_tensor_from_nested_numpy([coords[i] for i in i_train]),
        ragged_tensor_from_nested_numpy([range_indices[i] for i in i_train])
    ]
    xval = [
        ragged_tensor_from_nested_numpy([atoms[i] for i in i_val]),
        ragged_tensor_from_nested_numpy([coords[i] for i in i_val]),
        ragged_tensor_from_nested_numpy([range_indices[i] for i in i_val])
    ]
    ytrain = y1[i_train]
    yval = y1[i_val]

    # Compile model
    # The scaled metric only affects reporting: it undoes the label standardization for the MAE.
    scaled_metric = ScaledMeanAbsoluteError(
        scaling_shape=scaler.energy_std.shape)
    scaled_metric.set_scale(scaler.energy_std)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    lr_metric = get_lr_metric(optimizer)
    out_model.compile(optimizer=optimizer,
                      loss='mean_squared_error',
                      metrics=[scaled_metric, lr_metric, r2_metric])

    scaler.print_params_info()

    out_model.summary()
    print("")
    print("Start fit.")
    hist = out_model.fit(x=xtrain,
                         y=ytrain,
                         epochs=epo,
                         batch_size=batch_size,
                         callbacks=cbks,
                         validation_freq=epostep,
                         validation_data=(xval, yval),
                         verbose=2)
    print("End fit.")
    print("")

    outname = os.path.join(dir_save, "history.json")
    outhist = {
        a: np.array(b, dtype=np.float64).tolist()
        for a, b in hist.history.items()
    }
    with open(outname, 'w') as f:
        json.dump(outhist, f)

    print("Info: Saving auto-scaler to file...")
    scaler.save_weights(os.path.join(out_dir, "scaler_weights.npy"))

    # Plot and Save
    yval_plot = y[i_val]
    ytrain_plot = y[i_train]
    # Convert predictions back to original units with the scaler
    pval = out_model.predict(xval)
    ptrain = out_model.predict(xtrain)
    _, pval = scaler.inverse_transform(y=pval)
    _, ptrain = scaler.inverse_transform(y=ptrain)

    print("Info: Predicted Energy shape:", ptrain.shape)
    print("Info: Predicted Gradient shape:", ptrain.shape)
    print("Info: Plot fit stats...")

    # Plot
    plot_loss_curves(hist.history['mean_absolute_error'],
                     hist.history['val_mean_absolute_error'],
                     val_step=epostep,
                     save_plot_to_file=True,
                     dir_save=dir_save,
                     filename='fit' + str(i),
                     filetypeout='.png',
                     unit_loss=unit_label_energy,
                     loss_name="MAE",
                     plot_title="Energy")

    plot_scatter_prediction(pval,
                            yval_plot,
                            save_plot_to_file=True,
                            dir_save=dir_save,
                            filename='fit' + str(i),
                            filetypeout='.png',
                            unit_actual=unit_label_energy,
                            unit_predicted=unit_label_energy,
                            plot_title="Prediction")

    plot_learning_curve(hist.history['lr'],
                        filename='fit' + str(i),
                        dir_save=dir_save)

    # Save fitting error (MAE); pval and ptrain are still the
    # inverse-transformed predictions computed above.

    error_val = np.mean(np.abs(pval - y[i_val]))
    error_train = np.mean(np.abs(ptrain - y[i_train]))
    print("error_val:", error_val)
    print("error_train:", error_train)
    error_dict = {"train": error_train.tolist(), "valid": error_val.tolist()}
    save_json_file(error_dict, os.path.join(out_dir, "fit_error.json"))

    print("Info: Saving model to file...")
    out_model.save_weights(os.path.join(out_dir, "model_weights.h5"))
    out_model.save(os.path.join(out_dir, "model_tf"))

    return error_val
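A hedged usage sketch for train_model_energy(): out_dir is expected to contain the JSON configs and index files loaded at the top of the function, and its parent directory must hold geometries.xyz and energies.json. The path below is purely illustrative.

if __name__ == "__main__":
    # Hypothetical training folder; adjust to your own directory layout.
    err = train_model_energy(i=0, out_dir="training/schnet_energy_v0", mode="training")
    print("Final validation MAE:", err)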
Code Example #5
edge_indices, _, angle_indices = datasets.get_angle_index(edge_indices)
# Select the LUMO as target and convert from Hartree to eV.
# Center the output with scikit-learn's StandardScaler (mean only, no std scaling).
labels = labels[:, 7:8] * 27.2114
scaler = StandardScaler(with_std=False, with_mean=True)
labels = scaler.fit_transform(labels)
data_unit = 'eV'

# Train Test split
labels_train, labels_test, nodes_train, nodes_test, coord_train, coord_test, edge_indices_train, edge_indices_test, angle_indices_train, angle_indices_test = train_test_split(
    labels, nodes, coord, edge_indices, angle_indices, test_size=0.10, random_state=42)
del labels, nodes, coord, edge_indices, angle_indices  # Free memory after split, if possible

# Convert to tf.RaggedTensor or tf.Tensor.
# A copy of the data is generated by ragged_tensor_from_nested_numpy().
nodes_train = ragged_tensor_from_nested_numpy(nodes_train)
coord_train = ragged_tensor_from_nested_numpy(coord_train)
edge_indices_train = ragged_tensor_from_nested_numpy(edge_indices_train)
angle_indices_train = ragged_tensor_from_nested_numpy(angle_indices_train)

nodes_test = ragged_tensor_from_nested_numpy(nodes_test)
coord_test = ragged_tensor_from_nested_numpy(coord_test)
edge_indices_test = ragged_tensor_from_nested_numpy(edge_indices_test)
angle_indices_test = ragged_tensor_from_nested_numpy(angle_indices_test)

# Define input and output data
xtrain = nodes_train, coord_train, edge_indices_train, angle_indices_train
xtest = nodes_test, coord_test, edge_indices_test, angle_indices_test
ytrain = labels_train
ytest = labels_test

# Get Model with matching input and output properties
model = make_dimnet_pp(input_node_shape=[None],
                       input_embedd={'input_node_vocab': 10,
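The make_dimnet_pp() call above is truncated. Because the labels were mean-centered with the scaler, predictions must be passed back through scaler.inverse_transform() before reporting errors in eV. A hedged sketch follows, with illustrative compile/fit settings not taken from the source.

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss="mean_absolute_error")
model.fit(xtrain, ytrain, validation_data=(xtest, ytest),
          batch_size=32, epochs=100, verbose=2)

# Undo the mean-centering to report the test error in the original units.
pred_test = scaler.inverse_transform(model.predict(xtest))
true_test = scaler.inverse_transform(ytest)
print("Test MAE [" + data_unit + "]:", np.mean(np.abs(pred_test - true_test)))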
Code Example #6
File: schnet_e.py  Project: aimat-lab/NNsForMD
def to_tensor_input(self, x):
    # Convert lists of per-molecule numpy arrays into ragged tensors.
    # x = [atomic numbers, coordinates, edge indices], one list per property.
    atoms = ragged_tensor_from_nested_numpy(x[0])
    coords = ragged_tensor_from_nested_numpy(x[1])
    edge_idx = ragged_tensor_from_nested_numpy(x[2])
    return [atoms, coords, edge_idx]
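A hedged usage sketch for the method above: the variable and object names below are hypothetical, but the input layout follows how x is indexed in the method.

# Hypothetical per-molecule data: atomic numbers, coordinates, edge indices.
x = [atom_number_list, coordinate_list, edge_index_list]
tensor_input = model_instance.to_tensor_input(x)  # model_instance is hypothetical
y_pred = model_instance.predict(tensor_input)  # assuming a Keras-style predict()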