Example #1
    def createDataloader(self, traindb='Data/dataset_10_12_train_combined.db', benchdb='Data/dataset_10_12_test.db',
                         traindata='../../Data/combined1618/', benchdata='../../Data/test/',
                         indexpath='../../Data/INDEX_refined_data.2016.2018',
                         properties=['KD'], threshold=10, cutoff=8, numVal=150, featureset=False,
                         trainBatchsize=8, valBatchsize=1, benchBatchsize=1, natoms=None, props=False, ntrain=4444, ntest=290, splitfile=None,
                         noProtons=False):
        # log the call with a timestamp
        with open("log.txt", "a") as f:
            f.write(str(datetime.datetime.now()) + ' call of createDataloader' + '\n')
        train = schnetpack.data.AtomsData(traindb,
                                          available_properties=properties,
                                          environment_provider=schnetpack.environment.TorchEnvironmentProvider(cutoff,
                                                                                                               torch.device(
                                                                                                                   'cpu')))

        # build the training database only if it is still empty
        if len(train) == 0:
            if featureset:
                PreprocessingSchnet.createDatabaseFromFeatureset(train, threshold=threshold,
                                                                 featureFile=traindata, length=ntrain,
                                                                 noProtons=noProtons)
            else:
                PreprocessingSchnet.createDatabase(train, threshold=threshold, data_path=traindata,
                                                   index_path=indexpath)

        bench = schnetpack.data.AtomsData(benchdb,
                                          available_properties=properties,
                                          environment_provider=schnetpack.environment.TorchEnvironmentProvider(cutoff,
                                                                                                               torch.device(
                                                                                                                   'cpu')))

        # build the benchmark database only if it is still empty
        if len(bench) == 0:
            if featureset:
                PreprocessingSchnet.createDatabaseFromFeatureset(bench, threshold=threshold,
                                                                 featureFile=benchdata, length=ntest,
                                                                 noProtons=noProtons)
            else:
                PreprocessingSchnet.createDatabase(bench, data_path=benchdata, threshold=threshold,
                                                   index_path=indexpath)

        train, val, test = schnetpack.train_test_split(data=train, num_val=numVal,
                                                       num_train=len(train) - numVal,
                                                       split_file=splitfile, log='log.txt')

        print(len(train), len(bench), len(val))

        # Create dataloaders for training, validation, and benchmarking
        train_loader = schnetpack.AtomsLoader(train, batch_size=trainBatchsize, shuffle=True, natoms=natoms,
                                              props=props)
        val_loader = schnetpack.AtomsLoader(val, batch_size=valBatchsize, shuffle=False, natoms=natoms, props=props)
        bench_loader = schnetpack.AtomsLoader(bench, batch_size=benchBatchsize, shuffle=False, natoms=natoms, props=props)

        return train_loader, val_loader, bench_loader
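A minimal call of this method might look as follows; this is a sketch, and the enclosing class name (SchnetTrainer) and the chosen keyword values are assumptions, not part of the original code:

# Hypothetical usage sketch: assumes the method above lives on a class called SchnetTrainer
# and that the default database and feature-file paths exist on disk.
trainer = SchnetTrainer()
train_loader, val_loader, bench_loader = trainer.createDataloader(
    properties=['KD'],   # binding-affinity label, as in the defaults above
    featureset=True,     # build the databases from precomputed feature files
    trainBatchsize=8,
    numVal=150,
)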
Example #2
def get_data(args, properties):
    split_file = os.path.join(args.model_dir, "split.npz") if not args.split_file else args.split_file
    dataset = QM9(args.db, load_only=properties)
    train, val, test = spk.train_test_split(
        dataset,
        num_train=args.ntr,
        num_val=args.nva,
        split_file=split_file
    )
    assert len(train) == args.ntr
    assert len(val) == args.nva
    train_loader = spk.AtomsLoader(train, batch_size=args.bs, shuffle=True, num_workers=args.num_workers)
    val_loader = spk.AtomsLoader(val, batch_size=args.bs, num_workers=args.num_workers)
    test_loader = spk.AtomsLoader(test, batch_size=args.bs, num_workers=args.num_workers)
    return dataset, split_file, train_loader, val_loader, test_loader
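get_data only touches a handful of attributes on args, so a minimal namespace is enough to drive it. The sketch below infers the fields from the accesses above; the concrete values are assumptions:

from types import SimpleNamespace

args = SimpleNamespace(
    model_dir="qm9_model",  # split.npz is written here unless split_file is set
    split_file=None,
    db="data/qm9.db",
    ntr=1000,               # number of training points
    nva=100,                # number of validation points
    bs=64,
    num_workers=2,
)
dataset, split_file, train_loader, val_loader, test_loader = get_data(args, [QM9.U0])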
Example #3
def schnet_predict_F(self, indices):
    m = self.curr_model

    test = self.dataset.create_subset(indices)

    ind0 = indices[0]

    import schnetpack as spk

    test_loader = spk.AtomsLoader(test, batch_size=100)
    preds = []

    import numpy as np
    import torch

    # run on the GPU if one is available
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    m = m.to(device)  # make sure the model sits on the same device as the batches

    for count, batch in enumerate(test_loader):
        print(f"{count}/{len(test_loader)}", end="\r")

        batch = {k: v.to(device) for k, v in batch.items()}
        preds.append(m(batch)["forces"].detach().cpu().numpy())

    F = np.concatenate(preds)
    return F.reshape(len(F), -1)
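Each batch yields a forces tensor of shape (batch_size, n_atoms, 3), so after concatenation the final reshape flattens each structure's forces into a single row. A quick shape check, sketched under the assumption of a 9-atom molecule and a hypothetical predictor instance:

import numpy as np

F = predictor.schnet_predict_F(np.arange(250))  # 'predictor' is a hypothetical instance
assert F.shape == (250, 9 * 3)                  # one flattened 27-component force vector per structure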
Example #4
def predict_E_MD17(self, indices):
    m = self.MD17_model
    test = self.dataset.create_subset(indices)

    import schnetpack as spk
    import numpy as np

    test_loader = spk.AtomsLoader(test, batch_size=1000)
    preds = []
    for count, batch in enumerate(test_loader):
        # TODO: currently only supports a single batch
        preds.append(m(batch)['energy'].detach().cpu().numpy())

    return np.concatenate(preds)
Example #5
def test_set_stds():
    dataset = QM9('qm9.db')
    _, _, test = spk.train_test_split(
        dataset,
        num_train=109000,
        num_val=1000,
        split_file='pst.npz'
    )
    test_loader = spk.AtomsLoader(test, batch_size=len(test))
    # a single batch holds the entire test set
    item = next(iter(test_loader))
    outs = {tn: item[tn].std() for tn in TARGET_NAMES}
    return outs
Example #6
def gnn_pred(cif_file):

    # device = torch.device("cuda" if args.cuda else "cpu")
    device = "cpu"
    sch_model = torch.load(os.path.join("./schnetpack/model", 'best_model'),
                           map_location=torch.device(device))
    test_dataset = AtomsData('./cod_predict.db')
    test_loader = spk.AtomsLoader(test_dataset, batch_size=32)
    prediction_list = []
    for count, batch in enumerate(test_loader):

        # move batch to GPU, if necessary
        print('before batch')
        batch = {k: v.to(device) for k, v in batch.items()}
        print('after batch')
        # apply model
        pred = sch_model(batch)
        prediction_list.extend(
            pred['band_gap'].detach().cpu().numpy().flatten().tolist())

    # the database is expected to hold a single structure, so return its prediction
    return prediction_list[0]
Example #7
def schnet_predict_E(self, indices):
    m = self.curr_model
    test = self.dataset.create_subset(indices)

    import schnetpack as spk
    test_loader = spk.AtomsLoader(test, batch_size=100)
    preds = []

    import numpy as np
    import torch

    # run on the GPU if one is available
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    m = m.to(device)  # make sure the model sits on the same device as the batches

    for count, batch in enumerate(test_loader):
        print(f'{count}/{len(test_loader)}', end='\r')

        batch = {k: v.to(device) for k, v in batch.items()}
        preds.append(m(batch)['energy'].detach().cpu().numpy())

    return np.concatenate(preds)
Example #8

logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))

# basic settings
model_dir = "qm9_model"
os.makedirs(model_dir, exist_ok=True)  # do not fail if the directory already exists
properties = [QM9.U0]

# data preparation
logging.info("get dataset")
dataset = QM9("data/qm9.db", properties=[QM9.U0])
train, val, test = spk.train_test_split(
    dataset, 1000, 100, os.path.join(model_dir, "split.npz")
)
train_loader = spk.AtomsLoader(train, batch_size=64)
val_loader = spk.AtomsLoader(val, batch_size=64)

# statistics
atomrefs = dataset.get_atomrefs(properties)
means, stddevs = train_loader.get_statistics(
    properties, per_atom=True, atomrefs=atomrefs
)

# model build
logging.info("build model")
representation = spk.SchNet(n_interactions=6)
output_modules = [
    spk.Atomwise(
        property=QM9.U0,
        mean=means[QM9.U0],
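The snippet above is cut off mid-definition. Assuming the schnetpack 0.3 API used in the other examples, a typical completion of the output module and model assembly might look like this (a sketch, not the original author's code):

output_modules = [
    spk.Atomwise(
        property=QM9.U0,
        mean=means[QM9.U0],
        stddev=stddevs[QM9.U0],
        atomref=atomrefs[QM9.U0],
    )
]
model = spk.AtomisticModel(representation=representation, output_modules=output_modules)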
Example #9
                                                     length=4444)

bench = schnetpack.data.AtomsData('Data/dataset_10_12_test_features.db', available_properties=['KD', 'props'],
                                  environment_provider=schnetpack.environment.TorchEnvironmentProvider(10.,
                                                                                                       torch.device(
                                                                                                           'cpu')))
if len(bench) == 0:
    PreprocessingSchnet.createDatabaseFromFeatureset(bench, featureFile='Data/Schnet/testSchnetKDeep', threshold=12,
                                                     length=290)

train, val, test = schnetpack.train_test_split(data=train, num_val=150, num_train=len(train) - 150)

print(len(train), len(bench), len(val))

# Create Dataloader for Training
train_loader = schnetpack.AtomsLoader(train, batch_size=8, shuffle=True, natoms=None, props=False)
val_loader = schnetpack.AtomsLoader(val, batch_size=1, shuffle=False, natoms=None, props=False)

# Optional: ShiftedSigmoid activation for the output network (you can use it, but you do not have to)
act = ShiftedSigmoid()

# Create model and optimizer. Note that a modified fork of the original schnetpack is used here:
# it supports noise on the atom positions as well as a feature vector that shrinks across the interaction layers.
model = schnetpack.representation.SchNet(use_noise=False, noise_mean=0.0, noise_std=0.1, chargeEmbedding=True,
                                         ownFeatures=False, nFeatures=8, finalFeature=None,
                                         max_z=200, n_atom_basis=20, n_filters=[32, 24, 16, 8, 4], n_gaussians=25,
                                         normalize_filter=False, coupled_interactions=False, trainable_gaussians=False,
                                         n_interactions=5, cutoff=2.5,
                                         cutoff_network=schnetpack.nn.cutoff.CosineCutoff)
# The modified schnetpack also allows an activation function on the output of the output network
d = schnetpack.atomistic.Atomwise(n_in=20, aggregation_mode='avg',
Example #10
def main(args):

    #building model and dataset
    device = torch.device("cuda" if args.cuda else "cpu")
    environment_provider = spk.environment.AseEnvironmentProvider(cutoff=5.0)
    omdb = './omdb'

    if args.mode == "train":

        if not os.path.exists(args.model_path):
            os.makedirs(args.model_path)

        spk.utils.spk_utils.set_random_seed(None)
        if not os.path.exists(omdb):
            os.makedirs(omdb)

        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=False,
            load_only=[args.property],
            environment_provider=environment_provider)
        # split_path = os.path.join(args.model_path, "split.npz")
        split_path = os.path.join(
            '/home/s3754715/gnn_molecule/schnetpack/model_2020-06-23-18-44-59',
            "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print('-----------')
        print(len(train))
        print(len(val))
        print(len(test))
        print('-------------')
        train_loader = spk.AtomsLoader(train,
                                       batch_size=16,
                                       sampler=RandomSampler(train),
                                       num_workers=4
                                       #pin_memory=True
                                       )
        val_loader = spk.AtomsLoader(val, batch_size=16, num_workers=2)
        test_loader = spk.AtomsLoader(test, batch_size=16, num_workers=2)
        atomref = omdData.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging)
        # means, stddevs = train_loader.get_statistics(
        # 	args.property, get_divide_by_atoms(args),atomref
        # )
        model_train = model(args, omdData, atomref, mean, stddev)
        trainer = train_model(args, model_train, train_loader, val_loader)
        print('started training')
        trainer.train(device=device, n_epochs=args.n_epochs)
        print('training finished')
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'))

        err = 0
        sch_model.eval()
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            batch = {k: v.to(device) for k, v in batch.items()}

            # apply model
            pred = sch_model(batch)

            # calculate absolute error
            tmp = torch.sum(
                torch.abs(pred[args.property] - batch[args.property]))
            tmp = tmp.detach().cpu().numpy()  # detach from graph & convert to numpy
            err += tmp
            print(tmp)
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:',
                  percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")

        err /= len(test)
        print('Test MAE', np.round(err, 3), 'eV =',
              np.round(err / (kcal / mol), 3), 'kcal/mol')

        #plot results
        plot_results(args)

    elif args.mode == "pred":
        print('predictions')
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'),
                               map_location=torch.device(device))
        #reading test data
        # test_dataset = AtomsData('./cod_predict.db')
        # test_loader = spk.AtomsLoader(test_dataset, batch_size=32)

        #reading stored cod list
        #cod_list = np.load('./cod_id_list_old.npy')
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=True,
            load_only=[args.property],
            environment_provider=environment_provider)
        split_path = os.path.join(args.model_path, "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print(len(test))
        test_loader = spk.AtomsLoader(
            test,
            batch_size=32,  #num_workers=2
        )
        mean_abs_err = 0
        prediction_list = []
        actual_value_list = []
        print('Started generating predictions')
        for count, batch in enumerate(test_loader):

            # move batch to GPU, if necessary
            print('before batch')
            batch = {k: v.to(device) for k, v in batch.items()}
            print('after batch')
            # apply model
            pred = sch_model(batch)
            prediction_list.extend(
                pred['band_gap'].detach().cpu().numpy().flatten().tolist())
            actual_value_list.extend(
                batch['band_gap'].detach().cpu().numpy().flatten().tolist())
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:',
                  percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")

        cod_arr = np.genfromtxt(
            os.path.join(
                '/home/s3754715/gnn_molecule/schnetpack/dataset/OMDB-GAP1_v1.1',
                'CODids.csv'))
        cod_list = cod_arr[10000:].tolist()
        results_df = pd.DataFrame({
            'cod': cod_list,
            'prediction': prediction_list,
            'actual': actual_value_list
        })
        results_df.to_csv('./predictions.csv')
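main reads a fixed set of attributes from args (mode, model_path, datapath, property, n_epochs, cuda). A command-line parser that satisfies both branches could be sketched as follows; the defaults are placeholders, not the original configuration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--mode", choices=["train", "pred"], default="train")
parser.add_argument("--model_path", default="./model")
parser.add_argument("--datapath", default="./OMDB-GAP1_v1.1.tar.gz")
parser.add_argument("--property", default="band_gap")
parser.add_argument("--n_epochs", type=int, default=100)
parser.add_argument("--cuda", action="store_true")
main(parser.parse_args())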
Example #11
def run(split_path,dataset_path,n_train=None,n_val=None,n_epochs=1000):

	storage_dir="Info"
	if not os.path.exists(storage_dir):
		os.makedirs(storage_dir)

	if os.path.exists(os.path.join(storage_dir,"checkpoints")):
		shutil.rmtree(os.path.join(storage_dir,"checkpoints"))

	if os.path.exists(os.path.join(storage_dir,"log.csv")):
		os.remove(os.path.join(storage_dir,"log.csv"))

	if os.path.exists(os.path.join(storage_dir,"best_model")):
		os.remove(os.path.join(storage_dir,"best_model"))

	data = MD17(dataset_path)

	atoms, properties = data.get_properties(0)

	train, val, test = spk.train_test_split(
		data=data,
		num_train=n_train,  # pass through the otherwise unused n_train/n_val arguments
		num_val=n_val,
		split_file=split_path,
	)
	
	train_loader = spk.AtomsLoader(train, batch_size=100, shuffle=True)
	val_loader = spk.AtomsLoader(val, batch_size=100)

	means, stddevs = train_loader.get_statistics(
		spk.datasets.MD17.energy, divide_by_atoms=True
	)

	with open("out.txt","w+") as file:
		file.write("IN MD17_train")

	print('Mean atomization energy / atom:      {:12.4f} [kcal/mol]'.format(means[MD17.energy][0]))
	print('Std. dev. atomization energy / atom: {:12.4f} [kcal/mol]'.format(stddevs[MD17.energy][0]))

	n_features=64
	schnet = spk.representation.SchNet(
		n_atom_basis=n_features,
		n_filters=n_features,
		n_gaussians=25,
		n_interactions=6,
		cutoff=5.,
		cutoff_network=spk.nn.cutoff.CosineCutoff
	)


	energy_model = spk.atomistic.Atomwise(
		n_in=n_features,
		property=MD17.energy,
		mean=means[MD17.energy],
		stddev=stddevs[MD17.energy],
		derivative=MD17.forces,
		negative_dr=True
	)

	model = spk.AtomisticModel(representation=schnet, output_modules=energy_model)

	# tradeoff
	rho_tradeoff = 0.1
	optimizer = Adam(model.parameters(), lr=1e-3)

	# loss function
	def loss(batch, result):
		# compute the mean squared error on the energies
		diff_energy = batch[MD17.energy]-result[MD17.energy]
		err_sq_energy = torch.mean(diff_energy ** 2)

		# compute the mean squared error on the forces
		diff_forces = batch[MD17.forces]-result[MD17.forces]
		err_sq_forces = torch.mean(diff_forces ** 2)

		# build the combined loss function
		err_sq = rho_tradeoff*err_sq_energy + (1-rho_tradeoff)*err_sq_forces

		return err_sq


	# set up metrics
	metrics = [
		spk.metrics.MeanAbsoluteError(MD17.energy),
		spk.metrics.MeanAbsoluteError(MD17.forces)
	]

	# construct hooks
	hooks = [
		trn.CSVHook(log_path=storage_dir, metrics=metrics),
		trn.ReduceLROnPlateauHook(
			optimizer,
			patience=150, factor=0.8, min_lr=1e-6,
			stop_after_min=True
		)
	]

	trainer = trn.Trainer(
		model_path=storage_dir,
		model=model,
		hooks=hooks,
		loss_fn=loss,
		optimizer=optimizer,
		train_loader=train_loader,
		validation_loader=val_loader,
	)

	# check if a GPU is available and use a CPU otherwise
	if torch.cuda.is_available():
		device = "cuda"
	else:
		device = "cpu"

	# determine number of epochs and train
	trainer.train(
		device=device,
		n_epochs=n_epochs 
		)

	os.rename(os.path.join(storage_dir,"best_model"),os.path.join(storage_dir,"model_new"))
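A typical invocation of run could look like the sketch below; the database path and split sizes are placeholders, and the stored split file is reused if it already exists:

# Hypothetical call: trains on an MD17-format database and stores logs,
# checkpoints and the renamed best model under Info/.
run(split_path="Info/split.npz", dataset_path="ethanol.db",
    n_train=1000, n_val=100, n_epochs=1000)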
Example #12
def schnet_train_default(self, train_indices, model_path, old_model_path,
                         schnet_args):

    import schnetpack as spk
    import schnetpack.train as trn
    import torch

    n_val = schnet_args.get("n_val", 100)

    #  LOADING train, val, test
    if isinstance(train_indices, int):
        n_train = train_indices

        # Preparing storage
        storage = os.path.join(self.temp_dir, f"schnet_{n_train}")
        if not os.path.exists(storage):
            os.mkdir(storage)
        split_path = os.path.join(storage, "split.npz")

        train, val, test = spk.train_test_split(data=self.dataset,
                                                num_train=n_train,
                                                num_val=n_val,
                                                split_file=split_path)

    else:
        n_train = len(train_indices)

        # Preparing storage
        storage = os.path.join(self.temp_dir, f"schnet_{n_train}")
        if not os.path.exists(storage):
            os.mkdir(storage)
        split_path = os.path.join(storage, "split.npz")

        all_ind = np.arange(len(self.dataset))

        #  train
        train_ind = train_indices
        all_ind = np.delete(all_ind, train_ind)

        # val
        val_ind_ind = np.random.choice(np.arange(len(all_ind)),
                                       n_val,
                                       replace=False)
        val_ind = all_ind[val_ind_ind]
        all_ind = np.delete(all_ind, val_ind_ind)

        split_dict = {
            "train_idx": train_ind,
            "val_idx": val_ind,
            "test_idx": all_ind,
        }
        np.savez_compressed(split_path, **split_dict)

        train, val, test = spk.train_test_split(data=self.dataset,
                                                split_file=split_path)

    print_ongoing_process(f"Preparing SchNet training, {len(train)} points",
                          True)

    data = self.dataset

    batch_size = schnet_args.get("batch_size", 10)
    n_features = schnet_args.get("n_features", 64)
    n_gaussians = schnet_args.get("n_gaussians", 25)
    n_interactions = schnet_args.get("n_interactions", 6)
    cutoff = schnet_args.get("cutoff", 5.0)
    learning_rate = schnet_args.get("learning_rate", 1e-3)
    rho_tradeoff = schnet_args.get("rho_tradeoff", 0.1)
    patience = schnet_args.get("patience", 5)
    n_epochs = schnet_args.get("n_epochs", 100)

    #  PRINTING INFO
    i = {}
    i["batch_size"], i["n_features"] = batch_size, n_features
    i["n_gaussians"], i["n_interactions"] = n_gaussians, n_interactions
    i["cutoff"], i["learning_rate"] = cutoff, learning_rate
    i["rho_tradeoff"], i["patience"] = rho_tradeoff, patience
    i["n_epochs"], i["n_val"] = n_epochs, n_val
    print_table("Parameters", None, None, i, width=20)
    print()

    train_loader = spk.AtomsLoader(train, shuffle=True, batch_size=batch_size)
    val_loader = spk.AtomsLoader(val, batch_size=batch_size)

    #  STATISTICS + PRINTS
    means, stddevs = train_loader.get_statistics("energy",
                                                 divide_by_atoms=True)
    print_info(
        "Mean atomization energy / atom:      {:12.4f} [kcal/mol]".format(
            means["energy"][0]))
    print_info(
        "Std. dev. atomization energy / atom: {:12.4f} [kcal/mol]".format(
            stddevs["energy"][0]))

    #  LOADING MODEL
    print_ongoing_process("Loading representation and model")
    schnet = spk.representation.SchNet(
        n_atom_basis=n_features,
        n_filters=n_features,
        n_gaussians=n_gaussians,
        n_interactions=n_interactions,
        cutoff=cutoff,
        cutoff_network=spk.nn.cutoff.CosineCutoff,
    )

    energy_model = spk.atomistic.Atomwise(
        n_in=n_features,
        property="energy",
        mean=means["energy"],
        stddev=stddevs["energy"],
        derivative="forces",
        negative_dr=True,
    )

    model = spk.AtomisticModel(representation=schnet,
                               output_modules=energy_model)
    print_ongoing_process("Loading representation and model", True)

    #  OPTIMIZER AND LOSS
    print_ongoing_process("Defining loss function and optimizer")
    from torch.optim import Adam

    optimizer = Adam(model.parameters(), lr=learning_rate)

    def loss(batch, result):

        # compute the mean squared error on the energies
        diff_energy = batch["energy"] - result["energy"]
        err_sq_energy = torch.mean(diff_energy**2)

        # compute the mean squared error on the forces
        diff_forces = batch["forces"] - result["forces"]
        err_sq_forces = torch.mean(diff_forces**2)

        # build the combined loss function
        err_sq = rho_tradeoff * err_sq_energy + (1 - rho_tradeoff) * err_sq_forces

        return err_sq

    print_ongoing_process("Defining loss function and optimizer", True)

    # METRICS AND HOOKS
    print_ongoing_process("Setting up metrics and hooks")
    metrics = [
        spk.metrics.MeanAbsoluteError("energy"),
        spk.metrics.MeanAbsoluteError("forces"),
    ]

    hooks = [
        trn.CSVHook(log_path=storage, metrics=metrics),
        trn.ReduceLROnPlateauHook(optimizer,
                                  patience=patience,  # use the configured patience instead of a hard-coded 5
                                  factor=0.8,
                                  min_lr=1e-6,
                                  stop_after_min=True),
    ]
    print_ongoing_process("Setting up metrics and hooks", True)

    print_ongoing_process("Setting up trainer")

    trainer = trn.Trainer(
        model_path=storage,
        model=model,
        hooks=hooks,
        loss_fn=loss,
        optimizer=optimizer,
        train_loader=train_loader,
        validation_loader=val_loader,
    )

    print_ongoing_process("Setting up trainer", True)

    if torch.cuda.is_available():
        device = "cuda"
        print_info("Cuda cores found, training on GPU")

    else:
        device = "cpu"
        print_info("No cuda cores found, training on CPU")

    print_ongoing_process(f"Training {n_epochs} ecpochs, out in {storage}")
    trainer.train(device=device, n_epochs=n_epochs)
    print_ongoing_process(f"Training {n_epochs} epochs, out in {storage}",
                          True)

    os.mkdir(model_path)

    os.rename(os.path.join(storage, "best_model"),
              os.path.join(model_path, "model"))
    shutil.copy(split_path, os.path.join(model_path, "split.npz"))
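Every tunable above is read from schnet_args with a fallback, so an explicit configuration simply mirrors those keys. The dict below lists the defaults as they appear in the code:

schnet_args = {
    "n_val": 100,
    "batch_size": 10,
    "n_features": 64,
    "n_gaussians": 25,
    "n_interactions": 6,
    "cutoff": 5.0,
    "learning_rate": 1e-3,
    "rho_tradeoff": 0.1,
    "patience": 5,
    "n_epochs": 100,
}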
Example #13
from schnetpack.train.metrics import MeanAbsoluteError
from schnetpack.train import build_mse_loss

logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))

# basic settings
model_dir = "qm9_model"
os.makedirs(model_dir, exist_ok=True)  # do not fail if the directory already exists
properties = [QM9.U0]

# data preparation
logging.info("get dataset")
dataset = QM9("data/qm9.db", load_only=[QM9.U0])
train, val, test = spk.train_test_split(dataset, 1000, 100,
                                        os.path.join(model_dir, "split.npz"))
train_loader = spk.AtomsLoader(train, batch_size=64, shuffle=True)
val_loader = spk.AtomsLoader(val, batch_size=64)

# statistics
atomrefs = dataset.get_atomrefs(properties)
means, stddevs = train_loader.get_statistics(properties,
                                             get_atomwise_statistics=True,
                                             single_atom_ref=atomrefs)

# model build
logging.info("build model")
representation = spk.SchNet(n_interactions=6)
output_modules = [
    spk.Atomwise(
        property=QM9.U0,
        mean=means[QM9.U0],
Example #14
def main(args):
    print('predictions')
    device = torch.device("cuda" if args.cuda else "cpu")
    environment_provider = spk.environment.AseEnvironmentProvider(cutoff=5.0)

    sch_model = torch.load(os.path.join(args.model_path, 'best_model'),
                           map_location=torch.device(device))

    # sch_model.representation.embedding.register_forward_hook(inputExtract)
    sch_model.output_modules[0].out_net[1].out_net[1].register_forward_hook(
        outputExtract)

    # for name, module in sch_model.named_modules():
    # 	print(name)

    #reading test data
    # test_dataset = AtomsData('./cod_predict.db')
    # test_loader = spk.AtomsLoader(test_dataset, batch_size=32)

    #reading stored cod list
    #cod_list = np.load('./cod_id_list_old.npy')
    omdData = OrganicMaterialsDatabase(
        args.datapath,
        download=False,
        load_only=[args.property],
        environment_provider=environment_provider)
    split_path = os.path.join(args.model_path, "split.npz")
    train, val, test = spk.train_test_split(data=omdData,
                                            num_train=9000,
                                            num_val=1000,
                                            split_file=split_path)
    #constructing chemical formula and COD array
    for rec_num in range(0, len(omdData)):
        chem_formula = omdData.get_atoms(idx=rec_num).get_chemical_formula()
        formula_dict[chem_formula] = rec_num

    cod_array = np.genfromtxt(os.path.join(
        '/home/s3754715/gnn_molecule/schnetpack/dataset/OMDB-GAP1_v1.1',
        'CODids.csv'),
                              delimiter=',',
                              dtype=(int))

    #to fetch the COD using chemical formula from dictionary at random ids to construct data to print in graph
    for idx in [0, 10, 20, 50]:  # renamed from 'id' to avoid shadowing the builtin
        atom_id_input_arr.append(test.get_atoms(idx=idx).get_chemical_symbols())
        chem_formula = test.get_atoms(idx=idx).get_chemical_formula()
        cod.append(cod_array[formula_dict[chem_formula]])
        print(test[idx]['_neighbors'].numpy().shape)
        neighbour_list.append(test[idx]['_neighbors'].numpy().tolist())

    test_loader = spk.AtomsLoader(
        test,
        batch_size=1,  #num_workers=2
    )
    mean_abs_err = 0
    prediction_list = []
    actual_value_list = []
    print('Started generating predictions')
    #to stop pred after reaching max rec_ct and start constructing graph
    rec_id = 0
    for count, batch in enumerate(test_loader):

        rec_id += 1
        if rec_id > 51:
            break
        # move batch to GPU, if necessary
        batch = {k: v.to(device) for k, v in batch.items()}
        # apply model
        pred = sch_model(batch)
        prediction_list.extend(
            pred['band_gap'].detach().cpu().numpy().flatten().tolist())
        actual_value_list.extend(
            batch['band_gap'].detach().cpu().numpy().flatten().tolist())
        # log progress
        percent = '{:3.2f}'.format(count / len(test_loader) * 100)
        print('Progress:', percent + '%' + ' ' * (5 - len(percent)), end="\r")

    constGraph()
Example #15
# %%
# import matplotlib.pyplot as plt
# from ase.visualize import view
#
# view(atoms, viewer='x3d')
# plt.show()
# %%
train, val, test = spk.train_test_split(
    data=cspbbr3_data,
    num_train=100,
    num_val=100,
    split_file=os.path.join(forcetut, "split.npz"),
)

train_loader = spk.AtomsLoader(train, batch_size=batch_size, shuffle=True)
val_loader = spk.AtomsLoader(val, batch_size=batch_size)

# print(next(iter(train_loader)))
# %%
means, stddevs = train_loader.get_statistics(
    'energy',  # divide_by_atoms=True
)

print('Mean atomization energy / atom:      {:12.4f} [eV]'.format(
    means['energy'][0]))
print('Std. dev. atomization energy / atom: {:12.4f} [eV]'.format(
    stddevs['energy'][0]))

# %%
n_features = 8