Example #1
def main(args):

    mol_list, output_list = readTestData(args)
    if os.path.exists("./cod_predict.db"):
        os.remove("./cod_predict.db")
    new_dataset = AtomsData('./cod_predict.db',
                            available_properties=['band_gap'])
    print('Number of test instances ' + str(len(output_list)))
    new_dataset.add_systems(mol_list, output_list)
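The readTestData helper used above is not shown in this snippet. Below is a minimal sketch of what it might look like, assuming args carries a path to an ASE-readable structure file and that a placeholder band_gap is attached to every structure; the attribute name args.test_file and the placeholder value are assumptions, not part of the original code.

import numpy as np
from ase.io import read

def readTestData(args):
    # Hypothetical reader: args.test_file is assumed to be an ASE-readable
    # file (.xyz, .cif, ...) containing all test structures.
    mol_list = []
    output_list = []
    for at in read(args.test_file, index=':'):
        mol_list.append(at)
        # AtomsData expects every available property, so attach a dummy
        # band_gap; it is not used at prediction time.
        output_list.append({'band_gap': np.array([0.0], dtype=np.float32)})
    return mol_list, output_list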
Example #2
def convert_to_db(file_name):
    mol_list = []
    property_list = []
    atoms = read(os.path.join("./", file_name), index=':')
    property_list.append({'band_gap': np.array([-97208.40600498248], dtype=np.float32)})
    mol_list.extend(atoms)
    if os.path.exists("./cod_predict.db"):
        os.remove("./cod_predict.db")
    new_dataset = AtomsData('./cod_predict.db', available_properties=['band_gap'])
    new_dataset.add_systems(mol_list, property_list)
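A small usage sketch for the converter above. The structure file name is an assumption; get_properties is the SchNetPack 0.3 accessor for a single database entry.

from schnetpack import AtomsData

convert_to_db('structure.cif')          # assumed input file
db = AtomsData('./cod_predict.db')
print('Entries in database:', len(db))
atoms, props = db.get_properties(0)     # ase.Atoms object and property dict
print('Stored band_gap:', props['band_gap'])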
Example #3
    def _write_database(self):
        if len(self.samples) > 0:
            dataset = AtomsData(
                self.dataset, available_properties=self.samples_thresholds[0]
            )
            dataset.add_systems(self.samples, self.samples_thresholds)
            logging.info(
                "{:d} samples written to {:s}.".format(len(self.samples), self.dataset)
            )
        else:
            logging.info("No samples collected.")
Example #4
def get_all_features_spatial(df, end):
    df = shuffle(df)
    df = df.reset_index(drop=True)
    df = df[:end]
    xyz_all = ''
    for i, row in df.iterrows():
        #print(row['homo'])
        xyz = row['xyz']
        xyz_new = xyz.split("\n", 2)[0] + '\n' + str(
            row['homo']) + '\n' + xyz.split("\n", 2)[2]
        xyz_all = xyz_all + xyz_new

    with open("coord.xyz", "w") as xyz_file:
        xyz_file.write(xyz_all)

    atoms = read('coord.xyz', index=':10')
    property_list = []
    for at in atoms:
        # All properties need to be stored as numpy arrays.
        # Note: The shape for scalars should be (1,), not ()
        # Note: GPUs work best with float32 data
        homo = np.array([float(list(at.info.keys())[0])], dtype=np.float32)
        property_list.append({'homo': homo})

    #print('Properties:', property_list)
    new_dataset = AtomsData('./new_dataset.db', available_properties=['homo'])
    new_dataset.add_systems(atoms, property_list)
    '''
    print('Number of reference calculations:', len(new_dataset))
    print('Available properties:')

    for p in new_dataset.available_properties:
        print('-', p)
    print()    

    example = new_dataset[0]
    print('Properties of molecule with id 0:')

    for k, v in example.items():
        print('-', k, ':', v.shape)
    '''
    return new_dataset
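A minimal invocation sketch, assuming the DataFrame provides an extended-xyz string per molecule in an 'xyz' column and the HOMO energy in a 'homo' column; the pickle file name is an assumption.

import pandas as pd

df = pd.read_pickle('qm9_frame.pkl')   # assumed: contains 'xyz' and 'homo' columns
dataset = get_all_features_spatial(df, end=1000)
print('Structures in dataset:', len(dataset))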
Example #5
def gnn_pred(cif_file):

    # device = torch.device("cuda" if args.cuda else "cpu")
    device = "cpu"
    sch_model = torch.load(os.path.join("./schnetpack/model", 'best_model'),
                           map_location=torch.device(device))
    test_dataset = AtomsData('./cod_predict.db')
    test_loader = spk.AtomsLoader(test_dataset, batch_size=32)
    prediction_list = []
    for count, batch in enumerate(test_loader):

        # move batch to GPU, if necessary
        print('before batch')
        batch = {k: v.to(device) for k, v in batch.items()}
        print('after batch')
        # apply model
        pred = sch_model(batch)
        prediction_list.extend(
            pred['band_gap'].detach().cpu().numpy().flatten().tolist())

    return prediction_list[0]
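An end-to-end sketch combining convert_to_db and gnn_pred from the snippets above; the input file name is an assumption, while the model path and database name follow the code.

cif_file = 'structure.cif'        # assumed input structure
convert_to_db(cif_file)           # writes ./cod_predict.db
band_gap = gnn_pred(cif_file)     # loads ./schnetpack/model/best_model on CPU
print('Predicted band gap:', band_gap)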
Example #6
    args = arg_parser.parse_args()
    run_params = args.__dict__
    params_hash = hashlib.sha256(
        json.dumps(run_params).encode()).hexdigest()[:6]

    # Determine the output directory
    test_dir = os.path.join(
        'networks',
        f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
    )
    os.makedirs(test_dir)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Making the data loaders
    train_data = AtomsData('datasets/train.db')
    train_loader = AtomsLoader(train_data,
                               args.batch_size,
                               shuffle=True,
                               pin_memory=True,
                               num_workers=2)
    test_data = AtomsData('datasets/test.db')
    test_loader = AtomsLoader(test_data, args.batch_size)
    valid_data = AtomsData('datasets/valid.db')
    valid_loader = AtomsLoader(valid_data,
                               args.batch_size,
                               pin_memory=True,
                               num_workers=2)

    # Make the model
    mean, std = train_loader.get_statistics('ip',
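The snippet is truncated here. A hedged sketch of how the call and the model construction typically continue in SchNetPack 0.3, where the per-property statistics standardize the Atomwise output head; divide_by_atoms and the model settings below are assumptions, not part of the original code.

    import schnetpack as spk

    # Assumed continuation of the truncated call above: statistics of the
    # 'ip' target, later used to standardize the output head.
    mean, std = train_loader.get_statistics('ip', divide_by_atoms=True)

    schnet = spk.representation.SchNet(n_interactions=args.num_messages)
    output = spk.atomistic.Atomwise(property='ip',
                                    mean=mean['ip'],
                                    stddev=std['ip'])
    model = spk.AtomisticModel(representation=schnet, output_modules=output)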
Example #7
    def load_dataset(self):
        '''Loads the dataset and stores it in `.dataset`

        Currently supported:
        xyz format - needs to be extended with energy in the comment and forces
        npz format - as given by sGDML, needs to contain 'R', 'E', 'F'
        db  format - as given by schnetpack
        '''
        path = self.args['dataset_file']

        if path is None:
            print_error(
                f"No dataset given. Please use the -d arg followed by the path to the dataset."
            )
        elif not os.path.exists(path):
            print_error(f"Dataset path {path} is not valid.")

        ext = os.path.splitext(path)[-1]
        #xyz file
        if ext == ".xyz":
            print_ongoing_process(f"Loading xyz file {path}")
            try:
                file = open(path)
                dat = read_concat_ext_xyz(file)
                data = {
                    'R': np.array(dat[0]),
                    'z': dat[1],
                    'E': np.reshape(dat[2], (len(dat[2]), 1)),
                    'F': np.array(dat[3])
                }
            except Exception as e:
                print(e)
                print_error("Couldn't load .xyz file.")

            print_ongoing_process(f"Loaded xyz file {path}", True)

        #npz file
        elif ext == ".npz":
            print_ongoing_process(f"Loading npz file {path}")
            try:
                data = np.load(path, allow_pickle=True)
            except Exception as e:
                print(e)
                print_error("Couldn't load .npz file.")

            print_ongoing_process(f"Loaded npz file {path}", True)

        # schnetpack .db
        elif ext == '.db':
            print_ongoing_process(f"Loading db file {path}")

            from schnetpack import AtomsData
            data = AtomsData(path)

            print_ongoing_process(f"Loaded db file {path}", True)

        else:
            print_error(
                f"Unsupported data type {ext} for given dataset {path} (xyz, npz, schnetpack .db supported)."
            )

        self.dataset = data
        self.dataset_path = path
        if self.get_para('load_dataset', 'post_processing') is not None:
            print_ongoing_process('Post-processing dataset')
            self.call_para('load_dataset', 'post_processing', args=[self])
            print_ongoing_process('Post-processing dataset', True)
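A hedged helper sketch illustrating the two layouts load_dataset can leave in .dataset: a plain dict with 'R', 'z', 'E', 'F' arrays for xyz/npz input, or a schnetpack AtomsData for .db input. The helper name and the 'energy' key are assumptions.

import numpy as np
from schnetpack import AtomsData

def first_entry(dataset):
    # Hypothetical accessor for whatever load_dataset stored in .dataset.
    if isinstance(dataset, AtomsData):
        atoms, props = dataset.get_properties(0)      # ase.Atoms + property dict
        return np.asarray(atoms.get_positions()), props.get('energy')
    return dataset['R'][0], dataset['E'][0]            # plain array layout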
Example #8
for at in tqdm(atoms):
    # All properties need to be stored as numpy arrays.
    # Note: The shape for scalars should be (1,), not ()
    # Note: GPUs work best with float32 data
    # print(at.info.keys())
    # print(list(at.info.keys()))
    # energy = np.array([float(list(at.info.keys())[0])], dtype=np.float32)
    energy = np.array([float(at.info['energy'])], dtype=np.float32)
    forces = np.array(at.get_forces(), dtype=np.float32)
    property_list.append({'energy': energy, 'forces': forces})
    # print(energy)
    # print(type(energy))

# print('Properties:', property_list)

new_dataset = AtomsData('./40-cspbbr3-300K.db',
                        available_properties=['energy', 'forces'])
new_dataset.add_systems(atoms, property_list)

print('Number of reference calculations:', len(new_dataset))
print('Available properties:')

for p in new_dataset.available_properties:
    print('-', p)
print()

example = new_dataset[0]
print('Properties of molecule with id 0:')

for k, v in example.items():
    print('-', k, ':', v.shape)
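As a follow-up, the database written above can be split and wrapped in a SchNetPack loader for training; a short sketch assuming SchNetPack 0.3, with arbitrary split sizes and batch size.

import schnetpack as spk

dataset = spk.AtomsData('./40-cspbbr3-300K.db')
train, val, test = spk.train_test_split(dataset,
                                        num_train=30,
                                        num_val=5,
                                        split_file='split.npz')
train_loader = spk.AtomsLoader(train, batch_size=8, shuffle=True)
print('Training structures:', len(train))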