def main(args):
    mol_list, output_list = readTestData(args)
    # Start from a clean database file; AtomsData appends to an existing one.
    if os.path.exists("./cod_predict.db"):
        os.remove("./cod_predict.db")
    new_dataset = AtomsData('./cod_predict.db', available_properties=['band_gap'])
    print('Number of test instances ' + str(len(output_list)))
    new_dataset.add_systems(mol_list, output_list)
def convert_to_db(file_name):
    mol_list = []
    property_list = []
    atoms = read(os.path.join("./", file_name), index=':')
    mol_list.extend(atoms)
    # add_systems expects one property dict per structure; the band_gap value
    # here is only a dummy placeholder so the property column exists.
    for _ in atoms:
        property_list.append(
            {'band_gap': np.array([-97208.40600498248], dtype=np.float32)})
    if os.path.exists("./cod_predict.db"):
        os.remove("./cod_predict.db")
    new_dataset = AtomsData('./cod_predict.db', available_properties=['band_gap'])
    new_dataset.add_systems(mol_list, property_list)
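# A minimal round trip using convert_to_db; "structures.cif" is a hypothetical
# input file (any ASE-readable format works), a sketch rather than part of the
# original pipeline.
convert_to_db("structures.cif")

# Re-opening an existing .db infers available_properties from its metadata.
db = AtomsData('./cod_predict.db')
print('structures stored:', len(db))
print('properties:', db.available_properties)  # ['band_gap']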
def _write_database(self):
    if len(self.samples) > 0:
        # The keys of the first property dict double as the available properties.
        dataset = AtomsData(
            self.dataset, available_properties=self.samples_thresholds[0]
        )
        dataset.add_systems(self.samples, self.samples_thresholds)
        logging.info(
            "{:d} samples written to {:s}.".format(len(self.samples), self.dataset)
        )
    else:
        logging.info("No samples collected.")
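# For _write_database to work, self.samples must hold ase.Atoms objects and
# self.samples_thresholds a parallel list of property dicts; iterating over the
# first dict yields its keys as the property names. A hypothetical sketch of
# the state the method expects (names and values are illustrative only):
import numpy as np
from ase import Atoms

samples = [Atoms('H2O', positions=[[0.0, 0.0, 0.0],
                                   [0.0, 0.0, 0.96],
                                   [0.93, 0.0, -0.24]])]
samples_thresholds = [{'energy': np.array([-76.4], dtype=np.float32)}]
# AtomsData(..., available_properties=samples_thresholds[0]) then registers
# 'energy' as the only property.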
def get_all_features_spatial(df, end):
    df = shuffle(df)
    df = df.reset_index(drop=True)
    df = df[:end]
    xyz_all = ''
    for i, row in df.iterrows():
        # Inject the HOMO value into the comment line of each xyz block.
        xyz = row['xyz']
        xyz_new = (xyz.split("\n", 2)[0] + '\n' + str(row['homo']) + '\n'
                   + xyz.split("\n", 2)[2])
        xyz_all = xyz_all + xyz_new
    with open("coord.xyz", "w") as xyz_file:
        xyz_file.write(xyz_all)
    atoms = read('coord.xyz', index=':10')  # only the first 10 structures
    property_list = []
    for at in atoms:
        # All properties need to be stored as numpy arrays.
        # Note: The shape for scalars should be (1,), not ()
        # Note: GPUs work best with float32 data
        # ASE parses the bare comment-line token into at.info as a key.
        homo = np.array([float(list(at.info.keys())[0])], dtype=np.float32)
        property_list.append({'homo': homo})
    new_dataset = AtomsData('./new_dataset.db', available_properties=['homo'])
    new_dataset.add_systems(atoms, property_list)
    # (optional) inspect the dataset: len(new_dataset),
    # new_dataset.available_properties, new_dataset[0]
    return new_dataset
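# Sanity check (sketch): read the stored homo values back by indexing the
# dataset, which returns one dict of tensors per structure.
dataset = AtomsData('./new_dataset.db')
for idx in range(min(3, len(dataset))):
    example = dataset[idx]
    # Scalars were stored with shape (1,), so the first entry is the value.
    print(idx, 'homo =', float(example['homo'][0]))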
def gnn_pred(cif_file):
    # device = torch.device("cuda" if args.cuda else "cpu")
    device = "cpu"
    sch_model = torch.load(os.path.join("./schnetpack/model", 'best_model'),
                           map_location=torch.device(device))
    # Predictions are read from ./cod_predict.db, which must already have been
    # written (e.g. by convert_to_db) before this is called.
    test_dataset = AtomsData('./cod_predict.db')
    test_loader = spk.AtomsLoader(test_dataset, batch_size=32)
    prediction_list = []
    for count, batch in enumerate(test_loader):
        # move batch to GPU, if necessary
        batch = {k: v.to(device) for k, v in batch.items()}
        # apply model
        pred = sch_model(batch)
        prediction_list.extend(
            pred['band_gap'].detach().cpu().numpy().flatten().tolist())
    return prediction_list[0]
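# Hypothetical end-to-end call tying the two helpers together: gnn_pred ignores
# its argument and reads ./cod_predict.db, so convert_to_db must run first.
convert_to_db("some_structure.cif")
band_gap = gnn_pred("some_structure.cif")
print('predicted band gap:', band_gap)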
args = arg_parser.parse_args()
run_params = args.__dict__
params_hash = hashlib.sha256(
    json.dumps(run_params).encode()).hexdigest()[:6]

# Determine the output directory
test_dir = os.path.join(
    'networks',
    f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
)
os.makedirs(test_dir)
with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
    json.dump(run_params, fp)

# Making the data loaders
train_data = AtomsData('datasets/train.db')
train_loader = AtomsLoader(train_data, args.batch_size, shuffle=True,
                           pin_memory=True, num_workers=2)
test_data = AtomsData('datasets/test.db')
test_loader = AtomsLoader(test_data, args.batch_size)
valid_data = AtomsData('datasets/valid.db')
valid_loader = AtomsLoader(valid_data, args.batch_size,
                           pin_memory=True, num_workers=2)

# Make the model
mean, std = train_loader.get_statistics('ip',
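# The get_statistics call above is cut off in the source. A plausible
# completion (an assumption, not the original code): schnetpack's
# AtomsLoader.get_statistics returns dicts keyed by property name, and
# divide_by_atoms=True is a guess at how the 'ip' target was normalised.
mean, std = train_loader.get_statistics('ip', divide_by_atoms=True)
ip_mean, ip_std = mean['ip'], std['ip']  # tensors used to standardise targets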
def load_dataset(self):
    '''
    Loads the dataset and stores it in `.dataset`.
    Currently supported:
        xyz format - extended xyz with the energy in the comment line and forces
        npz format - as given by sGDML; needs to contain 'R', 'E', 'F'
        db format  - as given by schnetpack
    '''
    path = self.args['dataset_file']
    if path is None:
        print_error(
            "No dataset given. Please use the -d arg followed by the path to the dataset."
        )
    elif not os.path.exists(path):
        print_error(f"Dataset path {path} is not valid.")
    ext = os.path.splitext(path)[-1]

    # xyz file
    if ext == ".xyz":
        print_ongoing_process(f"Loading xyz file {path}")
        try:
            file = open(path)
            dat = read_concat_ext_xyz(file)
            data = {
                'R': np.array(dat[0]),
                'z': dat[1],
                'E': np.reshape(dat[2], (len(dat[2]), 1)),
                'F': np.array(dat[3])
            }
        except Exception as e:
            print(e)
            print_error("Couldn't load .xyz file.")
        print_ongoing_process(f"Loaded xyz file {path}", True)

    # npz file
    elif ext == ".npz":
        print_ongoing_process(f"Loading npz file {path}")
        try:
            data = np.load(path, allow_pickle=True)
        except Exception as e:
            print(e)
            print_error("Couldn't load .npz file.")
        print_ongoing_process(f"Loaded npz file {path}", True)

    # schnetpack .db
    elif ext == '.db':
        print_ongoing_process(f"Loading db file {path}")
        from schnetpack import AtomsData
        data = AtomsData(path)
        print_ongoing_process(f"Loaded db file {path}", True)

    else:
        print_error(
            f"Unsupported data type {ext} for given dataset {path} "
            "(xyz, npz, schnetpack .db supported)."
        )

    self.dataset = data
    self.dataset_path = path

    if self.get_para('load_dataset', 'post_processing') is not None:
        print_ongoing_process('Post-processing dataset')
        self.call_para('load_dataset', 'post_processing', args=[self])
        print_ongoing_process('Post-processing dataset', True)
for at in tqdm(atoms):
    # All properties need to be stored as numpy arrays.
    # Note: The shape for scalars should be (1,), not ()
    # Note: GPUs work best with float32 data
    energy = np.array([float(at.info['energy'])], dtype=np.float32)
    forces = np.array(at.get_forces(), dtype=np.float32)
    property_list.append({'energy': energy, 'forces': forces})

new_dataset = AtomsData('./40-cspbbr3-300K.db',
                        available_properties=['energy', 'forces'])
new_dataset.add_systems(atoms, property_list)

print('Number of reference calculations:', len(new_dataset))
print('Available properties:')
for p in new_dataset.available_properties:
    print('-', p)
print()

example = new_dataset[0]
print('Properties of molecule with id 0:')
for k, v in example.items():
    print('-', k, ':', v.shape)
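# With the database written, it can feed schnetpack's training utilities
# directly; a minimal sketch assuming spk.train_test_split, with hypothetical
# split sizes for the 40 reference frames.
import schnetpack as spk

train, val, test = spk.train_test_split(
    data=new_dataset, num_train=30, num_val=5, split_file='split.npz')
train_loader = spk.AtomsLoader(train, batch_size=4, shuffle=True)
val_loader = spk.AtomsLoader(val, batch_size=4)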