def FixedFluxScan(self, reaction, lo, hi, n_p):
    """Scan `reaction` over n_p fixed-flux values in [lo, hi],
    re-solving the LP at each point and collecting the solutions."""
    rv = DataSets.DataSet(ItemNames=[reaction, "ObjVal"])
    lo = float(lo)
    hi = float(hi)
    # with n_p points the step is the interval divided by n_p - 1;
    # guard against n_p == 1 to avoid a zero division
    inc = (hi - lo) / (n_p - 1) if n_p > 1 else 0.0
    cur = lo
    for n in range(n_p):
        self.SetFixedFlux({reaction: cur})
        self.Solve(False)
        sol = self.GetPrimSol()
        if len(sol) == 0:
            obval = float("NaN")  # empty solution: mark this point as infeasible
        else:
            obval = self.GetObjVal()
        sol["ObjVal"] = obval
        sol[reaction] = cur
        rv.UpdateFromDic(sol)
        cur += inc
    return rv
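# Usage sketch for FixedFluxScan. This is a minimal, hedged example: it
# assumes `m` is a model object exposing the SetFixedFlux/Solve/GetPrimSol/
# GetObjVal interface used above, and the reaction name "R_upt" is
# hypothetical.
#
#     scan = m.FixedFluxScan("R_upt", lo=0.0, hi=10.0, n_p=11)
#     # each row of `scan` holds one primal solution plus the fixed flux
#     # value and the objective value; "ObjVal" is NaN where the LP was
#     # infeasible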
    # ... tail of get_train_data(): `out` holds the first directory's
    # batch_set and `train_dirs` lists all set directories; merge the batch
    # sets of any additional directories into `out` before returning it.
    if len(train_dirs) > 1:
        for i in range(1, len(train_dirs)):
            data.load_batch_set(train_dirs[i])
            add = data.batch_set
            for key in out:
                out[key] = np.concatenate((out[key], add[key]))
    return out


# (script portion; requires: numpy as np, pandas as pd,
#  matplotlib.pyplot as plt, sklearn.decomposition.PCA)
out_pd = pd.read_excel(
    '/Users/jiedeng/GD/ppt/2020/extreme_filter4_with_state.xlsx')
deepmds = out_pd['local_path'].values
i = 8
data = DataSets(deepmds[i], 'set', shuffle_test=False)
train_data = get_train_data(data)

# project the atomic coordinates onto their first two principal components
pca = PCA(n_components=2)
projected = pca.fit_transform(train_data['coord'])
print(projected.shape)
# NOTE: the original call passed c=digits.target and the long-removed
# lowercase 'spectral' colormap, both leftovers from the sklearn digits
# tutorial; `digits` is undefined here, so the points are drawn uncolored.
plt.scatter(projected[:, 0], projected[:, 1], edgecolor='none', alpha=0.5)
plt.xlabel('component 1')
plt.ylabel('component 2')
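# A hedged alternative for coloring the PCA scatter: instead of the
# undefined digits.target, color each projected frame by a per-frame scalar
# this pipeline actually produces. The "energy" key is an assumption about
# the contents of train_data; any array with one value per frame works.
#
#     sc = plt.scatter(projected[:, 0], projected[:, 1],
#                      c=train_data['energy'], edgecolor='none', alpha=0.5,
#                      cmap='viridis')
#     plt.colorbar(sc, label='energy')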
def __init__(self, systems, set_prefix, batch_size, test_size, rcut,
             do_norm=False):
    self.system_dirs = systems
    self.nsystems = len(self.system_dirs)
    self.batch_size = batch_size
    if isinstance(self.batch_size, int):
        # a single int means the same batch size for every system
        self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
    assert isinstance(self.batch_size, (list, np.ndarray))
    assert len(self.batch_size) == self.nsystems
    self.data_systems = []
    self.ntypes = []
    self.natoms = []
    self.natoms_vec = []
    self.nbatches = []
    self.ncopies = []
    for ii in self.system_dirs:
        self.data_systems.append(DataSets(ii, set_prefix, do_norm))
        sys_all_types = np.loadtxt(os.path.join(ii, "type.raw")).astype(int)
        self.ntypes.append(np.max(sys_all_types) + 1)
    self.sys_ntypes = max(self.ntypes)
    for ii in range(self.nsystems):
        self.natoms.append(self.data_systems[ii].get_natoms())
        self.natoms_vec.append(
            self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int))
        self.nbatches.append(
            self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii]))
        self.ncopies.append(self.data_systems[ii].get_ncopies())
    # check that each dataset is large enough for the requested batch and
    # test sizes
    for ii in range(self.nsystems):
        chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
        if chk_ret is not None:
            raise RuntimeError(
                "required batch size %d is larger than the size %d of the "
                "dataset %s" % (self.batch_size[ii], chk_ret[1], chk_ret[0]))
        chk_ret = self.data_systems[ii].check_test_size(test_size)
        if chk_ret is not None:
            raise RuntimeError(
                "required test size %d is larger than the size %d of the "
                "dataset %s" % (test_size, chk_ret[1], chk_ret[0]))
    for ii in range(self.nsystems):
        print("# find system %s :\t %6d atoms\t %10d batches copied by %s"
              % (self.system_dirs[ii], self.natoms[ii], self.nbatches[ii],
                 self.ncopies[ii]))
    # sampling probability of each system, proportional to its batch count
    self.prob_nbatches = self.nbatches / np.sum(self.nbatches)
    self.test_prop_c = []
    self.test_energy = []
    self.test_force = []
    self.test_virial = []
    self.test_coord = []
    self.test_box = []
    self.test_type = []
    self.default_mesh = []
    for ii in range(self.nsystems):
        test_prop_c, test_energy, test_force, test_virial, \
            test_coord, test_box, test_type = self.data_systems[ii].get_test()
        self.test_prop_c.append(test_prop_c)
        self.test_energy.append(test_energy)
        self.test_force.append(test_force)
        self.test_virial.append(test_virial)
        self.test_coord.append(test_coord)
        self.test_box.append(test_box)
        self.test_type.append(test_type)
        # build the default 3D mesh from the average test box: at least two
        # cells of size >= rcut along each lattice vector (the inner loop
        # variable is renamed from the original's shadowed `ii`)
        ncell = np.ones(3, dtype=np.int32)
        cell_size = np.max(rcut)
        avg_box = np.average(test_box, axis=0)
        avg_box = np.reshape(avg_box, [3, 3])
        for jj in range(3):
            ncell[jj] = int(np.linalg.norm(avg_box[jj]) / cell_size)
            if ncell[jj] < 2:
                ncell[jj] = 2
        default_mesh = np.zeros(6, dtype=np.int32)
        default_mesh[3] = ncell[0]
        default_mesh[4] = ncell[1]
        default_mesh[5] = ncell[2]
        self.default_mesh.append(default_mesh)
    self.pick_idx = 0
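# Construction sketch for the class above (a DataSystem-style class in the
# DeePMD-kit mold; the class name and directory paths here are assumptions).
# Each directory must follow the DeePMD data layout: a type.raw file plus
# set.000-style subdirectories matching set_prefix.
#
#     systems = ['data/sys0', 'data/sys1']          # hypothetical paths
#     ds = DataSystem(systems, set_prefix='set', batch_size=4,
#                     test_size=10, rcut=6.0, do_norm=False)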
def __init__(self, systems, set_prefix, batch_size, test_size, rcut,
             run_opt=None):
    self.system_dirs = systems
    self.nsystems = len(self.system_dirs)
    self.batch_size = batch_size
    if isinstance(self.batch_size, int):
        # a single int means the same batch size for every system
        self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
    assert isinstance(self.batch_size, (list, np.ndarray))
    assert len(self.batch_size) == self.nsystems
    self.data_systems = []
    self.ntypes = []
    self.natoms = []
    self.natoms_vec = []
    self.nbatches = []
    for ii in self.system_dirs:
        self.data_systems.append(DataSets(ii, set_prefix))
        sys_all_types = np.loadtxt(os.path.join(ii, "type.raw")).astype(int)
        self.ntypes.append(np.max(sys_all_types) + 1)
    self.sys_ntypes = max(self.ntypes)
    type_map = []
    for ii in range(self.nsystems):
        self.natoms.append(self.data_systems[ii].get_natoms())
        self.natoms_vec.append(
            self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int))
        self.nbatches.append(
            self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii]))
        type_map.append(self.data_systems[ii].get_type_map())
    self.type_map = self.check_type_map_consistency(type_map)

    # check frame parameters: all systems must agree on the number of fparam
    has_fparam = [ii.numb_fparam() for ii in self.data_systems]
    for ii in has_fparam:
        if ii != has_fparam[0]:
            raise RuntimeError(
                "if any system has frame parameter, then all systems should "
                "have the same number of frame parameter")
    self.has_fparam = has_fparam[0]

    # check that each dataset is large enough for the requested batch and
    # test sizes
    for ii in range(self.nsystems):
        chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
        if chk_ret is not None:
            raise RuntimeError(
                "system %s required batch size %d is larger than the size %d "
                "of the dataset %s"
                % (self.system_dirs[ii], self.batch_size[ii],
                   chk_ret[1], chk_ret[0]))
        chk_ret = self.data_systems[ii].check_test_size(test_size)
        if chk_ret is not None:
            print("WARNING: system %s required test size %d is larger than "
                  "the size %d of the dataset %s"
                  % (self.system_dirs[ii], test_size,
                     chk_ret[1], chk_ret[0]))

    if run_opt is not None:
        self.print_summary(run_opt)

    # sampling probability of each system, proportional to its batch count
    self.prob_nbatches = \
        [float(i) for i in self.nbatches] / np.sum(self.nbatches)

    self.test_data = collections.defaultdict(list)
    self.default_mesh = []
    for ii in range(self.nsystems):
        test_system_data = self.data_systems[ii].get_test()
        for nn in test_system_data:
            self.test_data[nn].append(test_system_data[nn])
        # build the default 3D mesh from the average test box: at least two
        # cells of size >= rcut along each lattice vector
        cell_size = np.max(rcut)
        avg_box = np.average(test_system_data["box"], axis=0)
        avg_box = np.reshape(avg_box, [3, 3])
        ncell = (np.linalg.norm(avg_box, axis=1) / cell_size).astype(np.int32)
        ncell[ncell < 2] = 2
        default_mesh = np.zeros(6, dtype=np.int32)
        default_mesh[3:6] = ncell
        self.default_mesh.append(default_mesh)
    self.pick_idx = 0
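# Standalone illustration of the default_mesh logic above, runnable on its
# own: entries 3-5 of the mesh hold the number of cells of size >= rcut
# along each lattice vector, clipped below at 2. The box and cutoff values
# here are hypothetical.
import numpy as np

avg_box = np.diag([12.0, 9.0, 5.0])  # average 3x3 cell (rows = lattice vectors)
cell_size = 6.0                      # cutoff radius rcut
ncell = (np.linalg.norm(avg_box, axis=1) / cell_size).astype(np.int32)
ncell[ncell < 2] = 2                 # enforce at least two cells per axis
default_mesh = np.zeros(6, dtype=np.int32)
default_mesh[3:6] = ncell
print(default_mesh)                  # -> [0 0 0 2 2 2]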