def runCode(self, parrent, name, **kwargs):
    """Set up the job through the parent class and start the worker.

    ``super(parrent, self).run`` is called with ``kwargs['program']`` and
    ``name`` and returns the worker together with the (possibly updated)
    job name.  What happens next depends on whether ``name`` already
    exists on disk:

    * it does not exist: the job is started;
    * it exists and ``self.setting['rename_if_exist']`` is truthy: the
      first free ``<name>_<n>`` is stored in ``kwargs['root_dir']`` and
      the job is started;
    * it exists and ``self.setting['overwrite']`` is truthy: the job is
      started after a warning;
    * otherwise the collision is only reported and ``None`` is returned.

    Returns whatever ``worker.start`` returns when the job is started.
    """
    worker, name = super(parrent, self).run(
        kwargs['program'], name, **kwargs)

    if not kwargs.get('no_subfolder'):
        self.setting['root_dir'] = name

    def run():
        # kwargs is read at call time, so a 'root_dir' assigned below is
        # seen by self.write.
        if 'charge' in kwargs:
            self.setChargeMultiplicity(kwargs['charge'], 1)
        inp = self.write(name, **kwargs)
        return worker.start(inp, kwargs.get('new_name'))

    if not os.path.exists(name):
        return run()

    if self.setting.get('rename_if_exist'):
        # Start counting after the single-digit siblings already present,
        # then skip forward until the candidate is free.
        suffix = len(glob.glob("%s_[0-9]" % name)) + 1
        candidate = name + '_%d' % suffix
        while os.path.exists(candidate):
            suffix += 1
            candidate = name + '_%d' % suffix
        kwargs['root_dir'] = candidate
        qtk.warning("folder %s exists, running under %s" % (name, candidate))
        return run()

    if self.setting['overwrite']:
        qtk.warning("Overwrite existing folder %s" % name)
        return run()

    qtk.report("QMInp.run", "%s exists" % name)
def PPCheck(xc, element, pp_file_str, **kwargs):
    """Return the local path of a CPMD pseudopotential, downloading if needed.

    Parameters:
        xc: exchange-correlation label; ``'lda'`` is mapped to ``'pade'``.
        element: element symbol used to build the remote file name.
        pp_file_str: file name of the pseudopotential inside
            ``qtk.setting.cpmd_pp``.
        dcacp (keyword): when truthy the remote path is built under
            ``qtk.setting.cpmd_dcacp_url`` instead of
            ``qtk.setting.cpmd_pp_url``.

    Returns:
        The path ``os.path.join(qtk.setting.cpmd_pp, pp_file_str)``, or
        ``None`` when something failed (a warning is issued instead of
        raising).

    The file is fetched only when it is missing locally and
    ``qtk.setting.download_pp`` is truthy.
    """
    if xc == 'lda':
        xc = 'pade'
    # Looked up outside the try block on purpose: an unknown element is not
    # a download problem and must not be hidden by the handler below.
    ne = qtk.n2ve(element)
    try:
        if kwargs.get('dcacp'):
            pp_path = os.path.join(
                xc.upper(), "%s_DCACP_%s" % (element, xc.upper()))
            if element in qtk.setting.dcacp_dict:
                pp_path = pp_path + "_%s" % qtk.setting.dcacp_dict[element]
            pp_file = os.path.join(qtk.setting.cpmd_dcacp_url, pp_path)
        else:
            pp_path = os.path.join(xc, element + '-q' + str(ne))
            pp_file = os.path.join(qtk.setting.cpmd_pp_url, pp_path)
        saved_pp_path = os.path.join(qtk.setting.cpmd_pp, pp_file_str)
        if not os.path.exists(saved_pp_path) and qtk.setting.download_pp:
            if pp_file:
                # Download first so a failed request never leaves an empty
                # file behind.
                pp_content = urllib2.urlopen(pp_file).read()
                qtk.report('PPCheck', 'pp file %s not found in %s, ' \
                           % (pp_file_str, qtk.setting.cpmd_pp) + \
                           'but found on internet, download now...')
                with open(saved_pp_path, 'w') as new_pp_file:
                    new_pp_file.write(pp_content)
        return saved_pp_path
    except Exception:
        # Best effort: report and return None.  KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed.
        qtk.warning('something wrong with pseudopotential')
def converged(self):
    """Tell whether the optimisation should stop.

    Returns ``True`` (after closing ``self.logfile``) when the latest
    penalty is below ``self.cutoff``, when the penalty has stayed unchanged
    for more than ``self.converge_length`` consecutive checks, or when
    ``self.step`` has reached ``self.max_step``.  Returns ``False``
    otherwise; ``self.conv_itr`` counts the consecutive unchanged checks
    and is reset to 0 whenever the penalty moves.
    """
    # first step is set to NOT converge
    if len(self.penalty) <= 1:
        return False

    latest = self.penalty[-1]

    # target reached
    if latest < self.cutoff:
        self.logfile.close()
        return True

    # stuck at the best solution found so far
    if latest == self.penalty[-2]:
        if self.conv_itr > self.converge_length:
            self.logfile.close()
            return True
        self.conv_itr += 1
        return False

    # step budget exhausted
    if self.step >= self.max_step:
        qtk.report("Optimizer", "max_step reached stopping", color='red')
        self.logfile.close()
        return True

    self.conv_itr = 0
    return False
def push(self, penalty, result, coord):
    """Record one optimisation step.

    Increments ``self.step``, appends the three values to
    ``self.penalty``, ``self.result`` and ``self.coord``, reports the step
    through ``qtk.report`` and calls ``self.write_log()``.
    """
    self.step += 1
    self.penalty.append(penalty)
    self.result.append(result)
    self.coord.append(coord)

    message = "result:%.4E penalty:%.4E coord%s itr:%d" % (
        result, penalty, coord, self.step)
    qtk.report("Optimizer", message)
    self.write_log()
def PPCheck(xc, pp_theory, pp_path, element):
    """Return the path of a BigDFT pseudopotential, downloading if needed.

    Parameters:
        xc: exchange-correlation label, used only in the local file name.
        pp_theory: pseudopotential flavour; ``'nlcc'`` selects the page at
            ``qtk.setting.bigdft_pp_nlcc_url``, anything else is mapped
            through ``theory_dict`` and fetched below
            ``qtk.setting.bigdft_pp_url``.
        pp_path: local directory holding the pseudopotential files.
        element: element symbol.

    Returns:
        ``os.path.join(pp_path, '<element>_<xc>_<pp_theory>')``.  The file
        is written only when it was missing, ``qtk.setting.download_pp`` is
        truthy and non-empty content was extracted from the page.
    """
    theory_dict = {
        'lda': 'pade',
        'pbe0': 'pbe',
        'pbesol': 'pbe',
        'hse06': 'pbe',
    }
    name = '%s_%s_%s' % (element, xc, pp_theory)
    pp_file = os.path.join(pp_path, name)
    if not os.path.exists(pp_file) and qtk.setting.download_pp:
        if pp_theory != 'nlcc':
            pp_theory = theory_dict.get(pp_theory, pp_theory)
            url_root = qtk.setting.bigdft_pp_url
            element_str = element + '-q%d' % qtk.n2ve(element)
            url = url_root + '%s/%s' % (pp_theory, element_str)
            # Stay empty unless the whole extraction succeeds: a failed
            # request must not crash ''.join, and a half-parsed page must
            # not be written out as a pseudopotential.
            pp = ''
            try:
                page = urllib2.urlopen(url).readlines()
                pattern = re.compile(r'^.*</*pre>.*$')
                pp_se = [line for line in page if pattern.match(line)]
                pp_start = page.index(pp_se[0])
                pp_end = page.index(pp_se[1])
                body = page[pp_start:pp_end]
                # drop the markup preceding the first data line
                body[0] = body[0].split('>')[-1]
                pp = ''.join(body)
            except Exception:
                qtk.warning('something wrong with url:%s' % url)
        else:
            url = qtk.setting.bigdft_pp_nlcc_url
            page = urllib2.urlopen(url).readlines()
            marker = '"psppar.%s' % element
            string = [line for line in page if marker in line][-1]
            index = page.index(string) + 2
            collected = []
            itr = 0
            # Bounds are tested before indexing so a page that ends
            # without '</pre>' cannot raise IndexError.
            while index + itr < len(page) \
                    and itr < 20 \
                    and '</pre>' not in page[index + itr]:
                collected.append(page[index + itr])
                itr = itr + 1
            pp = ''.join(collected)
        if pp:
            qtk.report('', 'pp file %s not found in %s.' %\
                       (name, pp_path) +\
                       ' But found in cp2k page, download now...')
            with open(pp_file, 'w') as new_pp_file:
                new_pp_file.write(pp)
    return pp_file
def push(self, penalty, result, coord):
    """Record one optimisation step and dump the history periodically.

    Increments ``self.step``, appends the three values to
    ``self.penalty``, ``self.result`` and ``self.coord``, reports the step
    and calls ``self.write_log()``.  ``self.dump()`` is called when the
    step is a positive multiple of ``self.dump_len`` and more than
    ``3 * self.dump_len`` coordinates are stored.
    """
    self.step += 1
    self.penalty.append(penalty)
    self.result.append(result)
    self.coord.append(coord)

    message = "result:%.4E penalty:%.4E coord%s itr:%d" % (
        result, penalty, coord, self.step)
    qtk.report("Optimizer", message)
    self.write_log()

    dump_due = (
        self.step > 0
        and self.step % self.dump_len == 0
        and len(self.coord) > 3 * self.dump_len
    )
    if dump_due:
        self.dump()
def PPCheck(xc, pp_theory, pp_path, element):
    """Return the path of a BigDFT pseudopotential, downloading if needed.

    Parameters:
        xc: exchange-correlation label, used only in the local file name.
        pp_theory: pseudopotential flavour; ``'nlcc'`` selects the page at
            ``qtk.setting.bigdft_pp_nlcc_url``, anything else is mapped
            through ``theory_dict`` and fetched below
            ``qtk.setting.bigdft_pp_url``.
        pp_path: local directory holding the pseudopotential files.
        element: element symbol.

    Returns:
        ``os.path.join(pp_path, '<element>_<xc>_<pp_theory>')``.  The file
        is written only when it was missing, ``qtk.setting.download_pp`` is
        truthy and non-empty content was extracted from the page.
    """
    theory_dict = {
        'lda': 'pade',
    }
    name = '%s_%s_%s' % (element, xc, pp_theory)
    pp_file = os.path.join(pp_path, name)
    if not os.path.exists(pp_file) and qtk.setting.download_pp:
        if pp_theory != 'nlcc':
            pp_theory = theory_dict.get(pp_theory, pp_theory)
            url_root = qtk.setting.bigdft_pp_url
            element_str = element + '-q%d' % qtk.n2ve(element)
            url = url_root + '%s/%s' % (pp_theory, element_str)
            # Stay empty unless the whole extraction succeeds: a failed
            # request must not crash ''.join, and a half-parsed page must
            # not be written out as a pseudopotential.
            pp = ''
            try:
                page = urllib2.urlopen(url).readlines()
                pattern = re.compile(r'^.*</*pre>.*$')
                pp_se = [line for line in page if pattern.match(line)]
                pp_start = page.index(pp_se[0])
                pp_end = page.index(pp_se[1])
                body = page[pp_start:pp_end]
                # drop the markup preceding the first data line
                body[0] = body[0].split('>')[-1]
                pp = ''.join(body)
            except Exception:
                qtk.warning('something wrong with url:%s' % url)
        else:
            url = qtk.setting.bigdft_pp_nlcc_url
            page = urllib2.urlopen(url).readlines()
            marker = '"psppar.%s' % element
            string = [line for line in page if marker in line][-1]
            index = page.index(string) + 2
            collected = []
            itr = 0
            # Bounds are tested before indexing so a page that ends
            # without '</pre>' cannot raise IndexError.
            while index + itr < len(page) \
                    and itr < 20 \
                    and '</pre>' not in page[index + itr]:
                collected.append(page[index + itr])
                itr = itr + 1
            pp = ''.join(collected)
        if pp:
            qtk.report('', 'pp file %s not found in %s.' %\
                       (name, pp_path) +\
                       ' But found in cp2k page, download now...')
            with open(pp_file, 'w') as new_pp_file:
                new_pp_file.write(pp)
    return pp_file
def PPCheck(xc, element, pp_file_str, **kwargs):
    """Download an espresso pseudopotential when it is missing locally.

    Parameters:
        xc: accepted for signature compatibility; not used here.
        element: element symbol, validated through ``qtk.n2ve``.
        pp_file_str: file name inside ``qtk.setting.espresso_pp`` and
            below ``qtk.setting.espresso_pp_url``.

    Nothing is returned.  The download happens only when the file does not
    exist and ``qtk.setting.download_pp`` is truthy; failures are reported
    with a warning instead of raising.
    """
    # The value is unused; the call is kept so that whatever qtk.n2ve does
    # for an unknown element still happens before any download.
    qtk.n2ve(element)
    saved_pp_path = os.path.join(qtk.setting.espresso_pp, pp_file_str)
    if not os.path.exists(saved_pp_path) and qtk.setting.download_pp:
        qtk.status("PPCheck", pp_file_str)
        url = os.path.join(qtk.setting.espresso_pp_url, pp_file_str)
        try:
            # Download first so a failed request never leaves an empty
            # file behind.
            pp_content = urllib2.urlopen(url).read()
            qtk.report('', 'pp file %s not found in %s. ' \
                       % (pp_file_str, qtk.setting.espresso_pp) + \
                       'but found in espresso page, download now...')
            with open(saved_pp_path, 'w') as new_pp_file:
                new_pp_file.write(pp_content)
        except Exception:
            # Best effort; KeyboardInterrupt and SystemExit are
            # deliberately no longer swallowed.
            qtk.warning('something wrong with pseudopotential')
def view(self, name=None):
    """Show a copy of the molecule in pymol through a temporary xyz file.

    Parameters:
        name: optional prefix for the temporary file; ``'pymol_tmp_'`` is
            used when it is falsy.

    The temporary file is named with the class-wide counter
    ``Molecule.mol_id`` (incremented on every call) and is removed after
    pymol has loaded it, also when loading fails.
    """
    tmp = copy.deepcopy(self)

    # Evaluate the flag BEFORE importing so that the import below cannot
    # influence it.
    # NOTE(review): the exact meaning of qtk.imported is not visible here;
    # the branch selection is kept exactly as it was.
    launch = qtk.imported('pymol')

    # The import must run on every path: an import statement anywhere in
    # the function makes `pymol` a local name, so importing only inside
    # one branch made the other branch fail with UnboundLocalError.
    import pymol

    if launch:
        qtk.report("Molecule", "initializing pymol...", color=None)
        pymol.finish_launching()
    else:
        pymol.cmd.reinitialize()
        sleep(0.5)

    if name:
        tmp_file = name + "_tmp_" + str(Molecule.mol_id) + '.xyz'
    else:
        tmp_file = 'pymol_tmp_' + str(Molecule.mol_id) + '.xyz'
    Molecule.mol_id = Molecule.mol_id + 1

    tmp.write_xyz(tmp_file)
    try:
        pymol.cmd.load(tmp_file)
    finally:
        # never leave the scratch file behind
        os.remove(tmp_file)
def push(self, penalty, result, coord):
    """Record one Monte Carlo step.

    Appends ``penalty`` and ``result`` to their histories, stores
    ``penalty`` as ``self.current_penalty`` and reports the step.  When
    ``self.annealing`` is truthy the temperature is lowered through
    ``self.decrease_T()`` and the coordinate is stored together with the
    formatted temperature; otherwise the bare coordinate is stored.
    ``self.write_log()`` is then called, and ``self.dump()`` as well when
    the step is a positive multiple of ``self.dump_len`` and more than
    ``3 * self.dump_len`` coordinates are stored.
    """
    self.penalty.append(penalty)
    self.result.append(result)
    self.current_penalty = penalty

    # coord is wrapped in a 1-tuple: a bare tuple on the right of % would
    # be unpacked as format arguments and raise TypeError (or print only
    # its single element).
    qtk.report("MonteCarlo",
               "step:%d T:%f penalty:%f result:%f "%\
               (self.step, self.T, penalty, result) + \
               "coord:%s" % (coord,), color='green')

    if self.annealing:
        self.decrease_T()
        self.coord.append([coord, {'T': "%.3E" % self.T}])
    else:
        self.coord.append(coord)
    self.write_log()

    if self.step > 0 \
            and self.step % self.dump_len == 0 \
            and len(self.coord) > 3 * self.dump_len:
        self.dump()
def push(self, penalty, result, coord):
    """Record one Monte Carlo step.

    Appends ``penalty`` and ``result`` to their histories, stores
    ``penalty`` as ``self.current_penalty`` and reports the step.  When
    ``self.annealing`` is truthy the temperature is lowered through
    ``self.decrease_T()`` and the coordinate is stored together with the
    formatted temperature; otherwise the bare coordinate is stored.
    ``self.write_log()`` is then called, and ``self.dump()`` as well when
    the step is a positive multiple of ``self.dump_len`` and more than
    ``3 * self.dump_len`` coordinates are stored.
    """
    self.penalty.append(penalty)
    self.result.append(result)
    self.current_penalty = penalty

    # coord is wrapped in a 1-tuple: a bare tuple on the right of % would
    # be unpacked as format arguments and raise TypeError (or print only
    # its single element).
    qtk.report("MonteCarlo",
               "step:%d T:%f penalty:%f result:%f "%\
               (self.step, self.T, penalty, result) + \
               "coord:%s" % (coord,), color='green')

    if self.annealing:
        self.decrease_T()
        self.coord.append([coord, {'T': "%.3E" % self.T}])
    else:
        self.coord.append(coord)
    self.write_log()

    if self.step > 0 \
            and self.step % self.dump_len == 0 \
            and len(self.coord) > 3 * self.dump_len:
        self.dump()
def stScore(
    data,
    n_samples_list=None,
    alphas=[1e-11],
    n_components_list=None,
    ols_components=None,
    regression_matrix=None,
    cv=None,
    threads=1,
    st_setting={},
  ):
    """Cross-validated ridge scores over alphas, component sets and sizes.

    Parameters:
        data: mapping whose ``'E'`` entry is the regression target.
        n_samples_list: training-set sizes; each fold's training indices
            are truncated to the first ``n_samples`` entries.
        alphas: ridge regularisation strengths to scan.
        n_components_list: passed one by one, with ``ols_components``, to
            ``_get_best_components_from_folds`` to obtain column selections
            of ``regression_matrix``.
        regression_matrix: 2-D array whose columns are selected per
            component set.
        cv: iterable of ``(train, test)`` pairs.
        threads: forwarded as ``n_jobs`` to ``cross_val_score``.
        st_setting: accepted but not used by this function.

    Returns:
        The negated array of scores, nested as
        ``[alpha][component set][n_samples][fold]``.

    The mutable defaults are never modified here.
    """
    vec = data['E']

    qtk.report("ML.tools.stScores setting", "\n",
               "alphas:", alphas, "\n",
               "n_components_list:", n_components_list, "\n",
               "ols_components:", ols_components, "\n",
               "n_samples_list:", n_samples_list, "\n",
               "cross_validation:", cv, "\n",
               "cv_threads:", threads, "\n",
               "final score format: [alphas, gammas, samples, cv]")

    selected_components_list = [
        _get_best_components_from_folds(n_components, ols_components)
        for n_components in n_components_list]

    # Materialise the folds once: cv is walked again for every
    # (alpha, components, n_samples) combination, which would silently
    # yield nothing after the first pass if cv were a one-shot iterator.
    folds = list(cv)

    all_st_scores = []
    for alpha in alphas:
        alpha_scores = []
        all_st_scores.append(alpha_scores)
        for selected_components in selected_components_list:
            component_scores = []
            alpha_scores.append(component_scores)
            reg = regression_matrix[:, selected_components]
            for n_samples in n_samples_list:
                cv_ = [(train[:n_samples], test) for train, test in folds]
                # Use the scanned alpha: the regulariser was hard-coded to
                # 1e-8, which made every entry along the alpha axis equal.
                scores = cross_val_score(Ridge(alpha=alpha), reg, vec,
                                         cv=cv_, n_jobs=threads,
                                         scoring='mean_absolute_error')
                component_scores.append(scores)
    return -np.array(all_st_scores)
def remoteRun(cmd, status, session):
    """Run ``cmd`` through ``session.exec_command`` and report its output.

    ``status`` and ``cmd`` are reported first.  The standard output and
    standard error handles returned by ``exec_command`` are passed through
    ``trimMsg``; each result is reported only when it is non-empty.
    """
    qtk.report('submit', status)
    qtk.report('submit-remote-command', cmd)

    _stdin, remote_out, remote_err = session.exec_command(cmd)
    tagged_output = (
        ('submit-remote-output', trimMsg(remote_out)),
        ('submit-remote-error', trimMsg(remote_err)),
    )
    for tag, text in tagged_output:
        if len(text) > 0:
            qtk.report(tag, text)
def PPCheck(xc, element, pp_file_str, **kwargs):
    """Return the local path of a CPMD pseudopotential, downloading if needed.

    Parameters:
        xc: exchange-correlation label, used as the remote sub-directory.
        element: element symbol; the remote file is
            ``<element>-q<valence electrons>``.
        pp_file_str: file name of the pseudopotential inside
            ``qtk.setting.cpmd_pp``.

    Returns:
        ``os.path.join(qtk.setting.cpmd_pp, pp_file_str)``, or ``None``
        when something failed (a warning is issued instead of raising).

    The file is fetched from below ``qtk.setting.cpmd_pp_url`` only when
    it is missing locally and ``qtk.setting.download_pp`` is truthy.
    """
    # Looked up outside the try block on purpose: an unknown element is not
    # a download problem and must not be hidden by the handler below.
    ne = qtk.n2ve(element)
    try:
        pp_path = os.path.join(xc, element + '-q' + str(ne))
        pp_file = os.path.join(qtk.setting.cpmd_pp_url, pp_path)
        saved_pp_path = os.path.join(qtk.setting.cpmd_pp, pp_file_str)
        if not os.path.exists(saved_pp_path) and qtk.setting.download_pp:
            if pp_file:
                # Download first so a failed request never leaves an empty
                # file behind.
                pp_content = urllib2.urlopen(pp_file).read()
                qtk.report('', 'pp file %s not found in %s. ' \
                           % (pp_file_str, qtk.setting.cpmd_pp) + \
                           'but found in cp2k page, download now...')
                with open(saved_pp_path, 'w') as new_pp_file:
                    new_pp_file.write(pp_content)
        return saved_pp_path
    except Exception:
        # Best effort: report and return None.  KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed.
        qtk.warning('something wrong with pseudopotential')
def write(self, cpmd_name, espresso_name):
    """Write the CPMD pseudopotential and convert it for espresso.

    The CPMD file is written with ``cpmd_write`` unless ``cpmd_name``
    already exists.  The converter ``qtk.setting.espresso_cpmd2upf_exe`` is
    then run on it when the espresso file is missing, or when both names
    are equal and the CPMD file has just been written.  On success the
    produced ``<cpmd_name>.UPF`` is renamed to ``espresso_name``; a
    non-zero exit status only produces a warning.
    """
    cpmd_exists = os.path.exists(cpmd_name)
    if cpmd_exists:
        qtk.prompt('cpmd pp path:%s exist' % cpmd_name)
    else:
        qtk.report("PP", "writing cpmd PP file")
        cpmd_write(self, cpmd_name)

    freshly_written_same_file = \
        cpmd_name == espresso_name and not cpmd_exists
    needs_conversion = \
        freshly_written_same_file or not os.path.exists(espresso_name)
    if not needs_conversion:
        qtk.prompt('espresso pp path:%s exist' % espresso_name)
        return

    qtk.report("PP", 'start converting Goedecker PP')
    command = "%s %s" % (qtk.setting.espresso_cpmd2upf_exe, cpmd_name)
    conv_pp = sp.Popen(command, shell=True)
    conv_pp.wait()
    if conv_pp.returncode == 0:
        os.rename(cpmd_name + '.UPF', espresso_name)
    else:
        qtk.warning('conversion failed...')
def __init__(self, A, **kwargs):
    """Centre (and optionally rescale) ``A`` and decompose it by SVD.

    Keyword arguments:
        fraction: cumulative-variance threshold that fixes ``self.npc``
            (default 0.9).
        axis: axis along which mean and standard deviation are taken
            (default 0).
        scale: divide the centred data by the standard deviation
            (default True).

    Attributes set: ``data_original`` (the input object), ``data`` (the
    transformed deep copy), ``mean``, ``std``, ``U``, ``d``, ``Vt``,
    ``eigen`` (``d**2``), ``sumvariance`` (normalised cumulative sum of
    ``eigen``), ``npc`` and ``dinv`` (reciprocal singular values, 0 for
    those not larger than ``1e-6`` times the largest).
    """
    fraction = kwargs.get('fraction', 0.9)
    axis = kwargs.get('axis', 0)
    scale = kwargs.get('scale', True)

    self.data_original = A
    centered = copy.deepcopy(A)
    self.std = centered.std(axis=axis)
    self.mean = centered.mean(axis=axis)

    qtk.report("PCA", "centering data")
    centered -= self.mean
    if scale:
        qtk.report('PCA', 'rescaling data')
        # self.std itself is edited: zero spreads become 1 so that the
        # division below is well defined.
        self.std[self.std == 0] = 1
        centered /= self.std

    assert 0 <= fraction
    # A = U . diag(d) . Vt
    self.U, self.d, self.Vt = np.linalg.svd(centered, full_matrices=False)
    assert np.all(self.d[:-1] >= self.d[1:])  # sorted

    self.eigen = self.d**2
    self.sumvariance = np.cumsum(self.eigen)
    self.sumvariance /= self.sumvariance[-1]
    self.npc = np.searchsorted(self.sumvariance, fraction) + 1

    threshold = self.d[0] * 1e-6
    self.dinv = np.array([1 / s if s > threshold else 0 for s in self.d])
    self.data = centered
def __init__(self, A, **kwargs):
    """Centre (and optionally rescale) ``A`` and decompose it by SVD.

    Keyword arguments:
        fraction: cumulative-variance threshold that fixes ``self.npc``
            (default 0.9).
        axis: axis along which mean and standard deviation are taken
            (default 0).
        scale: divide the centred data by the standard deviation
            (default True).

    Attributes set: ``data_original`` (the input object), ``data`` (the
    transformed deep copy), ``mean``, ``std``, ``U``, ``d``, ``Vt``,
    ``eigen`` (``d**2``), ``sumvariance`` (normalised cumulative sum of
    ``eigen``), ``npc`` and ``dinv`` (reciprocal singular values, 0 for
    those not larger than ``1e-6`` times the largest).
    """
    fraction = kwargs.get('fraction', 0.9)
    axis = kwargs.get('axis', 0)
    scale = kwargs.get('scale', True)

    self.data_original = A
    work = copy.deepcopy(A)
    self.std = work.std(axis=axis)
    self.mean = work.mean(axis=axis)

    qtk.report("PCA", "centering data")
    work -= self.mean
    if scale:
        qtk.report('PCA', 'rescaling data')
        # self.std itself is edited: zero spreads become 1 so that the
        # division below is well defined.
        self.std[self.std == 0] = 1
        work /= self.std

    assert 0 <= fraction
    # A = U . diag(d) . Vt
    self.U, self.d, self.Vt = np.linalg.svd(work, full_matrices=False)
    assert np.all(self.d[:-1] >= self.d[1:])  # sorted

    self.eigen = self.d**2
    self.sumvariance = np.cumsum(self.eigen)
    self.sumvariance /= self.sumvariance[-1]
    self.npc = np.searchsorted(self.sumvariance, fraction) + 1

    smallest_kept = self.d[0] * 1e-6
    self.dinv = np.array(
        [1 / value if value > smallest_kept else 0 for value in self.d])
    self.data = work
def setDescriptor(self, descriptor, **kwargs):
    """Select the descriptor and build one ``DataPoint`` per matching file.

    Parameters:
        descriptor: descriptor name; only ``'CoulombMatrix'`` is accepted,
            anything else is passed to ``qtk.exit``.
        threads (keyword): number of workers, default 1.  With more than
            one, the data points are built through ``qtk.parallelize`` and
            REPLACE ``self.data``; otherwise they are APPENDED to the
            existing ``self.data``.
        matrix_size (keyword): Coulomb-matrix size; 0 when not given.

    Files are taken from ``glob(self.path + '/' + self.pattern)`` in sorted
    order.  ``self.data_size`` is set to ``len(self.data)`` at the end.
    """
    self.descriptor = descriptor
    self.threads = int(kwargs.get('threads', 1))

    qtk.report("DataSet", "reading folder", self.path)
    qtk.report("Descriptor", self.descriptor)

    if descriptor == 'CoulombMatrix':
        if 'matrix_size' not in kwargs:
            qtk.warning("matrix size not assigned, " + \
                        "using default value")
            qtk.warning("matrix size WILL CHANGE " + \
                        "according to number of atoms in the molecule")
            self.matrix_size = 0
        else:
            self.matrix_size = kwargs['matrix_size']
    else:
        # %-formatting keeps the words separated and also copes with a
        # descriptor that is not a string.
        qtk.exit("descriptor %s is not implemented" % descriptor)

    files = sorted(glob.glob(self.path + '/' + self.pattern))
    if self.threads > 1:
        data_list = [[descriptor, self.matrix_size, {'xyz': data}]
                     for data in files]
        self.data = qtk.parallelize(DataPoint, data_list,
                                    threads=self.threads)
    else:
        for data in files:
            self.data.append(
                DataPoint(descriptor, self.matrix_size, xyz=data))

    self.data_size = len(self.data)
def runCode(self, parrent, name, **kwargs):
    """Set up the job through the parent class and start the worker.

    ``super(parrent, self).run`` is called with ``kwargs['program']`` and
    ``name`` and returns the worker together with the (possibly updated)
    job name.  The job is started when ``name`` does not exist on disk, or
    when it exists and ``self.setting['overwrite']`` is truthy (after a
    warning).  Otherwise the collision is only reported and ``None`` is
    returned.

    Returns whatever ``worker.start`` returns when the job is started.
    """
    worker, name = super(parrent, self).run(
        kwargs['program'], name, **kwargs)

    if not kwargs.get('no_subfolder'):
        self.setting['root_dir'] = name

    def run():
        if 'charge' in kwargs:
            self.setChargeMultiplicity(kwargs['charge'], 1)
        inp = self.write(name, **kwargs)
        return worker.start(inp, kwargs.get('new_name'))

    if not os.path.exists(name):
        return run()

    if self.setting['overwrite']:
        qtk.warning("Overwrite existing folder %s" % name)
        return run()

    qtk.report("QMInp.run", "%s exists" % name)
def setDescriptor(self, descriptor, **kwargs):
    """Select the descriptor and build one ``DataPoint`` per matching file.

    Parameters:
        descriptor: descriptor name; only ``'CoulombMatrix'`` is accepted,
            anything else is passed to ``qtk.exit``.
        threads (keyword): number of workers, default 1.  With more than
            one, the data points are built through ``qtk.parallelize`` and
            REPLACE ``self.data``; otherwise they are APPENDED to the
            existing ``self.data``.
        matrix_size (keyword): Coulomb-matrix size; 0 when not given.

    Files are taken from ``glob(self.path + '/' + self.pattern)`` in sorted
    order.  ``self.data_size`` is set to ``len(self.data)`` at the end.
    """
    self.descriptor = descriptor
    self.threads = int(kwargs.get('threads', 1))

    qtk.report("DataSet", "reading folder", self.path)
    qtk.report("Descriptor", self.descriptor)

    if descriptor == 'CoulombMatrix':
        if 'matrix_size' not in kwargs:
            qtk.warning("matrix size not assigned, " + \
                        "using default value")
            qtk.warning("matrix size WILL CHANGE " + \
                        "according to number of atoms in the molecule")
            self.matrix_size = 0
        else:
            self.matrix_size = kwargs['matrix_size']
    else:
        # %-formatting keeps the words separated and also copes with a
        # descriptor that is not a string.
        qtk.exit("descriptor %s is not implemented" % descriptor)

    files = sorted(glob.glob(self.path + '/' + self.pattern))
    if self.threads > 1:
        data_list = [[descriptor, self.matrix_size, {'xyz': data}]
                     for data in files]
        self.data = qtk.parallelize(DataPoint, data_list,
                                    threads=self.threads)
    else:
        for data in files:
            self.data.append(
                DataPoint(descriptor, self.matrix_size, xyz=data))

    self.data_size = len(self.data)
def sample(self, *args):
    """Draw and evaluate one Monte Carlo move.

    Parameters:
        *args: when exactly one argument is given it is used as the trial
            coordinate; otherwise ``self.getInput()`` provides one.

    The move is evaluated with ``self.evaluate(coord, self.target_input)``.
    When earlier coordinates exist and the new penalty is not lower than
    ``self.current_penalty``, the move is kept with probability
    ``exp(-|dE| / T)`` (0 when ``T <= 0``); a rejected move triggers a
    recursive retry with a freshly generated coordinate.

    Returns:
        ``(penalty, out, coord)`` in the serial case (``self.parallel == 1``
        or ``self.step == 1``).  In the parallel case nothing is returned;
        the triple is put on ``self.qout`` only if the queue is still empty
        and a result is available.
    """
    def boltzmann(dE):
        if self.T > 0:
            return np.exp(-abs(dE) / float(self.T))
        return 0

    if len(args) == 1:
        new_coord = args[0]
    else:
        new_coord = self.getInput()
    penalty, out = self.evaluate(new_coord, self.target_input)

    # accept if penalty < current_penalty,
    # otherwise go through the MonteCarlo cycle
    if len(self.coord) > 0 and penalty >= self.current_penalty:
        rand = random.uniform(0, 1)
        diff = penalty - self.current_penalty
        boltz = boltzmann(diff)
        qtk.report("MonteCarlo", "accept worse results?",
                   "rand:%.4f boltz:%.4f dE:%.4f itr:%i"\
                   % (rand, boltz, diff, self.sample_itr))
        # rejection from the MonteCarlo cycle, proceed by recursion
        if rand > boltz:
            qtk.report("MonteCarlo", "new move rejected", color='yellow')
            new_coord = self.getInput()  # generate new random input
            self.sample_itr += 1  # record MonteCarlo iteration
            if self.parallel == 1 or self.step == 1:
                # Serial retry.  The fresh coordinate is the one that gets
                # evaluated, and the coordinate of the finally accepted
                # move is kept, so the returned triple is consistent.
                penalty, out, new_coord = self.sample(new_coord)
            # only the first finished thread puts to the queue
            elif self.qout.empty():
                try:
                    penalty, out, new_coord = self.sample(new_coord)
                    print("%s %s" % (penalty, out))
                # unpacking fails when the nested call returned None,
                # i.e. it handled the queue itself
                except TypeError:
                    qtk.report("MonteCarlo",
                               "job done from another thread")
            # another thread already delivered: nothing to report
            else:
                out = None
                penalty = None
                new_coord = None

    # serial return
    if self.parallel == 1 or self.step == 1:
        return penalty, out, new_coord
    # parallel case, put to queue instead of return; `is not None`
    # replaces a type() comparison that could never be false
    elif self.qout.empty() and penalty is not None:
        self.qout.put([penalty, out, new_coord])
def sample(self, *args):
    """Draw and evaluate one Monte Carlo move.

    Parameters:
        *args: when exactly one argument is given it is used as the trial
            coordinate; otherwise ``self.getInput()`` provides one.

    The move is evaluated with ``self.evaluate(coord, self.penalty_input)``.
    When earlier coordinates exist and the new penalty is not lower than
    ``self.current_penalty``, the move is kept with probability
    ``exp(-|dE| / T)`` (0 when ``T <= 0``); a rejected move triggers a
    recursive retry with a freshly generated coordinate.

    Returns:
        ``(penalty, out, coord)`` in the serial case (``self.parallel == 1``
        or ``self.step == 1``).  In the parallel case nothing is returned;
        the triple is put on ``self.qout`` only if the queue is still empty
        and a result is available.
    """
    def boltzmann(dE):
        if self.T > 0:
            return np.exp(-abs(dE)/float(self.T))
        return 0

    if len(args) == 1:
        new_coord = args[0]
    else:
        new_coord = self.getInput()
    penalty, out = self.evaluate(new_coord, self.penalty_input)

    # accept if penalty < current_penalty,
    # otherwise go through the MonteCarlo cycle
    if len(self.coord) > 0 and penalty >= self.current_penalty:
        rand = random.uniform(0, 1)
        diff = penalty - self.current_penalty
        boltz = boltzmann(diff)
        qtk.report("MonteCarlo", "accept worse results?",
                   "rand:%.4f boltz:%.4f dE:%.4f itr:%i"\
                   % (rand, boltz, diff, self.sample_itr))
        # rejection from the MonteCarlo cycle, proceed by recursion
        if rand > boltz:
            qtk.report("MonteCarlo", "new move rejected", color='yellow')
            new_coord = self.getInput()  # generate new random input
            self.sample_itr += 1  # record MonteCarlo iteration
            if self.parallel == 1 or self.step == 1:
                # Serial retry.  The fresh coordinate is the one that gets
                # evaluated, and the coordinate of the finally accepted
                # move is kept, so the returned triple is consistent.
                penalty, out, new_coord = self.sample(new_coord)
            # only the first finished thread puts to the queue
            elif self.qout.empty():
                try:
                    penalty, out, new_coord = self.sample(new_coord)
                    print("%s %s" % (penalty, out))
                # unpacking fails when the nested call returned None,
                # i.e. it handled the queue itself
                except TypeError:
                    qtk.report("MonteCarlo",
                               "job done from another thread")
            # another thread already delivered: nothing to report
            else:
                out = None
                penalty = None
                new_coord = None

    # serial return
    if self.parallel == 1 or self.step == 1:
        return penalty, out, new_coord
    # parallel case, put to queue instead of return; `is not None`
    # replaces a type() comparison that could never be false
    elif self.qout.empty() and penalty is not None:
        self.qout.put([penalty, out, new_coord])
def findBonds(self, ratio=setting.bond_ratio, **kwargs):
    """Detect bonds by distance and split the molecule into segments.

    Parameters:
        ratio: scale applied to the sum of the two covalent radii (each
            including its tabulated uncertainty); a pair closer than that
            is treated as bonded.
        no_report (keyword): when truthy the progress report is skipped.
        Remaining keywords are forwarded to ``self.getSegment``.

    Side effects:
        ``self.segments`` and ``self.bond_types`` are rebuilt from scratch.
        ``self.bonds[n]`` receives one dict per bond (atomic numbers and
        indices ordered by atomic number, length, name, energy); entries
        left over from an earlier call are not removed here.  Every
        connected group of bonded atoms, and then every unbonded atom,
        becomes one entry of ``self.segments`` named ``<name>_<index>``.

    A bond whose type cannot be resolved from the bond table produces a
    warning and is labelled with the last candidate key (``X#Y``); its
    energy is NaN when no table entry exists.
    """
    del self.segments
    del self.bond_types
    self.segments = []
    self.bond_types = {}
    if 'no_report' not in kwargs or not kwargs['no_report']:
        qtk.report("Molecule",
                   "finding bonds with cutoff ratio",
                   ratio)

    def to_edges(l):
        """
        treat `l` as a Graph and returns it's edges
        to_edges(['a','b','c','d']) -> [(a,b), (b,c),(c,d)]
        """
        it = iter(l)
        last = next(it)
        for current in it:
            yield last, current
            last = current

    def to_graph(l):
        G = networkx.Graph()
        for part in l:
            # each sublist is a bunch of nodes
            G.add_nodes_from(part)
            # it also implies a number of edges:
            G.add_edges_from(to_edges(part))
        return G

    itr = 0
    bond_list = []
    bonded = [False] * self.N
    for i in xrange(self.N):
        for j in xrange(i+1, self.N):
            d_ij = np.linalg.norm(self.R[i,:] - self.R[j,:])
            atom_i = getattr(pt, self.type_list[i])
            atom_j = getattr(pt, self.type_list[j])
            Ri = atom_i.covalent_radius + \
                 atom_i.covalent_radius_uncertainty
            Rj = atom_j.covalent_radius + \
                 atom_j.covalent_radius_uncertainty
            Dij = (Ri+Rj) * float(ratio)
            if d_ij >= Dij:
                continue

            bonded[i] = True
            bonded[j] = True
            # the lighter atom (smaller Z) is stored as the beginning
            if self.Z[i] < self.Z[j]:
                atom_begin = self.Z[i]
                atom_end = self.Z[j]
                index_begin = i
                index_end = j
            else:
                atom_begin = self.Z[j]
                atom_end = self.Z[i]
                index_begin = j
                index_end = i
            self.bonds[itr] = {'atom_begin' : atom_begin,
                               'index_begin' : index_begin,
                               'atom_end' : atom_end,
                               'index_end' : index_end,
                               'length' : d_ij}
            bond_list.append([i, j])

            type_begin = qtk.Z2n(atom_begin)
            type_end = qtk.Z2n(atom_end)
            bond_table = qtk.data.elements.bond_table
            bond_keys = [
                type_begin + _ + type_end for _ in ['-', '=', '#']
            ]
            try:
                # Pick the tabulated bond whose length is closest to d_ij.
                # The winner is looked up in the SAME filtered list the
                # lengths come from; indexing the unfiltered candidates
                # mislabelled the bond whenever a candidate was missing
                # from the table.
                known_keys = [k for k in bond_keys
                              if k in bond_table.keys()]
                known_lengths = np.array(
                    [bond_table[k][0] for k in known_keys])
                bond_type = known_keys[
                    np.argmin(abs(known_lengths - d_ij))]
            except Exception as _e:
                qtk.warning(
                    "error while processing bond" +\
                    str(bond_keys) + "with error message %s" % str(_e))
                bond_type = bond_keys[-1]
            self.bonds[itr]['name'] = bond_type
            try:
                bond_energy = \
                    bond_table[bond_type][1] * \
                    qtk.convE(1, 'kj-kcal')[0]
            except Exception:
                # no table entry for this bond type
                bond_energy = np.nan
            self.bonds[itr]['energy'] = bond_energy
            if np.isnan(bond_energy):
                qtk.warning("Non-tabliated covalent bond %s" % bond_type)
            if bond_type in self.bond_types:
                self.bond_types[bond_type] += 1
            else:
                self.bond_types[bond_type] = 1
            itr += 1

    # bonded atoms: one segment per connected component
    segments = list(connected_components(to_graph(bond_list)))
    for s in range(len(segments)):
        segment = list(segments[s])
        new_mol = self.getSegment(segment, **kwargs)
        ns = len(self.segments)
        new_mol.name = new_mol.name + '_%d' % ns
        self.segments.append(new_mol)
    # isolated atoms: one single-atom segment each
    for s in [i for i in range(self.N) if not bonded[i]]:
        segment = [s]
        new_mol = self.getSegment(segment, **kwargs)
        ns = len(self.segments)
        new_mol.name = new_mol.name + '_%d' % ns
        self.segments.append(new_mol)
def returnError(ioStr, unitStr):
    """Report the supported units and abort on an unknown one.

    The keys of ``Eh`` are listed, one per line, through ``qtk.report``;
    ``qtk.exit`` is then called with a message naming ``ioStr`` and
    ``unitStr``.
    """
    lines = ['supported units are:\n']
    lines.extend(key + '\n' for key in Eh.iterkeys())
    qtk.report(''.join(lines), color=None)
    qtk.exit(ioStr + " unit: " + unitStr + " is not reconized")
def findBonds(self, ratio=setting.bond_ratio, **kwargs):
    """Detect bonds by distance and split the molecule into segments.

    Parameters:
        ratio: scale applied to the sum of the two covalent radii (each
            including its tabulated uncertainty); a pair closer than that
            is treated as bonded.
        no_report (keyword): when truthy the progress report is skipped.
        Remaining keywords are forwarded to ``self.getSegment``.

    Side effects:
        ``self.segments`` and ``self.bond_types`` are rebuilt from scratch.
        ``self.bonds[n]`` receives one dict per bond (atomic numbers and
        indices ordered by atomic number, length, name, energy); entries
        left over from an earlier call are not removed here.  Every
        connected group of bonded atoms, and then every unbonded atom,
        becomes one entry of ``self.segments`` named ``<name>_<index>``.

    When the bond type cannot be resolved from the bond table the geometry
    is written with ``self.write_xyz()`` and ``qtk.exit`` is called.
    """
    del self.segments
    del self.bond_types
    self.segments = []
    self.bond_types = {}
    if 'no_report' not in kwargs or not kwargs['no_report']:
        qtk.report("Molecule",
                   "finding bonds with cutoff ratio",
                   ratio)

    def to_edges(l):
        """
        treat `l` as a Graph and returns it's edges
        to_edges(['a','b','c','d']) -> [(a,b), (b,c),(c,d)]
        """
        it = iter(l)
        last = next(it)
        for current in it:
            yield last, current
            last = current

    def to_graph(l):
        G = networkx.Graph()
        for part in l:
            # each sublist is a bunch of nodes
            G.add_nodes_from(part)
            # it also implies a number of edges:
            G.add_edges_from(to_edges(part))
        return G

    itr = 0
    bond_list = []
    bonded = [False] * self.N
    for i in xrange(self.N):
        for j in xrange(i+1, self.N):
            d_ij = np.linalg.norm(self.R[i,:] - self.R[j,:])
            atom_i = getattr(pt, self.type_list[i])
            atom_j = getattr(pt, self.type_list[j])
            Ri = atom_i.covalent_radius + \
                 atom_i.covalent_radius_uncertainty
            Rj = atom_j.covalent_radius + \
                 atom_j.covalent_radius_uncertainty
            Dij = (Ri+Rj) * float(ratio)
            if d_ij >= Dij:
                continue

            bonded[i] = True
            bonded[j] = True
            # the lighter atom (smaller Z) is stored as the beginning
            if self.Z[i] < self.Z[j]:
                atom_begin = self.Z[i]
                atom_end = self.Z[j]
                index_begin = i
                index_end = j
            else:
                atom_begin = self.Z[j]
                atom_end = self.Z[i]
                index_begin = j
                index_end = i
            self.bonds[itr] = {'atom_begin' : atom_begin,
                               'index_begin' : index_begin,
                               'atom_end' : atom_end,
                               'index_end' : index_end,
                               'length' : d_ij}
            bond_list.append([i, j])

            type_begin = qtk.Z2n(atom_begin)
            type_end = qtk.Z2n(atom_end)
            bond_table = qtk.data.elements.bond_table
            bond_keys = [
                type_begin + _ + type_end for _ in ['-', '=', '#']
            ]
            try:
                # Pick the tabulated bond whose length is closest to d_ij.
                # The winner is looked up in the SAME filtered list the
                # lengths come from; indexing the unfiltered candidates
                # mislabelled the bond whenever a candidate was missing
                # from the table.
                known_keys = [k for k in bond_keys
                              if k in bond_table.keys()]
                known_lengths = np.array(
                    [bond_table[k][0] for k in known_keys])
                bond_type = known_keys[
                    np.argmin(abs(known_lengths - d_ij))]
            except Exception as _e:
                # dump the geometry for inspection before giving up
                self.write_xyz()
                qtk.exit(
                    "error while processing bond" +\
                    str(bond_keys) + "with error message %s" % str(_e))
            self.bonds[itr]['name'] = bond_type
            bond_energy = \
                bond_table[bond_type][1] * \
                qtk.convE(1, 'kj-kcal')[0]
            self.bonds[itr]['energy'] = bond_energy
            if np.isnan(bond_energy):
                qtk.warning("Non-tabliated covalent bond %s" % bond_type)
            if bond_type in self.bond_types:
                self.bond_types[bond_type] += 1
            else:
                self.bond_types[bond_type] = 1
            itr += 1

    # bonded atoms: one segment per connected component
    segments = list(connected_components(to_graph(bond_list)))
    for s in range(len(segments)):
        segment = list(segments[s])
        new_mol = self.getSegment(segment, **kwargs)
        ns = len(self.segments)
        new_mol.name = new_mol.name + '_%d' % ns
        self.segments.append(new_mol)
    # isolated atoms: one single-atom segment each
    for s in [i for i in range(self.N) if not bonded[i]]:
        segment = [s]
        new_mol = self.getSegment(segment, **kwargs)
        ns = len(self.segments)
        new_mol.name = new_mol.name + '_%d' % ns
        self.segments.append(new_mol)
def __init__(self, molecule, **kwargs):
    """Build the minimisation, equilibration and production inputs.

    Parameters:
        molecule: forwarded to ``GenericMDInput.__init__`` and to the three
            ``qtk.QMInp`` objects; it is also concatenated into the info
            strings.
            # NOTE(review): the concatenation requires a string here --
            # confirm what callers pass.
        **kwargs: settings.  Any of ``theory``, ``cutoff``, ``periodic``,
            ``em_step``, ``eq_step``, ``md_step`` and ``md_mode`` that is
            not supplied receives the default listed below.

    Unless ``celldm`` is supplied, the cell is taken from the molecule or,
    when the molecule has none, built from its bounding box plus a margin
    on each side (the molecule is shifted accordingly).

    Attributes set: ``emInp`` (mode ``'geopt'``, ``em_step`` steps),
    ``eqInp`` (``eq_step`` steps) and ``mdInp``.
    """
    # The base initialiser is called through the class, so the instance
    # has to be passed explicitly; without it `molecule` was bound as self.
    GenericMDInput.__init__(self, molecule, **kwargs)

    defaults = (
        ('theory', 'PBE'),
        ('cutoff', 100),
        ('periodic', True),
        ('em_step', 200),
        ('eq_step', 200),
        ('md_step', 5000),
        ('md_mode', 'BOMD'),
    )
    for key, value in defaults:
        if key not in kwargs:
            self.setting[key] = value

    # The former test was inverted (`not in` followed by kwargs['vdw'])
    # and raised KeyError whenever vdw was omitted.
    # NOTE(review): no default is invented here -- confirm whether an
    # omitted vdw should map to a specific value.
    if 'vdw' in kwargs:
        self.setting['vdw'] = kwargs['vdw']

    # celldm can always be overwritten
    if 'celldm' not in kwargs:
        # for the case of molecule xyz input (molecule)
        # set default orthorhombic celldm
        if not self.molecule.celldm:
            box = self.molecule.getBox()
            # default margin as specified in setting.py,
            # growing with box size
            if 'margin' not in kwargs:
                m = qtk.setting.box_margin
                self.setting['margin'] = max(m, max(box) / 5.)
            edge = np.array([min(self.molecule.R[:,i])\
                             for i in range(3)])
            self.molecule.shift(self.setting['margin'] - edge)
            box = 2 * self.setting['margin'] + box
            self.setting['celldm'] = np.append(box, [0, 0, 0])
        else:
            self.setting['celldm'] = self.molecule.celldm

    for key in self.md_setting.iterkeys():
        qtk.report("MDJob", "setting", key, ":", self.md_setting[key])

    # energy minimisation
    em_setting = copy.deepcopy(self.setting)
    em_setting['md_step'] = em_setting['em_step']
    em_setting['mode'] = 'geopt'
    em_setting['info'] = ' MD em job with ' + molecule
    self.emInp = qtk.QMInp(molecule, **em_setting)

    # equilibration
    eq_setting = copy.deepcopy(self.setting)
    eq_setting['md_step'] = eq_setting['eq_step']
    eq_setting['info'] = ' MD eq job with ' + molecule
    eq_setting['mode'] = self.md_setting['md_mode']
    self.eqInp = qtk.QMInp(molecule, **eq_setting)

    # production run
    md_setting = copy.deepcopy(self.setting)
    md_setting['mode'] = self.md_setting['md_mode']
    md_setting['info'] = ' MD md job with ' + molecule
    self.mdInp = qtk.QMInp(molecule, **md_setting)
def Ev_ccs(ccs_coord, ccs_span, vacancy_index, **kwargs):
    """
    single point calculation of vacancy energy in crystal
    either reference (true) or predicted (pred) calculations
    can be assigned

    vacancy_index starts from 1

    Keyword arguments:
        QMInp: required; ``qtk.exit`` is called when it is missing.
        qm_setting: dict forwarded to the calculations (default ``{}``).
        pref, vref: reference directories for the perfect and the vacancy
            system.  When both are given the energies come from
            ``qtk.Al1st``; when neither is given both inputs are run
            directly.  Giving only one of them is rejected.
        threads: read but not used by this function.

    Returns:
        ``Et`` of ``perfect - vacancy - free atom`` after all three have
        been converted with ``inUnit('ev')``.  The free-atom output is read
        from ``freeAtom/freeAtom.out``.
    """
    if 'QMInp' not in kwargs:
        qtk.exit("kwargs: 'QMInp' is missing.\n"\
                 + "It should be set to QMInp object of "\
                 + "system without vacancies.\n"\
                 + "It is necessary for inp settings")
    base_inp = kwargs['QMInp']

    qm_setting = kwargs.get('qm_setting', {})

    has_pref = 'pref' in kwargs
    has_vref = 'vref' in kwargs
    if has_pref != has_vref:
        # Previously this left `alchem` unbound and failed much later with
        # a NameError, after the inputs had already been generated.
        qtk.exit("kwargs: 'pref' and 'vref' must be given together")
    alchem = has_pref and has_vref
    if alchem:
        perfect_ref = kwargs['pref']
        vacancy_ref = kwargs['vref']

    freeE = qtk.QMOut('freeAtom/freeAtom.out')
    freeE.inUnit('ev')

    _threads = kwargs.get('threads', 1)

    inp_wov = qtk.QMInp(ccs_span.generate(**ccs_coord))
    inp_wv = qtk.QMInp(ccs_span.generate(**ccs_coord))

    inp_wv.removeAtoms(vacancy_index)
    inp_wv.setChargeMultiplicity(0, 2)

    # the process id keeps concurrent evaluations apart
    perfect = 'ev_perfect' + str(os.getpid())
    vacancy = 'ev_vacancy' + str(os.getpid())
    perfectinp = perfect + '.inp'
    vacancyinp = vacancy + '.inp'
    inp_wov.molecule.name = perfectinp
    inp_wv.molecule.name = vacancyinp

    for stale in (perfect, vacancy):
        if os.path.exists(stale):
            shutil.rmtree(stale)

    print(ccs_coord)
    if alchem:
        out_wov = qtk.Al1st(inp_wov, ref_dir=perfect_ref, **qm_setting)
        out_wv = qtk.Al1st(inp_wv, ref_dir=vacancy_ref, **qm_setting)
    else:
        out_wov = inp_wov.run(**qm_setting)
        out_wv = inp_wv.run(**qm_setting)

    # Clean each leftover on its own: handling both in one try block made
    # the fallback fail when the first path had already been removed.
    for leftover in (perfectinp, vacancyinp):
        try:
            os.remove(leftover)
        except OSError:
            shutil.rmtree(leftover)

    out_wov.inUnit('ev')
    out_wv.inUnit('ev')

    final = out_wov - out_wv - freeE

    msg = str(out_wov.Et) + '-(' + str(out_wv.Et) + \
          '+' + str(freeE.Et) + ') = ' + str(final.Et)
    qtk.report('trial Ev', msg)

    return final.Et
def write(self, name=None, **kwargs):
    """Write the VASP input set (INCAR, KPOINTS, POSCAR, POTCAR).

    Parameters:
        name: job name; when truthy, ``self.setting['output']`` is set to
            True and the extension is stripped from the local ``name``.
            Otherwise ``output`` is set to False.
        **kwargs: merged into ``self.setting`` before anything is written.

    The four writer objects are obtained from the parent class ``write``;
    the first call also returns the molecule.  The pseudopotential
    directory is ``self.setting['pp_path']`` when given, otherwise
    ``qtk.setting.vasp_pp`` followed by ``_<THEORY>_<PP_TYPE>``.
    ``qtk.exit`` is called when a POTCAR file is missing or the requested
    vdw scheme is not recognised.

    Returns:
        The INCAR writer object (already closed).
    """
    self.setting.update(kwargs)
    self.setting['root_dir'] = name
    self.setting['no_molecule'] = False
    if name:
        self.setting['output'] = True
        name = os.path.splitext(name)[0]
    else:
        self.setting['output'] = False
    # the first call also hands back the molecule; the following ones are
    # made with 'no_molecule' switched on
    incar, molecule = \
        super(PlanewaveInput, self).write('INCAR', **self.setting)
    self.setting['no_molecule'] = True
    kpoints = \
        super(PlanewaveInput, self).write('KPOINTS', **self.setting)
    poscar = \
        super(PlanewaveInput, self).write('POSCAR', **self.setting)
    potcar = \
        super(PlanewaveInput, self).write('POTCAR', **self.setting)

    # !!!!!!!!!!! TODO !!!!!!!!!!!
    # Center molecule
    # charge multiplicity
    # optimizer
    #
    # write CPMD to modulize structure manipulation?

    # accumulators filled by the three helpers below
    PPPath = []
    n_list = []
    R_list = []

    def catPOTCAR(path):
        # remember an existing POTCAR path, abort on a missing one
        if os.path.exists(path):
            PPPath.append(path)
        else:
            qtk.exit("PP file: " + path + " not found")

    def getNlist(atom_number):
        n_list.append(atom_number)

    def getRlist(coord):
        R_list.append(coord)

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # construct to POSCAR and POTCAR data
    molecule.sort()
    type_index = molecule.index
    type_list = molecule.type_list
    Z = molecule.Z

    self.pp_path = None
    if 'pp_path' not in self.setting:
        if 'pp_theory' not in self.setting:
            # map the level of theory onto the available PP families
            theory_dict = {
                'pbe': 'pbe',
                'pbe0': 'pbe',
                'hse06': 'pbe',
                'hse03': 'pbe',
                'lda': 'lda',
            }
            if self.setting['theory'] not in theory_dict:
                qtk.warning("%s is not supported, change theory to LDA" \
                            % (self.setting['theory']))
                self.setting['theory'] = 'lda'
            theory = theory_dict[self.setting['theory']]
            # NOTE(review): every value of theory_dict is 'pbe' or 'lda',
            # so this branch looks unreachable -- confirm
            if theory.lower() not in ['pbe', 'lda']:
                qtk.warning('xc: %s is not supported, using LDA PP' % \
                            theory.upper())
                theory = 'LDA'
            self.pp_path = qtk.setting.vasp_pp + '_%s_%s' % \
                           (theory.upper(),
                            self.setting['pp_type'].upper())
        else:
            self.pp_path = qtk.setting.vasp_pp + '_%s_%s' % \
                           (self.setting['pp_theory'].upper(),
                            self.setting['pp_type'].upper())
    else:
        self.pp_path = self.setting['pp_path']

    # consecutive entries of type_index delimit the atoms of one type
    for atom_type in xrange(0,len(type_index)-1):
        type_n = type_index[atom_type+1] - type_index[atom_type]
        # check special PP folder
        # not yet implemented
        # default PP path
        type_name = type_list[type_index[atom_type]]
        AtomPP = os.path.join(self.pp_path, type_name, 'POTCAR')
        catPOTCAR(AtomPP)
        getNlist(type_n)
        for I in\
            xrange(type_index[atom_type],type_index[atom_type+1]):
            getRlist(molecule.R[I][:])

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # write to INCAR and generate POTCAR
    qtk.report("vasp.inp", "writing", "INCAR")
    incar.write("SYSTEM = %s\n" % self.setting['info'])
    incar.write("ISMEAR = 0\n")
    incar.write("IBRION = 2\n")
    if 'restart' in self.setting and self.setting['restart']:
        incar.write("ISTART = 1\n")
    if 'cutoff' in self.setting:
        # the setting is multiplied by 13.605698066 for ENCUT; the
        # trailing comment written to the file labels the input as Ry
        cutoff = self.setting['cutoff']
        incar.write("ENCUT = %.2f" % (cutoff * 13.605698066))
        incar.write(" # in eV, that is %.1f Ry\n" % cutoff)
    if 'scf_step' in self.setting:
        incar.write('NELM = %d\n' % self.setting['scf_step'])
    if 'vdw' in self.setting:
        vdw = self.setting['vdw'].lower()
        if vdw != 'none':
            if vdw=='d2':
                incar.write("IVDW = 10\n")
            elif vdw=='d3':
                incar.write("IVDW = 11\n")
            elif vdw=='d3-bj':
                incar.write("IVDW = 12\n")
            elif vdw=='mbd':
                incar.write("IVDW = 202\n")
            elif vdw=='mbd_iter':
                incar.write("IVDW = 212\n")
            else:
                qtk.exit("VDW '%s' is not supported for VASP" % vdw)
    if 'ks_states' in self.setting:
        # half the valence electrons (rounded) plus the requested extras
        vs = int(round(self.molecule.getValenceElectrons() / 2.0))
        nbnd = vs + self.setting['ks_states']
        incar.write("NBANDS = %d\n" % nbnd)
    if 'full_kmesh' in self.setting and self.setting['full_kmesh']:
        incar.write("ISYM = -1\n")
    if self.setting['theory'] == 'pbe0':
        incar.write("LHFCALC = .TRUE.\n")
        incar.write("GGA = PE\n")
    elif self.setting['theory'] == 'hse06':
        incar.write("GGA = PE\n")
        incar.write("\n##HSE setting\n")
        incar.write("LHFCALC = .TRUE.\n")
        incar.write("HFSCREEN = 0.2 \n")
        incar.write("ALGO = D\n")
    if molecule.charge != 0:
        nve = molecule.getValenceElectrons()
        incar.write("NELECT = %d\n" % (nve))
    if 'save_density'not in self.setting\
    or not self.setting['save_density']:
        incar.write("LCHARG = .FALSE.\n")
    # LSCALAPACK is switched off unless 'scalapack' is set and truthy
    if 'scalapack' not in self.setting:
        incar.write("LSCALAPACK = .FALSE.\n")
    elif not self.setting['scalapack']:
        incar.write("LSCALAPACK = .FALSE.\n")
    incar.close()

    # !!!!!!!!!!!!!!!!
    # write to KPOINTS
    qtk.report("vasp.inp", "writing", "KPOINTS")
    if 'kmesh' not in self.setting:
        kpoints.write("Gamma-point only\n")
        kpoints.write(" 1 ! one k-point\n")
        kpoints.write("rec ! in units of reciprocal vector\n")
        kpoints.write(" 0 0 0 1 ! coordinates and weight\n")
    else:
        k1, k2, k3 = self.setting['kmesh']
        kpoints.write("Automatic mesh\n")
        kpoints.write(" 0 ! number of k-points = 0")
        kpoints.write(" ->automatic generation scheme\n")
        kpoints.write("Gamma ! generate a Gamma centered grid\n")
        kpoints.write(" %d %d %d ! number of k grids\n" % (k1, k2, k3))
    kpoints.close(no_cleanup=True)

    # !!!!!!!!!!!!!!!
    # write to POSCAR
    qtk.report("vasp.inp", "writing", "POSCAR")
    poscar.write(self.setting['info'] + '\n')
    poscar.write("1.0\n")
    # self.setting['lattice'] is read right after this call
    self.celldm2lattice()
    for i in range(3):
        for j in range(3):
            poscar.write(" %7.4f" % self.setting['lattice'][i,j])
        poscar.write(" ! lattic vector a(%d)\n" %i)
    for n in n_list:
        poscar.write(str(n) + ' ')
    poscar.write("! number of atoms in order of POTCAR\n")
    poscar.write("cart ! cartesian coordinates\n")
    for R in R_list:
        for X in R:
            poscar.write(" %7.4f" % X)
        poscar.write("\n")
    poscar.close(no_cleanup=True)

    # !!!!!!!!!!!!!!!
    # write to POTCAR
    qtk.report("vasp.inp", "writing", "POTCAR")
    for PP_file in PPPath:
        qtk.report("vasp.inp.POTCAR", PP_file)
        if name:
            # with a job name the POTCAR contents are copied line by line
            with open(PP_file) as PP:
                for line in PP:
                    potcar.write(str(line))
        else:
            # without one only a "cat <path>" line is emitted
            potcar.write("cat %s\n" % PP_file)
    potcar.close(no_cleanup=True)

    return incar
def libxc_report(inp, xc_id, flag):
    """
    Report the name under which ``xc_id`` is registered in ``xc_dict``.

    The first entry of ``xc_dict`` whose value equals ``xc_id`` is reported
    through ``qtk.report`` with the title ``libxc_<flag>``.  Nothing is
    reported when no entry matches.  ``inp`` is accepted but not used.
    """
    for label, ident in xc_dict.iteritems():
        if ident != xc_id:
            continue
        title = "libxc_%s" % flag
        qtk.report(title, "xc: %s, id: %d\n" % (label, xc_id))
        return
def _cpmd2upf(pp_path):
    """Run the configured cpmd2upf converter on ``pp_path``; return its exit code."""
    conv_pp = sp.Popen("%s %s" % (qtk.setting.espresso_cpmd2upf_exe, pp_path),
                       shell=True)
    conv_pp.wait()
    return conv_pp.returncode


def _install_upf(upf_path):
    """Copy ``upf_path`` into ``qtk.setting.espresso_pp`` unless already there."""
    target = os.path.join(qtk.setting.espresso_pp, os.path.split(upf_path)[1])
    if not os.path.exists(target):
        shutil.copy(upf_path, qtk.setting.espresso_pp)


def PPString(inp, mol, i, n, outFile):
    """
    Return the pseudopotential (PP) file name for atom ``i`` of ``mol``.

    ``mol.string[i]`` with every ``*`` removed is used as the name when it is
    not empty.  Otherwise the name is generated from ``inp.setting['pp_type']``
    (``'geodecker'`` or ``'cpmd'``) and ``inp.setting['pp_theory']``.

    When ``mol.string[i]`` is empty the PP file is checked via ``PPCheck`` /
    ``PPCheck_cpmd``; for ``'cpmd'`` it is additionally converted to UPF and
    copied to ``qtk.setting.espresso_pp``.  When ``mol.string[i]`` looks like
    an alchemical name (``<A>2<B>_ddd``) the PP from ``alchemyPP`` is
    converted and copied the same way.  In both converted cases ``'.UPF'`` is
    appended to the returned name.

    ``n`` is only forwarded to ``PPName``; ``outFile`` is not used.
    """
    alchemy = re.compile(r'^\w*2\w*_\d\d\d$')
    ppstr = re.sub(r'\*', '', mol.string[i])
    if ppstr:
        PPStr = ppstr
    elif inp.setting['pp_type'] == 'geodecker':
        element = mol.type_list[i].title()
        # normalise a scalar 'd_shell' setting to a list (stored back on inp)
        if 'd_shell' in inp.setting \
        and not isinstance(inp.setting['d_shell'], list):
            inp.setting['d_shell'] = [inp.setting['d_shell']]
        if qtk.n2ve(element) > 10:
            shell = '-d'
        elif 'd_shell' in inp.setting \
        and element in inp.setting['d_shell']:
            shell = '-d'
        else:
            element_data = qtk.element[element]
            if element_data.group < 3 and mol.Z[i] > 1:
                shell = '-sp' if mol.Z[i] != 3 else '-s'
            else:
                shell = ''
        pp_xc_dict = {
            'lda': 'pz',
            'pbe0': 'pbe',
            'b3lyp': 'blyp',
        }
        pp_xc = inp.setting['pp_theory'].lower()
        pp_xc = pp_xc_dict.get(pp_xc, pp_xc)
        symbol = ''.join([c for c in mol.type_list[i] if not c.isdigit()])
        PPStr = symbol + '.' + pp_xc + shell + '-hgh.UPF'
    elif inp.setting['pp_type'] == 'cpmd':
        PPStr = PPName(inp, mol, i, n)

    xc = inp.setting['pp_theory'].lower()
    if not mol.string[i]:
        if inp.setting['pp_type'] == 'geodecker':
            PPCheck(pp_xc, mol.type_list[i].title(), PPStr)
        elif inp.setting['pp_type'] == 'cpmd':
            # pp_xc is only bound in the 'geodecker' branch above; using it
            # here raised NameError, so the lower-cased pp_theory is passed.
            saved_pp = PPCheck_cpmd(xc, mol.type_list[i].title(), PPStr)
            new_pp1 = saved_pp + '.UPF'
            _cpmd2upf(saved_pp)
            _install_upf(new_pp1)
            PPStr = PPStr + '.UPF'
    elif alchemy.match(mol.string[i]):
        cpmd_pp = alchemyPP(xc, PPStr)
        new_pp1 = cpmd_pp + '.UPF'
        if not os.path.exists(new_pp1):
            qtk.report('espresso', "rewrite Goedecker's PP to UPF")
            if _cpmd2upf(cpmd_pp) != 0:
                # dirty fix for espresso alchemy conversion routine
                qtk.warning('conversion failed..., trying path end points')
                root, _ = os.path.splitext(PPStr)
                element_str = re.sub('_.*', '', root)
                element1 = re.sub('2.*', '', element_str)
                element2 = re.sub('.*2', '', element_str)
                fraction = float(re.sub('.*_', '', root)) / 100
                if fraction == 0.0:
                    strpp = element1 + "_q" + str(qtk.n2ve(element1)) +\
                            "_" + xc + '.psp'
                elif fraction == 1.0:
                    strpp = element2 + "_q" + str(qtk.n2ve(element2)) +\
                            "_" + xc + '.psp'
                else:
                    qtk.exit("PP conversion failed for intermediate lambda")
                strpp = os.path.join(qtk.setting.cpmd_pp, strpp)
                _cpmd2upf(strpp)
                os.rename(strpp + '.UPF', new_pp1)
        _install_upf(new_pp1)
        PPStr = PPStr + '.UPF'

    return PPStr
def _write_submit_inputs(inp_list, root):
    """Create ``root`` and write every input of ``inp_list`` inside it."""
    cwd = os.getcwd()
    os.makedirs(root)
    os.chdir(root)
    try:
        for inp in inp_list:
            inp.write(inp.molecule.name)
    finally:
        # always return to the caller's directory, even if a write fails
        os.chdir(cwd)


def submit(inp_list, root, **remote_settings):
    """
    Write the inputs under ``root``, copy them to a remote host with scp and
    run the remote submission script.

    inp_list: a single input object or a list of them
    root: local folder that receives the written inputs
    remote_settings:
      required: ip, submission_script
      optional: username, password, flags, timeout, remote_path (default
        ``./<root>``), prefix (default ``''``), threads, qthreads,
        overwrite, compress, debug

    ``timeout`` defaults to the larger of 40 and 5 per input.  Inputs are
    sent as a tar.gz archive when ``compress`` is true or, if ``compress``
    is not given, when there are more than 5 inputs.  Unless ``debug`` is
    true the local ``root`` folder and archive are removed afterwards.
    """
    # normalise first: everything below measures and indexes inp_list
    if type(inp_list) is not list:
        inp_list = [inp_list]

    timeout = 40
    if len(inp_list) * 5 > 40:
        timeout = len(inp_list) * 5
    if 'timeout' in remote_settings:
        timeout = remote_settings['timeout']

    username = remote_settings.get('username')
    password = remote_settings.get('password')
    flags = remote_settings.get('flags')
    prefix = remote_settings.get('prefix', '')
    if 'remote_path' not in remote_settings:
        remote_path = './%s' % root
    else:
        remote_path = remote_settings['remote_path']

    if 'threads' not in remote_settings:
        threads = inp_list[0].setting['threads']
    else:
        threads = remote_settings['threads']
    if threads != inp_list[0].setting['threads']:
        qtk.report('submit', 'reset job threads to %d' % threads)
        for inp in inp_list:
            inp.setting['threads'] = threads
    if 'qthreads' not in remote_settings:
        qthreads = threads
    else:
        qthreads = remote_settings['qthreads']

    for key in ('ip', 'submission_script'):
        if key not in remote_settings:
            qtk.exit('cluster setting:%s not defined' % key)
    # plain string conversion replaces the former exec of "name = '<value>'",
    # which misbehaved on values containing quotes or backslashes
    ip = str(remote_settings['ip'])
    submission_script = str(remote_settings['submission_script'])

    program = inp_list[0].setting['program']

    if os.path.exists(root):
        if 'overwrite' in remote_settings \
        and remote_settings['overwrite']:
            qtk.warning("root directory %s exist, overwrite..." % root)
            shutil.rmtree(root)
            _write_submit_inputs(inp_list, root)
        else:
            qtk.warning("root directory %s exist, uploading existing folder"\
                        % root)
    else:
        _write_submit_inputs(inp_list, root)

    if 'compress' in remote_settings:
        compress = remote_settings['compress']
    else:
        compress = len(inp_list) > 5

    if compress:
        qtk.report("submit", "compressing input files")
        cmd = 'tar -zcf %s %s' % (root + '.tar.gz', root)
        run = sp.Popen(cmd, shell=True, stdin=sp.PIPE)
        run.stdin.flush()
        run.communicate()
        run.wait()
        rootToSend = root + '.tar.gz'
        remote_dest = remote_path + '.tar.gz'
        qtk.report("submit", "compression completed")
    else:
        rootToSend = root
        remote_dest = remote_path

    paramiko_kwargs = {}
    if username:
        paramiko_kwargs['username'] = username
    if password:
        paramiko_kwargs['password'] = password
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.load_system_host_keys()
    ssh.connect(ip, **paramiko_kwargs)
    ssh_stdin, ssh_stdout, ssh_stderr = \
        ssh.exec_command('ls %s' % remote_path)
    sshout = ssh_stdout.read()
    if len(sshout) > 0:
        if 'overwrite' in remote_settings \
        and remote_settings['overwrite']:
            status = 'remote path %s exists, overwrite...' % remote_path
            cmd = 'rm -r %s' % remote_path
            remoteRun(cmd, status, ssh)
        else:
            qtk.exit('remote path %s exists' % remote_path)

    ssh_newkey = 'Are you sure you want to continue connecting'
    patterns = [ssh_newkey, '[Pp]assword:', pexpect.EOF]
    if username:
        userStr = username + '@'
    else:
        userStr = ''
    cmd = 'scp -qr %s %s%s:%s' % (rootToSend, userStr, ip, remote_dest)
    qtk.report('submit', 'scp input files...')
    qtk.report('submit-remote_command', cmd)

    # the three checks are sequential on purpose: accepting the host key can
    # be followed by a password prompt, which can be followed by EOF
    p = pexpect.spawn(cmd)
    i = p.expect(patterns, timeout=timeout)
    if i == 0:
        qtk.report('submit', 'adding %s to known_hosts' % ip)
        p.sendline('yes')
        i = p.expect(patterns, timeout=timeout)
    if i == 1:
        p.sendline(password)
        i = p.expect(patterns, timeout=timeout)
    if i == 2:
        if not p.before:
            qtk.report('submit', 'scp completed')
        else:
            qtk.warning('scp message: %s' % p.before)

    if compress:
        status = "decompress remote input files"
        cmd = 'tar xf %s' % rootToSend
        remoteRun(cmd, status, ssh)
        status = "remove remote tar file"
        cmd = 'rm %s' % rootToSend
        remoteRun(cmd, status, ssh)
        qtk.report('submit', 'done')

    exe = qtk.setting.program_dict[program]
    cmd = "%s \"%s\" %s %d %d '%s' %s" % (submission_script, exe,
        remote_path, threads, qthreads, flags, prefix)
    status = 'submitting jobs...'
    remoteRun(cmd, status, ssh)
    ssh.exec_command("echo %s > %s/cmd.log" % (cmd, remote_path))
    qtk.report('submit', 'submission completed')

    ssh.close()

    if not ('debug' in remote_settings and remote_settings['debug']):
        qtk.report('submit', 'cleanup local files')
        shutil.rmtree(root)
        if os.path.exists(root + '.tar.gz'):
            os.remove(root + '.tar.gz')
def genRefInp(x, y, n_pair, **kwargs):
    """
    generate cpmd input files for graphene reference calculation
    x: number of graphene 4-atom unit cells in x
    y: number of graphene 4-atom unit cells in y
    n_pair: number of BN pairs in the system
    kwargs: max_sample, path

    When ``path`` is given it is created and becomes the working directory.
    Inputs are written to ``inp/``; an existing ``inp/`` is first moved to
    ``back_inp/`` (replacing any older backup).  For every choice of N and B
    sites a structure is written unless its ``bond_types`` contain both
    'B-B' and 'N-N'.  Once more than ``max_sample`` structures would be
    written the process is terminated through ``sys.exit``.
    """
    if "path" in kwargs:
        qtk.report("creating folder", kwargs["path"])
        os.makedirs(kwargs["path"])
        os.chdir(kwargs["path"])

    print("%screating inp folder...%s"
          % (qtk.bcolors.OKGREEN, qtk.bcolors.ENDC))
    if not os.path.exists("inp"):
        os.makedirs("inp")
    else:
        print("%sinp folder exist, baking up to back_inp...%s"
              % (qtk.bcolors.WARNING, qtk.bcolors.ENDC))
        try:
            shutil.rmtree("back_inp")
        except OSError:
            # no previous backup to remove; other problems surface at rename
            pass
        os.rename("inp", "back_inp")
        os.makedirs("inp")

    N = 4 * x * y
    if "max_sample" in kwargs:
        max_sample = kwargs["max_sample"]
    else:
        total = math.factorial(N - 1)
        denum1 = math.factorial(n_pair) ** 2
        denum2 = math.factorial(N - 1 - 2 * n_pair)
        # multinomial coefficient: the division is exact, keep it an integer
        max_sample = total // (denum1 * denum2)
    digit = len(str(max_sample))

    name_xyz = "gph%d-%d.xyz" % (x, y)
    name_ccs = "ccs%d-%d.txt" % (x, y)
    namev_xyz = "gph%d-%dv.xyz" % (x, y)
    namev_ccs = "ccs%d-%dv.txt" % (x, y)
    header = "gph%d%d_" % (x, y)

    with open(name_ccs, "w") as ccs_file:
        ccs_file.write("mutation_list:\n"
                       + " %d:%d -> 5:7\n" % (2, N)
                       + "end" + "\n")
    with open(namev_ccs, "w") as ccsv_file:
        ccsv_file.write("mutation_list:\n"
                        + " %d:%d -> 5:7\n" % (1, N - 1)
                        + "end" + "\n")
    print("%sthe following files are written: %s"
          % (qtk.bcolors.OKGREEN, qtk.bcolors.ENDC))
    print(" %s\n %s\n %s\n %s\n"
          % (name_xyz, namev_xyz, name_ccs, namev_ccs))

    graphene = generate(x, y)
    graphene.write_xyz(name_xyz)
    # NOTE(review): the return value was never used; presumably remove_atom
    # edits ``graphene`` in place before it is written again -- confirm
    graphene.remove_atom(1)
    graphene.write_xyz(namev_xyz)

    space = qtk.CCS(name_xyz, name_ccs)
    space_v = qtk.CCS(namev_xyz, namev_ccs)
    flat = [item for sublist in space.mutation_list for item in sublist]

    # geometry setup
    mol_base = graphene
    dx = mol_base.R[1, 0]
    mol_base.sort_coord()
    dy = mol_base.R[1, 1]
    center = mol_base.R[0] + [0, 0, -10]
    mol_base.center(center)
    x_max = mol_base.R[-1, 0]
    y_max = mol_base.R[-1, 1]
    celldm = [round(x_max + dx, 4), round(y_max + dy, 4), 20, 0, 0, 0]

    # input setup
    gph = qtk.QMInp(name_xyz, "cpmd", info="gph")
    gph.setCelldm(celldm)
    gph.periodic()
    gph.setSCFStep(500)

    gphv = qtk.QMInp(namev_xyz, "cpmd", info="gphv")
    gphv.setCelldm(celldm)
    gphv.periodic()
    gphv.setSCFStep(500)

    name = "inp/%s%s.inp" % (header, str(0).zfill(digit))
    namev = "inp/%s%sv.inp" % (header, str(0).zfill(digit))
    gph.write(name)
    gphv.write(namev)

    c_base = [6 for i in range(len(flat))]
    itr = 1
    for n_bn in range(1, n_pair + 1):
        for n_comb in it.combinations(range(len(flat)), n_bn):
            rest = [index for index in range(len(flat))
                    if index not in n_comb]
            for b_comb in it.combinations(rest, n_bn):
                name = "inp/%s%s.inp" % (header, str(itr).zfill(digit))
                namev = "inp/%s%sv.inp" % (header, str(itr).zfill(digit))
                atom_list = list(c_base)
                for i in b_comb:
                    atom_list[i] = 5
                for i in n_comb:
                    atom_list[i] = 7
                mutate_mol = space.generate(mutation=[atom_list])
                mutate_molv = space_v.generate(mutation=[atom_list])
                mutate_mol.find_bonds()
                if not all(key in mutate_mol.bond_types
                           for key in ("B-B", "N-N")):
                    gph.setCenter([0, 0, -celldm[2] / 2])
                    gph.setStructure(mutate_mol)
                    gph.write(name)
                    gphv.setCenter([0, 0, -celldm[2] / 2])
                    gphv.setStructure(mutate_molv)
                    gphv.write(namev)
                    itr += 1
                    if itr > max_sample:
                        msg = "%sDONE! %d generated%s" % (
                            qtk.bcolors.OKGREEN, itr - 1, qtk.bcolors.ENDC)
                        sys.exit(msg)
    print("%sDONE! %d generated%s"
          % (qtk.bcolors.OKGREEN, itr - 1, qtk.bcolors.ENDC))
def krrScore(
    data,
    n_samples=None,
    kernels=['laplacian'],
    cv=None,
    threads=1,
    alphas=[1e-11],
    gammas=[1e-5],
    descriptors=OrderedDict({coulomb_matrices: {
        'nuclear_charges': True
    }}),
    return_key=False,
    report=False,
):
    """
    return scores in the format of input parameter structure

    Cross-validated ``KernelRidge`` scores are collected in nested lists
    ordered descriptor / kernel / alpha / gamma / n_sample, converted to an
    array, negated and squeezed before being returned.

    data: mapping read through the keys 'E', 'xyz', 'Z' (when a descriptor
      setting has a true 'nuclear_charges') and 'X' (when the descriptor
      is None)
    n_samples: training-set sizes; defaults to len(E)/10, len(E)/5, len(E)/2
    cv: cross-validation splits; defaults to
      ``ShuffleSplit(len(E), n_iter=5, test_size=.1)``
    descriptors: OrderedDict of {descriptor callable: settings}, a single
      callable, a list of (callable, settings) tuples, or None
    return_key: when true, also return the OrderedDict of the inputs that
      have more than one value (and 'cv_fold' when it is larger than 1)

    The list-valued defaults are only read, never modified.
    """

    E = data['E']

    if n_samples is None:
        n_samples = [
            int(len(E) / 10.),
            int(len(E) / 5.),
            int(len(E) / 2.),
        ]

    def listWrap(param):
        if '__getitem__' not in dir(param):
            param = [param]
        return param

    alphas = listWrap(alphas)
    gammas = listWrap(gammas)
    n_samples = listWrap(n_samples)

    if not isinstance(descriptors, OrderedDict):
        if descriptors is None:
            descriptors = OrderedDict({None: None})
        elif type(descriptors) is type(coulomb_matrices):
            descriptors = OrderedDict({descriptors: {}})
        elif type(descriptors) is list \
        and type(descriptors[0]) is tuple:
            descriptors = OrderedDict(descriptors)
    if type(kernels) is not list:
        kernels = [kernels]

    if cv is None:
        cv = ShuffleSplit(len(E), n_iter=5, test_size=.1)
    try:
        cv_fold = cv.n_iter
    except AttributeError:
        # cv without n_iter (e.g. an explicit list of splits)
        cv_fold = len(cv)

    input_key = OrderedDict()
    input_key['descriptors'] = descriptors
    input_key['kernels'] = kernels
    input_key['alphas'] = alphas
    input_key['gammas'] = gammas
    input_key['n_samples'] = n_samples
    input_key['cv_fold'] = cv_fold

    output_key = OrderedDict()
    for k, v in input_key.items():
        if k == 'cv_fold':
            if cv_fold > 1:
                output_key[k] = cv_fold
        elif len(v) > 1:
            output_key[k] = v

    if report:
        qtk.report("ML.tools.krrScores setting", "\n",
                   "kernel:", kernels, "\n",
                   "alphas:", alphas, "\n",
                   "gammas:", gammas, "\n",
                   "n_samples:", n_samples, "\n",
                   "cv_threads:", threads, "\n",
                   "cv_fold:", cv_fold, "\n",
                   "final score format: ", output_key.keys())

    all_scores = []
    for descriptor, dsetting in descriptors.items():
        descriptor_scores = []
        all_scores.append(descriptor_scores)
        if descriptor is not None:
            # copy so the caller's settings keep their boolean flag
            dsetting = copy.deepcopy(dsetting)
            if 'nuclear_charges' in dsetting\
            and dsetting['nuclear_charges']:
                dsetting['nuclear_charges'] = data['Z']
            matrix_list = descriptor(data['xyz'], **dsetting)
        else:
            matrix_list = data['X']
        # one flattened row per sample; identical for every inner iteration
        features = matrix_list.reshape(len(matrix_list), -1)

        for kernel in kernels:
            kernel_scores = []
            descriptor_scores.append(kernel_scores)
            for alpha in alphas:
                alpha_scores = []
                kernel_scores.append(alpha_scores)
                for gamma in gammas:
                    gamma_scores = []
                    alpha_scores.append(gamma_scores)
                    kernel_ridge = KernelRidge(alpha=alpha,
                                               gamma=gamma,
                                               kernel=kernel)
                    for n_sample in n_samples:
                        if report:
                            qtk.report(
                                "ML.tools.krrScores, processing", "\n",
                                " descriptor =", descriptor, "\n",
                                " descriptor_setting =", dsetting, "\n",
                                " kernel =", kernel, "\n",
                                " alpha =", alpha, "\n",
                                " gamma =", gamma, "\n",
                                " n_sample = ", n_sample)
                        # truncate every training split to n_sample points
                        cv_ = [(train[:n_sample], test)
                               for train, test in cv]
                        scores = cross_val_score(
                            kernel_ridge, features, E,
                            cv=cv_, n_jobs=threads,
                            scoring='mean_absolute_error')
                        gamma_scores.append(scores)
                        if report:
                            qtk.report(
                                "", "best score:",
                                np.min(np.abs(scores)), "\n",
                            )

    if report:
        qtk.report("", "final format:", output_key.keys())

    result = np.squeeze(-np.array(all_scores))
    if return_key:
        return result, output_key
    return result
def AlGaX_EvOpt(structure, vacancy_ind, ccs_span, **kwargs):
    """
    Set up the reference calculations and run an optimizer on ``Ev_ccs``.

    kwargs read here: qm_setting, QMInp, T (default 1), target (default 0),
    log_file (default 'AlGaX_EvOpt.log'), threads, threads_per_job,
    optimizer ('MC' by default, or 'GA'), population_size (GA only) and
    alchemy.

    The reference runs 'pref', 'vref' and 'freeAtom' are started only when
    their ``<name>/<name>.out`` file does not exist yet.  Note that a
    ``qm_setting`` dict passed by the caller is modified in place.
    """
    qm_setting = kwargs.get('qm_setting', {})
    qm_setting['save_restart'] = True

    # defaults that touch qtk are evaluated only when actually needed
    baseinp = kwargs['QMInp'] if 'QMInp' in kwargs \
        else qtk.QMInp(structure, program='cpmd')
    _T = kwargs.get('T', 1)
    _target = kwargs.get('target', 0)
    logfile = kwargs.get('log_file', 'AlGaX_EvOpt.log')
    _threads = kwargs['threads'] if 'threads' in kwargs \
        else qtk.cpu_count
    _threadspj = kwargs.get('threads_per_job', _threads)
    _parallel = int(_threads / _threadspj)

    _optimizer = kwargs.get('optimizer', 'MC')
    if _optimizer == 'GA':
        _population_size = kwargs['population_size'] \
            if 'population_size' in kwargs else qtk.setting.cpu_count

    ccs = qtk.CCS(structure, ccs_span)

    # pristine reference
    inpp = qtk.QMInp(structure, **qm_setting)
    inpp.setting['info'] = 'Ev_per_ref'
    if not os.path.exists('pref/pref.out'):
        inpp.run('pref')

    # vacancy reference
    inpv = qtk.QMInp(structure, **qm_setting)
    inpv.removeAtoms(vacancy_ind)
    inpv.setChargeMultiplicity(0, 2)
    inpv.setting['info'] = 'Ev_vac_ref'
    if not os.path.exists('vref/vref.out'):
        inpv.run('vref')

    # isolated-atom reference
    inpa = qtk.QMInp(structure, **qm_setting)
    inpa.isolateAtoms(vacancy_ind)
    inpa.setChargeMultiplicity(0, 2)
    inpa.setting['info'] = 'freeAtom'
    if not os.path.exists('freeAtom/freeAtom.out'):
        inpa.run('freeAtom')
    freeAtomOut = qtk.QMOut('freeAtom/freeAtom.out')

    # result is not used, but the call is kept exactly as before
    _first_mol, _first_coord = ccs.random()

    qm_setting['threads'] = _threadspj
    penalty_setting = {
        'QMInp': baseinp,
        'freeAtomE': freeAtomOut.Et,
        'qm_setting': qm_setting,
    }
    if kwargs.get('alchemy'):
        penalty_setting.update(pref='pref', vref='vref')
    input_list = [ccs, vacancy_ind, penalty_setting]

    def genCCSInp():
        return ccs.random()[1]

    op_setting = dict(
        power=1,
        log_file=logfile,
        target=_target,
        parallel=_parallel,
        T=_T,
    )

    qtk.report('start optimizer')
    if _optimizer == 'MC':
        cylopt = qop.MonteCarlo(Ev_ccs, input_list, genCCSInp,
                                **op_setting)
    elif _optimizer == 'GA':
        cylopt = qop.GeneticOptimizer(Ev_ccs, input_list, genCCSInp,
                                      ccs.mate, _population_size,
                                      **op_setting)
    qtk.report('optimizer initialized')

    cylopt.run()
def qmRunJob(inp, name):
    """Report the job through ``qtk.report`` and return ``inp.run(name)``."""
    message = "runing qmjob:'%s'" % inp
    qtk.report("qmRunJob", message, 'with name:', name)
    outcome = inp.run(name)
    return outcome
def training(self, size, **kwargs):
    """
    set up training set, test set, and calculate alphas

    size: number of training points drawn at random from a deep copy of
      ``self.data``; entries whose ``ref`` is NaN are skipped and do not
      count towards ``size``
    kwargs:
      deviation: value added to the diagonal of the kernel matrix
        (stored as ``self._lambda``, default 0)
      eigen: when true, also store the eigen-decomposition of the kernel
        matrix in ``self.kernelEW`` / ``self.kernelEV``

    Sets ``training_set``, ``training_index``, ``refVector``, ``refCoord``,
    ``rest_set``, ``kernelMatrix`` and ``alphas`` on ``self``.  ``alpha`` is
    reset to NaN on every point of ``rest_set`` and set to the matching
    entry of ``self.alphas`` on every training point.
    """
    # 'lambda' is a python keyword...
    self._lambda = kwargs.get('deviation', 0)
    qtk.report("Deviation parameter", self._lambda)

    template = copy.deepcopy(self.data)
    # shuffle needs a mutable sequence, not a lazy range
    index = list(range(self.data_size))
    rd.shuffle(index)
    max_index = size
    i = 0
    self.training_set = []
    self.training_index = []
    reference_vector = []
    ref_coord = []
    # keep the flexibility for unset reference datapoint
    while i < max_index:
        if i == self.data_size:
            qtk.exit("reference not set")
        data_point = template[index[i]]
        if not np.isnan(data_point.ref):
            self.training_set.append(data_point)
            self.training_index.append(index[i])
            reference_vector.append(data_point.getRef())
            ref_coord.append(data_point.getVector())
        else:
            # skipped point: look one entry further to still reach `size`
            max_index += 1
        i += 1

    self.refVector = np.array(reference_vector)
    self.refCoord = np.array(ref_coord)

    chosen = set(self.training_index)
    self.rest_set = [
        data for j, data in enumerate(template) if j not in chosen
    ]
    # plain loops instead of np.vectorize: vectorize is meant for building
    # arrays and raises on empty input (e.g. when nothing is left over)
    for data in self.rest_set:
        data.alpha = np.nan

    rows, columns = self.refCoord.shape
    qtk.progress("Kernel", "generating",\
                 "%dx%d" % (size, size), "kernel matrix...")
    # exteral C module
    self.kernelMatrix = km.kernel_matrix(self.refCoord, rows, columns,
                                         self.kernel, self.klargs)
    qtk.done()

    if 'eigen' in kwargs and kwargs['eigen']:
        qtk.progress("Kernel", "proceed diagonalization...")
        self.kernelEW, self.kernelEV =\
            np.linalg.eigh(self.kernelMatrix)
        qtk.done()

    qtk.progress("Kernel", "inverting...")
    regularized = self.kernelMatrix + self._lambda * np.eye(size)
    self.alphas = np.dot(np.linalg.inv(regularized), self.refVector)
    qtk.done()

    for k, data_point in enumerate(self.training_set):
        data_point.alpha = self.alphas[k]
def setKernel(self, kernelName, *klargs):
    """
    Store the kernel name and its extra positional arguments on ``self``
    (``self.kernel`` / ``self.klargs``) and report the selection.
    """
    self.kernel, self.klargs = kernelName, klargs
    kernel_arguments = list(self.klargs)
    qtk.report("Kernel", self.kernel, kernel_arguments)
def __init__(self, xyz_file, parameter_file, **kwargs):
    """
    Load the structure from ``xyz_file``, initialise the mutation /
    stretching / rotation containers, fill them through
    ``self.read_param(parameter_file)`` and, unless ``qtk.setting.quiet``
    is set, report a summary of what was read.

    ``kwargs`` is accepted but not read in this method.
    """
    self.structure = qtk.toMolecule(xyz_file)
    # mutation related variables
    self.mutation_list = []
    self.mutation_target = []
    self.mutation_size = 0
    # stretching related variables
    self.stretching_list = []
    self.stretching_direction = []
    self.stretching_range = []
    # rotation related variables
    self.rotation_list = []
    self.rotation_center = []
    self.rotation_axis = []
    self.rotation_range = []
    # replacing related variables
    # not yet implemented
    # constraint variables
    # self.constraint = False
    self.forbiden_bonds = []
    self.ztotal = 0
    self.vtotal = 0
    self.element_count = {}

    # setup all parameters
    self.read_param(parameter_file)
    # one tag per coordinate: 'm' (mutation), 's' (stretching),
    # 'r' (rotation)
    self.coor = flatten([
        ['m' for _ in flatten(self.mutation_list)],
        ['s' for _ in flatten(self.stretching_list)],
        ['r' for _ in flatten(self.rotation_list)],
    ])

    MList = self.mutation_list
    _flatten = [item for sublist in MList for item in sublist]
    vlen = np.vectorize(len)
    try:
        lenList = vlen(MList)
    except TypeError:
        # fallback: report the length of the first sub-list for every entry
        lenList = [len(MList[0]) for i in range(len(MList))]

    if not qtk.setting.quiet:
        report_itr = False
        # NOTE(review): the nesting of the header and mutation reports under
        # this `if` was reconstructed from whitespace-collapsed source --
        # confirm against the original file
        if self.mutation_list:
            report_itr += True
            qtk.report('', "===== CCS REPORT =====", color=None)
            qtk.report("generating molecule", xyz_file)
            qtk.report("ccs parameter file", parameter_file)
            qtk.report("mutation indices", self.mutation_list)
            qtk.report("target atomic numbers", self.mutation_target)
            qtk.report("length of mutation vector",
                       len(_flatten), "<=>", lenList)
            #print ""
        if self.stretching_list:
            qtk.report("stretching indices", self.stretching_list)
            qtk.report("stretching range", self.stretching_range)
            qtk.report("stretching direction indices",
                       self.stretching_direction)
            #print ""
        if self.rotation_list:
            qtk.report("rotation indices", self.rotation_list)
            qtk.report("rotation center", self.rotation_center)
            qtk.report("rotation axis", self.rotation_axis)
            qtk.report("rotation range", self.rotation_range)
            #print ""
        qtk.status("ccs coordinate", self.coor)
        qtk.report('', "========= END ========", color=None)
def setKernel(self, kernelName, *klargs):
    """
    Remember which kernel to use.

    ``kernelName`` is stored as ``self.kernel`` and the remaining
    positional arguments as the tuple ``self.klargs``; both are then
    reported through ``qtk.report``.
    """
    self.kernel = kernelName
    self.klargs = klargs
    reported_args = list(klargs)
    qtk.report("Kernel", kernelName, reported_args)
def PPCheck(xc, element, pp_file_str, **kwargs):
    """
    Return the local path of the pseudopotential ``pp_file_str``,
    downloading the file first when it is missing.

    xc: functional name; 'lda' is looked up as 'pade' and 'pbe0' as 'pbe'
    element: element symbol
    pp_file_str: file name below ``qtk.setting.cpmd_pp``
    kwargs:
      dcacp: when true and ``element`` is in ``qtk.setting.dcscp_list``,
        the file is located through the link table at
        ``qtk.setting.cpmd_dcacp_url``
      pp_type: 'Goedecker' or 'MT'; selects the link in the DCACP case

    A download is attempted only when the file does not exist and
    ``qtk.setting.download_pp`` is true.  Any exception is turned into a
    ``qtk.warning`` and ``None`` is returned in that case.
    """
    pp_file = None
    pp_content = None
    if xc == 'lda':
        xc = 'pade'
    elif xc == 'pbe0':
        xc = 'pbe'
    # evaluated outside the try block, as before, so a failure here is not
    # downgraded to a warning -- presumably acts as an element check
    qtk.n2ve(element)

    try:
        use_dcacp = bool(kwargs.get('dcacp')) \
            and element in qtk.setting.dcscp_list
        if not use_dcacp:
            pp_path = os.path.join(xc,
                                   element + '-q' + str(qtk.n2ve(element)))
            pp_file = os.path.join(qtk.setting.cpmd_pp_url, pp_path)
        saved_pp_path = os.path.join(qtk.setting.cpmd_pp, pp_file_str)

        if not os.path.exists(saved_pp_path) and qtk.setting.download_pp:
            if use_dcacp:
                url = qtk.setting.cpmd_dcacp_url
                # '<scheme>:' + '//' + '<host>'
                root_list = [part for part in url.split('/') if part]
                root = '//'.join(root_list[:2])
                html = ''.join(urllib2.urlopen(url).readlines())
                pp_links = BeautifulSoup(html).body.findAll(
                    'a', attrs={'class': 'table'}
                )
                if kwargs['pp_type'].title() == 'Goedecker':
                    pp_flag = r'/SG/'
                elif kwargs['pp_type'].upper() == 'MT':
                    pp_flag = r'/MT/'
                hrefs = [l['href'] for l in pp_links
                         if l.text == element.title()]
                pp_path = [href for href in hrefs
                           if xc.upper() in href and pp_flag in href]
                pp_content = urllib2.urlopen(root + pp_path[0]).readlines()
            elif pp_file:
                pp_content = urllib2.urlopen(pp_file).readlines()
                # keep only what lies between the <pre> and </pre> lines
                pattern = re.compile(r'^.*</*pre>.*$')
                pp_se = [line for line in pp_content if pattern.match(line)]
                pp_start = pp_content.index(pp_se[0])
                pp_end = pp_content.index(pp_se[1])
                pp_content = pp_content[pp_start:pp_end]
                pp_content[0] = pp_content[0].split('>')[-1]

            if pp_content:
                # undo HTML escaping of '&' (CPMD section names start with
                # it); the entity is spelled in two pieces so that tools
                # which decode entities cannot turn this into a no-op again
                amp_entity = '&' + 'amp;'
                pp_content = [line.replace(amp_entity, '&')
                              for line in pp_content]
                qtk.report('PPCheck', 'pp file %s not found in %s, ' \
                           % (pp_file_str, qtk.setting.cpmd_pp) + \
                           'download now...')
                with open(saved_pp_path, 'w') as new_pp_file:
                    new_pp_file.write(''.join(pp_content))

        return saved_pp_path

    except Exception as e:
        qtk.warning('something wrong with pseudopotential with error: '+\
                    str(e))
def predict(self, size, **kwargs):
    """
    Predict a random sample of ``self.rest_set`` and report the errors.

    size: number of points drawn at random from ``self.rest_set``
    kwargs: accepted but not read

    Sets on ``self``: ``test_set``, ``testCoord``, ``kernelVectors``,
    ``testTrue``, ``testPred``, ``error`` (absolute errors), ``MAE``,
    ``RMSE`` and ``errorEstimate``; every test point gets a ``prediction``
    attribute.  ``errorEstimate[i]`` is det(K') / det(K), where K is
    ``self.kernelMatrix`` and K' is K bordered by the i-th kernel vector
    with 1 in the corner.
    """
    # shuffle needs a mutable sequence, not a lazy range
    index = list(range(len(self.rest_set)))
    rd.shuffle(index)
    self.test_set = [self.rest_set[j] for j in index[:size]]
    self.testCoord = np.array([data.getVector() for data in self.test_set])

    rrows, rcolumns = self.refCoord.shape
    trows, tcolumns = self.testCoord.shape

    # exteral C module
    qtk.progress("Predition", "generating",\
                 "%dx%d" % (trows, rrows),\
                 "kernel projection..." )
    self.kernelVectors = kv.kernel_vectors(self.refCoord, rrows, rcolumns,
                                           self.testCoord, trows, tcolumns,
                                           self.kernel, self.klargs)
    qtk.done()

    test_true = []
    test_pred = []
    for k, data_point in enumerate(self.test_set):
        prediction = np.dot(self.kernelVectors[k], self.alphas)
        data_point.prediction = prediction
        test_true.append(data_point.ref)
        test_pred.append(prediction)

    # analize and status report
    self.testTrue = np.array(test_true)
    self.testPred = np.array(test_pred)
    self.error = abs(self.testPred - self.testTrue)
    self.MAE = sum(self.error) / len(self.error)
    self.RMSE = np.sqrt(sum(self.error**2) / len(self.testTrue))
    max_error = max(self.error)
    min_error = min(self.error)
    max_index = list(self.error).index(max_error)
    min_index = list(self.error).index(min_error)
    max_name = self.test_set[max_index].getName()
    min_name = self.test_set[min_index].getName()
    qtk.report("predicted MAE", self.MAE)
    qtk.report("predicted RMSE", self.RMSE)
    qtk.report("Maximum error", max_error, max_name)
    qtk.report("Minimum error", min_error, min_name)

    def error_estimate(ker, vec):
        # border the kernel matrix with vec (row and column) and a 1
        tmp = np.vstack([ker, vec])
        K = np.vstack([tmp.T, np.append(vec, 1)]).T
        old = np.linalg.det(ker)
        new = np.linalg.det(K)
        return new / old

    def error_i(i):
        return error_estimate(self.kernelMatrix, self.kernelVectors[i])

    verror = np.vectorize(error_i)
    self.errorEstimate = verror(range(trows))
def genRefInp(x, y, n_pair, **kwargs):
    """
    generate cpmd input files for graphene reference calculation
    x: number of graphene 4-atom unit cells in x
    y: number of graphene 4-atom unit cells in y
    n_pair: number of BN pairs in the system
    kwargs: max_sample, path

    When ``path`` is given it is created and becomes the working directory.
    Inputs are written to ``inp/``; an existing ``inp/`` is first moved to
    ``back_inp/`` (replacing any older backup).  For every choice of N and B
    sites a structure is written unless its ``bond_types`` contain both
    'B-B' and 'N-N'.  Once more than ``max_sample`` structures would be
    written the process is terminated through ``sys.exit``.
    """
    if 'path' in kwargs:
        qtk.report("creating folder", kwargs['path'])
        os.makedirs(kwargs['path'])
        os.chdir(kwargs['path'])

    print("%screating inp folder...%s"
          % (qtk.bcolors.OKGREEN, qtk.bcolors.ENDC))
    if not os.path.exists('inp'):
        os.makedirs('inp')
    else:
        print("%sinp folder exist, baking up to back_inp...%s"
              % (qtk.bcolors.WARNING, qtk.bcolors.ENDC))
        try:
            shutil.rmtree('back_inp')
        except OSError:
            # no previous backup to remove; other problems surface at rename
            pass
        os.rename('inp', 'back_inp')
        os.makedirs('inp')

    N = 4 * x * y
    if 'max_sample' in kwargs:
        max_sample = kwargs['max_sample']
    else:
        total = math.factorial(N - 1)
        denum1 = math.factorial(n_pair)**2
        denum2 = math.factorial(N - 1 - 2 * n_pair)
        # multinomial coefficient: the division is exact, keep it an integer
        max_sample = total // (denum1 * denum2)
    digit = len(str(max_sample))

    name_xyz = "gph%d-%d.xyz" % (x, y)
    name_ccs = "ccs%d-%d.txt" % (x, y)
    namev_xyz = "gph%d-%dv.xyz" % (x, y)
    namev_ccs = "ccs%d-%dv.txt" % (x, y)
    header = "gph%d%d_" % (x, y)

    with open(name_ccs, "w") as ccs_file:
        ccs_file.write("mutation_list:\n"
                       + " %d:%d -> 5:7\n" % (2, N)
                       + "end" + "\n")
    with open(namev_ccs, "w") as ccsv_file:
        ccsv_file.write("mutation_list:\n"
                        + " %d:%d -> 5:7\n" % (1, N-1)
                        + "end" + "\n")
    print("%sthe following files are written: %s"
          % (qtk.bcolors.OKGREEN, qtk.bcolors.ENDC))
    print(" %s\n %s\n %s\n %s\n"
          % (name_xyz, namev_xyz, name_ccs, namev_ccs))

    graphene = generate(x, y)
    graphene.write_xyz(name_xyz)
    # NOTE(review): the return value was never used; presumably remove_atom
    # edits ``graphene`` in place before it is written again -- confirm
    graphene.remove_atom(1)
    graphene.write_xyz(namev_xyz)

    space = qtk.CCS(name_xyz, name_ccs)
    space_v = qtk.CCS(namev_xyz, namev_ccs)
    flat = [item for sublist in space.mutation_list
            for item in sublist]

    # geometry setup
    mol_base = graphene
    dx = mol_base.R[1, 0]
    mol_base.sort_coord()
    dy = mol_base.R[1, 1]
    center = mol_base.R[0] + [0, 0, -10]
    mol_base.center(center)
    x_max = mol_base.R[-1, 0]
    y_max = mol_base.R[-1, 1]
    celldm = [round(x_max + dx, 4), round(y_max + dy, 4), 20, 0, 0, 0]

    # input setup
    gph = qtk.QMInp(name_xyz, 'cpmd', info='gph')
    gph.setCelldm(celldm)
    gph.periodic()
    gph.setSCFStep(500)

    gphv = qtk.QMInp(namev_xyz, 'cpmd', info='gphv')
    gphv.setCelldm(celldm)
    gphv.periodic()
    gphv.setSCFStep(500)

    name = "inp/%s%s.inp" % (header, str(0).zfill(digit))
    namev = "inp/%s%sv.inp" % (header, str(0).zfill(digit))
    gph.write(name)
    gphv.write(namev)

    c_base = [6 for i in range(len(flat))]
    itr = 1
    for n_bn in range(1, n_pair + 1):
        for n_comb in it.combinations(range(len(flat)), n_bn):
            rest = [index for index in range(len(flat))
                    if index not in n_comb]
            for b_comb in it.combinations(rest, n_bn):
                name = "inp/%s%s.inp" % (header, str(itr).zfill(digit))
                namev = "inp/%s%sv.inp" % (header, str(itr).zfill(digit))
                atom_list = list(c_base)
                for i in b_comb:
                    atom_list[i] = 5
                for i in n_comb:
                    atom_list[i] = 7
                mutate_mol = space.generate(mutation=[atom_list])
                mutate_molv = space_v.generate(mutation=[atom_list])
                mutate_mol.find_bonds()
                if not all(key in mutate_mol.bond_types
                           for key in ('B-B', 'N-N')):
                    gph.setCenter([0, 0, -celldm[2] / 2])
                    gph.setStructure(mutate_mol)
                    gph.write(name)
                    gphv.setCenter([0, 0, -celldm[2] / 2])
                    gphv.setStructure(mutate_molv)
                    gphv.write(namev)
                    itr += 1
                    if itr > max_sample:
                        msg = "%sDONE! %d generated%s"\
                              % (qtk.bcolors.OKGREEN, itr-1,
                                 qtk.bcolors.ENDC)
                        sys.exit(msg)
    print("%sDONE! %d generated%s"
          % (qtk.bcolors.OKGREEN, itr-1, qtk.bcolors.ENDC))
def write(self, name=None, **kwargs):
    """
    Write the INCAR, KPOINTS, POSCAR and POTCAR files of a VASP run.

    kwargs are merged into self.setting first.  The four file objects
    are obtained from the parent class' write(); the molecule returned
    together with INCAR supplies the atom ordering and coordinates.
    When `name` is empty, POTCAR receives "cat <path>" lines instead of
    the pseudopotential contents.

    Returns the INCAR file object.
    """
    self.setting.update(kwargs)
    self.setting['root_dir'] = name
    self.setting['no_molecule'] = False
    self.setting['output'] = bool(name)
    if name:
        name = os.path.splitext(name)[0]

    incar, molecule = \
        super(PlanewaveInput, self).write('INCAR', **self.setting)
    self.setting['no_molecule'] = True
    kpoints = super(PlanewaveInput, self).write('KPOINTS', **self.setting)
    poscar = super(PlanewaveInput, self).write('POSCAR', **self.setting)
    potcar = super(PlanewaveInput, self).write('POTCAR', **self.setting)

    # TODO: center molecule, charge multiplicity, optimizer;
    # write CPMD to modulize structure manipulation?

    pp_files = []      # POTCAR path of every atom type, in order
    atom_counts = []   # number of atoms of every type, same order
    coordinates = []   # coordinates in the order written to POSCAR

    # ---- collect POSCAR and POTCAR data ----
    molecule.sort()
    type_index = molecule.index
    type_list = molecule.type_list
    Z = molecule.Z

    self.pp_path = None
    if 'pp_path' in self.setting:
        self.pp_path = self.setting['pp_path']
    else:
        if 'pp_theory' in self.setting:
            pp_theory = self.setting['pp_theory']
        else:
            theory_dict = {
                'pbe': 'pbe',
                'pbe0': 'pbe',
                'hse06': 'pbe',
                'hse03': 'pbe',
                'lda': 'lda',
            }
            if self.setting['theory'] not in theory_dict:
                qtk.warning("%s is not supported, change theory to LDA"
                            % (self.setting['theory']))
                self.setting['theory'] = 'lda'
            pp_theory = theory_dict[self.setting['theory']]
            if pp_theory.lower() not in ['pbe', 'lda']:
                qtk.warning('xc: %s is not supported, using LDA PP' %
                            pp_theory.upper())
                pp_theory = 'LDA'
        self.pp_path = qtk.setting.vasp_pp + '_%s_%s' % \
            (pp_theory.upper(), self.setting['pp_type'].upper())

    if hasattr(molecule, 'R_scale') and molecule.scale:
        R_mode = 'scaled'
        mol_R = molecule.R_scale.copy()
        for axis in range(3):
            mol_R[:, axis] = mol_R[:, axis] / molecule.scale[axis]
    else:
        R_mode = 'cartesian'
        mol_R = molecule.R.copy()

    # consecutive entries of type_index delimit the atoms of one type
    for start, stop in zip(type_index[:-1], type_index[1:]):
        # check special PP folder: not yet implemented
        # default PP path
        atom_pp = os.path.join(self.pp_path, type_list[start], 'POTCAR')
        if os.path.exists(atom_pp):
            pp_files.append(atom_pp)
        else:
            qtk.exit("PP file: " + atom_pp + " not found")
        atom_counts.append(stop - start)
        for atom in range(start, stop):
            coordinates.append(mol_R[atom][:])

    # ---- INCAR ----
    qtk.report("vasp.inp", "writing", "INCAR")
    incar.write("SYSTEM = %s\n" % self.setting['info'])
    incar.write("ISMEAR = 0\n")
    incar.write("IBRION = 2\n")
    if self.setting.get('restart'):
        incar.write("ISTART = 1\n")
    if 'cutoff' in self.setting:
        cutoff = self.setting['cutoff']
        incar.write("ENCUT = %.2f" % (cutoff * 13.605698066))
        incar.write(" # in eV, that is %.1f Ry\n" % cutoff)
    if 'scf_step' in self.setting:
        incar.write('NELM = %d\n' % self.setting['scf_step'])
    if 'vdw' in self.setting:
        ivdw = {
            'd2': 10,
            'd3': 11,
            'd3-bj': 12,
            'mbd': 202,
            'mbd_iter': 212,
        }
        vdw = self.setting['vdw'].lower()
        if vdw != 'none':
            if vdw in ivdw:
                incar.write("IVDW = %d\n" % ivdw[vdw])
            else:
                qtk.exit("VDW '%s' is not supported for VASP" % vdw)
    if 'ks_states' in self.setting:
        occupied = int(round(self.molecule.getValenceElectrons() / 2.0))
        incar.write("NBANDS = %d\n"
                    % (occupied + self.setting['ks_states']))
    if self.setting.get('full_kmesh'):
        incar.write("ISYM = -1\n")
    theory = self.setting['theory']
    if theory == 'pbe0':
        incar.write("LHFCALC = .TRUE.\n")
        incar.write("GGA = PE\n")
    elif theory == 'hse06':
        incar.write("GGA = PE\n")
        incar.write("\n##HSE setting\n")
        incar.write("LHFCALC = .TRUE.\n")
        incar.write("HFSCREEN = 0.2 \n")
        incar.write("ALGO = D\n")

    if molecule.charge != 0:
        incar.write("NELECT = %d\n" % (molecule.getValenceElectrons()))
    if not self.setting.get('save_density'):
        incar.write("LCHARG = .FALSE.\n")
    if not self.setting.get('scalapack'):
        incar.write("LSCALAPACK = .FALSE.\n")
    if 'vasp_additional_setting' in self.setting:
        for added in self.setting['vasp_additional_setting']:
            incar.write(added + '\n')
    incar.close()

    # ---- KPOINTS ----
    qtk.report("vasp.inp", "writing", "KPOINTS")
    has_mesh = 'kmesh' in self.setting
    has_scan = 'band_scan' in self.setting
    if has_mesh and has_scan:
        bnds = self.setting['band_scan']
        if len(bnds[0]) != len(bnds[1]) - 1:
            qtk.exit('band_scan format: [lst_div, lst_coord]')
        lst_div, lst_coord = self.setting['band_scan']
        kpt_weight_list = self.MPMesh(self.setting['kmesh'],
                                      return_count=True)
        kpt_scan_list = self.bandScanMesh(lst_div, lst_coord)
        kpts = np.vstack([kpt_weight_list, kpt_scan_list])
        kpoints.write("Explicit k-points list\n")
        kpoints.write(" %d\n" % len(kpts))
        kpoints.write("Reciprocal lattice\n")
        for kpt in kpts:
            for component in kpt[:3]:
                kpoints.write(" % 7.4f" % component)
            kpoints.write("%8.4f\n" % kpt[3])
    elif has_mesh:
        k1, k2, k3 = self.setting['kmesh']
        kpoints.write("Automatic mesh\n")
        kpoints.write(" 0 ! number of k-points = 0")
        kpoints.write(" ->automatic generation scheme\n")
        kpoints.write("Gamma ! generate a Gamma centered grid\n")
        kpoints.write(" %d %d %d ! number of k grids\n" % (k1, k2, k3))
    elif not has_scan:
        kpoints.write("Gamma-point only\n")
        kpoints.write(" 1 ! one k-point\n")
        kpoints.write("rec ! in units of reciprocal vector\n")
        kpoints.write(" 0 0 0 1 ! coordinates and weight\n")
    kpoints.close(no_cleanup=True)

    # ---- POSCAR ----
    qtk.report("vasp.inp", "writing", "POSCAR")
    poscar.write(self.setting['info'] + '\n')
    poscar.write("1.0\n")
    self.celldm2lattice()
    for row in range(3):
        for col in range(3):
            poscar.write(" %7.4f" % self.setting['lattice'][row, col])
        poscar.write(" ! lattic vector a(%d)\n" % row)
    for count in atom_counts:
        poscar.write(str(count) + ' ')
    poscar.write("! number of atoms in order of POTCAR\n")
    if R_mode == 'cartesian':
        poscar.write("cart ! cartesian coordinates\n")
    else:
        poscar.write("direct ! fractional coordinates\n")
    for position in coordinates:
        for component in position:
            poscar.write(" %7.4f" % component)
        poscar.write("\n")
    poscar.close(no_cleanup=True)

    # ---- POTCAR ----
    qtk.report("vasp.inp", "writing", "POTCAR")
    for pp_file in pp_files:
        qtk.report("vasp.inp.POTCAR", pp_file)
        if not name:
            potcar.write("cat %s\n" % pp_file)
            continue
        with open(pp_file) as pp_source:
            for line in pp_source:
                potcar.write(str(line))
    potcar.close(no_cleanup=True)

    return incar
def AlGaX_EvOpt(structure, vacancy_ind, ccs_span, **kwargs):
    """
    Set up and run an optimizer over Ev_ccs on the CCS space built from
    `structure` and `ccs_span`.

    Three reference calculations ('pref', 'vref', 'freeAtom') are run
    first unless their output files already exist.

    kwargs:
      qm_setting: dict of settings forwarded to qtk.QMInp (copied, the
                  caller's dict is left untouched)
      QMInp: base input object; default qtk.QMInp(structure,
             program='cpmd')
      T: optimizer setting 'T', default 1
      target: optimizer setting 'target', default 0
      log_file: optimizer log file, default 'AlGaX_EvOpt.log'
      threads: total threads, default qtk.cpu_count
      threads_per_job: threads per QM job, default `threads`
      optimizer: 'MC' (default) or 'GA'
      population_size: GA population, default qtk.setting.cpu_count
      alchemy: when true, 'pref'/'vref' are added to the penalty setting
    """
    # copy so that the keys added below do not leak into the caller's dict
    qm_setting = dict(kwargs['qm_setting']) if 'qm_setting' in kwargs else {}
    qm_setting['save_restart'] = True

    if 'QMInp' in kwargs:
        baseinp = kwargs['QMInp']
    else:
        baseinp = qtk.QMInp(structure, program='cpmd')
    _T = kwargs['T'] if 'T' in kwargs else 1
    _target = kwargs['target'] if 'target' in kwargs else 0
    logfile = kwargs['log_file'] if 'log_file' in kwargs \
        else 'AlGaX_EvOpt.log'

    # NOTE(review): the default here reads qtk.cpu_count while the GA
    # population default reads qtk.setting.cpu_count -- confirm both exist.
    _threads = kwargs['threads'] if 'threads' in kwargs else qtk.cpu_count
    _threadspj = kwargs['threads_per_job'] if 'threads_per_job' in kwargs \
        else _threads
    _parallel = int(_threads / _threadspj)

    _optimizer = kwargs['optimizer'] if 'optimizer' in kwargs else 'MC'
    if _optimizer == 'GA':
        if 'population_size' in kwargs:
            _population_size = kwargs['population_size']
        else:
            _population_size = qtk.setting.cpu_count
    elif _optimizer != 'MC':
        # fail before the reference calculations instead of hitting an
        # undefined optimizer object at the very end
        qtk.exit("optimizer '%s' is not supported, use 'MC' or 'GA'"
                 % _optimizer)

    ccs = qtk.CCS(structure, ccs_span)

    # pristine reference
    inpp = qtk.QMInp(structure, **qm_setting)
    inpp.setting['info'] = 'Ev_per_ref'
    if not os.path.exists('pref/pref.out'):
        inpp.run('pref')

    # vacancy reference
    inpv = qtk.QMInp(structure, **qm_setting)
    inpv.removeAtoms(vacancy_ind)
    inpv.setChargeMultiplicity(0, 2)
    inpv.setting['info'] = 'Ev_vac_ref'
    if not os.path.exists('vref/vref.out'):
        inpv.run('vref')

    # isolated atom reference
    inpa = qtk.QMInp(structure, **qm_setting)
    inpa.isolateAtoms(vacancy_ind)
    inpa.setChargeMultiplicity(0, 2)
    inpa.setting['info'] = 'freeAtom'
    if not os.path.exists('freeAtom/freeAtom.out'):
        inpa.run('freeAtom')
    freeAtomOut = qtk.QMOut('freeAtom/freeAtom.out')

    # the result is not used; the call is kept so that whatever state
    # ccs.random() advances is the same as before
    ccs.random()

    qm_setting['threads'] = _threadspj
    penalty_setting = {
        'QMInp': baseinp,
        'freeAtomE': freeAtomOut.Et,
        'qm_setting': qm_setting,
    }
    if 'alchemy' in kwargs and kwargs['alchemy']:
        penalty_setting['pref'] = 'pref'
        penalty_setting['vref'] = 'vref'
    input_list = [ccs, vacancy_ind, penalty_setting]

    def genCCSInp():
        # second element of ccs.random() is the coordinate
        return ccs.random()[1]

    op_setting = {
        'power': 1,
        'log_file': logfile,
        'target': _target,
        'parallel': _parallel,
        'T': _T,
    }

    qtk.report('start optimizer')
    if _optimizer == 'MC':
        cylopt = qop.MonteCarlo(Ev_ccs, input_list, genCCSInp,
                                **op_setting)
    elif _optimizer == 'GA':
        cylopt = qop.GeneticOptimizer(Ev_ccs, input_list, genCCSInp,
                                      ccs.mate, _population_size,
                                      **op_setting)
    qtk.report('optimizer initialized')

    cylopt.run()
def PPString(inp, mol, i, n, outFile):
    """
    Return the pseudopotential file name for atom `i` of `mol`.

    inp: input object; inp.setting['pp_type'] and
         inp.setting['pp_theory'] are read, and a scalar
         inp.setting['d_shell'] is wrapped into a list
    mol: molecule; mol.string, mol.type_list and mol.Z are read
    i: atom index
    n: forwarded to PPName for pp_type 'cpmd'
    outFile: not used here

    When mol.string[i] (with '*' removed) is non-empty it is used as the
    name.  Otherwise the name is built from the element and pp_theory,
    the file is checked through PPCheck / PPCheck_cpmd and, for
    pp_type 'cpmd', converted to UPF and copied to
    qtk.setting.espresso_pp.  Strings matching the alchemy pattern
    (e.g. 'a2b_050') are converted to UPF as well.
    """
    alchemy = re.compile(r'^\w*2\w*_\d\d\d$')
    pp_type = inp.setting['pp_type']
    xc = inp.setting['pp_theory'].lower()
    type_name = mol.type_list[i]

    ppstr = re.sub(r'\*', '', mol.string[i])
    if ppstr:
        PPStr = ppstr
    elif pp_type == 'geodecker':
        element = type_name.title()
        if 'd_shell' in inp.setting \
                and not isinstance(inp.setting['d_shell'], list):
            inp.setting['d_shell'] = [inp.setting['d_shell']]
        if qtk.n2ve(element) > 10:
            shell = '-d'
        elif 'd_shell' in inp.setting \
                and element in inp.setting['d_shell']:
            shell = '-d'
        else:
            element_data = qtk.element[element]
            if element_data.group < 3 and mol.Z[i] > 1:
                shell = '-sp' if mol.Z[i] != 3 else '-s'
            else:
                shell = ''
        pp_xc_dict = {
            'lda': 'pz',
            'pbe0': 'pbe',
            'b3lyp': 'blyp',
        }
        pp_xc = pp_xc_dict.get(xc, xc)
        PPStr = ''.join([c for c in type_name if not c.isdigit()]) \
            + '.' + pp_xc + shell + '-hgh.UPF'
    elif pp_type == 'cpmd':
        PPStr = PPName(inp, mol, i, n)

    if not mol.string[i]:
        if pp_type == 'geodecker':
            PPCheck(pp_xc, type_name.title(), PPStr)
        elif pp_type == 'cpmd':
            # NOTE(review): this call used to pass `pp_xc`, which is only
            # bound on the 'geodecker' path and therefore raised
            # UnboundLocalError here.  The lowercased pp_theory is passed
            # instead -- confirm that is what PPCheck_cpmd expects.
            saved_pp = PPCheck_cpmd(xc, type_name.title(), PPStr)
            _cpmd2upf(saved_pp)
            _install_upf(saved_pp + '.UPF')
            PPStr = PPStr + '.UPF'

    elif alchemy.match(mol.string[i]):
        cpmd_pp = alchemyPP(xc, PPStr)
        new_pp1 = cpmd_pp + '.UPF'
        if not os.path.exists(new_pp1):
            qtk.report('espresso', "rewrite Goedecker's PP to UPF")
            if _cpmd2upf(cpmd_pp) != 0:
                # dirty fix for espresso alchemy conversion routine:
                # fall back to the PP of the path end point
                qtk.warning('conversion failed..., trying path end points')
                root, _ = os.path.splitext(PPStr)
                element_str = re.sub('_.*', '', root)
                element1 = re.sub('2.*', '', element_str)
                element2 = re.sub('.*2', '', element_str)
                fraction = float(re.sub('.*_', '', root)) / 100
                if fraction == 0.0:
                    strpp = element1 + "_q" + str(qtk.n2ve(element1)) + \
                        "_" + xc + '.psp'
                elif fraction == 1.0:
                    strpp = element2 + "_q" + str(qtk.n2ve(element2)) + \
                        "_" + xc + '.psp'
                else:
                    qtk.exit("PP conversion failed for intermediate lambda")
                strpp = os.path.join(qtk.setting.cpmd_pp, strpp)
                _cpmd2upf(strpp)
                os.rename(strpp + '.UPF', new_pp1)
        _install_upf(new_pp1)
        PPStr = PPStr + '.UPF'

    return PPStr


def _cpmd2upf(pp_path):
    """Run the configured cpmd2upf converter on pp_path, return its exit code."""
    conv_pp = sp.Popen("%s %s" %
                       (qtk.setting.espresso_cpmd2upf_exe, pp_path),
                       shell=True)
    conv_pp.wait()
    return conv_pp.returncode


def _install_upf(upf_path):
    """Copy upf_path into qtk.setting.espresso_pp unless that name exists there."""
    target = os.path.join(qtk.setting.espresso_pp,
                          os.path.split(upf_path)[1])
    if not os.path.exists(target):
        shutil.copy(upf_path, qtk.setting.espresso_pp)
def training(self, size, **kwargs):
    """
    Set up training set and rest set, and calculate alphas.

    size: number of training points; data points whose `ref` is NaN are
          skipped and replaced by further points of the shuffled data
    kwargs:
      deviation: value added on the diagonal of the kernel matrix
                 before inversion (stored as self._lambda), default 0
      eigen: when true, eigenvalues/eigenvectors of the kernel matrix
             are stored in self.kernelEW / self.kernelEV

    Sets self.training_set, self.training_index, self.rest_set,
    self.refVector, self.refCoord, self.kernelMatrix and self.alphas,
    and writes `alpha` on every data point (NaN for the rest set).
    """
    # 'lambda' is a python keyword...
    self._lambda = kwargs['deviation'] if 'deviation' in kwargs else 0
    qtk.report("Deviation parameter", self._lambda)

    template = copy.deepcopy(self.data)
    # shuffle needs a mutable sequence
    index = list(range(self.data_size))
    rd.shuffle(index)
    max_index = size
    i = 0
    self.training_set = []
    self.training_index = []
    reference_vector = []
    ref_coord = []
    # keep the flexibility for unset reference datapoint
    while i < max_index:
        if i == self.data_size:
            qtk.exit("reference not set")
        data_point = template[index[i]]
        if not np.isnan(data_point.ref):
            self.training_set.append(data_point)
            self.training_index.append(index[i])
            reference_vector.append(data_point.getRef())
            ref_coord.append(data_point.getVector())
        else:
            # look one point further to make up for the skipped one
            max_index += 1
        i += 1
    self.refVector = np.array(reference_vector)
    self.refCoord = np.array(ref_coord)

    selected = set(self.training_index)
    self.rest_set = [data for j, data in enumerate(template)
                     if j not in selected]
    # plain loop: np.vectorize raises on an empty rest set
    for data in self.rest_set:
        data.alpha = np.nan

    rows, columns = self.refCoord.shape
    qtk.progress("Kernel", "generating",
                 "%dx%d" % (size, size), "kernel matrix...")
    # exteral C module
    self.kernelMatrix = km.kernel_matrix(self.refCoord,
                                         rows, columns,
                                         self.kernel, self.klargs)
    qtk.done()

    if 'eigen' in kwargs and kwargs['eigen']:
        qtk.progress("Kernel", "proceed diagonalization...")
        self.kernelEW, self.kernelEV = \
            np.linalg.eigh(self.kernelMatrix)
        qtk.done()

    qtk.progress("Kernel", "inverting...")
    regularized = self.kernelMatrix + self._lambda * np.eye(size)
    self.alphas = np.dot(np.linalg.inv(regularized), self.refVector)
    qtk.done()

    for j, data_point in enumerate(self.training_set):
        data_point.alpha = self.alphas[j]
def predict(self, size, **kwargs):
    """
    Predict a random test set drawn from self.rest_set and report errors.

    size: number of test points taken from the shuffled rest set
    kwargs: not used

    Sets self.test_set, self.testCoord, self.kernelVectors,
    self.testTrue, self.testPred, self.error, self.MAE, self.RMSE and
    self.errorEstimate, and stores `prediction` on every test point.
    Requires training() to have set refCoord, alphas and kernelMatrix.
    """
    # shuffle needs a mutable sequence
    index = list(range(len(self.rest_set)))
    rd.shuffle(index)
    self.test_set = [self.rest_set[j] for j in index[:size]]
    self.kernelVectors = np.atleast_2d([])
    self.testCoord = np.array(
        [data.getVector() for data in self.test_set])

    rrows, rcolumns = self.refCoord.shape
    trows, tcolumns = self.testCoord.shape
    # exteral C module
    qtk.progress("Predition", "generating",
                 "%dx%d" % (trows, rrows),
                 "kernel projection...")
    self.kernelVectors = kv.kernel_vectors(self.refCoord,
                                           rrows, rcolumns,
                                           self.testCoord,
                                           trows, tcolumns,
                                           self.kernel, self.klargs)
    qtk.done()

    test_true = []
    test_pred = []
    for j, data_point in enumerate(self.test_set):
        prediction = np.dot(self.kernelVectors[j], self.alphas)
        data_point.prediction = prediction
        test_true.append(data_point.ref)
        test_pred.append(prediction)

    # analize and status report
    self.testTrue = np.array(test_true)
    self.testPred = np.array(test_pred)
    self.error = abs(self.testPred - self.testTrue)
    self.MAE = sum(self.error) / len(self.error)
    self.RMSE = np.sqrt(sum(self.error ** 2) / len(self.testTrue))
    max_index = list(self.error).index(max(self.error))
    min_index = list(self.error).index(min(self.error))
    max_name = self.test_set[max_index].getName()
    min_name = self.test_set[min_index].getName()
    qtk.report("predicted MAE", self.MAE)
    qtk.report("predicted RMSE", self.RMSE)
    qtk.report("Maximum error", max(self.error), max_name)
    qtk.report("Minimum error", min(self.error), min_name)

    # determinant of the training kernel is the same for every test
    # point, so it is computed once
    old = np.linalg.det(self.kernelMatrix)

    def error_estimate(ker, vec):
        # kernel matrix bordered by the projection vector, with 1 in
        # the new diagonal element
        tmp = np.vstack([ker, vec])
        K = np.vstack([tmp.T, np.append(vec, 1)]).T
        new = np.linalg.det(K)
        # ratio of the bordered determinant to the original one
        return new / old

    self.errorEstimate = np.array(
        [error_estimate(self.kernelMatrix, self.kernelVectors[j])
         for j in range(trows)])
def krrScore(data,
             n_samples = None,
             kernels = ['laplacian'],
             cv = None,
             threads = 1,
             alphas = [1e-11],
             gammas = [1e-5],
             descriptors = OrderedDict({
                 coulomb_matrices: {'nuclear_charges': True}
             }),
             return_key = False,
             report = False,
            ):
    """
    return scores in the format of input parameter structure

    data: mapping with 'E' (targets) and, depending on the descriptor,
          'xyz' and 'Z' (descriptor given) or 'X' (descriptor None)
    n_samples: training sizes; default is 1/10, 1/5 and 1/2 of len(E)
    kernels, alphas, gammas: KernelRidge parameters to scan; scalars
          are wrapped into lists
    cv: cross validation splits; default ShuffleSplit(len(E), n_iter=5,
        test_size=.1)
    threads: n_jobs of cross_val_score
    descriptors: OrderedDict {descriptor callable: keyword settings};
          None, a single callable, or a list of (callable, settings)
          tuples are converted.  A true 'nuclear_charges' setting is
          replaced by data['Z'].
    return_key: when true, also return the OrderedDict of the scanned
          parameters that have more than one value
    report: print progress through qtk.report

    Returns np.squeeze of the negated scores nested in the order
    descriptor / kernel / alpha / gamma / n_sample / cv fold.
    """

    E = data['E']

    if n_samples is None:
        n_samples = [
            int(len(E) / 10.),
            int(len(E) / 5.),
            int(len(E) / 2.),
        ]

    def listWrap(param):
        # anything that cannot be indexed is treated as a single value
        if '__getitem__' not in dir(param):
            param = [param]
        return param

    alphas = listWrap(alphas)
    gammas = listWrap(gammas)
    n_samples = listWrap(n_samples)

    if not isinstance(descriptors, OrderedDict):
        if descriptors is None:
            descriptors = OrderedDict({None: None})
        elif type(descriptors) is type(coulomb_matrices):
            descriptors = OrderedDict({descriptors: {}})
        elif type(descriptors) is list \
                and type(descriptors[0]) is tuple:
            descriptors = OrderedDict(descriptors)
    if type(kernels) is not list:
        kernels = [kernels]

    if cv is None:
        cv = ShuffleSplit(len(E),
                          n_iter=5,
                          test_size=.1)
    try:
        cv_fold = cv.n_iter
    except AttributeError:
        # e.g. an explicit list of (train, test) splits
        cv_fold = len(cv)

    input_key = OrderedDict()
    input_key['descriptors'] = descriptors
    input_key['kernels'] = kernels
    input_key['alphas'] = alphas
    input_key['gammas'] = gammas
    input_key['n_samples'] = n_samples
    input_key['cv_fold'] = cv_fold

    # only parameters that are actually scanned survive np.squeeze
    output_key = OrderedDict()
    for k, v in input_key.items():
        if k == 'cv_fold':
            if cv_fold > 1:
                output_key[k] = cv_fold
        elif len(v) > 1:
            output_key[k] = v

    if report:
        qtk.report("ML.tools.krrScores setting", "\n",
                   "kernel:", kernels, "\n",
                   "alphas:", alphas, "\n",
                   "gammas:", gammas, "\n",
                   "n_samples:", n_samples, "\n",
                   "cv_threads:", threads, "\n",
                   "cv_fold:", cv_fold, "\n",
                   "final score format: ", output_key.keys())

    all_scores = []
    for descriptor, dsetting in descriptors.items():
        descriptor_scores = []
        all_scores.append(descriptor_scores)
        if descriptor is not None:
            dsetting = copy.deepcopy(dsetting)
            if 'nuclear_charges' in dsetting \
                    and dsetting['nuclear_charges']:
                dsetting['nuclear_charges'] = data['Z']
            matrix_list = descriptor(data['xyz'], **dsetting)
        else:
            matrix_list = data['X']
        # one flat feature row per sample; identical for every
        # kernel/alpha/gamma/n_sample combination, so built once
        features = matrix_list.reshape(len(matrix_list), -1)

        for kernel in kernels:
            kernel_scores = []
            descriptor_scores.append(kernel_scores)
            for alpha in alphas:
                alpha_scores = []
                kernel_scores.append(alpha_scores)
                for gamma in gammas:
                    gamma_scores = []
                    alpha_scores.append(gamma_scores)
                    kernel_ridge = KernelRidge(alpha=alpha,
                                               gamma=gamma,
                                               kernel=kernel)
                    for n_sample in n_samples:
                        if report:
                            qtk.report(
                                "ML.tools.krrScores, processing", "\n",
                                " descriptor =", descriptor, "\n",
                                " descriptor_setting =", dsetting, "\n",
                                " kernel =", kernel, "\n",
                                " alpha =", alpha, "\n",
                                " gamma =", gamma, "\n",
                                " n_sample = ", n_sample
                            )

                        # truncate every training split to n_sample points
                        cv_ = [(train[:n_sample], test)
                               for train, test in cv]
                        scores = cross_val_score(
                            kernel_ridge,
                            features,
                            E,
                            cv=cv_,
                            n_jobs=threads,
                            scoring='mean_absolute_error')
                        gamma_scores.append(scores)
                        if report:
                            qtk.report(
                                "", "best score:",
                                np.min(np.abs(scores)), "\n",
                            )

    if report:
        qtk.report("", "final format:", output_key.keys())

    scores_array = np.squeeze(-np.array(all_scores))
    if return_key:
        return scores_array, output_key
    return scores_array