import numpy as np
import pandas as pd


def get_dat(lags, preds, start_train, end_train, end_test):
    data = pd.read_csv('link_travel_time_local.csv.gz', compression='gzip',
                       parse_dates=True, index_col=0)

    ## Sort links by order
    data, order = sort_links(data, '1416:1417', '7051:2056')

    ## Make a link order column, e.g. here the neighbouring links for link 1
    ## are 0 and 2.
    data['link_order'] = data['link_ref'].astype('category')
    not_in_list = data['link_order'].cat.categories.difference(order)
    data['link_order'] = data['link_order'].cat.set_categories(
        np.hstack((order, not_in_list)), ordered=True)
    data['link_order'] = data['link_order'].cat.codes

    ## Add a day-of-week column (Monday, ..., Sunday)
    data['Weekday'] = data.index.weekday_name

    data = data.sort_values('link_order')
    print("Number of observations = ", len(data))
    print("Number of links = ", len(data['link_ref'].unique()))

    data_train, data_test = split_df(data, start_train=start_train,
                                     end_train=end_train, end_test=end_test)
    print("\nTraining from", data_train.sort_index().index[0],
          "to", data_train.sort_index().index[-1])
    print("Testing from", data_test.sort_index().index[0],
          "to", data_test.sort_index().index[-1])

    ## Transform the train and test sets using the mean and std of the train set.
    means_df, scales, low_df, upr_df = fit_scale(data_train, order)
    ix_train, ts_train, rm_mean_train, rm_scale_train, w_train, lns_train = transform(
        data_train, means_df, scales, order, freq='15min')
    ix_test, ts_test, rm_mean_test, rm_scale_test, w_test, lns_test = transform(
        data_test, means_df, scales, order, freq='15min')

    ## Create the rolling-window tensor.
    ## - y_mean and y_std are arrays where each column is a link and each row
    ##   holds the mean and std of the corresponding data point on that weekday.
    ## - y_num_meas indicates how many measurements fall in the time window
    ##   for a given link.
    X_train, y_train, y_ix_train, y_mean_train, y_std_train, y_num_meas_train = roll(
        ix_train, ts_train, rm_mean_train, rm_scale_train, w_train, lags, preds)
    X_test, y_test, y_ix_test, y_mean_test, y_std_test, y_num_meas_test = roll(
        ix_test, ts_test, rm_mean_test, rm_scale_test, w_test, lags, preds)

    return (X_train, X_test, y_train, y_test, y_std_train, y_std_test,
            y_num_meas_train, y_num_meas_test)
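# Hedged usage sketch: the window sizes and date strings below are illustrative
# assumptions, not values taken from the project; `sort_links`, `split_df`,
# `fit_scale`, `transform` and `roll` are assumed project helpers.
if __name__ == '__main__':
    (X_train, X_test, y_train, y_test, y_std_train, y_std_test,
     y_num_meas_train, y_num_meas_test) = get_dat(
        lags=8, preds=4,                                   # assumed window sizes
        start_train='2017-01-01', end_train='2017-02-01',  # assumed dates
        end_test='2017-02-15')
    print(len(X_train), len(X_test))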
def get_embedded_solver(self, schmidt_basis, kind="interacting"):
    """
    Prepares an embedded solver for the given Schmidt basis.
    Args:
        schmidt_basis (tuple, list): a list of frozen and active orbitals;
        kind (str, int): a local fragment ID selecting the non-interacting-bath
        formulation, or "interacting" for the interacting-bath one;

    Returns:
        The embedded solver.
    """
    f1, f2, a1, a2 = schmidt_basis
    n_frozen = f1.shape[1] + f2.shape[1]
    n_active = a1.shape[1] + a2.shape[1]
    logger.debug1(self.__mol__, "Active orbitals (domain+embedding): {:d}+{:d}".format(
        a1.shape[1], a2.shape[1],
    ))
    logger.debug1(self.__mol__, "Frozen orbitals (domain+embedding): {:d}+{:d}".format(
        f1.shape[1], f2.shape[1],
    ))
    hcore_ao = self.__mf_solver__.get_hcore()
    if kind in self.__domains__:
        if self.umat is not None:
            hcore_ao = hcore_ao + self.get_umat(exclude=kind)
        schmidt_projection = self.__orthogonal_basis_inv__.T.dot(
            numpy.concatenate((a1, a2), axis=1))
        logger.debug(self.__mol__, "Transforming active orbitals ...")
        hcore = transform(hcore_ao, schmidt_projection)
        n = a1.shape[1]
        partial_eri = transform(
            restore(1, self.__mf_solver__._eri, hcore_ao.shape[0]),
            schmidt_projection[:, :n],
        )
        eri = numpy.zeros((n_active,) * 4, dtype=partial_eri.dtype)
        eri[:n, :n, :n, :n] = partial_eri
        e_vac = self.__mf_solver__.energy_nuc()
    elif kind == "interacting":
        schmidt_projection = self.__orthogonal_basis_inv__.T.dot(
            numpy.concatenate(schmidt_basis, axis=1))
        logger.debug(self.__mol__, "Transforming orbitals ...")
        hcore = transform(hcore_ao, schmidt_projection)
        eri = transform(restore(1, self.__mf_solver__._eri, hcore_ao.shape[0]),
                        schmidt_projection)
        e_vac = self.__mf_solver__.energy_nuc()
        logger.debug1(self.__mol__, "Freezing external orbitals ...")
        hcore, eri, e_vac = freeze(hcore, eri, n_frozen, e_vac=e_vac)
    else:
        raise ValueError("Unknown kind: {}".format(kind))
    return self.__correlated_solver__(
        hcore, eri,
        nelectron=n_active,
        e_vac=e_vac,
        verbose=self.__nested_verbosity__,
    )
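# Hedged usage sketch, mirroring how `kernel` drives this method below:
# for domain_id, domain, schmidt_basis in self.iter_schmidt_basis():
#     solver_ib = self.get_embedded_solver(schmidt_basis)                   # interacting bath
#     solver_nib = self.get_embedded_solver(schmidt_basis, kind=domain_id)  # non-interacting bath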
def parametrize_umat_full(self, parameters):
    """
    Calculates the u-matrix in the full basis.
    Args:
        parameters (numpy.ndarray): parameters of the u-matrix;

    Returns:
        The u-matrix value.
    """
    return transform(self.parametrize_umat(parameters), self.umat_projector.T)
def gradients(self):
    """
    Calculates gradients for the target function.
    Returns:
        Gradients.
    """
    return 2 * numpy.einsum(
        "ijkl,kl->ij",
        self.raw_gradients(),
        transform(self.driver.make_rdm1(), self.dm_projector) - self.reference_dm,
    ).reshape(-1)
def raw_gradients(self):
    r"""
    Calculates gradients using the following expression for the derivative:

    .. math::
        \frac{\partial D}{\partial \delta} = C_{occ} Z^\dagger C_{virt}^\dagger + C_{virt} Z C_{occ}^\dagger,

    .. math::
        Z = - \frac{C_{virt}^\dagger H^{(1)} C_{occ}}{E_{virt} - E_{occ}}

    Returns:
        Gradients.
    """
    # Occupied and virtual MOs
    occ = self.driver.mo_coeff[:, self.driver.mo_occ > 1]
    occ_e = self.driver.mo_energy[self.driver.mo_occ > 1]
    virt = self.driver.mo_coeff[:, self.driver.mo_occ <= 1]
    virt_e = self.driver.mo_energy[self.driver.mo_occ <= 1]

    gap = virt_e.min() - occ_e.max()
    logger.debug1(self.__log__, "| energy gap {:.3e}".format(
        gap,
    ))
    if gap < 1e-4:
        logger.warn(self.__log__, "The energy gap is too small: {:.3e}".format(gap))

    # Project MOs
    occ_umat = transform(occ, self.umat_projector, axes=0)
    virt_umat = transform(virt, self.umat_projector, axes=0)
    occ_dm = transform(occ, self.dm_projector, axes=0)
    virt_dm = transform(virt, self.dm_projector, axes=0)

    denominator = occ_e[numpy.newaxis, :] - virt_e[:, numpy.newaxis]
    numerator = (virt_umat[:, numpy.newaxis, :, numpy.newaxis] *
                 occ_umat[numpy.newaxis, :, numpy.newaxis, :])
    z = numerator / denominator[numpy.newaxis, numpy.newaxis, :, :]

    # Important: the "2" prefactor reflects the fact that the spin-restricted
    # density matrix consists of doubly-occupied states
    return 2 * (numpy.einsum("ij,abkj,lk->abil", occ_dm, z, virt_dm) +
                numpy.einsum("ij,abjk,lk->abil", virt_dm, z, occ_dm))
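# A minimal shape check (hedged) for the response-tensor broadcasting used in
# `raw_gradients` above; the sizes below are illustrative assumptions.
import numpy

n_p, n_occ, n_virt = 6, 2, 4  # assumed projector and MO-space sizes
occ_umat = numpy.random.rand(n_p, n_occ)    # stands in for the projected occupied MOs
virt_umat = numpy.random.rand(n_p, n_virt)  # stands in for the projected virtual MOs
occ_e = numpy.sort(numpy.random.rand(n_occ)) - 1.0    # occupied energies below ...
virt_e = numpy.sort(numpy.random.rand(n_virt)) + 1.0  # ... virtual ones, finite gap

denominator = occ_e[numpy.newaxis, :] - virt_e[:, numpy.newaxis]  # (virt, occ)
numerator = (virt_umat[:, numpy.newaxis, :, numpy.newaxis] *
             occ_umat[numpy.newaxis, :, numpy.newaxis, :])        # (p, p, virt, occ)
z = numerator / denominator[numpy.newaxis, numpy.newaxis, :, :]
assert z.shape == (n_p, n_p, n_virt, n_occ)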
def hello_world():
    df = pd.read_csv('data/promotion.csv', sep=',', encoding='SHIFT-JIS',
                     parse_dates=[2, 3])
    df = common.transform(df)
    print(df)
    data = df.drop(["Time", "performance", "date"], axis=1)
    # target = df['y']
    target = df['performance']
    data_train_s, data_test_s, label_train_s, label_test_s = cross_validation.train_test_split(
        data, target, test_size=0.01)
    parameters = {
        'n_estimators': [100, 500],
        'learning_rate': [0.1],
        'max_depth': [4],
        'min_samples_leaf': [9],
        'max_features': [1.0, 0.3]
    }
    clf_cv = grid_search.GridSearchCV(GradientBoostingRegressor(), parameters,
                                      cv=4, scoring='neg_mean_absolute_error')
    clf_cv.fit(data_train_s, label_train_s)
    print("Best Model Parameter: ", clf_cv.best_params_)
    print("Best Model Score: ", clf_cv.best_score_)

    file_name = "data/model_temp.pkl"
    # Save the trained model.
    joblib.dump(clf_cv, file_name, compress=True)
    print("Model save process ended normally.")

    S3_BUCKET = os.environ.get('S3_BUCKET')
    file_type = "application/zip"
    s3 = boto3.resource('s3')
    # Upload the model file to S3.
    s3.meta.client.upload_file(file_name, S3_BUCKET, 'model.pkl')
    return "recommendation model created and S3 upload succeeded!!"
def f(self, parameters):
    """
    Finds the norm of the density matrix difference as a function of parameters.
    Args:
        parameters (numpy.ndarray): parameters to set;

    Returns:
        The density matrix difference norm.
    """
    self.assign_parameters(parameters)
    self.driver.kernel()
    logger.debug1(self.__log__, "| total energy {:.10f}".format(self.driver.e_tot))
    self.cleanup_parameters()
    return self.target_dm_function(transform(self.driver.make_rdm1(), self.dm_projector))
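# Hedged sketch: a finite-difference cross-check of the analytic `gradients`
# above. `sc` stands for an instance of this self-consistency class and `x0`
# for a parameter vector of the right size; it assumes `f` returns the scalar
# that `gradients` differentiates.
def fd_gradient(sc, x0, eps=1e-6):
    g = numpy.empty_like(x0)
    for i in range(x0.size):
        xp, xm = x0.copy(), x0.copy()
        xp[i] += eps
        xm[i] -= eps
        g[i] = (sc.f(xp) - sc.f(xm)) / (2 * eps)  # central difference
    return g
# Expected (after evaluating sc.f(x0) so the driver state matches x0):
# numpy.allclose(fd_gradient(sc, x0), sc.gradients(), atol=1e-5)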
def iter_schmidt_basis(self):
    """
    Retrieves Schmidt single-particle basis sets for each of the unique domains.
    Returns:
        Domain ID, domain basis functions and Schmidt single-particle basis sets.
        The Schmidt basis includes domain frozen orbitals, embedding frozen
        orbitals, domain active orbitals and embedding active orbitals.
    """
    occ = self.__mf_solver__.mo_coeff[:, self.__mf_solver__.mo_occ > 1]
    occ_orth = transform(occ, self.__orthogonal_basis__, axes=0)
    for domain_id, domain_list in self.__domains__.items():
        d = domain_list[0]
        ffaa = get_sd_schmidt_basis(occ_orth, d, threshold=self.__schmidt_threshold__)
        n = len(domain_list[0])
        if ffaa[2].shape[1] == n:
            ffaa[2][d, :] = numpy.eye(n)
        yield domain_id, d, ffaa
def hello_world():
    df = pd.read_csv('data/promotion.csv', sep=',', encoding='SHIFT-JIS',
                     parse_dates=[2, 3])
    df = common.transform(df)
    print(df)
    data = df.drop(["Time", "performance", "date"], axis=1)
    # target = df['y']
    target = df['performance']
    data_train_s, data_test_s, label_train_s, label_test_s = cross_validation.train_test_split(
        data, target, test_size=0.01)
    parameters = {
        'n_estimators': [100, 500],
        'learning_rate': [0.1],
        'max_depth': [4],
        'min_samples_leaf': [9],
        'max_features': [1.0, 0.3]
    }
    clf_cv = grid_search.GridSearchCV(GradientBoostingRegressor(), parameters,
                                      cv=4, scoring='neg_mean_absolute_error')
    clf_cv.fit(data_train_s, label_train_s)
    print("Best Model Parameter: ", clf_cv.best_params_)
    print("Best Model Score: ", clf_cv.best_score_)
    # Save the trained model.
    joblib.dump(clf_cv, 'data/model_v2.pkl', compress=True)
    return "recommendation model created!!"
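# Hedged sketch: loading the persisted model back and scoring it on a held-out
# split (assumes features with the same columns as `data` above; the function
# name and default path are illustrative).
def evaluate_saved_model(data_test, label_test, path='data/model_v2.pkl'):
    clf = joblib.load(path)
    pred = clf.predict(data_test)
    # Report the mean absolute error, matching the CV scoring metric above
    print("Test MAE:", metrics.mean_absolute_error(label_test, pred))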
def kernel(self, tolerance="default", maxiter=30):
    """
    Performs a self-consistent DMET calculation.
    Args:
        tolerance (float): convergence criterion;
        maxiter (int): maximal number of iterations;
    """
    if tolerance == "default":
        tolerance = self.conv_tol
    self.convergence_history = []

    # mixers = dict((k, diis.DIIS()) for k in self.__domains__.keys())

    while True:
        logger.info(self.__mol__, "DMET step {:d}".format(
            len(self.convergence_history),
        ))
        mf = self.run_mf_kernel()
        umat = {}
        logger.debug1(self.__mol__, "Mean-field solver total energy E = {:.10f}".format(
            mf.e_tot,
        ))

        self.e_tot = 0
        total_occupation = 0

        domain_ids = []
        embedded_solvers = []
        replica_numbers = []
        schmidt_bases = []

        # Build embedded solvers
        logger.info(self.__mol__, "Building embedded solvers ...")
        for domain_id, domain_basis, schmidt_basis in self.iter_schmidt_basis():
            domain_ids.append(domain_id)
            if self.__style__ == "interacting-bath":
                embedded_solvers.append(self.get_embedded_solver(schmidt_basis))
            elif self.__style__ == "non-interacting-bath":
                embedded_solvers.append(self.get_embedded_solver(schmidt_basis, kind=domain_id))
            else:
                raise ValueError("Internal error: unknown style '{}'".format(self.__style__))
            replica_numbers.append(len(self.__domains__[domain_id]))
            schmidt_bases.append(schmidt_basis[2:])

        # Fit the chemical potential
        logger.info(self.__mol__, "Fitting chemical potential ...")
        GlobalChemicalPotentialFit(
            embedded_solvers,
            replica_numbers,
            self.__mol__.nelectron,
            log=self.__mol__,
        ).kernel()

        # Fit the u-matrix
        logger.info(self.__mol__, "Fitting the u-matrix ...")
        for domain_id, embedded_solver, schmidt_basis, nreplica in zip(
                domain_ids, embedded_solvers, schmidt_bases, replica_numbers):

            logger.debug(self.__mol__, "Domain {}".format(domain_id))
            logger.debug1(self.__mol__, "Primary basis: {}".format(self.__domains__[domain_id][0]))
            if len(self.__domains__[domain_id]) > 1:
                for i, b in enumerate(self.__domains__[domain_id][1:]):
                    logger.debug1(self.__mol__, "Secondary basis {:d}: {}".format(i, b))

            logger.debug1(self.__mol__, "Correlated solver total energy E = {:.10f}".format(
                embedded_solver.e_tot,
            ))

            n_active_domain = schmidt_basis[0].shape[1]
            # TODO: fix this; no need to recalculate hcore
            partial_energy = embedded_solver.partial_etot(
                slice(n_active_domain),
                transform(
                    self.__mf_solver__.get_hcore(),
                    self.__orthogonal_basis_inv__.T.dot(numpy.concatenate(schmidt_basis, axis=1)),
                ),
            )
            self.e_tot += nreplica * partial_energy
            logger.debug1(self.__mol__, "Correlated solver partial energy E = {:.10f}".format(
                partial_energy,
            ))

            partial_occupation = embedded_solver.partial_nelec(slice(n_active_domain))
            total_occupation += nreplica * partial_occupation
            logger.debug1(self.__mol__, "Correlated solver partial occupation N = {:.10f}".format(
                partial_occupation,
            ))
            logger.debug2(self.__mol__, "Correlated solver density matrix: {}".format(
                embedded_solver.make_rdm1()))

            if tolerance is not None:
                # Continue with self-consistency
                nscf_mf = NonSelfConsistentMeanField(mf)
                nscf_mf.kernel()

                sc = self.__self_consistency__(
                    nscf_mf,
                    self.__orthogonal_basis__.dot(numpy.concatenate(schmidt_basis, axis=1)),
                    embedded_solver,
                    log=self.__mol__,
                )
                sc.kernel(x0=None)
                local_umat = sc.parametrize_umat_full(sc.final_parameters)
                umat[domain_id] = local_umat
                logger.debug(self.__mol__, "Parameters: {}".format(
                    sc.final_parameters,
                ))

        self.e_tot += mf.energy_nuc()

        if tolerance is not None:
            self.convergence_history.append(self.convergence_measure(umat))
            self.umat = umat
            # self.umat = dict((k, mixers[k].update(umat[k])) for k in self.__domains__.keys())
            logger.info(self.__mol__, "E = {:.10f} delta = {:.3e} q = {:.3e} max(umat) = {:.3e}".format(
                self.e_tot,
                self.convergence_history[-1],
                self.__mol__.nelectron - total_occupation,
                max(v.max() for v in self.umat.values()),
            ))
        else:
            logger.info(self.__mol__, "E = {:.10f} q = {:.3e}".format(
                self.e_tot,
                self.__mol__.nelectron - total_occupation,
            ))

        if tolerance is None or self.convergence_history[-1] < tolerance:
            return self.e_tot

        if maxiter is not None and len(self.convergence_history) >= maxiter:
            raise RuntimeError(
                "The maximal number of iterations {:d} reached. The error {:.3e} is still "
                "above the requested tolerance of {:.3e}".format(
                    maxiter,
                    self.convergence_history[-1],
                    tolerance,
                ))
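# Hedged usage sketch: `dmet` stands for an instance of the DMET driver class
# this method belongs to; its constructor arguments are project-specific and
# omitted here.
# e_tot = dmet.kernel(tolerance=1e-6, maxiter=50)
# print("DMET total energy:", e_tot)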
def energy_2(domains, w_occ, amplitude_calculator=None, with_t2=True):
    """
    Calculates the second-order energy correction in the domain setup.
    Args:
        domains (iterable): a list of domains;
        w_occ (float): a parameter splitting the second-order energy contributions
        between occupied and virtual molecular orbitals;
        amplitude_calculator (func): calculator of second-order amplitudes.
        If None, MP2 amplitudes are calculated;
        with_t2 (bool): whether to save amplitudes;

    Returns:
        The energy correction and, if `with_t2`, the per-domain amplitudes.
    """
    result = 0
    if with_t2:
        result_t2 = []
    else:
        result_t2 = None

    for domain in domains:
        occupations = domain.occupations
        selection_occ = numpy.argwhere(occupations >= 1)[:, 0]
        selection_virt = numpy.argwhere(occupations < 1)[:, 0]

        psi = domain.psi
        psi_occ = psi[:, selection_occ]
        psi_virt = psi[:, selection_virt]

        core_mask = numpy.diag(domain.partition_matrix)[:, numpy.newaxis]
        psi_occ_core = psi_occ * core_mask
        psi_virt_core = psi_virt * core_mask

        __ov = common.transform(common.transform(domain.eri, psi_occ, axes=2), psi_virt, axes=3)
        xvov = common.transform(common.transform(__ov, psi_occ_core, axes=0), psi_virt, axes=1)
        oxov = common.transform(common.transform(__ov, psi_occ, axes=0), psi_virt_core, axes=1)

        if amplitude_calculator is None:
            e = domain.e
            e_occ = e[selection_occ]
            e_virt = e[selection_virt]
            ovov = common.transform(common.transform(__ov, psi_occ, axes=0), psi_virt, axes=1)
            t1 = None
            t2 = ovov / (
                e_occ[:, numpy.newaxis, numpy.newaxis, numpy.newaxis] -
                e_virt[numpy.newaxis, :, numpy.newaxis, numpy.newaxis] +
                e_occ[numpy.newaxis, numpy.newaxis, :, numpy.newaxis] -
                e_virt[numpy.newaxis, numpy.newaxis, numpy.newaxis, :])
        else:
            t1, t2 = amplitude_calculator(domain)

        # Collect the total amplitudes; None marks the case where neither
        # t1 nor t2 is available.
        amplitudes = None
        if t2 is not None:
            amplitudes = t2
        if t1 is not None:
            t1_part = numpy.einsum("ia,jb->iajb", t1, t1)
            amplitudes = t1_part if amplitudes is None else amplitudes + t1_part
        if amplitudes is not None:
            result += (
                (xvov * w_occ + oxov * (1.0 - w_occ)) *
                (2 * amplitudes - numpy.swapaxes(amplitudes, 0, 2))).sum()
        if result_t2 is not None:
            result_t2.append(amplitudes)

    return result, result_t2
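# A hedged sketch of a custom `amplitude_calculator` for `energy_2`, using CCSD
# amplitudes in place of the default MP2 ones. `domain.mf` is a hypothetical
# attribute standing for a converged mean-field object of the domain; the index
# reordering assumes pyscf's (i, j, a, b) amplitude storage versus the
# (i, a, j, b) convention used above.
from pyscf import cc

def ccsd_amplitudes(domain):
    solver = cc.CCSD(domain.mf)  # `domain.mf` is an assumed attribute
    solver.kernel()
    t2 = solver.t2.transpose(0, 2, 1, 3)  # (i, j, a, b) -> (i, a, j, b)
    return solver.t1, t2

# Usage (sketch; w_occ value is illustrative):
# e2, t2_list = energy_2(domains, w_occ=0.5, amplitude_calculator=ccsd_amplitudes)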
import os

import boto3
import pandas as pd
from sklearn import svm, grid_search, cross_validation, metrics
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.externals import joblib
from sklearn.preprocessing import LabelEncoder

import common  # original utility