Beispiel #1
0
def get_dat(lags, preds, start_train, end_train, end_test):
    """Load link travel times and build rolling-window train/test arrays.

    Reads ``link_travel_time_local.csv.gz``, orders the links with
    ``sort_links``, splits the observations with ``split_df``, scales both
    splits with statistics fitted on the training split only and finally
    rolls them into windows with ``roll``.

    Args:
        lags: forwarded unchanged to ``roll``.
        preds: forwarded unchanged to ``roll``.
        start_train: forwarded to ``split_df`` as ``start_train``.
        end_train: forwarded to ``split_df`` as ``end_train``.
        end_test: forwarded to ``split_df`` as ``end_test``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test, y_std_train,
        y_std_test, y_num_meas_train, y_num_meas_test)`` as produced by
        ``roll`` for the two splits.
    """
    data = pd.read_csv('link_travel_time_local.csv.gz',
                       compression='gzip',
                       parse_dates=True,
                       index_col=0)

    # Sort links by order.
    data, order = sort_links(data, '1416:1417', '7051:2056')

    # Encode each link by its position in ``order`` (so that e.g. the
    # neighbours of link 1 are 0 and 2). Links missing from ``order`` are
    # appended after the ordered ones.
    data['link_order'] = data['link_ref'].astype('category')
    not_in_list = data['link_order'].cat.categories.difference(order)
    data['link_order'] = data['link_order'].cat.set_categories(
        np.hstack((order, not_in_list)), ordered=True)
    data['link_order'] = data['link_order'].cat.codes

    # Day-of-week *name* column ('Monday', ..., 'Sunday').
    # ``DatetimeIndex.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # returns the same values.
    data['Weekday'] = data.index.day_name()
    data = data.sort_values('link_order')

    print("Number of observations = ", len(data))
    print("Number of links = ", len(data['link_ref'].unique()))

    data_train, data_test = split_df(data,
                                     start_train=start_train,
                                     end_train=end_train,
                                     end_test=end_test)

    # Sort each split once and report its first and last timestamps.
    train_index = data_train.sort_index().index
    test_index = data_test.sort_index().index
    print("\nTraining from", train_index[0], "to", train_index[-1])
    print("Testing  from", test_index[0], "to", test_index[-1])

    # Transform train and test set using the mean and std for train set.
    means_df, scales, low_df, upr_df = fit_scale(data_train, order)
    ix_train, ts_train, rm_mean_train, rm_scale_train, w_train, lns_train = transform(
        data_train, means_df, scales, order, freq='15min')
    ix_test, ts_test, rm_mean_test, rm_scale_test, w_test, lns_test = transform(
        data_test, means_df, scales, order, freq='15min')

    # Create rolling window tensor
    #  - y_mean and y_std are arrays where columns are each link and
    #    the rows corresponding to the mean and std of each data point
    #    at that weekday.
    #  - y_num_meas indicates how many measurements are in the time window
    #    for a given link
    X_train, y_train, y_ix_train, y_mean_train, y_std_train, y_num_meas_train = roll(
        ix_train, ts_train, rm_mean_train, rm_scale_train, w_train, lags,
        preds)
    X_test, y_test, y_ix_test, y_mean_test, y_std_test, y_num_meas_test = roll(
        ix_test, ts_test, rm_mean_test, rm_scale_test, w_test, lags, preds)
    return (X_train, X_test, y_train, y_test, y_std_train, y_std_test,
            y_num_meas_train, y_num_meas_test)
Beispiel #2
0
    def get_embedded_solver(self, schmidt_basis, kind="interacting"):
        """
        Builds the correlated solver of the embedding problem spanned by a Schmidt basis.
        Args:
            schmidt_basis (tuple, list): four orbital blocks: frozen domain, frozen embedding, active domain and
            active embedding orbitals (in this order);
            kind: a key of the domains for the non-interacting bath formulation or "interacting" otherwise;

        Returns:
            The embedded solver.
        """
        frozen_dom, frozen_emb, active_dom, active_emb = schmidt_basis
        n_active_dom = active_dom.shape[1]
        n_active_emb = active_emb.shape[1]
        n_frozen_dom = frozen_dom.shape[1]
        n_frozen_emb = frozen_emb.shape[1]
        n_frozen = n_frozen_dom + n_frozen_emb
        n_active = n_active_dom + n_active_emb

        logger.debug1(
            self.__mol__,
            "Active orbitals (domain+embedding): {:d}+{:d}".format(n_active_dom, n_active_emb),
        )
        logger.debug1(
            self.__mol__,
            "Frozen orbitals (domain+embedding): {:d}+{:d}".format(n_frozen_dom, n_frozen_emb),
        )
        hcore_ao = self.__mf_solver__.get_hcore()

        non_interacting = kind in self.__domains__
        if not (non_interacting or kind == "interacting"):
            raise ValueError("Unknown kind: {}".format(kind))

        if non_interacting:
            # Non-interacting bath: only the active orbitals are kept and the
            # two-electron integrals are non-zero on the domain block only.
            if self.umat is not None:
                hcore_ao = hcore_ao + self.get_umat(exclude=kind)
            active = numpy.concatenate((active_dom, active_emb), axis=1)
            schmidt_projection = self.__orthogonal_basis_inv__.T.dot(active)
            logger.debug(self.__mol__, "Transforming active orbitals ...")
            hcore = transform(hcore_ao, schmidt_projection)
            eri_ao = restore(1, self.__mf_solver__._eri, hcore_ao.shape[0])
            domain_eri = transform(eri_ao, schmidt_projection[:, :n_active_dom])
            eri = numpy.zeros(
                (n_active, n_active, n_active, n_active),
                dtype=domain_eri.dtype,
            )
            eri[(slice(n_active_dom),) * 4] = domain_eri
            e_vac = self.__mf_solver__.energy_nuc()
        else:
            # Interacting bath: transform everything, then freeze the external orbitals.
            everything = numpy.concatenate(schmidt_basis, axis=1)
            schmidt_projection = self.__orthogonal_basis_inv__.T.dot(everything)
            logger.debug(self.__mol__, "Transforming orbitals ...")
            hcore = transform(hcore_ao, schmidt_projection)
            eri_ao = restore(1, self.__mf_solver__._eri, hcore_ao.shape[0])
            eri = transform(eri_ao, schmidt_projection)
            e_vac = self.__mf_solver__.energy_nuc()
            logger.debug1(self.__mol__, "Freezing external orbitals ...")
            hcore, eri, e_vac = freeze(hcore, eri, n_frozen, e_vac=e_vac)

        solver_options = dict(
            nelectron=n_active,
            e_vac=e_vac,
            verbose=self.__nested_verbosity__,
        )
        return self.__correlated_solver__(hcore, eri, **solver_options)
Beispiel #3
0
    def parametrize_umat_full(self, parameters):
        """
        Builds the u-matrix from the parameters and transforms it with the transposed u-matrix projector,
        i.e. expresses it in the full basis.
        Args:
            parameters (numpy.ndarray): parameters of the u-matrix;

        Returns:
            The u-matrix value.
        """
        local_umat = self.parametrize_umat(parameters)
        back_projector = self.umat_projector.T
        return transform(local_umat, back_projector)
Beispiel #4
0
 def gradients(self):
     """
     Calculates gradients for the target function: twice the contraction of the raw gradients with
     the deviation of the projected density matrix from the reference one, flattened.
     Returns:
         Gradients.
     """
     raw = self.raw_gradients()
     projected_dm = transform(self.driver.make_rdm1(), self.dm_projector)
     dm_deviation = projected_dm - self.reference_dm
     contracted = numpy.einsum("ijkl,kl->ij", raw, dm_deviation)
     return 2 * contracted.reshape(-1)
Beispiel #5
0
    def raw_gradients(self):
        r"""
        Calculates gradients using the following expression for the derivative:

        .. math::
            \frac{\partial D}{\partial \delta} = C_{occ} Z^\dagger C_{virt}^\dagger + C_{virt} Z C_{occ}^\dagger,

        .. math::
            Z = - \frac{C_{virt}^\dagger H^{(1)} C_{occ}}{E_{virt} - E_{occ}}

        A warning is logged if the gap between the lowest virtual and the highest occupied orbital energies
        is below 1e-4.

        Returns:
            Gradients.
        """
        # NOTE: the docstring is a raw string on purpose: otherwise "\f" in "\frac" is a form feed.

        # Occupied and virtual MOs
        occupied = self.driver.mo_occ > 1
        virtual = self.driver.mo_occ <= 1
        occ = self.driver.mo_coeff[:, occupied]
        occ_e = self.driver.mo_energy[occupied]
        virt = self.driver.mo_coeff[:, virtual]
        virt_e = self.driver.mo_energy[virtual]

        gap = virt_e.min() - occ_e.max()
        logger.debug1(self.__log__, "| energy gap {:.3e}".format(
            gap,
        ))

        if gap < 1e-4:
            logger.warn(self.__log__, "The energy gap is too small: {:.3e}".format(gap))

        # Project MOs
        occ_umat = transform(occ, self.umat_projector, axes=0)
        virt_umat = transform(virt, self.umat_projector, axes=0)
        occ_dm = transform(occ, self.dm_projector, axes=0)
        virt_dm = transform(virt, self.dm_projector, axes=0)

        # E_occ - E_virt: the sign of Z is absorbed into the denominator
        denominator = occ_e[numpy.newaxis, :] - virt_e[:, numpy.newaxis]
        numerator = virt_umat[:, numpy.newaxis, :, numpy.newaxis] * occ_umat[numpy.newaxis, :, numpy.newaxis, :]
        z = numerator / denominator[numpy.newaxis, numpy.newaxis, :, :]

        # Important: The "2" prefactor here indicates the fact that the spin-restricted
        # density matrix consists of doubly-occupied states
        return 2*(numpy.einsum("ij,abkj,lk->abil", occ_dm, z, virt_dm) + numpy.einsum("ij,abjk,lk->abil", virt_dm, z, occ_dm))
Beispiel #6
0
def hello_world():
    """Train the promotion model, pickle it and upload the pickle to S3.

    Reads ``data/promotion.csv``, preprocesses it with ``common.transform``,
    grid-searches a ``GradientBoostingRegressor`` that predicts the
    ``performance`` column, dumps the fitted search object to
    ``data/model_temp.pkl`` and uploads that file as ``model.pkl`` to the
    bucket named by the ``S3_BUCKET`` environment variable.

    Returns:
        A fixed confirmation message.
    """
    # ``sklearn.cross_validation`` and ``sklearn.grid_search`` were removed in
    # scikit-learn 0.20. ``sklearn.model_selection`` provides both helpers
    # since 0.18, the release that also introduced the
    # 'neg_mean_absolute_error' scorer used below.
    from sklearn.model_selection import GridSearchCV, train_test_split

    df = pd.read_csv('data/promotion.csv',
                     sep=',',
                     encoding='SHIFT-JIS',
                     parse_dates=[2, 3])

    df = common.transform(df)

    print(df)

    data = df.drop(["Time", "performance", "date"], axis=1)
    target = df['performance']

    # Only the training part of the split is used; the 1% hold-out is discarded.
    data_train_s, _, label_train_s, _ = train_test_split(
        data, target, test_size=0.01)
    parameters = {
        'n_estimators': [100, 500],
        'learning_rate': [0.1],
        'max_depth': [4],
        'min_samples_leaf': [9],
        'max_features': [1.0, 0.3],
    }

    clf_cv = GridSearchCV(GradientBoostingRegressor(),
                          parameters,
                          cv=4,
                          scoring='neg_mean_absolute_error')

    clf_cv.fit(data_train_s, label_train_s)

    print("Best Model Parameter: ", clf_cv.best_params_)
    print("Best Model Score: ", clf_cv.best_score_)

    file_name = "data/model_temp.pkl"

    # Save the trained estimator.
    joblib.dump(clf_cv, file_name, compress=True)

    print("Model save process normally end.")

    S3_BUCKET = os.environ.get('S3_BUCKET')

    s3 = boto3.resource('s3')

    # Upload the file to S3.
    s3.meta.client.upload_file(file_name, S3_BUCKET, 'model.pkl')

    return "recommendation model created and S3 upload succeeded!!"
Beispiel #7
0
    def f(self, parameters):
        """
        Evaluates the target function: assigns the parameters, runs the driver and feeds the projected
        density matrix of the driver into the target density matrix function.
        Args:
            parameters (numpy.ndarray): parameters to set;

        Returns:
            The density matrix difference.
        """
        self.assign_parameters(parameters)
        self.driver.kernel()

        energy_message = "| total energy {:.10f}".format(self.driver.e_tot)
        logger.debug1(self.__log__, energy_message)

        # The parameters are cleaned up before the density matrix is requested
        self.cleanup_parameters()
        target = self.target_dm_function
        projected_dm = transform(self.driver.make_rdm1(), self.dm_projector)
        return target(projected_dm)
Beispiel #8
0
    def iter_schmidt_basis(self):
        """
        Iterates over the unique domains and yields the Schmidt single-particle basis of each one.

        The Schmidt basis is computed for the first entry of each domain list. If its third block has as many
        columns as the domain has basis functions, the domain rows of that block are replaced by the identity
        matrix.

        Returns:
            Domain ID, domain basis functions and Schmidt single-particle basis sets. Schmidt basis includes
            domain frozen orbitals, embedding frozen orbitals, domain active orbitals and embedding active orbitals.
        """
        mf_solver = self.__mf_solver__
        occupied = mf_solver.mo_coeff[:, mf_solver.mo_occ > 1]
        occupied_orth = transform(occupied, self.__orthogonal_basis__, axes=0)

        for domain_id, replicas in self.__domains__.items():
            primary = replicas[0]
            schmidt_basis = get_sd_schmidt_basis(
                occupied_orth,
                primary,
                threshold=self.__schmidt_threshold__,
            )
            domain_size = len(primary)
            active_domain = schmidt_basis[2]
            if active_domain.shape[1] == domain_size:
                active_domain[primary, :] = numpy.eye(domain_size)
            yield domain_id, primary, schmidt_basis
Beispiel #9
0
def hello_world():
    """Train the promotion model and pickle it to ``data/model_v2.pkl``.

    Reads ``data/promotion.csv``, preprocesses it with ``common.transform``,
    grid-searches a ``GradientBoostingRegressor`` that predicts the
    ``performance`` column and dumps the fitted search object.

    Returns:
        A fixed confirmation message.
    """
    # ``sklearn.cross_validation`` and ``sklearn.grid_search`` were removed in
    # scikit-learn 0.20. ``sklearn.model_selection`` provides both helpers
    # since 0.18, the release that also introduced the
    # 'neg_mean_absolute_error' scorer used below.
    from sklearn.model_selection import GridSearchCV, train_test_split

    df = pd.read_csv('data/promotion.csv',
                     sep=',',
                     encoding='SHIFT-JIS',
                     parse_dates=[2, 3])

    df = common.transform(df)

    print(df)

    data = df.drop(["Time", "performance", "date"], axis=1)
    target = df['performance']

    # Only the training part of the split is used; the 1% hold-out is discarded.
    data_train_s, _, label_train_s, _ = train_test_split(
        data, target, test_size=0.01)
    parameters = {
        'n_estimators': [100, 500],
        'learning_rate': [0.1],
        'max_depth': [4],
        'min_samples_leaf': [9],
        'max_features': [1.0, 0.3],
    }

    clf_cv = GridSearchCV(GradientBoostingRegressor(),
                          parameters,
                          cv=4,
                          scoring='neg_mean_absolute_error')

    clf_cv.fit(data_train_s, label_train_s)

    print("Best Model Parameter: ", clf_cv.best_params_)
    print("Best Model Score: ", clf_cv.best_score_)

    # Save the trained estimator.
    joblib.dump(clf_cv, 'data/model_v2.pkl', compress=True)

    return "recommendation model created!!"
Beispiel #10
0
    def kernel(self, tolerance="default", maxiter=30):
        """
        Performs a self-consistent DMET calculation.

        Each iteration runs the mean-field kernel, builds one embedded solver per domain, fits the global
        chemical potential, accumulates the partial energies and occupations of the domains and, unless
        `tolerance` is None, fits a u-matrix per domain which is stored in `self.umat`.

        Args:
            tolerance (float): convergence criterion; "default" takes `self.conv_tol` and None stops after
            a single iteration without fitting the u-matrix;
            maxiter (int): maximal number of iterations; None disables the limit;

        Returns:
            The total energy `self.e_tot`.

        Raises:
            RuntimeError: if `maxiter` iterations did not bring the convergence measure below `tolerance`;
            ValueError: if `self.__style__` is neither "interacting-bath" nor "non-interacting-bath";
        """
        # "default" is a sentinel: None is a meaningful value (single-shot calculation)
        if tolerance == "default":
            tolerance = self.conv_tol

        self.convergence_history = []

        # mixers = dict((k, diis.DIIS()) for k in self.__domains__.keys())

        while True:

            logger.info(self.__mol__, "DMET step {:d}".format(
                len(self.convergence_history),
            ))

            mf = self.run_mf_kernel()

            # u-matrices fitted during this iteration, keyed by domain ID
            umat = {}

            logger.debug1(self.__mol__, "Mean-field solver total energy E = {:.10f}".format(
                mf.e_tot,
            ))

            # Accumulators over domains (weighted by the number of replicas)
            self.e_tot = 0
            total_occupation = 0

            # Parallel lists: one entry per unique domain
            domain_ids = []
            embedded_solvers = []
            replica_numbers = []
            schmidt_bases = []

            # Build embedded solvers
            logger.info(self.__mol__, "Building embedded solvers ...")
            for domain_id, domain_basis, schmidt_basis in self.iter_schmidt_basis():

                domain_ids.append(domain_id)
                if self.__style__ == "interacting-bath":
                    embedded_solvers.append(self.get_embedded_solver(schmidt_basis))
                elif self.__style__ == "non-interacting-bath":
                    embedded_solvers.append(self.get_embedded_solver(schmidt_basis, kind=domain_id))
                else:
                    raise ValueError("Internal error: unknown style '{}'".format(self.__style__))
                replica_numbers.append(len(self.__domains__[domain_id]))
                # Only the last two blocks of the Schmidt basis (the active orbitals) are kept
                schmidt_bases.append(schmidt_basis[2:])

            # Fit chemical potential
            logger.info(self.__mol__, "Fitting chemical potential ...")
            GlobalChemicalPotentialFit(
                embedded_solvers,
                replica_numbers,
                self.__mol__.nelectron,
                log=self.__mol__,
            ).kernel()

            # Fit the u-matrix
            logger.info(self.__mol__, "Fitting the u-matrix ...")
            for domain_id, embedded_solver, schmidt_basis, nreplica in zip(domain_ids, embedded_solvers, schmidt_bases, replica_numbers):

                logger.debug(self.__mol__, "Domain {}".format(domain_id))
                logger.debug1(self.__mol__, "Primary basis: {}".format(self.__domains__[domain_id][0]))
                if len(self.__domains__[domain_id]) > 1:
                    for i, b in enumerate(self.__domains__[domain_id][1:]):
                        logger.debug1(self.__mol__, "Secondary basis {:d}: {}".format(i, b))

                logger.debug1(self.__mol__, "Correlated solver total energy E = {:.10f}".format(
                    embedded_solver.e_tot,
                ))

                # Here schmidt_basis holds the active blocks only (see the slicing above), so index 0 is
                # the domain active block
                n_active_domain = schmidt_basis[0].shape[1]
                # TODO: fix this; no need to recalculate hcore
                partial_energy = embedded_solver.partial_etot(
                    slice(n_active_domain),
                    transform(
                        self.__mf_solver__.get_hcore(),
                        self.__orthogonal_basis_inv__.T.dot(numpy.concatenate(schmidt_basis, axis=1)),
                    ),
                )
                self.e_tot += nreplica * partial_energy

                logger.debug1(self.__mol__, "Correlated solver partial energy E = {:.10f}".format(
                    partial_energy,
                ))

                partial_occupation = embedded_solver.partial_nelec(slice(n_active_domain))
                total_occupation += nreplica * partial_occupation

                logger.debug1(self.__mol__, "Correlated solver partial occupation N = {:.10f}".format(
                    partial_occupation,
                ))

                logger.debug2(self.__mol__, "Correlated solver density matrix: {}".format(embedded_solver.make_rdm1()))

                if tolerance is not None:
                    # Continue with self-consistency
                    nscf_mf = NonSelfConsistentMeanField(mf)
                    nscf_mf.kernel()

                    sc = self.__self_consistency__(
                        nscf_mf,
                        self.__orthogonal_basis__.dot(numpy.concatenate(schmidt_basis, axis=1)),
                        embedded_solver,
                        log=self.__mol__,
                    )
                    sc.kernel(x0=None)

                    local_umat = sc.parametrize_umat_full(sc.final_parameters)
                    umat[domain_id] = local_umat

                    logger.debug(self.__mol__, "Parameters: {}".format(
                        sc.final_parameters,
                    ))

            # The nuclear repulsion is added once, after all domain contributions
            self.e_tot += mf.energy_nuc()

            if tolerance is not None:
                # The convergence measure is taken before self.umat is replaced by the new u-matrices
                self.convergence_history.append(self.convergence_measure(umat))
                self.umat = umat
                # self.umat = dict((k, mixers[k].update(umat[k])) for k in self.__domains__.keys())

                logger.info(self.__mol__, "E = {:.10f} delta = {:.3e} q = {:.3e} max(umat) = {:.3e}".format(
                    self.e_tot,
                    self.convergence_history[-1],
                    self.__mol__.nelectron - total_occupation,
                    max(v.max() for v in self.umat.values()),
                ))

            else:

                logger.info(self.__mol__, "E = {:.10f} q = {:.3e}".format(
                    self.e_tot,
                    self.__mol__.nelectron - total_occupation,
                ))

            # Done: either a single-shot run was requested or the convergence measure is below the tolerance
            if tolerance is None or self.convergence_history[-1] < tolerance:
                return self.e_tot

            # The history holds one entry per completed iteration
            if maxiter is not None and len(self.convergence_history) >= maxiter:
                raise RuntimeError("The maximal number of iterations {:d} reached. The error {:.3e} is still above the requested tolerance of {:.3e}".format(
                    maxiter,
                    self.convergence_history[-1],
                    tolerance,
                ))
Beispiel #11
0
def energy_2(domains, w_occ, amplitude_calculator=None, with_t2=True):
    """
    Calculates the second-order energy correction in domain setup.
    Args:
        domains (iterable): a list of domains;
        w_occ (float): a parameter splitting the second-order energy contributions between occupied and virtual
        molecular orbitals;
        amplitude_calculator (func): calculator of second-order amplitudes: called with a domain, it returns a
        pair `(t1, t2)` where either entry may be None. If None, then MP2 amplitudes are calculated;
        with_t2 (bool): whether to save amplitudes;

    Returns:
        The energy correction and the list of amplitudes used for each domain (None if `with_t2` is False;
        the entry of a domain without any amplitudes is 0).
    """
    result = 0
    result_t2 = [] if with_t2 else None

    for domain in domains:

        occupations = domain.occupations
        selection_occ = numpy.argwhere(occupations >= 1)[:, 0]
        selection_virt = numpy.argwhere(occupations < 1)[:, 0]

        psi = domain.psi
        psi_occ = psi[:, selection_occ]
        psi_virt = psi[:, selection_virt]

        # Orbitals with the rows weighted by the diagonal of the partition matrix
        core_mask = numpy.diag(domain.partition_matrix)[:, numpy.newaxis]
        psi_occ_core = psi_occ * core_mask
        psi_virt_core = psi_virt * core_mask

        # The last two indexes are transformed once and shared by all integrals below
        eri_ov = common.transform(common.transform(domain.eri, psi_occ, axes=2),
                                  psi_virt,
                                  axes=3)
        # The first index in the plain occupied basis is shared by oxov and ovov
        eri_o_ov = common.transform(eri_ov, psi_occ, axes=0)

        xvov = common.transform(common.transform(eri_ov, psi_occ_core, axes=0),
                                psi_virt,
                                axes=1)
        oxov = common.transform(eri_o_ov, psi_virt_core, axes=1)

        if amplitude_calculator is None:
            e = domain.e
            e_occ = e[selection_occ]
            e_virt = e[selection_virt]

            ovov = common.transform(eri_o_ov, psi_virt, axes=1)
            t1 = None
            t2 = ovov / (
                e_occ[:, numpy.newaxis, numpy.newaxis, numpy.newaxis] -
                e_virt[numpy.newaxis, :, numpy.newaxis, numpy.newaxis] +
                e_occ[numpy.newaxis, numpy.newaxis, :, numpy.newaxis] -
                e_virt[numpy.newaxis, numpy.newaxis, numpy.newaxis, :])
        else:
            t1, t2 = amplitude_calculator(domain)

        # Combine the amplitudes out-of-place: an in-place "+=" would silently
        # modify the t2 array owned by the amplitude calculator.
        amplitudes = t2
        if t1 is not None:
            t1_product = numpy.einsum("ia,jb->iajb", t1, t1)
            amplitudes = t1_product if amplitudes is None else amplitudes + t1_product

        if amplitudes is not None:
            result += (
                (xvov * w_occ + oxov * (1.0 - w_occ)) *
                (2 * amplitudes - numpy.swapaxes(amplitudes, 0, 2))).sum()
        if result_t2 is not None:
            # Domains without amplitudes keep the legacy placeholder 0
            result_t2.append(0 if amplitudes is None else amplitudes)

    return result, result_t2
Beispiel #12
0
import pandas as pd
from sklearn import svm, grid_search, cross_validation, metrics
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.externals import joblib
from sklearn.preprocessing import LabelEncoder

import common  #original utility
import boto3

# Load the raw promotion data; the file is Shift-JIS encoded and the columns
# at positions 2 and 3 are parsed as dates.
df = pd.read_csv('data/promotion.csv',
                 sep=',',
                 encoding='SHIFT-JIS',
                 parse_dates=[2, 3])

# Project-specific preprocessing (see the ``common`` utility module).
df = common.transform(df)

print(df)

# Features: everything except the target and the "Time" / "date" columns.
data = df.drop(["Time", "performance", "date"], axis=1)

#target = df['y']
# Regression target.
target = df['performance']

# Hold out 1% of the rows.
# NOTE(review): ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
# ``sklearn.model_selection.train_test_split`` is the replacement -- confirm
# the scikit-learn version this script is pinned to.
data_train_s, data_test_s, label_train_s, label_test_s = cross_validation.train_test_split(
    data, target, test_size=0.01)
parameters = {
    'n_estimators': [100, 500],
    'learning_rate': [0.1],
    'max_depth': [4],
    'min_samples_leaf': [9],