예제 #1
0
    def test_flatten_model(self):
        """flatten_model returns a pandas.Series with all the params to recreate a model."""
        # Setup: fit a Gaussian copula on the 3x3 identity matrix, which
        # yields deterministic, known parameter values.
        model = GaussianMultivariate()
        model.fit(np.eye(3))

        expected_result = pd.Series({
            'covariance__0__0': 1.5000000000000004,
            'covariance__1__0': -0.7500000000000003,
            'covariance__1__1': 1.5000000000000004,
            'covariance__2__0': -0.7500000000000003,
            'covariance__2__1': -0.7500000000000003,
            'covariance__2__2': 1.5000000000000007,
            'distribs__0__mean': 0.33333333333333331,
            'distribs__0__std': -0.7520386983881371,
            'distribs__1__mean': 0.33333333333333331,
            'distribs__1__std': -0.7520386983881371,
            'distribs__2__mean': 0.33333333333333331,
            'distribs__2__std': -0.7520386983881371,
        })
        modeler = Modeler(MagicMock())

        # Run
        flattened = modeler.flatten_model(model)

        # Check numerically to tolerate floating-point noise.
        assert np.isclose(flattened, expected_result).all()
예제 #2
0
    def test_fit_sample_distribution_dict(self):
        """Fitting with a per-column distribution dict still samples full rows."""
        model = GaussianMultivariate(distribution={'x': GaussianKDE()})
        model.fit(sample_trivariate_xyz())

        samples = model.sample(10)
        assert samples.shape == (10, 3)
예제 #3
0
def fit_copula_to_z2_data(name=None, do_plot=False):
    """Example of fitting a copula to z2 stream data.

    :param name: stream name (defaults to a helicopter pitch/yaw stream)
    :param do_plot: when True, scatter-plot the simulated (pitch, yaw) points
    :return: fitted copula object
    """
    name = name or 'z2~helicopter_psi~helicopter_theta~70.json'
    assert 'z2~' in name, "Expecting a bivariate stream"

    # Map each lagged z-value back from its space-filling-curve encoding
    # to a pair of (roughly) normal margins.
    lagged_values = get_stream_lagged_values(name=name)
    normalized_points = [
        mr.norminv(mr.from_zcurve(zvalue=z, dim=2)) for z in lagged_values
    ]
    npitch, nyaw = zip(*normalized_points)

    # Fit the Gaussian copula on the (pitch, yaw) columns.
    X = np.array([npitch, nyaw]).transpose()
    copula = GaussianMultivariate()
    copula.fit(X)

    # Draw a same-sized synthetic sample for inspection.
    synthetic_points = copula.sample(len(X))
    spitch = synthetic_points[0]
    syaw = synthetic_points[1]
    if do_plot:
        plt.scatter(spitch, syaw)
        plt.xlabel('Simulated Pitch - normalized')
        plt.ylabel('Simulated Yaw - normalized')
        plt.show()
    return copula
예제 #4
0
    def test_fit_sample_distribution_name(self):
        """A distribution given as an import string is resolved and used."""
        model = GaussianMultivariate(
            'copulas.univariate.gaussian_kde.GaussianKDE')
        model.fit(sample_trivariate_xyz())

        samples = model.sample(10)
        assert samples.shape == (10, 3)
예제 #5
0
 def __init__(self, load, mode, pv_connection, ev_connection, ev_max_connection, pld_pred):
     """Store the configuration and create a fresh Gaussian copula model.

     The load data is wrapped in a DataFrame; all other arguments are
     stored as-is for later use.
     """
     self.copula = GaussianMultivariate()
     self.load = pd.DataFrame(load)
     self.mode = mode
     self.pld_pred = pld_pred
     self.pv_connection = pv_connection
     self.ev_connection = ev_connection
     self.ev_max_connection = ev_max_connection
예제 #6
0
 def test_gaussiankde_arguments(self):
     """GaussianKDE keyword arguments (bw_method) are honored when fitting."""
     size = 1000
     # Discrete uniform values plus small Gaussian noise.
     data = randint.rvs(0, 9, size=size) + norm.rvs(0, 0.1, size=size)
     dist = GaussianMultivariate(distribution=GaussianKDE(bw_method=0.01))
     dist.fit(data)
     samples = dist.sample(size).to_numpy()[0]
     # Two-sample KS test should not reject that both come from one distribution.
     _, p_value = ks_2samp(data, samples)
     assert p_value >= 0.05
예제 #7
0
    def test_get_instance_instance_fitted(self):
        """Try to get a new instance from a fitted instance"""
        # Fit a model first so we can check the copy comes back unfitted.
        fitted = GaussianMultivariate()
        fitted.fit(pd.DataFrame({'a_field': list(range(10))}))

        instance = get_instance(fitted)

        # Asserts
        assert isinstance(instance, GaussianMultivariate)
        assert not instance.fitted
예제 #8
0
    def test_fit_sample_distribution_dict_multiple(self):
        """A mixed per-column distribution dict still samples full rows."""
        distribution = {
            'x': Univariate(parametric=ParametricType.PARAMETRIC),
            'y': BetaUnivariate(),
            'z': GaussianKDE(),
        }
        model = GaussianMultivariate(distribution=distribution)
        model.fit(sample_trivariate_xyz())

        samples = model.sample(10)
        assert samples.shape == (10, 3)
예제 #9
0
    def fit(self, table_data):
        """Fit the model to the table.

        The table data is imputed before fitting the model.

        Args:
            table_data (pandas.DataFrame):
                Data to be fitted.
        """
        cleaned = impute(table_data)
        self.model = GaussianMultivariate(distribution=self.distribution)
        self.model.fit(cleaned)
예제 #10
0
def get_CB_preds(errors, pred1, pred2, time_serie, copulas_family, method,
                 error_type):
    """Combine two forecasts via copula-resampled errors.

    Fits either a Gumbel or a Gaussian copula to the joint forecast
    errors, draws a synthetic error sample of the same size, subtracts
    it from each prediction, and feeds the corrected predictions to
    ``get_combined_forecast``.
    """
    use_gumbel = copulas_family == 'gumbel'
    if use_gumbel:
        # Gumbel expects uniform margins.
        errors = uniformise_normal_data(errors)
        copula = Gumbel()
    else:
        copula = GaussianMultivariate()

    copula.fit(errors)
    synthetic_errors = copula.sample(len(errors))

    # Gumbel yields a numpy array; GaussianMultivariate yields a DataFrame,
    # hence the different column-indexing below.
    if use_gumbel:
        cb_pred1 = pred1 - synthetic_errors[:, 0]
        cb_pred2 = pred2 - synthetic_errors[:, 1]
    else:
        cb_pred1 = pred1 - synthetic_errors.iloc[:, 0]
        cb_pred2 = pred2 - synthetic_errors.iloc[:, 1]

    return get_combined_forecast(cb_pred1, cb_pred2, time_serie, error_type,
                                 method)
예제 #11
0
def fit_and_sample(lagged_zvalues:[[float]],num:int, copula=None):
    """ Example of fitting a copula function, and sampling
           lagged_zvalues: [ [z1,z2,z3] ]  distributed N(0,1) margins, roughly
           copula : Something from https://pypi.org/project/copulas/
           returns: [ [z1, z2, z3] ]  representative sample
    """
    # Lazy approach: just resample synthetic data from the fitted copula.
    # Some more evenly spaced sampling would be preferable.
    # See https://www.microprediction.com/blog/lottery for discussion
    if copula is None:
        copula = GaussianMultivariate()
    frame = pd.DataFrame(data=lagged_zvalues)
    copula.fit(frame)
    return copula.sample(num).values.tolist()
예제 #12
0
    def test_get_instance_instance(self):
        """Try to get a new instance from a instance"""
        instance = get_instance(GaussianMultivariate())

        # The copy has the same type and is unfitted.
        assert isinstance(instance, GaussianMultivariate)
        assert not instance.fitted
예제 #13
0
    def test_cdf(self):
        """CDF values lie in [0, 1] and the CDF is non-decreasing per column."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Test CDF bounds
        cdf = model.cumulative_distribution(sampled_data)
        assert (0 <= cdf).all() and (cdf <= 1).all()

        # Test CDF increasing function: freeze the other columns at one
        # fixed row so only a single variable changes along the sort order.
        for column in sampled_data.columns:
            sorted_data = sampled_data.sort_values(column)
            other_columns = data.columns.to_list()
            other_columns.remove(column)
            row = sorted_data.sample(1).iloc[0]
            # BUGFIX: use a distinct loop variable here; the original
            # shadowed the outer ``column`` variable, which was misleading.
            for other in other_columns:
                sorted_data[other] = row[other]

            cdf = model.cumulative_distribution(sorted_data)
            diffs = np.diff(
                cdf
            ) + 0.001  # Add tolerance to avoid floating precision issues.
            assert (diffs >= 0).all()
예제 #14
0
    def test_save_load(self):
        """A model loaded from disk reproduces the pdf/cdf of the original."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Round-trip the model through a pickle file.
        path_to_model = os.path.join(self.test_dir.name, "model.pkl")
        model.save(path_to_model)
        restored = GaussianMultivariate.load(path_to_model)

        # Both evaluations must agree on the same points.
        for evaluate in ('probability_density', 'cumulative_distribution'):
            original = getattr(model, evaluate)(sampled_data)
            loaded = getattr(restored, evaluate)(sampled_data)
            assert np.all(np.isclose(original, loaded, atol=0.01))
예제 #15
0
def get_errors_sample(errors, copula="Gumbel"):
    '''
    Parameters
    ----------
    errors : numpy array of shape (n,2)
        The errors to fit with a copula
    copula : string
        "Gumbel": fit with a Gumbel copula
        "Normal": fit with a Normal (Gaussian) copula

    Returns
    -------
    synthetic : numpy array of shape (n,2)
        A new sample of errors obtained from the joint distribution
        of the input errors

    Raises
    ------
    ValueError
        If ``copula`` is neither "Gumbel" nor "Normal".
    '''
    ### Transforming the np.array to a dataframe
    df = pd.DataFrame(errors, columns=['res1', 'res2'])

    ### Clamp exact 0s and 1s, which cause problems when fitting the copula
    df = pd.DataFrame(np.where(df == 0, 0.00000001,
                               np.where(df == 1, 0.99999999, df)),
                      columns=df.columns)
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df.values), columns=df.columns)

    ### Selecting the copula family used to fit the error series
    if copula == "Gumbel":
        c = gumbel.Gumbel()
    elif copula == "Normal":
        c = GaussianMultivariate()
    else:
        # BUGFIX: previously an unknown family left ``c`` unbound and the
        # fit below raised a confusing NameError.
        raise ValueError("copula must be 'Gumbel' or 'Normal', got %r" % copula)

    ### Fitting the copula and sampling from it
    c.fit(df.values)

    ### Map the sample back to the original scale of the errors
    synthetic = c.sample(len(df))
    synthetic = scaler.inverse_transform(synthetic)

    return synthetic
예제 #16
0
    def test_get_instance_instance_distribution(self):
        """Try to get a new instance from a instance with distribution"""
        source = GaussianMultivariate(
            distribution='copulas.univariate.truncnorm.TruncNorm')
        instance = get_instance(source)

        # The copy is unfitted but keeps the configured distribution.
        assert isinstance(instance, GaussianMultivariate)
        assert not instance.fitted
        assert instance.distribution == 'copulas.univariate.truncnorm.TruncNorm'
예제 #17
0
    def test_to_dict_from_dict(self):
        """A model rebuilt via to_dict/from_dict reproduces pdf and cdf."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Round-trip the model through its dict representation.
        restored = GaussianMultivariate.from_dict(model.to_dict())

        # Both evaluations must agree on the same points.
        for evaluate in ('probability_density', 'cumulative_distribution'):
            original = getattr(model, evaluate)(sampled_data)
            rebuilt = getattr(restored, evaluate)(sampled_data)
            assert np.all(np.isclose(original, rebuilt, atol=0.01))
예제 #18
0
def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """Example of creating a "sample" of future values.

           lagged_zvalues:     [ [z1,z2,z3] ]  distributed N(0,1) margins, roughly
           copula :            Something from https://pypi.org/project/copulas/
           returns:            [ [z1, z2, z3] ]  representative sample

        Swap out this function for whatever you like.
    """
    # Lazy approach: any multivariate density estimation could go here.
    # If you prefer uniform margins, use mw.get_lagged_copulas(name=name, count= 5000)
    # See https://www.microprediction.com/blog/lottery for discussion of this "game"
    if copula is None:
        copula = GaussianMultivariate()  # <---
    frame = pd.DataFrame(data=lagged_zvalues)
    copula.fit(frame)
    return copula.sample(num).values.tolist()
예제 #19
0
def testMITCopulas():
    """Smoke test: fit a Gaussian copula on the demo dataset and plot it."""
    import warnings
    warnings.filterwarnings('ignore')

    from copulas.datasets import sample_trivariate_xyz
    from copulas.multivariate import GaussianMultivariate
    from copulas.visualization import compare_3d

    # Dataset with 3 columns that are not independent.
    real_data = sample_trivariate_xyz()

    # Fit a Gaussian copula and draw a same-sized synthetic sample.
    copula = GaussianMultivariate()
    copula.fit(real_data)
    synthetic_data = copula.sample(len(real_data))

    # Visual comparison of real vs synthetic data.
    compare_3d(real_data, synthetic_data)
    return True
예제 #20
0
def get_more_thermal_params(N=100,F_2x=3.84):
    """Sample N sets of thermal-response parameters (d, q) for a 3-timescale model.

    Args:
        N: number of accepted parameter sets to return.
        F_2x: scaling constant dividing TCR/ECS when deriving q values
            (presumably the forcing for doubled CO2 — TODO confirm).

    Returns:
        pandas.DataFrame with rows ['d', 'q'], columns [1, 2, 3], concatenated
        under a top-level key 'therm0'..'therm{N-1}' per accepted set.
    """
    from copulas.multivariate import GaussianMultivariate

    # Joint distribution of (d1, d2, q1) previously fitted to CMIP6 output
    # and saved to disk.
    d1_d2_q1_copula = GaussianMultivariate.load(Path(__file__).parent / "./Parameter_Sets/d1_d2_q1_CMIP6_copula.pkl")

    d1_d2_q1_df = d1_d2_q1_copula.sample(10*N)

    # Re-sample any rows containing negative values until none remain.
    while (d1_d2_q1_df<0).any(axis=1).sum() != 0:
        d1_d2_q1_df.loc[(d1_d2_q1_df<0).any(axis=1)] = d1_d2_q1_copula.sample((d1_d2_q1_df<0).any(axis=1).sum()).values

    # NOTE(review): copula columns 'd2'/'d1'/'q1' are mapped to the 2nd/3rd/3rd
    # slots here — confirm this ordering is intended.
    d2_samples = d1_d2_q1_df['d2'].values
    d3_samples = d1_d2_q1_df['d1'].values
    q3_samples = d1_d2_q1_df['q1'].values

    # First timescale sampled independently from a truncated normal.
    d1_samples = sp.stats.truncnorm(-2,2,loc=283,scale=116).rvs(10*N)

    # TCR (lognormal) and RWF (truncated normal) samples; ECS = TCR / RWF.
    TCR_samples = np.random.lognormal(np.log(2.5)/2,np.log(2.5)/(2*1.645),10*N)
    RWF_samples = sp.stats.truncnorm(-2.75,2.75,loc=0.582,scale=0.06).rvs(10*N)
    ECS_samples = TCR_samples/RWF_samples

    d = np.array([d1_samples,d2_samples,d3_samples])

    # Response factor over a 70-step horizon for each timescale d.
    k = 1-(d/70)*(1-np.exp(-70/d))

    # Solve the 2x2 linear relation between (TCR, ECS) and the remaining q1, q2.
    q = ((TCR_samples/F_2x - k[2]*q3_samples)[np.newaxis,:] - np.roll(k[:2],axis=0,shift=1)*(ECS_samples/F_2x - q3_samples)[np.newaxis,:])/(k[:2] - np.roll(k[:2],axis=0,shift=1))

    # Template frame for one parameter set.
    sample_df = pd.DataFrame(index=['d','q'],columns = [1,2,3]).apply(pd.to_numeric)
    df_list = []

    i=0
    j=0

    # Rejection loop: accept candidate draws until N sets with q2 > 0 remain.
    while j<N:

        curr_df = sample_df.copy()
        curr_df.loc['d'] = d[:,i]
        curr_df.loc['q',3] = q3_samples[i]
        curr_df.loc['q',[1,2]] = q[:,i]

        # Reject draws with non-positive q2.
        if curr_df.loc['q',2]<=0:
            i+=1
            continue

        df_list += [curr_df]
        j+=1
        i+=1

    thermal_params = pd.concat(df_list,axis=1,keys=['therm'+str(x) for x in np.arange(N)])

    return thermal_params
예제 #21
0
def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """Example of fitting a copula function, and sampling.

           lagged_zvalues: [ [z1,z2,z3] ]  distributed N(0,1) margins, roughly
           copula : Something from https://pypi.org/project/copulas/
           returns: [ [z1, z2, z3] ]  representative sample
    """
    # This is the part you'll want to change.
    # It's lazy to just sample synthetic data; evenly spaced samples are
    # likely to serve you better, and any multivariate density estimation
    # could go here. For roughly uniform margins use
    # mw.get_lagged_copulas(name=name, count= 5000) instead.
    # See https://www.microprediction.com/blog/lottery for discussion.
    if copula is None:
        copula = GaussianMultivariate()  # <---
    frame = pd.DataFrame(data=lagged_zvalues)
    copula.fit(frame)
    return copula.sample(num).values.tolist()
예제 #22
0
    def test_pdf(self):
        """The probability density is strictly positive on sampled points."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        samples = model.sample(10)

        densities = model.probability_density(samples)
        assert (densities > 0).all()
예제 #23
0
    def set_parameters(self, parameters):
        """Set copula model parameters.

        After unflattening the parameters, default keys are added so the
        dict matches what the copula expects, and the model is rebuilt.

        Args:
            parameters (dict):
                Copula flattened parameters.
        """
        params = unflatten_dict(parameters)
        params.setdefault('distribution', self.distribution)
        params.setdefault('fitted', True)

        params = self._unflatten_gaussian_copula(params)

        self.model = GaussianMultivariate.from_dict(params)
예제 #24
0
    def test_fit_sample(self):
        """Samples have the requested length and contain every original column."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        # Sampling honors the requested row count.
        for size in [10, 50, 100]:
            assert len(model.sample(size)) == size

        sampled_data = model.sample(10)

        assert sampled_data.shape == (10, 3)
        for column in data.columns:
            assert column in sampled_data
예제 #25
0
def copula_based(X, Y):
    """
    Calculate joint PDF/CDF using copula
    """
    import pandas as pd
    from copulas.multivariate import GaussianMultivariate

    # Fit a Gaussian copula on the paired (P, T) observations.
    frame = pd.DataFrame(list(zip(X, Y)), columns=['P', 'T'])
    dist = GaussianMultivariate()
    dist.fit(frame)

    # Build a one-row frame holding the mean point of the inputs.
    point = dist.sample(1)
    point.at[0, 'P'] = np.mean(X)
    point.at[0, 'T'] = np.mean(Y)

    # Evaluate pdf/cdf at the mean value.
    pdf = dist.pdf(point)
    cdf = dist.cumulative_distribution(point)
    return [pdf, cdf]
예제 #26
0
    def test_cdf(self):
        """CDF values lie in (0, 1) and the CDF is non-decreasing per column."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Test CDF bounds
        cdf = model.cumulative_distribution(sampled_data)
        assert (0 < cdf).all() and (cdf < 1).all()

        # Test CDF increasing function: freeze the other columns at one
        # fixed row so only a single variable changes along the sort order.
        for column in sampled_data.columns:
            sorted_data = sampled_data.sort_values(column)
            other_columns = data.columns.to_list()
            other_columns.remove(column)
            row = sorted_data.sample(1).iloc[0]
            # BUGFIX: use a distinct loop variable here; the original
            # shadowed the outer ``column`` variable, which was misleading.
            for other in other_columns:
                sorted_data[other] = row[other]

            cdf = model.cumulative_distribution(sorted_data)
            assert (np.diff(cdf) >= 0).all()
예제 #27
0
def test_conditional_sampling():
    """Conditioning on one or two columns shifts all sampled column means."""
    condition = np.random.randint(1, 4, size=3000)
    conditioned = np.random.normal(loc=1, scale=1, size=3000) * condition
    data = pd.DataFrame({
        'a': condition,
        'b': condition,
        'c': conditioned,
    })

    gm = GaussianMultivariate()
    gm.fit(data)

    # Condition on one column, then on two; every column mean should track
    # the conditioned value since a, b, c are all tied to ``condition``.
    for conditions, expected in [({'b': 1}, 1), ({'a': 3, 'b': 3}, 3)]:
        sampled = gm.sample(3000, conditions=conditions)
        for col in ('a', 'b', 'c'):
            np.testing.assert_allclose(sampled[col].mean(), expected, atol=.5)
예제 #28
0
    #print(type(XR))
    ########################################copula#########################################

    # Per-pair outputs: flattened covariance matrices, gamma scales, gamma
    # shape parameters ("a").
    conv = []
    scales = []
    As = []

    # Fit a Gaussian copula with gamma margins on each (right, left) pair of
    # flattened sub-blocks. Assumes XL and XR are equal-length sequences of
    # equally sized arrays — TODO confirm against the caller.
    for i in range(len(XL)):
        sbl = XL[i].flatten()
        sbr = XR[i].flatten()
        conc = np.empty((sbl.shape[0], 2))
        conc[:, 0] = sbr[:]
        conc[:, 1] = sbl[:]
        #print(conc.shape)
        copula = GaussianMultivariate(distribution=GammaUnivariate)
        copula.fit(conc)
        # Flatten the fitted 2x2 covariance into 4 values.
        XX = np.array(copula.to_dict()['covariance'])
        xx = XX.flatten()
        conv.append(xx)
        UNI = copula.to_dict()['univariates'][0]  # with the right-hand sbr
        scales.append(UNI["scale"])
        As.append(UNI["a"])
    A = np.array(As)
    B = np.array(scales)
    C = np.array(conv)
    # Pack per-pair parameters into one row each: shape, scale, then the
    # four covariance entries.
    distribution = np.empty((A.shape[0], 6))
    distribution[:, 0] = A
    distribution[:, 1] = B
    distribution[:, 2:
                 6] = C  # distribution[shape scale c[0] c[1] c[2] c[3] ] for each sb
예제 #29
0
class GaussianCopula(SDVModel):
    """Model wrapping ``copulas.multivariate.GaussianMultivariate`` copula.

    Args:
        distribution (copulas.univariate.Univariate or str):
            Copulas univariate distribution to use.

    Example:
        The example below shows simple usage case where a ``GaussianMultivariate``
        is being created and its ``fit`` and ``sample`` methods are being called.

        >>> model = GaussianMultivariate()
        >>> model.fit(pd.DataFrame({'a_field': list(range(10))}))
        >>> model.sample(5)
            a_field
        0  4.796559
        1  7.395329
        2  7.400417
        3  2.794212
        4  1.925887
    """

    # Univariate distribution used when none is passed to ``__init__``.
    DISTRIBUTION = GaussianUnivariate
    # Per-instance configured distribution (set in ``__init__``).
    distribution = None
    # Fitted GaussianMultivariate instance (set in ``fit``/``set_parameters``).
    model = None

    def __init__(self, distribution=None):
        # Fall back to the class-level default distribution.
        self.distribution = distribution or self.DISTRIBUTION

    def fit(self, table_data):
        """Fit the model to the table.

        The table data is imputed before fitting the model.

        Args:
            table_data (pandas.DataFrame):
                Data to be fitted.
        """
        table_data = impute(table_data)
        self.model = GaussianMultivariate(distribution=self.distribution)
        self.model.fit(table_data)

    def sample(self, num_samples):
        """Sample ``num_samples`` rows from the model.

        Args:
            num_samples (int):
                Amount of rows to sample.

        Returns:
            pandas.DataFrame:
                Sampled data with the number of rows specified in ``num_samples``.
        """
        return self.model.sample(num_samples)

    def get_parameters(self):
        """Get copula model parameters.

        Compute model ``covariance`` and ``distribution.std``
        before it returns the flatten dict.

        Returns:
            dict:
                Copula flatten parameters.
        """
        values = list()
        # Keep only the lower triangle of the (symmetric) covariance,
        # stored as a ragged list of growing rows.
        triangle = np.tril(self.model.covariance)

        for index, row in enumerate(triangle.tolist()):
            values.append(row[:index + 1])

        self.model.covariance = np.array(values)
        params = self.model.to_dict()
        univariates = dict()
        # Re-key univariates by column name; log-transform each scale so the
        # inverse ``np.exp`` in ``_unflatten_gaussian_copula`` always yields
        # a positive value.
        for name, univariate in zip(params.pop('columns'),
                                    params['univariates']):
            univariates[name] = univariate
            if 'scale' in univariate:
                scale = univariate['scale']
                if scale == 0:
                    # Avoid ``log(0) = -inf``.
                    scale = EPSILON

                univariate['scale'] = np.log(scale)

        params['univariates'] = univariates

        return flatten_dict(params)

    def _prepare_sampled_covariance(self, covariance):
        """Prepare a covariance matrix.

        Args:
            covariance (list):
                covariance after unflattening model parameters.

        Result:
            list[list]:
                symmetric Positive semi-definite matrix.
        """
        # Mirror the triangular matrix to make it symmetric; subtracting the
        # diagonal once avoids doubling it.
        covariance = np.array(square_matrix(covariance))
        covariance = (covariance + covariance.T -
                      (np.identity(covariance.shape[0]) * covariance))

        # Sampled parameters may not form a valid covariance matrix; repair
        # it so the copula can be instantiated.
        if not check_matrix_symmetric_positive_definite(covariance):
            covariance = make_positive_definite(covariance)

        return covariance.tolist()

    def _unflatten_gaussian_copula(self, model_parameters):
        """Prepare unflattened model params to recreate Gaussian Multivariate instance.

        The preparations consist basically in:

            - Transform sampled negative standard deviations from distributions into positive
              numbers

            - Ensure the covariance matrix is a valid symmetric positive-semidefinite matrix.

            - Add string parameters kept inside the class (as they can't be modelled),
              like ``distribution_type``.

        Args:
            model_parameters (dict):
                Sampled and reestructured model parameters.

        Returns:
            dict:
                Model parameters ready to recreate the model.
        """
        univariate_kwargs = {'type': model_parameters['distribution']}

        # Rebuild the (columns, univariates) parallel lists that
        # ``GaussianMultivariate.from_dict`` expects.
        columns = list()
        univariates = list()
        for column, univariate in model_parameters['univariates'].items():
            columns.append(column)
            univariate.update(univariate_kwargs)
            # Undo the log transform applied in ``get_parameters``.
            univariate['scale'] = np.exp(univariate['scale'])
            univariates.append(univariate)

        model_parameters['univariates'] = univariates
        model_parameters['columns'] = columns

        covariance = model_parameters.get('covariance')
        model_parameters['covariance'] = self._prepare_sampled_covariance(
            covariance)

        return model_parameters

    def set_parameters(self, parameters):
        """Set copula model parameters.

        Add additional keys after unflattening the parameters
        in order to set expected parameters for the copula.

        Args:
            parameters (dict):
                Copula flattened parameters.
        """
        parameters = unflatten_dict(parameters)
        parameters.setdefault('fitted', True)
        parameters.setdefault('distribution', self.distribution)

        parameters = self._unflatten_gaussian_copula(parameters)

        self.model = GaussianMultivariate.from_dict(parameters)
예제 #30
0
from botocore import UNSIGNED
from botocore.client import Config
from scipy.stats import ks_2samp

from copulas import get_instance
from copulas.multivariate import GaussianMultivariate, VineCopula
from copulas.univariate import GaussianUnivariate

# Module-level logger.
LOGGER = logging.getLogger(__name__)

BUCKET_NAME = 'atm-data'  # Bucket where the datasets are stored
DATA_URL = 'http://{}.s3.amazonaws.com/'.format(BUCKET_NAME)

# Candidate models benchmarked against each dataset, keyed by a
# human-readable constructor string.
AVAILABLE_MODELS = {
    'GaussianMultivariate(GaussianUnivariate)':
    GaussianMultivariate(GaussianUnivariate),
    'GaussianMultivariate()':
    GaussianMultivariate(),
    'VineCopula("center")':
    VineCopula('center'),
    'VineCopula("direct")':
    VineCopula('direct'),
    'VineCopula("regular")':
    VineCopula('regular')
}
OUTPUT_COLUMNS = [
    'model_name',
    'dataset_name',
    'num_columns',
    'num_rows',
    'elapsed_time',