def test_fit_sample(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    for N in [10, 50, 100]:
        assert len(model.sample(N)) == N

    sampled_data = model.sample(10)

    assert sampled_data.shape == (10, 3)
    for column in data.columns:
        assert column in sampled_data

def test_cdf(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Test CDF
    cdf = model.cumulative_distribution(sampled_data)
    assert (0 <= cdf).all() and (cdf <= 1).all()

    # Test CDF increasing function
    for column in sampled_data.columns:
        sorted_data = sampled_data.sort_values(column)
        other_columns = data.columns.to_list()
        other_columns.remove(column)
        row = sorted_data.sample(1).iloc[0]
        for other_column in other_columns:
            sorted_data[other_column] = row[other_column]

        cdf = model.cumulative_distribution(sorted_data)
        diffs = np.diff(cdf) + 0.001  # Add tolerance to avoid floating precision issues.
        assert (diffs >= 0).all()

def test_fit_sample_distribution_dict(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate(distribution={'x': GaussianKDE()})
    model.fit(data)

    sampled_data = model.sample(10)

    assert sampled_data.shape == (10, 3)

def fit_copula_to_z2_data(name=None, do_plot=False):
    """ Example of fitting a copula to z2 stream data

        :param name:    stream name
        :param do_plot: whether to plot the simulated points
        :return:        copula obj
    """
    name = name or 'z2~helicopter_psi~helicopter_theta~70.json'
    assert 'z2~' in name, "Expecting a bivariate stream"
    lagged_values = get_stream_lagged_values(name=name)
    normalized_points = [mr.norminv(mr.from_zcurve(zvalue=z, dim=2)) for z in lagged_values]
    npitch, nyaw = zip(*normalized_points)
    copula = GaussianMultivariate()
    X = np.array([npitch, nyaw]).transpose()
    copula.fit(X)
    synthetic_points = copula.sample(len(X))
    spitch = synthetic_points[0]
    syaw = synthetic_points[1]
    if do_plot:
        plt.scatter(spitch, syaw)
        plt.xlabel('Simulated Pitch - normalized')
        plt.ylabel('Simulated Yaw - normalized')
        plt.show()
    return copula

def test_fit_sample_distribution_name(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate('copulas.univariate.gaussian_kde.GaussianKDE')
    model.fit(data)

    sampled_data = model.sample(10)

    assert sampled_data.shape == (10, 3)

def test_gaussiankde_arguments(self):
    size = 1000
    low = 0
    high = 9
    data = randint.rvs(low, high, size=size) + norm.rvs(0, 0.1, size=size)
    dist = GaussianMultivariate(distribution=GaussianKDE(bw_method=0.01))
    dist.fit(data)
    samples = dist.sample(size).to_numpy()[0]

    d, p = ks_2samp(data, samples)
    assert p >= 0.05

def test_pdf(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Test PDF
    pdf = model.probability_density(sampled_data)
    assert (0 < pdf).all()

def test_fit_sample_distribution_dict_multiple(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate(distribution={
        'x': Univariate(parametric=ParametricType.PARAMETRIC),
        'y': BetaUnivariate(),
        'z': GaussianKDE(),
    })
    model.fit(data)

    sampled_data = model.sample(10)

    assert sampled_data.shape == (10, 3)

def test_conditional_sampling():
    condition = np.random.randint(1, 4, size=3000)
    conditioned = np.random.normal(loc=1, scale=1, size=3000) * condition
    data = pd.DataFrame({
        'a': condition,
        'b': condition,
        'c': conditioned,
    })

    gm = GaussianMultivariate()
    gm.fit(data)

    sampled = gm.sample(3000, conditions={'b': 1})

    np.testing.assert_allclose(sampled['a'].mean(), 1, atol=.5)
    np.testing.assert_allclose(sampled['b'].mean(), 1, atol=.5)
    np.testing.assert_allclose(sampled['c'].mean(), 1, atol=.5)

    sampled = gm.sample(3000, conditions={'a': 3, 'b': 3})

    np.testing.assert_allclose(sampled['a'].mean(), 3, atol=.5)
    np.testing.assert_allclose(sampled['b'].mean(), 3, atol=.5)
    np.testing.assert_allclose(sampled['c'].mean(), 3, atol=.5)

def _gaussian(self, dataset):
    """
    For the given dataset, this runs "everything but the kitchen sink"
    (i.e. every feature of GaussianMultivariate that is officially supported)
    and makes sure it doesn't crash.
    """
    model = GaussianMultivariate({
        dataset.columns[0]: GaussianKDE()  # Use a KDE for the first column
    })
    model.fit(dataset)

    for N in [10, 100, 50]:
        assert len(model.sample(N)) == N

    sampled_data = model.sample(10)
    pdf = model.probability_density(sampled_data)
    cdf = model.cumulative_distribution(sampled_data)

    # Test Save/Load from Dictionary
    config = model.to_dict()
    model2 = GaussianMultivariate.from_dict(config)
    for N in [10, 100, 50]:
        assert len(model2.sample(N)) == N

    pdf2 = model2.probability_density(sampled_data)
    cdf2 = model2.cumulative_distribution(sampled_data)
    assert np.all(np.isclose(pdf, pdf2, atol=0.01))
    assert np.all(np.isclose(cdf, cdf2, atol=0.01))

    # Test Save/Load from a pickle file
    path_to_model = os.path.join(self.test_dir.name, "model.pkl")
    model.save(path_to_model)
    model2 = GaussianMultivariate.load(path_to_model)
    for N in [10, 100, 50]:
        assert len(model2.sample(N)) == N

    pdf2 = model2.probability_density(sampled_data)
    cdf2 = model2.cumulative_distribution(sampled_data)
    assert np.all(np.isclose(pdf, pdf2, atol=0.01))
    assert np.all(np.isclose(cdf, cdf2, atol=0.01))

def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """ Example of fitting a copula function, and sampling

        lagged_zvalues: [ [z1,z2,z3] ] distributed N(0,1) margins, roughly
        copula :        Something from https://pypi.org/project/copulas/
        returns:        [ [z1, z2, z3] ] representative sample
    """
    # Remark: It's lazy to just sample synthetic data.
    #         Some more evenly spaced sampling would be preferable.
    #         See https://www.microprediction.com/blog/lottery for discussion.
    df = pd.DataFrame(data=lagged_zvalues)
    if copula is None:
        copula = GaussianMultivariate()
    copula.fit(df)
    synthetic = copula.sample(num)
    return synthetic.values.tolist()

def test_to_dict_from_dict(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    params = model.to_dict()
    model2 = GaussianMultivariate.from_dict(params)

    pdf = model.probability_density(sampled_data)
    pdf2 = model2.probability_density(sampled_data)
    assert np.all(np.isclose(pdf, pdf2, atol=0.01))

    cdf = model.cumulative_distribution(sampled_data)
    cdf2 = model2.cumulative_distribution(sampled_data)
    assert np.all(np.isclose(cdf, cdf2, atol=0.01))

def test_save_load(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    path_to_model = os.path.join(self.test_dir.name, "model.pkl")
    model.save(path_to_model)
    model2 = GaussianMultivariate.load(path_to_model)

    pdf = model.probability_density(sampled_data)
    pdf2 = model2.probability_density(sampled_data)
    assert np.all(np.isclose(pdf, pdf2, atol=0.01))

    cdf = model.cumulative_distribution(sampled_data)
    cdf2 = model2.cumulative_distribution(sampled_data)
    assert np.all(np.isclose(cdf, cdf2, atol=0.01))

def get_errors_sample(errors, copula="Gumbel"):
    '''
    Parameters
    ----------
    errors : numpy array of shape (n, 2)
        The errors to fit with a copula
    copula : string
        "Gumbel": fit with a Gumbel copula
        "Normal": fit with a Normal copula

    Returns
    -------
    synthetic : a new sample of errors obtained with the JPD of the errors
    '''
    ### Transform the np.array into a dataframe
    df = pd.DataFrame(errors, columns=['res1', 'res2'])

    ### Transform the error series so that there is no 0 or 1,
    ### because those values lead to problems when using the copula
    df = pd.DataFrame(np.where(df == 0, 0.00000001, np.where(df == 1, 0.99999999, df)),
                      columns=df.columns)
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df.values), columns=df.columns)

    ### Select a copula for fitting the error series
    if copula == "Gumbel":
        c = gumbel.Gumbel()
    elif copula == "Normal":
        c = GaussianMultivariate()

    ### Fit the copula and get the parameters
    c.fit(df.values)
    # copulas_parameters = c.to_dict()

    ### Generate a sample from the copula
    synthetic = c.sample(len(df))
    synthetic = scaler.inverse_transform(synthetic)
    return synthetic

def copula_based(X, Y):
    """ Calculate joint PDF/CDF using copula """
    import pandas as pd
    from copulas.multivariate import GaussianMultivariate

    # fit gaussian copula
    data = pd.DataFrame(list(zip(X, Y)), columns=['P', 'T'])
    dist = GaussianMultivariate()
    dist.fit(data)

    sampled = dist.sample(1)
    sampled.at[0, 'P'] = np.mean(X)
    sampled.at[0, 'T'] = np.mean(Y)

    # find pdf/cdf at mean value
    pdf = dist.pdf(sampled)
    cdf = dist.cumulative_distribution(sampled)
    return [pdf, cdf]

def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """ Example of creating a "sample" of future values

        lagged_zvalues: [ [z1,z2,z3] ] distributed N(0,1) margins, roughly
        copula :        Something from https://pypi.org/project/copulas/
        returns:        [ [z1, z2, z3] ] representative sample

        Swap out this function for whatever you like.
    """
    # Remark 1: It's lazy to just sample synthetic data.
    # Remark 2: Any multivariate density estimation could go here.
    # Remark 3: If you prefer uniform margins, use mw.get_lagged_copulas(name=name, count=5000)
    #
    # See https://www.microprediction.com/blog/lottery for discussion of this "game"
    df = pd.DataFrame(data=lagged_zvalues)
    if copula is None:
        copula = GaussianMultivariate()  # <---
    copula.fit(df)
    synthetic = copula.sample(num)
    return synthetic.values.tolist()

def testMITCopulas():
    import warnings
    warnings.filterwarnings('ignore')

    from copulas.datasets import sample_trivariate_xyz
    from copulas.multivariate import GaussianMultivariate
    from copulas.visualization import compare_3d

    # Load a dataset with 3 columns that are not independent
    real_data = sample_trivariate_xyz()

    # Fit a gaussian copula to the data
    copula = GaussianMultivariate()
    copula.fit(real_data)

    # Sample synthetic data
    synthetic_data = copula.sample(len(real_data))

    # Plot the real and the synthetic data to compare
    compare_3d(real_data, synthetic_data)

    return True

def test_cdf(self):
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Test CDF
    cdf = model.cumulative_distribution(sampled_data)
    assert (0 < cdf).all() and (cdf < 1).all()

    # Test CDF increasing function
    for column in sampled_data.columns:
        sorted_data = sampled_data.sort_values(column)
        other_columns = data.columns.to_list()
        other_columns.remove(column)
        row = sorted_data.sample(1).iloc[0]
        for other_column in other_columns:
            sorted_data[other_column] = row[other_column]

        cdf = model.cumulative_distribution(sorted_data)
        assert (np.diff(cdf) >= 0).all()

def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """ Example of fitting a copula function, and sampling

        lagged_zvalues: [ [z1,z2,z3] ] distributed N(0,1) margins, roughly
        copula :        Something from https://pypi.org/project/copulas/
        returns:        [ [z1, z2, z3] ] representative sample
    """
    # This is the part you'll want to change.
    #
    # Remark 1: It's lazy to just sample synthetic data.
    #           Some more evenly spaced sampling would be preferable.
    # Remark 2: Any multivariate density estimation could go here.
    # Remark 3: If you want to literally fit to a Copula (i.e. roughly uniform margins)
    #           then you might want to use mw.get_lagged_copulas(name=name, count=5000) instead.
    #
    # See https://www.microprediction.com/blog/lottery for discussion of why evenly
    # spaced samples are likely to serve you better.
    df = pd.DataFrame(data=lagged_zvalues)
    if copula is None:
        copula = GaussianMultivariate()  # <---
    copula.fit(df)
    synthetic = copula.sample(num)
    return synthetic.values.tolist()

class GaussianCopula(SDVModel):
    """Model wrapping ``copulas.multivariate.GaussianMultivariate`` copula.

    Args:
        distribution (copulas.univariate.Univariate or str):
            Copulas univariate distribution to use.

    Example:
        The example below shows simple usage case where a ``GaussianMultivariate``
        is being created and its ``fit`` and ``sample`` methods are being called.

        >>> model = GaussianMultivariate()
        >>> model.fit(pd.DataFrame({'a_field': list(range(10))}))
        >>> model.sample(5)
            a_field
        0  4.796559
        1  7.395329
        2  7.400417
        3  2.794212
        4  1.925887
    """

    DISTRIBUTION = GaussianUnivariate
    distribution = None
    model = None

    def __init__(self, distribution=None):
        self.distribution = distribution or self.DISTRIBUTION

    def fit(self, table_data):
        """Fit the model to the table.

        Impute the table data before fitting the model.

        Args:
            table_data (pandas.DataFrame):
                Data to be fitted.
        """
        table_data = impute(table_data)
        self.model = GaussianMultivariate(distribution=self.distribution)
        self.model.fit(table_data)

    def sample(self, num_samples):
        """Sample ``num_samples`` rows from the model.

        Args:
            num_samples (int):
                Amount of rows to sample.

        Returns:
            pandas.DataFrame:
                Sampled data with the number of rows specified in ``num_samples``.
        """
        return self.model.sample(num_samples)

    def get_parameters(self):
        """Get copula model parameters.

        Compute the model ``covariance`` and ``distribution.std``
        before returning the flattened dict.

        Returns:
            dict:
                Copula flattened parameters.
        """
        values = list()
        triangle = np.tril(self.model.covariance)

        for index, row in enumerate(triangle.tolist()):
            values.append(row[:index + 1])

        self.model.covariance = np.array(values)
        params = self.model.to_dict()
        univariates = dict()
        for name, univariate in zip(params.pop('columns'), params['univariates']):
            univariates[name] = univariate
            if 'scale' in univariate:
                scale = univariate['scale']
                if scale == 0:
                    scale = EPSILON

                univariate['scale'] = np.log(scale)

        params['univariates'] = univariates

        return flatten_dict(params)

    def _prepare_sampled_covariance(self, covariance):
        """Prepare a covariance matrix.

        Args:
            covariance (list):
                Covariance after unflattening model parameters.

        Result:
            list[list]:
                Symmetric positive semi-definite matrix.
        """
        covariance = np.array(square_matrix(covariance))
        covariance = (covariance + covariance.T - (np.identity(covariance.shape[0]) * covariance))

        if not check_matrix_symmetric_positive_definite(covariance):
            covariance = make_positive_definite(covariance)

        return covariance.tolist()

    def _unflatten_gaussian_copula(self, model_parameters):
        """Prepare unflattened model parameters to recreate a Gaussian Multivariate instance.

        The preparation consists basically of:

            - Transforming sampled negative standard deviations from distributions
              into positive numbers.
            - Ensuring the covariance matrix is a valid symmetric positive-semidefinite
              matrix.
            - Adding string parameters kept inside the class (as they can't be modelled),
              like ``distribution_type``.

        Args:
            model_parameters (dict):
                Sampled and restructured model parameters.

        Returns:
            dict:
                Model parameters ready to recreate the model.
        """
        univariate_kwargs = {
            'type': model_parameters['distribution']
        }

        columns = list()
        univariates = list()
        for column, univariate in model_parameters['univariates'].items():
            columns.append(column)
            univariate.update(univariate_kwargs)
            univariate['scale'] = np.exp(univariate['scale'])
            univariates.append(univariate)

        model_parameters['univariates'] = univariates
        model_parameters['columns'] = columns

        covariance = model_parameters.get('covariance')
        model_parameters['covariance'] = self._prepare_sampled_covariance(covariance)

        return model_parameters

    def set_parameters(self, parameters):
        """Set copula model parameters.

        Add additional keys after unflattening the parameters
        in order to set the expected parameters for the copula.

        Args:
            parameters (dict):
                Copula flattened parameters.
        """
        parameters = unflatten_dict(parameters)
        parameters.setdefault('fitted', True)
        parameters.setdefault('distribution', self.distribution)

        parameters = self._unflatten_gaussian_copula(parameters)

        self.model = GaussianMultivariate.from_dict(parameters)

df_uniform = uniformise_normal_data(df0)
pd.DataFrame(df_uniform[:, 0]).hist()

test = np.random.normal(size=600)
dd = pd.DataFrame({'t': test, 'a': test})
pd.Series(test).hist()

c = gumbel.Gumbel()
c.fit(dd)

c2 = GaussianMultivariate()
c2.fit(df0)
c2.sample(len(df0))

pd.Series(c.probability_density(df.values)).hist()
pd.Series(c.cumulative_distribution(df.values)).hist()
c.partial_derivative(df.values)
c.compute_theta()

synthetic = c.sample(len(df))

h = sns.jointplot(df.iloc[:, 0], df.iloc[:, 1], kind='kde', stat_func=None)

cross_indices, cross_clock_genes, cross_scores = FourierClock.cross_corr(X_data, Y_copy, X_ID)
cross_scores = np.abs(np.array(cross_scores))

scores = np.concatenate(
    (auto_scores.reshape(-1, 1),
     cross_scores.reshape(-1, 1),
     arser_scores['fdr_BH'].values.reshape(-1, 1),
     jtk_scores['ADJ.P'].values.reshape(-1, 1)),
    axis=1)
scores[:, 2:] = 1 - scores[:, 2:]

num_resamples = 1000  # Change to 50,000/100,000

gcopula = GaussianMultivariate()
gcopula.fit(scores)
random_sample = gcopula.sample(num_resamples)
sample_scores = pd.DataFrame(random_sample)

mean = np.mean(sample_scores.values, axis=0)
covariance = np.cov(sample_scores.T)
dist = mvn(mean=mean, cov=covariance, allow_singular=True)

gene_scores = []
for i in range(scores.shape[0]):
    gene_scores.append(dist.cdf(x=scores[i, :]))

gene_scores = np.array(gene_scores)
gene_scores = np.concatenate(
    (arser_scores['CycID'].values.reshape(-1, 1), gene_scores.reshape(-1, 1)), axis=1)
gene_scores = gene_scores[gene_scores[:, 1].argsort()[::-1]]

#!/usr/bin/env python
'''
Given a tabular dataset, fit a copula to it.
'''
import matplotlib.pyplot as plt
import pandas as pd

from copulas.multivariate import GaussianMultivariate
from copulas.visualization import compare_3d

df = pd.read_csv('samples.csv')
cols = ['x1', 'x2', 'x3']

copula = GaussianMultivariate()
copula.fit(df[cols])

# generate synthetic data from our fit
sd = copula.sample(df.shape[0])

compare_3d(df[cols], sd)
plt.show()

g.map_offdiag(sns.scatterplot)

var_sp = rst_sp.forecast().variance.dropna().values[0]
var_tn = rst_tn.forecast().variance.dropna().values[0]
vol_sp = np.sqrt(var_sp)
vol_tn = np.sqrt(var_tn)
vol = np.array([vol_sp, vol_tn])

n = 10000

## copulas package
gaus_cop1 = GaussianMultivariate()
gaus_cop1.fit(filtered_returns)
print(gaus_cop1.covariance)

samples1 = gaus_cop1.sample(n).values
scale_samples1 = samples1 * vol.T
sim_returns1 = 0.5 * (scale_samples1[:, 0] + scale_samples1[:, 1])
sorted_returns1 = np.sort(sim_returns1)
sns.distplot(sorted_returns1)

var = stats.scoreatpercentile(sorted_returns1, 1)
es = np.mean(sorted_returns1[:100])

class Sample:
    def __init__(self, load, mode, pv_connection, ev_connection, ev_max_connection, pld_pred):
        self.load = pd.DataFrame(load)
        self.mode = mode
        self.copula = GaussianMultivariate()
        self.pv_connection = pv_connection
        self.ev_connection = ev_connection
        self.ev_max_connection = ev_max_connection
        self.pld_pred = pld_pred

    def get_load_sample_nrtp(self, bus, pv_curve, ev_curve):
        load = {}
        if self.mode == 0:
            for n in range(bus):
                # normal distribution [Morshed, 2018] [unit: kWh]
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
        elif self.mode == 1:
            for n in range(bus):
                load_aux = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                load[n] = [float(item) for item in load_aux]
            df = pd.DataFrame.from_dict(load)
            self.copula.fit(df)
            load = self.copula.sample(len(df))
        elif self.mode == 2:
            load_aux = {}
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2],
                              4: pv_curve[3], 5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5],
                              8: pv_curve[5], 9: pv_curve[6], 10: pv_curve[7], 11: pv_curve[7],
                              12: pv_curve[7], 13: pv_curve[7], 14: pv_curve[8], 15: pv_curve[8],
                              16: pv_curve[8], 17: pv_curve[9], 18: pv_curve[9], 19: pv_curve[10],
                              20: pv_curve[11], 21: pv_curve[12], 22: pv_curve[13]}
            pv_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.pv_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[1] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    pv_number += 1
        elif self.mode == 3:
            load_aux = {}
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1],
                              4: ev_curve[1], 5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2],
                              8: ev_curve[2], 9: ev_curve[3], 10: ev_curve[3], 11: ev_curve[3],
                              12: ev_curve[3], 13: ev_curve[3], 14: ev_curve[3], 15: ev_curve[3],
                              16: ev_curve[4]}
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.ev_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[1] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    ev_number += 1
        elif self.mode == 4:
            load_aux = {}
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2],
                              4: pv_curve[3], 5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5],
                              8: pv_curve[5], 9: pv_curve[6], 10: pv_curve[7], 11: pv_curve[7],
                              12: pv_curve[7], 13: pv_curve[7], 14: pv_curve[8], 15: pv_curve[8],
                              16: pv_curve[8], 17: pv_curve[9], 18: pv_curve[9], 19: pv_curve[10],
                              20: pv_curve[11], 21: pv_curve[12], 22: pv_curve[13]}
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1],
                              4: ev_curve[1], 5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2],
                              8: ev_curve[2], 9: ev_curve[3], 10: ev_curve[3], 11: ev_curve[3],
                              12: ev_curve[3], 13: ev_curve[3], 14: ev_curve[3], 15: ev_curve[3],
                              16: ev_curve[4]}
            pv_number = 0
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if (n in self.pv_connection) and (n in self.ev_connection):
                    load_aux[0] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[1] = [float(item) for item in pv_aux]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[2] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    pv_number += 1
                    ev_number += 1
                elif n in self.pv_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[1] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    pv_number += 1
                elif n in self.ev_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[1] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    ev_number += 1
        return load

    def get_load_sample_rtp(self, bus, pv_curve, ev_curve):
        load = {}
        load_aux = {}
        load_aux[0] = [float(-item) for item in self.pld_pred]
        if self.mode == 0:
            for n in range(bus):
                # normal distribution [Morshed, 2018] [unit: kWh]
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                load_aux[1] = [float(item) for item in load[n]]
                df = pd.DataFrame.from_dict(load_aux)
                self.copula.fit(df)
                load_sample = self.copula.sample(len(df))
                load[n] = load_sample[1]
        elif self.mode == 1:
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                load_aux[1] = [float(item) for item in load[n]]
                df = pd.DataFrame.from_dict(load_aux)
                self.copula.fit(df)
                load_sample = self.copula.sample(len(df))
                load[n] = load_sample[1]
            df = pd.DataFrame.from_dict(load)
            self.copula.fit(df)
            load = self.copula.sample(len(df))
        elif self.mode == 2:
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2],
                              4: pv_curve[3], 5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5],
                              8: pv_curve[5], 9: pv_curve[6], 10: pv_curve[7], 11: pv_curve[7],
                              12: pv_curve[7], 13: pv_curve[7], 14: pv_curve[8], 15: pv_curve[8],
                              16: pv_curve[8], 17: pv_curve[9], 18: pv_curve[9], 19: pv_curve[10],
                              20: pv_curve[11], 21: pv_curve[12], 22: pv_curve[13]}
            pv_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.pv_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[2] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    pv_number += 1
        elif self.mode == 3:
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1],
                              4: ev_curve[1], 5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2],
                              8: ev_curve[2], 9: ev_curve[3], 10: ev_curve[3], 11: ev_curve[3],
                              12: ev_curve[3], 13: ev_curve[3], 14: ev_curve[3], 15: ev_curve[3],
                              16: ev_curve[4]}
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.ev_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[2] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    ev_number += 1
        elif self.mode == 4:
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2],
                              4: pv_curve[3], 5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5],
                              8: pv_curve[5], 9: pv_curve[6], 10: pv_curve[7], 11: pv_curve[7],
                              12: pv_curve[7], 13: pv_curve[7], 14: pv_curve[8], 15: pv_curve[8],
                              16: pv_curve[8], 17: pv_curve[9], 18: pv_curve[9], 19: pv_curve[10],
                              20: pv_curve[11], 21: pv_curve[12], 22: pv_curve[13]}
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1],
                              4: ev_curve[1], 5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2],
                              8: ev_curve[2], 9: ev_curve[3], 10: ev_curve[3], 11: ev_curve[3],
                              12: ev_curve[3], 13: ev_curve[3], 14: ev_curve[3], 15: ev_curve[3],
                              16: ev_curve[4]}
            pv_number = 0
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if (n in self.pv_connection) and (n in self.ev_connection):
                    load_aux[1] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[2] = [float(item) for item in pv_aux]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[3] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    pv_number += 1
                    ev_number += 1
                elif n in self.pv_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[2] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    pv_number += 1
                elif n in self.ev_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[2] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    ev_number += 1
        return load

    # PV GENERATED POWER FUNCTION
    def get_pv_sample(self, bus):
        pv_sample = {}
        for i in range(bus):
            # Probability density function of the solar radiation
            radiation = ss.beta.pdf(np.linspace(0, 1, 24), ALFA_PV, BETA_PV, 0, 1)  # beta [Yaotang, 2016]
            radiation = radiation * R_FACTOR
            # Setting up the generated power curve (kW)
            pv = [0] * 24
            for n in range(np.size(pv)):
                if 0 <= radiation[n] < R_CERTAIN_POINT:
                    pv[n] = PV_POWER_GENERATION * (radiation[n] ** 2 / (R_CERTAIN_POINT * R_STANDARD_CONDITION))
                elif R_CERTAIN_POINT <= radiation[n] < R_STANDARD_CONDITION:
                    pv[n] = PV_POWER_GENERATION * (radiation[n] / R_STANDARD_CONDITION)
                elif radiation[n] >= R_STANDARD_CONDITION:
                    pv[n] = PV_POWER_GENERATION
            pv_sample[i] = pv
        return pv_sample

    # CONFIGURING THE EV SAMPLE
    def get_ev_sample(self, bus, mode):
        ev_curve = {}
        ev_curve_aux = [0] * 24
        ev_power = {}
        ev_power_aux = 0
        ev_incoming = [0] * bus
        ev_t_duration = []
        for bus_i in range(bus):
            # Number of EVs
            ev_incoming[bus_i] = np.random.randint(int(self.ev_max_connection[bus_i] / 3),
                                                   self.ev_max_connection[bus_i])
            for ev_i in range(ev_incoming[bus_i]):
                # Electric vehicle SOC
                soc_init, soc_min, soc_hini = get_ev_soc()
                # Estimating the charging times
                t_duration_charge = 0
                while t_duration_charge <= 0:
                    choice = np.random.randint(1, 4)
                    if choice == 1:
                        t_duration_charge = np.random.randint(1, 6)
                    elif choice == 2:
                        t_duration_charge = round(np.random.normal(3, 0.50))
                    else:
                        t_duration_charge = round(np.random.normal(6, 0.75))
                ev_t_duration.append(t_duration_charge)
                t_start_charge = int(np.random.normal(MU_EV_HOUR_ARRIVE, SD_EV_HOUR))
                while t_start_charge > 24:
                    t_start_charge = int(np.random.normal(MU_EV_HOUR_ARRIVE, SD_EV_HOUR))
                # Building the charging curve
                curve = [0] * (t_start_charge - 1)
                curve.extend([1] * t_duration_charge)
                if len(curve) < 24:
                    curve.extend([0] * (24 - len(curve)))
                else:
                    curve_aux = curve[24:]
                    n = len(curve_aux)
                    for i in range(n):
                        curve[i] = curve_aux[i]
                ev_curve_aux = ev_curve_aux + np.asarray(curve[0:24])
                # Car energy
                energy = (soc_init - soc_hini) * EV_BATTERY_CAPACITY
                ev_power_aux = ev_power_aux + energy / t_duration_charge
            if mode == 1:
                ev_curve_aux = [-item for item in ev_curve_aux]
                charge_time = np.zeros(8)
                discharge_time = np.random.randint(5, 15, 8)
                discharge_curve = np.concatenate((charge_time, discharge_time))
                discharge_curve = np.concatenate((discharge_curve, charge_time))
                ev_curve_aux = ev_curve_aux + discharge_curve
            ev_curve[bus_i] = ev_curve_aux
            ev_power[bus_i] = ev_power_aux
        return ev_curve, ev_power, ev_incoming, ev_t_duration, EV_BATTERY_CAPACITY