def plot_mean_clusters(tabclu_o):
    """Plot the mean curve of each cluster next to the mean of all rows.

    The input frame is copied, so the caller's object is left untouched.
    The last column of the copy is renamed to ``'cluster'`` and used as the
    group label. The frame must also contain a ``'Cliente'`` column; both
    columns are dropped before the remaining columns are averaged as
    functional data.

    Args:
        tabclu_o: DataFrame whose last column holds the cluster label.
    """
    tabclu = tabclu_o.copy()

    # Whatever the last column is called, treat it as the cluster label.
    renamed = list(tabclu.columns)
    renamed[-1:] = []
    renamed.append('cluster')
    tabclu.columns = renamed

    clusters = sorted(tabclu.cluster.value_counts().index.array)
    non_curve_columns = ['cluster', 'Cliente']

    sns.set_style("darkgrid")
    fig, ax = plt.subplots(figsize=(9, 7))

    for cluster in clusters:
        members = tabclu[tabclu.cluster == cluster].drop(
            columns=non_curve_columns,
        )
        cluster_mean = skfda.exploratory.stats.mean(skfda.FDataGrid(members))
        ax.plot(
            cluster_mean.data_matrix[0, :, 0],
            '-o',
            label=f'Cluster {cluster}',
        )

    # Overall mean, computed over every row regardless of its cluster.
    curves = tabclu.drop(columns=non_curve_columns)
    overall_mean = skfda.exploratory.stats.mean(skfda.FDataGrid(curves))
    ax.plot(overall_mean.data_matrix[0, :, 0], '--o', label='All data')

    # One tick per remaining column, labelled with the column name.
    tick_labels = [str(column) for column in curves.columns]
    ax.set_xticks(np.arange(len(tick_labels)))
    ax.set_xticklabels(tick_labels, rotation=90)

    plt.title('Comportamiento Volumenes')
    plt.legend()
    plt.show()
def test_several_variables(self):
    """Check the inner product of a function of three variables.

    The function ``x * y * z`` is sampled on ``[0, 1] x [0, 2] x [0, 3]``
    and its inner product with itself is compared against 8, both on the
    grid representation and after conversion to a tensor monomial basis.
    """

    def product(x, y, z):
        return x * y * z

    t = np.linspace(0, 1, 100)
    axes = [t, 2 * t, 3 * t]

    mesh = ndm(*axes)
    values = product(*mesh)

    fd = skfda.FDataGrid(values[np.newaxis, ...], sample_points=axes)

    factors = [
        Monomial(n_basis=5, domain_range=(0, upper))
        for upper in (1, 2, 3)
    ]
    fd_basis = fd.to_basis(Tensor(factors))

    expected = 8

    for representation in (fd, fd_basis):
        np.testing.assert_allclose(
            skfda.misc.inner_product(representation, representation),
            expected,
            rtol=1e-5,
        )
def test_vector_valued(self):
    """Check the inner product of a vector-valued function.

    The function ``t -> (t**2, 3 * t)`` is sampled on ``[0, 1]`` and its
    inner product with itself is compared against ``1 / 5 + 3``, both on
    the grid representation and after conversion to a vector-valued
    monomial basis.
    """
    t = np.linspace(0, 1, 100)

    # One sample whose two output coordinates are t**2 and 3 * t.
    coordinates = np.column_stack((t**2, 3 * t))
    fd = skfda.FDataGrid(coordinates[np.newaxis, ...], sample_points=[t])

    components = [Monomial(n_basis=5), Monomial(n_basis=5)]
    fd_basis = fd.to_basis(VectorValued(components))

    expected = 1 / 5 + 3

    for representation in (fd, fd_basis):
        np.testing.assert_allclose(
            skfda.misc.inner_product(representation, representation),
            expected,
            rtol=1e-5,
        )
def data_for_twos():
    """Length-100 array in which all the elements are two."""
    shape = (100, 10, 10, 3)
    data_matrix = np.full(shape, fill_value=2)

    # Two grid axes of ten points each: 0..9 and 0.0..0.9.
    first_axis = np.arange(10)
    second_axis = np.arange(10) / 10

    return skfda.FDataGrid(
        data_matrix,
        grid_points=[first_axis, second_axis],
    )
def data_missing():
    """Length-2 array with [NA, Valid].

    Builds a float data matrix of shape ``(2, 10, 10, 3)``, overwrites the
    whole first sample with NaN and leaves the second sample untouched.

    Returns:
        The ``skfda.FDataGrid`` built from that matrix on two grid axes of
        ten points each.
    """
    # ``np.float_`` and ``np.NaN`` were removed in NumPy 2.0; the canonical
    # spellings ``np.float64`` and ``np.nan`` work on every NumPy version.
    data_matrix = np.arange(
        2 * 10 * 10 * 3,
        dtype=np.float64,
    ).reshape(2, 10, 10, 3)

    # First sample is entirely missing.
    data_matrix[0, ...] = np.nan

    grid_points = [np.arange(10), np.arange(10) / 10]
    return skfda.FDataGrid(data_matrix, grid_points=grid_points)
def plot_inertias(tabla):
    """Plot the K-means inertia for 2 to 14 clusters.

    For every cluster count a fresh ``FDataGrid`` is built from a copy of
    ``tabla``, a functional K-means model is fitted on it and its
    ``inertia_`` is recorded. The inertias are then drawn against the
    cluster counts on the current matplotlib figure.

    Args:
        tabla: Data accepted by ``skfda.FDataGrid`` that offers ``.copy()``.
    """
    cluster_counts = np.arange(2, 15)
    inertias = []

    for n_clusters in cluster_counts:
        model = skfda.ml.clustering.KMeans(n_clusters=n_clusters)
        model.fit(skfda.FDataGrid(tabla.copy()))
        inertias.append(model.inertia_)

    plt.plot(cluster_counts, inertias, '*-')
def cluster_fda(table, num_clusters, marca):
    """Cluster the rows of ``table`` and attach the labels as a new column.

    Args:
        table: DataFrame passed to ``skfda.FDataGrid``; its index is reused
            for the label frame.
        num_clusters: Number of clusters given to the functional K-means.
        marca: Suffix of the label column, named ``cluster_<marca>``.

    Returns:
        The index-wise inner merge of ``table`` with the fitted labels.
    """
    model = skfda.ml.clustering.KMeans(n_clusters=num_clusters)
    model.fit(skfda.FDataGrid(table))

    # The label frame has a single column, named 0 by default.
    labels = pd.DataFrame(model.labels_, index=table.index)

    merged = table.merge(
        labels,
        how='inner',
        left_index=True,
        right_index=True,
    )
    return merged.rename(columns={0: f'cluster_{marca}'})
def data():
    """
    Length-100 array for this type.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    shape = (100, 10, 10, 3)

    # Consecutive integers starting at 1, so no two samples are equal.
    values = np.arange(1, 100 * 10 * 10 * 3 + 1)
    data_matrix = values.reshape(shape)

    first_axis = np.arange(10)
    second_axis = np.arange(10) / 10

    return skfda.FDataGrid(
        data_matrix,
        grid_points=[first_axis, second_axis],
    )
def build_dataset(d):
    """Merge the values of a mapping into one dataset with integer labels.

    Each value of ``d`` must expose ``shape``, ``data_matrix`` and
    ``sample_points``. The data matrices are stacked in iteration order,
    and every sample is labelled with the position of the value it came
    from. The sample points of the first value are used for the result.

    Args:
        d: Mapping whose values are the groups to merge; keys are ignored.

    Returns:
        Tuple ``(X, y)`` with the stacked ``FDataGrid`` and the label array.
    """
    groups = list(d.values())

    labels = [
        np.full(shape=group.shape[0], fill_value=position)
        for position, group in enumerate(groups)
    ]
    stacked = np.vstack([group.data_matrix for group in groups])

    X = skfda.FDataGrid(
        data_matrix=stacked,
        sample_points=groups[0].sample_points,
    )
    y = np.concatenate(labels)

    return X, y
def test_basis_conversion(self):
    """Check that a heavy endpoint penalty makes both ends nearly equal.

    Four straight lines are smoothed into a B-spline basis with a Tikhonov
    penalty on the difference between the values at 1 and at 0. With a
    smoothing parameter of 10000, the smoothed curves must agree at 0 and
    1 up to an absolute tolerance of 0.001.
    """
    rows = np.linspace([0, 1, 2, 3], [1, 2, 3, 4], 100)
    fd = skfda.FDataGrid(rows.T)

    def endpoint_difference(x):
        # Value at the right end minus value at the left end.
        return x(1)[:, 0] - x(0)[:, 0]

    basis = skfda.representation.basis.BSpline(
        n_basis=10,
        domain_range=fd.domain_range,
    )
    penalty = TikhonovRegularization(endpoint_difference)

    smoother = skfda.preprocessing.smoothing.BasisSmoother(
        basis=basis,
        regularization=penalty,
        smoothing_parameter=10000,
    )
    fd_basis = smoother.fit_transform(fd)

    np.testing.assert_allclose(fd_basis(0), fd_basis(1), atol=0.001)
def split_data(data, max_pow):
    """Cut every series of ``data`` into equal segments of functional data.

    Segments have ``2**max_pow + 1`` points. The number of segments is
    derived from the length of the first series, and any points past the
    last whole segment are discarded.

    Args:
        data: Mapping from key to an array-like series.
        max_pow: Exponent fixing the segment length.

    Returns:
        Dict with the same keys, each holding an ``FDataGrid`` whose
        samples are the segments and whose sample points are
        ``range(segment length)``.
    """
    first_key, *_ = data

    segment_length = 2**max_pow + 1
    n_segments = data[first_key].shape[0] // segment_length
    usable_points = segment_length * n_segments

    def to_grid(series):
        # Drop the tail that does not fill a whole segment, then split.
        segments = np.array(np.split(series[:usable_points], n_segments))
        return skfda.FDataGrid(
            data_matrix=segments,
            sample_points=range(segments.shape[1]),
        )

    return {key: to_grid(series) for key, series in data.items()}
def _test_generic(self, estimator_class):
    """Compare the leave-one-out scorer with its alternative version.

    A parameter search over the smoothing parameters ``[2, 3]`` is run
    twice on the same parabola and the same estimator instance, once with
    each scorer. The mean test scores of both runs must be almost equal.

    Args:
        estimator_class: Smoother class, instantiated without arguments.
    """
    scorer = validation.LinearSmootherLeaveOneOutScorer()
    scorer_alt = _LinearSmootherLeaveOneOutScorerAlternative()

    x = np.linspace(-2, 2, 5)
    fd = skfda.FDataGrid(x**2, x)

    estimator = estimator_class()

    def mean_test_scores(scoring):
        search = validation.SmoothingParameterSearch(
            estimator,
            [2, 3],
            scoring=scoring,
        )
        search.fit(fd)
        return np.array(search.cv_results_['mean_test_score'])

    score = mean_test_scores(scorer)
    score_alt = mean_test_scores(scorer_alt)

    np.testing.assert_array_almost_equal(score, score_alt)
# In :ref:`sphx_glr_auto_examples_plot_landmark_registration.py` it is shown # the simplest case, where it is used to apply a transformation of the time # scale of unidimensional data to register its features. # # The following example shows the basic usage applied to a surface and a # curve, although the method will work for data with arbitrary dimensions to. # # Firstly we will create a data object containing a surface # :math:`g: \mathbb{R}^2 \rightarrow \mathbb{R}`. # # Constructs example surface X, Y, Z = axes3d.get_test_data(1.2) data_matrix = [Z.T] grid_points = [X[0, :], Y[:, 0]] g = skfda.FDataGrid(data_matrix, grid_points) # Sets cubic interpolation g.interpolation = skfda.representation.interpolation.SplineInterpolation( interpolation_order=3) # Plots the surface g.plot() ############################################################################## # We will create a parametric curve # :math:`f(t)=(10 \, \cos(t), 10 \, sin(t))`. The result of the composition, # :math:`g \circ f:\mathbb{R} \rightarrow \mathbb{R}` will be another # functional object with the values of :math:`g` along the path given by # :math:`f`. #
# In :ref:`sphx_glr_auto_examples_plot_landmark_registration.py` it is shown # the simplest case, where it is used to apply a transformation of the time # scale of unidimensional data to register its features. # # The following example shows the basic usage applied to a surface and a # curve, although the method will work for data with arbitrary dimensions to. # # Firstly we will create a data object containing a surface # :math:`g: \mathbb{R}^2 \rightarrow \mathbb{R}`. # # Constructs example surface X, Y, Z = axes3d.get_test_data(1.2) data_matrix = [Z.T] sample_points = [X[0, :], Y[:, 0]] g = skfda.FDataGrid(data_matrix, sample_points) # Sets cubic interpolation g.interpolator = skfda.representation.interpolation.SplineInterpolator( interpolation_order=3) # Plots the surface g.plot() ############################################################################## # We will create a parametric curve # :math:`f(t)=(10 \, \cos(t), 10 \, sin(t))`. The result of the composition, # :math:`g \circ f:\mathbb{R} \rightarrow \mathbb{R}` will be another # functional object with the values of :math:`g` along the path given by # :math:`f`. #
############################################################################## # # All the extrapolators shown will work with multidimensional objects. # In the following example it is constructed a 2d-surface and it is extended # using periodic extrapolation. fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # Make data. t = np.arange(-2.5, 2.75, 0.25) X, Y = np.meshgrid(t, t) Z = np.exp(-0.5 * (X**2 + Y**2)) # Creation of FDataGrid fd_surface = skfda.FDataGrid([Z], (t, t)) t = np.arange(-7, 7.5, 0.5) # Evaluation with periodic extrapolation values = fd_surface((t, t), grid=True, extrapolation="periodic") T, S = np.meshgrid(t, t) ax.plot_wireframe(T, S, values[0, ..., 0], alpha=.3, color="C0") ax.plot_surface(X, Y, Z, color="C0") ############################################################################### # # The previous extension can be compared with the extrapolation using the values # of the bounds.
# Raw points of sample 10 with its smoothed version drawn on top.
fd[10].scatter(s=0.5)
nw['fdatagrid'][10].plot(c='g')

###############################################################################
# Now, we can see the effects of a proper smoothing. We can plot the same 5
# samples from the beginning using the Nadaraya-Watson kernel smoother with
# the best choice of parameter.

plt.figure(4)
nw['fdatagrid'][0:5].plot()

###############################################################################
# We can also appreciate the effects of undersmoothing and oversmoothing in
# the following plots.

# Each smoothed curve is the smoothing matrix returned by `ks.nw` (h=2 for
# the under-smoothed case, h=15 for the over-smoothed one) applied to the
# values of sample 10. The remaining arguments are copied from `fd`.
fd_us = skfda.FDataGrid(
    ks.nw(fd.sample_points, h=2).dot(fd.data_matrix[10, ..., 0]),
    fd.sample_points, fd.sample_range, fd.dataset_label,
    fd.axes_labels)
fd_os = skfda.FDataGrid(
    ks.nw(fd.sample_points, h=15).dot(fd.data_matrix[10, ..., 0]),
    fd.sample_points, fd.sample_range, fd.dataset_label,
    fd.axes_labels)

# Under-smoothed
fd[10].scatter(s=0.5)
fd_us.plot(c='sandybrown')

# Over-smoothed
plt.figure()
fd[10].scatter(s=0.5)
fd_os.plot(c='r')
plt.tight_layout()

# Build a piecewise warping on [0, 1]. `time` keeps the untouched grid while
# `t` is overwritten in place with the warped values.
t = np.linspace(0, 1, 200)
time = np.copy(t)

eps = .04   # width of the steep interval
a = .4      # start of the steep interval
m = 11.9    # slope applied inside the steep interval

idx1 = t < a            # before the steep interval: left as the identity
idx3 = t > a + eps      # after the steep interval
idx2 = np.logical_not(np.logical_or(idx1, idx3))  # the steep interval itself

# Steep linear piece through (a, a) with slope m.
t[idx2] = m * (t[idx2] - a) + a
# Remaining part: evenly spaced from the last steep value up to 1.
t[idx3] = np.linspace(t[idx2][-1], 1, idx3.sum())

warp = skfda.FDataGrid([t], time, domain_range=(0, 1))

plt.figure("pinching-warping")
warp.plot()

# `fd2` and `fd` are defined outside this fragment.
fd_reg = fd2.compose(warp)
plt.tight_layout()

plt.figure("pinching-effect")

# Re-sample the composed function on the sampling points of `fd`.
fd_reg = fd_reg.to_grid(fd.sample_points[0])
x1 = fd_reg.data_matrix.squeeze()
x2 = fd.data_matrix.squeeze()
t = fd.sample_points[0]

# Past t = .4358 the values of `fd` overwrite those of the composition.
# NOTE(review): this presumably mutates `fd_reg` in place, if `squeeze`
# returns a view of its data matrix -- confirm.
x1[t > .4358] = x2[t > .4358]
fd.plot()
fd_align.plot(color='C0', linestyle='--')

# Legend
plt.legend(['$f$', '$g$', '$f \\circ \\gamma $'])

plt.tight_layout()

###############################################################################
# The non-linear transformation :math:`\gamma` applied to :math:`f` in
# the alignment can be obtained using :func:`elastic_registration_warping
# <skfda.preprocessing.registration.elastic_registration_warping>`.
#

# Warping to align f to g
warping = skfda.preprocessing.registration.elastic_registration_warping(f, g)

# Identity function sampled on the same points as the warping.
# NOTE(review): `identity` is not used in the visible part of the script; the
# identity line below is drawn with `plt.plot(t, t)` instead.
identity = skfda.FDataGrid([warping.sample_points[0]],
                           warping.sample_points[0])

plt.figure("pairwise-alignment-warping")

# Warping used
warping.plot()

# Plot identity
t = np.linspace(0, 1)
plt.plot(t, t, linestyle='--')

# Legend
plt.legend(['$\\gamma$', '$\\gamma_{id}$'])

plt.tight_layout()

###
df=pd.concat([yin,xin], axis=1) # Using statsmodels kde = KernelReg(x, y, var_type='c', reg_type='ll', bw=[3.2]) estimator = kde.fit(y) estimator = np.reshape(estimator[0], df.shape[0]) plt.scatter(x, y) plt.scatter(x, estimator, c='r') plt.show() # Using SKFDA df_grid=skfda.FDataGrid(df) bandwidth = np.arange(0.1, 5, 0.2) llr = val.SmoothingParameterSearch( ks.LocalLinearRegressionSmoother(), bandwidth) fit = llr.fit(df_grid) llr_df = llr.transform(df_grid) plt.scatter(x, y) plt.xlabel('x') plt.ylabel('y') plt.scatter(x, llr_df, c='r') plt.show()
def setUp(self):
    """Store an ``FDataGrid`` with five identical samples ``[1, 2, 3, 4]``."""
    n_samples = 5
    rows = [[1, 2, 3, 4] for _ in range(n_samples)]

    self.fd = skfda.FDataGrid(rows)
blit=True)
# The line above closes a call that starts outside this fragment.

# Write the animation built above to a GIF file.
anim.save("warpings.gif", writer='imagemagick')

fig = plt.figure("composition-animation")

ax = plt.axes(xlim=(-0.05, 1.05), ylim=(-1.2, 1.2))

# A single sinusoidal sample with every source of variation set to zero.
fd = skfda.datasets.make_sinusoidal_process(n_samples=1, phase_std=0,
                                            amplitude_std=0, error_std=0)

# Reference curve drawn dashed, with markers at (0, 0) and (1, 0).
fd.plot(color='maroon', linestyle='--', label=r'$f_i$')
plt.scatter([0, 1], [0, 0], color='maroon')

# `data` and `t` come from earlier in the script.
# NOTE(review): the unpacking needs the FDataGrid to yield exactly four
# items -- presumably one per sample in `data`; confirm.
wa, wb, wc, wd = skfda.FDataGrid(data, sample_points=t)

# Line artist that the animation updates; it starts as the unwarped curve.
line, = ax.plot(t, fd.data_matrix.squeeze(), color='C0', lw=2,
                label=r'$f_i(\gamma_i(t))$')

plt.legend()
plt.tight_layout()


def init2():
    """Clear the animated line and return it as a one-element tuple."""
    line.set_data([], [])
    return line,
def setUp(self):
    """Store a two-sample ``FDataGrid`` and its B-spline representation."""
    samples = [
        [1, 2, 3, 4, 5, 6, 7],
        [2, 3, 4, 5, 6, 7, 9],
    ]
    self.fd = skfda.FDataGrid(samples)

    basis = skfda.representation.basis.BSpline(n_basis=5)
    self.fd_basis = self.fd.to_basis(basis)