Esempio n. 1
0
    def est(self, n_pca):
        """Fit a PCA on the graph set aligned with the current permutations.

        Every graph of ``self.aX`` is deep-copied, permuted with its entry of
        ``self.f`` and flattened into one row of a feature matrix
        (``to_matrix_with_attr``).  The matrix is passed through ``scale``
        when ``self.scale`` is true and is then decomposed with PCA.

        Side effect: ``self.barycenter`` is set to the per-feature mean of
        the matrix the PCA was fitted on (a Series indexed by feature name).
        It is now set in both the scaled and the unscaled case; previously it
        was only assigned when no scaling was applied.

        Args:
            n_pca: value forwarded to ``PCA(n_components=...)``.

        Returns:
            Tuple ``(vals, vecs, scores)``: the explained-variance ratios, a
            DataFrame of the components (one row per component, one column
            per feature) and the output of ``fit_transform``.
        """
        # Step 1: build the permuted copy of the dataset.
        G_per = GraphSet()
        for i in range(self.aX.size()):
            G = copy.deepcopy(self.aX.X[i])
            G.permute(self.f[i])
            G_per.add(G)
        Mat = G_per.to_matrix_with_attr()

        # Step 2: optionally standardize the features.
        if self.scale:
            Mat_scale = pd.DataFrame(scale(Mat), columns=Mat.columns)
        else:
            Mat_scale = Mat

        # Step 3: principal component analysis.
        pca = PCA(n_components=n_pca)
        scores = pca.fit_transform(Mat_scale)
        vals = pca.explained_variance_ratio_
        vecs = pd.DataFrame(pca.components_, columns=Mat_scale.columns)

        # pca.mean_ is the per-feature mean of the fitted matrix.  Reading it
        # from the model avoids np.mean(DataFrame), which reduces over both
        # axes (returning a scalar) on recent pandas versions.
        self.barycenter = pd.Series(pca.mean_, index=Mat_scale.columns)
        return (vals, vecs, scores)
Esempio n. 2
0
    def est(self, n_pca, k, old_pca=None):
        """Fit the PCA of iteration ``k`` and record the reconstruction errors.

        The graphs of ``self.aX`` are deep-copied, permuted with ``self.f``
        and flattened into a feature matrix, which is passed through
        ``scale`` when ``self.scale == True``.  A PCA with ``n_pca``
        components is fitted on it.

        Side effects:
            * ``self.pcas[k]`` receives ``[pca, matrix]``.
            * ``self.barycenter`` receives ``pca.mean_`` as a Series indexed
              by feature name.
            * ``self.pca_error[i, k]`` receives the distance between permuted
              graph ``i`` and the graph rebuilt from
              ``pca.inverse_transform(scores)``.
            * when ``k > 0``, ``self.pcaold_error[i, k]`` receives the same
              distance computed with ``old_pca.inverse_transform(scores)``.

        Args:
            n_pca: value forwarded to ``PCA(n_components=...)``.
            k: iteration index used as key/column of the stored results.
            old_pca: fitted PCA of the previous iteration; only used (and
                required) when ``k > 0``.

        Returns:
            Tuple ``(vals, vecs, scores, pca)``.
        """
        n_obs = self.aX.size()

        # Permuted copy of the dataset.
        permuted = GraphSet()
        for idx in range(n_obs):
            graph = copy.deepcopy(self.aX.X[idx])
            graph.permute(self.f[idx])
            permuted.add(graph)

        raw = permuted.to_matrix_with_attr()
        if self.scale == True:
            data = pd.DataFrame(scale(raw), columns=raw.columns)
        else:
            data = raw
        features = data.columns

        pca = PCA(n_components=n_pca)
        scores = pca.fit_transform(data)
        vals = pca.explained_variance_ratio_
        vecs = pd.DataFrame(pca.components_, columns=features)
        self.pcas[k] = [pca, data]
        self.barycenter = pd.Series(pca.mean_, index=features)

        def record_errors(model, store):
            # Map the scores back to feature space with `model`, rebuild one
            # graph per observation and store its distance from the data.
            rebuilt_rows = pd.DataFrame(model.inverse_transform(scores),
                                        columns=features)
            for row in range(n_obs):
                rebuilt = self.give_me_a_network(rebuilt_rows.iloc[row, :],
                                                 n_a=self.aX.node_attr,
                                                 e_a=self.aX.edge_attr)
                matched = ID(self.distance).align(permuted.X[row], rebuilt)
                store[row, k] = matched.dis()

        if k > 0:
            # Alignment error: reconstruction through the previous PCA.
            record_errors(old_pca, self.pcaold_error)
        # PCA error: reconstruction through the PCA fitted above.
        record_errors(pca, self.pca_error)
        return (vals, vecs, scores, pca)
Esempio n. 3
0
 def align_G(self, *args):
     """Align a graph, or every graph of a graph set, to ``self.m_C``.

     The object to align is the first positional argument.  The original
     code tested the ``args`` tuple itself, so no branch was ever taken,
     and its set loop used ``i == size`` as condition, so it never
     iterated over a non-empty set.

     Args:
         *args: first element is a ``Graph`` or a ``GraphSet``; any further
             element is ignored.

     Returns:
         The argument itself when ``self.m_C`` is None.  Otherwise, for a
         ``Graph``, the ``alignedSource()`` of
         ``self.m_matcher.align(graph, self.m_C)``; for a ``GraphSet``, a
         new ``GraphSet`` holding that result for each member.  None when
         no argument is given or it is neither type.
     """
     target = args[0] if args else None

     if isinstance(target, Graph):
         if self.m_C is None:
             return target
         return self.m_matcher.align(target, self.m_C).alignedSource()

     if isinstance(target, GraphSet):
         if self.m_C is None:
             return target
         aligned_set = GraphSet()
         for i in range(target.size()):
             # add to the new graph set an aligned graph
             aligned_set.add(self.align_G(target.X[i]))
         return aligned_set

     return None
Esempio n. 4
0
 def variance(self):
     """Return the sample variance of the graph set around its mean.

     The value is cached in ``self.var``.  On the first call the mean is
     obtained from ``self.align_and_est()`` if ``self.mean`` is not a
     ``Graph``, and the per-observation distances ``self.m_dis`` are
     computed (if missing) with ``self.matcher.dis`` between the graphs
     permuted by ``self.f`` and the mean.  The variance is the average of
     ``self.m_dis[i]`` over the observations.

     ``None`` checks use identity (``is None``) so they do not depend on
     how the stored objects implement ``==`` / ``!=``.

     Returns:
         The variance, or None (after printing a message) when the sample
         is missing or empty.
     """
     if self.aX is None or self.aX.size() == 0:
         print("Sample of graphs is empty")
         return None
     if self.var is not None:
         return self.var

     if not isinstance(self.mean, Graph):
         self.mean = self.align_and_est()
     n = self.aX.size()
     if self.m_dis is None:
         # the variance is computed as a distance between the mean and the sample
         align_X = GraphSet()
         for i in range(n):
             G = copy.deepcopy(self.aX.X[i])
             G.permute(self.f[i])
             align_X.add(G)
         self.m_dis = self.matcher.dis(align_X, self.mean)
     self.var = sum((self.m_dis[i] for i in range(n)), 0.0) / n
     return self.var
Esempio n. 5
0
    def est(self, k):
        """Fit the regression model of iteration ``k`` on the aligned graphs.

        Steps: (1) every graph of ``self.aX`` is deep-copied, permuted with
        ``self.f`` and ``self.aX`` is replaced by the permuted set; (2) the
        set is flattened into the response matrix ``y``; (3) the regressors
        stored in each graph's ``s`` form the design matrix ``x``; (4) the
        model selected by ``self.model_type`` is fitted.

        Fixes over the previous version: the permutations are stored as
        snapshots instead of references to the live ``self.f``, and the GPR
        error uses the fitted column of ``y`` instead of the undefined name
        ``y_tr`` with a hard-coded column.

        Side effects: sets ``self.f_iteration[k]``, ``self.aX``,
        ``self.variables_names`` and ``self.regressor``; with OLS also
        ``self.f_all[k]``; with GPR also ``self.models[name]`` for every
        variable and the weighted squared error of iteration ``k`` in
        ``self.regression_error``.

        Args:
            k: iteration index used as key of the stored results.

        Returns:
            ``(model, along_geo_pred)`` for OLS, or
            ``(model, along_geo_pred, y_std)`` for GPR, where ``model`` and
            ``y_std`` belong to the last fitted variable.

        Raises:
            Exception: if ``self.model_type`` is neither 'OLS' nor 'GPR'.
        """
        # self.f is updated in place by the alignment steps, so keep a copy:
        # storing the object itself would make all iterations share a record.
        self.f_iteration[k] = copy.deepcopy(self.f)

        # Step 1: Create the current permuted dataset
        G_per = GraphSet()
        for i in range(self.aX.size()):
            G_temp = copy.deepcopy(self.aX.X[i])
            G_temp.permute(self.f[i])
            G_temp.s = copy.deepcopy(self.aX.X[i].s)
            G_per.add(G_temp)
        self.aX = copy.deepcopy(G_per)

        # Step 2: Transform it into a matrix
        y = G_per.to_matrix_with_attr()
        self.variables_names = y.columns
        n_obs = y.shape[0]

        # Step 3: create the x matrix, one row of regressors per observation
        x = pd.DataFrame(columns=range(len(G_per.X[0].s)),
                         index=range(n_obs))
        for i in range(n_obs):
            x.iloc[i] = [float(regressor) for regressor in G_per.X[i].s]
        self.regressor = x

        # Step 4: fit the chosen regression model
        if self.model_type == 'OLS':
            # Ordinary Least Squares
            model = linear_model.LinearRegression()
            model.fit(x, y)
            along_geo_pred = pd.DataFrame(model.predict(x),
                                          columns=self.variables_names)
            self.f_all[k] = copy.deepcopy(self.f)
            return (model, along_geo_pred)

        if self.model_type == 'GPR':
            # Gaussian Process: one independent process per variable
            # (i.e. per node or edge attribute).
            along_geo_pred = pd.DataFrame(index=range(n_obs),
                                          columns=self.variables_names)
            # NOTE(review): the per-variable standard deviations collected
            # here are not returned; only the last variable's y_std is.
            # Confirm which of the two callers actually need.
            along_geo_pred_sd = pd.DataFrame(index=range(n_obs),
                                             columns=self.variables_names)
            weighted_error = [0.0] * n_obs
            for m in range(len(self.variables_names)):
                name = self.variables_names[m]
                model = gaussian_process.GaussianProcessRegressor(
                    kernel=self.kernel,
                    n_restarts_optimizer=self.restarts,
                    alpha=self.alpha)
                # Fitting the Gaussian Process means finding the hyperparameters
                model.fit(x, y.iloc[:, m])
                self.models[name] = model
                # Predict on the training inputs to compute the regression error
                y_pred, y_std = model.predict(x, return_std=True)
                along_geo_pred.loc[:, name] = pd.Series(y_pred)
                along_geo_pred_sd.loc[:, name] = pd.Series(y_std)
                # Squared residual of this variable, weighted by the
                # predictive standard deviation.
                observed = y.iloc[:, m]
                for i in range(n_obs):
                    residual = float(observed.iloc[i]) - y_pred[i]
                    weighted_error[i] += residual**2 / y_std[i]

            if isinstance(self.regression_error, pd.DataFrame):
                # DataFrame layout: one row per observation, column k.
                self.regression_error.iloc[:, k] += weighted_error
            else:
                # Mapping layout keyed by (observation, iteration).
                for i in range(n_obs):
                    self.regression_error[i, k] = (
                        self.regression_error.get((i, k), 0.0) +
                        weighted_error[i])
            return (model, along_geo_pred, y_std)

        raise Exception("Wrong regression model: select either OLS or GPR")
Esempio n. 6
0
class ggr_aac(aligncompute):
    def __init__(self,
                 graphset,
                 matcher,
                 distance,
                 regression_model='OLS',
                 nr_iterations=100,
                 alpha=1e-10,
                 kernel=None,
                 restarts=0):
        """Set up a generalized geodesic regression with alignment.

        Args:
            graphset: graph set forwarded to ``aligncompute.__init__``.
            matcher: matcher forwarded to ``aligncompute.__init__``; used to
                compute the alignments.
            distance: distance used to compute the regression error.
            regression_model: 'OLS' (e.g. network-on-scalar regression) or
                'GPR' (e.g. network-on-time regression).
            nr_iterations: maximum number of iterations of the algorithm.
            alpha: ``alpha`` of the Gaussian process regressors (GPR only).
            kernel: kernel of the Gaussian process regressors (GPR only);
                None selects ``ConstantKernel(1.0) * RBF(1.0)``.
            restarts: ``n_restarts_optimizer`` of the regressors (GPR only).
        """
        # distance and matcher used to compute the alignment
        aligncompute.__init__(self, graphset, matcher)
        self.distance = distance
        self.nr_iterations = nr_iterations
        self.model_type = regression_model

        # alpha, restarts, models and kernel only exist in GPR mode.
        if self.model_type == 'GPR':
            self.alpha = alpha
            self.restarts = restarts
            # one fitted Gaussian process per variable name
            self.models = {}
            if kernel is None:
                # Default kernel: s * exp(-d(x1, x2)^2 / (2 * l^2)), where
                # - s is the parameter of the ConstantKernel
                # - l is the length scale of the RBF (radial basis function)
                # both starting at 1.0.  See
                # https://scikit-learn.org/stable/modules/gaussian_process.html#gp-kernels
                kernel = (gaussian_process.kernels.ConstantKernel(1.0) *
                          gaussian_process.kernels.RBF(1.0))
            self.kernel = kernel

        # Errors keyed by (observation, iteration)
        self.regression_error = {}
        self.postalignment_error = {}
        # Permutations recorded at each iteration
        self.f_iteration = {}
        self.f_all = {}

    def align_and_est(self):
        """Alternate alignment and regression until the error stabilises.

        A random graph with more than one node is taken as first reference
        and every observation is aligned to it.  Then, for at most
        ``self.nr_iterations`` iterations, the regression is fitted
        (``self.est``) and every observation is re-aligned to its own
        prediction (``self.align_pred``).  The loop stops early when the
        absolute change of the summed regression error between two
        consecutive iterations falls below 0.05.

        Results are stored on the instance: ``self.model`` always; for OLS
        ``self.network_coef`` (intercept and one graph per regressor) and
        the two error tables converted to DataFrames; otherwise
        ``self.y_post`` and ``self.y_post_std``.  On early convergence with
        OLS, ``self.nr_iterations`` is overwritten with the index of the
        last iteration.

        Changes over the previous version: the result export is written
        once instead of three times, the early-exit path now also stores
        ``y_post`` / ``y_post_std`` for non-OLS models, the printed
        threshold matches the tested one, and a sample made only of
        one-node graphs raises instead of looping forever.

        Raises:
            ValueError: if no graph with more than one node is available.
        """
        tolerance = 0.05
        n_obs = self.aX.size()

        # INITIALIZATION: select a random candidate with more than one node.
        if all(self.aX.X[i].n_nodes == 1 for i in range(n_obs)):
            raise ValueError(
                "No graph with more than one node to use as reference")
        first_id = random.randint(0, n_obs - 1)
        m_1 = self.aX.X[first_id]
        while m_1.n_nodes == 1:
            first_id = random.randint(0, n_obs - 1)
            m_1 = self.aX.X[first_id]

        # Align all the observations wrt the random candidate (in parallel;
        # 'sharedmem' lets the workers write into self).
        Parallel(n_jobs=10, require='sharedmem')(
            delayed(self.two_net_match)(m_1, i, first_id)
            for i in range(n_obs))
        # Compute the first Generalized Geodesic Regression line
        last_fit = self.est(k=0)
        # Align the set wrt the geodesic
        Parallel(n_jobs=10, require='sharedmem')(
            delayed(self.align_pred)(last_fit[1], i, 0)
            for i in range(n_obs))

        # AAC iterative algorithm
        for k in range(1, self.nr_iterations):
            fit = self.est(k)
            Parallel(n_jobs=6, require='sharedmem')(
                delayed(self.align_pred)(fit[1], i, k)
                for i in range(n_obs))
            # Step: change of the total regression error between iterations
            previous_total = sum(
                [self.regression_error[i, k - 1] for i in range(n_obs)])
            current_total = sum(
                [self.regression_error[i, k] for i in range(n_obs)])
            step_range = abs(previous_total - current_total)

            if step_range < tolerance:
                # Iterations 0..k have been recorded.
                self._store_fit(fit, k + 1)
                if self.model_type == 'OLS':
                    self.nr_iterations = k
                print("Step Range smaller than " + str(tolerance))
                return
            # else go on: the new fit becomes the current one.
            last_fit = fit

        print("Maximum number of iteration reached.")
        self._store_fit(last_fit, self.nr_iterations)

    def _store_fit(self, fit, n_recorded):
        """Publish the tuple returned by ``est`` as the final result."""
        self.model = fit[0]
        if self.model_type == 'OLS':
            # Coefficients as graphs: the intercept first, then one graph
            # per regressor.
            self.network_coef = GraphSet()
            self.network_coef.add(
                self.give_me_a_network(pd.Series(
                    data=fit[0].intercept_.flatten(),
                    index=self.variables_names),
                                       self.aX.node_attr,
                                       self.aX.edge_attr,
                                       s='Intercept'))
            for i_th in range(fit[0].coef_.shape[1]):
                self.network_coef.add(
                    self.give_me_a_network(pd.Series(
                        data=fit[0].coef_[:, i_th],
                        index=self.variables_names),
                                           self.aX.node_attr,
                                           self.aX.edge_attr,
                                           s='beta' + str(i_th)))
            self.regression_error = self._errors_as_frame(
                self.regression_error, n_recorded)
            self.postalignment_error = self._errors_as_frame(
                self.postalignment_error, n_recorded)
        else:
            # Return the prior and the posterior
            # ATTENTION: CHECK ON THE PRIOR WITH AASA
            self.y_post = fit[1]
            self.y_post_std = fit[2]

    def _errors_as_frame(self, errors, n_recorded):
        """Turn errors keyed by (observation, iteration) into a DataFrame."""
        return pd.DataFrame.from_dict({
            iteration: [
                errors[observation, iteration]
                for observation in range(self.aX.size())
            ]
            for iteration in range(n_recorded)
        })

    # Align wrt a geodesic
    def align_pred(self, y_pred, i, k):
        """Align observation ``i`` to its prediction and record both errors.

        The prediction along the regression is turned into a graph and
        compared with the observed graph twice: with an ``ID`` matcher
        built on ``self.distance`` (stored as regression error) and with
        ``self.matcher`` (stored as post-alignment error).  The permutation
        left in ``self.matcher.f`` becomes the new alignment of ``i``.

        Args:
            y_pred: predictions; row ``i`` (``y_pred.iloc[i]``) is used.
            i: index of the observation.
            k: iteration index used as key of the stored errors.
        """
        self.aX.get_node_attr()
        self.aX.get_edge_attr()

        # Prediction as a network, so that network distances can be computed.
        predicted = self.give_me_a_network(y_pred.iloc[i], self.aX.node_attr,
                                           self.aX.edge_attr)
        observed = self.aX.X[i]

        identity_matcher = ID(self.distance)
        self.regression_error[i, k] = identity_matcher.dis(observed, predicted)
        self.postalignment_error[i, k] = self.matcher.dis(observed, predicted)
        self.f[i] = self.matcher.f

    # Compute the generalized geodesic regression on the total space as a regression of the aligned graph set
    def est(self, k):
        """Fit the regression model of iteration ``k`` on the aligned graphs.

        Steps: (1) every graph of ``self.aX`` is deep-copied, permuted with
        ``self.f`` and ``self.aX`` is replaced by the permuted set; (2) the
        set is flattened into the response matrix ``y``; (3) the regressors
        stored in each graph's ``s`` form the design matrix ``x``; (4) the
        model selected by ``self.model_type`` is fitted.

        Fixes over the previous version: the permutations are stored as
        snapshots instead of references to the live ``self.f``, and the GPR
        error uses the fitted column of ``y`` instead of the undefined name
        ``y_tr`` with a hard-coded column.

        Side effects: sets ``self.f_iteration[k]``, ``self.aX``,
        ``self.variables_names`` and ``self.regressor``; with OLS also
        ``self.f_all[k]``; with GPR also ``self.models[name]`` for every
        variable and the weighted squared error of iteration ``k`` in
        ``self.regression_error``.

        Args:
            k: iteration index used as key of the stored results.

        Returns:
            ``(model, along_geo_pred)`` for OLS, or
            ``(model, along_geo_pred, y_std)`` for GPR, where ``model`` and
            ``y_std`` belong to the last fitted variable.

        Raises:
            Exception: if ``self.model_type`` is neither 'OLS' nor 'GPR'.
        """
        # self.f is updated in place by the alignment steps, so keep a copy:
        # storing the object itself would make all iterations share a record.
        self.f_iteration[k] = copy.deepcopy(self.f)

        # Step 1: Create the current permuted dataset
        G_per = GraphSet()
        for i in range(self.aX.size()):
            G_temp = copy.deepcopy(self.aX.X[i])
            G_temp.permute(self.f[i])
            G_temp.s = copy.deepcopy(self.aX.X[i].s)
            G_per.add(G_temp)
        self.aX = copy.deepcopy(G_per)

        # Step 2: Transform it into a matrix
        y = G_per.to_matrix_with_attr()
        self.variables_names = y.columns
        n_obs = y.shape[0]

        # Step 3: create the x matrix, one row of regressors per observation
        x = pd.DataFrame(columns=range(len(G_per.X[0].s)),
                         index=range(n_obs))
        for i in range(n_obs):
            x.iloc[i] = [float(regressor) for regressor in G_per.X[i].s]
        self.regressor = x

        # Step 4: fit the chosen regression model
        if self.model_type == 'OLS':
            # Ordinary Least Squares
            model = linear_model.LinearRegression()
            model.fit(x, y)
            along_geo_pred = pd.DataFrame(model.predict(x),
                                          columns=self.variables_names)
            self.f_all[k] = copy.deepcopy(self.f)
            return (model, along_geo_pred)

        if self.model_type == 'GPR':
            # Gaussian Process: one independent process per variable
            # (i.e. per node or edge attribute).
            along_geo_pred = pd.DataFrame(index=range(n_obs),
                                          columns=self.variables_names)
            # NOTE(review): the per-variable standard deviations collected
            # here are not returned; only the last variable's y_std is.
            # Confirm which of the two callers actually need.
            along_geo_pred_sd = pd.DataFrame(index=range(n_obs),
                                             columns=self.variables_names)
            weighted_error = [0.0] * n_obs
            for m in range(len(self.variables_names)):
                name = self.variables_names[m]
                model = gaussian_process.GaussianProcessRegressor(
                    kernel=self.kernel,
                    n_restarts_optimizer=self.restarts,
                    alpha=self.alpha)
                # Fitting the Gaussian Process means finding the hyperparameters
                model.fit(x, y.iloc[:, m])
                self.models[name] = model
                # Predict on the training inputs to compute the regression error
                y_pred, y_std = model.predict(x, return_std=True)
                along_geo_pred.loc[:, name] = pd.Series(y_pred)
                along_geo_pred_sd.loc[:, name] = pd.Series(y_std)
                # Squared residual of this variable, weighted by the
                # predictive standard deviation.
                observed = y.iloc[:, m]
                for i in range(n_obs):
                    residual = float(observed.iloc[i]) - y_pred[i]
                    weighted_error[i] += residual**2 / y_std[i]

            if isinstance(self.regression_error, pd.DataFrame):
                # DataFrame layout: one row per observation, column k.
                self.regression_error.iloc[:, k] += weighted_error
            else:
                # Mapping layout keyed by (observation, iteration), the
                # layout created in __init__.
                for i in range(n_obs):
                    self.regression_error[i, k] = (
                        self.regression_error.get((i, k), 0.0) +
                        weighted_error[i])
            return (model, along_geo_pred, y_std)

        raise Exception("Wrong regression model: select either OLS or GPR")

    # Given x_new is predicting the corresponding graph:
    def predict(self, x_new, std=False):
        """Predict the graphs corresponding to new regressor values.

        The fitted ``self.model`` is evaluated on ``x_new`` and every
        predicted row is rebuilt as a graph.  Results are stored on the
        instance: ``self.y_vec_pred`` (raw predictions), ``self.y_net_pred``
        (a ``GraphSet`` with one graph per row of ``x_new``) and, when
        ``std`` is true and the model type is 'GPR', ``self.y_std_pred``.

        Changes over the previous version: a non-DataFrame input raises
        instead of printing and failing later; the model is evaluated and
        the graphs are built once instead of twice when ``std`` is
        requested; rows of ``x_new`` are read by position, like the
        predictions, so a non-default index no longer breaks the pairing;
        rows with several regressors are stored as a list of floats (a
        single regressor is still stored as a float).

        Args:
            x_new: pandas DataFrame with one row per new observation.
            std: if true and the model type is 'GPR', also store the
                predictive standard deviation.

        Raises:
            TypeError: if ``x_new`` is not a pandas DataFrame.
        """
        if not isinstance(x_new, pd.DataFrame):
            raise TypeError(
                "The new observation should be a pandas dataframe of real values"
            )

        # NOTE(review): self.model is what align_and_est stored, and in GPR
        # mode est returns the process of the last fitted variable only, so
        # all variables are predicted by that single model here.  Confirm
        # whether the per-variable models in self.models should be used.
        if std and self.model_type == 'GPR':
            self.y_vec_pred, self.y_std_pred = self.model.predict(
                X=x_new, return_std=True)
        else:
            self.y_vec_pred = self.model.predict(X=x_new)

        self.y_net_pred = GraphSet()
        for i in range(self.y_vec_pred.shape[0]):
            regressors = x_new.iloc[i]
            if len(regressors) == 1:
                s_value = float(regressors.iloc[0])
            else:
                s_value = [float(value) for value in regressors]
            self.y_net_pred.add(
                self.give_me_a_network(geo=pd.Series(
                    data=self.y_vec_pred[i], index=self.variables_names),
                                       n_a=self.aX.node_attr,
                                       e_a=self.aX.edge_attr,
                                       s=s_value))

    # These functions are auxiliary function to compute the ggr

    # geo is a pd Series
    # n_a and e_a are nodes and edges attributes
    def give_me_a_network(self, geo, n_a, e_a, s=None):
        """Rebuild a ``Graph`` from a flat Series of attribute values.

        The numbers found in each label of ``geo`` identify an entry: the
        first two are used as the ``(i, j)`` key of the attribute
        dictionary.  Only the first label seen for a key is used:

        * exactly two numbers: the entry holds the single value at that label;
        * more than two numbers and ``i == j``: the entry holds the ``n_a``
          consecutive values starting at that label;
        * more than two numbers and ``i != j``: the entry holds the ``e_a``
          consecutive values starting at that label.

        Labels with fewer than two numbers are ignored.

        Args:
            geo: pandas Series whose index holds the variable names.
            n_a: number of values read for an ``i == j`` entry.
            e_a: number of values read for an ``i != j`` entry.
            s: value forwarded to ``Graph(s=...)``.

        Returns:
            ``Graph(x=<attribute dict>, adj=None, s=s)``.
        """
        labels = geo.axes[0]
        parsed = [re.findall(r'-?\d+\.?\d*', label) for label in labels]

        attributes = {}
        for pos, numbers in enumerate(parsed):
            if len(numbers) < 2:
                continue
            key = (int(numbers[0]), int(numbers[1]))
            if key in attributes:
                continue
            if len(numbers) == 2:
                attributes[key] = [geo.loc[labels[pos]]]
            else:
                width = n_a if key[0] == key[1] else e_a
                attributes[key] = [
                    geo.loc[labels[pos + offset]] for offset in range(width)
                ]

        return Graph(x=attributes, adj=None, s=s)

    # Conformal prediction
    def align_est_and_predRegions(self, alpha):
        """Placeholder for conformal prediction regions (not implemented).

        Notes left by the author on the intended procedure:
            * divide the data into training and test, saving the training
              part in ``aX`` (the regressors are found in ``X.s``);
            * ``self.est`` and ``self.align_pred`` are the two functions
              estimating the coefficients, which can be extracted as
              ``self.network_coef`` (a graph set).

        Args:
            alpha: currently unused.

        Returns:
            Always 0.
        """
        not_implemented_result = 0
        return not_implemented_result

    # Helper used to parallelize the alignment procedure: it aligns
    # observation i to the reference graph X2 with self.matcher and stores
    # the resulting permutation in self.f[i].
    def two_net_match(self, X2, i, first_id):
        """Store in ``self.f[i]`` the permutation aligning graph ``i`` to ``X2``.

        Args:
            X2: reference graph.
            i: index of the observation to align.
            first_id: index of the observation the reference was taken from;
                it gets ``range(self.aX.n_nodes)`` without running the matcher.
        """
        if i != first_id:
            # Align X to Y, then keep the permutation the matcher found.
            self.matcher.the_dis(self.aX.X[i], X2)
            self.f[i] = self.matcher.f
            return
        self.f[first_id] = range(self.aX.n_nodes)
Esempio n. 7
0
    def align_and_est(self, max_iterations=200, eps=0.001):
        """Estimate the mean graph by alternating alignment and estimation.

        Starting from a randomly chosen observation as candidate mean, each
        iteration aligns every graph to the candidate (storing the
        permutations in ``self.f``), computes a new candidate with
        ``self.est`` and measures the step with ``self.matcher.dis``.  The
        loop ends when the step is below ``eps`` or after
        ``max_iterations`` iterations.  In both cases ``self.mean`` is set
        and ``self.aX`` is replaced by the set permuted with the last
        permutations.

        Fixes over the previous version: ``self.f`` is no longer cleared
        after the last iteration (the max-iteration path read it right
        afterwards), that path now permutes every graph including the one
        at index 0 as the convergence path does, and the convergence
        message reports ``eps`` instead of a fixed 0.001.

        Args:
            max_iterations: maximum number of iterations.
            eps: step size under which the procedure is considered converged.
        """

        def commit(mean):
            # Store the mean and update aX with the final permutations.
            self.mean = mean
            aligned = GraphSet()
            for i in range(self.X.size()):
                G = self.aX.X[i]
                G.permute(self.f[i])
                aligned.add(G)
            self.aX = copy.deepcopy(aligned)

        # Select a random candidate:
        first_id = random.randint(0, self.aX.size() - 1)
        m_1 = self.aX.X[first_id]
        self.f[first_id] = range(self.X.n_nodes)

        for self.k in range(max_iterations):
            print("\n start of iteration: " + str(self.k))
            for i in range(self.X.size()):
                # Align X to Y and keep the permutation moving X closer to Y
                a = self.matcher.align(self.aX.X[i], m_1)
                self.f[i] = a.f

            m_2 = self.est(m_1)
            step_range = self.matcher.dis(m_1, m_2)

            if step_range < eps:
                commit(m_2)
                print("Step Range smaller than " + str(eps))
                return

            m_1 = m_2
            # Reset the permutations before the next alignment, but keep
            # those of the last iteration: they are applied below.
            if self.k + 1 < max_iterations:
                self.f.clear()

        print("Maximum number of iteration reached.")
        commit(m_1)
Esempio n. 8
0
    def align_and_est(self, n_comp, scale, s, max_iter=100, tol=0.01):
        """Alternate alignment and PCA estimation until the estimate settles.

        The observations in ``self.aX`` are first aligned to the first
        observation and an initial estimate is computed with ``self.est``.
        Then, repeatedly, the data are re-aligned with ``self.align_geo``
        along the first component of the current estimate and a new estimate
        is computed.  Iteration stops when the Euclidean distance between
        element 0 of two consecutive estimates drops below ``tol`` or when
        ``max_iter`` iterations (the initial one included) have been run.

        Parameters
        ----------
        n_comp : int
            Number of principal components requested from ``self.est``.
        scale : bool
            Stored in ``self.scale`` (if True scaling is applied to the
            GraphSet).
        s : sequence
            ``s[0]`` and ``s[1]`` are stored as ``self.s_min`` and
            ``self.s_max``, the range for the alignment wrt a geodesic.
        max_iter : int, optional
            Maximum number of iterations, counting the initial alignment.
            Defaults to 100, the value that used to be hard-coded.
        tol : float, optional
            Convergence threshold on the step range.  Defaults to 0.01, the
            value that used to be hard-coded.

        Returns
        -------
        None
            Results are stored on the instance: ``self.e_val``,
            ``self.scores``, ``self.e_vec`` (a single network when
            ``n_comp == 1``, otherwise a ``GraphSet`` with one network per
            component) and ``self.barycenter_net``.
        """
        # If True scaling is applied to the GraphSet
        self.scale = scale
        # Range for the alignment wrt a geodesic
        self.s_min = s[0]
        self.s_max = s[1]

        # STEP 0: align every observation wrt the first one and compute the
        # first estimate.
        self.f[0] = list(range(self.aX.n_nodes))
        reference = self.aX.X[0]
        for i in range(1, self.aX.size()):
            # Permutation of observation i that brings it closer to the
            # reference
            self.f[i] = self.matcher.align(self.aX.X[i], reference).f
        current = self.est(n_comp)

        converged = False
        for _ in range(max_iter - 1):
            # STEP 1: Align wrt the first principal component
            self.align_geo(current[1].loc[0, :])
            # STEP 2: Compute the principal components again
            updated = self.est(n_comp)
            # STEP 3: Step range is the distance between the eigenvalues of
            # two consecutive estimates
            step_range = math.sqrt(
                sum((a - b)**2 for a, b in zip(updated[0], current[0])))
            # The newest estimate is always the one to keep, whether the
            # loop goes on or stops here.
            current = updated
            if step_range < tol:
                converged = True
                break

        # Single storage point for both exits (convergence or iteration cap).
        self.e_val = current[0]
        self.scores = current[2]
        if n_comp == 1:
            self.e_vec = self.give_me_a_network(current[1].loc[0, :],
                                                n_a=self.aX.node_attr,
                                                e_a=self.aX.edge_attr)
        else:
            components = GraphSet()
            for n_pca in range(n_comp):
                components.add(
                    self.give_me_a_network(current[1].loc[n_pca, :],
                                           n_a=self.aX.node_attr,
                                           e_a=self.aX.edge_attr))
            self.e_vec = components
        self.barycenter_net = self.give_me_a_network(self.barycenter,
                                                     n_a=self.aX.node_attr,
                                                     e_a=self.aX.edge_attr)

        if converged:
            # Report the threshold that was actually applied (the old message
            # claimed 0.001 while the test used 0.01).
            print("Step Range smaller than {}".format(tol))
        else:
            print("Maximum number of iteration reached.")
Esempio n. 9
0
# Second graph, as a dictionary keyed by index pairs with one-element
# attribute lists.  The first six keys have equal indices; the remaining ones
# come in mirrored (i, j) / (j, i) pairs.
# NOTE(review): presumably (i, i) keys are nodes and (i, j) keys are edges --
# confirm against the Graph class.
x2 = {
    (0, 0): [1],
    (1, 1): [1],
    (2, 2): [1],
    (3, 3): [1],
    (4, 4): [1],
    (5, 5): [1],
    (0, 1): [1],
    (1, 0): [1],
    (1, 2): [1],
    (2, 1): [1],
    (3, 4): [1],
    (4, 3): [1],
}
# Collect both graphs in a directed GraphSet (x1 is defined before this
# section).
G = GraphSet(graph_type='directed')
G.add(Graph(x=x1, s=[1, 2], adj=None))
G.add(Graph(x=x2, s=[2, 3], adj=None))

# Distance between the two graphs through the ID matcher built on hamming();
# the value returned by dis() is not stored.
match = ID(hamming())
match.dis(G.X[0], G.X[1])

# 2) GRAPHS with Euclidean scalar and vector attributes on both nodes and edges
# Define the graphs:
x1 = {}
x1[0, 0] = [0.813, 0.630]
x1[1, 1] = [1.606, 2.488]
x1[2, 2] = [2.300, 0.710]
x1[3, 3] = [0.950, 1.616]
x1[4, 4] = [2.046, 1.560]
x1[5, 5] = [2.959, 2.387]