Example No. 1
# Mu @ lambda binomial
#======================================
stat_bin(out['lambda_bin'], out['mu'][0][0].T, nj_bin)[0] 
stat_bin(out['lambda_bin'], out['mu'][0][1].T, nj_bin)[0] 



#======================================
# Variables contribution
#======================================

# !!! TO DO: Compare with the true association matrix

# Vars contributions
vc = vars_contributions(complete_y, out['Ez.y'], assoc_thr = 0.0, \
                       title = 'Contribution of the variables to the latent dimensions',\
                       storage_path = None)
s = cosine_similarity(vc, dense_output=True)

vc2 = vars_contributions(completed_y2, out2['Ez.y'], assoc_thr = 0.0, \
                       title = 'Contribution of the variables to the latent dimensions',\
                       storage_path = None)
s2 = cosine_similarity(vc2, dense_output=True)

    
vc_full = vars_contributions(full_contra, out_full['Ez.y'], assoc_thr = 0.0, \
                       title = 'Contribution of the variables to the latent dimensions',\
                       storage_path = None)

s_full = cosine_similarity(vc_full, dense_output=True)
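# A minimal sketch for the TO DO above (an assumption, not part of the original
# script): compare the cosine-similarity structure `s_full` with the empirical
# association matrix of the data, reusing the dython pattern from the Pima
# example further below. `cat_features` (the categorical columns of `full_contra`)
# is assumed to be defined elsewhere.
import numpy as np
import pandas as pd
from dython.nominal import compute_associations

true_assoc = compute_associations(full_contra, nominal_columns=cat_features)
est_assoc = pd.DataFrame(np.abs(s_full), index=true_assoc.index,
                         columns=true_assoc.columns)
# Mean absolute gap between empirical and estimated associations (off-diagonal only)
gap = (true_assoc.abs() - est_assoc).abs().values
print(gap[~np.eye(len(gap), dtype=bool)].mean())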
Example No. 2
def MIAMI(y, n_clusters, r, k, init, var_distrib, nj, authorized_ranges,\
          target_nb_pseudo_obs = 500, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
              dm = [], max_patience = 1): # dm: Hack to remove
    ''' Generate pseudo-observations lying in the authorized ranges using a trained M1DGMM

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of the latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: the maximum value that the variable can take.
                    For ordinal data: the number of existing categories for each variable
    authorized_ranges ((2, B, p) ndarray): The lower (index 0) and upper (index 1) bounds that the
                    generated variables must respect; a draw is kept if it satisfies at least
                    one of the B bound combinations
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given, M1DGMM computes it
    max_patience (int): The patience parameter passed on to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The M1DGMM output, augmented with the generated pseudo-observations ('y_all'),
                    their latent coordinates ('zz') and the share of generated draws that were
                    kept ('share_kept_pseudo_obs')
    '''

    # !!! Hack
    cols = y.columns
    # Formatting
    if not isinstance(y, np.ndarray): y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment


    out = M1DGMM(y, n_clusters, r, k, init, var_distrib, nj, it,\
             eps, maxstep, seed, perform_selec = perform_selec,\
                 dm = dm, max_patience = max_patience, use_silhouette = True)

    # Compute the associations
    vars_contributions(pd.DataFrame(y, columns = cols), out['Ez.y'], assoc_thr = 0.0, \
                           title = 'Contribution of the variables to the latent dimensions',\
                           storage_path = None)

    # Unpacking the model from the M1DGMM output
    p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    sigma = out['sigma'][0]
    w = out['best_w_s']
    #eta = out['eta'][0]

    #Ez_y = out['Ez.y']

    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli',
                                             'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    y_std = y[:,var_distrib == 'continuous'].astype(float).std(axis = 0,\
                                                                    keepdims = True)

    nb_points = 200

    # Constraints block
    '''
    is_constrained = np.isfinite(authorized_ranges).any(1)[0]
    is_min_constrained = np.isfinite(authorized_ranges[0])[0]
    is_max_constrained = np.isfinite(authorized_ranges[1])[0]

    is_continuous = (var_distrib == 'continuous') | (var_distrib == 'binomial')
    min_unconstrained_cont = is_continuous & ~is_min_constrained
    max_unconstrained_cont = is_continuous & ~is_max_constrained
    
    authorized_ranges[0] = np.where(min_unconstrained_cont, np.min(y, 0), authorized_ranges[0])
    authorized_ranges[1] = np.where(max_unconstrained_cont, np.max(y, 0), authorized_ranges[1])
    '''

    #from scipy.stats import norm
    '''
    #==============================================
    # Constraints determination
    #==============================================
    
    # Force to stay in the support for binomial and continuous variables

    #authorized_ranges = np.expand_dims(np.stack([[-np.inf,np.inf] for var in var_distrib]).T, 1)
    #authorized_ranges[:, 0, 8] = [0, 0]  # Of more than 60 years old
    #authorized_ranges[:, 0, 0] = [-np.inf, np.inf]  # Of more than 60 years old

    # Look for the constrained variables
    #authorized_ranges[:,:,0] = np.array([[-np.inf],[np.inf]])
    is_constrained = np.isfinite(authorized_ranges).any(1)[0]
    
    #bbox = np.dstack([Ez_y.min(0),Ez_y.max(0)])
    #bbox * np.array([0.6, 1.4])
    
    proba_min = 1E-3
    proba = proba_min
      
    epsilon = 1E-12
    best_A = []
    best_b = []
    
    is_solution = True
    while is_solution:
        b = []#np.array([])
        A = []#np.array([[]]).reshape((0, r[0]))
        
        bbox = np.array([[-10, 10]] * r[0]) # !!! To fix
        
        alpha = 1 - proba
        q = norm.ppf(1 - alpha / 2)  
        
        #=========================================
        # Store the constraints for each datatype
        #=========================================

        for j in range(p):
            if is_constrained[j]:
                bounds_j = authorized_ranges[:,:,j]
                # The index of the variable among the variables of the same type
                idx_among_type = (var_distrib[:j] == var_distrib[j]).sum()
                
                if var_distrib[j] == 'continuous':
                    # Lower bound
                    lb_j = bounds_j[0] / y_std[0, idx_among_type] - lambda_cont[idx_among_type, 0] + q
                    A.append(- lambda_cont[idx_among_type,1:])
                    b.append(- lb_j)
                    
                    # Upper bound                                
                    ub_j = bounds_j[1] / y_std[0, idx_among_type] - lambda_cont[idx_among_type, 0] - q
                    A.append(lambda_cont[idx_among_type,1:])
                    b.append(ub_j)
                
                elif var_distrib[j] == 'binomial':
                    idx_among_type = ((var_distrib[:j] == 'bernoulli') | (var_distrib[:j] == 'binomial')).sum()
    
                    # Lower bound
                    lb_j = bounds_j[0]
                    lb_j = logit(lb_j / nj_bin[idx_among_type]) - lambda_bin[idx_among_type,0]
                    A.append(- lambda_bin[idx_among_type,1:])
                    b.append(- lb_j)
                    
                    # Upper bound
                    ub_j = bounds_j[1]
                    ub_j = logit(ub_j / nj_bin[idx_among_type]) - lambda_bin[idx_among_type,0]
                    
                    A.append(lambda_bin[idx_among_type, 1:])
                    b.append(ub_j)
                    
                elif var_distrib[j] == 'bernoulli':
                    idx_among_type = ((var_distrib[:j] == 'bernoulli') | (var_distrib[:j] == 'binomial')).sum()
                    assert bounds_j[0] == bounds_j[1] # !!! To improve
                    
                    # Lower bound
                    lb_j = proba if bounds_j[0] == 1 else  0 + epsilon
                    lb_j = logit(lb_j / nj_bin[idx_among_type]) - lambda_bin[idx_among_type,0]
                    A.append(- lambda_bin[idx_among_type,1:])
                    b.append(- lb_j)
                    
                    # Upper bound
                    ub_j = 1 - epsilon if bounds_j[0] == 1 else 1 - proba
                    ub_j = logit(ub_j / nj_bin[idx_among_type]) - lambda_bin[idx_among_type,0]
                    A.append(lambda_bin[idx_among_type, 1:])
                    b.append(ub_j)
                    
                elif var_distrib[j] ==  'categorical':
                    continue
                    assert bounds_j[0] == bounds_j[1] # !!! To improve
                    modality_idx = int(bounds_j[0][0])        
                    
                    # Define the probability to draw the modality of interest to proba
                    pi = np.full(nj_categ[idx_among_type],\
                                 (1 - proba) / (nj_categ[idx_among_type] - 1))
                       
                    # For the inversion of the softmax a constant C = 0 is taken:
                    pi[modality_idx] = proba
                    lb_j = np.log(pi) - lambda_categ[idx_among_type][:, 0] 
    
                    # -1 Mask
                    mask = np.ones((nj_categ[idx_among_type], 1))
                    mask[modality_idx] = -1
                    A.append(lambda_categ[idx_among_type][:, 1:] * mask)
                    b.append(lb_j * mask[:,0])
    
                    
                elif var_distrib[j] == 'ordinal':
                    assert bounds_j[0] == bounds_j[1] # !!! To improve
                    modality_idx = int(bounds_j[0][0])  
                    
                    raise RuntimeError('Not implemented for the moment')
                        
        #=========================================
        # Try if the solution is feasible
        #=========================================
        try:

            points, interior_point, hs = solve_convex_set(np.reshape(A, (-1, r[0]),\
                                    order = 'C'), np.hstack(b), bbox)
        
            # If yes store the new constraints
            best_A = deepcopy(A)
            best_b = deepcopy(b)
            
            proba = np.min([1.05 * proba, 0.8])
            if proba >= 0.8:
                is_solution = False
        
        except QhullError:
            is_solution = False
                    
            
    best_A = np.reshape(best_A, (-1, r[0]), order = 'C')
    best_b = np.hstack(best_b)
    points, interior_point, hs = solve_convex_set(best_A, best_b, bbox)
    polygon = Polygon(points)    
    '''
    #=======================================================
    # Data augmentation part
    #=======================================================

    # Create pseudo-observations iteratively:
    nb_pseudo_obs = 0

    y_new_all = []
    zz = []

    total_nb_obs_generated = 0
    while nb_pseudo_obs <= target_nb_pseudo_obs:

        #===================================================
        # Generate a batch of latent variables (try)
        #===================================================
        '''
        # Simulate points in the polygon
        pts = generate_random(nb_points, polygon)
        pts = np.array([np.array([p.x, p.y]) for p in pts])
        
        # Compute their density and resample them
        pts_density = fz(pts, mu, sigma, w)
        pts_density = pts_density / pts_density.sum(keepdims = True) # Normalise the pdfs
        
        idx = np.random.choice(np.arange(nb_points), size = target_nb_pseudo_obs,\
                               p = pts_density, replace=True)
        z = pts[idx]
        '''
        #===================================================
        # Generate a batch of latent variables
        #===================================================

        # Draw some z^{(1)} | Theta using z^{(1)} | s, Theta
        z = np.zeros((nb_points, r[0]))

        z0_s = multivariate_normal(size = (nb_points, 1), \
            mean = mu.flatten(order = 'C'), cov = block_diag(*sigma))
        z0_s = z0_s.reshape(nb_points, k[0], r[0], order='C')

        comp_chosen = np.random.choice(k[0], nb_points, p=w / w.sum())
        for m in range(nb_points):  # Dirty loop for the moment
            z[m] = z0_s[m, comp_chosen[m]]
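        # A vectorised equivalent of the loop above (a sketch, same result):
        # z = z0_s[np.arange(nb_points), comp_chosen]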

        #===================================================
        # Draw pseudo-observations
        #===================================================

        y_bin_new = []
        y_categ_new = []
        y_ord_new = []
        y_cont_new = []

        y_bin_new.append(draw_new_bin(lambda_bin, z, nj_bin))
        y_categ_new.append(draw_new_categ(lambda_categ, z, nj_categ))
        y_ord_new.append(draw_new_ord(lambda_ord, z, nj_ord))
        y_cont_new.append(draw_new_cont(lambda_cont, z))

        # Stack the quantities
        y_bin_new = np.vstack(y_bin_new)
        y_categ_new = np.vstack(y_categ_new)
        y_ord_new = np.vstack(y_ord_new)
        y_cont_new = np.vstack(y_cont_new)

        # "Destandardize" the continous data
        y_cont_new = y_cont_new * y_std

        # Put them in the right order and append them to y
        type_counter = {'count': 0, 'ordinal': 0,\
                        'categorical': 0, 'continuous': 0}

        y_new = np.full((nb_points, y.shape[1]), np.nan)

        # Quite dirty:
        for j, var in enumerate(var_distrib):
            if (var == 'bernoulli') or (var == 'binomial'):
                y_new[:, j] = y_bin_new[:, type_counter['count']]
                type_counter['count'] = type_counter['count'] + 1
            elif var == 'ordinal':
                y_new[:, j] = y_ord_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            elif var == 'categorical':
                y_new[:, j] = y_categ_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            elif var == 'continuous':
                y_new[:, j] = y_cont_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            else:
                raise ValueError(var, 'Type not implemented')

        #===================================================
        # Acceptance rule
        #===================================================

        # Check that each variable falls within the authorized range
        y_new_exp = np.expand_dims(y_new, 1)

        total_nb_obs_generated += len(y_new)

        mask = np.logical_and(y_new_exp >= authorized_ranges[0][np.newaxis],\
                       y_new_exp <= authorized_ranges[1][np.newaxis])

        # Keep a draw if all of its variables fall within at least one of the authorized bound combinations
        mask = np.any(mask.mean(2) == 1, axis=1)
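        # Shapes: y_new_exp is (nb_points, 1, p) and each bound array broadcasts to
        # (1, n_combinations, p), so mask.mean(2) == 1 flags the draws whose p variables
        # all satisfy a given bound combination.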

        y_new = y_new[mask]
        y_new_all.append(y_new)
        nb_pseudo_obs = len(np.concatenate(y_new_all))

        zz.append(z[mask])
        #print(nb_pseudo_obs)

    # Keep only the first target_nb_pseudo_obs pseudo-observations
    y_new_all = np.concatenate(y_new_all)
    y_new_all = y_new_all[:target_nb_pseudo_obs]

    #y_all = np.vstack([y, y_new_all])
    share_kept_pseudo_obs = len(y_new_all) / total_nb_obs_generated

    out['zz'] = zz
    out['y_all'] = y_new_all
    out['share_kept_pseudo_obs'] = share_kept_pseudo_obs

    return (out)
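# A hypothetical usage sketch (the data frame and the initialisation objects below are
# assumptions, not defined in this script): force the first variable to be at least 50
# and draw 500 pseudo-observations. authorized_ranges has shape (2, n_combinations, p):
# index 0 holds the lower bounds, index 1 the upper bounds.
#
# ranges = np.expand_dims(np.stack([[-np.inf, np.inf]] * y_df.shape[1]).T, 1)
# ranges[0, 0, 0] = 50.0
# res = MIAMI(y_df, n_clusters=2, r=r, k=k, init=prince_init, var_distrib=var_distrib,
#             nj=nj, authorized_ranges=ranges, target_nb_pseudo_obs=500, seed=0)
# pseudo_obs, kept_share = res['y_all'], res['share_kept_pseudo_obs']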
Example No. 3
out = M1DGMM(y_np, 'auto', r, k, prince_init, var_distrib, nj, it,\
             eps, maxstep, seed, perform_selec = False)
m, pred = misc(labels_oh, out['classes'], True)
print(m)
print(confusion_matrix(labels_oh, pred))
print(silhouette_score(dm, pred, metric='precomputed'))

# Plot of the latent representation of the observations and contributions of the variables
y.columns = ['age', 'sex', 'cp' ,'trestbps', 'chol', 'fbs', 'restecg', 'thalach',\
    'exang', 'oldpeak', 'slope', 'ca', 'thal']

obs_representation(out['classes'],
                   out['Ez.ys'],
                   title='Latent representation of the observations')
vars_contributions(y, out['Ez.ys'], assoc_thr=0.0)
density_representation(out, is_3D=False)

# Plot the final groups

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

colors = ['green', 'red']

fig = plt.figure(figsize=(8, 8))
plt.scatter(out['Ez.ys'][:, 0], out['Ez.ys'][:, 1], c=pred,\
            cmap=matplotlib.colors.ListedColormap(colors))

cb = plt.colorbar()
Example No. 4
## Look for the z and y mapping
complete_y = complete_y.reset_index(drop=True)
zz = out['Ez.y']

## Individual variables
var = 'WifeRelig'
fig, ax = plt.subplots()
for g in np.unique(complete_y[var]):
    ix = np.where(complete_y[var] == g)
    ax.scatter(zz[ix, 0], zz[ix, 1], label=g, s=7)
ax.legend()
ax.set_title(var + ' zz')
plt.show()

comb = (complete_y['WifeEduc'] == 3.0) & (complete_y['WifeRelig'] == 1.0) & (
    complete_y['HusbEduc'] == 3.0)
fig, ax = plt.subplots()
for g in np.unique(comb):
    ix = np.where(comb == g)
    ax.scatter(zz[ix, 0], zz[ix, 1], label=g, s=7)
ax.legend()
ax.set_title('WifeEduc == 3 & HusbEduc == 3 & WifeRelig == 1')
plt.show()

plt.scatter(zz[:, 0], zz[:, 1], c=complete_y['HusbEduc'].astype(float))


vars_contributions(full_contra, zz, assoc_thr = 0.0, \
                       title = 'Contribution of the variables to the latent dimensions',\
                       storage_path = None)
Example No. 5
#=============================
# Comparing association structures
#=============================

import seaborn as sns
from dython.nominal import compute_associations, associations
from sklearn.metrics.pairwise import cosine_similarity

original_assoc = compute_associations(full_pima, nominal_columns = cat_features)

associations(full_pima, nominal_columns = cat_features)

Ez = out2['Ez.y']
vc = vars_contributions(completed_y2, Ez, assoc_thr = 0.0, \
                       title = 'Contribution of the variables to the latent dimensions',\
                       storage_path = None)

assoc = cosine_similarity(vc, dense_output=True)

labels = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'D.P. Function', 'Age', 'Outcome']

fig, axn = plt.subplots(1, 2, sharex=True, sharey=True, figsize = (12,10)) 
cbar_ax = fig.add_axes([.91, .3, .03, .4])




sns.heatmap(original_assoc.abs(), ax=axn[0],
            cbar=0 == 0,
Example No. 6
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,\
          nan_mask, target_nb_pseudo_obs = 500, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
          dm = [], max_patience = 1): # dm: Hack to remove
    ''' Complete the missing values using a trained M1DGMM

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of the latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: the maximum value that the variable can take.
                    For ordinal data: the number of existing categories for each variable
    nan_mask (ndarray): A mask array equal to True when the observation value is missing, False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given, M1DGMM computes it
    max_patience (int): The patience parameter passed on to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The M1DGMM output, augmented with the completed data ('completed_y'), the
                    latent coordinates of the imputed observations ('zz') and the corresponding
                    objective values ('fun')
    '''

    # !!! Hack
    cols = y.columns
    # Formatting
    if not isinstance(nan_mask, np.ndarray): nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray): y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]
    completed_y = deepcopy(y)

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,\
             eps, maxstep, seed, perform_selec = perform_selec,\
                 dm = dm, max_patience = max_patience, use_silhouette = True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns = cols), out['Ez.y'], assoc_thr = 0.0, \
                           title = 'Contribution of the variables to the latent dimensions',\
                           storage_path = None)

    # Unpacking the model from the M1DGMM output
    #p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli',
                                             'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:,var_distrib == 'continuous'].astype(float).std(axis = 0,\
                                                                    keepdims = True)
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights for the optimisation
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = (assoc / assoc.sum(1, keepdims=True))
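    # weights[i, j] is the absolute, row-normalised association of variable j with
    # variable i: when variable i is missing, the reconstruction error on the observed
    # variables is weighted by how strongly they relate to the missing one(s).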

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk,:,0], var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                 lambda_ord, nj_ord, lambda_cont, y_std) for kk in range(k[0])]
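    # Each latent cluster centre mu[kk] is decoded back to the observation space with
    # `impute`; these decoded centres serve as candidate starting points for the
    # per-observation optimisation below.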

    # Use only the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],\
             'cont': ['continuous'], 'ord': 'ordinal'}

    # Gradient optimisation
    nan_indices = np.where(nan_mask.any(1))[0]
    imputed_y = np.zeros_like(y)
    numobs = y.shape[0]

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected Binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(
            lb_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices,
                                                            0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(
            ub_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices,
                                                            0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)
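    # The constraint lb <= A @ z <= ub keeps the optimiser in the part of the latent
    # space whose decoded binomial and continuous values stay within the support
    # observed in the data (bounds built above).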

    zz = []
    fun = []
    for i in range(numobs):
        if i in nan_indices:

            # Design the nan masks for the optimisation process
            nan_mask_i = nan_mask[i]
            weights_i = weights[nan_mask_i].mean(0)

            # Look for the best starting point
            cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],\
                            cat_features[~nan_mask_i], weights_i)\
                            for obs in cluster_obs]
            z02 = mu[np.argmin(cluster_dist), :, 0]

            # Formatting
            vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0] \
                             for type_alias, vartype in types.items()}

            complete_categ = [
                l for idx, l in enumerate(lambda_categ)
                if idx in vars_i['categ']
            ]
            complete_ord = [
                l for idx, l in enumerate(lambda_ord) if idx in vars_i['ord']
            ]

            opt = minimize(stat_all, z02, \
                   args = (y[i, ~nan_mask_i], var_distrib[~nan_mask_i],\
                   weights_i[~nan_mask_i],\
                   lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],\
                   complete_categ,\
                   nj_categ[vars_i['categ']],\
                   complete_ord,\
                   nj_ord[vars_i['ord']],\
                   lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                   tol = eps, method='trust-constr', jac = grad_stat,\
                   constraints = lc,
                   options = {'maxiter': 1000})

            z = opt.x
            zz.append(z)
            fun.append(opt.fun)

            imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                         lambda_ord, nj_ord, lambda_cont, y_std)

        else:
            imputed_y[i] = y[i]

    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return (out)
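# A hypothetical usage sketch (the data frame and the initialisation objects below
# are assumptions, not defined in this script): impute the NaN entries of a mixed
# data frame and retrieve the completed data.
#
# nan_mask = df.isna()
# res = MI2AMI(df, 'auto', r, k, prince_init, var_distrib, nj, nan_mask,
#              seed=0, perform_selec=False)
# completed = pd.DataFrame(res['completed_y'], columns=df.columns)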