# Example #1
# 0
def _frank_PKTE(X):
    """Estimate the Frank copula dependency parameter for a dataset.

    Computes the empirical Kendall's tau of X and inverts the
    tau -> parameter relationship for the Frank family.

      X - multivariate dataset, numpy array of shape [M x N]

    Returns the estimated Frank dependency parameter (alpha).
    """
    empirical_tau = multivariate_stats.kendalls_tau(X)
    return invcopulastat('Frank', 'kendall', empirical_tau)
def optimalCopulaFamily(X, K=4, family_search=None):
    """
    Given a multivariate data set X, compute the copula family which best fits
    the data, using the procedure described in the paper "Highly Efficient
    Learning of Mixed Copula Networks," by Gal Elidan.

      X - the multivariate dataset for which we desire the copula.  Must be a
          numpy array of dimension [M x N], where M is the number of data
          points, and N is the dimensionality of the dataset
      K - the square root of the number of grid points (for now, we assume
          square gridding of the unit cube)
      family_search - a list of the copula families to search.  Currently
          supported: Gaussian, Clayton, Gumbel, and Frank.  Defaults to all
          four (passing None selects the default; avoids a mutable default
          argument).

    Returns a tuple (optimalFamily, depParams, tau_hat) where tau_hat is the
    empirical (unclamped) Kendall's tau.

    Raises ValueError if every candidate family was excluded (e.g. strongly
    negative concordance with only Clayton/Gumbel in the search list).
    """
    if family_search is None:
        family_search = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # compute the empirical Kendall's Tau
    tau_hat = multivariate_stats.kendalls_tau(X)

    # compute empirical multinomial signature
    empirical_mnsig = empirical_copulamnsig(X, K)
    empirical_mnsig = empirical_mnsig[0]['esig']
    # replace any 0 values w/ smallest possible float value so the
    # KL-divergence below is well defined (no log(0))
    empirical_mnsig[empirical_mnsig == 0] = np.spacing(1)

    # tolerance below 0 within which tau is clamped to 0 for families that
    # only capture positive concordance (accounts for estimator variance)
    NEG_TOL = -0.05

    # compute the multinomial signature for each of the copula families
    # specified and simultaneously compute the Kullback-Leibler divergence
    # between the empirical and the computed, and store that info
    distances = {}
    for family in family_search:
        # BUG FIX: use a per-family copy of tau so that clamping for one
        # family's domain restrictions does not leak into the other families
        # (the original mutated tau_hat in place, making the result depend on
        # the order of family_search)
        tau = tau_hat

        # The Clayton and Gumbel copulas capture only positive concordance.
        # Because tau_hat has some estimator variance, a slightly negative
        # value is clamped to 0; a clearly negative value excludes the family
        # by assigning infinite divergence.
        if family.lower() in ('clayton', 'gumbel'):
            if tau < NEG_TOL:
                distances[family] = np.inf
                continue
            elif tau < 0:
                tau = 0
            elif tau >= 1:
                tau = 1 - np.spacing(1)  # as close to 1 as possible in our precision
        # any other copula families with restrictions can go here

        mnsig = copulamnsig(family, K, 'kendall', tau)
        # replace any 0 values w/ smallest possible float value
        mnsig[mnsig == 0] = np.spacing(1)

        # compute KL divergence, see
        # http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.entropy.html
        distances[family] = entropy(mnsig, empirical_mnsig)

    # the minimum-divergence family is the optimal one.
    # BUG FIX: dict.iteritems() does not exist in Python 3; use min() over the
    # keys instead, which also avoids an unbound optimalFamily variable
    optimalFamily = min(distances, key=distances.get)
    if not np.isfinite(distances[optimalFamily]):
        raise ValueError('No copula family in family_search is compatible '
                         'with the estimated Kendall tau of %s' % tau_hat)

    # clamp tau into the selected family's valid domain before inverting,
    # mirroring the clamping applied during the search
    tau_opt = tau_hat
    if optimalFamily.lower() in ('clayton', 'gumbel'):
        tau_opt = min(max(tau_opt, 0), 1 - np.spacing(1))
    depParams = invcopulastat(optimalFamily, 'kendall', tau_opt)

    return (optimalFamily, depParams, tau_hat)
# Example #3
# 0
def optimalCopulaFamily(X, K=4, family_search=None):
    """
    Given a multivariate data set X, compute the copula family which best fits
    the data, using the procedure described in the paper "Highly Efficient
    Learning of Mixed Copula Networks," by Gal Elidan.

      X - the multivariate dataset for which we desire the copula.  Must be a
          numpy array of dimension [M x N], where M is the number of data
          points, and N is the dimensionality of the dataset
      K - the square root of the number of grid points (for now, we assume
          square gridding of the unit cube)
      family_search - a list of the copula families to search.  Currently
          supported: Gaussian, Clayton, Gumbel, and Frank.  Defaults to all
          four (passing None selects the default; avoids a mutable default
          argument).

    Returns a tuple (optimalFamily, depParams, tau_hat) where tau_hat is the
    empirical (unclamped) Kendall's tau.

    Raises ValueError if every candidate family was excluded (e.g. strongly
    negative concordance with only Clayton/Gumbel in the search list).
    """
    if family_search is None:
        family_search = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # compute the empirical Kendall's Tau
    tau_hat = multivariate_stats.kendalls_tau(X)

    # compute empirical multinomial signature
    empirical_mnsig = empirical_copulamnsig(X, K)
    empirical_mnsig = empirical_mnsig[0]['esig']
    # replace any 0 values w/ smallest possible float value so the
    # KL-divergence below is well defined (no log(0))
    empirical_mnsig[empirical_mnsig == 0] = np.spacing(1)

    # tolerance below 0 within which tau is clamped to 0 for families that
    # only capture positive concordance (accounts for estimator variance)
    NEG_TOL = -0.05

    # compute the multinomial signature for each of the copula families
    # specified and simultaneously compute the Kullback-Leibler divergence
    # between the empirical and the computed, and store that info
    distances = {}
    for family in family_search:
        # BUG FIX: use a per-family copy of tau so that clamping for one
        # family's domain restrictions does not leak into the other families
        # (the original mutated tau_hat in place, making the result depend on
        # the order of family_search)
        tau = tau_hat

        # The Clayton and Gumbel copulas capture only positive concordance.
        # Because tau_hat has some estimator variance, a slightly negative
        # value is clamped to 0; a clearly negative value excludes the family
        # by assigning infinite divergence.
        if family.lower() in ('clayton', 'gumbel'):
            if tau < NEG_TOL:
                distances[family] = np.inf
                continue
            elif tau < 0:
                tau = 0
            elif tau >= 1:
                tau = 1 - np.spacing(1)  # as close to 1 as possible in our precision
        # any other copula families with restrictions can go here

        mnsig = copulamnsig(family, K, 'kendall', tau)
        # replace any 0 values w/ smallest possible float value
        mnsig[mnsig == 0] = np.spacing(1)

        # compute KL divergence, see
        # http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.entropy.html
        distances[family] = entropy(mnsig, empirical_mnsig)

    # the minimum-divergence family is the optimal one.
    # BUG FIX: dict.iteritems() does not exist in Python 3; use min() over the
    # keys instead, which also avoids an unbound optimalFamily variable
    optimalFamily = min(distances, key=distances.get)
    if not np.isfinite(distances[optimalFamily]):
        raise ValueError('No copula family in family_search is compatible '
                         'with the estimated Kendall tau of %s' % tau_hat)

    # clamp tau into the selected family's valid domain before inverting,
    # mirroring the clamping applied during the search
    tau_opt = tau_hat
    if optimalFamily.lower() in ('clayton', 'gumbel'):
        tau_opt = min(max(tau_opt, 0), 1 - np.spacing(1))
    depParams = invcopulastat(optimalFamily, 'kendall', tau_opt)

    return (optimalFamily, depParams, tau_hat)