def _frank_PKTE(X):
    """Estimate the Frank copula's dependency parameter (alpha) from data.

    X - numpy array of dimension [M x N], M data points of an
        N-dimensional dataset.

    Returns the alpha parameter obtained by inverting the Kendall's tau
    <-> alpha relationship for the Frank family.
    """
    # measure concordance empirically, then invert the tau/alpha mapping
    empirical_tau = multivariate_stats.kendalls_tau(X)
    return invcopulastat('Frank', 'kendall', empirical_tau)
def optimalCopulaFamily(X, K=4, family_search=None):
    """
    This function, given a multivariate data set X, computes the best copula
    family which fits the data, using the procedure described in the paper
    "Highly Efficient Learning of Mixed Copula Networks," by Gal Elidan

      X - the multivariate dataset for which we desire the copula.  Must be
          a numpy array of dimension [M x N], where M is the number of data
          points, and N is the dimensionality of the dataset
      K - the square root of the number of grid points (for now, we assume
          square gridding of the unit cube)
      family_search - a list of all the copula families to search.  Defaults
          to ['Gaussian', 'Clayton', 'Gumbel', 'Frank'].  As more copula's
          are added, the default list will be expanded.

    Returns a tuple (optimalFamily, depParams, tau_hat) where tau_hat is the
    *unmodified* empirical Kendall's tau of X.
    """
    # avoid a mutable default argument; None sentinel keeps the interface
    # backward-compatible for callers passing their own list
    if family_search is None:
        family_search = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # compute the empirical Kendall's Tau
    tau_hat = multivariate_stats.kendalls_tau(X)

    # compute empirical multinomial signature
    empirical_mnsig = empirical_copulamnsig(X, K)
    empirical_mnsig = empirical_mnsig[0]['esig']
    # replace any 0 values w/ smallest possible float value so the KL
    # divergence below stays finite
    empirical_mnsig[empirical_mnsig == 0] = np.spacing(1)

    # compute the multinomial signature for each of the copula families
    # specified and simultaneously compute the kullback leibler divergence
    # between the empirical and the computed, and store that info
    distances = {}
    for family in family_search:
        # Clayton and Gumbel capture only positive concordance, so Kendall's
        # tau is restricted to [0, 1) for them.  Because tau_hat is an
        # estimate with some variance, a tau_hat slightly below 0 is snapped
        # to 0; anything below -0.05 excludes the family outright (distance
        # of infinity).
        #
        # BUGFIX: the clamping is done on a per-family local copy (tau) so
        # that snapping tau_hat to 0 for Clayton/Gumbel no longer leaks into
        # the signatures of the other families or into the returned tau_hat.
        tau = tau_hat
        if family.lower() in ('clayton', 'gumbel'):
            if tau < -0.05:
                distances[family] = np.inf
                continue
            elif tau < 0:
                tau = 0
            elif tau >= 1:
                tau = 1 - np.spacing(1)  # as close to 1 as possible in our precision

        mnsig = copulamnsig(family, K, 'kendall', tau)
        # replace any 0 values w/ smallest possible float value
        mnsig[mnsig == 0] = np.spacing(1)

        # compute KL divergence, see
        # http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.entropy.html
        distances[family] = entropy(mnsig, empirical_mnsig)

    # the optimal copula family is the one w/ minimum KL divergence.
    # min() with a key also fixes two problems of the original loop: the
    # Python-2-only iteritems() call, and the NameError that occurred when
    # every distance was infinity (optimalFamily was never bound).
    optimalFamily = min(distances, key=distances.get)

    # clamp tau only for the winning family's parameter inversion, using the
    # same restriction applied above
    tau_opt = tau_hat
    if optimalFamily.lower() in ('clayton', 'gumbel'):
        if tau_opt < 0:
            tau_opt = 0
        elif tau_opt >= 1:
            tau_opt = 1 - np.spacing(1)
    depParams = invcopulastat(optimalFamily, 'kendall', tau_opt)

    return (optimalFamily, depParams, tau_hat)
# NOTE(review): this is a byte-for-byte duplicate of an optimalCopulaFamily
# definition that appears earlier in this file; being defined later, this
# copy shadows the earlier one.  One of the two should be deleted — confirm
# which callers expect before removing.
def optimalCopulaFamily(X, K=4, family_search=None):
    """
    This function, given a multivariate data set X, computes the best copula
    family which fits the data, using the procedure described in the paper
    "Highly Efficient Learning of Mixed Copula Networks," by Gal Elidan

      X - the multivariate dataset for which we desire the copula.  Must be
          a numpy array of dimension [M x N], where M is the number of data
          points, and N is the dimensionality of the dataset
      K - the square root of the number of grid points (for now, we assume
          square gridding of the unit cube)
      family_search - a list of all the copula families to search.  Defaults
          to ['Gaussian', 'Clayton', 'Gumbel', 'Frank'].  As more copula's
          are added, the default list will be expanded.

    Returns a tuple (optimalFamily, depParams, tau_hat) where tau_hat is the
    *unmodified* empirical Kendall's tau of X.
    """
    # avoid a mutable default argument; None sentinel keeps the interface
    # backward-compatible for callers passing their own list
    if family_search is None:
        family_search = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # compute the empirical Kendall's Tau
    tau_hat = multivariate_stats.kendalls_tau(X)

    # compute empirical multinomial signature
    empirical_mnsig = empirical_copulamnsig(X, K)
    empirical_mnsig = empirical_mnsig[0]['esig']
    # replace any 0 values w/ smallest possible float value so the KL
    # divergence below stays finite
    empirical_mnsig[empirical_mnsig == 0] = np.spacing(1)

    # compute the multinomial signature for each of the copula families
    # specified and simultaneously compute the kullback leibler divergence
    # between the empirical and the computed, and store that info
    distances = {}
    for family in family_search:
        # Clayton and Gumbel capture only positive concordance, so Kendall's
        # tau is restricted to [0, 1) for them.  Because tau_hat is an
        # estimate with some variance, a tau_hat slightly below 0 is snapped
        # to 0; anything below -0.05 excludes the family outright (distance
        # of infinity).
        #
        # BUGFIX: the clamping is done on a per-family local copy (tau) so
        # that snapping tau_hat to 0 for Clayton/Gumbel no longer leaks into
        # the signatures of the other families or into the returned tau_hat.
        tau = tau_hat
        if family.lower() in ('clayton', 'gumbel'):
            if tau < -0.05:
                distances[family] = np.inf
                continue
            elif tau < 0:
                tau = 0
            elif tau >= 1:
                tau = 1 - np.spacing(1)  # as close to 1 as possible in our precision

        mnsig = copulamnsig(family, K, 'kendall', tau)
        # replace any 0 values w/ smallest possible float value
        mnsig[mnsig == 0] = np.spacing(1)

        # compute KL divergence, see
        # http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.entropy.html
        distances[family] = entropy(mnsig, empirical_mnsig)

    # the optimal copula family is the one w/ minimum KL divergence.
    # min() with a key also fixes two problems of the original loop: the
    # Python-2-only iteritems() call, and the NameError that occurred when
    # every distance was infinity (optimalFamily was never bound).
    optimalFamily = min(distances, key=distances.get)

    # clamp tau only for the winning family's parameter inversion, using the
    # same restriction applied above
    tau_opt = tau_hat
    if optimalFamily.lower() in ('clayton', 'gumbel'):
        if tau_opt < 0:
            tau_opt = 0
        elif tau_opt >= 1:
            tau_opt = 1 - np.spacing(1)
    depParams = invcopulastat(optimalFamily, 'kendall', tau_opt)

    return (optimalFamily, depParams, tau_hat)