def visualizeMNSig():
    """Plot copula multinomial signatures and HELM results over a tau grid.

    For every Kendall's tau in ``np.arange(-0.9, 0.95, 0.05)`` one 2x3 figure
    is saved as a PNG under ``figures/HELM_performance/`` (the directory is
    created when missing):

    * panel 231 -- the multinomial signature returned by ``copulamnsig`` for
      the Gaussian, Clayton, Gumbel and Frank families (a signature whose
      sum is not positive is not drawn),
    * panels 232/233/235/236 -- scatter of the first two columns of ``M``
      samples drawn with ``copularnd`` for the Gaussian, Clayton, Frank and
      Gumbel families,
    * panel 234 -- stacked bars built from the output of
      ``testHELM_parametric``; presumably the percentage of runs in which
      each family was selected for data generated by the family on the
      x axis -- confirm against ``testHELM_parametric``.

    A family whose dependency parameter cannot be obtained
    (``invcopulastat`` raises ``ValueError``) gets an empty scatter panel and
    ``n/a`` in its title.  A family whose sampling fails (``copularnd``
    raises ``ValueError``) is plotted as an all-zero sample.

    Takes no arguments and returns ``None``.
    """
    # some tests on the copula multinomial signature
    K = 4
    M = 1000
    N = 3
    tauVec = np.arange(-0.9, 0.95, 0.05)
    # the families to test against and pick optimal copula
    families = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # Make sure savefig has somewhere to write before doing the heavy work.
    out_dir = 'figures/HELM_performance/'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    helmAccuracyResults = testHELM_parametric(K, M, N, tauVec, families)

    resultsAggregate = {}
    for family in families:
        resultsAggregate[family] = {
            tau: copulamnsig(family, K, 'kendall', tau) for tau in tauVec
        }

    # line style of each family in the signature panel
    signature_styles = (
        ('Gaussian', 'b.-'),
        ('Clayton', 'g.-'),
        ('Gumbel', 'r.-'),
        ('Frank', 'k.-'),
    )
    # (subplot position, family, parameter symbol used in the title)
    scatter_panels = (
        (232, 'Gaussian', r'$\rho$'),
        (233, 'Clayton', r'$\alpha$'),
        (235, 'Frank', r'$\alpha$'),
        (236, 'Gumbel', r'$\alpha$'),
    )
    # x-axis order of the stacked bars; the same order is used for stacking
    bar_order = ('Clayton', 'Gaussian', 'Gumbel', 'Frank')
    bar_colors = ('b', 'g', 'k', 'r')
    signature_x = np.arange(1, K * K + 1)

    # visualize the results
    for tau in tauVec:
        tau_str = "{0:.2f}".format(tau)

        # we would also like to visualize this copula on the side, to try to
        # understand what may be a better way to do model selection.
        # None (not -1) marks a failed inversion: -1 can be a legitimate
        # dependency parameter and must not be mistaken for a failure.
        params = {}
        for family in ('Gaussian', 'Clayton', 'Gumbel', 'Frank'):
            try:
                params[family] = invcopulastat(family, 'kendall', tau)
            except ValueError:
                params[family] = None

        # Sampling order (Gaussian, Clayton, Frank, Gumbel) is kept so the
        # random draws are consumed in the same sequence as before.
        samples = {}
        for family in ('Gaussian', 'Clayton', 'Frank', 'Gumbel'):
            theta = params[family]
            if theta is None:
                continue
            if family == 'Gaussian':
                # equicorrelation matrix: ones on the diagonal, rho elsewhere
                Rho = np.full((N, N), theta, dtype=float)
                np.fill_diagonal(Rho, 1)
                sampler_args = (family, M, Rho)
            else:
                sampler_args = (family, M, N, theta)
            try:
                samples[family] = copularnd(*sampler_args)
            except ValueError:
                samples[family] = np.zeros((M, N))

        fig = plt.figure(figsize=(30, 20))
        try:
            # get each family's MN signature and plot it
            plt.subplot(231)
            for family, style in signature_styles:
                signature = resultsAggregate[family][tau]
                if np.sum(signature) > 0:
                    plt.plot(signature_x, signature, style,
                             label=family + ' Copula')
            plt.title(r'Copula Multinomial Signature $\tau$=' + tau_str +
                      ' K=' + str(K))
            plt.legend()
            plt.grid()

            for position, family, symbol in scatter_panels:
                plt.subplot(position)
                theta = params[family]
                if theta is None:
                    theta_str = 'n/a'
                else:
                    theta_str = "{0:.2f}".format(theta)
                    U = samples[family]
                    plt.scatter(U[:, 0], U[:, 1])
                plt.grid()
                plt.title(family + ' Copula, ' + symbol + '=' + theta_str +
                          r' $\tau$=' + tau_str)

            plt.subplot(234)
            ind = np.arange(len(bar_order))
            width = 0.2
            bottom = np.zeros(len(bar_order))
            handles = []
            for selected, color in zip(bar_order, bar_colors):
                # index manually to ensure accuracy
                heights = np.array([
                    helmAccuracyResults[truth][tau][selected.lower()]
                    for truth in bar_order
                ])
                bars = plt.bar(ind, heights, width, color=color,
                               bottom=bottom)
                handles.append(bars[0])
                bottom = bottom + heights
            plt.xticks(ind + width / 2., bar_order)
            plt.legend(tuple(handles), bar_order)
            plt.grid()

            # NOTE(review): the file name contains a space before "_K_"; it
            # looks like a typo but is kept so existing output names do not
            # change.
            plt.savefig(os.path.join(
                out_dir,
                'HELM_DIM_' + str(N) + '_tau_' + tau_str + ' _K_' + str(K) +
                '.png'))
        finally:
            # release the (large) figure even when plotting or saving fails
            plt.close(fig)
def visualizeMNSig():
    """Plot copula multinomial signatures and HELM results over a tau grid.

    For every Kendall's tau in ``np.arange(-0.9, 0.95, 0.05)`` one 2x3 figure
    is saved as a PNG under ``figures/HELM_performance/`` (the directory is
    created when missing):

    * panel 231 -- the multinomial signature returned by ``copulamnsig`` for
      the Gaussian, Clayton, Gumbel and Frank families (a signature whose
      sum is not positive is not drawn),
    * panels 232/233/235/236 -- scatter of the first two columns of ``M``
      samples drawn with ``copularnd`` for the Gaussian, Clayton, Frank and
      Gumbel families,
    * panel 234 -- stacked bars built from the output of
      ``testHELM_parametric``; presumably the percentage of runs in which
      each family was selected for data generated by the family on the
      x axis -- confirm against ``testHELM_parametric``.

    A family whose dependency parameter cannot be obtained
    (``invcopulastat`` raises ``ValueError``) gets an empty scatter panel and
    ``n/a`` in its title.  A family whose sampling fails (``copularnd``
    raises ``ValueError``) is plotted as an all-zero sample.

    Takes no arguments and returns ``None``.
    """
    # some tests on the copula multinomial signature
    K = 4
    M = 1000
    N = 3
    tauVec = np.arange(-0.9, 0.95, 0.05)
    # the families to test against and pick optimal copula
    families = ['Gaussian', 'Clayton', 'Gumbel', 'Frank']

    # Make sure savefig has somewhere to write before doing the heavy work.
    out_dir = 'figures/HELM_performance/'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    helmAccuracyResults = testHELM_parametric(K, M, N, tauVec, families)

    resultsAggregate = {}
    for family in families:
        resultsAggregate[family] = {
            tau: copulamnsig(family, K, 'kendall', tau) for tau in tauVec
        }

    # line style of each family in the signature panel
    signature_styles = (
        ('Gaussian', 'b.-'),
        ('Clayton', 'g.-'),
        ('Gumbel', 'r.-'),
        ('Frank', 'k.-'),
    )
    # (subplot position, family, parameter symbol used in the title)
    scatter_panels = (
        (232, 'Gaussian', r'$\rho$'),
        (233, 'Clayton', r'$\alpha$'),
        (235, 'Frank', r'$\alpha$'),
        (236, 'Gumbel', r'$\alpha$'),
    )
    # x-axis order of the stacked bars; the same order is used for stacking
    bar_order = ('Clayton', 'Gaussian', 'Gumbel', 'Frank')
    bar_colors = ('b', 'g', 'k', 'r')
    signature_x = np.arange(1, K * K + 1)

    # visualize the results
    for tau in tauVec:
        tau_str = "{0:.2f}".format(tau)

        # we would also like to visualize this copula on the side, to try to
        # understand what may be a better way to do model selection.
        # None (not -1) marks a failed inversion: -1 can be a legitimate
        # dependency parameter and must not be mistaken for a failure.
        params = {}
        for family in ('Gaussian', 'Clayton', 'Gumbel', 'Frank'):
            try:
                params[family] = invcopulastat(family, 'kendall', tau)
            except ValueError:
                params[family] = None

        # Sampling order (Gaussian, Clayton, Frank, Gumbel) is kept so the
        # random draws are consumed in the same sequence as before.
        samples = {}
        for family in ('Gaussian', 'Clayton', 'Frank', 'Gumbel'):
            theta = params[family]
            if theta is None:
                continue
            if family == 'Gaussian':
                # equicorrelation matrix: ones on the diagonal, rho elsewhere
                Rho = np.full((N, N), theta, dtype=float)
                np.fill_diagonal(Rho, 1)
                sampler_args = (family, M, Rho)
            else:
                sampler_args = (family, M, N, theta)
            try:
                samples[family] = copularnd(*sampler_args)
            except ValueError:
                samples[family] = np.zeros((M, N))

        fig = plt.figure(figsize=(30, 20))
        try:
            # get each family's MN signature and plot it
            plt.subplot(231)
            for family, style in signature_styles:
                signature = resultsAggregate[family][tau]
                if np.sum(signature) > 0:
                    plt.plot(signature_x, signature, style,
                             label=family + ' Copula')
            plt.title(r'Copula Multinomial Signature $\tau$=' + tau_str +
                      ' K=' + str(K))
            plt.legend()
            plt.grid()

            for position, family, symbol in scatter_panels:
                plt.subplot(position)
                theta = params[family]
                if theta is None:
                    theta_str = 'n/a'
                else:
                    theta_str = "{0:.2f}".format(theta)
                    U = samples[family]
                    plt.scatter(U[:, 0], U[:, 1])
                plt.grid()
                plt.title(family + ' Copula, ' + symbol + '=' + theta_str +
                          r' $\tau$=' + tau_str)

            plt.subplot(234)
            ind = np.arange(len(bar_order))
            width = 0.2
            bottom = np.zeros(len(bar_order))
            handles = []
            for selected, color in zip(bar_order, bar_colors):
                # index manually to ensure accuracy
                heights = np.array([
                    helmAccuracyResults[truth][tau][selected.lower()]
                    for truth in bar_order
                ])
                bars = plt.bar(ind, heights, width, color=color,
                               bottom=bottom)
                handles.append(bars[0])
                bottom = bottom + heights
            plt.xticks(ind + width / 2., bar_order)
            plt.legend(tuple(handles), bar_order)
            plt.grid()

            # NOTE(review): the file name contains a space before "_K_"; it
            # looks like a typo but is kept so existing output names do not
            # change.
            plt.savefig(os.path.join(
                out_dir,
                'HELM_DIM_' + str(N) + '_tau_' + tau_str + ' _K_' + str(K) +
                '.png'))
        finally:
            # release the (large) figure even when plotting or saving fails
            plt.close(fig)
def testHELM(tau, M, N, familyToTest, numMCSims, copulaFamiliesToTest): results = {} for fam in copulaFamiliesToTest: results[fam.lower()] = 0 for ii in range(0,numMCSims): # generate samples of the requested copula with tau same as the # empirical signature we calculated above if(familyToTest.lower()=='gaussian'): r = invcopulastat(familyToTest, 'kendall', tau) Rho = np.empty((N,N)) for jj in range(0,N): for kk in range(0,N): if(jj==kk): Rho[jj][kk] = 1 else: Rho[jj][kk] = r try: U = copularnd(familyToTest, M, Rho) except ValueError: # copularnd will throw a ValueError if Rho is not a positive semidefinite matrix return results # return 0, which will then be ignored by tests else: # assume Clayton, Frank, or Gumbel try: alpha = invcopulastat(familyToTest, 'kendall', tau) U = copularnd(familyToTest, M, N, alpha) except ValueError: continue lst = [] for jj in range(0,N): U_conditioned = U[:,jj] # if there are any 1's, condition it U_conditioned[U_conditioned==1] = 0.99 if(jj%2==0): lst.append(norm.ppf(U_conditioned)) else: lst.append(expon.ppf(U_conditioned)) # combine X and Y into the joint distribution w/ the copula X = np.vstack(lst) X = X.T ret = optimalCopulaFamily(X, family_search=copulaFamiliesToTest) ret_family = ret[0].lower() # aggregate results results[ret_family] = results[ret_family] + 1.0 # display some progress sys.stdout.write("\rComputing " + str(familyToTest) + " Copula (DIM=%d) (tau=%f)-- %d%%" % (N,tau,ii+1)) sys.stdout.flush() sys.stdout.write("\r") # convert results to percentage for fam in copulaFamiliesToTest: results[fam.lower()] = results[fam.lower()]/float(numMCSims) * 100 return results
def testHELM(tau, M, N, familyToTest, numMCSims, copulaFamiliesToTest): results = {} for fam in copulaFamiliesToTest: results[fam.lower()] = 0 for ii in range(0, numMCSims): # generate samples of the requested copula with tau same as the # empirical signature we calculated above if (familyToTest.lower() == 'gaussian'): r = invcopulastat(familyToTest, 'kendall', tau) Rho = np.empty((N, N)) for jj in range(0, N): for kk in range(0, N): if (jj == kk): Rho[jj][kk] = 1 else: Rho[jj][kk] = r try: U = copularnd(familyToTest, M, Rho) except ValueError: # copularnd will throw a ValueError if Rho is not a positive semidefinite matrix return results # return 0, which will then be ignored by tests else: # assume Clayton, Frank, or Gumbel try: alpha = invcopulastat(familyToTest, 'kendall', tau) U = copularnd(familyToTest, M, N, alpha) except ValueError: continue lst = [] for jj in range(0, N): U_conditioned = U[:, jj] # if there are any 1's, condition it U_conditioned[U_conditioned == 1] = 0.99 if (jj % 2 == 0): lst.append(norm.ppf(U_conditioned)) else: lst.append(expon.ppf(U_conditioned)) # combine X and Y into the joint distribution w/ the copula X = np.vstack(lst) X = X.T ret = optimalCopulaFamily(X, family_search=copulaFamiliesToTest) ret_family = ret[0].lower() # aggregate results results[ret_family] = results[ret_family] + 1.0 # display some progress sys.stdout.write("\rComputing " + str(familyToTest) + " Copula (DIM=%d) (tau=%f)-- %d%%" % (N, tau, ii + 1)) sys.stdout.flush() sys.stdout.write("\r") # convert results to percentage for fam in copulaFamiliesToTest: results[fam.lower()] = results[fam.lower()] / float(numMCSims) * 100 return results