def compute(H, K, cdf=False):
    """Return the largest absolute pointwise difference between two CDFs.

    ``H`` and ``K`` are first replaced by the pair that
    ``Divergence.compute`` returns for them.  When ``cdf`` is false the
    inputs are validated with ``analytics.isValidPDF`` and converted with
    ``analytics.pdfTocdf``; when it is true they are validated with
    ``analytics.isValidCDF`` and used as given.  Both distributions are then
    handed to ``analytics.fill_gaps`` together with the union of their keys
    before being compared key by key.

    Args:
        H: Mapping from key to value describing the first distribution.
        K: Mapping from key to value describing the second distribution.
        cdf: True if ``H`` and ``K`` are already cumulative distributions.

    Returns:
        The maximum of ``|H[key] - K[key]|`` over the union of keys (``0``
        when no difference exceeds zero), or ``None`` when validation fails;
        validation failures are logged instead of being raised.
    """
    H, K = Divergence.compute(H, K)
    try:
        if cdf:
            if not (analytics.isValidCDF(H) and analytics.isValidCDF(K)):
                raise smp.InvalidProbabilityDistributionException(
                    'Invalid cumulative distribution')
        else:
            if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
                raise smp.InvalidProbabilityDensityException(
                    'Invalid probability density')
            # Comparison below is done on cumulative values.
            H = analytics.pdfTocdf(H)
            K = analytics.pdfTocdf(K)

        # ``H.items() + K.items()`` only works where items() returns lists
        # (Python 2); a set union of the keys works on Python 2 and 3 alike.
        all_keys = list(set(H.keys()) | set(K.keys()))

        # presumably fill_gaps inserts the missing keys in place (its return
        # value is never used) -- verify against analytics.fill_gaps
        analytics.fill_gaps(H, all_keys, cdf=True)
        analytics.fill_gaps(K, all_keys, cdf=True)

        max_diff = 0
        for key in sorted(all_keys):
            diff = math.fabs(H[key] - K[key])
            if diff > max_diff:
                max_diff = diff
        return max_diff
    except smp.InvalidProbabilityDistributionException:
        logging.exception('Invalid probability distribution')
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
def compute(H, K):
    """Return the base-2 KL divergence sum of ``H`` relative to ``K``.

    Both inputs must pass ``analytics.isValidPDF``.  They are passed to
    ``analytics.fill_gaps`` with the union of their keys, and the result is
    the sum over keys with ``H[key] != 0`` of
    ``H[key] * log2(H[key] / K[key])``.

    Args:
        H: Mapping from key to probability.
        K: Mapping from key to probability.

    Returns:
        The accumulated sum, or ``None`` when an input is not a valid PDF or
        when some key has ``H[key] != 0`` and ``K[key] == 0``; both
        situations are logged instead of being raised.
    """
    # NOTE(review): the value returned by Divergence.compute is discarded
    # here, whereas other callers in this file rebind H and K to it --
    # confirm that this is intentional.
    Divergence.compute(H, K)
    try:
        if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
            raise smp.InvalidProbabilityDensityException(
                'Invalid probability density')

        # ``H.items() + K.items()`` only works where items() returns lists
        # (Python 2); a set union of the keys works on Python 2 and 3 alike.
        all_keys = list(set(H.keys()) | set(K.keys()))

        # presumably fill_gaps inserts the missing keys in place (its return
        # value is never used) -- verify against analytics.fill_gaps
        analytics.fill_gaps(H, all_keys)
        analytics.fill_gaps(K, all_keys)

        total = 0
        for key in sorted(all_keys):
            if H[key] != 0:
                if K[key] == 0:
                    raise ValueError(
                        'KL Divergence is not defined for the input. Please see the definition of KL-Divergence'
                    )
                total += H[key] * math.log(H[key] / K[key], 2)
        return total
    except ValueError:
        # Any ValueError raised inside the try (including one from math.log)
        # is reported with this message.
        logging.error(
            'KL Divergence is not defined for the input. Please see the definition of KL-Divergence'
        )
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
def compute(H, K, cdf=False):
    """Compute the maximum absolute gap between two cumulative distributions.

    The inputs are rebound to the pair returned by ``Divergence.compute``.
    If ``cdf`` is true they must pass ``analytics.isValidCDF``; otherwise
    they must pass ``analytics.isValidPDF`` and are converted with
    ``analytics.pdfTocdf``.  ``analytics.fill_gaps`` is then called on each
    distribution with the union of both key sets before the comparison.

    Args:
        H: Mapping from key to value for the first distribution.
        K: Mapping from key to value for the second distribution.
        cdf: Whether the inputs are already cumulative distributions.

    Returns:
        The largest ``|H[key] - K[key]|`` across the union of keys (``0`` if
        no difference is greater than zero).  ``None`` is returned when
        validation fails, in which case the failure is logged, not raised.
    """
    H, K = Divergence.compute(H, K)
    try:
        if cdf:
            inputs_ok = analytics.isValidCDF(H) and analytics.isValidCDF(K)
            if not inputs_ok:
                raise smp.InvalidProbabilityDistributionException('Invalid cumulative distribution')
        else:
            inputs_ok = analytics.isValidPDF(H) and analytics.isValidPDF(K)
            if not inputs_ok:
                raise smp.InvalidProbabilityDensityException('Invalid probability density')
            # The comparison is performed on cumulative values.
            H = analytics.pdfTocdf(H)
            K = analytics.pdfTocdf(K)

        # Concatenating items() with ``+`` requires list results (Python 2
        # only); the key-set union is equivalent and also runs on Python 3.
        all_keys = list(set(H.keys()) | set(K.keys()))

        # presumably fill_gaps adds the missing keys in place, since its
        # return value is ignored -- verify against analytics.fill_gaps
        analytics.fill_gaps(H, all_keys, cdf=True)
        analytics.fill_gaps(K, all_keys, cdf=True)

        max_diff = 0
        for key in sorted(all_keys):
            gap = math.fabs(H[key] - K[key])
            if gap > max_diff:
                max_diff = gap
        return max_diff
    except smp.InvalidProbabilityDistributionException:
        logging.exception('Invalid probability distribution')
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
def distribution_save(H, K, file_name, cdf=False):
    """Write the gap-filled cumulative values of ``K`` to ``file_name``.

    ``H`` and ``K`` are rebound to the pair returned by
    ``Divergence.compute``, validated (and converted with
    ``analytics.pdfTocdf`` when ``cdf`` is false), and passed to
    ``analytics.fill_gaps`` with the union of their keys.  One line of the
    form ``"<key> <K[key]>"`` is then written per key, in sorted key order.
    ``H`` takes part in validation and in the key union only; its values
    are not written.

    Args:
        H: Mapping from key to value for the first distribution.
        K: Mapping from key to value for the distribution that is saved.
        file_name: Path of the output file; it is opened in ``'w'`` mode.
        cdf: True if ``H`` and ``K`` are already cumulative distributions.

    Returns:
        None.  Validation failures are logged instead of being raised, and
        in that case the file is not opened.
    """
    H, K = Divergence.compute(H, K)
    try:
        if cdf:
            if not (analytics.isValidCDF(H) and analytics.isValidCDF(K)):
                raise smp.InvalidProbabilityDistributionException('Invalid cumulative distribution')
        else:
            if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
                raise smp.InvalidProbabilityDensityException('Invalid probability density')
            # Values are saved in cumulative form.
            H = analytics.pdfTocdf(H)
            K = analytics.pdfTocdf(K)

        # ``H.items() + K.items()`` only works where items() returns lists
        # (Python 2); a set union of the keys works on Python 2 and 3 alike.
        all_keys = list(set(H.keys()) | set(K.keys()))

        # presumably fill_gaps inserts the missing keys in place (its return
        # value is never used) -- verify against analytics.fill_gaps
        analytics.fill_gaps(H, all_keys, cdf=True)
        analytics.fill_gaps(K, all_keys, cdf=True)

        # The context manager closes the file even if a write fails.
        with open(file_name, 'w') as out_file:
            for key in sorted(all_keys):
                out_file.write(str(key) + ' ' + str(K[key]) + '\n')
    except smp.InvalidProbabilityDistributionException:
        logging.exception('Invalid probability distribution')
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
def compute(H, K, lamda):
    """Return the lambda-weighted sum of two KL divergences.

    ``H`` and ``K`` are rebound to the pair returned by
    ``Divergence.compute``.  With ``M`` being the first value returned by
    ``analytics.smooth(H, K, lamda)``, the result is
    ``lamda * KLDivergence.compute(H, M)
    + (1 - lamda) * KLDivergence.compute(K, M)``.

    Args:
        H: Mapping from key to probability.
        K: Mapping from key to probability.
        lamda: Weight; must satisfy ``0 < lamda < 1``.

    Returns:
        The weighted sum, or ``None`` when ``lamda`` is outside ``(0, 1)`` or
        an input fails ``analytics.isValidPDF``; both situations are logged
        instead of being raised.
    """
    try:
        H, K = Divergence.compute(H, K)
        if lamda <= 0 or lamda >= 1:
            raise ValueError('Invalid lambda value for LambdaDivergence. It should be in (0,1).')

        if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
            # Diagnostic dump of K; sent to the logger rather than stdout.
            logging.debug('Rejected distribution K: %s', K)
            raise smp.InvalidProbabilityDensityException('Invalid probability density')

        smoothH, _ = analytics.smooth(H, K, lamda)
        # NOTE(review): if KLDivergence.compute can return None (e.g. it logs
        # a failure instead of raising), the arithmetic below raises
        # TypeError -- confirm.
        return (lamda * KLDivergence.compute(H, smoothH)
                + (1 - lamda) * KLDivergence.compute(K, smoothH))
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
    except ValueError:
        # Any ValueError raised inside the try ends up here, not only the
        # range check above.  The text matches the (0,1) requirement.
        logging.exception('Invalid lambda value for LambdaDivergence. It should be in (0,1).')
def plot_single_distribution(normalized_distr, cdf=True, xlabel=None, title=None, *args, **kwds):
    """Plot a distribution as a CDF or PDF on the current axes (``plt.gca()``).

    In both modes ``normalized_distr`` is handled as a PDF: with ``cdf``
    true it is converted with ``analytics.pdfTocdf`` before plotting, with
    ``cdf`` false it is validated with ``analytics.isValidPDF`` and plotted
    directly.  When a ``kind`` keyword is supplied, ``fill_gaps`` is first
    called on ``normalized_distr`` over an integer key range.  The y-axis is
    fixed to ``[0, 1]`` and the x-limits are widened by 10% on the left and
    5% on the right.

    Args:
        normalized_distr: Mapping from key to probability.  Keys must be
            integers when ``kind`` is given, because they are fed to
            ``range``.
        cdf: Plot the cumulative distribution if true, the PDF otherwise.
        xlabel: Optional x-axis label.
        title: Optional axes title.
        *args: Extra positional arguments forwarded to ``Series.plot``.
        **kwds: Extra keyword arguments forwarded to ``Series.plot``.

    Raises:
        sampling.InvalidProbabilityDensityException: If ``cdf`` is false and
            ``normalized_distr`` fails ``analytics.isValidPDF``.
    """
    axis = plt.gca()
    if xlabel is not None:
        axis.set_xlabel(xlabel)
    if title is not None:
        axis.set_title(title)

    if not cdf and not analytics.isValidPDF(normalized_distr):
        raise sampling.InvalidProbabilityDensityException('Invalid probability density')

    axis.set_ylim(0, 1)

    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    fill_requested = 'kind' in kwds

    if cdf:
        axis.set_ylabel('CDF')
        if fill_requested:
            # NOTE(review): gaps are filled with cdf=True before the PDF is
            # converted, and this range stops before the largest key while
            # the PDF branch uses max + 1 -- confirm both are intended.
            fill_gaps(normalized_distr,
                      range(min(normalized_distr.keys()), max(normalized_distr.keys())),
                      cdf=True)
        # Separate name so the ``cdf`` flag is not overwritten.
        cdf_distr = analytics.pdfTocdf(normalized_distr)
        if analytics.trim_cdf(cdf_distr):
            Series(cdf_distr, copy=True).plot(ax=axis, *args, **kwds)
    else:
        axis.set_ylabel('PDF')
        if fill_requested:
            fill_gaps(normalized_distr,
                      range(min(normalized_distr.keys()), max(normalized_distr.keys()) + 1),
                      cdf=False)
        if analytics.trim_pdf(normalized_distr):
            Series(normalized_distr, copy=True).plot(ax=axis, *args, **kwds)

    # Pad the x-axis so the curve does not touch the frame.
    xmin, xmax = plt.xlim()
    axis.set_xlim(xmin - 0.10 * (xmax - xmin), xmax + 0.05 * (xmax - xmin))
def compute(H, K, lamda):
    """Compute a lambda-weighted combination of two KL divergences.

    After rebinding ``H`` and ``K`` to the pair returned by
    ``Divergence.compute``, the first value returned by
    ``analytics.smooth(H, K, lamda)`` is used as the reference distribution
    ``M``.  The result is ``lamda * KLDivergence.compute(H, M)
    + (1 - lamda) * KLDivergence.compute(K, M)``.

    Args:
        H: Mapping from key to probability.
        K: Mapping from key to probability.
        lamda: Weight strictly between 0 and 1.

    Returns:
        The weighted sum.  ``None`` is returned when ``lamda`` lies outside
        ``(0, 1)`` or when an input fails ``analytics.isValidPDF``; these
        failures are logged, not raised.
    """
    try:
        H, K = Divergence.compute(H, K)
        if lamda <= 0 or lamda >= 1:
            raise ValueError(
                'Invalid lambda value for LambdaDivergence. It should be in (0,1).'
            )

        inputs_ok = analytics.isValidPDF(H) and analytics.isValidPDF(K)
        if not inputs_ok:
            # Diagnostic dump of K goes to the logger instead of stdout.
            logging.debug('Rejected distribution K: %s', K)
            raise smp.InvalidProbabilityDensityException(
                'Invalid probability density')

        smoothH, _ = analytics.smooth(H, K, lamda)
        # NOTE(review): should KLDivergence.compute return None (e.g. because
        # it logs failures instead of raising), the arithmetic below raises
        # TypeError -- confirm.
        from_h = KLDivergence.compute(H, smoothH)
        from_k = KLDivergence.compute(K, smoothH)
        return lamda * from_h + (1 - lamda) * from_k
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
    except ValueError:
        # Every ValueError raised inside the try is reported here, not only
        # the range check.  The text matches the (0,1) requirement.
        logging.exception(
            'Invalid lambda value for LambdaDivergence. It should be in (0,1).'
        )
def compute(H, K):
    """Compute the base-2 KL divergence sum of ``H`` with respect to ``K``.

    Requires both inputs to pass ``analytics.isValidPDF``.  Each is passed
    to ``analytics.fill_gaps`` with the union of both key sets, after which
    ``H[key] * log2(H[key] / K[key])`` is accumulated over every key whose
    ``H`` value is non-zero.

    Args:
        H: Mapping from key to probability.
        K: Mapping from key to probability.

    Returns:
        The accumulated sum.  ``None`` is returned when an input is not a
        valid PDF, or when a key has a non-zero ``H`` value and a zero ``K``
        value; both cases are logged, not raised.
    """
    # NOTE(review): the result of Divergence.compute is not used here,
    # although other callers in this file rebind H and K to it -- confirm
    # that this is intentional.
    Divergence.compute(H, K)
    try:
        inputs_ok = analytics.isValidPDF(H) and analytics.isValidPDF(K)
        if not inputs_ok:
            raise smp.InvalidProbabilityDensityException('Invalid probability density')

        # Concatenating items() with ``+`` requires list results (Python 2
        # only); the key-set union is equivalent and also runs on Python 3.
        all_keys = list(set(H.keys()) | set(K.keys()))

        # presumably fill_gaps adds the missing keys in place, since its
        # return value is ignored -- verify against analytics.fill_gaps
        analytics.fill_gaps(H, all_keys)
        analytics.fill_gaps(K, all_keys)

        total = 0
        for key in sorted(all_keys):
            h_val = H[key]
            if h_val != 0:
                k_val = K[key]
                if k_val == 0:
                    raise ValueError('KL Divergence is not defined for the input. Please see the definition of KL-Divergence')
                total += h_val * math.log(h_val / k_val, 2)
        return total
    except ValueError:
        # Every ValueError raised inside the try (math.log included) is
        # reported with this message.
        logging.error('KL Divergence is not defined for the input. Please see the definition of KL-Divergence')
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
def plot_single_distribution(normalized_distr,
                             cdf=True,
                             xlabel=None,
                             title=None,
                             *args,
                             **kwds):
    """Draw ``normalized_distr`` as a CDF or a PDF on ``plt.gca()``.

    ``normalized_distr`` is treated as a PDF in either mode.  If ``cdf`` is
    true it is converted with ``analytics.pdfTocdf`` and the result is
    plotted; if ``cdf`` is false it must pass ``analytics.isValidPDF`` and
    is plotted as is.  Supplying a ``kind`` keyword makes the function call
    ``fill_gaps`` on ``normalized_distr`` over an integer key range first.
    The y-axis is set to ``[0, 1]``; the x-limits are extended by 10% to the
    left and 5% to the right.

    Args:
        normalized_distr: Mapping from key to probability.  With ``kind``
            given, keys must be integers because they are passed to
            ``range``.
        cdf: True to plot the cumulative distribution, false for the PDF.
        xlabel: Optional label for the x-axis.
        title: Optional title for the axes.
        *args: Positional arguments passed through to ``Series.plot``.
        **kwds: Keyword arguments passed through to ``Series.plot``.

    Raises:
        sampling.InvalidProbabilityDensityException: When ``cdf`` is false
            and ``normalized_distr`` fails ``analytics.isValidPDF``.
    """
    axis = plt.gca()
    if xlabel is not None:
        axis.set_xlabel(xlabel)
    if title is not None:
        axis.set_title(title)

    if not cdf and not analytics.isValidPDF(normalized_distr):
        raise sampling.InvalidProbabilityDensityException(
            'Invalid probability density')

    axis.set_ylim(0, 1)

    if cdf:
        axis.set_ylabel('CDF')
        # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
        if 'kind' in kwds:
            # NOTE(review): the fill uses cdf=True although the data is still
            # a PDF at this point, and the range excludes the largest key
            # (the PDF branch uses max + 1) -- confirm both are intended.
            low = min(normalized_distr.keys())
            high = max(normalized_distr.keys())
            fill_gaps(normalized_distr, range(low, high), cdf=True)
        # Distinct name so the ``cdf`` flag is not overwritten.
        cumulative = analytics.pdfTocdf(normalized_distr)
        if analytics.trim_cdf(cumulative):
            Series(cumulative, copy=True).plot(ax=axis, *args, **kwds)
    else:
        axis.set_ylabel('PDF')
        if 'kind' in kwds:
            low = min(normalized_distr.keys())
            high = max(normalized_distr.keys())
            fill_gaps(normalized_distr, range(low, high + 1), cdf=False)
        if analytics.trim_pdf(normalized_distr):
            Series(normalized_distr, copy=True).plot(ax=axis, *args, **kwds)

    # Add horizontal padding around the plotted data.
    xmin, xmax = plt.xlim()
    span = xmax - xmin
    axis.set_xlim(xmin - 0.10 * span, xmax + 0.05 * span)