def compute(H, K, cdf=False):
    """Return the largest absolute gap between two cumulative distributions.

    H and K are first passed through ``Divergence.compute`` (defined
    elsewhere) and replaced by the pair it returns.  They are then treated
    as mappings from a key to a probability value.

    Args:
        H: first distribution.
        K: second distribution.
        cdf: when true, H and K are validated as cumulative distributions
            and used as-is; otherwise they are validated as probability
            densities and converted with ``analytics.pdfTocdf``.

    Returns:
        The maximum of ``|H[key] - K[key]|`` over the union of both key
        sets (0 when there is no positive difference).  When validation
        fails the error is logged and ``None`` is returned implicitly.
    """
    H, K = Divergence.compute(H, K)
    try:
        if cdf:
            if not (analytics.isValidCDF(H) and analytics.isValidCDF(K)):
                raise smp.InvalidProbabilityDistributionException(
                    'Invalid cumulative distribution')
        else:
            if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
                raise smp.InvalidProbabilityDensityException(
                    'Invalid probability density')
            # Densities are compared through their cumulative form.
            H = analytics.pdfTocdf(H)
            K = analytics.pdfTocdf(K)

        # Union of the keys of both distributions.  A set union is used
        # instead of ``H.items() + K.items()`` because dict views cannot be
        # concatenated on Python 3.
        all_keys = list(set(H.keys()) | set(K.keys()))
        analytics.fill_gaps(H, all_keys, cdf=True)
        analytics.fill_gaps(K, all_keys, cdf=True)

        max_diff = 0
        for key in sorted(all_keys):
            diff = math.fabs(H[key] - K[key])
            if diff > max_diff:
                max_diff = diff
        return max_diff

    except smp.InvalidProbabilityDistributionException:
        # Deliberate best effort: log and fall through to return None.
        logging.exception('Invalid probability distribution')
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
 def compute(H, K):
     """Return the base-2 Kullback-Leibler divergence of H from K.

     Both arguments are treated as mappings from a key to a probability.
     Missing keys are filled in with ``analytics.fill_gaps`` so that both
     mappings cover the union of their key sets.

     Args:
         H: the reference probability density.
         K: the probability density H is compared against.

     Returns:
         ``sum(H[k] * log2(H[k] / K[k]))`` over the keys where ``H[k]`` is
         non-zero.  ``None`` is returned implicitly (after logging) when
         either input is not a valid density, or when some key has
         ``H[k] != 0`` and ``K[k] == 0``, where the divergence is undefined.
     """
     # Use the pair returned by Divergence.compute, as the sibling
     # divergence implementations do, instead of discarding it.
     # NOTE(review): assumes it returns a 2-tuple here as well - confirm.
     H, K = Divergence.compute(H, K)
     undefined_msg = 'KL Divergence is  not defined for the input. Please see the definition of KL-Divergence'
     try:
         if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
             raise smp.InvalidProbabilityDensityException(
                 'Invalid probability density')

         # Set union instead of ``H.items() + K.items()``: dict views cannot
         # be concatenated on Python 3.
         all_keys = list(set(H.keys()) | set(K.keys()))
         analytics.fill_gaps(H, all_keys)
         analytics.fill_gaps(K, all_keys)

         total = 0
         for key in sorted(all_keys):
             p = H[key]
             if p == 0:
                 # Terms with zero probability in H are skipped.
                 continue
             q = K[key]
             if q == 0:
                 raise ValueError(undefined_msg)
             total += p * math.log(p / q, 2)
         return total
     except ValueError:
         logging.error(undefined_msg)
     except smp.InvalidProbabilityDensityException:
         logging.exception('Invalid probability density')
def distribution_save(H, K, file_name, cdf=False):
    """Write the cumulative distribution of K to a text file.

    H and K are first passed through ``Divergence.compute`` (defined
    elsewhere) and replaced by the pair it returns.  Both are validated
    and gap-filled over the union of their keys, but only K is written.

    Args:
        H: first distribution; validated and gap-filled, not written.
        K: distribution whose values are saved.
        file_name: path of the output file; it is opened in ``'w'`` mode,
            so existing content is overwritten.
        cdf: when true, H and K are validated as cumulative distributions
            and used as-is; otherwise they are validated as probability
            densities and converted with ``analytics.pdfTocdf``.

    Returns:
        None.  Each output line is ``"<key> <value>"``, in sorted key
        order.  When validation fails the error is logged and nothing is
        written.
    """
    H, K = Divergence.compute(H, K)
    try:
        if cdf:
            if not (analytics.isValidCDF(H) and analytics.isValidCDF(K)):
                raise smp.InvalidProbabilityDistributionException('Invalid cumulative distribution')
        else:
            if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
                raise smp.InvalidProbabilityDensityException('Invalid probability density')
            # Densities are saved in their cumulative form.
            H = analytics.pdfTocdf(H)
            K = analytics.pdfTocdf(K)

        # Set union instead of ``H.items() + K.items()``: dict views cannot
        # be concatenated on Python 3.
        all_keys = list(set(H.keys()) | set(K.keys()))
        analytics.fill_gaps(H, all_keys, cdf=True)
        analytics.fill_gaps(K, all_keys, cdf=True)

        # The context manager closes the file even if a lookup or write
        # raises part-way through.
        with open(file_name, 'w') as out:
            for key in sorted(all_keys):
                out.write(str(key) + ' ' + str(K[key]) + '\n')

    except smp.InvalidProbabilityDistributionException:
        logging.exception('Invalid probability distribution')
    except smp.InvalidProbabilityDensityException:
        logging.exception('Invalid probability density')
 def compute(H, K, lamda):
     """Return the lambda divergence between the densities H and K.

     The result is
     ``lamda * KL(H, smoothH) + (1 - lamda) * KL(K, smoothH)`` where
     ``smoothH`` is the first value returned by
     ``analytics.smooth(H, K, lamda)`` and ``KL`` is
     ``KLDivergence.compute``.

     Args:
         H: first probability density.
         K: second probability density.
         lamda: mixing weight; must lie strictly between 0 and 1.

     Returns:
         The weighted sum described above.  When ``lamda`` is out of range
         or either input is not a valid density, the problem is logged and
         ``None`` is returned implicitly.
     """
     lambda_msg = 'Invalid lambda value for LambdaDivergence. It should be in (0,1).'
     try:
         H, K = Divergence.compute(H, K)
         if lamda <= 0 or lamda >= 1:
             raise ValueError(lambda_msg)
         if not (analytics.isValidPDF(H) and analytics.isValidPDF(K)):
             # Dump the second input to stdout before failing.  The call
             # form prints the same text on Python 2 and, unlike the bare
             # print statement, also parses on Python 3.
             print(K)
             raise smp.InvalidProbabilityDensityException(
                 'Invalid probability density')

         smoothH, _ = analytics.smooth(H, K, lamda)
         h_term = lamda * KLDivergence.compute(H, smoothH)
         k_term = (1 - lamda) * KLDivergence.compute(K, smoothH)
         return h_term + k_term
     except smp.InvalidProbabilityDensityException:
         logging.exception('Invalid probability density')
     except ValueError:
         # Log the same constraint that the raised error states; the
         # previous text ("should be positive") contradicted the (0,1) check.
         logging.exception(lambda_msg)
Example #5
0
def plot_single_distribution(normalized_distr,
                             cdf=True,
                             xlabel=None,
                             title=None,
                             *args,
                             **kwds):
    """Plot a normalized distribution on the current matplotlib axes.

    Args:
        normalized_distr: mapping from key to probability.  In both modes
            it is handled as a density: with ``cdf`` true it is converted
            through ``analytics.pdfTocdf`` before plotting.
        cdf: plot the cumulative distribution when true, the density
            otherwise.
        xlabel: optional x-axis label.
        title: optional axes title.
        *args: extra positional arguments forwarded to ``Series.plot``.
        **kwds: extra keyword arguments forwarded to ``Series.plot``.  When
            a ``'kind'`` keyword is present, missing keys are first filled
            in with ``fill_gaps``.

    Raises:
        sampling.InvalidProbabilityDensityException: when ``cdf`` is false
            and ``normalized_distr`` is not a valid density.
    """
    axis = plt.gca()
    if xlabel is not None:
        axis.set_xlabel(xlabel)
    if title is not None:
        axis.set_title(title)
    if not cdf and not analytics.isValidPDF(normalized_distr):
        raise sampling.InvalidProbabilityDensityException(
            'Invalid probability density')

    axis.set_ylim(0, 1)
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    fill_requested = 'kind' in kwds
    if cdf:
        axis.set_ylabel('CDF')
        if fill_requested:
            # NOTE(review): this range stops before the largest key while
            # the PDF branch includes it (max + 1) - confirm it is intended.
            fill_gaps(normalized_distr,
                      list(range(min(normalized_distr.keys()),
                                 max(normalized_distr.keys()))),
                      cdf=True)

        # Kept under its own name so the boolean ``cdf`` flag is not shadowed.
        cdf_values = analytics.pdfTocdf(normalized_distr)
        if analytics.trim_cdf(cdf_values):
            Series(cdf_values, copy=True).plot(ax=axis, *args, **kwds)

    else:
        axis.set_ylabel('PDF')
        if fill_requested:
            fill_gaps(normalized_distr,
                      list(range(min(normalized_distr.keys()),
                                 max(normalized_distr.keys()) + 1)),
                      cdf=False)
        if analytics.trim_pdf(normalized_distr):
            Series(normalized_distr, copy=True).plot(ax=axis, *args, **kwds)

    # Pad the x range: 10% of the span on the left, 5% on the right.
    xmin, xmax = plt.xlim()
    span = xmax - xmin
    axis.set_xlim(xmin - 0.10 * span, xmax + 0.05 * span)