def _get_kdes(train_ats, train_pred, class_matrix, args):
    """Fit Gaussian kernel density estimates on training activation traces.

    Columns whose variance is below ``args.var_threshold`` are collected in
    ``removed_cols`` and dropped before the estimators are fitted.

    Args:
        train_ats: Activation traces of the training set. It is indexed with
            ``class_matrix[label]`` and transposed, so presumably a 2-D numpy
            array of shape (samples, columns) -- TODO confirm with caller.
        train_pred: Predictions for the training set (not used here).
        class_matrix: Indexable by label; ``class_matrix[label]`` selects the
            rows of ``train_ats`` belonging to that label.
        args: Keyboard args. Reads ``is_classification``, ``num_classes`` and
            ``var_threshold``.

    Returns:
        kdes: For classification, a dict mapping label -> ``gaussian_kde``.
            It may be incomplete, because fitting stops at the first label
            for which every column was removed. Otherwise a one-element list
            holding a single ``gaussian_kde``.
        removed_cols (list): Column indices removed by the variance
            threshold, in order of first detection.
    """
    # NOTE(review): the session handle is not used in this function; the call
    # is retained in case session initialisation is relied upon elsewhere.
    K.get_session()
    K.set_learning_phase(False)

    removed_cols = []
    if args.is_classification:
        # A column is removed for *all* labels as soon as it is (near-)constant
        # for any one label.  `seen` gives O(1) duplicate checks while
        # `removed_cols` keeps the original insertion order.
        seen = set()
        for label in range(args.num_classes):
            col_vectors = np.transpose(train_ats[class_matrix[label]])
            for i in range(col_vectors.shape[0]):
                # Cheap membership test first: skips the variance computation
                # for columns that are already marked.
                if i not in seen and np.var(col_vectors[i]) < args.var_threshold:
                    seen.add(i)
                    removed_cols.append(i)

        kdes = {}
        for label in tqdm(range(args.num_classes), desc="kde"):
            refined_ats = np.transpose(train_ats[class_matrix[label]])
            refined_ats = np.delete(refined_ats, removed_cols, axis=0)
            if refined_ats.shape[0] == 0:
                print(
                    warn("ats were removed by threshold {}".format(
                        args.var_threshold)))
                break
            kdes[label] = gaussian_kde(refined_ats)
            print(refined_ats.shape)
    else:
        col_vectors = np.transpose(train_ats)
        removed_cols = [
            i for i in range(col_vectors.shape[0])
            if np.var(col_vectors[i]) < args.var_threshold
        ]
        refined_ats = np.delete(col_vectors, removed_cols, axis=0)
        if refined_ats.shape[0] == 0:
            print(
                warn("ats were removed by threshold {}".format(
                    args.var_threshold)))
        # NOTE(review): the estimator is still fitted when every column was
        # removed; gaussian_kde will presumably reject the empty array --
        # confirm whether callers expect that failure.
        kdes = [gaussian_kde(refined_ats)]

    print(infog("The number of removed columns: {}".format(len(removed_cols))))
    return kdes, removed_cols
def __init__(self, ehefile="../Resources/EHE/EHE_effective.csv"):
    """Load a CSV table and build two weighted Gaussian KDEs from it.

    The file must provide the columns ``cos(ImpLF_zen)`` and ``wE3``; the
    latter is used as the per-row weight.

    Sets the attributes:
        density_nocos: KDE over ``rad2deg(arccos(cos(ImpLF_zen)))``,
            bandwidth factor 0.04.
        x_nocos: 180 bin edges spanning [-1, 1].
        density: KDE over ``cos(ImpLF_zen)``, bandwidth factor 0.05.
        x: 180 bin edges spanning [0, 180].

    NOTE(review): ``density_nocos`` is built from angles in degrees while
    ``x_nocos`` spans [-1, 1] (and vice versa for ``density`` / ``x``); the
    pairing looks swapped -- confirm against the callers before relying on it.

    :param ehefile: Path of the CSV file to read.
    """
    pass_sr = pd.read_csv(ehefile)
    cos_zen = np.array(pass_sr["cos(ImpLF_zen)"])
    weights = np.array(pass_sr["wE3"])
    zen_deg = np.rad2deg(np.arccos(cos_zen))

    # `normed=` was removed from np.histogram (NumPy 1.24); `density=True` is
    # the replacement and is equivalent here because the bins are equal-width.
    # Only the bin edges are kept; the histogram counts are discarded.
    bins = np.linspace(-1, 1, 180)
    _, edges = np.histogram(cos_zen,
                            weights=weights / 0.457845099495,
                            bins=bins,
                            density=True)
    self.density_nocos = gaussian_kde(zen_deg, weights=weights)
    self.density_nocos.set_bandwidth(0.04)
    self.x_nocos = edges

    bins = np.linspace(0, 180, 180)
    _, edges = np.histogram(zen_deg,
                            weights=weights / 0.457845099495,
                            bins=bins,
                            density=True)
    self.density = gaussian_kde(cos_zen, weights=weights)
    self.density.set_bandwidth(0.05)
    self.x = edges
def score(args):
    """Compute the ESCORE of each input structure and write it to a file.

    A KDE (bandwidth 0.25) is fitted on the score matrix of the reference
    structure ``args.ff``.  Every frame of every file in ``args.files`` is
    then evaluated against that KDE and one line ``"<score> <file>.<idx>"``
    is appended to the output file ``args.name``.  The output starts with a
    commented header listing all attributes of ``args``.

    Reads ``args.files``, ``args.name``, ``args.ff``, ``args.res_mode``,
    ``args.cutoff`` and ``args.xtc``.

    :param args: Parsed command-line namespace.
    :returns: 0 on completion.
    :raises AssertionError: if ``args.xtc`` is given together with more than
        one entry in ``args.files``.
    """
    files = args.files
    print("# Calculating ESCORE...")

    # `with` guarantees the output file is closed even if reading or scoring
    # a structure raises.
    with open(args.name, 'w') as fh:
        fh.write("# This is a baRNAba run.\n")
        for k in sorted(args.__dict__):
            fh.write("# " + str(k) + " " + str(args.__dict__[k]) + "\n")

        # calculate interaction matrix of the reference structure
        ref_pdb = reader.Pdb(args.ff, res_mode=args.res_mode)
        ref_mat = ref_pdb.model.get_mat_score(args.cutoff)
        kernel = kde.gaussian_kde(ref_mat)
        kernel.set_bandwidth(0.25)
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("# KDE computed. Bandwidth= %s" % (kernel.factor,))
        print("# Calculating ESCORE...")

        if args.xtc is not None:
            assert len(files) == 1, "# Error: when providing XTC trajectories, specify a single reference PDB file with -f"

        for path in files:
            cur_pdb = reader.Pdb(path, res_mode=args.res_mode)
            # NOTE(review): called even when args.xtc is None; presumably
            # reader.Pdb treats that as "no trajectory" -- confirm.
            cur_pdb.set_xtc(args.xtc)
            idx = 0
            # The loop runs until read() returns a negative index.
            while idx >= 0:
                # The samples use a slightly larger cutoff (+0.2) than the
                # reference matrix.
                cur_mat = cur_pdb.model.get_mat_score(args.cutoff + 0.2)
                val = kernel(cur_mat)
                fh.write('%8.5f %s.%i \n' % (sum(val), path, idx))
                idx = cur_pdb.read()

    return 0
def kde_posterior_pdf(paramx,
                      paramy,
                      posterior,
                      npoints=100,
                      bin_limits=None,
                      bw_method='scott',
                      fft=True):
    r"""
    Kernel density estimate (KDE) of two-dimensional posterior pdf with
    Gaussian kernel. See e.g.
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_ and
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_
    for more information.

    .. warning::
        By default, the band-width is estimated with Scott's rule of thumb.
        This could lead to biased/inaccurate estimates of the pdf if the
        parent distribution isn't approximately Gaussian.

    .. warning::
        There is no special treatment for e.g. boundaries, which can be
        problematic.

    .. warning::
        Posterior pdf normalized such that maximum value is one.

    :param paramx: Data column of parameter x
    :type paramx: numpy.ndarray
    :param paramy: Data column of parameter y
    :type paramy: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of points to evaluate PDF at per dimension
    :type npoints: integer
    :param bin_limits: Limits of the evaluation grid. If None or empty, the \
        range of the data is used.
    :type bin_limits: list [[xmin,xmax],[ymin,ymax]]
    :param bw_method: Method for determining band-width or bandwidth
    :type bw_method: string or float
    :param fft: Whether to use Fast-Fourier transform
    :type fft: bool

    :returns: KDE of posterior pdf at x and y centers; ``pdf[i, j]`` is the \
        value at ``(bin_centers_x[i], bin_centers_y[j])``
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers_x: \
        numpy.ndarray, bin_centers_y: numpy.ndarray)

    :Example:

    >>> npoints = 100
    >>> pdf, x, y = kde_posterior_pdf(data[2], data[3], data[0], npoints=npoints)
    >>> assert len(pdf) == npoints
    >>> assert len(x) == npoints
    >>> assert len(y) == npoints
    """
    # Explicit None/empty test: a bare truthiness check raises for numpy
    # array limits ("truth value of an array is ambiguous").
    if bin_limits is not None and len(bin_limits) > 0:
        upper_x = max(bin_limits[0])
        lower_x = min(bin_limits[0])
        upper_y = max(bin_limits[1])
        lower_y = min(bin_limits[1])
    else:
        upper_x = max(paramx)
        lower_x = min(paramx)
        upper_y = max(paramy)
        lower_y = min(paramy)

    kde_func = gaussian_kde(np.array((paramx, paramy)),
                            weights=posterior,
                            bw_method=bw_method,
                            fft=fft)

    centers_x = np.linspace(lower_x, upper_x, npoints)
    centers_y = np.linspace(lower_y, upper_y, npoints)

    # 2 x npoints**2 array of grid coordinates, x varying slowest, so that the
    # reshape below yields pdf[ix, iy].
    grid_x, grid_y = np.meshgrid(centers_x, centers_y, indexing='ij')
    points = np.vstack((grid_x.ravel(), grid_y.ravel()))

    kde = kde_func(points)
    kde = np.reshape(kde, (npoints, npoints))

    # Normalize the pdf so that its maximum value is one. NB in other functions,
    # normalize such that area is one.
    kde = kde / kde.max()

    return _kde_posterior_pdf_2D(kde, centers_x, centers_y)
def kde_posterior_pdf(parameter,
                      posterior,
                      npoints=500,
                      bin_limits=None,
                      norm_area=False,
                      bw_method='scott',
                      fft=True
                      ):
    r"""
    Kernel density estimate (KDE) of one-dimensional posterior pdf with
    Gaussian kernel. See e.g.
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_ and
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_
    for more information.

    .. warning::
        By default, the band-width is estimated with Scott's rule of thumb.
        This could lead to biased/inaccurate estimates of the pdf if the
        parent distribution isn't approximately Gaussian.

    .. warning::
        There is no special treatment for e.g. boundaries, which can be
        problematic.

    .. warning::
        By default, posterior pdf normalized such that maximum value is one.

    :param parameter: Data column of parameter of interest
    :type parameter: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of points to evaluate PDF at
    :type npoints: integer
    :param bin_limits: Limits of the evaluation grid. If None or empty, the \
        range of the data is used.
    :type bin_limits: list [xmin, xmax]
    :param norm_area: If True, the pdf is returned exactly as evaluated by \
        the estimator (no renormalization over the plotted range is \
        applied). Otherwise, normalize the pdf so that the maximum value \
        is one.
    :type norm_area: bool
    :param bw_method: Method for determining band-width or bandwidth
    :type bw_method: string or float
    :param fft: Whether to use Fast-Fourier transform
    :type fft: bool

    :returns: KDE of posterior pdf evaluated at centers
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers: numpy.ndarray)

    :Example:

    >>> npoints = 1000
    >>> kde = kde_posterior_pdf(data[2], data[0], npoints=npoints)
    >>> assert len(kde.pdf) == npoints
    >>> assert len(kde.bin_centers) == npoints
    """
    # Explicit None/empty test: a bare truthiness check raises for numpy
    # array limits ("truth value of an array is ambiguous").
    if bin_limits is not None and len(bin_limits) > 0:
        upper = max(bin_limits)
        lower = min(bin_limits)
    else:
        upper = max(parameter)
        lower = min(parameter)

    kde_func = gaussian_kde(parameter,
                            weights=posterior,
                            bw_method=bw_method,
                            fft=fft
                            )

    centers = np.linspace(lower, upper, npoints)
    kde = kde_func(centers)

    if not norm_area:
        # Rescale so that the peak of the pdf is one.
        kde = kde / kde.max()

    return _kde_posterior_pdf_1D(kde, centers)
def kde_posterior_pdf(paramx,
                      paramy,
                      posterior,
                      npoints=100,
                      bin_limits=None,
                      bw_method='scott'):
    r"""
    Two-dimensional posterior pdf from a weighted Gaussian kernel density
    estimate (KDE), evaluated on a regular ``npoints`` x ``npoints`` grid.
    Background:
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_,
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_.

    .. warning::
        The default band-width follows Scott's rule of thumb, which may give
        biased/inaccurate results when the parent distribution is far from
        Gaussian.

    .. warning::
        Boundaries receive no special treatment, which can be problematic.

    .. warning::
        The returned pdf is scaled so that its maximum value is one.

    :param paramx: Data column of parameter x
    :type paramx: numpy.ndarray
    :param paramy: Data column of parameter y
    :type paramy: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of evaluation points per dimension
    :type npoints: integer
    :param bin_limits: Limits of the evaluation grid; data range if falsy
    :type bin_limits: list [[xmin,xmax],[ymin,ymax]]
    :param bw_method: Method for determining band-width or bandwidth
    :type bw_method: string or float

    :returns: KDE of posterior pdf at x and y centers
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers_x: \
        numpy.ndarray, bin_centers_y: numpy.ndarray)

    :Example:

    >>> npoints = 100
    >>> pdf, x, y = kde_posterior_pdf(data[2], data[3], data[0], npoints=npoints)
    >>> assert len(pdf) == npoints
    >>> assert len(x) == npoints
    >>> assert len(y) == npoints
    """
    # Choose what the grid extent is derived from: explicit limits or data.
    if bin_limits:
        span_x, span_y = bin_limits[0], bin_limits[1]
    else:
        span_x, span_y = paramx, paramy

    lower_x, upper_x = min(span_x), max(span_x)
    lower_y, upper_y = min(span_y), max(span_y)

    samples = np.array((paramx, paramy))
    estimator = gaussian_kde(samples, weights=posterior, bw_method=bw_method)

    centers_x = np.linspace(lower_x, upper_x, npoints)
    centers_y = np.linspace(lower_y, upper_y, npoints)

    # Enumerate grid nodes with x as the slow index and y as the fast index.
    nodes = []
    for x_val in centers_x:
        for y_val in centers_y:
            nodes.append([x_val, y_val])
    points = np.array(nodes).T

    density = np.reshape(estimator(points), (npoints, npoints))

    # Scale to unit maximum (other functions normalize to unit area instead).
    density = density / density.max()

    return _kde_posterior_pdf_2D(density, centers_x, centers_y)
def kde_posterior_pdf(parameter,
                      posterior,
                      npoints=500,
                      bin_limits=None,
                      norm_area=False,
                      bw_method='scott',
                      fft=True):
    r"""
    One-dimensional posterior pdf from a weighted Gaussian kernel density
    estimate (KDE), evaluated at ``npoints`` equally spaced centers.
    Background:
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_,
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_.

    .. warning::
        The default band-width follows Scott's rule of thumb, which may give
        biased/inaccurate results when the parent distribution is far from
        Gaussian.

    .. warning::
        Boundaries receive no special treatment, which can be problematic.

    .. warning::
        Unless ``norm_area`` is set, the pdf is scaled so that its maximum
        value is one.

    :param parameter: Data column of parameter of interest
    :type parameter: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of points to evaluate PDF at
    :type npoints: integer
    :param bin_limits: Limits of the evaluation range; data range if falsy
    :type bin_limits: list
    :param norm_area: If True, skip the rescaling to unit maximum and \
        return the estimator output unchanged.
    :type norm_area: bool
    :param bw_method: Method for determining band-width or bandwidth
    :type bw_method: string or float
    :param fft: Whether to use Fast-Fourier transform
    :type fft: bool

    :returns: KDE of posterior pdf evaluated at centers
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers: numpy.ndarray)

    :Example:

    >>> npoints = 1000
    >>> kde = kde_posterior_pdf(data[2], data[0], npoints=npoints)
    >>> assert len(kde.pdf) == npoints
    >>> assert len(kde.bin_centers) == npoints
    """
    # The evaluation range comes from the explicit limits when given,
    # otherwise from the samples themselves.
    span = bin_limits if bin_limits else parameter
    lower, upper = min(span), max(span)

    estimator = gaussian_kde(parameter,
                             weights=posterior,
                             bw_method=bw_method,
                             fft=fft)

    centers = np.linspace(lower, upper, npoints)
    pdf = estimator(centers)

    if norm_area:
        return _kde_posterior_pdf_1D(pdf, centers)

    # Default: scale so that the peak is one.
    return _kde_posterior_pdf_1D(pdf / pdf.max(), centers)
def kde_posterior_pdf(paramx,
                      paramy,
                      posterior,
                      npoints=100,
                      bin_limits=None,
                      bw_method='scott'):
    r"""
    Kernel density estimate of two-dimensional posterior pdf.

    .. warning::
        Outliers sometimes mess up bins. So you might want to \
        specify the bin limits.

    .. warning::
        Posterior pdf normalized such that maximum value is one.

    :param paramx: Data column of parameter x
    :type paramx: numpy.ndarray
    :param paramy: Data column of parameter y
    :type paramy: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of points to evaluate PDF at per dimension
    :type npoints: integer
    :param bin_limits: Limits of the evaluation grid. If None or empty, the \
        range of the data is used.
    :type bin_limits: list [[xmin,xmax],[ymin,ymax]]
    :param bw_method: Method for determining band-width variance
    :type bw_method: string

    :returns: KDE of posterior pdf at x and y centers; ``pdf[i, j]`` is the \
        value at ``(bin_centers_x[i], bin_centers_y[j])``
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers_x: \
        numpy.ndarray, bin_centers_y: numpy.ndarray)

    :Example:

    >>> npoints = 100
    >>> pdf, x, y = kde_posterior_pdf(data[2], data[3], data[0], npoints=npoints)
    >>> assert len(pdf) == npoints
    >>> assert len(x) == npoints
    >>> assert len(y) == npoints
    """
    # Explicit None/empty test: a bare truthiness check raises for numpy
    # array limits ("truth value of an array is ambiguous").
    if bin_limits is not None and len(bin_limits) > 0:
        upper_x = max(bin_limits[0])
        lower_x = min(bin_limits[0])
        upper_y = max(bin_limits[1])
        lower_y = min(bin_limits[1])
    else:
        upper_x = max(paramx)
        lower_x = min(paramx)
        upper_y = max(paramy)
        lower_y = min(paramy)

    kde_func = gaussian_kde(np.array((paramx, paramy)),
                            weights=posterior,
                            bw_method=bw_method,
                            )

    centers_x = np.linspace(lower_x, upper_x, npoints)
    centers_y = np.linspace(lower_y, upper_y, npoints)

    # Default ('xy') meshgrid indexing enumerates the grid with y as the slow
    # index and x as the fast one, i.e. the same point order as a
    # "for y ... for x ..." double loop.
    mesh_x, mesh_y = np.meshgrid(centers_x, centers_y)
    points = np.vstack((mesh_x.ravel(), mesh_y.ravel()))

    kde = kde_func(points)
    # Reshape gives [iy, ix]; transpose so that the result is indexed [ix, iy].
    kde = np.reshape(kde, (npoints, npoints)).T

    # Normalize the pdf so that its maximum value is one. NB in other functions,
    # normalize such that area is one.
    kde = kde / kde.max()

    return _kde_posterior_pdf_2D(kde, centers_x, centers_y)