Example #1
    def argsort_nearest(self, target_position, target_site_family=None):
        """Return an ndarray of site indices, sorted by distance from the target

        Parameters
        ----------
        target_position : array_like
        target_site_family : int
            Look for a specific sublattice site. By default any will do.

        Returns
        -------
        np.ndarray

        Examples
        --------
        >>> sites = Sites(([0, 1, 1.1], [0, 0, 0], [0, 0, 0]), [0, 1, 0])
        >>> np.all(sites.argsort_nearest([1, 0, 0]) == [1, 2, 0])
        True
        >>> np.all(sites.argsort_nearest([1, 0, 0], target_site_family=0) == [2, 0, 1])
        True
        """
        distances = self.distances(target_position)
        if target_site_family is None:
            return np.argsort(distances)
        else:
            return ma.argsort(
                ma.array(distances, mask=(self.ids != target_site_family)))
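The key trick in argsort_nearest is that numpy.ma.argsort sorts masked entries to the end, so masking the unwanted families acts as a filter without renumbering the sites. A minimal standalone sketch (toy distances and families arrays standing in for a real Sites object):

import numpy as np
import numpy.ma as ma

distances = np.array([1.0, 0.0, 0.1])  # distance of each site from the target
families = np.array([0, 1, 0])         # sublattice family of each site

# Masked entries sort to the end, so family-1 sites never come first.
order = ma.argsort(ma.array(distances, mask=(families != 0)))
print(order)  # [2 0 1], matching the doctest above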
Example #2
    def predict(self, mu, sigma, Ys, model=None):
        # Predictive variance from the spread of warped Gaussian quantiles.
        s = sigma + self.sigma
        alpha = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
        q = np.outer(np.sqrt(2 * s), erfinv(2 * alpha - 1)) + mu

        z = self.warp(model.Y)[0]
        I = argsort(z, axis=0)
        sortz = sort(z, axis=0)
        sortt = model.Y[I]

        quant = self.warpinv(q, self._get_initial_points(q, sortz, sortt), 100)
        # Spread between the 0.9 and 0.1 quantiles as a variance proxy.
        var = np.square((quant[:, 8] - quant[:, 0]) / 4)

        # Predictive mean via 10-point Gauss-Hermite quadrature.
        H = np.array([7.6e-07, 0.0013436, 0.0338744, 0.2401386, 0.6108626,
                      0.6108626, 0.2401386, 0.0338744, 0.0013436, 7.6e-07])
        quad = np.array([-3.4361591, -2.5327317, -1.7566836, -1.0366108, -0.3429013,
                         0.3429013, 1.0366108, 1.7566836, 2.5327317, 3.4361591])
        mu_quad = np.outer(np.sqrt(2 * s), quad) + mu
        mean = self.warpinv(mu_quad, self._get_initial_points(mu_quad, sortz, sortt), 100)
        mean = mdot(mean, H[:, np.newaxis]) / np.sqrt(math.pi)

        # Log predictive density of the held-out targets, if given.
        # (Guarding the column slice here avoids indexing None when Ys is absent.)
        lpd = None
        if Ys is not None:
            ts, w = self.warp(Ys)
            lpd = -0.5 * np.log(2 * math.pi * s) - 0.5 * np.square(ts - mu) / s + np.log(w)
            lpd = lpd[:, 0][:, np.newaxis]
        return mean, var[:, np.newaxis], lpd
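For what it's worth, the hard-coded H and quad arrays match the degree-10 Gauss-Hermite weights and nodes, so the mean computation is a 10-point quadrature over the latent Gaussian (the erfinv line plays the analogous role for quantiles). A short sanity check, using only NumPy:

import math
import numpy as np

# Degree-10 Gauss-Hermite nodes/weights; these reproduce quad and H above.
nodes, weights = np.polynomial.hermite.hermgauss(10)

# E[g(Y)] for Y ~ N(mu, s) is approximately sum_i w_i * g(sqrt(2*s)*x_i + mu) / sqrt(pi).
# With g(y) = y**2, mu = 0 and s = 2, the result should be the variance, 2.0.
mu, s = 0.0, 2.0
approx = np.sum(weights * (np.sqrt(2 * s) * nodes + mu) ** 2) / math.sqrt(math.pi)
print(round(approx, 6))  # 2.0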
Example #3
    def argsort_nearest(self, target_position, target_sublattice=None):
        """Return an ndarray of site indices, sorted by distance from the target

        Parameters
        ----------
        target_position : array_like
        target_sublattice : int
            Look for a specific sublattice site. By default any will do.

        Returns
        -------
        np.ndarray

        Examples
        --------
        >>> sites = Sites(([0, 1, 1.1], [0, 0, 0], [0, 0, 0]), [0, 1, 0])
        >>> np.all(sites.argsort_nearest([1, 0, 0]) == [1, 2, 0])
        True
        >>> np.all(sites.argsort_nearest([1, 0, 0], target_sublattice=0) == [2, 0, 1])
        True
        """
        distances = self.distances(target_position)
        if target_sublattice is None:
            return np.argsort(distances)
        else:
            target_sublattice = self._translate_sublattice(target_sublattice)
            return ma.argsort(
                ma.array(distances, mask=(self.sublattices != target_sublattice)))
Example #4
    def aggrade_front(self, grid, tstep, source_cells_Qs, elev, SL):
        # Ensure the units of Qs and tstep match!
        self.total_sed_supplied_in_tstep = source_cells_Qs * tstep
        self.Qs_sort_order = np.argsort(source_cells_Qs)[::-1]  # descending order
        # Keep only the cells that actually supply sediment this tstep.
        self.Qs_sort_order = self.Qs_sort_order[:np.count_nonzero(source_cells_Qs > 0)]
        for i in self.Qs_sort_order:
            subaerial_nodes = elev >= SL
            subsurface_elev_array = ma.array(elev, mask=subaerial_nodes)
            xy_tuple = (grid.node_x[i], grid.node_y[i])
            distance_map = grid.get_distances_of_nodes_to_point(xy_tuple)
            loop_number = 0
            closest_node_list = ma.argsort(ma.masked_array(distance_map, mask=subsurface_elev_array.mask))
            smooth_cone_elev_from_apex = subsurface_elev_array[i] - distance_map * self.tan_repose_angle
            while True:
                filled_all_cells_flag = False
                accom_space_at_controlling_node = SL - subsurface_elev_array[closest_node_list[loop_number]]
                new_max_cone_surface_elev = smooth_cone_elev_from_apex + accom_space_at_controlling_node
                # Elementwise "or" needs the | operator, not the `or` keyword.
                subsurface_elev_array.mask = (elev >= SL) | (new_max_cone_surface_elev < elev)
                depth_of_accom_space = new_max_cone_surface_elev - subsurface_elev_array
                accom_depth_order = ma.argsort(depth_of_accom_space)[::-1]
                # Vectorised method to calc fill volumes:
                area_to_fill = ma.cumsum(grid.cellarea[accom_depth_order])
                differential_depths = ma.empty_like(depth_of_accom_space)
                differential_depths[:-1] = (depth_of_accom_space[accom_depth_order[:-1]]
                                            - depth_of_accom_space[accom_depth_order[1:]])
                differential_depths[-1] = depth_of_accom_space[accom_depth_order[-1]]
                incremental_volumes = ma.cumsum(differential_depths * area_to_fill)
                match_position_of_Qs_in = incremental_volumes.searchsorted(self.total_sed_supplied_in_tstep[i])
                try:
                    depths_to_add = depth_of_accom_space - depth_of_accom_space[match_position_of_Qs_in]
                except IndexError:
                    # Supply exceeds the remaining accommodation space: fill every cell.
                    depths_to_add = depth_of_accom_space - depth_of_accom_space[match_position_of_Qs_in - 1]
                    filled_all_cells_flag = True
                depths_to_add = depths_to_add[ma.where(depths_to_add >= 0)]
                if not filled_all_cells_flag:
                    depths_to_add += (self.total_sed_supplied_in_tstep[i]
                                      - incremental_volumes[match_position_of_Qs_in - 1]) / area_to_fill[match_position_of_Qs_in - 1]
                    subsurface_elev_array[accom_depth_order[:len(depths_to_add)]] = depths_to_add
                    self.total_sed_supplied_in_tstep[i] = 0
                    break
                else:
                    subsurface_elev_array[accom_depth_order] = depths_to_add
                    self.total_sed_supplied_in_tstep[i] -= incremental_volumes[-1]
                    loop_number += 1

        return elev
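The "vectorised method to calc fill volumes" is the core of the routine: sort cells by accommodation depth, accumulate areas and depth differentials, then searchsorted the sediment supply into the cumulative volume curve. A stripped-down sketch with invented depths and unit cell areas:

import numpy as np

depths = np.array([2.0, 0.5, 1.2, 0.1])  # accommodation depth per cell
areas = np.ones_like(depths)             # cell areas (unit cells here)
supply = 1.5                             # sediment volume to distribute

order = np.argsort(depths)[::-1]         # fill the deepest cells first
area_to_fill = np.cumsum(areas[order])
diffs = np.empty_like(depths)
diffs[:-1] = depths[order[:-1]] - depths[order[1:]]
diffs[-1] = depths[order[-1]]
incremental_volumes = np.cumsum(diffs * area_to_fill)

# Position on the cumulative fill curve where the supply runs out.
k = np.searchsorted(incremental_volumes, supply)
print(k, incremental_volumes)  # 1 [0.8 2.2 3.4 3.8]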
Example #5
from typing import List, Tuple

from numpy import argsort, squeeze
from scipy.sparse import spmatrix


def top_words_in_doc(doc_term_frequency: spmatrix,
                     features: List[str],
                     row_id: int,
                     top_n: int = 20) -> List[Tuple[str, float]]:
    """
    Top TF-IDF features in a specific document (matrix row).

    :param doc_term_frequency: Sparse document-term matrix.
    :param features: Feature names for the document-term matrix.
    :param row_id: Row index of the document.
    :param top_n: Number of top words to display on the wordcloud.
    :return: List of (word, tf_idf) tuples, highest score first.
    """
    row = squeeze(doc_term_frequency[row_id].toarray())
    top_n_ids = argsort(row)[::-1][:top_n]
    return [(features[i], row[i]) for i in top_n_ids]
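A hedged usage sketch: the .toarray() call implies a scipy sparse document-term matrix, such as the output of scikit-learn's TfidfVectorizer (assumed here; the original calling context is not shown):

from sklearn.feature_extraction.text import TfidfVectorizer

corpus = ["the cat sat on the mat",
          "the dog ate my homework",
          "the cat ate the dog"]
vec = TfidfVectorizer()
tfidf = vec.fit_transform(corpus)  # sparse document-term matrix

# Three highest-scoring words of the last document.
print(top_words_in_doc(tfidf, list(vec.get_feature_names_out()), row_id=2, top_n=3))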
Example #6
def probAdjustEquil(binProb, rates, uncert, threshold=0.0, fullCalcClust=False, fullCalcBins=False):
    """Adjust the bin populations in binProb using the rates and uncert matrices.

    fullCalcBins --> True for a weighted average, False for the simple calculation
    fullCalcClust --> True for a weighted average, False for the simple calculation
    threshold --> minimum weight (relative to the max) for another value to be
            included in the average; only matters if fullCalcBins == True
            (or later, perhaps, if fullCalcClust == True)
    """

    # Check that the rate matrix is square
    Ni, Nj = rates.shape
    if Ni != Nj:
        print('\nWARNING: Not a square matrix!\n')

    zi = np.where(binProb == 0.0)[0]  # indices of bins with zero probability
    
    rates_uncert = UncertMath.UncertContainer(rates,rates - uncert,rates + uncert)
    
    # STEP 1a: Create matrix of ratios of probabilities based on DIRECT estimates
    # that is, ij element is p_i / p_j = k_ji / k_ij
    
    ratios_direct = rates_uncert.transpose() / rates_uncert  

    # STEP 1b: Create averaged matrix of ratios of probabilities based on both direct and indirect estimates
    # Indirect means '3rd bin' estimates: p_i / p_j = ( k_ki / k_ik ) ( k_jk / k_kj )
    # Turns out this is not helpful, so generally leave fullCalcBins = False
    if fullCalcBins:
        # Calculate indirect ratios using Einstein Summation convention where
        # ratios_indirect_kij  = ( k_ki / k_ik ) ( k_jk / k_kj ) = ratios_direct_ik * ratios_direct_kj
        ri_vals = np.einsum('ik,kj->kij',ratios_direct.vals,ratios_direct.vals)
        ri_min = np.einsum('ik,kj->kij',ratios_direct.dmin,ratios_direct.dmin)
        ri_max = np.einsum('ik,kj->kij',ratios_direct.dmax,ratios_direct.dmax)
        ratios_indirect = UncertMath.UncertContainer(ri_vals,ri_min,ri_max,mask=ratios_direct.vals.mask)

        # Threshold indirect ratios 
        ti = ratios_indirect.wt < ratios_direct * threshold
        ratios_indirect.mask = ti
        ratios_indirect.update_mask()

        ratios_indirect.concatenate(ratios_direct,axis=0) 
        ratios_average = ratios_indirect.weighted_average(axis=0)
 
    else:
        ratios_average = ratios_direct.weighted_average(axis=0,expaxis=0)
    

    # STEP 2: Form clusters

    # STEP 2a: Sort probability ratios based on uncertainty
    # Sort uncertainties of ratios_average subject to the convention that p_i < p_j
    
    i,j = np.triu_indices(Ni,1) # indices of ij pairs where i != j

    # Remove pairs that include a bin that has zero probability
    nzi = (binProb[i] != 0.0) & (binProb[j] != 0.0)
    i = i[nzi]
    j = j[nzi]

    vals = ma.vstack((ratios_average.vals[i,j],ratios_average.vals[j,i]))
    ias = ma.argsort(vals,axis=0,fill_value=np.inf)
    
    ordered_ind = np.vstack((i,j))
    flip_ind = np.nonzero(ias[0,:]) # Find pairs in which to select ji rather than ij
    ordered_ind[:,flip_ind[0]] = ordered_ind[:,flip_ind[0]][::-1]
    
    iind = ordered_ind[0,:]
    jind = ordered_ind[1,:]
    uncertij = ratios_average.uncert[iind,jind] # Get the uncert for ij pairs

    count = uncertij.count() # Count of the unmasked uncertainties
    ias = ma.argsort(uncertij,fill_value=np.inf) # Get the indices that would sort uncertij
    iind = iind[ias[:count]] # Sort the indices excluding masked/undefined values
    jind = jind[ias[:count]]


    # STEP 2b: Create ClusterList object and cluster bins
    clusters = BinCluster.ClusterList(ratios_average,Ni)

    if fullCalcClust:
        clusters.join((iind,jind))
    else:
        clusters.join_simple((iind,jind))

    total_prob = 0.0  # total probability in all clusters
    for cid in clusters.cluster_contents:
        binlist = list(clusters.cluster_contents[cid])
        if len(binlist):
            prob_cluster = binProb[binlist].sum()
            total_prob += prob_cluster

            binProb[binlist] = prob_cluster * clusters.bin_data[binlist].vals

    binProb[zi] = 0.0 # re-zero bins that previously had zero prob
    #for bi,p in enumerate(binProb):
    #    print('bin: {} -- {}'.format(bi,p))
    print('.........Total Probability: {}'.format(binProb.sum()))
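The einsum in STEP 1b encodes the third-bin identity p_i / p_j = (p_i / p_k)(p_k / p_j). A quick consistency check on exact ratios, using plain NumPy arrays rather than an UncertContainer:

import numpy as np

p = np.array([0.1, 0.3, 0.6])                # a known probability vector
ratios_direct = p[:, None] / p[None, :]      # ratios[i, j] = p_i / p_j

# ratios_indirect[k, i, j] = ratios_direct[i, k] * ratios_direct[k, j]
ratios_indirect = np.einsum('ik,kj->kij', ratios_direct, ratios_direct)

# Every third-bin estimate reproduces the direct ratio exactly.
assert np.allclose(ratios_indirect, ratios_direct[None, :, :])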
Example #7
    def closest_neighbours(self, n=3):
        """For each boid, yield the indices of up to n nearest neighbours inside the visibility range."""
        for x in self.rel_dist:
            idx, = ma.where(x < self.max_vis)
            yield idx[ma.argsort(x[idx])][:n]
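The same where-then-argsort pattern, run standalone on one row of a made-up distance matrix (the distances and max_vis are invented for the demo):

import numpy.ma as ma

x = ma.array([4.0, 0.5, 9.0, 1.5, 2.0])  # distances from one boid to the others
max_vis = 5.0

idx, = ma.where(x < max_vis)             # candidates inside the visibility range
print(idx[ma.argsort(x[idx])][:3])       # [1 3 4] -- the three nearest visible boids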