Example 1
def bfs(src,dst,seeds,printLayers=False):
    # holds vertices in the current layer of the bfs
    Z = ak.unique(seeds)
    # holds the visited vertices
    V = ak.unique(Z) # holds vertices in Z to start with
    # frontiers
    F = [Z]
    while Z.size != 0:
        if printLayers:
            print("Z.size = ",Z.size," Z = ",Z)
        fZv = ak.in1d(src,Z) # mask of edges whose src vertex is in the current frontier
        W = ak.unique(dst[fZv]) # unique dst endpoints of those edges
        Z = ak.setdiff1d(W,V) # subtract out vertices already visited
        V = ak.union1d(V,Z) # union current frontier into vertices already visited
        F.append(Z)
    return (F,V)
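
A minimal usage sketch for the bfs helper above, assuming a running Arkouda server and a tiny, made-up directed edge list stored as parallel src/dst arrays:

import arkouda as ak

ak.connect()  # assumes an Arkouda server is reachable on the default host/port

# tiny directed graph: 0->1, 0->2, 1->3, 2->3, 3->4 (illustrative only)
src = ak.array([0, 0, 1, 2, 3])
dst = ak.array([1, 2, 3, 3, 4])

# breadth-first search from vertex 0
frontiers, visited = bfs(src, dst, ak.array([0]), printLayers=True)
print("visited =", visited)           # every vertex reachable from 0
print("frontiers =", len(frontiers))  # one pdarray per BFS layer, plus a final empty frontier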
Example 2
    def apply_permutation(self, perm):
        """
        Apply a permutation to an entire DataFrame.

        This may be useful if you want to unsort a DataFrame, or even to
        apply an arbitrary permutation such as the inverse of a sorting
        permutation.

        Parameters
        ----------
        perm : ak.pdarray
            A permutation array. Should be the same size as the data
            arrays, and should consist of the integers [0,size-1] in
            some order. Very minimal testing is done to ensure this
            is a permutation.

        See Also
        --------
        sort
        """

        if (perm.min() != 0) or (perm.max() != perm.size - 1):
            raise ValueError("The indicated permutation is invalid.")
        if ak.unique(perm).size != perm.size:
            raise ValueError("The indicated permutation is invalid.")
        for key, val in self.data.items():
            self[key] = self[key][perm]
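
A short sketch of how apply_permutation might be used, assuming df is an instance of the DataFrame class this method belongs to and that its columns are Arkouda pdarrays (the column name 'a' is made up):

import arkouda as ak

ak.connect()  # assumes a running Arkouda server

perm = ak.argsort(df['a'])   # a sorting permutation for the (hypothetical) column 'a'
df.apply_permutation(perm)   # reorder every column by that permutation

# the argsort of a permutation is its inverse, so this restores the original order
df.apply_permutation(ak.argsort(perm))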
Example 3
    def _merge_all(self, array):

        idx = self.index
        callback = aku.get_callback(idx)

        for other in array:

            self._check_types(other)
            idx = aku.concatenate([idx, other.index], ordered=False)

        return Index(callback(ak.unique(idx)))
Example 4
def conn_comp(src, dst, printCComp=False, printLayers=False):
    unvisited = ak.unique(src)
    if printCComp: print("unvisited size = ", unvisited.size, unvisited)
    components = []
    while unvisited.size > 0:
        # use lowest numbered vertex as representative vertex 
        rep_vertex = unvisited[0]
        # bfs from rep_vertex
        layers,visited = bfs(src,dst,ak.array([rep_vertex]),printLayers)
        # add vertices in this component to the list of components
        components.append(visited)
        # subtract out visited from unvisited vertices
        unvisited = ak.setdiff1d(unvisited,visited)
        if printCComp: print("  visited size = ", visited.size, visited)
        if printCComp: print("unvisited size = ", unvisited.size, unvisited)
    return components
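
An illustrative call to conn_comp, assuming a running Arkouda server and a symmetric edge list (each undirected edge listed in both directions so bfs can reach a whole component); the graph is made up:

import arkouda as ak

ak.connect()  # assumes a running Arkouda server

# two undirected components, {0, 1, 2} and {3, 4}
src = ak.array([0, 1, 1, 2, 2, 0, 3, 4])
dst = ak.array([1, 0, 2, 1, 0, 2, 4, 3])

components = conn_comp(src, dst, printCComp=True)
for comp in components:
    print(comp)   # one pdarray of vertex ids per connected component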
Example 5
def invert_permutation(perm):
    """ Find the inverse of a permutation array.

    Parameters
    ----------
    perm : ak.pdarray
        The permutation array.

    Returns
    -------
    ak.array
        The inverse of the permutation array.

    """
    # I think this suffers from overflow errors on large arrays.
    #if perm.sum() != (perm.size * (perm.size -1)) / 2:
    #    raise ValueError("The indicated permutation is invalid.")
    if ak.unique(perm).size != perm.size:
        raise ValueError("The array is not a permutation.")
    return ak.coargsort([perm, ak.arange(0, perm.size)])
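
A quick sanity check for invert_permutation, assuming a running Arkouda server: applying a permutation and then its inverse should give back the original order.

import arkouda as ak

ak.connect()  # assumes a running Arkouda server

data = ak.randint(0, 100, 10)
perm = ak.argsort(data)                  # a sorting permutation
inv = invert_permutation(perm)
assert (data[perm][inv] == data).all()   # round-trip recovers the original order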
Example 6
def invert_permutation(perm):
    """
    Find the inverse of a permutation array.

    Parameters
    ----------
    perm : ak.pdarray
        The permutation array.

    Returns
    -------
    ak.pdarray
        The inverse of the permutation array.
    """

    # Test if the array is actually a permutation
    rng = perm.max() - perm.min()
    if (ak.unique(perm).size != perm.size) or (perm.size != rng + 1):
        raise ValueError("The array is not a permutation.")
    return ak.coargsort([perm, ak.arange(0, perm.size)])
Example 7
def right_align(left, right):
    """ Map two arrays of sparse values to the 0-up index set implied by the right array, discarding values from left that do not appear in right.

    Parameters
    ----------
    left : pdarray
        Left-hand identifiers
    right : pdarray
        Right-hand identifiers that define the index

    Returns
    -------
    keep : pdarray, bool
        Logical index of left-hand values that survived
    aligned : (pdarray, pdarray)
        Left and right arrays with values replaced by 0-up indices
    """
    uright = ak.unique(right)
    keep = ak.in1d(left, uright)
    fleft = left[keep]
    return keep, align(fleft, right)
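
A sketch of calling right_align, assuming the align helper it relies on is importable from the same module; the identifier values are made up:

import arkouda as ak

ak.connect()  # assumes a running Arkouda server

left = ak.array([100, 205, 310, 415])
right = ak.array([205, 310, 999])

keep, (left_idx, right_idx) = right_align(left, right)
print(keep)        # boolean mask over left: which values also occur in right
print(left_idx)    # surviving left values, relabeled as 0-up indices
print(right_idx)   # right values, relabeled with the same 0-up indices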
Example 8
    def __init__(self,
                 segments,
                 values,
                 copy=False,
                 lengths=None,
                 grouping=None):
        """
        An array of variable-length arrays, also called a skyline array or ragged array.

        Parameters
        ----------
        segments : pdarray, int64
            Start index of each sub-array in the flattened values array
        values : pdarray
            The flattened values of all sub-arrays
        copy : bool
            If True, make a copy of the input arrays; otherwise, just store a reference.

        Returns
        -------
        SegArray
            Data structure representing an array whose elements are variable-length arrays.

        Notes
        -----
        Keyword args 'lengths' and 'grouping' are not user-facing. They are used by the
        attach method.
        """
        if not isinstance(segments, ak.pdarray) or segments.dtype != ak.int64:
            raise TypeError("Segments must be int64 pdarray")
        if not ak.is_sorted(segments) or (ak.unique(segments).size !=
                                          segments.size):
            raise ValueError("Segments must be unique and in sorted order")
        if segments.size > 0:
            if segments.min() != 0 or segments.max() >= values.size:
                raise ValueError(
                    "Segments must start at zero and be less than values.size")
        elif values.size > 0:
            raise ValueError(
                "Cannot have non-empty values with empty segments")
        if copy:
            self.segments = segments[:]
            self.values = values[:]
        else:
            self.segments = segments
            self.values = values
        self.size = segments.size
        self.valsize = values.size
        if lengths is None:
            self.lengths = self._get_lengths()
        else:
            self.lengths = lengths
        self.dtype = values.dtype
        if grouping is None:
            if self.size == 0:
                self.grouping = ak.GroupBy(ak.zeros(0, dtype=ak.int64))
            else:
                # Treat each sub-array as a group, for grouped aggregations
                self.grouping = ak.GroupBy(
                    ak.broadcast(self.segments, ak.arange(self.size),
                                 self.valsize))
        else:
            self.grouping = grouping
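
A construction sketch for the class this __init__ belongs to, assuming a running Arkouda server; segments mark where each sub-array starts in the flattened values:

import arkouda as ak

ak.connect()  # assumes a running Arkouda server

# three sub-arrays: [1, 2, 3], [4, 5], and [6]
segments = ak.array([0, 3, 5])
values = ak.array([1, 2, 3, 4, 5, 6])

sa = SegArray(segments, values)
print(sa.size)      # 3 sub-arrays
print(sa.lengths)   # [3 2 1], computed by the class's _get_lengths helper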
Example 9
    # print out the pdarrays in the dict and their types
    print(nfDF['start'],nfDF['start'].dtype)
    print(nfDF['srcIP'],type(nfDF['srcIP'])) # Strings doesn't have a dtype?!?
    print(nfDF['dstIP'],type(nfDF['dstIP'])) # Strings doesn't have a dtype?!?
    print(nfDF['srcPort'],nfDF['srcPort'].dtype)
    print(nfDF['dstPort'],nfDF['dstPort'].dtype)
    print(nfDF)

    # print out the symbols the server knows about
    print(ak.info(ak.AllSymbols))

    # print out how much memory is being used by the server
    print("mem used: ", ak.get_mem_used())

    # get the unique srcIP and the counts for each unique srcIP
    u,c = ak.unique(nfDF['srcIP'],return_counts=True)
    print("unique values = ", u.size,u)
    print("value counts = ", c.size,c)
    
    # get the unique dstIP and the counts for each unique dstIP
    u,c = ak.unique(nfDF['dstIP'],return_counts=True)
    print("unique values = ", u.size,u)
    print("value counts = ", c.size,c)
    
    # get the unique srcPort and the counts for each unique srcPort
    u,c = ak.unique(nfDF['srcPort'],return_counts=True)
    print("unique values = ", u.size,u)
    print("value counts = ", c.size,c)
    
    # get the unique dstPort and the counts for each unique dstPort
    u,c = ak.unique(nfDF['dstPort'],return_counts=True)
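
One way to build on the unique/count pairs above (a sketch, assuming u and c come from the last ak.unique call):

    # rank the unique dstPort values by how often they occur
    order = ak.argsort(c)         # permutation that sorts the counts ascending
    print(u[order], c[order])     # least to most frequent values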
Example 10
        inds |= (strings == word)
    assert ((inds == matches).all())
    print("in1d and iter passed")

    # argsort
    akperm = ak.argsort(strings)
    aksorted = strings[akperm].to_ndarray()
    npsorted = np.sort(test_strings)
    assert ((aksorted == npsorted).all())
    catperm = ak.argsort(cat)
    catsorted = cat[catperm].to_ndarray()
    assert ((catsorted == npsorted).all())
    print("argsort passed")

    # unique
    akuniq = ak.unique(strings)
    catuniq = ak.unique(cat)
    akset = set(akuniq.to_ndarray())
    catset = set(catuniq.to_ndarray())
    assert (akset == catset)
    # There should be no duplicates
    assert (akuniq.size == len(akset))
    npset = set(np.unique(test_strings))
    # When converted to a set, should agree with numpy
    assert (akset == npset)
    print("unique passed")

    # groupby
    g = ak.GroupBy(strings)
    gc = ak.GroupBy(cat)
    # Unique keys should be same result as ak.unique
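
A brief sketch of the check that comment is leading into, reusing the g, akset, and akuniq names from the snippet above: the unique keys of the GroupBy should agree with ak.unique.

    # GroupBy keys should match ak.unique on the same Strings array
    gkeys = set(g.unique_keys.to_ndarray())
    assert gkeys == akset
    assert g.unique_keys.size == akuniq.size
    print("groupby unique keys agree with unique")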
Example 11
# client verbose flag off
ak.v = False

# filter an int64 range and a float64 linspace with a boolean mask
a = ak.arange(0,10,1)
b = a[a<5]
a = ak.linspace(0,9,10)
b = a[a<5]
print(b)

# verbose on; raise the iteration threshold so list(a) can pull the array back
ak.v = True
ak.pdarrayIterThresh = 1000
a = ak.arange(0,10,1)
print(list(a))

# unique values and a 20-bin histogram of 40 random ints in [10,30)
ak.v = False
a = ak.randint(10,30,40)
u = ak.unique(a)
h = ak.histogram(a,bins=20)
print(a)
print(h.size,h)
print(u.size,u)

# histogram of 50 random ints in [10,30)
ak.v = False
a = ak.randint(10,30,50)
h = ak.histogram(a,bins=20)
print(a)
print(h)

# random booleans; sum() counts the True values
ak.v = False
a = ak.randint(0,2,50,dtype=ak.bool)
print(a)
print(a.sum())
Example 12
    def _merge(self, other):
        self._check_types(other)

        callback = aku.get_callback(self.index)
        idx = aku.concatenate([self.index, other.index], ordered=False)
        return Index(callback(ak.unique(idx)))