Example #1
def findCoords(gs, candidates=None):
    if candidates is None:
        candidates = []
        # List all the possible z-levels (heights)
        zRange = list(takewhile(lambda x: x < gs.boardSize[2],
                                sort(unique(flatten(gs.heightMap())))))
        if not zRange:
            print("Board is full, cannot find legal coordinates!")
            return None
    else:
        zRange = sort(unique([third(c) for c in candidates]))
    # Do we have a choice of z-level?
    if len(zRange) == 1:
        z = zRange[0]
    else:
        print("\n", gs.boardToASCII(markedCubes=candidates))
        # Discard the maximum z height
        if zRange[-1] == gs.boardSize[2]:
            zRange = zRange[:-1]
        z = -1 + int(input("Which z-level ? (%d-%d)\n> "
                           % (zRange[0] + 1, zRange[-1] + 1)))
    candidates = [c for c in candidates if c[2] == z]
    if len(candidates) > 1:
        # Display the z-level with xy coordinates as letter-number pairs
        print('    ' + ''.join(chr(97 + x) for x in range(gs.boardSize[0])))
        print('   +' + '-' * gs.boardSize[0])
        lines = gs.boardToASCII(zRange=[z], markedCubes=candidates).split('\n')
        for y in range(gs.boardSize[1]):
            print('%s |%s' % (str(y + 1).zfill(2), lines[y]))
        print("\n")
        xy = input("Which xy coordinates ?\n> ")
        return array([ord(xy[0]) - 97, int(xy[1:]) - 1, z])
    else:
        return candidates[0]
Example #2
    def test_unique_axis_zeros(self):
        # issue 15559
        single_zero = np.empty(shape=(2, 0), dtype=np.int8)
        uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
                                     return_inverse=True, return_counts=True)

        # there's 1 element of shape (0,) along axis 0
        assert_equal(uniq.dtype, single_zero.dtype)
        assert_array_equal(uniq, np.empty(shape=(1, 0)))
        assert_array_equal(idx, np.array([0]))
        assert_array_equal(inv, np.array([0, 0]))
        assert_array_equal(cnt, np.array([2]))

        # there's 0 elements of shape (2,) along axis 1
        uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
                                     return_inverse=True, return_counts=True)

        assert_equal(uniq.dtype, single_zero.dtype)
        assert_array_equal(uniq, np.empty(shape=(2, 0)))
        assert_array_equal(idx, np.array([]))
        assert_array_equal(inv, np.array([]))
        assert_array_equal(cnt, np.array([]))

        # test a "complicated" shape
        shape = (0, 2, 0, 3, 0, 4, 0)
        multiple_zeros = np.empty(shape=shape)
        for axis in range(len(shape)):
            expected_shape = list(shape)
            if shape[axis] == 0:
                expected_shape[axis] = 0
            else:
                expected_shape[axis] = 1

            assert_array_equal(unique(multiple_zeros, axis=axis),
                               np.empty(shape=expected_shape))
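
A condensed illustration of the zero-sized-axis behavior this test pins down; a minimal sketch:

import numpy as np

a = np.empty((2, 0), dtype=np.int8)
# along axis 0 the two empty rows compare equal: one unique "element" of shape (0,)
print(np.unique(a, axis=0).shape)  # (1, 0)
# along axis 1 there are no columns at all, so there is nothing to deduplicate
print(np.unique(a, axis=1).shape)  # (2, 0)
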
Example #3
def findMove(gs, askApply=True):
    moves = gs.legalMoves()
    if len(moves) == 1:
        move = moves[0]
        print("Only one move possible :\n", moveToASCII(move))
    else:
        ok = False
        while not ok:
            # First, pick a block
            blkId = findBlock(gs, candidates=unique([snd(m) for m in moves]))
            assert blkId is not None  # since we checked that len(moves) was > 0
            # Filter the moves that have the selected block id
            moves = [m for m in moves if m[1] == blkId]
            # Then, find the coordinates on the board
            coords = findCoords(gs, candidates=unik([fst(m) for m in moves]))
            # Filter the moves that have the selected coordinates
            moves = [m for m in moves if (m[0] == coords).all()]
            # Finally, find its variation
            blkVarId = findVariation(gs, blkId,
                                     candidates=unique([third(m) for m in moves]))
            move = (coords, blkId, blkVarId)
            print("You have selected :\n", moveToASCII(move))
            print("Is this the move you wanted ? [Y/n]")
            if input("") in ["n", "N"]:
                # Start again with all possible legal moves
                moves = gs.legalMoves()
            else:
                ok = True
    if askApply:
        print("Do you want to play this move over the current gamestate ? [Y/n]")
        if input("") not in ["n", "N"]:
            gs.playMove(move)
    return move
Example #4
 def __init__(self, string):
     # TODO: its_terminal
     from MyModule.funcs import its_terminal, its_variable
     # all lowercase letters are terminals
     self.terminals = list(unique([c for c in string if c.islower()]))
     # all uppercase letters are variables
     self.variables = list(unique([c for c in string if c.isupper()]))
     self.form = string
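
The same split can be seen on a tiny input; a quick sketch assuming `unique` is `numpy.unique`:

import numpy as np

s = "aAbBa"
print(np.unique([c for c in s if c.islower()]).tolist())  # ['a', 'b']
print(np.unique([c for c in s if c.isupper()]).tolist())  # ['A', 'B']
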
Example #5
    def __init__(self, string):
        # split the string on '->' and create a Wing for each side
        left_side, right_side = string.split("->")
        self.left_wing = Wing(left_side)
        self.right_wing = Wing(right_side)
        # find the unique terminals across both wings
        self.terminals = list(
            unique(self.right_wing.terminals + self.left_wing.terminals))
        # find the unique variables across both wings
        self.variables = list(
            unique(self.right_wing.variables + self.left_wing.variables))

        self.form = str(self)
Example #6
def print_unique_counts(d):
    column_list = d.columns.tolist()
    print("number of rows: {}".format(len(d[column_list[0]])))
    print("")
    for c in column_list:
        print("number of unique {}: {}".format(
            c, len(arraysetops.unique(d[c]))))
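
For a pandas DataFrame the same report can lean on pandas itself rather than numpy's arraysetops; a minimal sketch assuming a DataFrame `d`:

import pandas as pd

d = pd.DataFrame({"city": ["NY", "LA", "NY"], "zip": [1, 2, 3]})
print("number of rows: {}".format(len(d)))
for c, n in d.nunique().items():
    print("number of unique {}: {}".format(c, n))
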
Example #7
def uniqueIdx(L):
    """
    Find the indexes of the unique elements of L,
    based on their string representation
    (works for both cubes and blocks).
    """
    return list(snd(unique([str(x) for x in L], return_index=True)))
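
What the string-representation trick yields on a small input; a quick sketch (here `snd` is assumed to select the indices from the `(values, indices)` pair):

import numpy as np

L = [np.array([0, 1]), np.array([0, 1]), np.array([2, 3])]
_, idx = np.unique([str(x) for x in L], return_index=True)
print(idx.tolist())  # [0, 2] -> first occurrence of each distinct element
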
Example #8
File: boundary.py  Project: basic-ph/feat
 def __init__(self, name, mesh, dof, value):
     self.name = name
     self.tag = mesh.field_data[name][0]
     self.dim = mesh.field_data[name][1]
     self.local_dof = np.asarray(dof)
     self.value = value
     if self.dim == 0:
         # array containing indices of elements in the boundary
         self.elements = np.nonzero(
             mesh.cell_data_dict["gmsh:physical"]["vertex"] == self.tag)[0]
         # array containing indices of nodes in the boundary
         self.nodes = unique(mesh.cells_dict["vertex"][self.elements])
     elif self.dim == 1:
         self.elements = np.nonzero(
             mesh.cell_data_dict["gmsh:physical"]["line"] == self.tag)[0]
         self.nodes = unique(mesh.cells_dict["line"][self.elements])
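
The role of `unique` here is to collapse the node ids shared by adjacent boundary elements; a minimal sketch with hypothetical connectivity data:

import numpy as np

# each row lists the node ids of one line element on the boundary
line_conn = np.array([[0, 1], [1, 2], [2, 3]])
elements = np.array([0, 1])             # indices of the tagged elements
nodes = np.unique(line_conn[elements])  # shared node 1 appears only once
print(nodes)  # [0 1 2]
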
Example #9
def s_test_function(
    bin_gdf: GeoDataFrame,
    t_yrs: float,
    n_iters: int,
    likelihood_fn: str,
    prospective: bool = False,
    critical_pct: float = 0.25,
    not_modeled_likelihood: float = 0.0,
    append_results: bool = False,
):
    N_obs = len(get_total_obs_eqs(bin_gdf, prospective=prospective))
    N_pred = get_model_annual_eq_rate(bin_gdf) * t_yrs
    N_norm = N_obs / N_pred

    bin_like_cfg = {
        "investigation_time": t_yrs,
        "likelihood_fn": likelihood_fn,
        "not_modeled_likelihood": not_modeled_likelihood,
        "n_iters": n_iters,
    }
    bin_likes = s_test_gdf_series(bin_gdf, bin_like_cfg, N_norm)

    obs_likes = np.array([bl[0] for bl in bin_likes])
    stoch_likes = np.vstack([bl[1] for bl in bin_likes]).T
    bad_bins = list(unique(list(chain(*[bl[2] for bl in bin_likes]))))

    obs_like_total = sum(obs_likes)
    stoch_like_totals = np.sum(stoch_likes, axis=1)

    if append_results:
        bin_pcts = []
        for i, obs_like in enumerate(obs_likes):
            stoch_like = stoch_likes[:, i]
            bin_pct = len(stoch_like[stoch_like <= obs_like]) / n_iters
            bin_pcts.append(bin_pct)
        bin_gdf["S_bin_pct"] = bin_pcts

        bin_gdf["N_model"] = bin_gdf.SpacemagBin.apply(
            lambda x: get_n_eqs_from_mfd(x.get_rupture_mfd()) * t_yrs)

        bin_gdf["N_obs"] = bin_gdf.SpacemagBin.apply(
            lambda x: get_n_eqs_from_mfd(x.observed_earthquakes))

    pctile = (len(stoch_like_totals[stoch_like_totals <= obs_like_total]) /
              n_iters)

    test_pass = pctile >= critical_pct
    test_res = "Pass" if test_pass else "Fail"

    test_result = {
        "critical_pct": critical_pct,
        "percentile": pctile,
        "test_pass": bool(test_pass),
        "test_res": test_res,
        "bad_bins": bad_bins,
    }

    return test_result
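
The `bad_bins` line above combines `chain` and `unique` to flatten and deduplicate the per-bin lists; isolated, the idiom looks like this (toy data):

import numpy as np
from itertools import chain

per_bin_bad = [["b1", "b2"], ["b2"], []]
bad_bins = np.unique(list(chain(*per_bin_bad))).tolist()
print(bad_bins)  # ['b1', 'b2']
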
Example #10
def calc_gini_group_score(group):
    unique_labels = unique(group)
    size = len(group)
    if size == 0:
        return 0
    scores = 0.0
    for label in unique_labels:
        # fraction of the group carrying this label
        p = len(group[group == label]) / size
        scores += p * p
    return 1 - scores
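
The same Gini impurity can be computed without the explicit loop via `return_counts=True`; a minimal vectorized sketch:

import numpy as np

def calc_gini_group_score_vec(group):
    group = np.asarray(group)
    if group.size == 0:
        return 0.0
    _, counts = np.unique(group, return_counts=True)
    p = counts / group.size
    return 1.0 - np.sum(p * p)

print(calc_gini_group_score_vec([0, 0, 1, 1]))  # 0.5
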
Example #11
    def test_unique_axis(self):
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        types.append([('a', int), ('b', int)])
        types.append([('a', int), ('b', float)])

        for dtype in types:
            self._run_axis_tests(dtype)

        msg = 'Non-bitwise-equal booleans test failed'
        data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
        result = np.array([[False, True], [True, True]], dtype=bool)
        assert_array_equal(unique(data, axis=0), result, msg)

        msg = 'Negative zero equality test failed'
        data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
        result = np.array([[-0.0, 0.0]])
        assert_array_equal(unique(data, axis=0), result, msg)
Example #12
    def test_unique_axis(self):
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        types.append([('a', int), ('b', int)])
        types.append([('a', int), ('b', float)])

        for dtype in types:
            self._run_axis_tests(dtype)

        msg = 'Non-bitwise-equal booleans test failed'
        data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
        result = np.array([[False, True], [True, True]], dtype=bool)
        assert_array_equal(unique(data, axis=0), result, msg)

        msg = 'Negative zero equality test failed'
        data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
        result = np.array([[-0.0, 0.0]])
        assert_array_equal(unique(data, axis=0), result, msg)
Example #13
        def check_all(a, b, i1, i2, c, dt):
            base_msg = "check {0} failed for type {1}"

            msg = base_msg.format("values", dt)
            v = unique(a)
            assert_array_equal(v, b, msg)

            msg = base_msg.format("return_index", dt)
            v, j = unique(a, True, False, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i1, msg)

            msg = base_msg.format("return_inverse", dt)
            v, j = unique(a, False, True, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i2, msg)

            msg = base_msg.format("return_counts", dt)
            v, j = unique(a, False, False, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, c, msg)

            msg = base_msg.format("return_index and return_inverse", dt)
            v, j1, j2 = unique(a, True, True, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)

            msg = base_msg.format("return_index and return_counts", dt)
            v, j1, j2 = unique(a, True, False, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format("return_inverse and return_counts", dt)
            v, j1, j2 = unique(a, False, True, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i2, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format(
                ("return_index, return_inverse " "and return_counts"), dt
            )
            v, j1, j2, j3 = unique(a, True, True, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)
            assert_array_equal(j3, c, msg)
Example #14
 def __init__(self, file_path):
     matlab_data = loadmat(file_path)
     self.train_data = matlab_data['dataset'][0][0][0][0][0][0]
     self.train_data_labels = matlab_data['dataset'][0][0][0][0][0][
         1].flatten()
     self.train_data_labels = self.train_data_labels - np.min(
         self.train_data_labels)
     self.num_classes = len(unique(self.train_data_labels))
     self.data_dim = self.train_data.shape[1]
     self.test_data = matlab_data['dataset'][0][0][1][0][0][0]
     self.test_data_labels = matlab_data['dataset'][0][0][1][0][0][
         1].flatten()
     self.test_data_labels = self.test_data_labels - np.min(
         self.test_data_labels)
Example #15
        def check_all(a, b, i1, i2, c, dt):
            base_msg = 'check {0} failed for type {1}'

            msg = base_msg.format('values', dt)
            v = unique(a)
            assert_array_equal(v, b, msg)

            msg = base_msg.format('return_index', dt)
            v, j = unique(a, 1, 0, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i1, msg)

            msg = base_msg.format('return_inverse', dt)
            v, j = unique(a, 0, 1, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i2, msg)

            msg = base_msg.format('return_counts', dt)
            v, j = unique(a, 0, 0, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, c, msg)

            msg = base_msg.format('return_index and return_inverse', dt)
            v, j1, j2 = unique(a, 1, 1, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)

            msg = base_msg.format('return_index and return_counts', dt)
            v, j1, j2 = unique(a, 1, 0, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format('return_inverse and return_counts', dt)
            v, j1, j2 = unique(a, 0, 1, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i2, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format(('return_index, return_inverse '
                                   'and return_counts'), dt)
            v, j1, j2, j3 = unique(a, 1, 1, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)
            assert_array_equal(j3, c, msg)
Example #16
        def check_all(a, b, i1, i2, c, dt):
            base_msg = 'check {0} failed for type {1}'

            msg = base_msg.format('values', dt)
            v = unique(a)
            assert_array_equal(v, b, msg)

            msg = base_msg.format('return_index', dt)
            v, j = unique(a, True, False, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i1, msg)

            msg = base_msg.format('return_inverse', dt)
            v, j = unique(a, False, True, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i2, msg)

            msg = base_msg.format('return_counts', dt)
            v, j = unique(a, False, False, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, c, msg)

            msg = base_msg.format('return_index and return_inverse', dt)
            v, j1, j2 = unique(a, True, True, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)

            msg = base_msg.format('return_index and return_counts', dt)
            v, j1, j2 = unique(a, True, False, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format('return_inverse and return_counts', dt)
            v, j1, j2 = unique(a, False, True, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i2, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format(('return_index, return_inverse '
                                   'and return_counts'), dt)
            v, j1, j2, j3 = unique(a, True, True, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)
            assert_array_equal(j3, c, msg)
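
The positional flag combinations exercised by `check_all` map directly onto the keyword form of `np.unique`; a small worked example:

import numpy as np

a = np.array([5, 7, 1, 2, 1, 5, 7])
values, index, inverse, counts = np.unique(
    a, return_index=True, return_inverse=True, return_counts=True)
print(values)   # [1 2 5 7]
print(index)    # [2 3 0 1]        first occurrence of each value
print(inverse)  # [2 3 0 1 0 2 3]  a == values[inverse]
print(counts)   # [2 1 2 2]
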
Example #17
def check_array_type(X, y, type):
    if y is not None:
        check_X_y(X, y)
        if type in ['cd', 'd']:
            yu, yc = at.unique(y, return_counts=True)
            if sum(yc) != len(y): np.testing.assert_equal(y.dtype, int)
            elif y.dtype != int: y = np.asarray(y, int)
        if type == 'c': np.testing.assert_equal(y.dtype, float)
        if type in ['c', 'd']:
            X = np.hstack((X, y.reshape((len(y), 1))))
            y = None
    else:
        check_array(X)
        if type == 'c': np.testing.assert_equal(X.dtype, float)
        if type == 'd': np.testing.assert_equal(X.dtype, int)
        if type == 'cd': raise ValueError("y has to be defined for type cd")
    return X, y
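
For reference, what `at.unique(y, return_counts=True)` produces for a small label vector (the counts always sum to `len(y)`); a quick sketch:

import numpy as np

y = np.array([0, 1, 1, 2])
yu, yc = np.unique(y, return_counts=True)
print(yu, yc, yc.sum() == len(y))  # [0 1 2] [1 2 1] True
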
Example #18
def plot_points(
    ax: Axes,
    x: Union[torch.Tensor, np.ndarray],
    y: Union[torch.Tensor, np.ndarray],
    cmap: List[Tuple],
):
    for i_c, c in enumerate(unique(y)):
        ax.plot(
            x[:, 0][y == c],
            x[:, 1][y == c],
            "o",
            markersize=3.5,
            markerfacecolor=(*cmap[i_c], 0.95),
            markeredgewidth=1.2,
            markeredgecolor=(*colours_rgb["white"], 0.5),
            label=f"Class {c}",
        )
    return ax
Example #19
def ConvertToContinue(c, sigma=0.01):
    '''
    Convert a discrete variable into a continuous one by applying a Gaussian
    distribution at each point
    Parameters
    ----------
        c = discrete variable
        sigma = standard deviation of the Gaussian distribution
    Returns
    -------
        newc = continuous variable
    '''
    cu = at.unique(c)
    newc = c.copy()
    newc = newc.astype(float)
    for cui in cu:
        ind = np.where(c == cui)[0]
        newc[ind] = sigma * np.random.randn(len(ind)) + cui
    #MP.plot(c, '.')
    #MP.plot(newc, 'r+')
    #MP.ylim(cu[0]-0.5, cu[-1]+0.5)
    return newc
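
A quick usage sketch (values are illustrative; `at` is assumed to be numpy's arraysetops module, as in the surrounding snippets):

import numpy as np

np.random.seed(0)
c = np.array([0, 0, 1, 1, 2])
newc = ConvertToContinue(c, sigma=0.01)
# every level keeps its mean but gains N(0, sigma^2) jitter
print(np.round(newc, 3))
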
Example #20
def find_key_length(freq, attempt):
    possible_keys = []

    # the best config so far - about 85% accuracy
    if attempt == 1:
        betapeaks, _ = find_peaks(freq, height=17, distance=4, prominence=17)
    else:
        betapeaks, _ = find_peaks(freq, height=17, distance=4, prominence=13)

    possible_keys.append([j - i for i, j in zip(betapeaks[:-1], betapeaks[1:])])
    # Keep only plausible key lengths: drop peak-to-peak gaps below 6 or above 24
    possible_keys[0] = [i for i in possible_keys[0] if 6 <= i <= 24]
    # print("Possible Key Length:", possible_keys)
    # print("Ciphertext Length Guess:", statistics.multimode(possible_keys[0]))
    # De-duplicate the candidate key lengths
    dedupedPossibleKeys = unique(possible_keys[0])
    # print("De-duplicated Keys:", dedupedPossibleKeys)
    # print("Identified Key Lengths:", len(dedupedPossibleKeys))
    return possible_keys
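
The core of the key-length guess is "gaps between coincidence peaks, filtered and deduplicated"; a minimal sketch with hypothetical peak positions:

import numpy as np

peaks = np.array([3, 11, 19, 30])        # hypothetical peak positions
gaps = np.diff(peaks)                    # [ 8  8 11]
gaps = gaps[(gaps >= 6) & (gaps <= 24)]  # keep plausible key lengths
print(np.unique(gaps))                   # [ 8 11]
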
Example #21
    def _run_axis_tests(self, dtype):
        data = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0],
                         [1, 0, 0, 0]]).astype(dtype)

        msg = 'Unique with 1d array and axis=0 failed'
        result = np.array([0, 1])
        assert_array_equal(unique(data), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=0 failed'
        result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
        assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=1 failed'
        result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
        assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

        msg = 'Unique with 3d array and axis=2 failed'
        data3d = np.array([[[1, 1], [1, 0]], [[0, 1], [0, 0]]]).astype(dtype)
        result = np.take(data3d, [1, 0], axis=2)
        assert_array_equal(unique(data3d, axis=2), result, msg)

        uniq, idx, inv, cnt = unique(data,
                                     axis=0,
                                     return_index=True,
                                     return_inverse=True,
                                     return_counts=True)
        msg = "Unique's return_index=True failed with axis=0"
        assert_array_equal(data[idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=0"
        assert_array_equal(uniq[inv], data, msg)
        msg = "Unique's return_counts=True failed with axis=0"
        assert_array_equal(cnt, np.array([2, 2]), msg)

        uniq, idx, inv, cnt = unique(data,
                                     axis=1,
                                     return_index=True,
                                     return_inverse=True,
                                     return_counts=True)
        msg = "Unique's return_index=True failed with axis=1"
        assert_array_equal(data[:, idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=1"
        assert_array_equal(uniq[:, inv], data, msg)
        msg = "Unique's return_counts=True failed with axis=1"
        assert_array_equal(cnt, np.array([2, 1, 1]), msg)
Example #22
    def _run_axis_tests(self, dtype):
        data = np.array([[0, 1, 0, 0],
                         [1, 0, 0, 0],
                         [0, 1, 0, 0],
                         [1, 0, 0, 0]]).astype(dtype)

        msg = 'Unique with 1d array and axis=0 failed'
        result = np.array([0, 1])
        assert_array_equal(unique(data), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=0 failed'
        result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
        assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=1 failed'
        result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
        assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

        msg = 'Unique with 3d array and axis=2 failed'
        data3d = np.dstack([data] * 3)
        result = data3d[..., :1]
        assert_array_equal(unique(data3d, axis=2), result, msg)

        uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
                                     return_inverse=True, return_counts=True)
        msg = "Unique's return_index=True failed with axis=0"
        assert_array_equal(data[idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=0"
        assert_array_equal(uniq[inv], data, msg)
        msg = "Unique's return_counts=True failed with axis=0"
        assert_array_equal(cnt, np.array([2, 2]), msg)

        uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
                                     return_inverse=True, return_counts=True)
        msg = "Unique's return_index=True failed with axis=1"
        assert_array_equal(data[:, idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=1"
        assert_array_equal(uniq[:, inv], data, msg)
        msg = "Unique's return_counts=True failed with axis=1"
        assert_array_equal(cnt, np.array([2, 1, 1]), msg)
Example #23
def MI_RenyiCC_Multi(X, y=None, k=0, type='c', njobs=4):
    """
    Mutual Information estimator based on the Renyi quadratic entropy and the Cauchy-Schwarz divergence
    Parameters
    ----------
        X = data of shape [n_samples, n_features]
        type = type of computation according to the variable types:
             'd' for discrete variables, 'c' for continuous variables (the default), and
             'cd' for estimating the MI of continuous variables with a discrete target y
        y = discrete target (for classification studies), array of shape (n_samples,)
        k = number of neighbors to consider for the Parzen window estimation
            (0 by default, meaning all samples are considered)
        njobs = number of parallel jobs for the computation (4 by default)
    Returns
    -------
        MI_QRCS = Mutual Information score, equal to 0 if the variables in X are independent
    """
    N = X.shape[0]
    X, y = check_array_type(X, y, type)
    if type == 'd':
        u = np.array([at.unique(x, return_counts=True) for x in X.T])
        freqs = DiscDensity(zip(*X.T), N)
        hr2c = Sum_Dot_Vect(u[:, 1], N)
        hr2 = Parallel(n_jobs=njobs, backend="threading")(
            delayed(Parallel_MI_RenyiCC_d_Multi)(i, freqs[i], u, N)
            for i in freqs)
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    elif type == 'c':
        neigh = KNearestNeighbors(X, k)
        iqrx = [np.subtract(*np.percentile(x, [75, 25])) for x in X.T]
        varx = [np.var(x) for x in X.T]
        h = 0.85 * min(1 / np.sqrt(np.mean(varx)), np.mean(iqrx)) * N**(-1 / 6)
        #hr2 = Parallel(n_jobs=njobs, backend="threading")(delayed(Parallel_MI_RenyiCC_c_Multi)(i, X, h) for i in zip(*X.T))
        hr2 = Parallel(n_jobs=njobs, backend="threading")(
            delayed(Parallel_MI_RenyiCC_c_Multi)(i, X[knn(i, neigh), :], h)
            for i in zip(*X.T))
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        pw = [s[i] for i in range(2, len(s))]
        hr2c = (1 / N**4) * reduce(mul, pw)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    elif type == "cd":
        yu, yc = at.unique(y, return_counts=True)
        #hr2y = -np.log(np.sum((yc/N)**2))
        if X.shape[1] == 1:
            hx = 0.9 * min(np.std(X), np.subtract(
                *np.percentile(X, [75, 25]))) * N**(-1 / 5)**2
        else:
            iqrx = [np.subtract(*np.percentile(x, [75, 25])) for x in X.T]
            varx = [np.var(x) for x in X.T]
            hx = 0.85 * min(1 / np.sqrt(np.mean(varx)),
                            np.mean(iqrx)) * N**(-1 / 6)
        xyu = defaultdict(list)
        z = zip(*np.hstack((X, np.reshape(y, (N, 1)))).T)
        for i in z:
            xyu[int(i[-1:][0])].append(i[:-1])
        neigh = KNearestNeighbors(X, k)
        hr2 = Parallel(n_jobs=njobs, backend="threading")(delayed(
            Parallel_MI_RenyiCC_cd_Multi)(np.array(xyu[yui]), X, hx, neigh, k)
                                                          for yui in yu)
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        nxyu = s[2]
        #Parallelize the loop over the larger of the two dimensions (number of samples
        #vs. number of features).
        #Note: with knn, the two parallel estimations differ: the first computes the knn of the
        #i-th sample across all feature dimensions, while the second computes the knn of the
        #i-th sample along each feature dimension separately.
        if X.shape[0] > X.shape[1]:
            #hr2cp = Parallel(n_jobs=njobs, backend="threading")(delayed(Parallel_MI_RenyiCC_cd_Multi_hr2c_dim0)(i, X, hx) for i in range(N))
            hr2cp = Parallel(n_jobs=njobs, backend="threading")(delayed(Parallel_MI_RenyiCC_cd_Multi_hr2c_dim0)\
                                                            (i, X[knn(i, neigh),:], hx) for i in X)
            hr2cp = reduce(mul, np.sum(np.array(hr2cp), 0))
            hr2c = (1 / N**4) * nxyu * hr2cp
        else:
            hr2cp = Parallel(n_jobs=njobs, backend="threading")(
                delayed(Parallel_MI_RenyiCC_cd_Multi_hr2c_dim1)(
                    X[:, j], hx, neigh) for j in range(X.shape[1]))
            hr2cp = reduce(mul, hr2cp)
            hr2c = (1 / N**4) * nxyu * hr2cp
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        #hr2x = -np.log((1/N**2)*hr2x)

    hr2a = max(10**(-100), hr2a)
    hr2b = max(10**(-100), hr2b)
    hr2c = max(10**(-100), hr2c)
    #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    lhr2a = -np.log(hr2a)
    lhr2b = -np.log(hr2b)
    lhr2c = -np.log(hr2c)
    MI_QRCS = lhr2a - 0.5 * lhr2b - 0.5 * lhr2c
    return MI_QRCS
Example #24
 def test_unique_axis_list(self):
     msg = "Unique failed on list of lists"
     inp = [[0, 1, 0], [0, 1, 0]]
     inp_arr = np.asarray(inp)
     assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
     assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
Example #25
def clustering_plot_and_metric(experiment_log: ExperimentLog, metric: str):
    assert metric in METRICS

    model = experiment_log.best_version.model

    # Get dm
    bs = 500
    experiment_name = experiment_log.experiment_name
    misc = experiment_log.best_version.misc
    dm = get_dm(experiment_name, misc, bs)

    # Get latent encodings
    z, y = [], []
    with torch.no_grad():
        for idx, batch in enumerate(iter(dm.test_dataloader())):
            x, _y = batch
            if isinstance(model, VanillaVAE):
                _, _, _z, _, _ = model._run_step(x)
            elif isinstance(model, V3AE):
                _, _, _, _, _, _z, _, _ = model._run_step(x)
                _z = _z[0]
            z.append(_z)
            y.append(_y)

    # [len_test_dataset, latent_size]
    z, y = torch.cat(z, dim=0), torch.cat(y)

    # Plots
    colour_names = ["pink", "navyBlue", "yellow"]
    cmap_light = ListedColormap(
        [(*colours_rgb[c_n], 0.4) for c_n in colour_names][: len(misc["digits"])]
    )
    cmap_dark = [colours_rgb[c_n] for c_n in colour_names][: len(misc["digits"])]
    x_mesh, y_mesh = (
        torch.linspace(z[:, 0].min() - 1, z[:, 0].max() + 1, steps=100),
        torch.linspace(z[:, 1].min() - 1, z[:, 1].max() + 1, steps=100),
    )
    x_mesh, y_mesh = torch.meshgrid(x_mesh, y_mesh)
    pos = torch.cat((x_mesh.reshape(-1, 1), y_mesh.reshape(-1, 1)), dim=1)

    # True with NN classifier
    clf = KNeighborsClassifier(n_neighbors=7)
    clf.fit(z, y)
    classes_mesh = clf.predict(pos).reshape(*x_mesh.shape)

    fig, ax = plt.subplots()
    ax.contourf(x_mesh, y_mesh, classes_mesh, cmap=cmap_light)
    ax = plot_points(ax, z, y, cmap_dark)

    # Kmeans
    predicted_classes, predicted_classes_mesh = np.zeros_like(y), np.zeros_like(
        classes_mesh
    )
    n_clusters = len(unique(y))
    if metric == EUCLIDEAN:
        kmeans = KMeans(n_clusters=n_clusters).fit(z)
        predicted_classes = kmeans.predict(z)
        predicted_classes_mesh = kmeans.predict(pos).reshape(*x_mesh.shape)
        print(f"[{metric}] F-Score: {f1_score(y, predicted_classes, average='micro')}")

    if metric == RIEMANNIAN:
        kmeans = RiemanninaKMeans(model, n_clusters=n_clusters).fit(z)

    fig, ax = plt.subplots()
    ax.contourf(x_mesh, y_mesh, predicted_classes_mesh, cmap=cmap_light)
    ax = plot_points(ax, z, predicted_classes, cmap_dark)

    plt.show()
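
The cluster count above is derived straight from the labels; a one-line sketch of the idiom:

import numpy as np

y = np.array([0, 2, 2, 5])
n_clusters = len(np.unique(y))  # 3 distinct labels -> 3 clusters
print(n_clusters)
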
Example #26
    def test_unique_1d(self):
        def check_all(a, b, i1, i2, c, dt):
            base_msg = 'check {0} failed for type {1}'

            msg = base_msg.format('values', dt)
            v = unique(a)
            assert_array_equal(v, b, msg)

            msg = base_msg.format('return_index', dt)
            v, j = unique(a, True, False, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i1, msg)

            msg = base_msg.format('return_inverse', dt)
            v, j = unique(a, False, True, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i2, msg)

            msg = base_msg.format('return_counts', dt)
            v, j = unique(a, False, False, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, c, msg)

            msg = base_msg.format('return_index and return_inverse', dt)
            v, j1, j2 = unique(a, True, True, False)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)

            msg = base_msg.format('return_index and return_counts', dt)
            v, j1, j2 = unique(a, True, False, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format('return_inverse and return_counts', dt)
            v, j1, j2 = unique(a, False, True, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i2, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format(('return_index, return_inverse '
                                   'and return_counts'), dt)
            v, j1, j2, j3 = unique(a, True, True, True)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)
            assert_array_equal(j3, c, msg)

        a = [5, 7, 1, 2, 1, 5, 7] * 10
        b = [1, 2, 5, 7]
        i1 = [2, 3, 0, 1]
        i2 = [2, 3, 0, 1, 0, 2, 3] * 10
        c = np.multiply([2, 1, 2, 2], 10)

        # test for numeric arrays
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        for dt in types:
            aa = np.array(a, dt)
            bb = np.array(b, dt)
            check_all(aa, bb, i1, i2, c, dt)

        # test for object arrays
        dt = 'O'
        aa = np.empty(len(a), dt)
        aa[:] = a
        bb = np.empty(len(b), dt)
        bb[:] = b
        check_all(aa, bb, i1, i2, c, dt)

        # test for structured arrays
        dt = [('', 'i'), ('', 'i')]
        aa = np.array(list(zip(a, a)), dt)
        bb = np.array(list(zip(b, b)), dt)
        check_all(aa, bb, i1, i2, c, dt)

        # test for ticket #2799
        aa = [1. + 0.j, 1 - 1.j, 1]
        assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])

        # test for ticket #4785
        a = [(1, 2), (1, 2), (2, 3)]
        unq = [1, 2, 3]
        inv = [0, 1, 0, 1, 1, 2]
        a1 = unique(a)
        assert_array_equal(a1, unq)
        a2, a2_inv = unique(a, return_inverse=True)
        assert_array_equal(a2, unq)
        assert_array_equal(a2_inv, inv)

        # test for chararrays with return_inverse (gh-5099)
        a = np.chararray(5)
        a[...] = ''
        a2, a2_inv = np.unique(a, return_inverse=True)
        assert_array_equal(a2_inv, np.zeros(5))

        # test for ticket #9137
        a = []
        a1_idx = np.unique(a, return_index=True)[1]
        a2_inv = np.unique(a, return_inverse=True)[1]
        a3_idx, a3_inv = np.unique(a, return_index=True,
                                   return_inverse=True)[1:]
        assert_equal(a1_idx.dtype, np.intp)
        assert_equal(a2_inv.dtype, np.intp)
        assert_equal(a3_idx.dtype, np.intp)
        assert_equal(a3_inv.dtype, np.intp)
Example #27
    def fit(self,
            X,
            y,
            sample_mask=None,
            X_argsorted=None,
            check_input=True,
            sample_weight=None):

        # Deprecations
        if sample_mask is not None:
            warn(
                "The sample_mask parameter is deprecated as of version 0.14 "
                "and will be removed in 0.16.", DeprecationWarning)

        # Convert data
        random_state = check_random_state(self.random_state)
        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csc")
            if issparse(X):
                X.sort_indices()
                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                    raise ValueError("No support for np.int64 index based "
                                     "sparse matrices")

        # Determine output settings
        n_samples, self.n_features_ = X.shape
        is_classification = isinstance(self, ClassifierMixin)

        y = np.atleast_1d(y)

        if y.ndim == 1:
            # reshape is necessary to preserve data contiguity,
            # unlike y[:, np.newaxis], which does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        if is_classification:
            y = np.copy(y)

            self.classes_ = []
            self.n_classes_ = []

            for k in six.moves.range(self.n_outputs_):
                classes_k, y[:, k] = unique(y[:, k], return_inverse=True)
                self.classes_.append(classes_k)
                self.n_classes_.append(classes_k.shape[0])

        else:
            self.classes_ = [None] * self.n_outputs_
            self.n_classes_ = [1] * self.n_outputs_

        self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
        max_depth = 1
        max_features = 10

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        if len(y) != n_samples:
            raise ValueError("Number of labels=%d does not match "
                             "number of samples=%d" % (len(y), n_samples))
        if self.min_samples_split <= 0:
            raise ValueError("min_samples_split must be greater than zero.")
        if self.min_samples_leaf <= 0:
            raise ValueError("min_samples_leaf must be greater than zero.")
        if max_depth <= 0:
            raise ValueError("max_depth must be greater than zero. ")

        if sample_weight is not None:
            if (getattr(sample_weight, "dtype", None) != DOUBLE
                    or not sample_weight.flags.contiguous):
                sample_weight = np.ascontiguousarray(sample_weight,
                                                     dtype=DOUBLE)
            if len(sample_weight.shape) > 1:
                raise ValueError("Sample weights array has more "
                                 "than one dimension: %d" %
                                 len(sample_weight.shape))
            if len(sample_weight) != n_samples:
                raise ValueError("Number of weights=%d does not match "
                                 "number of samples=%d" %
                                 (len(sample_weight), n_samples))

        if self.method == 'bp':
            self.tree_ = _fit_binary_decision_stump_breakpoint(
                X, y, sample_weight, X_argsorted, self.calculate_probabilites)
        elif self.method == 'bp_threaded':
            self.tree_ = _fit_binary_decision_stump_breakpoint_threaded(
                X, y, sample_weight, X_argsorted, self.calculate_probabilites)
        else:
            self.tree_ = _fit_binary_decision_stump_breakpoint(
                X, y, sample_weight, X_argsorted, self.calculate_probabilites)

        if self.n_outputs_ == 1:
            self.n_classes_ = self.n_classes_[0]
            self.classes_ = self.classes_[0]

        return self
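
The per-output class encoding in the fit loop above is the classic `return_inverse` idiom: one call yields both the sorted class labels and an integer-coded copy of `y`. A minimal sketch:

import numpy as np

y = np.array(["cat", "dog", "cat", "bird"])
classes, y_encoded = np.unique(y, return_inverse=True)
print(classes.tolist())    # ['bird', 'cat', 'dog']
print(y_encoded.tolist())  # [1, 2, 1, 0] -> indices into classes
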
Example #28
def freqFinder(fileName, freq, ciphertext, multiDimArray, testNum):
    # ***************************************************************
    # test 1 & 2 common processing logic
    # ***************************************************************

    # ***************************************************************
    # Guess encryption key length using maximum coincidences method
    # ***************************************************************
    start = time.time()

    possible_keys = []
    possible_keys = find_key_length(freq, 1)

    print("Possible Key Length(s):", possible_keys)

    if len(possible_keys[0]) != 0:
        guessedKey = statistics.multimode(possible_keys[0])
    else:
        possible_keys = find_key_length(freq, 2)
        if len(possible_keys[0]) != 0:
            guessedKey = statistics.multimode(possible_keys[0])
            print("Guessed key length in attempt 2")
        else:
            print("ERROR: Could not guess key length - Exiting")
            exit(1)

    guessedKey = unique(possible_keys[0])
    print("Guessed Key Length(s):", guessedKey)

    # Loop through all guessed key lengths
    # save the %accuracy and corresponding decrypted cipher text for each key length
    # select the decrypted string with the highest accuracy
    # run the best decrypted string through fuzzer
    # generate final fuzzed output
    # ************************************** TO IMPROVE DECRYPTION ACCURACY ***************
    # decr_pt_map = {}
    # for gk in guessedKey:
    #   ### for each key select the other end for rand chars if accuracy < 50% & select the higher of the 2
    #   decr_pt_map = decrypt(gk)
    # best_accuracy = max(decr_pt_map keys)
    #
    # ### if best_accuracy is still bad run through all key lengths from 1 to 24 not in guessedKey list
    # if best_accuracy is < 10%:
    #   for gk in range(1, 25):
    #       if gk in guessedKey:
    #           continue
    #       decr_pt_map = decrypt(gk)
    # best_accuracy = max(decr_pt_map keys)
    # best_pt = decr_pt_map[best_accuracy]
    #
    # ### run best_pt through fuzzer to get final_pt
    # compute total runtime
    # log output
    # return run_time
    # ************************************** TO IMPROVE DECRYPTION ACCURACY ***************

    # define new func which takes a single keyLen as input & returns decr_pt_map
    # decr_pt_map uses %accuracy as key and decrypted string as value
    # def decrypt(keyLen)

    # Break out the cipherString into Key Length chunks for Index of Coincidence Calculations
    # sort the guessedKey array of possible lengths to use the smallest one, if multiple peaks found
    # guessedKey.sort()

    # tempGuessKey = keyLen
    best_tokenized_plaintext = ""
    max_accuracy = -1

    for gk in range(0, len(guessedKey)):
        tempGuessKey = guessedKey[gk]
        cipherDict = []
        for keyIndex in range(0, tempGuessKey):
            cipherStr = ""
            for y in range(keyIndex, len(ciphertext), tempGuessKey):
                cipherStr += ciphertext[y]
            cipherDict.append(cipherStr)

        distributionArray = []
        for i in range(tempGuessKey):
            distributionArray.append(get_distribution(cipherDict[i]))

        # Get a sum of total ciphertext dictionary character values
        tempCharSum = []
        for i in range(tempGuessKey):
            tempCharSum.append(sum(distributionArray[i]))

        # Cipher Text IOC
        cipherIndexOfCoincidence = []

        # Diff Plaintext IOC Array
        deltaPlaintextIndexOfCoincidence = []
        # initialize plaintext Dictionary
        plaintextDict = []

        # CipherText IOC Generator
        for i in range(len(tempCharSum)):
            ioc = 0
            for y in range(len(distributionArray[i])):
                ioc += (distributionArray[i][y] / tempCharSum[i])**2
            cipherIndexOfCoincidence.append(ioc)

        # ***************************************************************
        # test 1 processing logic
        # ***************************************************************

        if testNum == 1:
            # Plaintext IOC Generator
            plaintextDictFile = fileName
            #plaintextDictFile = fileName
            f = open(plaintextDictFile)
            plainTextlines = f.readlines()

            # Plaintext Dictionary Populator
            for y in range(len(plainTextlines)):
                if y % 2:
                    stripped = lambda s: "".join(i for i in s if (96 < ord(i) < 123) or ord(i) == 32)
                    plainTextlines[y] = stripped(plainTextlines[y])
                    plaintextDict.append(plainTextlines[y])

            # Declare temporary value and delta for difference in plaintext/ciphertext
            adjustedKeyLength=0
            deltaMsgIocList=[]
            plaintxtMin=0
            # Iterate through strings in plaintext dictionary and build plaintext IOC
            # 5 plaintext line input loop through them. dependency: ciphertext IOC.
            for i in range(len(plaintextDict)):
                adjustedKeyLength = guessedKey[gk] - round((len(ciphertext) - len(plaintextDict[i])) * guessedKey[gk] / len(ciphertext))

                # Process IOC into Key Length chunks for Index of Coincidence Calculations
                plainIOCDict=[]
                tempPlainMsg = plaintextDict[i]

                # Breaking down into groups of characters -
                for keyIndex in range(0, adjustedKeyLength):
                    plainIOCStr =''
                    for y in range(keyIndex, len(tempPlainMsg), adjustedKeyLength):
                        plainIOCStr += tempPlainMsg[y]
                    plainIOCDict.append(plainIOCStr)

                # take first group of characters and place into distributionArray
                distributionArray=[]

                for z in range(adjustedKeyLength):
                    #distributionArray.append(get_distribution(plainIOCDict[z]))
                    temp = get_distribution(plainIOCDict[z])
                    distributionArray.append(temp)

                # Get a sum of total plaintext dictionary character values in each segment/group of chars
                tempCharSum=[]
                for w in range(adjustedKeyLength):
                    tempCharSum.append(sum(distributionArray[w]))

                # Plaintext IOC Array
                plaintextIndexOfCoincidence=[]

                # Plaintext/CipherText IOC Generator
                for p in range(len(tempCharSum)):
                    ioc=0
                    for y in range(len(distributionArray[p])):
                        # calculate IOC per char group
                        ioc+=(distributionArray[p][y] / tempCharSum[p])**2
                    plaintextIndexOfCoincidence.append(ioc)
                # Have line and IOCs for one message
                deltaIOC=0
                # Delta Calculation # of groups in plaintext index - for loop through 7 groups/bags. Compute delta
                for c in range(adjustedKeyLength):
                    # plaintxt = 7 bags.
                    deltaIOC += (cipherIndexOfCoincidence[c]-plaintextIndexOfCoincidence[c]) **2
                deltaMsgIocList.append(deltaIOC)
                plaintxtMin = min(deltaMsgIocList)

            # do the decryption here
            res = [i for i, j in enumerate(deltaMsgIocList) if j == plaintxtMin]
            # output decrypted message
            print("Decrypted Plaintext for test-1 (deltaIoC technique): ", plaintextDict[res[0]])

        # ***************************************************************
        # test 2 processing logic
        # ***************************************************************
        elif testNum == 2:
            # enhanced bad bucket logic March 02 2021
            # 1. Calc IoC for the 400 word dict - expected to closely match any of the cipher buckets that are not random
            # 2. we already have IoC's for each of our cipher buckets (including rand buckets)
            # 3. we already know the number of rand chars per key length
            # 4. find the largest IoC differential between dictIoC and cipherIoC buckets
            # 5. mark the cipher buckets equaling the rand chars per key len that have the max IoC differential

            # find bad buckets - ciphertext chars that need to be dropped
            # find the number of random chars inserted per key length
            # if the first bucket is bad (low IoC) insert bucket numbers starting from 0
            # if the last bucket is bad (low IoC) insert bucket numbers starting from t-1
            badBucketlist = []

            adjustedKeyLength = guessedKey[gk] - round((len(ciphertext) - 500) * guessedKey[gk] / len(ciphertext))

            randchars = guessedKey[gk] - adjustedKeyLength

            # Uncomment next 6 lines for prev badBucketList strategy
            # if cipherIndexOfCoincidence[0] < cipherIndexOfCoincidence[guessedKey[gk] - 1]:
            #     for i in range(0, randchars):
            #         badBucketlist.insert(i, i)
            # else:
            #     for i in range(0, randchars):
            #         badBucketlist.insert(i, guessedKey[gk] - (1+i))
            tmpCipherIOC = list(cipherIndexOfCoincidence)
            for r in range(0, randchars):
                min_idx = tmpCipherIOC.index(min(tmpCipherIOC))
                badBucketlist.insert(r, min_idx)
                # mark this bucket as taken so the next pass finds the next-lowest IoC
                tmpCipherIOC[min_idx] = 999.0

            # ================================================================================
            # identifying bad buckets based on an absolute value of IoC - FAILURE RATE HIGH
            # for i in (0, 1, guessedKey[gk]-1, guessedKey[gk]-2):
            #     if cipherIndexOfCoincidence[i] < 0.06399:
            #         badBucketlist.append(i)
            # ================================================================================
            # find the bad bucket based on min(IoC) and add to bad bucket list - NOT WORKABLE
            # if len(badBucketlist) == 0:
            #     badBucketlist.append(cipherIndexOfCoincidence.index(min(cipherIndexOfCoincidence)))
            # ================================================================================
            print(f'Bad Buckets ({randchars} rand char(s) per key): Random chars at index: {badBucketlist} Cipher IoC: {cipherIndexOfCoincidence}')

            cleanCipherBuckets = []
            for i in range(0, guessedKey[gk]):
                if i not in badBucketlist:
                    cleanCipherBuckets.append(cipherDict[i])

            cleanCipherString = ""
            for j in range(0, len(cleanCipherBuckets[0])):
                for i in range(0, len(cleanCipherBuckets)):
                    if j >= len(cleanCipherBuckets[i]):
                        break
                    cleanCipherString += cleanCipherBuckets[i][j]

            decrypt_key = []
            curr_chi_squared = 0.0
            plaintextBuckets = []

            # chi-squared computation is performed for test 2 only - BEGIN
            # j loop is to iterate through each clean bucket
            # each clean bucket represents a string which is a mono-alphabetic shift
            # loop i, iterates through each char shift for each clean cipher bucket (string)
            # chi_squared is computed for each shifted string (total of 26 + original cipher str)
            # the min chi_squared across all shifts for a specific bucket is the most likely shift amount

            for j in range(0, len(cleanCipherBuckets)):
                # print("clean cipher bucket :[", j, "]: ", cleanCipherBuckets[j])
                min_chi_squared = 9999999.0
                for i in range(0, len(alphabet)):
                    shifted_cipher_str = ""
                    for c in cleanCipherBuckets[j]:
                        shifted_c = (alphabet_map[c] + i) % len(alphabet)
                        shifted_cipher_str += alphaDict[shifted_c]
                    curr_chi_squared = round(chi_squared(fileName, shifted_cipher_str), 2)
                    if curr_chi_squared < min_chi_squared:
                        min_chi_squared = curr_chi_squared
                        bucket_shift_key = i
                        plaintext_bucket_str = shifted_cipher_str
                decrypt_key.insert(j, bucket_shift_key)
                # plaintextBuckets.insert(j, plaintext_bucket_str)
                plaintextBuckets.append(plaintext_bucket_str)
                # print("decrypted plaintext bucket :[", j, "]: right-shifted by [", bucket_shift_key, "]: ", plaintextBuckets[j])
            print(f'Decryption Key = {decrypt_key}')

            # reconstitute plaintext buckets into a contiguous decrypted plaintext string
            decryptedPlaintext = ""
            for j in range(0, len(plaintextBuckets[0])):
                for i in range(0, len(plaintextBuckets)):
                    if j >= len(plaintextBuckets[i]):
                        break
                    decryptedPlaintext += plaintextBuckets[i][j]

            # print chi-squared values
            # for i in range(0, len(cleanCipherBuckets)):
            #     print("chi-squared for clean cipher bucket [", i, "]:", round(chi_squared(cleanCipherBuckets[i]),2))
            # print("chi-squared for plain text: ", round(chi_squared(plaintextDict[res[0]]),2))
            # print("chi-squared for cipher text: ", round(chi_squared(ciphertext),2))

            # split the decrypted plaintext string on spaces
            # look-up each word using bestMatchfinder(source, fuzzyWord)
            # add searched word to the final decrypted string

            tokenized_plaintext = decryptedPlaintext.split()

            badWords = 0
            for fuzzy in tokenized_plaintext:
                if fuzzy.rstrip() not in wordDict:
                    badWords += 1

            accuracy = ((len(tokenized_plaintext) - badWords) / len(tokenized_plaintext)) * 100
            accuracy = round(accuracy, 2)

            print(f'Decryption accuracy {accuracy}% found for guessedKey {guessedKey[gk]}')

            if accuracy > max_accuracy:
                best_gk = guessedKey[gk]
                max_accuracy = accuracy
                best_decrypted_plaintext = decryptedPlaintext
                best_tokenized_plaintext = tokenized_plaintext

            if accuracy > 99.9:
                break

    print(f'Best decryption accuracy {max_accuracy}% found for guessedKey {best_gk}')

    decryptedPlaintext = best_decrypted_plaintext
    tokenized_plaintext = best_tokenized_plaintext
    accuracy = max_accuracy

    badWords = 0
    badWordList = []
    finalPlaintext = ""

    for fuzzy in tokenized_plaintext:
        if fuzzy.rstrip() not in wordDict:
            badWordList.append(fuzzy)
            badWords += 1
            lookup = bestMatchFinder(fileName, wordDict, fuzzy)
            # print(f'fuzzy word: {fuzzy} --> match in dict2 {lookup}')
            finalPlaintext += lookup + ' '
        else:
            finalPlaintext += fuzzy + ' '
    print(f'Intermediate fuzzed plaintext: {finalPlaintext}')

    # lookup dict file 1 for decrypted words
    # if found, final string is detected, exit
    ptFound = False
    ptMatches = 0
    tokenized_finalPlaintext = finalPlaintext.split()

    for ptStr in plaintextStrDict:
        ptMatches = 0
        for ptWord in ptStr.split():
            for fuzzy in tokenized_finalPlaintext:
                if fuzzy.rstrip() == ptWord:
                    ptMatches += 1
                    print(f'***>>>>> ({ptMatches}) plaintext token[{fuzzy.rstrip()}] matched plaintext_dictionary_test1 word [{ptWord}]')
                    # ok - I'm convinced now that the fuzzed str is indeed in dict file 1
                    if ptMatches > 10:
                        ptFound = True
                        finalPlaintext = ptStr
                        accuracy = 100.0
                        badWords = 0
                        badWordList = []
                        print(f'***>>>>> final plain text found in plaintext_dictionary_test1 = {finalPlaintext}')
                    break
            if ptFound:
                break
        if ptFound:
            break

    # print(f'{tokenized_plaintext}')
    print(f'Input Ciphertext with random chars (len = {len(ciphertext)}):{ciphertext}')
    print(f'Clean Ciphertext (len = {len(cleanCipherString)}):{cleanCipherString}')
    print(f'Decrypted Plaintext - chi-squared analysis (len = {len(decryptedPlaintext)}):{decryptedPlaintext}')
    print(f'Accuracy of decryption = {accuracy}%   {len(tokenized_plaintext) - len(badWordList)} out of {len(tokenized_plaintext)} decrypted accurately')
    print(f'Decrypted words not in Dict: {badWordList}')
    print(f'Final fuzzed Plaintext: {finalPlaintext}')

    if os.path.exists(selectedPlainTextFile):
        ptStr = open(selectedPlainTextFile,'r').read()
        tok_ptStr = ptStr.split()
        found = 0
        tok_finStr = finalPlaintext.split()
        for finWord in tok_finStr:
            if finWord.rstrip() in tok_ptStr:
                found += 1
        fuzz_accuracy = round((found/len(tok_ptStr)) * 100, 2)
        print(f'Accuracy of fuzzer = {fuzz_accuracy}%   {found} out of {len(tok_ptStr)} decrypted words fuzzed accurately')

    end=time.time()

    decr_runtime_str = str(round((end - start)*1000, 2)) + " ms"
    now = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")


    print(f'**********************************************************')
    print(f'*** Runtime of the TBZ chi-squared Decryptor is {decr_runtime_str}')
    print(f'*** Run completed at: {now}')
    print(f'**********************************************************')



    # 'a+' creates the file if it does not exist, so no mode switch is needed
    with open(fileToWriteTo, 'a+') as f:
        f.write('\n')
        f.write('Decryptor :: Decrypted Plaintext - chi-squared analysis\n')
        f.write(decryptedPlaintext)
        f.write('\n')
        f.write('Decryptor :: Final Plaintext from fuzzer\n')
        f.write(finalPlaintext)
        f.write('\n')
        f.write('Decryptor :: Accuracy : ')
        f.write(str(accuracy))
        f.write(' %\n')
        f.write('Decryptor :: Decryption Runtime : ')
        f.write(decr_runtime_str)
        f.write('\nDecryptor :: Run Completed at : ')
        f.write(now)
        f.write("\n\n======================================================================\n\n")

    return end - start
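
The fragment above leans on a bestMatchFinder(fileName, wordDict, fuzzy) helper that is not shown. A minimal sketch of such a fuzzy lookup, assuming a plain similarity ranking via difflib (the scoring below is an assumption, not the author's implementation):

import difflib

def best_match_finder_sketch(word_dict, fuzzy_word, cutoff=0.6):
    # Rank dictionary words by similarity to the garbled token and return
    # the closest one; fall back to the token itself if nothing clears the
    # cutoff. Hypothetical stand-in for the bestMatchFinder helper above.
    matches = difflib.get_close_matches(fuzzy_word, word_dict, n=1, cutoff=cutoff)
    return matches[0] if matches else fuzzy_word
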
예제 #29
0
def MI_RenyiCC(x, y, type, njobs=4):
    """
    Mutual Information estimator based on the Renyi quadratic entropy and the Cauchy Schwartz divergence
    Compute Renyi Quadratic Entropies hr2(p(x,y)*p(x)*p(y)), hr2 p(x,y) and hr2 p(x)p(y) for all types of variables couple
    Parameters
    ----------
        x, y = two variables
        type = type of the computation according to the variable types
             'dd' for 2 discret variables ,'cc' for 2 continue variables or 'cd' for 2 mixed variables
        njobs = number of parallel job for computation (4 by default)
    Returns :
        MI_QRCS = hr2(p(x,y)*p(x)*p(y))-1/2hr2 p(x,y) - 1/2hr2 p(x)p(y) , i.e. equal to 0 if x and y are independant
    Notes
    -----
    MI_RenyiCC_Multi may be used for bivariate variable => could be removed
    """
    N = len(x)
    if type == 'dd':
        xu, xc = at.unique(x, return_counts=True)
        yu, yc = at.unique(y, return_counts=True)
        hr2x = -np.log(np.sum((xc / N)**2))
        hr2y = -np.log(np.sum((yc / N)**2))
        freqs = DiscDensity(zip(x, y), N)
        hr2c = np.sum(
            np.dot(np.reshape((yc / N)**2, (len(yc), 1)),
                   np.reshape((xc / N)**2, (1, len(xc)))))
        hr2 = Parallel(n_jobs=njobs, backend="threading")(
            delayed(Parallel_MI_RenyiCC_d)(i, freqs[i], xu, yu, xc, yc, N)
            for i in freqs)
        s = np.sum(np.array(hr2), 0)
        hr2a = s[0]
        hr2b = s[1]
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
    elif type == 'cc':
        hr2a = 0
        hr2b = 0
        hr2c = 0
        iqrx = np.subtract(*np.percentile(x, [75, 25]))
        iqry = np.subtract(*np.percentile(y, [75, 25]))
        h = 0.85 * min(1 / np.sqrt((np.var(x) + np.var(y)) / 2),
                       (iqrx + iqry) / 2) * N**(-1 / 6)
        hr2x = 0
        hr2y = 0
        pwX = 0
        pwY = 0
        for i in zip(x, y):
            hr2x += ParzenWindow(i[0] - x,
                                 0.9 * min(np.std(x), iqrx) * N**(-1 / 5)**2)
            # bandwidth for y must use the y statistics (the original
            # mistakenly reused np.std(x) and iqrx here)
            hr2y += ParzenWindow(i[1] - y,
                                 0.9 * min(np.std(y), iqry) * N**(-1 / 5)**2)
            pwx = ParzenWindow(i[0] - x, h)
            pwy = ParzenWindow(i[1] - y, h)
            hr2a += pwx * pwy
            w = zip(i[0] - x, i[1] - y)
            hr2b += ParzenWindow(w, h, 2)
            pwX += pwx
            pwY += pwy
        hr2c += (1 / N**4) * (pwX * pwY)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        #print("-hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c,"-pw:",[pwX,pwY])
        hr2x = -np.log((1 / N**2) * hr2x)
        hr2y = -np.log((1 / N**2) * hr2y)
    elif type == "cd":
        yu, yc = at.unique(y, return_counts=True)
        hr2y = -np.log(np.sum((yc / N)**2))
        xyu = defaultdict(list)
        iqrx = np.subtract(*np.percentile(x, [75, 25]))
        hx = 0.9 * min(np.std(x), iqrx) * N**(-1 / 5)**2
        hr2x = 0
        hr2a = 0
        hr2b = 0
        hr2c = 0
        nxyu = 0
        for i in zip(x, y):
            xyu[i[1]].append(i[0])
            hr2x += ParzenWindow(i[0] - x, hx)
        for yui in yu:
            nxyui = len(xyu[yui])
            varxyui = np.var(xyu[yui])
            iqrxyui = np.subtract(*np.percentile(xyu[yui], [75, 25]))
            h = 0.85 * min(1 / np.sqrt((np.var(x) + varxyui) / 2),
                           (iqrx + iqrxyui) / 2) * N**(-1 / 6)
            # builtin sum: calling np.sum on a generator is deprecated
            hr2a += nxyui * sum(ParzenWindow(j - x, hx) for j in xyu[yui])
            hr2b += sum(ParzenWindow(j - xyu[yui], hx) for j in xyu[yui])
            nxyu += nxyui**2
        hr2c = (1 / N**4) * nxyu * sum(ParzenWindow(xi - x, hx) for xi in x)
        #print("hr2a:",hr2a,"-hr2b:",hr2b,"-hr2c:",hr2c)
        hr2a = (1 / N**3) * hr2a
        hr2b = (1 / N**2) * hr2b
        hr2x = -np.log((1 / N**2) * hr2x)

    lhr2a = -np.log(hr2a)
    lhr2b = -np.log(hr2b)
    lhr2c = -np.log(hr2c)
    MI_QRCS = lhr2a - 0.5 * lhr2b - 0.5 * lhr2c
    return MI_QRCS
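
MI_RenyiCC assumes ParzenWindow and DiscDensity helpers defined elsewhere. For reference, the discrete entropy used above is hr2 = -log(sum(p_i**2)), and a Gaussian Parzen window could look like the sketch below (an assumption about the missing helper, not its actual definition):

import numpy as np

def renyi2_entropy_discrete(counts):
    # Rényi quadratic entropy of a discrete distribution: -log(sum p_i^2)
    p = np.asarray(counts, dtype=float) / np.sum(counts)
    return -np.log(np.sum(p**2))

def parzen_window_sketch(diffs, h):
    # Gaussian kernel sum over the given differences with bandwidth h;
    # hypothetical stand-in for the ParzenWindow helper used above.
    d = np.asarray(diffs, dtype=float)
    return np.sum(np.exp(-d**2 / (2 * h**2)) / (np.sqrt(2 * np.pi) * h))
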
예제 #30
0
    def test_unique(self):

        def check_all(a, b, i1, i2, c, dt):
            base_msg = 'check {0} failed for type {1}'

            msg = base_msg.format('values', dt)
            v = unique(a)
            assert_array_equal(v, b, msg)

            msg = base_msg.format('return_index', dt)
            v, j = unique(a, 1, 0, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i1, msg)

            msg = base_msg.format('return_inverse', dt)
            v, j = unique(a, 0, 1, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, i2, msg)

            msg = base_msg.format('return_counts', dt)
            v, j = unique(a, 0, 0, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j, c, msg)

            msg = base_msg.format('return_index and return_inverse', dt)
            v, j1, j2 = unique(a, 1, 1, 0)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)

            msg = base_msg.format('return_index and return_counts', dt)
            v, j1, j2 = unique(a, 1, 0, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format('return_inverse and return_counts', dt)
            v, j1, j2 = unique(a, 0, 1, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i2, msg)
            assert_array_equal(j2, c, msg)

            msg = base_msg.format(('return_index, return_inverse '
                                   'and return_counts'), dt)
            v, j1, j2, j3 = unique(a, 1, 1, 1)
            assert_array_equal(v, b, msg)
            assert_array_equal(j1, i1, msg)
            assert_array_equal(j2, i2, msg)
            assert_array_equal(j3, c, msg)

        a = [5, 7, 1, 2, 1, 5, 7]*10
        b = [1, 2, 5, 7]
        i1 = [2, 3, 0, 1]
        i2 = [2, 3, 0, 1, 0, 2, 3]*10
        c = np.multiply([2, 1, 2, 2], 10)

        # test for numeric arrays
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        for dt in types:
            aa = np.array(a, dt)
            bb = np.array(b, dt)
            check_all(aa, bb, i1, i2, c, dt)

        # test for object arrays
        dt = 'O'
        aa = np.empty(len(a), dt)
        aa[:] = a
        bb = np.empty(len(b), dt)
        bb[:] = b
        check_all(aa, bb, i1, i2, c, dt)

        # test for structured arrays
        dt = [('', 'i'), ('', 'i')]
        aa = np.array(list(zip(a, a)), dt)
        bb = np.array(list(zip(b, b)), dt)
        check_all(aa, bb, i1, i2, c, dt)

        # test for ticket #2799
        aa = [1. + 0.j, 1 - 1.j, 1]
        assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])

        # test for ticket #4785
        a = [(1, 2), (1, 2), (2, 3)]
        unq = [1, 2, 3]
        inv = [0, 1, 0, 1, 1, 2]
        a1 = unique(a)
        assert_array_equal(a1, unq)
        a2, a2_inv = unique(a, return_inverse=True)
        assert_array_equal(a2, unq)
        assert_array_equal(a2_inv, inv)
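
The test above drives unique through its positional flags; the same options by keyword, on a subset of the test's own data, behave as follows (plain NumPy, shown only to make the expected outputs concrete):

import numpy as np

a = np.array([5, 7, 1, 2, 1, 5, 7])
values, index, inverse, counts = np.unique(
    a, return_index=True, return_inverse=True, return_counts=True)
# values  -> [1 2 5 7]        sorted unique values
# index   -> [2 3 0 1]        first occurrence of each value in a
# inverse -> [2 3 0 1 0 2 3]  values[inverse] reconstructs a
# counts  -> [2 1 2 2]        occurrences of each unique value
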
예제 #31
0
    "\n \n----------> ...in practice: boundary nodes (from boundary elements) \n"
)

print("\n line elements with Dirichlet tag\n",
      mesh.cell_sets_dict["Dirichlet"]["line"])

name = "Dirichlet"
tag = mesh.field_data[name][0]
dim = mesh.field_data[name][1]

if dim == 0:
    # array containing indices of elements in the boundary
    on_boundary = np.nonzero(
        mesh.cell_data_dict["gmsh:physical"]["vertex"] == tag)[0]
    # array containing indices of nodes in the boundary
    nodes = unique(mesh.cells_dict["vertex"][on_boundary])
elif dim == 1:
    on_boundary = np.nonzero(
        mesh.cell_data_dict["gmsh:physical"]["line"] == tag)[0]
    nodes = unique(mesh.cells_dict["line"][on_boundary])

print("\n nodes related to tag Dirichlet\n", nodes)
for n in nodes:
    print("\nnode #", n, "@", points[n])
print("\n\n")

print("\n node entries in dictionary with tag \"Points\"",
      mesh.cell_sets_dict["Points"]["vertex"])

name = "Points"
tag = mesh.field_data[name][0]
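
The pattern above — select the connectivity rows of the tagged elements, then unique to collapse shared nodes — can be seen on a toy connectivity array (a minimal sketch, not tied to any particular mesh):

import numpy as np

# Toy line-element connectivity: each row holds one element's two node ids.
lines = np.array([[0, 1], [1, 2], [2, 3], [3, 0]])
tagged = np.array([0, 2])          # indices of the elements carrying the tag
nodes = np.unique(lines[tagged])   # repeated node ids collapse to a set
print(nodes)                       # -> [0 1 2 3]
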
예제 #32
0
    def fit(self, X, y, sample_mask=None, X_argsorted=None, check_input=True,
            sample_weight=None):
        random_state = check_random_state(self.random_state)

        # Deprecations
        if sample_mask is not None:
            warn("The sample_mask parameter is deprecated as of version 0.14 "
                 "and will be removed in 0.16.", DeprecationWarning)

        if X_argsorted is not None:
            warn("The X_argsorted parameter is deprecated as of version 0.14 "
                 "and will be removed in 0.16.", DeprecationWarning)

        # Convert data
        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csc")
            if issparse(X):
                X.sort_indices()
                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                    raise ValueError("No support for np.int64 index based "
                                     "sparse matrices")

        # Determine output settings
        n_samples, self.n_features_ = X.shape
        is_classification = isinstance(self, ClassifierMixin)

        y = np.atleast_1d(y)

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity;
            # [:, np.newaxis] does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        if is_classification:
            y = np.copy(y)

            self.classes_ = []
            self.n_classes_ = []

            for k in six.moves.range(self.n_outputs_):
                classes_k, y[:, k] = unique(y[:, k], return_inverse=True)
                self.classes_.append(classes_k)
                self.n_classes_.append(classes_k.shape[0])

        else:
            self.classes_ = [None] * self.n_outputs_
            self.n_classes_ = [1] * self.n_outputs_

        self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
        max_depth = 1
        max_features = 10

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        if len(y) != n_samples:
            raise ValueError("Number of labels=%d does not match "
                             "number of samples=%d" % (len(y), n_samples))
        if self.min_samples_split <= 0:
            raise ValueError("min_samples_split must be greater than zero.")
        if self.min_samples_leaf <= 0:
            raise ValueError("min_samples_leaf must be greater than zero.")
        if max_depth <= 0:
            raise ValueError("max_depth must be greater than zero.")
        if not (0 < max_features <= self.n_features_):
            raise ValueError("max_features must be in (0, n_features]")

        if sample_weight is not None:
            if (getattr(sample_weight, "dtype", None) != DOUBLE or
                    not sample_weight.flags.contiguous):
                sample_weight = np.ascontiguousarray(
                    sample_weight, dtype=DOUBLE)
            if len(sample_weight.shape) > 1:
                raise ValueError("Sample weights array has more "
                                 "than one dimension: %d" %
                                 len(sample_weight.shape))
            if len(sample_weight) != n_samples:
                raise ValueError("Number of weights=%d does not match "
                                 "number of samples=%d" %
                                 (len(sample_weight), n_samples))

        if self.method == 'default':
            self.tree_ = _fit_regressor_stump(X, y, sample_weight, X_argsorted)
        elif self.method == 'threaded':
            self.tree_ = _fit_regressor_stump_threaded(X, y, sample_weight, X_argsorted)
        elif self.method == 'c':
            self.tree_ = _fit_regressor_stump_c_ext(X, y, sample_weight, X_argsorted)
        elif self.method == 'c_threaded':
            self.tree_ = _fit_regressor_stump_c_ext_threaded(X, y, sample_weight, X_argsorted)
        else:
            self.tree_ = _fit_regressor_stump(X, y, sample_weight, X_argsorted)

        if self.n_outputs_ == 1:
            self.n_classes_ = self.n_classes_[0]
            self.classes_ = self.classes_[0]

        return self
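
The classification branch above encodes each output column with unique(..., return_inverse=True): the returned inverse array is exactly the integer-encoded label vector. A standalone illustration of that step:

import numpy as np

y = np.array(['cat', 'dog', 'cat', 'bird'])
classes, y_encoded = np.unique(y, return_inverse=True)
# classes   -> ['bird' 'cat' 'dog']
# y_encoded -> [1 2 1 0]; classes[y_encoded] recovers y
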
예제 #33
0
    def test_unique_1d_with_axis(self, axis):
        x = np.array([4, 3, 2, 3, 2, 1, 2, 2])
        uniq = unique(x, axis=axis)
        assert_array_equal(uniq, [1, 2, 3, 4])
예제 #34
0
    def test_unique_axis_list(self):
        msg = "Unique failed on list of lists"
        inp = [[0, 1, 0], [0, 1, 0]]
        inp_arr = np.asarray(inp)
        assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
        assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
예제 #35
0
def print_unique_counts(d):
    column_list = d.columns.tolist()
    print "number of rows: {}".format(len(d[column_list[0]]))
    print ""
    for c in column_list:
        print "number of unique {}: {}".format(c,len(arraysetops.unique(d[c])))