Exemple #1
0
    def feature_difference(self, y: BinaryFeatureVector,
                           xa: BinaryFeatureVector) -> List:
        """
        Locate the positions at which two feature vectors differ.

        :param y: feature vector providing ``get_csr_matrix()``
        :param xa: feature vector subtracted from ``y``; must also provide
                   ``get_csr_matrix()``
        :return: the ``indices`` attribute of the sparse difference
                 ``y - xa`` (presumably the column indices of the features
                 whose values differ -- confirm against get_csr_matrix)
        """
        difference = y.get_csr_matrix() - xa.get_csr_matrix()
        return difference.indices
Exemple #2
0
def csr_mat_to_instances(csr_mat, labels, binary=False):
    """
    Build one Instance per row of a CSR matrix.

    :param csr_mat: matrix exposing ``data``, ``indices``, ``indptr`` and a
                    two-element ``shape`` (rows, features)
    :param labels: labels, indexed by row number
    :param binary: when true each row becomes a BinaryFeatureVector built
                   from the row's indices only; otherwise a RealFeatureVector
                   that also receives the row's stored values
    :return: list of Instance objects in row order
    """
    values = csr_mat.data
    col_idx = csr_mat.indices
    row_ptr = csr_mat.indptr
    row_count, feature_count = csr_mat.shape

    instances = []
    for row in range(row_count):
        label = labels[row]
        # indptr[row]:indptr[row + 1] delimits this row's stored entries
        start, stop = row_ptr[row], row_ptr[row + 1]
        row_indices = list(col_idx[start:stop])
        if binary:
            vector = BinaryFeatureVector(feature_count, row_indices)
        else:
            vector = RealFeatureVector(feature_count, row_indices,
                                       values[start:stop])
        instances.append(Instance(label, vector))
    return instances
Exemple #3
0
    def _generate_inst(self):
        """
        Rebuild ``self.inst`` from the current ``self.x`` and ``self.y``.

        Every position of ``self.x`` whose value is at least 0.5 is treated
        as a set feature. The resulting BinaryFeatureVector (declared with
        ``len(self.x)`` features) is wrapped, together with the label
        ``self.y``, in a new Instance stored on ``self.inst``. Nothing is
        returned.
        """
        # Threshold the values in self.x at 0.5 to pick the set features
        active = [pos for pos in range(len(self.x)) if self.x[pos] >= 0.5]

        self.inst = Instance(self.y,
                             BinaryFeatureVector(len(self.x), active))
Exemple #4
0
def load_dataset(emailData: EmailDataset) -> List[Instance]:
    """
    Turn a dataset object into a list of instances.

    :param emailData: dataset exposing ``shape``, ``features`` (with
                      ``indptr``, ``indices`` and ``data``),
                      ``num_instances``, ``binary`` and ``labels``
    :return: one Instance per dataset row, in row order; rows become
             BinaryFeatureVector when ``emailData.binary`` is true and
             RealFeatureVector otherwise
    """
    feature_count = emailData.shape[1]
    row_ptr = emailData.features.indptr
    col_idx = emailData.features.indices
    values = emailData.features.data

    result = []
    for row in range(emailData.num_instances):
        # indptr[row]:indptr[row + 1] delimits this row's stored entries
        start, stop = row_ptr[row], row_ptr[row + 1]
        if emailData.binary:
            vector = BinaryFeatureVector(feature_count,
                                         col_idx[start:stop].tolist())
        else:
            row_values = values[start:stop].tolist()
            vector = RealFeatureVector(feature_count,
                                       col_idx[start:stop].tolist(),
                                       row_values)
        result.append(Instance(emailData.labels[row], vector))
    return result
Exemple #5
0
def nd_arr_to_instances(nd_arr, labels=None, binary=False):
    """
    Return a list of instances built from the rows of a dense 2-D array.

    :param nd_arr: 2-D array with one row per instance. When ``labels`` is
                   None, the first column is split off as the labels
                   (``nd_arr[:, :1]``, so each label is a length-1 slice)
                   and the remaining columns are the features.
    :param labels: optional labels indexed by row; when given, every column
                   of ``nd_arr`` is a feature
    :param binary: when true each row becomes a BinaryFeatureVector holding
                   the indices of its non-zero entries; otherwise a
                   RealFeatureVector holding those indices and their values
    :return: list of Instance objects in row order
    """
    num_instances = nd_arr.shape[0]
    if labels is None:
        labels = nd_arr[:, :1]
        data = nd_arr[:, 1:]
        num_features = nd_arr.shape[1] - 1
    else:
        data = nd_arr
        num_features = nd_arr.shape[1]

    instance_lst = []
    for i in range(num_instances):
        row = data[i]
        mat_indices = [x for x in range(num_features) if row[x] != 0]
        # The vector size is the number of features (not the number of
        # instances), matching the other converters in this file.
        if binary:
            feature_vector = BinaryFeatureVector(num_features, mat_indices)
        else:
            # Take the values from this row at exactly the selected indices
            # so that indices and data always line up.
            mat_data = [row[x] for x in mat_indices]
            feature_vector = RealFeatureVector(num_features, mat_indices,
                                               mat_data)
        instance_lst.append(Instance(labels[i], feature_vector))
    return instance_lst
Exemple #6
0
    def attack(self, instances) -> List[Instance]:
        """
        Performs a data modification attack

        Runs gradient descent on ``self.fvs`` with a projection step
        (``_project_fvs``) after every update. Iteration stops once the
        distance between ``self.theta`` and ``self.target_theta`` is no
        longer above ``self.alpha`` or ``self.max_iter`` steps have run; at
        least one step is always taken. Progress is printed on every step.

        :param instances: the input instances (must be non-empty)
        :return: the attacked instances -- deep copies of the input whose
                 feature vectors are replaced by the thresholded result
        :raises ValueError: if ``instances`` is empty
        """

        if not len(instances):
            raise ValueError('Need at least one instance.')

        self.instances = instances
        self.return_instances = deepcopy(self.instances)
        self._calculate_constants()

        fv_shift = 0.0
        theta_gap = np.linalg.norm(self.theta - self.target_theta)
        step = 0
        while step == 0 or (theta_gap > self.alpha
                            and step < self.max_iter):

            print('Iteration: ', step,
                  ' - FV distance: ', fv_shift,
                  ' - theta distance: ', theta_gap,
                  sep='')

            # Gradient descent step followed by projection
            grad = self._calc_gradient()

            if self.verbose:
                print('\nGRADIENT\n', grad, '\n', sep='')

            # In-place update: self.fvs must remain the same object
            self.fvs -= (grad * self.beta)
            self._project_fvs()

            # Refresh theta and both distance measures, then remember the
            # current feature vectors for the next step's comparison
            self._calc_theta()
            fv_shift = np.linalg.norm(self.fvs - self.old_fvs)
            theta_gap = np.linalg.norm(self.theta - self.target_theta)
            self.old_fvs = deepcopy(self.fvs)

            step += 1

        print('Iteration: FINAL - FV distance: ', fv_shift,
              ' - theta distance: ', theta_gap,
              ' - alpha: ', self.alpha,
              ' - beta: ', self.beta,
              sep='')

        if self.verbose:
            print('\nTarget Theta:\n', self.target_theta, '\n\nTheta:\n',
                  self.theta, '\n')

        # Go from floating-point values in [0, 1] to integers in {0, 1}:
        # a feature is set when its value is at least 0.5
        for i, row in enumerate(self.fvs):
            active = [j for j in range(len(row)) if row[j] >= 0.5]
            self.return_instances[i].feature_vector = BinaryFeatureVector(
                self.return_instances[i].get_feature_count(), active)

        return self.return_instances
Exemple #7
0
    def coordinate_greedy(self, instance: Instance) -> Instance:
        """
        Greedily modify one instance, one randomly ordered coordinate at a
        time.

        For each coordinate (visited in shuffled order) a candidate is
        obtained from ``minimize_transform``; it replaces the current point
        only if ``transform_cost`` strictly decreases. The search stops
        early once ``self.max_change`` coordinates have failed to improve
        the cost.

        :param instance: the instance to modify
        :return: a new Instance (label -1) whose BinaryFeatureVector holds
                 the non-zero positions of the final point, or the original
                 ``instance`` unchanged when the learner still predicts
                 ``positive_classification`` for the new one
        """
        order = list(range(0, self.num_features))

        # Both names start out bound to the same array: ``origin`` is the
        # reference point for the cost, ``current`` is the running iterate.
        origin = current = instance.get_csr_matrix().toarray()[0]

        stalled = 0
        shuffle(order)
        for coord in order:
            candidate = self.minimize_transform(current, coord)
            cost_before = self.transform_cost(current, origin)
            cost_after = self.transform_cost(candidate, origin)

            # using difference instead of log ratio for convergence check
            delta = cost_after - cost_before

            if not (delta >= 0):
                # Strict improvement: accept the candidate
                current = candidate
                continue

            stalled += 1
            if stalled >= self.max_change:
                break

        nonzero = [j for j in range(0, self.num_features) if current[j] != 0]
        new_instance = Instance(
            -1, BinaryFeatureVector(self.num_features, nonzero))

        prediction = self.learn_model.predict(new_instance)
        if prediction == self.learn_model.positive_classification:
            return instance
        return new_instance