예제 #1
0
    def _Q(self, inst_1: Instance, inst_2: Instance, derivative=False, k=-1):
        """
        Computes the label-weighted kernel entry Q_ij, or its partial
        derivative with respect to x_k.
        :param inst_1: the instance playing the role of i
        :param inst_2: the instance playing the role of j
        :param derivative: truthy -> evaluate self.kernel_derivative,
                           falsy -> evaluate self.kernel
        :param k: forwarded to self.kernel_derivative; ignored when
                  derivative is falsy
        :return: label(inst_1) * label(inst_2) * (kernel value or derivative)
        :raises ValueError: if the instances report different feature counts
        """

        if inst_1.get_feature_count() != inst_2.get_feature_count():
            raise ValueError('Feature vectors need to have same length.')

        def as_flat_array(inst):
            # sparse row -> dense matrix -> nested list -> flat 1-D ndarray
            sparse_row = inst.get_feature_vector().get_csr_matrix()
            return np.array(sparse_row.todense().tolist()).flatten()

        vec_1 = as_flat_array(inst_1)
        vec_2 = as_flat_array(inst_2)

        if not derivative:
            kernel_value = self.kernel(vec_1, vec_2)
        else:
            kernel_value = self.kernel_derivative(vec_1, vec_2, k)

        return inst_1.get_label() * inst_2.get_label() * kernel_value
예제 #2
0
    def _Q(self, inst_1: Instance, inst_2: Instance, derivative=False, k=-1):
        """
        Computes the label-weighted kernel entry Q_ij, or its partial
        derivative with respect to x_k, on binarized feature vectors.
        :param inst_1: the instance playing the role of i
        :param inst_2: the instance playing the role of j
        :param derivative: truthy -> evaluate self.kernel_derivative,
                           falsy -> evaluate self.kernel
        :param k: forwarded to self.kernel_derivative; ignored when
                  derivative is falsy
        :return: label(inst_1) * label(inst_2) * (kernel value or derivative)
        :raises ValueError: if the instances report different feature counts
        """

        if inst_1.get_feature_count() != inst_2.get_feature_count():
            raise ValueError('Feature vectors need to have same length.')

        # Every feature becomes 0 when it compares equal to 0, otherwise 1.
        binarized = []
        for inst in (inst_1, inst_2):
            features = inst.get_feature_vector()
            binarized.append([
                0 if features.get_feature(idx) == 0 else 1
                for idx in range(inst.get_feature_count())
            ])

        vec_1 = np.array(binarized[0])
        vec_2 = np.array(binarized[1])

        if not derivative:
            kernel_value = self.kernel(vec_1, vec_2)
        else:
            kernel_value = self.kernel_derivative(vec_1, vec_2, k)

        return inst_1.get_label() * inst_2.get_label() * kernel_value
예제 #3
0
    def get_feature_vector_array(inst: Instance):
        """
        Builds a 0/1 np.ndarray from the instance's feature vector.

        Position j holds 1 when feature j compares equal to 1 and 0 otherwise.
        :param inst: the Instance
        :return: the feature vector (np.ndarray)
        """

        features = inst.get_feature_vector()
        n_features = inst.get_feature_count()
        return np.array(
            [1 if features.get_feature(pos) == 1 else 0
             for pos in range(n_features)])
예제 #4
0
    def _calc_inst_loss(self, inst: Instance):
        """
        Calculates the logistic loss log(1 + exp(-y * f(x))) for one instance,
        where y is the instance label and f(x) is the learner's decision
        function evaluated on the instance's 0/1 feature vector.
        :param inst: the instance
        :return: the logistic loss (float)
        """

        # Features that compare equal to 1 map to 1, everything else to 0.
        feature_vector = inst.get_feature_vector()
        fv = np.array([
            1 if feature_vector.get_feature(i) == 1 else 0
            for i in range(inst.get_feature_count())
        ])

        # reshape is for the decision function when inputting only one sample
        score = self.learner.model.learner.decision_function(fv.reshape(1, -1))

        # One sample in, one score out: extract the scalar explicitly instead
        # of relying on implicit conversion of a one-element array.
        margin = -1 * inst.get_label() * np.asarray(score).item()

        # logaddexp(0, m) == log(exp(0) + exp(m)) == log(1 + exp(m)), but it
        # does not overflow when the margin is large.
        return float(np.logaddexp(0, margin))
예제 #5
0
    def coordinate_greedy(self, instance: Instance):
        """
        Greedily update one randomly chosen feature at a time to incrementally
        improve the attacker's utility (coordinate descent on the transform
        cost Q).

        Each iteration picks a random feature index, asks minimize_transform
        for a candidate point and accepts the candidate when the transform
        cost does not increase. The search ends after self.max_iteration
        iterations, or earlier once self.convergence_time accepted steps have
        each improved the cost by at most self.epsilon.

        :param instance: the instance to transform
        :return: a new Instance with label -1 whose feature vector holds the
                 non-zero entries of the final point
        """
        instance_len = instance.get_feature_count()

        # x is the starting point, xk the current iterate.
        # NOTE(review): both names initially refer to the same array, so this
        # relies on minimize_transform returning a new array rather than
        # modifying xk in place -- confirm.
        x = xk = instance.get_csr_matrix().toarray()[0]

        # Counts accepted steps whose improvement was <= epsilon.
        # NOTE(review): the counter is never reset, so the counted steps need
        # not be consecutive -- confirm that this is intended.
        converge = 0

        for _ in range(self.max_iteration):
            # NOTE(review): the upper bound assumes an inclusive randint
            # (random.randint) -- confirm against the file's import.
            i = randint(0, instance_len - 1)

            # Greedily improve along the randomly chosen feature i and compare
            # the cost before and after the move.
            xkplus1 = self.minimize_transform(xk, x, i)
            old_q = self.transform_cost(xk, x)
            new_q = self.transform_cost(xkplus1, x)

            if new_q - old_q <= 0:
                xk = xkplus1
                # Absolute improvement is used as the convergence measure; a
                # log-based measure was considered but np.log() may not
                # converge in special cases.
                step_change = old_q - new_q

                if step_change <= self.epsilon:
                    converge += 1
                    if converge >= self.convergence_time:
                        break

        # Collect the non-zero coordinates of the final point in one pass.
        mat_indices = []
        mat_data = []
        for idx in range(self.num_features):
            value = xk[idx]
            if value != 0:
                mat_indices.append(idx)
                mat_data.append(value)

        new_instance = Instance(
            -1, RealFeatureVector(self.num_features, mat_indices, mat_data))
        return new_instance