Example #1
 def __init__(self, num_features, top_k_size):
     self.D = num_features
     self.learning_rate = 5e-1
     self.cms = CustomCountMinSketch(2, (1 << 15) - 1)
     self.top_k = TopK(top_k_size)
     self.loss_val = 0
Example #2
import math
import numpy as np

# CustomCountMinSketch, TopK, and Node are provided by the surrounding project (import path not shown here).
class LogisticRegression(object):
    def __init__(self, count_sketch_size, top_k, feature_size):
        self.learning_rate = 0.5
        self.cms = CustomCountMinSketch(2, count_sketch_size)
        self.top_k = TopK(top_k)
        self.recovered_weight_vector = [0] * feature_size

    def sigmoid(self, x):
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # print("top k at pos {} value {}".format(feature_pos[i], self.top_k.get_item(feature_pos[i])))
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            logit += val
        if max_logit - min_logit == 0:
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        print("normalized weights {}".format(normalized_weights))
        sigm_val = self.sigmoid(normalized_weights)
        print("label {} sigmoid {}".format(label, sigm_val))
        loss = self.loss(y=label, p=sigm_val)
        diff_label = (label - sigm_val)  # residual: label minus predicted probability
        if diff_label != 0:
            for i in range(len(feature_pos)):
                # updating the change only on previous values
                grad_update = self.learning_rate * diff_label * features[i]
                value = self.cms.update(feature_pos[i], grad_update)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def predict(self, feature_pos, feature_val):
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        if a > 0.5:
            return 1
        else:
            return 0

    def sparse_recovery(self, feature_pos, feature_vals, label):
        for i in range(len(feature_pos)):
            cumulative_grad_val = self.cms.query(feature_pos[i])
            self.recovered_weight_vector[feature_pos[i]-1] += cumulative_grad_val / feature_vals[i]
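A minimal usage sketch for the class above, assuming CustomCountMinSketch, TopK, and Node are importable from the project; the sizes, feature positions, values, and label below are made-up illustrative data, not part of the original example.

# Hypothetical usage of Example #2; all data here is illustrative.
model = LogisticRegression(count_sketch_size=(1 << 15) - 1, top_k=1024, feature_size=10)

# One sparse training sample: non-zero feature positions, their values, and a 0/1 label.
feature_pos = [1, 4, 7]
features = [0.5, 1.0, 2.0]
label = 1

loss = model.train_with_sketch(feature_pos, features, label)
prediction = model.predict(feature_pos, features)

# Recover approximate weights for the observed positions from the accumulated sketch values.
model.sparse_recovery(feature_pos, features, label)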
Example #3
 def __init__(self, num_features, top_k_size, learning_rate):
     self.D = num_features
     self.w = np.array([0] * self.D)
     self.b = 0
     self.learning_rate = learning_rate
     self.cms = CustomCountMinSketch(2, (1 << 15) - 1)
     self.top_k = TopK(top_k_size)
     self.loss_val = 0
Example #4
import math
import numpy as np

# CustomCountMinSketch, TopK, and Node are provided by the surrounding project (import path not shown here).
class LogisticRegression(object):
    def __init__(self, num_features, top_k_size):
        self.D = num_features
        self.learning_rate = 5e-1
        self.cms = CustomCountMinSketch(2, (1 << 15) - 1)
        self.top_k = TopK(top_k_size)
        self.loss_val = 0

    def sigmoid(self, x):
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # print("top k at pos {} value {}".format(feature_pos[i], self.top_k.get_item(feature_pos[i])))
            # multiplying w[i] with x[i]
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            # calculating wTx
            logit += val
        if max_logit - min_logit == 0:
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        sigm_val = self.sigmoid(normalized_weights)
        if sigm_val == 1.0:
            sigm_val = sigm_val - (1e-5)
        # print("label {} sigmoid {}".format(label, sigm_val))
        gradient = (label - sigm_val)
        loss = self.loss(y=label, p=sigm_val)
        self.loss_val += loss
        if gradient != 0:
            for i in range(len(feature_pos)):
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    # Helper not called by the other methods shown in this example.
    def negative_log_likelihood(self, y, x):
        return -y * x / (1 + math.exp(y))

    def predict(self, feature_pos, feature_val):
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(
                feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        return a
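The examples on this page rely on CustomCountMinSketch, TopK, and Node from the surrounding project. Below is a minimal, hypothetical stand-in for those classes, with interfaces inferred only from the calls made in these examples (update returning the current estimate, query, push, get_value_for_key); the real project implementations may differ.

import random
from collections import namedtuple

# Hypothetical stand-ins; interfaces are inferred from the calls in these examples.
Node = namedtuple("Node", ["key", "value"])

class CustomCountMinSketch(object):
    """Count-min-style table that accumulates signed gradient updates."""
    def __init__(self, depth, width):
        self.depth = depth
        self.width = width
        self.tables = [[0.0] * width for _ in range(depth)]
        self.seeds = [random.randrange(1 << 30) for _ in range(depth)]

    def _index(self, row, key):
        # Hash the key into a column for the given row.
        return hash((self.seeds[row], key)) % self.width

    def update(self, key, delta):
        # Add delta to every row, then return the current estimate for the key.
        for row in range(self.depth):
            self.tables[row][self._index(row, key)] += delta
        return self.query(key)

    def query(self, key):
        # Return the minimum estimate across rows.
        return min(self.tables[row][self._index(row, key)]
                   for row in range(self.depth))

class TopK(object):
    """Keeps the k keys with the largest absolute values."""
    def __init__(self, k):
        self.k = k
        self.items = {}

    def push(self, node):
        self.items[node.key] = node.value
        if len(self.items) > self.k:
            # Evict the entry with the smallest absolute value.
            smallest = min(self.items, key=lambda key: abs(self.items[key]))
            del self.items[smallest]

    def get_value_for_key(self, key):
        # Keys not tracked in the top-k are treated as zero weight.
        return self.items.get(key, 0.0)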
Example #5
import math
import numpy as np

# CustomCountMinSketch, TopK, and Node are provided by the surrounding project (import path not shown here).
class LogisticRegression(object):
    def __init__(self, num_features):
        self.D = num_features
        self.learning_rate = 5e-1
        # self.cms = CustomCountMinSketch(3, int(np.log(self.D) ** 2 / 3))
        self.cms = CustomCountMinSketch(2, (1 << 18) - 1)
        self.top_k = TopK((1 << 14) - 1)

    def sigmoid(self, x):
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # print("top k at pos {} value {}".format(feature_pos[i], self.top_k.get_item(feature_pos[i])))
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            logit += val
        if max_logit - min_logit == 0:
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        print("normalized weights {}".format(normalized_weights))
        sigm_val = self.sigmoid(normalized_weights)
        print("label {} sigmoid {}".format(label, sigm_val))
        loss = self.loss(y=label, p=sigm_val)
        gradient = (label - sigm_val)
        if gradient != 0:
            for i in range(len(feature_pos)):
                # updating the change only on previous values
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def predict(self, feature_pos, feature_val):
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(
                feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        if a > 0.5:
            return 1
        else:
            return 0
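A small, hypothetical evaluation loop for the class above; the samples variable and its (feature_pos, feature_val, label) format are assumptions for illustration, not part of the original example.

# Hypothetical helper: computes accuracy of the 0/1 predictions over sparse samples.
def evaluate(model, samples):
    correct = 0
    for feature_pos, feature_val, label in samples:
        if model.predict(feature_pos, feature_val) == label:
            correct += 1
    return correct / len(samples) if samples else 0.0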
Example #6
 def __init__(self, num_features):
     self.D = num_features
     self.learning_rate = 5e-1
     # self.cms = CustomCountMinSketch(3, int(np.log(self.D) ** 2 / 3))
     self.cms = CustomCountMinSketch(2, (1 << 18) - 1)
     self.top_k = TopK((1 << 14) - 1)
Example #7
 def __init__(self, count_sketch_size, top_k, feature_size):
     self.learning_rate = 0.5
     self.cms = CustomCountMinSketch(2, count_sketch_size)
     self.top_k = TopK(top_k)
     self.recovered_weight_vector = [0] * feature_size