def __init__(self, num_features, top_k_size, learning_rate):
    """Initialize sketch-backed logistic-regression state.

    num_features  -- dimensionality of the input (stored as self.D)
    top_k_size    -- capacity of the TopK heavy-hitter structure
    learning_rate -- SGD step size
    """
    self.D = num_features
    self.learning_rate = learning_rate
    # Dense weight vector and bias (integer zeros, as in the sibling ctors).
    self.b = 0
    self.w = np.array([0] * num_features)
    # Running loss accumulator.
    self.loss_val = 0
    # Heavy-hitter tracker over the sketched weights.
    self.top_k = TopK(top_k_size)
    # Count sketch: 3 hash rows, (2^18 - 1) buckets.
    self.cms = CustomCountSketch(3, (1 << 18) - 1)
class LogisticRegression(object):
    """Logistic regression whose weights live in a count sketch.

    Per-feature weights are folded into a CustomCountSketch and the largest
    ones are mirrored in a TopK structure; training and prediction read
    weights back through the TopK mirror.
    """

    def __init__(self, num_features):
        self.D = num_features
        self.learning_rate = 0.5
        # Count sketch: 3 hash rows, (2^18 - 1) buckets.
        self.cms = CustomCountSketch(3, (1 << 18) - 1)
        # BUG FIX: the original `TopK(1 << 14 - 1)` parsed as `1 << 13`
        # (binary minus binds tighter than <<), i.e. 8192. The file's own
        # convention, `(1 << 18) - 1` just above, shows the intended size
        # is (1 << 14) - 1 = 16383.
        self.top_k = TopK((1 << 14) - 1)

    def sigmoid(self, x):
        """Numerically stable logistic function: never exponentiates +|x|."""
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        """Binary cross-entropy; raises ValueError if p is exactly 0 or 1."""
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        """One SGD step on a sparse example (parallel position/value lists).

        Returns the cross-entropy loss for this example.
        """
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # w[i] * x[i], with w[i] read from the TopK mirror.
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            logit += val
        if max_logit - min_logit == 0:
            # Degenerate range (e.g. all-zero weights): fall back to raw logit.
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        print("normalized weights {}".format(normalized_weights))
        sigm_val = self.sigmoid(normalized_weights)
        # Clamp away from BOTH endpoints: the original only guarded 1.0, so
        # sigm_val == 0.0 crashed self.loss() with math.log(0).
        if sigm_val == 1.0:
            sigm_val = sigm_val - (1e-5)
        elif sigm_val == 0.0:
            sigm_val = 1e-5
        print("label {} sigmoid {}".format(label, sigm_val))
        loss = self.loss(y=label, p=sigm_val)
        gradient = (label - sigm_val)
        if gradient != 0:
            for i in range(len(feature_pos)):
                # Push the scaled gradient into the sketch, then mirror the
                # sketch's new estimate into TopK.
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def predict(self, feature_pos, feature_val):
        """Hard 0/1 prediction for a sparse example (threshold 0.5)."""
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        if a > 0.5:
            return 1
        else:
            return 0
def __init__(self, num_features):
    """Initialize sketch-backed state for a num_features-dimensional model."""
    self.D = num_features
    self.learning_rate = 0.5
    # Count sketch: 3 hash rows, (2^18 - 1) buckets.
    self.cms = CustomCountSketch(3, (1 << 18) - 1)
    # BUG FIX: the original `TopK(1 << 14 - 1)` parsed as `1 << 13` (= 8192)
    # because `-` binds tighter than `<<`; the sketch width on the line above
    # is written `(1 << 18) - 1`, so the intended capacity is 16383.
    self.top_k = TopK((1 << 14) - 1)
class LogisticRegression(object):
    """Logistic regression whose weights live in a count sketch.

    Weight updates are pushed into a CustomCountSketch; the heaviest
    coordinates are mirrored in a TopK structure, which is what training
    and prediction read weights from. Accumulates total loss in loss_val.
    """

    def __init__(self, num_features, top_k_size):
        self.D = num_features
        # np.zeros is the idiomatic (and O(1)-memory-at-build) spelling of
        # np.array([0] * D); dtype=int keeps the original integer dtype.
        self.w = np.zeros(self.D, dtype=int)
        self.b = 0
        self.learning_rate = 5e-1
        self.cms = CustomCountSketch(3, (1 << 18) - 1)
        self.top_k = TopK(top_k_size)
        # Running sum of per-example losses (see train_with_sketch).
        self.loss_val = 0

    def sigmoid(self, x):
        """Numerically stable logistic function: never exponentiates +|x|."""
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        """Binary cross-entropy; raises ValueError if p is exactly 0 or 1."""
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        """One SGD step on a sparse example (parallel position/value lists).

        Returns this example's cross-entropy loss and adds it to loss_val.
        """
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # multiplying w[i] with x[i], w[i] read from the TopK mirror
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            # calculating wTx
            logit += val
        if max_logit - min_logit == 0:
            # Degenerate range (e.g. all-zero weights): fall back to raw logit.
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        sigm_val = self.sigmoid(normalized_weights)
        # Clamp away from BOTH endpoints: the original only guarded 1.0, so
        # sigm_val == 0.0 crashed self.loss() with math.log(0).
        if sigm_val == 1.0:
            sigm_val = sigm_val - (1e-5)
        elif sigm_val == 0.0:
            sigm_val = 1e-5
        gradient = (label - sigm_val)
        loss = self.loss(y=label, p=sigm_val)
        self.loss_val += loss
        if gradient != 0:
            for i in range(len(feature_pos)):
                # Push the scaled gradient into the sketch, then mirror the
                # sketch's new estimate into TopK.
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def negative_log_likelihood(self, y, x):
        # NOTE(review): this formula looks off for a logistic NLL — one would
        # expect exp(-y*x) or similar inside the denominator. Left as-is
        # because no caller is visible here; confirm against its usage.
        return -y * x / (1 + math.exp(y))

    def predict(self, feature_pos, feature_val):
        """Return the predicted probability (not a hard label) for a sparse example."""
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(
                feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        return a
def __init__(self, num_features):
    """Set up sketch-backed training state sized by num_features."""
    # SGD step size (0.5 == the original 5e-1).
    self.learning_rate = 0.5
    # Running sum of per-example losses.
    self.loss_val = 0
    # TopK capacity tracks the feature count here (unlike the sibling
    # ctors, which take an explicit top_k_size).
    self.top_k = TopK(num_features)
    # Count sketch: 3 hash rows, (2^18 - 1) buckets.
    self.cms = CustomCountSketch(3, (1 << 18) - 1)