import numpy as np


class SparseRecovery(object):
    # CountSketch is provided elsewhere in this repository.
    def __init__(self, n, d, rand_count):
        self.A = np.random.randn(n, d)   # n x d measurement matrix
        self.d = d
        # self.A = np.ones((n, d))
        self.rand_count = rand_count
        self.countsketch = CountSketch(3, 10000)

    def run(self):
        print("A {}".format(self.A))
        for i in range(1):   # single trial
            # build a d-dimensional x with rand_count random non-zero entries
            self.x = np.zeros((self.d, 1))
            for j in range(self.rand_count):
                pos = np.random.randint(self.d)
                self.x[pos] = np.random.rand() * np.random.randint(10000)
            self.non_zero_values = self.x[self.x > 0]
            self.non_zero_x, self.non_zero_y = np.where(self.x > 0)
            # accumulate the measurement y = A x in the sketch, bucketed by row index
            for j in range(len(self.x)):
                values = self.A[:, j] * self.x[j]
                for k in range(0, len(values)):
                    self.countsketch.update(k, values[k])
                    # self.top_k.push(Node(i, value))
            print("non zero values {}".format(self.non_zero_values))
            print("non zero x {}".format(self.non_zero_x))
            print("printing heap")
            # query the sketch and report the indices of the largest estimates
            approximate_values = []
            for k in range(len(self.x)):
                approximate_values.append(self.countsketch.query(k))
            approximate_values = np.array(approximate_values)
            print(approximate_values.argsort()[-self.rand_count:][::-1])
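# Hypothetical usage sketch (not part of the original file): build a
# 100 x 10000 Gaussian measurement matrix, plant 10 random non-zeros in x,
# and let SparseRecovery report the indices of the largest sketch estimates.
# Assumes the repository's CountSketch is importable in this module.
if __name__ == '__main__':
    np.random.seed(0)   # reproducible measurement matrix and signal
    recovery = SparseRecovery(n=100, d=10000, rand_count=10)
    recovery.run()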
def repetitions():
    # Relies on module-level state: nodes, k_min, k_max, gamma, power_law,
    # power_law_distribution, ccms_losses and cs_losses.
    for n in range(nodes):
        power_law_distribution[n] = power_law(k_min, k_max, np.random.uniform(0, 1), gamma)
    round_values = [int(round(item)) for item in power_law_distribution]
    pos_neg = [1, -1]
    random_numbers = [random.choice(pos_neg) * item for item in round_values]
    actual_count_dict = Counter(random_numbers)
    count_dict = sorted(actual_count_dict.items())
    ccms = ComplementaryCountMinSketch(4, 25)
    # top frequent items comparison
    cs = CountSketch(5, 50)
    for item in random_numbers:
        if item > 0:
            ccms.update(item)
            cs.update(item)
        else:
            ccms.update(abs(item), -1)
            cs.update(abs(item), -1)
    # Query each magnitude once (positive keys only), since the sketches are
    # keyed by abs(item); compare estimated net counts against the true ones.
    items = list(set(abs(val[0]) for val in count_dict))
    ccms_loss = 0
    cs_loss = 0
    for item in items:
        ccms_val = ccms.query(item)
        cs_val = cs.query(item)
        actual_count = actual_count_dict[item] - actual_count_dict[-item]
        ccms_loss += (actual_count - ccms_val) ** 2
        cs_loss += (actual_count - cs_val) ** 2
    ccms_losses.append(ccms_loss / len(items))
    cs_losses.append(cs_loss / len(items))
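# A minimal sketch of the power_law helper that repetitions() assumes: inverse
# transform sampling for p(k) ~ k^(-gamma) truncated to [k_min, k_max]. The
# name and signature mirror the call above; this is an assumed implementation,
# not necessarily the one used elsewhere in the repository.
def power_law(k_min, k_max, y, gamma):
    # y is a uniform(0, 1) sample; gamma != 1 is assumed
    return ((k_max ** (1 - gamma) - k_min ** (1 - gamma)) * y
            + k_min ** (1 - gamma)) ** (1 / (1 - gamma))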
class MissionQuadreticLoss(object):
    # CountSketch, TopK and Node are provided elsewhere in this repository.
    def __init__(self, top_k_size):
        self.learning_rate = 0.2
        self.cms = CountSketch(3, 1000)
        # self.cms = CountSketch(3, int(np.log(self.D) ** 2 / 3))
        self.top_k = TopK(top_k_size)
        self.loss_val = 0

    def train_with_sketch(self, feature_pos, features, label):
        # calculating wTx over the active feature positions
        logit = 0
        for i in range(len(feature_pos)):
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            logit += val
        # print("label {} wx {}".format(label, logit))
        gradient = (label - logit)   # residual of the quadratic loss
        print("residual {}".format(gradient))
        if gradient != 0:
            for i in range(len(feature_pos)):
                # gradient step for squared loss, accumulated in the sketch
                updated_val = 2 * self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return gradient
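# Hypothetical single training step (not in the original file): three active
# features at positions 3, 17 and 42 with target 1.0. Assumes the repository's
# CountSketch, TopK and Node are importable here and that
# TopK.get_value_for_key returns 0 for keys it has not seen.
if __name__ == '__main__':
    model = MissionQuadreticLoss(top_k_size=8)
    residual = model.train_with_sketch(feature_pos=[3, 17, 42],
                                       features=[1.0, 0.5, 2.0],
                                       label=1.0)
    print("residual after one step {}".format(residual))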
print("Total loss") print("Loss for CCMS", ccms_loss) print("Loss for CS", cs_loss) print("Loss for CCMS Variant", ccms_variant_loss) print("Loss for CMS Variant Loss", cms_variant_loss) return [ccms_loss, cs_loss, ccms_variant_loss, cms_variant_loss] if __name__ == '__main__': ccms_losses = [] cs_losses = [] ccms_variant_losses = [] cms_variant_losses = [] for i in range(1000): ccms = ComplementaryCountMinSketch(2, 25) ccms_variant = ComplementaryCountMinSketchVariant(2, 25) cms_variant = CountMinSketchVariant(4, 50) cs = CountSketch(4, 50) analysis = Comparison(ccms_variant, cs, cms_variant, ccms, 10000) analysis.run_sketch() analysis.compare_sketches() losses = analysis.calculate_loss() ccms_losses.append(losses[0]) cs_losses.append(losses[1]) ccms_variant_losses.append(losses[2]) cms_variant_losses.append(losses[3]) print("Mean ccms loss", np.mean(ccms_losses)) print("Mean cs loss", np.mean(cs_losses)) print("Mean ccms variant loss", np.mean(ccms_variant_losses)) print("Mean cms variant loss", np.mean(cms_variant_losses))
import numpy as np
from scipy.optimize import fmin_l_bfgs_b


class OurLogisticRegression(object):
    # CountSketch is provided elsewhere in this repository.
    def __init__(self, num_features):
        self.D = num_features
        self.w = np.array([0] * self.D)
        self.b = 0
        self.learning_rate = 0.01
        self.cms = CountSketch(3, int(np.log(self.D) ** 2 / 3))

    def sigmoid(self, x):
        # numerically stable logistic function
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        # negative log-likelihood of a single example
        return -(y * np.log(p) + (1 - y) * np.log(1 - p))

    def gradient(self, w, y, x, b):
        # gradient of log(1 + exp(-y * (x.w + b))) for labels y in {-1, +1}
        dw = (-y * x) / (1 + np.exp(y * (np.dot(x, w) + b)))
        db = -y / (1 + np.exp(y * (np.dot(x, w) + b)))
        return dw, db

    def train(self, X, y):
        y_hat = np.dot(X, self.w) + self.b
        loss = self.loss(y, self.sigmoid(y_hat))
        dw, db = self.gradient(self.w, y, X, self.b)
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def train_with_sketch(self, X, y):
        # currently identical to train(); the sketched update lives in
        # gradient_using_sketch below
        y_hat = np.dot(X, self.w) + self.b
        loss = self.loss(y, self.sigmoid(y_hat))
        dw, db = self.gradient(self.w, y, X, self.b)
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        a = self.sigmoid(np.dot(X, self.w) + self.b)
        if a > 0.5:
            return 1
        else:
            return -1

    def gradient_using_sketch(self, X, y):
        # sketch the current weights, then the dense gradient
        for i in range(self.D):
            self.cms.update(i, self.w[i])
        dw, db = self.gradient(self.w, y, X, self.b)
        for i in range(self.D):
            self.cms.update(i, dw[i])
        # todo: update in top K

    def fit(self, X, y):
        # objective and objective_grad are assumed to be defined elsewhere
        num_features = X.shape[1]
        initial_wcb = np.zeros(shape=(2 * X.shape[1] + 1,))
        params, min_val_obj, grads = fmin_l_bfgs_b(func=self.objective, args=(X, y),
                                                   x0=initial_wcb, disp=10, maxiter=500,
                                                   fprime=self.objective_grad)
        print("params {}".format(params))
        print("min val obj {}".format(min_val_obj))
        print("grads dict {}".format(grads))
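# Hypothetical usage of the dense baseline (not in the original file): one SGD
# step with {-1, +1} labels on a small random example. fit() is not exercised
# here because it relies on objective / objective_grad defined elsewhere.
if __name__ == '__main__':
    np.random.seed(0)
    clf = OurLogisticRegression(num_features=1000)
    x, y = np.random.randn(1000), 1
    clf.train(x, y)
    print("prediction {}".format(clf.predict(x)))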
import math

import numpy as np
from scipy.optimize import fmin_l_bfgs_b


class LogisticRegression(object):
    # CountSketch, TopK and Node are provided elsewhere in this repository.
    def __init__(self, num_features):
        self.D = num_features
        self.learning_rate = 5e-1
        self.cms = CountSketch(3, (1 << 18) - 1)
        # self.cms = CountSketch(3, int(np.log(self.D) ** 2 / 3))
        self.top_k = TopK((1 << 14) - 1)
        self.loss_val = 0

    def sigmoid(self, x):
        # numerically stable logistic function
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train(self, X, y):
        # dense baseline update; expects self.w, self.b and self.gradient,
        # which are not set up in this sketch-based class
        y_hat = np.dot(X, self.w) + self.b
        loss = self.loss(y, self.sigmoid(y_hat))
        dw, db = self.gradient(self.w, y, X, self.b)
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        print("number of features {}".format(
            len([i for i in range(len(features)) if features[i] > 0])))
        for i in range(len(feature_pos)):
            # print("top k at pos {} value {}".format(feature_pos[i], self.top_k.get_item(feature_pos[i])))
            # multiplying w[i] with x[i]
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            # calculating wTx
            logit += val
        if max_logit - min_logit == 0:
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        print("normalized weights {}".format(normalized_weights))
        sigm_val = self.sigmoid(normalized_weights)
        if sigm_val == 1.0:
            # keep log(1 - p) finite
            sigm_val = sigm_val - 1e-5
        print("label {} sigmoid {}".format(label, sigm_val))
        gradient = (label - sigm_val)
        loss = self.loss(y=label, p=sigm_val)
        self.loss_val += loss
        if gradient != 0:
            for i in range(len(feature_pos)):
                # updating the change only on previous values
                # if features[i] != 0:
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def negative_log_likelihood(self, y, x):
        return -y * x / (1 + math.exp(y))

    def predict(self, feature_pos, feature_val):
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        if a > 0.5:
            return 1
        else:
            return 0

    def gradient_using_sketch(self, X, y):
        # sketch the current weights, then the dense gradient
        for i in range(self.D):
            self.cms.update(i, self.w[i])
        dw, db = self.gradient(self.w, y, X, self.b)
        for i in range(self.D):
            self.cms.update(i, dw[i])
        # todo: update in top K

    def fit(self, X, y):
        # objective and objective_grad are assumed to be defined elsewhere
        num_features = X.shape[1]
        initial_wcb = np.zeros(shape=(2 * X.shape[1] + 1,))
        params, min_val_obj, grads = fmin_l_bfgs_b(func=self.objective, args=(X, y),
                                                   x0=initial_wcb, disp=10, maxiter=500,
                                                   fprime=self.objective_grad)
        print("params {}".format(params))
        print("min val obj {}".format(min_val_obj))
        print("grads dict {}".format(grads))
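# Hypothetical usage sketch (not in the original file): one call to
# train_with_sketch on a toy sparse example followed by predict. Assumes the
# repository's CountSketch, TopK and Node are importable and that
# TopK.get_value_for_key returns 0 for unseen keys.
if __name__ == '__main__':
    model = LogisticRegression(num_features=1 << 18)
    feature_pos, features, label = [5, 99, 1024], [1.0, 3.0, 0.5], 1
    loss = model.train_with_sketch(feature_pos, features, label)
    print("loss {} prediction {}".format(loss, model.predict(feature_pos, features)))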
import math

import numpy as np


class LogisticRegression(object):
    # CountSketch, TopK and Node are provided elsewhere in this repository.
    def __init__(self, num_features, top_k_size):
        self.D = num_features
        self.w = np.array([0] * self.D)
        self.b = 0
        self.learning_rate = 5e-1
        self.cms = CountSketch(3, (1 << 18) - 1)
        # self.cms = CustomCountMinSketch(2, (1 << 15) - 1)
        self.top_k = TopK(top_k_size)
        self.loss_val = 0

    def sigmoid(self, x):
        # numerically stable logistic function
        if x >= 0:
            return 1. / (1. + np.exp(-x))
        else:
            return np.exp(x) / (1. + np.exp(x))

    def loss(self, y, p):
        return -(y * math.log(p) + (1 - y) * math.log(1 - p))

    def train_with_sketch(self, feature_pos, features, label):
        logit = 0
        min_logit = float("inf")
        max_logit = float("-inf")
        for i in range(len(feature_pos)):
            # print("top k at pos {} value {}".format(feature_pos[i], self.top_k.get_item(feature_pos[i])))
            # multiplying w[i] with x[i]
            val = self.top_k.get_value_for_key(feature_pos[i]) * features[i]
            if val > max_logit:
                max_logit = val
            if val < min_logit:
                min_logit = val
            # calculating wTx
            logit += val
        if max_logit - min_logit == 0:
            max_logit = 1
            min_logit = 0
        normalized_weights = (logit - min_logit) / (max_logit - min_logit)
        sigm_val = self.sigmoid(normalized_weights)
        if sigm_val == 1.0:
            # keep log(1 - p) finite
            sigm_val = sigm_val - 1e-5
        # print("label {} sigmoid {}".format(label, sigm_val))
        gradient = (label - sigm_val)
        loss = self.loss(y=label, p=sigm_val)
        self.loss_val += loss
        if gradient != 0:
            for i in range(len(feature_pos)):
                updated_val = self.learning_rate * gradient * features[i]
                value = self.cms.update(feature_pos[i], updated_val)
                self.top_k.push(Node(feature_pos[i], value))
        return loss

    def negative_log_likelihood(self, y, x):
        return -y * x / (1 + math.exp(y))

    def predict(self, feature_pos, feature_val):
        logit = 0
        for i in range(len(feature_pos)):
            logit += self.top_k.get_value_for_key(feature_pos[i]) * feature_val[i]
        a = self.sigmoid(logit)
        return a
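# A minimal sketch of the TopK / Node interface that the trainers above rely
# on, assuming: push() keeps the top_k_size entries with the largest absolute
# values and get_value_for_key() returns 0 for keys that are not retained.
# This is an illustrative stand-in, not the repository's implementation.
class Node(object):
    def __init__(self, key, value):
        self.key = key
        self.value = value


class TopK(object):
    def __init__(self, size):
        self.size = size
        self.items = {}   # key -> value of the currently retained entries

    def push(self, node):
        self.items[node.key] = node.value
        if len(self.items) > self.size:
            # evict the entry with the smallest magnitude
            smallest = min(self.items, key=lambda k: abs(self.items[k]))
            del self.items[smallest]

    def get_value_for_key(self, key):
        return self.items.get(key, 0)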