Example #1
0
 def __init__(self, params):
     """Build the distribution object from a raw parameter vector.

     ``params[0]`` is used unchanged as the location, while ``params[1]``
     and ``params[2]`` are exponentiated to give the scale and the degrees
     of freedom respectively.

     Args:
         params: indexable holding at least three entries, in the order
             location, log-scale, log-degrees-of-freedom.
     """
     super().__init__(params)

     location = params[0]
     scale = np.exp(params[1])
     dof = np.exp(params[2])

     self.loc = location
     self.scale = scale
     self.var = scale**2
     self.df = dof
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(loc=location, scale=scale, df=dof)
Example #2
0
    def __init__(self, params, temp_scale=1.0):
        """Set up location/scale attributes and the underlying distribution.

        Args:
            params: indexable whose first entry is the location and whose
                second entry is the log of the scale.
            temp_scale: divisor applied to the log-scale before it is
                exponentiated (default 1.0, i.e. no tempering).
        """
        # Small constant added to both the scale and the variance.
        eps = 1e-8

        loc = params[0]
        scale = np.exp(params[1] / temp_scale) + eps

        self.loc = loc
        self.scale = scale
        self.var = scale**2 + eps
        self.shp = loc.shape

        # `dist` is bound outside this method (not visible here).
        self.dist = dist(loc=loc, scale=scale)
Example #3
0
    def __init__(self, params):
        """Keep the raw parameters and derive ``mu`` from its logarithm.

        Args:
            params: indexable whose first entry is ``log(mu)``.
        """
        # Raw parameters are retained untouched.
        self._params = params

        log_mu = params[0]
        mu = np.exp(log_mu)

        self.logmu = log_mu
        self.mu = mu
        # `dist` is bound outside this method (not visible here).
        self.dist = dist(mu=mu)
Example #4
0
 def __init__(self, params):
     """Build the distribution with a fixed degrees-of-freedom value.

     The location is ``params[0]`` and the scale is ``exp(params[1])``.
     The degrees of freedom are not read from ``params``: they are
     ``self.fixed_df`` broadcast to the shape of the location.

     Args:
         params: indexable holding the location and the log-scale.
     """
     super().__init__(params)

     location = params[0]
     scale = np.exp(params[1])
     # Same shape as the location, every entry equal to `self.fixed_df`.
     dof = np.ones_like(location) * self.fixed_df

     self.loc = location
     self.scale = scale
     self.var = scale**2
     self.df = dof
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(loc=location, scale=scale, df=dof)
Example #5
0
    def __init__(self, params, temp_scale = 1.0):
        """Store ``mu`` and ``sigma`` and build the underlying distribution.

        Args:
            params: indexable whose first entry is ``mu`` and whose second
                entry is ``log(sigma)``.
            temp_scale: accepted but not used by this method.
        """
        self.mu = params[0]
        self.sigma = np.exp(params[1])

        # The distribution takes sigma as its shape `s` and exp(mu) as its
        # scale; `dist` is bound outside this method (not visible here).
        self.dist = dist(s=self.sigma, scale=np.exp(self.mu))
Example #6
0
 def __init__(self, params):
     """Build the distribution with a clipped scale.

     The log-scale ``params[1]`` is clipped to [-10, 10] before being
     exponentiated, and the resulting scale is clipped again to [0, 10].

     Args:
         params: indexable holding the location and the log-scale.
     """
     super().__init__(params)
     self.loc = params[0]

     # Both clipping steps were added by Sankalp on 10/16/2020.
     bounded_log_scale = np.clip(params[1], a_min=-1e1, a_max=1e1)
     self.scale = np.clip(np.exp(bounded_log_scale), a_min=0, a_max=1e1)

     self.var = np.float_power(self.scale, 2)
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(loc=self.loc, scale=self.scale)
Example #7
0
def generate_mv_surv(N, mu0, mu1, S00, S11, S01):
    """Simulate ``N`` observations from a two-component competing model.

    ``N`` pairs are drawn from ``dist(mean=[mu0, mu1], cov=S)``. For every
    pair the smaller component becomes the observed value ``T`` and the
    flag ``E`` is True when the first component is the smaller one.
    Summary statistics are printed and the result of ``Y_join(T, E)`` is
    returned.

    Args:
        N: number of pairs to draw.
        mu0: mean of the first component.
        mu1: mean of the second component.
        S00: variance entry for the first component.
        S11: variance entry for the second component.
        S01: covariance entry, used for both off-diagonal positions.

    Returns:
        Whatever ``Y_join(T, E)`` produces (``Y_join`` is defined outside
        this function).
    """
    mu = [mu0, mu1]
    # A covariance matrix is symmetric and this function has no separate
    # S10 argument, so S01 fills both off-diagonal entries.
    S = [[S00, S01], [S01, S11]]

    R = dist(mean=mu, cov=S)
    raw = np.array([R.rvs() for _ in range(N)])
    T = np.min(raw, axis=1)
    E = raw[:, 0] < raw[:, 1]

    print('Prevalence:', np.mean(E))
    print('mu_E:', mu0)
    print('Marginal Mean of E:', np.mean(T[E]))
    print('mu_C:', mu1)
    print('Marginal Mean of C:', np.mean(T[~E]))

    return Y_join(T, E)
Example #8
0
 def __init__(self, params):
     """Build the distribution from a single log-scale parameter.

     Args:
         params: indexable whose first entry is the log of the scale.
     """
     scale = np.exp(params[0])
     self.scale = scale
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(scale=scale)
    def update_records(self, update_messages, human):
        """Apply a batch of update messages to the clustered message records.

        The updates are grouped with ``self.group_by_received_at`` and each
        group is processed independently:

        1. The number of stored messages per (day, cluster) and the number
           of update messages per day are counted.
        2. If some cluster has exactly the same per-day counts as the group,
           the target cluster is picked among those by
           ``self.score_clusters``. Otherwise the cluster whose count vector
           has the smallest ``dist`` to the group's is picked, and messages
           are moved out of / into it via ``self.update_record`` until its
           per-day counts equal the group's.
        3. Every update message is then applied to the best-scoring message
           of the target cluster on the same day.

        Args:
            update_messages: update messages to apply; each is read for its
                ``day`` and ``new_risk`` attributes. When falsy, nothing is
                done.
            human: not used by this method.

        Returns:
            ``self``.
        """
        if not update_messages:
            return self
        grouped_update_messages = self.group_by_received_at(update_messages)
        # NOTE: the loop variable intentionally reuses the parameter name, so
        # from here on `update_messages` is the current group only.
        for received_at, update_messages in grouped_update_messages.items():

            # num days x num clusters
            cluster_cards = np.zeros((max(self.clusters_by_day.keys())+1,  max(self.clusters.keys())+1))
            update_cards = np.zeros((max(self.clusters_by_day.keys())+1, 1))

            # figure out the cardinality of each day's message set
            for day, clusters in self.clusters_by_day.items():
                for cluster_id, messages in clusters.items():
                    cluster_cards[day][cluster_id] = len(messages)

            for update_message in update_messages:
                update_cards[update_message.day] += 1

            # find the nearest cardinality cluster
            perfect_signatures = np.where((cluster_cards == update_cards).all(axis=0))[0]
            # `perfect_signatures` holds cluster *indices*; test for emptiness
            # rather than truthiness so that a match on cluster 0 counts.
            if perfect_signatures.size == 0:
                # calculate the distance between every signature
                # (presumably a Wasserstein distance; `dist` is bound
                # outside this method)
                scores = []
                for cluster_idx in range(cluster_cards.shape[1]):
                    scores.append(dist(cluster_cards[:, cluster_idx], update_cards.reshape(-1)))
                best_cluster = int(np.argmin(scores))

                # for each day
                for day in range(len(update_cards)):
                    cur_cardinality = int(cluster_cards[day, best_cluster])
                    # index the single column explicitly to get a scalar
                    target_cardinality = int(update_cards[day, 0])

                    # if (and while) the cardinality is not what it should be, as determined by the update_messages
                    while cur_cardinality - target_cardinality != 0:
                        # if we need to remove messages from this cluster on this day,
                        if cur_cardinality > target_cardinality:
                            best_score = -1
                            best_message = None
                            new_cluster_id = None

                            # then for each message in that day/cluster,
                            for message in self.clusters_by_day[day][best_cluster]:
                                for cluster_id, messages in self.clusters_by_day[day].items():
                                    if cluster_id == best_cluster:
                                        continue

                                    # and for each alternative cluster on that day
                                    for candidate_cluster_message in messages:
                                        # check if it's a good cluster to move this message to
                                        score = self.score_two_messages(decode_message(candidate_cluster_message), message)
                                        if (score > best_score or not best_message):
                                            # remember the score so only a strictly
                                            # better pairing can replace this one
                                            best_score = score
                                            best_message = message
                                            new_cluster_id = cluster_id

                            # if there are no other clusters on that day make a new cluster
                            if not best_message:
                                # `message` is the last one iterated above
                                best_message = message
                                message = decode_message(message)
                                new_cluster_id = hash_to_cluster(message)
                            best_message = decode_message(best_message)

                            # for the message which best fits another cluster, move it there
                            self.update_record(best_cluster, new_cluster_id, best_message, best_message)
                            cur_cardinality -= 1

                        #otherwise we need to add messages to this cluster/day
                        else:
                            # so look for messages which closely match our update messages, and add them
                            # (use the first update message of this day; if none
                            # matches, the last update message is left selected)
                            for update_message in update_messages:
                                if update_message.day == day:
                                    break
                            best_score = -2
                            best_message = None
                            old_cluster_id = None
                            for cluster_id, messages in self.clusters_by_day[day].items():
                                for message in messages:
                                    score = self.score_two_messages(update_message, message)
                                    if (score > best_score and cluster_id != best_cluster):
                                        best_score = score
                                        best_message = message
                                        old_cluster_id = cluster_id

                            # NOTE(review): `best_message` is still None here when no
                            # other cluster holds a message on this day - confirm that
                            # callers never reach this state.
                            best_message = decode_message(best_message)
                            updated_message = Message(best_message.uid, update_message.new_risk, best_message.day, best_message.unobs_id)
                            self.update_record(old_cluster_id, best_cluster, best_message, updated_message)
                            cur_cardinality += 1
            else:
                best_cluster = self.score_clusters(update_messages, perfect_signatures)

            # apply each update to the best-matching message of the chosen cluster
            for update_message in update_messages:
                best_score = -1
                # fall back to the first message when nothing scores above -1
                best_message = self.clusters_by_day[update_message.day][best_cluster][0]
                for risk_message in self.clusters_by_day[update_message.day][best_cluster]:
                    score = self.score_two_messages(update_message, risk_message)
                    if score > best_score:
                        best_score = score
                        best_message = risk_message
                best_message = decode_message(best_message)
                updated_message = Message(best_message.uid, update_message.new_risk, best_message.day, best_message.unobs_id)
                self.update_record(best_cluster, best_cluster, best_message, updated_message)
        return self
Example #10
0
 def __init__(self, params):
     """Keep the raw parameters and build the underlying distribution.

     Args:
         params: indexable whose first entry is the location and whose
             second entry is the log of the scale.
     """
     self._params = params

     location = params[0]
     shape_param = np.exp(params[1])

     self.loc = location
     self.scale = shape_param
     # The exponentiated log-scale is passed as the shape `s`, and the
     # exponentiated location as `scale`; `dist` is bound outside this method.
     self.dist = dist(s=shape_param, scale=np.exp(location))
     self.eps = 1e-5
Example #11
0
 def __init__(self, params):
     """Build a unit-scale distribution around the given location.

     Only ``params[0]`` is read; variance and scale are arrays of ones
     shaped like the location.

     Args:
         params: indexable whose first entry is the location array.
     """
     location = params[0]

     self.loc = location
     # Two independent arrays, so mutating one never affects the other.
     self.var = np.ones_like(location)
     self.scale = np.ones_like(location)
     self.shape = location.shape
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(loc=location, scale=self.scale)
Example #12
0
 def __init__(self, params):
     """Keep the raw parameters and build the location/scale distribution.

     Args:
         params: indexable whose first entry is the location and whose
             second entry is the log of the scale.
     """
     self._params = params

     location = params[0]
     log_scale = params[1]
     scale = np.exp(log_scale)

     self.loc = location
     self.logscale = log_scale
     self.scale = scale
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(loc=location, scale=scale)
Example #13
0
 def __init__(self, params):
     """Build the distribution from a single logit parameter.

     Args:
         params: indexable whose first entry is the logit; it is mapped to
             a probability with ``scipy.special.expit``.
     """
     logit = params[0]
     probability = sp.special.expit(logit)

     self.logit = logit
     self.prob = probability
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(p=probability)
Example #14
0
 def __init__(self, params, temp_scale=1.0):
     """Store location and scale and build the underlying distribution.

     Args:
         params: indexable whose first entry is the location and whose
             second entry is the log of the scale.
         temp_scale: accepted but not used by this method.
     """
     location = params[0]
     shape_param = np.exp(params[1])

     self.loc = location
     self.scale = shape_param
     # The exponentiated log-scale is passed as the shape `s`, and the
     # exponentiated location as `scale`; `dist` is bound outside this method.
     self.dist = dist(s=shape_param, scale=np.exp(location))
Example #15
0
            'lognorm': res['lognorm']
        }
        results = pd.concat([results, pd.DataFrame(out, index=[0])],
                            axis=0,
                            sort=False)
        results.reset_index(drop=True).to_csv(logfile, index=False)

    #     joint[i] = res['joint']
    #     lognorm[i] = res['lognorm']

    # plt.plot(covs, joint, label='joint')
    # plt.plot(covs, single, label='lognormal')
    # plt.ylabel('est. E')
    # plt.xlabel('cov(E, C)')
    # plt.legend()
    # plt.show()

    mu = [mu0, mu1]
    S = [[S00, S01], [S10, S11]]

    R = dist(mean=mu, cov=S)
    raw = np.array([R.rvs() for _ in range(N)])
    T = np.min(raw, axis=1)
    E = raw[:, 0] < raw[:, 1]

    print('Prevalence:', np.mean(E))
    print('mu_E:', mu0)
    print('Marginal Mean of E:', np.mean(T[np.where(E == 1)]))
    print('mu_C:', mu1)
    print('Marginal Mean of C:', np.mean(T[np.where(E == 0)]))
Example #16
0
 def __init__(self, params):  # pylint: disable=super-init-not-called
     """Keep the raw parameters and build a scale-only distribution.

     Args:
         params: indexable whose first entry is the log of the scale.
     """
     self._params = params

     scale = np.exp(params[0])
     self.scale = scale
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(scale=scale)
Example #17
0
    def __init__(self, params):
        """Build a scale-only distribution with the location fixed at zero.

        Args:
            params: indexable whose first entry is the log of the scale;
                the exponentiated value must expose ``.shape``.
        """
        scale = np.exp(params[0])

        self.loc = 0
        self.scale = scale
        self.shp = scale.shape

        # `dist` is bound outside this method (not visible here).
        self.dist = dist(scale=scale)
Example #18
0
        "passer_player_id",
        "defteam",
    ])

    target = "pass_touchdown"
    X = df.drop(columns=[target]).values
    Y = df[target].values

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=SEED)

    # baseline not using predictor data
    avg_tds = np.mean(Y_train)
    y_dist = dist(avg_tds)
    naive_NLL = -y_dist.logpmf(Y_test).mean()

    print("Mean squared error using only the mean: {:.4f}".format(
        mean_squared_error(np.repeat(avg_tds, len(Y_test)), Y_test)))
    print(
        "Poisson negative log liklihood without using predictor variables: {:.4f}"
        .format(naive_NLL))

    ngb = NGBRegressor(Dist=Poisson)

    ngb.fit(X_train, Y_train)

    Y_preds = ngb.predict(X_test)
    Y_dists = ngb.pred_dist(X_test)
Example #19
0
 def __init__(self, params):
     """Build the location/scale distribution from a raw parameter vector.

     Args:
         params: indexable whose first entry is the location and whose
             second entry is the log of the scale.
     """
     location = params[0]
     scale = np.exp(params[1])

     self.loc = location
     self.scale = scale
     self.var = scale**2
     # `dist` is bound outside this method (not visible here).
     self.dist = dist(loc=location, scale=scale)