def test_distributions_normal_json_serialization():
    d = NormalDistribution(5, 2)
    e = Distribution.from_json(d.to_json())
    assert_equal(e.name, "NormalDistribution")
    assert_array_equal(e.parameters, [5, 2])
    assert_array_equal(e.summaries, [0, 0, 0])
def test_distributions_normal_weighted_fit():
    d = NormalDistribution(5, 2)
    d.fit([0, 2, 3, 2, 100], weights=[0, 5, 2, 3, 200])
    assert_array_almost_equal(d.parameters, [95.3429, 20.8276], 4)
    assert_almost_equal(d.log_probability(50), -6.32501194)
    assert_array_equal(d.summaries, [0, 0, 0])
def load_segmentation_model(modeldata):
    model = HiddenMarkovModel('model')

    states = {}
    for s in modeldata:
        if len(s['emission']) == 1:
            emission = NormalDistribution(*s['emission'][0][:2])
        else:
            weights = np.array([w for _, _, w in s['emission']])
            dists = [NormalDistribution(mu, sigma) for mu, sigma, _ in s['emission']]
            emission = GeneralMixtureModel(dists, weights=weights)
        state = State(emission, name=s['name'])
        states[s['name']] = state
        model.add_state(state)
        if 'start_prob' in s:
            model.add_transition(model.start, state, s['start_prob'])

    for s in modeldata:
        current = states[s['name']]
        for nextstate, prob in s['transition']:
            model.add_transition(current, states[nextstate], prob)

    model.bake()
    return model
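# A minimal, hypothetical sketch of the `modeldata` structure load_segmentation_model
# expects, inferred from the keys it reads above ('name', 'emission', 'transition',
# optional 'start_prob'); the state names and numbers here are illustrative only.
example_modeldata = [
    {
        'name': 'background',
        'emission': [(0.0, 1.0)],                        # single (mean, std) -> NormalDistribution
        'transition': [('background', 0.9), ('signal', 0.1)],
        'start_prob': 0.95,
    },
    {
        'name': 'signal',
        'emission': [(3.0, 1.0, 0.6), (6.0, 2.0, 0.4)],  # (mean, std, weight) -> GeneralMixtureModel
        'transition': [('signal', 0.8), ('background', 0.2)],
        'start_prob': 0.05,
    },
]
example_model = load_segmentation_model(example_modeldata)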
def test_distributions_normal_freeze_fit():
    d = NormalDistribution(5, 2)
    d.freeze()
    d.fit([0, 1, 1, 2, 3, 2, 1, 2, 2])
    assert_array_almost_equal(d.parameters, [5, 2])
    assert_array_equal(d.summaries, [0, 0, 0])
def test_distributions_normal_inertia_fit():
    d = NormalDistribution(5, 2)
    d.fit([0, 5, 3, 5, 7, 3, 4, 5, 2], inertia=0.5)
    assert_array_almost_equal(d.parameters, [4.3889, 1.9655], 4)
    assert_array_equal(d.summaries, [0, 0, 0])
def test_distributions_normal_random_sample():
    d = NormalDistribution(0, 1)
    x = numpy.array([0.44122749, -0.33087015, 2.43077119, -0.25209213, 0.10960984])

    assert_array_almost_equal(d.sample(5, random_state=5), x)
    assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
def test_sample_from_site():
    dists = [NormalDistribution(5, 1), NormalDistribution(1, 7), NormalDistribution(8, 2)]
    trans_mat = np.array([[0.7, 0.3, 0.0],
                          [0.0, 0.8, 0.2],
                          [0.0, 0.0, 0.9]])
    starts = np.array([1.0, 0.0, 0.0])
    ends = np.array([0.0, 0.0, 0.1])
    model = HiddenMarkovModel.from_matrix(trans_mat, dists, starts, ends)
    model.plot()
def test_distributions_normal_probability():
    d = NormalDistribution(5, 2)
    e = NormalDistribution(5., 2.)

    assert_almost_equal(d.probability(5), 0.19947114)
    assert_equal(d.probability(5), e.probability(5))
    assert_equal(d.probability(5), d.probability(5.))
    assert_almost_equal(d.probability(0), 0.0087641502)
    assert_equal(d.probability(0), e.probability(0.))
def test_distributions_normal_log_probability():
    d = NormalDistribution(5, 2)
    e = NormalDistribution(5., 2.)

    assert_almost_equal(d.log_probability(5), -1.61208571)
    assert_equal(d.log_probability(5), e.log_probability(5))
    assert_equal(d.log_probability(5), d.log_probability(5.))
    assert_almost_equal(d.log_probability(0), -4.737085713764219)
    assert_equal(d.log_probability(0), e.log_probability(0.))
def test_distributions_normal_freeze_thaw_fit():
    d = NormalDistribution(5, 2)
    d.freeze()
    d.thaw()
    d.fit([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4])
    assert_array_almost_equal(d.parameters, [4.9166, 0.7592], 4)
def test_distributions_normal_nan_fit():
    d = NormalDistribution(5, 2)
    e = NormalDistribution(5, 2)

    d.fit([5, 4, nan, 5, 4, nan, 6, 5, 6, nan, nan, 5, 4, 6, nan, 5, 4, nan])
    assert_array_almost_equal(d.parameters, [4.9167, 0.7592], 4)
    assert_not_equal(d.log_probability(4), e.log_probability(4))
    assert_almost_equal(d.log_probability(4), -1.3723678499651766)
    assert_almost_equal(d.log_probability(18), -149.13140399454429)
    assert_almost_equal(d.log_probability(1e8), -8674697942168743.0, -4)
    assert_array_equal(d.summaries, [0, 0, 0])
def plot_HalfNorm():
    X = np.random.sample(size=(100000, 1))
    dn = NormalDistribution(0, 1)
    dhn1 = HalfNormalDistribution(0.5)
    dhn2 = HalfNormalDistribution.from_samples(X)
    dhn3 = HalfNormalDistribution(1)

    x = np.arange(-5, 5, 0.1)
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(x, dn.probability(x), label='Normal')
    ax.plot(x, set_y(x, dhn1), label='HalfNorm, s=0.5')
    ax.plot(x, set_y(x, dhn2), label='HalfNorm, s=rand')
    ax.plot(x, set_y(x, dhn3), label='HalfNorm, s=1')
    ax.set_ylabel('Probability', fontsize=10)
    ax.legend(fontsize=10)
    plt.savefig('/scratch/chd5n/test.png', bbox_inches='tight')
    print('plot written to', '/scratch/chd5n/test.png')
def oriHMMParams(self):
    """
    Set initial parameters for the Hidden Markov Model (HMM).

    Attributes
    ----------
    HMMParams : dict
        Has 3 keys: "A", the state transition matrix; "B", the emission
        probabilities, specifying the parameters (means, variances, weights)
        of the mixture Gaussian distributions for each hidden state; and
        "pi", the hidden state weights. This dict will be updated after the
        learning procedure.
    """
    hmm = HiddenMarkovModel()

    # GMM emissions
    # 5 Hidden States:
    # 0--start, 1--downstream, 2--no bias, 3--upstream, 4--end
    numdists = 3  # Three-distribution Gaussian Mixtures
    var = 7.5 / (numdists - 1)
    means = [[], [], [], [], []]
    for i in range(numdists):
        means[4].append(i * 7.5 / (numdists - 1) + 2.5)
        means[3].append(i * 7.5 / (numdists - 1))
        means[2].append((i - (numdists - 1) / 2) * 7.5 / (numdists - 1))
        means[1].append(-i * 7.5 / (numdists - 1))
        means[0].append(-i * 7.5 / (numdists - 1) - 2.5)

    states = []
    for i, m in enumerate(means):
        tmp = []
        for j in m:
            tmp.append(NormalDistribution(j, var))
        mixture = GeneralMixtureModel(tmp)
        states.append(State(mixture, name=str(i)))
    hmm.add_states(*tuple(states))

    # Transition matrix
    # A = [[0., 1., 0., 0., 0.],
    #      [0., 0.4, 0.3, 0.3, 0.],
    #      [0.05, 0., 0.5, 0.45, 0.],
    #      [0., 0., 0., 0.5, 0.5],
    #      [0.99, 0., 0.01, 0., 0.]]
    hmm.add_transition(states[0], states[1], 1)
    hmm.add_transition(states[1], states[1], 0.4)
    hmm.add_transition(states[1], states[2], 0.3)
    hmm.add_transition(states[1], states[3], 0.3)
    hmm.add_transition(states[2], states[0], 0.05)
    hmm.add_transition(states[2], states[2], 0.5)
    hmm.add_transition(states[2], states[3], 0.45)
    hmm.add_transition(states[3], states[3], 0.5)
    hmm.add_transition(states[3], states[4], 0.5)
    hmm.add_transition(states[4], states[0], 0.99)
    hmm.add_transition(states[4], states[2], 0.01)

    pi = [0.05, 0.3, 0.3, 0.3, 0.05]
    for i in range(len(states)):
        hmm.add_transition(hmm.start, states[i], pi[i])

    hmm.bake()
    return hmm
def test_distributions_normal_pickle_serialization():
    d = NormalDistribution(5, 2)
    e = pickle.loads(pickle.dumps(d))
    assert_equal(e.name, "NormalDistribution")
    assert_array_equal(e.parameters, [5, 2])
    assert_array_equal(e.summaries, [0, 0, 0])
def test_distributions_normal_exclusive_nan_fit():
    d = NormalDistribution(5, 2)
    e = NormalDistribution(5, 2)

    d.fit([nan, nan, nan, nan, nan])
    assert_array_equal(d.parameters, [5, 2])
    assert_almost_equal(d.log_probability(4), e.log_probability(4.))
    assert_array_equal(d.summaries, [0, 0, 0])
def init_cycle_hmm(sequences, steps, states_per_step, model_id):
    """
    Instantiate a cyclic model with random parameters.

    Randomly generates start and transition matrices, and generates normal
    distributions for each state from a partition on the sequences.
    """
    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distributions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(n_states)
    trans = trans / trans.sum()
    for i, state in enumerate(states.flatten().tolist()):
        model.add_transition(model.start, state, trans[i])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x], trans[x + 1])

    # make random transition from stepn -> step0
    for j in range(states_per_step):
        trans = np.random.ranf(states_per_step + 1)
        trans = trans / trans.sum()
        # self transition
        model.add_transition(states[(steps - 1), j], states[(steps - 1), j], trans[0])
        # out transition
        for x in range(states_per_step):
            model.add_transition(states[(steps - 1), j], states[0, x], trans[x + 1])

    model.bake()
    print('Initialized Cyclic State HMM:', '[', steps, states_per_step, ']')
    return model
def init_model(start_dip, stay_state, mean_eu, sd_eu, mean_loh):
    ## define distributions
    d_eu = NormalDistribution(mean_eu, sd_eu)                  ## euploid enriched at 0
    d_loh = NormalDistribution(mean_loh, sd_eu)                ## loss of heterozygosity enriched at 1
    d_aneu = NormalDistribution(mean_loh / 2.0, sd_eu * 1.4)   ## aneuploid enriched at 1
    ## define states
    s_eu = State(d_eu, name='EU')        ## enriched at 0
    s_loh = State(d_loh, name='LOH')     ## enriched at 1
    s_aneu = State(d_aneu, name='ANEU')  ## enriched at 1
    ## define model and pass in states
    model = HiddenMarkovModel()
    model.add_states(s_eu, s_loh, s_aneu)
    ## define transition matrix (state a, state b, probability)
    model.add_transition(model.start, s_eu, start_dip)
    model.add_transition(model.start, s_loh, 1.0 - start_dip - 0.1)
    model.add_transition(model.start, s_aneu, 0.1)
    model.add_transition(s_eu, s_eu, stay_state)
    model.add_transition(s_eu, s_loh, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(s_eu, s_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(s_eu, model.end, 0.002)
    model.add_transition(s_loh, s_loh, stay_state)
    model.add_transition(s_loh, s_eu, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(s_loh, s_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(s_loh, model.end, 0.002)
    model.add_transition(s_aneu, s_aneu, stay_state)
    model.add_transition(s_aneu, s_eu, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(s_aneu, s_loh, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(s_aneu, model.end, 0.002)
    ## finalize internal structure
    model.bake()
    ## only train transitions, not emissions
    model.freeze_distributions()
    return model
def build_the_same_model_in_test_sample_from_site_line_by_line():
    # State holds the emission distribution, but not the transition
    # distribution, because that's stored in the graph edges.
    s1 = State(NormalDistribution(5, 1))
    s2 = State(NormalDistribution(1, 7))
    s3 = State(NormalDistribution(8, 2))
    model = HiddenMarkovModel()
    model.add_states(s1, s2, s3)
    model.add_transition(model.start, s1, 1.0)
    model.add_transition(s1, s1, 0.7)
    model.add_transition(s1, s2, 0.3)
    model.add_transition(s2, s2, 0.8)
    model.add_transition(s2, s3, 0.2)
    model.add_transition(s3, s3, 0.9)
    model.add_transition(s3, model.end, 0.1)
    model.bake()
    model.plot()
def test_distributions_normal_fit_ooc():
    d = NormalDistribution(5, 2)
    d.summarize([0, 2], weights=[0, 5])
    d.summarize([3, 2], weights=[2, 3])
    d.summarize([100], weights=[200])
    assert_array_equal(d.summaries, [2.100000e+02, 2.002200e+04, 2.000050e+06])

    d.from_summaries()
    assert_array_equal(d.summaries, [0, 0, 0])
    assert_array_almost_equal(d.parameters, [95.3429, 20.8276], 4)
def oriHMMParams(self, numdists=3):
    """
    Set initial parameters for the Hidden Markov Model (HMM).
    """
    # GMM emissions
    # 3 Hidden States:
    # 0--downstream, 1--no bias, 2--upstream
    if numdists == 1:
        dists = [NormalDistribution(-2.5, 7.5),
                 NormalDistribution(0, 7.5),
                 NormalDistribution(2.5, 7.5)]
    else:
        var = 7.5 / (numdists - 1)
        means = [[], [], []]
        for i in range(numdists):
            means[0].append(i * 7.5 / (numdists - 1) + 2.5)
            means[1].append(i * 7.5 * (-1)**i / (numdists - 1))
            means[2].append(-i * 7.5 / (numdists - 1) - 2.5)
        dists = []
        for i, m in enumerate(means):
            tmp = []
            for j in m:
                tmp.append(NormalDistribution(j, var))
            mixture = GeneralMixtureModel(tmp)
            dists.append(mixture)

    # transition matrix
    A = [[0.34, 0.33, 0.33],
         [0.33, 0.34, 0.33],
         [0.33, 0.33, 0.34]]
    starts = np.ones(3) / 3

    hmm = HiddenMarkovModel.from_matrix(A, dists, starts,
                                        state_names=['0', '1', '2'],
                                        name='mixture{0}'.format(numdists))

    return hmm
def test_distributions_independent_random_sample():
    d = IndependentComponentsDistribution([NormalDistribution(5, 2),
                                           UniformDistribution(0, 10),
                                           ExponentialDistribution(7),
                                           LogNormalDistribution(0, 0.4)])
    x = numpy.array([[5.882455, 2.219932, 0.03586, 1.193024],
                     [4.33826, 8.707323, 0.292267, 0.876036],
                     [9.861542, 2.067192, 0.033083, 2.644041]])

    assert_array_almost_equal(d.sample(3, random_state=5), x)
    assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
def ghmm_model(states_labels: tuple, transitions: tuple, init_prob: tuple,
               end_prob: tuple, means: list, vars: list) -> HiddenMarkovModel:
    """
    :param states_labels: labels of the hidden states
    :param transitions: state-to-state transition probability matrix
    :param init_prob: start probability for each state
    :param end_prob: end probability for each state
    :param means: per-state list of mixture component means
    :param vars: per-state list of mixture component variances
    :return: the baked HiddenMarkovModel
    """
    hmm_model = HiddenMarkovModel()
    mix_num = len(vars[0])

    states = []
    for state_i, state in enumerate(states_labels):
        mixture = []
        for mix_i in range(mix_num):
            init_mean = means[state_i][mix_i]
            init_var = vars[state_i][mix_i]
            mixture.append(NormalDistribution(init_mean, init_var))
        states.append(State(GeneralMixtureModel(mixture), name=str(state_i)))
    hmm_model.add_states(*tuple(states))

    for row in range(len(states_labels)):
        for col in range(len(states_labels)):
            prob = transitions[row][col]
            if prob != 0.:
                hmm_model.add_transition(states[row], states[col], prob)

    for state_i, prob in enumerate(init_prob):
        if prob != 0.:
            hmm_model.add_transition(hmm_model.start, states[state_i], prob)

    for state_i, prob in enumerate(end_prob):
        if prob != 0.:
            hmm_model.add_transition(states[state_i], hmm_model.end, prob)

    hmm_model.bake()
    return hmm_model
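# A hypothetical call to ghmm_model with two states and two mixture components
# per state, to illustrate the expected argument shapes; every value below is
# made up, and pomegranate is assumed to be imported as in the code above.
states_labels = ('low', 'high')
transitions = ((0.9, 0.1),
               (0.2, 0.8))
init_prob = (0.6, 0.4)
end_prob = (0.0, 0.05)
means = [[0.0, 1.0], [4.0, 6.0]]   # two mixture-component means per state
vars = [[1.0, 2.0], [1.5, 1.0]]    # matching component variances
example_hmm = ghmm_model(states_labels, transitions, init_prob, end_prob, means, vars)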
def visit_helper(self, k):
    """Returns a tuple x, y that corresponds to the coords which we will manipulate."""
    mu_x, mu_y, sigma = int(round(k.pt[0])), int(round(k.pt[1])), k.size
    # Remember, it may be wise to expand sigma - greater variance = less honed attack
    sigma += self.params.SIGMA_CONSTANT
    d_x = NormalDistribution(mu_x, sigma)
    d_y = NormalDistribution(mu_y, sigma)
    x = d_x.sample()
    y = d_y.sample()
    if self.params.small_image:
        x /= self.params.inflation_constant
        y /= self.params.inflation_constant
    if x >= self.params.X_SHAPE:
        x = self.params.X_SHAPE - 1
    elif x < 0:
        x = 0
    if y >= self.params.Y_SHAPE:
        y = self.params.Y_SHAPE - 1
    elif y < 0:
        y = 0
    return int(x), int(y)
def sample_from_kp(k):
    mu_x, mu_y, sigma = int(round(k.pt[0])), int(round(k.pt[1])), k.size
    # Remember, it may be wise to expand sigma
    # greater variance = less honed attack
    sigma += params.SIGMA_CONSTANT
    d_x = NormalDistribution(mu_x, sigma)
    d_y = NormalDistribution(mu_y, sigma)
    x = d_x.sample()
    y = d_y.sample()
    if params.small_image:
        x /= params.inflation_constant
        y /= params.inflation_constant
    x = int(x)
    y = int(y)
    if x >= params.X_SHAPE:
        x = params.X_SHAPE - 1
    elif x < 0:
        x = 0
    if y >= params.Y_SHAPE:
        y = params.Y_SHAPE - 1
    elif y < 0:
        y = 0
    return int(x), int(y)
def init_gaussian_hmm(sequences, n_states, model_id, seed=None):
    """
    Instantiate a model with random parameters.

    Randomly generates start and transition matrices, and generates normal
    distributions for each state from a partition on the sequences.
    """
    """
    # make random transition probability matrix
    # scale each row to sum to 1
    trans = np.random.ranf((n_states, n_states))
    for i in range(n_states):
        trans[i, :] = trans[i, :] / trans[i, :].sum()

    # make distributions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    # x = math.min(3, x)
    dists = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dists.append(NormalDistribution.from_samples(sequences[:, temp_assignment]))

    # random start probabilities
    # scale to sum to 1
    starts = np.random.ranf(n_states)
    starts = starts / sum(starts)

    model = HiddenMarkovModel.from_matrix(trans, dists, starts, name=model_id)
    """
    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)

    # make states with distributions from random subsets of timepoints
    x = int(math.ceil(sequences.shape[1] / float(n_states)))
    states = []
    for i in range(n_states):
        temp_assignment = np.random.choice(sequences.shape[1], x)
        dist = NormalDistribution.from_samples(sequences[:, temp_assignment])
        states.append(State(dist, name=str(i)))
    model.add_states(states)

    # add random start probabilities
    start_probs = np.random.ranf(n_states)
    start_probs = start_probs / start_probs.sum()
    for i, state in enumerate(states):
        model.add_transition(model.start, state, start_probs[i])

    # add random transition probabilities out of each state
    for state1 in states:
        transitions = np.random.ranf(n_states)
        transitions = transitions / transitions.sum()
        for i, state2 in enumerate(states):
            model.add_transition(state1, state2, transitions[i])

    model.bake()
    print('Initialized HMM: ', model.name)
    return model
def test_distributions_normal_underflow_log_probability():
    d = NormalDistribution(5, 1e-10)
    assert_almost_equal(d.log_probability(1e100), -4.9999999999999987e+219)
def test_distributions_normal_underflow_probability():
    d = NormalDistribution(5, 1e-10)
    assert_almost_equal(d.probability(1e100), 0.0)
def test_distributions_normal_nan_probability():
    d = NormalDistribution(5, 2)
    assert_equal(d.probability(nan), 1)
    assert_array_almost_equal(d.probability([nan, 5]), [1, 0.199471])
def test_distributions_normal_nan_from_samples():
    d = NormalDistribution.from_samples([5, nan, 2, nan, 4, 6, nan, 8, 3, nan, 6, nan, 8, 3])
    assert_array_almost_equal(d.parameters, [5.0, 2.05480466])
    assert_array_equal(d.summaries, [0, 0, 0])
def init_lr_hmm(sequences, steps, states_per_step,
                force_end=False, model_id='Left-Right HMM', seed=None):
    """
    Instantiate a left-right model with random parameters.

    Randomly generates start and transition matrices, and generates normal
    distributions for each state from a partition on the sequences.
    force_end: if True, we require the sequence to end in the end state.
    """
    # seed random number generator
    if seed is not None:
        np.random.seed(seed)

    model = HiddenMarkovModel(model_id)
    n_states = steps * states_per_step

    # make distributions from chronological subsets of timepoints
    step_size = int(math.ceil(sequences.shape[1] / float(n_states + 1)))

    # generate states
    states = np.empty((steps, states_per_step), dtype=object)
    for i in range(steps):
        for j in range(states_per_step):
            temp_assignment = np.arange(step_size * i, step_size * (i + 1))
            dist = NormalDistribution.from_samples(sequences[:, temp_assignment])
            state_name = str(i) + '-' + str(j)
            states[i, j] = State(dist, name=str(state_name))

    # add states to model
    model.add_states(states.flatten().tolist())

    # make random transition from start -> step0
    trans = np.random.ranf(states_per_step)
    trans = trans / trans.sum()
    for j in range(states_per_step):
        model.add_transition(model.start, states[0, j], trans[j])

    # make random transition from step(i) -> step(i+1)
    for i in range(steps - 1):
        for j in range(states_per_step):
            trans = np.random.ranf(states_per_step + 1)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[i, j], states[i, j], trans[0])
            # out transition
            for x in range(states_per_step):
                model.add_transition(states[i, j], states[i + 1, x], trans[x + 1])

    # make random transition from stepn -> end
    if force_end:
        for j in range(states_per_step):
            trans = np.random.ranf(2)
            trans = trans / trans.sum()
            # self transition
            model.add_transition(states[(steps - 1), j], states[(steps - 1), j], trans[0])
            # end transition
            model.add_transition(states[(steps - 1), j], model.end, trans[1])

    model.bake()
    print('Initialized Left-Right HMM:', model.name, '[', steps, states_per_step, ']')
    return model
def test_independent():
    d = IndependentComponentsDistribution([NormalDistribution(5, 2),
                                           ExponentialDistribution(2)])
    assert_equal(round(d.log_probability((4, 1)), 4), -3.0439)
    assert_equal(round(d.log_probability((100, 0.001)), 4), -1129.0459)

    d = IndependentComponentsDistribution([NormalDistribution(5, 2),
                                           ExponentialDistribution(2)],
                                          weights=[18., 1.])
    assert_equal(round(d.log_probability((4, 1)), 4), -32.5744)
    assert_equal(round(d.log_probability((100, 0.001)), 4), -20334.5764)

    d.fit([(5, 1), (5.2, 1.7), (4.7, 1.9), (4.9, 2.4), (4.5, 1.2)])
    assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.86)
    assert_equal(round(d.parameters[0][0].parameters[1], 4), 0.2417)
    assert_equal(round(d.parameters[0][1].parameters[0], 4), 0.6098)

    d = IndependentComponentsDistribution([NormalDistribution(5, 2),
                                           UniformDistribution(0, 10)])
    d.fit([(0, 0), (5, 0), (3, 0), (5, -5), (7, 0), (3, 0), (4, 0), (5, 0),
           (2, 20)], inertia=0.5)
    assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
    assert_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
    assert_equal(d.parameters[0][1].parameters[0], -2.5)
    assert_equal(d.parameters[0][1].parameters[1], 15)

    d.fit([(0, 0), (5, 0), (3, 0), (5, -5), (7, 0), (3, 0), (4, 0), (5, 0),
           (2, 20)], inertia=0.75)
    assert_not_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
    assert_not_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
    assert_not_equal(d.parameters[0][1].parameters[0], -2.5)
    assert_not_equal(d.parameters[0][1].parameters[1], 15)

    d = IndependentComponentsDistribution([NormalDistribution(5, 2),
                                           UniformDistribution(0, 10)])
    d.summarize([(0, 0), (5, 0), (3, 0)])
    d.summarize([(5, -5), (7, 0)])
    d.summarize([(3, 0), (4, 0), (5, 0), (2, 20)])
    d.from_summaries(inertia=0.5)
    assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
    assert_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
    assert_equal(d.parameters[0][1].parameters[0], -2.5)
    assert_equal(d.parameters[0][1].parameters[1], 15)

    d.freeze()
    d.fit([(1, 7), (7, 2), (2, 4), (2, 4), (1, 4)])
    assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
    assert_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
    assert_equal(d.parameters[0][1].parameters[0], -2.5)
    assert_equal(d.parameters[0][1].parameters[1], 15)

    e = Distribution.from_json(d.to_json())
    assert_equal(e.name, "IndependentComponentsDistribution")
    assert_equal(round(e.parameters[0][0].parameters[0], 4), 4.3889)
    assert_equal(round(e.parameters[0][0].parameters[1], 4), 1.9655)
    assert_equal(e.parameters[0][1].parameters[0], -2.5)
    assert_equal(e.parameters[0][1].parameters[1], 15)

    f = pickle.loads(pickle.dumps(e))
    assert_equal(e.name, "IndependentComponentsDistribution")
    assert_equal(round(f.parameters[0][0].parameters[0], 4), 4.3889)
    assert_equal(round(f.parameters[0][0].parameters[1], 4), 1.9655)
    assert_equal(f.parameters[0][1].parameters[0], -2.5)
    assert_equal(f.parameters[0][1].parameters[1], 15)

    X = numpy.array([[0.5, 0.2, 0.7],
                     [0.3, 0.1, 0.9],
                     [0.4, 0.3, 0.8],
                     [0.3, 0.3, 0.9],
                     [0.3, 0.2, 0.6],
                     [0.5, 0.2, 0.8]])

    d = IndependentComponentsDistribution.from_samples(
        X, distributions=NormalDistribution)
    assert_almost_equal(d.parameters[0][0].parameters[0], 0.38333, 4)
    assert_almost_equal(d.parameters[0][0].parameters[1], 0.08975, 4)
    assert_almost_equal(d.parameters[0][1].parameters[0], 0.21666, 4)
    assert_almost_equal(d.parameters[0][1].parameters[1], 0.06872, 4)
    assert_almost_equal(d.parameters[0][2].parameters[0], 0.78333, 4)
    assert_almost_equal(d.parameters[0][2].parameters[1], 0.10672, 4)

    d = IndependentComponentsDistribution.from_samples(
        X, distributions=ExponentialDistribution)
    assert_almost_equal(d.parameters[0][0].parameters[0], 2.6087, 4)
    assert_almost_equal(d.parameters[0][1].parameters[0], 4.6154, 4)
    assert_almost_equal(d.parameters[0][2].parameters[0], 1.2766, 4)

    d = IndependentComponentsDistribution.from_samples(X, distributions=[
        NormalDistribution, NormalDistribution, NormalDistribution])
    assert_almost_equal(d.parameters[0][0].parameters[0], 0.38333, 4)
    assert_almost_equal(d.parameters[0][0].parameters[1], 0.08975, 4)
    assert_almost_equal(d.parameters[0][1].parameters[0], 0.21666, 4)
    assert_almost_equal(d.parameters[0][1].parameters[1], 0.06872, 4)
    assert_almost_equal(d.parameters[0][2].parameters[0], 0.78333, 4)
    assert_almost_equal(d.parameters[0][2].parameters[1], 0.10672, 4)

    d = IndependentComponentsDistribution.from_samples(X, distributions=[
        NormalDistribution, LogNormalDistribution, ExponentialDistribution])
    assert_almost_equal(d.parameters[0][0].parameters[0], 0.38333, 4)
    assert_almost_equal(d.parameters[0][0].parameters[1], 0.08975, 4)
    assert_almost_equal(d.parameters[0][1].parameters[0], -1.5898, 4)
    assert_almost_equal(d.parameters[0][1].parameters[1], 0.36673, 4)
    assert_almost_equal(d.parameters[0][2].parameters[0], 1.27660, 4)
def test_distributions_normal_nan_log_probability():
    d = NormalDistribution(5, 2)
    assert_equal(d.log_probability(nan), 0)
    assert_array_almost_equal(d.log_probability([nan, 5]), [0, -1.61208571])
def test_distributions_normal_blank():
    d = NormalDistribution.blank()
    assert_equal(d.name, "NormalDistribution")
    assert_array_equal(d.parameters, [0, 1])
    assert_array_equal(d.summaries, [0, 0, 0])
def test_normal():
    d = NormalDistribution(5, 2)
    e = NormalDistribution(5., 2.)

    assert_true(isinstance(d.log_probability(5), float))
    assert_true(isinstance(d.log_probability([5]), float))
    assert_true(isinstance(d.log_probability([5, 6]), numpy.ndarray))

    assert_almost_equal(d.log_probability(5), -1.61208571, 8)
    assert_equal(d.log_probability(5), e.log_probability(5))
    assert_equal(d.log_probability(5), d.log_probability(5.))
    assert_almost_equal(d.log_probability(0), -4.737085713764219)
    assert_equal(d.log_probability(0), e.log_probability(0.))

    d.fit([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4])
    assert_almost_equal(d.parameters[0], 4.9167, 4)
    assert_almost_equal(d.parameters[1], 0.7592, 4)
    assert_not_equal(d.log_probability(4), e.log_probability(4))
    assert_almost_equal(d.log_probability(4), -1.3723678499651766)
    assert_almost_equal(d.log_probability(18), -149.13140399454429)
    assert_almost_equal(d.log_probability(1e8), -8674697942168743.0, -4)

    d = NormalDistribution(5, 1e-10)
    assert_almost_equal(d.log_probability(1e100), -4.9999999999999994e+219)

    d.fit([0, 2, 3, 2, 100], weights=[0, 5, 2, 3, 200])
    assert_equal(round(d.parameters[0], 4), 95.3429)
    assert_equal(round(d.parameters[1], 4), 20.8276)
    assert_equal(round(d.log_probability(50), 8), -6.32501194)

    d = NormalDistribution(5, 2)
    d.fit([0, 5, 3, 5, 7, 3, 4, 5, 2], inertia=0.5)
    assert_equal(round(d.parameters[0], 4), 4.3889)
    assert_equal(round(d.parameters[1], 4), 1.9655)

    d.summarize([0, 2], weights=[0, 5])
    d.summarize([3, 2], weights=[2, 3])
    d.summarize([100], weights=[200])
    d.from_summaries()
    assert_equal(round(d.parameters[0], 4), 95.3429)
    assert_equal(round(d.parameters[1], 4), 20.8276)

    d.freeze()
    d.fit([0, 1, 1, 2, 3, 2, 1, 2, 2])
    assert_equal(round(d.parameters[0], 4), 95.3429)
    assert_equal(round(d.parameters[1], 4), 20.8276)

    d.thaw()
    d.fit([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4])
    assert_equal(round(d.parameters[0], 4), 4.9167)
    assert_equal(round(d.parameters[1], 4), 0.7592)

    e = Distribution.from_json(d.to_json())
    assert_equal(e.name, "NormalDistribution")
    assert_equal(round(e.parameters[0], 4), 4.9167)
    assert_equal(round(e.parameters[1], 4), 0.7592)

    f = pickle.loads(pickle.dumps(e))
    assert_equal(f.name, "NormalDistribution")
    assert_equal(round(f.parameters[0], 4), 4.9167)
    assert_equal(round(f.parameters[1], 4), 0.7592)