Example #1
def recover(data):
    mu = np.mean(data)
    sigma = np.var(data)
    init_params = [(mu + 0.1, sigma), (mu - 0.1, sigma)]
    # unpack each (mu, sigma) pair so the two components start at distinct means
    weight, distributions, ll = mixem.em(
        data, [NormalDistribution(mu, sigma) for (mu, sigma) in init_params])
    print(weight, distributions, ll)
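A hypothetical driver for `recover` (an assumption, not part of the original source) draws from a known two-component normal mixture so the recovered weights and parameters can be checked by eye; it assumes `NormalDistribution` is imported from `mixem.distribution`, as the function above implies.

import numpy as np

# Draw 500 points from each of two well-separated normals, then let EM
# recover roughly equal weights and means near -2 and 3.
np.random.seed(0)
data = np.concatenate([
    np.random.normal(loc=-2.0, scale=1.0, size=500),
    np.random.normal(loc=3.0, scale=1.0, size=500),
])
recover(data)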
Example #2
def train_lambda(data):
    weights, distributions, ll = mixem.em(np.array(data), [
        mixem.distribution.NormalDistribution(0, 1),
        mixem.distribution.NormalDistribution(3, 4),
        mixem.distribution.NormalDistribution(7, 8)
    ])
    return weights
Example #3
def recover(data):

    init_params = np.random.choice(data, size=2)

    weight, distributions, ll = mixem.em(data, [ExponentialDistribution(l) for l in init_params])

    print(weight, distributions, ll)
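As in Example #1, a hypothetical driver (assumed, not from the original project) can sanity-check the exponential recovery; note that seeding the rates via `np.random.choice(data, size=2)` is a crude but common initialization.

import numpy as np

# Two exponential components: numpy's scale = 1/lambda, so these have
# rates 1.0 and 0.1.
np.random.seed(0)
data = np.concatenate([
    np.random.exponential(scale=1.0, size=500),
    np.random.exponential(scale=10.0, size=500),
])
recover(data)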
Example #4
def fitDataBMM(data, depth, lowerLimit, upperLimit, init_proportions, components=2):
    """ Fit data and return Binomial Mixture Model"""

    if components > 9:
        logging.error('Too many components specified. Max components 9.')
        exit()

    distros = []

    # Init distros
    for i in range(components):
        distros.append(BinomialDistribution(init_proportions[i], depth))

    # Format data as pairs of success and trials
    data_points = []

    for x, y in zip(data[:, :-2].flatten(), np.repeat(data[:, -1], 4)):

        # keep only proportions inside the filtering window
        if lowerLimit < x < upperLimit:
            charCount = x * 0.01 * y    # convert proportion to count
            data_points.append([charCount, y])

    data = np.array(data_points)
    weights, distros, log_like = mixem.em(
        data, distros,
        initial_weights=None, progress_callback=None,
        max_iterations=500, tol_iters=200, tol=0.1)
    return BinomialMixture(weights, distros, log_like)
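`BinomialMixture` and `BinomialDistribution` are defined elsewhere in that project. Purely as an assumption about its shape, a minimal stand-in for the result container could be:

from collections import namedtuple

# Hypothetical stand-in; the real class likely adds behaviour
# (pdf evaluation, component ranking, plotting, ...).
BinomialMixture = namedtuple('BinomialMixture', ['weights', 'distros', 'log_like'])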
Example #5
def recover(data):

    weight, distributions, ll = mixem.em(data, [
        GeometricDistribution(0.8),
        GeometricDistribution(0.1),
    ])

    print(weight, distributions, ll)
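A hypothetical check for the geometric case (assumed, not from the original source) mixes success probabilities of 0.7 and 0.05, which the 0.8/0.1 starting guesses above should be able to find:

import numpy as np

# np.random.geometric returns trial counts on {1, 2, ...}, matching the
# support of a geometric mixture component.
np.random.seed(0)
data = np.concatenate([
    np.random.geometric(p=0.7, size=500),
    np.random.geometric(p=0.05, size=500),
])
recover(data)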
Example #6
def recover(data):

    init_params = np.random.choice(data, size=2)

    weight, distributions, ll = mixem.em(
        data, [ExponentialDistribution(l) for l in init_params])

    print(weight, distributions, ll)
Example #7
def train_lambda(data):
    # plt.scatter(np.array(range(data.shape[0])), data)
    # plt.show();
    weights, distributions, ll = mixem.em(np.sort(np.array(data)), [
        mixem.distribution.NormalDistribution(0, 1),
        mixem.distribution.NormalDistribution(0.3, 5),
        mixem.distribution.NormalDistribution(1, 9)
    ])
    return weights
Example #8
def recover(data):

    mu = np.mean(data)
    sigma = np.var(data)

    init_params = [(np.array([mu + 0.1]), np.diag([sigma])), (np.array([mu - 0.1]), np.diag([sigma]))]

    weight, distributions, ll = mixem.em(
        data, [MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params])

    print(weight, distributions, ll)
Example #9
def train_lambda(data):
    '''
    train_lambda takes training data and returns the lambdas which will be
    used in the EM algorithm.
    We use an implementation of the Expectation-Maximization (EM) algorithm
    called "mixem" to tune the lambda parameters in the interpolation.
    https://pypi.python.org/pypi/mixem
    '''
    weights, distributions, ll = mixem.em(np.sort(np.array(data)), [
        mixem.distribution.NormalDistribution(0, 1),
        mixem.distribution.NormalDistribution(0.3, 5),
        mixem.distribution.NormalDistribution(1, 9)
    ])
    return weights
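Per the docstring, the returned weights act as interpolation lambdas. A sketch of that downstream use (all names here, `train_data` and `p1`/`p2`/`p3`, are hypothetical placeholders; the original interpolation code is not shown):

# Combine three model estimates p1, p2, p3 (e.g. unigram/bigram/trigram
# probabilities of the same event) with the learned lambdas, which sum to 1.
lambdas = train_lambda(train_data)
p_interp = lambdas[0] * p1 + lambdas[1] * p2 + lambdas[2] * p3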
Example #10
def recover(data):
    
    mu = np.mean(data)
    sigma = np.var(data)
    init_params = [
        (np.array([mu + 0.1]), np.diag([sigma])),
        (np.array([mu - 0.1]), np.diag([sigma]))
    ]
    weight, distributions, ll = mixem.em(
        data, [MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params])

    print(weight, distributions, ll)
Example #11
def recover(data):

    mu = np.mean(data)
    sigma = np.var(data)

    init_params = [
        (mu + 0.1, sigma),
        (mu - 0.1, sigma)
    ]

    weight, distributions, ll = mixem.em(
        data, [NormalDistribution(mu, sigma) for (mu, sigma) in init_params])

    print(weight, distributions, ll)
Example #12
def main():

    data = pd.read_csv(os.path.join(os.path.dirname(__file__), "faithful.csv"))

    data = np.array(data)

    init_params = [
        (np.array((2, 50)), np.identity(2)),
        (np.array((4, 80)), np.identity(2)),
    ]

    weight, distributions, ll = mixem.em(
        data,
        [MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params],
        initial_weights=[0.3, 0.7])

    print(weight, distributions, ll)
Example #13
def main():
    startTime = time.time()
    folderName = "assignment-2-data-files/"
    fileName = "P1M1L1.txt"

    data, lengthData = readTrainTxt(folderName + fileName)
    data = takeBatches(data, 1000)

    init_params = [(0, 2), (4, 2), (8, 2), (12, 2)]

    weight, distributions, ll = mixem.em(
        data, [NormalDistribution(mu, sigma) for (mu, sigma) in init_params])

    print(weight, distributions, ll)
    print(time.time() - startTime)
Example #14
def recover(data):

    mu = [np.mean(data[0, :]), np.mean(data[1, :])]
    sigma = [np.var(data[0, :]), np.var(data[1, :])]

    init_params = [
        (np.array((mu[0] - 1, mu[0] + 1)), np.identity(2)),
        (np.array((mu[1] - 1, mu[1] + 1)), np.identity(2)),
    ]

    start = time.time()

    # note: unpacking four values assumes a locally modified mixem.em that also
    # returns the iteration count (upstream mixem returns three values)
    weight, distributions, ll, iteration = mixem.em(data, [
        MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params
    ])

    return weight, distributions, iteration, (time.time() - start)
Example #15
def get_normals_mixture(x, data):

    dist_list_norm = [
        mixem.distribution.NormalDistribution(mu=0.5, sigma=1),
        mixem.distribution.NormalDistribution(mu=2, sigma=1)
    ]
    norm_mixture = Data_Rep(data, dist_list_norm)
    #removed ref to train set
    norm_weights, norm_dists, norm_log_l = mixem.em(data,
                                                    dist_list_norm,
                                                    max_iterations=200,
                                                    progress_callback=None)

    post_scipy_dist_1_norm, post_scipy_dist_2_norm = norm_mixture.scipy_dists
    norm_pdf1 = [post_scipy_dist_1_norm.pdf(i) for i in x]
    norm_pdf2 = [post_scipy_dist_2_norm.pdf(i) for i in x]
    norm_joint_pdf = [
        norm_weights[0] * norm_pdf1[i] + norm_weights[1] * norm_pdf2[i]
        for i in range(len(norm_pdf1))
    ]

    return norm_mixture, norm_joint_pdf
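The per-point list comprehensions above are clear but slow for long `x`. Since scipy's frozen distributions broadcast over arrays, a behaviour-equivalent vectorized form (a sketch, assuming `x` is array-like) is:

import numpy as np

# Vectorized equivalent of the three list comprehensions above.
x = np.asarray(x)
norm_joint_pdf = (norm_weights[0] * post_scipy_dist_1_norm.pdf(x)
                  + norm_weights[1] * post_scipy_dist_2_norm.pdf(x))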
Example #16
def organize_data(x, data1, data2, dist1, dist2):

    data = np.concatenate((data1, data2))
    np.random.shuffle(data)
    # train_set = data[1000:]
    # val_set = data[:1000]
    # sorted_val = np.array(sorted(val_set))

    dist_list = [dist1, dist2]
    mixture = Data_Rep(data, dist_list)
    post_scipy_dist_1, post_scipy_dist_2 = mixture.scipy_dists
    # took out train/val split here
    weights, distributions, log_l = mixem.em(data,
                                             dist_list,
                                             max_iterations=200,
                                             progress_callback=None)

    pdf1 = [post_scipy_dist_1.pdf(i) for i in x]
    pdf2 = [post_scipy_dist_2.pdf(i) for i in x]
    joint_pdf = [
        weights[0] * pdf1[i] + weights[1] * pdf2[i] for i in range(len(pdf1))
    ]

    return mixture, joint_pdf, data
Example #17
	def _mixture_cdf(self, data, dist_list):
		self.weights, self.distributions, self.log_l = mixem.em(data, dist_list, max_iterations=200, progress_callback=None)
		self.scipy_dists = self.get_scipy_dists(self.distributions)
		return lambda query: sum([w * dist.cdf(query) for w, dist in zip(self.weights, self.scipy_dists)])
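`Data_Rep` is the surrounding class in that project. Because the returned closure wraps scipy CDFs, it accepts scalars or arrays; a hypothetical call site inside the class might be:

import numpy as np

# Hypothetical usage: fit once, then evaluate the mixture CDF anywhere.
cdf = self._mixture_cdf(data, dist_list)
print(cdf(0.0))                   # P(X <= 0) under the fitted mixture
print(cdf(np.array([0.0, 1.0])))  # scipy CDFs broadcast over arrays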
Example #18
# prepare some data
data = pd.read_csv("faithful.csv")
#print(data.head())

# output to static HTML file
output_file("Fitting_Data_Contour.html", title="Old Faithful Data")

# create a new plot with a title and axis labels
'''
fig = figure(title="Old Faithful Data", x_axis_label="Eruption duration (minutes)", y_axis_label="Waiting time (minutes)")
fig.scatter(x=data.eruptions, y=data.waiting)
show(fig);
'''
# note: unpacking four values assumes a locally modified mixem.em that also
# returns the iteration count (upstream mixem returns three values)
weights, distributions, ll, iteration = mixem.em(np.array(data), [
    mixem.distribution.MultivariateNormalDistribution(np.array(
        (2, 50)), np.identity(2)),
    mixem.distribution.MultivariateNormalDistribution(np.array(
        (4, 80)), np.identity(2)),
])

N = 100
x = np.linspace(np.min(data.eruptions), np.max(data.eruptions), num=N)
y = np.linspace(np.min(data.waiting), np.max(data.waiting), num=N)
xx, yy = np.meshgrid(x, y, indexing="ij")
# Convert meshgrid into a ((N*N), 2) array of coordinates
xxyy = np.array([xx.flatten(), yy.flatten()]).T

# Compute model probabilities
p = mixem.probability(xxyy, weights, distributions).reshape((N, N))
fig2 = figure(title="Fitted Old Faithful Data",