Example 1
    def normalize(self, probe_int, probe_seq, probe_type, outFilePath, outputExtraModelData=False):
        probe_ids = sorted(list(set(probe_seq.keys()) & set(probe_int.keys())))

        pseq = []
        pint = []
        ptype = []
        for p in probe_ids:
            pseq.append(probe_seq[p])
            pint.append(probe_int[p])
            ptype.append(probe_type[p])

        mx = self._design_matrix(array([self._encode(v) for v in pseq]))
        random.seed(1)
        randomNoise = array([random.random() / 10000000 for x in range(len(pint))])
        my = log2(array(pint))
        my = my + randomNoise  # avoids bins with no values when too many intensities are identical

        model = EM()
        samplingProbeIndices = self._sample(len(probe_ids))
        mainProbeIndices = [i for i in range(len(ptype)) if ptype[i] == "M"]
        samplingProbeIndices = list(set(samplingProbeIndices) & set(mainProbeIndices))

        nbins = 25
        b1, b2 = model.EM_vMix(my[samplingProbeIndices], mx[samplingProbeIndices], bins=nbins)
        m1 = dot(mx, b1)
        m2 = dot(mx, b2)

        binsize = 5000
        nGroups = int(ceil(size(my) / binsize))
        index = argsort(m1)
        y_norm = zeros(size(my), 'f')

        for i in arange(nGroups):
            tmp = index[(binsize * i):min([binsize * i + binsize, size(my)])]
            tmpSd = self._sig(my[tmp], m1[tmp])
            y_norm[tmp] = (my[tmp] - m1[tmp]) / tmpSd

        model.assign_bin(m1, bins=nbins)
        gam = model.vresp(my, mx)

        print "Outputting to %s" % outFilePath
        outFile = file(outFilePath, 'w')

        outLines = []
        for i, pid in enumerate(probe_ids):
            out = pid + "\t%.9f\t%.9f" % (y_norm[i], gam[i, 1])

            if outputExtraModelData:
                out += "\t%.9f\t%.9f\t%.9f" % (my[i], m1[i], m2[i])

            outLines.append(out)

            if len(outLines) % 100000 == 0:
                outFile.write("\n".join(outLines) + "\n")
                outLines = []

        if len(outLines) > 0:
            outFile.write("\n".join(outLines) + "\n")

        outFile.close()
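A note on the loop above: probes are ordered by their fitted mean m1, split into consecutive groups of binsize, and z-scored within each group. A minimal standalone sketch of that idea, with numpy assumed and the class's _sig estimator replaced by a plain residual standard deviation (an assumption made here for brevity):

import numpy as np

def binned_zscore(y, fitted, binsize=5000):
    """Z-score y within consecutive bins of points ordered by fitted value."""
    order = np.argsort(fitted)
    out = np.zeros_like(y, dtype=float)
    for start in range(0, y.size, binsize):
        idx = order[start:start + binsize]
        resid = y[idx] - fitted[idx]
        out[idx] = resid / resid.std()
    return out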
Example 2
def run_mnist():
    # FIXME: running EM on MNIST has the problem that all data collapses to one class,
    # because the likelihood for that class ends up slightly higher than for all others.
    # It probably has to do with one class getting a lower variance from k-means,
    # which matters more than closeness to the mean for such high-dimensional data.
    # Running with 0 iterations (i.e. on the k-means result) works fine; it then finds
    # different orientations of the digits.
    data_per_class = 20

    training_data = list(mnist.read("training"))
    dim_x, dim_y = np.shape(training_data[0][1])
    ones = [d[1] for d in training_data if d[0] == 1]
    fours = [d[1] for d in training_data if d[0] == 4]
    fives = [d[1] for d in training_data if d[0] == 5]

    ones = ones[:data_per_class]
    fours = fours[:data_per_class]
    fives = fives[:data_per_class]

    data = np.array(ones + fours + fives).reshape((-1, dim_x * dim_y))
    solver = EM(data=data, num_classes=3, num_nuisances=3)
    split_data, thetas = solver.fit(max_iter=1)

    for c, class_thetas in enumerate(thetas):
        for n, theta in enumerate(class_thetas):
            print(f"Prior: {theta.prior}, Var: {theta.variance}")
            mnist.show(thetas[c][n].mean.reshape(28, 28))
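The FIXME above suspects that one class wins purely by having a lower variance. A common generic mitigation, shown here as a sketch and not part of this EM class (the floor value is an assumption), is to clamp per-class variances from below after each M-step:

import numpy as np

def floor_variance(variance, floor=1e-2):
    """Keep variances away from zero so no class dominates the likelihood
    merely by being narrow; a standard regularization in EM for mixtures."""
    return np.maximum(variance, floor)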
Example 3
    def f2():
        img = cv2.imread(get_filename('0d648f99c.jpg', 'Train'),
                         cv2.IMREAD_GRAYSCALE)
        img = gaussian(img, 0.5)
        # img = cv2.resize(img, dsize=tile_size)

        em_object = EM(img)
        em_object.run_it()
Example 4
    def __init__(self):
        Thread.__init__(self)
        EM.__init__(self)

        self.daemon = True

        self.alive = True

        self.first = True
Example 5


        @return [1,3,5,7,8, ....]
        """
        interval = total // self.N
        if interval <= 1:
            interval = 1
        return [i for i in range(start, total, interval)]

    def _quantile_normalize(self, x):
        xr = sort(x, axis=0)                # each column sorted independently
        xm = mean(xr, axis=1)               # mean quantile profile across columns
        # map each value to the mean value of its rank
        return xm[argsort(argsort(x, axis=0), axis=0)]

    def _design_matrix(self, PMProbe):
        """DesignMatrix(PMProbe): make design matrix from PMProbe

        PMProbe: probe sequence Int8 array.

        Constructs an 80-parameter X matrix for the model:
        1 'T' count + 3 * 25 'ACG' position indicators + 4 'ACGT' squared counts.
        """
        x = zeros((PMProbe.shape[0], 80), 'f')
        x[:, 0] = sum(PMProbe == self.code['T'], 1).astype('float32')
        j = 1
        for ibase in 'ACG':
            x[:, j:j+25] = (PMProbe == self.code[ibase])
            j += 25
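For intuition, here is a minimal standalone sketch of the indicator encoding the loop above builds, assuming 25-mer probes and a hypothetical base-to-integer code map (the real mapping lives elsewhere in the class):

import numpy as np

code = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
probe = np.array([[code[b] for b in 'ACGTT' * 5]])  # one encoded 25-mer
x = np.zeros((1, 76), 'f')
x[:, 0] = (probe == code['T']).sum(1)               # T count (10 for this probe)
j = 1
for base in 'ACG':                                  # 25 indicator columns per base
    x[:, j:j + 25] = (probe == code[base])
    j += 25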
Example 6
def model_dict(X, Z, K):
    """Run all the models and store their features into one dictionary for plotting.
    Args:
        - X : (N, d) data
        - Z : (N) labels
        - K : Number of clusters
    Returns:
        - models : Dictionary containing the means, covariance matrices (where available) and cluster labels of each model.
    """
    # Models dictionary
    models = dict()

    # Ground truth
    models["ground truth"] = {
        "mean": np.array([X[Z == k].mean(0) for k in range(len(np.unique(Z)))]),
        "cov": None,
        "labels": Z,
    }

    # Run diagonal EM
    em_diag = EM(K)
    em_diag.fit(X)
    models["diagonal EM"] = {
        "mean": em_diag.mus,
        "cov": np.array([np.diag(em_diag.Ds[k]) for k in range(K)]),
        "labels": em_diag.labels_,
    }

    # Run general EM
    em = GaussianMixture(K)
    em.fit(X)

    # Compute responsibilities
    gaussians = np.array(
        [
            multivariate_normal.pdf(X, em.means_[k], em.covariances_[k])
            * em.weights_[k]
            for k in range(K)
        ]
    )
    r = gaussians / gaussians.sum(0)
    models["general EM"] = {
        "mean": em.means_,
        "cov": em.covariances_,
        "labels": r.argmax(0),
    }

    # Run K-means
    km = KMeans(K)
    km.fit(X)
    models["K-means"] = {"mean": km.cluster_centers_, "cov": None, "labels": km.labels_}

    return models
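The responsibility computation in model_dict multiplies raw densities, which can underflow for high-dimensional data. A hedged alternative computing the same posteriors in the log domain (not part of the original code):

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def responsibilities(X, means, covs, weights):
    """Posterior cluster probabilities, computed stably via logsumexp."""
    log_g = np.array([
        multivariate_normal.logpdf(X, means[k], covs[k]) + np.log(weights[k])
        for k in range(len(weights))
    ])
    return np.exp(log_g - logsumexp(log_g, axis=0))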
Example 7
def run_gaussian():
    data_per_cluster = 15
    num_clusters = 2
    data = generate_gaussian_data(data_per_cluster, num_clusters)

    num_classes = num_clusters
    num_nuisances = 1

    solver = EM(data, num_classes, num_nuisances)
    split_data, thetas = solver.fit_and_plot(5)
    for class_thetas in thetas:
        for theta in class_thetas:
            print(f"Mean: {theta.mean} - Var: {theta.variance}")
Example 8
    def __init__(self, host, port, rcon_password):
        Thread.__init__(self)
        EM.__init__(self)

        self.daemon = True

        self.alive = True

        self.conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        self.host = host
        self.port = port
        self.rcon_password = rcon_password

        self.first = True
Example 9
    def _create_model_and_run_em(self, d, k, mode, nframes):
        #+++++++++++++++++++++++++++++++++++++++++++++++++
        # Generate a model with k components, d dimensions
        #+++++++++++++++++++++++++++++++++++++++++++++++++
        w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
        gm = GM.fromvalues(w, mu, va)
        # Sample nframes frames from the model
        data = gm.sample(nframes)

        #++++++++++++++++++++++++++++++++++++++++++
        # Approximate the models with classical EM
        #++++++++++++++++++++++++++++++++++++++++++
        # Init the model
        lgm = GM(d, k, mode)
        gmm = GMM(lgm, 'kmean')

        em = EM()
        lk = em.train(data, gmm)
Example 10
    def _test(self, dataset, log):
        dic = load_dataset(dataset)

        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
        gmm = GMM(gm, 'test')
        EM().train(dic['data'], gmm, log=log)

        assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
        assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
        assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
Example 11
def process_image(image_id, thread_number, locale):
    try:
        print("%d start" % thread_number)
        img = cv2.imread(get_filename(image_id, "final"), cv2.IMREAD_GRAYSCALE)
        img = gaussian(img, 0.5)  # TODO maybe change to 1

        final_img = img.copy()
        img = cv2.resize(img, dsize=tile_size)

        print("%d go EM" % thread_number)
        em = EM(img, thread_number)
        em.run_it()

        print("%d done EM" % thread_number)

        st = timer()
        prediction_matrix = create_prediction_image(final_img, em.miu,
                                                    em.sigma, em.pgk,
                                                    em.clusters)
        fs = timer()

        print('Prediction matrix took %.2f s' % (fs - st))

        out_file = open("../EM_Result/%s" % image_id.split(".")[0], "w")

        for line in prediction_matrix:
            for el in line:
                out_file.write("%d " % el)
            out_file.write("\n")
        out_file.close()

        thread_file = open("../EM_Result/_Thread_%d" % thread_number, "a")
        thread_file.write(image_id)
        thread_file.write("\n")
        thread_file.close()
    except Exception as e:
        print(e)
        th_err = open("../EM_Result/_Thread_Error_%d" % thread_number, "a")
        th_err.write(image_id)
        th_err.write("\n")
        th_err.close()
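As a side note, the manual write loop above can be collapsed into a single numpy call with essentially the same space-separated integer layout (a sketch; the file name and stand-in matrix are placeholders):

import numpy as np

prediction_matrix = np.zeros((4, 4), dtype=int)  # stand-in for the EM output
np.savetxt("../EM_Result/example_id", prediction_matrix, fmt="%d")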
Example 12
    def train(self, word_root_procs):
        '''Run EM over per-word candidate (root, process) features to estimate
        the root, pattern and transition probabilities.'''
        word_cand_feats = []
        for word, root_procs in word_root_procs:
            cand_feats = [
                self.__features(root, proc) for root, proc in root_procs
            ]
            word_cand_feats.append((word, cand_feats))
        self.__prob_roots, self.__prob_pats, self.__prob_trans_on_feat = \
            EM().estimate(word_cand_feats)
        self.__trained = True
Example 13
def set_up_experiment(args):
    """
    Create exp. directory if needed, store used arguments, create patches and em objects.
    """
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    with open(os.path.join(args.output, "args.txt"), "w") as f:
        args_dict = vars(args)
        for k, v in args_dict.items():
            if isinstance(v, bool) and not v:
                continue
            f.write("-{}={} \\\n".format(k, v))

    np.random.seed(args.random_seed)

    patches = Patches(
        input_path=args.input,
        source_path=args.source,
        patch_size=args.patch_size,
        patch_overlap=args.patch_overlap,
        pca_k=args.pca_k,
        color=(args.color == "color"),
    )

    initial_posteriors_path = os.path.join(args.output,
                                           "initial_posteriors.npy")

    lbp_params = {
        "output_dir": args.output,
        "two_sigma2": args.lbp_two_sigma2,
        "iterations": args.lbp_iterations,
        "seed": args.random_seed,
    }

    em = EM(
        patches=patches,
        num_candidates=args.num_candidates,
        num_transformations=args.num_transformations,
        lbp_params=lbp_params,
        lambdas_init_type=args.init_transformations,
    )

    return patches, em
Example 14
"""
For tests
"""
import time
from model_gm import ModelGM, sample_gm
from em import EM
from mm import MM
from dmm import DMM
from discrete_rv import wass

if __name__ == '__main__':
    k = 2
    mm = MM(k, sigma=1)
    em = EM(k, sigma=1)
    dmm = DMM(k, sigma=None)

    model = ModelGM(w=[0.5, 0.5], x=[-.5, .5], std=1)
    sample = sample_gm(model, 10000)

    # esti_em = em.estimate(sample)
    # print(wass(esti_em.mean_rv(), model.mean_rv()))
    print(dmm.estimate(sample))
    print(dmm.estimate_online(sample))
    # print(wass(esti_dmm, model.mean_rv()))
    # print(mm.estimate(sample))

    # dmm = DMM(k=2)
    # print(dmm.estimate(sample))
Example 15
from scipy.stats import multivariate_normal
import math
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import time
import os
# Assumed here: the EM class comes from a local module, e.g. `from em import EM`

if __name__ == "__main__":

  if not os.path.exists('plots'):
    os.makedirs('plots')

  plt.rcParams['figure.figsize'] = [15, 15]
  X,y = make_blobs(n_samples=200, centers=[[2.5,3.5],[3.5,5.5],[4,10]])

  em = EM(X,num_clusters=3)
  means,sigmas = em.cluster_means,em.cluster_cov_mtrxs
  iters = 16
  color_vals = ['red','blue','green']
  for i in np.arange(iters):
      print(f'Performing iteration {i}/{iters-1}..')

      plt.clf()
      plt.axis([-5, 30, -5, 30])
      plt.scatter(X[:,0],X[:,1],marker='o', c=y,
                      s=35, edgecolor='k')
      j = 0
      for mu,cov in zip(means,sigmas):
          x_plt, y_plt = np.mgrid[mu[0] - 3*cov[0,0]:mu[0] +3*cov[0,0]:.01, mu[1] - 3*cov[1,1]:mu[1] + 3*cov[1,1]:.01]
          pos = np.empty(x_plt.shape + (2,))
          pos[:, :, 0] = x_plt 
Example 16
from em import EM
from dataset import Dataset
from reader import Reader
from nltk import word_tokenize

# d = []
# with open('in2.txt', 'r') as file:
#     lines = file.readlines()
#     for l in lines:
#         d.append(word_tokenize(l[:-1]))
#
# dataset = Dataset(d)
# em = EM(dataset, 2, 0.0000000000000001)
# em.do()

if __name__ == "__main__":
    [d,d_names] = Reader.read('../ROBOT/64')
    dataset = Dataset(d, d_names)
    lh = {}
    max = 0
    for i in range(5, 6):
        em = EM(dataset, i, 0.0000000000000001)
        em.do()
        # TODO zapis wyników
        lh[i] = em.likelyhood
        pass
    for l in lh:
        print(str(l) + ': ' + str(lh[l]))
    pass
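The loop above gathers one likelihood per candidate cluster count, which sets up a simple model selection. A minimal sketch of how it could be finished (the lh values here are stand-ins):

lh = {5: -1234.5, 6: -1201.8}  # stand-in for the likelihoods gathered above
best_k = max(lh, key=lh.get)   # cluster count with the highest likelihood
print('best number of clusters: %d' % best_k)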
Example 17
            turn_off_the_lights_validation_samples,
            turn_off_the_lights_samples, turn_off_the_lights_hmm, 11
        ])
    if not os.path.exists(what_time_is_it_hmm):
        training_list.append([
            what_time_is_it_validation_samples, what_time_is_it_samples,
            what_time_is_it_hmm, 10
        ])

    csv.write([
        "Trainee", "# of States", "Iteration", "Test Set", "Max", "Min",
        "Mean", "Std", "5%/95% Cutoff", "10%/90% Cutoff", "15%/85% Cutoff",
        "20%/80% Cutoff"
    ])

    em = EM()
    for item in training_list:
        for q in range(6, 40, 4):
            for iteration in range(0, 2):
                folder_name = os.path.split(item[1])[-1]
                hmm_path = "%s_%d_%d.hmm" % (os.path.splitext(
                    item[2])[0], q, iteration)
                if os.path.exists(hmm_path):
                    print("Skipping already trained %s for q=%d and i=%d..." %
                          (folder_name, q, iteration))
                    continue
                print("Training %s for q=%d and i=%d..." %
                      (folder_name, q, iteration))

                speech_hmm = em.build_hmm_from_folder(item[1],
                                                      q,
Example 18
import glob, os, sys, time, posix