likelihood_evolu_e = np.zeros(
    (num_point * num_epoch, 2))  #np.zeros((num_epoch, 2))

# perO_evolu_i = np.zeros((num_batch, 2))
# perD_evolu_i = np.zeros((num_batch, 2))
# perT_evolu_i = np.zeros((num_batch, 2))

perO_evolu_e = np.zeros((num_point * num_epoch, 2))  #np.zeros((num_epoch, 2))
perD_evolu_e = np.zeros((num_point * num_epoch, 2))  #np.zeros((num_epoch, 2))
perT_evolu_e = np.zeros((num_point * num_epoch, 2))  #np.zeros((num_epoch, 2))

test_docs = docs.iloc[M:]
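# test_docs: the documents after the first M training users, held out (presumably) for evaluation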

for lam_k, lam_v in enumerate(LAMBDA):

    MTRobot.sendtext(worker_idx, " Start Lambda: {}".format(lam_v))
    print(f'Start Lambda: {lam_v}')
    betaO = np.zeros([J, num_station])  #+ 1e-5
    betaD = np.zeros([K, num_station])  #+ 1e-5
    betaT = np.zeros([L, num_time])  #+ 1e-5
    theta = np.zeros((M, J, K, L))
    gamma = np.zeros((M, J, K, L))
    model_test = GR_TensorLDA(worker_idx, alpha, J, K, L, M, test_docs, iterEM,
                              EM_CONVERGED, EM_CONVERGED_fine_tune,
                              iterInference, VAR_CONVERGED)
    time_0 = int(time.time())
    # for-loop 2: iterate over the epochs
    for e in range(num_epoch):
        MTRobot.sendtext(worker_idx, " Start {}-th epoch".format(e))
        _updatect = e * S
        model = GR_OnlineTensorLDA(worker_idx, alpha, num_station, num_time,
                                   J, K, L, M, S, ...)  # further constructor arguments not shown
    def fit(self, i, docs_minibatch, lam, mu, nu, G_net, G_poi, dictionary_o,
            dictionary_d, dictionary_t, idx_corpus_o, idx_corpus_d,
            idx_corpus_t, num_user, num_station, num_time):
        """ 
        the docs here is mini-batch docs as a moving window;
        and the fit function here is to fit learn each mini-batch;
        betaO, betaD, betaT: here are parameters learned from each mini-batch, with assuming the entire corpus at this moment is this mini-batch repeated num_batch times
        """
        i = i
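        # Because the corpus is treated as this mini-batch repeated num_batch times,
        # the sufficient statistics below are presumably rescaled by num_batch inside
        # update_beta / update_beta_w_graph before the beta updates.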
        J = self.J
        K = self.K
        L = self.L
        alpha = self.alpha
        M = self.M  #num_user
        S = self.S
        num_batch = self.num_batch

        # sufficient statistic of alpha
        alphaSS = 0
        # the topic-word distribution (beta in D. Blei's paper)
        betaO = np.zeros([J, num_station])  #+ 1e-5
        betaD = np.zeros([K, num_station])  #+ 1e-5
        betaT = np.zeros([L, num_time])  #+ 1e-5
        # topic-word count, this is a sufficient statistic to calculate beta
        count_zwo = np.zeros(
            (J, num_station))  # sufficient statistic for beta^O
        count_zwd = np.zeros(
            (K, num_station))  # sufficient statistic for beta^D
        count_zwt = np.zeros((L, num_time))  # sufficient statistic for beta^T
        # per-topic totals (the topic-word counts summed over all words), used to normalize beta
        count_zo = np.zeros(J)
        count_zd = np.zeros(K)
        count_zt = np.zeros(L)

        # inference parameter phi
        phiO = np.zeros([self.maxItemNum(S, docs_minibatch), J])
        phiD = np.zeros([self.maxItemNum(S, docs_minibatch), K])
        phiT = np.zeros([self.maxItemNum(S, docs_minibatch), L])

        #MTRobot.sendtext(self.worker_idx, " Start Lambda: {}".format(lam))
        #print(f'Start Lambda: {lam}')
        MTRobot.sendtext(self.worker_idx, " Start {}-th minibatch".format(i))
        # initialize convergence trackers (the outer EM while-loop below is currently unrolled to a single pass)
        #i_em = 0
        betaO_norm_old = 0.1
        betaD_norm_old = 0.1
        betaT_norm_old = 0.1
        bool_beta_converge = False
        converged = -1
        likelihood_mb_old = 0.1
        # initialization of the model parameter varphi; the update of alpha is omitted
        count_zwo, count_zo, count_zwd, count_zd, count_zwt, count_zt, betaO, betaD, betaT = self.initialLdaModel(
            num_batch, num_station, num_time)

        # begin while #!!!
        #while (converged < 0 or converged > self.EM_CONVERGED or i_em <2 or bool_beta_converge == False) and i_em <= self.iterEM: # no need to run EM big loop until convergence; or we could set iterEM as small value like iterEM=3 #!!!
        #    iteration += 1
        #for i_em in range(iterEM):
        likelihood_mb = 0
        #MTRobot.sendtext(self.worker_idx, " -- Start EM iteration: {}".format(i_em))
        #print(f'-- Start EM iteration: {i_em}')
        count_zwo = np.zeros(
            (J, num_station))  # sufficient statistic for beta^O
        count_zwd = np.zeros(
            (K, num_station))  # sufficient statistic for beta^D
        count_zwt = np.zeros((L, num_time))  # sufficient statistic for beta^T
        count_zo = np.zeros(J)
        count_zd = np.zeros(K)
        count_zt = np.zeros(L)
        count_uzo = np.zeros((M, J))
        count_uzd = np.zeros((M, K))
        count_uzt = np.zeros((M, L))

        alphaSS = 0

        # Newton-method settings for the graph-regularized beta updates; the tolerance
        # is tightened below once the EM step stabilizes.
        # MAX_NT_ITER = 20  # 50  # 10
        NT_CONVERGED = 0.001  # since beta_w_g_norm magnitude is 0.7
        g_step = 0.001

        # E-Step
        #print("-start variational Inference E-step")
        #MTRobot.sendtext(self.worker_idx, " ---- E-step")
        #print(" ---- start variational Inference E-step")
        for s in range(S):
            u = i * S + s  # the s-th passenger in the i-th mini-batch is the u-th passenger in the whole corpus #!!!
            #MTRobot.sendtext(self.worker_idx, "------Passenger{}".format(u))
            phiO, phiD, phiT, self.gamma, likelihood_s = self.variationalInference(
                i, docs_minibatch, s, u, self.gamma, phiO, phiD, phiT, betaO,
                betaD, betaT, idx_corpus_o, idx_corpus_d, idx_corpus_t)
            likelihood_mb += likelihood_s  #
            #converged = (likelihood_old - likelihood) / (likelihood_old);
            gammaSum = 0
            for j in range(J):
                for k in range(K):
                    for l in range(L):
                        gammaSum += self.gamma[u, j, k, l]
                        alphaSS += psi(self.gamma[u, j, k, l])
            alphaSS -= J * K * L * psi(gammaSum)
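            # Equivalent vectorized form of the two updates above (a sketch, assuming
            # psi is scipy.special.psi, which broadcasts over arrays):
            #   gammaSum = self.gamma[u].sum()
            #   alphaSS += psi(self.gamma[u]).sum() - J * K * L * psi(gammaSum)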

            # To update count_zwo, count_zo
            for wo in range(len(idx_corpus_o[u])):
                for j in range(J):
                    # count_zwo[j, bow_corpus_o[u][wo][0]] += bow_corpus_o[u][wo][1] * phiO[wo, j]
                    # nzw[z][docs[d].itemIdList[w]] += docs[d].itemCountList[w] * phi[w, z]
                    count_zwo[j, idx_corpus_o[u][wo]] += phiO[wo, j]
                    # nz[z] += docs[d].itemCountList[w] * phi[w, z]
                    count_zo[j] += phiO[wo, j]
                    count_uzo[u, j] += phiO[wo, j]

            # To update count_zwd, count_zd
            for wd in range(len(idx_corpus_d[u])):
                for k in range(K):
                    count_zwd[k, idx_corpus_d[u][wd]] += phiD[wd, k]
                    count_zd[k] += phiD[wd, k]
                    count_uzd[u, k] += phiD[wd, k]

            # To update count_zwt, count_zt
            for wt in range(len(idx_corpus_t[u])):
                for l in range(L):
                    count_zwt[l, idx_corpus_t[u][wt]] += phiT[wt, l]
                    count_zt[l] += phiT[wt, l]
                    count_uzt[u, l] += phiT[wt, l]
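            # The three scatter-add loops above can be vectorized; a sketch for the
            # origin counts, assuming idx_corpus_o[u] is a plain list of station indices:
            #   w_o = np.asarray(idx_corpus_o[u])
            #   np.add.at(count_zwo.T, w_o, phiO[:len(w_o)])
            #   count_zo += phiO[:len(w_o)].sum(axis=0)
            #   count_uzo[u] += phiO[:len(w_o)].sum(axis=0)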

            # To update theta_u
            for j in range(J):
                for k in range(K):
                    for l in range(L):
                        self.theta[u, j, k, l] = sum(
                            phiO[w, j] * phiD[w, k] * phiT[w, l]
                            for w in range(
                                int(docs_minibatch.iloc[s]['wordcount'])))
            self.theta[u, :, :, :] = self.theta[u, :, :, :] / sum(
                sum(sum(self.theta[u, :, :, :])))
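            # (equivalent, and clearer: self.theta[u] /= self.theta[u].sum())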
            #theta[u, :, :, :] = gamma[u, :, :, :] / sum(sum(sum(gamma[u, :, :, :])))
        # record the time when the likelihood for this mini-batch has been computed
        time_lkh = int(time.time())

        converged = (likelihood_mb_old - likelihood_mb) / (likelihood_mb_old)
        likelihood_mb_old = likelihood_mb
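        # converged is the relative change of the variational bound (lda-c convention);
        # with a negative bound it is positive when the bound improved on this pass.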

        # M-Step
        #print("---- start variational Inference M-step")
        #MTRobot.sendtext(self.worker_idx, " ---- M-step")
        if converged < self.EM_CONVERGED_fine_tune and converged > 0:  # fine-tune beta once the EM algorithm stabilizes
            # MAX_NT_ITER = 2 * MAX_NT_ITER
            NT_CONVERGED = 0.5 * NT_CONVERGED

        # Update betaO
        #betaO = update_beta(count_zwo, count_zo)
        #MTRobot.sendtext(self.worker_idx, " ------ Origin ")
        # betaO, gradient, hessian = update_beta_w_graph(lam, count_zwo, count_zo, mu, G_net, G_poi)
        betaO_no_g = self.update_beta(num_batch, count_zwo, count_zo)
        betaO, gradientO, hessianO = self.update_beta_w_graph(
            num_batch, num_station, lam, betaO_no_g, mu, G_net, G_poi,
            NT_CONVERGED, g_step)
        #MTRobot.sendtext(worker_idx, " ------ End Origin ")

        # Update betaD
        #betaD = update_beta(count_zwd, count_zd)
        #MTRobot.sendtext(self.worker_idx, " ------ Destination ")
        betaD_no_g = self.update_beta(num_batch, count_zwd, count_zd)
        betaD, gradientD, hessianD = self.update_beta_w_graph(
            num_batch, num_station, lam, betaD_no_g, nu, G_net, G_poi,
            NT_CONVERGED, g_step)

        # Update betaT
        betaT = self.update_beta(num_batch, count_zwt, count_zt)

        betaO_norm = np.linalg.norm(np.exp(betaO))
        betaD_norm = np.linalg.norm(np.exp(betaD))
        betaT_norm = np.linalg.norm(np.exp(betaT))

        # check for convergence
        bool_beta_converge = self.converge_paras(
            betaO_norm,
            betaD_norm,
            betaT_norm,
            betaO_norm_old,
            betaD_norm_old,
            betaT_norm_old,
            PARA_CONVERGED=0.0015)  # beta_norm magnitude: 0.7
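        # converge_paras presumably reports convergence once the relative change of
        # each beta norm drops below PARA_CONVERGED.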

        # update old parameters for next EM-iteration
        betaO_norm_old = betaO_norm
        betaD_norm_old = betaD_norm
        betaT_norm_old = betaT_norm

        #MTRobot.sendtext(self.worker_idx, f'End EM Iter {i_em} -- Likelihood: {likelihood_mb:.5f}   Converged: {converged:.5f}')
        #MTRobot.sendtext(self.worker_idx, f'betaO: {betaO_norm:.5f}   betaD: {betaD_norm:.5f}   betaT: {betaT_norm:.5f}')
        #print(f'End EM Iter {i_em} -- Likelihood: {likelihood:.5f}   Converged: {converged:.5f}')
        #print(f'betaO: {betaO_norm:.5f}   betaD: {betaD_norm:.5f}   betaT: {betaT_norm:.5f}')

        #i_em = i_em +1
        # End while #!!!

        rhos = pow(self._tau0 + self._updatect, -self._kappa)
        self._rhos = rhos
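        # Robbins-Monro step size: rhos = (tau0 + updatect) ** (-kappa) decays toward 0,
        # so later mini-batches move the global topics less and less (e.g. tau0 = 1,
        # kappa = 0.7 gives roughly 1.0, 0.62, 0.46, ... across updates; the actual
        # tau0/kappa values are set elsewhere).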

        betaO_exp = np.exp(betaO)
        betaD_exp = np.exp(betaD)
        betaT_exp = np.exp(betaT)

        betaO_o_exp = np.exp(self.betaO_o)
        betaD_o_exp = np.exp(self.betaD_o)
        betaT_o_exp = np.exp(self.betaT_o)
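        # Online blend in probability (exp) space: mix the old global topics with this
        # mini-batch's estimate using step size rhos, clip away from zero, renormalize
        # each topic row to sum to 1, then map back to log space.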

        betaO_o_exp = (1 - rhos) * betaO_o_exp + rhos * betaO_exp
        betaD_o_exp = (1 - rhos) * betaD_o_exp + rhos * betaD_exp
        betaT_o_exp = (1 - rhos) * betaT_o_exp + rhos * betaT_exp

        betaO_o_exp[betaO_o_exp <= 0] = 1e-2  # to avoid infeasible values
        betaD_o_exp[betaD_o_exp <= 0] = 1e-2  # to avoid infeasible values
        betaT_o_exp[betaT_o_exp <= 0] = 1e-2  # to avoid infeasible values

        betaO_o_exp = normalize(betaO_o_exp, norm='l1')
        betaD_o_exp = normalize(betaD_o_exp, norm='l1')
        betaT_o_exp = normalize(betaT_o_exp, norm='l1')

        self.betaO_o = np.log(betaO_o_exp)
        self.betaD_o = np.log(betaD_o_exp)
        self.betaT_o = np.log(betaT_o_exp)

        # self.betaO_o = (1-rhos) * self.betaO_o + rhos * betaO
        # self.betaD_o = (1-rhos) * self.betaD_o + rhos * betaD
        # self.betaT_o = (1-rhos) * self.betaT_o + rhos * betaT

        self._updatect += 1

        return count_uzo, count_uzd, count_uzt, self.betaO_o, self.betaD_o, self.betaT_o, self.gamma, self.theta, likelihood_mb, time_lkh
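
# A minimal sketch of how fit() is driven from the mini-batch loop (the dictionaries,
# index corpora, and graph inputs are hypothetical driver-level variables; the real
# script supplies them):
#
#   for i in range(num_batch):
#       docs_mb = docs.iloc[i * S:(i + 1) * S]
#       (count_uzo, count_uzd, count_uzt,
#        betaO_o, betaD_o, betaT_o,
#        gamma, theta, lkh_mb, time_lkh) = model.fit(
#           i, docs_mb, lam_v, mu, nu, G_net, G_poi,
#           dictionary_o, dictionary_d, dictionary_t,
#           idx_corpus_o, idx_corpus_d, idx_corpus_t,
#           M, num_station, num_time)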
Example 3
            # To update theta_u
            for j in range(J):
                for k in range(K):
                    for l in range(L):
                        theta[u, j, k, l] = sum(
                            phiO[w, j] * phiD[w, k] * phiT[w, l]
                            for w in range(int(docs.iloc[u]['wordcount'])))
            theta[u, :, :, :] = theta[u, :, :, :] / sum(
                sum(sum(theta[u, :, :, :])))
            #theta[u, :, :, :] = gamma[u, :, :, :] / sum(sum(sum(gamma[u, :, :, :])))

        converged = (likelihood_old - likelihood) / (likelihood_old)
        likelihood_old = likelihood
        # M-Step
        print("---- start variational Inference M-step")
        #MTRobot.sendtext(worker_idx, " ---- start variational Inference M-step")

        # Update betaO
        #betaO = update_beta(count_zwo, count_zo)

        MTRobot.sendtext(worker_idx, " ------ Origin ")
        if converged < EM_CONVERGED_fine_tune and converged > 0:  # fine-tune beta once the EM algorithm stabilizes
            # MAX_NT_ITER = 2 * MAX_NT_ITER
            NT_CONVERGED = 0.5 * NT_CONVERGED
        # betaO, gradient, hessian = update_beta_w_graph(lam, count_zwo, count_zo, mu, G_net, G_poi)
        betaO_no_g = update_beta(count_zwo, count_zo)
        betaO, gradientO, hessianO = update_beta_w_graph(
            lam_v, betaO_no_g, mu, G_net, G_poi)
        #MTRobot.sendtext(worker_idx, " ------ End Origin ")

        # Update betaD
        betaD = update_beta(count_zwd, count_zd)

        # Update betaT
        betaT = update_beta(count_zwt, count_zt)