Ejemplo n.º 1
0
 def initializeMu(self):
     """Reset ``self.mu`` with one initial weight list per object.

     For every object index below ``self.numObjects`` a list is built
     holding ``self.gamma[e][v] - 1.0`` for each candidate position ``v``
     of ``self.candidates[e]``.  The list is handed to ``normalize1d``
     (presumably an in-place normalisation, since its return value is
     not used -- confirm against its definition) and appended to
     ``self.mu``.  Finally ``self.initializeMuNumDen()`` is invoked.
     """
     self.mu = []
     for obj in range(self.numObjects):
         weights = []
         for cand in range(len(self.candidates[obj])):
             weights.append(self.gamma[obj][cand] - 1.0)
         normalize1d(weights)
         self.mu.append(weights)
     self.initializeMuNumDen()
Ejemplo n.º 2
0
    def EstepQuality(self, claimsByObjI, quality, estepParam, pop=False):
        """Fill ``estepParam[e][i]`` with a three-state weight vector per claim.

        Args:
            claimsByObjI: indexed by object; each entry is iterated as
                ``(s, vs)`` pairs, where ``s`` indexes ``quality`` and
                ``vs`` is a candidate index of that object.
            quality: ``quality[s][k]`` is read for ``k`` in 0, 1, 2.
            estepParam: ``estepParam[e][i]`` is overwritten in place; it
                must support ``fill`` and item assignment for indices 0-2.
            pop: when truthy, non-matching candidates are weighted with
                ``self.pop_weight1`` / ``self.pop_weight2``; otherwise
                with reciprocals derived from ``self.count_parents`` and
                ``self.count_values``.

        Objects are skipped when ``self.skip_single_cand`` is set and
        ``self.single_cand[e]`` is truthy.  Nothing is returned.
        """
        for obj in range(self.numObjects):
            num_cands = len(self.candidates[obj])
            if self.skip_single_cand and self.single_cand[obj]:
                continue

            for claim_idx, (src, claimed) in enumerate(claimsByObjI[obj]):
                post = estepParam[obj][claim_idx]
                post.fill(0.0)

                if self.descendant_ancestor_relationship[obj]:
                    claimed_node = self.matching_nodes[obj][claimed]
                    for cand in range(num_cands):
                        mass = self.mu[obj][cand]
                        if cand == claimed:
                            # Slot 0: the candidate is exactly the claimed value.
                            post[0] = mass
                            continue

                        # Slot 1 when the claimed node is an ancestor of the
                        # candidate's node, slot 2 for every other candidate.
                        cand_node = self.matching_nodes[obj][cand]
                        claimed_is_ancestor = claimed_node.isAncestorof(
                            cand_node)
                        if pop:
                            if claimed_is_ancestor:
                                post[1] += (
                                    mass * self.pop_weight1[obj][cand][claimed])
                            else:
                                post[2] += (
                                    mass * self.pop_weight2[obj][cand][claimed])
                        elif claimed_is_ancestor:
                            post[1] += mass / self.count_parents[obj][cand]
                        else:
                            others = (self.count_values[obj] -
                                      self.count_parents[obj][cand] - 1)
                            post[2] += mass / others
                else:
                    for cand in range(num_cands):
                        mass = self.mu[obj][cand]
                        if cand == claimed:
                            # Without the hierarchy relation, slots 0 and 1
                            # both receive the matching candidate's weight.
                            post[0] = mass
                            post[1] = mass
                        elif pop:
                            post[2] += (
                                mass * self.pop_weight2[obj][cand][claimed])
                        else:
                            others = (self.count_values[obj] -
                                      self.count_parents[obj][cand] - 1)
                            post[2] += mass / others

                # Scale each slot by the source's value for that state.
                for state in range(3):
                    post[state] *= quality[src][state]

                normalize1d(post)
Ejemplo n.º 3
0
    def initializePopularity(self):
        """Build ``self.pop_weight1``, ``self.pop_weight2`` and ``self.popularity``.

        For every object ``e`` two ``numValues x numValues`` matrices are
        created.  Row ``v`` counts, over the claims ``(s, vs)`` of
        ``self.claimsByObjI[e]`` with ``vs != v``:

        * in ``pop_weight1[e][v][vs]`` the claims whose node
          ``matching_nodes[e][vs]`` reports ``isAncestorof(node_v)``;
        * in ``pop_weight2[e][v][vs]`` all remaining claims.

        A row is passed to ``normalize1d`` only when it received at least
        one count (``normalize1d`` is defined elsewhere; it is presumably
        an in-place normalisation -- confirm).

        ``self.popularity[e][i]`` is ``candidates[e][i][1]`` divided by the
        number of claims plus the number of answers recorded for ``e``.
        """
        self.pop_weight1 = []
        self.pop_weight2 = []
        popularity = []

        for e in range(self.numObjects):
            claims = self.claimsByObjI[e]
            cands = self.candidates[e]
            numValues = len(cands)
            pw1 = np.zeros((numValues, numValues))
            pw2 = np.zeros((numValues, numValues))

            for v in range(numValues):
                node_v = self.matching_nodes[e][v]
                saw_ancestor = False
                saw_other = False
                for _, vs in claims:
                    if vs == v:
                        continue
                    if self.matching_nodes[e][vs].isAncestorof(node_v):
                        pw1[v][vs] += 1.0
                        saw_ancestor = True
                    else:
                        pw2[v][vs] += 1.0
                        saw_other = True

                # All-zero rows are left untouched instead of normalised.
                if saw_ancestor:
                    normalize1d(pw1[v])
                if saw_other:
                    normalize1d(pw2[v])

            self.pop_weight1.append(pw1)
            self.pop_weight2.append(pw2)

            # Float denominator so the ratio below is never integer division.
            numClaims = float(len(claims) + len(self.answersByObjI[e]))
            popularity.append(
                np.array([cand[1] / numClaims for cand in cands], dtype=float))

        self.popularity = popularity
Ejemplo n.º 4
0
    def MstepQuality(self, quality, prior, claimsByObjI, estepPrams,
                     numSources, pop):
        """Accumulate E-step weights into ``quality`` and normalise each row.

        Args:
            quality: ``quality[s][b]`` is updated in place for the three
                states ``b``.
            prior: read only when ``self.regularization`` is truthy; each
                ``quality[s][b]`` then starts from ``prior[s][b] - 1.0``.
            claimsByObjI: indexed by object; each entry is iterated as
                ``(s, vs)`` pairs (only ``s`` is used here).
            estepPrams: ``estepPrams[e][i][b]`` is added to
                ``quality[s][b]`` for the i-th claim of object ``e``.
            numSources: number of leading rows of ``quality`` that are
                re-initialised (when regularising) and normalised.
            pop: accepted for interface compatibility; not used here.

        Objects are skipped when ``self.skip_single_cand`` is set and
        ``self.single_cand[e]`` is truthy.  Nothing is returned.
        """
        numStates = 3

        # NOTE(review): when self.regularization is falsy, quality is not
        # reset here, so the sums below are added on top of whatever the
        # caller passed in -- confirm that callers zero it beforehand.
        if self.regularization:
            for s in range(numSources):
                for b in range(numStates):
                    quality[s][b] = prior[s][b] - 1.0

        for e in range(self.numObjects):
            if self.skip_single_cand and self.single_cand[e]:
                continue

            for i, (s, _) in enumerate(claimsByObjI[e]):
                weights = estepPrams[e][i]
                for b in range(numStates):
                    quality[s][b] += weights[b]

        for s in range(numSources):
            normalize1d(quality[s])
Ejemplo n.º 5
0
    def initializeConfidences(self):
        """Randomly initialise ``self.phis`` and ``self.psiw``.

        For each of the ``self.numSources`` rows of ``self.phis`` and each of
        the ``self.numWorkers`` rows of ``self.psiw``, the first three
        entries are drawn with ``np.random.uniform`` from the ranges below
        (in index order 0, 1, 2) and the row is then passed to
        ``normalize1d`` (defined elsewhere; presumably an in-place
        normalisation -- confirm).
        """
        source_ranges = ((0.6, 0.7), (0.2, 0.3), (0.1, 0.2))
        worker_ranges = ((0.65, 0.75), (0.15, 0.25), (0.15, 0.25))

        for s in range(self.numSources):
            for state, (low, high) in enumerate(source_ranges):
                self.phis[s][state] = np.random.uniform(low, high)
            normalize1d(self.phis[s])

        for w in range(self.numWorkers):
            for state, (low, high) in enumerate(worker_ranges):
                self.psiw[w][state] = np.random.uniform(low, high)
            normalize1d(self.psiw[w])
Ejemplo n.º 6
0
    def regiWorkers(self, numNewWorkers):
        """Register new workers and append a random row to ``self.psiw`` for each.

        Delegates to the parent ``regiWorkers`` and to ``self.initPriors()``
        first.  Then, for every new worker, a list of ``self.states_wrk``
        zeros is created, its first three entries are drawn with
        ``np.random.uniform``, the list is passed to ``normalize1d``
        (defined elsewhere; presumably an in-place normalisation --
        confirm) and appended to ``self.psiw``.

        Args:
            numNewWorkers: number of rows to append.
        """
        super(TDH_meta, self).regiWorkers(numNewWorkers)
        self.initPriors()

        for _ in range(numNewWorkers):
            pw = [0.0] * self.states_wrk

            pw[0] = np.random.uniform(0.7, 0.8)
            pw[1] = np.random.uniform(0.2, 0.3)
            pw[2] = np.random.uniform(0.1, 0.2)
            # Only three entries are initialised; more states are merely reported.
            if self.states_wrk > 3:
                print("num state err")

            normalize1d(pw)

            self.psiw.append(pw)
Ejemplo n.º 7
0
    def get_estimated_distribution_matrix(self, w):
        """Sample one answer per unanswered object and return updated weights.

        Objects whose index appears as the first element of an entry in
        ``self.answersByWorkerI[w]`` are skipped, as are objects whose
        ``self.mu[e]`` has exactly one entry.

        For each remaining object:

        1. A square matrix is filled row by row from ``self.psiw[w]`` (and,
           depending on ``self.popWrk`` and the object's
           ``descendant_ancestor_relationship`` flag, from the popularity
           weights or the parent/value counts).  Each row is passed to
           ``normalize1d`` and accumulated, weighted by ``self.mu[e][t]``,
           into a vector used as sampling probabilities.
        2. An index is drawn with ``np.random.choice`` from that vector,
           and the result vector is ``self.mu[e][t]`` times the matrix
           entry ``[drawn][t]``, passed to ``normalize1d``.

        Args:
            w: index into ``self.answersByWorkerI`` and ``self.psiw``.

        Returns:
            A list of ``(object index, numpy array)`` tuples.
        """
        already_answered = {entry[0] for entry in self.answersByWorkerI[w]}

        results = []
        for obj in range(self.numObjects):
            if obj in already_answered:
                continue

            updated = np.zeros(len(self.mu[obj]))
            n_cand = len(updated)
            if n_cand == 1:
                continue

            # Step 1: per-row answer weights and their mu-weighted mixture.
            answer_rows = np.zeros((n_cand, n_cand))
            answer_marginal = np.zeros((n_cand))
            for truth in range(n_cand):
                truth_node = self.matching_nodes[obj][truth]
                psi = self.psiw[w]
                row = answer_rows[truth]

                if self.descendant_ancestor_relationship[obj]:
                    for ans in range(n_cand):
                        if ans == truth:
                            prob = psi[0]
                        else:
                            ans_node = self.matching_nodes[obj][ans]
                            ans_is_ancestor = ans_node.isAncestorof(truth_node)
                            if self.popWrk:
                                if ans_is_ancestor:
                                    prob = (psi[1] *
                                            self.pop_weight1[obj][truth][ans])
                                else:
                                    prob = (psi[2] *
                                            self.pop_weight2[obj][truth][ans])
                            elif ans_is_ancestor:
                                prob = psi[1] / (self.count_parents[obj][truth])
                            else:
                                prob = psi[2] / (
                                    self.count_values[obj] -
                                    self.count_parents[obj][truth] - 1)
                        row[ans] = prob
                else:
                    for ans in range(n_cand):
                        if ans == truth:
                            prob = psi[0] + psi[1]
                        elif self.popWrk:
                            prob = psi[2] * self.pop_weight2[obj][truth][ans]
                        else:
                            prob = psi[2] / (
                                self.count_values[obj] -
                                self.count_parents[obj][truth] - 1)
                        row[ans] = prob

                normalize1d(answer_rows[truth])
                answer_marginal += answer_rows[truth] * self.mu[obj][truth]

            # Report (but do not correct) a mixture that drifted from 1.
            total = sum(answer_marginal)
            if total > 1.01 or total < 0.99:
                print("sum")

            drawn = np.random.choice(range(n_cand), p=answer_marginal)

            # Step 2: reweight mu by the drawn row.
            # NOTE(review): the drawn index selects the *row* here, whereas
            # step 1 filled rows by ``truth`` and columns by ``ans`` --
            # confirm that [drawn][t] rather than [t][drawn] is intended.
            for t in range(n_cand):
                updated[t] = self.mu[obj][t] * answer_rows[drawn][t]

            normalize1d(updated)
            results.append((obj, updated))
        return results