コード例 #1
0
ファイル: network.py プロジェクト: ielhelw/MCMC_FOR_AMMSB
    def __init_test_set(self):
        """
        Sample the test set: an equal number of linked and non-linked edges.

        Half of ``self.__held_out_size`` linked edges are picked among the
        linked edges that are in neither the held-out map nor the test map.
        Each picked edge is stored in the test map with value ``True`` and
        removed (in both directions) from the training link map.  The same
        number of non-linked edges is then drawn with
        ``__sample_non_link_edge_for_test`` and stored with value ``False``.

        NOTE(review): the outer loop keeps re-sampling until the quota is met;
        it does not terminate if fewer than the required number of unused
        linked edges exist -- confirm callers guarantee this.
        """
        p = int(self.__held_out_size / 2)
        # Sample p linked edges from the network.
        while p > 0:
            # Some linked edges are already used by the held-out set, so draw
            # twice as many candidates as are still needed; the batch is then
            # likely to contain p usable edges.
            sampled_linked_edges = random.get("graph init").sample(
                self.__linked_edges, 2 * p)
            if self.__compatibility_mode:
                # Presumably sorted to get a reproducible iteration order.
                sampled_linked_edges = sorted(sampled_linked_edges)
            for edge in sampled_linked_edges:
                # Stop as soon as the quota is met.  The original test was
                # `p < 0`, which accepted one edge too many before breaking.
                if p <= 0:
                    break
                # Skip edges already used by the held-out or the test set.
                if edge in self.__held_out_map or edge in self.__test_map:
                    continue
                self.__test_map[edge] = True
                self.__train_link_map[edge[0]].remove(edge[1])
                self.__train_link_map[edge[1]].remove(edge[0])
                p -= 1

        # Sample the same number of non-linked edges from the network.
        p = int(self.__held_out_size / 2)
        while p > 0:
            edge = self.__sample_non_link_edge_for_test()
            self.__test_map[edge] = False
            p -= 1
コード例 #2
0
    def __update_phi(self, i, neighbors):
        '''
        Update the phi row of node i from a set of neighbor nodes.

        i:         index of the node whose row ``self.__phi[i]`` is rewritten.
        neighbors: iterable of node indices; entries equal to i are skipped.

        For every neighbor a K-vector of unnormalised probabilities is built
        from beta, epsilon and the pi rows of both nodes; its normalised form
        contributes to the gradient.  The row is then replaced by the absolute
        value of (old value + eps_t/2 * drift + sqrt(eps_t * old value) * noise).
        Returns nothing.
        '''
        eps_t = self.__a * ((1 + self._step_count / self.__b) ** -self.__c)

        phi_i = np.asarray(self.__phi[i])
        phi_i_sum = np.sum(phi_i)
        pi_i = np.asarray(self._pi[i])
        beta = np.asarray(self._beta)
        grads = np.zeros(self._K)
        noise = random.get("phi update").randn(self._K)  # random noise.

        # Loop-invariant: fetch the linked-edge container once.
        linked_edges = self._network.get_linked_edges()

        for neighbor in neighbors:
            if neighbor == i:
                continue

            edge = (min(i, neighbor), max(i, neighbor))
            # With y in {0, 1}, x**y * (1 - x)**(1 - y) is x for a link and
            # 1 - x for a non-link.
            if edge in linked_edges:
                same_lik, diff_lik = beta, self._epsilon
            else:
                same_lik, diff_lik = 1 - beta, 1 - self._epsilon

            pi_n = np.asarray(self._pi[neighbor])
            probs = same_lik * pi_i * pi_n + diff_lik * pi_i * (1 - pi_n)
            grads += (probs / np.sum(probs)) / phi_i - 1.0 / phi_i_sum

        # Update phi for node i (all K components at once; each component
        # depends only on its own old value, gradient and noise).
        scale = self._N * 1.0 / self.__num_node_sample
        self.__phi[i] = np.abs(
            phi_i + eps_t / 2 * (self._alpha - phi_i + scale * grads) +
            eps_t ** 0.5 * phi_i ** 0.5 * noise)
コード例 #3
0
    def __init__(self, args, graph, compatibility_mode):
        """
        Set up the learner state from the command-line arguments and the graph.

        After the base-class initialisation this stores the sampling strategy,
        the interval, the number of pieces reported by the graph, the step-size
        parameters a, b, c, and the node sample size.  It then draws the
        initial theta (K x 2) and phi (N x K) values from gamma distributions
        and derives pi and beta from them.
        """
        Learner.__init__(self, args, graph, compatibility_mode)

        self._strategy = args.strategy
        self._interval = args.interval
        self.__num_pieces = graph.get_num_pieces()

        # Step-size parameters.  Multiplying by 1.0 forces floats: with an
        # integer b (e.g. 1024) the quotient step_count / b would be an
        # integer division and eps_t would always equal a.
        self.__b = 1.0 * args.b
        self.__c = 1.0 * args.c
        # a == 0.0 means "derive a from b and c".
        self.__a = (self.__b ** -self.__c) if args.a == 0.0 else 1.0 * args.a

        # Number of nodes sampled per step.
        # TODO: choose this automatically.
        self.__num_node_sample = args.num_node_sample

        # pi and beta must stay on the simplex, so they are re-parameterised:
        # theta (for beta) and phi (for pi) are updated first and pi / beta
        # are recomputed from them.
        theta_shape = (self._K, 2)
        self.__theta = random.get("theta init").gamma(
            self._eta[0], self._eta[1], theta_shape)
        phi_shape = (self._N, self._K)
        self.__phi = random.get("phi init").gamma(1, 1, phi_shape)

        self.update_pi_from_phi()
        self.update_beta_from_theta()
コード例 #4
0
    def __init__(self, args, graph, compatibility_mode):
        """
        Initialise the learner from parsed arguments and a graph.

        Runs the base-class initialiser, records strategy / interval / number
        of pieces, converts the step-size parameters to floats, stores the
        node sample size, draws theta and phi from gamma distributions and
        finally computes pi and beta from them.
        """
        Learner.__init__(self, args, graph, compatibility_mode)

        self._strategy = args.strategy
        self._interval = args.interval
        self.__num_pieces = graph.get_num_pieces()

        # Step-size parameters, kept as floats on purpose: an integer b
        # (e.g. 1024) makes step_count / b an integer division, so eps_t
        # would never move away from a.
        self.__b = 1.0 * args.b
        self.__c = 1.0 * args.c
        if args.a == 0.0:
            # No explicit a given: use b ** -c.
            self.__a = self.__b ** -self.__c
        else:
            self.__a = 1.0 * args.a

        # Control parameter for learning.  TODO: set this automatically.
        self.__num_node_sample = args.num_node_sample

        # Re-parameterisation: pi and beta have to remain on the simplex, so
        # the auxiliary variables phi (for pi) and theta (for beta) are the
        # ones that get updated; pi and beta are derived from them.
        theta_rng = random.get("theta init")
        self.__theta = theta_rng.gamma(
            self._eta[0], self._eta[1], (self._K, 2))
        phi_rng = random.get("phi init")
        self.__phi = phi_rng.gamma(1, 1, (self._N, self._K))

        self.update_pi_from_phi()
        self.update_beta_from_theta()
コード例 #5
0
ファイル: network.py プロジェクト: ielhelw/MCMC_FOR_AMMSB
    def __sample_non_link_edge_for_test(self):
        """
        Draw one non-linked edge that may be added to the test set.

        Two node indices are drawn repeatedly until they form a pair that is
        not a self-loop, is not a linked edge, and is present in neither the
        held-out map nor the test map.  The pair is returned as a tuple with
        the smaller index first.

        TODO: prevent the infinite loop (there is no exit when no admissible
        pair exists).
        """
        last = self.__N - 1
        while True:
            a = random.get("graph init").randint(0, last)
            b = random.get("graph init").randint(0, last)

            if a != b:
                # Canonical orientation: smaller node index first.
                candidate = (a, b) if a < b else (b, a)

                taken = (candidate in self.__linked_edges
                         or candidate in self.__held_out_map
                         or candidate in self.__test_map)
                if not taken:
                    return candidate
コード例 #6
0
ファイル: network.py プロジェクト: ielhelw/MCMC_FOR_AMMSB
    def __sample_non_link_edge_for_held_out(self):
        '''
        Draw one non-linked edge that may be added to the held-out set.

        Pairs of node indices are drawn until one is found that is not a
        self-loop, is not a linked edge and is not already in the held-out
        map.  The result is a tuple with the smaller index first.

        TODO: add a condition that guards against an infinite loop.
        '''
        upper = self.__N - 1
        while True:
            u = random.get("graph init").randint(0, upper)
            v = random.get("graph init").randint(0, upper)

            # Reject self-loops.
            if u == v:
                continue

            # Order the end points so that the first index is the smaller one.
            if u > v:
                u, v = v, u
            pair = (u, v)

            # Accept only pairs that are unused so far.
            if not (pair in self.__linked_edges
                    or pair in self.__held_out_map):
                return pair
コード例 #7
0
ファイル: network.py プロジェクト: ielhelw/MCMC_FOR_AMMSB
    def __random_edge_sampling(self, mini_batch_size):
        """
        Build a mini batch of ``mini_batch_size`` linked training edges.

        If the requested size is at least the number of total edges minus the
        held-out size and the test-map size, the full training set is returned
        via ``__sample_full_training_set``.  Otherwise linked edges are sampled
        repeatedly and those that are in neither the held-out map, the test
        map, nor the batch itself are added until the batch is full.

        Returns a tuple ``(mini_batch_set, scale)`` where scale is the number
        of linked edges divided by ``mini_batch_size``.
        """
        if mini_batch_size >= self.__num_total_edges - self.__held_out_size - len(
                self.__test_map):
            return self.__sample_full_training_set()

        mini_batch_set = Set()
        p = mini_batch_size
        while p > 0:
            sampled_linked_edges = random.get("minibatch sampler").sample(
                self.__linked_edges, mini_batch_size)
            for edge in sampled_linked_edges:
                # Stop once the batch is full.  The original test `p < 0`
                # admitted one extra edge, yielding mini_batch_size + 1 edges.
                if p <= 0:
                    break
                if edge in self.__held_out_map or edge in self.__test_map or edge in mini_batch_set:
                    continue
                mini_batch_set.add(edge)
                p -= 1

        # NOTE(review): with integer operands this is an integer division
        # under Python 2, so the scale is truncated -- confirm that is intended.
        return (mini_batch_set, len(self.__linked_edges) / mini_batch_size)
コード例 #8
0
    def __update_phi(self, i, neighbors):
        '''
        Update the phi row of node i from a set of neighbor nodes.

        i:         index of the node whose row ``self.__phi[i]`` is rewritten.
        neighbors: iterable of node indices; entries equal to i are skipped.

        Each neighbor contributes a normalised K-vector of probabilities
        (built from beta, epsilon and both nodes' pi rows) to the gradient.
        The row is then replaced by the absolute value of
        (old value + eps_t/2 * drift + sqrt(eps_t * old value) * noise).
        Returns nothing.
        '''
        eps_t = self.__a * ((1 + self._step_count / self.__b)**-self.__c)

        phi_i = np.asarray(self.__phi[i])
        phi_i_sum = np.sum(phi_i)
        pi_i = np.asarray(self._pi[i])
        beta = np.asarray(self._beta)
        grads = np.zeros(self._K)
        noise = random.get("phi update").randn(self._K)  # random noise.

        # Loop-invariant: fetch the linked-edge container once.
        linked_edges = self._network.get_linked_edges()

        for neighbor in neighbors:
            if neighbor == i:
                continue

            edge = (min(i, neighbor), max(i, neighbor))
            # With y in {0, 1}, x**y * (1 - x)**(1 - y) reduces to x for a
            # link and to 1 - x for a non-link.
            if edge in linked_edges:
                same_lik, diff_lik = beta, self._epsilon
            else:
                same_lik, diff_lik = 1 - beta, 1 - self._epsilon

            pi_n = np.asarray(self._pi[neighbor])
            probs = same_lik * pi_i * pi_n + diff_lik * pi_i * (1 - pi_n)
            grads += (probs / np.sum(probs)) / phi_i - 1.0 / phi_i_sum

        # Update phi for node i.  Every component depends only on its own old
        # value, gradient and noise, so the K updates can be done at once.
        scale = self._N * 1.0 / self.__num_node_sample
        self.__phi[i] = np.abs(
            phi_i + eps_t / 2 * (self._alpha - phi_i + scale * grads) +
            eps_t**0.5 * phi_i**0.5 * noise)
コード例 #9
0
    def __update_beta(self, mini_batch, scale):
        '''
        Update theta (and from it beta) using the edges of a mini batch.

        mini_batch: iterable of edges, each a pair (i, j) of node indices.
        scale:      factor applied to the accumulated gradient.

        For every edge a K-vector of community probabilities plus one
        "different community" term is computed; the normalised vector weights
        the gradient with respect to both theta columns.  theta is then
        replaced in place by the absolute value of
        (old value + eps_t/2 * drift + sqrt(eps_t * old value) * noise) and
        ``update_beta_from_theta`` is called.  Returns nothing.
        '''
        grads = np.zeros((self._K, 2))  # gradients K*2 dimension
        theta = np.asarray(self.__theta)
        theta_sum = np.sum(theta, 1)
        beta = np.asarray(self._beta)

        eps_t = self.__a * ((1 + self._step_count / self.__b)**-self.__c)

        # Loop-invariant: fetch the linked-edge container once.
        linked_edges = self._network.get_linked_edges()

        for edge in mini_batch:
            y = 1 if edge in linked_edges else 0
            i, j = edge
            pi_i = np.asarray(self._pi[i])
            pi_j = np.asarray(self._pi[j])
            pi_sum = np.sum(pi_i * pi_j)

            # With y in {0, 1}, x**y * (1 - x)**(1 - y) is x for a link and
            # 1 - x for a non-link.
            if y:
                probs = beta * pi_i * pi_j
                prob_0 = self._epsilon * (1 - pi_sum)
            else:
                probs = (1 - beta) * pi_i * pi_j
                prob_0 = (1 - self._epsilon) * (1 - pi_sum)

            weights = probs / (np.sum(probs) + prob_0)
            grads[:, 0] += weights * (float(1 - y) / theta[:, 0] -
                                      1.0 / theta_sum)
            grads[:, 1] += weights * (float(y) / theta[:, 1] -
                                      1.0 / theta_sum)

        # update theta
        noise = random.get("beta update").randn(self._K, 2)  # random noise.

        # eta holds one value per theta column and broadcasts over the K rows.
        eta = np.array([self._eta[0], self._eta[1]], dtype=float)
        self.__theta[:] = np.abs(
            theta + eps_t / 2.0 * (eta - theta + scale * grads) +
            eps_t**.5 * theta**.5 * noise)
        self.update_beta_from_theta()
コード例 #10
0
ファイル: network.py プロジェクト: ielhelw/MCMC_FOR_AMMSB
    def __init_held_out_set(self):
        """
        Sample the held-out set: an equal number of links and non-links.

        Half of ``self.__held_out_size`` linked edges are sampled, stored in
        the held-out map with value ``True`` and removed (in both directions)
        from the training link map.  The same number of non-linked edges is
        then drawn with ``__sample_non_link_edge_for_held_out`` and stored
        with value ``False``.

        A warning is printed when there are fewer linked edges than requested;
        sampling is still attempted afterwards.
        """
        # int() keeps p an integer count, matching __init_test_set.
        p = int(self.__held_out_size / 2)

        # Sample p linked edges from the network.
        if len(self.__linked_edges) < p:
            # The message is split into two literals so that source
            # indentation no longer ends up inside the printed text.
            print("There are not enough linked edges that can sample from. "
                  "please use smaller held out ratio.")

        sampled_linked_edges = random.get("graph init").sample(
            self.__linked_edges, p)
        for edge in sampled_linked_edges:
            self.__held_out_map[edge] = True
            self.__train_link_map[edge[0]].remove(edge[1])
            self.__train_link_map[edge[1]].remove(edge[0])

        # Sample p non-linked edges from the network.
        while p > 0:
            edge = self.__sample_non_link_edge_for_held_out()
            self.__held_out_map[edge] = False
            p -= 1
コード例 #11
0
    def __update_beta(self, mini_batch, scale):
        '''
        Update theta (and from it beta) using the edges of a mini batch.

        mini_batch: iterable of edges, each a pair (i, j) of node indices.
        scale:      factor applied to the accumulated gradient.

        Every edge yields K community probabilities plus one "different
        community" term; the normalised probabilities weight the gradient for
        both theta columns.  theta is then overwritten in place with the
        absolute value of
        (old value + eps_t/2 * drift + sqrt(eps_t * old value) * noise) and
        ``update_beta_from_theta`` is called.  Returns nothing.
        '''
        grads = np.zeros((self._K, 2))  # gradients K*2 dimension
        theta = np.asarray(self.__theta)
        theta_sum = np.sum(theta, 1)
        beta = np.asarray(self._beta)

        eps_t = self.__a * ((1 + self._step_count / self.__b) ** -self.__c)

        # Loop-invariant: fetch the linked-edge container once.
        linked_edges = self._network.get_linked_edges()

        for edge in mini_batch:
            y = 1 if edge in linked_edges else 0
            i, j = edge
            pi_i = np.asarray(self._pi[i])
            pi_j = np.asarray(self._pi[j])
            pi_sum = np.sum(pi_i * pi_j)

            # With y in {0, 1}, x**y * (1 - x)**(1 - y) reduces to x for a
            # link and to 1 - x for a non-link.
            if y:
                probs = beta * pi_i * pi_j
                prob_0 = self._epsilon * (1 - pi_sum)
            else:
                probs = (1 - beta) * pi_i * pi_j
                prob_0 = (1 - self._epsilon) * (1 - pi_sum)

            weights = probs / (np.sum(probs) + prob_0)
            grads[:, 0] += weights * (float(1 - y) / theta[:, 0] - 1.0 / theta_sum)
            grads[:, 1] += weights * (float(y) / theta[:, 1] - 1.0 / theta_sum)

        # update theta
        noise = random.get("beta update").randn(self._K, 2)  # random noise.

        # eta holds one value per theta column and broadcasts over the K rows.
        eta = np.array([self._eta[0], self._eta[1]], dtype=float)
        self.__theta[:] = np.abs(
            theta + eps_t / 2.0 * (eta - theta + scale * grads) +
            eps_t ** .5 * theta ** .5 * noise)
        self.update_beta_from_theta()
コード例 #12
0
ファイル: network.py プロジェクト: ielhelw/MCMC_FOR_AMMSB
    def __stratified_random_node_sampling(self, num_pieces):
        """
        Stratified sampling, which gives more attention to link edges.

        The sampling process works like this:
        a) randomly choose one node i from all nodes;
        b) draw a flag that is 0 or 1 to choose between non-link and link
           edges;
        c) flag == 1: return all training link edges of node i;
           flag == 0: sample non-link edges of node i; the number sampled is
           int(N / number of pieces).

        Returns a tuple ``(mini_batch_set, scale)``: the scale is
        N * number of pieces for the non-link case and N for the link case.

        NOTE(review): the ``num_pieces`` argument is not used; the method reads
        ``self.__num_pieces`` instead -- confirm both always agree.
        """
        # Randomly select the node ID.
        nodeId = random.get("minibatch sampler").randint(0, self.__N - 1)
        # Decide to sample links or non-links.
        # flag=0: non-link edges  flag=1: link edges
        flag = random.get("minibatch sampler").randint(0, 1)

        mini_batch_set = Set()

        if flag == 0:
            # Sample non-link edges.
            # This is an approximation, since the size of the training link
            # map entry for nodeId is much smaller than N.
            mini_batch_size = int(self.__N / self.__num_pieces)
            p = mini_batch_size
            while p > 0:
                # Because of the sparsity, a list of mini_batch_size * 2 nodes
                # likely contains at least mini_batch_size valid nodes.
                nodeList = random.sample_range("minibatch sampler", self.__N,
                                               mini_batch_size * 2)
                for neighborId in nodeList:
                    # Stop once the batch is full.  The original test `p < 0`
                    # admitted one edge more than mini_batch_size.
                    if p <= 0:
                        break
                    if neighborId == nodeId:
                        continue
                    # Check conditions; insert into the batch if valid.
                    edge = (min(nodeId, neighborId), max(nodeId, neighborId))
                    if edge in self.__linked_edges or edge in self.__held_out_map or \
                            edge in self.__test_map or edge in mini_batch_set:
                        continue

                    mini_batch_set.add(edge)
                    p -= 1

            print("A Create mini batch size " + str(
                len(mini_batch_set)) + " scale " + str(
                    self.__N * self.__num_pieces))

            return (mini_batch_set, self.__N * self.__num_pieces)

        else:
            # Sample linked edges: return all training links of nodeId.
            for neighborId in self.__train_link_map[nodeId]:
                mini_batch_set.add((min(nodeId,
                                        neighborId), max(nodeId, neighborId)))

            print("B Create mini batch size " + str(
                len(mini_batch_set)) + " scale " + str(self.__N))
            return (mini_batch_set, self.__N)