Example #1
    def create_split(self):
        """
        Creating an EgoNetSplitter.
        """
        self.egonet_splitter = EgoNetSplitter()
        self.egonet_splitter.fit(self.graph)
        self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph,
                                         self.args)
        print("\nDoing persona random walks.\n")
        self.persona_walker.create_features()
        self.create_noises()
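
For orientation, here is a minimal standalone sketch of the split the method above creates; the ego_splitting import path and the toy graph are assumptions, while fit(), persona_graph and personality_map are exactly the members the snippet relies on.

import networkx as nx

from ego_splitting import EgoNetSplitter  # import path assumed

graph = nx.karate_club_graph()  # small toy graph for illustration
splitter = EgoNetSplitter()
splitter.fit(graph)

# Each original node is split into one persona per ego-net community,
# so the persona graph has at least as many nodes as the input graph.
print(graph.number_of_nodes(), splitter.persona_graph.number_of_nodes())

# personality_map sends each persona node back to its original node.
print(list(splitter.personality_map.items())[:5])
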
Example #2
import json
import random

import numpy as np
import pandas as pd
import torch
from tqdm import trange

# Project-specific classes used below (import paths assumed; adjust to
# your project layout).
from ego_splitting import EgoNetSplitter
from splitter import Splitter
from walkers import DeepWalker


class SplitterTrainer(object):
    """
    Class for training a Splitter.
    """
    def __init__(self, graph, args):
        """
        :param graph: NetworkX graph object.
        :param args: Arguments object.
        """
        self.graph = graph
        self.args = args
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

    def create_noises(self):
        """
        Creating node noise distribution for negative sampling.
        """
        self.downsampled_degrees = {}
        for n in self.egonet_splitter.persona_graph.nodes():
            self.downsampled_degrees[n] = int(
                1 + self.egonet_splitter.persona_graph.degree(n)**0.75)
        self.noises = [
            k for k, v in self.downsampled_degrees.items() for i in range(v)
        ]

    def base_model_fit(self):
        """
        Fitting DeepWalk on base model.
        """
        self.base_walker = DeepWalker(self.graph, self.args)
        print("\nDoing base random walks.\n")
        self.base_walker.create_features()
        print("\nLearning the base model.\n")
        self.base_node_embedding = self.base_walker.learn_base_embedding()
        print("\nDeleting the base walker.\n")
        del self.base_walker

    def create_split(self):
        """
        Creating an EgoNetSplitter.
        """
        self.egonet_splitter = EgoNetSplitter()
        self.egonet_splitter.fit(self.graph)
        self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph,
                                         self.args)
        print("\nDoing persona random walks.\n")
        self.persona_walker.create_features()
        self.create_noises()

    def setup_model(self):
        """
        Creating a model and doing a transfer to GPU.
        """
        base_node_count = self.graph.number_of_nodes()
        persona_node_count = self.egonet_splitter.persona_graph.number_of_nodes()
        self.model = Splitter(self.args, base_node_count, persona_node_count)
        self.model.create_weights()
        self.model.initialize_weights(self.base_node_embedding,
                                      self.egonet_splitter.personality_map)
        self.model = self.model.to(self.device)

    def transfer_batch(self, source_nodes, context_nodes, targets,
                       persona_nodes, pure_source_nodes):
        """
        Transferring the batch to GPU.
        """
        self.sources = torch.LongTensor(source_nodes).to(self.device)
        self.contexts = torch.LongTensor(context_nodes).to(self.device)
        self.targets = torch.FloatTensor(targets).to(self.device)
        self.personas = torch.LongTensor(persona_nodes).to(self.device)
        self.pure_sources = torch.LongTensor(pure_source_nodes).to(self.device)

    def optimize(self):
        """
        Doing a weight update.
        """
        loss = self.model(self.sources, self.contexts, self.targets,
                          self.personas, self.pure_sources)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()

    def process_walk(self, walk):
        """
        Process random walk (source, context) pairs.
        Sample negative instances and create persona node list.
        :param walk: Random walk sequence.
        """
        left_nodes = [
            walk[i] for i in range(len(walk) - self.args.window_size)
            for j in range(1, self.args.window_size + 1)
        ]
        right_nodes = [
            walk[i + j] for i in range(len(walk) - self.args.window_size)
            for j in range(1, self.args.window_size + 1)
        ]
        node_pair_count = len(left_nodes)
        source_nodes = left_nodes + right_nodes
        context_nodes = right_nodes + left_nodes
        persona_nodes = np.array([
            self.egonet_splitter.personality_map[source_node]
            for source_node in source_nodes
        ])
        pure_source_nodes = np.array(source_nodes)
        source_nodes = np.array(
            (self.args.negative_samples + 1) * source_nodes)
        noises = np.random.choice(
            self.noises, node_pair_count * 2 * self.args.negative_samples)
        context_nodes = np.concatenate((np.array(context_nodes), noises))
        positives = [1.0 for node in range(node_pair_count * 2)]
        negatives = [
            0.0
            for node in range(node_pair_count * self.args.negative_samples * 2)
        ]
        targets = np.array(positives + negatives)
        self.transfer_batch(source_nodes, context_nodes, targets,
                            persona_nodes, pure_source_nodes)

    def update_average_loss(self, loss_score):
        """
        Updating the average loss and the progress bar description.
        :param loss_score: Loss on the sample.
        """
        self.cummulative_loss = self.cummulative_loss + loss_score
        self.steps = self.steps + 1
        average_loss = self.cummulative_loss / self.steps
        self.walk_steps.set_description("Splitter (Loss=%g)" %
                                        round(average_loss, 4))

    def reset_average_loss(self, step):
        """
        Doing a reset on the average loss.
        :param step: Current number of walks processed.
        """
        if step % 100 == 0:
            self.cummulative_loss = 0
            self.steps = 0

    def fit(self):
        """
        Fitting a model.
        """
        self.base_model_fit()
        self.create_split()
        self.setup_model()
        self.model.train()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.args.learning_rate)
        self.optimizer.zero_grad()
        print("\nLearning the joint model.\n")
        random.shuffle(self.persona_walker.paths)
        self.walk_steps = trange(len(self.persona_walker.paths), desc="Loss")
        for step in self.walk_steps:
            self.reset_average_loss(step)
            walk = self.persona_walker.paths[step]
            self.process_walk(walk)
            loss_score = self.optimize()
            self.update_average_loss(loss_score)

    def save_embedding(self):
        """
        Saving the node embedding.
        """
        print("\n\nSaving the model.\n")
        nodes = [node for node in self.egonet_splitter.persona_graph.nodes()]
        nodes.sort()
        nodes = torch.LongTensor(nodes).to(self.device)
        embedding = self.model.node_embedding(nodes).cpu().detach().numpy()
        embedding_header = ["id"] + [
            "x_" + str(x) for x in range(self.args.dimensions)
        ]
        embedding = [
            np.array(range(embedding.shape[0])).reshape(-1, 1), embedding
        ]
        embedding = np.concatenate(embedding, axis=1)
        embedding = pd.DataFrame(embedding, columns=embedding_header)
        embedding.to_csv(self.args.embedding_output_path, index=None)

    def save_persona_graph_mapping(self):
        """
        Saving the persona map.
        """
        with open(self.args.persona_output_path, "w") as f:
            json.dump(self.egonet_splitter.personality_map, f)
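
A minimal driver sketch for the class above. The args fields mirror the attributes the methods actually read (window_size, negative_samples, learning_rate, dimensions, embedding_output_path, persona_output_path); the walk-related fields, the toy graph and the SplitterTrainer import path are assumptions, since DeepWalker's parameters are not shown in this example.

from types import SimpleNamespace

import networkx as nx

from splitter import SplitterTrainer  # import path assumed

args = SimpleNamespace(
    window_size=5,                        # read in process_walk
    negative_samples=5,                   # read in process_walk
    learning_rate=0.025,                  # read in fit
    dimensions=128,                       # read in save_embedding
    embedding_output_path="./embedding.csv",
    persona_output_path="./personas.json",
    # DeepWalker presumably needs walk settings as well; names assumed:
    number_of_walks=10,
    walk_length=40,
)

graph = nx.karate_club_graph()            # toy graph for illustration
trainer = SplitterTrainer(graph, args)
trainer.fit()                             # base DeepWalk, split, joint training
trainer.save_embedding()                  # persona embedding as CSV
trainer.save_persona_graph_mapping()      # persona -> original node as JSON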