# NOTE(review): this module-level function is a byte-for-byte duplicate of
# SplitterTrainer.create_split defined below. It takes `self` yet lives at
# module scope, so it can never be called meaningfully — it looks like stray
# paste/merge residue. Confirm and delete; kept verbatim here to avoid
# changing the module's surface.
def create_split(self):
    """
    Create the EgoNetSplitter persona decomposition and run persona walks.

    Fits an EgoNetSplitter on self.graph, builds a DeepWalker over the
    resulting persona graph, generates random-walk features, and finally
    builds the negative-sampling noise table via self.create_noises().
    """
    self.egonet_splitter = EgoNetSplitter()
    self.egonet_splitter.fit(self.graph)
    self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph,
                                     self.args)
    print("\nDoing persona random walks.\n")
    self.persona_walker.create_features()
    self.create_noises()
class SplitterTrainer(object):
    """
    Class for training a Splitter.

    Orchestrates the full pipeline: base DeepWalk embedding on the input
    graph, ego-net persona splitting, persona random walks, joint Splitter
    model training with negative sampling, and saving of the resulting
    embedding and persona mapping.

    Methods are order-dependent: fit() must run base_model_fit(),
    create_split() and setup_model() before the training loop, because each
    stage creates attributes the next stage consumes.
    """
    def __init__(self, graph, args):
        """
        :param graph: NetworkX graph object.
        :param args: Arguments object (expects window_size, negative_samples,
                     learning_rate, dimensions, embedding_output_path,
                     persona_output_path, among others).
        """
        self.graph = graph
        self.args = args
        # Train on GPU when available, otherwise fall back to CPU.
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

    def create_noises(self):
        """
        Create the node noise distribution used for negative sampling.

        Each persona-graph node is repeated proportionally to
        int(1 + degree**0.75) in self.noises, so random draws from the list
        follow a smoothed-degree distribution.
        # NOTE(review): the 0.75 exponent presumably mirrors word2vec's
        # unigram^(3/4) negative-sampling smoothing — confirm against the
        # Splitter paper.
        """
        self.downsampled_degrees = {}
        for n in self.egonet_splitter.persona_graph.nodes():
            self.downsampled_degrees[n] = int(
                1 + self.egonet_splitter.persona_graph.degree(n)**0.75)
        # Flatten the {node: count} map into a sampling list where each
        # node appears `count` times.
        self.noises = [
            k for k, v in self.downsampled_degrees.items() for i in range(v)
        ]

    def base_model_fit(self):
        """
        Fit DeepWalk on the original (base) graph.

        Stores the learned embedding in self.base_node_embedding, then
        deletes the walker to free memory before the persona phase.
        """
        self.base_walker = DeepWalker(self.graph, self.args)
        print("\nDoing base random walks.\n")
        self.base_walker.create_features()
        print("\nLearning the base model.\n")
        self.base_node_embedding = self.base_walker.learn_base_embedding()
        print("\nDeleting the base walker.\n")
        # The walker (and its walk corpus) is no longer needed; release it.
        del self.base_walker

    def create_split(self):
        """
        Create the EgoNetSplitter persona decomposition and run persona walks.

        Fits an EgoNetSplitter on self.graph, builds a DeepWalker over the
        resulting persona graph, generates random-walk features, and builds
        the negative-sampling noise table via create_noises().
        """
        self.egonet_splitter = EgoNetSplitter()
        self.egonet_splitter.fit(self.graph)
        self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph,
                                         self.args)
        print("\nDoing persona random walks.\n")
        self.persona_walker.create_features()
        self.create_noises()

    def setup_model(self):
        """
        Create the Splitter model and transfer it to the target device.

        Initializes the persona embeddings from the base DeepWalk embedding
        through the persona-to-base personality_map.
        """
        base_node_count = self.graph.number_of_nodes()
        persona_node_count = self.egonet_splitter.persona_graph.number_of_nodes(
        )
        self.model = Splitter(self.args, base_node_count, persona_node_count)
        self.model.create_weights()
        self.model.initialize_weights(self.base_node_embedding,
                                      self.egonet_splitter.personality_map)
        self.model = self.model.to(self.device)

    def transfer_batch(self, source_nodes, context_nodes, targets,
                       persona_nodes, pure_source_nodes):
        """
        Transferring the batch to the target device (GPU when available).

        Converts the numpy arrays produced by process_walk() into torch
        tensors stored on self for the subsequent optimize() call.
        Targets are floats (0/1 labels); all node index arrays are longs.
        """
        self.sources = torch.LongTensor(source_nodes).to(self.device)
        self.contexts = torch.LongTensor(context_nodes).to(self.device)
        self.targets = torch.FloatTensor(targets).to(self.device)
        self.personas = torch.LongTensor(persona_nodes).to(self.device)
        self.pure_sources = torch.LongTensor(pure_source_nodes).to(self.device)

    def optimize(self):
        """
        Do one weight update on the currently transferred batch.

        :return: Scalar loss value for this batch (Python float).
        """
        loss = self.model(self.sources, self.contexts, self.targets,
                          self.personas, self.pure_sources)
        loss.backward()
        self.optimizer.step()
        # Clear gradients immediately so the next walk starts fresh.
        self.optimizer.zero_grad()
        return loss.item()

    def process_walk(self, walk):
        """
        Process one random walk into (source, context) training pairs.

        Builds symmetric skip-gram pairs within args.window_size, looks up
        each source's base personality, samples negative contexts from the
        precomputed noise list, and transfers the assembled batch to the
        device.

        :param walk: Random walk sequence (list of persona-graph node ids).
        """
        # For every anchor position i, pair it with the next window_size
        # nodes; left/right lists are aligned element-wise.
        left_nodes = [
            walk[i] for i in range(len(walk) - self.args.window_size)
            for j in range(1, self.args.window_size + 1)
        ]
        right_nodes = [
            walk[i + j] for i in range(len(walk) - self.args.window_size)
            for j in range(1, self.args.window_size + 1)
        ]
        node_pair_count = len(left_nodes)
        # Symmetrize: each pair is used in both (left→right) and
        # (right→left) directions.
        source_nodes = left_nodes + right_nodes
        context_nodes = right_nodes + left_nodes
        # Map each persona source node to its base-graph personality.
        persona_nodes = np.array([
            self.egonet_splitter.personality_map[source_node]
            for source_node in source_nodes
        ])
        pure_source_nodes = np.array(source_nodes)
        # Repeat the sources once for the positive pairs plus once per
        # negative sample, matching the concatenated context array below.
        source_nodes = np.array(
            (self.args.negative_samples + 1) * source_nodes)
        noises = np.random.choice(
            self.noises, node_pair_count * 2 * self.args.negative_samples)
        context_nodes = np.concatenate((np.array(context_nodes), noises))
        # Labels: 1.0 for true (source, context) pairs, 0.0 for noise pairs.
        positives = [1.0 for node in range(node_pair_count * 2)]
        negatives = [
            0.0
            for node in range(node_pair_count * self.args.negative_samples * 2)
        ]
        targets = np.array(positives + negatives)
        self.transfer_batch(source_nodes, context_nodes, targets,
                            persona_nodes, pure_source_nodes)

    def update_average_loss(self, loss_score):
        """
        Update the running average loss and the progress-bar description.

        # NOTE(review): "cummulative" is a misspelling of "cumulative", but
        # the attribute name is used consistently here and in
        # reset_average_loss, so it is left untouched.

        :param loss_score: Loss on the sample.
        """
        self.cummulative_loss = self.cummulative_loss + loss_score
        self.steps = self.steps + 1
        average_loss = self.cummulative_loss / self.steps
        self.walk_steps.set_description("Splitter (Loss=%g)" %
                                        round(average_loss, 4))

    def reset_average_loss(self, step):
        """
        Reset the running average loss every 100 processed walks.

        Also fires at step 0, which performs the initial attribute setup
        before the first update_average_loss call.

        :param step: Current number of walks processed.
        """
        if step % 100 == 0:
            self.cummulative_loss = 0
            self.steps = 0

    def fit(self):
        """
        Fit the full Splitter model.

        Pipeline: base DeepWalk fit → persona split + walks → model setup →
        one Adam update per (shuffled) persona random walk, with a running
        loss shown on a tqdm progress bar.
        """
        self.base_model_fit()
        self.create_split()
        self.setup_model()
        self.model.train()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.args.learning_rate)
        self.optimizer.zero_grad()
        print("\nLearning the joint model.\n")
        # Shuffle walk order so batches are not correlated by walk origin.
        random.shuffle(self.persona_walker.paths)
        self.walk_steps = trange(len(self.persona_walker.paths), desc="Loss")
        for step in self.walk_steps:
            self.reset_average_loss(step)
            walk = self.persona_walker.paths[step]
            self.process_walk(walk)
            loss_score = self.optimize()
            self.update_average_loss(loss_score)

    def save_embedding(self):
        """
        Save the learned persona-node embedding as a CSV file.

        Rows are ordered by sorted persona-node id; an "id" column with
        0..n-1 row indices is prepended to the embedding columns
        x_0..x_{dimensions-1}.
        # NOTE(review): the "id" column is range(n), not the sorted node ids
        # themselves — equivalent only if persona nodes are labeled 0..n-1;
        # confirm against EgoNetSplitter. Also, to_csv(index=None) relies on
        # None being falsy; index=False is the documented spelling — confirm.
        """
        print("\n\nSaving the model.\n")
        nodes = [node for node in self.egonet_splitter.persona_graph.nodes()]
        nodes.sort()
        nodes = torch.LongTensor(nodes).to(self.device)
        # Detach and move to CPU before converting to numpy for pandas.
        embedding = self.model.node_embedding(nodes).cpu().detach().numpy()
        embedding_header = ["id"] + [
            "x_" + str(x) for x in range(self.args.dimensions)
        ]
        embedding = [
            np.array(range(embedding.shape[0])).reshape(-1, 1), embedding
        ]
        embedding = np.concatenate(embedding, axis=1)
        embedding = pd.DataFrame(embedding, columns=embedding_header)
        embedding.to_csv(self.args.embedding_output_path, index=None)

    def save_persona_graph_mapping(self):
        """
        Save the persona-node → base-node map as a JSON file.
        """
        with open(self.args.persona_output_path, "w") as f:
            json.dump(self.egonet_splitter.personality_map, f)