def __init__(self, args, graph):
    print("\nPerforming Node2vec...\n")
    # 1. generate walker
    walker = DeepWalker(args, graph)
    print("\nDoing deepwalks...\n")
    walker.create_features()
    self.inputFileName = "{}{}-deepwalk_{}-num_walks_{}-len_metapath.txt".format(
        args.input_path, args.idx_metapath,
        args.number_of_walks, args.walk_length)
    # 2. read data
    self.data = DataReader(args.min_count, args.care_type, self.inputFileName)
    # 3. make dataset for training
    dataset = DatasetLoader(self.data, args.window_size)
    # 4. initialize dataloader
    self.dataloader = DataLoader(dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 collate_fn=dataset.collate)
    self.output_file_name = "{}{}-embedding_{}-deepwalk_{}-dim_{}-initial_lr_{}-window_size_{}-iterations_{}-min_count.pickle".format(
        args.output_path, args.idx_embed, args.idx_metapath, args.dim,
        args.initial_lr, args.window_size, args.iterations, args.min_count)
    self.emb_size = len(self.data.word2id)
    self.emb_dimension = args.dim
    self.batch_size = args.batch_size
    self.iterations = args.iterations
    self.initial_lr = args.initial_lr
    self.skip_gram_model = SkipGramModel(self.emb_size, self.emb_dimension)
    self.use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if self.use_cuda else "cpu")
    if self.use_cuda:
        self.skip_gram_model.cuda()
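# --- Usage sketch (not part of the original code) ---
# A minimal example of driving the __init__ above. The Namespace field names
# are taken from the attributes the constructor reads; the values, the
# placeholder class name "Trainer", and the graph source are assumptions.
from argparse import Namespace

args = Namespace(input_path="./walks/", output_path="./embeddings/",
                 idx_metapath=0, idx_embed=0,
                 number_of_walks=10, walk_length=80,
                 min_count=5, care_type=0, window_size=5,
                 batch_size=32, num_workers=4,
                 dim=128, initial_lr=0.025, iterations=5)
# trainer = Trainer(args, graph)  # "Trainer" stands in for the class owning __init__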
import json
import random

import numpy as np
import pandas as pd
import torch
from tqdm import trange

# DeepWalker, EgoNetSplitter, and Splitter are defined elsewhere in this repo.


class SplitterTrainer(object):
    """
    Class for training a Splitter.
    """
    def __init__(self, graph, args):
        """
        :param graph: NetworkX graph object.
        :param args: Arguments object.
        """
        self.graph = graph
        self.args = args
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def create_noises(self):
        """
        Creating the node noise distribution for negative sampling
        (degrees raised to the 3/4 power, as in word2vec).
        """
        self.downsampled_degrees = {node: int(1 + self.egonet_splitter.persona_graph.degree(node)**0.75)
                                    for node in self.egonet_splitter.persona_graph.nodes()}
        self.noises = [k for k, v in self.downsampled_degrees.items() for i in range(v)]

    def base_model_fit(self):
        """
        Fitting DeepWalk on the base graph.
        """
        self.base_walker = DeepWalker(self.graph, self.args)
        print("\nDoing base random walks.\n")
        self.base_walker.create_features()
        print("\nLearning the base model.\n")
        self.base_node_embedding = self.base_walker.learn_base_embedding()
        print("\nDeleting the base walker.\n")
        del self.base_walker

    def create_split(self):
        """
        Creating an EgoNetSplitter.
        """
        self.egonet_splitter = EgoNetSplitter(self.graph)
        self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph, self.args)
        print("\nDoing persona random walks.\n")
        self.persona_walker.create_features()
        self.create_noises()

    def setup_model(self):
        """
        Creating a model and transferring it to the GPU.
        """
        base_node_count = self.graph.number_of_nodes()
        persona_node_count = self.egonet_splitter.persona_graph.number_of_nodes()
        self.model = Splitter(self.args, base_node_count, persona_node_count)
        self.model.create_weights()
        self.model.initialize_weights(self.base_node_embedding,
                                      self.egonet_splitter.personality_map)
        self.model = self.model.to(self.device)

    def transfer_batch(self, source_nodes, context_nodes, targets, persona_nodes, pure_source_nodes):
        """
        Transferring the batch to the GPU.
        """
        self.sources = torch.LongTensor(source_nodes).to(self.device)
        self.contexts = torch.LongTensor(context_nodes).to(self.device)
        self.targets = torch.FloatTensor(targets).to(self.device)
        self.personas = torch.LongTensor(persona_nodes).to(self.device)
        self.pure_sources = torch.LongTensor(pure_source_nodes).to(self.device)

    def optimize(self):
        """
        Doing a weight update.
        """
        loss = self.model(self.sources, self.contexts, self.targets,
                          self.personas, self.pure_sources)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()

    def process_walk(self, walk):
        """
        Processing a random walk into (source, context) pairs, sampling
        negative instances, and creating the persona node list.
        :param walk: Random walk sequence.
        """
        left_nodes = [walk[i]
                      for i in range(len(walk)-self.args.window_size)
                      for j in range(1, self.args.window_size+1)]
        right_nodes = [walk[i+j]
                       for i in range(len(walk)-self.args.window_size)
                       for j in range(1, self.args.window_size+1)]
        node_pair_count = len(left_nodes)
        source_nodes = left_nodes + right_nodes
        context_nodes = right_nodes + left_nodes
        persona_nodes = np.array([self.egonet_splitter.personality_map[source_node]
                                  for source_node in source_nodes])
        pure_source_nodes = np.array(source_nodes)
        source_nodes = np.array((self.args.negative_samples+1)*source_nodes)
        context_nodes = np.concatenate((np.array(context_nodes),
                                        np.random.choice(self.noises,
                                                         node_pair_count*2*self.args.negative_samples)))
        positives = [1.0 for node in range(node_pair_count*2)]
        negatives = [0.0 for node in range(node_pair_count*self.args.negative_samples*2)]
        targets = np.array(positives + negatives)
        self.transfer_batch(source_nodes, context_nodes, targets,
                            persona_nodes, pure_source_nodes)

    def update_average_loss(self, loss_score):
        """
        Updating the average loss and the progress bar description.
        :param loss_score: Loss on the sample.
        """
        self.cumulative_loss = self.cumulative_loss + loss_score
        self.steps = self.steps + 1
        average_loss = self.cumulative_loss/self.steps
        self.walk_steps.set_description("Splitter (Loss=%g)" % round(average_loss, 4))

    def reset_average_loss(self, step):
        """
        Resetting the average loss.
        :param step: Current number of walks processed.
        """
        if step % 100 == 0:
            self.cumulative_loss = 0
            self.steps = 0

    def fit(self):
        """
        Fitting a model.
        """
        self.base_model_fit()
        self.create_split()
        self.setup_model()
        self.model.train()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.args.learning_rate)
        self.optimizer.zero_grad()
        print("\nLearning the joint model.\n")
        random.shuffle(self.persona_walker.paths)
        self.walk_steps = trange(len(self.persona_walker.paths), desc="Loss")
        for step in self.walk_steps:
            self.reset_average_loss(step)
            walk = self.persona_walker.paths[step]
            self.process_walk(walk)
            loss_score = self.optimize()
            self.update_average_loss(loss_score)

    def save_embedding(self):
        """
        Saving the node embedding.
        """
        print("\n\nSaving the model.\n")
        nodes = [node for node in self.egonet_splitter.persona_graph.nodes()]
        nodes.sort()
        nodes = torch.LongTensor(nodes).to(self.device)
        self.embedding = self.model.node_embedding(nodes).cpu().detach().numpy()
        embedding_header = ["id"] + ["x_" + str(x) for x in range(self.args.dimensions)]
        self.embedding = np.concatenate([np.array(range(self.embedding.shape[0])).reshape(-1, 1),
                                         self.embedding], axis=1)
        self.embedding = pd.DataFrame(self.embedding, columns=embedding_header)
        self.embedding.to_csv(self.args.embedding_output_path, index=None)

    def save_persona_graph_mapping(self):
        """
        Saving the persona map.
        """
        with open(self.args.persona_output_path, "w") as f:
            json.dump(self.egonet_splitter.personality_map, f)
class SplitterTrainer(object):
    """
    Class for training a Splitter.
    """
    def __init__(self, graph, args):
        """
        :param graph: NetworkX graph object.
        :param args: Arguments object.
        """
        self.graph = graph
        self.args = args
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def base_model_fit(self):
        """
        Fitting DeepWalk on the base graph.
        """
        self.base_walker = DeepWalker(self.graph, self.args)
        print("\nDoing base random walks.\n")
        self.base_walker.create_features()
        print("\nLearning the base model.\n")
        self.base_node_embedding = self.base_walker.learn_base_embedding()
        print("\nDeleting the base walker.\n")
        del self.base_walker

    def create_split(self):
        """
        Creating an EgoNetSplitter.
        """
        self.egonet_splitter = EgoNetSplitter(self.graph)
        self.persona_walker = DeepWalker(self.egonet_splitter.persona_graph, self.args)
        print("\nDoing persona random walks.\n")
        self.persona_walker.create_features()

    def setup_model(self):
        """
        Creating a model and transferring it to the GPU.
        """
        base_node_count = self.graph.number_of_nodes()
        persona_node_count = self.egonet_splitter.persona_graph.number_of_nodes()
        self.model = Splitter(self.args, base_node_count, persona_node_count)
        self.model.create_weights()
        self.model.initialize_weights(self.base_node_embedding,
                                      self.egonet_splitter.personality_map)
        self.model = self.model.to(self.device)

    def reset_node_sets(self):
        """
        Resetting the node sets.
        """
        self.pure_sources = []
        self.personas = []
        self.sources = []
        self.contexts = []
        self.targets = []

    def create_batch(self, source_node, context_node):
        """
        Extending the batch with a (source, context) pair and its negative samples.
        :param source_node: A source node.
        :param context_node: A target to predict.
        """
        self.pure_sources = self.pure_sources + [source_node]
        self.personas = self.personas + [self.egonet_splitter.personality_map[source_node]]
        self.sources = self.sources + [source_node]*(self.args.negative_samples+1)
        # random.sample needs a sequence, so the NetworkX NodeView is materialized first.
        self.contexts = self.contexts + [context_node] + random.sample(
            list(self.egonet_splitter.persona_graph.nodes()),
            self.args.negative_samples)
        self.targets = self.targets + [1.0] + [0.0]*self.args.negative_samples

    def transfer_batch(self):
        """
        Transferring the batch to the GPU.
        """
        self.sources = torch.LongTensor(self.sources).to(self.device)
        self.contexts = torch.LongTensor(self.contexts).to(self.device)
        self.targets = torch.FloatTensor(self.targets).to(self.device)
        self.personas = torch.LongTensor(self.personas).to(self.device)
        self.pure_sources = torch.LongTensor(self.pure_sources).to(self.device)

    def optimize(self):
        """
        Doing a weight update.
        """
        loss = self.model(self.sources, self.contexts, self.targets,
                          self.personas, self.pure_sources)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        self.reset_node_sets()
        return loss.item()

    def fit(self):
        """
        Fitting a model.
        """
        self.reset_node_sets()
        self.base_model_fit()
        self.create_split()
        self.setup_model()
        self.model.train()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.args.learning_rate)
        self.optimizer.zero_grad()
        print("\nLearning the joint model.\n")
        random.shuffle(self.persona_walker.paths)
        self.steps = 0
        self.losses = 0
        self.walk_steps = trange(len(self.persona_walker.paths), desc="Loss")
        for step in self.walk_steps:
            if step % 1000 == 0:
                self.steps = 0
                self.losses = 0
            walk = self.persona_walker.paths[step]
            # Forward contexts within the window.
            for i in range(self.args.walk_length - self.args.window_size):
                for j in range(1, self.args.window_size + 1):
                    source_node = walk[i]
                    context_node = walk[i + j]
                    self.create_batch(source_node, context_node)
            # Backward contexts within the window.
            for i in range(self.args.window_size, self.args.walk_length):
                for j in range(1, self.args.window_size + 1):
                    source_node = walk[i]
                    context_node = walk[i - j]
                    self.create_batch(source_node, context_node)
            self.transfer_batch()
            self.losses = self.losses + self.optimize()
            self.steps = self.steps + 1
            average_loss = self.losses/self.steps
            self.walk_steps.set_description("Splitter (Loss=%g)" % round(average_loss, 4))

    def save_embedding(self):
        """
        Saving the node embedding.
        """
        print("\n\nSaving the model.\n")
        nodes = torch.LongTensor([node for node in
                                  self.egonet_splitter.persona_graph.nodes()]).to(self.device)
        self.embedding = self.model.node_embedding(nodes).cpu().detach().numpy()
        embedding_header = ["id"] + ["x_" + str(x) for x in range(self.args.dimensions)]
        self.embedding = np.concatenate([np.array(range(self.embedding.shape[0])).reshape(-1, 1),
                                         self.embedding], axis=1)
        self.embedding = pd.DataFrame(self.embedding, columns=embedding_header)
        self.embedding.to_csv(self.args.embedding_output_path, index=None)

    def save_persona_graph_mapping(self):
        """
        Saving the persona map.
        """
        with open(self.args.persona_output_path, "w") as f:
            json.dump(self.egonet_splitter.personality_map, f)