def test_walker_custom(line_graph):
    """A walker object passed via ``walker=`` determines the sampled pairs."""
    sampler = UnsupervisedSampler(line_graph, walker=CustomWalker())
    all_batches = sampler.run(2)

    assert len(all_batches) == line_graph.number_of_nodes()

    # CustomWalker is defined so that every positive example (label == 1)
    # is a self loop: both ends of the pair must be the same node.
    for pairs, labels in all_batches:
        positive_pairs = pairs[labels == 1]
        for source, target in positive_pairs:
            assert source == target
def test_run_batch_sizes(self, line_graph):
    """``run`` yields full batches, with only the final one allowed to be short."""
    batch_size = 4
    sampler = UnsupervisedSampler(G=line_graph, length=2, number_of_walks=2)
    batches = sampler.run(batch_size)

    # the number of batches is computed from 4 samples per node
    node_count = len(line_graph.nodes())
    expected_batches = np.ceil(node_count * 4 / batch_size)
    assert len(batches) == expected_batches

    # every batch except the last must be exactly batch_size long
    for pair_ids, pair_labels in batches[:-1]:
        assert len(pair_ids) == len(pair_labels)
        assert len(pair_labels) == batch_size

    # the last batch may be smaller, but ids and labels still line up
    last_ids, last_labels = batches[-1]
    assert len(last_ids) == len(last_labels)
    assert len(last_ids) <= batch_size
def test_ignored_param_warning(line_graph):
    """Passing ``walker`` together with a walk parameter raises ``ValueError``."""
    walker = UniformRandomWalk(line_graph, n=2, length=3)

    # each entry: (conflicting keyword arguments, expected error message)
    conflicts = [
        ({"length": 5}, "cannot specify both 'walker' and 'length'"),
        (
            {"number_of_walks": 5},
            "cannot specify both 'walker' and 'number_of_walks'",
        ),
        ({"seed": 1}, "cannot specify both 'walker' and 'seed'"),
    ]

    for extra_kwargs, message in conflicts:
        with pytest.raises(ValueError, match=message):
            UnsupervisedSampler(line_graph, walker=walker, **extra_kwargs)
def test_walker_uniform_random(line_graph):
    """The sampler uses the walker's own ``n``/``length`` rather than its defaults."""
    length = 3
    number_of_walks = 2
    batch_size = 4

    walker = UniformRandomWalk(line_graph, n=number_of_walks, length=length)
    sampler = UnsupervisedSampler(line_graph, walker=walker)
    batches = sampler.run(batch_size)

    # batches should match the parameters used to create the walker object,
    # instead of the defaults for UnsupervisedSampler
    # (presumably (length - 1) positive pairs per walk, doubled by the
    # matching negatives -- confirm against the sampler implementation)
    total_samples = line_graph.number_of_nodes() * number_of_walks * (length - 1) * 2
    expected_num_batches = np.ceil(total_samples / batch_size)

    assert len(batches) == expected_num_batches
def test_generator_multiple_batches(self):
    """The sample generator can be advanced several times in a row."""
    n_feat = 4
    batch_size = 4
    number_of_batches = 3

    graph = example_Graph_2(n_feat)
    sampler = UnsupervisedSampler(G=graph)
    sample_gen = sampler.generator(batch_size)

    # pull the requested number of batches, one next() call per batch
    batches = [next(sample_gen) for _ in range(number_of_batches)]

    assert len(batches) == number_of_batches
def test_UnsupervisedSampler_parameter(self):
    """Check constructor validation, default root nodes and seeded randomness.

    Covers: a missing graph, a graph that is not a StellarGraph, too-short
    walks, too few walks, a non-iterable ``nodes`` argument, the default
    root-node list, and reproducibility of ``sampler.random`` under a seed.
    """
    g = create_test_graph()

    # if no graph is provided
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=None)

    # graph has to be a Stellargraph object
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g)

    g = StellarGraph(g)

    # NOTE(review): checks on the ``walker`` argument (TypeError for a
    # non-walker value, UniformRandomWalk as the default) were disabled in
    # this test; they are left out here rather than kept as dead code.

    # walk must have length strictly greater than 1
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g, length=1)

    # at least 1 walk from each root node
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g, number_of_walks=0)

    # the nodes parameter should be an iterable of node IDs
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g, nodes=1)

    # if no root nodes are provided for sampling, default to using all
    # nodes as root nodes
    sampler = UnsupervisedSampler(G=g, nodes=None)
    assert sampler.nodes == list(g.nodes())

    # if the seed value is provided, check that the random choices are
    # reproducible
    sampler = UnsupervisedSampler(G=g, seed=1)
    assert sampler.random.choices(range(100), k=10) == [
        13,
        84,
        76,
        25,
        49,
        44,
        65,
        78,
        9,
        2,
    ]
def test_run_context_pairs(self, line_graph):
    """Each target node gets four samples, and positives are real neighbours."""
    batch_size = 4
    sampler = UnsupervisedSampler(G=line_graph, length=2, number_of_walks=2)
    batches = sampler.run(batch_size)

    # collect every (context, label) sample under its target node
    samples_by_target = defaultdict(list)
    for pair_ids, pair_labels in batches:
        for (target, context), label in zip(pair_ids, pair_labels):
            samples_by_target[target].append((context, label))

    assert len(samples_by_target) == len(line_graph.nodes())

    for target, samples in samples_by_target.items():
        # exactly 2 positive and 2 negative context pairs for each target node
        assert len(samples) == 4

        # since each walk has length = 2, there must be an edge between
        # each positive context pair
        for context, label in samples:
            if label != 1:
                continue
            assert context in set(line_graph.neighbors(target))
def test_UnsupervisedSampler_parameter(self, line_graph):
    """Invalid constructor arguments raise; ``nodes=None`` means all nodes."""
    invalid_kwargs = [
        # if no graph is provided
        {"G": None},
        # walk must have length strictly greater than 1
        {"G": line_graph, "length": 1},
        # at least 1 walk from each root node
        {"G": line_graph, "number_of_walks": 0},
        # the nodes parameter should be an iterable of node IDs
        {"G": line_graph, "nodes": 1},
    ]
    for kwargs in invalid_kwargs:
        with pytest.raises(ValueError):
            UnsupervisedSampler(**kwargs)

    # if no root nodes are provided for sampling, all nodes of the graph
    # are used as root nodes
    sampler = UnsupervisedSampler(G=line_graph, nodes=None)
    every_node = list(line_graph.nodes())
    assert sampler.nodes == every_node
def test_generator_samples(self):
    """One generator step returns matching pair and label lists of batch size."""
    n_feat = 4
    batch_size = 4

    graph = example_Graph_2(n_feat)
    sample_gen = UnsupervisedSampler(G=graph).generator(batch_size)
    batch = next(sample_gen)

    # return two lists: [(target,context)] pairs and [1/0] binary labels
    assert len(batch) == 2

    # each (target, context) pair has a matching label
    pair_count = len(batch[0])
    assert pair_count == len(batch[1])

    # batch-size number of samples are returned if batch_size is even
    assert len(batch[0]) == batch_size
def unsup_gs(
    g,
    num_samples,
    optimizer,
    batch_size=4,
    epochs=4,
    bias=True,
    dropout=0.0,
    normalize="l2",
    number_of_walks=1,
    walk_length=5,
    seed=0,
    shuffle=True,
):
    """Build a model with ``unsup_gs_model`` and fit it on sampled links.

    Args:
        g: graph passed to ``UnsupervisedSampler`` and ``GraphSAGELinkGenerator``.
        num_samples: passed to the link generator and to ``unsup_gs_model``.
        optimizer: passed to ``unsup_gs_model``.
        batch_size: batch size for the link generator.
        epochs: number of epochs passed to ``model.fit``.
        bias, dropout, normalize: forwarded to ``unsup_gs_model``.
        number_of_walks: walks per root node for the sampler.
        walk_length: walk length for the sampler.
        seed: value used for every seeding call below.
        shuffle: forwarded to ``model.fit``; when true, Python's ``random``
            module is seeded as well.

    Returns:
        The fitted model.
    """
    # seed the random sources before any sampling happens; the order of
    # these calls is kept as-is
    set_seed(seed)
    tf.random.set_seed(seed)
    if shuffle:
        random.seed(seed)

    # every node of the graph is used as a root node for the walks
    root_nodes = list(g.nodes())
    sampler = UnsupervisedSampler(
        g, nodes=root_nodes, length=walk_length, number_of_walks=number_of_walks
    )

    link_generator = GraphSAGELinkGenerator(g, batch_size, num_samples)
    train_flow = link_generator.flow(sampler)

    model = unsup_gs_model(
        num_samples, link_generator, optimizer, bias, dropout, normalize
    )

    fit_options = dict(
        epochs=epochs,
        verbose=1,
        use_multiprocessing=False,
        workers=4,
        shuffle=shuffle,
    )
    model.fit(train_flow, **fit_options)

    return model
def test_UnsupervisedSampler_parameter(self, line_graph):
    """Constructor validation, default root nodes, and seeded reproducibility."""
    graph = line_graph

    # if no graph is provided
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=None)

    # walk must have length strictly greater than 1
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=graph, length=1)

    # at least 1 walk from each root node
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=graph, number_of_walks=0)

    # the nodes parameter should be an iterable of node IDs
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=graph, nodes=1)

    # if no root nodes are provided for sampling, all nodes of the graph
    # are used as root nodes
    default_sampler = UnsupervisedSampler(G=graph, nodes=None)
    assert default_sampler.nodes == list(graph.nodes())

    # if the seed value is provided, check that the random choices are
    # reproducible
    seeded_sampler = UnsupervisedSampler(G=graph, seed=1)
    drawn = seeded_sampler.random.choices(range(100), k=10)
    expected_draw = [13, 84, 76, 25, 49, 44, 65, 78, 9, 2]
    assert drawn == expected_draw
def test_generator_parameter(self):
    """Invalid ``batch_size`` values make the generator raise on first use."""
    graph = StellarGraph(create_test_graph())
    sampler = UnsupervisedSampler(G=graph)

    # generator should be provided with a valid batch size. i.e. an integer >=1
    # each entry: (batch_size, exception expected from the first next() call)
    # NOTE(review): 3 is presumably rejected for being odd -- confirm
    # against the generator implementation.
    bad_batch_sizes = [
        (None, ValueError),
        ("x", TypeError),
        (0, ValueError),
        (3, ValueError),
    ]

    for batch_size, expected_error in bad_batch_sizes:
        # creating the generator is kept outside the raises block: only the
        # first next() call is required to raise
        sample_gen = sampler.generator(batch_size=batch_size)
        with pytest.raises(expected_error):
            next(sample_gen)