Esempio n. 1
0
def test_walker_custom(line_graph):
    """Check that ``UnsupervisedSampler`` uses a caller-supplied walker.

    The sampler is built around a ``CustomWalker`` instance and run with a
    batch size of 2; the number of batches must equal the number of nodes
    and every positively-labelled pair must be a self loop.
    """
    sampler = UnsupervisedSampler(line_graph, walker=CustomWalker())
    batches = sampler.run(2)

    expected_batches = line_graph.number_of_nodes()
    assert len(batches) == expected_batches

    # The custom walker is defined (elsewhere in the test module) so that
    # its walks only ever produce self loops as positive examples.
    for context_pairs, labels in batches:
        positive_pairs = context_pairs[labels == 1]
        for node, neighbour in positive_pairs:
            assert node == neighbour
Esempio n. 2
0
    def test_run_batch_sizes(self, line_graph):
        """Check the number and size of the batches returned by ``run``.

        All batches but the last must hold exactly ``batch_size`` samples;
        the last one may be shorter.
        """
        batch_size = 4
        # Samples expected per root node for length=2, number_of_walks=2.
        # Presumably 2 walks x 1 positive pair x 2 (positive + negative);
        # TODO confirm against the sampler implementation.
        samples_per_node = 4

        sampler = UnsupervisedSampler(G=line_graph, length=2, number_of_walks=2)
        batches = sampler.run(batch_size)

        expected_batches = np.ceil(
            len(line_graph.nodes()) * samples_per_node / batch_size
        )
        assert len(batches) == expected_batches

        # Every batch except the final one has to be completely full.
        for ids, labels in batches[:-1]:
            assert len(ids) == len(labels)
            assert len(labels) == batch_size

        # The final batch only needs to be consistent and not oversized.
        last_ids, last_labels = batches[-1]
        assert len(last_ids) == len(last_labels)
        assert len(last_ids) <= batch_size
Esempio n. 3
0
def test_ignored_param_warning(line_graph):
    """Check that walk parameters cannot be combined with an explicit walker.

    Passing ``length``, ``number_of_walks`` or ``seed`` together with
    ``walker`` must raise a ``ValueError`` whose message names the
    conflicting parameter.
    """
    walker = UniformRandomWalk(line_graph, n=2, length=3)

    # (conflicting keyword argument, pattern the error message must match)
    conflicting_cases = [
        ({"length": 5}, "cannot specify both 'walker' and 'length'"),
        (
            {"number_of_walks": 5},
            "cannot specify both 'walker' and 'number_of_walks'",
        ),
        ({"seed": 1}, "cannot specify both 'walker' and 'seed'"),
    ]

    for extra_kwargs, message in conflicting_cases:
        with pytest.raises(ValueError, match=message):
            UnsupervisedSampler(line_graph, walker=walker, **extra_kwargs)
Esempio n. 4
0
def test_walker_uniform_random(line_graph):
    """Check that batches follow the parameters of the supplied walker.

    The batch count must be derived from the ``n`` and ``length`` given to
    the ``UniformRandomWalk`` object, not from the sampler's own defaults.
    """
    batch_size = 4
    walk_length = 3
    walks_per_node = 2

    sampler = UnsupervisedSampler(
        line_graph,
        walker=UniformRandomWalk(line_graph, n=walks_per_node, length=walk_length),
    )
    batches = sampler.run(batch_size)

    # nodes x walks x (length - 1) x 2 samples in total, split into batches.
    total_samples = (
        line_graph.number_of_nodes() * walks_per_node * (walk_length - 1) * 2
    )
    assert len(batches) == np.ceil(total_samples / batch_size)
    def test_generator_multiple_batches(self):
        """Check that the sample generator can be advanced several times in a row."""
        n_feat = 4
        batch_size = 4
        number_of_batches = 3

        sampler = UnsupervisedSampler(G=example_Graph_2(n_feat))
        sample_gen = sampler.generator(batch_size)

        # Pull the requested number of batches one after another.
        batches = [next(sample_gen) for _ in range(number_of_batches)]

        assert len(batches) == number_of_batches
    def test_UnsupervisedSampler_parameter(self):
        """Check constructor validation, default root nodes and seeding of ``UnsupervisedSampler``."""
        raw_graph = create_test_graph()

        # Neither a missing graph nor a graph that is not a StellarGraph
        # object is accepted.
        for not_a_stellargraph in (None, raw_graph):
            with pytest.raises(ValueError):
                UnsupervisedSampler(G=not_a_stellargraph)

        g = StellarGraph(raw_graph)

        # NOTE: the walker checks below are disabled in this test and are
        # kept for reference only:
        #
        #     # only Uniform random walk is supported at the moment
        #     with pytest.raises(TypeError):
        #         UnsupervisedSampler(G=g, walker="any random walker")
        #
        #     # if no walker is provided, default to Uniform Random Walk
        #     sampler = UnsupervisedSampler(G=g)
        #     assert isinstance(sampler.walker, UniformRandomWalk)

        invalid_kwargs = (
            {"length": 1},  # a walk must be strictly longer than 1
            {"number_of_walks": 0},  # at least one walk per root node
            {"nodes": 1},  # nodes must be an iterable of node IDs
        )
        for kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                UnsupervisedSampler(G=g, **kwargs)

        # Without explicit root nodes, every node of the graph is a root.
        sampler = UnsupervisedSampler(G=g, nodes=None)
        assert sampler.nodes == list(g.nodes())

        # A fixed seed must make the sampler's random choices reproducible.
        sampler = UnsupervisedSampler(G=g, seed=1)
        expected_draws = [13, 84, 76, 25, 49, 44, 65, 78, 9, 2]
        assert sampler.random.choices(range(100), k=10) == expected_draws
Esempio n. 7
0
    def test_run_context_pairs(self, line_graph):
        """Check the (target, context) pairs produced by ``run`` for each node.

        Every node must appear as a target with exactly four samples, and
        each positively-labelled context must be a neighbour of its target.
        """
        batch_size = 4
        sampler = UnsupervisedSampler(G=line_graph, length=2, number_of_walks=2)

        # Collect every (context, label) sample under its target node.
        samples_by_target = defaultdict(list)
        for ids, labels in sampler.run(batch_size):
            for pair, label in zip(ids, labels):
                target, context = pair
                samples_by_target[target].append((context, label))

        assert len(samples_by_target) == len(line_graph.nodes())

        for target, sampled in samples_by_target.items():
            # 2 positive and 2 negative context pairs per target node.
            assert len(sampled) == 4

            # Walks have length 2, so a positive context is one step away
            # from the target, i.e. joined to it by an edge.
            for context, label in sampled:
                if label != 1:
                    continue
                assert context in set(line_graph.neighbors(target))
Esempio n. 8
0
    def test_UnsupervisedSampler_parameter(self, line_graph):
        """Check constructor validation and default root nodes of ``UnsupervisedSampler``."""
        invalid_kwargs = (
            {"G": None},  # no graph provided
            {"G": line_graph, "length": 1},  # walk length must exceed 1
            {"G": line_graph, "number_of_walks": 0},  # at least one walk per root
            {"G": line_graph, "nodes": 1},  # nodes must be an iterable of IDs
        )
        for kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                UnsupervisedSampler(**kwargs)

        # Without explicit root nodes, every node of the graph is a root.
        sampler = UnsupervisedSampler(G=line_graph, nodes=None)
        all_nodes = list(line_graph.nodes())
        assert sampler.nodes == all_nodes
    def test_generator_samples(self):
        """Check the structure of a single batch yielded by the sample generator."""
        n_feat = 4
        batch_size = 4

        sampler = UnsupervisedSampler(G=example_Graph_2(n_feat))
        samples = next(sampler.generator(batch_size))

        # A batch is two lists: (target, context) pairs and binary labels.
        assert len(samples) == 2
        pairs = samples[0]
        labels = samples[1]

        # Each (target, context) pair comes with exactly one label.
        assert len(pairs) == len(labels)

        # With an even batch size, a full batch of samples is returned.
        assert len(pairs) == batch_size
Esempio n. 10
0
def unsup_gs(
    g,
    num_samples,
    optimizer,
    batch_size=4,
    epochs=4,
    bias=True,
    dropout=0.0,
    normalize="l2",
    number_of_walks=1,
    walk_length=5,
    seed=0,
    shuffle=True,
):
    """Build an unsupervised GraphSAGE link model on ``g``, fit it and return it.

    Args:
        g: Graph handed to ``UnsupervisedSampler`` and ``GraphSAGELinkGenerator``;
            all of its nodes are used as root nodes for sampling.
        num_samples: Forwarded to ``GraphSAGELinkGenerator`` and ``unsup_gs_model``.
        optimizer: Forwarded to ``unsup_gs_model``.
        batch_size: Batch size given to ``GraphSAGELinkGenerator``.
        epochs: Number of epochs passed to ``model.fit``.
        bias: Forwarded to ``unsup_gs_model``.
        dropout: Forwarded to ``unsup_gs_model``.
        normalize: Forwarded to ``unsup_gs_model``.
        number_of_walks: ``number_of_walks`` of the ``UnsupervisedSampler``.
        walk_length: ``length`` of the ``UnsupervisedSampler``.
        seed: Value passed to ``set_seed``, ``tf.random.set_seed`` and, when
            ``shuffle`` is true, ``random.seed``.
        shuffle: Passed to ``model.fit``; also controls whether Python's
            ``random`` module is seeded.

    Returns:
        The model created by ``unsup_gs_model`` after ``fit`` has been called.
    """
    # Seed the random sources before any sampling takes place.
    set_seed(seed)
    tf.random.set_seed(seed)
    if shuffle:
        random.seed(seed)

    root_nodes = list(g.nodes())
    sampler = UnsupervisedSampler(
        g,
        nodes=root_nodes,
        length=walk_length,
        number_of_walks=number_of_walks,
    )
    link_generator = GraphSAGELinkGenerator(g, batch_size, num_samples)
    training_sequence = link_generator.flow(sampler)

    fitted = unsup_gs_model(
        num_samples, link_generator, optimizer, bias, dropout, normalize
    )
    fit_options = {
        "epochs": epochs,
        "verbose": 1,
        "use_multiprocessing": False,
        "workers": 4,
        "shuffle": shuffle,
    }
    fitted.fit(training_sequence, **fit_options)
    return fitted
Esempio n. 11
0
    def test_UnsupervisedSampler_parameter(self, line_graph):
        """Check constructor validation, default root nodes and seeding of ``UnsupervisedSampler``."""
        # Each entry is a set of constructor arguments that must be rejected.
        rejected_arguments = [
            dict(G=None),  # no graph provided
            dict(G=line_graph, length=1),  # walk length must exceed 1
            dict(G=line_graph, number_of_walks=0),  # at least one walk per root
            dict(G=line_graph, nodes=1),  # nodes must be an iterable of IDs
        ]
        for arguments in rejected_arguments:
            with pytest.raises(ValueError):
                UnsupervisedSampler(**arguments)

        # Without explicit root nodes, every node of the graph is a root.
        default_sampler = UnsupervisedSampler(G=line_graph, nodes=None)
        assert default_sampler.nodes == list(line_graph.nodes())

        # A fixed seed must make the sampler's random choices reproducible.
        seeded_sampler = UnsupervisedSampler(G=line_graph, seed=1)
        expected_draws = [13, 84, 76, 25, 49, 44, 65, 78, 9, 2]
        actual_draws = seeded_sampler.random.choices(range(100), k=10)
        assert actual_draws == expected_draws
    def test_generator_parameter(self):
        """Check that ``generator`` rejects invalid batch sizes once it is advanced.

        The error is expected from the first ``next`` call, not from the
        call to ``generator`` itself.
        """
        sampler = UnsupervisedSampler(G=StellarGraph(create_test_graph()))

        # (batch size, exception expected when the generator is advanced)
        rejected_batch_sizes = [
            (None, ValueError),
            ("x", TypeError),
            (0, ValueError),
            # Odd size; presumably rejected because samples come in
            # positive/negative pairs - confirm against the sampler.
            (3, ValueError),
        ]

        for bad_batch_size, expected_error in rejected_batch_sizes:
            sample_gen = sampler.generator(batch_size=bad_batch_size)
            with pytest.raises(expected_error):
                next(sample_gen)