Ejemplo n.º 1
0
    def test_one_clutter_after(self):
        """Check a title against itself with junk appended on one side.

        A random paper title and a random junk entry are chosen. The junk is
        appended to the title on one reference only, and the comparison is
        asserted to succeed with the cluttered reference in either argument
        position.
        """
        title_idx = random.randint(0, len(paper_titles) - 1)
        clutter_idx = random.randint(0, len(junk) - 1)

        # Junk trails the second reference on the first pass, and the first
        # reference on the second pass.
        for junk_on_first in (False, True):
            clean = paper_titles[title_idx]
            cluttered = paper_titles[title_idx] + junk[clutter_idx]
            if junk_on_first:
                first, second = cluttered, clean
            else:
                first, second = clean, cluttered
            shared = set(first) & set(second)

            self.assertTrue(
                arrays_contain_same_reference(first, second, shared))
Ejemplo n.º 2
0
    def test_one_clutter_before(self):
        """Check a title against itself with junk prepended on one side.

        A random paper title and a random junk entry are chosen. The junk is
        placed in front of the title on one reference only, and the
        comparison is asserted to succeed with the cluttered reference in
        either argument position.
        """
        title_idx = random.randint(0, len(paper_titles) - 1)
        clutter_idx = random.randint(0, len(junk) - 1)

        # Junk leads the second reference on the first pass, and the first
        # reference on the second pass.
        for junk_on_first in (False, True):
            clean = paper_titles[title_idx]
            cluttered = junk[clutter_idx] + paper_titles[title_idx]
            if junk_on_first:
                first, second = cluttered, clean
            else:
                first, second = clean, cluttered
            shared = set(first) & set(second)

            self.assertTrue(
                arrays_contain_same_reference(first, second, shared))
Ejemplo n.º 3
0
    def test_noclutter_similar(self):
        """Check that a randomly chosen similar-title pair is not matched.

        The two members of the pair are compared without any added junk and
        the comparison is asserted to fail.
        """
        pair_idx = random.randint(0, len(paper_title_similar_pairs) - 1)
        pair = paper_title_similar_pairs[pair_idx]

        first, second = pair[0], pair[1]
        shared = set(first) & set(second)

        self.assertFalse(
            arrays_contain_same_reference(first, second, shared))
Ejemplo n.º 4
0
    def test_both_clutter_after_repeated_different(self):
        """Check one title followed by different, partly overlapping junk.

        The fixture is hand-built so that the two junk tails share a word
        ("junk") and the first tail also repeats a word from the title
        ("this"). The two references are asserted to match.
        """
        title = ["this", "is", "the", "paper", "title"]

        # Artificially construct something so that the overlap is in the junk
        first = title + ["this", "here", "junk", "words"]
        second = title + ["yet", "more", "stuff", "junk"]
        shared = set(first) & set(second)

        self.assertTrue(
            arrays_contain_same_reference(first, second, shared))
Ejemplo n.º 5
0
    def test_noclutter_cutoff(self):
        """Check a full title against the same title minus its last word.

        Both references are built from ``paper_titles[0]``; the second one
        has its final element sliced off. The comparison is asserted to
        succeed.
        """
        # What do we want to be the case here? If we remove the last word in
        # one of the titles then it's sort of like the other one has junk at
        # the end?
        first = paper_titles[0]
        # Truncate the SAME title: comparing against a different title would
        # test unrelated papers rather than a cut-off reference.
        second = paper_titles[0][:-1]
        common = set(first).intersection(set(second))

        # Go with a more inclusive assumption for now...
        self.assertTrue(arrays_contain_same_reference(first, second, common))
Ejemplo n.º 6
0
    def test_both_sameauthor_difftitle_before(self):
        """Check a similar-title pair with one author list prepended to both.

        The first entry of ``paper_title_similar_pairs`` supplies the two
        titles; the same author tokens are placed in front of each, and the
        resulting references are asserted to match.
        """
        # NOTE(review): test_noclutter_similar expects similar pairs NOT to
        # match; here the shared author prefix flips the expectation to a
        # match - confirm this is the intended behaviour.
        author_tokens = ["john", "baker", "jill", "smith", "bea", "zisserman"]
        pair = paper_title_similar_pairs[0]

        first = author_tokens + pair[0]
        second = author_tokens + pair[1]
        shared = set(first) & set(second)

        self.assertTrue(
            arrays_contain_same_reference(first, second, shared))
Ejemplo n.º 7
0
    def test_both_clutter_after(self):
        """Check one title followed by two different junk entries.

        A random paper title is chosen together with two distinct random
        junk entries; each junk entry is appended to the title to form one
        reference, and the two references are asserted to match.

        Raises:
            ValueError: if ``junk`` holds fewer than two entries, since two
                distinct entries cannot then be drawn.
        """
        paper_ind = random.randint(0, len(paper_titles) - 1)
        # Two *different* junk entries are required. sample() draws distinct
        # indices in one step and fails loudly on a too-small pool, instead
        # of redrawing in a loop that cannot terminate when only one junk
        # entry exists.
        junk_first_ind, junk_second_ind = random.sample(range(len(junk)), 2)

        first = paper_titles[paper_ind] + junk[junk_first_ind]
        second = paper_titles[paper_ind] + junk[junk_second_ind]
        common = set(first).intersection(set(second))

        self.assertTrue(arrays_contain_same_reference(first, second, common))
Ejemplo n.º 8
0
    def test_one_clutter_after_repeatword(self):
        """Check trailing junk that repeats a word from the title.

        The junk tail contains 'semantic', which also appears in the title.
        The bare title and the title-plus-junk are asserted to match with
        the cluttered reference in either argument position.
        """
        # use clutter which contains a word which exists in the paper title
        title = [
            'whats', 'point', 'semantic', 'segmentation', 'with', 'point',
            'supervision'
        ]
        tail = [
            '3rd', 'workshop', 'semantic', 'perception', 'mapping',
            'exploration', 'spme'
        ]

        # Junk on the second reference first, then on the first reference.
        for junk_on_first in (False, True):
            cluttered = title + tail
            if junk_on_first:
                first, second = cluttered, title
            else:
                first, second = title, cluttered
            shared = set(first) & set(second)

            self.assertTrue(
                arrays_contain_same_reference(first, second, shared))
Ejemplo n.º 9
0
    def test_one_clutter_before_repeatword(self):
        """Check leading junk that repeats a word from the title.

        The junk prefix contains 'image', which also appears in the title.
        The bare title and the junk-plus-title are asserted to match.
        """
        title = [
            'deeplab', 'semantic', 'image', 'segmentation', 'with', 'deep',
            'convolutional', 'nets', 'atrous', 'convolution', 'fully',
            'connected', 'crfs'
        ]
        prefix = [
            'navab', 'hornegger', 'wells', 'frangi', 'eds', 'medical', 'image',
            'computing', 'computer-assisted', 'intervention', 'miccai', '2015'
        ]

        cluttered = prefix + title
        shared = set(title) & set(cluttered)

        self.assertTrue(
            arrays_contain_same_reference(title, cluttered, shared))
Ejemplo n.º 10
0
    def test_one_clutter_before_after_repeatword_after(self):
        """Check junk on both sides where the trailing junk repeats a word.

        The trailing junk contains 'semantic', which also appears in the
        title. The bare title and the fully cluttered reference are asserted
        to match.
        """
        title = [
            'deeplab', 'semantic', 'image', 'segmentation', 'with', 'deep',
            'convolutional', 'nets', 'atrous', 'convolution', 'fully',
            'connected', 'crfs'
        ]
        prefix = [
            'torch7', 'matlab-like', 'environment', 'machine', 'learning',
            'biglearn', 'nips', 'workshop', 'dai', 'sun', '2015'
        ]
        suffix = [
            '3rd', 'workshop', 'semantic', 'perception', 'mapping',
            'exploration', 'spme'
        ]

        cluttered = prefix + title + suffix
        shared = set(title) & set(cluttered)

        self.assertTrue(
            arrays_contain_same_reference(title, cluttered, shared))
Ejemplo n.º 11
0
    def test_one_clutter_before_after_repeatword_before(self):
        """Check junk on both sides where the leading junk repeats a word.

        The leading junk contains 'image', which also appears in the title.
        The bare title and the fully cluttered reference are asserted to
        match.
        """
        title = [
            'deeplab', 'semantic', 'image', 'segmentation', 'with', 'deep',
            'convolutional', 'nets', 'atrous', 'convolution', 'fully',
            'connected', 'crfs'
        ]
        prefix = [
            'navab', 'hornegger', 'wells', 'frangi', 'eds', 'medical', 'image',
            'computing', 'computer-assisted', 'intervention', 'miccai', '2015'
        ]
        suffix = [
            'volume', 'jmlr', 'proceedings', 'pages', '195-206', 'jmlrorg',
            '2012', 'karen', 'simonyan', 'andrew', 'zisserman'
        ]

        cluttered = prefix + title + suffix
        shared = set(title) & set(cluttered)

        self.assertTrue(
            arrays_contain_same_reference(title, cluttered, shared))
Ejemplo n.º 12
0
    def test_noclutter_invalid(self):
        """Check that the first two paper titles are not matched.

        ``paper_titles[0]`` and ``paper_titles[1]`` are compared with no
        added junk and the comparison is asserted to fail.
        """
        title_a, title_b = paper_titles[0], paper_titles[1]
        shared = set(title_a) & set(title_b)

        self.assertFalse(
            arrays_contain_same_reference(title_a, title_b, shared))
Ejemplo n.º 13
0
 def test_noclutter(self):
     """Check that every entry of ``paper_titles`` matches itself.

     Each title is passed as both references, with the set of its own
     elements as the common-word set, and the comparison is asserted to
     succeed.
     """
     for title in paper_titles:
         # A title intersected with itself is just its own element set.
         common = set(title)
         # Name the offending title so a failure is traceable to its input.
         self.assertTrue(
             arrays_contain_same_reference(title, title, common),
             msg="title did not match itself: %r" % (title,))