Example #1
0
 def test_all_pairs_equality(self):
     '''Check that sequential and parallel all_pairs runs agree.

     Both results are converted with tolist() and then compared
     element-wise.
     '''
     sequential = all_pairs(self.catalogue).tolist()
     parallel = all_pairs(self.catalogue, parallel=True).tolist()
     np.testing.assert_array_equal(sequential, parallel)
Example #2
0
 def test_all_pairs_unicode(self):
     '''Smoke-test all_pairs on a mix of ASCII and non-ASCII strings.

     Nothing is asserted about the result; the test passes as long as
     the call does not raise.
     '''
     # NOTE(review): each unicode literal below holds two code points
     # (U+00C3 followed by U+00A4 / U+009F), i.e. the UTF-8 bytes of
     # a-umlaut and sharp-s written as characters. Presumably the single
     # code points U+00E4 / U+00DF were meant -- confirm before changing.
     words = ['ae', u'\xc3\xa4', 'ss', u'\xc3\x9f']
     all_pairs(words)
Example #3
0
 def test_all_pairs_sequential(self):
     '''Compare the sequential all_pairs result with self.all_pairs.'''
     actual = all_pairs(self.catalogue)
     expected = self.all_pairs
     np.testing.assert_array_equal(actual, expected)
Example #4
0
 def test_all_pairs_parallel(self):
     '''Compare the parallel all_pairs result with self.all_pairs.'''
     actual = all_pairs(self.catalogue, parallel=True)
     expected = self.all_pairs
     np.testing.assert_array_equal(actual, expected)
Example #5
0
        ("http://www.gutenberg.org/cache/epub/5323/pg5323.txt",   "Effi Bri."),
        ("http://www.gutenberg.org/files/26686/26686-0.txt",      "Birnbaum"),
        ("http://www.gutenberg.org/files/21593/21593-0.txt",      "Urteil"),
        ("http://www.gutenberg.org/cache/epub/22367/pg22367.txt", "Verwand.")]

# Download every document listed in ``urls`` and collect the raw response
# bodies in ``catalogue`` (same order as ``urls``).
catalogue = []

# The same browser-like User-Agent header is sent with every request, so
# the dict is built once instead of once per iteration.
headers = {'User-Agent': 'Mozilla/5.0'}

for url, name in urls:
    req = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(req)
    try:
        catalogue.append(response.read())
    finally:
        # Always release the connection, even if reading fails.
        response.close()

# Build the pairwise matrix over all downloaded texts, using the Jaccard
# distance and the parallel code path of all_pairs.
M = all_pairs(
    catalogue,
    parallel=True,
    distance=distances.jaccard,
    dist_kwargs={'mode': 1},
)

# Figure 1: the matrix rendered as an image with a colour bar.
pylab.figure(1)
pylab.title("similarity matrix")
pylab.imshow(M, cmap="Reds", interpolation="nearest", aspect='auto')
pylab.colorbar()

# Figure 2: dendrogram of the complete-linkage clustering of M. Each leaf
# is labelled with the second element of the matching ``urls`` entry.
pylab.figure(2)
pylab.title("complete linkage clustering")
clustering = cluster(M, method='complete')
hcluster.dendrogram(clustering, leaf_label_func=lambda idx: urls[idx][1])

# Hand both figures over to pylab for display.
pylab.show()
Example #6
0
 def calc():
     '''Log the current key and return all_pairs over the submission sources.

     ``self`` is not a parameter here; it is resolved from a surrounding
     scope that is not part of this snippet.
     '''
     log.debug('Calculating similarity matrix for key %s...', self.key)
     sources = [submission.source for submission in self.submissions]
     return all_pairs(sources)
Example #7
0
 def test_all_pairs_empty(self):
     '''Smoke-test all_pairs on a catalogue that contains an empty string.

     Nothing is asserted about the result; the call only has to succeed.
     '''
     documents = ['Something', 'nothing', '']
     all_pairs(documents)
Example #8
0
    }
}   // end of class HelloWorld
""", """
public class HelloWorld {

    public static void main(String[] args) {
        System.out.println("Hello World!");
    }
}   // end of class HelloWorld
""", """
public class HelloUniverse {

    public static void main(String[] args) {
        String message = "Hello World!";
        System.out.println(message);
    }
}
"""
    ]
    # Print the all_pairs result for three configurations: the default call,
    # the parallel variant, and a run using distances.kolmogorov.
    # The parenthesised single-argument form prints exactly the same text
    # under Python 2 and is also valid Python 3 syntax.
    print(all_pairs(catalogue))
    print(all_pairs(catalogue, parallel=True))
    print(all_pairs(catalogue, distance=distances.kolmogorov))

# Example code for loading a pickle file of submissions

#    d = pickle.load(open('submissions.pkl'))
#
#    for a in (a for a in d if a['submissions']):
#        catalogue = [s['source'] for s in a['submissions'] if s['source']]
#        print all_pairs(catalogue, parallel=True)
Example #9
0
 def test_all_pairs_equality(self):
     '''Check that the parallel all_pairs run matches the sequential one.

     Each result is turned into a plain list with tolist() before the
     element-wise comparison.
     '''
     seq_result = all_pairs(self.catalogue).tolist()
     par_result = all_pairs(self.catalogue, parallel=True).tolist()
     np.testing.assert_array_equal(seq_result, par_result)
Example #10
0
 def test_all_pairs_unicode(self):
     '''Smoke-test all_pairs with non-ASCII unicode strings in the input.

     The result is not inspected; the call merely has to complete.
     '''
     # NOTE(review): the unicode literals contain two code points each
     # (U+00C3 plus U+00A4 / U+009F) -- the UTF-8 byte values of a-umlaut
     # and sharp-s taken as characters. Possibly U+00E4 / U+00DF were
     # intended; verify before touching the test data.
     samples = ['ae', u'\xc3\xa4', 'ss', u'\xc3\x9f']
     all_pairs(samples)
Example #11
0
 def test_all_pairs_parallel(self):
     '''Check the parallel all_pairs output against self.all_pairs.'''
     computed = all_pairs(self.catalogue, parallel=True)
     reference = self.all_pairs
     np.testing.assert_array_equal(computed, reference)
Example #12
0
 def test_all_pairs_sequential(self):
     '''Check the sequential all_pairs output against self.all_pairs.'''
     computed = all_pairs(self.catalogue)
     reference = self.all_pairs
     np.testing.assert_array_equal(computed, reference)
Example #13
0
        System.out.println("Hello World!");
    }
}   // end of class HelloWorld
""", """
public class HelloWorld {

    public static void main(String[] args) {
        System.out.println("Hello World!");
    }
}   // end of class HelloWorld
""", """
public class HelloUniverse {

    public static void main(String[] args) {
        String message = "Hello World!";
        System.out.println(message);
    }
}
"""]
    # Default call.
    result = all_pairs(catalogue)
    print(result)

    # Same catalogue through the parallel code path.
    result = all_pairs(catalogue, parallel=True)
    print(result)

    # Same catalogue with distances.kolmogorov as the distance function.
    result = all_pairs(catalogue, distance=distances.kolmogorov)
    print(result)

# Example code for loading a pickle file of submissions

#    d = pickle.load(open('submissions.pkl'))
#
#    for a in (a for a in d if a['submissions']):
#        catalogue = [s['source'] for s in a['submissions'] if s['source']]
#        print all_pairs(catalogue, parallel=True)
Example #14
0
 def test_all_pairs_empty(self):
     '''Smoke-test all_pairs when one of the input strings is empty.

     The result is not checked; the call only has to return.
     '''
     inputs = ['Something', 'nothing', '']
     all_pairs(inputs)
Example #15
0
 def test_all_pairs_kwargs(self):
     '''Check that all_pairs accepts distance and dist_kwargs arguments.

     Two calls are made: one with an explicit distance function plus its
     keyword arguments, one with dist_kwargs only. Results are not
     inspected; the calls merely have to succeed.
     '''
     all_pairs(
         self.catalogue,
         distance=distances.jaccard,
         dist_kwargs={'mode': 2},
     )
     all_pairs(self.catalogue, dist_kwargs={'jaccard_mode': 2})
Example #16
0
 def test_all_pairs_kwargs(self):
     '''Exercise the distance / dist_kwargs keyword arguments of all_pairs.

     No assertion is made on the returned values; the test passes when
     neither call raises.
     '''
     # Explicit distance function together with its own keyword arguments.
     all_pairs(self.catalogue, distance=distances.jaccard,
               dist_kwargs={'mode': 2})

     # dist_kwargs alone, without naming a distance function.
     all_pairs(self.catalogue, dist_kwargs={'jaccard_mode': 2})
Example #17
0
        ("http://www.gutenberg.org/cache/epub/5323/pg5323.txt", "Effi Bri."),
        ("http://www.gutenberg.org/files/26686/26686-0.txt", "Birnbaum"),
        ("http://www.gutenberg.org/files/21593/21593-0.txt", "Urteil"),
        ("http://www.gutenberg.org/cache/epub/22367/pg22367.txt", "Verwand.")]

# Fetch each document named in ``urls``; the raw response bodies end up in
# ``catalogue`` in the same order as ``urls``.
catalogue = []

# Loop-invariant: every request carries the same browser-like User-Agent.
headers = {'User-Agent': 'Mozilla/5.0'}

for url, name in urls:
    req = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(req)
    try:
        catalogue.append(response.read())
    finally:
        # Close the response so the connection is released even on error.
        response.close()

# Pairwise matrix over the downloaded texts: Jaccard distance, computed via
# the parallel code path of all_pairs.
M = all_pairs(
    catalogue,
    parallel=True,
    distance=distances.jaccard,
    dist_kwargs={'mode': 1},
)

# First figure: the matrix drawn as an image, plus a colour bar.
pylab.figure(1)
pylab.title("similarity matrix")
pylab.imshow(M, cmap="Reds", interpolation="nearest", aspect='auto')
pylab.colorbar()

# Second figure: dendrogram of the complete-linkage clustering of M; leaves
# are labelled with the second element of the corresponding ``urls`` entry.
pylab.figure(2)
pylab.title("complete linkage clustering")
clustering = cluster(M, method='complete')
hcluster.dendrogram(clustering,
                    leaf_label_func=lambda idx: urls[idx][1])

# finally show