def test_all_pairs_equality(self):
    '''Check that all_pairs yields equal results with and without parallel=True.

    Both results are converted with ``.tolist()`` before they are compared.
    '''
    sequential_result = all_pairs(self.catalogue)
    sequential_rows = sequential_result.tolist()

    parallel_result = all_pairs(self.catalogue, parallel=True)
    parallel_rows = parallel_result.tolist()

    np.testing.assert_array_equal(sequential_rows, parallel_rows)
def test_all_pairs_unicode(self):
    '''Smoke test: feed all_pairs a mix of plain and non-ASCII unicode strings.

    The return value is not inspected; the call only has to finish
    without raising.
    '''
    samples = ['ae', u'\xc3\xa4', 'ss', u'\xc3\x9f']
    all_pairs(samples)
def test_all_pairs_sequential(self):
    '''Compare all_pairs (called without parallel) against the stored fixture.

    The expected values are read from ``self.all_pairs``.
    '''
    computed = all_pairs(self.catalogue)
    np.testing.assert_array_equal(computed, self.all_pairs)
def test_all_pairs_parallel(self):
    '''Compare all_pairs (called with parallel=True) against the stored fixture.

    The expected values are read from ``self.all_pairs``.
    '''
    computed = all_pairs(self.catalogue, parallel=True)
    np.testing.assert_array_equal(computed, self.all_pairs)
("http://www.gutenberg.org/cache/epub/5323/pg5323.txt", "Effi Bri."), ("http://www.gutenberg.org/files/26686/26686-0.txt", "Birnbaum"), ("http://www.gutenberg.org/files/21593/21593-0.txt", "Urteil"), ("http://www.gutenberg.org/cache/epub/22367/pg22367.txt", "Verwand.")] # get it from the interwebs catalogue = [] for url, name in urls: headers = {'User-Agent': 'Mozilla/5.0'} req = urllib2.Request(url, None, headers) catalogue.append(urllib2.urlopen(req).read()) # calc similarity matrix M = all_pairs(catalogue, distance=distances.jaccard, dist_kwargs=dict(mode=1), parallel=True) # plot similarity matrix pylab.figure(1) pylab.title("similarity matrix") pylab.imshow(M, aspect='auto', interpolation="nearest", cmap="Reds") pylab.colorbar() # plot complete linkage pylab.figure(2) pylab.title("complete linkage clustering") hcluster.dendrogram(cluster(M, method='complete'), leaf_label_func=lambda i: urls[i][1]) # finally show pylab.show()
def calc():
    '''Log the key being processed, then run all_pairs over the submission sources.

    Reads ``self.key`` and ``self.submissions`` from the enclosing scope and
    returns whatever all_pairs returns for the list of ``source`` attributes.
    '''
    log.debug('Calculating similarity matrix for key %s...', self.key)
    sources = [submission.source for submission in self.submissions]
    return all_pairs(sources)
def test_all_pairs_empty(self):
    '''Smoke test: call all_pairs on a catalogue that contains an empty string.

    The return value is not inspected; the call only has to finish
    without raising.
    '''
    samples = ['Something', 'nothing', '']
    all_pairs(samples)
} } // end of class HelloWorld """, """ public class HelloWorld { public static void main(String[] args) { System.out.println("Hello World!"); } } // end of class HelloWorld """, """ public class HelloUniverse { public static void main(String[] args) { String message = "Hello World!"; System.out.println(message); } } """ ] print all_pairs(catalogue) print all_pairs(catalogue, parallel=True) print all_pairs(catalogue, distance=distances.kolmogorov) # Example code for loading a pickle file of submissions # d = pickle.load(open('submissions.pkl')) # # for a in (a for a in d if a['submissions']): # catalogue = [s['source'] for s in a['submissions'] if s['source']] # print all_pairs(catalogue, parallel=True)
System.out.println("Hello World!"); } } // end of class HelloWorld """, """ public class HelloWorld { public static void main(String[] args) { System.out.println("Hello World!"); } } // end of class HelloWorld """, """ public class HelloUniverse { public static void main(String[] args) { String message = "Hello World!"; System.out.println(message); } } """] print(all_pairs(catalogue)) print(all_pairs(catalogue, parallel=True)) print(all_pairs(catalogue, distance=distances.kolmogorov)) # Example code for loading a pickle file of submissions # d = pickle.load(open('submissions.pkl')) # # for a in (a for a in d if a['submissions']): # catalogue = [s['source'] for s in a['submissions'] if s['source']] # print all_pairs(catalogue, parallel=True)
def test_all_pairs_kwargs(self):
    '''Smoke test: all_pairs must accept the distance and dist_kwargs arguments.

    Neither return value is inspected; both calls only have to finish
    without raising.
    '''
    # Explicit distance function together with a keyword dict for it.
    explicit_kwargs = {'mode': 2}
    all_pairs(self.catalogue,
              distance=distances.jaccard,
              dist_kwargs=explicit_kwargs)

    # No distance argument given; only a keyword dict is supplied.
    default_kwargs = {'jaccard_mode': 2}
    all_pairs(self.catalogue, dist_kwargs=default_kwargs)
("http://www.gutenberg.org/cache/epub/5323/pg5323.txt", "Effi Bri."), ("http://www.gutenberg.org/files/26686/26686-0.txt", "Birnbaum"), ("http://www.gutenberg.org/files/21593/21593-0.txt", "Urteil"), ("http://www.gutenberg.org/cache/epub/22367/pg22367.txt", "Verwand.")] # get it from the interwebs catalogue = [] for url, name in urls: headers = {'User-Agent': 'Mozilla/5.0'} req = urllib2.Request(url, None, headers) catalogue.append(urllib2.urlopen(req).read()) # calc similarity matrix M = all_pairs(catalogue, distance=distances.jaccard, dist_kwargs=dict(mode=1), parallel=True) # plot similarity matrix pylab.figure(1) pylab.title("similarity matrix") pylab.imshow(M, aspect='auto', interpolation="nearest", cmap="Reds") pylab.colorbar() # plot complete linkage pylab.figure(2) pylab.title("complete linkage clustering") hcluster.dendrogram(cluster(M, method='complete'), leaf_label_func=lambda i: urls[i][1]) # finally show