def test_map(self):
    """Plot two groups of points on a ``Map`` and display the figure.

    Each group is drawn with its own colour obtained from
    ``GeneralMethods.getRandomColor()``. This is a visual check only; it
    makes no assertions.
    """
    us_map = Map()
    # Each entry is the pair of positional sequences handed to plotPoints.
    point_groups = (
        ([-105.16, -117.16, -77.00], [40.02, 32.73, 38.55]),
        ([-114.21, -88.10], [48.25, 17.29]),
    )
    for first_coords, second_coords in point_groups:
        group_color = GeneralMethods.getRandomColor()
        us_map.plotPoints(first_coords, second_coords, color=group_color)
    plt.show()
def runMRJob(mrJobClass, outputFileName, inputFileList, mrJobClassParams=None, args=None, **kwargs):
    """Run a map-reduce job and write its output to a local file as JSON.

    mrJobClass       -- class instantiated as ``mrJobClass(args=..., **mrJobClassParams)``.
    outputFileName   -- local path; removed with ``rm -rf`` before the job runs.
    inputFileList    -- forwarded to ``runJob`` as ``inputFileList``.
    mrJobClassParams -- extra keyword arguments for the job constructor
                        (defaults to none).
    args             -- argument list for the job constructor; defaults to
                        ``['-r', 'hadoop']`` when omitted or None.
    kwargs           -- forwarded unchanged to ``runJob``.
    """
    # Build the defaults per call so no mutable object is shared between
    # invocations.
    if mrJobClassParams is None:
        mrJobClassParams = {}
    if args is None:
        args = '-r hadoop'.split()
    mrJob = mrJobClass(args=args, **mrJobClassParams)
    GeneralMethods.runCommand('rm -rf %s' % outputFileName)
    for l in mrJob.runJob(inputFileList=inputFileList, **kwargs):
        # Only element [1] of each yielded item is written (presumably the
        # value of a (key, value) pair -- confirm against runJob).
        FileIO.writeToFileAsJson(l[1], outputFileName)
def runMRJobWithOutPutToHDFS(mrJobClass, outputFileName, inputFileList, mrJobClassParams=None, args=None, **kwargs):
    """Run a map-reduce job whose output is left on HDFS.

    mrJobClass       -- class instantiated as ``mrJobClass(args=..., **mrJobClassParams)``.
    outputFileName   -- HDFS path; removed with ``hadoop fs -rm -r`` before
                        the job runs, then passed to the job as its output.
    inputFileList    -- forwarded to the job as ``inputFileList``.
    mrJobClassParams -- extra keyword arguments for the job constructor
                        (defaults to none).
    args             -- argument list for the job constructor; defaults to
                        ``['-r', 'hadoop']`` when omitted or None.
    kwargs           -- forwarded unchanged to ``runJobWithOutPutToHDFS``.
    """
    # Build the defaults per call so no mutable object is shared between
    # invocations.
    if mrJobClassParams is None:
        mrJobClassParams = {}
    if args is None:
        args = '-r hadoop'.split()
    mrJob = mrJobClass(args=args, **mrJobClassParams)
    GeneralMethods.runCommand('hadoop fs -rm -r %s' % outputFileName)
    mrJob.runJobWithOutPutToHDFS(outputFileName, inputFileList=inputFileList, **kwargs)
def plot_graph_clusters_on_world_map(graph, s=0, lw=0, alpha=0.6, bkcolor='#CFCFCF', *args, **kwargs):
    """Cluster ``graph`` and draw its nodes and intra-cluster edges on a map.

    The graph is clustered with ``clusterUsingAffinityPropagation``; every
    cluster gets a random colour. Nodes are plotted through
    ``plotPointsOnWorldMap`` and each edge whose two endpoints share a
    cluster is drawn as a great circle in that cluster's colour.

    graph   -- graph whose node ids are location ids with '_' in place of ' '.
    s, lw   -- forwarded to ``plotPointsOnWorldMap``.
    alpha   -- transparency used for the great-circle edges.
    bkcolor -- accepted for interface compatibility; not used here.
    args, kwargs -- forwarded to ``plotPointsOnWorldMap``.

    Returns ``(no_of_clusters, tuples_of_location_and_cluster_id)`` exactly
    as produced by the clustering call.
    """
    no_of_clusters, tuples_of_location_and_cluster_id = clusterUsingAffinityPropagation(graph)
    cluster_id_of = dict(tuples_of_location_and_cluster_id)
    color_of_cluster = dict(
        (cluster_id, GeneralMethods.getRandomColor())
        for cluster_id in range(no_of_clusters)
    )

    def to_point(location_id):
        # Node ids use '_' where the location id uses ' '.
        return getLocationFromLid(location_id.replace('_', ' '))

    points, colors = zip(*[
        (to_point(location_id), color_of_cluster[cluster_id_of[location_id]])
        for location_id in graph.nodes()
    ])
    _, m = plotPointsOnWorldMap(points, c=colors, s=s, lw=lw, returnBaseMapObject=True, *args, **kwargs)
    # Edge data is not needed: the former lookup of data['w'] was unused and
    # only made edges without a 'w' attribute fail.
    for u, v, _data in graph.edges(data=True):
        if cluster_id_of[u] != cluster_id_of[v]:
            continue
        start, end = to_point(u), to_point(v)
        # Components are swapped when passed on (presumably points are
        # (lat, lon) and drawgreatcircle wants lon first -- confirm).
        m.drawgreatcircle(start[1], start[0], end[1], end[0],
                          color=color_of_cluster[cluster_id_of[u]], alpha=alpha)
    return (no_of_clusters, tuples_of_location_and_cluster_id)
def getFrequentItemsets(self, maxdepth=0):
    """Mine the data and return frequent itemsets with ids mapped to items.

    Runs ``read_data``, ``prune_items`` and ``eclat_mine(self.data)`` in that
    order, then translates every item id in ``self.frequentItemsets`` back to
    its item via the reverse of ``self.itemToItemIdMap``.

    maxdepth -- accepted but not used by this method.

    Returns a list of ``(items, second)`` tuples, where ``items`` is the
    translated first element of each itemset and ``second`` is its second
    element unchanged (presumably the support -- confirm).
    """
    self.read_data()
    self.prune_items()
    self.eclat_mine(self.data)
    item_by_id = GeneralMethods.reverseDict(self.itemToItemIdMap)
    translated = []
    for itemset in self.frequentItemsets:
        items = [item_by_id[item_id] for item_id in itemset[0]]
        translated.append((items, itemset[1]))
    return translated
def test_approximateToNearest5Minutes(self):
    """Check ``approximateToNearest5Minutes`` against fixed timestamps.

    The expected values show the result landing on the 5-minute mark at or
    before the input, with seconds and microseconds dropped.
    """
    # (input, expected) pairs. The seconds value that used to be written as
    # the octal-style literal 01 is now a plain 1: same value, and valid
    # syntax on Python 3 as well.
    cases = [
        (datetime(2011, 7, 5, 15, 13, 34), datetime(2011, 7, 5, 15, 10)),
        (datetime(2011, 7, 5, 15, 15), datetime(2011, 7, 5, 15, 15)),
        (datetime(2011, 7, 5, 15, 13, 11, 30), datetime(2011, 7, 5, 15, 10)),
        (datetime(2011, 7, 5, 15, 35, 1), datetime(2011, 7, 5, 15, 35)),
    ]
    for moment, expected in cases:
        self.assertEqual(expected, GeneralMethods.approximateToNearest5Minutes(moment))
def plot_graph_clusters_on_world_map(graph, s=0, lw=0, alpha=0.6, bkcolor='#CFCFCF', *args, **kwargs):
    """Cluster ``graph`` and draw its nodes and intra-cluster edges on a map.

    The graph is clustered with ``clusterUsingAffinityPropagation`` (imported
    locally from ``graphs``); every cluster gets a random colour. Nodes are
    plotted through ``plotPointsOnWorldMap`` and each edge whose endpoints
    share a cluster is drawn as a great circle in that cluster's colour.

    graph   -- graph whose node ids are location ids with '_' in place of ' '.
    s, lw   -- forwarded to ``plotPointsOnWorldMap``.
    alpha   -- transparency used for the great-circle edges.
    bkcolor -- accepted for interface compatibility; not used here.
    args, kwargs -- forwarded to ``plotPointsOnWorldMap``.

    Returns ``(no_of_clusters, tuples_of_location_and_cluster_id)`` exactly
    as produced by the clustering call.
    """
    from graphs import clusterUsingAffinityPropagation

    no_of_clusters, tuples_of_location_and_cluster_id = clusterUsingAffinityPropagation(graph)
    location_to_cluster = dict(tuples_of_location_and_cluster_id)
    cluster_to_color = dict(
        (cluster_id, GeneralMethods.getRandomColor())
        for cluster_id in range(no_of_clusters)
    )

    def locate(node_id):
        # Node ids use '_' where the location id uses ' '.
        return getLocationFromLid(node_id.replace('_', ' '))

    points, colors = zip(*[
        (locate(node_id), cluster_to_color[location_to_cluster[node_id]])
        for node_id in graph.nodes()
    ])
    _, m = plotPointsOnWorldMap(points, c=colors, s=s, lw=lw, returnBaseMapObject=True, *args, **kwargs)
    # The edge attribute dict is ignored: the former data['w'] lookup was
    # never used and only made edges lacking 'w' raise KeyError.
    for u, v, _data in graph.edges(data=True):
        cluster_id = location_to_cluster[u]
        if cluster_id == location_to_cluster[v]:
            source, target = locate(u), locate(v)
            # Components are swapped when passed on (presumably points are
            # (lat, lon) and drawgreatcircle wants lon first -- confirm).
            m.drawgreatcircle(source[1], source[0], target[1], target[0],
                              color=cluster_to_color[cluster_id], alpha=alpha)
    return (no_of_clusters, tuples_of_location_and_cluster_id)
def test_reverseDict(self):
    """``reverseDict`` swaps keys and values and rejects duplicate values."""
    one_to_one = {'a': 1, 'b': 2}
    self.assertEqual({1: 'a', 2: 'b'}, GeneralMethods.reverseDict(one_to_one))

    # Two keys share the value 1, so the mapping cannot be inverted.
    duplicated_values = {'a': 1, 'b': 1}
    self.assertRaises(Exception, GeneralMethods.reverseDict, duplicated_values)
def write_file_from_hdfs_to_local_file(hdfs_file, f_local):
    """Copy the second tab-separated field of an HDFS output to a local file.

    The ``part*`` files under ``hdfs_file`` are concatenated into a temporary
    file in /tmp. ``f_local`` is removed, then field [1] of every
    tab-separated line is written to ``f_local`` via ``FileIO.writeToFile``.

    hdfs_file -- HDFS directory holding the ``part*`` files.
    f_local   -- local destination path; any existing content is deleted.

    Raises IndexError if a line contains no tab character.
    """
    import os
    import tempfile

    # mkstemp creates the file atomically under an unpredictable name, which
    # replaces the hand-rolled random name. The shell redirection below
    # overwrites the empty file.
    fd, f_temp_output = tempfile.mkstemp(dir='/tmp')
    os.close(fd)
    try:
        GeneralMethods.runCommand('hadoop fs -cat %s/part* > %s' % (hdfs_file, f_temp_output))
        GeneralMethods.runCommand('rm -rf %s' % f_local)
        # open() with a context manager closes the handle promptly and works
        # on both Python 2 and 3 (the file() builtin is Python-2-only).
        with open(f_temp_output) as temp_file:
            for data in temp_file:
                FileIO.writeToFile(data.strip().split('\t')[1], f_local)
    finally:
        # Remove the temporary file even if a command or a line fails.
        GeneralMethods.runCommand('rm -rf %s' % f_temp_output)
def plotNorm(maxYValue, mu, sigma, color=None, **kwargs):
    """Draw a filled, scaled normal curve on the current matplotlib axes.

    The x positions are the bin edges of a 1000-bin histogram over 1000
    samples drawn from N(mu, sigma). The y values are the normal density at
    those edges, divided by 4 and multiplied by ``maxYValue``.

    maxYValue -- scale factor applied to the density.
    mu, sigma -- mean and standard deviation of the distribution.
    color     -- colour for both fill and line; a random colour is used when
                 falsy.
    kwargs    -- forwarded to ``plt.plot`` only.
    """
    samples = np.random.normal(mu, sigma, 1000)
    # Only the bin edges are used, and they do not depend on normalisation,
    # so the `normed` flag (removed from recent NumPy releases) is dropped.
    _, bins = np.histogram(samples, 1000)
    if not color:
        color = GeneralMethods.getRandomColor()
    density = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins - mu) ** 2 / (2 * sigma ** 2))
    # Computed once and shared by the fill and the outline.
    curve = (density / 4) * maxYValue
    plt.fill_between(bins, curve, linewidth=1, color=color, alpha=0.3)
    plt.plot(bins, curve, linewidth=3, color=color, **kwargs)
def plotNorm(maxYValue, mu, sigma, color=None, **kwargs):
    """Plot a scaled normal density as a translucent fill plus an outline.

    x values are the edges of a 1000-bin histogram built from 1000 random
    draws of N(mu, sigma); y values are the normal density at those edges,
    divided by 4 and scaled by ``maxYValue``.

    maxYValue -- multiplier applied to the density.
    mu, sigma -- mean and standard deviation of the distribution.
    color     -- shared colour of fill and outline; chosen at random via
                 ``GeneralMethods.getRandomColor()`` when falsy.
    kwargs    -- extra options passed to ``plt.plot`` (not to the fill).
    """
    draws = np.random.normal(mu, sigma, 1000)
    # The counts were never used and bin edges are independent of
    # normalisation, so the `normed` keyword (no longer accepted by current
    # NumPy) is not passed.
    bins = np.histogram(draws, 1000)[1]
    if not color:
        color = GeneralMethods.getRandomColor()
    # Evaluate the curve once instead of repeating the expression per call.
    scaled_pdf = ((1 / (sigma * np.sqrt(2 * np.pi)) *
                   np.exp(-(bins - mu) ** 2 / (2 * sigma ** 2))) / 4) * maxYValue
    plt.fill_between(bins, scaled_pdf, linewidth=1, color=color, alpha=0.3)
    plt.plot(bins, scaled_pdf, linewidth=3, color=color, **kwargs)
def test_approximateToNearest5Minutes(self):
    """Verify ``approximateToNearest5Minutes`` on four fixed timestamps.

    In every case the expected value is the 5-minute mark at or before the
    input, without seconds or microseconds.
    """
    approximate = GeneralMethods.approximateToNearest5Minutes
    self.assertEqual(datetime(2011, 7, 5, 15, 10),
                     approximate(datetime(2011, 7, 5, 15, 13, 34)))
    self.assertEqual(datetime(2011, 7, 5, 15, 15),
                     approximate(datetime(2011, 7, 5, 15, 15)))
    self.assertEqual(datetime(2011, 7, 5, 15, 10),
                     approximate(datetime(2011, 7, 5, 15, 13, 11, 30)))
    # Seconds written as 1 rather than the octal-style literal 01, which is
    # a SyntaxError on Python 3; the value is unchanged.
    self.assertEqual(datetime(2011, 7, 5, 15, 35),
                     approximate(datetime(2011, 7, 5, 15, 35, 1)))
def __isValidObject(self):
    """Return True when the reverse map equals the inverted forward map."""
    reverse_map = self.twoWayMap.getMap(TwoWayMap.MAP_REVERSE)
    forward_map = self.twoWayMap.getMap(TwoWayMap.MAP_FORWARD)
    inverted_forward = GeneralMethods.reverseDict(forward_map)
    return reverse_map == inverted_forward
def drawKMLsForPoints(pointsIterator, outputKMLFile, color=None):
    """Write the given points to a KML file in a single colour.

    pointsIterator -- points handed to ``KML.addLocationPoints``.
    outputKMLFile  -- destination passed to ``KML.write``.
    color          -- colour for all points; a random colour is used when
                      falsy.
    """
    document = KML()
    point_color = color or GeneralMethods.getRandomColor()
    document.addLocationPoints(pointsIterator, color=point_color)
    document.write(outputKMLFile)
def tearDown(self):
    """Remove the two fixture files and the 'pig_tests.pig' script."""
    remove = GeneralMethods.remove_file
    remove(self.f1)
    remove(self.f2)
    remove('pig_tests.pig')
def test_reverseDict(self):
    """Check key/value inversion and the failure on duplicate values."""
    reverse = GeneralMethods.reverseDict
    inverted = reverse({'a': 1, 'b': 2})
    self.assertEqual({1: 'a', 2: 'b'}, inverted)
    # Both keys map to 1, so inversion must raise.
    self.assertRaises(Exception, reverse, {'a': 1, 'b': 1})
def test_getValueDistribution(self):
    """``getValueDistribution`` counts how often each ``len`` result occurs.

    The three lists have lengths 3, 1 and 1, so the expected distribution is
    ``{1: 2, 3: 1}``.
    """
    lists_by_key = {'a': [1, 2, 3], 'b': [3], 'c': [4]}
    expected = {1: 2, 3: 1}
    actual = GeneralMethods.getValueDistribution(lists_by_key.itervalues(), len)
    self.assertEqual(expected, actual)
def test_run(self):
    """Run a ``Pig`` job over the two fixture files and print its script.

    Makes no assertions; the generated script is shown with ``cat``.
    """
    input_files = [self.f1, self.f2]
    pairs = [('A', '32'), ('B', 'sdfd')]
    pig = Pig(input_files, pairs)
    pig.run()
    show_script = 'cat %s' % pig.output_pig_script
    GeneralMethods.runCommand(show_script)
def test_getValueDistribution(self):
    """Distribution of list lengths: two lists of length 1, one of length 3."""
    samples = {'a': [1, 2, 3], 'b': [3], 'c': [4]}
    distribution = GeneralMethods.getValueDistribution(samples.itervalues(), len)
    self.assertEqual({1: 2, 3: 1}, distribution)
def __isValidObject(self):
    """Return True when the reverse map equals the inverted forward map."""
    # reverseDict inverts the forward map; the result is compared with the
    # stored reverse map.
    return self.twoWayMap.getMap(TwoWayMap.MAP_REVERSE)==GeneralMethods.reverseDict(self.twoWayMap.getMap(TwoWayMap.MAP_FORWARD))
def test_basicOperation(self):
def addLocationPoints(self, points, color=None):
    """Add one shaded-dot placemark to ``self.kml`` for every point.

    points -- iterable of reversible sequences; each is reversed before being
              passed as the single coordinate of a new KML point.
    color  -- string whose first character is dropped (presumably a leading
              '#') and replaced by 'ff'; a random colour is used when falsy.
    """
    color = color or GeneralMethods.getRandomColor()
    icon_url = 'http://maps.google.com/mapfiles/kml/shapes/shaded_dot.png'
    for raw_point in points:
        flipped = list(reversed(raw_point))
        placemark = self.kml.newpoint(coords=[flipped])
        placemark.iconstyle.icon.href = icon_url
        placemark.iconstyle.color = 'ff' + color[1:]
def runMRJob(mrJobClass, outputFileName, inputFileList, args=None, **kwargs):
    """Run a map-reduce job and write its output to a local file as JSON.

    mrJobClass     -- class instantiated as ``mrJobClass(args=...)``.
    outputFileName -- local path; removed with ``rm -rf`` before the job runs.
    inputFileList  -- forwarded to ``runJob`` as ``inputFileList``.
    args           -- argument list for the job constructor; defaults to
                      ``['-r', 'hadoop']`` when omitted or None.
    kwargs         -- forwarded unchanged to ``runJob``.
    """
    # Build the default per call so no list is shared between invocations.
    if args is None:
        args = '-r hadoop'.split()
    mrJob = mrJobClass(args=args)
    GeneralMethods.runCommand('rm -rf %s' % outputFileName)
    for l in mrJob.runJob(inputFileList=inputFileList, **kwargs):
        # Only element [1] of each yielded item is written (presumably the
        # value of a (key, value) pair -- confirm against runJob).
        FileIO.writeToFileAsJson(l[1], outputFileName)
def addLocationPoints(self, points, color=None):
    """Create a shaded-dot KML point for each entry of ``points``.

    points -- iterable of reversible sequences; the element order of each is
              reversed before it is used as the point's only coordinate.
    color  -- string; its first character (presumably '#') is replaced by
              'ff' to form the icon colour. Falsy values select a random
              colour from ``GeneralMethods.getRandomColor()``.
    """
    if not color:
        color = GeneralMethods.getRandomColor()
    for source_point in points:
        coordinate = list(reversed(source_point))
        pnt = self.kml.newpoint(coords=[coordinate])
        style = pnt.iconstyle
        style.icon.href = 'http://maps.google.com/mapfiles/kml/shapes/shaded_dot.png'
        style.color = 'ff' + color[1:]