def test_map(self):
    """Plot two groups of points on a ``Map`` and display the figure.

    Each group is drawn with its own colour from
    ``GeneralMethods.getRandomColor()``. The test makes no assertions; it
    only exercises ``Map.plotPoints`` and ``plt.show()``.
    """
    # Each entry is the pair of coordinate lists handed to plotPoints
    # (presumably longitudes first, then latitudes -- confirm against Map).
    point_groups = (
        ([-105.16, -117.16, -77.00], [40.02, 32.73, 38.55]),
        ([-114.21, -88.10], [48.25, 17.29]),
    )
    usMap = Map()
    for first_coords, second_coords in point_groups:
        usMap.plotPoints(first_coords,
                         second_coords,
                         color=GeneralMethods.getRandomColor())
    plt.show()
Exemple #2
0
def runMRJob(mrJobClass,
             outputFileName,
             inputFileList,
             mrJobClassParams={},
             args='-r hadoop'.split(),
             **kwargs):
    """Run an MR job and write its results to ``outputFileName`` as JSON.

    Args:
        mrJobClass: Callable invoked as ``mrJobClass(args=args,
            **mrJobClassParams)`` to build the job object.
        outputFileName: Path that is removed with ``rm -rf`` before the run
            and then receives the results.
        inputFileList: Forwarded to ``runJob`` as ``inputFileList``.
        mrJobClassParams: Extra keyword arguments for ``mrJobClass``. The
            default dict is only unpacked here, never modified.
        args: Argument list for the job; defaults to ``['-r', 'hadoop']``.
        **kwargs: Forwarded unchanged to ``runJob``.
    """
    job = mrJobClass(args=args, **mrJobClassParams)
    # Drop any output left over from an earlier run before writing new data.
    GeneralMethods.runCommand('rm -rf %s' % outputFileName)
    results = job.runJob(inputFileList=inputFileList, **kwargs)
    for result in results:
        # Only the second element of each yielded item is persisted.
        FileIO.writeToFileAsJson(result[1], outputFileName)
Exemple #3
0
def runMRJobWithOutPutToHDFS(mrJobClass,
                             outputFileName,
                             inputFileList,
                             mrJobClassParams={},
                             args='-r hadoop'.split(),
                             **kwargs):
    """Run an MR job through its ``runJobWithOutPutToHDFS`` method.

    Args:
        mrJobClass: Callable invoked as ``mrJobClass(args=args,
            **mrJobClassParams)`` to build the job object.
        outputFileName: Path removed with ``hadoop fs -rm -r`` before the
            run, then passed to the job as its first positional argument.
        inputFileList: Forwarded to the job as ``inputFileList``.
        mrJobClassParams: Extra keyword arguments for ``mrJobClass``. The
            default dict is only unpacked here, never modified.
        args: Argument list for the job; defaults to ``['-r', 'hadoop']``.
        **kwargs: Forwarded unchanged to ``runJobWithOutPutToHDFS``.
    """
    hdfs_job = mrJobClass(args=args, **mrJobClassParams)
    # Remove any previous output at this path before the job runs.
    cleanup_command = 'hadoop fs -rm -r %s' % outputFileName
    GeneralMethods.runCommand(cleanup_command)
    hdfs_job.runJobWithOutPutToHDFS(outputFileName,
                                    inputFileList=inputFileList,
                                    **kwargs)
Exemple #4
0
def plot_graph_clusters_on_world_map(graph,
                                     s=0,
                                     lw=0,
                                     alpha=0.6,
                                     bkcolor='#CFCFCF',
                                     *args,
                                     **kwargs):
    """Cluster the graph's nodes and draw each cluster on a world map.

    Nodes are clustered with ``clusterUsingAffinityPropagation``. Every
    cluster gets a random colour, the nodes are plotted through
    ``plotPointsOnWorldMap``, and each edge whose two endpoints share a
    cluster is drawn as a great circle in that cluster's colour.

    Args:
        graph: Object providing ``nodes()`` and ``edges(data=True)``. Node
            ids are strings in which ``'_'`` stands for a space in the
            location id given to ``getLocationFromLid``.
        s, lw: Forwarded to ``plotPointsOnWorldMap``.
        alpha: Alpha used for the great-circle edges.
        bkcolor: Accepted but not used by this function.
        *args, **kwargs: Forwarded to ``plotPointsOnWorldMap``.

    Returns:
        The tuple ``(no_of_clusters, tuples_of_location_and_cluster_id)``
        as produced by the clustering step.
    """

    def to_location(lid):
        # Node ids use '_' where the location id uses a space.
        return getLocationFromLid(lid.replace('_', ' '))

    no_of_clusters, tuples_of_location_and_cluster_id = \
        clusterUsingAffinityPropagation(graph)
    cluster_of = dict(tuples_of_location_and_cluster_id)
    color_of_cluster = {}
    for cluster_id in range(no_of_clusters):
        color_of_cluster[cluster_id] = GeneralMethods.getRandomColor()

    located_nodes = [(to_location(node), color_of_cluster[cluster_of[node]])
                     for node in graph.nodes()]
    points, colors = zip(*located_nodes)
    _, m = plotPointsOnWorldMap(points,
                                c=colors,
                                s=s,
                                lw=lw,
                                returnBaseMapObject=True,
                                *args,
                                **kwargs)

    for u, v, data in graph.edges(data=True):
        if cluster_of[u] != cluster_of[v]:
            # Edges that cross clusters are not drawn.
            continue
        color = color_of_cluster[cluster_of[u]]
        start = to_location(u)
        end = to_location(v)
        # The weight is read but not used for drawing; the lookup is kept so
        # an edge without a 'w' attribute still raises KeyError.
        w = data['w']
        # Coordinates are passed in swapped order (index 1, then index 0).
        m.drawgreatcircle(start[1], start[0], end[1], end[0],
                          color=color, alpha=alpha)
    return (no_of_clusters, tuples_of_location_and_cluster_id)
Exemple #5
0
 def getFrequentItemsets(self, maxdepth=0):
     """Mine the frequent itemsets and return them with item ids decoded.

     Runs ``read_data``, ``prune_items`` and ``eclat_mine(self.data)`` in
     that order, then maps every item id in ``self.frequentItemsets`` back
     to its item through the inverse of ``self.itemToItemIdMap``.

     Args:
         maxdepth: Accepted but not used by this method.

     Returns:
         A list of ``(items, second_element)`` tuples, where ``items`` is
         the decoded list built from element 0 of each stored itemset and
         ``second_element`` is its element 1, passed through unchanged.
     """
     self.read_data()
     self.prune_items()
     self.eclat_mine(self.data)
     id_to_item = GeneralMethods.reverseDict(self.itemToItemIdMap)
     decoded = []
     for entry in self.frequentItemsets:
         items = [id_to_item[item_id] for item_id in entry[0]]
         decoded.append((items, entry[1]))
     return decoded
 def test_approximateToNearest5Minutes(self):
     """Check ``GeneralMethods.approximateToNearest5Minutes`` on four inputs.

     The expected values show the result landing on the 5-minute boundary
     at or before the input (15:13:34 -> 15:10, 15:35:01 -> 15:35), with
     seconds and microseconds dropped.
     """
     cases = [
         # (expected, input)
         (datetime(2011, 7, 5, 15, 10), datetime(2011, 7, 5, 15, 13, 34)),
         (datetime(2011, 7, 5, 15, 15), datetime(2011, 7, 5, 15, 15)),
         (datetime(2011, 7, 5, 15, 10),
          datetime(2011, 7, 5, 15, 13, 11, 30)),
         # The seconds value was written as ``01``, a Python 2 octal literal
         # that is a syntax error on Python 3; ``1`` is the same value.
         (datetime(2011, 7, 5, 15, 35), datetime(2011, 7, 5, 15, 35, 1)),
     ]
     for expected, moment in cases:
         self.assertEqual(
             expected, GeneralMethods.approximateToNearest5Minutes(moment))
Exemple #7
0
def plot_graph_clusters_on_world_map(graph, s=0, lw=0, alpha=0.6, bkcolor='#CFCFCF', *args, **kwargs):
    """Plot a graph's node clusters and intra-cluster edges on a world map.

    The nodes are clustered with ``clusterUsingAffinityPropagation``
    (imported locally from ``graphs``). Each cluster is given a random
    colour, the nodes are plotted with ``plotPointsOnWorldMap``, and every
    edge whose endpoints fall in the same cluster is drawn as a great
    circle in the cluster's colour.

    Args:
        graph: Object providing ``nodes()`` and ``edges(data=True)``. Node
            ids are strings; ``'_'`` is replaced by a space before the id
            is given to ``getLocationFromLid``.
        s, lw: Forwarded to ``plotPointsOnWorldMap``.
        alpha: Alpha used when drawing the great circles.
        bkcolor: Accepted but not used by this function.
        *args, **kwargs: Forwarded to ``plotPointsOnWorldMap``.

    Returns:
        ``(no_of_clusters, tuples_of_location_and_cluster_id)`` exactly as
        returned by the clustering call.
    """
    from graphs import clusterUsingAffinityPropagation
    no_of_clusters, tuples_of_location_and_cluster_id = clusterUsingAffinityPropagation(graph)
    location_to_cluster = dict(tuples_of_location_and_cluster_id)
    cluster_to_color = dict(
        (cluster_id, GeneralMethods.getRandomColor())
        for cluster_id in range(no_of_clusters)
    )
    # Pair every node's location with the colour of the cluster it is in.
    located_nodes = [
        (getLocationFromLid(node.replace('_', ' ')),
         cluster_to_color[location_to_cluster[node]])
        for node in graph.nodes()
    ]
    points, colors = zip(*located_nodes)
    _, m = plotPointsOnWorldMap(points, c=colors, s=s, lw=lw,
                                returnBaseMapObject=True, *args, **kwargs)
    for u, v, data in graph.edges(data=True):
        shared_cluster = location_to_cluster[u]
        if shared_cluster == location_to_cluster[v]:
            color = cluster_to_color[shared_cluster]
            source = getLocationFromLid(u.replace('_', ' '))
            target = getLocationFromLid(v.replace('_', ' '))
            # The weight is fetched but unused; the lookup is kept so an
            # edge lacking a 'w' attribute still raises KeyError.
            w = data['w']
            # Index 1 is passed before index 0 for both endpoints.
            m.drawgreatcircle(source[1], source[0], target[1], target[0],
                              color=color, alpha=alpha)
    return (no_of_clusters, tuples_of_location_and_cluster_id)
 def test_reverseDict(self):
     """Check that ``reverseDict`` swaps keys with values.

     A dict with unique values must come back inverted; a dict in which
     two keys share a value must make ``reverseDict`` raise.
     """
     invertible = {'a': 1, 'b': 2}
     inverted = {1: 'a', 2: 'b'}
     self.assertEqual(inverted, GeneralMethods.reverseDict(invertible))

     # Both keys map to 1, so no one-to-one inverse exists.
     duplicate_values = {'a': 1, 'b': 1}
     self.assertRaises(Exception, GeneralMethods.reverseDict,
                       duplicate_values)
Exemple #9
0
 def write_file_from_hdfs_to_local_file(hdfs_file, f_local):
     """Copy the value column of an HDFS job output into a local file.

     The ``part*`` files under ``hdfs_file`` are concatenated into a
     randomly named file in ``/tmp``. ``f_local`` is removed, and then the
     second tab-separated field of every line is passed to
     ``FileIO.writeToFile`` for ``f_local``.

     Args:
         hdfs_file: HDFS directory containing the ``part*`` files.
         f_local: Local path that is deleted and then written to.

     Raises:
         IndexError: If a line of the output contains no tab character.
     """
     random_suffix = ''.join(
         random.choice(string.ascii_uppercase + string.digits)
         for _ in range(10))
     f_temp_output = '/tmp/%s' % random_suffix
     try:
         GeneralMethods.runCommand(
             'hadoop fs -cat %s/part* > %s' % (hdfs_file, f_temp_output))
         GeneralMethods.runCommand('rm -rf %s' % f_local)
         # ``open`` in a ``with`` block replaces the Python 2-only ``file``
         # builtin and guarantees the handle is closed.
         with open(f_temp_output) as temp_file:
             for line in temp_file:
                 FileIO.writeToFile(line.strip().split('\t')[1], f_local)
     finally:
         # Remove the temporary copy even if one of the steps above failed.
         GeneralMethods.runCommand('rm -rf %s' % f_temp_output)
         
Exemple #10
0
def plotNorm(maxYValue, mu, sigma, color=None, **kwargs):
    """Draw a scaled normal-density curve as a filled area plus an outline.

    The x positions are the bin edges of a 1000-bin histogram of 1000
    samples drawn from ``N(mu, sigma)``. The y values are the normal
    density at those edges, divided by 4 and multiplied by ``maxYValue``.

    Args:
        maxYValue: Scale factor applied to the density values.
        mu: Mean of the normal distribution.
        sigma: Standard deviation of the normal distribution.
        color: Colour for both the fill and the line; a random colour from
            ``GeneralMethods.getRandomColor()`` is used when falsy.
        **kwargs: Forwarded to ``plt.plot`` only.
    """
    s = np.random.normal(mu, sigma, 1000)
    # Only the bin edges are used. ``density=True`` replaces the ``normed``
    # keyword, which has been removed from ``np.histogram``.
    _, bins = np.histogram(s, 1000, density=True)
    if not color:
        color = GeneralMethods.getRandomColor()
    # Compute the scaled density once and reuse it for both artists.
    density = (1 / (sigma * np.sqrt(2 * np.pi))
               * np.exp(-(bins - mu) ** 2 / (2 * sigma ** 2)))
    curve = (density / 4) * maxYValue
    plt.fill_between(bins, curve, linewidth=1, color=color, alpha=0.3)
    plt.plot(bins, curve, linewidth=3, color=color, **kwargs)
Exemple #11
0
def plotNorm(maxYValue, mu, sigma, color=None, **kwargs):
    """Plot a normal-density curve scaled by ``maxYValue / 4``.

    1000 samples are drawn from ``N(mu, sigma)`` and binned into a
    1000-bin histogram; the bin edges serve as x positions. The normal
    density at each edge is divided by 4, multiplied by ``maxYValue``, and
    drawn twice: as a translucent fill and as a line.

    Args:
        maxYValue: Scale factor applied to the density values.
        mu: Mean of the normal distribution.
        sigma: Standard deviation of the normal distribution.
        color: Colour shared by the fill and the line. When falsy, one is
            obtained from ``GeneralMethods.getRandomColor()``.
        **kwargs: Passed to ``plt.plot`` only, not to ``plt.fill_between``.
    """
    samples = np.random.normal(mu, sigma, 1000)
    # ``normed`` is no longer accepted by ``np.histogram``; ``density`` is
    # its replacement. The counts are not needed, only the bin edges.
    bins = np.histogram(samples, 1000, density=True)[1]
    if not color:
        color = GeneralMethods.getRandomColor()
    normalisation = 1 / (sigma * np.sqrt(2 * np.pi))
    exponent = -(bins - mu)**2 / (2 * sigma**2)
    # The same y values are used for the fill and the line.
    scaled_density = ((normalisation * np.exp(exponent)) / 4) * maxYValue
    plt.fill_between(bins,
                     scaled_density,
                     linewidth=1,
                     color=color,
                     alpha=0.3)
    plt.plot(bins, scaled_density, linewidth=3, color=color, **kwargs)
Exemple #12
0
 def test_approximateToNearest5Minutes(self):
     """Verify ``approximateToNearest5Minutes`` against four timestamps.

     In every case the expected result is the 5-minute boundary at or
     before the input, with seconds and microseconds removed.
     """
     approximate = GeneralMethods.approximateToNearest5Minutes
     self.assertEqual(datetime(2011, 7, 5, 15, 10),
                      approximate(datetime(2011, 7, 5, 15, 13, 34)))
     self.assertEqual(datetime(2011, 7, 5, 15, 15),
                      approximate(datetime(2011, 7, 5, 15, 15)))
     self.assertEqual(datetime(2011, 7, 5, 15, 10),
                      approximate(datetime(2011, 7, 5, 15, 13, 11, 30)))
     # Seconds are written as ``1`` rather than ``01``: a leading zero is
     # an octal literal on Python 2 and a syntax error on Python 3.
     self.assertEqual(datetime(2011, 7, 5, 15, 35),
                      approximate(datetime(2011, 7, 5, 15, 35, 1)))
Exemple #13
0
 def __isValidObject(self):
     """Return whether the reverse map is the exact inverse of the forward map.

     The reverse map of ``self.twoWayMap`` is compared for equality with
     ``GeneralMethods.reverseDict`` applied to its forward map.
     """
     reverse_map = self.twoWayMap.getMap(TwoWayMap.MAP_REVERSE)
     forward_map = self.twoWayMap.getMap(TwoWayMap.MAP_FORWARD)
     return reverse_map == GeneralMethods.reverseDict(forward_map)
Exemple #14
0
 def drawKMLsForPoints(pointsIterator, outputKMLFile, color=None):
     """Write the given points to a KML file in a single colour.

     Args:
         pointsIterator: Points handed to ``KML.addLocationPoints``.
         outputKMLFile: Destination passed to ``KML.write``.
         color: Colour for every point; when falsy, a colour from
             ``GeneralMethods.getRandomColor()`` is used instead.
     """
     document = KML()
     # ``or`` falls back to a random colour for None and any other falsy value.
     point_color = color or GeneralMethods.getRandomColor()
     document.addLocationPoints(pointsIterator, color=point_color)
     document.write(outputKMLFile)
Exemple #15
0
 def drawKMLsForPoints(pointsIterator, outputKMLFile, color=None):
     """Build a ``KML`` object from the points and write it out.

     Args:
         pointsIterator: Points forwarded to ``addLocationPoints``.
         outputKMLFile: Argument forwarded to the ``write`` call.
         color: Colour applied to all points. A falsy value selects a
             colour from ``GeneralMethods.getRandomColor()``.
     """
     kml_builder = KML()
     if color:
         chosen_color = color
     else:
         chosen_color = GeneralMethods.getRandomColor()
     kml_builder.addLocationPoints(pointsIterator, color=chosen_color)
     kml_builder.write(outputKMLFile)
Exemple #16
0
 def tearDown(self):
     """Remove the files used by the test: ``f1``, ``f2`` and the pig script."""
     for leftover in (self.f1, self.f2, 'pig_tests.pig'):
         GeneralMethods.remove_file(leftover)
 def getFrequentItemsets(self, maxdepth=0):
     """Run the mining pipeline and return itemsets with ids replaced by items.

     ``read_data``, ``prune_items`` and ``eclat_mine(self.data)`` are called
     in sequence. Each entry of ``self.frequentItemsets`` is then rewritten
     so that the ids in its element 0 are looked up in the inverse of
     ``self.itemToItemIdMap``.

     Args:
         maxdepth: Accepted but not used by this method.

     Returns:
         A list of ``(items, second_element)`` tuples; ``second_element``
         is element 1 of the stored itemset, unchanged.
     """
     self.read_data()
     self.prune_items()
     self.eclat_mine(self.data)
     item_for_id = GeneralMethods.reverseDict(self.itemToItemIdMap)

     def decode(itemset):
         # Translate the ids in element 0; keep element 1 untouched.
         return ([item_for_id[item_id] for item_id in itemset[0]], itemset[1])

     return [decode(itemset) for itemset in self.frequentItemsets]
Exemple #18
0
 def test_reverseDict(self):
     """Exercise ``GeneralMethods.reverseDict`` on a valid and an invalid dict.

     Unique values must produce the inverted mapping; duplicated values
     must raise an exception.
     """
     reverse = GeneralMethods.reverseDict
     # One-to-one mapping: keys and values trade places.
     self.assertEqual({1: 'a', 2: 'b'}, reverse({'a': 1, 'b': 2}))
     # Two keys sharing a value cannot be inverted.
     self.assertRaises(Exception, reverse, {'a': 1, 'b': 1})
Exemple #19
0
 def test_map(self):
     """Draw two point groups on a ``Map`` in random colours and show the plot.

     No assertions are made; the test only checks that plotting runs.
     """
     usMap = Map()

     first_color = GeneralMethods.getRandomColor()
     usMap.plotPoints([-105.16, -117.16, -77.00],
                      [40.02, 32.73, 38.55],
                      color=first_color)

     second_color = GeneralMethods.getRandomColor()
     usMap.plotPoints([-114.21, -88.10],
                      [48.25, 17.29],
                      color=second_color)

     plt.show()
Exemple #20
0
 def test_getValueDistribution(self):
     """Check ``getValueDistribution`` with ``len`` as the value function.

     Two of the lists have length 1 and one has length 3, so the expected
     distribution is ``{1: 2, 3: 1}``.
     """
     # Named ``lists_by_key`` rather than ``map`` so the builtin is not
     # shadowed.
     lists_by_key = {'a': [1, 2, 3], 'b': [3], 'c': [4]}
     # ``iter(d.values())`` still hands over an iterator, as ``itervalues()``
     # did, but also exists on Python 3.
     self.assertEqual({
         1: 2,
         3: 1
     }, GeneralMethods.getValueDistribution(iter(lists_by_key.values()), len))
Exemple #21
0
 def test_run(self):
     """Run a ``Pig`` job over the two fixture files and print its script.

     The job is built from ``self.f1`` and ``self.f2`` plus two
     ``(name, value)`` pairs. After ``run()``, the file named by
     ``pig.output_pig_script`` is shown with ``cat``. No assertions are made.
     """
     input_files = [self.f1, self.f2]
     parameters = [('A', '32'), ('B', 'sdfd')]
     pig = Pig(input_files, parameters)
     pig.run()
     show_script_command = 'cat %s' % pig.output_pig_script
     GeneralMethods.runCommand(show_script_command)
Exemple #22
0
 def test_getValueDistribution(self):
     """Verify the distribution of list lengths from ``getValueDistribution``.

     The input holds one list of length 3 and two of length 1, so applying
     ``len`` to each value must give ``{1: 2, 3: 1}``.
     """
     # The dict is no longer called ``map`` (which hid the builtin), and
     # ``itervalues()`` is replaced because it does not exist on Python 3.
     sample = {'a': [1, 2, 3], 'b': [3], 'c': [4]}
     value_iterator = iter(sample.values())
     expected = {1: 2, 3: 1}
     self.assertEqual(expected, GeneralMethods.getValueDistribution(value_iterator, len))
Exemple #23
0
 def __isValidObject(self):
     """Check that the stored reverse map equals the inverted forward map."""
     stored_reverse = self.twoWayMap.getMap(TwoWayMap.MAP_REVERSE)
     # Invert the forward map and compare it with what is actually stored.
     derived_reverse = GeneralMethods.reverseDict(
         self.twoWayMap.getMap(TwoWayMap.MAP_FORWARD))
     return stored_reverse == derived_reverse
 def test_basicOperation(self):
Exemple #24
0
 def addLocationPoints(self, points, color=None):
     """Add one shaded-dot placemark to ``self.kml`` for every point.

     Args:
         points: Iterable of reversible sequences. Each is reversed before
             being passed to ``newpoint`` as the single entry of ``coords``.
         color: String whose first character is dropped and the rest
             prefixed with ``'ff'`` to form the icon colour (e.g.
             ``'#00ff00'`` becomes ``'ff00ff00'``). When falsy, a colour
             from ``GeneralMethods.getRandomColor()`` is used.
     """
     color = color or GeneralMethods.getRandomColor()
     for point in points:
         flipped = list(reversed(point))
         placemark = self.kml.newpoint(coords=[flipped])
         style = placemark.iconstyle
         style.icon.href = 'http://maps.google.com/mapfiles/kml/shapes/shaded_dot.png'
         style.color = 'ff' + color[1:]
Exemple #25
0
def runMRJob(mrJobClass, outputFileName, inputFileList, args='-r hadoop'.split(), **kwargs):
    """Run an MR job and store its results in ``outputFileName`` as JSON.

    Args:
        mrJobClass: Callable invoked as ``mrJobClass(args=args)`` to build
            the job object.
        outputFileName: Path removed with ``rm -rf`` before the run and
            then given to ``FileIO.writeToFileAsJson`` for every result.
        inputFileList: Forwarded to ``runJob`` as ``inputFileList``.
        args: Argument list for the job; defaults to ``['-r', 'hadoop']``.
        **kwargs: Forwarded unchanged to ``runJob``.
    """
    job_instance = mrJobClass(args=args)
    remove_old_output = 'rm -rf %s' % outputFileName
    GeneralMethods.runCommand(remove_old_output)
    for record in job_instance.runJob(inputFileList=inputFileList, **kwargs):
        # Element 1 of each yielded record is the part that gets written.
        payload = record[1]
        FileIO.writeToFileAsJson(payload, outputFileName)
Exemple #26
0
 def addLocationPoints(self, points, color=None):
     """Create a shaded-dot KML point for each entry of ``points``.

     Args:
         points: Iterable of reversible sequences; every one is reversed
             into a list and passed to ``self.kml.newpoint`` inside a
             one-element ``coords`` list.
         color: String used to build the icon colour as ``'ff'`` followed
             by everything after its first character. A falsy value is
             replaced by ``GeneralMethods.getRandomColor()``.
     """
     if not color:
         color = GeneralMethods.getRandomColor()
     icon_url = 'http://maps.google.com/mapfiles/kml/shapes/shaded_dot.png'
     reversed_points = (list(reversed(entry)) for entry in points)
     for coordinates in reversed_points:
         marker = self.kml.newpoint(coords=[coordinates])
         marker.iconstyle.icon.href = icon_url
         marker.iconstyle.color = 'ff' + color[1:]