コード例 #1
0
ファイル: helper_test.py プロジェクト: alialavia/TwitterNews
    def test_scaletime(self):
        """Check helper.scaletime against scaling factors 1..60.

        Verifies that:
          * a scaling factor of 0 raises ZeroDivisionError;
          * the input series is left unmodified by the calls;
          * a scaling factor of 1 returns a series equal to the input;
          * for every factor the result is a list whose tick count is the
            floor-divided input length (or one more), whose summed tick
            sizes do not exceed the input's, and whose aggregate from
            self.sum_wordcount (presumably per-word totals -- confirm
            against that helper) matches the input's.
        """
        import copy

        data_in = self.timeseries
        # Independent snapshot of the input. Comparing data_in with
        # self.timeseries would compare an object with itself and could
        # never detect an in-place modification.
        snapshot = copy.deepcopy(data_in)

        # The input type does not change between iterations; check it once.
        self.assertEqual(type(data_in), list)

        # A scaling factor of 0 must raise ZeroDivisionError
        self.assertRaises(ZeroDivisionError,
            helper.scaletime, data_in, 0)

        # The failed call must not have modified the input
        self.assertEqual(data_in, snapshot)

        # A scaling factor of 1 must maintain the series
        self.assertEqual(data_in, helper.scaletime(data_in, 1))

        # Shuffled scaling factors. list() is needed because
        # random.shuffle requires a mutable sequence.
        randomseq = list(range(1, 61))
        random.shuffle(randomseq)

        # Reference values computed from the input once
        termcount_in = self.sum_wordcount(data_in)
        size_in = sum(len(t) for t in data_in)

        for scalingfactor in randomseq:

            ret = helper.scaletime(data_in, scalingfactor)

            # Output must be a plain list, like the input
            self.assertEqual(type(ret), list)

            # The number of ticks equals the input length floor-divided by
            # the scaling factor, plus one when a partial group remains.
            expected_ticks = len(data_in) // scalingfactor
            self.assertIn(len(ret), (expected_ticks, expected_ticks + 1))

            # Summed tick sizes must not grow when scaling down
            self.assertGreaterEqual(
                size_in,
                sum(len(t) for t in ret)
            )

            termcount_out = self.sum_wordcount(ret)
            # Same number of distinct entries before and after scaling
            self.assertEqual(
                len(termcount_out),
                len(termcount_in)
            )
            # Same count for every individual entry
            for word in termcount_in:
                self.assertEqual(
                    termcount_out[word],
                    termcount_in[word]
                )

        # None of the calls above may have modified the input
        self.assertEqual(data_in, snapshot)
コード例 #2
0
ファイル: helper_test.py プロジェクト: alialavia/bigdata
    def test_scaletime(self):
        """Check helper.scaletime against scaling factors 1..60.

        Verifies that a scaling factor of 0 raises ZeroDivisionError, that
        the input series is never modified, that a factor of 1 returns a
        series equal to the input, and that for every factor the result is
        a list with the expected tick count, a summed tick size no larger
        than the input's, and the same aggregate from self.sum_wordcount
        (presumably per-word totals -- confirm against that helper).
        """
        import copy

        data_in = self.timeseries
        # Deep copy taken up front: data_in and self.timeseries are the same
        # object, so comparing them with each other cannot reveal mutation.
        untouched = copy.deepcopy(data_in)

        # Input type is loop-invariant, so it is asserted once here.
        self.assertEqual(type(data_in), list)

        # A scaling factor of 0 must raise ZeroDivisionError
        self.assertRaises(ZeroDivisionError, helper.scaletime, data_in, 0)

        # The failed call must leave the input as it was
        self.assertEqual(data_in, untouched)

        # A scaling factor of 1 must maintain the series
        self.assertEqual(data_in, helper.scaletime(data_in, 1))

        # Scaling factors in random order; random.shuffle needs a real list
        randomseq = list(range(1, 61))
        random.shuffle(randomseq)

        # Reference values derived from the input once
        termcount_in = self.sum_wordcount(data_in)
        total_in = sum(len(t) for t in data_in)

        for scalingfactor in randomseq:

            ret = helper.scaletime(data_in, scalingfactor)

            # Output must be a plain list, like the input
            self.assertEqual(type(ret), list)

            # Tick count is the floor-divided input length, or one more when
            # the input length is not a multiple of the scaling factor.
            full_ticks = len(data_in) // scalingfactor
            self.assertIn(len(ret), (full_ticks, full_ticks + 1))

            # Summed tick sizes must not grow when scaling down
            self.assertGreaterEqual(total_in, sum(len(t) for t in ret))

            termcount_out = self.sum_wordcount(ret)
            # Same number of distinct entries before and after scaling
            self.assertEqual(len(termcount_out), len(termcount_in))
            # Same count for every individual entry
            for word in termcount_in:
                self.assertEqual(termcount_out[word], termcount_in[word])

        # After all calls the input must still equal the snapshot
        self.assertEqual(data_in, untouched)
コード例 #3
0
def main():
    """Rescale a time series file and print the top entries of each tick.

    Command line:
        timeanalysis TimeseriesFile Top ScalingFactor [plotname]

    TimeseriesFile is a JSON document holding three items: a start date
    string, a frequency and a mapping from category to a series of ticks.
    Every category's series is rescaled with helper.scaletime, and each
    resulting tick (a mapping) is reduced to its ``Top`` items with the
    largest values. The tuple (start date string, ScalingFactor * frequency,
    reduced series per category) is printed as JSON.

    When ``plotname`` is given, helper.plot is additionally called once per
    category with the title "<plotname> <category>".

    With a wrong number of arguments the usage text is passed to helper.msg
    and the process exits with status 0.
    """
    if not 4 <= len(sys.argv) <= 5:
        helper.msg(
            "Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n"
        )
        # sys.exit instead of the interactive-only ``exit`` helper; the exit
        # status stays 0 as before.
        sys.exit(0)
    filename = sys.argv[1]
    top = int(sys.argv[2])
    scalingfactor = int(sys.argv[3])

    helper.msg("Reading the file...\n")
    # Context manager so the file is closed even if parsing fails
    with open(filename) as infile:
        filedata = json.load(infile)
    helper.msg("[DONE]\n")
    helper.msg("Parsing the file...\n")

    startdate_str, freq, timeseries = filedata
    startdate = helper.strtodate(startdate_str)
    helper.msg("[DONE]\n")

    perminute, sorted_perminute = {}, {}
    for category in timeseries:
        helper.msg("Changing the time scale...\n")

        perminute[category] = helper.scaletime(timeseries[category],
                                               scalingfactor)
        helper.msg("[DONE]\n")

        helper.msg("Calculating the top topics...\n")
        # Keep only the `top` highest-valued items of every tick
        sorted_perminute[category] = [
            dict(sorted(tick.items(),
                        key=lambda item: item[1],
                        reverse=True)[:top])
            for tick in perminute[category]
        ]

    # Parenthesised form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print(json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)))
    if len(sys.argv) == 5:
        plotname = sys.argv[4]
        for category in sorted_perminute:
            helper.plot(sorted_perminute[category], startdate,
                        scalingfactor * freq, plotname + " " + category)
コード例 #4
0
ファイル: timeanalysis.py プロジェクト: alialavia/TwitterNews
def main():
    """Rescale a time series file and print the top entries of each tick.

    Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]

    The JSON input file holds three items: a start date string, a frequency
    and a mapping from category to a series of ticks. Each category's series
    is passed through helper.scaletime with the given scaling factor, and
    every resulting tick (a mapping) is cut down to its ``Top`` items with
    the largest values. The result is printed as a JSON array of the start
    date string, ScalingFactor * frequency, and the reduced series keyed by
    category.

    If the optional ``plotname`` argument is present, helper.plot is called
    for every category, titled "<plotname> <category>".

    An invalid argument count passes the usage text to helper.msg and exits
    with status 0.
    """
    argc = len(sys.argv)
    if argc < 4 or argc > 5:
        helper.msg("Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n")
        # ``exit`` is injected by the site module and may be absent;
        # sys.exit is always available. Exit status 0 is preserved.
        sys.exit(0)
    filename = sys.argv[1]
    top = int(sys.argv[2])
    scalingfactor = int(sys.argv[3])

    helper.msg("Reading the file...\n")
    # Close the file deterministically instead of leaking the handle
    with open(filename) as source:
        filedata = json.load(source)
    helper.msg("[DONE]\n")
    helper.msg("Parsing the file...\n")

    startdate_str, freq, timeseries = filedata
    startdate = helper.strtodate(startdate_str)
    helper.msg("[DONE]\n")

    perminute, sorted_perminute = {}, {}
    for category in timeseries:
        helper.msg("Changing the time scale...\n")

        perminute[category] = helper.scaletime(
            timeseries[category], scalingfactor)
        helper.msg("[DONE]\n")

        helper.msg("Calculating the top topics...\n")
        # For each tick keep the `top` items with the highest values
        sorted_perminute[category] = [
            dict(sorted(tick.items(),
                        key=lambda pair: pair[1], reverse=True)[:top])
            for tick in perminute[category]
        ]

    # Written with parentheses so the line is valid as both a Python 2
    # print statement and a Python 3 function call; the output is the same.
    print(json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)))
    if argc == 5:
        plotname = sys.argv[4]
        for category in sorted_perminute:
            helper.plot(sorted_perminute[category],
                startdate, scalingfactor * freq, plotname + " " + category)