def test_scaletime(self):
    """Exercise helper.scaletime: error case, identity scaling, and
    invariants (type, length, size, word-count conservation) across
    scaling factors 1..60 applied in random order."""
    data_in = self.timeseries
    # A scaling factor of 0 must raise ZeroDivisionError.
    self.assertRaises(ZeroDivisionError, helper.scaletime, data_in, 0)
    # The failed call must not have mutated the input.
    self.assertEqual(data_in, self.timeseries)
    # A scaling factor of 1 is the identity transform.
    self.assertEqual(data_in, helper.scaletime(data_in, 1))
    # Shuffle factors 1..60; list() keeps this working on Python 3,
    # where range() is a lazy object random.shuffle cannot reorder.
    randomseq = list(range(1, 61))
    random.shuffle(randomseq)
    # Total count of every word in the original series, for the
    # conservation checks below.
    termcount_in = self.sum_wordcount(data_in)
    for scalingfactor in randomseq:
        ret = helper.scaletime(data_in, scalingfactor)
        # Input and output must both be plain lists.
        self.assertEqual(type(data_in), list)
        self.assertEqual(type(ret), list)
        # Output tick count is len(data_in) // scalingfactor, possibly
        # with one extra tick holding the leftover remainder.
        # NOTE(review): the original comment said "one less", but the
        # code accepts len(ret) == len(data_in) // scalingfactor + 1,
        # i.e. one MORE tick than the exact quotient.
        self.assertTrue(
            (len(data_in) // scalingfactor == len(ret))
            or (len(data_in) // scalingfactor == len(ret) - 1)
        )
        # Scaling must never increase the total number of stored items.
        self.assertGreaterEqual(
            sum([len(t) for t in data_in]),
            sum([len(t) for t in ret])
        )
        termcount_out = self.sum_wordcount(ret)
        # The vocabulary size is preserved...
        self.assertEqual(len(termcount_out), len(termcount_in))
        # ...and so is each individual word's total count.
        for word in termcount_in:
            self.assertEqual(termcount_out[word], termcount_in[word])
def test_scaletime(self):
    """Verify helper.scaletime's contract over many scaling factors."""
    series = self.timeseries
    # Scaling by zero must blow up with ZeroDivisionError.
    self.assertRaises(ZeroDivisionError, helper.scaletime, series, 0)
    # That call must leave its input untouched.
    self.assertEqual(series, self.timeseries)
    # Scaling by one is a no-op.
    self.assertEqual(series, helper.scaletime(series, 1))
    # Try factors 1 through 60 in a random order.
    factors = range(1, 61)
    random.shuffle(factors)
    # Word totals of the original series, used for conservation checks.
    expected_counts = self.sum_wordcount(series)
    for factor in factors:
        scaled = helper.scaletime(series, factor)
        # Both sides of the call are plain lists.
        self.assertEqual(type(series), list)
        self.assertEqual(type(scaled), list)
        # Tick count shrinks by the factor, with a rounding slack of one.
        quotient = len(series) / factor
        self.assertTrue(quotient == len(scaled)
                        or quotient == len(scaled) - 1)
        # Total stored size never grows under scaling.
        original_size = sum([len(t) for t in series])
        scaled_size = sum([len(t) for t in scaled])
        self.assertGreaterEqual(original_size, scaled_size)
        actual_counts = self.sum_wordcount(scaled)
        # Same vocabulary size...
        self.assertEqual(len(actual_counts), len(expected_counts))
        # ...and identical per-word totals.
        for term in expected_counts:
            self.assertEqual(actual_counts[term], expected_counts[term])
def main(): if len(sys.argv) < 4 or len(sys.argv) > 5: helper.msg( "Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n" ) exit(0) filename = sys.argv[1] top = int(sys.argv[2]) scalingfactor = int(sys.argv[3]) helper.msg("Reading the file...\n") filedata = json.load(open(filename)) helper.msg("[DONE]\n") helper.msg("Parsing the file...\n") startdate_str, freq, timeseries = filedata startdate = helper.strtodate(startdate_str) helper.msg("[DONE]\n") perminute, sorted_perminute = {}, {} for category in timeseries: helper.msg("Changing the time scale...\n") perminute[category] = helper.scaletime(timeseries[category], scalingfactor) helper.msg("[DONE]\n") tickcount = len(perminute[category]) sorted_perminute[category] = [(dict)] * tickcount helper.msg("Calculating the top topics...\n") for tick in range(tickcount): sorted_perminute[category][tick] = dict( sorted(perminute[category][tick].items(), key=lambda l: l[1], reverse=True)[:top]) #print helper.findtops(perminute[category], top) print json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)) if len(sys.argv) == 5: plotname = sys.argv[4] for category in sorted_perminute: #print sorted_perminute helper.plot(sorted_perminute[category], startdate, scalingfactor * freq, plotname + " " + category)
def main(): if len(sys.argv) < 4 or len(sys.argv) > 5: helper.msg("Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n") exit(0) filename = sys.argv[1] top = int(sys.argv[2]) scalingfactor = int(sys.argv[3]) helper.msg("Reading the file...\n") filedata = json.load(open(filename)) helper.msg("[DONE]\n") helper.msg("Parsing the file...\n") startdate_str, freq, timeseries = filedata startdate = helper.strtodate(startdate_str) helper.msg("[DONE]\n") perminute, sorted_perminute = {}, {} for category in timeseries: helper.msg("Changing the time scale...\n") perminute[category] = helper.scaletime( timeseries[category], scalingfactor) helper.msg("[DONE]\n") tickcount = len(perminute[category]) sorted_perminute[category] = [(dict)]*tickcount helper.msg("Calculating the top topics...\n") for tick in range(tickcount): sorted_perminute[category][tick] = dict( sorted(perminute[category][tick].items(), key=lambda l: l[1], reverse=True) [:top] ) #print helper.findtops(perminute[category], top) print json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)) if len(sys.argv) == 5: plotname = sys.argv[4] for category in sorted_perminute: #print sorted_perminute helper.plot(sorted_perminute[category], startdate, scalingfactor * freq, plotname + " " + category)