def main():
    """Sample every file of a directory into one JSON time series.

    Command line: ``sample Path SamplingFrequency``.

    Every entry of ``Path`` is read, in sorted name order, with
    ``helper.readtwitterfile``.  The sizes and records it returns are
    accumulated and passed to ``helper.totimeseries`` together with the
    start date reported for the first file and the sampling frequency.
    The result is printed to stdout as the JSON array
    ``[str(startdate), samplingfreq, timeseries]``.

    Exits with status 2 when the argument count is wrong and with status 1
    when ``Path`` has no entries.

    Raises:
        ValueError: if ``SamplingFrequency`` cannot be parsed by ``int``.
        OSError: if ``Path`` cannot be listed.
    """
    if len(sys.argv) != 3:
        helper.msg("Usage: sample Path SamplingFrequency\n")
        # sys.exit rather than the interactive-only site ``exit``; a usage
        # error must not report success to the calling shell.
        sys.exit(2)

    pathname = sys.argv[1]
    samplingfreq = int(sys.argv[2])

    filelist = [os.path.join(pathname, filename)
                for filename in sorted(os.listdir(pathname))]
    if not filelist:
        # Without this guard the start date lookup below has nothing to
        # read and the script would die with a bare IndexError.
        helper.msg("No files found in %s\n" % pathname)
        sys.exit(1)

    startdate = None
    filesize, twfile = 0, []
    for index, filename in enumerate(filelist):
        helper.printinline("Reading %s", filename)
        (_filesize, _startdate, _twfile) = helper.readtwitterfile(filename)
        if index == 0:
            # The series starts at the first file's date; taking it here
            # avoids reading that file a second time just for the date.
            startdate = _startdate
        filesize += _filesize
        twfile += _twfile

    timeseries = helper.totimeseries(
        twfile, startdate, samplingfreq, filesize)

    # Store the metadata and the time series together as one tuple.
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(json.dumps((str(startdate), samplingfreq, timeseries)))
def main():
    """Reduce a time-series file to the top entries of each rescaled tick.

    Command line:
    ``timeanalysis TimeseriesFile Top ScalingFactor [plotname]``.

    ``TimeseriesFile`` is a JSON file holding
    ``[startdate, freq, timeseries]``, where ``timeseries`` is keyed by
    category.  Each category's series is rescaled with
    ``helper.scaletime`` using ``ScalingFactor``; every tick of the
    rescaled series is then cut down to the ``Top`` items with the largest
    values.  The result is printed to stdout as the JSON array
    ``[startdate, ScalingFactor * freq, tops_by_category]``.

    When ``plotname`` is given, ``helper.plot`` is additionally called once
    per category with ``"<plotname> <category>"`` as its last argument.

    Exits with status 2 when the argument count is wrong.

    Raises:
        ValueError: if ``Top`` or ``ScalingFactor`` cannot be parsed by
            ``int``, or if the file is not valid JSON.
        IOError: if ``TimeseriesFile`` cannot be opened.
    """
    if not 4 <= len(sys.argv) <= 5:
        helper.msg(
            "Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n"
        )
        # sys.exit rather than the interactive-only site ``exit``; a usage
        # error must not report success to the calling shell.
        sys.exit(2)

    filename = sys.argv[1]
    top = int(sys.argv[2])
    scalingfactor = int(sys.argv[3])

    helper.msg("Reading the file...\n")
    # Context manager so the handle is closed even if parsing fails.
    with open(filename) as infile:
        filedata = json.load(infile)
    helper.msg("[DONE]\n")

    helper.msg("Parsing the file...\n")
    startdate_str, freq, timeseries = filedata
    startdate = helper.strtodate(startdate_str)
    helper.msg("[DONE]\n")

    sorted_perminute = {}
    for category in timeseries:
        helper.msg("Changing the time scale...\n")
        scaled = helper.scaletime(timeseries[category], scalingfactor)
        helper.msg("[DONE]\n")

        helper.msg("Calculating the top topics...\n")
        # Indexed by position on purpose: the result of helper.scaletime is
        # only known here to support len() and integer indexing.
        sorted_perminute[category] = [
            dict(sorted(scaled[tick].items(),
                        key=lambda item: item[1],
                        reverse=True)[:top])
            for tick in range(len(scaled))
        ]

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)))

    if len(sys.argv) == 5:
        plotname = sys.argv[4]
        for category in sorted_perminute:
            helper.plot(sorted_perminute[category], startdate,
                        scalingfactor * freq, plotname + " " + category)
def main():
    """Reduce a time-series file to the top entries of each rescaled tick.

    Command line:
    ``timeanalysis TimeseriesFile Top ScalingFactor [plotname]``.

    ``TimeseriesFile`` is a JSON file holding
    ``[startdate, freq, timeseries]``, where ``timeseries`` is keyed by
    category.  Each category's series is rescaled with
    ``helper.scaletime`` using ``ScalingFactor``; every tick of the
    rescaled series is then cut down to the ``Top`` items with the largest
    values.  The result is printed to stdout as the JSON array
    ``[startdate, ScalingFactor * freq, tops_by_category]``.

    When ``plotname`` is given, ``helper.plot`` is additionally called once
    per category with ``"<plotname> <category>"`` as its last argument.

    Exits with status 2 when the argument count is wrong.

    Raises:
        ValueError: if ``Top`` or ``ScalingFactor`` cannot be parsed by
            ``int``, or if the file is not valid JSON.
        IOError: if ``TimeseriesFile`` cannot be opened.
    """
    if not 4 <= len(sys.argv) <= 5:
        helper.msg(
            "Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n"
        )
        # sys.exit rather than the interactive-only site ``exit``; a usage
        # error must not report success to the calling shell.
        sys.exit(2)

    filename = sys.argv[1]
    top = int(sys.argv[2])
    scalingfactor = int(sys.argv[3])

    helper.msg("Reading the file...\n")
    # Context manager so the handle is closed even if parsing fails.
    with open(filename) as infile:
        filedata = json.load(infile)
    helper.msg("[DONE]\n")

    helper.msg("Parsing the file...\n")
    startdate_str, freq, timeseries = filedata
    startdate = helper.strtodate(startdate_str)
    helper.msg("[DONE]\n")

    sorted_perminute = {}
    for category in timeseries:
        helper.msg("Changing the time scale...\n")
        scaled = helper.scaletime(timeseries[category], scalingfactor)
        helper.msg("[DONE]\n")

        helper.msg("Calculating the top topics...\n")
        # Indexed by position on purpose: the result of helper.scaletime is
        # only known here to support len() and integer indexing.
        sorted_perminute[category] = [
            dict(sorted(scaled[tick].items(),
                        key=lambda item: item[1],
                        reverse=True)[:top])
            for tick in range(len(scaled))
        ]

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)))

    if len(sys.argv) == 5:
        plotname = sys.argv[4]
        for category in sorted_perminute:
            helper.plot(sorted_perminute[category], startdate,
                        scalingfactor * freq, plotname + " " + category)