コード例 #1
0
ファイル: sample.py プロジェクト: alialavia/bigdata
def main():
    """Build a time series from every file in a directory and print it.

    Command line: ``sample Path SamplingFrequency``.

    Every entry of ``Path`` (sorted by name) is passed to
    ``helper.readtwitterfile``, which returns a 3-tuple.  The first
    elements are summed, the third elements are concatenated, and the
    second element of the first file is used as the start date.  The
    accumulated data is handed to ``helper.totimeseries`` and the result is
    written to standard output as a JSON array
    ``[str(startdate), samplingfreq, timeseries]``.

    Exits with status 0 after printing the usage text when the argument
    count is wrong, and with status 1 when ``Path`` contains no entries.

    Raises:
        ValueError: if ``SamplingFrequency`` is not an integer literal.
        OSError: if ``Path`` cannot be listed.
    """
    if len(sys.argv) != 3:
        helper.msg("Usage: sample Path SamplingFrequency")
        # sys.exit instead of the site-provided ``exit`` builtin, which is
        # not guaranteed to exist; the exit status is unchanged.
        sys.exit(0)

    pathname = sys.argv[1]
    samplingfreq = int(sys.argv[2])

    filelist = [os.path.join(pathname, filename)
                for filename in sorted(os.listdir(pathname))]
    if not filelist:
        # Previously this case crashed with IndexError on filelist[0].
        helper.msg("No input files found in %s" % pathname)
        sys.exit(1)

    startdate = None
    filesize, twfile = 0, []
    for index, filename in enumerate(filelist):
        # NOTE(review): the format string and its argument are passed
        # separately; presumably printinline interpolates them - confirm.
        helper.printinline("Reading %s", filename)
        (_filesize, _startdate, _twfile) = helper.readtwitterfile(filename)
        if index == 0:
            # The start date comes from the first file; capturing it here
            # avoids reading that file a second time.
            startdate = _startdate
        filesize += _filesize
        twfile += _twfile

    timeseries = helper.totimeseries(
        twfile, startdate, samplingfreq, filesize)
    # Store the metadata and the time series together as one JSON array.
    print(json.dumps((str(startdate), samplingfreq, timeseries)))
コード例 #2
0
def main():
    """Rescale a stored time series and keep the top entries per tick.

    Command line:
    ``timeanalysis TimeseriesFile Top ScalingFactor [plotname]``.

    ``TimeseriesFile`` is a JSON file holding a 3-element array
    ``[startdate_str, freq, timeseries]`` where ``timeseries`` is indexed
    by category.  Each category is passed through ``helper.scaletime`` with
    ``ScalingFactor``; for every resulting tick (a mapping), only the
    ``Top`` items with the largest values are kept.  The result is written
    to standard output as the JSON array
    ``[startdate_str, scalingfactor * freq, {category: [tick, ...]}]``.

    When ``plotname`` is given, ``helper.plot`` is additionally called once
    per category with the title ``plotname + " " + category``.

    Exits with status 0 after printing the usage text when the argument
    count is wrong.

    Raises:
        ValueError: if ``Top`` or ``ScalingFactor`` is not an integer
            literal, or the file is not valid JSON of the expected shape.
        IOError: if ``TimeseriesFile`` cannot be opened.
    """
    if len(sys.argv) < 4 or len(sys.argv) > 5:
        helper.msg(
            "Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n"
        )
        # sys.exit instead of the site-provided ``exit`` builtin, which is
        # not guaranteed to exist; the exit status is unchanged.
        sys.exit(0)
    filename = sys.argv[1]
    top = int(sys.argv[2])
    scalingfactor = int(sys.argv[3])

    helper.msg("Reading the file...\n")
    # Context manager so the file handle is closed deterministically.
    with open(filename) as infile:
        filedata = json.load(infile)
    helper.msg("[DONE]\n")
    helper.msg("Parsing the file...\n")

    startdate_str, freq, timeseries = filedata
    startdate = helper.strtodate(startdate_str)
    helper.msg("[DONE]\n")

    sorted_perminute = {}
    for category in timeseries:
        helper.msg("Changing the time scale...\n")
        scaled = helper.scaletime(timeseries[category], scalingfactor)
        helper.msg("[DONE]\n")

        helper.msg("Calculating the top topics...\n")
        # Ticks are addressed by position because the container type
        # returned by helper.scaletime is not visible from here.
        sorted_perminute[category] = [
            dict(sorted(scaled[tick].items(),
                        key=lambda item: item[1],
                        reverse=True)[:top])
            for tick in range(len(scaled))
        ]

    print(json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)))
    if len(sys.argv) == 5:
        plotname = sys.argv[4]
        for category in sorted_perminute:
            helper.plot(sorted_perminute[category], startdate,
                        scalingfactor * freq, plotname + " " + category)
コード例 #3
0
ファイル: timeanalysis.py プロジェクト: alialavia/TwitterNews
def main():
    """Rescale a stored time series and keep the top entries per tick.

    Command line:
    ``timeanalysis TimeseriesFile Top ScalingFactor [plotname]``.

    ``TimeseriesFile`` is a JSON file holding a 3-element array
    ``[startdate_str, freq, timeseries]`` where ``timeseries`` is indexed
    by category.  Each category is passed through ``helper.scaletime`` with
    ``ScalingFactor``; for every resulting tick (a mapping), only the
    ``Top`` items with the largest values are kept.  The result is written
    to standard output as the JSON array
    ``[startdate_str, scalingfactor * freq, {category: [tick, ...]}]``.

    When ``plotname`` is given, ``helper.plot`` is additionally called once
    per category with the title ``plotname + " " + category``.

    Exits with status 0 after printing the usage text when the argument
    count is wrong.

    Raises:
        ValueError: if ``Top`` or ``ScalingFactor`` is not an integer
            literal, or the file is not valid JSON of the expected shape.
        IOError: if ``TimeseriesFile`` cannot be opened.
    """
    if len(sys.argv) < 4 or len(sys.argv) > 5:
        helper.msg("Usage: timeanalysis TimeseriesFile Top ScalingFactor [plotname]\n")
        # sys.exit instead of the site-provided ``exit`` builtin, which is
        # not guaranteed to exist; the exit status is unchanged.
        sys.exit(0)
    filename = sys.argv[1]
    top = int(sys.argv[2])
    scalingfactor = int(sys.argv[3])

    helper.msg("Reading the file...\n")
    # Context manager so the file handle is closed deterministically.
    with open(filename) as infile:
        filedata = json.load(infile)
    helper.msg("[DONE]\n")
    helper.msg("Parsing the file...\n")

    startdate_str, freq, timeseries = filedata
    startdate = helper.strtodate(startdate_str)
    helper.msg("[DONE]\n")

    sorted_perminute = {}
    for category in timeseries:
        helper.msg("Changing the time scale...\n")
        scaled = helper.scaletime(timeseries[category], scalingfactor)
        helper.msg("[DONE]\n")

        helper.msg("Calculating the top topics...\n")
        # Ticks are addressed by position because the container type
        # returned by helper.scaletime is not visible from here.
        sorted_perminute[category] = [
            dict(sorted(scaled[tick].items(),
                        key=lambda item: item[1],
                        reverse=True)[:top])
            for tick in range(len(scaled))
        ]

    print(json.dumps((startdate_str, scalingfactor * freq, sorted_perminute)))
    if len(sys.argv) == 5:
        plotname = sys.argv[4]
        for category in sorted_perminute:
            helper.plot(sorted_perminute[category],
                        startdate, scalingfactor * freq, plotname + " " + category)