Exemple #1
0
    interSet = len(genreValue1) + len(genreValue2) - unionSet;
    #[unionSet.append(obj) for obj in (genreValue1 + genreValue2) if obj not in unionSet];

    nominator = min( len(genreValue1), len( genreValue2 ));
    return float(interSet)/ float(nominator);
    #return float(interSet)/float(unionSet)

genreMap = GenreMap.GetMap();


# weekly aggregation. 


print 'Merging weekly data...'
widx = 0;
logArr1 = DailyLog.mergeLogs( [DailyLog.createFromFile(dataDir + '/' + logfiles[fidx]) \
              for fidx in range(widx * 7 + 0,  widx * 7 + 7)]);

#logArr1 = DailyLog.createFromFile('./data/20131122.tsv');

              
print 'Computing distance...'
kvPairs = logArr1.genreDic.items();
pairwiseComp = dict([ ( (kvPairs[i][0], kvPairs[j][0]), \
                        genreMergeValue (kvPairs[i][1], kvPairs[j][1])) \
  for i in range(0, len(kvPairs)) for j in range(0, len(kvPairs)) if i<j ]);

#print pairwiseComp

print 'Rank and output results...'
sortComp = sorted(pairwiseComp.iteritems(), key=operator.itemgetter(1), reverse = True);
Exemple #2
0
# Run and get some statistics. 
# Created: Jiayu Zhou, Jan 20, 2014. 

from DailyLog import DailyLog;
from DailyLog import GenreMap;
import csv;
import time;
from os import listdir;
from jiayuUtils import *;


genreMap = GenreMap.GetMap();

log1 = DailyLog.createFromFile('./data/20131122.tsv');

# display genre dictionary. 
for key,value in log1.genreDic.items():
    print "["+key+"]", genreMap[key], ":", len(value);

print 'Number of total entries:  ' + str(log1.length());
print 'Number of entries unique: ' + str(len(log1.progDic));

dataDir = './data';
logfiles = sorted(listdir(dataDir));


## compute the overlap week by week.
for widx in range(0, len(logfiles)/7 - 1):
    print '---Weekly----'

    if ('logArr2' in locals()):