# NOTE(review): the statement on the next line is the closing part of a
# function whose header lies outside this chunk, so it is reproduced exactly
# as found and not rewritten.  As written, everything after its first `#` is
# a comment, so the `nominator` assignment and the `return` never execute.
# When the function is restored, be aware that `nominator` is 0 whenever
# either argument is empty, which makes the division raise ZeroDivisionError.
interSet = len(genreValue1) + len(genreValue2) - unionSet; #[unionSet.append(obj) for obj in (genreValue1 + genreValue2) if obj not in unionSet]; nominator = min( len(genreValue1), len( genreValue2 )); return float(interSet)/ float(nominator); #return float(interSet)/float(unionSet)

# ---------------------------------------------------------------------------
# Script: merge one week of daily logs, score every pair of genreDic entries
# with genreMergeValue, and rank the pairs by score (highest first).
# Written so that it runs unchanged on Python 2.7 and Python 3.
# ---------------------------------------------------------------------------
genreMap = GenreMap.GetMap()

# Weekly aggregation: merge the seven daily logs that make up week `widx`.
print('Merging weekly data...')
widx = 0
logArr1 = DailyLog.mergeLogs([
    DailyLog.createFromFile(dataDir + '/' + logfiles[fidx])
    for fidx in range(widx * 7 + 0, widx * 7 + 7)
])
# Single-day alternative kept from the original for quick experiments:
# logArr1 = DailyLog.createFromFile('./data/20131122.tsv')

print('Computing distance...')
# Materialise the items so they can be sliced on Python 3 as well, where
# dict.items() returns a view and not a list.
kvPairs = list(logArr1.genreDic.items())
# Visit each unordered pair exactly once (second index always greater than
# the first); this replaces scanning all n*n index pairs and filtering i<j.
pairwiseComp = {
    (keyA, keyB): genreMergeValue(valueA, valueB)
    for i, (keyA, valueA) in enumerate(kvPairs)
    for keyB, valueB in kvPairs[i + 1:]
}
# print(pairwiseComp)

print('Rank and output results...')
# List of ((keyA, keyB), score) tuples, highest score first.
sortComp = sorted(pairwiseComp.items(),
                  key=operator.itemgetter(1),
                  reverse=True)
# Run and get some statistics.
# Created: Jiayu Zhou, Jan 20, 2014.
#
# Loads one daily log, prints per-genre entry counts and overall totals, then
# starts a week-by-week pass over every log file found in the data directory.
# Written so that it runs unchanged on Python 2.7 and Python 3.

from DailyLog import DailyLog
from DailyLog import GenreMap
import csv
import time
from os import listdir
from jiayuUtils import *

genreMap = GenreMap.GetMap()

log1 = DailyLog.createFromFile('./data/20131122.tsv')

# Display the genre dictionary: "[key] <genreMap entry> : <number of values>".
for key, value in log1.genreDic.items():
    print('[%s] %s : %d' % (key, genreMap[key], len(value)))

print('Number of total entries: ' + str(log1.length()))
print('Number of entries unique: ' + str(len(log1.progDic)))

dataDir = './data'
# Sorted so that consecutive runs of seven files form one week; this presumes
# the file names sort chronologically (e.g. YYYYMMDD.tsv) -- TODO confirm.
logfiles = sorted(listdir(dataDir))

## Compute the overlap week by week.
# `//` keeps the bound an integer on Python 3 (on Python 2 the result is the
# same as the original `/`).  The loop stops one short of the number of whole
# weeks.
for widx in range(0, len(logfiles) // 7 - 1):
    print('---Weekly----')
    if ('logArr2' in locals()):
        # NOTE(review): the body of this branch continues beyond the visible
        # chunk; nothing is assumed about it here.  `pass` only keeps this
        # fragment syntactically valid until the remainder is restored.
        pass