def setUp(self):
    """Prepare the state the tests in this class rely on.

    Points ``config_global.cassandra_default_keyspace`` at ``'unitTest'``,
    instantiates ``Setup_Keyspaces`` and ``DistributedCountersMigration``,
    and stores a ``distributedCounters`` object built from one sample
    impression (with ``timestamp=1``) on ``self.dC``.
    """
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement.
    print("setting up database")
    config_global.cassandra_default_keyspace = 'unitTest'

    # Neither instance is used again in this method; presumably the
    # constructors perform the keyspace / migration work -- TODO confirm.
    keyspace_setup = Setup_Keyspaces()
    counter_migration = DistributedCountersMigration()

    sample_impression = {
        'item_url': u'http://www.ksta.de/html/fotolines/1317623400963/rahmen.shtml?1',
        'item_recommendable': True,
        'item_title': u'Die Hoch\xadzeit der Herzogin von Alba',
        'item_created': 1318402695,
        'impression_id': 100001,
        'item_id': u'51079963',
        'config_recommend': True,
        'client_id': 49217133,
        'msg': u'impression',
        'domain_id': u'418',
    }
    self.dC = distributedCounters(sample_impression, timestamp=1)
def testGetMostImportant(self):
    """Record seven sample impressions, then print ``getMostImportant`` results.

    Each impression is wrapped in its own ``distributedCounters`` object and
    counted with ``increment_by_collumn()``. Afterwards ``self.dC`` is
    queried three times for the ``'user_ids'`` dimension and every result is
    printed. Nothing is asserted; the output has to be inspected by hand.
    """

    def build_impression(item_created, impression_id, item_id, client_id,
                         **extra):
        # Every sample shares 'config_recommend' and 'msg'; only the four
        # positional fields (plus optional extras) differ between them.
        event = {
            'item_created': item_created,
            'impression_id': impression_id,
            'item_id': item_id,
            'config_recommend': True,
            'client_id': client_id,
            'msg': u'impression',
        }
        event.update(extra)
        return event

    # (impression, timestamp) pairs, in the order they are counted.
    samples = [
        (build_impression(1318402695, 100001, u'1', 10), 1),
        # original marker after the first sample: "user id 1"
        (build_impression(1318402696, 100002, u'2', 10), 2),
        # original marker after the second sample: "user id 2"
        (build_impression(1318402697, 100002, u'2', 20), 2),
        (build_impression(1318402698, 100002, u'2', 20), 2),
        (build_impression(1318402698, 100002, u'2', 20), 2),
        (build_impression(1318402698, 100002, u'2', 30), 2),
        (build_impression(1318402696, 100002, u'2', 2, domain_id=u'418'), 4000),
    ]
    for impression, timestamp in samples:
        counter = distributedCounters(impression, timestamp)
        counter.increment_by_collumn()

    # Positional arguments: dimension, binSize, timestampFrom[, timestampTo].
    # The open-ended ('user_ids', 'minutes', 0) query was already disabled
    # in the original and stays out.
    queries = [
        ('user_ids', 'minutes', 0, 5),
        ('user_ids', 'hours', 0),
        ('user_ids', 'hours', 0, 2),
    ]
    for query_args in queries:
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement.
        print("most important")
        print(self.dC.getMostImportant(*query_args))
print "user_id:\t" + str(flattenedJson['client_id']) except: print "ItemsByUser: no item given" try: #uBI = UserByItem(item_id = flattenedJson['item_id']) #uBI.save( user_id = flattenedJson['client_id'] ) pass except: print "UsersByItem: no user given" if (SAVE_DISTRIBUTED_COUNTER): try: dC = distributedCounters(flattenedJson, timestamp_sec) dC.increment_by_collumn() """ now we are doing some general statistics: most important users most important items """ except: if(debug): print flattenedJson print "problem with distributed counters" """ dimList = ['client_id', 'item_id']
# Report how many entries userList holds.
# (A parenthesised single argument prints the same text under the
# Python 2 print statement.)
print(len(userList))

# The loop body is deliberately empty: the per-element debug output
# (the element, the length of its entry and the entry itself) is disabled.
for userList_element in userList:
    pass

# second_computation held a start time taken earlier (outside this
# excerpt); turn it into the elapsed time and report it.
second_computation = time.time() - second_computation
print(second_computation)

# NOTE(review): timestampFrom is assigned but the query below passes the
# literal bounds 366560 and 366561 instead -- confirm which is intended.
dimension, binSize, timestampFrom = 'user_ids', 'hours', 0
print("most important")
dC = distributedCounters()
most_important_users = dC.getMostImportant(dimension, binSize, 366560, 366561)

# Disabled debug output: for each key of most_important_users print the
# key and its value, then print the number of entries and the whole result.