def add_data(): global users global trainData checkUserPartitionMapping() mcl = pm.MongoClient('10.137.172.201:27017') kafka = KafkaClient(kafkaHost, timeout=None) producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) coll = mcl.DataSet['PMLExpression'] for ent in coll.find(None, {'_id':True, 'userId':True}, timeout=False): entity = str(ent['_id']) user = ent['userId'] if ent['_id'] in trainData[user]: encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':user, 'entity':entity, 'operation':'add_data'}) print producer.send(user, encodedMessage) for user, partitionId in users.iteritems(): encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':user, 'operation':'save_turtle'}) print producer.send(user, encodedMessage) mcl.close()
def add_users(): global users mcl = pm.MongoClient('10.137.172.201:27017') kafka = KafkaClient(kafkaHost, timeout=None) producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) coll = mcl.DataSet['PMLExpression'] for ent in coll.find(None, {'_id':True, 'userId':True}, timeout=False): follower = ent['userId'] if follower not in users: encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':'******', 'follower':follower, 'operation':'add_user'}) print producer.send(follower, encodedMessage) userColl = mcl.DataSet['PMLUsers'] if users: for userId, partitionId in users.iteritems(): u = userColl.find_one({'userId':userId}, {'userId':userId}, timeout=False) if not u: userColl.insert({'userId':userId, 'partitionId':partitionId});
def train(numIters): global users checkUserPartitionMapping() kafka = KafkaClient(kafkaHost, timeout=None) producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) for i in range(numIters): for user, partitionId in users.iteritems(): if user == '' or user == 'monk': continue encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':user, 'operation':'train'}) print i, producer.send(user, encodedMessage) producer.stop(1) kafka.close()
def set_mantis_parameter(para, value): global users checkUserPartitionMapping() kafka = KafkaClient(kafkaHost, timeout=None) producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) for user, partitionId in users.iteritems(): # if not partitionId == 4: # continue encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':user, 'operation':'set_mantis_parameter', 'para':para, 'value':value}) print producer.send(user, encodedMessage) producer.stop(1) kafka.close()
def reset(): global users checkUserPartitionMapping() kafka = KafkaClient(kafkaHost, timeout=None) producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) for user, partitionId in users.iteritems(): encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':user, 'operation':'reset'}) print producer.send(user, encodedMessage) # users['monk'] = 8 # encodedMessage = simplejson.dumps({'turtleName':turtleName, # 'user':'******', # 'operation':'reset'}) # print producer.send('monk', encodedMessage) producer.stop(1) kafka.close()
def test(isPersonalized): global users global testData checkUserPartitionMapping() mcl = pm.MongoClient('10.137.172.201:27017') kafka = KafkaClient(kafkaHost, timeout=None) producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) for user, partitionId in users.iteritems(): if user != u'': for dataID in testData[user]: entity = str(dataID) encodedMessage = simplejson.dumps({'turtleName':turtleName, 'user':user, 'entity':entity, 'isPersonalized':isPersonalized, 'operation':'test_data'}) print producer.send(user, encodedMessage) mcl.close()
def train(numIters): global users try: mcl = pm.MongoClient('10.137.168.196:27017') userColl = mcl.DataSet['PMLUsers'] users = {user['userId']:user['partitionId'] for user in userColl.find()} mcl.close() kafka = KafkaClient('mozo.cloudapp.net:9092', timeout=None) producer = UserProducer(kafka, kafkaTopic, users, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) for userId, partitionId in users.iteritems(): if userId in UoI.keys(): for i in range(numIters): #print "iteration " + str(i) encodedMessage = simplejson.dumps({'turtleId':turtleId, 'userId':userId, 'operation':'train_one'}) print i, producer.send(userId, encodedMessage) finally: producer.stop() kafka.close()
def add_data(): global users try: mcl = pm.MongoClient('10.137.168.196:27017') kafka = KafkaClient('mozo.cloudapp.net:9092', timeout=None) producer = UserProducer(kafka, kafkaTopic, users, parts, async=False, req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE, ack_timeout=200) coll = mcl.DataSet['PMLExpression'] ii = 0 # max is 151413 (number of doc in PMLExpression) for ent in coll.find({'userId': {'$in': UoI.keys()}}, {'_id':True, 'userId':True}, timeout=False): ii += 1 entity = str(ent['_id']) userId = ent['userId'] if (stop_add_data(userId)): continue UoI[userId] += 1 encodedMessage = simplejson.dumps({'turtleId':turtleId, 'userId':userId, 'entity':entity, 'operation':'add_data'}) print producer.send(userId, encodedMessage) for userId, partitionId in users.iteritems(): encodedMessage = simplejson.dumps({'turtleId':turtleId, 'userId':userId, 'operation':'save_one'}) print producer.send(userId, encodedMessage) userColl = mcl.DataSet['PMLUsers'] if users: userColl.insert([{'userId':userId, 'partitionId':partitionId} for userId, partitionId in users.iteritems()]) finally: producer.stop() mcl.close() kafka.close()