コード例 #1
0
def add_data():
    global users
    global trainData
    checkUserPartitionMapping()
    mcl = pm.MongoClient('10.137.172.201:27017')        
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                      req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                      ack_timeout=200)
    coll = mcl.DataSet['PMLExpression']

    for ent in coll.find(None, {'_id':True, 'userId':True}, timeout=False):
        entity = str(ent['_id'])
        user = ent['userId']
        if ent['_id'] in trainData[user]:
            encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                               'user':user,
                                               'entity':entity,
                                               'operation':'add_data'})
            print producer.send(user, encodedMessage)
        
    for user, partitionId in users.iteritems():
        encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                           'user':user,
                                           'operation':'save_turtle'})
        print producer.send(user, encodedMessage)
    mcl.close()
コード例 #2
0
def add_users():
    global users
    
    mcl = pm.MongoClient('10.137.172.201:27017')
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                            req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                            ack_timeout=200)
    coll = mcl.DataSet['PMLExpression']
    
    for ent in coll.find(None, {'_id':True, 'userId':True}, timeout=False):
        follower = ent['userId']
        if follower not in users:
            encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                               'user':'******',
                                               'follower':follower,
                                               'operation':'add_user'})
            print producer.send(follower, encodedMessage)
    
    userColl = mcl.DataSet['PMLUsers']
    if users:
        for userId, partitionId in users.iteritems():            
            u = userColl.find_one({'userId':userId}, {'userId':userId}, timeout=False)
            if not u:
                userColl.insert({'userId':userId, 'partitionId':partitionId});
コード例 #3
0
def train(numIters):
    global users
    checkUserPartitionMapping()
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                      req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                      ack_timeout=200)
    for i in range(numIters):
        for user, partitionId in users.iteritems():
            if user == ''  or user == 'monk':
                continue
            encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                               'user':user,
                                               'operation':'train'})
            print i, producer.send(user, encodedMessage)
    
    producer.stop(1)
    kafka.close()
コード例 #4
0
def set_mantis_parameter(para, value):
    global users
    checkUserPartitionMapping()
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                      req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                      ack_timeout=200)
    for user, partitionId in users.iteritems():
#        if not partitionId == 4:
#            continue
        encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                           'user':user,
                                           'operation':'set_mantis_parameter',
                                           'para':para,
                                           'value':value})
        print producer.send(user, encodedMessage)
    
    producer.stop(1)
    kafka.close()
コード例 #5
0
def reset():
    global users
    checkUserPartitionMapping()
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                      req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                      ack_timeout=200)

    for user, partitionId in users.iteritems():            
        encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                           'user':user,
                                           'operation':'reset'})
        print producer.send(user, encodedMessage)
    
#    users['monk'] = 8
#    encodedMessage = simplejson.dumps({'turtleName':turtleName,
#                                       'user':'******',
#                                       'operation':'reset'})
#    print producer.send('monk', encodedMessage)
    producer.stop(1)
    kafka.close()
コード例 #6
0
def test(isPersonalized):
    global users
    global testData
    checkUserPartitionMapping()
    mcl = pm.MongoClient('10.137.172.201:27017')        
    kafka = KafkaClient(kafkaHost, timeout=None)
    producer = UserProducer(kafka, kafkaTopic, users, partitions, async=False,
                      req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                      ack_timeout=200)
    
    for user, partitionId in users.iteritems():
        if user != u'':
            for dataID in testData[user]:
                entity = str(dataID)
                encodedMessage = simplejson.dumps({'turtleName':turtleName,
                                                   'user':user,
                                                   'entity':entity,
                                                   'isPersonalized':isPersonalized,
                                                   'operation':'test_data'})
                print producer.send(user, encodedMessage)                      
                
    mcl.close()
コード例 #7
0
def train(numIters):
    global users
    try:
        mcl = pm.MongoClient('10.137.168.196:27017')
        userColl = mcl.DataSet['PMLUsers']
        users = {user['userId']:user['partitionId'] for user in userColl.find()}
        mcl.close()
        kafka = KafkaClient('mozo.cloudapp.net:9092', timeout=None)
        producer = UserProducer(kafka, kafkaTopic, users, async=False,
                          req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                          ack_timeout=200)
        for userId, partitionId in users.iteritems():   
            if userId in UoI.keys():    
                for i in range(numIters):                         
                    #print "iteration " + str(i)
                    encodedMessage = simplejson.dumps({'turtleId':turtleId,
                                                   'userId':userId,
                                                   'operation':'train_one'})
                    print i, producer.send(userId, encodedMessage)
    finally:
        producer.stop()
        kafka.close()
コード例 #8
0
def add_data():
    global users
    try:
        mcl = pm.MongoClient('10.137.168.196:27017')
        kafka = KafkaClient('mozo.cloudapp.net:9092', timeout=None)
        producer = UserProducer(kafka, kafkaTopic, users, parts, async=False,
                          req_acks=UserProducer.ACK_AFTER_LOCAL_WRITE,
                          ack_timeout=200)
        coll = mcl.DataSet['PMLExpression']
        ii = 0      # max is 151413 (number of doc in PMLExpression)
        for ent in coll.find({'userId': {'$in': UoI.keys()}}, {'_id':True, 'userId':True}, timeout=False):
            
            ii += 1
            entity = str(ent['_id'])
            userId = ent['userId']
            if (stop_add_data(userId)):
                continue
            UoI[userId] += 1
            encodedMessage = simplejson.dumps({'turtleId':turtleId,
                                               'userId':userId,
                                               'entity':entity,
                                               'operation':'add_data'})
            print producer.send(userId, encodedMessage)
            
        for userId, partitionId in users.iteritems():
            encodedMessage = simplejson.dumps({'turtleId':turtleId,
                                               'userId':userId,
                                               'operation':'save_one'})
            print producer.send(userId, encodedMessage)
        userColl = mcl.DataSet['PMLUsers']
        if users:
            userColl.insert([{'userId':userId, 'partitionId':partitionId} for userId, partitionId in users.iteritems()])
    finally:
        producer.stop()
        mcl.close()
        kafka.close()