Example #1
from Queue import PriorityQueue, Empty  # Python 2 stdlib ("queue" in Python 3)
import random

class PoolStreamData(StreamData):
    """Buffers streamed instances in a bounded queue keyed by random
    priorities, yielding an approximate shuffle of the stream."""

    def __init__(self, feat_path, feature_maker, label_path=None, fold_in_cv=None):
        self.pool = PriorityQueue(100000)
        super(PoolStreamData, self).__init__(feat_path, feature_maker, label_path, fold_in_cv)
        self.fill_pool()

    def fill_pool(self):
        while not self.pool.full():
            try:
                ins = super(PoolStreamData, self).next()
                self.pool.put((random.random(), ins))  # random priority -> approximate shuffle
            except StopIteration:
                break

    def rewind(self):
        self.pool = PriorityQueue(100000)
        super(PoolStreamData, self).rewind()
        self.fill_pool()

    def next(self):
        try:
            (_, ins) = self.pool.get(False)  # non-blocking get
        except Empty:
            raise StopIteration
        self.fill_pool()
        return ins
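
A minimal usage sketch for Example #1; the StreamData base class, the feature_maker object, and the file paths are assumptions from the surrounding project, not shown here:

# Hypothetical usage -- feature_maker and the paths are placeholders:
data = PoolStreamData('features.txt', feature_maker, label_path='labels.txt')
while True:
    try:
        ins = data.next()  # instances arrive in pseudo-random order
    except StopIteration:
        break
data.rewind()              # reset the stream and reshuffle for another pass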
Example #2
from Queue import PriorityQueue  # Python 2 stdlib ("queue" in Python 3)

class Client(object):
    def __init__(self, conn, addr):
        self.conn = conn
        self.addr = addr
        self.thread = None
        self.requests = PriorityQueue(10)  # at most 10 pending requests
        self.alive = True

    def sendto(self, msg):
        self.conn.send(msg)

    def recvfrom(self, bits):
        return self.conn.recv(bits)

    def close(self):
        self.conn.close()

    def updateThread(self, thread):
        self.thread = thread

    def getThread(self):
        return self.thread

    def getAddr(self):
        return self.addr

    def addRequest(self, request):
        # silently drops the request when the queue is full
        if not self.requests.full():
            self.requests.put(request)

    def getRequest(self):
        if not self.empty():
            return self.requests.get()
        return None

    def empty(self):
        return self.requests.empty()

    def updateLife(self, boolean):
        self.alive = boolean

    def is_alive(self):
        return self.alive
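
A hedged usage sketch for Example #2; the socket setup and the (priority, payload) request format are illustrative, not part of the original:

# Illustrative only -- the server socket and request tuples are assumptions:
import socket

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('localhost', 9000))
server.listen(1)
conn, addr = server.accept()

client = Client(conn, addr)
client.addRequest((1, 'ping'))  # tuples sort by priority first
req = client.getRequest()       # -> (1, 'ping'), or None when empty
client.close()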
Example #3
import numpy as np
from Queue import PriorityQueue  # Python 2 stdlib ("queue" in Python 3)
# get_bow() and the Image wrapper class come from the surrounding project

def search_image(filepath, codebook, tfidf, control="with_tfidf"):
    print_detail = True
    k, _ = codebook.shape
    if print_detail:
        print "-------------------------------------------------------------------------------"
    bow = get_bow(filepath, codebook)
    if print_detail:
        print "-------------------------------------------------------------------------------"
        print "tfidf matrix shape:"
        print tfidf.shape
        print "-------------------------------------------------------------------------------"
        print "Bag of Word of: ", filepath
        print bow
        print "-------------------------------------------------------------------------------"
    _, l = tfidf.shape
    control = "no_tfidf"
    if control == "with_tfidf":
        idi = np.zeros((1, l))
        for i in range(k):
            if bow[i] != 0:
                idi = np.add(idi, tfidf[i])
        rank = list(enumerate(idi.tolist()[0]))
    else:
        bow = [float(i) / sum(bow) for i in bow.tolist()]
        rank = np.dot(np.asarray(bow), tfidf)
        rank = list(enumerate(rank.tolist()))

    # keep the 50 most similar images: evict the queue's minimum whenever a
    # more similar candidate arrives
    q = PriorityQueue(50)
    for (x, y) in rank:
        if not q.full():
            q.put(Image(y, x))
        elif y > q.queue[0].similarity():  # q.queue[0] is the current minimum
            q.get()
            q.put(Image(y, x))

    result = []
    while not q.empty():
        result.append(q.get())

    # images_data_path = "/Users/minhuigu/FoodAdvisor/app/outputs/images_data.txt"
    # images_folder = "/Users/minhuigu/Desktop/"
    # img_list = []
    # json_content = open(images_data_path).read()
    # for each in json.loads(json_content):
    #     img_list.append(images_folder + each['relpath'])
    #
    # for a in [i.id() for i in result][::]:
    #     print img_list[a]
    if print_detail:
        print "Best rank images: "
        print [i.id() for i in result][::]
        print "-------------------------------------------------------------------------------"
    # decreasing according to similarity
    return [i.id() for i in result][::]
Example #4
from Queue import PriorityQueue  # Python 2 stdlib ("queue" in Python 3)

def kMostFreqHeapThreePass(nums, k):
    freq = {}
    heap = PriorityQueue(k)  # bounded min-heap of (count, value) pairs
    # count occurrences
    for i in nums:
        freq[i] = freq.get(i, 0) + 1
    # keep the k items with the highest counts
    for key, val in freq.iteritems():
        if not heap.full():
            heap.put((val, key))
        elif heap.queue[0][0] < val:  # evict the current minimum count
            heap.get()
            heap.put((val, key))
    # drain ascending by count, then reverse for most-frequent first;
    # heap.queue itself is in heap order, not sorted order
    result = []
    while not heap.empty():
        result.append(heap.get()[1])
    return result[::-1]
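
A quick check of Example #4 (illustrative input):

print kMostFreqHeapThreePass([1, 1, 1, 2, 2, 3], 2)  # -> [1, 2]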
Example #6
from Queue import PriorityQueue  # Python 2 stdlib ("queue" in Python 3)

class CrawlerPQueue(PriorityQueue):
    """Thin wrapper that delegates to an internal, unbounded PriorityQueue
    (maxsize=-1 means no size limit) instead of the inherited state."""

    def __init__(self):
        self.pq = PriorityQueue(-1)

    def qsize(self):
        return self.pq.qsize()

    def empty(self):
        return self.pq.empty()

    def full(self):
        return self.pq.full()

    def put(self, item):
        self.pq.put(item)

    def get(self):
        return self.pq.get()
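
A small usage sketch for Example #6; the (depth, url) items are illustrative:

frontier = CrawlerPQueue()
frontier.put((1, 'http://example.com/a'))
frontier.put((0, 'http://example.com'))
while not frontier.empty():
    depth, url = frontier.get()  # lowest depth first
    print depth, url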
Example #7
from Queue import Queue, LifoQueue, PriorityQueue

q = Queue(maxsize=5)
lq = LifoQueue(maxsize=6)
pq = PriorityQueue(maxsize=5)

for i in range(5):
    q.put(i)
    lq.put(i)
    pq.put(i)

print "FIFO queue: %s, is empty: %s, size: %s, is full: %s" %(q.queue,q.empty(),q.qsize(),q.full())
print "LIFO queue: %s, is empty: %s, size: %s, is full: %s" %(lq.queue,lq.empty(),lq.qsize(),lq.full())
print "Priority queue: %s, is empty: %s, size: %s, is full: %s" %(pq.queue,pq.empty(),pq.qsize(),pq.full())

print q.get(), lq.get(), pq.get()

print "FIFO queue: %s, is empty: %s, size: %s, is full: %s" %(q.queue,q.empty(),q.qsize(),q.full())
print "LIFO queue: %s, is empty: %s, size: %s, is full: %s" %(lq.queue,lq.empty(),lq.qsize(),lq.full())
print "Priority queue: %s, is empty: %s, size: %s, is full: %s" %(pq.queue,pq.empty(),pq.qsize(),pq.full())
Example #8
from collections import defaultdict
from Queue import PriorityQueue  # Python 2 stdlib ("queue" in Python 3)
import json
# getRedis, redisServerIp, dbnum, next_time_hash, parse_starttime_hash and
# parse_id_starttime come from the surrounding fingerprinting module

def find_match(d, song_id):
    real_song_id = song_id
    find_landmark_num = 0
    match_hash_num = 0
    hash_num = 0
    result_dic = defaultdict(lambda: defaultdict(lambda: 0))
    final_id = -1
    final_delta_t = -1
    max_match_num = 0

    r = getRedis(host=redisServerIp, dbnum=dbnum)
    for h in next_time_hash(d):
        hash_num = hash_num + 1
        start_time, hash_value = parse_starttime_hash(h)
        result_str_arr = r.smembers(hash_value)

        if result_str_arr and len(result_str_arr) > 0:
            match_hash_num = match_hash_num + 1

        for str_value in result_str_arr:
            find_landmark_num = find_landmark_num + 1

            song_id, start_time_song = parse_id_starttime(str_value)
            # if song_id == '1101':
            #    continue
            delta_t = start_time_song - start_time

            result_dic[song_id][delta_t] += 1
            hash_count = (
                result_dic[song_id][delta_t]
                + result_dic[song_id].get(delta_t - 1, 0)
                + result_dic[song_id].get(delta_t + 1, 0)
            )
            if hash_count > max_match_num:
                max_match_num = hash_count
                final_id = song_id
                final_delta_t = delta_t
                break  # stop scanning this hash's matches once a new maximum is found
    top25 = 0
    second_max = 0
    second_id = 0
    real_song_hash_match = -1
    real_song_hash_match_time = -1
    song_below_20_num = 0

    # hash_thresh = 100
    #
    # for (s_id, song_value) in result_dic.iteritems():
    #     if len(song_value) < hash_thresh:
    #         song_below_20_num += 1
    print "song_below_20_num : ", song_below_20_num
    print "total_song_num : ", len(result_dic)
    top25_song_id_queue = PriorityQueue(25)
    for (s_id, song_value) in result_dic.iteritems():
        name_printed = False
        # max matched hash number per song
        max_matched_hash_number_per_song = 0
        for (dt, num) in song_value.iteritems():
            num = song_value.get(dt, 0) + song_value.get(dt - 1, 0) + song_value.get(dt + 1, 0)
            if max_matched_hash_number_per_song < num:
                max_matched_hash_number_per_song = num
            if int(real_song_id) == int(s_id):
                if real_song_hash_match < num:
                    real_song_hash_match = num
                    real_song_hash_match_time = dt
            if num > second_max and s_id != final_id:
                second_max = num
                second_id = s_id
            if num > max_match_num - max_match_num * 0.25:
                if not name_printed:
                    if s_id == final_id:
                        print "matched song"
                    print "song id: ", s_id
                    name_printed = True
                top25 = top25 + 1
                print "delta t: ", dt, "hashnum: ", num
        if top25_song_id_queue.full():
            top25_song_id_queue.get()  # unconditionally evicts the current minimum
        top25_song_id_queue.put((max_matched_hash_number_per_song, s_id), False)

    # while not top25_song_id_queue.empty():
    #     _, song_id = top25_song_id_queue.get_nowait()
    #     filename = str(final_id) + "_" + str(song_id) + ".txt"
    #     if song_id == final_id:
    #         filename = str(final_id) + "_" + str(song_id) + "_final.txt"
    #     output_file = open(filename,'w')
    #     song_result = result_dic[song_id]
    #     for delta_t, match_time in song_result.iteritems():
    #         output_file.write(str(match_time)+','+str(delta_t) + '\n')
    #     output_file.close()

    name_redis = getRedis()
    song_name = name_redis.get("song_id:" + str(final_id))
    print song_name
    print "find landmark num ", find_landmark_num
    print "highest match hash num", max_match_num
    # print "match hash num ", match_hash_num
    print "total hash num: ", hash_num
    is_match = "f"
    if str(real_song_id) == str(final_id):
        is_match = "t"
    ret = {
        "_is_match": is_match,
        "id": str(final_id),
        "delta_t": str(final_delta_t * 0.032),
        "real_song_hash_match": str(real_song_hash_match),
        "real_song_hash_match_time": str(real_song_hash_match_time),
        "real_song_id": real_song_id,
        "second_max_num": str(second_max),
        "second_id": str(second_id),
        "top25_num": str(top25),
        "total_hash_num": str(hash_num),
        "match_hash_num": str(max_match_num),
        # "id_song_hash_time": str(max_match_num),
        "song_name": song_name.decode("utf-8").encode("utf-8"),
    }

    print json.dumps(ret)
    return ret
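
The core of Example #8 is Shazam-style time-offset voting: every fingerprint hash that matches a stored song votes for the offset between its time in the song and its time in the query, and the (song, offset) cell with the most votes wins. A minimal sketch of that idea; the match tuples below are invented data:

from collections import defaultdict

votes = defaultdict(lambda: defaultdict(int))
matches = [('42', 10, 3), ('42', 20, 13), ('7', 5, 9)]  # (song_id, t_song, t_query)
for sid, t_song, t_query in matches:
    votes[sid][t_song - t_query] += 1
n, sid, dt = max((n, sid, dt) for sid, d in votes.items() for dt, n in d.items())
print sid, dt, n  # the song and time offset with the most aligned hashes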
Example #9
# Fragment from a larger solution: q, pool (a PriorityQueue(n)), res, vmx and
# imx are assumed to be defined upstream; each q[i] appears to be a stack of
# (lo, hi) pairs.
for i, it in enumerate(q):
    pool.put((it[-1], i))
    vmx, imx = max((vmx, imx), (it[-1], i))
    it.pop()
done = False
while not done:
    assert not pool.empty()
    vmn, imn = pool.get()
    if max(vmn[0], vmx[0]) > min(vmn[1], vmx[1]):
        # print 'Drop!'
        if len(q[imn]) == 0:
            done = True
            break
        vmn = q[imn][-1]
        q[imn].pop()
        assert not pool.full()
        pool.put((vmn, imn))
        vmx, imx = max((vmx, imx), (vmn, imn))
    else:
        # print 'Serv!'
        if min(vmn[1], vmx[1]) > 0:
            res += 1
        pool = PriorityQueue(n)
        vmx, imx = (-1, -1), 0
        for i, it in enumerate(q):
            if len(it) == 0:
                done = True
                break
            assert not pool.full()
            pool.put((it[-1], i))
            vmx, imx = max((vmx, imx), (it[-1], i))
Example #10
# Excerpt from a larger module: tornado, signal, platform, win32api, logger,
# monkapi, taskFactory and now() are imported or defined elsewhere.
class MonkServer(object):
    EXIT_WAIT_TIME = 3
    MAX_QUEUE_SIZE = 100000
    MAINTAIN_INTERVAL = 10000
    POLL_INTERVAL = 0.1
    EXECUTE_INTERVAL = 0.1

    def __init__(self, serverName='', config=None):
        if not config:
            self.ready = False
            return
        self.pq = PriorityQueue(self.MAX_QUEUE_SIZE)
        self.serverName = serverName
        self.lastMaintenance = now()
        self.ioLoop = tornado.ioloop.IOLoop.instance()
        self.httpServer = None
        self.port = 8888
        self.webApps = []
        self.brokers = self.init_brokers(config)
        if platform.system() == 'Windows':
            win32api.SetConsoleCtrlHandler(self._sig_handler, 1)
        else:
            signal.signal(signal.SIGINT, self._sig_handler)
            signal.signal(signal.SIGTERM, self._sig_handler)
        self.ready = True

    def _sig_handler(self, sig, frame):
        logger.warning('Caught signal : {}'.format(sig))
        self.ioLoop.add_callback(self._onexit)

    def _onexit(self):
        logger.info('stopping the server {}'.format(self.serverName))
        if self.httpServer:
            self.httpServer.stop()
        logger.info('exit in {} seconds'.format(self.EXIT_WAIT_TIME))

        #deadline = now() + self.EXIT_WAIT_TIME
        logger.info('onexit')
        self.onexit()

        logger.info('stopping ioloop')
        self.ioLoop.stop()
        for broker in self.brokers:
            logger.info('closing broker {}'.format(broker))
            broker.close()
        logger.info('stopping monkapi')
        monkapi.exits()
        #def stop_loop():
        #    logger.info('stopping loop')
        #    nowt = now()
        #    if nowt < deadline and (self.ioLoop._callbacks or self.ioLoop._timeouts):
        #        self.ioLoop.add_timeout(nowt + 1, stop_loop)
        #    else:
        #        self.ioLoop.stop()
        #        for broker in self.brokers:
        #            logger.info('closing broker')
        #            broker.close()
        #        logger.info('exiting monkapi')
        #        monkapi.exits()
        #stop_loop()
        logger.info('exited')

    def _maintain(self):
        self.maintain()
        self.ioLoop.add_timeout(now() + self.MAINTAIN_INTERVAL, self._maintain)

    def _poll(self):
        if self.pq.full():
            logger.debug('queue is full')
            self.ioLoop.add_timeout(now() + self.POLL_INTERVAL, self._poll)
        else:
            ready = filter(None, (broker.is_consumer_ready()
                                  for broker in self.brokers))
            if not ready:
                self._onexit()
                return
            taskScripts = filter(None, (broker.consume_one()
                                        for broker in self.brokers))
            for tscript in taskScripts:
                t = taskFactory.create(tscript)
                if t:
                    self.pq.put((t.priority, t), block=False)
            if taskScripts:
                #logger.debug('processing next task')
                self.ioLoop.add_callback(self._poll)
            else:
                #logger.debug('waiting on the polling')
                self.ioLoop.add_timeout(now() + self.POLL_INTERVAL, self._poll)

    def _execute(self):
        if self.pq.queue:
            try:
                priority, task = self.pq.get()
                task.act()
                logger.debug('executing {}'.format(task.name))
            except Exception as e:
                logger.debug(e.message)
                logger.debug(traceback.format_exc())
            finally:
                self.ioLoop.add_callback(self._execute)
        else:
            logger.debug('waiting for tasks {}'.format(now()))
            self.ioLoop.add_timeout(now() + self.EXECUTE_INTERVAL,
                                    self._execute)

    def add_application(self, regx, handler):
        self.webApps.append((regx, handler))

    def init_brokers(self, argvs):
        raise NotImplementedError('init_brokers must be implemented by a subclass')

    def maintain(self):
        pass

    def onexit(self):
        pass

    def run(self):
        if not self.ready:
            logger.info('server {} is not initialized properly'.format(
                self.serverName))
            return

        self.ioLoop.add_timeout(now() + self.MAINTAIN_INTERVAL, self._maintain)
        self.ioLoop.add_timeout(now() + self.POLL_INTERVAL, self._poll)
        self.ioLoop.add_timeout(now() + self.EXECUTE_INTERVAL, self._execute)

        if self.webApps:
            # fail immediately if http server can not run
            application = tornado.web.Application(self.webApps)
            self.httpServer = tornado.httpserver.HTTPServer(application)
            self.httpServer.listen(self.port)

        logger.info('{} is running'.format(self.serverName))
        self.ioLoop.start()
        logger.info('{} is exiting'.format(self.serverName))
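
A hypothetical subclass of Example #10's MonkServer, only to show the extension points; DummyBroker and the config dict are invented:

class EchoServer(MonkServer):
    def init_brokers(self, config):
        # a broker must expose is_consumer_ready(), consume_one() and close()
        return [DummyBroker(config)]

    def maintain(self):
        print 'periodic maintenance tick'

server = EchoServer(serverName='echo', config={'broker': 'local'})
server.run()  # blocks in the tornado IOLoop until a signal arrives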
Example #11
import os
import time

import numpy as np
import tensorflow as tf
from Queue import PriorityQueue  # Python 2 stdlib ("queue" in Python 3)
# FLAGS, DataReader, Code2VecModel, Option, utils and evaluate() are project-level

def train():
    reader = DataReader(os.path.join(FLAGS.data_path, FLAGS.data_set), FLAGS.embedding_bag_size)

    train_data = reader.train_dataset
    eval_data = reader.dev_dataset

    iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)

    batch_data = iterator.get_next()
    start = batch_data['start']
    path = batch_data['path']
    end = batch_data['end']
    score = batch_data['score']
    original_features = batch_data['original_features']

    train_init_op = iterator.make_initializer(train_data)
    eval_init_op = iterator.make_initializer(eval_data)

    with tf.variable_scope("code2vec_model"):
        opt = Option(reader)
        train_model = Code2VecModel(start, path, end, score, original_features, opt)
        train_op = utils.get_optimizer(FLAGS.optimizer, FLAGS.learning_rate).minimize(train_model.loss)

    with tf.variable_scope('code2vec_model', reuse=True):
        eval_opt = Option(reader, training=False)
        eval_model = Code2VecModel(start, path, end, score, original_features, eval_opt)

    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)

    with tf.Session(config=session_conf) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        min_eval_loss = PriorityQueue(maxsize=3)
        stable_min_loss = 0

        for i in range(1000):
            start_time = time.time()

            train_loss, train_acc = evaluate(sess, train_model, batch_data, train_init_op, train_op)
            eval_loss, eval_acc = evaluate(sess, eval_model, batch_data, eval_init_op)
            eval_reg_loss, eval_reg_acc = evaluate(sess, train_model, batch_data, eval_init_op)

            if not min_eval_loss.full():
                min_eval_loss.put(-eval_loss)
                stable_min_loss = 0
            else:
                k = min_eval_loss.get()
                if k >= -eval_loss:
                    stable_min_loss += 1
                else:
                    stable_min_loss = 0
                min_eval_loss.put(max(k, -eval_loss))

            if opt.classification > 0:
                tf.logging.info(
                    'Epoch %2d: train-loss: %.5f (acc=%.2f), val-loss: %.5f (acc=%.2f), min-loss: %.5f, cost: %.4f s'
                    % (i + 1, train_loss, train_acc, eval_loss, eval_acc, float(-np.mean(min_eval_loss.queue)),
                       time.time() - start_time))
            else:
                tf.logging.info(
                    'Epoch %2d: train-loss: %.5f, val-reg: %.5f, val-loss: %.5f, min-loss: %.5f, cost: %.4f s, attention_orthogonal_penalty: %.4f, fusion_penalty: %.4f, encoding_weight_L2: %.4f'
                    % (i + 1, train_loss, eval_reg_loss, eval_loss, float(-np.mean(min_eval_loss.queue)),
                       time.time() - start_time, train_model.regularizations['attention_orthogonal_penalty'].eval(),
                       train_model.regularizations['fusion_penalty'].eval(),
                       train_model.regularizations['encoding_weight_L2'].eval()))

            if stable_min_loss >= 5 and i >= 200:
                break
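
Example #11 tracks the three best validation losses with a bounded priority queue of negated losses; isolated from the training loop, the bookkeeping looks like this (the loss values are invented):

from Queue import PriorityQueue

best = PriorityQueue(maxsize=3)  # holds the 3 smallest losses, negated
for loss in [0.9, 0.7, 0.8, 0.75, 0.74]:
    if not best.full():
        best.put(-loss)
    else:
        k = best.get()           # most negative == largest tracked loss
        best.put(max(k, -loss))  # keep whichever loss is smaller
print sorted(-v for v in best.queue)  # -> [0.7, 0.74, 0.75]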