Ejemplo n.º 1
0
    def run(self):
        global work_mutex
        global work_numbers

        err_local = 0
        try:
            socket = TSocket(self.server_ip, int(self.server_port))
            transport = TFramedTransport(socket)
            transport.open()
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = ThriftNeloEventServer.Client(protocol)

            stop_flag = True
            while stop_flag:
                #read thrift from file
                f = file(file_name, 'r')
                fd_transport = TFileObjectTransport(f)
                buffered_transport = TBufferedTransport(fd_transport)
                binary_protocol = TBinaryProtocol.TBinaryProtocol(
                    buffered_transport)
                fd_transport.open()
                stop_flag = False
                try:
                    evt = ThriftNeloEvent()
                    while True:
                        evt.read(binary_protocol)
                        #send the log to each project name
                        for prjName, logCnt in prj_dict.items():
                            try:
                                if logCnt_dict.has_key(prjName):
                                    if int(logCnt_dict[prjName]) < int(logCnt):
                                        evt.projectName = prjName
                                        evt.sendTime = int(time.time() * 1000)
                                        err = client.ackedAppend(evt)
                                        tps_remember(err)
                                        err_local += err
                                        logCnt_dict[
                                            prjName] = logCnt_dict[prjName] + 1
                                        stop_flag = True
                                else:
                                    evt.projectName = prjName
                                    err = client.ackedAppend(evt)
                                    tps_remember(err)
                                    err_local += err
                                    logCnt_dict[prjName] = 1
                                    stop_flag = True
                            except TException, msg:
                                print msg, prjName
                except EOFError, msg:
                    buffered_transport.close()  #close the transport
                    stop_flag = True

            work_mutex.acquire()
            work_numbers -= 1
            work_mutex.release()

            socket.close()
Ejemplo n.º 2
0
    def run(self):
        global work_mutex
        global work_numbers
                
        err_local = 0
        try:
    	    socket = TSocket(self.server_ip, int(self.server_port))
            transport = TFramedTransport(socket)
            transport.open()
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = ThriftNeloEventServer.Client(protocol)   


            stop_flag = True
            while stop_flag:
    	        #read thrift from file
                f = file(file_name, 'r')
                fd_transport = TFileObjectTransport(f)
                buffered_transport = TBufferedTransport(fd_transport)
                binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered_transport)
                fd_transport.open()
                stop_flag = False
                try:
                    evt = ThriftNeloEvent()
                    while True:
                        evt.read(binary_protocol)
				        #send the log to each project name
                        for prjName, logCnt in prj_dict.items():
                            try:
                                if logCnt_dict.has_key(prjName):
                                    if int(logCnt_dict[prjName]) < int(logCnt):
                                        evt.projectName = prjName
                                        evt.sendTime = int(time.time() * 1000)
                                        err = client.ackedAppend(evt)
                                        tps_remember(err)
                                        err_local += err
                                        logCnt_dict[prjName] = logCnt_dict[prjName] + 1
                                        stop_flag = True
                                else:
                                    evt.projectName = prjName
                                    err = client.ackedAppend(evt)
                                    tps_remember(err)
                                    err_local += err
                                    logCnt_dict[prjName] = 1
                                    stop_flag = True
                            except TException, msg:
                                print msg, prjName
                except EOFError,msg:
                    buffered_transport.close() #close the transport
                    stop_flag = True

            work_mutex.acquire()
            work_numbers -= 1
            work_mutex.release()
 
            socket.close()
def get_mention_from_wikilink_thrift_file(fn):
    f = open(args.thrift_data_dir + '/%03d' % fn)
    out_val = defaultdict(list)
    p = TBinaryProtocol.TBinaryProtocolAccelerated(TFileObjectTransport(f))
    pp = WikiLinkItem()
    while True:
        try:
            pp.read(p)
        except EOFError:
            break
        for m in pp.mentions:
            c = m.context
            if c is not None:
                url = simplify_wiki_url(m.wiki_url)
                # Follow url redirect.
                try:
                    url = redirect[hash(url)]
                except KeyError:
                    pass
                if url in POOL:
                    # if c.left.startswith('the Musical August 30th, 2009 | Author: operator Shrek the Musical is a musical with music by Jeanine Tesori and a book and lyrics'):
                    #     print 1, fn
                    out_val[url].append([c.left, c.middle, c.right])
    print fn, len(out_val), sum(len(e) for e in out_val.itervalues())
    out_val.default_factory = None  # FINALIZE out_val
    return out_val
Ejemplo n.º 4
0
    def _process_socket(self, client, address):
        """A greenlet for handling a single client."""
        client = TFileObjectTransport(client.makefile())
        itrans = self.inputTransportFactory.getTransport(client)
        otrans = self.outputTransportFactory.getTransport(client)
        iprot = self.inputProtocolFactory.getProtocol(itrans)
        oprot = self.outputProtocolFactory.getProtocol(otrans)
        try:
            while True:
                self.processor.process(iprot, oprot)
        except EOFError:
            pass
        except Exception:
            self.log.exception("caught exception while processing thrift request")

        itrans.close()
        otrans.close()
Ejemplo n.º 5
0
    def _process_socket(self, client, address):
        """A greenlet for handling a single client."""
        client = TFileObjectTransport(client.makefile())
        itrans = self.inputTransportFactory.getTransport(client)
        otrans = self.outputTransportFactory.getTransport(client)
        iprot = self.inputProtocolFactory.getProtocol(itrans)
        oprot = self.outputProtocolFactory.getProtocol(otrans)
        try:
            while True:
                self.processor.process(iprot, oprot)
        except EOFError:
            pass
        except Exception:
            self.log.exception(
                "caught exception while processing thrift request")

        itrans.close()
        otrans.close()
Ejemplo n.º 6
0
 def _handle_request(self, listener_name, sock, addr):
     client = TFileObjectTransport(sock.makefile())
     itrans = self.tfactory.getTransport(client)
     otrans = self.tfactory.getTransport(client)
     iprot = self.pfactory.getProtocol(itrans)
     oprot = self.pfactory.getProtocol(otrans)
     try:
         while True:
             (name, type, seqid) = iprot.readMessageBegin()
             request_start = time.time()
             try:
                 timeout_con = Timeout(self.cfg.timeout, Timeout)
                 timeout_con.start()
                 if name not in self.wsgi._processMap:
                     iprot.skip(TType.STRUCT)
                     iprot.readMessageEnd()
                     x = TApplicationException(
                         TApplicationException.UNKNOWN_METHOD, "Unknown function %s" % (name))
                     oprot.writeMessageBegin(
                         name, TMessageType.EXCEPTION, seqid)
                     x.write(oprot)
                     oprot.writeMessageEnd()
                     oprot.trans.flush()
                     raise ThriftFuncNotFound
                 else:
                     self.wsgi._processMap[name](self.wsgi, seqid, iprot, oprot)
             except ThriftFuncNotFound, ex:
                 self.log.error("Unknown function %s" % (name))
                 self.log.access(
                     addr, name, "FUNC_NOT_FOUND", time.time() - request_start)
                 break
             except Timeout, ex:
                 self.log.error("A greenlet process timeout.")
                 self.log.access(
                     addr, name, "TIMEOUT", time.time() - request_start)
                 break
Ejemplo n.º 7
0
def read_thrift(file_obj, ttype):
    """Read a thrift structure from the given fo."""
    from thrift.transport.TTransport import TFileObjectTransport, TBufferedTransport
    starting_pos = file_obj.tell()

    # set up the protocol chain
    ft = TFileObjectTransport(file_obj)
    bufsize = 2 ** 16
    # for accelerated reading ensure that we wrap this so that the CReadable transport can be used.
    bt = TBufferedTransport(ft, bufsize)
    pin = TCompactProtocol(bt)

    # read out type
    obj = ttype()
    obj.read(pin)

    # The read will actually overshoot due to the buffering that thrift does. Seek backwards to the correct spot,.
    buffer_pos = bt.cstringio_buf.tell()
    ending_pos = file_obj.tell()
    blocks = ((ending_pos - starting_pos) // bufsize) - 1
    if blocks < 0:
        blocks = 0
    file_obj.seek(starting_pos + blocks * bufsize + buffer_pos)
    return obj
Ejemplo n.º 8
0
arg_parser.add_argument('--out_fn',
                        default='data/wiki_link_url_counts.pkl',
                        type=str)
args = arg_parser.parse_args()
import sys
sys.path.append(args.thrift_class_dir)
from edu.umass.cs.iesl.wikilink.expanded.data.constants import WikiLinkItem

pp = WikiLinkItem()

from collections import defaultdict
out_val = defaultdict(int)

for fn in xrange(1, 110):
    with open(args.thrift_data_dir + '/%03d' % fn) as f:
        p = TBinaryProtocol.TBinaryProtocolAccelerated(TFileObjectTransport(f))
        print fn, len(out_val)
        while True:
            try:
                pp.read(p)
            except EOFError:
                break
            for m in pp.mentions:
                c = m.context
                if c is not None:
                    url = m.wiki_url
                    out_val[url] += 1

import cPickle as pickle
with open(args.out_fn, 'wb') as out_f:
    pickle.dump(dict(out_val), out_f, -1)