Ejemplo n.º 1
0
def checkRead(o):
    """Round-trip ``o`` through the accelerated binary reader.

    ``o`` is serialized once with the plain ``TBinaryProtocol``.  The
    resulting bytes are then decoded with ``TBinaryProtocolAccelerated``
    twice: first straight from a memory buffer, then from a memory buffer
    wrapped in a buffered transport.  Whenever a decoded copy differs from
    ``o`` both objects are pretty-printed.
    """
    writer = TBinaryProtocol.TBinaryProtocol(TTransport.TMemoryBuffer())
    o.write(writer)
    slow_version_binary = writer.trans.getvalue()

    # Transports are built lazily so each one is created right before use.
    transport_builders = (
        lambda: TTransport.TMemoryBuffer(slow_version_binary),
        lambda: TTransport.TBufferedTransport(
            TTransport.TMemoryBuffer(slow_version_binary)),
    )
    for build_transport in transport_builders:
        reader = TBinaryProtocol.TBinaryProtocolAccelerated(build_transport())
        decoded = o.__class__()
        decoded.read(reader)
        if decoded != o:
            print("copy: ")
            pprint(eval(repr(decoded)))
            print("orig: ")
            pprint(eval(repr(o)))
Ejemplo n.º 2
0
 def _configure_scribe(self, host, port):
     """Build the socket, framed transport, protocol and scribe client.

     The socket gets a timeout of 1000 (as passed to ``setTimeout``) and the
     accelerated binary protocol is created with strict reads and writes
     disabled.  No ``open()`` call is made here.
     """
     sock = self.socket = TSocket.TSocket(host=host, port=port)
     sock.setTimeout(1000)
     framed = self.transport = TTransport.TFramedTransport(sock)
     proto = self.protocol = TBinaryProtocol.TBinaryProtocolAccelerated(
         trans=framed, strictRead=False, strictWrite=False)
     # The same protocol object serves as both input and output protocol.
     self.client = scribe.Client(iprot=proto, oprot=proto)
Ejemplo n.º 3
0
 def __init__(self, data_status):
     """Connect to the last block of every chain and start the worker.

     For each entry of ``data_status.data_blocks`` the last element of its
     ``block_ids`` is split on ``':'``; field 0 and field 1 are used as the
     socket host and port and the final field is parsed as the integer
     block id.  One framed transport, accelerated binary protocol and
     ``block_request_service`` client is created per block, all transports
     are opened, and a ``SubscriptionWorker`` is started over the
     protocols with fresh notification and control mailboxes.
     """
     names = []
     for chain in data_status.data_blocks:
         names.append(chain.block_ids[-1].split(':'))
     self.block_names = names
     self.block_ids = [int(parts[-1]) for parts in names]

     transports = []
     for parts in names:
         sock = TSocket.TSocket(parts[0], int(parts[1]))
         transports.append(TTransport.TFramedTransport(sock))
     self.transports = transports

     self.protocols = list(
         map(TBinaryProtocol.TBinaryProtocolAccelerated, transports))
     self.clients = list(map(block_request_service.Client, self.protocols))

     # Open connections only after every client object has been built.
     for opened in transports:
         opened.open()

     self.notifications = Mailbox()
     self.controls = Mailbox()
     worker = SubscriptionWorker(self.protocols, self.notifications,
                                 self.controls)
     self.worker = worker
     self.worker.start()
Ejemplo n.º 4
0
    def configure(cls,
                  ngram_table="ngrams",
                  subst_table="typogram",
                  wiki_anchors_table="wiki_anchors",
                  wiki_urls_table="wiki_urls",
                  wiki_edges_table="wiki_edges",
                  wiki_pagelinks_title_table="TL",
                  wiki_link_mention_cooccur="CC",
                  wiki_link_cooccur_table="LL",
                  hbase_host=None):
        """Store table names on the class and open the HBase connection.

        Every ``*_table`` argument is saved as a class attribute of the same
        name; ``wiki_link_mention_cooccur`` is saved as
        ``wiki_link_mention_cooccur_table``.  ``hbase_host`` is unpacked
        into the ``TSocket.TSocket`` constructor, so a value must be
        supplied even though the parameter defaults to ``None``.

        After the transport is opened the rate counter and start time are
        reset and the substitution counts are loaded through
        ``cls.get_freq(SUBSTITUTION_TOKEN)``.
        """
        table_settings = (
            ("subst_table", subst_table),
            ("ngram_table", ngram_table),
            ("wiki_urls_table", wiki_urls_table),
            ("wiki_anchors_table", wiki_anchors_table),
            ("wiki_edges_table", wiki_edges_table),
            ("wiki_pagelinks_title_table", wiki_pagelinks_title_table),
            ("wiki_link_mention_cooccur_table", wiki_link_mention_cooccur),
            ("wiki_link_cooccur_table", wiki_link_cooccur_table),
        )
        for attr_name, table_name in table_settings:
            setattr(cls, attr_name, table_name)

        # HBASE
        hbase_socket = TSocket.TSocket(*hbase_host)
        cls.h_transport = TTransport.TBufferedTransport(hbase_socket)
        cls.h_client = Hbase.Client(
            TBinaryProtocol.TBinaryProtocolAccelerated(cls.h_transport))
        cls.h_transport.open()
        cls.h_rate = 0
        cls.h_start = time.time()

        cls.substitution_counts = cls.get_freq(SUBSTITUTION_TOKEN)
        cls.substitutions = sorted(cls.substitution_counts.keys())
def get_mention_from_wikilink_thrift_file(fn):
    f = open(args.thrift_data_dir + '/%03d' % fn)
    out_val = defaultdict(list)
    p = TBinaryProtocol.TBinaryProtocolAccelerated(TFileObjectTransport(f))
    pp = WikiLinkItem()
    while True:
        try:
            pp.read(p)
        except EOFError:
            break
        for m in pp.mentions:
            c = m.context
            if c is not None:
                url = simplify_wiki_url(m.wiki_url)
                # Follow url redirect.
                try:
                    url = redirect[hash(url)]
                except KeyError:
                    pass
                if url in POOL:
                    # if c.left.startswith('the Musical August 30th, 2009 | Author: operator Shrek the Musical is a musical with music by Jeanine Tesori and a book and lyrics'):
                    #     print 1, fn
                    out_val[url].append([c.left, c.middle, c.right])
    print fn, len(out_val), sum(len(e) for e in out_val.itervalues())
    out_val.default_factory = None  # FINALIZE out_val
    return out_val
Ejemplo n.º 6
0
    def connect(self):
        """Open a fresh connection to an Impalad instance.

        Any previous connection is closed first.  On success the result of
        ``_ping_impala_service()`` is returned (described by the original
        documentation as a tuple of the impala version string and the
        webserver address).  If opening the session or pinging fails, the
        connection is closed again and the exception is re-raised.
        """
        self.close_connection()

        sock, self.transport = self._get_socket_and_transport()
        connect_timeout_ms = self.client_connect_timeout_ms
        if connect_timeout_ms > 0:
            sock.setTimeout(connect_timeout_ms)
        self.transport.open()
        if self.verbose:
            endpoint = (self.impalad_host, self.impalad_port)
            print_to_stderr('Opened TCP connection to %s:%s' % endpoint)
        # Setting a timeout of None disables timeouts on sockets
        sock.setTimeout(None)
        self.imp_service = self._get_thrift_client(
            TBinaryProtocol.TBinaryProtocolAccelerated(self.transport))
        self.connected = True
        try:
            self._open_session()
            ping_result = self._ping_impala_service()
        except:
            # Ensure we are in a disconnected state if we failed above.
            self.close_connection()
            raise
        return ping_result
Ejemplo n.º 7
0
    def _get_client_by_transport(self, options, transport, socket=None):
        """Choose a protocol from ``options``, open the transport, build a client.

        Selection order: JSON, compact, accelerated binary (when framed or
        unframed was requested), then the header protocol when a ``socket``
        is supplied.  In the header case ``transport`` is replaced by a
        header transport built on ``socket`` (a fuzzy one when
        ``options.fuzz`` is set).  When nothing matches, ``self._exit`` is
        called with ``os.EX_USAGE``.

        The transport that was opened is kept on ``self._transport`` and the
        new ``service_class.Client`` is returned.
        """
        if options.json:
            protocol = TJSONProtocol.TJSONProtocol(transport)
        elif options.compact:
            protocol = TCompactProtocol.TCompactProtocol(transport)
        elif options.framed or options.unframed:
            # No explicit option about protocol is specified. Try to infer.
            protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
        elif socket is None:
            self._exit(error_message=('No valid protocol '
                                      'specified for %s' % (type(self))),
                       status=os.EX_USAGE)
        else:
            # If json, compact, framed, and unframed are not specified,
            # THeaderProtocol is the default. The transport is fuzzy or
            # non-fuzzy depending on whether options.fuzz is set.
            if options.fuzz is None:
                transport = THeaderTransport(socket)
            else:
                transport = TFuzzyHeaderTransport(socket,
                                                  fuzz_fields=options.fuzz,
                                                  verbose=True)
            protocol = THeaderProtocol.THeaderProtocol(transport)

        transport.open()
        self._transport = transport

        return self.service_class.Client(protocol)
Ejemplo n.º 8
0
    def __init__(self, keyspace, server, framed_transport=True, timeout=None,
                 credentials=None,
                 socket_factory=default_socket_factory,
                 transport_factory=default_transport_factory):
        """Connect to ``server`` and select ``keyspace``.

        ``server`` is ``"host"`` or ``"host:port"``; the port falls back to
        9160 when absent.  The socket comes from ``socket_factory`` and the
        transport from ``transport_factory``.  ``timeout`` is multiplied by
        1000.0 before being passed to ``setTimeout``.  When ``credentials``
        is given, a login request is sent after the keyspace is set.
        ``framed_transport`` is accepted but not read by this method.
        """
        self.keyspace = None
        self.server = server

        parts = server.split(':')
        host = parts[0]
        port = parts[1] if len(parts) > 1 else 9160

        sock = socket_factory(host, int(port))
        if timeout is not None:
            sock.setTimeout(timeout * 1000.0)
        # The factory receives the port exactly as parsed (str or int 9160).
        self.transport = transport_factory(sock, host, port)
        Cassandra.Client.__init__(
            self, TBinaryProtocol.TBinaryProtocolAccelerated(self.transport))
        self.transport.open()

        self.set_keyspace(keyspace)

        if credentials is not None:
            self.login(AuthenticationRequest(credentials=credentials))
Ejemplo n.º 9
0
def get_thrift_client(host='127.0.0.1', port=9170):
    """Return a Cassandra client over a framed transport (not yet opened).

    The transport is attached to the returned client as
    ``client.transport``; this function does not call ``open()``.
    """
    framed = TTransport.TFramedTransport(TSocket.TSocket(host, port))
    client = Cassandra.Client(
        TBinaryProtocol.TBinaryProtocolAccelerated(framed))
    client.transport = framed
    return client
Ejemplo n.º 10
0
    def __init__(self, node=None, host=None, port=None, ks_name='ks', cf_name='cf',
            cassandra_interface='11'):
        """
        Set up and open the thrift connection.

         - node: a ccm node. When given, host and port are taken from
           ``node.network_interfaces['thrift']``.
         - host, port: used as-is unless node is supplied.
         - ks_name, cf_name: keyspace and column family stored on the instance.
         - cassandra_interface: version suffix of the thrift interface module
           to import (``cassandra-thrift.v<suffix>``). The original notes say
           '07' and '11' are supported and that '07' is needed when creating
           keyspaces against cassandra 0.7.
        """
        if node:
            host, port = node.network_interfaces['thrift']
        self.node, self.host, self.port = node, host, port
        self.cassandra_interface = cassandra_interface

        # Import the matching version of the cassandra thrift interface and
        # keep its Cassandra module on the instance.
        module_name = 'cassandra-thrift.v%s' % cassandra_interface
        interface = __import__(module_name, globals(), locals(), ['Cassandra'])
        self.Cassandra = interface.Cassandra

        sock = TSocket.TSocket(host, port)
        self.transport = TTransport.TFramedTransport(sock)
        self.client = self.Cassandra.Client(
            TBinaryProtocol.TBinaryProtocolAccelerated(self.transport))

        # The raw socket is opened directly rather than through the transport.
        sock.open()
        self.open_socket = True

        self.ks_name, self.cf_name = ks_name, cf_name
Ejemplo n.º 11
0
    def __init__(self,
                 keyspace,
                 server,
                 framed_transport=True,
                 timeout=None,
                 credentials=None,
                 api_version=None):
        """Connect to ``server`` and select ``keyspace``.

        ``server`` is ``"host"`` or ``"host:port"``; the port falls back to
        9160 when absent.  A framed transport is used when
        ``framed_transport`` is truthy, a buffered one otherwise.
        ``timeout`` is multiplied by 1000.0 before being passed to
        ``setTimeout``.  When ``credentials`` is given, a login request is
        sent after the keyspace is set.  ``api_version`` is accepted but not
        read by this method.
        """
        self.keyspace = None
        self.server = server

        parts = server.split(':')
        host = parts[0]
        port = parts[1] if len(parts) > 1 else 9160

        sock = TSocket.TSocket(host, int(port))
        if timeout is not None:
            sock.setTimeout(timeout * 1000.0)

        if framed_transport:
            transport_cls = TTransport.TFramedTransport
        else:
            transport_cls = TTransport.TBufferedTransport
        self.transport = transport_cls(sock)

        Cassandra.Client.__init__(
            self, TBinaryProtocol.TBinaryProtocolAccelerated(self.transport))
        self.transport.open()

        self.set_keyspace(keyspace)

        if credentials is not None:
            self.login(AuthenticationRequest(credentials=credentials))
Ejemplo n.º 12
0
    def open(self, retry=5, timeout_ms=None, force=False):
        """Connect to the first reachable server in ``self.servers``.

        Nothing happens when a transport is already set and ``force`` is
        false.  Otherwise up to ``min(retry, len(self.servers))`` servers are
        tried in list order; for the first one whose transport opens, the
        method returns with ``self.transport``, ``self.protocol`` and
        ``self.client`` pointing at it.

        Args:
            retry: Maximum number of servers to try.
            timeout_ms: Socket timeout passed to ``setTimeout``; falls back
                to ``self.timeout`` when falsy.
            force: Reconnect even if a transport is already set.

        Raises:
            ThriftException: If no server could be connected to.  In that
                case ``self.transport`` is left as ``None`` so that a later
                call attempts to connect again.
        """
        if self.transport and not force:
            return

        timeout_ms = timeout_ms or self.timeout
        count = min(retry, len(self.servers))

        if count == 0:
            logger.warning('NO SERVERS  -->group: %s, %s:%s ' % (self.GROUP, self.host, self.port))

        # Use sequential retry
        for retry_count in range(1, count + 1):
            try:
                #TODO: investigate BufferedStream perf
                self.server = self.servers[retry_count - 1]
                self.host = self.server['host']
                self.port = self.server['port']

                thrift_socket = TSocket.TSocket(self.host, int(self.port))
                thrift_socket.setTimeout(timeout_ms)
                self.transport = TTransport.TBufferedTransport(thrift_socket)
                self.protocol = TBinaryProtocol.TBinaryProtocolAccelerated(self.transport)
                self.client = self.client_cls(self.protocol)
                self.transport.open()
                logger.info("CONNECTED TO SUCCESS --> %s:%s." % (self.host, self.port))
                return
            except Exception:
                # Drop the unopened transport: leaving it set would make the
                # guard at the top treat this object as already connected.
                self.transport = None
                logger.exception('CONNECT TO  FAILED --> %s:%s retry: %d' % (self.host, self.port, retry_count))
        raise ThriftException('FAILED TO CONNECT TO THRIFT SERVER --> servers:%s' % (self.servers))
Ejemplo n.º 13
0
    def deserialize_body(self, body):
        """Decode ``body`` into a new ``self.deserialize_type`` instance.

        ``body`` is wrapped in a memory buffer and read with the accelerated
        binary protocol; the populated object is returned.
        """
        protocol = TBinaryProtocol.TBinaryProtocolAccelerated(
            TTransport.TMemoryBuffer(body))

        decoded = self.deserialize_type()
        decoded.read(protocol)
        return decoded
Ejemplo n.º 14
0
    def __init__(self, host, port, keyspace, user=None, password=None):
        """
        Open a connection to a Cassandra node.

        Params:
        * host .........: hostname of Cassandra node.
        * port .........: port number to connect to.
        * keyspace .....: keyspace to connect to; when truthy a ``USE``
                          statement is executed through a cursor.
        * user .........: username used in authentication (optional).
        * password .....: password used in authentication (optional).

        A login request is sent only when both user and password are truthy.
        """
        self.host, self.port, self.keyspace = host, port, keyspace

        sock = TSocket.TSocket(host, port)
        self.transport = TTransport.TFramedTransport(sock)
        self.client = Cassandra.Client(
            TBinaryProtocol.TBinaryProtocolAccelerated(self.transport))

        # The raw socket is opened directly rather than through the transport.
        sock.open()
        self.open_socket = True

        if user and password:
            login_request = AuthenticationRequest(
                credentials={"username": user, "password": password})
            self.client.login(login_request)

        if keyspace:
            cursor = self.cursor()
            cursor.execute('USE %s;' % keyspace)
            cursor.close()
Ejemplo n.º 15
0
    def __init__(self, keyspace, server, framed_transport, timeout,
                 credentials, recycle):
        """Open a Cassandra connection and verify the server API version.

        Args:
            keyspace: Keyspace selected with ``set_keyspace``.
            server: ``"host:port"`` string.
            framed_transport: Use a framed transport when truthy, a buffered
                one otherwise.
            timeout: Value multiplied by 1000.0 and passed to
                ``setTimeout``; ``None`` leaves the socket timeout unset.
            credentials: When not ``None``, sent in a login request after
                the keyspace is selected.
            recycle: When truthy, ``self.recycle`` becomes the current time
                plus ``recycle`` plus a random extra of up to 10% of it;
                otherwise ``self.recycle`` is ``None``.

        Raises:
            AssertionError: If the major API version reported by the server
                differs from ``API_VERSION[0]``.

        If anything fails after the transport has been opened, the transport
        is closed before the exception propagates, so a failed constructor
        does not leave a connection open.
        """
        host, port = server.split(":")
        socket = TSocket.TSocket(host, int(port))
        if timeout is not None:
            socket.setTimeout(timeout * 1000.0)
        if framed_transport:
            transport = TTransport.TFramedTransport(socket)
        else:
            transport = TTransport.TBufferedTransport(socket)
        protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
        client = Cassandra.Client(protocol)
        transport.open()

        try:
            server_api_version = client.describe_version().split('.', 1)
            # NOTE: kept as an assert so callers still see AssertionError;
            # be aware that asserts are skipped when Python runs with -O.
            assert server_api_version[0] == API_VERSION[0], \
                    "Thrift API version mismatch. " \
                     "(Client: %s, Server: %s)" % (API_VERSION[0], server_api_version[0])

            client.set_keyspace(keyspace)

            if credentials is not None:
                request = AuthenticationRequest(credentials=credentials)
                client.login(request)
        except BaseException:
            # The transport is not yet stored on self, so nobody else could
            # close it if we let the exception escape here.
            transport.close()
            raise

        self.keyspace = keyspace
        self.client = client
        self.transport = transport

        if recycle:
            self.recycle = time.time() + recycle + random.uniform(
                0, recycle * 0.1)
        else:
            self.recycle = None
Ejemplo n.º 16
0
    def __init__(self, server, framed_transport, timeout, recycle):
        """Open a ``Rest`` thrift client connection to ``server``.

        ``server`` is a ``"host:port"`` string.  A framed transport is used
        when ``framed_transport`` is truthy, a buffered one otherwise.
        ``timeout`` (when not ``None``) is multiplied by 1000.0 and passed
        to ``setTimeout``.  When ``recycle`` is truthy, ``self.recycle``
        becomes the current time plus ``recycle`` plus a random extra of up
        to 10% of it; otherwise it is ``None``.
        """
        host, port = server.split(":")
        sock = TSocket.TSocket(host, int(port))
        if timeout is not None:
            sock.setTimeout(timeout * 1000.0)

        if framed_transport:
            transport_cls = TTransport.TFramedTransport
        else:
            transport_cls = TTransport.TBufferedTransport
        transport = transport_cls(sock)

        client = Rest.Client(
            TBinaryProtocol.TBinaryProtocolAccelerated(transport))
        transport.open()

        # NOTE: no server API version check is performed here.

        self.client = client
        self.transport = transport

        self.recycle = (
            time.time() + recycle + random.uniform(0, recycle * 0.1)
        ) if recycle else None
Ejemplo n.º 17
0
 def __init__(self,
              uri=None,           # type: Optional[str]
              user=None,          # type: Optional[str]
              password=None,      # type: Optional[str]
              host=None,          # type: Optional[str]
              port=9091,          # type: Optional[int]
              dbname=None,        # type: Optional[str]
              protocol='binary',  # type: Optional[str]
              ):
     # type: (...) -> None
     """Open a transport to the server and start a session.

     Either ``uri`` or the individual arguments may be given, not both.
     When ``uri`` is used, the connection parameters come from
     ``_parse_uri(uri)``.

     Args:
         uri: Connection URI; mutually exclusive with the other arguments.
         user: User name passed to the client's ``connect`` call.
         password: Password passed to the client's ``connect`` call.
         host: Server host; required unless it comes from ``uri``.
         port: Server port.
         dbname: Database name passed to the client's ``connect`` call.
         protocol: ``'binary'`` (buffered socket with the accelerated binary
             protocol) or ``'http'`` / ``'https'`` (HTTP client with the
             JSON protocol).

     Raises:
         TypeError: If ``uri`` is combined with other arguments, or no host
             is available.
         ValueError: If ``protocol`` is not one of the supported values.
         OperationalError: If the transport reports that it could not be
             opened.
     """
     if uri is not None:
         if not all([user is None,
                     password is None,
                     host is None,
                     port == 9091,
                     dbname is None,
                     protocol == 'binary']):
             raise TypeError("Cannot specify both URI and other arguments")
         user, password, host, port, dbname, protocol = _parse_uri(uri)
     if host is None:
         raise TypeError("`host` parameter is required.")
     if protocol in ("http", "https"):
         if not host.startswith(protocol):
             # the THttpClient expects http[s]://localhost
             host = protocol + '://' + host
         transport = THttpClient.THttpClient("{}:{}".format(host, port))
         proto = TJSONProtocol.TJSONProtocol(transport)
         socket = None
     elif protocol == "binary":
         socket = TSocket.TSocket(host, port)
         transport = TTransport.TBufferedTransport(socket)
         proto = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
     else:
         # Build ONE message; passing the pieces as separate arguments made
         # the exception render as a tuple of fragments.
         raise ValueError("`protocol` should be one of"
                          " ['http', 'https', 'binary'],"
                          " got {} instead".format(protocol))
     self._user = user
     self._password = password
     self._host = host
     self._port = port
     self._dbname = dbname
     self._transport = transport
     self._protocol = protocol
     self._socket = socket
     self._closed = 0
     self._tdf = None
     try:
         self._transport.open()
     except TTransportException as e:
         # Compare the exception's own type code.  ``e.NOT_OPEN`` by itself
         # is a non-zero class constant, i.e. always truthy, which made the
         # ``else`` branch unreachable.
         if e.type == TTransportException.NOT_OPEN:
             err = OperationalError("Could not connect to database")
             six.raise_from(err, e)
         else:
             raise
     self._client = Client(proto)
     try:
         self._session = self._client.connect(user, password, dbname)
     except TMapDException as e:
         six.raise_from(_translate_exception(e), e)
Ejemplo n.º 18
0
 def thrift_client(self):
     """Yield a ``MapService`` client over an open buffered transport.

     The transport to ``self.ip``:``self.port`` is opened before the client
     is yielded and is closed when the generator is resumed or torn down,
     including when the consumer raises.  Presumably this generator is
     wrapped by ``contextlib.contextmanager`` (the decorator is not visible
     from this block; confirm at the definition site).
     """
     socket = TSocket.TSocket(self.ip, self.port)
     transport = TTransport.TBufferedTransport(socket)
     protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
     client = MapService.Client(protocol)
     transport.open()
     try:
         yield client
     finally:
         # Without the finally, an exception in the consumer would skip the
         # close and leave the connection open.
         transport.close()
Ejemplo n.º 19
0
def doTest():
    """Run the accelerated-protocol checks over the module's fixtures.

    Uses the ``checkWrite`` / ``checkRead`` helpers and the fixture objects
    (``hm``, ``rs``, ``rshuge``, ``my_zero``, ``my_nega``) defined outside
    this block.  Problems are reported with ``print``; nothing is returned.
    This block uses Python 2 print statements.
    """
    checkWrite(hm)
    # Same fixture, but with its ``contain`` field replaced by an empty set.
    no_set = deepcopy(hm)
    no_set.contain = set()
    checkRead(no_set)
    checkWrite(rs)
    checkRead(rs)
    checkWrite(rshuge)
    checkRead(rshuge)
    checkWrite(my_zero)
    checkRead(my_zero)
    checkRead(Backwards({"first_tag2": 4, "second_tag1": 2}))
    # An AttributeError is the expected outcome for my_nega; if the call
    # succeeds instead, a message is printed.
    try:
        checkWrite(my_nega)
        print "Hey, did this get fixed?"
    except AttributeError:
        # Sorry, doesn't work with negative tags.
        pass

    # One case where the serialized form changes, but only superficially.
    o = Backwards({"first_tag2": 4, "second_tag1": 2})
    trans_fast = TTransport.TMemoryBuffer()
    trans_slow = TTransport.TMemoryBuffer()
    prot_fast = TBinaryProtocol.TBinaryProtocolAccelerated(trans_fast)
    prot_slow = TBinaryProtocol.TBinaryProtocol(trans_slow)

    o.write(prot_fast)
    o.write(prot_slow)
    ORIG = trans_slow.getvalue()
    MINE = trans_fast.getvalue()
    # The two encodings are expected to differ byte-for-byte here.
    if ORIG == MINE:
        print "That shouldn't happen."

    # Write with the accelerated protocol, read back with the plain one,
    # and dump both objects if the round trip does not compare equal.
    prot = TBinaryProtocol.TBinaryProtocolAccelerated(
        TTransport.TMemoryBuffer())
    o.write(prot)
    prot = TBinaryProtocol.TBinaryProtocol(
        TTransport.TMemoryBuffer(prot.trans.getvalue()))
    c = o.__class__()
    c.read(prot)
    if c != o:
        print "copy: "
        pprint(eval(repr(c)))
        print "orig: "
        pprint(eval(repr(o)))
Ejemplo n.º 20
0
def getLocalConnection(port=None):
    """Open a framed connection to ``localhost`` and return it.

    ``port`` falls back to ``conf.NETWORK_PORT`` when ``None``.  Returns a
    ``(service, transport)`` tuple: the ``RndNodeApi`` client and the
    already-opened transport.
    """
    chosen_port = conf.NETWORK_PORT if port is None else port
    transport = TTransport.TFramedTransport(
        TSocket.TSocket("localhost", chosen_port))
    transport.open()
    service = RndNodeApi.Client(
        TBinaryProtocol.TBinaryProtocolAccelerated(transport))
    return (service, transport)
Ejemplo n.º 21
0
 def __init__(self, socket, client_class):
     """Prepare (but do not open) a client over a unix-domain socket.

     ``socket`` is passed to ``TSocket.TSocket`` as ``unix_socket``.
     ``client_class`` is instantiated with the accelerated binary protocol.
     ``self.connected`` starts out ``False`` and a one-slot
     ``BoundedSemaphore`` is stored as ``self.mutex``.
     """
     self.socket = None
     self.mutex = BoundedSemaphore(1)

     unix_sock = TSocket.TSocket(unix_socket=socket)
     self.socket = unix_sock
     buffered = TTransport.TBufferedTransport(unix_sock)
     self.transport = buffered
     proto = TBinaryProtocol.TBinaryProtocolAccelerated(buffered)
     self.protocol = proto
     self.client = client_class(proto)
     self.connected = False
Ejemplo n.º 22
0
def connect(server='localhost', port=9090, timeout=None):
    """Open a buffered connection and return an ``Hbase`` client.

    ``port`` is converted with ``int``.  ``timeout`` is passed to the
    socket's ``setTimeout`` when it is not ``None``.  Only the client is
    returned; the transport is not handed back to the caller.
    """
    sock = TSocket(server, int(port))
    if timeout is not None:
        sock.setTimeout(timeout)
    buffered = TBufferedTransport(sock)
    buffered.open()
    return Hbase.Client(TBinaryProtocol.TBinaryProtocolAccelerated(buffered))
Ejemplo n.º 23
0
def serialize(msg):
    """Validate ``msg`` and return its accelerated-binary encoding.

    ``msg.validate()`` runs first; the message is then written into a
    memory buffer whose contents are returned after the buffer is closed.
    """
    msg.validate()
    buf = TTransport.TMemoryBuffer()
    msg.write(TBinaryProtocol.TBinaryProtocolAccelerated(buf))

    payload = buf.getvalue()
    buf.close()
    return payload
Ejemplo n.º 24
0
def connect():
    """Yield a ``THBaseService`` client connected to 127.0.0.1:9090.

    The buffered transport is opened before the client is yielded and is
    closed in a ``finally`` clause once the generator is resumed or torn
    down.
    """
    sock = TSocket.TSocket('127.0.0.1', 9090)
    transport = TTransport.TBufferedTransport(sock)
    client = THBaseService.Client(
        TBinaryProtocol.TBinaryProtocolAccelerated(transport))
    transport.open()
    try:
        yield client
    finally:
        transport.close()
Ejemplo n.º 25
0
def deserialize(msg, data):
    """Populate ``msg`` from ``data`` and return the unread tail.

    ``msg`` is read with the accelerated binary protocol and then
    validated.  Returns ``(msg, remaining)`` where ``remaining`` is the
    slice of ``data`` starting at the memory buffer's read position.
    """
    buf = TTransport.TMemoryBuffer(data)
    msg.read(TBinaryProtocol.TBinaryProtocolAccelerated(buf))
    msg.validate()
    consumed = buf.cstringio_buf.tell()
    remaining = data[consumed:]
    buf.close()

    return msg, remaining
Ejemplo n.º 26
0
 def _create_thrift_connection(self):
     """Create an ``iface_cls`` client over an opened buffered transport.

     The socket targets ``self.host``:``self.port``.  A socket timeout is
     applied only when ``self.network_timeout`` is greater than zero.
     """
     sock = TSocket.TSocket(self.host, self.port)
     timeout = self.network_timeout
     if timeout > 0:
         sock.setTimeout(timeout)
     buffered = TTransport.TBufferedTransport(sock)
     connection = self.iface_cls(
         TBinaryProtocol.TBinaryProtocolAccelerated(buffered))
     buffered.open()
     return connection
Ejemplo n.º 27
0
    def _refresh_thrift_client(self):
        """Rebuild ``self.transport`` and ``self.client`` from scratch.

        A new socket to ``self.host``:``self.port`` is wrapped with
        ``self._transport_class``.  ``self.timeout`` is applied to the
        socket when it is not ``None``.  No ``open()`` call is made here.
        """
        sock = TSocket(self.host, self.port)
        timeout = self.timeout
        if timeout is not None:
            sock.setTimeout(timeout)

        self.transport = self._transport_class(sock)
        self.client = Hbase.Client(
            TBinaryProtocol.TBinaryProtocolAccelerated(self.transport))
Ejemplo n.º 28
0
 def _getClient(self):
     """Return a scribe client over a newly opened framed transport.

     The socket timeout is ``self._socket_timeout * 1000``.
     """
     # A long-lived connection is not kept on purpose: the app might fork
     # and the child would be left with the parent process's connection.
     # Opening a new one for each flush is more robust.
     sock = TSocket.TSocket(host=self._remote_host, port=self._remote_port)
     sock.setTimeout(self._socket_timeout * 1000)
     framed = TTransport.TFramedTransport(sock)
     framed.open()
     return scribe.Client(TBinaryProtocol.TBinaryProtocolAccelerated(framed))
Ejemplo n.º 29
0
def get_client(host='127.0.0.1', port=9160):
    """Return a Cassandra client whose transport is not yet opened.

    A buffered transport is used when the module-level ``options.unframed``
    is truthy, a framed one otherwise.  The transport is attached to the
    returned client as ``client.transport``.
    """
    sock = TSocket.TSocket(host, port)
    if options.unframed:
        wrap = TTransport.TBufferedTransport
    else:
        wrap = TTransport.TFramedTransport
    transport = wrap(sock)
    client = Cassandra.Client(
        TBinaryProtocol.TBinaryProtocolAccelerated(transport))
    client.transport = transport
    return client
Ejemplo n.º 30
0
def doTest():
    """Run the accelerated-protocol checks over the module's fixtures.

    Uses the ``checkWrite`` / ``checkRead`` helpers and the fixture objects
    (``hm``, ``reserved``, ``rs``, ``rshuge``, ``my_zero``) defined outside
    this block.  Problems are reported with ``print``; nothing is returned.
    """
    checkWrite(hm)
    # Same fixture, but with its ``contain`` field replaced by an empty set.
    no_set = deepcopy(hm)
    no_set.contain = set()
    checkRead(no_set)

    remaining_checks = (
        (checkRead, reserved),
        (checkWrite, reserved),
        (checkWrite, rs),
        (checkRead, rs),
        (checkWrite, rshuge),
        (checkRead, rshuge),
        (checkWrite, my_zero),
        (checkRead, my_zero),
    )
    for check, fixture in remaining_checks:
        check(fixture)
    checkRead(Backwards(first_tag2=4, second_tag1=2))

    # One case where the serialized form changes, but only superficially.
    o = Backwards(first_tag2=4, second_tag1=2)
    fast_buffer = TTransport.TMemoryBuffer()
    slow_buffer = TTransport.TMemoryBuffer()
    fast_writer = TBinaryProtocol.TBinaryProtocolAccelerated(fast_buffer)
    slow_writer = TBinaryProtocol.TBinaryProtocol(slow_buffer)

    o.write(fast_writer)
    o.write(slow_writer)
    # The two encodings are expected to differ byte-for-byte here.
    if slow_buffer.getvalue() == fast_buffer.getvalue():
        print("That shouldn't happen.")

    # Write with the accelerated protocol, read back with the plain one,
    # and dump both objects if the round trip does not compare equal.
    accelerated = TBinaryProtocol.TBinaryProtocolAccelerated(
        TTransport.TMemoryBuffer())
    o.write(accelerated)
    encoded = accelerated.trans.getvalue()
    plain = TBinaryProtocol.TBinaryProtocol(TTransport.TMemoryBuffer(encoded))
    decoded = o.__class__()
    decoded.read(plain)
    if decoded != o:
        print("copy: ")
        pprint(eval(repr(decoded)))
        print("orig: ")
        pprint(eval(repr(o)))