Example #1
def main():
    print(sys.argv[0])
    print(sys.argv[1:])

    # Connect to KVStore Thrift server
    transport = TSocket.TSocket(sys.argv[1], int(sys.argv[2]))
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = KVStore.Client(protocol)
    transport.open()

    # Main program loop (CTRL-C to exit)
    while True:

        # Choose get (1) or put (2)
        inputstr = input('Enter 1 for get(), 2 for put()\n')
        try:
            func = int(inputstr)
        except ValueError:
            print("> Error:", "'" + inputstr + "'", "is not a number")
            continue
        if func < 1 or func > 2:
            print('> Error: you entered', inputstr + '.\n',
                  'Enter 1 for get(), 2 for put()')
            continue

        # Indicate consistency level ONE (1) or QUORUM (2)
        inputstr = input('Enter 1 for consistency level ONE, 2 for QUORUM\n')
        try:
            clevel = int(inputstr)
        except ValueError:
            print("> Error:", "'" + inputstr + "'", "is not a number")
            continue
        if clevel < 1 or clevel > 2:
            print('> Error: you entered', inputstr + '.\n',
                  'Enter 1 for consistency level ONE, 2 for QUORUM')
            continue

        # get()
        if func == 1:
            inputstr = input("Enter a number in range [0, 255]: ")
            try:
                key = int(inputstr)
            except ValueError:
                print("> Error:", "'" + inputstr + "'", "is not a number")
                continue
            if (key < 0 or key > 255):
                print("> Error:", key, "is not between 0 and 255")
            else:
                getret = client.get(key, clevel - 1)
                val = getret.val
                ret = getret.ret
                if ret:
                    print(ret)
                    print("\nValue =", val)
                else:
                    print("\nValue for key", key, "not found")

                print("\nPress CTRL-C to exit, or...")

        # put()
        else:
            inputstr = input("Enter a number in range [0, 255]: ")
            try:
                key = int(inputstr)
            except ValueError:
                print("> Error:", "'" + inputstr + "'", "is not a number")
                continue

            if (key < 0 or key > 255):
                print("> Error:", key, "is not between 0 and 255")
            else:
                val = input("Enter a string of characters: ")
                client.put(KVPair(key, val), clevel - 1)
                print("\nSuccess!\n")
                print("Press CTRL-C to exit, or...")

    transport.close()
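
The function above is only the body of the client: it assumes the usual Thrift imports plus the Thrift-generated KVStore module, and it expects the server host and port as command-line arguments. A minimal sketch of that surrounding boilerplate, assuming the generated code lives in a kvstore package on sys.path (adjust to your gen-py layout):

import sys

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol

# Assumed module names for the Thrift-generated code; they depend on the
# .thrift file and on having gen-py on sys.path.
from kvstore import KVStore
from kvstore.ttypes import KVPair

if __name__ == '__main__':
    # e.g. python kvstore_client.py 127.0.0.1 9090
    main()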

Example #2
####
# Get auth token, connect to NoteStore and UserStore
####

authToken = ""  # bypass the dev token prompt by populating this variable.

if not authToken:
    authToken = getNonEmptyUserInput("Enter your dev token: ")

evernoteHost = "sandbox.evernote.com"
userStoreUri = "https://" + evernoteHost + "/edam/user"

userStoreHttpClient = THttpClient.THttpClient(userStoreUri)
userStoreProtocol = TBinaryProtocol.TBinaryProtocol(userStoreHttpClient)
userStore = UserStore.Client(userStoreProtocol)

try:
    noteStoreUrl = userStore.getNoteStoreUrl(authToken)
except Errors.EDAMUserException as ue:
    print("Error: your dev token is probably wrong; double-check it.")
    print(ue)
    raise SystemExit

noteStoreHttpClient = THttpClient.THttpClient(noteStoreUrl)
noteStoreProtocol = TBinaryProtocol.TBinaryProtocol(noteStoreHttpClient)
noteStore = NoteStore.Client(noteStoreProtocol)

####
# The Main Event
Example #3
def main(cfg, reqhandle, resphandle):
    if cfg.unix:
        if cfg.addr == "":
            sys.exit("invalid unix domain socket: {}".format(cfg.addr))
        socket = TSocket.TSocket(unix_socket=cfg.addr)
    else:
        try:
            (host, port) = cfg.addr.rsplit(":", 1)
            if host == "":
                host = "localhost"
            socket = TSocket.TSocket(host=host, port=int(port))
        except ValueError:
            sys.exit("invalid address: {}".format(cfg.addr))

    transport = TRecordingTransport(socket, reqhandle, resphandle)

    if cfg.transport == "framed":
        transport = TTransport.TFramedTransport(transport)
    elif cfg.transport == "unframed":
        transport = TTransport.TBufferedTransport(transport)
    elif cfg.transport == "header":
        transport = THeaderTransport.THeaderTransport(
            transport,
            client_type=THeaderTransport.CLIENT_TYPE.HEADER,
        )

        if cfg.headers is not None:
            pairs = cfg.headers.split(",")
            for p in pairs:
                key, value = p.split("=")
                transport.set_header(key, value)

        if cfg.protocol == "binary":
            transport.set_protocol_id(THeaderTransport.T_BINARY_PROTOCOL)
        elif cfg.protocol == "compact":
            transport.set_protocol_id(THeaderTransport.T_COMPACT_PROTOCOL)
        else:
            sys.exit("header transport cannot be used with protocol {0}".format(cfg.protocol))
    else:
        sys.exit("unknown transport {0}".format(cfg.transport))

    transport.open()

    if cfg.protocol == "binary":
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
    elif cfg.protocol == "compact":
        protocol = TCompactProtocol.TCompactProtocol(transport)
    elif cfg.protocol == "json":
        protocol = TJSONProtocol.TJSONProtocol(transport)
    else:
        sys.exit("unknown protocol {0}".format(cfg.protocol))

    if cfg.service is not None:
        protocol = TMultiplexedProtocol.TMultiplexedProtocol(protocol, cfg.service)

    client = Example.Client(protocol)

    try:
        if cfg.method == "ping":
            client.ping()
            print("client: pinged")
        elif cfg.method == "poke":
            client.poke()
            print("client: poked")
        elif cfg.method == "add":
            if len(cfg.params) != 2:
                sys.exit("add takes 2 arguments, got: {0}".format(cfg.params))

            a = int(cfg.params[0])
            b = int(cfg.params[1])
            v = client.add(a, b)
            print("client: added {0} + {1} = {2}".format(a, b, v))
        elif cfg.method == "execute":
            param = Param(
                return_fields=cfg.params,
                the_works=TheWorks(
                    field_1=True,
                    field_2=0x7f,
                    field_3=0x7fff,
                    field_4=0x7fffffff,
                    field_5=0x7fffffffffffffff,
                    field_6=-1.5,
                    field_7=u"string is UTF-8: \U0001f60e",
                    field_8=b"binary is bytes: \x80\x7f\x00\x01",
                    field_9={
                        1: "one",
                        2: "two",
                        3: "three"
                    },
                    field_10=[1, 2, 4, 8],
                    field_11=set(["a", "b", "c"]),
                    field_12=False,
                ))

            try:
                result = client.execute(param)
                print("client: executed {0}: {1}".format(param, result))
            except AppException as e:
                print("client: execute failed with IDL Exception: {0}".format(e.why))
        else:
            sys.exit("unknown method {0}".format(cfg.method))
    except Thrift.TApplicationException as e:
        print("client exception: {0}: {1}".format(e.type, e.message))

    if cfg.request is None:
        req = "".join(["%02X " % ord(x) for x in reqhandle.getvalue()]).strip()
        print("request: {}".format(req))
    if cfg.response is None:
        resp = "".join(["%02X " % ord(x) for x in resphandle.getvalue()]).strip()
        print("response: {}".format(resp))

    transport.close()
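
main() receives a cfg object and only reads plain attributes from it, so any namespace-like object works; reqhandle and resphandle are writable buffers that TRecordingTransport copies traffic into. A hypothetical invocation, assuming the snippet's TRecordingTransport and the generated Example service are importable (all field values are illustrative only):

import io
from argparse import Namespace

cfg = Namespace(
    unix=False,             # connect over TCP rather than a unix socket
    addr="localhost:9090",  # host:port, split on the last ':'
    transport="framed",     # "framed", "unframed", or "header"
    headers=None,           # only consulted by the header transport
    protocol="binary",      # "binary", "compact", or "json"
    service=None,           # set a name to wrap in TMultiplexedProtocol
    method="add",
    params=["1", "2"],
    request=None,           # None: print the recorded request bytes
    response=None,          # None: print the recorded response bytes
)

# The ord()-based hex dump at the end of main() assumes Python 2 string
# buffers; on Python 3 the handles would be io.BytesIO and the dump would
# iterate over raw ints instead of calling ord().
main(cfg, reqhandle=io.BytesIO(), resphandle=io.BytesIO())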
Example #4
 def __init__(self, host='10.237.14.236', port=9090):
     transport = TBufferedTransport(TSocket(host, port))
     transport.open()
     protocol = TBinaryProtocol.TBinaryProtocol(transport)
     self.client = Hbase.Client(protocol)
Example #5
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument('--num-workers', type=int, default=1)
    parser.add_argument('--rank', type=int, default=0)
    parser.add_argument('--host', type=str, default=constants.HOST)
    parser.add_argument('--port', type=int, default=constants.PORT)
    parser.add_argument('--size', type=int, default=100)
    args = parser.parse_args()
    print(args)

    print("host = {}".format(args.host))
    print("port = {}".format(args.port))

    # Set thrift connection
    # Make socket
    transport = TSocket.TSocket(args.host, args.port)
    # Buffering is critical. Raw sockets are very slow
    transport = TTransport.TBufferedTransport(transport)
    # Wrap in a protocol
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    # Create a client to use the protocol encoder
    t_client = ParameterServer.Client(protocol)
    # Connect!
    transport.open()

    # test thrift connection
    ps_client.ping(t_client)
    print("create and ping thrift server >>> HOST = {}, PORT = {}".format(args.host, args.port))

    # register model
    ps_client.register_model(t_client, args.rank, MODEL_NAME, args.size, args.num_workers)
    ps_client.exist_model(t_client, MODEL_NAME)
    print("register and check model >>> name = {}, length = {}".format(MODEL_NAME, args.size))

    # Training the Model
    train_start = time.time()
    iter_counter = 0
    for epoch in range(NUM_EPOCHS):
        epoch_start = time.time()
        for batch_index in range(NUM_BATCHES):
            print("------worker {} epoch {} batch {}------"
                  .format(args.rank, epoch, batch_index))
            batch_start = time.time()

            loss = 0.0

            # pull latest model
            ps_client.can_pull(t_client, MODEL_NAME, iter_counter, args.rank)
            pull_start = time.time()
            latest_model = ps_client.pull_model(t_client, MODEL_NAME, iter_counter, args.rank)
            pull_time = time.time() - pull_start

            cal_start = time.time()
            w_b_grad = np.random.rand(1, args.size).astype(np.double).flatten()
            cal_time = time.time() - cal_start

            # push gradient to PS
            ps_client.can_push(t_client, MODEL_NAME, iter_counter, args.rank)
            push_start = time.time()
            ps_client.push_grad(t_client, MODEL_NAME, w_b_grad, LEARNING_RATE, iter_counter, args.rank)
            push_time = time.time() - push_start
            ps_client.can_pull(t_client, MODEL_NAME, iter_counter + 1, args.rank)  # sync all workers

            print('Epoch: [%d/%d], Step: [%d/%d] >>> Time: %.4f, Loss: %.4f, epoch cost %.4f, '
                  'batch cost %.4f s: cal cost %.4f s, pull model cost %.4f s, push update cost %.4f s'
                  % (epoch + 1, NUM_EPOCHS, batch_index, NUM_BATCHES,
                     time.time() - train_start, loss, time.time() - epoch_start,
                     time.time() - batch_start, cal_time, pull_time, push_time))
            iter_counter += 1

    end_time = time.time()
    print("Elapsed time = {} s".format(end_time - start_time))
Example #6
def filter_annotated_docs(annotation_path,
                          thrift_dir,
                          out_dir,
                          date_hour,
                          gpg_private=None,
                          gpg_public=None,
                          gpg_dir='gnupg-dir'):
    '''
    Reads the compressed (and possibly encrypted) thrift files in
    thrift_dir and generates a duplicate that is identical except that
    only docs with annotation are passed through.

    The resulting data is re-compressed.  If gpg_public is provided,
    then it is also re-encrypted.

    The new files are stored in out_dir/<date_hour>/ directories.

    The stats.json files are ignored.
    '''
    annotation = get_annotation(annotation_path)

    ## prepare to write files in a temp version of out_dir.  We will
    ## do an atomic rename of this dir after it is finished.
    out_dir = os.path.join(out_dir, date_hour)
    tmp_out_dir = out_dir + '.partial'

    if not os.path.exists(tmp_out_dir):
        os.makedirs(tmp_out_dir)

    ## loop over all files from input dir
    num_files = 0
    for i_fname in os.listdir(os.path.join(thrift_dir, date_hour)):
        ## ignore other files, e.g. stats.json
        if not (i_fname.endswith('.xz.gpg') or i_fname.endswith('.xz')):
            continue

        ## get subcorpus name and original_md5 for use in new output
        ## file names
        parts = i_fname.split('.')
        subcorpus, o_content_md5 = parts[0], parts[1]
        assert subcorpus in ['news', 'linking', 'social'], subcorpus

        ## construct input file path
        i_fpath = os.path.join(thrift_dir, date_hour, i_fname)

        ## load the encrypted data
        i_encrypted_data = open(i_fpath, 'rb').read()

        assert len(i_encrypted_data) > 0, 'failed to load: %s' % i_fpath

        ## decrypt and uncompress using subprocess tools above
        i_thrift_data = decrypt_and_uncompress(i_encrypted_data, gpg_private,
                                               gpg_dir)

        ## compare md5 hashes:
        i_content_md5 = hashlib.md5(i_thrift_data).hexdigest()
        assert i_content_md5 == o_content_md5, \
            '%r != %r' % (i_content_md5, o_content_md5)

        ## Make output file obj for thrift, wrap in protocol
        o_transport = StringIO()
        o_protocol = TBinaryProtocol.TBinaryProtocol(o_transport)

        ## iterate over input stream items
        num_annotated = 0
        for stream_item in stream_items(i_thrift_data):

            ## only keep those docs that have annotation
            if not stream_item.stream_id in annotation:
                continue
            else:
                log('%s has annotation for %s' %
                    (stream_item.stream_id, ', '.join(
                        annotation[stream_item.stream_id].keys())))

            ## Every stream_item has a source_metadata JSON string,
            ## which we can load and extend to include the annotation:
            source_metadata = json.loads(stream_item.source_metadata)
            source_metadata['annotation'] = annotation[stream_item.stream_id]

            ## We can just replace the source_metadata string, and
            ## thrift will serialize it into output o_protocol
            stream_item.source_metadata = json.dumps(source_metadata)

            ## write modified stream_item object to new output file
            stream_item.write(o_protocol)

            num_annotated += 1

        if num_annotated == 0:
            ## do not save an empty file
            continue

        ## prepare to write out the new file
        o_transport.seek(0)
        o_thrift_data = o_transport.getvalue()

        ## compute md5 of uncompressed data
        o_content_md5 = hashlib.md5(o_thrift_data).hexdigest()

        ## construct output filename
        o_fname = '%s.%s.%s.xz' % (subcorpus, o_content_md5, i_content_md5)

        ## put gpg extension only if we are encrypting output
        if gpg_public is not None:
            o_fname += '.gpg'

        # output file
        o_fpath = os.path.join(tmp_out_dir, o_fname)

        ## temporary output file called .partial, which will be
        ## atomically renamed upon completion.  This provides
        ## robustness against crashes or restarts in condor.
        tmp_out_fpath = o_fpath + '.partial'

        ## compress and encrypt the data
        o_encrypted_data = compress_and_encrypt(o_thrift_data, gpg_public,
                                                gpg_dir)

        ## write it to the tmp file
        fh = open(tmp_out_fpath, 'wb')
        fh.write(o_encrypted_data)
        fh.close()

        ## atomic move of fully written file
        os.rename(tmp_out_fpath, o_fpath)

        ## loop to next input thrift file
        num_files += 1

        ## free memory
        o_encrypted_data = None
        o_thrift_data = None

    ## atomic move of tmp_out_dir to out_dir
    log('renaming %s --> %s' % (tmp_out_dir, out_dir))
    os.rename(tmp_out_dir, out_dir)
    log('Done!  created %d files' % num_files)
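
A hypothetical invocation matching the signature and the directory layout described in the docstring (all paths and the date_hour value below are placeholders):

filter_annotated_docs(
    annotation_path='annotation.json',   # whatever get_annotation() expects
    thrift_dir='corpus-in',              # holds corpus-in/<date_hour>/*.xz or *.xz.gpg
    out_dir='corpus-out',                # filtered copy appears in corpus-out/<date_hour>/
    date_hour='2012-01-01-00',           # placeholder <date_hour> directory name
    gpg_private='private-key',           # needed when the input is encrypted
    gpg_public='public-key',             # omit to write unencrypted .xz output
)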
Example #7
  def get(self, epoch, id):
    time = datetime.datetime.utcfromtimestamp(float(epoch))
    date = '%d-%.2d-%.2d-%.2d' %(time.year, time.month, time.day, time.hour)

    if 2011 == time.year:
      corpus_dir = './corpus/training'
    else:
      corpus_dir = './corpus/testing'
    date_dir = os.path.join(corpus_dir, date)

    target_id = '%s-%s' %(epoch, id)

    if not os.path.isdir(date_dir):
      msg = 'directory %s can not be opened' %date_dir
      #raise tornado.web.HTTPError(404, log_message=msg)
      self.set_status(404)
      self.render("error.html", msg=msg)
      return

    doc = Doc()
    doc['title'] = 'Null'
    doc['body'] = 'Null'
    doc['anchor'] = 'Null'
    doc['date'] = date
    doc['file'] = 'Null'
    doc['time'] = datetime.datetime.utcfromtimestamp(float(epoch)).ctime()
    doc['id'] = target_id
    #self.write('searching')
    #self.flush()

    for fname in os.listdir(date_dir):
      ## ignore other files
      if fname.endswith('.gpg'): continue
      if fname.endswith('.xz'): continue

      fpath = os.path.join(date_dir, fname)
      thrift_data = open(fpath).read()

      if not len(thrift_data) > 0:
        msg = 'failed to load: %s' % fpath
        #raise tornado.web.HTTPError(404, log_message=msg)
        self.render("error.html", msg=msg)
        return

      ## wrap it in a file obj, thrift transport, and thrift protocol
      transport = StringIO(thrift_data)
      transport.seek(0)
      transport = TTransport.TBufferedTransport(transport)
      protocol = TBinaryProtocol.TBinaryProtocol(transport)

      found = False

      ## iterate over all thrift items
      while 1:
        stream_item = StreamItem()
        try:
          stream_item.read(protocol)
          if stream_item.stream_id == target_id:
            found = True
            doc['title'] = stream_item.title.cleansed
            doc['body'] = stream_item.body.cleansed
            doc['anchor'] = stream_item.anchor.cleansed
            doc['file'] = fname
            break
        except EOFError:
          break

      if found: break

    self.render("doc.html", title=target_id, doc=doc)
Example #8
def __get_client():
    tsocket = TSocket.TSocket(__HOST, __PORT)
    transport = TTransport.TBufferedTransport(tsocket)
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    return Client(protocol)
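
The helper relies on module-level __HOST and __PORT constants and on an imported generated Client class; a minimal sketch of those assumed definitions (names taken from the snippet, values are placeholders):

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
# from myservice import Client   # Thrift-generated client class (assumed import path)

__HOST = 'localhost'   # placeholder host
__PORT = 9090          # placeholder port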
Example #9
import sys
import glob
sys.path.append('gen-py')
from CalculadoraRemota import CalculadoraRemotaServicio

from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

transporte = TSocket.TSocket('localhost', 8080)
transporte = TTransport.TBufferedTransport(transporte)
protocolo = TBinaryProtocol.TBinaryProtocol(transporte)

cliente = CalculadoraRemotaServicio.Client(protocolo)

transporte.open()


def Menu():
    print("------------")


print("Calculadora")
print("------------")
print("Menu")
print("1) Adición")
print("2) Substracción")
print("3) Multiplicacion")
print("4) Division")
print("5) Salir")
Example #10
 def connect(self):
     self.socket = TSocket.TSocket(self.host, self.port)
     self.transport = TTransport.TFramedTransport(self.socket)
     self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.client = ValkyrieDbService.Client(self.protocol)
     self.transport.open()
Example #11
import sys
sys.path.append('gen-py')

from alerta import *
from alerta.ttypes import *

from thrift import Thrift
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol

try:
    # transport = TSocket.TSocket('localhost', 9090)
    # transport = TTransport.TBufferedTransport(transport)
    transport = TTransport.TMemoryBuffer()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)

    alert = Alert()
    alert.resource = 'foo'
    alert.event = 'bar'

    print(alert)

    alert.write(protocol)
    bytes = transport.getvalue()

    print(repr(bytes))

    thrift_in = TTransport.TMemoryBuffer(bytes)
    protocol_in = TBinaryProtocol.TBinaryProtocol(thrift_in)
    receivedAlert = Alert()
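
The example is cut off at this point; the natural last step of the round trip, sketched below, would be to deserialize the buffered bytes into the fresh Alert (the closing except clause of the try block is also missing from the source):

    receivedAlert.read(protocol_in)
    print(receivedAlert)  # echoes the resource/event values set above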
Example #12
    def connect(self, host=None, port=None, uri=None):
        """
        Connect method should be called before any operations.
        Server will be connected after connect return OK

        :type  host: str
        :type  port: str
        :type  uri: str
        :param host: (Optional) host of the server, default host is 127.0.0.1
        :param port: (Optional) port of the server, default port is 9090
        :param uri: (Optional) only support tcp proto now, default uri is

                `tcp://127.0.0.1:9090`

        :return: Status, indicate if connect is successful
        :rtype: Status
        """
        if self.status and self.status == Status.SUCCESS:
            raise RepeatingConnectError("You have already connected!")

        config_uri = urlparse(config.THRIFTCLIENT_TRANSPORT)

        _uri = urlparse(uri) if uri else config_uri

        if not host:
            if _uri.scheme == 'tcp':
                host = _uri.hostname
                port = _uri.port or 9090
            else:
                if uri:
                    raise RuntimeError(
                        'Invalid parameter uri: {}'.format(uri)
                    )
                raise RuntimeError(
                    'Invalid configuration for THRIFTCLIENT_TRANSPORT: {transport}'.format(
                        transport=config.THRIFTCLIENT_TRANSPORT)
                )
        else:
            port = port or 9090

        self._transport = TSocket.TSocket(host, port)

        if config.THRIFTCLIENT_BUFFERED:
            self._transport = TTransport.TBufferedTransport(self._transport)
        if config.THRIFTCLIENT_ZLIB:
            self._transport = TZlibTransport.TZlibTransport(self._transport)
        if config.THRIFTCLIENT_FRAMED:
            self._transport = TTransport.TFramedTransport(self._transport)

        if config.THRIFTCLIENT_PROTOCOL == Protocol.BINARY:
            protocol = TBinaryProtocol.TBinaryProtocol(self._transport)

        elif config.THRIFTCLIENT_PROTOCOL == Protocol.COMPACT:
            protocol = TCompactProtocol.TCompactProtocol(self._transport)

        elif config.THRIFTCLIENT_PROTOCOL == Protocol.JSON:
            protocol = TJSONProtocol.TJSONProtocol(self._transport)

        else:
            raise RuntimeError(
                "invalid configuration for THRIFTCLIENT_PROTOCOL: {protocol}"
                    .format(protocol=config.THRIFTCLIENT_PROTOCOL)
            )

        self._client = MilvusService.Client(protocol)

        try:
            self._transport.open()
            self.status = Status(Status.SUCCESS, 'Connected')
            LOGGER.info('Connected to {}:{}'.format(host, port))

        except TTransport.TTransportException as e:
            self.status = Status(code=e.type, message=e.message)
            LOGGER.error(e)
            raise e
        return self.status
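
A hypothetical use of the method, assuming a wrapper class that exposes it and a config.THRIFTCLIENT_TRANSPORT pointing at a tcp endpoint:

wrapper = MilvusClientWrapper()   # hypothetical class exposing this connect() method
status = wrapper.connect(uri='tcp://127.0.0.1:9090')
# or equivalently: wrapper.connect(host='127.0.0.1', port=9090)
print(status)                     # Status(SUCCESS, 'Connected') on success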
Example #13
 def __init__(self):
     transport = TSocket.TSocket(w2v_config.IP, w2v_config.PORT)
     self.transport = TTransport.TBufferedTransport(transport)
     protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.client = word2vecServer.Client(protocol)
     self.transport.open()
Example #14
def moving_average_dfe(size, data_in):
    """Simple DFE implementation."""
    try:
        start_time = time.time()

        # Make socket
        socket = TSocket.TSocket('localhost', 9090)

        # Buffering is critical. Raw sockets are very slow
        transport = TTransport.TBufferedTransport(socket)

        # Wrap in a protocol
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        # Create a client to use the protocol encoder
        client = MovingAverageService.Client(protocol)

        print('Creating a client:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Connect!
        start_time = time.time()
        transport.open()
        print('Opening connection:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Initialize maxfile
        start_time = time.time()
        max_file = client.MovingAverage_init()
        print('Initializing maxfile:\t\t\t\t%.5lfs' %
              (time.time() - start_time))

        # Load DFE
        start_time = time.time()
        max_engine = client.max_load(max_file, '*')
        print('Loading DFE:\t\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Allocate and send input streams to server
        start_time = time.time()
        address_data_in = client.malloc_float(size)
        client.send_data_float(address_data_in, data_in)
        print('Sending input data:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Allocate memory for output stream on server
        start_time = time.time()
        address_data_out = client.malloc_float(size)
        print('Allocating memory for output stream on server:\t%.5lfs' %
              (time.time() - start_time))

        # Action default
        start_time = time.time()
        actions = MovingAverage_actions_t_struct(size, address_data_in,
                                                 address_data_out)
        address_actions = client.send_MovingAverage_actions_t(actions)
        client.MovingAverage_run(max_engine, address_actions)
        print('Moving average time:\t\t\t\t%.5lfs' %
              (time.time() - start_time))

        # Unload DFE
        start_time = time.time()
        client.max_unload(max_engine)
        print('Unloading DFE:\t\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Get output stream from server
        start_time = time.time()
        data_out = client.receive_data_float(address_data_out, size)
        print('Getting output stream:\t(size = %d bit)\t%.5lfs' %
              ((size * 32), (time.time() - start_time)))

        # Free allocated memory for streams on server
        start_time = time.time()
        client.free(address_data_in)
        client.free(address_data_out)
        client.free(address_actions)
        print('Freeing allocated memory for streams on server:\t%.5lfs' %
              (time.time() - start_time))

        # Free allocated maxfile data
        start_time = time.time()
        client.MovingAverage_free()
        print('Freeing allocated maxfile data:\t\t\t%.5lfs' %
              (time.time() - start_time))

        # Close!
        start_time = time.time()
        transport.close()
        print('Closing connection:\t\t\t\t%.5lfs' % (time.time() - start_time))

    except Thrift.TException as thrift_exception:
        print('%s' % thrift_exception.message)
        sys.exit(-1)
Example #15
def handler(event, context):
    start_time = time.time()
    bucket = event['bucket_name']
    worker_index = event['rank']
    num_workers = event['num_workers']
    key = event['file']

    print('bucket = {}'.format(bucket))
    print('number of workers = {}'.format(num_workers))
    print('worker index = {}'.format(worker_index))
    print("file = {}".format(key))

    # Set thrift connection
    # Make socket
    transport = TSocket.TSocket(constants.HOST, constants.PORT)
    # Buffering is critical. Raw sockets are very slow
    transport = TTransport.TBufferedTransport(transport)
    # Wrap in a protocol
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    # Create a client to use the protocol encoder
    t_client = ParameterServer.Client(protocol)
    # Connect!
    transport.open()

    # test thrift connection
    ps_client.ping(t_client)
    print("create and ping thrift server >>> HOST = {}, PORT = {}"
          .format(constants.HOST, constants.PORT))

    # read file from s3
    file = get_object(bucket, key).read().decode('utf-8').split("\n")
    print("read data cost {} s".format(time.time() - start_time))

    # parse dataset
    parse_start = time.time()
    dataset = DenseDatasetWithLines(file, NUM_FEATURES)
    print("parse data cost {} s".format(time.time() - parse_start))

    # preprocess dataset
    preprocess_start = time.time()
    dataset_size = len(dataset)
    indices = list(range(dataset_size))     # indices for training and validation splits:
    split = int(np.floor(VALIDATION_RATIO * dataset_size))
    if SHUFFLE_DATASET:
        np.random.seed(RANDOM_SEED)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, sampler=valid_sampler)
    print("preprocess data cost {} s".format(time.time() - preprocess_start))

    model = DenseSVM(NUM_FEATURES, NUM_CLASSES)

    # Loss and Optimizer
    # Softmax is internally computed.
    # Set parameters to be updated.
    criterion = BinaryClassHingeLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

    # register model
    model_name = "SVM"
    weight_shape = model.linear.weight.data.numpy().shape
    weight_length = weight_shape[0] * weight_shape[1]
    bias_shape = model.linear.bias.data.numpy().shape
    bias_length = bias_shape[0]
    model_length = weight_length + bias_length
    ps_client.register_model(t_client, worker_index, model_name, model_length, num_workers)
    ps_client.exist_model(t_client, model_name)
    print("register and check model >>> name = {}, length = {}".format(model_name, model_length))

    # Training the Model
    train_start = time.time()
    iter_counter = 0
    for epoch in range(NUM_EPOCHS):
        epoch_start = time.time()
        for batch_index, (items, labels) in enumerate(train_loader):
            print("------worker {} epoch {} batch {}------"
                  .format(worker_index, epoch, batch_index))
            batch_start = time.time()

            # pull latest model
            ps_client.can_pull(t_client, model_name, iter_counter, worker_index)
            latest_model = ps_client.pull_model(t_client, model_name, iter_counter, worker_index)
            model.linear.weight = Parameter(torch.from_numpy(np.asarray(latest_model[:weight_length],dtype=np.double).reshape(weight_shape)))
            model.linear.bias = Parameter(torch.from_numpy(np.asarray(latest_model[weight_length:], dtype=np.double).reshape(bias_shape[0])))

            items = Variable(items.view(-1, NUM_FEATURES))
            labels = Variable(labels)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = model(items.double())
            loss = criterion(outputs, labels.double())
            loss.backward()

            # flatten and concat gradients of weight and bias
            w_b_grad = np.concatenate((model.linear.weight.grad.data.numpy().flatten(),
                                       model.linear.bias.grad.data.numpy().flatten()))
            cal_time = time.time() - batch_start

            # push gradient to PS
            sync_start = time.time()
            ps_client.can_push(t_client, model_name, iter_counter, worker_index)
            ps_client.push_grad(t_client, model_name, w_b_grad, LEARNING_RATE, iter_counter, worker_index)
            ps_client.can_pull(t_client, model_name, iter_counter+1, worker_index)      # sync all workers
            sync_time = time.time() - sync_start

            print('Epoch: [%d/%d], Step: [%d/%d] >>> Time: %.4f, Loss: %.4f, epoch cost %.4f, '
                  'batch cost %.4f s: cal cost %.4f s and communication cost %.4f s'
                  % (epoch + 1, NUM_EPOCHS, batch_index + 1, len(train_indices) / BATCH_SIZE,
                     time.time() - train_start, loss.data, time.time() - epoch_start,
                     time.time() - batch_start, cal_time, sync_time))
            iter_counter += 1

        # Test the Model
        correct = 0
        total = 0
        test_loss = 0
        for items, labels in validation_loader:
            items = Variable(items.view(-1, NUM_FEATURES))
            labels = Variable(labels)
            outputs = model(items)
            test_loss += criterion(outputs, labels).data
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()

        print('Time = %.4f, accuracy of the model on the %d test samples: %d %%, loss = %f'
              % (time.time() - train_start, len(val_indices), 100 * correct / total, test_loss))

    end_time = time.time()
    print("Elapsed time = {} s".format(end_time - start_time))
Example #16
 def __init__(self, ip, port):
     self.transport = TSocket.TSocket(ip, port)
     self.transport = TTransport.TBufferedTransport(self.transport)
     self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.client = Tagger.Client(self.protocol)
     self.open()
Example #17
def vector_addition_dfe(size, scalar, in_a, in_b):
    """VectorAddition DFE implementation."""
    try:
        start_time = time.time()
        # Make socket
        socket = TSocket.TSocket('localhost', 9090)

        # Buffering is critical. Raw sockets are very slow
        transport = TTransport.TBufferedTransport(socket)

        # Wrap in a protocol
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        # Create a client to use the protocol encoder
        client = VectorAdditionService.Client(protocol)
        print('Creating a client:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Connect!
        start_time = time.time()
        transport.open()
        print('Opening connection:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Initialize maxfile
        start_time = time.time()
        max_file = client.VectorAddition_init()
        print('Initializing maxfile:\t\t\t\t%.5lfs' %
              (time.time() - start_time))

        # Load DFE
        start_time = time.time()
        max_engine = client.max_load(max_file, '*')
        print('Loading DFE:\t\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Allocate and send input streams to server
        start_time = time.time()
        address_in_a = client.malloc_int32_t(size)
        client.send_data_int32_t(address_in_a, in_a)

        address_in_b = client.malloc_int32_t(size)
        client.send_data_int32_t(address_in_b, in_b)
        print('Sending input data:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Allocate memory for output stream on server
        start_time = time.time()
        address_data_out = client.malloc_int32_t(size)
        print('Allocating memory for output stream on server:\t%.5lfs' %
              (time.time() - start_time))

        # Write vector a to LMem
        start_time = time.time()
        actions_lmem = VectorAddition_writeLMem_actions_t_struct(
            0, size * 4, address_in_a)
        address_actions_lmem = (
            client.send_VectorAddition_writeLMem_actions_t(actions_lmem))
        client.VectorAddition_writeLMem_run_nonblock(max_engine,
                                                     address_actions_lmem)
        print('Writing to LMem:\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Action default
        start_time = time.time()
        actions = VectorAddition_actions_t_struct(scalar, size, address_in_b,
                                                  address_data_out)
        address_actions = client.send_VectorAddition_actions_t(actions)
        client.VectorAddition_run_nonblock(max_engine, address_actions)
        print('Vector addition time:\t\t\t\t%.5lfs' %
              (time.time() - start_time))

        # Unload DFE
        start_time = time.time()
        client.max_unload(max_engine)
        print('Unloading DFE:\t\t\t\t\t%.5lfs' % (time.time() - start_time))

        # Get output stream from server
        start_time = time.time()
        data_out = client.receive_data_int32_t(address_data_out, size)
        print('Getting output stream:\t(size = %d bit)\t%.5lfs' %
              ((size * 32), (time.time() - start_time)))

        # Free allocated memory for streams on server
        start_time = time.time()
        client.free(address_in_a)
        client.free(address_in_b)
        client.free(address_data_out)
        client.free(address_actions)
        client.free(address_actions_lmem)
        print('Freeing allocated memory for streams on server:\t%.5lfs' %
              (time.time() - start_time))

        # Free allocated maxfile data
        start_time = time.time()
        client.VectorAddition_free()
        print('Freeing allocated maxfile data:\t\t\t%.5lfs' %
              (time.time() - start_time))

        # Close!
        start_time = time.time()
        transport.close()
        print('Closing connection:\t\t\t\t%.5lfs' % (time.time() - start_time))

    except Thrift.TException as thrift_exception:
        print('%s' % thrift_exception.message)
        sys.exit(-1)
Example #18
    def findAndRead(self, microbatchId):
        edgeInfoData = EdgeInfoData()
        edgeInfoData.nodeId = EDGE_ID
        edgeInfoData.nodeIp = EDGE_IP
        edgeInfoData.port = EDGE_PORT
        edgeInfoData.reliability = EDGE_RELIABILITY
        edgeInfoData.storage = 12

        client, transport = self.openSocketConnection(FOG_IP, FOG_PORT,
                                                      FOG_SERVICE)

        timestamp_record = str(
            microbatchId) + ",23, local ,find req,starttime = " + repr(
                time.time()) + ","

        response = client.find(microbatchId, True, True, edgeInfoData)

        timestamp_record = timestamp_record + "endtime = " + repr(
            time.time()) + '\n'
        print("the time stamp for find request is ", timestamp_record)

        myLogs = open(BASE_LOG + 'logs.txt', 'a')
        myLogs.write(timestamp_record)
        myLogs.close()

        compFormat = str()
        uncompSize = int()
        ## if the response contains empty list then the search is terminated here
        if len(response) == 0:
            print(
                "Length of response = 0. Replica not found, terminating here")
            return 0, 0
        else:
            ## The microbatch is present in the system.
            ## Fetch the compression format and uncompressed block size; this
            ## answers two questions: 1. what is the file extension?, and
            ## 2. how many bytes must be read from the stream?
            ## NOTE: this only returns a result if the block metadata is present
            ## in the fog of the current partition (i.e. the read is a local read).
            ## The overhead is small since the call is made only once. Also, for a
            ## local read a connection is made directly to an edge, but the edge
            ## does not maintain the block metadata map, so an explicit connection
            ## back to the parent fog would otherwise be needed to retrieve the
            ## metadata. Since a connection to the parent fog is already open here,
            ## it is better to make the call now and retrieve the information.
            ## The call is a guess and may return null; if this turns out to be a
            ## fog read, a connection to another fog will be made anyway and the
            ## format and size will be fetched at that point. The fog contacted for
            ## a fog read will definitely have the corresponding block metadata.
            compFormatSize = client.requestCompFormatSize(microbatchId)
            print(compFormatSize)
            if len(compFormatSize) != 0:
                ## i.e format and uncompressed size present
                compFormat = list(compFormatSize.keys())[0]
                uncompSize = compFormatSize[compFormat]

        self.closeSocket(transport)
        print("Sent replicas ", response)

        for findReplica in response:
            edgeInfoData = findReplica.edgeInfo

            if (edgeInfoData != None):

                print("edgeInfoRecv from fog ", edgeInfoData)
                #have to read data from edge

                transport = TSocket.TSocket(edgeInfoData.nodeIp,
                                            edgeInfoData.port)

                # Buffering is critical. Raw sockets are very slow
                transport = TTransport.TFramedTransport(transport)

                # Wrap in a protocol
                protocol = TBinaryProtocol.TBinaryProtocol(transport)

                # Create a client to use the protocol encoder
                client = EdgeService.Client(protocol)

                # Connect!
                transport.open()

                timestamp_record = str(
                    microbatchId) + ", " + compFormat + ", 25 , " + str(
                        findReplica.node.nodeId
                    ) + " , Read req,starttime = " + repr(time.time()) + ","
                response = client.read(microbatchId, 0, compFormat,
                                       uncompSize)  #this is for recovery
                timestamp_record = timestamp_record + "endtime = " + repr(
                    time.time()) + '\n'
                myLogs = open(BASE_LOG + "logs.txt", 'a')
                myLogs.write(timestamp_record)
                myLogs.close()
                #print response
                print("Read status is ", response.status)
                if response.status == 0:
                    print("File not found : cannot read file")
                    return 0, 0

                elif response.status == 1:
                    #self.formulateJsonResponse(microbatchId,response)
                    bytesRead = len(response.data)
                    print("Local Read ", len(response.data),
                          " number of bytes")
                    print("metadata also read ", response.metadata)
                    return 1, bytesRead  #successful read
                else:
                    return response.code, 0

                transport.close()
            elif (findReplica.node != None):

                fogNode = findReplica.node

                client, transport = self.openSocketConnection(
                    fogNode.NodeIP, fogNode.port, FOG_SERVICE)

                ## Retrieve the compression format and the uncompressed block size for the read operation.
                ## Reaching here means the block is present in another partition, so the previous
                ## 'client.requestCompFormatSize()' would definitely have returned null
                ## (no block in a partition => no metadata for that block is maintained by that partition's fog).
                ## Therefore fetch the format and size with the following call, which will definitely return an entry.
                compFormat = str()
                uncompSize = int()
                compFormatSize = client.requestCompFormatSize(microbatchId)
                if len(compFormatSize) != 0:
                    ## i.e format and uncompressed size present
                    compFormat = list(compFormatSize.keys())[0]
                    uncompSize = compFormatSize[compFormat]

                timestamp_record = str(
                    microbatchId) + ", " + compFormat + ", 27 ," + str(
                        findReplica.node.nodeId
                    ) + ", Read req,starttime = " + repr(time.time()) + ","

                response = client.read(microbatchId, 0, compFormat, uncompSize)
                timestamp_record = timestamp_record + "endtime = " + repr(
                    time.time()) + '\n'
                myLogs = open(BASE_LOG + "logs.txt", 'a')
                myLogs.write(timestamp_record)
                myLogs.close()
                if (response.status == 1):
                    #self.formulateJsonResponse(microbatchId,response)
                    bytesRead = len(response.data)
                    print("Fog Amount of bytes read ", len(response.data))
                    return 1, bytesRead  #successful read
                else:
                    print("The queried fog does not have data")
                    return response.status, 0

                self.closeSocket(transport)
            else:
                print("The queried fog does not have data")
Example #19
#download(1,2);

outfile = open('log.txt', 'w')
sys.stdout = outfile
sys.stderr = outfile

opts, args = getopt.getopt(sys.argv[1:], "hi::o::c::k::s::")
#input_file="sina_userid"
output_folder = "/home/mapred/weibodata"
concurrent_thread = 5
skip_count = 0.5
shutdown_after_finish = 'off'

jsontransport = TSocket.TSocket("localhost", 9085)
jsontransport = TTransport.TBufferedTransport(jsontransport)
jsonprotocol = TBinaryProtocol.TBinaryProtocol(jsontransport)
jsonclient = SendJson.Client(jsonprotocol)
jsontransport.open()

for op, value in opts:
    if op == "-i":
        input_file = value
    elif op == "-o":
        output_folder = value
    elif op == "-c":
        concurrent_thread = int(value)
    elif op == "-s":
        shutdown_after_finish = value
    elif op == "-k":
        skip_count = int(value)
    elif op == "-h":
Example #20
import sys
sys.path.append("gen-py")
from hello import HelloSvc

from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

trans = TSocket.TSocket("localhost", 9090)
trans = TTransport.TBufferedTransport(trans)
proto = TBinaryProtocol.TBinaryProtocol(trans)
client = HelloSvc.Client(proto)

trans.open()
msg = client.hello_func()
print("[Client] received: %s" % msg)
trans.close()
Example #21
parser.add_option("--metastore_hostport",
                  dest="metastore_hostport",
                  default="localhost:9083",
                  help="Metastore hostport to wait for.")
parser.add_option(
    "--transport",
    dest="transport",
    default="buffered",
    help="Transport to use for connecting to HiveServer2. Valid values: "
    "'buffered', 'kerberos', 'plain_sasl'.")
options, args = parser.parse_args()

metastore_host, metastore_port = options.metastore_hostport.split(':')
hive_transport = create_transport(metastore_host, metastore_port, "hive",
                                  options.transport)
protocol = TBinaryProtocol.TBinaryProtocol(hive_transport)
hive_client = ThriftHiveMetastore.Client(protocol)

# Try to connect to the Hive metastore
now = time.time()
TIMEOUT_SECONDS = 30.0
while time.time() - now < TIMEOUT_SECONDS:
    try:
        hive_transport.open()
        resp = hive_client.get_database("default")
        if resp is not None:
            print("Metastore service is up at %s." % options.metastore_hostport)
            exit(0)
    except Exception as e:
        if "SASL" in str(e):  # Bail out on SASL failures
            print("SASL failure when attempting connection:")
Example #22
def handler(event, context):
    start_time = time.time()
    worker_index = event['rank']
    num_workers = event['num_workers']

    # Make socket
    transport = TSocket.TSocket(constants.HOST, constants.PORT)
    # Buffering is critical. Raw sockets are very slow
    transport = TTransport.TBufferedTransport(transport)
    # Wrap in a protocol
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    # Create a client to use the protocol encoder
    client = ParameterServer.Client(protocol)
    # Connect!
    transport.open()

    client.ping()
    print('ping()')

    # register model
    if worker_index == 0:
        client.register_model(MODEL_ID, MODEL_LENGTH, num_workers)

    is_model_exist = False
    while is_model_exist is not True:
        is_model_exist = client.exist_model(MODEL_ID)

    # pull latest model
    try:
        can_pull = False
        while can_pull is not True:
            can_pull = client.can_pull(MODEL_ID, 0, worker_index)
        weight = client.pull_model(MODEL_ID, 0, worker_index)
        weight_data = weight.data
        print("current weight = {}".format(weight_data))
    except InvalidOperation as e:
        print('InvalidOperation: %r' % e)

    # push gradient
    try:
        can_push = False
        while can_push is not True:
            can_push = client.can_push(MODEL_ID, 0, worker_index)
        grad = Grad()
        grad.id = MODEL_ID
        grad.learning_rate = 0.01
        grad.length = 10
        grad.data = [i for i in range(MODEL_LENGTH)]
        grad.n_iter = 0
        grad.worker_id = worker_index
        client.push_grad(grad)
    except InvalidOperation as e:
        print('InvalidOperation: %r' % e)

    # get latest model
    try:
        can_pull = False
        while can_pull is not True:
            can_pull = client.can_pull(MODEL_ID, 1, worker_index)
        weight = client.pull_model(MODEL_ID, 1, worker_index)
        weight_data = weight.data
        print("current weight = {}".format(weight_data))
    except InvalidOperation as e:
        print('InvalidOperation: %r' % e)

    # push update
    try:
        can_push = False
        while can_push is not True:
            can_push = client.can_push(MODEL_ID, 1, worker_index)
        update = Update()
        update.id = MODEL_ID
        update.length = MODEL_LENGTH
        update.data = [i for i in range(MODEL_LENGTH)]
        update.n_iter = 1
        update.worker_id = worker_index
        client.push_update(update)
    except InvalidOperation as e:
        print('InvalidOperation: %r' % e)

    # get latest model
    try:
        can_pull = False
        while can_pull is not True:
            can_pull = client.can_pull(MODEL_ID, 2, worker_index)
        weight = client.pull_model(MODEL_ID, 2, worker_index)
        weight_data = weight.data
        print("current weight = {}".format(weight_data))
    except InvalidOperation as e:
        print('InvalidOperation: %r' % e)

    # Close!
    transport.close()
Example #23
 def framed_protocol_factory(trans):
     trans = TTransport.TFramedTransport(trans)
     return TBinaryProtocol.TBinaryProtocol(trans)
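
A small usage sketch, assuming the factory is available as a plain function and is handed a raw socket transport:

from thrift.transport import TSocket

trans = TSocket.TSocket('localhost', 9090)   # placeholder endpoint
proto = framed_protocol_factory(trans)       # trans is now wrapped in TFramedTransport
proto.trans.open()                           # open the framed transport before issuing RPCs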
Example #24
def main():
    global g_SvrHost
    global g_SvrPort
    global g_InputFile
    global g_OutputFile

    t_sock = TSocket.TSocket(g_SvrHost, g_SvrPort)
    t_transport = TTransport.TFramedTransport(t_sock)
    t_protocol = TBinaryProtocol.TBinaryProtocol(t_transport)
    client = MTSearch.Client(t_protocol)
    t_transport.open()

    if len(g_InputFile):
        fInput = open(g_InputFile, 'r')
    else:
        fInput = sys.stdin

    if len(g_OutputFile):
        fOutput = open(g_OutputFile, 'w')
    else:
        fOutput = sys.stdout

    jsData = json.load(fInput)
    #  print len(jsData)
    for jsItem in jsData:
        #  print jsItem
        req = SearchQueryReq()
        ifName = ''
        for (key, val) in jsItem.iteritems():
            #  print '%s = %s' % (key, val)
            if key == u'category':
                req.category = val.encode('utf-8')
            elif key == u'city':
                req.city = val.encode('utf-8')
            elif key == u'orderby':
                req.orderby = val.encode('utf-8')
            elif key == u'key_words':
                req.key_words = val.encode('utf-8')
            elif key == u'location':
                req.location = val.encode('utf-8')
            elif key == u'id':
                req.id = val
            elif key == u'cityid':
                req.cityid = val
            elif key == u'offset':
                req.offset = val
            elif key == u'limit':
                req.limit = val
            elif key == u'opt':
                req.opt = val
            elif key == u'filter':
                if val != None:
                    req.filter = dict()
                    json2dict(val, req.filter)
            elif key == u'counter':
                if val != None:
                    req.counter = dict()
                    json2dict(val, req.counter)
            elif key == u'control':
                if val != None:
                    req.control = dict()
                    json2dict(val, req.control)
            elif key == u'exdata':
                if val != None:
                    req.exdata = dict()
                    json2dict(val, req.exdata)
            elif key == u'if_name':
                ifName = val.encode('utf-8')

        #  print req
        result = SearchMultiRes()
        if ifName == u'MTSearchPoi':
            result = client.MTSearchPoi(req)
        elif ifName == u'MTSearchDeal':
            result = client.MTSearchDeal(req)
        elif ifName == u'MTMultiSearch':
            result = client.MTMultiSearch(req)
        elif ifName == u'MTMultiSearchDealPoi':
            result = client.MTMultiSearchDealPoi(req)

        fOutput.write('%s\n' % result)
Example #25
 def setUp(self):
     self.transport = TSocket.TSocket('169.228.66.135', '6508')
     self.transport = TTransport.TBufferedTransport(self.transport)
     protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.client = Twitter.Client(protocol)
     self.transport.open()
Example #26
    def connect_thrift_node(self, node_to_connect):
        """ attempt to connect a node through thrift networking services """
        connection_successful = False
        if not node_to_connect.connected:
            try:
                if node_to_connect not in self.connections:  # and \
                    # peer.connection_attempts < MAX_CONNECTION_ATTEMPTS and \
                    # len(self.peers) < self.max_outbound_connections:
                    logger().info('attempting connect_thrift_node %s:%s',
                                  node_to_connect.host, node_to_connect.port)

                    pass_phrase = str(uuid.uuid4())

                    # Make socket
                    transport = TSocket.TSocket(node_to_connect.host,
                                                int(node_to_connect.port))

                    # Buffering is critical. Raw sockets are very slow
                    transport = TTransport.TBufferedTransport(transport)

                    # Wrap in a protocol
                    protocol = TBinaryProtocol.TBinaryProtocol(transport)

                    # Create a client to use the protocol encoder
                    client = BlockchainService.Client(protocol)
                    # Connect
                    transport.open()
                    logger().info('about to register')
                    connection_successful = client.register_node(
                        self.this_node, pass_phrase)
                    logger().info('transport open to node %s',
                                  node_to_connect.node_id)
                    if connection_successful:
                        node_to_connect.connected = True
                        node_to_connect.pass_phrase = pass_phrase
                        node_to_connect.transport = transport
                        node_to_connect.client = client
                        logger().info(
                            '%s accepted outbound connection request.',
                            node_to_connect.node_id)
                        logger().info('node owner: %s', node_to_connect.owner)
                        logger().info('phases provided: %s',
                                      '{:05b}'.format(node_to_connect.phases))
                    else:
                        try:
                            net_dao.update_con_attempts(
                                node_to_connect
                            )  # incrementing connection attempts on fail
                        except Exception as ex:
                            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                            message = template.format(
                                type(ex).__name__, ex.args)
                            logger().warning(message)
                        transport.close()
                        print(node_to_connect.node_id +
                              ' rejected outbound connection request.')

            except Exception as ex:
                if not connection_successful:
                    net_dao.update_con_attempts(node_to_connect)
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                message = template.format(type(ex).__name__, ex.args)
                logger().warning(message)
            finally:
                logger().info('connect_thrift_node %s',
                              str(connection_successful))
        return connection_successful
Example #27
    def _make_client(self):

        protocol = TBinaryProtocol.TBinaryProtocol(trans=self.transport,
                                                   strictRead=False,
                                                   strictWrite=False)
        self.client = scribe.Client(protocol)
Example #28
 def __init__(self, host='localhost', port=9090):
     transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
     protocol = TBinaryProtocol.TBinaryProtocol(transport)
     self.client = Hbase.Client(protocol)
     transport.open()
Example #29
# Author:Dengwenxing
# -*- coding: utf-8 -*-
# @Time     :2019/12/30 15:09
# @Site     :
# @File     : hbaseReader.py
# @Software :

from thrift.transport import TSocket,TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *


transport = TSocket.TSocket('24.1.17.3',9090)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()

def gettables():
    print(client.getTableNames())


if __name__ == '__main__':
    gettables()
Example #30
def refresh_topology_stats():
    logging.debug('Refreshing topology stats')
    for t in topologies:
        all_topology_stats[t] = {'topology_stats_got': False}
    global clusterinfo
    try:
        transport = TSocket.TSocket(nimbus_host, nimbus_port)
        transport.setTimeout(1000)
        framedtrasp = TTransport.TFramedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(framedtrasp)
        client = Nimbus.Client(protocol)
        framedtrasp.open()
        boltspoutstats = None
        component_task_count = None
        component_exec_count = None
        clusterinfo = client.getClusterInfo()
        for tsummary in clusterinfo.topologies:
            if tsummary.name not in topologies:
                continue
            toplogyname = tsummary.name
            overallstats = {}
            overallstats['ExecutorCount'] = tsummary.num_executors
            overallstats['TaskCount'] = tsummary.num_tasks
            overallstats['WorkerCount'] = tsummary.num_workers
            overallstats['UptimeSecs'] = tsummary.uptime_secs
            all_topology_stats[toplogyname]['overallstats'] = overallstats
            boltspoutstats = {}
            component_task_count = {}
            component_exec_count = {}
            all_topology_stats[toplogyname]['boltspoutstats'] = boltspoutstats
            all_topology_stats[toplogyname][
                'component_task_count'] = component_task_count
            all_topology_stats[toplogyname][
                'component_exec_count'] = component_exec_count
            tinfo = client.getTopologyInfo(tsummary.id)
            all_topology_stats[toplogyname]['topology_stats_got'] = True
            for exstat in tinfo.executors:
                stats = exstat.stats
                update_whole_num_stat_special(stats.emitted[":all-time"],
                                              boltspoutstats,
                                              exstat.component_id, 'Emitted')
                update_whole_num_stat_special(stats.transferred[":all-time"],
                                              boltspoutstats,
                                              exstat.component_id,
                                              'Transferred')

                numtask = exstat.executor_info.task_end - exstat.executor_info.task_start + 1
                update_task_count(component_task_count, exstat.component_id,
                                  numtask)
                update_exec_count(component_exec_count, exstat.component_id, 1)
                if stats.specific.bolt is not None:
                    update_whole_num_stat(
                        stats.specific.bolt.acked[":all-time"], boltspoutstats,
                        exstat.component_id, 'Acked')
                    update_whole_num_stat(
                        stats.specific.bolt.failed[":all-time"],
                        boltspoutstats, exstat.component_id, 'Failed')
                    update_whole_num_stat(
                        stats.specific.bolt.executed[":all-time"],
                        boltspoutstats, exstat.component_id, 'Executed')
                    update_avg_stats(stats.specific.bolt.process_ms_avg["600"],
                                     boltspoutstats, exstat.component_id,
                                     'process_ms_avg')
                    update_avg_stats(stats.specific.bolt.execute_ms_avg["600"],
                                     boltspoutstats, exstat.component_id,
                                     'execute_ms_avg')
                if stats.specific.spout is not None:
                    update_whole_num_stat(
                        stats.specific.spout.acked[":all-time"],
                        boltspoutstats, exstat.component_id, 'Acked')
                    update_whole_num_stat(
                        stats.specific.spout.failed[":all-time"],
                        boltspoutstats, exstat.component_id, 'Failed')
                    update_avg_stats(
                        stats.specific.spout.complete_ms_avg[":all-time"],
                        boltspoutstats, exstat.component_id, 'complete_ms_avg')
            if '__acker' in boltspoutstats:
                del boltspoutstats['__acker']
            for key in boltspoutstats:
                if 'complete_ms_avg' in boltspoutstats[key]:
                    avg = get_avg(boltspoutstats[key]['complete_ms_avg'])
                    boltspoutstats[key]['CompleteLatency'] = avg
                    del boltspoutstats[key]['complete_ms_avg']
                if 'process_ms_avg' in boltspoutstats[key]:
                    avg = get_avg(boltspoutstats[key]['process_ms_avg'])
                    boltspoutstats[key]['ProcessLatency'] = avg
                    del boltspoutstats[key]['process_ms_avg']
                if 'execute_ms_avg' in boltspoutstats[key]:
                    avg = get_avg(boltspoutstats[key]['execute_ms_avg'])
                    boltspoutstats[key]['ExecuteLatency'] = avg
                    del boltspoutstats[key]['execute_ms_avg']

            for key in component_task_count:
                if key in boltspoutstats:
                    boltspoutstats[key]['Tasks'] = component_task_count[key]
            for key in component_exec_count:
                if key in boltspoutstats:
                    boltspoutstats[key]['Executors'] = component_exec_count[
                        key]
        framedtrasp.close()

    except Exception as e:
        clusterinfo = None
        logging.warning(e)