Ejemplo n.º 1
def add_data(traffic_data):
  # Registers a "point-type" type with GPUdb, creates a new set named by a
  # fresh UUID, encodes one datum per record of traffic_data, and issues a
  # bulk add against that set.
  #
  # traffic_data: iterable of indexable records; e[0], e[1] and e[2] are
  #               stored under 'src', 'dst' and 'payload'.
  # Returns the tuple (set_id, gpudb).
  #
  # NOTE(review): the triple-quoted type_definition literal below is opened
  # but never closed in this copy, so the schema body is missing -- restore
  # it before relying on this function.
  # NOTE(review): encoded_datum is computed in the loop but never appended to
  # encoded_datums, so do_bulk_add is handed an empty list; x and y also stay
  # at 1 for every record. Confirm against the upstream source.
  gpudb = GPUdb(encoding='BINARY',gpudb_ip='',gpudb_port='9191')

  # Add more fields as needed for the analysis
  type_definition = """{
  retobj = gpudb.do_register_type(type_definition,"","point-type","POINT")
  type_id = retobj['type_id']
  set_id = str(uuid.uuid1())
  retobj = gpudb.do_new_set(type_id,set_id)
  x = 1;y = 1 
  encoded_datums = []
  for e in traffic_data: 
    datum = ordereddict.OrderedDict([('x',x), ('y',y), ('src',e[0]),('dst',e[1]),('payload',e[2])])
    encoded_datum = gpudb.encode_datum(type_definition,datum)
  gpudb.do_bulk_add(set_id, encoded_datums)

  return set_id,gpudb
Ejemplo n.º 2
def add_data(traffic_data):
    # Registers a "point-type" type with GPUdb, creates a new set named by a
    # fresh UUID, encodes one datum per record of traffic_data (x and y start
    # at 1 and advance by one per record), and issues a bulk add to that set.
    #
    # traffic_data: iterable of indexable records; e[0], e[1] and e[2] are
    #               stored under 'src', 'dst' and 'payload'.
    # Returns the tuple (set_id, gpudb).
    #
    # NOTE(review): the triple-quoted type_definition literal below is opened
    # but never closed in this copy, so the schema body is missing -- restore
    # it before relying on this function.
    # NOTE(review): encoded_datum is computed in the loop but never appended
    # to encoded_datums, so do_bulk_add is handed an empty list. Confirm
    # against the upstream source.
    gpudb = GPUdb(encoding='BINARY', gpudb_ip='', gpudb_port='9191')

    # Add more fields as needed for the analysis
    type_definition = """{

    retobj = gpudb.do_register_type(type_definition, "", "point-type", "POINT")
    type_id = retobj['type_id']

    set_id = str(uuid.uuid1())
    retobj = gpudb.do_new_set(type_id, set_id)

    x = 1
    y = 1
    encoded_datums = []
    for e in traffic_data:
        datum = ordereddict.OrderedDict([('x', x), ('y', y), ('src', e[0]),
                                         ('dst', e[1]), ('payload', e[2])])
        encoded_datum = gpudb.encode_datum(type_definition, datum)
        x += 1
        y += 1

    gpudb.do_bulk_add(set_id, encoded_datums)

    return set_id, gpudb
Ejemplo n.º 3
def gpudb_cmd(argv):
    # Command-line front end for GPUdb: builds a top-level argparse parser,
    # looks up the request schema for the query named after --query, derives
    # a second parser from that schema's fields, and posts the request via
    # the GPUdb object's private helpers, printing the response through
    # print_dict in the format chosen on the command line.
    #
    # argv: not referenced in the visible body; sys.argv is inspected and
    #       parser.parse_args() is called without arguments.
    #
    # NOTE(review): this copy has lines missing throughout -- the docstring
    # is never closed, most add_argument calls have lost their opening line,
    # the getpass line is fused with an unrelated print, several
    # `help=... %` continuations dangle, and the "print help and quit"
    # branches have no body -- so it does not parse as-is. Restore the
    # function from the upstream source before changing any logic.
    """A command line interface to send a specified request to a GPUDB server.
       Can be used to print the parameters for a request as well.

    # Default values
    file_name = ""

    # Add arguments to the parser
    parser = argparse.ArgumentParser()
        "IP address and port of GPUdb in the format: IP_ADDRESS:PORT (default"
                        help="Username used when connecting to GPUdb.")
                        help="Password used when connecting to GPUdb.")
        "Ask for the password to use when connecting to GPUdb (more secure than --password)"
        "Use avro JSON encoding of request message to GPUdb (default is avro binary)"
        choices=["json", "oneline", "ini", "raw"],
        "Format the returned GPUDB response in a few ways. (default 'json')")
        "Print the request query before sending it using the specified format."

    # User must provide one or the other
    query_group = parser.add_mutually_exclusive_group(required=True)
                             help="Lists all available GPUDB request queries.")
        "Print the JSON schema of the specified request and response query.")
    query_group.add_argument( '--query', nargs = argparse.REMAINDER,
                         help = "Send a request query by specifying the name of the query and the parameters associated with the query. " \
                                "Help is provided if only the query name is specified. " \
                                "Note that unspecified parameters will take a default value. " \
                                "Example: '--query aggregate_min_max --column_name x --table_name DataTable'" )

    # Print the help message and quit if no arguments are given
    if (len(sys.argv) == 1):  # None provided

    # Parse the command line arguments
    args = parser.parse_args()

    # --------------------------------------
    # Set up GPUdb
    GPUdb_IP, GPUdb_Port = args.gpudb.split(":")
    password = args.password
    if args.ask_password:
        password = getpass.getpass("GPUdb password:"******"Unknown query name: '%s'" % query_name)

        req_schema_str = gpudb.gpudb_schemas[query_name]["REQ_SCHEMA_STR"]
        rsp_schema_str = gpudb.gpudb_schemas[query_name]["RSP_SCHEMA_STR"]
        req_odict = json.JSONDecoder(
        rsp_odict = json.JSONDecoder(

        # Use desired formatting
        print_dict(req_odict, args.format)
        print_dict(rsp_odict, args.format)

    # --------------------------------------
    # List all endpoint/query names, if desired by user
    if (args.list_queries == True) or (len(args.query) == 0):
        for q in sorted(query_names):
            0)  # Succesful termination after printing the desired help message

    # --------------------------------------
    # Get the query JSON string from GPUdb
    query_name = args.query[0]
    if query_name not in query_names:
        print("Unknown query name: '%s'" % query_name)
    request_json = gpudb.gpudb_schemas[query_name]["REQ_SCHEMA_STR"]

    # Parse the request JSON to get the parameters
    request_schema = gpudb.gpudb_schemas[query_name]["REQ_SCHEMA"]
    request_json = request_schema.to_json()["fields"]

    # Create a dictionary of (param name, param type) pairs based on the JSON
    param_name_type = {}
    param_vals = {}
    for param in request_json:
        param_name_type[param['name']] = param['type']
        # Binary/bytes parameters will be skipped
        if param['type'] == "string" or param['type'] == "bytes":
            param_vals[param['name']] = ""  # Default is empty string
        if param['type'] == "map":
            param_vals[param['name']] = {}  # Default is empty map
        if param['type'] == "list":
            param_vals[param['name']] = []  # Default is empty list
        # Note that numeric attributes are not getting a default
        # User MUST provide such values, or we output an error

    # Create a parser for query-specific parameters
    query_parser = argparse.ArgumentParser()

    # Add parameters to be parsed
    for pname, ptype in param_name_type.items():
        if ptype == "string":  # Make string arguments optional
            query_parser.add_argument("--" + pname,
                                      help="Defaults to empty string")
        elif ptype == "double" or ptype == "float":
            query_parser.add_argument("--" + pname,
                                      help="Required parameter, type %s" %
        elif ptype == "long":
            query_parser.add_argument("--" + pname,
                                      help="Required parameter, type %s" %
        elif ptype == "int":
            query_parser.add_argument("--" + pname,
                                      help="Required parameter, type %s" %
        elif ptype == "bytes":
            continue  # ignore bytes
        elif ptype == "boolean":  # Boolean flag
            # User must provide one or the other
            bool_group = query_parser.add_mutually_exclusive_group(
                "--" + pname,
                help="Boolean parameter, include to set %s to TRUE" % pname)
                "--no-" + pname,
                help="Boolean parameter, include to set %s to FALSE" % pname)
        else:  # Maps and lists get empty ones by default; handling is delicate; ignore 'bytes'
            if ptype['type'] == "map":
                    "--" + pname,
                    "Expected map value of type: %s; surround the whole map with single quotes (') and any string (key or value) within with double quotes (\"). E.g. for random, --param_map '{\"x\":{\"min\":2}}'. When omitted, defaults to empty map"
                    % ptype['values'])
            else:  # Arrays
                    "--" + pname,
                    "Comma separated list (escape spaces with \) enclosed in []. For example, for filter_by_nai, --x_vector [1,2,3,4] or --x_vector [1,\ 2,\ 3,\ 4]. If contains strings, then enclose the whole thing within single quotes and the individual string in double quotes.  E.g., for filter_by_string, --attributes '[\"x\",\"y\"]'. When omitted, defaults to an empty list."

    # Print the help message and quit if no arguments are given (and none is expected)
    if (len(args.query[1:]) == 0 and len(param_name_type) > 0):
        print("No parameters provided for query: ", query_name)

    # Parse the parameters and store in a dictionary
    query_args = vars(query_parser.parse_args(args.query[1:]))

    # Copy the parsed values to the ordered dictionary to pass to GPUdb
    for key, val in query_args.items():
        param_vals[key] = val

    # --------------------------------------
    # Call the GPUDB query:

    # Obtain the request and response schemas for the given query
    (req_schema, resp_schema) = gpudb._GPUdb__get_schemas(query_name)
    endpoint = gpudb._GPUdb__get_endpoint(query_name)

    # --------------------------------------
    if args.print_query:
        encoded_datum = gpudb.encode_datum(req_schema, param_vals)
        request_odict = gpudb._GPUdb__read_orig_datum(req_schema,
        print_dict(request_odict, args.format)

    # --------------------------------------
    # Perform the GPUDB query
    response = gpudb._GPUdb__post_then_get(req_schema, resp_schema, param_vals,

    print_dict(response, args.format)
Ejemplo n.º 4
def run_gpudb( argv ):
    """Run one GPUdb query from the command line, or list the available queries.

    Query definitions are discovered as ``*_request.json`` files under the
    ``--request-path`` directory.  With ``--list-queries`` (or ``--query``
    followed by nothing) the query names are printed and the process exits
    with status 0.  Otherwise the first token after ``--query`` names the
    query and the remaining tokens are parsed as that query's parameters,
    using a parser generated from the fields of the request schema; the
    request is then posted to GPUdb and the response is printed.

    Args:
        argv: Accepted for interface compatibility; the command line is
            actually read from ``sys.argv``.

    Exits with status 2 when no arguments are given, the JSON path does not
    exist, the query is unknown, or a query that takes parameters is given
    none.
    """
    # Default values
    file_name = ""

    # Add arguments to the parser
    parser = argparse.ArgumentParser()
    parser.add_argument( '-g', nargs = '?', default = "",
                         help = "IP address and port of GPUdb in the format: xxx.xx.xx.xx:xxxx (defaults to" )
    parser.add_argument( '--request-path', nargs = '?', default = gpudb_obj_defs_path,
                         help = "Path of the JSON defintions (defaults to %s)" % gpudb_obj_defs_path )
    # User must provide one or the other
    query_group = parser.add_mutually_exclusive_group( required = True )
    query_group.add_argument( "--list-queries", action = 'store_true',
                         help = "Lists all available GPUDB queries." )
    query_group.add_argument( '--query', nargs = argparse.REMAINDER,
                         help = "Name of the query to be executed and any parameters associated with the query. For example, '--query max_min --attribute x --set_id set1'. Not providing any parameter after the query name will print query specific help information." )

    # Print the help message and quit if no arguments are given
    if len( sys.argv ) == 1:
        parser.print_help()
        sys.exit( 2 )

    # Parse the command line arguments
    args = parser.parse_args()

    # Parse and check the request JSON path
    request_path = args.request_path
    if not os.path.exists( request_path ):
        print( "Path for JSONs does not exist:  %s" % request_path )
        sys.exit( 2 )
    if not request_path.endswith( "/" ): # simplify logic below by enforcing trailing '/'
        request_path += "/"

    # Create a list of all request JSON filenames
    filenames = [ request_path + f for f in os.listdir( request_path ) if "_request.json" in f ]

    # --------------------------------------
    # List all endpoint/query names, if desired by user
    if args.list_queries or not args.query:
        # Strip the path and suffix to recover the bare query name
        query_names = [ f.replace( request_path, "" ).replace( "_request.json", "" ) for f in filenames ]
        for q in sorted( query_names ):
            print( q )
        sys.exit( 0 ) # Successful termination after printing the desired help message

    # --------------------------------------
    # Set up GPUdb.  A missing address or a missing ":PORT" part falls back
    # to the local host ('') and port 9191 instead of failing on unpacking.
    GPUdb_IP, _, GPUdb_Port = ( args.g or "" ).partition( ":" )
    if not GPUdb_Port:
        GPUdb_Port = "9191"
    gpudbdb = GPUdb( encoding = 'BINARY', host = GPUdb_IP, port = GPUdb_Port )

    # Find and read the desired query JSON file (the last match wins)
    query_name = args.query[ 0 ]
    wanted_suffix = "/" + query_name + "_request.json"
    for f in filenames:
        if wanted_suffix in f:
            file_name = f
    if file_name == "":
        print( "Query not found:  %s" % query_name )
        sys.exit( 2 )
    with open( file_name, "r" ) as json_file: # closed even if reading fails
        request_json = json_file.read()

    # Parse the request JSON to get the parameters
    request_schema = schema.parse( request_json )
    request_fields = request_schema.to_json()[ "fields" ]

    # Create a dictionary of (param name, param type) pairs based on the JSON
    param_name_type = {}
    param_vals = {}
    for param in request_fields:
        pname, ptype = param[ 'name' ], param[ 'type' ]
        param_name_type[ pname ] = ptype
        # Binary/bytes parameters are never parsed; they keep this default
        if ptype == "string" or ptype == "bytes":
            param_vals[ pname ] = "" # Default is empty string
        elif ptype == "map":
            param_vals[ pname ] = {} # Default is empty map
        elif ptype == "list":
            param_vals[ pname ] = [] # Default is empty list
        # Note that numeric attributes are not getting a default
        # User MUST provide such values, or we output an error

    # 'long' only exists on Python 2; Python 3's int is already unbounded
    try:
        long_type = long
    except NameError:
        long_type = int

    # Create a parser for query-specific parameters
    query_parser = argparse.ArgumentParser()

    # Add parameters to be parsed
    query_parser.add_argument( "--format-response", action = 'store_true', dest = "format_response",
                             help = "Boolean parameter, include to print formatted GPUDB response. Omitting it prints the raw GPUDB response." )
    for pname, ptype in param_name_type.items():
        if ptype == "string": # Make string arguments optional
            query_parser.add_argument( "--" + pname, nargs='?', default="", help = "Defaults to empty string" )
        elif ptype == "double" or ptype == "float":
            query_parser.add_argument( "--" + pname, type = float, required = True, help = "Required parameter, type %s" % ptype )
        elif ptype == "long":
            query_parser.add_argument( "--" + pname, type = long_type, required = True, help = "Required parameter, type %s" % ptype )
        elif ptype == "int":
            query_parser.add_argument( "--" + pname, type = int, required = True, help = "Required parameter, type %s" % ptype )
        elif ptype == "bytes":
            continue # ignore bytes
        elif ptype == "boolean": # Boolean flag
            # User must provide one or the other
            bool_group = query_parser.add_mutually_exclusive_group( required = True )
            bool_group.add_argument( "--" + pname, action = 'store_true', dest = pname,
                                       help = "Boolean parameter, include to set %s to TRUE" %pname )
            bool_group.add_argument( "--no-" + pname, action = 'store_false', dest = pname,
                                       help = "Boolean parameter, include to set %s to FALSE" % pname )
        else: # Maps and lists get empty ones by default; handling is delicate; ignore 'bytes'
            if ptype[ 'type' ] == "map":
                query_parser.add_argument( "--" + pname, nargs = '?', type = json.loads, default = {},
                                           help = "Expected map value of type: %s; surround the whole map with single quotes (') and any string (key or value) within with double quotes (\"). E.g. for random, --param_map '{\"x\":{\"min\":2}}'. When omitted, defaults to empty map" % ptype['values'] )
            else: # Arrays
                query_parser.add_argument( "--" + pname, type = json.loads, default=[],
                                           help = "Comma separated list (escape spaces with \) enclosed in []. For example, for filter_by_nai, --x_vector [1,2,3,4] or --x_vector [1,\ 2,\ 3,\ 4]. If contains strings, then enclose the whole thing within single quotes and the individual string in double quotes.  E.g., for filter_by_string, --attributes '[\"x\",\"y\"]'. When omitted, defaults to an empty list." )

    # Print the help message and quit if no arguments are given (and some are expected)
    if len( args.query[1:] ) == 0 and len( param_name_type ) > 0:
        print( "No parameters provided for query:  %s" % query_name )
        query_parser.print_help()
        sys.exit( 2 )

    # Parse the parameters and store in a dictionary
    query_args = vars( query_parser.parse_args( args.query[1:] ) )

    # Copy the parsed values to the dictionary to pass to GPUdb; the
    # --format-response flag only controls printing and is not a request field
    for key, val in query_args.items():
        if key != "format_response":
            param_vals[ key ] = val

    # --------------------------------------
    # Call the GPUDB query:
    # Derive the endpoint name from the query name
    endpoint_name = "/" + query_name.replace( "_", "" )
    # One exception is /add
    if endpoint_name == "/addobject":
        endpoint_name = "/add"

    # Parse request and response schemas for GPUDB
    (req_schema, resp_schema) = gpudbdb.get_schemas( query_name )

    # Perform the GPUDB query
    response = gpudbdb.post_then_get( req_schema, resp_schema, param_vals, endpoint_name )

    # Print either the formatted or the raw response, never both
    print( "GPUDB Response:" )
    if query_args[ "format_response" ]:
        print( format_response( response ) )
    else:
        print( response )
Ejemplo n.º 5
def test_gpudb_ingestor():
    """Tries to stress out Kinetica's multi-head ingestion mode.  Tests
       all possible sharding under the sun.

    Recreates the table ``test_ingest_table2`` with one column per
    type/property combination listed below, publishes a GPUdbIngestor through
    the module-level ``gpudb_ingestor`` name, then fans
    ``generate_and_insert_data`` out over several process pools and waits for
    every pool to finish before printing the summary.
    """
    global gpudb_ingestor

    gpudb = GPUdb( encoding='BINARY', host = '', port = '9191' )

    table_name = "test_ingest_table2"

    # Clear table if exists
    gpudb.clear_table( table_name, options = {"no_error_if_not_exists": "true"} )

    # The table type/schema-- want all possibly type/properties to be sharded and nullable
    _type = [ ["i1",          "int"                                       ],
              ["i2",          "int", "shard_key", "nullable"              ],
              ["i8",          "int", "shard_key", "nullable", "int8"      ],
              ["i16",         "int", "shard_key", "nullable", "int16"     ],
              ["d1",       "double", "shard_key", "nullable"              ],
              ["f1",        "float", "shard_key", "nullable"              ],
              ["l1",         "long", "shard_key", "nullable"              ],
              ["timestamp",  "long", "shard_key", "nullable", "timestamp" ],
              ["s1",       "string", "shard_key", "nullable"              ],
              ["date",     "string", "shard_key", "nullable", "date"      ],
              ["datetime", "string", "shard_key", "nullable", "datetime"  ],
              ["decimal",  "string", "shard_key", "nullable", "decimal"   ],
              ["ipv4",     "string", "shard_key", "nullable", "ipv4"      ],
              ["time",     "string", "shard_key", "nullable", "time"      ],
              ["c1",       "string", "shard_key", "nullable", "char1"     ],
              ["c2",       "string", "shard_key", "nullable", "char2"     ],
              ["c4",       "string", "shard_key", "nullable", "char4"     ],
              ["c8",       "string", "shard_key", "nullable", "char8"     ],
              ["c16",      "string", "shard_key", "nullable", "char16"    ],
              ["c32",      "string", "shard_key", "nullable", "char32"    ],
              ["c64",      "string", "shard_key", "nullable", "char64"    ],
              ["c128",     "string", "shard_key", "nullable", "char128"   ],
              ["c256",     "string", "shard_key", "nullable", "char256"   ] ]
    table = GPUdbTable( _type, table_name, db = gpudb )

    print ("Table Name:", table_name)

    record_type = table.get_table_type()

    # Instantiate a gpudb ingestor object; pay attention to the batch size.
    # Realistic cases would have higher batch sizes.
    ingestor_batch_size = 200
    options = {}
    workers = GPUdbWorkerList( gpudb )
    print ("Workers: ", workers.worker_urls, "\n")
    gpudb_ingestor = GPUdbIngestor( gpudb, table_name, record_type, ingestor_batch_size, options, workers )

    # Generate records to insert
    num_batches      =    5  # Passed to generate_and_insert_data()
    batch_size       = 1000  # Passed to generate_and_insert_data()
    num_pools        =    5  # Number of processes spawned in a single Pool call
    num_pool_batches =   10  # Number of times Pool is invoked

    # Generate and insert data in parallel; total number of processes
    # spawned: (num_pools * num_pool_batches).  Every pool is started before
    # any is waited on, so the batches still overlap.
    task_args = [[batch_size, num_batches]] * num_pools
    pools = []
    for _ in range( num_pool_batches ):
        pool = Pool( processes = num_pools )
        pool.map_async( generate_and_insert_data, task_args )
        pool.close()  # no further tasks; lets join() return once the work is done
        pools.append( pool )

    # Wait for the workers: without this the summary below is printed (and
    # the function returns) while insertions may still be in flight, and the
    # pool processes are never reaped.
    for pool in pools:
        pool.join()
    # end parallel data generation and insertion

    # NOTE: the ingestor is not flushed here; records are queued inside the
    # worker processes, so the flush has to happen in
    # generate_and_insert_data().

    num_records = num_batches * batch_size * num_pools * num_pool_batches
    print ()
    print ("Table name:", table_name)
    print ("Total # objects inserted:", num_records)
    print ()
import ordereddict
import sys
# Traffic capture packages
import dpkt
#from scapy.all import sr1,IP,ICMP,rdpcap
from scapy.all import *

# GPUdb packages
from gpudb import GPUdb
import uuid  #for generating uuids

# Connect to GPUdb when the module is loaded (binary encoding, port 9191).
# NOTE(review): the triple-quoted type_definition literal below is opened but
# never closed in this copy, so the schema body is missing -- restore it from
# the upstream source.
gpudb = GPUdb(encoding='BINARY', gpudb_ip='', gpudb_port='9191')
# Add more fields as needed for the analysis
type_definition = """{

# TODO : Pass pcap file as input
Ejemplo n.º 7
def diagnose_gpudb( argv ):
    """Run a diagnostic test on GPUdb.

    Registers a point type, creates a set, fills it with two batches of
    random points, and then exercises status, bounding box, filter by
    radius, select, select-delete, statistics and select-update, asserting
    after each call that the server answered 'OK' and that the counts agree.
    All sets created along the way are cleared at the end.

    Args:
        argv -- Command line arguments.  Accepted for interface
                compatibility; options are actually read from sys.argv.
                Recognised options: -h (help), -l (local), -g <ip>,
                -p <port>.

    Raises:
        AssertionError: when any GPUdb call reports a non-OK status or a
            count does not match its expected value.

    Exits with status 2 on a missing or malformed command line and with
    status 0 after printing help for -h.
    """
    # Parse the command line arguments
    if len( sys.argv ) == 1: # None provided
        # Print help message and quit
        print( helpMessage )
        sys.exit( 2 )
    try:
        opts, args = getopt.getopt( sys.argv[1:], "hlg:p:" )
    except getopt.GetoptError:
        print( helpMessage )
        sys.exit( 2 )

    # Some default values
    GPUdb_IP = '' # Run locally by default
    GPUdb_Port = '9191' # Default port

    # Apply the options.  '-l' (run locally) needs no handling of its own
    # because the defaults above already target the local machine.
    for opt, arg in opts:
        if opt == '-h': # print usage and exit
            print( helpMessage )
            sys.exit( 0 )
        elif opt == '-g': # use the GPUdb server at the specified IP address
            GPUdb_IP = arg
        elif opt == '-p': # use the GPUdb server at the specified port
            GPUdb_Port = arg

    # Set up GPUdb with binary encoding
    gpudb = GPUdb( encoding='BINARY', host = GPUdb_IP, port = GPUdb_Port )

    # Create a data type
    # NOTE(review): as written this literal reduces to "{}" once spaces and
    # newlines are stripped; the field definitions appear to be missing.
    point_schema_str = """{
                            }""".replace(' ','').replace('\n','')

    # Register the data type and ensure that it worked
    # Endpoint: /registertype
    register_resp = gpudb.register_type ( point_schema_str, "", "point_type", "POINT" )
    _assert_status_ok( register_resp, "GPUdb failed to register point data type; error message: %s" )

    # Using the registered type's ID, create a new set (and check that worked)
    # Endpoint: /newset
    type_id = register_resp[ 'type_id' ]
    set_id = "diagnostics_point_set_" + datetime.datetime.now().isoformat()
    new_set_resp = gpudb.new_set( type_id, set_id, "" ) # no parent set ID
    _assert_status_ok( new_set_resp, "GPUdb failed to create point set; error message: %s" )

    # Add some data to the set in batches
    # Endpoint: /random
    count_1 = 2000
    param_map_1 = { "x": {"min": 0, "max": 42 }, "y": {"min": 0, "max": 42 } }
    random_resp = gpudb.random( set_id, count_1, param_map_1 )

    # Check that the first set of objects were generated successfully
    _assert_status_ok( random_resp, "GPUdb failed to generate random points; error message: %s" )

    # Add another batch of data points to the same set, but at a different location
    # Endpoint: /random
    count_2 = 2000
    param_map_2 = { "x": {"min": -50, "max": -20 }, "y": {"min": -50, "max": -20 } }
    random_resp = gpudb.random( set_id, count_2, param_map_2 )

    # Check that the second set of objects were generated successfully
    _assert_status_ok( random_resp, "GPUdb failed to generate random points; error message: %s" )

    # Check the total size of the set is as intended
    # Endpoint: /status
    total_size = count_1 + count_2
    status_resp = gpudb.status( set_id )
    _assert_status_ok( status_resp, "GPUdb failed to check status of set; error message: %s" )
    assert status_resp[ 'total_size' ] == total_size, "Error: Total size of set is not as expected. Set size = %s, expected size = %s" % ( status_resp[ 'total_size' ], total_size )

    # Query chaining: do two filters one after another, get final count
    # Do a similar query with select, check count against the chained queries

    # Bounding box: x within [10, 20] and y within [10, 20]
    # Endpoint: /boundingbox
    bbox_set_id = "diagnostics_bbox_result_" + datetime.datetime.now().isoformat()
    bbox_resp = gpudb.bounding_box( 10, 20, 10, 20, "x", "y", set_id, bbox_set_id )
    _assert_status_ok( bbox_resp, "GPUdb failed to perform bounding box query; error message: %s" )

    # Filter by radius: 100km radius around (lon, lat) = (15, 15)
    # Endpoint: /filterbyradius
    fradius_set_id = "diagnostics_filter_by_radius_result_" + datetime.datetime.now().isoformat()
    fradius_resp = gpudb.filter_by_radius( bbox_set_id, "x", "y", 15, 15, 100000, fradius_set_id )
    _assert_status_ok( fradius_resp, "GPUdb failed to perform filter by radius query; error message: %s" )

    # Do a select query with a predicate that should yield the same result
    # as the above chained queries
    # Endpoint: /select
    select_set_id = "diagnostics_select_result_" + datetime.datetime.now().isoformat()
    predicate = "( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )"
    select_resp = gpudb.select( set_id, select_set_id, predicate )
    _assert_status_ok( select_resp, "GPUdb failed to perform select query; error message: %s" )
    assert select_resp[ 'count' ] == fradius_resp[ 'count' ], "Error: Mismatch in counts of select (%s) and chained queries (bounding box then filter by radius) (%s)" \
                                              % ( select_resp[ 'count' ], fradius_resp[ 'count' ] )

    # Delete a few objects given a predicate and check the size of the original set
    # Endpoint: /selectdelete
    delete_predicate = "((15 <= x) and (x <= 18.5))"
    delete_resp = gpudb.select_delete( set_id, delete_predicate )
    _assert_status_ok( delete_resp, "GPUdb failed to perform select delete operation; error message: %s" )

    # Check that the size of the set has gone down
    # Statistics return the count as a default
    # Endpoint: /statistics
    new_size = total_size - delete_resp[ 'count' ]
    statistics_resp = gpudb.statistics( set_id, "x", "sum" )
    _assert_status_ok( statistics_resp, "GPUdb failed to perform statistics operation; error message: %s" )
    # Report the same value that is compared (it lives under 'stats')
    reported_size = statistics_resp[ 'stats' ][ 'count' ]
    assert reported_size == new_size, "Error: Mismatch in counts of set size (%s) and expected size (%s)" \
                                              % ( reported_size, new_size )

    # Update a few objects and check the update was successful by doing a select
    # Update objects based on x, change the y value
    # Endpoint: /selectupdate
    update_predicate = "((-35 <= x) and (x <= -33.5))"
    update_resp = gpudb.select_update( set_id, update_predicate, {'y': "71"} )
    _assert_status_ok( update_resp, "GPUdb failed to perform select update operation; error message: %s" )

    # Check that the selected objects' y values have been changed
    # Obtain the selected objects by performing a select query
    # Endpoint: /select
    select_set_id2 = "diagnostics_select_result_2_" + datetime.datetime.now().isoformat()
    select_resp1 = gpudb.select( set_id, select_set_id2, update_predicate )
    _assert_status_ok( select_resp1, "GPUdb failed to perform select operation; error message: %s" )

    # Get all the objects in the set that have the updated y value
    # and check that it matches with the above count
    # Endpoint: /select
    select_predicate = "(y == 71)"
    select_set_id3 = "diagnostics_select_result_3_" + datetime.datetime.now().isoformat()
    select_resp2 = gpudb.select( set_id, select_set_id3, select_predicate )
    _assert_status_ok( select_resp2, "GPUdb failed to perform select operation; error message: %s" )
    # Now check that the counts match
    assert select_resp1[ 'count' ] == select_resp2[ 'count' ], "GPUdb failed in performing select update correctly; expected count is %s, but given count is %s" \
                                                              % ( select_resp1[ 'count' ], select_resp2[ 'count' ] )

    # Clear all the sets, in the order they were created
    created_set_ids = ( set_id, bbox_set_id, fradius_set_id,
                        select_set_id, select_set_id2, select_set_id3 )
    for created_set_id in created_set_ids:
        clear_resp = gpudb.clear( created_set_id )
        assert clear_resp['status_info'][ 'status' ] == 'OK', "GPUdb failed in clearing set %s" % created_set_id


def _assert_status_ok( response, failure_message ):
    """Assert that a GPUdb response reports status 'OK'.

    failure_message must contain exactly one %s; it receives the message
    carried in the response's status_info when the status is not 'OK'.
    """
    status_info = response['status_info']
    assert status_info[ 'status' ] == 'OK', failure_message % status_info[ 'message' ]
def gpudb_ingestor_example():
    """
    Create a test table and load generated records into it through a
    GPUdbIngestor, using several rounds of worker-process pools.

    Rebinds the module-level name gpudb_ingestor to the ingestor built here.
    The worker function generate_and_insert_data is defined elsewhere in the
    file; presumably it inserts through that module-level ingestor (verify
    against its definition).

    Takes no arguments and returns nothing; progress is printed to stdout.
    Any exception raised inside a worker process is re-raised here.
    """
    global gpudb_ingestor

    gpudb = GPUdb( encoding='BINARY', host = '', port = '9191')

    table_name = "test_ingest_table"
    # Clear table if exists
    gpudb.clear_table( table_name )

    # Create the table schema and the table
    table_type_schema_json = {
        "type": "record",
        "name": "ingest_test_type",
        "fields": [
            { "name" : "d1", "type": "double" },
            { "name" : "d2", "type": "double" },
            { "name" : "l", "type": "long" },
            { "name" : "s", "type": "string" }
        ]
    }
    table_type_schema_str = json.dumps( table_type_schema_json )
    # The parsed schema object is not used below; the call is kept because it
    # presumably rejects a malformed schema before anything is sent to the server.
    schema.parse( table_type_schema_str )

    table_column_properties = {}

    type_id = gpudb.create_type( type_definition = table_type_schema_str,
                                 label = "",
                                 properties = table_column_properties )[ "type_id" ]
    gpudb.create_table( table_name = table_name,
                        type_id = type_id )

    print( "Table Name: %s" % table_name )

    # Instantiate a gpudb ingestor object
    ingestor_batch_size = 7000 # batch size handed to the ingestor
    options = {}
    workers = GPUdbIngestor.WorkerList( gpudb )
    # Same bytes as the former `print "Workers: ", urls, "\n"` statement
    print( "Workers:  %s \n" % ( workers.worker_urls, ) )
    gpudb_ingestor = GPUdbIngestor( gpudb, table_name, ingestor_batch_size, options, workers )

    # Sizes for the generated data (separate from the ingestor's batch size)
    num_batches = 10
    records_per_batch = 10000
    num_pools = 5
    num_pool_batches = 3

    # Generate and insert data in parallel, one pool of num_pools processes per round
    for _ in range( num_pool_batches ):
        pool = Pool( processes = num_pools )
        try:
            results = pool.map_async( generate_and_insert_data,
                                      [[records_per_batch, num_batches]] * num_pools )
            # Block until every worker has finished; get() also re-raises
            # any exception a worker hit instead of silently dropping it.
            results.get()
        finally:
            # Release the worker processes before starting the next round
            pool.close()
            pool.join()
    # end multiprocess data generation and insertion

    # Flush the ingestor (must do this to actually insert the data)
    gpudb_ingestor.flush()

    num_records = num_batches * records_per_batch * num_pools * num_pool_batches
    print( "Total # objects inserted: %s" % num_records )
Ejemplo n.º 9
def _require_ok( response, failure_template ):
    """
    Raise AssertionError unless a GPUdb response reports status 'OK'.

      response         -- Response dict holding a 'status_info' entry
      failure_template -- Message with one %s slot for the server's error message
    """
    status_info = response['status_info']
    if status_info[ 'status' ] != 'OK':
        raise AssertionError( failure_template % status_info[ 'message' ] )


def diagnose_gpudb( argv ):
    """
    Run a diagnostic test on GPUdb

    Creates a throw-away table, fills it with random points, then runs
    filter, delete, statistics and update requests against it, checking each
    response status and cross-checking the returned counts.  The table is
    cleared at the end of a successful run.

    Parameters:
      argv -- Command line arguments.  NOTE: the options are actually read
              from sys.argv; this parameter is accepted but not consulted.

    Options (getopt string "hlg:p:v"):
      -h       print the usage message and exit
      -l       run against the local machine (the default)
      -g IP    address of the GPUdb server
      -p PORT  port of the GPUdb server (default 9191)
      -v       print a message when all checks pass

    Raises AssertionError when any check fails; exits with status 2 when no
    or invalid options are given.
    """
    # Parse the command line arguments
    if ( len(sys.argv) == 1 ): # None provided
        # Print help message and quit
        print ( helpMessage )
        sys.exit( 2 )
    try: # Parse the command line arguments
        opts, _ = getopt.getopt( sys.argv[1:], "hlg:p:v" )
    except getopt.GetoptError:
        print ( helpMessage )
        sys.exit( 2 )

    # Some default values
    GPUdb_IP   = '' # Run locally by default
    GPUdb_Port = '9191' # Default port
    isVerbose  = False

    # Parse the arguments
    for opt, arg in opts:
        if opt == '-h': # print usage and exit
            print ( helpMessage )
            sys.exit( 0 )
        elif opt == '-l': # run gpudb on local machine
            pass # accepted for compatibility; it changes no setting
        elif opt == '-g': # run gpudb on a server gpudb at the specified IP address
            GPUdb_IP = arg
        elif opt == '-p': # run gpudb on a server gpudb at the specified port
            GPUdb_Port = arg
        elif opt == '-v': # prints verbose messages (only the success message, really)
            isVerbose = True

    # Set up GPUdb with binary encoding
    gpudb = GPUdb( encoding='BINARY', host = GPUdb_IP, port = GPUdb_Port )

    # Create a data type
    # NOTE(review): the field list was missing from this literal (it collapsed
    # to "{}").  x and y are declared as doubles because every request below
    # filters on them numerically -- confirm against the intended schema.
    point_schema_str = """{
                                "type":"record",
                                "name":"point",
                                "fields":
                                 [
                                   {"name":"x","type":"double"},
                                   {"name":"y","type":"double"}
                                 ]
                            }""".replace(' ','').replace('\n','')

    # Register the data type and ensure that it worked
    # Endpoint: /create/type
    create_resp = gpudb.create_type ( point_schema_str, "point_type" )
    _require_ok( create_resp, "GPUdb failed to create point data type; error message: %s" )

    # Using the registered type's ID, create a new set (and check that worked)
    # Endpoint: /create/table
    type_id = create_resp[ 'type_id' ]
    table_name = "diagnostics_point_set_" + datetime.datetime.now().isoformat()
    create_table_resp = gpudb.create_table( table_name, type_id ) # not a part of a collection
    _require_ok( create_table_resp, "GPUdb failed to create point table; error message: %s" )

    # Add some data to the set in batches
    # Endpoint: /insert/records/random
    count_1 = 2000
    param_map_1 = { "x": {"min": 0, "max": 42 }, "y": {"min": 0, "max": 42 } }
    random_resp = gpudb.insert_records_random( table_name, count_1, param_map_1 )

    # Check that the first set of objects were generated successfully
    _require_ok( random_resp, "GPUdb failed to generate random points; error message: %s" )

    # Add another batch of data points to the same set, but at a different location
    # Endpoint: /insert/records/random
    count_2 = 2000
    param_map_2 = { "x": {"min": -50, "max": -20 }, "y": {"min": -50, "max": -20 } }
    random_resp = gpudb.insert_records_random( table_name, count_2, param_map_2 )

    # Check that the second set of objects were generated successfully
    _require_ok( random_resp, "GPUdb failed to generate random points; error message: %s" )

    # Check the total size of the set is as intended
    # Endpoint: /show/table
    total_size = count_1 + count_2
    show_table_resp = gpudb.show_table( table_name, options = {"get_sizes": "true"} )
    _require_ok( show_table_resp, "GPUdb failed to check status of set; error message: %s" )
    if show_table_resp[ 'total_size' ] != total_size:
        raise AssertionError( "Error: Total size of set is not as expected. Set size = %s, expected size = %s"
                              % ( show_table_resp[ 'total_size' ], total_size ) )

    # Query chaining: do two filters one after another, get final count
    # Do a similar query with select, check count against the chained queries

    # Bounding box: x within [10, 20] and y within [10, 20]
    # Endpoint: /filter/bybox
    bbox_view_name = "diagnostics_bbox_result_" + datetime.datetime.now().isoformat()
    bbox_resp = gpudb.filter_by_box( table_name, bbox_view_name, "x", 10, 20, "y", 10, 20 )
    _require_ok( bbox_resp, "GPUdb failed to perform bounding box query; error message: %s" )

    # Filter by radius: 100km radius around (lon, lat) = (15, 15)
    # Endpoint: /filter/byradius
    fradius_view_name = "diagnostics_filter_by_radius_result_" + datetime.datetime.now().isoformat()
    fradius_resp = gpudb.filter_by_radius( bbox_view_name, fradius_view_name, "x", 15, "y", 15, 100000 )
    _require_ok( fradius_resp, "GPUdb failed to perform filter by radius query; error message: %s" )

    # Do a select query with a predicate that should yield the same result
    # as the above chained queries
    # Select: ( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )
    # Endpoint: /filter
    filter_view_name = "diagnostics_filter_result_" + datetime.datetime.now().isoformat()
    predicate = "( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )"
    filter_resp = gpudb.filter( table_name, filter_view_name, predicate )
    _require_ok( filter_resp, "GPUdb failed to perform filter query; error message: %s" )
    if filter_resp[ 'count' ] != fradius_resp[ 'count' ]:
        raise AssertionError( "Error: Mismatch in counts of filter (%s) and chained queries (bounding box then filter by radius) (%s)"
                              % ( filter_resp[ 'count' ], fradius_resp[ 'count' ] ) )

    # Delete a few objects and check the set size of the original set
    # Delete objects: Delete a few objects given a predicate
    # Endpoint: /delete/records
    delete_expression = ["((15 <= x) and (x <= 18.5))"]
    delete_resp = gpudb.delete_records( table_name, delete_expression )
    _require_ok( delete_resp, "GPUdb failed to perform delete operation; error message: %s" )

    # Check that the size of the set has gone down
    # Statistics return the count as a default
    # Endpoint: /aggregate/statistics
    new_size = total_size - delete_resp[ 'count_deleted' ]
    statistics_resp = gpudb.aggregate_statistics( table_name, "x", "count" )
    _require_ok( statistics_resp, "GPUdb failed to perform the statistics operation; error message: %s" )
    # Report the same value that is compared (it lives under 'stats')
    actual_size = statistics_resp[ 'stats' ][ 'count' ]
    if actual_size != new_size:
        raise AssertionError( "Error: Mismatch in counts of set size (%s) and expected size (%s)"
                              % ( actual_size, new_size ) )

    # Update a few objects and check the update was successful by doing a select
    # Update objects based on x, change the y value
    # Endpoint: /update/records
    update_predicate = "((-35 <= x) and (x <= -33.5))"
    update_resp = gpudb.update_records( table_name, [ update_predicate ], [{'y': "71"}] )
    _require_ok( update_resp, "GPUdb failed to perform the update operation; error message: %s" )

    # Check that the selected objects' y values have been changed
    # Obtain the selected objects by performing a select query
    # Endpoint: /filter
    filter_view_name2 = "diagnostics_filter_result_2_" + datetime.datetime.now().isoformat()
    filter_resp1 = gpudb.filter( table_name, filter_view_name2, update_predicate )
    _require_ok( filter_resp1, "GPUdb failed to perform filter operation; error message: %s" )

    # Get all the objects in the table that have the updated y value
    # and check that the count matches the one above
    # Endpoint: /filter
    filter_expression = "(y == 71)"
    filter_view_name3 = "diagnostics_filter_result_3_" + datetime.datetime.now().isoformat()
    filter_resp2 = gpudb.filter( table_name, filter_view_name3, filter_expression )
    _require_ok( filter_resp2, "GPUdb failed to perform filter operation; error message: %s" )
    # Now check that the counts match
    if filter_resp1[ 'count' ] != filter_resp2[ 'count' ]:
        raise AssertionError( "GPUdb failed in performing update correctly; expected count is %s, but given count is %s"
                              % ( filter_resp1[ 'count' ], filter_resp2[ 'count' ] ) )

    # Clear all the tables (dropping the original table also drops views)
    clear_resp = gpudb.clear_table( table_name )
    if clear_resp['status_info'][ 'status' ] != 'OK':
        raise AssertionError( "GPUdb failed in clearing set %s" % table_name )

    if isVerbose:
        print ( "The diagnostics tests succeeded!" )
import sys
# Traffic capture packages
import dpkt
#from scapy.all import sr1,IP,ICMP,rdpcap
from scapy.all import *

# GPUdb packages
from gpudb import GPUdb
import uuid #for generating uuids

gpudb = GPUdb(encoding='BINARY',gpudb_ip='',gpudb_port='9191')
# Add more fields as needed for the analysis
type_definition = """{

# TODO : Pass pcap file as input
def print_packet_stats():