Example #1
def __init__(self,
             spec_dir,
             benchmark_hosts,
             result_hosts,
             log,
             fail_if,
             sample_mode):
    self.benchmark_hosts = benchmark_hosts
    self.sample_mode = sample_mode
    self.spec_dir = spec_dir
    self.client = clients.client(benchmark_hosts)
    self.result_client = clients.client(result_hosts)
    self.server_version_info = aio.run(self.client.get_server_version)
    self.server_version = parse_version(self.server_version_info['number'])
    self.log = log
    self.create_result = partial(
        Result,
        version_info=self.server_version_info
    )
    if fail_if:
        self.fail_if = partial(eval_fail_if, fail_if)
    else:
        self.fail_if = lambda x: None
    if result_hosts:
        self.process_result = _result_to_crate(self.log, self.result_client)
    else:
        self.process_result = log.result
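Both partial(...) calls above pre-bind a first argument so callers always get a callable of a fixed shape; the fail_if fallback is a no-op lambda with the same one-argument shape. A minimal standalone sketch of that pattern, with a hypothetical stand-in for eval_fail_if:

from functools import partial

def eval_fail_if(expr, result):
    # Hypothetical stand-in: evaluate a fail-if expression against a result.
    if eval(expr, {}, {'result': result}):
        raise SystemExit('fail_if triggered: ' + expr)

fail_if_expr = 'result > 100'
fail_if = partial(eval_fail_if, fail_if_expr) if fail_if_expr else (lambda x: None)
fail_if(42)  # passes silently; fail_if(200) would raise SystemExit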
Example #2
    def test_reindex(self):
        crate_v3 = CrateNode(crate_dir=get_crate('3.x.x'),
                             keep_data=True,
                             settings=self.crate_settings)
        self._to_stop.append(crate_v3)
        crate_v3.start()
        with client(crate_v3.http_url) as c:
            aio.run(c.execute, "create table t (x int)")
            args = (
                (1, ),
                (2, ),
                (3, ),
            )
            aio.run(c.execute_many, "insert into t (x) values (?)", args)
        crate_v3.stop()
        self._to_stop.remove(crate_v3)

        crate_v4 = CrateNode(crate_dir=get_crate('4.0.3'),
                             keep_data=True,
                             settings=self.crate_settings)
        self._to_stop.append(crate_v4)
        crate_v4.start()
        reindex(crate_v4.http_url)
        with client(crate_v4.http_url) as c:
            result = aio.run(
                c.execute,
                "SELECT version FROM information_schema.tables WHERE table_name = 't'"
            )
            version = result['rows'][0][0]
            self.assertEqual(version, {'upgraded': None, 'created': '4.0.3'})

            cnt = aio.run(c.execute, 'SELECT count(*) FROM t')['rows'][0][0]
            self.assertEqual(cnt, 3)
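The _to_stop list is bookkeeping so each node is shut down even if an assertion fails mid-test. A hypothetical tearDown consuming it could look like:

def tearDown(self):
    # Stop any node the test body did not stop itself.
    for node in self._to_stop:
        node.stop()
    self._to_stop = []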
Example #3
def teardown(*args):
    try:
        with client(node.http_url) as c:
            aio.run(c.execute, 'drop table x.demo')
            aio.run(c.execute, 'drop blob table blobtable')
    finally:
        node.stop()
Example #4
def insert_from_sql(src_uri=None,
                    query=None,
                    fetch_size=100,
                    concurrency=25,
                    table=None,
                    hosts=None,
                    output_fmt=None):
    """Insert data read from another SQL source into table."""

    stats = Stats()
    with clients.client(hosts, concurrency=concurrency) as client:
        f = partial(aio.measure, stats, client.execute_many)
        try:
            aio.run(
                async_insert_from_sql,
                src_uri,
                concurrency,
                query,
                fetch_size,
                table,
                f
            )
        except clients.SqlException as e:
            raise SystemExit(str(e))
    try:
        print(format_stats(stats.get(), output_fmt))
    except KeyError:
        if not stats.sampler.values:
            raise SystemExit('No data read from source')
        raise
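A hypothetical invocation copying rows from another SQL database into a local CrateDB node; the URI scheme, credentials, and table names are assumptions, while the parameter names mirror the signature above:

insert_from_sql(
    src_uri='postgres://user:secret@localhost:5432/sourcedb',  # assumed URI scheme
    query='SELECT id, name FROM src_table',
    table='doc.target',
    hosts='localhost:4200',
)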
Example #5
def setup(*args):
    with client(node.http_url) as c:
        aio.run(
            c.execute,
            'create table x.demo (id int, name string, country string) '
            'with (number_of_replicas = 0)'
        )
        aio.run(c.execute, 'create table y.demo (name text) with (number_of_replicas = 0)')
        aio.run(c.execute, 'create blob table blobtable with (number_of_replicas = 0)')
Example #6
def insert_json(table=None,
                bulk_size=1000,
                concurrency=25,
                hosts=None,
                infile=None,
                output_fmt=None):
    """Insert JSON lines from a file or stdin into a CrateDB cluster.

    If no hosts are specified the statements will be printed.

    Args:
        table: Target table name.
        bulk_size: Bulk size of the insert statements.
        concurrency: Number of operations to run concurrently.
        hosts: hostname:port pairs of the Crate nodes
        infile: File to read the JSON lines from.
    """
    if not hosts:
        return print_only(infile, table)

    queries = (to_insert(table, d) for d in dicts_from_lines(infile))
    bulk_queries = as_bulk_queries(queries, bulk_size)
    print('Executing inserts: bulk_size={} concurrency={}'.format(
        bulk_size, concurrency),
          file=sys.stderr)

    stats = Stats()
    with clients.client(hosts, concurrency=concurrency) as client:
        f = partial(aio.measure, stats, client.execute_many)
        try:
            aio.run_many(f, bulk_queries, concurrency)
        except clients.SqlException as e:
            raise SystemExit(str(e))
    try:
        print(format_stats(stats.get(), output_fmt))
    except KeyError:
        if not stats.sampler.values:
            raise SystemExit('No data received via stdin')
        raise
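A minimal usage sketch, assuming a CrateDB node on localhost:4200 and a file of newline-delimited JSON objects (both assumptions); parameter names mirror the signature above:

with open('records.json') as f:  # hypothetical file, one JSON object per line
    insert_json(table='doc.demo', bulk_size=500, concurrency=10,
                hosts='localhost:4200', infile=f)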
Example #7
def insert_json(table=None,
                bulk_size=1000,
                concurrency=25,
                hosts=None,
                output_fmt=None):
    """Insert JSON lines fed into stdin into a Crate cluster.

    If no hosts are specified the statements will be printed.

    Args:
        table: Target table name.
        bulk_size: Bulk size of the insert statements.
        concurrency: Number of operations to run concurrently.
        hosts: hostname:port pairs of the Crate nodes
    """
    if not hosts:
        return print_only(table)

    queries = (to_insert(table, d) for d in dicts_from_stdin())
    bulk_queries = as_bulk_queries(queries, bulk_size)
    print('Executing inserts: bulk_size={} concurrency={}'.format(
        bulk_size, concurrency), file=sys.stderr)

    stats = Stats()
    with clients.client(hosts, concurrency=concurrency) as client:
        f = partial(aio.measure, stats, client.execute_many)
        try:
            aio.run_many(f, bulk_queries, concurrency)
        except clients.SqlException as e:
            raise SystemExit(str(e))
    try:
        print(format_stats(stats.get(), output_fmt))
    except KeyError:
        if not stats.sampler.values:
            raise SystemExit('No data received via stdin')
        raise
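Because this older variant reads exclusively from stdin, the simplest sketch is the dry run its docstring describes: with no hosts, the generated statements are printed instead of executed.

insert_json(table='doc.demo')  # reads JSON lines from stdin, prints the inserts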
Example #8
File: reindex.py Project: seut/cr8
def reindex(hosts=None):
    with clients.client(hosts) as client:
        run(_async_reindex, client)
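A one-line sketch, assuming a node listening on localhost:4200 (an assumption); the test_reindex example earlier exercises the same call to upgrade tables created by an older CrateDB version:

reindex(hosts='localhost:4200')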
Example #9
def __init__(self, hosts, concurrency, sample_mode):
    self.concurrency = concurrency
    self.client = client(hosts, concurrency=concurrency)
    self.sampler = get_sampler(sample_mode)
Example #10
def insert_fake_data(hosts=None,
                     table=None,
                     num_records=1e5,
                     bulk_size=1000,
                     concurrency=25,
                     mapping_file=None):
    """Generate random data and insert it into a table.

    This will read the table schema and then find suitable random data providers.
    Which provider is chosen depends on the column name and data type.

    Example:

        A column named `name` will map to the `name` provider.
        A column named `x` of type int will map to `random_int` because there
        is no `x` provider.

    Available providers are listed here:
        https://faker.readthedocs.io/en/latest/providers.html

        Additional providers:
        - auto_inc:
            Returns unique incrementing numbers.
            Automatically used for columns named "id" of type int or long
        - geo_point:
            Returns [<lon>, <lat>]
            Automatically used for columns of type geo_point

    Args:
        hosts: <host>:[<port>] of the Crate node
        table: The table name into which the data should be inserted.
            Either fully qualified: `<schema>.<table>` or just `<table>`
        num_records: Number of records to insert.
            Usually a number but expressions like `1e4` work as well.
        bulk_size: The bulk size of the insert statements.
        concurrency: How many operations to run concurrently.
        mapping_file: A JSON file that defines a mapping from column name to
            fake-factory provider.
            The format is as follows:
            {
                "column_name": ["provider_with_args", ["arg1", "arg"]],
                "x": ["provider_with_args", ["arg1"]],
                "y": "provider_without_args"
            }
    """
    with clients.client(hosts, concurrency=1) as client:
        schema, table_name = parse_table(table)
        columns = retrieve_columns(client, schema, table_name)
    if not columns:
        sys.exit('Could not find columns for table "{}"'.format(table))
    print('Found schema: ')
    print(json.dumps(columns, sort_keys=True, indent=4))
    mapping = None
    if mapping_file:
        mapping = json.load(mapping_file)

    bulk_size = min(num_records, bulk_size)
    num_inserts = int(math.ceil(num_records / bulk_size))

    gen_row = create_row_generator(columns, mapping)

    stmt = to_insert('"{schema}"."{table_name}"'.format(**locals()),
                     columns)[0]
    print('Using insert statement: ')
    print(stmt)

    print('Will make {} requests with a bulk size of {}'.format(
        num_inserts, bulk_size))

    print('Generating fake data and executing inserts')
    q = asyncio.Queue(maxsize=concurrency)
    with clients.client(hosts, concurrency=concurrency) as client:
        active = [True]

        def stop():
            asyncio.ensure_future(q.put(None))
            active.clear()
            loop.remove_signal_handler(signal.SIGINT)

        if sys.platform != 'win32':
            loop.add_signal_handler(signal.SIGINT, stop)
        bulk_seq = _bulk_size_generator(num_records, bulk_size, active)
        with ThreadPoolExecutor() as e:
            tasks = asyncio.gather(
                _gen_data_and_insert(q, e, client, stmt, gen_row, bulk_seq),
                consume(q, total=num_inserts))
            loop.run_until_complete(tasks)
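A hypothetical mapping file and invocation, following the mapping_file format documented in the docstring above; country and random_int are faker provider names, and the arguments to random_int are an assumption:

# mapping.json (hypothetical):
# {
#     "country": "country",
#     "x": ["random_int", [0, 100]]
# }
with open('mapping.json') as mf:
    insert_fake_data(hosts='localhost:4200',
                     table='doc.demo',
                     num_records=1e4,
                     bulk_size=500,
                     mapping_file=mf)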