class SpringLatency(Latency):

    COLLECTOR = "spring_latency"

    METRICS = ("latency_set", "latency_get")

    def __init__(self, settings, workload, prefix=None):
        # Deliberately skip Latency.__init__ and call its parent initializer
        super(Latency, self).__init__(settings)
        self.clients = []
        for bucket in self.get_buckets():
            client = CBGen(bucket=bucket, host=settings.master_node,
                           username=bucket, password=settings.bucket_password)
            self.clients.append((bucket, client))

        self.existing_keys = ExistingKey(workload.working_set,
                                         workload.working_set_access,
                                         prefix=prefix)
        self.new_keys = NewKey(prefix=prefix, expiration=workload.expiration)
        self.keys_for_removal = KeyForRemoval(prefix=prefix)

        if not hasattr(workload, 'doc_gen') or workload.doc_gen == 'old':
            self.new_docs = NewDocument(workload.size)
        elif workload.doc_gen == 'new':
            self.new_docs = NewNestedDocument(workload.size)
        elif workload.doc_gen == 'reverse_lookup':
            self.new_docs = ReverseLookupDocument(workload.size,
                                                  workload.doc_partitions)
        elif workload.doc_gen == 'reverse_lookup_array_indexing':
            self.new_docs = ReverseLookupDocumentArrayIndexing(
                workload.size, workload.doc_partitions, workload.items)
        self.items = workload.items
        self.n1ql_op = workload.n1ql_op

    def measure(self, client, metric, bucket):
        key = self.existing_keys.next(curr_items=self.items, curr_deletes=0)
        doc = self.new_docs.next(key)

        t0 = time()
        if metric == "latency_set":
            client.create(key, doc)
        elif metric == "latency_get":
            client.read(key)
        elif metric == "latency_cas":
            client.cas(key, doc)
        return 1000 * (time() - t0)  # Latency in ms

    def sample(self):
        for bucket, client in self.clients:
            samples = {}
            for metric in self.METRICS:
                samples[metric] = self.measure(client, metric, bucket)
            self.store.append(samples,
                              cluster=self.cluster,
                              bucket=bucket,
                              collector=self.COLLECTOR)
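# A minimal, self-contained sketch (not part of the original source) of the
# timing pattern that SpringLatency.measure() relies on: run one KV operation,
# time it with time.time(), and report the result in milliseconds. The
# DummyClient below is a hypothetical stand-in for the CBGen client; the real
# collector talks to Couchbase Server.
from time import sleep, time


class DummyClient(object):
    """Hypothetical stand-in for the CBGen client used by SpringLatency."""

    def create(self, key, doc):
        sleep(0.001)  # pretend the SET takes ~1 ms

    def read(self, key):
        sleep(0.001)  # pretend the GET takes ~1 ms


def measure(client, metric, key, doc):
    t0 = time()
    if metric == "latency_set":
        client.create(key, doc)
    elif metric == "latency_get":
        client.read(key)
    return 1000 * (time() - t0)  # latency in ms, as in SpringLatency.measure()


if __name__ == "__main__":
    client = DummyClient()
    samples = {metric: measure(client, metric, "key-1", {"field": "value"})
               for metric in ("latency_set", "latency_get")}
    print(samples)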
class SpringLatency(Latency):

    COLLECTOR = "spring_latency"

    METRICS = ("latency_set", "latency_get")

    def __init__(self, settings, workload, prefix=None):
        super(Latency, self).__init__(settings)
        self.clients = []
        for bucket in self.get_buckets():
            client = CBGen(bucket=bucket, host=settings.master_node,
                           username=bucket, password=settings.bucket_password)
            self.clients.append((bucket, client))

        self.existing_keys = ExistingKey(workload.working_set,
                                         workload.working_set_access,
                                         prefix=prefix)
        if not hasattr(workload, 'doc_gen') or workload.doc_gen == 'old':
            self.new_docs = NewDocument(workload.size)
        else:
            self.new_docs = NewNestedDocument(workload.size)
        self.items = workload.items

    def measure(self, client, metric):
        key = self.existing_keys.next(curr_items=self.items, curr_deletes=0)
        doc = self.new_docs.next(key)

        t0 = time()
        if metric == "latency_set":
            client.create(key, doc)
        elif metric == "latency_get":
            client.read(key)
        elif metric == "latency_cas":
            client.cas(key, doc)
        return 1000 * (time() - t0)  # Latency in ms

    def sample(self):
        for bucket, client in self.clients:
            samples = {}
            for metric in self.METRICS:
                samples[metric] = self.measure(client, metric)
            self.store.append(samples,
                              cluster=self.cluster,
                              bucket=bucket,
                              collector=self.COLLECTOR)
def __init__(self, settings, workload, prefix=None):
    super(Latency, self).__init__(settings)
    self.clients = []
    for bucket in self.get_buckets():
        client = CBGen(bucket=bucket, host=settings.master_node,
                       username=bucket, password=settings.bucket_password)
        self.clients.append((bucket, client))

    self.existing_keys = ExistingKey(workload.working_set,
                                     workload.working_set_access,
                                     prefix=prefix)
    self.new_keys = NewKey(prefix=prefix, expiration=workload.expiration)
    self.keys_for_removal = KeyForRemoval(prefix=prefix)

    if not hasattr(workload, 'doc_gen') or workload.doc_gen == 'basic':
        self.new_docs = Document(workload.size)
    elif workload.doc_gen == 'nested':
        self.new_docs = NestedDocument(workload.size)
    self.items = workload.items
class N1QLWorker(Worker):

    def __init__(self, workload_settings, target_settings, shutdown_event):
        super(N1QLWorker, self).__init__(workload_settings, target_settings,
                                         shutdown_event)
        self.new_queries = N1QLQueryGen(workload_settings.n1ql_queries)
        self.total_workers = self.ws.n1ql_workers
        self.throughput = self.ws.n1ql_throughput
        self.name = 'n1ql-worker'

        host, port = self.ts.node.split(':')
        bucket = self.ts.bucket
        if workload_settings.n1ql_op == 'ryow':
            bucket += '?fetch_mutation_tokens=true'

        params = {'bucket': bucket, 'host': host, 'port': port,
                  'username': self.ts.bucket, 'password': self.ts.password}

        self.existing_keys = ExistingKey(self.ws.working_set,
                                         self.ws.working_set_access,
                                         'n1ql')
        self.new_keys = NewKey('n1ql', self.ws.expiration)
        self.keys_for_removal = KeyForRemoval('n1ql')
        self.keys_for_casupdate = KeyForCASUpdate(self.total_workers,
                                                  self.ws.working_set,
                                                  self.ws.working_set_access,
                                                  'n1ql')

        if self.ws.doc_gen == 'merge':
            self.docs = MergeDocument(self.ws.size,
                                      self.ws.doc_partitions,
                                      False)
        elif self.ws.doc_gen == 'reverse_lookup':
            self.docs = ReverseLookupDocument(self.ws.size,
                                              self.ws.doc_partitions,
                                              False)
        elif self.ws.doc_gen == 'reverse_lookup_array_indexing':
            if self.ws.updates:
                self.docs = ReverseLookupDocumentArrayIndexing(
                    self.ws.size, self.ws.doc_partitions, self.ws.items,
                    delta=random.randint(0, 10))
            else:
                self.docs = ReverseLookupDocumentArrayIndexing(
                    self.ws.size, self.ws.doc_partitions, self.ws.items)
        self.cb = N1QLGen(**params)

    @with_sleep
    def do_batch(self):
        if self.ws.n1ql_op == 'read':
            curr_items_spot = \
                self.curr_items.value - self.ws.creates * self.ws.workers
            deleted_spot = \
                self.deleted_items.value + self.ws.deletes * self.ws.workers
            for _ in xrange(self.BATCH_SIZE):
                key = self.existing_keys.next(curr_items_spot, deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)
            return

        curr_items_tmp = curr_items_spot = self.curr_items.value
        if self.ws.n1ql_op == 'create':
            with self.lock:
                self.curr_items.value += self.BATCH_SIZE
                curr_items_tmp = self.curr_items.value - self.BATCH_SIZE
            curr_items_spot = (curr_items_tmp -
                               self.BATCH_SIZE * self.total_workers)

        deleted_items_tmp = deleted_spot = 0
        if self.ws.n1ql_op == 'delete':
            with self.lock:
                self.deleted_items.value += self.BATCH_SIZE
                deleted_items_tmp = self.deleted_items.value - self.BATCH_SIZE
            deleted_spot = (deleted_items_tmp +
                            self.BATCH_SIZE * self.total_workers)

        deleted_capped_items_tmp = deleted_capped_spot = 0
        if self.ws.n1ql_op == 'rangedelete':
            with self.lock:
                self.deleted_capped_items.value += self.BATCH_SIZE
                deleted_capped_items_tmp = \
                    self.deleted_capped_items.value - self.BATCH_SIZE
            deleted_capped_spot = (deleted_capped_items_tmp +
                                   self.BATCH_SIZE * self.total_workers)

        casupdated_items_tmp = casupdated_spot = 0
        if self.ws.n1ql_op == 'update':
            with self.lock:
                self.casupdated_items.value += self.BATCH_SIZE
                casupdated_items_tmp = \
                    self.casupdated_items.value - self.BATCH_SIZE
            casupdated_spot = (casupdated_items_tmp +
                               self.BATCH_SIZE * self.total_workers)

        if self.ws.n1ql_op == 'create':
            for _ in xrange(self.BATCH_SIZE):
                curr_items_tmp += 1
                key, ttl = self.new_keys.next(curr_items_tmp)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'delete':
            for _ in xrange(self.BATCH_SIZE):
                deleted_items_tmp += 1
                key = self.keys_for_removal.next(deleted_items_tmp)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'update' or self.ws.n1ql_op == 'lookupupdate':
            for _ in xrange(self.BATCH_SIZE):
                key = self.keys_for_casupdate.next(self.sid,
                                                   curr_items_spot,
                                                   deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'ryow':
            for _ in xrange(self.BATCH_SIZE):
                query = self.ws.n1ql_queries[0]['statement'][1:-1]
                if self.ws.n1ql_queries[0]['prepared'] == \
                        "singleton_unique_lookup":
                    by_key = 'email'
                elif self.ws.n1ql_queries[0]['prepared'] == "range_scan":
                    by_key = 'capped_small'
                else:
                    logger.error('n1ql_queries {} not defined'.format(
                        self.ws.n1ql_queries))
                key1 = self.keys_for_casupdate.next(self.sid,
                                                    curr_items_spot,
                                                    deleted_spot)
                doc1 = self.docs.next(key1)
                key2 = self.keys_for_casupdate.next(self.sid,
                                                    curr_items_spot,
                                                    deleted_spot)
                doc2 = self.docs.next(key2)
                rvs = self.cb.client.upsert_multi({key1: doc2, key2: doc1})
                # Part of the test requirements: each N1QL worker sleeps for
                # the configured interval between the mutation and the query.
                time.sleep(float(self.ws.n1ql_queries[0]['time_sleep']))
                ms = MutationState()
                ms.add_results(*rvs.values())
                nq = N1QLQuery(query.format(doc2[by_key]))
                nq.consistent_with(ms)
                # Drain the result set to force query execution
                len(list(self.cb.client.n1ql_query(nq)))

        elif self.ws.n1ql_op == 'rangeupdate':
            for _ in xrange(self.BATCH_SIZE):
                key = self.keys_for_casupdate.next(self.sid,
                                                   curr_items_spot,
                                                   deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'rangedelete':
            for _ in xrange(self.BATCH_SIZE):
                doc = {}
                doc['capped_small'] = "n1ql-_100_" + \
                    str(deleted_capped_items_tmp)
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)
                deleted_capped_items_tmp += 1

        elif self.ws.n1ql_op == 'merge':
            # Run a SELECT * workload for merge
            for _ in xrange(self.BATCH_SIZE):
                key = self.existing_keys.next(curr_items_spot, deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                query['statement'] = "SELECT * FROM `bucket-1` USE KEYS[$1];"
                query['args'] = "[\"{key}\"]".format(**doc)
                del query['prepared']
                self.cb.query(ddoc_name, view_name, query=query)

    def run(self, sid, lock, curr_queries, curr_items, deleted_items,
            casupdated_items, deleted_capped_items):
        self.cb.start_updater()

        if self.throughput < float('inf'):
            self.target_time = float(self.BATCH_SIZE) * self.total_workers / \
                self.throughput
        else:
            self.target_time = None
        self.lock = lock
        self.sid = sid
        self.curr_items = curr_items
        self.deleted_items = deleted_items
        self.deleted_capped_items = deleted_capped_items
        self.casupdated_items = casupdated_items
        self.curr_queries = curr_queries

        try:
            logger.info('Started: {}-{}'.format(self.name, self.sid))
            while curr_queries.value < self.ws.ops and not self.time_to_stop():
                with self.lock:
                    curr_queries.value += self.BATCH_SIZE
                self.do_batch()
                self.report_progress(curr_queries.value)
        except (KeyboardInterrupt, ValueFormatError, AttributeError) as e:
            logger.info('Interrupted: {}-{}-{}'.format(self.name, self.sid, e))
        else:
            if self.fallingBehindCount > 0:
                logger.info('Worker {0} fell behind {1} times.'.format(
                    self.name, self.fallingBehindCount))
            logger.info('Finished: {}-{}'.format(self.name, self.sid))
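# A hedged sketch of the read-your-own-writes (RYOW) pattern used in the
# 'ryow' branch above, written against the Couchbase Python SDK 2.x API
# (Bucket.upsert_multi, MutationState, N1QLQuery.consistent_with). The
# connection string, bucket name, documents, and query below are hypothetical
# examples, not values from the original workload, and import paths may
# differ across SDK versions.
from couchbase.bucket import Bucket
from couchbase.n1ql import N1QLQuery, MutationState

# fetch_mutation_tokens=true is required so that mutations return the tokens
# that MutationState needs for consistent_with (AT_PLUS) queries.
bucket = Bucket('couchbase://127.0.0.1/bucket-1?fetch_mutation_tokens=true',
                password='password')

# Mutate two documents, then pin query consistency to those mutations.
rvs = bucket.upsert_multi({'key-1': {'email': 'a@example.com'},
                           'key-2': {'email': 'b@example.com'}})
ms = MutationState()
ms.add_results(*rvs.values())

nq = N1QLQuery("SELECT META().id FROM `bucket-1` WHERE email = $1",
               'a@example.com')
nq.consistent_with(ms)
rows = list(bucket.n1ql_query(nq))  # observes the upserts issued above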
def __init__(self, workload_settings, target_settings, shutdown_event=None):
    self.ws = workload_settings
    self.ts = target_settings
    self.shutdown_event = shutdown_event
    logger.setLevel(logging.INFO)

    self.existing_keys = ExistingKey(self.ws.working_set,
                                     self.ws.working_set_access,
                                     self.ts.prefix)
    self.new_keys = NewKey(self.ts.prefix, self.ws.expiration)
    self.keys_for_removal = KeyForRemoval(self.ts.prefix)

    if not hasattr(self.ws, 'doc_gen') or self.ws.doc_gen == 'old':
        extra_fields = False
        if (hasattr(self.ws, 'extra_doc_fields') and
                self.ws.extra_doc_fields == 'yes'):
            extra_fields = True
        self.docs = NewDocument(self.ws.size, extra_fields)
    elif self.ws.doc_gen == 'new':
        self.docs = NewNestedDocument(self.ws.size)
    elif self.ws.doc_gen == 'merge':
        isRandom = True
        if self.ts.prefix == 'n1ql':
            isRandom = False
        self.docs = MergeDocument(self.ws.size,
                                  self.ws.doc_partitions,
                                  isRandom)
    elif self.ws.doc_gen == 'reverse_lookup':
        isRandom = True
        if self.ts.prefix == 'n1ql':
            isRandom = False
        self.docs = ReverseLookupDocument(self.ws.size,
                                          self.ws.doc_partitions,
                                          isRandom)
    elif self.ws.doc_gen == 'reverse_lookup_array_indexing':
        isRandom = True
        if self.ts.prefix == 'n1ql':
            isRandom = False
        if self.ws.updates:
            # Apply a delta to all array values when updating the doc
            self.docs = ReverseLookupDocumentArrayIndexing(
                self.ws.size, self.ws.doc_partitions, self.ws.items,
                delta=random.randint(0, 10))
        else:
            self.docs = ReverseLookupDocumentArrayIndexing(
                self.ws.size, self.ws.doc_partitions, self.ws.items)
    elif self.ws.doc_gen == 'spatial':
        self.docs = NewDocumentFromSpatialFile(
            self.ws.spatial.data,
            self.ws.spatial.dimensionality)
    elif self.ws.doc_gen == 'large_subdoc':
        self.docs = NewLargeDocument(self.ws.size)

    self.next_report = 0.05  # Report after every 5% of completion

    host, port = self.ts.node.split(':')

    # Only FTS uses proxyPort and an authless bucket right now. Instead of
    # jumping through hoops to specify proxyPort in the target
    # iterator/settings, which only passes down very specific attributes,
    # just detect FTS instead. The following does not work with an authless
    # bucket; FTS's worker does its own Couchbase.connect.
    if not (hasattr(self.ws, "fts") and
            hasattr(self.ws.fts, "doc_database_url")):
        # Default SASL bucket
        self.init_db({'bucket': self.ts.bucket, 'host': host, 'port': port,
                      'username': self.ts.bucket,
                      'password': self.ts.password})

    self.fallingBehindCount = 0
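# The @with_sleep decorator used by the workers is referenced but not defined
# in this excerpt; it evidently works together with self.target_time and the
# self.fallingBehindCount counter initialized above. A plausible sketch of
# such a throttling decorator is below; it is an assumption about its
# behavior, not the original implementation.
import time
from functools import wraps


def with_sleep(method):
    """Hypothetical throttling decorator: if a batch finishes faster than
    self.target_time, sleep off the difference; otherwise count the miss in
    self.fallingBehindCount."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        t0 = time.time()
        result = method(self, *args, **kwargs)
        if self.target_time is not None:
            elapsed = time.time() - t0
            if elapsed < self.target_time:
                time.sleep(self.target_time - elapsed)
            else:
                self.fallingBehindCount += 1
        return result
    return wrapper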
class N1QLWorker(Worker):

    NAME = 'n1ql-worker'

    def __init__(self, workload_settings, target_settings, shutdown_event):
        self.new_queries = N1QLQueryGen(workload_settings.n1ql_queries)
        self.total_workers = workload_settings.n1ql_workers
        self.throughput = workload_settings.n1ql_throughput
        self.reservoir = Reservoir(num_workers=workload_settings.n1ql_workers)
        super(N1QLWorker, self).__init__(workload_settings, target_settings,
                                         shutdown_event)

    def init_keys(self):
        self.existing_keys = ExistingKey(self.ws.working_set,
                                         self.ws.working_set_access,
                                         prefix='n1ql')
        self.new_keys = NewKey(prefix='n1ql', expiration=self.ws.expiration)
        self.keys_for_casupdate = KeyForCASUpdate(self.total_workers,
                                                  self.ws.working_set,
                                                  self.ws.working_set_access,
                                                  prefix='n1ql')

    def init_docs(self):
        if self.ws.doc_gen == 'reverse_lookup':
            self.docs = ReverseLookupDocument(self.ws.size,
                                              prefix='n1ql')
        elif self.ws.doc_gen == 'reverse_range_lookup':
            self.docs = ReverseRangeLookupDocument(
                self.ws.size,
                prefix='n1ql',
                range_distance=self.ws.range_distance)
        elif self.ws.doc_gen == 'ext_reverse_lookup':
            self.docs = ExtReverseLookupDocument(self.ws.size,
                                                 prefix='n1ql',
                                                 num_docs=self.ws.items)
        elif self.ws.doc_gen == 'join':
            self.docs = JoinedDocument(self.ws.size,
                                       prefix='n1ql',
                                       num_docs=self.ws.items,
                                       num_categories=self.ws.num_categories,
                                       num_replies=self.ws.num_replies)
        elif self.ws.doc_gen == 'ref':
            self.docs = RefDocument(self.ws.size,
                                    prefix='n1ql')
        elif self.ws.doc_gen == 'array_indexing':
            self.docs = ArrayIndexingDocument(self.ws.size,
                                              prefix='n1ql',
                                              array_size=self.ws.array_size,
                                              num_docs=self.ws.items)

    def init_db(self):
        host, port = self.ts.node.split(':')
        self.cb = N1QLGen(bucket=self.ts.bucket, password=self.ts.password,
                          host=host, port=port)

    def read(self):
        curr_items_tmp = self.curr_items.value
        if self.ws.doc_gen == 'ext_reverse_lookup':
            curr_items_tmp /= 4

        for _ in range(self.BATCH_SIZE):
            key = self.existing_keys.next(curr_items=curr_items_tmp,
                                          curr_deletes=0)
            doc = self.docs.next(key)
            doc['key'] = key
            doc['bucket'] = self.ts.bucket

            query = self.new_queries.next(doc)
            _, latency = self.cb.query(query)
            self.reservoir.update(latency)

    def create(self):
        with self.lock:
            self.curr_items.value += self.BATCH_SIZE
            curr_items_tmp = self.curr_items.value - self.BATCH_SIZE

        for _ in range(self.BATCH_SIZE):
            curr_items_tmp += 1
            key, ttl = self.new_keys.next(curr_items=curr_items_tmp)
            doc = self.docs.next(key)
            doc['key'] = key
            doc['bucket'] = self.ts.bucket

            query = self.new_queries.next(doc)
            _, latency = self.cb.query(query)
            self.reservoir.update(latency)

    def update(self):
        with self.lock:
            self.cas_updated_items.value += self.BATCH_SIZE
            curr_items_tmp = self.curr_items.value - self.BATCH_SIZE

        for _ in range(self.BATCH_SIZE):
            key = self.keys_for_casupdate.next(self.sid,
                                               curr_items=curr_items_tmp,
                                               curr_deletes=0)
            doc = self.docs.next(key)
            doc['key'] = key
            doc['bucket'] = self.ts.bucket

            query = self.new_queries.next(doc)
            _, latency = self.cb.query(query)
            self.reservoir.update(latency)

    def range_update(self):
        with self.lock:
            self.cas_updated_items.value += self.BATCH_SIZE
            curr_items_tmp = self.curr_items.value - self.BATCH_SIZE

        for _ in range(self.BATCH_SIZE):
            key = self.keys_for_casupdate.next(self.sid,
                                               curr_items=curr_items_tmp,
                                               curr_deletes=0)
            doc = self.docs.next(key)
            doc['key'] = key
            doc['bucket'] = self.ts.bucket

            query = self.new_queries.next(doc)
            _, latency = self.cb.query(query)
            self.reservoir.update(latency)

    @with_sleep
    def do_batch(self):
        if self.ws.n1ql_op == 'read':
            self.read()
        elif self.ws.n1ql_op == 'create':
            self.create()
        elif self.ws.n1ql_op == 'update':
            self.update()
        elif self.ws.n1ql_op == 'rangeupdate':
            self.range_update()

    def run(self, sid, lock, curr_queries, curr_items, deleted_items,
            cas_updated_items):
        if self.throughput < float('inf'):
            self.target_time = float(self.BATCH_SIZE) * self.total_workers / \
                self.throughput
        else:
            self.target_time = None
        self.lock = lock
        self.sid = sid
        self.curr_items = curr_items
        self.cas_updated_items = cas_updated_items
        self.curr_queries = curr_queries

        try:
            logger.info('Started: {}-{}'.format(self.NAME, self.sid))
            while curr_queries.value < self.ws.ops and not self.time_to_stop():
                with self.lock:
                    curr_queries.value += self.BATCH_SIZE
                self.do_batch()
        except (KeyboardInterrupt, ValueFormatError, AttributeError) as e:
            logger.info('Interrupted: {}-{}-{}'.format(self.NAME, self.sid, e))
        else:
            logger.info('Finished: {}-{}'.format(self.NAME, self.sid))

        self.reservoir.dump(filename='{}-{}'.format(self.NAME, self.sid))
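# Reservoir is used above (update()/dump()) but not defined in this excerpt.
# A minimal latency collector with the same two-method interface might look
# like the sketch below; the constructor signature, sample limit, and JSON
# dump format are assumptions, not the original implementation.
import json
import random


class Reservoir(object):
    """Hypothetical latency collector: keeps a fixed-size uniform sample of
    observed latencies via reservoir sampling and dumps it to a JSON file."""

    MAX_SAMPLES = 10000

    def __init__(self, num_workers=1):
        self.num_workers = num_workers
        self.samples = []
        self.count = 0

    def update(self, latency):
        self.count += 1
        if len(self.samples) < self.MAX_SAMPLES:
            self.samples.append(latency)
        else:
            idx = random.randint(0, self.count - 1)
            if idx < self.MAX_SAMPLES:
                self.samples[idx] = latency

    def dump(self, filename):
        with open('{}.json'.format(filename), 'w') as fh:
            json.dump(self.samples, fh)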
class N1QLWorker(Worker):

    def __init__(self, workload_settings, target_settings, shutdown_event):
        super(N1QLWorker, self).__init__(workload_settings, target_settings,
                                         shutdown_event)
        self.new_queries = N1QLQueryGen(workload_settings.n1ql_queries)
        self.total_workers = self.ws.n1ql_workers
        self.throughput = self.ws.n1ql_throughput
        self.name = 'n1ql-worker'

        host, port = self.ts.node.split(':')
        params = {'bucket': self.ts.bucket, 'host': host, 'port': port,
                  'username': self.ts.bucket, 'password': self.ts.password}

        self.existing_keys = ExistingKey(self.ws.working_set,
                                         self.ws.working_set_access,
                                         'n1ql')
        self.new_keys = NewKey('n1ql', self.ws.expiration)
        self.keys_for_removal = KeyForRemoval('n1ql')
        self.keys_for_casupdate = KeyForCASUpdate(self.total_workers,
                                                  self.ws.working_set,
                                                  self.ws.working_set_access,
                                                  'n1ql')

        if self.ws.doc_gen == 'merge':
            self.docs = MergeDocument(self.ws.size,
                                      self.ws.doc_partitions,
                                      False)
        elif self.ws.doc_gen == 'reverse_lookup':
            self.docs = ReverseLookupDocument(self.ws.size,
                                              self.ws.doc_partitions,
                                              False)
        self.cb = N1QLGen(**params)

    @with_sleep
    def do_batch(self):
        if self.ws.n1ql_op == 'read':
            curr_items_spot = \
                self.curr_items.value - self.ws.creates * self.ws.workers
            deleted_spot = \
                self.deleted_items.value + self.ws.deletes * self.ws.workers
            for _ in xrange(self.BATCH_SIZE):
                key = self.existing_keys.next(curr_items_spot, deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)
            return

        curr_items_tmp = curr_items_spot = self.curr_items.value
        if self.ws.n1ql_op == 'create':
            with self.lock:
                self.curr_items.value += self.BATCH_SIZE
                curr_items_tmp = self.curr_items.value - self.BATCH_SIZE
            curr_items_spot = (curr_items_tmp -
                               self.BATCH_SIZE * self.total_workers)

        deleted_items_tmp = deleted_spot = 0
        if self.ws.n1ql_op == 'delete':
            with self.lock:
                self.deleted_items.value += self.BATCH_SIZE
                deleted_items_tmp = self.deleted_items.value - self.BATCH_SIZE
            deleted_spot = (deleted_items_tmp +
                            self.BATCH_SIZE * self.total_workers)

        deleted_capped_items_tmp = deleted_capped_spot = 0
        if self.ws.n1ql_op == 'rangedelete':
            with self.lock:
                self.deleted_capped_items.value += self.BATCH_SIZE
                deleted_capped_items_tmp = \
                    self.deleted_capped_items.value - self.BATCH_SIZE
            deleted_capped_spot = (deleted_capped_items_tmp +
                                   self.BATCH_SIZE * self.total_workers)

        casupdated_items_tmp = casupdated_spot = 0
        if self.ws.n1ql_op == 'update':
            with self.lock:
                self.casupdated_items.value += self.BATCH_SIZE
                casupdated_items_tmp = \
                    self.casupdated_items.value - self.BATCH_SIZE
            casupdated_spot = (casupdated_items_tmp +
                               self.BATCH_SIZE * self.total_workers)

        if self.ws.n1ql_op == 'create':
            for _ in xrange(self.BATCH_SIZE):
                curr_items_tmp += 1
                key, ttl = self.new_keys.next(curr_items_tmp)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'delete':
            for _ in xrange(self.BATCH_SIZE):
                deleted_items_tmp += 1
                key = self.keys_for_removal.next(deleted_items_tmp)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'update' or self.ws.n1ql_op == 'lookupupdate':
            for _ in xrange(self.BATCH_SIZE):
                key = self.keys_for_casupdate.next(self.sid,
                                                   curr_items_spot,
                                                   deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'rangeupdate':
            for _ in xrange(self.BATCH_SIZE):
                key = self.keys_for_casupdate.next(self.sid,
                                                   curr_items_spot,
                                                   deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'rangedelete':
            for _ in xrange(self.BATCH_SIZE):
                doc = {}
                doc['capped_small'] = "n1ql-_100_" + \
                    str(deleted_capped_items_tmp)
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)
                deleted_capped_items_tmp += 1

        elif self.ws.n1ql_op == 'merge':
            # Run a SELECT * workload for merge
            for _ in xrange(self.BATCH_SIZE):
                key = self.existing_keys.next(curr_items_spot, deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                query['statement'] = "SELECT * FROM `bucket-1` USE KEYS[$1];"
                query['args'] = "[\"{key}\"]".format(**doc)
                del query['prepared']
                self.cb.query(ddoc_name, view_name, query=query)

    def run(self, sid, lock, curr_queries, curr_items, deleted_items,
            casupdated_items, deleted_capped_items):
        self.cb.start_updater()

        if self.throughput < float('inf'):
            self.target_time = float(self.BATCH_SIZE) * self.total_workers / \
                self.throughput
        else:
            self.target_time = None
        self.lock = lock
        self.sid = sid
        self.curr_items = curr_items
        self.deleted_items = deleted_items
        self.deleted_capped_items = deleted_capped_items
        self.casupdated_items = casupdated_items
        self.curr_queries = curr_queries

        try:
            logger.info('Started: {}-{}'.format(self.name, self.sid))
            while curr_queries.value < self.ws.ops and not self.time_to_stop():
                with self.lock:
                    curr_queries.value += self.BATCH_SIZE
                self.do_batch()
                self.report_progress(curr_queries.value)
        except (KeyboardInterrupt, ValueFormatError, AttributeError) as e:
            logger.info('Interrupted: {}-{}-{}'.format(self.name, self.sid, e))
        else:
            logger.info('Finished: {}-{}'.format(self.name, self.sid))
class N1QLWorker(Worker):

    def __init__(self, workload_settings, target_settings, shutdown_event):
        super(N1QLWorker, self).__init__(workload_settings, target_settings,
                                         shutdown_event)
        self.new_queries = N1QLQueryGen(workload_settings.n1ql_queries)
        self.total_workers = self.ws.n1ql_workers
        self.throughput = self.ws.n1ql_throughput
        self.name = 'n1ql-worker'

        host, port = self.ts.node.split(':')
        params = {
            'bucket': self.ts.bucket,
            'host': host,
            'port': port,
            'username': self.ts.bucket,
            'password': self.ts.password,
        }

        self.existing_keys = ExistingKey(self.ws.working_set,
                                         self.ws.working_set_access,
                                         'n1ql')
        self.new_keys = NewKey('n1ql', self.ws.expiration)
        self.keys_for_removal = KeyForRemoval('n1ql')
        self.keys_for_casupdate = KeyForCASUpdate(self.total_workers,
                                                  self.ws.working_set,
                                                  self.ws.working_set_access,
                                                  'n1ql')

        if self.ws.doc_gen == 'merge':
            self.docs = MergeDocument(self.ws.size,
                                      self.ws.doc_partitions,
                                      False)
        elif self.ws.doc_gen == 'reverse_lookup':
            self.docs = ReverseLookupDocument(self.ws.size,
                                              self.ws.doc_partitions,
                                              False)
        elif self.ws.doc_gen == 'reverse_lookup_array_indexing':
            self.docs = ReverseLookupDocumentArrayIndexing(
                self.ws.size, self.ws.doc_partitions, self.ws.items)
        self.cb = N1QLGen(**params)

    @with_sleep
    def do_batch(self):
        if self.ws.n1ql_op == 'read':
            curr_items_spot = \
                self.curr_items.value - self.ws.creates * self.ws.workers
            deleted_spot = \
                self.deleted_items.value + self.ws.deletes * self.ws.workers
            for _ in xrange(self.BATCH_SIZE):
                key = self.existing_keys.next(curr_items_spot, deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)
            return

        curr_items_tmp = curr_items_spot = self.curr_items.value
        if self.ws.n1ql_op == 'create':
            with self.lock:
                self.curr_items.value += self.BATCH_SIZE
                curr_items_tmp = self.curr_items.value - self.BATCH_SIZE
            curr_items_spot = (curr_items_tmp -
                               self.BATCH_SIZE * self.total_workers)

        deleted_items_tmp = deleted_spot = 0
        if self.ws.n1ql_op == 'delete':
            with self.lock:
                self.deleted_items.value += self.BATCH_SIZE
                deleted_items_tmp = self.deleted_items.value - self.BATCH_SIZE
            deleted_spot = (deleted_items_tmp +
                            self.BATCH_SIZE * self.total_workers)

        deleted_capped_items_tmp = deleted_capped_spot = 0
        if self.ws.n1ql_op == 'rangedelete':
            with self.lock:
                self.deleted_capped_items.value += self.BATCH_SIZE
                deleted_capped_items_tmp = \
                    self.deleted_capped_items.value - self.BATCH_SIZE
            deleted_capped_spot = (deleted_capped_items_tmp +
                                   self.BATCH_SIZE * self.total_workers)

        casupdated_items_tmp = casupdated_spot = 0
        if self.ws.n1ql_op == 'update':
            with self.lock:
                self.casupdated_items.value += self.BATCH_SIZE
                casupdated_items_tmp = \
                    self.casupdated_items.value - self.BATCH_SIZE
            casupdated_spot = (casupdated_items_tmp +
                               self.BATCH_SIZE * self.total_workers)

        if self.ws.n1ql_op == 'create':
            for _ in xrange(self.BATCH_SIZE):
                curr_items_tmp += 1
                key, ttl = self.new_keys.next(curr_items_tmp)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'delete':
            for _ in xrange(self.BATCH_SIZE):
                deleted_items_tmp += 1
                key = self.keys_for_removal.next(deleted_items_tmp)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'update' or self.ws.n1ql_op == 'lookupupdate':
            for _ in xrange(self.BATCH_SIZE):
                key = self.keys_for_casupdate.next(self.sid,
                                                   curr_items_spot,
                                                   deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'rangeupdate':
            for _ in xrange(self.BATCH_SIZE):
                key = self.keys_for_casupdate.next(self.sid,
                                                   curr_items_spot,
                                                   deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)

        elif self.ws.n1ql_op == 'rangedelete':
            for _ in xrange(self.BATCH_SIZE):
                doc = {}
                doc['capped_small'] = "n1ql-_100_" + \
                    str(deleted_capped_items_tmp)
                ddoc_name, view_name, query = self.new_queries.next(doc)
                self.cb.query(ddoc_name, view_name, query=query)
                deleted_capped_items_tmp += 1

        elif self.ws.n1ql_op == 'merge':
            # Run a SELECT * workload for merge
            for _ in xrange(self.BATCH_SIZE):
                key = self.existing_keys.next(curr_items_spot, deleted_spot)
                doc = self.docs.next(key)
                doc['key'] = key
                doc['bucket'] = self.ts.bucket
                ddoc_name, view_name, query = self.new_queries.next(doc)
                query['statement'] = "SELECT * FROM `bucket-1` USE KEYS[$1];"
                query['args'] = "[\"{key}\"]".format(**doc)
                del query['prepared']
                self.cb.query(ddoc_name, view_name, query=query)

    def run(self, sid, lock, curr_queries, curr_items, deleted_items,
            casupdated_items, deleted_capped_items):
        self.cb.start_updater()

        if self.throughput < float('inf'):
            self.target_time = float(self.BATCH_SIZE) * self.total_workers / \
                self.throughput
        else:
            self.target_time = None
        self.lock = lock
        self.sid = sid
        self.curr_items = curr_items
        self.deleted_items = deleted_items
        self.deleted_capped_items = deleted_capped_items
        self.casupdated_items = casupdated_items
        self.curr_queries = curr_queries

        try:
            logger.info('Started: {}-{}'.format(self.name, self.sid))
            while curr_queries.value < self.ws.ops and not self.time_to_stop():
                with self.lock:
                    curr_queries.value += self.BATCH_SIZE
                self.do_batch()
                self.report_progress(curr_queries.value)
        except (KeyboardInterrupt, ValueFormatError, AttributeError) as e:
            logger.info('Interrupted: {}-{}-{}'.format(self.name, self.sid, e))
        else:
            logger.info('Finished: {}-{}'.format(self.name, self.sid))
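# N1QLQueryGen is shared by all of the worker variants above: the older ones
# unpack (ddoc_name, view_name, query) from next(), while the newer worker
# consumes a single query object. A hypothetical minimal generator in the
# older style is sketched below; the round-robin choice, the expected query
# dict layout, and the parameter substitution are assumptions, not the
# original implementation.
from itertools import cycle


class N1QLQueryGenSketch(object):
    """Hypothetical stand-in for N1QLQueryGen (older three-tuple interface).
    Cycles through the configured query templates and fills the positional
    arguments from fields of the current document."""

    def __init__(self, queries):
        # Each query template is assumed to look roughly like:
        # {'statement': 'SELECT ... WHERE email = $1;',
        #  'args': '["{email}"]', 'prepared': 'singleton_unique_lookup'}
        self.queries = cycle(queries)

    def next(self, doc):
        query = dict(next(self.queries))
        query['args'] = query['args'].format(**doc)
        # ddoc_name and view_name are unused placeholders kept only to match
        # the (ddoc_name, view_name, query) tuple the older workers unpack.
        return None, None, query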