def set_latest_batch_time(vineyard_id, hub_id, batch_sent, hub_data):
    """ Upserts the timestamp of the latest data received from a hub
    into the database. """
    table = os.environ.get('DB_HW_TABLE')
    parameters = {
        'vineid': vineyard_id,
        'hubid': hub_id,
        'lasthubbatchsent': batch_sent,
    }
    query = (
        'UPDATE {} SET lasthubbatchsent=? '
        'WHERE vineid=? AND hubid=? AND nodeid=?;'
    )
    prepared_statement = session.prepare(query.format(table))
    batch_statement = BatchStatement()
    try:
        for data_point in hub_data:
            parameters['nodeid'] = int(data_point['node_id'])
            batch_statement.add(prepared_statement, parameters)
        session.execute(batch_statement)
        return True
    # Unknown exception
    except Exception as e:
        # the original format string had no '{}' placeholder, so the error
        # detail was silently dropped
        raise Exception('Transaction Error Occurred: {}'.format(e))
def put_entities_tx(self, app, txid, entities):
  """ Update transaction metadata with new put operations.

  Args:
    app: A string containing an application ID.
    txid: An integer specifying the transaction ID.
    entities: A list of entities that will be put upon commit.
  """
  batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                         retry_policy=BASIC_RETRIES)
  insert = self.session.prepare("""
    INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
    VALUES (?, ?, ?, ?, ?)
    USING TTL {ttl}
  """.format(ttl=dbconstants.MAX_TX_DURATION * 2))
  for entity in entities:
    args = (tx_partition(app, txid),
            TxnActions.MUTATE,
            entity.key().name_space(),
            bytearray(entity.key().path().Encode()),
            bytearray(entity.Encode()))
    batch.add(insert, args)

  try:
    self.session.execute(batch)
  except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
    message = 'Exception while putting entities in a transaction'
    logging.exception(message)
    raise AppScaleDBConnectionError(message)
def store_qc_results(qc_results_values, pk, particle_ids, particle_bins,
                     particle_deploys, param_name, strict_range=False):
    start_time = time.clock()
    if engine.app.config['QC_RESULTS_STORAGE_SYSTEM'] == 'cass':
        log.info('Storing QC results in Cassandra.')
        insert_results = SessionManager.prepare(
            "insert into ooi.qc_results "
            "(subsite, node, sensor, bin, deployment, stream, id, parameter, results) "
            "values (?, ?, ?, ?, ?, ?, ?, ?, ?)")
        batch = BatchStatement()
        for (qc_results, particle_id, particle_bin, particle_deploy) in izip(
                qc_results_values, particle_ids, particle_bins, particle_deploys):
            batch.add(insert_results,
                      (pk.get('subsite'), pk.get('node'), pk.get('sensor'),
                       particle_bin, particle_deploy, pk.get('stream'),
                       uuid.UUID(particle_id), param_name, str(qc_results)))
        SessionManager.session().execute_async(batch)
        log.info("QC results stored in {} seconds.".format(time.clock() - start_time))
    elif engine.app.config['QC_RESULTS_STORAGE_SYSTEM'] == 'log':
        log.info('Writing QC results to log file.')
        qc_log = logging.getLogger('qc.results')
        qc_log_string = ""
        for (qc_results, particle_id, particle_bin, particle_deploy) in izip(
                qc_results_values, particle_ids, particle_bins, particle_deploys):
            qc_log_string += "refdes:{0}-{1}-{2}, bin:{3}, stream:{4}, " \
                             "deployment:{5}, id:{6}, parameter:{7}, qc results:{8}\n" \
                .format(pk.get('subsite'), pk.get('node'), pk.get('sensor'),
                        particle_bin, pk.get('stream'), particle_deploy,
                        particle_id, param_name, qc_results)
        qc_log.info(qc_log_string[:-1])
        log.info("QC results stored in {} seconds.".format(time.clock() - start_time))
    else:
        log.info("Configured storage system '{}' not recognized, qc results not stored.".format(
            engine.app.config['QC_RESULTS_STORAGE_SYSTEM']))
def _delete_task_and_index(self, task):
  """ Deletes a task and its index atomically.

  Args:
    task: A Task object.
  """
  batch_delete = BatchStatement(retry_policy=self.db_access.retry_policy)

  delete_task = SimpleStatement("""
    DELETE FROM pull_queue_tasks
    WHERE app = %(app)s AND queue = %(queue)s AND id = %(id)s
  """)
  parameters = {"app": self.app, "queue": self.name, "id": task.id}
  batch_delete.add(delete_task, parameters=parameters)

  delete_task_index = SimpleStatement("""
    DELETE FROM pull_queue_tasks_index
    WHERE app = %(app)s AND queue = %(queue)s
    AND eta = %(eta)s AND id = %(id)s
  """)
  parameters = {"app": self.app, "queue": self.name,
                "eta": task.get_eta(), "id": task.id}
  batch_delete.add(delete_task_index, parameters=parameters)

  self.db_access.session.execute(batch_delete)
def __insert(self, models, if_not_exists=None):
    assert models, 'You can insert nothing, what good would that do?'
    serial_consistency_level = None
    if if_not_exists:
        serial_consistency_level = ConsistencyLevel.SERIAL
    batch = BatchStatement(serial_consistency_level=serial_consistency_level)
    for model in models:
        self.__validate_model(model)
        model._pre_put_hook()
        metadata = self._get_table_metadata(model.table())
        fields = self.denormalize(model)
        fields.update(self.construct_primary_key(model, metadata))
        cql_qry = CassandraQuery(model.query()).insert(fields)
        if if_not_exists:
            cql_qry.if_not_exists()
        batch.add(cql_qry.statement, parameters=cql_qry.condition_values)
    self._execute_batch(batch)
    for model in models:
        model._post_put_hook()
        edges = self.find_edges(model)
        if edges:
            self.set_edges_for_model(model, edges)
    if len(models) == 1:
        return models[0]
    return models
def test_unicode(self):
    """
    Test to validate that unicode query strings are handled appropriately by various query types

    @since 3.0.0
    @jira_ticket PYTHON-334
    @expected_result no unicode exceptions are thrown

    @test_category query
    """
    unicode_text = u"Fran\u00E7ois"
    batch = BatchStatement(BatchType.LOGGED)
    batch.add(
        u"INSERT INTO {0}.{1} (k, v) VALUES (%s, %s)".format(
            self.keyspace_name, self.function_table_name),
        (0, unicode_text))
    self.session.execute(batch)
    self.session.execute(
        u"INSERT INTO {0}.{1} (k, v) VALUES (%s, %s)".format(
            self.keyspace_name, self.function_table_name),
        (0, unicode_text))
    prepared = self.session.prepare(
        u"INSERT INTO {0}.{1} (k, v) VALUES (?, ?)".format(
            self.keyspace_name, self.function_table_name))
    bound = prepared.bind((1, unicode_text))
    self.session.execute(bound)
def save_eod_data_to_db(data):
    """ Saves Data to Cassandra in row batches """
    logger.info('Received Data: {}'.format(data['ticker']))
    q = """
        INSERT INTO historical_data
        (exchange, ticker, eoddate, open, close, high, low, volume)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s);
        """
    session = cluster.connect('test')
    df = data['df'].dropna()
    # Split into chunks of roughly 200 rows; use integer division and guard
    # against a zero section count, which np.array_split rejects.
    for df in np.array_split(df, max(1, len(df) // 200)):
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
        for eoddate, row in df.iterrows():
            batch.add(q, [data['exchange'], data['ticker'], str(eoddate),
                          row['Open'], row['Close'], row['High'], row['Low'],
                          int(row['Volume'])])
        session.execute(batch)
    logger.info('Done')
def _normal_batch(self, mutations):
  """ Use Cassandra's native batch statement to apply mutations atomically.

  Args:
    mutations: A list of dictionaries representing mutations.
  """
  self.logger.debug('Normal batch: {} mutations'.format(len(mutations)))
  batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                         retry_policy=self.retry_policy)
  prepared_statements = {'insert': {}, 'delete': {}}
  for mutation in mutations:
    table = mutation['table']
    if mutation['operation'] == TxnActions.PUT:
      if table not in prepared_statements['insert']:
        prepared_statements['insert'][table] = self.prepare_insert(table)
      values = mutation['values']
      for column in values:
        batch.add(
          prepared_statements['insert'][table],
          (bytearray(mutation['key']), column, bytearray(values[column]))
        )
    elif mutation['operation'] == TxnActions.DELETE:
      if table not in prepared_statements['delete']:
        prepared_statements['delete'][table] = self.prepare_delete(table)
      batch.add(
        prepared_statements['delete'][table],
        (bytearray(mutation['key']),)
      )

  try:
    self.session.execute(batch)
  except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
    message = 'Exception during batch_mutate'
    logging.exception(message)
    raise AppScaleDBConnectionError(message)
def insert(self, events):
    if not events:
        return
    batch_stmt = BatchStatement(batch_type=BatchType.UNLOGGED,
                                consistency_level=ConsistencyLevel.QUORUM)
    shard_idx = {}
    for _id, event in events:
        shard_time = round_down(event[TIMESTAMP_FIELD], self.width)
        shard = shard_idx.get(shard_time, random.randint(0, self.shards - 1))

        # Insert to index.
        try:
            self.index_cache.get((shard_time, shard))
        except KeyError:
            batch_stmt.add(BoundStatement(self.namespace.INDEX_INSERT_STMT,
                                          routing_key=self.stream,
                                          consistency_level=ConsistencyLevel.QUORUM)
                           .bind((self.stream, shard_time, self.width, shard)))
            self.index_cache.set((shard_time, shard), None)

        # Insert to stream.
        shard_key = StreamShard.get_key(self.stream, shard_time, shard)
        batch_stmt.add(BoundStatement(self.namespace.INSERT_STMT,
                                      routing_key=shard_key,
                                      consistency_level=ConsistencyLevel.QUORUM)
                       .bind((shard_key, _id, marshal.dumps(event))))
        shard_idx[shard_time] = (shard + 1) % self.shards  # Round robin.
    self.session.execute(batch_stmt)
def save_result(self, check_uuid: str, trigger: dict, scheduled_time: object,
                execution_time: object, insertion_time: object,
                alert_sent: bool, service_id: str, **kwargs):
    """Save check result.

    :param check_uuid: check uuid
    :param dict trigger: trigger definition
    :param scheduled_time: time when check was scheduled
    :param execution_time: time when worker finished execution
    :param insertion_time: time when check was analyzed by alerter
    :param alert_sent: indicates when alert was sent to AER
    :param service_id: service id
    """
    batch = BatchStatement()
    args = (
        UUID(check_uuid),
        UUID(trigger['uuid']),
        scheduled_time,
        execution_time,
        insertion_time,
        alert_sent,
        trigger['result']['status'],
        trigger['result']['message'],
        service_id,
        kwargs
    )
    batch.add(self._save_query_stmt, args)
    batch.add(self._save_service_query_stmt, args)
    self._session.execute(batch)
def insert_rows(starting_partition, ending_partition, rows_per_partition,
                counter, counter_lock):
    cluster = Cluster(['127.0.0.1'],
                      load_balancing_policy=TokenAwarePolicy(RoundRobinPolicy()))
    try:
        session = cluster.connect('ks')
        try:
            statement = session.prepare('INSERT INTO tbl (a, b, c, d) VALUES (?, ?, ?, ?)')
            for partition_key in xrange(starting_partition, ending_partition):
                batch = None
                batch_size = 0
                for cluster_column in xrange(rows_per_partition):
                    if batch is None:
                        batch = BatchStatement(batch_type=BatchType.UNLOGGED)
                    value1 = random.randint(1, 1000000)
                    value2 = random.randint(1, 1000000)
                    batch.add(statement, [partition_key, cluster_column, value1, value2])
                    batch_size += 1
                    if (batch_size == MAX_BATCH_SIZE) or (cluster_column + 1 == rows_per_partition):
                        with counter_lock:
                            counter.value += batch_size
                        session.execute(batch)
                        batch = None
                        batch_size = 0
        finally:
            session.shutdown()
    finally:
        cluster.shutdown()
def record_reads(self, app, txid, group_keys):
  """ Keep track of which entity groups were read in a transaction.

  Args:
    app: A string specifying an application ID.
    txid: An integer specifying a transaction ID.
    group_keys: An iterable containing Reference objects.
  """
  batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                         retry_policy=BASIC_RETRIES)
  insert = self.session.prepare("""
    INSERT INTO transactions (txid_hash, operation, namespace, path)
    VALUES (?, ?, ?, ?)
    USING TTL {ttl}
  """.format(ttl=dbconstants.MAX_TX_DURATION * 2))
  for group_key in group_keys:
    if not isinstance(group_key, entity_pb.Reference):
      group_key = entity_pb.Reference(group_key)

    args = (tx_partition(app, txid),
            TxnActions.GET,
            group_key.name_space(),
            bytearray(group_key.path().Encode()))
    batch.add(insert, args)

  try:
    yield self.tornado_cassandra.execute(batch)
  except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
    message = 'Exception while recording reads in a transaction'
    logger.exception(message)
    raise AppScaleDBConnectionError(message)
def delete_entities_tx(self, app, txid, entity_keys):
  """ Update transaction metadata with new delete operations.

  Args:
    app: A string containing an application ID.
    txid: An integer specifying the transaction ID.
    entity_keys: A list of entity keys that will be deleted upon commit.
  """
  batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                         retry_policy=BASIC_RETRIES)
  insert = self.session.prepare("""
    INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
    VALUES (?, ?, ?, ?, ?)
    USING TTL {ttl}
  """.format(ttl=dbconstants.MAX_TX_DURATION * 2))
  for key in entity_keys:
    # The None value overwrites previous puts.
    args = (tx_partition(app, txid),
            TxnActions.MUTATE,
            key.name_space(),
            bytearray(key.path().Encode()),
            None)
    batch.add(insert, args)

  try:
    yield self.tornado_cassandra.execute(batch)
  except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
    message = 'Exception while deleting entities in a transaction'
    logger.exception(message)
    raise AppScaleDBConnectionError(message)
def _upsert_users(keyspace):
    try:
        _cluster = Cluster(contact_points=CASSANDRA_CLUSTER, port=9042)
        _session = _cluster.connect()
        _session.set_keyspace(keyspace)
        _session.row_factory = dict_factory
        _add_users = [
            {'userid': 'dummy1', 'first_name': '11', 'last_name': 'dummy', 'emails': set('a')},
            {'userid': 'dummy2', 'first_name': '22', 'last_name': 'dummy', 'emails': set('b')}
        ]
        _prepare_insert = _session.prepare(
            "INSERT INTO users (userid, first_name, last_name, emails) VALUES (?, ?, ?, ?)")
        # use the named constant rather than the bare 0 (which means ANY)
        _batch = BatchStatement(consistency_level=ConsistencyLevel.ANY)
        for user in _add_users:
            print(user)
            _batch.add(_prepare_insert,
                       [user['userid'], user['first_name'], user['last_name'], user['emails']])
        _session.execute(_batch)
    except Exception:
        print('EXCEPT insert: {}({})'.format(sys.exc_info()[0], sys.exc_info()[1]))
    else:
        _session.shutdown()
def _normal_batch(self, mutations):
    """ Use Cassandra's native batch statement to apply mutations atomically.

    Args:
      mutations: A list of dictionaries representing mutations.
    """
    self.logger.debug("Normal batch: {} mutations".format(len(mutations)))
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=self.retry_policy)
    prepared_statements = {"insert": {}, "delete": {}}
    for mutation in mutations:
        table = mutation["table"]
        if mutation["operation"] == TxnActions.PUT:
            if table not in prepared_statements["insert"]:
                prepared_statements["insert"][table] = self.prepare_insert(table)
            values = mutation["values"]
            for column in values:
                batch.add(
                    prepared_statements["insert"][table],
                    (bytearray(mutation["key"]), column, bytearray(values[column])),
                )
        elif mutation["operation"] == TxnActions.DELETE:
            if table not in prepared_statements["delete"]:
                prepared_statements["delete"][table] = self.prepare_delete(table)
            batch.add(prepared_statements["delete"][table], (bytearray(mutation["key"]),))

    try:
        self.session.execute(batch)
    except (cassandra.Unavailable, cassandra.Timeout, cassandra.CoordinationFailure,
            cassandra.OperationTimedOut):
        message = "Exception during batch_mutate"
        logging.exception(message)
        raise AppScaleDBConnectionError(message)
def insert_data():
    # Example showing how to read data from a csv and insert into Cassandra
    # using a prepared statement
    insert_statement = session.prepare(
        "INSERT INTO sensor_data.raw (device, sensor, time, metric) VALUES (?,?,?,?)")

    f = open("rows_input.csv")
    rows = f.read().splitlines()
    f.close()

    rows_inserted = 0

    # Example showing how to use Cassandra batched insert
    batch_insert = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    for row in rows:
        data = row.split(",")
        values = [data[0], data[1],
                  datetime.datetime.strptime(data[2], "%Y-%m-%d %H:%M:%S"),
                  float(data[3])]
        insert = insert_statement.bind(values)
        batch_insert.add(insert)
        rows_inserted += 1
        if rows_inserted % BATCH_SIZE == 0:
            session.execute_async(batch_insert)
            # start a fresh batch; reusing the old one would re-send every
            # previously added row
            batch_insert = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
            print "Inserted %d rows" % rows_inserted

    # flush the final partial batch, which the modulo check above misses
    if rows_inserted % BATCH_SIZE != 0:
        session.execute_async(batch_insert)

    return rows_inserted
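# The flush-and-reset pattern above recurs throughout these snippets. A minimal
# generic sketch of it, assuming only a driver `session` and a prepared
# statement (both placeholders here, not a specific API from any snippet):
def insert_in_chunks(session, prepared, rows, chunk_size=100):
    from cassandra.query import BatchStatement
    batch = BatchStatement()
    pending = 0
    for row in rows:
        batch.add(prepared, row)
        pending += 1
        if pending == chunk_size:
            session.execute(batch)
            batch.clear()  # reuse the batch instead of rebuilding it
            pending = 0
    if pending:  # flush the final partial chunk
        session.execute(batch)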
def test_simple_statements(self):
    batch = BatchStatement(BatchType.LOGGED)
    for i in range(10):
        batch.add(SimpleStatement("INSERT INTO test3rf.test (k, v) VALUES (%s, %s)"), (i, i))

    self.session.execute(batch)
    self.session.execute_async(batch).result()
    self.confirm_results()
def update_gene_summary_w_cancer_census(session, genes):
    update_qry = "UPDATE gene_summary SET in_cosmic_census = ? "
    update_qry += " WHERE gene = ? and chrom = ?"
    query = session.prepare(update_qry)
    batch = BatchStatement()
    for gene in genes:
        batch.add(query, gene)
    session.execute(batch)
def test_rk_from_simple(self):
    """
    batch routing key is inherited from SimpleStatement
    """
    batch = BatchStatement()
    batch.add(self.simple_statement)
    self.assertIsNotNone(batch.routing_key)
    self.assertEqual(batch.routing_key, self.simple_statement.routing_key)
def test_add_all(self):
    batch = BatchStatement()
    statements = ['%s'] * 10
    parameters = [(i,) for i in range(10)]
    batch.add_all(statements, parameters)
    bound_statements = [t[1] for t in batch._statements_and_parameters]
    str_parameters = [str(i) for i in range(10)]
    self.assertEqual(bound_statements, str_parameters)
def do_update(cluster, target_db, schema_file, record_size, start_record,
              batch_size, insert_percentage, delay, batch_count,
              replication_factor=3, suppress_output=False):
    record_size = int(record_size)
    start_record = int(start_record)
    batch_size = int(batch_size)
    insert_percentage = int(insert_percentage)
    delay = float(delay) / 1000
    batch_count = int(batch_count)
    nr_batch = 0
    random.seed(1)
    ks_name, cf_name = getKSCFNames(target_db)
    if ks_name is None or cf_name is None:
        return
    createKeyspace(cluster, ks_name, replication_factor=replication_factor)
    createTable(cluster, ks_name, cf_name, schema_file)
    ts = TestSchema(cluster, ks_name, cf_name)
    ts.getSchema()
    while True:
        if ts.counter_table:
            batch = BatchStatement(batch_type=BatchType.COUNTER)
        else:
            batch = BatchStatement()
        stat_str = ''
        for i in range(batch_size):
            if start_record <= 0 or random.randrange(100) <= insert_percentage:
                # insert case
                record_num = start_record
                query = ts.getInsertQuerywithRandomData(record_num, record_size)
                stat_str += 'I(%d) ' % record_num
            else:
                record_num = random.randrange(0, start_record)
                if random.randrange(100) <= 70:
                    # 70% update
                    if not ts.counter_table:
                        query = ts.getUpdateQuery(record_num)
                    else:
                        query = ts.getInsertQuerywithRandomData(record_num, 0)
                    stat_str += 'U(%d) ' % record_num
                else:
                    # 30% deletion
                    query = ts.getDeleteQuery(record_num)
                    stat_str += 'D(%d) ' % record_num
            if not suppress_output:
                report(stat_str)
            batch.add(query)
            start_record += 1
        cluster.session.execute(batch)
        nr_batch += 1
        if nr_batch == batch_count:
            if batch_count >= 0:
                break
        time.sleep(delay)
def copy_dynamic_table(table):
    c_ft = pycassa.ColumnFamily(pool, table)
    table_rows = c_ft.get_range(include_ttl=True)
    command = "INSERT INTO {0} ("
    table_columns = []
    values = ''
    keyCount = 0
    for t in table_list:
        if t.table_name == table:
            table_columns = t.columns
            for column in t.columns:
                if keyCount == 0:
                    command += column.name
                    values = ') VALUES (?'
                else:
                    command += ', ' + column.name
                    values += ', ?'
                if column.key == True:
                    keyCount += 1
            break
    command += values + ') USING TTL ?'
    insert_flow = client.session.prepare(command.format(table))
    batch = BatchStatement()
    num_rows_in_batch = 0
    rowKey = ''
    for rowKey, columns in table_rows:
        key_value = []
        rowKey_size = 0
        if isinstance(rowKey, tuple):
            for key in range(0, len(list(rowKey))):
                key_value.append(_convert_to_cql_data(table_columns, rowKey_size,
                                                      rowKey, rowKey_size))
                rowKey_size += 1
            if t.is_index_table == True:
                key_value.append(random.randrange(0, 16))
        else:
            key_value.append(_convert_to_cql_data(table_columns, rowKey_size,
                                                  rowKey, rowKey_size))
            rowKey_size += 1
        for key in columns.keys():
            value, ttl = columns.get(key)
            jsonValue = value
            if is_json(value) != True:
                jsonValue = json.dumps(value, ensure_ascii=False)
            column_value = copy.deepcopy(key_value)
            num_rows_in_batch += 1
            if num_rows_in_batch % args.max_batch_entry_count == 0:
                num_rows_in_batch = 0
                client.load_data(batch)
                batch = BatchStatement()
            for col_num in range(rowKey_size, len(table_columns) - 1):
                column_value.append(_convert_to_cql_data(table_columns, col_num,
                                                         key, col_num - rowKey_size))
            column_value.append(jsonValue)
            column_value.append(ttl if ttl else 7200)
            batch.add(insert_flow, tuple(column_value))
    if rowKey != '':
        client.load_data(batch)
def test_rk_from_bound(self):
    """
    batch routing key is inherited from BoundStatement
    """
    bound = self.prepared.bind((1, None))
    batch = BatchStatement()
    batch.add(bound)
    self.assertIsNotNone(batch.routing_key)
    self.assertEqual(batch.routing_key, bound.routing_key)
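# The two routing-key tests above pin down the rule: a batch takes its routing
# key from the first statement added. A sketch of why that matters, assuming a
# token-aware load-balancing policy and a hypothetical prepared INSERT whose
# first bind value is the partition key:
def write_partition(session, insert_stmt, partition_key, rows):
    from cassandra.query import BatchStatement, BatchType
    # All rows share one partition, so the inherited routing key lets
    # TokenAwarePolicy send the whole batch straight to a replica.
    batch = BatchStatement(batch_type=BatchType.UNLOGGED)
    for clustering_key, value in rows:
        batch.add(insert_stmt.bind((partition_key, clustering_key, value)))
    assert batch.routing_key is not None  # inherited from the first bound statement
    session.execute(batch)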
def sendPartition(iter):
    cluster = Cluster(['hdp-master', 'hdp-slave1', 'hdp-slave2', 'hdp-slave3'])
    session = cluster.connect('coursera')
    insert = session.prepare(
        "INSERT INTO destination_rank_by_airport (airport, destination_rank) VALUES (?, ?)")
    batch = BatchStatement()
    for record in iter:
        batch.add(insert, record)
    session.execute(batch)
    cluster.shutdown()
def _assert_invalid_request(self, session, insert_cql, value):
    """ Perform two executions of the supplied statement, as a
    single statement and again as part of a batch
    """
    prepared = session.prepare(insert_cql)
    self._execute_and_fail(lambda: session.execute(prepared, [value]), insert_cql)
    batch = BatchStatement()
    batch.add(prepared, [value])
    self._execute_and_fail(lambda: session.execute(batch), insert_cql)
def test_clear_empty(self):
    batch = BatchStatement()
    batch.clear()
    self.assertFalse(batch._statements_and_parameters)
    self.assertIsNone(batch.keyspace)
    self.assertIsNone(batch.routing_key)
    self.assertFalse(batch.custom_payload)

    batch.add('something')
def sendPartition(iter):
    cluster = Cluster(['hdp-master', 'hdp-slave1', 'hdp-slave2', 'hdp-slave3'])
    session = cluster.connect('coursera')
    insert = session.prepare(
        "INSERT INTO source_destination_mean_delay "
        "(source, destination, mean_arrival_delay) VALUES (?, ?, ?)")
    batch = BatchStatement()
    for record in iter:
        batch.add(insert, record)
    session.execute(batch)
    cluster.shutdown()
def read_callback(rows):
    global w
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           batch_type=BatchType.UNLOGGED)
    for row in rows:
        if row.ttl_value:
            batch.add(insert_ttl, (row.key, row.column1, row.value, row.ttl_value))
        else:
            batch.add(insert, (row.key, row.column1, row.value))

    w = dst_session.execute_async(batch)
    w.add_callbacks(callback=write_callback,
                    callback_kwargs={'rows': len(rows)},
                    errback=write_errback)
def test_bound_statements(self):
    prepared = self.session.prepare("INSERT INTO test3rf.test (k, v) VALUES (?, ?)")

    batch = BatchStatement(BatchType.LOGGED)
    for i in range(10):
        batch.add(prepared.bind((i, i)))

    self.session.execute(batch)
    self.session.execute_async(batch).result()
    self.confirm_results()
def test_rk_from_simple(self):
    """
    batch routing key is inherited from SimpleStatement
    """
    self.cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    self.session = self.cluster.connect()
    batch = BatchStatement()
    batch.add(self.simple_statement)
    self.assertIsNotNone(batch.routing_key)
    self.assertEqual(batch.routing_key, self.simple_statement.routing_key)
def execute_batch():
    query = ("INSERT INTO data (device_id, data_source_id, time_upload, value) "
             "VALUES (?, ?, ?, ?) IF NOT EXISTS")
    batch = BatchStatement()
    prepared = session.prepare(query)
    batch_size = 0
    # send values in batches of 25k
    for d in dates_data:
        batch.add(prepared, (device_id, data_source_id, d[0], str(d[1])))
        batch_size += 1
        if batch_size >= 25_000:
            res = session.execute(batch)
            print('values sent', str(batch_size))
            batch.clear()
            batch_size = 0

    # send remaining values
    if batch_size > 0:
        res = session.execute(batch)
        print('values sent', str(batch_size))
        batch.clear()
        batch_size = 0
def insert_author(minuteslot, records):
    sorted_r = sorted(records.items(), key=itemgetter(1), reverse=True)[:20]
    # use a separate counter; 'count' is re-bound by the tuple unpacking below,
    # so the original code clobbered its own batch counter on every iteration
    batched = 0
    batch = BatchStatement()
    for author, count in sorted_r:
        batch.add(cql_author_stmt, (minuteslot, author, count))
        batched += 1
        if batched == 50:
            session.execute(batch)
            batched = 0
            batch = BatchStatement()
    if batched > 0:
        session.execute(batch)
def execute_in_batch(self, session, reader, query, extract_data, get_params):
    batch = BatchStatement()
    query_count = 0
    for line in itertools.islice(reader, self.ROW_COUNT):
        # Store data to be used by other methods
        extract_data(self, line)
        batch.add(query, get_params(self, line))
        query_count = query_count + 1
        if query_count % self.BATCH_SIZE == 0:
            session.execute(batch)
            batch = BatchStatement()
    # Execute last batch
    session.execute(batch)
def test_batch_statement(self):
    session, writer = self._traced_session()

    batch = BatchStatement()
    batch.add(
        SimpleStatement("INSERT INTO test.person (name, age, description) VALUES (%s, %s, %s)"),
        ("Joe", 1, "a"))
    batch.add(
        SimpleStatement("INSERT INTO test.person (name, age, description) VALUES (%s, %s, %s)"),
        ("Jane", 2, "b"))
    session.execute(batch)

    spans = writer.pop()
    eq_(len(spans), 1)
    s = spans[0]
    eq_(s.resource, 'BatchStatement')
    eq_(s.get_metric('cassandra.batch_size'), 2)
    assert 'test.person' in s.get_tag('cassandra.query')
def test_clear(self):
    keyspace = 'keyspace'
    routing_key = 'routing_key'
    custom_payload = {'key': six.b('value')}

    ss = SimpleStatement('whatever', keyspace=keyspace,
                         routing_key=routing_key,
                         custom_payload=custom_payload)

    batch = BatchStatement()
    batch.add(ss)

    self.assertTrue(batch._statements_and_parameters)
    self.assertEqual(batch.keyspace, keyspace)
    self.assertEqual(batch.routing_key, routing_key)
    self.assertEqual(batch.custom_payload, custom_payload)

    batch.clear()
    self.assertFalse(batch._statements_and_parameters)
    self.assertIsNone(batch.keyspace)
    self.assertIsNone(batch.routing_key)
    self.assertFalse(batch.custom_payload)

    batch.add(ss)
def main():
    # Defining input directory, keyspace and table name
    MAX_LINES = 500
    temp_line = 0
    inputs = sys.argv[1]
    keyspace = sys.argv[2]
    table_name = sys.argv[3]

    # Cluster configuration
    cluster = Cluster(['199.60.17.136', '199.60.17.173'])
    session = cluster.connect(keyspace)
    session.execute('USE %s;' % keyspace)

    # Defining the query for inserting values into table nasalogs
    insert_query = session.prepare(
        "INSERT INTO %s (host, datetime, path, bytes) VALUES (?, ?, ?, ?);" % table_name)
    linere = re.compile(
        "^(\\S+) - - \\[(\\S+) [+-]\\d+\\] \"[A-Z]+ (\\S+) HTTP/\\d\\.\\d\" \\d+ (\\d+)$")

    for f in os.listdir(inputs):
        with gzip.GzipFile(os.path.join(inputs, f)) as logfile:
            batch = BatchStatement()
            for line in logfile:
                # splitting the row data as per the regular expression
                single_row = linere.split(line)
                # retrieving required values in the specific format as
                # host, datetime, path and bytes
                if len(single_row) == 6:
                    host = single_row[1]
                    # stripping date-time to its format
                    date_time = dt.datetime.strptime(single_row[2], '%d/%b/%Y:%H:%M:%S')
                    path = single_row[3]
                    bytes_transferred = single_row[4]
                    # packaging multiple insert queries into one batch statement
                    temp_line += 1
                    batch.add(insert_query, [host, date_time, path, int(bytes_transferred)])
                    if temp_line == MAX_LINES:
                        # execute and clear only when the threshold is met;
                        # executing on every line re-sends the growing batch
                        session.execute(batch)
                        batch.clear()
                        temp_line = 0
            # flush any rows left over from this file
            if temp_line > 0:
                session.execute(batch)
                temp_line = 0
def load_businessindex(session, csv_path):
    log.info("Opening " + csv_path)

    # get the number of lines
    with open(csv_path, 'rb') as csvlines:
        line_count = sum(1 for line in csvlines) - 1
    number_of_batches = int(math.ceil(Decimal(line_count) / BATCH_SIZE))
    batches_complete = 0

    with open(csv_path, 'rb') as csvfile:
        reader = csv.DictReader(csvfile, skipinitialspace=True)
        query = SimpleStatement("""
            INSERT INTO %s (%s)
            VALUES (%s)
        """ % (BUSINESSINDEX_TABLE, FIELDNAMES, REPLACEMENTS))
        log.info("Query statement:\n%s" % query.query_string)
        batch = BatchStatement()
        for row in reader:
            try:
                row["TURNOVER"] = Decimal(row.get("TURNOVER"))
            except InvalidOperation:
                # TURNOVER is empty
                row["TURNOVER"] = None
            try:
                row["TOTAL_EMPLOYEES"] = int(row.get("TOTAL_EMPLOYEES"))
            except ValueError:
                # TOTAL_EMPLOYEES is empty
                row["TOTAL_EMPLOYEES"] = None
            batch.add(query, row)
            if (reader.line_num - 1) % BATCH_SIZE == 0:
                log.debug("Executing batch %s/%s insert with %s records"
                          % (batches_complete + 1, number_of_batches, BATCH_SIZE))
                session.execute(batch)
                batches_complete += 1
                batch = BatchStatement()
        if number_of_batches > batches_complete:
            log.debug("Executing batch %s/%s insert with %s records"
                      % (batches_complete + 1, number_of_batches,
                         line_count - (batches_complete * BATCH_SIZE)))
            session.execute(batch)
            batches_complete += 1
def _update_index_async(self, old_index, task):
  """ Updates the index table after leasing a task.

  Args:
    old_index: The row to remove from the index table.
    task: A Task object to create a new index entry for.
  Returns:
    A cassandra-driver future.
  """
  session = self.db_access.session

  old_eta = old_index.eta
  update_index = BatchStatement(retry_policy=BASIC_RETRIES)

  statement = """
    DELETE FROM pull_queue_tasks_index
    WHERE app=? AND queue=? AND eta=? AND id=?
  """
  if statement not in self.prepared_statements:
    self.prepared_statements[statement] = session.prepare(statement)
  delete_old_index = self.prepared_statements[statement]
  parameters = [self.app, self.name, old_eta, task.id]
  update_index.add(delete_old_index, parameters)

  statement = """
    INSERT INTO pull_queue_tasks_index (app, queue, eta, id, tag, tag_exists)
    VALUES (?, ?, ?, ?, ?, ?)
  """
  if statement not in self.prepared_statements:
    self.prepared_statements[statement] = session.prepare(statement)
  create_new_index = self.prepared_statements[statement]

  try:
    tag = task.tag
  except AttributeError:
    tag = ''
  tag_exists = tag != ''
  parameters = [self.app, self.name, task.leaseTimestamp, task.id, tag, tag_exists]
  update_index.add(create_new_index, parameters)

  return self.db_access.session.execute_async(update_index)
def insert_newlog(log_file, table_name):
    count = 0
    insert_query = session.prepare(
        "INSERT INTO " + table_name +
        " (host, id, datetime, path, bytes) VALUES (?, uuid(), ?, ?, ?)")
    batch = BatchStatement()
    for line in log_file:
        values = log_dissemble.split(line)
        # Only consider lines which split into host, dtime, path, num_bytes;
        # a full match yields six parts, so values[4] needs at least five.
        if len(values) >= 5:
            host = values[1]
            dtime = datetime.datetime.strptime(values[2], '%d/%b/%Y:%H:%M:%S')
            path = values[3]
            num_bytes = int(values[4])
            count += 1
            batch.add(insert_query, (host, dtime, path, num_bytes))
            if count == 300:
                session.execute(batch)
                batch.clear()
                count = 0
    session.execute(batch)
    batch.clear()
def import_from_activity(self, filename):
    """ import from csv file to db """
    f = open(filename)
    # read the csv file and skip the header
    csv_f = csv.reader(f)
    next(csv_f, None)

    # insert ActivityByUser
    insert_count_activity_by_user = self._db_cur.prepare(
        "UPDATE COUNT_ACTIVITY_BY_USER SET counter = counter + 1 "
        "WHERE date = ? AND company = ?")
    insert_activity_by_user = self._db_cur.prepare(
        "INSERT INTO ACTIVITY_BY_USER (date, event, domain, id, url, user_id, datetime) "
        "VALUES (?, ?, ?, ?, ?, ?, ?)")
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    for row in csv_f:
        if len(row) != 4:
            continue
        (user_id, event, url, time_stamp) = row
        domain = re.findall('https?://[^/]*', url, re.IGNORECASE)
        if len(domain) != 1:
            domain = ""
        else:
            domain = domain[0][7:]
            # startswith() instead of domain[0], which raises IndexError
            # when domain is the empty string
            if domain.startswith('/'):
                domain = domain[1:]
        # skip rows without a correct time format
        if not self.check_time(time_stamp):
            continue
        try:
            dt = datetime.datetime.strptime(time_stamp, "%Y/%m/%d %H:%M:%S.%f")
        except ValueError:
            dt = datetime.datetime.strptime(time_stamp, "%Y/%m/%d %H:%M:%S")
        if len(batch) > self._batch:
            self._db_cur.execute(batch)
            batch.clear()
        bucket = dt.strftime("%Y/%m/%d")
        company = self.get_company(user_id)
        batch.add(insert_activity_by_user,
                  (bucket, event, domain, uuid.uuid1(), url, int(user_id), dt))
        self._db_cur.execute(insert_count_activity_by_user, (bucket, company))
        self.update_event_by_company(event, company, bucket)
    if len(batch) != 0:
        self._db_cur.execute(batch)
def delivery(self, w, carrier):
    ti = time.time()
    self.dc += 1
    s = self.s
    orderdc = {}
    # select o_id from o_carrier for each district (o_carrier_id = 1, limit 1)
    for i in range(1, 40):
        rows = s.execute(self.d_s_carr, (w, i))
        for row in rows:
            orderdc[i] = row.o_id
    ubatch = BatchStatement()
    d_date = datetime.datetime.now()
    for d_id, o_id in orderdc.iteritems():
        ubatch.add(self.d_u_carr, (carrier, w, d_id, o_id))
        future = s.execute_async(self.d_s_order, (w, d_id, o_id))
        try:
            rows = future.result()
            c = 0
            ol_amount = 0
            ol_count = 0
            for row in rows:
                ol_count = row.o_ol_cnt + 1
                c = row.o_c_id
                ol_amount = ol_amount + int(row.ol_amount)
            for i in range(1, ol_count):
                ubatch.add(self.d_u_order, (d_date, w, d_id, o_id, i))
            s.execute_async(self.d_i_cbal, (ol_amount, w, d_id, c))
        except Exception:
            out = "Time-out in delivery"
            print out
    s.execute_async(ubatch)
    self.dtime += time.time() - ti
def config_check(self):
    cluster = None
    configuration_df = pd.DataFrame()
    try:
        select_query = ("SELECT algorithm_name,file_param_name,flag,"
                        "blobAsText(file_content) as file_content,param_value,type "
                        "from {}.{} WHERE algorithm_name = '{}' and flag=1 "
                        "ALLOW FILTERING;".format(KEYSPACE, CONFIG_TABLE_NAME, ALGORITHM_NAME))
        session, cluster = self.connect_cassandra()
        latest_df = session.execute(select_query)
        configuration_df = pd.DataFrame(latest_df)
        if not configuration_df.empty:
            param_df = configuration_df[configuration_df['type'] == 1]
            config_files_df = configuration_df[configuration_df['type'] == 2]
            batch = BatchStatement()
            insert_user = session.prepare(
                "INSERT INTO {}.{} (algorithm_name, file_param_name,flag) VALUES (?,?,?)"
                .format(KEYSPACE, CONFIG_TABLE_NAME))
            if not param_df.empty:
                param_list = list(param_df['file_param_name'])
                param_df = param_df.set_index('file_param_name')
                config_param_df = pd.read_csv(CONFIG_PARAM_FILE_PATH).set_index('Parameter')
                for param in param_list:
                    config_param_df.loc[param, 'Value'] = param_df.loc[param, 'param_value']
                    batch.add(insert_user, (ALGORITHM_NAME, param, 0))
                config_param_df.to_csv(CONFIG_PARAM_FILE_PATH)
            if not config_files_df.empty:
                for row in config_files_df.itertuples():
                    df_json = json.loads(row[4])
                    df = pd.DataFrame.from_dict(df_json)
                    filename = CONFIG_FOLDER_NAME + os.sep + row[2]
                    df.to_csv(filename, index=False)
                    batch.add(insert_user, (ALGORITHM_NAME, row[2], 0))
            session.execute(batch)
    except Exception as ex:
        logger.error("Error while updating the config files %s", ex)
    finally:
        if cluster:
            cluster.shutdown()
def sendCassandra1(iter):
    print("send to cassandra")
    cluster = Cluster(cassandra_hosts)
    session = cluster.connect(CASSANDRA_KEYSPACE)
    session.set_keyspace("ks")
    insert_statement1 = session.prepare(
        "INSERT INTO totalInputCountSecond (global_id,edit_time,count) VALUES (?,?,?)")
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    count = 0
    for record in iter.collect():
        batch.add(insert_statement1, ('a', record[0], record[1]))
        count += 1
        if count % 500 == 0:
            session.execute(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    session.execute(batch)
    session.shutdown()
def sendCassandra4(iter):
    print("send to cassandra")
    cluster = Cluster(cassandra_hosts)
    session = cluster.connect(CASSANDRA_KEYSPACE)
    session.set_keyspace("ks")
    insert_statement4 = session.prepare(
        "INSERT INTO useravgactivity (username,count) VALUES (?,?)")
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    count = 0
    for record in iter.collect():
        batch.add(insert_statement4, (record[0], record[1]))
        count += 1
        if count % 500 == 0:
            session.execute(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    session.execute(batch)
    session.shutdown()
def handler_new(iters):
    session = CassandraConnector().cassandraConnection()
    batch = BatchStatement()
    val = 0
    for record in iters:
        print("REC", record)
        # the original template ended with a stray '{}', which makes
        # str.format raise: manual and automatic field numbering cannot mix
        insertQuery = ("insert into adtracker ({0}) values"
                       "('{1}','{2}','{3}','{4}','{5}','{6}')").format(
            optional_params,
            str(record['date']),
            str(record['phone_number']),
            str(record['imein']),
            str(record['credit_card']),
            str(record['ip']),
            record['ssn_number'])
        batch.add(SimpleStatement(insertQuery))
        val = val + 1
        if val > 20:
            session.execute(batch)
            batch = BatchStatement()
            val = 0
    if val != 0:
        session.execute(batch)
def insert(cluster, keyspace, cql_stmt, generator, batch_size):
    session = cluster.connect(keyspace)
    session.default_timeout = 60
    session.default_consistency_level = ConsistencyLevel.LOCAL_ONE
    prepared_stmt = session.prepare(cql_stmt)
    batch_stmt = BatchStatement()
    values = take(batch_size, generator)
    count = 0
    while values:
        # match the statement list to the actual chunk length; the final
        # chunk from the generator may be shorter than batch_size
        batch_stmt.add_all([prepared_stmt] * len(values), values)
        session.execute(batch_stmt)
        values = take(batch_size, generator)
        batch_stmt.clear()
        if (count % 1e3) == 0:
            print('#blocks {:,.0f}'.format(count), end='\r')
        count += batch_size
def write_next_blocks(self, start_block):
    next_block = blockutil.hash_str(start_block)
    while next_block:
        block_json = blockutil.fetch_block_json(next_block)
        if "nextblockhash" in block_json.keys():
            next_block, block, txs = blockutil.transform_json(block_json)
            batchStmt = BatchStatement()
            batchStmt.add(self.__insert_block_stmt, block)
            for transaction in txs:
                batchStmt.add(self.__insert_transaction_stmt, transaction)
            while True:
                try:
                    self.__session.execute(batchStmt)
                except Exception as err:
                    print("Exception ", err, " retrying...", end="\r")
                    continue
                break
            print("Wrote block %d" % (block[0]), end="\r")
        else:
            # no successor yet; stop instead of refetching the same block forever
            next_block = None
def read_files(inputs, table):
    record_counter = 0
    batch_counter = 0
    batch_insert = BatchStatement()
    insert_statement = session.prepare(
        "INSERT INTO " + table + " (host, id, datetime, path, bytes) VALUES (?, ?, ?, ?, ?)")
    # get all files in input folder
    for file in os.listdir(inputs):
        # unzip files
        with gzip.open(os.path.join(inputs, file), 'rt', encoding='utf-8') as logfile:
            # read file line by line
            for line in logfile:
                # create a tuple of required fields
                log_object = separate_columns(line)
                # if log object is valid
                if log_object is not None:
                    record_counter += 1
                    batch_insert.add(
                        insert_statement,
                        (log_object[0], log_object[1], log_object[2],
                         log_object[3], log_object[4]))
                    # insert records when reached to declared batch size
                    if record_counter >= BATCH_SIZE:
                        print("writing batch " + str(batch_counter))
                        session.execute(batch_insert)
                        batch_insert.clear()
                        record_counter = 0
                        batch_counter += 1
    # to insert the final part with number of rows less than batch size
    if record_counter > 0:
        print("writing final batch " + str(batch_counter + 1))
        session.execute(batch_insert)
def batchDataInsert(session, data, table):
    # Column names are taken directly from the first record's keys; the old
    # tableDict mapping (table name to a newline-separated column file in the
    # docs directory) is no longer needed.
    cols = data[0].keys()
    columns = ','.join(cols)
    # generates the appropriate number of ? marks for the number of columns in your table
    quests = ('?,' * (len(cols) - 1)) + '?'
    prepareString = "INSERT INTO " + table + "(" + columns + ")" + "VALUES" + "(" + quests + ")"
    insert_data = session.prepare(prepareString)
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    counter = 0
    batchList = []
    while counter < len(data):
        batch.add(insert_data.bind(data[counter]))
        # Keep batches short otherwise an error is thrown
        if counter % 50 == 0 and counter > 0:
            batchList.append(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
        if counter == len(data) - 1:
            batchList.append(batch)
        counter += 1
    for batches in batchList:
        session.execute(batches)
def _update_index(self, old_index, task):
  """ Updates the index table after leasing a task.

  Args:
    old_index: The row to remove from the index table.
    task: A Task object to create a new index entry for.
  """
  old_eta = old_index.eta
  update_index = BatchStatement(retry_policy=self.db_access.retry_policy)
  delete_old_index = SimpleStatement("""
    DELETE FROM pull_queue_tasks_index
    WHERE app = %(app)s
    AND queue = %(queue)s
    AND eta = %(eta)s
    AND id = %(id)s
  """)
  parameters = {
    'app': self.app,
    'queue': self.name,
    'eta': old_eta,
    'id': task.id
  }
  update_index.add(delete_old_index, parameters)

  create_new_index = SimpleStatement("""
    INSERT INTO pull_queue_tasks_index (app, queue, eta, id, tag, tag_exists)
    VALUES (%(app)s, %(queue)s, %(eta)s, %(id)s, %(tag)s, %(tag_exists)s)
  """)
  parameters = {
    'app': self.app,
    'queue': self.name,
    'eta': task.leaseTimestamp,
    'id': task.id
  }
  try:
    parameters['tag'] = task.tag
  except AttributeError:
    parameters['tag'] = ''
  parameters['tag_exists'] = parameters['tag'] != ''
  update_index.add(create_new_index, parameters)

  self.db_access.session.execute(update_index)
def sendPartition(iter):
    cassandra_cluster = Cluster(['54.71.115.97', '35.166.209.97',
                                 '35.166.89.248', '34.218.167.77'])
    cassandra_session = cassandra_cluster.connect('oilwell')
    insert_statement = cassandra_session.prepare(
        "INSERT INTO well_pressure (id, dt, well_name, pressure_1, pressure_2, "
        "pressure_3, pressure_4) VALUES (?, ?, ?, ?, ?, ?, ?)")
    count = 0
    # batch insert into cassandra database
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    for record in iter.collect():
        batch.add(insert_statement,
                  (int(record[0]), str(record[1]), str(record[2]), int(record[3]),
                   int(record[4]), int(record[5]), int(record[6])))
        # split the batch, so that the batch will not exceed the size limit
        count += 1
        if count % 300 == 0:
            cassandra_session.execute(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    # send the batch that is less than 300
    cassandra_session.execute(batch)
    cassandra_session.shutdown()
def test_batch_statement(self):
    session, tracer = self._traced_session()

    batch = BatchStatement()
    batch.add(
        SimpleStatement("INSERT INTO test.person_write (name, age, description) VALUES (%s, %s, %s)"),
        ("Joe", 1, "a"),
    )
    batch.add(
        SimpleStatement("INSERT INTO test.person_write (name, age, description) VALUES (%s, %s, %s)"),
        ("Jane", 2, "b"),
    )
    session.execute(batch)

    spans = tracer.pop()
    assert len(spans) == 1
    s = spans[0]
    assert s.resource == "BatchStatement"
    assert s.get_metric("cassandra.batch_size") == 2
    assert "test.person" in s.get_tag("cassandra.query")
def write_next_blocks(self, start_block):
    next_block = blockutil.hash_str(start_block)
    while next_block:
        block_json = blockutil.fetch_block_json(next_block)
        next_block, block, txs = blockutil.transform_json(block_json)
        batchStmt = BatchStatement()
        batchStmt.add(self.__insert_block_stmt, block)
        block_group = block[0] // 10000
        tx_number = 0
        for transaction in txs:
            batchStmt.add(self.__insert_transaction_stmt,
                          [block_group, tx_number] + transaction)
            tx_number += 1
        while True:
            try:
                self.__session.execute(batchStmt)
            except Exception as err:
                print("Exception ", err, " retrying...", end="\r")
                continue
            break
        print("Wrote block %d" % (block[0]), end="\r")
def batch_execute_prepared_sqls(self, sqls, parameters, consistency_level=None, keyspace=None):
    """
    Execute statements with different operations (semantics) as one batch,
    e.g. a mix of inserts and updates.

    :param sqls: a list of CQL statements, one per operation
    :param parameters: a list whose elements are the parameter list or tuple
        for the statement at the same index
    """
    if len(sqls) != len(parameters):
        return False
    session = self.get_session(keyspace)
    if consistency_level:
        batch = BatchStatement(consistency_level=consistency_level)
    else:
        batch = BatchStatement(consistency_level=ConsistencyLevel.LOCAL_QUORUM)
    for sql, paras in zip(sqls, parameters):
        batch.add(SimpleStatement(sql), paras)
    return session.execute(batch)
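# A usage sketch for the helper above. The statements are wrapped in
# SimpleStatement, so placeholders use the driver's %s form; 'client' and the
# users/audit schema are hypothetical names for illustration only.
sqls = [
    "INSERT INTO users (id, name) VALUES (%s, %s)",
    "UPDATE audit SET action = %s WHERE id = %s",
]
parameters = [
    (42, 'alice'),
    ('created', 42),
]
client.batch_execute_prepared_sqls(sqls, parameters)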
def save_async(self, trades):
    """
    :param trades: DataFrame of trade rows, each providing the FIELDS_Trades columns
    """
    query = \
        """
        INSERT INTO trades ({})
        VALUES ({})
        """.format(','.join(FIELDS_Trades), ','.join("%s" for _ in FIELDS_Trades))

    batch_statement = BatchStatement()
    for i, trade in trades.iterrows():
        data = tuple(trade[field] for field in FIELDS_Trades)
        batch_statement.add(query, data)
        if len(batch_statement) >= MAX_BATCH_SIZE:
            get_async_manager().execute_async(self._session, batch_statement)
            batch_statement = BatchStatement()
    if len(batch_statement) > 0:
        get_async_manager().execute_async(self._session, batch_statement)
def main():
    row_count = 100000
    max_insert = 10
    log.info('truncate table')
    session.execute('truncate ooi.vel3d_k_wfp_instrument')
    log.info('done truncating')
    log.info('generating row data')
    rows = create_rows(row_count)
    now = time.time()
    batches = []
    batch = BatchStatement()
    for i, row in enumerate(rows):
        # add first, then rotate, so each batch holds exactly max_insert rows
        batch.add(insert, row)
        if (i + 1) % max_insert == 0:
            batches.append((batch, []))
            batch = BatchStatement()
    if len(batch) > 0:
        batches.append((batch, []))
    log.info('inserting')
    execute_concurrent(session, batches, concurrency=50)
    log.info('%d rows: %7.2f sec elapsed', row_count, time.time() - now)
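# execute_concurrent above takes (statement, parameters) pairs, which is why
# each batch is paired with an empty parameter list. When the rows are
# independent single-row inserts, the driver's execute_concurrent_with_args
# achieves the same fan-out without hand-built batches; a sketch under the
# same assumed session, prepared insert and rows list:
from cassandra.concurrent import execute_concurrent_with_args

# each row becomes one bound execution of the prepared insert;
# the driver keeps up to 50 requests in flight
results = execute_concurrent_with_args(session, insert, rows, concurrency=50)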
def insert(cls, params):
    idx_start, idx_end = params
    batch_size = 25
    batch_stmt = BatchStatement()
    for index in range(idx_start, idx_end, batch_size):
        curr_batch_size = min(batch_size, idx_end - index)
        for i in range(0, curr_batch_size):
            block = cls.chain[index + i]
            block_tx = [block.height, [tx_stats(x) for x in block.txes]]
            batch_stmt.add(cls.prepared_stmt, block_tx)
        try:
            cls.session.execute(batch_stmt)
        except Exception as e:
            # fall back to ingesting single blocks when the batch ingest
            # fails (e.g. a "batch too large" error)
            print(e)
            for i in range(0, curr_batch_size):
                while True:
                    try:
                        block = cls.chain[index + i]
                        block_tx = [block.height,
                                    [tx_stats(x) for x in block.txes]]
                        cls.session.execute(cls.prepared_stmt, block_tx)
                    except Exception as e:
                        print(e)
                        continue
                    break
        batch_stmt.clear()
        with cls.counter.get_lock():
            cls.counter.value += curr_batch_size
            print('#blocks {:,.0f}'.format(cls.counter.value), end='\r')
def main(inputs, key_space, table):
    cluster = Cluster(['199.60.17.188', '199.60.17.216'])
    session = cluster.connect(key_space)
    session.execute("""
        CREATE TABLE IF NOT EXISTS nasalogs (
            host TEXT,
            datetime TIMESTAMP,
            path TEXT,
            bytes INT,
            recId UUID,
            PRIMARY KEY (host, recId)
        )
    """)
    session.execute("""TRUNCATE nasalogs;""")
    insert_log = session.prepare(
        "INSERT INTO " + table + " (host,datetime,path,bytes,recId) VALUES (?,?,?,?,?)")
    batch = BatchStatement(consistency_level=ConsistencyLevel.ONE)
    c = 0
    for g_file in os.listdir(inputs):
        with gzip.open(os.path.join(inputs, g_file), 'rt', encoding='utf-8') as logfile:
            for line in logfile:
                w = get_words(line)
                if len(w) > 4:
                    c += 1
                    batch.add(insert_log,
                              (w[1],
                               datetime.datetime.strptime(w[2], '%d/%b/%Y:%H:%M:%S'),
                               w[3], int(w[4]), uid()))
                    if c == 400:
                        session.execute(batch)
                        batch.clear()
                        c = 0
    session.execute(batch)
    cluster.shutdown()
def save_async(self, quotes):
    """
    :param quotes: iterable of dict-like quote rows keyed by column name
    """
    batch_statement = BatchStatement()
    for quote in quotes:
        query = \
            """
            INSERT INTO quotes ({})
            VALUES ({})
            """.format(','.join(quote.keys()), ','.join("%s" for _ in quote.keys()))
        data = tuple(quote[field] for field in quote.keys())
        batch_statement.add(query, data)
        if len(batch_statement) >= MAX_BATCH_SIZE:
            get_async_manager().execute_async(self._session, batch_statement)
            batch_statement = BatchStatement()
    if len(batch_statement) > 0:
        get_async_manager().execute_async(self._session, batch_statement)