def set_latest_batch_time(vineyard_id, hub_id, batch_sent, hub_data):
    """
    Inserts timestamp for latest data, received from a hub, into the database.
    """

    table = os.environ.get('DB_HW_TABLE')
    parameters = {
        'vineid': vineyard_id,
        'hubid': hub_id,
        'lasthubbatchsent': batch_sent,
    }
    query = (
        'UPDATE {} SET lasthubbatchsent=? '
        'WHERE vineid=? AND hubid=? AND nodeid=?;'
    )
    prepared_statement = session.prepare(
        query.format(table)
    )
    batch_statement = BatchStatement()

    try:
        for data_point in hub_data:
            parameters['nodeid'] = int(data_point['node_id'])
            batch_statement.add(
                prepared_statement,
                parameters
            )
        session.execute(batch_statement)
        return True
    # Unknown exception
    except Exception as e:
        raise Exception('Transaction Error Occurred: {}'.format(str(e)))
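
# A minimal, hypothetical sketch of the surrounding setup this snippet relies on
# but does not show (the global `session`, the driver imports, and the DB_HW_TABLE
# environment variable). The contact point, keyspace, table name, and sample
# arguments below are placeholders, not taken from the source.
import os

from cassandra.cluster import Cluster
from cassandra.query import BatchStatement

os.environ.setdefault('DB_HW_TABLE', 'hub_watermarks')   # placeholder table name

cluster = Cluster(['127.0.0.1'])
session = cluster.connect('telemetry')                   # placeholder keyspace

# Example call: mark that nodes 1 and 2 of hub 7 reported data at the given time.
hub_data = [{'node_id': '1'}, {'node_id': '2'}]
set_latest_batch_time('vineyard-42', 7, '2021-06-01T12:00:00Z', hub_data)
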
  def put_entities_tx(self, app, txid, entities):
    """ Update transaction metadata with new put operations.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      entities: A list of entities that will be put upon commit.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
      VALUES (?, ?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

    for entity in entities:
      args = (tx_partition(app, txid),
              TxnActions.MUTATE,
              entity.key().name_space(),
              bytearray(entity.key().path().Encode()),
              bytearray(entity.Encode()))
      batch.add(insert, args)

    try:
      self.session.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while putting entities in a transaction'
      logging.exception(message)
      raise AppScaleDBConnectionError(message)
Example #3
def store_qc_results(qc_results_values, pk, particle_ids, particle_bins, particle_deploys,
                     param_name, strict_range=False):
    start_time = time.clock()
    if engine.app.config['QC_RESULTS_STORAGE_SYSTEM'] == 'cass':
        log.info('Storing QC results in Cassandra.')
        insert_results = SessionManager.prepare(
                "insert into ooi.qc_results "
                "(subsite, node, sensor, bin, deployment, stream, id, parameter, results) "
                "values (?, ?, ?, ?, ?, ?, ?, ?, ?)")

        batch = BatchStatement()
        for (qc_results, particle_id, particle_bin, particle_deploy) in izip(qc_results_values, particle_ids,
                                                                             particle_bins, particle_deploys):
            batch.add(insert_results, (pk.get('subsite'), pk.get('node'), pk.get('sensor'),
                                       particle_bin, particle_deploy, pk.get('stream'),
                                       uuid.UUID(particle_id), param_name, str(qc_results)))
        SessionManager.session().execute_async(batch)
        log.info("QC results stored in {} seconds.".format(time.clock() - start_time))
    elif engine.app.config['QC_RESULTS_STORAGE_SYSTEM'] == 'log':
        log.info('Writing QC results to log file.')
        qc_log = logging.getLogger('qc.results')
        qc_log_string = ""
        for (qc_results, particle_id, particle_bin, particle_deploy) in izip(qc_results_values, particle_ids,
                                                                             particle_bins, particle_deploys):
            qc_log_string += "refdes:{0}-{1}-{2}, bin:{3}, stream:{4}, deployment:{5}, id:{6}, parameter:{7}, qc results:{8}\n" \
                .format(pk.get('subsite'), pk.get('node'), pk.get('sensor'), particle_bin,
                        pk.get('stream'), particle_deploy, particle_id, param_name, qc_results)
        qc_log.info(qc_log_string[:-1])
        log.info("QC results stored in {} seconds.".format(time.clock() - start_time))
    else:
        log.info("Configured storage system '{}' not recognized, qc results not stored.".format(
                engine.app.config['QC_RESULTS_STORAGE_SYSTEM']))
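
# Hedged variation of the execute_async() call in the 'cass' branch above: keep
# the returned ResponseFuture and attach callbacks so a failed batch write is
# logged instead of silently dropped. The names it reuses (SessionManager, log,
# time, start_time) are the ones the snippet itself uses; the helper name is made up.
def _store_qc_batch_async(batch, start_time):
    future = SessionManager.session().execute_async(batch)
    future.add_callbacks(
        callback=lambda _: log.info(
            "QC results stored in {} seconds.".format(time.clock() - start_time)),
        errback=lambda exc: log.error("Storing QC results failed: {}".format(exc)),
    )
    return future
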
Example #4
    def _delete_task_and_index(self, task):
        """ Deletes a task and its index atomically.

    Args:
      task: A Task object.
    """
        batch_delete = BatchStatement(retry_policy=self.db_access.retry_policy)

        delete_task = SimpleStatement(
            """
      DELETE FROM pull_queue_tasks
      WHERE app = %(app)s AND queue = %(queue)s AND id = %(id)s
    """
        )
        parameters = {"app": self.app, "queue": self.name, "id": task.id}
        batch_delete.add(delete_task, parameters=parameters)

        delete_task_index = SimpleStatement(
            """
      DELETE FROM pull_queue_tasks_index
      WHERE app = %(app)s
      AND queue = %(queue)s
      AND eta = %(eta)s
      AND id = %(id)s
    """
        )
        parameters = {"app": self.app, "queue": self.name, "eta": task.get_eta(), "id": task.id}
        batch_delete.add(delete_task_index, parameters=parameters)

        self.db_access.session.execute(batch_delete)
    def __insert(self, models, if_not_exists=None):
        assert models, 'You can insert nothing, what good would that do?'

        serial_consistency_level = None
        if if_not_exists:
            serial_consistency_level = ConsistencyLevel.SERIAL

        batch = BatchStatement(serial_consistency_level=serial_consistency_level)

        for model in models:
            self.__validate_model(model)

            model._pre_put_hook()
            metadata = self._get_table_metadata(model.table())

            fields = self.denormalize(model)
            fields.update(self.construct_primary_key(model, metadata))

            cql_qry = CassandraQuery(model.query()).insert(fields)
            if if_not_exists:
                cql_qry.if_not_exists()
            batch.add(cql_qry.statement, parameters=cql_qry.condition_values)
        self._execute_batch(batch)

        for model in models:
            model._post_put_hook()

            edges = self.find_edges(model)
            if edges:
                self.set_edges_for_model(model, edges)
        if len(models) == 1:
            return models[0]
        return models
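
# Standalone sketch of the conditional-batch idea used above: when statements
# carry IF NOT EXISTS, the batch is given a SERIAL serial consistency level.
# Keyspace, table, and values are hypothetical, and note that Cassandra requires
# all conditional statements in one batch to target the same partition.
from cassandra import ConsistencyLevel
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement, SimpleStatement

session = Cluster(['127.0.0.1']).connect('demo')

batch = BatchStatement(serial_consistency_level=ConsistencyLevel.SERIAL)
batch.add(
    SimpleStatement("INSERT INTO users (user_id, email) VALUES (%s, %s) IF NOT EXISTS"),
    (42, 'a@example.com'))
session.execute(batch)
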
    def test_unicode(self):
        """
        Test to validate that unicode query strings are handled appropriately by various query types

        @since 3.0.0
        @jira_ticket PYTHON-334
        @expected_result no unicode exceptions are thrown

        @test_category query
        """

        unicode_text = u"Fran\u00E7ois"
        batch = BatchStatement(BatchType.LOGGED)
        batch.add(
            u"INSERT INTO {0}.{1} (k, v) VALUES (%s, %s)".format(self.keyspace_name, self.function_table_name),
            (0, unicode_text),
        )
        self.session.execute(batch)
        self.session.execute(
            u"INSERT INTO {0}.{1} (k, v) VALUES (%s, %s)".format(self.keyspace_name, self.function_table_name),
            (0, unicode_text),
        )
        prepared = self.session.prepare(
            u"INSERT INTO {0}.{1} (k, v) VALUES (?, ?)".format(self.keyspace_name, self.function_table_name)
        )
        bound = prepared.bind((1, unicode_text))
        self.session.execute(bound)
def save_eod_data_to_db(data):
    """
    Saves Data to Cassandra in row batches
    """
    logger.info('Received Data: {}'.format(data['ticker']))

    q = """
        INSERT INTO historical_data (exchange, ticker, eoddate, open, close, high, low, volume)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s);
      """

    session = cluster.connect('test')

    df = data['df'].dropna()
    for chunk in np.array_split(df, max(1, len(df) // 200)):
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)

        for eoddate, row in chunk.iterrows():
            batch.add(q, [data['exchange'],
                          data['ticker'],
                          str(eoddate),
                          row['Open'],
                          row['Close'],
                          row['High'],
                          row['Low'],
                          int(row['Volume'])]
                      )

        session.execute(batch)

    logger.info('Done')
  def _normal_batch(self, mutations):
    """ Use Cassandra's native batch statement to apply mutations atomically.

    Args:
      mutations: A list of dictionaries representing mutations.
    """
    self.logger.debug('Normal batch: {} mutations'.format(len(mutations)))
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=self.retry_policy)
    prepared_statements = {'insert': {}, 'delete': {}}
    for mutation in mutations:
      table = mutation['table']
      if mutation['operation'] == TxnActions.PUT:
        if table not in prepared_statements['insert']:
          prepared_statements['insert'][table] = self.prepare_insert(table)
        values = mutation['values']
        for column in values:
          batch.add(
            prepared_statements['insert'][table],
            (bytearray(mutation['key']), column, bytearray(values[column]))
          )
      elif mutation['operation'] == TxnActions.DELETE:
        if table not in prepared_statements['delete']:
          prepared_statements['delete'][table] = self.prepare_delete(table)
        batch.add(
          prepared_statements['delete'][table],
          (bytearray(mutation['key']),)
        )

    try:
      self.session.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during batch_mutate'
      logging.exception(message)
      raise AppScaleDBConnectionError(message)
  def insert(self, events):
    if not events:
      return
    batch_stmt = BatchStatement(batch_type=BatchType.UNLOGGED,
                                consistency_level=ConsistencyLevel.QUORUM)

    shard_idx = {}
    for _id, event in events:
      shard_time = round_down(event[TIMESTAMP_FIELD], self.width)
      shard = shard_idx.get(shard_time,
                            random.randint(0, self.shards - 1))

      # Insert to index.
      try:
        self.index_cache.get((shard_time, shard))
      except KeyError:
        batch_stmt.add(BoundStatement(self.namespace.INDEX_INSERT_STMT,
                                      routing_key=self.stream,
                                      consistency_level=ConsistencyLevel.QUORUM)
                       .bind((self.stream, shard_time, self.width, shard)))
        self.index_cache.set((shard_time, shard), None)

      # Insert to stream.
      shard_key = StreamShard.get_key(self.stream, shard_time, shard)
      batch_stmt.add(BoundStatement(self.namespace.INSERT_STMT,
                                    routing_key=shard_key,
                                    consistency_level=ConsistencyLevel.QUORUM)
                     .bind((shard_key,
                            _id,
                            marshal.dumps(event))))
      shard_idx[shard_time] = (shard + 1) % self.shards  # Round robin.

    self.session.execute(batch_stmt)
    def save_result(self, check_uuid: str, trigger: dict,
                    scheduled_time: object,
                    execution_time: object,
                    insertion_time: object,
                    alert_sent: bool,
                    service_id: str,
                    **kwargs):
        """Save check result.

        :param check_uuid: check uuid
        :param dict trigger: trigger definition
        :param scheduled_time: time when check was scheduled
        :param execution_time: time when worker finished execution
        :param insertion_time: time when check was analyzed by alerter
        :param alert_sent: indicates when alert was sent to AER
        :param service_id: service id
        """
        batch = BatchStatement()
        args = (
            UUID(check_uuid), UUID(trigger['uuid']),
            scheduled_time, execution_time, insertion_time,
            alert_sent,
            trigger['result']['status'],
            trigger['result']['message'],
            service_id,
            kwargs
        )
        batch.add(self._save_query_stmt, args)
        batch.add(self._save_service_query_stmt, args)

        self._session.execute(batch)
def insert_rows(starting_partition, ending_partition, rows_per_partition, counter, counter_lock):
    cluster = Cluster(['127.0.0.1'], load_balancing_policy=TokenAwarePolicy(RoundRobinPolicy()))
    try:
        session = cluster.connect('ks')
        try:
            statement = session.prepare('INSERT INTO tbl (a, b, c, d) VALUES (?, ?, ?, ?)')
            for partition_key in xrange(starting_partition, ending_partition):
                batch = None
                batch_size = 0
                for cluster_column in xrange(rows_per_partition):
                    if batch is None:
                        batch = BatchStatement(batch_type=BatchType.UNLOGGED)
                    value1 = random.randint(1, 1000000)
                    value2 = random.randint(1, 1000000)
                    batch.add(statement, [partition_key, cluster_column, value1, value2])
                    batch_size += 1
                    if (batch_size == MAX_BATCH_SIZE) or (cluster_column + 1 == rows_per_partition):
                        with counter_lock:
                            counter.value += batch_size
                        session.execute(batch)
                        batch = None
                        batch_size = 0
        finally:
            session.shutdown()
    finally:
        cluster.shutdown()
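
# Hedged sketch of a driver for insert_rows() above; it assumes the function and
# MAX_BATCH_SIZE live in the same module, and the batch size, worker count, and
# partition/row counts are illustrative values, not taken from the source.
import multiprocessing

MAX_BATCH_SIZE = 50

def run_insert_workers(num_workers=4, partitions_per_worker=1000, rows_per_partition=100):
    counter = multiprocessing.Value('i', 0)
    counter_lock = multiprocessing.Lock()
    workers = []
    for w in range(num_workers):
        start = w * partitions_per_worker
        end = start + partitions_per_worker
        p = multiprocessing.Process(
            target=insert_rows,
            args=(start, end, rows_per_partition, counter, counter_lock))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()
    print('rows inserted: {}'.format(counter.value))
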
  def record_reads(self, app, txid, group_keys):
    """ Keep track of which entity groups were read in a transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
      group_keys: An iterable containing Reference objects.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path)
      VALUES (?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

    for group_key in group_keys:
      if not isinstance(group_key, entity_pb.Reference):
        group_key = entity_pb.Reference(group_key)

      args = (tx_partition(app, txid),
              TxnActions.GET,
              group_key.name_space(),
              bytearray(group_key.path().Encode()))
      batch.add(insert, args)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while recording reads in a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
  def delete_entities_tx(self, app, txid, entity_keys):
    """ Update transaction metadata with new delete operations.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      entity_keys: A list of entity keys that will be deleted upon commit.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
      VALUES (?, ?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

    for key in entity_keys:
      # The None value overwrites previous puts.
      args = (tx_partition(app, txid),
              TxnActions.MUTATE,
              key.name_space(),
              bytearray(key.path().Encode()),
              None)
      batch.add(insert, args)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while deleting entities in a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
Example #14
def _upsert_users(keyspace):
    try:
        _cluster = Cluster(contact_points=CASSANDRA_CLUSTER, port=9042)
        _session = _cluster.connect()

        _session.set_keyspace(keyspace)
        _session.row_factory = dict_factory

        _add_users = [
            {'userid': 'dummy1', 'first_name': '11', 'last_name': 'dummy', 'emails': set('a')},
            {'userid': 'dummy2', 'first_name': '22', 'last_name': 'dummy', 'emails': set('b')}
        ]
        
        _prepare_insert = _session.prepare(
            "INSERT INTO users (userid, first_name, last_name, emails) VALUES (?, ?, ?, ?)")
            # "INSERT INTO users (userid, first_name, emails) VALUES (?, ?, ?)")

        _batch = BatchStatement(consistency_level=0)
        for user in _add_users:
            print(user)
            _batch.add(_prepare_insert, [ user['userid'], user['first_name'], user['last_name'], user['emails'] ])
            # _batch.add(_prepare_insert, [ user['userid'], user['first_name'], user['emails'] ])
        _session.execute(_batch)
        
    except:
        print('EXCEPT insert: {}({})'.format(sys.exc_info()[0], sys.exc_info()[1]))
    else:
        _session.shutdown()
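
# Side note on _upsert_users() above: consistency_level=0 is the numeric value of
# ConsistencyLevel.ANY, so spelling the enum out is equivalent and self-documenting.
from cassandra import ConsistencyLevel
from cassandra.query import BatchStatement

_batch = BatchStatement(consistency_level=ConsistencyLevel.ANY)
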
    def _normal_batch(self, mutations):
        """ Use Cassandra's native batch statement to apply mutations atomically.

    Args:
      mutations: A list of dictionaries representing mutations.
    """
        self.logger.debug("Normal batch: {} mutations".format(len(mutations)))
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, retry_policy=self.retry_policy)
        prepared_statements = {"insert": {}, "delete": {}}
        for mutation in mutations:
            table = mutation["table"]
            if mutation["operation"] == TxnActions.PUT:
                if table not in prepared_statements["insert"]:
                    prepared_statements["insert"][table] = self.prepare_insert(table)
                values = mutation["values"]
                for column in values:
                    batch.add(
                        prepared_statements["insert"][table],
                        (bytearray(mutation["key"]), column, bytearray(values[column])),
                    )
            elif mutation["operation"] == TxnActions.DELETE:
                if table not in prepared_statements["delete"]:
                    prepared_statements["delete"][table] = self.prepare_delete(table)
                batch.add(prepared_statements["delete"][table], (bytearray(mutation["key"]),))

        try:
            self.session.execute(batch)
        except (cassandra.Unavailable, cassandra.Timeout, cassandra.CoordinationFailure, cassandra.OperationTimedOut):
            message = "Exception during batch_mutate"
            logging.exception(message)
            raise AppScaleDBConnectionError(message)
def insert_data():
    # Example showing how to read data from a csv and insert into Cassandra using a prepared statement

    insert_statement = session.prepare("INSERT INTO sensor_data.raw (device, sensor, time, metric) VALUES (?,?,?,?)")

    f = open("rows_input.csv")
    rows = f.read().splitlines()
    f.close()

    rows_inserted = 0

    # Example showing how to use Cassandra batched insert
    batch_insert = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)

    for row in rows:
        data = row.split(",")
        values = [data[0], data[1], datetime.datetime.strptime(data[2], "%Y-%m-%d %H:%M:%S"), float(data[3])]
        insert = insert_statement.bind(values)
        batch_insert.add(insert)

        rows_inserted += 1

        if rows_inserted % BATCH_SIZE == 0:
            # fire the current batch and start a fresh one so rows are not re-sent
            session.execute_async(batch_insert)
            batch_insert = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)

    # flush any remaining rows that did not fill a whole batch
    if rows_inserted % BATCH_SIZE != 0:
        session.execute_async(batch_insert)

    print "Inserted %d rows" % rows_inserted

    return rows_inserted
Example #17
    def test_simple_statements(self):
        batch = BatchStatement(BatchType.LOGGED)
        for i in range(10):
            batch.add(SimpleStatement("INSERT INTO test3rf.test (k, v) VALUES (%s, %s)"), (i, i))

        self.session.execute(batch)
        self.session.execute_async(batch).result()
        self.confirm_results()
Example #18
def update_gene_summary_w_cancer_census(session, genes):
    update_qry = "UPDATE gene_summary SET in_cosmic_census = ? "
    update_qry += " WHERE gene = ? and chrom = ?"
    query = session.prepare(update_qry)
    batch = BatchStatement()
    for gene in genes:
        batch.add(query, gene)
    session.execute(batch)
Example #19
 def test_rk_from_simple(self):
     """
     batch routing key is inherited from SimpleStatement
     """
     batch = BatchStatement()
     batch.add(self.simple_statement)
     self.assertIsNotNone(batch.routing_key)
     self.assertEqual(batch.routing_key, self.simple_statement.routing_key)
 def test_add_all(self):
     batch = BatchStatement()
     statements = ['%s'] * 10
     parameters = [(i,) for i in range(10)]
     batch.add_all(statements, parameters)
     bound_statements = [t[1] for t in batch._statements_and_parameters]
     str_parameters = [str(i) for i in range(10)]
     self.assertEqual(bound_statements, str_parameters)
Example #21
def do_update(cluster, target_db, schema_file, record_size, start_record, batch_size, insert_percentage, delay, batch_count, replication_factor=3, suppress_output=False):
    record_size = int(record_size)
    start_record = int(start_record)
    batch_size = int(batch_size)
    insert_percentage = int(insert_percentage)
    delay = float(delay) / 1000
    batch_count = int(batch_count)
    nr_batch = 0

    random.seed(1)

    ks_name, cf_name = getKSCFNames(target_db)
    if ks_name is None or cf_name is None:
        return

    createKeyspace(cluster, ks_name, replication_factor=replication_factor)
    createTable(cluster, ks_name, cf_name, schema_file)

    ts = TestSchema(cluster, ks_name, cf_name)
    ts.getSchema()

    while True:
        if ts.counter_table:
            batch = BatchStatement(batch_type = BatchType.COUNTER)
        else:
            batch = BatchStatement()
        stat_str = ''
        for i in range(batch_size):
            if start_record <= 0 or random.randrange(100) <= insert_percentage:
                # insert case
                record_num = start_record
                query = ts.getInsertQuerywithRandomData(record_num, record_size)
                stat_str += 'I(%d) ' % record_num

            else:
                record_num = random.randrange(0, start_record)
                if random.randrange(100) <= 70:  # 70% update
                    if not ts.counter_table:
                        query = ts.getUpdateQuery(record_num)
                    else:
                        query = ts.getInsertQuerywithRandomData(record_num, 0)

                    stat_str += 'U(%d) ' % record_num
                else:                           # 30% deletion
                    query = ts.getDeleteQuery(record_num)
                    stat_str += 'D(%d) ' % record_num
            if not suppress_output:
                report(stat_str)
            batch.add(query)
            start_record += 1

        #print stat_str
        cluster.session.execute(batch)
        nr_batch += 1
        if nr_batch == batch_count:
            if batch_count >= 0:
                break
        time.sleep(delay)
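
# Minimal standalone illustration of why do_update() above switches batch types:
# counter updates must go into a COUNTER batch, and counter and non-counter
# statements cannot be mixed in the same batch. Keyspace and table are hypothetical.
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement, BatchType, SimpleStatement

session = Cluster(['127.0.0.1']).connect('demo')

counter_batch = BatchStatement(batch_type=BatchType.COUNTER)
for page in ('home', 'about', 'contact'):
    counter_batch.add(
        SimpleStatement("UPDATE page_views SET views = views + 1 WHERE page = %s"),
        (page,))
session.execute(counter_batch)
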
def copy_dynamic_table(table):
    c_ft = pycassa.ColumnFamily(pool, table)
    table_rows = (c_ft.get_range(include_ttl=True))

    command = "INSERT INTO {0} ("
    table_columns = []
    values = ''
    keyCount = 0
    for t in table_list:
        if(t.table_name == table):
            table_columns = t.columns
            for column in t.columns:
                if keyCount == 0:
                    command += column.name
                    values = ') VALUES (?'
                else:
                    command += ', ' + column.name
                    values += ', ?'
                if column.key:
                    keyCount += 1
            break
    command += values + ') USING TTL ?'
    insert_flow = client.session.prepare(command.format(table))

    batch = BatchStatement()
    num_rows_in_batch = 0
    rowKey = ''
    for rowKey,columns in table_rows:
        key_value = []
        rowKey_size = 0
        if isinstance(rowKey, tuple):
            for key in range(0, len(list(rowKey))):
                key_value.append(_convert_to_cql_data(table_columns, rowKey_size, rowKey, rowKey_size))
                rowKey_size += 1
            if t.is_index_table:
                key_value.append(random.randrange(0, 16))
        else:
            key_value.append(_convert_to_cql_data(table_columns, rowKey_size, rowKey, rowKey_size))
            rowKey_size += 1
        for key in columns.keys():
            value, ttl = columns.get(key)
            jsonValue = value
            if not is_json(value):
                jsonValue = json.dumps(value, ensure_ascii=False)
            column_value = copy.deepcopy(key_value)
            num_rows_in_batch += 1
            if(num_rows_in_batch % args.max_batch_entry_count == 0):
                num_rows_in_batch = 0
                client.load_data(batch)
                batch = BatchStatement()
            for col_num in range(rowKey_size, len(table_columns)-1):
                column_value.append(_convert_to_cql_data(table_columns, col_num, key, col_num-rowKey_size))
            column_value.append(jsonValue)
            column_value.append(ttl if ttl else 7200)

            batch.add(insert_flow, tuple(column_value))
    if(rowKey != ''):
        client.load_data(batch)
Example #23
 def test_rk_from_bound(self):
     """
     batch routing key is inherited from BoundStatement
     """
     bound = self.prepared.bind((1, None))
     batch = BatchStatement()
     batch.add(bound)
     self.assertIsNotNone(batch.routing_key)
     self.assertEqual(batch.routing_key, bound.routing_key)
def sendPartition(iter):
  cluster = Cluster(['hdp-master','hdp-slave1','hdp-slave2','hdp-slave3'])
  session = cluster.connect('coursera')
  insert = session.prepare("INSERT INTO destination_rank_by_airport (airport, destination_rank) VALUES (?, ?)")
  batch = BatchStatement()
  for record in iter:
    batch.add(insert, record)
  session.execute(batch)
  cluster.shutdown()  
 def _assert_invalid_request(self, session, insert_cql, value):
     """ Perform two executions of the supplied statement, as a
     single statement and again as part of a batch
     """
     prepared = session.prepare(insert_cql)
     self._execute_and_fail(lambda: session.execute(prepared, [value]), insert_cql)
     batch = BatchStatement()
     batch.add(prepared, [value])
     self._execute_and_fail(lambda: session.execute(batch), insert_cql)
    def test_clear_empty(self):
        batch = BatchStatement()
        batch.clear()
        self.assertFalse(batch._statements_and_parameters)
        self.assertIsNone(batch.keyspace)
        self.assertIsNone(batch.routing_key)
        self.assertFalse(batch.custom_payload)

        batch.add('something')
def sendPartition(iter):
  cluster = Cluster(['hdp-master','hdp-slave1','hdp-slave2','hdp-slave3'])
  session = cluster.connect('coursera')
  insert = session.prepare("INSERT INTO source_destination_mean_delay (source, destination, mean_arrival_delay) VALUES (?, ?, ?)")
  batch = BatchStatement()
  for record in iter:
    batch.add(insert, record)
  session.execute(batch)
  cluster.shutdown()  
Example #28
def read_callback(rows):
    global w
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, batch_type=BatchType.UNLOGGED)
    for row in rows:
        if row.ttl_value:
            batch.add(insert_ttl, (row.key, row.column1, row.value, row.ttl_value))
        else:
            batch.add(insert, (row.key, row.column1, row.value))
    w = dst_session.execute_async(batch)
    w.add_callbacks(callback=write_callback, callback_kwargs={'rows': len(rows)}, errback=write_errback)
Example #29
    def test_bound_statements(self):
        prepared = self.session.prepare("INSERT INTO test3rf.test (k, v) VALUES (?, ?)")

        batch = BatchStatement(BatchType.LOGGED)
        for i in range(10):
            batch.add(prepared.bind((i, i)))

        self.session.execute(batch)
        self.session.execute_async(batch).result()
        self.confirm_results()
 def test_rk_from_simple(self):
     """
     batch routing key is inherited from SimpleStatement
     """
     self.cluster = Cluster(protocol_version=PROTOCOL_VERSION)
     self.session = self.cluster.connect()
     batch = BatchStatement()
     batch.add(self.simple_statement)
     self.assertIsNotNone(batch.routing_key)
     self.assertEqual(batch.routing_key, self.simple_statement.routing_key)
def execute_batch():
    query = "INSERT INTO data (device_id, data_source_id, time_upload, value) VALUES (?, ?, ?, ?) IF NOT EXISTS"
    batch = BatchStatement()
    prepared = session.prepare(query)
    batch_size = 0
    # send the values to the cluster in batches of 25,000
    for d in dates_data:
        batch.add(prepared, (device_id, data_source_id, d[0], str(d[1])))
        batch_size += 1
        if batch_size >= 25_000:
            res = session.execute(batch)
            print('values sent', str(batch_size))
            batch.clear()
            batch_size = 0
    # send remaining values
    if batch_size > 0:
        res = session.execute(batch)
        print('values sent', str(batch_size))
        batch.clear()
        batch_size = 0
def insert_author(minuteslot, records):
    sorted_r = sorted(records.items(), key=itemgetter(1), reverse=True)[:20]
    added = 0
    batch = BatchStatement()
    for author, count in sorted_r:
        batch.add(cql_author_stmt, (minuteslot, author, count))
        added += 1
        if added == 50:
            session.execute(batch)
            added = 0
            batch = BatchStatement()

    if added > 0:
        session.execute(batch)
 def execute_in_batch(self, session, reader, query, extract_data,
                      get_params):
     batch = BatchStatement()
     query_count = 0
     for line in itertools.islice(reader, self.ROW_COUNT):
         # Store data to be used by other methods
         extract_data(self, line)
         batch.add(query, get_params(self, line))
         query_count = query_count + 1
         if query_count % self.BATCH_SIZE == 0:
             session.execute(batch)
             batch = BatchStatement()
     # Execute last batch
     session.execute(batch)
Example #34
    def test_batch_statement(self):
        session, writer = self._traced_session()

        batch = BatchStatement()
        batch.add(SimpleStatement("INSERT INTO test.person (name, age, description) VALUES (%s, %s, %s)"), ("Joe", 1, "a"))
        batch.add(SimpleStatement("INSERT INTO test.person (name, age, description) VALUES (%s, %s, %s)"), ("Jane", 2, "b"))
        session.execute(batch)

        spans = writer.pop()
        eq_(len(spans), 1)
        s = spans[0]
        eq_(s.resource, 'BatchStatement')
        eq_(s.get_metric('cassandra.batch_size'), 2)
        assert 'test.person' in s.get_tag('cassandra.query')
    def test_clear(self):
        keyspace = 'keyspace'
        routing_key = 'routing_key'
        custom_payload = {'key': six.b('value')}

        ss = SimpleStatement('whatever', keyspace=keyspace, routing_key=routing_key, custom_payload=custom_payload)

        batch = BatchStatement()
        batch.add(ss)

        self.assertTrue(batch._statements_and_parameters)
        self.assertEqual(batch.keyspace, keyspace)
        self.assertEqual(batch.routing_key, routing_key)
        self.assertEqual(batch.custom_payload, custom_payload)

        batch.clear()
        self.assertFalse(batch._statements_and_parameters)
        self.assertIsNone(batch.keyspace)
        self.assertIsNone(batch.routing_key)
        self.assertFalse(batch.custom_payload)

        batch.add(ss)
Example #36
def main():
    #Defining input directory, keyspace and table name
    MAX_LINES = 500
    temp_line = 0
    inputs = sys.argv[1]
    keyspace = sys.argv[2]
    table_name = sys.argv[3]

    #Cluster configuration
    cluster = Cluster(['199.60.17.136', '199.60.17.173'])
    session = cluster.connect(keyspace)
    session.execute('USE %s;' % keyspace)

    #Defining the query for inserting values into table nasalogs
    insert_query = session.prepare(
        "INSERT INTO %s (host, datetime, path, bytes) VALUES (?, ?, ?, ?);" %
        table_name)
    linere = re.compile(
        "^(\\S+) - - \\[(\\S+) [+-]\\d+\\] \"[A-Z]+ (\\S+) HTTP/\\d\\.\\d\" \\d+ (\\d+)$"
    )

    for f in os.listdir(inputs):
        with gzip.GzipFile(os.path.join(inputs, f)) as logfile:
            batch = BatchStatement()
            for line in logfile:
                #splitting the row data as per the regular expression
                single_row = linere.split(line)

                #retrieving required values in the specific format as host, datetime, path and bytes
                if len(single_row) == 6:
                    host = single_row[1]
                    #stripping date-time to its format
                    date_time = dt.datetime.strptime(single_row[2],
                                                     '%d/%b/%Y:%H:%M:%S')
                    path = single_row[3]
                    bytes_transferred = single_row[4]

                    #packaging multiple insert queries into one batch statement
                    temp_line += 1
                    batch.add(insert_query,
                              [host, date_time, path,
                               int(bytes_transferred)])

                if temp_line == MAX_LINES:
                    #executing and clearing the batch once it reaches the threshold
                    session.execute(batch)
                    batch.clear()
                    temp_line = 0

            #inserting whatever is left in the batch for this file
            if temp_line > 0:
                session.execute(batch)
                batch.clear()
                temp_line = 0
def load_businessindex(session, csv_path):
    log.info("Opening " + csv_path)

    # get the number of lines
    with open(csv_path, 'rb') as csvlines:
        line_count = sum(1 for line in csvlines) - 1

    number_of_batches = int(math.ceil(Decimal(line_count)/BATCH_SIZE))
    batches_complete = 0

    with open(csv_path, 'rb') as csvfile:

        reader = csv.DictReader(csvfile, skipinitialspace=True)

        query = SimpleStatement("""
            INSERT INTO %s (%s)
            VALUES (%s)
            """ % (BUSINESSINDEX_TABLE, FIELDNAMES, REPLACEMENTS))

        log.info("Query statement:\n%s" % query.query_string)

        batch = BatchStatement()
        for row in reader:
            try:
                row["TURNOVER"] = Decimal(row.get("TURNOVER"))
            except InvalidOperation:
                # TURNOVER is empty
                row["TURNOVER"] = None

            try:
                row["TOTAL_EMPLOYEES"] = int(row.get("TOTAL_EMPLOYEES"))
            except ValueError:
                # TOTAL_EMPLOYEES is empty
                row["TOTAL_EMPLOYEES"] = None

            batch.add(query, row)

            if (reader.line_num - 1) % BATCH_SIZE == 0:
                log.debug("Executing batch %s/%s insert with %s records" % (batches_complete+1, number_of_batches,
                                                                            BATCH_SIZE))
                session.execute(batch)
                batches_complete += 1
                batch = BatchStatement()

        if number_of_batches > batches_complete:
            log.debug("Executing batch %s/%s insert with %s records" % (batches_complete+1, number_of_batches,
                                                                        line_count-(batches_complete*BATCH_SIZE)))
            session.execute(batch)
            batches_complete += 1
Example #38
    def _update_index_async(self, old_index, task):
        """ Updates the index table after leasing a task.

    Args:
      old_index: The row to remove from the index table.
      task: A Task object to create a new index entry for.
    Returns:
      A cassandra-driver future.
    """
        session = self.db_access.session

        old_eta = old_index.eta
        update_index = BatchStatement(retry_policy=BASIC_RETRIES)

        statement = """
      DELETE FROM pull_queue_tasks_index
      WHERE app=?
      AND queue=?
      AND eta=?
      AND id=?
    """
        if statement not in self.prepared_statements:
            self.prepared_statements[statement] = session.prepare(statement)
        delete_old_index = self.prepared_statements[statement]

        parameters = [self.app, self.name, old_eta, task.id]
        update_index.add(delete_old_index, parameters)

        statement = """
      INSERT INTO pull_queue_tasks_index (app, queue, eta, id, tag, tag_exists)
      VALUES (?, ?, ?, ?, ?, ?)
    """
        if statement not in self.prepared_statements:
            self.prepared_statements[statement] = session.prepare(statement)
        create_new_index = self.prepared_statements[statement]

        try:
            tag = task.tag
        except AttributeError:
            tag = ''
        tag_exists = tag != ''

        parameters = [
            self.app, self.name, task.leaseTimestamp, task.id, tag, tag_exists
        ]
        update_index.add(create_new_index, parameters)

        return self.db_access.session.execute_async(update_index)
def insert_newlog(log_file, table_name):
    count = 1
    insert_query = session.prepare(
        "INSERT INTO " + table_name +
        " (host, id, datetime, path, bytes) VALUES (?, uuid(), ?, ?, ?)")
    batch = BatchStatement()
    for line in log_file:
        values = log_dissemble.split(line)
        # Only consider lines which can be split as host, dtime, path, num_bytes
        if len(values) >= 4:
            host = values[1]
            dtime = datetime.datetime.strptime(values[2], '%d/%b/%Y:%H:%M:%S')
            path = values[3]
            num_bytes = int(values[4])
            count += 1
            batch.add(insert_query, (host, dtime, path, num_bytes))
            if count == 300:
                session.execute(batch)
                batch.clear()
                count = 1
    session.execute(batch)
    batch.clear()
    def import_from_activity(self, filename):
        """ import from csv file to db """
        f = open(filename)

        # read the csv fle and skip th next header
        csv_f = csv.reader(f)
        next(csv_f, None)

        # insert ActivityByUser
        insert_count_activity_by_user = self._db_cur.prepare("UPDATE COUNT_ACTIVITY_BY_USER SET counter = counter + 1 where date = ? and company = ? ")
        insert_activity_by_user = self._db_cur.prepare("INSERT INTO ACTIVITY_BY_USER (date, event, domain, id, url, user_id, datetime) VALUES(?, ?, ?, ?, ?, ?, ? )")

        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
        for row in csv_f:
            if len(row) != 4:
                continue
            (user_id, event, url, time_stamp) = row
            domain = re.findall('https?://[^/]*',url, re.IGNORECASE)

            if len(domain) != 1:
                domain = ""
            else:
                domain = domain[0][7:]
                if domain[0] == '/':
                    domain = domain[1:]

            # if not correct time format. continue
            if( not self.check_time(time_stamp)):
                continue

            try:
                dt = datetime.datetime.strptime(time_stamp, "%Y/%m/%d %H:%M:%S.%f")    
            except ValueError:
                dt = datetime.datetime.strptime(time_stamp, "%Y/%m/%d %H:%M:%S")

            while len(batch) > self._batch:
                self._db_cur.execute(batch)
                batch.clear()
            
            bucket = dt.strftime("%Y/%m/%d")
            company = self.get_company(user_id)
            batch.add(insert_activity_by_user,(bucket, event, domain, uuid.uuid1(), url, int(user_id), dt))
            self._db_cur.execute(insert_count_activity_by_user, (bucket, company))
            self.update_event_by_company(event, company, bucket)
        
        if len(batch) != 0:
            self._db_cur.execute(batch)
Example #41
    def delivery(self, w, carrier):
        ti = time.time()
        self.dc += 1

        s = self.s
        orderdc = {}
        for i in range(1, 40):
            #query = 'select o_id from o_carrier where w_id='+str(w)+' and d_id='+str(i)+' and o_carrier_id = 1  limit 1'
            rows = s.execute(self.d_s_carr, (w, i))
            for row in rows:
                orderdc[i] = row.o_id
        #print "first query:",time.time()-ti
        ubatch = BatchStatement()
        d_date = datetime.datetime.now()
        obatch = BatchStatement()
        ltime = time.time()
        for d_id, o_id in orderdc.iteritems():
            ubatch.add(self.d_u_carr, (carrier, w, d_id, o_id))
            future = s.execute_async(self.d_s_order, (w, d_id, o_id))
            try:
                tq = time.time()
                #print "Second query:",tq-ltime
                rows = future.result()
                c = 0
                ol_amount = 0
                ol_count = 0
                for row in rows:
                    ol_count = row.o_ol_cnt + 1
                    c = row.o_c_id
                    ol_amount = ol_amount + int(row.ol_amount)
                for i in range(1, ol_count):
                    ubatch.add(self.d_u_order, (d_date, w, d_id, o_id, i))
                tq = time.time()
                s.execute_async(self.d_i_cbal, (ol_amount, w, d_id, c))
                #print "third query ",time.time()-tq
            except Exception:
                out = "Time-out in delivery"
                print out
        tq = time.time()
        s.execute_async(ubatch)
        #print "Loop Time:",time.time()-ti
        self.dtime += time.time() - ti
    def config_check(self):
        cluster = None
        configuration_df = pd.DataFrame()
        try:

            select_query = "SELECT algorithm_name,file_param_name,flag,blobAsText(file_content) as file_content,param_value,type  from {}.{} WHERE " \
                           "algorithm_name = '{}' and flag=1 ALLOW FILTERING;".format(KEYSPACE, CONFIG_TABLE_NAME,
                                                                                      ALGORITHM_NAME)
            session, cluster = self.connect_cassandra()
            latest_df = session.execute(select_query)
            configuration_df = pd.DataFrame(latest_df)
            # dff=pd.DataFrame(latest_df)
            if not configuration_df.empty:
                param_df = configuration_df[configuration_df['type'] == 1]
                config_files_df = configuration_df[configuration_df['type'] ==
                                                   2]
                batch = BatchStatement()
                insert_user = session.prepare(
                    "INSERT INTO {}.{} (algorithm_name, file_param_name,flag) VALUES (?,?,?)"
                    .format(KEYSPACE, CONFIG_TABLE_NAME))
                if not param_df.empty:
                    param_list = list(param_df['file_param_name'])
                    param_df = param_df.set_index('file_param_name')
                    config_param_df = pd.read_csv(
                        CONFIG_PARAM_FILE_PATH).set_index('Parameter')

                    for param in param_list:
                        config_param_df.loc[param, 'Value'] = param_df.loc[
                            param, 'param_value']
                        batch.add(insert_user, (ALGORITHM_NAME, param, 0))
                    config_param_df.to_csv(CONFIG_PARAM_FILE_PATH)
                if not config_files_df.empty:
                    for row in config_files_df.itertuples():
                        df_json = json.loads(row[4])
                        df = pd.DataFrame.from_dict(df_json)
                        filename = CONFIG_FOLDER_NAME + os.sep + row[2]
                        df.to_csv(filename, index=False)
                        batch.add(insert_user, (ALGORITHM_NAME, row[2], 0))
                session.execute(batch)
        except Exception as ex:
            logger.error("Error while updating the config files %s", ex)

        finally:
            if cluster:
                cluster.shutdown()
Example #43
def sendCassandra1(iter):
    print("send to cassandra")
    cluster = Cluster(cassandra_hosts)
    session = cluster.connect(CASSANDRA_KEYSPACE)
    session.set_keyspace("ks")

    insert_statement1 = session.prepare("INSERT INTO totalInputCountSecond (global_id,edit_time,count) VALUES (?,?,?)")

    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    count=0
    for record in iter.collect():
        batch.add(insert_statement1, ('a',record[0], record[1]))
        count += 1
        if count % 500 == 0:
            session.execute(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    session.execute(batch)
    session.shutdown()
Example #44
def sendCassandra4(iter):
    print("send to cassandra")
    cluster = Cluster(cassandra_hosts)
    session = cluster.connect(CASSANDRA_KEYSPACE)
    session.set_keyspace("ks")

    insert_statement4 = session.prepare("INSERT INTO useravgactivity (username,count) VALUES (?,?)")

    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    count=0
    for record in iter.collect():
        batch.add(insert_statement4, (record[0], record[1]))
        count += 1
        if count % 500 == 0:
            session.execute(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
    session.execute(batch)
    session.shutdown()
Example #45
def handler_new(iters):
    session = CassandraConnector().cassandraConnection()
    batch = BatchStatement()
    val = 0
    for record in iters:
        print("REC", record)
        insertQuery = "insert into adtracker ({0}) values('{1}','{2}','{3}','{4}','{5}','{6}'){}".format(
            optional_params, str(record['date']), str(record['phone_number']),
            str(record['imein']), str(record['credit_card']),
            str(record['ip']), record['ssn_number'])
        batch.add(SimpleStatement(insertQuery))
        val = val + 1
        if val > 20:
            session.execute(batch)
            batch = BatchStatement()
            val = 0
    if val != 0:
        session.execute(batch)
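
# Hedged rework of the add step in handler_new() above: let the driver bind the
# values instead of formatting them into the CQL text, which avoids quoting
# problems and injection. It assumes `optional_params` names exactly the six
# columns the original interpolates; the helper name is made up.
def _add_record(batch, record):
    insert_query = "INSERT INTO adtracker ({0}) VALUES (%s, %s, %s, %s, %s, %s)".format(optional_params)
    batch.add(
        SimpleStatement(insert_query),
        (str(record['date']), str(record['phone_number']), str(record['imein']),
         str(record['credit_card']), str(record['ip']), record['ssn_number']))
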
Example #46
def insert(cluster, keyspace, cql_stmt, generator, batch_size):
    session = cluster.connect(keyspace)
    session.default_timeout = 60
    session.default_consistency_level = ConsistencyLevel.LOCAL_ONE
    prepared_stmt = session.prepare(cql_stmt)
    batch_stmt = BatchStatement()

    values = take(batch_size, generator)
    count = 0
    while values:
        batch_stmt.add_all([prepared_stmt] * batch_size, values)
        session.execute(batch_stmt)

        values = take(batch_size, generator)
        batch_stmt.clear()
        if (count % 1e3) == 0:
            print('#blocks {:,.0f}'.format(count), end='\r')
        count += batch_size
Example #47
 def write_next_blocks(self, start_block):
     next_block = blockutil.hash_str(start_block)
     while next_block:
         block_json = blockutil.fetch_block_json(next_block)
         if "nextblockhash" in block_json.keys():
             next_block, block, txs = blockutil.transform_json(block_json)
             batchStmt = BatchStatement()
             batchStmt.add(self.__insert_block_stmt, block)
             for transaction in txs:
                 batchStmt.add(self.__insert_transaction_stmt, transaction)
             while True:
                 try:
                     self.__session.execute(batchStmt)
                 except Exception as err:
                     print("Exception ", err, " retrying...", end="\r")
                     continue
                 break
             print("Wrote block %d" % (block[0]), end="\r")
Example #48
def read_files(inputs, table):
    record_counter = 0
    batch_counter = 0

    batch_insert = BatchStatement()
    insert_statement = session.prepare(
        "INSERT INTO " + table +
        " (host, id, datetime, path, bytes) VALUES (?, ?, ?, ?, ?)")

    # get all files in input folder
    for file in os.listdir(inputs):

        # unzip files
        with gzip.open(os.path.join(inputs, file), 'rt',
                       encoding='utf-8') as logfile:

            # read file line by line
            for line in logfile:
                # create a tuple of requried fields
                log_object = separate_columns(line)

                # if log object is valid
                if (log_object is not None):
                    record_counter += 1
                    batch_insert.add(
                        insert_statement,
                        (log_object[0], log_object[1], log_object[2],
                         log_object[3], log_object[4]))

                # insert records when reached to declared batch size
                if (record_counter >= BATCH_SIZE):
                    print("writing batch " + str(batch_counter))

                    session.execute(batch_insert)
                    batch_insert.clear()

                    record_counter = 0
                    batch_counter += 1

    # to insert the final part with number of rows less than batch size
    if (record_counter > 0):

        print("writing final batch " + str((batch_counter + 1)))
        session.execute(batch_insert)
Example #49
def batchDataInsert(session, data, table):

    #tableDict is a dictionary mapping between the table name that the user provides and the columns in that table
    #the columns will now be stored as text files with columns seperated by newlines in docs directory
    #any new tables add the mapping here
    #probably a better way to do this, feel free to change it
    #tableDict = {"pitches": settings.docDir + "all_pitch_test_columns.txt",
    #             "pitches_all": settings.docDir + "all_db_columns.txt",
    #             }

    # opens the column file from docs corresponding to the table you chose
    #with open(tableDict[table]) as f:
    #     returns a list of columns from your table
    #    read_data = f.read().splitlines()

    # generates a string like "col1,col2,col3...."
    #columns = ','.join(read_data)
    cols=data[0].keys()
    columns=','.join(cols)
    # generates the appropriate number of ? marks for the number of columns in your table
    quests = ('?,' * (len(cols) - 1)) + '?'
    prepareString = "INSERT INTO " + table + " (" + columns + ") VALUES (" + quests + ")"

    insert_data = session.prepare(prepareString)
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)

    counter = 0
    batchList = []

    while counter < len(data):
        batch.add(insert_data.bind(data[counter]))

        #Keep batches short otherwise an error is thrown
        if counter % 50 == 0 and counter > 0:
            batchList.append(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)

        if counter == len(data)-1:
            batchList.append(batch)

        counter += 1

    for batches in batchList:
        session.execute(batches)
Example #50
    def _update_index(self, old_index, task):
        """ Updates the index table after leasing a task.

    Args:
      old_index: The row to remove from the index table.
      task: A Task object to create a new index entry for.
    """
        old_eta = old_index.eta
        update_index = BatchStatement(retry_policy=self.db_access.retry_policy)
        delete_old_index = SimpleStatement("""
      DELETE FROM pull_queue_tasks_index
      WHERE app = %(app)s
      AND queue = %(queue)s
      AND eta = %(eta)s
      AND id = %(id)s
    """)
        parameters = {
            'app': self.app,
            'queue': self.name,
            'eta': old_eta,
            'id': task.id
        }
        update_index.add(delete_old_index, parameters)

        create_new_index = SimpleStatement("""
      INSERT INTO pull_queue_tasks_index (app, queue, eta, id, tag, tag_exists)
      VALUES (%(app)s, %(queue)s, %(eta)s, %(id)s, %(tag)s, %(tag_exists)s)
    """)
        parameters = {
            'app': self.app,
            'queue': self.name,
            'eta': task.leaseTimestamp,
            'id': task.id
        }
        try:
            parameters['tag'] = task.tag
        except AttributeError:
            parameters['tag'] = ''
        parameters['tag_exists'] = parameters['tag'] != ''
        update_index.add(create_new_index, parameters)

        self.db_access.session.execute(update_index)
def sendPartition(iter):
    cassandra_cluster = Cluster(['54.71.115.97', '35.166.209.97', '35.166.89.248', '34.218.167.77'])
    cassandra_session = cassandra_cluster.connect('oilwell')
    insert_statement = cassandra_session.prepare(
        "INSERT INTO well_pressure (id, dt, well_name, pressure_1, pressure_2, pressure_3, pressure_4) VALUES (?, ?, ?, ?, ?, ?, ?)")
    count = 0
    # batch insert into cassandra database
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)

    for record in iter.collect():
        batch.add(insert_statement,
                  (int(record[0]), str(record[1]), str(record[2]), int(record[3]), int(record[4]), int(record[5]), int(record[6])))
        # split the batch, so that the batch will not exceed the size limit
        count += 1
        if count % 300 == 0:
            cassandra_session.execute(batch)
            batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
        # send the batch that is less than 300
    cassandra_session.execute(batch)
    cassandra_session.shutdown()
Example #52
    def test_batch_statement(self):
        session, tracer = self._traced_session()

        batch = BatchStatement()
        batch.add(
            SimpleStatement("INSERT INTO test.person_write (name, age, description) VALUES (%s, %s, %s)"),
            ("Joe", 1, "a"),
        )
        batch.add(
            SimpleStatement("INSERT INTO test.person_write (name, age, description) VALUES (%s, %s, %s)"),
            ("Jane", 2, "b"),
        )
        session.execute(batch)

        spans = tracer.pop()
        assert len(spans) == 1
        s = spans[0]
        assert s.resource == "BatchStatement"
        assert s.get_metric("cassandra.batch_size") == 2
        assert "test.person" in s.get_tag("cassandra.query")
Example #53
    def test_batch_statement(self):
        session, tracer = self._traced_session()

        batch = BatchStatement()
        batch.add(
            SimpleStatement('INSERT INTO test.person_write (name, age, description) VALUES (%s, %s, %s)'),
            ('Joe', 1, 'a'),
        )
        batch.add(
            SimpleStatement('INSERT INTO test.person_write (name, age, description) VALUES (%s, %s, %s)'),
            ('Jane', 2, 'b'),
        )
        session.execute(batch)

        spans = tracer.pop()
        assert len(spans) == 1
        s = spans[0]
        assert s.resource == 'BatchStatement'
        assert s.get_metric('cassandra.batch_size') == 2
        assert 'test.person' in s.get_tag('cassandra.query')
 def write_next_blocks(self, start_block):
     next_block = blockutil.hash_str(start_block)
     while next_block:
         block_json = blockutil.fetch_block_json(next_block)
         next_block, block, txs = blockutil.transform_json(block_json)
         batchStmt = BatchStatement()
         batchStmt.add(self.__insert_block_stmt, block)
         block_group = block[0] // 10000
         tx_number = 0
         for transaction in txs:
             batchStmt.add(self.__insert_transaction_stmt,
                           [block_group, tx_number] + transaction)
             tx_number += 1
         while True:
             try:
                 self.__session.execute(batchStmt)
             except Exception as err:
                 print("Exception ", err, " retrying...", end="\r")
                 continue
             break
         print("Wrote block %d" % (block[0]), end="\r")
Example #55
 def batch_execute_prepared_sqls(self, sqls, parameters, consistency_level=None, keyspace=None):
     """
     不同同操作(语义)的SQL,批量执行,比如批量插入和更新等
     :sqls
         执行Prepared Statements 的Sql,是一个List,每个元素是一条SQL操作
     :parameters
         是一个List,元素是也是一个list或者tuple
     """
     if len(sqls) != len(parameters):
         return False
     
     session = self.get_session(keyspace)
     if consistency_level:
         batch = BatchStatement(consistency_level=consistency_level)
     else:
         batch = BatchStatement(consistency_level=ConsistencyLevel.LOCAL_QUORUM)
     
     for sql, paras in zip(sqls, parameters):
         batch.add(SimpleStatement(sql), paras)
     
     return session.execute(batch)
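
# Hedged usage sketch for batch_execute_prepared_sqls(); `dao` stands for an
# instance of the surrounding class, and the keyspace, table, and columns are made up.
sqls = [
    "INSERT INTO demo.users (user_id, name) VALUES (%s, %s)",
    "UPDATE demo.users SET name = %s WHERE user_id = %s",
]
parameters = [(1, 'alice'), ('bob', 2)]
dao.batch_execute_prepared_sqls(sqls, parameters, keyspace='demo')
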
Example #56
    def save_async(self, trades):
        """
        :type entry_type: str
        :type quote: RTQuote
        """

        query = \
        """
        INSERT INTO trades
        ({})
        VALUES ({})
        """.format(','.join(FIELDS_Trades),
                   ','.join("%s" for _ in FIELDS_Trades))
        batch_statement = BatchStatement()
        for i, trade in trades.iterrows():
            data = tuple(trade[field] for field in FIELDS_Trades)
            batch_statement.add(query,data)
            if len(batch_statement)>= MAX_BATCH_SIZE:
                get_async_manager().execute_async(self._session,batch_statement)
                batch_statement = BatchStatement()
        if len(batch_statement) > 0:
            get_async_manager().execute_async(self._session,batch_statement)
Example #57
def main():
    row_count = 100000
    max_insert = 10

    log.info('truncate table')
    session.execute('truncate ooi.vel3d_k_wfp_instrument')
    log.info('done truncating')

    log.info('generating row data')
    rows = create_rows(row_count)
    now = time.time()
    batches = []
    batch = BatchStatement()
    for i, row in enumerate(rows):
        if (i + 1) % max_insert == 0:
            batches.append((batch, []))
            batch = BatchStatement()
        batch.add(insert, row)
    batches.append((batch, []))
    log.info('inserting')
    execute_concurrent(session, batches, concurrency=50)
    log.info('%d rows: %7.2f sec elapsed', row_count, time.time() - now)
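
# Sketch of an alternative to the hand-built batches in main() above: when the
# generated rows land in many different partitions, running the plain prepared
# insert through execute_concurrent_with_args is a common pattern. It reuses the
# `session`, `insert`, and `rows` names from the snippet; the helper name is made up.
from cassandra.concurrent import execute_concurrent_with_args

def insert_rows_concurrently(rows, concurrency=50):
    return execute_concurrent_with_args(session, insert, rows, concurrency=concurrency)
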
Example #58
    def insert(cls, params):

        idx_start, idx_end = params

        batch_size = 25
        batch_stmt = BatchStatement()

        for index in range(idx_start, idx_end, batch_size):

            curr_batch_size = min(batch_size, idx_end - index)
            for i in range(0, curr_batch_size):
                block = cls.chain[index + i]
                block_tx = [block.height, [tx_stats(x) for x in block.txes]]
                batch_stmt.add(cls.prepared_stmt, block_tx)

            try:
                cls.session.execute(batch_stmt)
            except Exception as e:
                # if the batch insert fails (e.g. batch too large),
                # fall back to ingesting the blocks one at a time
                print(e)
                for i in range(0, curr_batch_size):
                    while True:
                        try:
                            block = cls.chain[index + i]
                            block_tx = [
                                block.height,
                                [tx_stats(x) for x in block.txes]
                            ]
                            cls.session.execute(cls.prepared_stmt, block_tx)
                        except Exception as e:
                            print(e)
                            continue
                        break
            batch_stmt.clear()

            with cls.counter.get_lock():
                cls.counter.value += curr_batch_size
            print('#blocks {:,.0f}'.format(cls.counter.value), end='\r')
Example #59
def main(inputs, key_space, table):
    cluster = Cluster(['199.60.17.188', '199.60.17.216'])
    session = cluster.connect(key_space)
    session.execute("""
            CREATE TABLE IF NOT EXISTS nasalogs (
                host TEXT,                
                datetime TIMESTAMP,
                path TEXT,
                bytes INT,
                recId UUID,
                PRIMARY KEY (host,recId)
            )
            """)
    session.execute("""TRUNCATE nasalogs;""")
    insert_log = session.prepare(
        "INSERT INTO " + table +
        " (host,datetime,path,bytes,recId) VALUES (?,?,?,?,?)")
    batch = BatchStatement(consistency_level=ConsistencyLevel.ONE)
    c = 0
    for g_file in os.listdir(inputs):
        with gzip.open(os.path.join(inputs, g_file), 'rt',
                       encoding='utf-8') as logfile:
            for line in logfile:
                w = get_words(line)
                if len(w) > 4:
                    c += 1
                    batch.add(
                        insert_log,
                        (w[1],
                         datetime.datetime.strptime(w[2], '%d/%b/%Y:%H:%M:%S'),
                         w[3], int(w[4]), uid()))
                if (c == 400):
                    session.execute(batch)
                    batch.clear()
                    c = 0

    session.execute(batch)
    cluster.shutdown()
Example #60
    def save_async(self, quotes):
        """
        :type entry_type: str
        :type quote: RTQuote
        """


        batch_statement = BatchStatement()
        for quote in quotes:
            query = \
                """
                INSERT INTO quotes
                ({})
                VALUES ({})
                """.format(','.join(quote.keys()),
                           ','.join("%s" for _ in quote.keys()))
            data = tuple(quote[field] for field in quote.keys())
            batch_statement.add(query,data)
            if len(batch_statement)>= MAX_BATCH_SIZE:
                get_async_manager().execute_async(self._session,batch_statement)
                batch_statement = BatchStatement()
        if len(batch_statement) > 0:
            get_async_manager().execute_async(self._session,batch_statement)