def saveToCassandra(rdd, keyspace=None, table=None, columns=None, row_format=None, keyed=None,
                    write_conf=None, **write_conf_kwargs):
    """
        Saves an RDD to Cassandra. The RDD is expected to contain dicts with
        keys mapping to CQL columns.

        Arguments:
        @param rdd(RDD):
            The RDD to save. Equals to self when invoking saveToCassandra on a
            monkey patched RDD.
        @param keyspace(string):
            The keyspace to save the RDD in. If not given, the ``keyspace``
            attribute of the rdd (e.g. of a CassandraRDD) is used.
        @param table(string):
            The CQL table to save the RDD in. If not given, the ``table``
            attribute of the rdd (e.g. of a CassandraRDD) is used.

        Keyword arguments:
        @param columns(iterable):
            The columns to save, i.e. which keys to take from the dicts in the
            RDD. If None (or empty) is given, no column selection is passed on
            and all columns are stored.
        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Cassandra rows.
            If None given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra.

        @raise ValueError:
            If no keyspace or no table could be determined.
    """
    # Explicit arguments win; otherwise fall back to what the RDD itself carries.
    target_keyspace = keyspace if keyspace else getattr(rdd, 'keyspace', None)
    if not target_keyspace:
        raise ValueError("keyspace not set")

    target_table = table if table else getattr(rdd, 'table', None)
    if not target_table:
        raise ValueError("table not set")

    # The write configuration crosses the py4j boundary as a map of settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_conf = as_java_object(gateway, conf.settings())

    # The column selection crosses the boundary as a Java String array.
    java_columns = None
    if columns:
        java_columns = as_java_array(gateway, "String", columns)

    java_helper = helper(rdd.ctx)
    java_helper.saveToCassandra(
        rdd._jrdd,
        target_keyspace,
        target_table,
        java_columns,
        row_format,
        keyed,
        java_conf,
    )
def deleteFromCassandra(dstream, keyspace=None, table=None, deleteColumns=None, keyColumns=None,
                        row_format=None, keyed=None, write_conf=None, **write_conf_kwargs):
    """
        Delete data from a Cassandra table, using data from the DStream as
        primary keys. Uses the specified column names.

        Arguments:
        @param dstream(DStream):
            The DStream whose elements identify the rows to delete. Equals to
            self when invoking deleteFromCassandra on a monkey patched DStream.
        @param keyspace(string):
            The keyspace of the table to delete from. Must be given.
        @param table(string):
            The CQL table to delete from. Must be given.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete. If None (or empty) is given no
            column selection is passed on, which means the full row.
        @param keyColumns(iterable):
            Primary key columns selector, optional. If None (or empty) is
            given no selection is passed on; all primary key columns are
            checked by default.
        @param row_format(RowFormat):
            Make explicit how to map the DStream elements into Cassandra rows.
            If None given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Make explicit that the DStream consists of key, value tuples (and
            not arrays of length two).
        @param write_conf(WriteConf):
            A WriteConf object to use when deleting from Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when deleting from Cassandra.

        @return:
            Whatever the JVM helper's deleteFromCassandra returns.
        @raise ValueError:
            If keyspace or table is not given.
    """
    # Both parameters default to None for signature compatibility, but the
    # target table must be known. Fail fast here with a clear Python error
    # instead of forwarding None across the py4j boundary.
    if not keyspace:
        raise ValueError("keyspace not set")
    if not table:
        raise ValueError("table not set")

    ctx = dstream._ssc._sc
    gw = ctx._gateway

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(gw, write_conf.settings())

    # convert the column selections to string arrays
    deleteColumns = as_java_array(gw, "String", deleteColumns) if deleteColumns else None
    keyColumns = as_java_array(gw, "String", keyColumns) if keyColumns else None

    return helper(ctx).deleteFromCassandra(
        dstream._jdstream,
        keyspace,
        table,
        deleteColumns,
        keyColumns,
        row_format,
        keyed,
        write_conf,
    )
def saveToCassandra(dstream, keyspace, table, columns=None, row_format=None, keyed=None,
                    write_conf=None, **write_conf_kwargs):
    """
        Saves a DStream to Cassandra by delegating to the JVM helper.

        Arguments:
        @param dstream(DStream):
            The DStream to save.
        @param keyspace(string):
            The keyspace to save the DStream in.
        @param table(string):
            The CQL table to save the DStream in.

        Keyword arguments:
        @param columns(iterable):
            The columns to save. If None (or empty) is given, no column
            selection is passed on to the helper.
        @param row_format(RowFormat):
            Passed through to the helper unchanged.
        @param keyed(bool):
            Passed through to the helper unchanged.
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra.

        @return:
            Whatever the JVM helper's saveToCassandra returns.
    """
    spark_context = dstream._ssc._sc
    gateway = spark_context._gateway

    # The write configuration crosses the py4j boundary as a map of settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(gateway, conf.settings())

    # The column selection crosses the boundary as a Java String array.
    java_columns = None
    if columns:
        java_columns = as_java_array(gateway, "String", columns)

    java_helper = helper(spark_context)
    return java_helper.saveToCassandra(
        dstream._jdstream,
        keyspace,
        table,
        java_columns,
        row_format,
        keyed,
        java_conf,
    )
def test_write_conf(self):
    """
        Exercise saveToCassandra with a range of write configuration options,
        passed both as keyword arguments and as a WriteConf object. There are
        no assertions; the test passes when none of the saves raises.
    """
    rdd = self.sc.parallelize(
        [{'key': i, 'text': i, 'int': i} for i in range(10)])
    save = partial(rdd.saveToCassandra, self.keyspace, self.table)

    # batching options
    save(batch_size=100)
    save(batch_buffer_size=100)
    save(batch_grouping_key='replica_set')
    save(batch_grouping_key='partition')

    # consistency level, both as a plain string and as an enum member
    save(consistency_level='ALL')
    save(consistency_level=ConsistencyLevel.LOCAL_QUORUM)

    save(parallelism_level=10)
    save(throughput_mibps=10)

    # ttl, both as a number and as a timedelta
    save(ttl=5)
    save(ttl=timedelta(minutes=30))

    # timestamp, both as a number and as a datetime.
    # time.clock() was removed in Python 3.8; time.time() is available on
    # every Python version. The value is seconds scaled by 1e6 (presumably
    # microseconds, as the original scaling suggests).
    save(timestamp=time.time() * 1000 * 1000)
    save(timestamp=datetime.now())

    save(metrics_enabled=True)

    # a pre-built WriteConf object instead of keyword arguments
    save(write_conf=WriteConf(ttl=3, metrics_enabled=True))
def deleteFromCassandra(rdd, keyspace=None, table=None, deleteColumns=None, keyColumns=None,
                        row_format=None, keyed=None, write_conf=None, **write_conf_kwargs):
    """
        Delete data from a Cassandra table, using data from the RDD as primary
        keys. Uses the specified column names.

        Arguments:
        @param rdd(RDD):
            The RDD whose elements identify the rows to delete. Equals to self
            when invoking deleteFromCassandra on a monkey patched RDD.
        @param keyspace(string):
            The keyspace of the table to delete from. If not given, the
            ``keyspace`` attribute of the rdd (e.g. of a CassandraRDD) is used.
        @param table(string):
            The CQL table to delete from. If not given, the ``table``
            attribute of the rdd (e.g. of a CassandraRDD) is used.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete. If None (or empty) is given no
            column selection is passed on, which means the full row.
        @param keyColumns(iterable):
            Primary key columns selector, optional. If None (or empty) is
            given no selection is passed on; all primary key columns are
            checked by default.
        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Cassandra rows.
            If None given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).
        @param write_conf(WriteConf):
            A WriteConf object to use when deleting from Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when deleting from Cassandra.

        @raise ValueError:
            If no keyspace or no table could be determined.
    """
    # Explicit arguments win; otherwise fall back to what the RDD itself carries.
    target_keyspace = keyspace if keyspace else getattr(rdd, 'keyspace', None)
    if not target_keyspace:
        raise ValueError("keyspace not set")

    target_table = table if table else getattr(rdd, 'table', None)
    if not target_table:
        raise ValueError("table not set")

    # The write configuration crosses the py4j boundary as a map of settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_conf = as_java_object(gateway, conf.settings())

    # Column selections cross the boundary as Java String arrays.
    java_delete_columns = None
    if deleteColumns:
        java_delete_columns = as_java_array(gateway, "String", deleteColumns)

    java_key_columns = None
    if keyColumns:
        java_key_columns = as_java_array(gateway, "String", keyColumns)

    java_helper = helper(rdd.ctx)
    java_helper.deleteFromCassandra(
        rdd._jrdd,
        target_keyspace,
        target_table,
        java_delete_columns,
        java_key_columns,
        row_format,
        keyed,
        java_conf,
    )