コード例 #1
0
ファイル: rdd.py プロジェクト: OlegGorj/pyspark-cassandra-1
def saveToCassandra(rdd, keyspace=None, table=None, columns=None, row_format=None, keyed=None,
                    write_conf=None, **write_conf_kwargs):
    '''
        Saves an RDD to Cassandra. The RDD is expected to contain dicts with keys mapping to CQL
        columns.

        Arguments:
        @param rdd(RDD):
            The RDD to save. Equals to self when invoking saveToCassandra on a monkey patched RDD.
        @param keyspace(string):in
            The keyspace to save the RDD in. If not given and the rdd is a CassandraRDD the same
            keyspace is used.
        @param table(string):
            The CQL table to save the RDD in. If not given and the rdd is a CassandraRDD the same
            table is used.

        Keyword arguments:
        @param columns(iterable):
            The columns to save, i.e. which keys to take from the dicts in the RDD.
            If None given all columns are be stored.

        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Cassandra rows.
            If None given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not arrays of length
            two).

        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra
    '''

    keyspace = keyspace or getattr(rdd, 'keyspace', None)
    if not keyspace:
        raise ValueError("keyspace not set")

    table = table or getattr(rdd, 'table', None)
    if not table:
        raise ValueError("table not set")

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(rdd.ctx._gateway, write_conf.settings())
    # convert the columns to a string array
    columns = as_java_array(rdd.ctx._gateway, "String", columns) if columns else None

    helper(rdd.ctx) \
        .saveToCassandra(
            rdd._jrdd,
            keyspace,
            table,
            columns,
            row_format,
            keyed,
            write_conf,
        )
コード例 #2
0
def deleteFromCassandra(dstream, keyspace=None, table=None, deleteColumns=None,
                        keyColumns=None,
                        row_format=None, keyed=None, write_conf=None,
                        **write_conf_kwargs):
    """Delete data from Cassandra table, using data from the RDD as primary
    keys. Uses the specified column names.

    Arguments:
       @param dstream(DStream)
        The DStream to join. Equals to self when invoking
        joinWithCassandraTable on a monkey patched RDD.
        @param keyspace(string):in
            The keyspace to save the RDD in. If not given and the rdd is a
            CassandraRDD the same keyspace is used.
        @param table(string):
            The CQL table to save the RDD in. If not given and the rdd is a
            CassandraRDD the same table is used.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete, empty ColumnSelector means full
            row.

        @param keyColumns(iterable):
            The list of column names to delete, empty ColumnSelector means full
            row.

        @param row_format(RowFormat):
            Primary key columns selector, Optional. All RDD primary columns
            columns will be checked by default
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).

        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra
    """

    ctx = dstream._ssc._sc
    gw = ctx._gateway

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(gw, write_conf.settings())
    # convert the columns to a string array
    deleteColumns = as_java_array(gw, "String",
                                  deleteColumns) if deleteColumns else None
    keyColumns = as_java_array(gw, "String", keyColumns) \
        if keyColumns else None

    return helper(ctx).deleteFromCassandra(dstream._jdstream, keyspace, table,
                                           deleteColumns, keyColumns,
                                           row_format,
                                           keyed, write_conf)
コード例 #3
0
ファイル: streaming.py プロジェクト: clatko/pyspark-cassandra
def saveToCassandra(dstream, keyspace, table, columns=None, row_format=None, keyed=None, write_conf=None,
                    **write_conf_kwargs):
    ctx = dstream._ssc._sc
    gw = ctx._gateway

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(gw, write_conf.settings())
    # convert the columns to a string array
    columns = as_java_array(gw, "String", columns) if columns else None

    return helper(ctx).saveToCassandra(dstream._jdstream, keyspace, table, columns, row_format, keyed, write_conf)
コード例 #4
0
def saveToCassandra(dstream, keyspace, table, columns=None, row_format=None,
                    keyed=None,
                    write_conf=None, **write_conf_kwargs):
    ctx = dstream._ssc._sc
    gw = ctx._gateway

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(gw, write_conf.settings())
    # convert the columns to a string array
    columns = as_java_array(gw, "String", columns) if columns else None

    return helper(ctx).saveToCassandra(dstream._jdstream, keyspace, table,
                                       columns, row_format,
                                       keyed, write_conf)
コード例 #5
0
    def test_write_conf(self):
        rdd = self.sc.parallelize(
            [{'key': i, 'text': i, 'int': i} for i in range(10)])
        save = partial(rdd.saveToCassandra, self.keyspace, self.table)

        save(batch_size=100)
        save(batch_buffer_size=100)
        save(batch_grouping_key='replica_set')
        save(batch_grouping_key='partition')
        save(consistency_level='ALL')
        save(consistency_level=ConsistencyLevel.LOCAL_QUORUM)
        save(parallelism_level=10)
        save(throughput_mibps=10)
        save(ttl=5)
        save(ttl=timedelta(minutes=30))
        save(timestamp=time.clock() * 1000 * 1000)
        save(timestamp=datetime.now())
        save(metrics_enabled=True)
        save(write_conf=WriteConf(ttl=3, metrics_enabled=True))
コード例 #6
0
def deleteFromCassandra(rdd,
                        keyspace=None,
                        table=None,
                        deleteColumns=None,
                        keyColumns=None,
                        row_format=None,
                        keyed=None,
                        write_conf=None,
                        **write_conf_kwargs):
    """
        Delete data from Cassandra table, using data from the RDD as primary
        keys. Uses the specified column names.

        Arguments:
        @param rdd(RDD):
            The RDD to save. Equals to self when invoking saveToCassandra on a
            monkey patched RDD.
        @param keyspace(string):in
            The keyspace to save the RDD in. If not given and the rdd is a
            CassandraRDD the same keyspace is used.
        @param table(string):
            The CQL table to save the RDD in. If not given and the rdd is a
            CassandraRDD the same table is used.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete, empty ColumnSelector means full
            row.

        @param keyColumns(iterable):
            The list of column names to delete, empty ColumnSelector means full
            row.

        @param row_format(RowFormat):
            Primary key columns selector, Optional. All RDD primary columns
            columns will be checked by default
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).

        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra
    """

    keyspace = keyspace or getattr(rdd, 'keyspace', None)
    if not keyspace:
        raise ValueError("keyspace not set")

    table = table or getattr(rdd, 'table', None)
    if not table:
        raise ValueError("table not set")

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(rdd.ctx._gateway, write_conf.settings())

    # convert the columns to a string array
    deleteColumns = as_java_array(rdd.ctx._gateway, "String", deleteColumns) \
        if deleteColumns else None
    keyColumns = as_java_array(rdd.ctx._gateway, "String", keyColumns) \
        if keyColumns else None

    helper(rdd.ctx) \
        .deleteFromCassandra(
        rdd._jrdd,
        keyspace,
        table,
        deleteColumns,
        keyColumns,
        row_format,
        keyed,
        write_conf,
    )