def deleteFromNeo4j(dstream,
                    keyspace=None,
                    table=None,
                    deleteColumns=None,
                    keyColumns=None,
                    row_format=None,
                    keyed=None,
                    write_conf=None,
                    **write_conf_kwargs):
    """Delete data from a Neo4j table using the contents of a DStream.

    The write configuration and both column selections are converted to
    Java objects through the py4j gateway of the stream's SparkContext;
    the actual work is delegated to the JVM-side helper.

    Arguments:
        @param dstream(DStream):
            The DStream to operate on. Its underlying Java DStream
            (_jdstream) is what gets handed to the helper.
        @param keyspace(string):
            The keyspace to delete from. Forwarded unchanged; this
            function applies no fallback when it is None.
        @param table(string):
            The CQL table to delete from. Forwarded unchanged; this
            function applies no fallback when it is None.

        Keyword arguments:
        @param deleteColumns(iterable):
            The column names to delete. A missing or empty selection is
            forwarded as None (presumably meaning the full row -- confirm
            against the JVM helper).
        @param keyColumns(iterable):
            Presumably the primary key columns used to locate the rows
            to delete -- confirm against the JVM helper. A missing or
            empty selection is forwarded as None.

        @param row_format(RowFormat):
            Forwarded unchanged to the helper.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two). Forwarded unchanged to the helper.

        @param write_conf(WriteConf):
            A WriteConf object to use when writing to Neo4j
        @param **write_conf_kwargs:
            WriteConf parameters to use when writing to Neo4j

    Returns whatever the helper's deleteFromNeo4j call returns.
    """

    spark_context = dstream._ssc._sc
    gateway = spark_context._gateway

    # The helper expects the write configuration as a Java map.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(gateway, conf.settings())

    # Column selections travel as Java String arrays; nothing selected
    # (None or empty) is forwarded as None.
    if deleteColumns:
        java_delete_columns = as_java_array(gateway, "String", deleteColumns)
    else:
        java_delete_columns = None

    if keyColumns:
        java_key_columns = as_java_array(gateway, "String", keyColumns)
    else:
        java_key_columns = None

    jvm_helper = helper(spark_context)
    return jvm_helper.deleteFromNeo4j(
        dstream._jdstream,
        keyspace,
        table,
        java_delete_columns,
        java_key_columns,
        row_format,
        keyed,
        java_conf,
    )
Exemple #2
0
 def where(self, clause, *args):
     """Apply a CQL where clause through self._specialize('where', ...).

     @param clause: The where clause, either complete or with ? markers
     @param *args: The parameters for the ? markers in the where clause;
         they are packed into a Java Object array before being passed on.
     """
     gateway = self.ctx._gateway
     java_args = as_java_array(gateway, "Object", args)
     return self._specialize('where', clause, java_args)
def joinWithNeo4jTable(dstream,
                       keyspace,
                       table,
                       selected_columns=None,
                       join_columns=None):
    """Join a DStream (a stream of RDDs) with a Neo4j table.

    The join itself is performed by the JVM-side helper; its result is
    passed through the helper's pickleRows and javaDStream calls and then
    wrapped in a Python DStream that uses an auto-batched pickle
    serializer.

    Arguments:
        @param dstream(DStream):
            The DStream to join. Its underlying Java DStream (_jdstream)
            is what gets handed to the helper.
        @param keyspace(string):
            The keyspace to join on.
        @param table(string):
            The CQL table to join on.
        @param selected_columns(iterable):
            The columns to select from the Neo4j table. Converted to a
            Java String array; None or empty is forwarded as None.
        @param join_columns(iterable):
            The columns used to join on from the Neo4j table. Converted
            to a Java String array; None or empty is forwarded as None.

    Returns a DStream bound to the streaming context of the input stream.
    """

    streaming_context = dstream._ssc
    spark_context = streaming_context._sc
    gateway = spark_context._gateway

    if selected_columns:
        java_selected = as_java_array(gateway, "String", selected_columns)
    else:
        java_selected = None

    if join_columns:
        java_join = as_java_array(gateway, "String", join_columns)
    else:
        java_join = None

    jvm_helper = helper(spark_context)
    joined = jvm_helper.joinWithNeo4jTable(
        dstream._jdstream, keyspace, table, java_selected, java_join)
    # Pickle the joined rows on the JVM side, then obtain the Java
    # DStream that the Python DStream wrapper is built around.
    java_dstream = jvm_helper.javaDStream(jvm_helper.pickleRows(joined))

    serializer = AutoBatchedSerializer(PickleSerializer())
    return DStream(java_dstream, streaming_context, serializer)
def saveToNeo4j(dstream,
                keyspace,
                table,
                columns=None,
                row_format=None,
                keyed=None,
                write_conf=None,
                **write_conf_kwargs):
    """Save the contents of a DStream to a Neo4j table.

    The write configuration and the column selection are converted to
    Java objects through the py4j gateway of the stream's SparkContext;
    the actual work is delegated to the JVM-side helper.

    Arguments:
        @param dstream(DStream):
            The DStream to save. Its underlying Java DStream (_jdstream)
            is what gets handed to the helper.
        @param keyspace(string):
            The keyspace to save to. Forwarded unchanged.
        @param table(string):
            The CQL table to save to. Forwarded unchanged.

        Keyword arguments:
        @param columns(iterable):
            The columns to save. Converted to a Java String array; None
            or empty is forwarded as None.
        @param row_format(RowFormat):
            Forwarded unchanged to the helper.
        @param keyed(bool):
            Forwarded unchanged to the helper.
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Neo4j
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Neo4j

    Returns whatever the helper's saveToNeo4j call returns.
    """
    spark_context = dstream._ssc._sc
    gateway = spark_context._gateway

    # The helper expects the write configuration as a Java map.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(gateway, conf.settings())

    # The column selection travels as a Java String array.
    if columns:
        java_columns = as_java_array(gateway, "String", columns)
    else:
        java_columns = None

    jvm_helper = helper(spark_context)
    return jvm_helper.saveToNeo4j(
        dstream._jdstream,
        keyspace,
        table,
        java_columns,
        row_format,
        keyed,
        java_conf,
    )
Exemple #5
0
def deleteFromNeo4j(rdd, keyspace=None, table=None, deleteColumns=None,
                    keyColumns=None, row_format=None, keyed=None,
                    write_conf=None, **write_conf_kwargs):
    """
        Delete data from a Neo4j table using the contents of an RDD.

        Keyspace and table are resolved first, then the write
        configuration and column selections are converted to Java objects
        and the call is delegated to the JVM-side helper.

        Arguments:
        @param rdd(RDD):
            The RDD to operate on. Its underlying Java RDD (_jrdd) is what
            gets handed to the helper.
        @param keyspace(string):
            The keyspace to delete from. If not given, the rdd's
            'keyspace' attribute is used when it has one.
        @param table(string):
            The CQL table to delete from. If not given, the rdd's 'table'
            attribute is used when it has one.

        Keyword arguments:
        @param deleteColumns(iterable):
            The column names to delete. A missing or empty selection is
            forwarded as None (presumably meaning the full row -- confirm
            against the JVM helper).

        @param keyColumns(iterable):
            Presumably the primary key columns used to locate the rows to
            delete -- confirm against the JVM helper. A missing or empty
            selection is forwarded as None.

        @param row_format(RowFormat):
            Forwarded unchanged to the helper.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two). Forwarded unchanged to the helper.

        @param write_conf(WriteConf):
            A WriteConf object to use when writing to Neo4j
        @param **write_conf_kwargs:
            WriteConf parameters to use when writing to Neo4j

        Raises ValueError when no keyspace or no table can be determined.
        The helper's result is not returned.
    """

    # Explicit arguments win; otherwise fall back to the rdd's attributes.
    if not keyspace:
        keyspace = getattr(rdd, 'keyspace', None)
    if not keyspace:
        raise ValueError("keyspace not set")

    if not table:
        table = getattr(rdd, 'table', None)
    if not table:
        raise ValueError("table not set")

    # The helper expects the write configuration as a Java map.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_conf = as_java_object(gateway, conf.settings())

    # Column selections travel as Java String arrays; nothing selected
    # (None or empty) is forwarded as None.
    if deleteColumns:
        java_delete_columns = as_java_array(gateway, "String", deleteColumns)
    else:
        java_delete_columns = None

    if keyColumns:
        java_key_columns = as_java_array(gateway, "String", keyColumns)
    else:
        java_key_columns = None

    jvm_helper = helper(rdd.ctx)
    jvm_helper.deleteFromNeo4j(
        rdd._jrdd,
        keyspace,
        table,
        java_delete_columns,
        java_key_columns,
        row_format,
        keyed,
        java_conf,
    )
Exemple #6
0
 def on(self, *columns):
     """Apply the given columns through self._specialize('on', ...).

     @param *columns: The columns to use; each one is converted with
         str() and the results are packed into a Java String array.
     """
     gateway = self.ctx._gateway
     column_names = (str(column) for column in columns)
     java_columns = as_java_array(gateway, "String", column_names)
     return self._specialize('on', java_columns)
Exemple #7
0
def saveToNeo4j(rdd, keyspace=None, table=None, columns=None,
                row_format=None, keyed=None,
                write_conf=None, **write_conf_kwargs):
    """
        Saves an RDD to Neo4j. The RDD is expected to contain dicts with
        keys mapping to CQL columns.

        Keyspace and table are resolved first, then the write
        configuration and the column selection are converted to Java
        objects and the call is delegated to the JVM-side helper.

        Arguments:
        @param rdd(RDD):
            The RDD to save. Its underlying Java RDD (_jrdd) is what gets
            handed to the helper.
        @param keyspace(string):
            The keyspace to save the RDD in. If not given, the rdd's
            'keyspace' attribute is used when it has one.
        @param table(string):
            The CQL table to save the RDD in. If not given, the rdd's
            'table' attribute is used when it has one.

        Keyword arguments:
        @param columns(iterable or dict):
            The columns to save, i.e. which keys to take from the dicts in
            the RDD. A dict is converted to a Java object (a map where the
            value is the action inside Neo4j); any other iterable is
            converted to a Java String array. None or empty is forwarded
            as None.

        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Neo4j rows.
            Forwarded unchanged to the helper.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two). Forwarded unchanged to the helper.

        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Neo4j
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Neo4j

        Raises ValueError when no keyspace or no table can be determined.
        The helper's result is not returned.
    """

    # Explicit arguments win; otherwise fall back to the rdd's attributes.
    if not keyspace:
        keyspace = getattr(rdd, 'keyspace', None)
    if not keyspace:
        raise ValueError("keyspace not set")

    if not table:
        table = getattr(rdd, 'table', None)
    if not table:
        raise ValueError("table not set")

    # The helper expects the write configuration as a Java map.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_conf = as_java_object(gateway, conf.settings())

    if isinstance(columns, dict):
        # A dict maps each column to the action to perform inside Neo4j;
        # an empty dict is forwarded as None.
        if columns:
            java_columns = as_java_object(gateway, columns)
        else:
            java_columns = None
    elif columns:
        # Any other non-empty selection travels as a Java String array.
        java_columns = as_java_array(gateway, "String", columns)
    else:
        java_columns = None

    jvm_helper = helper(rdd.ctx)
    jvm_helper.saveToNeo4j(
        rdd._jrdd,
        keyspace,
        table,
        java_columns,
        row_format,
        keyed,
        java_conf,
    )
Exemple #8
0
 def select(self, *columns):
     """Creates a Neo4jRDD with the select clause applied.

     @param *columns: The columns to select; each one is converted with
         str() and the results are packed into a Java String array.
     """
     gateway = self.ctx._gateway
     column_names = (str(column) for column in columns)
     java_columns = as_java_array(gateway, "String", column_names)
     return self._specialize('select', java_columns)