def deleteFromNeo4j(dstream, keyspace=None, table=None, deleteColumns=None, keyColumns=None,
                    row_format=None, keyed=None, write_conf=None, **write_conf_kwargs):
    """
        Delete data from a Neo4j table, using the data in the DStream as
        primary keys. Uses the specified column names.

        Arguments:
        @param dstream(DStream):
            The DStream whose elements identify what to delete.
        @param keyspace(string):
            The keyspace to delete from. Forwarded unchanged to the JVM
            helper; no fallback is derived from the DStream in this function.
        @param table(string):
            The table to delete from. Forwarded unchanged as well.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete. None or an empty iterable is
            forwarded as None (documented upstream as "full row").
        @param keyColumns(iterable):
            The primary key columns selector. None or an empty iterable is
            forwarded as None.
            NOTE(review): presumably all primary key columns are checked by
            default in that case -- confirm against the JVM helper.
        @param row_format(RowFormat):
            Forwarded unchanged. Presumably makes explicit how the elements
            map onto rows -- confirm against RowFormat.
        @param keyed(bool):
            Make explicit that the elements are key, value tuples (and not
            arrays of length two). Forwarded unchanged.
        @param write_conf(WriteConf):
            A WriteConf object to use for the operation.
        @param **write_conf_kwargs:
            WriteConf parameters, combined with write_conf through
            WriteConf.build.

        @return: Whatever the JVM helper's deleteFromNeo4j returns.
    """
    spark_context = dstream._ssc._sc
    gateway = spark_context._gateway

    def to_string_array(names):
        # A falsy selection (None or empty) is handed to the JVM as None.
        return as_java_array(gateway, "String", names) if names else None

    # The write configuration crosses the gateway as a map of its settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(gateway, conf.settings())

    java_delete_columns = to_string_array(deleteColumns)
    java_key_columns = to_string_array(keyColumns)

    return helper(spark_context).deleteFromNeo4j(
        dstream._jdstream,
        keyspace,
        table,
        java_delete_columns,
        java_key_columns,
        row_format,
        keyed,
        java_conf,
    )
def where(self, clause, *args):
    """
        Creates a Neo4jRDD with a CQL where clause applied.

        @param clause: The where clause, either complete or with ? markers.
        @param *args: The parameters for the ? markers in the where clause.
            They are converted to a Java Object array before being passed on.

        @return: The result of self._specialize('where', clause, <java array>).
    """
    java_params = as_java_array(self.ctx._gateway, "Object", args)
    return self._specialize('where', clause, java_params)
def joinWithNeo4jTable(dstream, keyspace, table, selected_columns=None, join_columns=None):
    """
        Joins a DStream (a stream of RDDs) with a Neo4j table.

        Arguments:
        @param dstream(DStream):
            The DStream to join.
        @param keyspace(string):
            The keyspace to join on.
        @param table(string):
            The CQL table to join on.
        @param selected_columns(iterable):
            The columns to select from the Neo4j table. Converted to a Java
            String array; None or empty is forwarded as None.
        @param join_columns(iterable):
            The columns used to join on from the Neo4j table. Converted the
            same way as selected_columns.

        @return: A new DStream wrapping the joined, pickled JVM stream, bound
            to the same streaming context as the input and deserialized with
            AutoBatchedSerializer(PickleSerializer()).
    """
    streaming_context = dstream._ssc
    spark_context = streaming_context._sc
    gateway = spark_context._gateway

    def to_string_array(names):
        # A falsy selection (None or empty) is handed to the JVM as None.
        return as_java_array(gateway, "String", names) if names else None

    java_selected = to_string_array(selected_columns)
    java_join = to_string_array(join_columns)

    jvm_helper = helper(spark_context)

    # JVM side pipeline: join -> pickle the rows -> expose as a Java DStream.
    joined = jvm_helper.joinWithNeo4jTable(
        dstream._jdstream, keyspace, table, java_selected, java_join
    )
    pickled = jvm_helper.pickleRows(joined)
    java_dstream = jvm_helper.javaDStream(pickled)

    return DStream(
        java_dstream,
        streaming_context,
        AutoBatchedSerializer(PickleSerializer()),
    )
def saveToNeo4j(dstream, keyspace, table, columns=None, row_format=None, keyed=None,
                write_conf=None, **write_conf_kwargs):
    """
        Saves a DStream to Neo4j.

        Arguments:
        @param dstream(DStream):
            The DStream to save.
        @param keyspace(string):
            The keyspace to save the DStream in.
        @param table(string):
            The table to save the DStream in.

        Keyword arguments:
        @param columns(iterable):
            The columns to save. Converted to a Java String array; None or
            empty is forwarded as None.
        @param row_format(RowFormat):
            Forwarded unchanged to the JVM helper.
        @param keyed(bool):
            Forwarded unchanged to the JVM helper.
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Neo4j.
        @param **write_conf_kwargs:
            WriteConf parameters, combined with write_conf through
            WriteConf.build.

        @return: Whatever the JVM helper's saveToNeo4j returns.
    """
    spark_context = dstream._ssc._sc
    gateway = spark_context._gateway

    # The write configuration crosses the gateway as a map of its settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(gateway, conf.settings())

    # Only a non-empty column selection is turned into a Java String array.
    java_columns = None
    if columns:
        java_columns = as_java_array(gateway, "String", columns)

    return helper(spark_context).saveToNeo4j(
        dstream._jdstream,
        keyspace,
        table,
        java_columns,
        row_format,
        keyed,
        java_conf,
    )
def deleteFromNeo4j(rdd, keyspace=None, table=None, deleteColumns=None, keyColumns=None,
                    row_format=None, keyed=None, write_conf=None, **write_conf_kwargs):
    """
        Delete data from a Neo4j table, using the data in the RDD as primary
        keys. Uses the specified column names.

        Arguments:
        @param rdd(RDD):
            The RDD whose elements identify what to delete.
        @param keyspace(string):
            The keyspace to delete from. If not given, the `keyspace`
            attribute of the rdd is used when it has one.
        @param table(string):
            The table to delete from. If not given, the `table` attribute of
            the rdd is used when it has one.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete. None or an empty iterable is
            forwarded as None (documented upstream as "full row").
        @param keyColumns(iterable):
            The primary key columns selector. None or an empty iterable is
            forwarded as None.
            NOTE(review): presumably all primary key columns are checked by
            default in that case -- confirm against the JVM helper.
        @param row_format(RowFormat):
            Forwarded unchanged. Presumably makes explicit how the RDD
            elements map onto rows -- confirm against RowFormat.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).
        @param write_conf(WriteConf):
            A WriteConf object to use for the operation.
        @param **write_conf_kwargs:
            WriteConf parameters, combined with write_conf through
            WriteConf.build.

        @raise ValueError: If no keyspace or no table can be determined.
    """
    # Resolve the target: an explicit argument wins, otherwise fall back to
    # what the rdd itself carries.
    if not keyspace:
        keyspace = getattr(rdd, 'keyspace', None)
        if not keyspace:
            raise ValueError("keyspace not set")

    if not table:
        table = getattr(rdd, 'table', None)
        if not table:
            raise ValueError("table not set")

    # The write configuration crosses the gateway as a map of its settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(rdd.ctx._gateway, conf.settings())

    # Only non-empty column selections are turned into Java String arrays.
    java_delete_columns = None
    if deleteColumns:
        java_delete_columns = as_java_array(rdd.ctx._gateway, "String", deleteColumns)

    java_key_columns = None
    if keyColumns:
        java_key_columns = as_java_array(rdd.ctx._gateway, "String", keyColumns)

    jvm_helper = helper(rdd.ctx)
    jvm_helper.deleteFromNeo4j(
        rdd._jrdd,
        keyspace,
        table,
        java_delete_columns,
        java_key_columns,
        row_format,
        keyed,
        java_conf,
    )
def on(self, *columns):
    """
        Specializes this RDD with an 'on' clause over the given columns.

        @param *columns: The columns for the clause. Each one is passed
            through str() and the result is sent as a Java String array.
            NOTE(review): presumably these are the columns to join on --
            confirm against the class this method belongs to.

        @return: The result of self._specialize('on', <java array>).
    """
    column_names = (str(column) for column in columns)
    java_columns = as_java_array(self.ctx._gateway, "String", column_names)
    return self._specialize('on', java_columns)
def saveToNeo4j(rdd, keyspace=None, table=None, columns=None, row_format=None, keyed=None,
                write_conf=None, **write_conf_kwargs):
    """
        Saves an RDD to Neo4j. The RDD is expected to contain dicts with keys
        mapping to CQL columns.

        Arguments:
        @param rdd(RDD):
            The RDD to save.
        @param keyspace(string):
            The keyspace to save the RDD in. If not given, the `keyspace`
            attribute of the rdd is used when it has one.
        @param table(string):
            The CQL table to save the RDD in. If not given, the `table`
            attribute of the rdd is used when it has one.

        Keyword arguments:
        @param columns(iterable or dict):
            The columns to save, i.e. which keys to take from the dicts in
            the RDD. A dict maps each column to the action to apply inside
            Neo4j and is sent as a Java object; any other iterable is sent as
            a Java String array. None or empty is forwarded as None, meaning
            all columns are stored.
        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Neo4j rows. If
            None given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Neo4j.
        @param **write_conf_kwargs:
            WriteConf parameters, combined with write_conf through
            WriteConf.build.

        @raise ValueError: If no keyspace or no table can be determined.
    """
    # Resolve the target: an explicit argument wins, otherwise fall back to
    # what the rdd itself carries.
    if not keyspace:
        keyspace = getattr(rdd, 'keyspace', None)
        if not keyspace:
            raise ValueError("keyspace not set")

    if not table:
        table = getattr(rdd, 'table', None)
        if not table:
            raise ValueError("table not set")

    # The write configuration crosses the gateway as a map of its settings.
    conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(rdd.ctx._gateway, conf.settings())

    if not columns:
        # Nothing selected (None or empty, of either kind).
        java_columns = None
    elif isinstance(columns, dict):
        # Column -> action mapping travels as a Java map.
        java_columns = as_java_object(rdd.ctx._gateway, columns)
    else:
        # Plain column names travel as a Java String array.
        java_columns = as_java_array(rdd.ctx._gateway, "String", columns)

    jvm_helper = helper(rdd.ctx)
    jvm_helper.saveToNeo4j(
        rdd._jrdd,
        keyspace,
        table,
        java_columns,
        row_format,
        keyed,
        java_conf,
    )
def select(self, *columns):
    """
        Creates a Neo4jRDD with the select clause applied.

        @param *columns: The columns to select. Each one is passed through
            str() and the result is sent as a Java String array.

        @return: The result of self._specialize('select', <java array>).
    """
    column_names = (str(column) for column in columns)
    java_columns = as_java_array(self.ctx._gateway, "String", column_names)
    return self._specialize('select', java_columns)