예제 #1
0
    def __init__(self, keyspace, table, ctx, row_format=None, read_conf=None):
        '''
            Sets up an RDD over a Cassandra table through the JVM-side
            pyspark_cassandra.PythonHelper.

            @param keyspace:
                The keyspace to read from; must be truthy.
            @param table:
                The table to read from; must be truthy.
            @param ctx:
                The context object; its _gateway, _jvm and _jsc attributes are used.
            @param row_format:
                Index into RowFormat.values. If None given RowFormat.ROW is used.
            @param read_conf:
                Optional object whose __dict__ is converted to a Java object and
                passed to the helper as read configuration.

            @raise ValueError:
                If keyspace or table is not set, or if row_format lies outside
                the range of RowFormat.values.
        '''
        if not keyspace:
            raise ValueError("keyspace not set")
        if not table:
            raise ValueError("table not set")

        if row_format is None:
            row_format = RowFormat.ROW
        elif row_format < 0 or row_format >= len(RowFormat.values):
            raise ValueError("invalid row_format %s" % row_format)

        self.keyspace, self.table = keyspace, table
        self.read_conf = read_conf
        self.row_format = row_format

        # the helper receives the read configuration as a converted Java object
        if read_conf:
            jread_conf = as_java_object(ctx._gateway, read_conf.__dict__)
        else:
            jread_conf = None

        # instantiate the helper through the current thread's context class loader
        loader = ctx._jvm.java.lang.Thread.currentThread().getContextClassLoader()
        helper_class = loader.loadClass("pyspark_cassandra.PythonHelper")
        self._helper = helper_class.newInstance()

        self._cjrdd = self._helper.cassandraTable(keyspace, table, ctx._jsc, jread_conf)
        parsed_jrdd = self._helper.parseRows(self._cjrdd, row_format)

        super(CassandraRDD, self).__init__(parsed_jrdd, ctx)
예제 #2
0
	def __init__(self, keyspace, table, ctx, row_format=None, read_conf=None):
		'''
			Sets up an RDD over a Cassandra table through the JVM-side
			pyspark_cassandra.PythonHelper.

			@param keyspace:
				The keyspace to read from; must be truthy.
			@param table:
				The table to read from; must be truthy.
			@param ctx:
				The context object; its _gateway, _jvm and _jsc attributes are used.
			@param row_format:
				Index into RowFormat.values. If None given RowFormat.ROW is used.
			@param read_conf:
				Optional object whose __dict__ is converted to a Java object and
				passed to the helper as read configuration.

			@raise ValueError:
				If keyspace or table is not set, or if row_format lies outside
				the range of RowFormat.values.
		'''
		if not keyspace:
			raise ValueError("keyspace not set")
		
		if not table:
			raise ValueError("table not set")
		
		# Compare against None explicitly: 0 passes the range check below, so a
		# truthiness test would silently replace an explicit 0 with the default.
		if row_format is None:
			row_format = RowFormat.ROW
		elif row_format < 0 or row_format >= len(RowFormat.values):
			raise ValueError("invalid row_format %s" % row_format)
		
		self.keyspace = keyspace
		self.table = table
		self.read_conf = read_conf
		self.row_format = row_format
		
		# the helper receives the read configuration as a converted Java object
		jread_conf = as_java_object(ctx._gateway, read_conf.__dict__) if read_conf else None
		
		# instantiate the helper through the current thread's context class loader
		self._helper = ctx._jvm.java.lang.Thread.currentThread().getContextClassLoader() \
			.loadClass("pyspark_cassandra.PythonHelper").newInstance()
		
		self._cjrdd = self._helper \
			.cassandraTable(
				keyspace,
				table,
				ctx._jsc,
				jread_conf,
			)
		
		jrdd = self._helper.parseRows(self._cjrdd, row_format)
		
		super(CassandraRDD, self).__init__(jrdd, ctx)
예제 #3
0
def saveToCassandra(rdd,
                    keyspace=None,
                    table=None,
                    columns=None,
                    write_conf=None,
                    row_format=None):
    '''
        Saves an RDD to Cassandra. The RDD is expected to contain dicts with keys mapping to CQL columns.

        Arguments:
        @param rdd(RDD):
            The RDD to save. Equals to self when invoking saveToCassandra on a monkey patched RDD.
        @param keyspace(string):
            The keyspace to save the RDD in. If not given and the rdd is a CassandraRDD the same keyspace is used.
        @param table(string):
            The CQL table to save the RDD in. If not given and the rdd is a CassandraRDD the same table is used.

        Keyword arguments:
        @param columns(iterable):
            The columns to save, i.e. which keys to take from the dicts in the RDD.
            If None given all columns are stored.

        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra

        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Cassandra rows.
            If None given the mapping is auto-detected as far as possible.

        @raise ValueError:
            If no keyspace or table is given and the rdd does not provide one.
    '''

    # A plain RDD has no keyspace / table attribute; fall back to None so the
    # ValueError below is raised instead of an AttributeError.
    keyspace = keyspace or getattr(rdd, 'keyspace', None)
    if not keyspace:
        raise ValueError("keyspace not set")

    table = table or getattr(rdd, 'table', None)
    if not table:
        raise ValueError("table not set")

    # create write config as map and convert the columns to a string array
    write_conf = as_java_object(rdd.ctx._gateway,
                                write_conf.__dict__) if write_conf else None
    columns = as_java_array(rdd.ctx._gateway, "String",
                            columns) if columns else None

    # create a helper object through the current thread's context class loader
    helper = rdd.ctx._jvm.java.lang.Thread.currentThread().getContextClassLoader() \
        .loadClass("pyspark_cassandra.PythonHelper").newInstance()

    # delegate to helper
    helper \
        .saveToCassandra(
            rdd._jrdd,
            keyspace,
            table,
            columns,
            write_conf,
            row_format
        )
예제 #4
0
def saveToCassandra(rdd, keyspace=None, table=None, columns=None, row_format=None, keyed=None, write_conf=None,
                    **write_conf_kwargs):
    '''
        Writes the contents of an RDD to a Cassandra table. The elements are
        expected to be dicts whose keys map to CQL columns.

        Arguments:
        @param rdd(RDD):
            The RDD to save; this is self when called on a monkey patched RDD.
        @param keyspace(string):
            Target keyspace. Falls back to the rdd's keyspace attribute if it has one.
        @param table(string):
            Target CQL table. Falls back to the rdd's table attribute if it has one.

        Keyword arguments:
        @param columns(iterable):
            The keys to take from the dicts in the RDD. If None given all columns are stored.

        @param row_format(RowFormat):
            Makes explicit how RDD elements map onto Cassandra rows.
            If None given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Makes explicit that the RDD holds key, value tuples (and not arrays of length two).

        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra

        @raise ValueError:
            If neither the arguments nor the rdd provide a keyspace or a table.
    '''

    if not keyspace:
        keyspace = getattr(rdd, 'keyspace', None)
    if not keyspace:
        raise ValueError("keyspace not set")

    if not table:
        table = getattr(rdd, 'table', None)
    if not table:
        raise ValueError("table not set")

    # build the effective write configuration and hand its settings to the JVM
    effective_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_conf = as_java_object(gateway, effective_conf.settings())

    # the helper expects the columns as a Java String array (or None for all)
    if columns:
        java_columns = as_java_array(rdd.ctx._gateway, "String", columns)
    else:
        java_columns = None

    jvm_helper = helper(rdd.ctx)
    jvm_helper.saveToCassandra(rdd._jrdd, keyspace, table, java_columns, row_format, keyed, java_conf)
예제 #5
0
def saveToCassandra(rdd, keyspace=None, table=None, columns=None, write_conf=None, row_format=None):
	'''
		Saves an RDD to Cassandra. The RDD is expected to contain dicts with keys mapping to CQL columns.
	
		Arguments:
		@param rdd(RDD):
			The RDD to save. Equals to self when invoking saveToCassandra on a monkey patched RDD.
		@param keyspace(string):
			The keyspace to save the RDD in. If not given and the rdd is a CassandraRDD the same keyspace is used.
		@param table(string):
			The CQL table to save the RDD in. If not given and the rdd is a CassandraRDD the same table is used.
	
		Keyword arguments:
		@param columns(iterable):
			The columns to save, i.e. which keys to take from the dicts in the RDD.
			If None given all columns are stored.
		
		@param write_conf(WriteConf):
			A WriteConf object to use when saving to Cassandra
		
		@param row_format(RowFormat):
			Make explicit how to map the RDD elements into Cassandra rows.
			If None given the mapping is auto-detected as far as possible.
		
		@raise ValueError:
			If no keyspace or table is given and the rdd does not provide one.
	'''
	
	# A plain RDD has no keyspace / table attribute; fall back to None so the
	# ValueError below is raised instead of an AttributeError.
	keyspace = keyspace or getattr(rdd, 'keyspace', None)
	if not keyspace:
		raise ValueError("keyspace not set")
	
	table = table or getattr(rdd, 'table', None)
	if not table:
		raise ValueError("table not set")
	
	# create write config as map and convert the columns to a string array
	write_conf = as_java_object(rdd.ctx._gateway, write_conf.__dict__) if write_conf else None
	columns = as_java_array(rdd.ctx._gateway, "String", columns) if columns else None

	# create a helper object through the current thread's context class loader
	helper = rdd.ctx._jvm.java.lang.Thread.currentThread().getContextClassLoader() \
		.loadClass("pyspark_cassandra.PythonHelper").newInstance()
		
	# delegate to helper
	helper \
		.saveToCassandra(
			rdd._jrdd,
			keyspace,
			table,
			columns,
			write_conf,
			row_format
		)
예제 #6
0
    def __init__(self, ctx, keyspace, table, row_format=None, read_conf=None, **read_conf_kwargs):
        '''
            Sets up a table scan RDD: runs the parent constructor, initialises
            the key-by selection and asks the helper for the Cassandra table.

            @param ctx:
                The context object; its _gateway and _jsc attributes are used here.
            @param keyspace:
                The keyspace of the table to scan.
            @param table:
                The table to scan.
            @param row_format:
                Forwarded unchanged to the parent constructor.
            @param read_conf:
                Forwarded unchanged to the parent constructor.
            @param **read_conf_kwargs:
                Forwarded unchanged to the parent constructor.
        '''
        parent = super(CassandraTableScanRDD, self)
        parent.__init__(ctx, keyspace, table, row_format, read_conf, **read_conf_kwargs)

        self._key_by = ColumnSelector.none()

        # self.read_conf and self._helper are read here without being assigned in
        # this method; presumably the parent constructor sets them (TODO confirm)
        gateway = ctx._gateway
        java_read_conf = as_java_object(gateway, self.read_conf.settings())

        self.crdd = self._helper.cassandraTable(ctx._jsc, keyspace, table, java_read_conf)