def generate_pre_transform_specs_data_frame(self, spark_context=None, sql_context=None):
    """Populate ``self.pre_transform_specs_data_frame`` from the database.

    Reads the ``pre_transform_specs`` table over JDBC, re-serializes each
    row's JSON spec, and rebuilds a DataFrame from the resulting JSON
    strings via ``sql_context.read.json``.
    """
    reader = DataFrameReader(sql_context)
    specs_df = reader.jdbc(
        DbUtil.get_java_db_connection_string(), 'pre_transform_specs')
    # Round-trip each spec through json.loads/json.dumps to normalize it
    # into a clean JSON string before re-parsing as a DataFrame.
    json_rows = [
        json.dumps(json.loads(row['pre_transform_spec']))
        for row in specs_df.collect()
    ]
    self.pre_transform_specs_data_frame = sql_context.read.json(
        spark_context.parallelize(json_rows))
def generate_pre_transform_specs_data_frame(self, spark_context=None, sql_context=None):
    """Populate ``self.pre_transform_specs_data_frame`` from the database.

    Reads the ``pre_transform_specs`` table over JDBC (connection string
    supplied by ``self.get_connection_string()``), re-serializes each row's
    JSON spec, and rebuilds a DataFrame from the JSON strings.
    """
    data_frame_reader = DataFrameReader(sql_context)
    pre_transform_specs_data_frame = data_frame_reader.jdbc(
        self.get_connection_string(), 'pre_transform_specs')
    data = []
    for item in pre_transform_specs_data_frame.collect():
        # Normalize each stored spec by round-tripping it through json.
        spec = json.loads(item['pre_transform_spec'])
        data.append(json.dumps(spec))
    # Fix: sqlContext.jsonRDD was deprecated in Spark 1.4 and removed in
    # Spark 2.0; read.json accepts an RDD of JSON strings and is the form
    # the sibling generate_*_specs_data_frame methods already use.
    data_frame = sql_context.read.json(spark_context.parallelize(data))
    self.pre_transform_specs_data_frame = data_frame
def generate_transform_specs_data_frame(self, spark_context=None, sql_context=None):
    """Populate ``self.transform_specs_data_frame`` from the database.

    Reads the ``transform_specs`` table over JDBC, re-serializes each
    row's JSON spec, and rebuilds a DataFrame from the resulting JSON
    strings via ``sql_context.read.json``.
    """
    reader = DataFrameReader(sql_context)
    specs_df = reader.jdbc(
        DbUtil.get_java_db_connection_string(), 'transform_specs')
    # Round-trip each spec through json.loads/json.dumps to normalize it
    # into a clean JSON string before re-parsing as a DataFrame.
    json_rows = [
        json.dumps(json.loads(row['transform_spec']))
        for row in specs_df.collect()
    ]
    self.transform_specs_data_frame = sql_context.read.json(
        spark_context.parallelize(json_rows))
def method2(sql_context: SQLContext, database_URL: str, database_properties: dict):
    """Read the ``RATINGS`` table over JDBC and return it as a DataFrame.

    Args:
        sql_context: Active SQLContext used to build the DataFrameReader.
        database_URL: JDBC connection URL.
        database_properties: JDBC connection properties (driver, credentials, ...).

    Returns:
        A DataFrame backed by the ``RATINGS`` table.
    """
    print('fetching jdbc dataframe...')
    # fetchSize controls how many rows the JDBC driver pulls per round trip.
    reader = DataFrameReader(sql_context).option("fetchSize", "5001")
    # NOTE(review): the table is read unpartitioned; pass column/lowerBound/
    # upperBound/numPartitions to reader.jdbc if parallel reads are needed.
    return reader.jdbc(
        url=database_URL,
        table='RATINGS',
        properties=database_properties)