def __init__(self, sqlContext, dataset, view_name, site_filter=None):
    """
    Constructor for a Reader object that reads from FiloDB.

    :param sqlContext: the current Spark SQLContext
    :param dataset: the FiloDB dataset name, or a DataFrame that should be loaded
    :param view_name: the name of the temp table that will be used in constructed queries
    :param site_filter: filter string applied to the dataset, e.g. 'siteRef == "Site"'
    :return: Reader object
    """
    self._sqlContext = sqlContext
    self._fc = FlintContext(self._sqlContext)
    self.view_name = view_name
    self.load_filter = site_filter
    self._date_filter = None
    self._tag_filter = None
    self._is_sorted = True
    if isinstance(dataset, str):
        # A dataset name was given: load it through the filodb.spark data source.
        self.filodb_dataset = dataset
        df = self._sqlContext.read.format("filodb.spark").option(
            "dataset", dataset).load()
    else:
        # A DataFrame was passed in directly.
        self.filodb_dataset = None
        df = dataset
    if site_filter is not None:
        df = df.filter(site_filter)
    self._df = df
    self._df.createOrReplaceTempView(self.view_name)
    self._timestamp = True
    self._tag_query = None
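
# Hedged usage sketch (assumption): the enclosing class is called Reader, as the docstring
# suggests; the dataset name, view name, and filter below are purely illustrative.
from pyspark.sql import SparkSession, SQLContext

spark = SparkSession.builder.appName("reader-example").getOrCreate()
sqlContext = SQLContext(spark.sparkContext)

reader = Reader(sqlContext,
                dataset="timeseries",               # hypothetical FiloDB dataset name
                view_name="timeseries_view",
                site_filter='siteRef == "SiteA"')   # optional row filter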
from pyspark.sql import SparkSession, SQLContext
from ts.flint import FlintContext
from qpython import qconnection


def connect(q_host, q_port):
    """Open a kdb+ connection plus a Spark/Flint session and return (q, fc, spark)."""
    spark = SparkSession.builder.appName("ts").getOrCreate()
    # SQLContext expects a SparkContext, not the SparkSession itself.
    sqlContext = SQLContext(spark.sparkContext)
    fc = FlintContext(sqlContext)
    q = qconnection.QConnection(host=q_host, port=q_port, pandas=True)
    q.open()
    return q, fc, spark
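
# Hedged usage sketch (assumption): the host/port point at a locally running kdb+ process
# and the queried table is illustrative; it is assumed to expose a 'time' column so the
# result can be lifted into a Flint time-series frame.
q, fc, spark = connect("localhost", 5000)
trades_pdf = q("select from trade where date = .z.d")   # qPython returns a pandas DataFrame
trades_tsdf = fc.read.pandas(trades_pdf)                # assumes Flint's pandas reader
q.close()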
def __init__(self, spark_sql_context):
    """
    Constructor.
    """
    from pyspark.sql import SQLContext
    from ts.flint import FlintContext

    if (spark_sql_context is None) or \
            (not isinstance(spark_sql_context, SQLContext)):
        raise TypeError(
            "spark_sql_context must be a Spark SQLContext object")
    self._flintContext = FlintContext(spark_sql_context)
def __setup(cls, options=None):
    '''Starts Spark and sets the class attributes `sc`, `sqlContext`, and `flintContext`.'''
    import collections, os, sys
    from pyspark import SparkContext, SparkConf
    from pyspark.sql import SQLContext
    from ts.flint import FlintContext

    # Keep the defaults as a plain dict so they can be merged with caller options.
    default_options = {
        'spark.app.name': cls.__name__,
        'spark.master': 'local',
    }
    setattr(cls, '_env', dict(os.environ))
    setattr(cls, '_path', list(sys.path))
    options = collections.ChainMap(options or {}, default_options)

    # Build the SparkConf from the merged options; SparkConf() itself does not
    # accept a mapping as its first argument.
    conf = SparkConf()
    for key, value in options.items():
        conf.set(key, value)

    spark_context = SparkContext(conf=conf)
    sql_context = SQLContext(spark_context)
    flint_context = FlintContext(sql_context)
    setattr(cls, 'sc', spark_context)
    setattr(cls, 'sqlContext', sql_context)
    setattr(cls, 'flintContext', flint_context)
# Databricks notebook source
from pyspark.sql.functions import *
#from pyspark.sql.types import *
import ts.flint
from ts.flint import FlintContext

flintContext = FlintContext(sqlContext)

df_control = flintContext.read.dataframe(
    spark.sql("select * from KEY_CONTROLS")
    .where("RESULT_KEY_NBR = 11")
    .select('DATE', 'ACTL_VAL')
    .withColumn('time', unix_timestamp(col('DATE'), "yyyy-MM-dd").cast("timestamp"))
    .select('time', 'ACTL_VAL')
    .orderBy('time'))
df_control.show()
#df_control.coalesce(1).write.format("com.databricks.spark.csv").option("header", "true").save("dbfs:/FileStore/ActVal_Key11.csv")

# COMMAND ----------

from ts.flint import windows

df_control_previous_day_val = df_control.shiftTime(
    windows.future_absolute_time('1day')).toDF('time', 'previous_day_val')
df_control_previous_wk_val = df_control.shiftTime(
    windows.future_absolute_time('7day')).toDF('time', 'previous_wk_val')
df_control_joined = df_control.leftJoin(df_control_previous_day_val).leftJoin(
    df_control_previous_wk_val)
df_control_joined.show()

# COMMAND ----------

from ts.flint import summarizers
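
# Hedged sketch (assumption): the summarize call below was not part of the original cell;
# it is an illustrative use of the summarizers import above, computing the overall mean of
# ACTL_VAL over the joined frame with a Flint summarizer.
df_control_joined.summarize(summarizers.mean('ACTL_VAL')).show()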
def flintContext(pyspark, sqlContext):
    from ts.flint import FlintContext
    return FlintContext(sqlContext)
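
# Hedged usage sketch (assumption): flintContext above is registered as a pytest fixture
# (e.g. in a conftest.py), so a test can request it by name; the sample data is illustrative
# and Flint's pandas reader is assumed to accept a pandas frame with a 'time' column.
import pandas as pd

def test_read_pandas(flintContext):
    pdf = pd.DataFrame({'time': pd.date_range('2020-01-01', periods=3, freq='D'),
                        'value': [1.0, 2.0, 3.0]})
    tsdf = flintContext.read.pandas(pdf)
    assert tsdf.count() == 3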