Example #1
    def __init__(self, sqlContext, dataset, view_name, site_filter=None):
        """
        Constructor for a Reader object that reads from FiloDB.
        :param sqlContext: the current Spark SQLContext
        :param dataset: the FiloDB dataset name, or a DataFrame to load directly
        :param view_name: the name of the temp table used in constructed queries
        :param site_filter: filter string applied to the dataset, e.g. 'siteRef == "Site"'
        :return: Reader object
        """
        self._sqlContext = sqlContext
        self._fc = FlintContext(self._sqlContext)
        self.view_name = view_name
        self.load_filter = site_filter
        self._date_filter = None
        self._tag_filter = None
        self._is_sorted = True
        # `dataset` may be either a FiloDB dataset name or an existing DataFrame.
        if isinstance(dataset, str):
            self.filodb_dataset = dataset
            df = self._sqlContext.read.format("filodb.spark").option(
                "dataset", dataset).load()
        else:
            self.filodb_dataset = None
            df = dataset
        if site_filter is not None:
            df = df.filter(site_filter)
        self._df = df
        self._df.createOrReplaceTempView(self.view_name)
        self._timestamp = True
        self._tag_query = None
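A minimal usage sketch, assuming the method above belongs to a class named Reader and that a Spark SQLContext is already in scope (the class name, dataset name, view name and filter value below are placeholders, not shown in the example):

# Hypothetical: load a FiloDB dataset, filter it to one site, and expose it as a temp view.
reader = Reader(sqlContext, "sensor_readings", "sensor_view",
                site_filter='siteRef == "Site1"')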
Example #2
def connect(q_host, q_port):
    """Open a kdb+ connection and create a Spark session with a FlintContext."""
    from pyspark.sql import SparkSession, SQLContext
    from ts.flint import FlintContext
    from qpython import qconnection

    spark = SparkSession.builder.appName("ts").getOrCreate()
    sqlContext = SQLContext(spark.sparkContext)
    fc = FlintContext(sqlContext)
    q = qconnection.QConnection(host=q_host, port=q_port, pandas=True)
    q.open()
    return q, fc, spark
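A short usage sketch for the helper above; the host, port and q query are placeholders:

# Hypothetical: open the kdb+ and Spark/Flint handles, run one query, then close.
q, fc, spark = connect("localhost", 5000)
trades = q.sendSync("trade")   # with pandas=True, tables come back as pandas DataFrames
q.close()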
Example #3
    def __init__(self, spark_sql_context):
        """
        Constructor.
        """
        from pyspark.sql import SQLContext
        from ts.flint import FlintContext

        if (spark_sql_context is None) or \
                (not isinstance(spark_sql_context, SQLContext)):
            raise TypeError(
                "spark_sql_context must be a Spark SQLContext object")

        self._flintContext = FlintContext(spark_sql_context)
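A usage sketch, assuming the constructor above belongs to a wrapper class named FlintWrapper (the class name is not shown in the example and is only a placeholder):

from pyspark.sql import SQLContext

sql_context = SQLContext(spark.sparkContext)   # assumes an existing SparkSession named `spark`
wrapper = FlintWrapper(sql_context)            # anything other than a SQLContext raises TypeError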
Example #4
    @classmethod
    def __setup(cls, options=None):
        '''Starts Spark and sets the class attributes `sc`, `sqlContext` and `flintContext`.'''
        import collections
        import os
        import sys
        from pyspark import SparkContext, SparkConf
        from pyspark.sql import SQLContext
        from ts.flint import FlintContext

        default_options = (SparkConf().setAppName(
            cls.__name__).setMaster("local"))
        setattr(cls, '_env', dict(os.environ))
        setattr(cls, '_path', list(sys.path))
        # Overlay any caller-supplied options on top of the defaults.
        options = collections.ChainMap(options or {}, dict(default_options.getAll()))
        conf = SparkConf().setAll(options.items())
        spark_context = SparkContext(conf=conf)
        sql_context = SQLContext(spark_context)
        flint_context = FlintContext(sql_context)
        setattr(cls, 'sc', spark_context)
        setattr(cls, 'sqlContext', sql_context)
        setattr(cls, 'flintContext', flint_context)
Example #5
# Databricks notebook source
from pyspark.sql.functions import *
#from pyspark.sql.types import *

import ts.flint
from ts.flint import FlintContext
flintContext = FlintContext(sqlContext)

df_control = flintContext.read.dataframe(
    spark.sql("select * from KEY_CONTROLS")
    .where("RESULT_KEY_NBR = 11")
    .select('DATE', 'ACTL_VAL')
    .withColumn('time', unix_timestamp(col('DATE'), "yyyy-MM-dd").cast("timestamp"))
    .select('time', 'ACTL_VAL')
    .orderBy('time'))

df_control.show()

#df_control.coalesce(1).write.format("com.databricks.spark.csv").option("header", "true").save("dbfs:/FileStore/ActVal_Key11.csv")

# COMMAND ----------

from ts.flint import windows

# Shifting the series forward by 1 day / 7 days and left-joining back on 'time'
# attaches each row's value from one day and one week earlier as extra columns.
df_control_previous_day_val = df_control.shiftTime(
    windows.future_absolute_time('1day')).toDF('time', 'previous_day_val')
df_control_previous_wk_val = df_control.shiftTime(
    windows.future_absolute_time('7day')).toDF('time', 'previous_wk_val')
df_control_joined = df_control.leftJoin(df_control_previous_day_val).leftJoin(
    df_control_previous_wk_val)
df_control_joined.show()

# COMMAND ----------

from ts.flint import summarizers
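The cell above stops at the summarizers import; a minimal sketch of how it might continue, reusing the joined frame and the windows import from the previous cells:

# Overall mean of the control value across the whole series.
df_control_joined.summarize(summarizers.mean('ACTL_VAL')).show()

# Trailing 7-day mean computed per row over a past window.
df_control_joined.summarizeWindows(
    windows.past_absolute_time('7day'),
    summarizers.mean('ACTL_VAL')).show()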
Example #6
def flintContext(pyspark, sqlContext):
    from ts.flint import FlintContext
    return FlintContext(sqlContext)
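A sketch of a test that would consume the function above, assuming it is registered as a pytest fixture (the decorator is not shown in the snippet); the test name is a placeholder:

def test_flint_context_is_created(flintContext):
    from ts.flint import FlintContext
    # pytest injects the fixture by parameter name; here we only check its type.
    assert isinstance(flintContext, FlintContext)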