Ejemplo n.º 1
0
def create_collection(args):
    """Create a Rockset collection fed from an S3 bucket, per a YAML schema.

    The schema file must define a ``field_types`` mapping and may define an
    optional ``clustering_key`` list of field names.
    """
    # Use the named credentials profile when one is given, default otherwise.
    rs = Client(profile=args.profile) if args.profile else Client()

    with open(args.schema, 'r') as schema_file:
        schema = yaml.full_load(schema_file)
        field_types = schema['field_types']
        clustering_fields = schema.get('clustering_key')

    column_names, column_types = get_columns(field_types)

    # CSV ingestion settings shared by every source below.
    format_params = rs.Source.csv_params(
        separator=args.separator,
        encoding='UTF-8',
        first_line_as_column_names=False,
        column_names=column_names,
        column_types=column_types
    )
    sources = [
        rs.Source.s3(
            bucket=args.s3_bucket,
            prefix=args.s3_prefix,
            format_params=format_params
        )
    ]

    if clustering_fields:
        # One AUTO clustering field per configured key column.
        clustering_key = []
        for field in clustering_fields:
            clustering_key.append(
                rs.ClusteringKey.clusteringField(field_name=field,
                                                 cluster_type='AUTO'))
        rs.Collection.create(
            name=args.collection,
            workspace=args.workspace,
            sources=sources,
            clustering_key=clustering_key
        )
    else:
        rs.Collection.create(
            name=args.collection,
            workspace=args.workspace,
            sources=sources,
        )

    print(
        'Successfully created collection {}.{}'.format(
            args.workspace, args.collection
        )
    )
Ejemplo n.º 2
0
def rockset_querymaker(query):
    """Run a named query against Rockset and report its latency.

    Parameters
    ----------
    query : str
        Key into the module-level ``queries`` mapping.

    Returns
    -------
    tuple[str, float] | None
        ``(query_name, elapsed_milliseconds)``, or ``None`` when *query* is
        not a known query name.
    """
    # connect to Rockset
    rs = Client(api_key=api_key)
    print("query is", query)

    if query not in queries:
        print("Err!")

        return

    # BUG FIX: the previous code called timeit.timeit(str(rs.sql(...))),
    # which executed the query once, then handed the *string representation
    # of its results* to timeit to be compiled and run as Python source one
    # million times.  Time the actual query execution instead, once.
    time = timeit.timeit(lambda: rs.sql(Q(queries[query])), number=1)

    print(query, 1000 * time)

    return query, time * 1000
Ejemplo n.º 3
0
def do_backfill(n):
    """Re-send classification requests for failed GitHub Actions job ids.

    Fetches the ids via the ``commons.unclassified`` Query Lambda and fans a
    ``send_lambda_request`` call out per id over a thread pool (I/O bound).

    Parameters
    ----------
    n : int
        Kept for backward compatibility; the Query Lambda path used now does
        not limit the result count, so ``n`` is unused.
    """
    # Import here to avoid requiring these dependencies in lambda
    from rockset import Client, ParamDict
    from concurrent.futures import ThreadPoolExecutor, wait

    # query rockset for failed GHA job ids
    client = Client(
        api_key=ROCKSET_API_KEY,
        api_server="https://api.rs2.usw2.rockset.com",
    )
    qlambda = client.QueryLambda.retrieve("unclassified",
                                          version="d39e66c0ed0aa238",
                                          workspace="commons")

    params = ParamDict()
    results = qlambda.execute(parameters=params).results
    # Renamed from `id`, which shadowed the builtin; also dropped the dead
    # commented-out raw-SQL variant of this query and its unused Q/F imports.
    job_ids = [result["id"] for result in results]
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(send_lambda_request, job_id) for job_id in job_ids
        ]
        wait(futures)

    logger.info("done!")
Ejemplo n.º 4
0
def get_avg_pm10_results():
    """Execute the stored PM10 Query Lambda and return its result rows."""
    client = Client(api_key=ROCKSET_API_KEY,
                    api_server='https://api.rs2.usw2.rockset.com')

    # Look up the saved Query Lambda by name and version in `commons`.
    query_lambda = client.QueryLambda.retrieve('YOUR QUERY LAMBDA NAME',
                                               version=4,
                                               workspace='commons')
    response = query_lambda.execute(parameters=ParamDict())
    return response.results
Ejemplo n.º 5
0
def run(args):
    """Benchmark every ``.sql`` file in ``args.query_dir`` against Rockset.

    Each query runs ``args.runs`` times; the row count and the median
    wall-clock latency in milliseconds are printed per query.
    """
    if args.profile:
        rs = Client(profile=args.profile)
    else:
        rs = Client()

    # Collect (query_id, query_text) pairs, sorted by id for stable output.
    queries = []
    for filename in os.listdir(args.query_dir):
        if not filename.endswith('.sql'):
            continue
        query_id = os.path.splitext(filename)[0]
        # FIX: the file handle previously shadowed the loop variable `f`.
        with open(os.path.join(args.query_dir, filename), 'r') as query_file:
            query_str = query_file.read()
        queries.append((query_id, query_str))
    queries = sorted(queries)
    print('Found {} queries to run. Will run each {} times and take the median'
          ' runtime.'.format(len(queries), args.runs))
    print('=' * 70)

    for query_id, query_str in queries:
        times = []
        rows = None
        error = False
        for _ in range(args.runs):
            start = time.time()
            try:
                resp = rs.sql(Q(query_str))
                # Materialize the results once; the old code called
                # resp.results() twice on repeat runs for no reason.
                num_rows = len(resp.results())
                if rows is None:
                    rows = num_rows
                else:
                    assert rows == num_rows
                times.append(1000 * (time.time() - start))
            except Exception as e:
                print('Query {} produced an error: {}'.format(
                    query_id, str(e)))
                error = True
                break
        if not error:
            print('Query {} produced {:>3d} rows in {:>5.0f} ms'.format(
                query_id, rows, statistics.median(times)))
Ejemplo n.º 6
0
import json
import math
import os

from rockset import Client, ParamDict
import pandas as pd

ROCKSET_API_KEY = os.environ.get("ROCKSET_API_KEY")
if ROCKSET_API_KEY is None:
    raise RuntimeError("ROCKSET_API_KEY not set")

with open("rockset/prodVersions.json") as f:
    prod_versions = json.load(f)

client = Client(
    api_key=ROCKSET_API_KEY,
    api_server="https://api.rs2.usw2.rockset.com",
)
qlambda = client.QueryLambda.retrieve(
    "correlation_matrix",
    version=prod_versions["metrics"]["correlation_matrix"],
    workspace="metrics",
)

params = ParamDict()
results = qlambda.execute(parameters=params)

pivot = defaultdict(dict)

# Results look like (is_green, head_sha, name)
# Turn results into a nested dict of head_sha => name => is_green
for result in results.results:
Ejemplo n.º 7
0
from os import getenv
from json import loads
import flask
from rockset import Client, Q

app = flask.Flask(__name__, template_folder='js')
rs = Client(api_key=getenv('RS2_TOKEN'), api_server='api.rs2.usw2.rockset.com')


def redirect(link):
    """Return an HTML snippet that client-side redirects the browser to *link*."""
    template = '<script>window.location.replace("{}")</script>'
    return template.format(link)


# Extra columns to SELECT for each top-level item category.
selects = {
    'treasure': 'drops',
    'monsters': 'drops',
    'materials': 'cooking_effect, hearts_recovered',
    'equipment': 'attack, defense'
}
# Extra columns to SELECT for each creatures sub-category.
creatures_selects = {
    'food': 'hearts_recovered, cooking_effect',
    'others': 'drops'
}


def creatures_category():
    others = list(
        rs.sql(
            Q('select id, name, description, {} from "botw-api".creatures where cooking_effect is null'
              .format(creatures_selects['others']))))
    foods = list(
Ejemplo n.º 8
0
from typing import Any, Dict
from datetime import datetime, timedelta
from gitutils import _check_output

from rockset import Client, ParamDict  # type: ignore[import]
import os

rs = Client(api_key=os.getenv("ROCKSET_API_KEY", None))
qlambda = rs.QueryLambda.retrieve('commit_jobs_batch_query',
                                  version='15aba20837ae9d75',
                                  workspace='commons')


def parse_args() -> Any:
    """Build and evaluate the command-line options for this script."""
    from argparse import ArgumentParser

    parser = ArgumentParser("Print latest commits")
    parser.add_argument(
        "--minutes",
        type=int,
        default=30,
        help="duration in minutes of last commits",
    )
    return parser.parse_args()


def print_latest_commits(minutes: int = 30) -> None:
    current_time = datetime.now()
    time_since = current_time - timedelta(minutes=minutes)
    timestamp_since = datetime.timestamp(time_since)
    commits = _check_output(
        [
            "git",
            "rev-list",
Ejemplo n.º 9
0
from rockset import Client, Q, F
# Connect using the default profile from the Rockset credentials file.
rs=Client() # requires an active profile in rockset credentials file

# Reuse a previously-created AWS integration to read from a Kinesis stream.
aws_integration=rs.Integration.retrieve('aws_key_haneesh')
sources=[
    rs.Source.kinesis(
        stream_name="twitter-stream",
        integration=aws_integration)]
# Create the collection that continuously ingests the Kinesis stream.
twitter_kinesis_demo=rs.Collection.create("twitter-kinesis-demo", sources=sources)
Ejemplo n.º 10
0
import os
from rockset import Client, Q, F

collectionName = 'GadhaBotUsers'
rs = Client(api_key=os.environ.get('ROCKSET_SECRET'),
            api_server="api.rs2.usw2.rockset.com")
collection = rs.Collection.retrieve(collectionName)


def store(userid, city):
    """Save *city* for *userid* in the collection and return a confirmation."""
    print('City store query')
    collection.add_docs([{'id': userid, 'city': city}])
    return ('City stored. Type !weather to see weather in your city.')


def get(userid):
    """Look up and return the stored city for *userid*."""
    query = Q(collectionName).where(F['_id'] == str(userid)).select(F['city'])
    rows = rs.sql(query)
    # The first matching document carries the city value.
    return rows[0]['city']
Ejemplo n.º 11
0
import json

from rockset import Client, Q

import os
from os import path
from dotenv import load_dotenv

# Directory containing this file; used to locate the env config next to it.
CURRENT_DIR = path.dirname(os.path.abspath(__file__))

# Load ROCKSET_APISERVER / ROCKSET_APIKEY from config.env, overriding any
# values already present in the process environment.
load_dotenv(dotenv_path=os.path.join(CURRENT_DIR, 'config.env'), override=True)

client = Client(api_server=os.environ.get('ROCKSET_APISERVER'),
                api_key=os.environ.get('ROCKSET_APIKEY'))

# Top 10 committers, counted over repos that have more than 10 distinct
# commit-comment contributors.
TOP_CONTRIBUTORS_QUERY = '''
WITH multi_contributor_repos as (
    SELECT gh.repo.name AS repo_name
    FROM "github" gh
    WHERE type = 'CommitCommentEvent'
    GROUP BY gh.repo.name
    HAVING COUNT(DISTINCT gh.actor.display_login) > 10
)
SELECT gh.actor.display_login Contributor, COUNT(gh.actor.display_login) AS Commits
FROM "github" gh
WHERE type = 'CommitCommentEvent' AND gh.repo.name IN (SELECT * FROM multi_contributor_repos)
GROUP BY gh.actor.display_login
ORDER BY Commits DESC
LIMIT 10;
'''
Ejemplo n.º 12
0
from rockset import Client, Q
from lambdarest import lambda_handler
from credentials import API_KEY
import json

rs = Client(api_key=API_KEY,
            api_server='https://api.rs2.usw2.rockset.com')

def lambda_handler(event, context):
    if 'queryStringParameters' in event:
        if 'interval' in event["queryStringParameters"]:
            interval = event["queryStringParameters"]["interval"]

    res = rs.sql(Q(f'''-- unnest tweets with stock ticker symbols from the past 1 day
WITH stock_tweets AS
      (SELECT t.user.name, t.text, upper(sym.text) AS ticker
       FROM   "twitter-firehose" AS t, unnest(t.entities.symbols) AS sym
       WHERE  t.entities.symbols[1] is not null
         AND  t._event_time > current_timestamp() - INTERVAL {interval}),
-- aggregate stock ticker symbol tweet occurrences 
    top_stock_tweets AS
      (SELECT ticker, count(*) AS tweet_count
       FROM   stock_tweets
       GROUP BY ticker),
-- join stock ticker symbol in tweets with NASDAQ company list data
    stock_info_with_tweets AS 
      (SELECT top_stock_tweets.ticker, top_stock_tweets.tweet_count,
              tickers.Name, tickers.Industry, tickers.MarketCap
       FROM top_stock_tweets JOIN tickers
         ON top_stock_tweets.ticker = tickers.Symbol)
NOTE: This example uses IPs as document IDs, so they cannot be `None`
"""

import flask
from flask_geomapper import flask_geomapper
from apscheduler.schedulers.background import BackgroundScheduler
from rockset import Client, Q
from os import getenv

app = flask.Flask(__name__)
fg = flask_geomapper(app, debug=True)

token = getenv("RS2_TOKEN") # or set token to a string with your API key

rs = Client(token, "https://api.rs2.usw2.rockset.com") # configure server based off your location (this one is us west)
collection_name = "flask-locations" # configure based off your collection name and workspace (if not in "commons")
collection = rs.Collection.retrieve(collection_name)

previous_locations = list(rs.sql(Q(f"select * from \"{collection_name}\""))) # retrieve previous locations from database

if previous_locations != []: fg.add_locations(previous_locations, ip_key="_id") # if there are any items in the database, add them to flask-geomapper

def add_docs():
    # Flush the locations gathered by flask_geomapper into the collection.
    collection.add_docs(fg.shape_to_docs())

scheduler = BackgroundScheduler(daemon=True) # init scheduler
scheduler.add_job(func=collection.add_docs, args=(fg.shape_to_docs(ip_key="_id"), ), trigger="interval", seconds=10)
"""
^^^
Add documents to collection every ten seconds.
Ejemplo n.º 14
0
import multiprocessing
import time
from rockset import Client, Q, F, P, ParamDict

rs = Client()


def audio_Stage1(process_name, tasks, results):

    print('[%s] evaluation routine starts' % process_name)

    qlambda = rs.QueryLambda.retrieveByVersion(
        'all_audios_stage1',
        version='c728db03ee4e3c50',
        #version= '3064713fedc5e63a' , #version2
        # '80cb8e8a16cafb5c', for. recall_ads collections
        workspace='commons')

    params = ParamDict()
    overlap = 5000

    while True:

        task_present = tasks.get()

        if (isinstance(task_present, tuple)) == False:

            print('[%s] evaluation routine quits' % process_name)
            # Indicate finished
            results.put(-1)
            break
Ejemplo n.º 15
0
import flask
from requests import get
from rockset import Client, ParamDict, Q, F
from os import getenv

rs = Client(api_key=getenv('ROCKSET_SECRET'),
            api_server='api.rs2.usw2.rockset.com')

app = flask.Flask(__name__)


@app.route('/', methods=['GET'])
def null():
    """Redirect bare-path visits to the project's GitHub repository."""
    redirect_page = '<script>location.replace("http://github.com/gadhagod/Heroku-Status-Badges")</script>'
    return (redirect_page)


@app.route('/<app_name>', methods=['GET'])
def running(app_name):
    """Serve a status badge reflecting https://<app_name>.herokuapp.com/."""
    status = get('https://{}.herokuapp.com/'.format(app_name)).status_code
    # 5xx -> down, 404 -> unknown app, anything else -> running.
    if 500 <= status <= 599:
        badge = 'badges/down.png'
    elif status == 404:
        badge = 'badges/not_found.png'
    else:
        badge = 'badges/running.png'
    return (flask.send_file(badge, mimetype='image/png'))
Ejemplo n.º 16
0
import flask
from os import getenv
from rockset import Client, Q
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from requests import get

app = flask.Flask(__name__)
rs = Client(api_key=getenv("RS2_TOKEN"), api_server="api.rs2.usw2.rockset.com")
collection = rs.Collection.retrieve("schoology-extension-downloads")
limiter = Limiter(
    app,
    key_func=get_remote_address,
    default_limits=["30 per day"]
)

@app.route("/")
def main():
    """Redirect to the newest release zipball of the schoology-extension repo."""
    release = get("https://api.github.com/repos/Harker-Hackers/schoology-extension/releases/latest").json()
    return flask.redirect(release["zipball_url"])

@app.route("/dashboard")
def dashboard():
    """Show download event times when the correct dashboard token is given."""
    if flask.request.args.get("token") != getenv("DASHBOARD_TOKEN"):
        # Wrong or missing token: behave exactly like the landing page.
        return main()
    rows = list(rs.sql(Q("select _event_time from \"schoology-extension-downloads\"")))
    return {"data": rows}

@app.after_request
def log(response):
    if response._status_code == 302:
Ejemplo n.º 17
0
"""Create a dashboard on Rockset Data using Dash (https://plot.ly/products/dash/)"""

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
from rockset import Client, Q

from config import ROCKSET_API_SERVER, ROCKSET_API_KEY
from constants import *

rs = Client(api_key=ROCKSET_API_KEY, api_server=ROCKSET_API_SERVER)


def get_data(query, x_label, y_label):
    """
    Execute query on Rockset
    Args:
        query (str): Rockset compatible SQL Query
        x_label (str): Values of this column will be mapped on x-axis of the graph
        y_label (str): Values of this column will be mapped on x-axis of the graph

    Returns:
        dict
    """
    result = rs.sql(Q(query))

    result[0]
    return {
        'x': [record[x_label] for record in result],
        'y': [record[y_label] for record in result]