def render_api_login_form(api_choice, user_id):
    """Return the login UI variant for the chosen API.

    A redis key ``{user_id}_{api_choice}_api`` existing means this API was
    previously connected, so the "connected" layout (index 1) is shown;
    otherwise the login form (index 0) is shown.
    """
    previously_connected = r.get(f"{user_id}_{api_choice}_api") is not None
    variant = 1 if previously_connected else 0
    return logins_ui_mapping[api_choice][variant]
def __init__(self, dataset=None):
    """Initialize, optionally loading a pickled dataset from redis.

    Args:
        dataset: ``None`` leaves the dataset unset; ``"*"`` is explicitly
            unsupported; any other value is treated as a redis key whose
            value is a pickled dataset.

    Raises:
        NotImplementedError: If ``dataset == "*"``.
    """
    if dataset is None:
        # No dataset requested; store the None as-is.
        self.dataset = None
    elif dataset == "*":
        # Wildcard loading is deliberately unsupported.
        raise NotImplementedError()
    else:
        # NOTE(review): pickle.loads on redis contents — safe only when the
        # redis store is trusted.
        self.dataset = pickle.loads(r.get(dataset))
def show_schema(api_choice, user_id):
    """Render the schema table for the chosen dataset, inferring it lazily.

    Args:
        api_choice (str): Value from the dataset dropdown.
        user_id (str): Session/user id.

    Returns:
        list: Dash elements (schema table plus an update button), or a
        message when nothing is selected / available.
    """
    if api_choice is None:
        return [html.H4("Nothing selected.")]

    df = get_data(api_choice, user_id)
    if df is None:
        return [html.H4("Nothing to display")]

    schema_key = f"{user_id}_{api_choice}_schema"
    stored = r.get(schema_key)
    if stored is None:
        # No cached schema yet: infer types from a small bootstrap sample
        # (with replacement, NaNs dropped) and persist the result.
        sample = df.sample(n=50, replace=True).dropna()
        types, subtypes = infer_types(sample, is_sample=True)
        r.set(schema_key, dill.dumps({
            "types": types,
            "subtypes": subtypes
        }))
    else:
        cached = dill.loads(stored)
        types, subtypes = cached["types"], cached["subtypes"]

    return [
        html.Br(),
        dcc.ConfirmDialog(id="schema_confirmation"),
        html.Button("Update schema", id="update_schema"),
        # Only the first 500 rows are handed to the table for display.
        schema_table(df[:500], types, subtypes),
    ]
def Pipeline_Options(options, user_id):
    """
    Generate the layout of the dashboard.

    Args:
        options (list(dict)): Available datasets as options \
                              for `dcc.Dropdown`.
        user_id (str): Session/user id.

    Returns:
        A Dash element or list of elements.
    """

    if user_id.startswith("python_generated_ssid"):
        # Trim id
        user_id = user_id.split("-")[-1]

    available_pipelines = {key.decode(): r.get(key)
                           for key in r.keys(f'{user_id}_pipeline_*')}

    algorithm_picker = html.Div(create_dropdown(
        "Choose algorithm type",
        options=[{'label': f'Pipeline --> {pipe_name}',
                  'value': pipe_name}
                 for pipe_name in available_pipelines],
        multi=False,
        id="algo_choice_pipeline"),
        className="horizontal_dropdowns")

    # Hidden placeholders so downstream callbacks always find their
    # inputs/outputs, even before the user makes choices.
    fitting_choices = html.Div(id="variable_choices_pipeline", children=[
        # Debuggers
        html.Button("Fit model", id="fit_model", n_clicks=0,
                    style={"display": "none"}),
        html.Div(create_dropdown("", [], multi=True, id="xvars_pipeline",
                                 style={"display": "none"})),
        html.Div(create_dropdown("", [], multi=True, id="yvars_pipeline",
                                 style={"display": "none"})),
    ])

    return html.Div(children=[
        # Dataset is defined in the ModelBuilder

        # Choose an algorithm
        algorithm_picker,

        # Available choices for fitting
        fitting_choices,

        # The results
        html.Div(id="training_results_pipeline"),
    ])
def __init__(self, func_name="", cols=None, dataset_choice=None,
             user_id=None):
    """Store the redis-backed query plus the user's fitting choices.

    Args:
        func_name (str): Redis key whose stored value becomes the query.
        cols: Column selection, kept as given.
        dataset_choice: Chosen dataset identifier, kept as given.
        user_id: Session/user id, kept as given.
    """
    # Raw value from redis (may be None if the key does not exist).
    self.query = r.get(func_name)
    self.cols = cols
    # Identity placeholder; presumably replaced later — TODO confirm.
    self.func = lambda x: x
    self.dataset_choice = dataset_choice
    self.user_id = user_id
def get_users_tweets(n_clicks, acc_name, user_id):
    """Fetch (and cache for one hour) a user's timeline, rendered as text.

    Args:
        n_clicks (int): Number of times the submit button was clicked;
            Dash passes ``None`` before the first click.
        acc_name (str): Twitter screen name whose timeline to fetch.
        user_id (str): Session/user id.

    Returns:
        list: Dash ``html.P`` elements, one per tweet.

    Raises:
        PreventUpdate: If the button has not been clicked yet.
    """
    # `not n_clicks` also covers None (initial callback firing), which the
    # previous `n_clicks > 0` comparison crashed on with a TypeError.
    if not n_clicks:
        raise PreventUpdate()

    # Get the API handle.
    # NOTE(review): pickle.loads on redis data — safe only for trusted storage.
    api = pickle.loads(r.get(f"{user_id}_twitter_api_handle"))

    # TODO: This is a cache so consider a better implementation.
    query = r.get(f"{user_id}_twitter_data_{acc_name}")
    if query is None:
        # TODO: Consider saving for future use / as a dataset.
        query = api.GetUserTimeline(screen_name=acc_name)

        # Expire the retrieved tweets cache in one hour
        r.set(f"{user_id}_twitter_data_{acc_name}",
              pickle.dumps(query), ex=3600)
    else:
        query = pickle.loads(query)

    return [html.P(str(status.text)) for status in query]
def cache_data_retrieve(key):
    """Return cached data for *key*, refetching from hive when stale.

    The cache entry is served only when it is under 24 hours old, carries
    an ETag, and that ETag still validates; otherwise a live fetch happens.
    """
    data = json.loads(r.get(key))

    def fetch_live_data():
        print('Cache is invalid')
        return retrieve_data_from_hive(key)

    # Entries older than 24 hours (86400 s) are always refetched.
    if int(time.time()) - data["cached_on"] > 86400:
        return fetch_live_data()

    # Without an ETag we cannot validate the entry.
    if 'ETag' not in data:
        return fetch_live_data()

    # Fresh and validated: serve straight from the cache.
    if cache_valid_check(key, data['ETag']):
        print('Returning Cached Data')
        return data

    return fetch_live_data()
def fulfil_request(url, etag=None):
    """Serve *url* from cache or hive, honouring ETag conditional requests.

    Args:
        url (str): Resource to serve; doubles as the cache key.
        etag (str): Client-supplied ETag, if any.

    Returns:
        Response: 304 when the client's ETag is current; otherwise the JSON
        payload with its ETag header, or an empty 500 response on failure.
    """
    if etag and cache_data_exists(url):
        data = json.loads(r.get(url))
        if data['ETag'] == etag:
            # Client's copy is still current.
            return Response('', status=304)
    try:
        if cache_data_exists(url):
            resp = cache_data_retrieve(url)
        else:
            resp = retrieve_data_from_hive(url)
        return Response(json.dumps(resp['data']),
                        headers={'ETag': resp['ETag']},
                        mimetype='application/json')
    except Exception as error:
        # Log with traceback at error level; the original logged at info and
        # implicitly returned None, which makes Flask raise on the view's
        # return value. Return an explicit empty 500 instead.
        logger.exception({
            'error': str(error),
        })
        return Response('', status=500)
def compare_torrents(name, files, sites):
    """Match *files* against the known torrents named *name*.

    Args:
        name: Torrent name; also used as the redis cache key.
        files: Either a mapping of file path -> size for per-file comparison,
            or an int total length for whole-torrent comparison.
            NOTE(review): when it is a mapping, its keys are rewritten
            in place (backslashes to slashes) — the caller's dict mutates.
        sites: Passed to ``format_sites`` to filter candidate sites.

    Returns:
        dict: ``name`` plus ``cmp_success`` / ``cmp_warning`` lists of
        ``{'id', 'sites'}`` entries.
    """
    torrents = r.get(name)
    if not torrents:
        # Cache miss: load from the DB and cache the JSON-serialized result.
        torrents = db.select_torrent(name)
        r.set(name, json.dumps(torrents))
    else:
        torrents = json.loads(str(torrents, encoding='utf-8'))
    cmp_success = []
    cmp_warning = []
    for t in torrents:
        success_count = failure_count = 0
        # NOTE(review): eval() on DB-stored text — only safe if the DB
        # contents are fully trusted; consider json/ast.literal_eval.
        torrent_files = eval(t['files'])
        result_site = format_sites(t['sites_existed'], sites)
        if not result_site:
            # No relevant sites for this torrent; skip it.
            continue
        if len(torrent_files):
            # Per-file comparison needs a path->size mapping, not an int.
            if type(files) is int:
                continue
            keys = list(files.keys())
            for key in keys:
                files[key.replace('\\', '/')] = files.pop(key)  # For Windows, change "\\" to "/" to match the database
            for k, v in torrent_files.items():
                # Sizes within ±5% of the recorded size count as a match.
                if v * 0.95 < files.get(k, -1) < v * 1.05:
                    success_count += 1
                else:
                    failure_count += 1
            if failure_count:
                # Mostly matching but with some mismatches -> warning.
                # (More failures than successes -> silently ignored.)
                if success_count > failure_count:
                    db.hit(t['id'])
                    cmp_warning.append({'id': t['id'], 'sites': result_site})
            else:
                # Every file matched -> success.
                db.hit(t['id'])
                cmp_success.append({'id': t['id'], 'sites': result_site})
        else:
            # No per-file info: compare the int total length within ±5%.
            if type(files) is not int:
                continue
            if t['length'] * 0.95 < files < t['length'] * 1.05:
                db.hit(t['id'])
                cmp_success.append({'id': t['id'], 'sites': result_site})
    return {'name': name, 'cmp_success': cmp_success,
            'cmp_warning': cmp_warning}
def display_reddit_posts(n_clicks, subreddit_choice, user_id):
    """
    For the Reddit API, allow the user to specify a subreddit \
    to get data from.

    Args:
        n_clicks (int): Number of times button was clicked.
        subreddit_choice (str): The name of the subreddit.
        user_id (str): Session/user id.

    Returns:
        list: A list of dash components.
    """

    # Guard clauses mirror the original nested checks.
    if n_clicks is None or n_clicks < 1:
        return [html.H4("No reddit data to display.")]
    if subreddit_choice is None:
        return [html.H4("No subreddit choice")]

    api = pickle.loads(r.get(f"{user_id}_reddit_api_handle"))
    subreddit = api.subreddit(subreddit_choice)

    def render_post(post):
        # One card per post: title + permalink header, author/score + body.
        return html.Div([
            dbc.Card([
                dbc.CardHeader([
                    html.H4(post.title),
                    html.A("view at reddit", href=post.permalink),
                ]),
                dbc.CardBody([
                    dbc.CardTitle(f"Written by {post.author.name}, "
                                  f"score: {post.score}"),
                    dbc.CardText(dcc.Markdown(post.selftext), ),
                ]),
            ]),
            html.Br(),
        ])

    return [render_post(post) for post in subreddit.hot(limit=5)]
def get_users_ganalytics(n_clicks, metrics, user_id):
    """Fetch chosen Google Analytics metrics via the REST API and cache them.

    Args:
        n_clicks (int): Number of times the button was clicked; Dash
            passes ``None`` before the first click.
        metrics (str | list(str)): Metric name(s) to request.
        user_id (str): Session/user id.

    Returns:
        list: Dash elements showing the raw parsed results.

    Raises:
        PreventUpdate: If metrics are missing or the button wasn't clicked.
    """
    if metrics is None:
        raise PreventUpdate()

    # `not n_clicks` also covers None (initial callback firing), which the
    # previous `n_clicks > 0` comparison crashed on with a TypeError.
    # NOTE: the previously-loaded `requester` API handle was never used
    # (its own TODO questioned it), so that dill.loads has been removed.
    if not n_clicks:
        raise PreventUpdate()

    if not isinstance(metrics, list):
        metrics = [metrics]

    # requests response object
    response = requests.get(
        f"http://127.0.0.1:5000/{user_id}/{','.join(metrics)}")

    # parse the results and reform them
    results = json.loads(response.text)
    for metric in metrics:
        # Presumably strips a 3-char prefix (e.g. "ga:") to match the
        # server's payload keys — TODO confirm against the REST server.
        data = results["data"][metric[3:]]

        # TODO: This signifies duplication of storage. The other
        #       server already stores the results in a redis cache
        #       but we cannot remove this because other parts of the
        #       code depend on this storage. Consider reworking the
        #       REST API, but using the same database for 2 servers
        #       is an anti-pattern for micro-services architectures.
        r.set(f"{user_id}_ganalytics_data_{metric}",
              pickle.dumps(data), ex=3600)

    return [html.Br(), html.P(str(results))]
def render_variable_choices_pipeline(algo_choice_pipeline, user_id):
    """
    Create a menu of dcc components to select pipeline and variables.

    Args:
        algo_choice_pipeline (str): Choice among (pre)defined pipelines.
        user_id (str): Session/user id.

    Returns:
        list: Dash elements.
    """

    # Make sure all variables have a value before returning choices
    if algo_choice_pipeline is None:
        return [html.H4("Select a pipeline first.")]

    model = dill.loads(r.get(algo_choice_pipeline))

    input_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.BaseInput)(model)
    terminal_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.TerminalNode)(model)

    # Hidden components exist only so later callbacks still find their
    # inputs/outputs ("debuggers" in the original layout).
    hidden_fit_button = html.Button("Fit model", id="fit_model", n_clicks=0,
                                    style={"display": "none"})
    hidden_xvars = html.Div(
        dcc.Dropdown(options=[], multi=True, id="xvars_pipeline",
                     style={"display": "none"}))

    layout = []
    options = []
    if isinstance(input_node, pipeline_classes.GenericInput):
        try:
            dataset_choice = input_node.dataset
        except AttributeError:
            return [html.H4("Something went wrong with the input")]

        if isinstance(input_node, pipeline_classes.TwitterAPI):
            # Twitter input has no x variables: show only the fit button.
            layout += [
                html.Button("Fit model", id="fit_model", n_clicks=0),

                # Debugger
                hidden_xvars,
            ]
        else:
            df = get_data(dataset_choice, user_id)

            # Truncate labels so they don't fill the whole dropdown
            options = [{'label': col[:35], 'value': col}
                       for col in df.columns]

            layout += [
                html.Div(create_dropdown("X variable(s)", options,
                                         multi=True, id="xvars_pipeline"),
                         className="horizontal_dropdowns"),

                # to debug the next callback
                hidden_fit_button,
            ]
    else:
        # Debuggers
        layout += [hidden_fit_button, hidden_xvars]

    if isinstance(terminal_node, pipeline_classes.UnsupervisedLearner):
        # Only existing for debugging the next callback, no yvars needed
        layout += [
            dcc.Dropdown(options=[], id="yvars_pipeline",
                         style={"display": "none"})
        ]
    else:
        layout += [
            html.Div(dcc.Dropdown(options=options, multi=False,
                                  id="yvars_pipeline"),
                     className="horizontal_dropdowns")
        ]

    return layout
def fit_pipeline_model(xvars, yvars, fit_model, algo_choice_pipeline,
                       user_id):
    """
    Take user choices and, if all are present, fit the appropriate model.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable.
        fit_model (int): Number of times the fit button was clicked.
        algo_choice_pipeline (str): Choice among (pre)defined pipelines.
        user_id: Session/user id.

    Returns:
        list: Dash element(s) with the results of model fitting.

    Raises:
        PreventUpdate: When required choices are missing.
    """

    if algo_choice_pipeline is None:
        raise PreventUpdate()

    # We have the dictionary that maps keys to models so use that
    model = dill.loads(r.get(algo_choice_pipeline))

    input_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.GenericInput)(model)
    terminal_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.TerminalNode)(model)

    # df stays None unless a dataset-backed input provides one; the
    # supervised branch below checks this explicitly (previously a
    # NameError was raised when df was never assigned).
    df = None
    if isinstance(input_node, pipeline_classes.GenericInput):
        if isinstance(input_node, pipeline_classes.TwitterAPI):
            if fit_model == 0:
                # Don't fit
                raise PreventUpdate()
            X = []
        else:
            try:
                dataset_choice = input_node.dataset
            except AttributeError:
                return [html.H4("Something went wrong with the input")]
            df = get_data(dataset_choice, user_id)

            # Make sure all variables have a value before fitting
            if any(x is None for x in [xvars, df, dataset_choice]):
                raise PreventUpdate()

            # if we used df[xvars] directly the ordering of variables that
            # the user gave would actually affect the model. This forces
            # those variables to be in the order of the original df. It
            # matters only here since the user might have defined a
            # FeatureMaker that depends on this.
            xvars = [xvar for xvar in df.columns if xvar in xvars]
            X = df[xvars]
    else:
        X = []

    if isinstance(terminal_node, pipeline_classes.UnsupervisedLearner):
        model.fit(X)
        return [html.H4(str(model.predict(X)))]
    else:
        if yvars is None:
            raise PreventUpdate()
        # A supervised terminal node needs a dataframe for its targets;
        # without one (e.g. Twitter input) we cannot fit or score.
        if df is None:
            raise PreventUpdate()
        model.fit(X, df[yvars])

        # TODO: Implement score function for all models.
        return [
            html.H4(
                f"Pipeline model scored: {model.score(df[xvars], df[yvars])}")
        ]
def render_table(api_choice, user_id):
    """
    Create a display for the chosen dataset.

    Args:
        api_choice (str): Value from the dropdown.
        user_id (str): Session/user id.

    Returns:
        list: A list of dash components.
    """

    if api_choice is None:
        return [html.H4("Nothing selected.")]

    if api_choice == "twitter_api":
        # NOTE(review): pickle.loads on redis data — trusted storage only.
        api = pickle.loads(r.get(f"{user_id}_{api_choice}_handle"))
        return pretty_print_tweets(api, 5)

    if api_choice == "reddit_api":
        # No need to get the api here
        # TODO: But maybe this change?
        return [
            html.H4("Write the name of a subreddit:"),
            dcc.Input(
                id="subreddit_choice",
                type="text",
                value="",
            ),
            html.Button("Gimme dem reddits", id="reddit_submit"),
            html.Br(),
            html.Br(),
            html.Div(id="subreddit_posts"),
        ]

    if api_choice == "spotify_api":
        spotify = pickle.loads(r.get(f"{user_id}_{api_choice}_handle"))
        top_playlists = spotify.category_playlists(
            "toplists")["playlists"]["items"]

        posts = [
            html.Div([
                dbc.Card([
                    dbc.CardHeader([
                        html.H4(playlist["name"]),
                        html.A("listen on Spotify",
                               href=playlist["external_urls"]["spotify"]),
                    ]),
                    dbc.CardBody([
                        dbc.CardTitle(
                            f"Owner: {playlist['owner']['display_name']}"),
                        dbc.CardText(
                            f"{playlist['tracks']['total']} total tracks"),
                    ]),
                ]),
                html.Br(),
            ]) for playlist in top_playlists
        ]
        return posts

    # Every remaining choice (including "quandl_api", which previously had
    # its own byte-identical branch) renders the stored dataframe.
    df = get_data(api_choice, user_id)
    if df is None:
        return [html.H4("Nothing to display")]

    # Show at most the first 10 columns to keep the table readable.
    df = df[df.columns[:10]]
    return [
        html.Br(),
        create_table(df),
    ]