def plot_gps_clusters(ds, user_id:str, zoom=5):
    """
    Show a user's GPS samples as a clustered marker layer on a map widget.

    Args:
        ds (DataStream): datastream object
        user_id (str): uuid of a user
        zoom: zoom level handed to the Map constructor

    Returns:
        The Map object with a single MarkerCluster layer added.
    """
    frame = ds_to_pdf(ds, user_id)
    points = [(rec["latitude"], rec["longitude"]) for _, rec in frame.iterrows()]

    # The map is centred on the first sample; it stays None when there are no rows.
    first_point = list(points[0]) if points else None

    pins = [Marker(location=point) for point in points]
    gps_map = Map(center=first_point, zoom=zoom)
    gps_map.add_layer(MarkerCluster(markers=pins))
    return gps_map
def plot_gantt(ds, user_id):
    """
    Draw a Gantt chart with one bar per row of a user's data.

    Each row contributes a task named after its ``stresser_sub`` value,
    spanning ``start_time`` to ``end_time`` and coloured by ``stresser_main``.

    Args:
        ds: datastream object passed to ds_to_pdf
        user_id: user whose rows are plotted
    """
    frame = ds_to_pdf(ds, user_id)
    tasks = [
        {
            "Task": rec["stresser_sub"],
            "Start": rec["start_time"],
            "Finish": rec["end_time"],
            "Resource": rec["stresser_main"],
        }
        for _, rec in frame.iterrows()
    ]

    chart_options = {
        "index_col": 'Resource',
        "title": 'Stressers, Main & Sub Categories',
        "show_colorbar": True,
        "bar_width": 0.8,
        "showgrid_x": True,
        "showgrid_y": True,
    }
    chart = ff.create_gantt(tasks, **chart_options)

    # Hide the y-axis tick labels on the finished figure.
    chart['layout']['yaxis'].update({"showticklabels": False})
    iplot(chart, filename='gantt-hours-minutes')
def plot_bar(ds, x_axis_column="stresser_main"):
    """
    Plot the mean ``density`` per category as grouped bars, one trace per user.

    Args:
        ds: datastream object passed to ds_to_pdf
        x_axis_column: column whose values become the bar categories
    """
    pdf = ds_to_pdf(ds)

    # Average only the column that is plotted. Aggregating the whole frame
    # with 'mean' fails on non-numeric columns in recent pandas releases.
    grouped_pdf = pdf.groupby(["user", x_axis_column], as_index=False)["density"].mean()
    user_ids = pdf.groupby("user", as_index=False).last()

    data = []
    for user in user_ids["user"]:
        sub = grouped_pdf.loc[grouped_pdf['user'] == user]
        # sort_values returns a new frame; the original discarded the result.
        sub = sub.sort_values(x_axis_column)
        data.append(
            go.Bar({
                'y': sub["density"],
                'x': sub[x_axis_column],
                'name': user
            }))

    layout = go.Layout(
        title="All Participants' Stress Levels By Each Stressors",
        yaxis=dict(title='Average Stress Density'))
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, filename='basic-line')
def plot_sankey(ds, user_id, cat_cols=None, value_cols='density', title="Stressers' Sankey Diagram"):
    """
    Draw a Sankey diagram linking consecutive category columns.

    Every adjacent pair of columns in ``cat_cols`` becomes a set of
    source -> target links whose width is the mean of ``value_cols``.

    Args:
        ds: datastream object passed to ds_to_pdf
        user_id: user whose rows are plotted
        cat_cols: ordered category column names; defaults to
            ["stresser_main", "stresser_sub"]
        value_cols: name of the column averaged for the link widths
        title: figure title

    Raises:
        ValueError: if fewer than two category columns are given, since no
            source/target pair can be formed.
    """
    # None replaces the former mutable list default.
    if cat_cols is None:
        cat_cols = ["stresser_main", "stresser_sub"]
    if len(cat_cols) < 2:
        raise ValueError("cat_cols must contain at least two column names.")

    pdf = ds_to_pdf(ds, user_id)

    # Node labels: distinct values of each category column, de-duplicated
    # across columns while keeping first-seen order.
    label_list = list(dict.fromkeys(
        label for col in cat_cols for label in set(pdf[col].values)))
    label_index = {label: position for position, label in enumerate(label_list)}

    # Transform the frame into source-target pairs, one per adjacent column pair.
    pair_frames = []
    for source_col, target_col in zip(cat_cols, cat_cols[1:]):
        pair = pdf[[source_col, target_col, value_cols]]
        pair.columns = ['source', 'target', 'density']
        pair_frames.append(pair)
    links = pd.concat(pair_frames)
    links = links.groupby(['source', 'target']).agg({'density': 'mean'}).reset_index()

    # Translate labels to node indices. Every source/target value comes from
    # the columns the labels were built from, so each lookup hits.
    links['sourceID'] = links['source'].map(label_index)
    links['targetID'] = links['target'].map(label_index)

    # Creating the sankey diagram.
    data = dict(type='sankey',
                node=dict(pad=15,
                          thickness=20,
                          line=dict(color="black", width=0.5),
                          label=label_list),
                link=dict(source=links['sourceID'],
                          target=links['targetID'],
                          value=links['density']))
    layout = dict(title=title, font=dict(size=10))
    fig = dict(data=[data], layout=layout)
    iplot(fig, validate=False)
def plot_timeseries(ds, user_id: str = None, x_axis_column: str = "timestamp", y_axis_column: list = "all", graph_title: str = "Graph"):
    """
    Line plot of timeseries data, one subplot row per user.

    Args:
        ds: datastream object passed to ds_to_pdf
        user_id (str): uuid of a user
        x_axis_column (str): timestamp or localtime are acceptable values only
        y_axis_column (list): column name, list of column names, or "all" to
            plot every column except timestamp, user, version and localtime
        graph_title (str): title of the graph

    Raises:
        Exception: if x_axis_column is not "timestamp" or "localtime".
        TypeError: if y_axis_column is neither a string nor a list.
    """
    # Validate the arguments before doing any data loading.
    if x_axis_column not in ["timestamp", "localtime"]:
        raise Exception(" X axis can only be timestamp or localtime column.")
    if not isinstance(y_axis_column, (list, str)):
        # Previously this case fell through every branch and failed later
        # with an unbound local variable.
        raise TypeError(
            'y_axis_column must be "all", a column name, or a list of column names.')

    pdf = ds_to_pdf(ds, user_id)
    user_ids = list(pdf.user.unique())
    subplot_titles = ["Participant ID: {}".format(x.upper()) for x in user_ids]

    if y_axis_column == "all":
        y_axis = sorted(
            set(pdf.columns.to_list())
            - {"timestamp", "user", "version", "localtime"})
    elif isinstance(y_axis_column, list):
        y_axis = y_axis_column
    else:
        y_axis = [y_axis_column]

    fig = make_subplots(rows=len(user_ids), cols=1, subplot_titles=subplot_titles)

    for row_id, sid in enumerate(user_ids, start=1):
        # Legend entries are shown only for the last user's traces so each
        # column name appears once.
        show_legend = user_ids[-1] == sid
        user_rows = pdf[pdf.user == sid]
        for y in y_axis:
            fig.add_trace(go.Scatter(x=user_rows[x_axis_column],
                                     y=user_rows[y],
                                     showlegend=show_legend,
                                     name=y),
                          row=row_id,
                          col=1)
        fig.update_xaxes(title_text="Timestamp", row=row_id, col=1)

    height = 500 * len(user_ids)
    fig.update_layout(height=height, width=900, title_text=graph_title)
    fig.show()
def plot_comparison(ds, x_axis_column="stresser_main", usr_id=None, compare_with="all"):
    """
    Compare one user's mean ``density`` per category against other users.

    Args:
        ds: datastream object passed to ds_to_pdf
        x_axis_column: column whose values become the bar categories
        usr_id: user to highlight; required
        compare_with: another user id, or "all"/None to compare against
            every other user

    Raises:
        Exception: if usr_id is None or blank.
    """
    # Fail fast: no need to load data for an invalid request.
    if not usr_id:
        raise Exception("usr_id cannot be None/Blank.")

    pdf = ds_to_pdf(ds)
    usr_data = pdf.loc[pdf['user'] == str(usr_id)]

    if compare_with == "all" or compare_with is None:
        compare_with_data = pdf.loc[pdf['user'] != str(usr_id)]
        # None is treated like "all" for filtering, so label it the same way
        # instead of passing None as the trace name.
        compare_label = "All Participants"
    else:
        compare_with_data = pdf.loc[pdf['user'] == str(compare_with)]
        compare_label = compare_with

    # Average only the plotted column. Aggregating the whole frame with
    # 'mean' fails on non-numeric columns in recent pandas releases.
    grouped_user_pdf = usr_data.groupby(
        [x_axis_column], as_index=False)["density"].mean()
    grouped_compare_with_pdf = compare_with_data.groupby(
        [x_axis_column], as_index=False)["density"].mean()

    data = [
        go.Bar({
            'y': grouped_user_pdf["density"],
            'x': grouped_user_pdf[x_axis_column],
            'name': usr_id
        }),
        go.Bar({
            'y': grouped_compare_with_pdf["density"],
            'x': grouped_compare_with_pdf[x_axis_column],
            'name': compare_label
        }),
    ]

    layout = go.Layout(
        title="Comparison of Stress Levels Amongst Participants",
        yaxis=dict(title='Average Stress Density'))
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, filename='basic-line')
def plot_pie(ds, user_id, group_by_column="stresser_main"):
    """
    Pie chart of how many ``density`` values fall into each group.

    Args:
        ds: datastream object passed to ds_to_pdf
        user_id: user whose rows are plotted
        group_by_column: column whose distinct values become the pie slices
    """
    pdf = ds_to_pdf(ds, user_id)

    # Count non-null density values per group. The group keys are the slice
    # labels; the original always read labels from "stresser_main", which
    # produced wrong labels for any other group_by_column.
    counts = pdf.groupby(str(group_by_column))["density"].count()

    trace = go.Pie(labels=counts.index.tolist(), values=counts.tolist())
    iplot([trace], filename='stresser_pie_chart')
def plot_gps_clusters(ds, user_id: str = None, zoom=10):
    """
    Plot GPS coordinates as individual markers on a folium map.

    The map is centred on the first row's coordinates and every row gets
    its own marker.

    Args:
        ds (DataStream): datastream object
        user_id (str): uuid of a user
        zoom: initial zoom level passed to folium.Map as zoom_start

    Returns:
        The folium map with all markers added.

    Raises:
        ValueError: if there are no rows to plot.
    """
    pdf = ds_to_pdf(ds, user_id)
    if len(pdf) == 0:
        # Previously the map was only created inside the row loop, so empty
        # data ended in an unbound-variable error at the return statement.
        raise ValueError("No GPS data available to plot.")

    points = list(zip(pdf["latitude"], pdf["longitude"]))

    # Build the map exactly once, then attach one marker per row.
    m = folium.Map(location=list(points[0]), zoom_start=zoom)
    for point in points:
        folium.Marker(location=point).add_to(m)
    return m
def plot_hist(ds, user_id: str, x_axis_column=None):
    """
    Histogram plot of a user's data.

    With ``x_axis_column`` set, only that column is plotted; otherwise the
    whole frame left after ``_remove_cols`` is plotted.

    Args:
        ds (DataStream): datastream object
        user_id (str): uuid of a user
        x_axis_column (str): x axis column of the plot
    """
    frame = ds_to_pdf(ds, user_id)

    # Configure offline notebook plotting before anything is drawn.
    cf.set_config_file(offline=True, world_readable=True, theme='ggplot')
    init_notebook_mode(connected=True)

    frame = _remove_cols(frame)

    if not x_axis_column:
        frame.iplot(kind='histogram', filename='basic histogram')
        return

    column_values = frame[str(x_axis_column)]
    iplot([go.Histogram(x=column_values)], filename='basic histogram')
def plot_timeseries(ds: DataStream, user_id: str, y_axis_column: str = None):
    """
    Line plot of a user's timeseries data against the timestamp column.

    The x axis is always the ``timestamp`` column. With ``y_axis_column``
    set, only that column is drawn; otherwise every column left after
    ``_remove_cols`` gets its own trace.

    Args:
        ds (DataStream): datastream object
        user_id (str): uuid of a user
        y_axis_column (str): optional single column to plot on the y axis
    """
    frame = ds_to_pdf(ds, user_id)

    # Configure offline notebook plotting before anything is drawn.
    cf.set_config_file(offline=True, world_readable=True, theme='ggplot')
    init_notebook_mode(connected=True)

    # Keep the timestamps aside before _remove_cols reshapes the frame.
    timestamps = frame['timestamp']
    frame = _remove_cols(frame)

    if y_axis_column:
        traces = [go.Scatter(x=timestamps, y=frame[str(y_axis_column)])]
    else:
        traces = []
        for column_name in frame.columns:
            traces.append({
                'x': timestamps,
                'y': frame[column_name],
                'name': column_name
            })

    iplot(traces, filename='time-series-plot')