Beispiel #1
0
def confusion_matrix_fig(model, X_train, X_test, y_train, y_test):
    """Fit a binary classifier and return its confusion matrix as a pie chart.

    The model is fitted on the training split, used to predict the test
    split, and the four confusion-matrix cells are drawn as slices of a
    plotly pie chart (styling inspired by the plotly-dash sample apps).

    Args:
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: training features handed to ``model.fit``. Pass already
            scaled features if the model needs them; no scaling happens here.
        X_test: test features handed to ``model.predict``.
        y_train: training labels.
        y_test: true labels of the test split.

    Returns:
        A ``go.Figure`` with a single pie trace whose slices are ordered
        TP, FN, FP, TN.

    Raises:
        ValueError: if the confusion matrix does not have exactly four cells
            (the unpacking below requires a 2x2 matrix).
    """
    # Use the arguments the caller supplied; the previous body read the
    # undefined names ``X_train_scaled`` / ``X_test_scaled`` instead.
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)

    matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_test)

    # ravel() flattens the 2x2 matrix row by row: tn, fp, fn, tp
    tn, fp, fn, tp = matrix.ravel()

    # Slice order of the pie; labels, short labels and colours follow it.
    values = [tp, fn, fp, tn]
    label_text = [
        "True Positive", "False Negative", "False Positive", "True Negative"
    ]
    labels = ["TP", "FN", "FP", "TN"]

    blue = cl.flipper()["seq"]["9"]["Blues"]
    colors = ["#13c6e9", blue[1], "#ff916d", "#ff744c"]

    trace0 = go.Pie(
        labels=label_text,
        values=values,
        hoverinfo="label+value+percent",
        textinfo="text+value",
        text=labels,
        sort=False,  # keep the TP/FN/FP/TN order instead of sorting by size
        marker=dict(colors=colors),
        insidetextfont={"color": "white"},
        rotation=90,
    )

    layout = go.Layout(
        margin=dict(l=50, r=50, t=100, b=10),
        legend=dict(bgcolor="#282b38",
                    font={"color": "#a5b1cd"},
                    orientation="h"),
        plot_bgcolor="#282b38",
        paper_bgcolor="#282b38",
        font=dict(color="#a5b1cd", size=18),
        title_font=dict(size=22),
        width=500,
        height=500,
    )

    return go.Figure(data=[trace0], layout=layout)
Beispiel #2
0
def serve_pie_confusion_matrix(model, X_test, y_test, Z, threshold):
    """Return a pie-chart figure of the confusion matrix at a given threshold.

    ``threshold`` is rescaled linearly onto the value range of ``Z`` and the
    model's ``decision_function`` output is binarised against that cut-off
    before the confusion matrix is computed.
    """
    # Rescale the relative threshold to the range spanned by Z
    z_high = Z.max()
    z_low = Z.min()
    cutoff = threshold * (z_high - z_low) + z_low

    decision_values = model.decision_function(X_test)
    predicted = (decision_values > cutoff).astype(int)

    # Flattened 2x2 matrix: tn, fp, fn, tp
    cells = metrics.confusion_matrix(y_true=y_test, y_pred=predicted)
    tn, fp, fn, tp = cells.ravel()

    sequential = cl.flipper()['seq']['9']
    blues = sequential['Blues']
    reds = sequential['Reds']

    pie = go.Pie(
        labels=[
            "True Positive",
            "False Negative",
            "False Positive",
            "True Negative",
        ],
        values=[tp, fn, fp, tn],
        hoverinfo='label+value+percent',
        textinfo='text+value',
        text=["TP", "FN", "FP", "TN"],
        sort=False,
        marker=dict(colors=[blues[4], blues[1], reds[1], reds[4]]),
    )

    page = go.Layout(
        title='Confusion Matrix',
        margin=dict(l=10, r=10, t=60, b=10),
        legend=dict(bgcolor='rgba(255,255,255,0)', orientation='h'),
    )

    return go.Figure(data=[pie], layout=page)
Beispiel #3
0
def plotAnswerIDCount(submodel):
    """Plot a stacked bar chart of the mean AnswerID count per customer class.

    For every distinct value in ``submodel['class']`` the mean of
    ``AnswerID_count`` is computed per ``YearsElectrified``, rounded up with
    ``ceil`` and plotted for years 1 to 15; years without data are drawn as 0.

    Args:
        submodel: DataFrame with the columns ``class``, ``YearsElectrified``
            and ``AnswerID_count``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.
    """
    yrs = list(range(1, 16))
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    # Look the 3-colour sequential palettes up once instead of per class.
    palettes = cl.flipper()['seq']['3']

    data = []
    for i, c in enumerate(submodel['class'].unique()):
        # Mean AnswerID count for each number of years electrified
        mean_counts = (submodel.loc[submodel['class'] == c]
                       .groupby('YearsElectrified')['AnswerID_count']
                       .mean()
                       .map(ceil)
                       .reindex(yrs, fill_value=0))

        data.append(go.Bar(
            x=yrs,
            y=mean_counts,
            name=c,
            # Cycle through the palette names so that more than seven
            # classes no longer raise an IndexError.
            marker=dict(color=palettes[clrs[i % len(clrs)]][1]),
        ))

    layout = go.Layout(
        barmode='stack',
        title='Number of AnswerIDs inferred for each customer class for 1 - 15+ years after electrification',
        xaxis=dict(title='Years Electrified', tickvals=yrs),
        yaxis=dict(title='AnswerID count'),
        margin=dict(t=100, r=150, b=50, l=150))

    fig = go.Figure(data=data, layout=layout)

    return offline.iplot(fig, filename=os.path.join(image_dir, 'answer_id_count' + '.png'))
Beispiel #4
0
def serve_pie_confusion_matrix(model, X_test, y_test, Z, threshold):
    """Build the dark-themed confusion-matrix pie for a probabilistic model.

    The relative ``threshold`` is mapped onto the range of ``Z``; the output
    of ``model.predict_proba`` is binarised against it and, when that output
    is two-dimensional, only its second column is used as the prediction.
    """
    # Map the relative threshold onto the range of Z
    z_high = Z.max()
    z_low = Z.min()
    cutoff = threshold * (z_high - z_low) + z_low

    y_pred_test = (model.predict_proba(X_test) > cutoff).astype(int)
    two_dimensional = len(y_pred_test.shape) == 2
    if two_dimensional:
        y_pred_test = y_pred_test[:, 1]

    # Flattened 2x2 matrix: tn, fp, fn, tp
    tn, fp, fn, tp = metrics.confusion_matrix(
        y_true=y_test, y_pred=y_pred_test).ravel()

    sequential = cl.flipper()["seq"]["9"]
    blue = sequential["Blues"]
    red = sequential["Reds"]  # looked up as in the original; not used below

    slice_names = [
        "True Positive", "False Negative", "False Positive", "True Negative"
    ]
    slice_codes = ["TP", "FN", "FP", "TN"]
    slice_colors = ["#13c6e9", blue[1], "#ff916d", "#ff744c"]

    pie = go.Pie(
        labels=slice_names,
        values=[tp, fn, fp, tn],
        hoverinfo="label+value+percent",
        textinfo="text+value",
        text=slice_codes,
        sort=False,
        marker=dict(colors=slice_colors),
        insidetextfont={"color": "white"},
        rotation=90,
    )

    dark_legend = dict(bgcolor="#282b38",
                       font={"color": "#a5b1cd"},
                       orientation="h")
    page = go.Layout(
        title="Confusion Matrix",
        margin=dict(l=50, r=50, t=100, b=10),
        legend=dark_legend,
        plot_bgcolor="#282b38",
        paper_bgcolor="#282b38",
        font={"color": "#a5b1cd"},
    )

    return go.Figure(data=[pie], layout=page)
Beispiel #5
0
def serve_pie_confusion_matrix(model, X_test, y_test, Z, threshold):
    """Build the "Existence Ratio" pie from thresholded decision values.

    ``threshold`` is rescaled onto the range of ``Z``, the model's
    ``decision_function`` output is binarised against it, and the four cells
    of the resulting confusion matrix become the slices of the pie.
    """
    # Rescale the relative threshold to the range spanned by Z
    z_high = Z.max()
    z_low = Z.min()
    cutoff = threshold * (z_high - z_low) + z_low

    decision_values = model.decision_function(X_test)
    predicted = (decision_values > cutoff).astype(int)

    # Flattened 2x2 matrix: tn, fp, fn, tp
    tn, fp, fn, tp = metrics.confusion_matrix(
        y_true=y_test, y_pred=predicted).ravel()

    sequential = cl.flipper()["seq"]["9"]
    blue = sequential["Blues"]
    red = sequential["Reds"]  # looked up as in the original; not used below

    pie = go.Pie(
        labels=[
            "Low probability",
            "Low probability points in red area",
            "High probability points in blue area",
            "High probability",
        ],
        values=[tp, fn, fp, tn],
        hoverinfo="label+value+percent",
        textinfo="text+value",
        text=["LP", "BinH", "HinB", "HP"],
        sort=False,
        marker=dict(colors=["#13c6e9", blue[1], "#ff916d", "#ff744c"]),
        insidetextfont={"color": "white"},
        rotation=90,
    )

    page = go.Layout(
        title="Existence Ratio",
        margin=dict(l=50, r=50, t=100, b=10),
        legend=dict(bgcolor="#282b38",
                    font={"color": "#a5b1cd"},
                    orientation="h"),
        plot_bgcolor="#282b38",
        paper_bgcolor="#282b38",
        font={"color": "#a5b1cd"},
    )

    return go.Figure(data=[pie], layout=page)
Beispiel #6
0
    def plot_pie(self):
        """Return a pie chart of the summed TP / FN / FP / TN counts.

        The confusion matrix of ``self.y_test`` against ``self.y_pred`` is
        reduced per class (column sums, row sums and diagonal) and the
        per-class counts are then summed into one value per slice.
        """
        cm = confusion_matrix(self.y_test, self.y_pred)
        diagonal = np.diag(cm)

        # Per-class counts derived from column sums, row sums and the diagonal
        per_class_fp = cm.sum(axis=0) - diagonal
        per_class_fn = cm.sum(axis=1) - diagonal
        per_class_tp = diagonal
        per_class_tn = cm.sum() - (per_class_fp + per_class_fn + per_class_tp)

        # Collapse the per-class vectors into float totals
        fp = per_class_fp.astype(float).sum()
        fn = per_class_fn.astype(float).sum()
        tp = per_class_tp.astype(float).sum()
        tn = per_class_tn.astype(float).sum()

        sequential = cl.flipper()['seq']['9']
        blues = sequential['Blues']
        reds = sequential['Reds']

        pie = go.Pie(
            labels=[
                "True Positive",
                "False Negative",
                "False Positive",
                "True Negative",
            ],
            values=[tp, fn, fp, tn],
            hoverinfo='label+value+percent',
            textinfo='text+value',
            text=["TP", "FN", "FP", "TN"],
            sort=False,
            marker=dict(colors=[blues[4], blues[1], reds[1], reds[4]]),
        )

        page = go.Layout(
            title='TP, TN, FP, FN',
            margin=dict(l=10, r=10, t=60, b=10),
            legend=dict(bgcolor='rgba(255,255,255,0)', orientation='h'),
        )

        return go.Figure(data=[pie], layout=page)
Beispiel #7
0
def plotMaxDemandSpread(md):
    """Plot a month-by-hour heatmap of how often ``Unitsread_kw`` was recorded.

    ``md`` is pivoted on (month, hour) counting the ``Unitsread_kw`` entries;
    the counts are coloured on a two-stop scale built from the first and last
    colour of the 3-colour 'Oranges' palette.
    """
    counts = pd.pivot_table(md,
                            values='Unitsread_kw',
                            index=['month', 'hour'],
                            aggfunc='count').reset_index()

    oranges = cl.flipper()['seq']['3']['Oranges']
    two_stop_scale = [[0.0, oranges[0]], [1.0, oranges[-1]]]

    heatmap = go.Heatmap(x=counts['month'],
                         y=counts['hour'],
                         z=counts['Unitsread_kw'],
                         colorscale=two_stop_scale)

    month_axis = dict(title='month', tickvals=list(range(1, 13)))
    hour_axis = dict(title='hour', tickvals=list(range(1, 25)))
    page = go.Layout(
        title='Spread of occurence of maximum demand for all households',
        xaxis=month_axis,
        yaxis=hour_axis)

    fig = go.Figure(data=[heatmap], layout=page)
    target = os.path.join(image_dir, 'max-demand-spread' + '.png')

    return offline.iplot(fig, filename=target)
Beispiel #8
0
def plot15YearBmDemandSummary(model_dir=dpet_dir):
    """
    Plot one filled line per customer class showing 'Energy [kWh]' against
    'YearsElectrified', using the summary returned by
    ``bmDemandSummary(model_dir)`` sorted by energy.
    """
    palette_names = [
        'Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys'
    ]

    columns = ['class', 'YearsElectrified', 'Energy [kWh]']
    df = bmDemandSummary(model_dir)[columns].sort_values(by='Energy [kWh]')

    data = []
    for position, customer_class in enumerate(df['class'].unique()):
        rows = df['class'] == customer_class
        # Second colour of the 3-colour sequential palette for this class
        line_color = cl.flipper()['seq']['3'][palette_names[position]][1]
        data.append(
            go.Scatter(x=df.loc[rows, 'YearsElectrified'],
                       y=df.loc[rows, 'Energy [kWh]'],
                       name=customer_class,
                       fill='tonexty',
                       mode='lines',
                       line=dict(color=line_color, width=3)))

    grey = 'rgb(107, 107, 107)'
    layout = go.Layout(
        title='Annualised Monthly Energy Consumption for Domestic Energy Consumers',
        xaxis=dict(title='years since electrification',
                   tickfont=dict(size=14, color=grey)),
        yaxis=dict(title='average annual kWh/month',
                   titlefont=dict(size=16, color=grey)),
    )

    figure_spec = {"data": data, "layout": layout}
    target = os.path.join(image_dir, '15year_demand_summary' + '.png')

    return offline.iplot(figure_spec, filename=target)
Beispiel #9
0
def plotClassDist(year, class_dir):
    """
    Plot the probability distribution over all inferred classes for all the
    AnswerIDs in a given year as a heatmap.

    Args:
        year: year handed to ``readClasses`` and shown in the plot title.
        class_dir: directory handed to ``readClasses``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.
    """
    # Five-stop colour scale assembled from the diverging RdGy palette,
    # deliberately re-ordered (2, 3, 4, 1, 0).
    colors = cl.flipper()['div']['5']['RdGy']
    scl = [[0, colors[2]], [0.25, colors[3]], [0.5, colors[4]],
           [0.75, colors[1]], [1, colors[0]]]

    df = readClasses(year, class_dir)
    # Long format: one row per (AnswerID, class) pair with its likelihood
    melt = df.reset_index().melt(id_vars='AnswerID')
    # Hover text shown for every heatmap cell
    melt['tixnames'] = melt.apply(
        lambda x: 'AnswerID: ' + str(x['AnswerID']) + '<br />class: ' + x[
            'variable'] + '<br />likelihood: ' + "{0:.3f}".format(x['value']),
        axis=1)
    trace = go.Heatmap(z=melt.value,
                       x=melt.AnswerID,
                       y=melt.variable,
                       colorscale=scl,
                       colorbar=dict(title='likelihood'),
                       text=melt['tixnames'],
                       hoverinfo='text')

    data = [trace]
    layout = go.Layout(
        title='Probability Distribution of Customer Classes for ' + str(year),
        xaxis=dict(
            title='household IDs',
            type='category',
            showticklabels=False,
            ticks='',
            showline=True,
        ),
        yaxis=dict(
            type='category',
            showline=True,
        ),
        # A plain dict replaces the deprecated ``go.Margin`` class and is
        # accepted by every plotly version.
        margin=dict(l=175, r=75, b=50, t=100))

    return offline.iplot({"data": data, "layout": layout})
Beispiel #10
0
def bar_chart(series: pd.Series, colors: str = 'Blues', **kwargs):
    """
    Build a single-trace bar chart from a pd.Series.

    The index of the series supplies the x positions and its values the bar
    heights. All bars share the last colour of the 3-colour sequential
    colorlover scale named by ``colors``.

    Args:
        series: pd.Series to plot
        colors: name of a scale available in cl.flipper()['seq']['3']
        **kwargs: forwarded unchanged to go.Layout

    Returns: go.Figure object that can be displayed, saved, etc

    """
    snapshot = series.copy()
    bar_color = cl.flipper()['seq']['3'][colors][-1]

    bars = go.Bar(x=snapshot.index,
                  y=snapshot.values,
                  marker=dict(color=bar_color))

    page = go.Layout(**kwargs)
    return go.Figure([bars], page)
Beispiel #11
0
 def test_flipper(self):
     """flipper() must index the 3-colour RdYlBu scale by type, count, name."""
     expected = [
         'rgb(252,141,89)',
         'rgb(255,255,191)',
         'rgb(145,191,219)',
     ]
     by_type = cl.flipper()
     actual = by_type['div']['3']['RdYlBu']
     self.assertEqual(actual, expected)
Beispiel #12
0
def plotClassYearRange(yearstart, yearend, class_dir):
    """
    Create a grid of heatmaps, one per year, showing the probability
    distribution over all inferred classes for a range of years.

    Args:
        yearstart: first year to plot (inclusive).
        yearend: last year to plot (inclusive).
        class_dir: directory handed to ``readClasses``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.

    Years whose data cannot be loaded or plotted are skipped with a warning
    and leave an empty cell in the grid.
    """
    import warnings

    # Five-stop colour scale assembled from the diverging RdGy palette,
    # deliberately re-ordered (2, 3, 4, 1, 0).
    colors = cl.flipper()['div']['5']['RdGy']
    scl = [[0, colors[2]], [0.25, colors[3]], [0.5, colors[4]],
           [0.75, colors[1]], [1, colors[0]]]

    ncol = 3
    years = list(range(yearstart, yearend + 1))
    nplots = yearend - yearstart + 1
    nrow = int(ceil(nplots / ncol))

    fig = tools.make_subplots(rows=nrow,
                              cols=int(ncol),
                              subplot_titles=[str(y) for y in years],
                              print_grid=False)

    # Only one heatmap carries the shared colour bar: the first one that is
    # actually plotted (previously the bar vanished if the first year failed).
    colorbar_shown = False

    for position, y in enumerate(years):
        # Fill the grid row by row, three plots per row (1-based indices)
        ro = position // ncol + 1
        c = position % ncol + 1

        try:
            df = readClasses(y, class_dir)
            melt = df.reset_index().melt(id_vars='AnswerID')
            melt['tixnames'] = melt.apply(
                lambda x: 'AnswerID: ' + str(x['AnswerID']) + '<br />class: ' +
                x['variable'] + '<br />likelihood: ' + "{0:.3f}".format(x[
                    'value']),
                axis=1)
            trace = go.Heatmap(z=melt.value,
                               x=melt.AnswerID,
                               y=melt.variable,
                               text=melt['tixnames'],
                               hoverinfo='text',
                               colorscale=scl,
                               showscale=not colorbar_shown,
                               colorbar=dict(title='likelihood',
                                             len=0.5,
                                             yanchor='bottom'))
            fig.append_trace(trace, ro, c)
            colorbar_shown = True
        except Exception as err:
            # Best effort: keep plotting the remaining years, but say which
            # year was dropped instead of hiding the failure. Catching
            # Exception (not a bare except) lets Ctrl-C through.
            warnings.warn('Skipping year {}: {!r}'.format(y, err))

    fig['layout'].update(
        showlegend=False,
        title='Probability Distribution of Customer Classes from ' +
        str(yearstart) + '-' + str(yearend),
        height=350 + 300 * (nrow - 1),
        margin=dict(l=140))

    # Style every axis pair of the grid. Only the first column keeps its
    # class labels on the y axis; household IDs are never shown on x.
    for k in range(1, nrow * ncol + 1):
        if (k - 1) % ncol == 0:
            yaxis = dict(type='category', showline=True)
        else:
            yaxis = dict(showticklabels=False, ticks='', showline=True)
        xaxis = dict(type='category',
                     showticklabels=False,
                     ticks='',
                     showline=True)
        fig['layout'].update({
            'yaxis{}'.format(k): yaxis,
            'xaxis{}'.format(k): xaxis
        })

    return offline.iplot(fig)
Beispiel #13
0
    def set_fontcolor(self,
                      by='scores',
                      colorscale='YlOrRd',
                      custom_colors=None):
        """
        This function can be used to pick a metric which decides the font color
        for each extracted keyword. By default, the font color is assigned 
        based on the score of each keyword.
        
        Fonts can be picked by: 'random', 'scores', 'pos_tag', 'clustering_criteria'
        
        You can also choose custom font colors by passing in a list of 
        color strings, or a list of (R,G,B) sequences with values for each
        component falling in [0,255].
        
        Parameters
        ----------
        
        by : str or None, optional
            The metric used to assign font colors. Ignored when custom colors 
            are being used
        colorscale: str or None, optional
            One of [Greys, YlGnBu, Greens, YlOrRd, Bluered, RdBu, Reds, Blues].
            When by=='scores', this will be used to determine the colorscale.
        custom_colors : list of str or list of 3-sequences, optional
            One entry per keyword: either ready-made color strings (stored
            as given) or RGB triples that are formatted as 'rgb(r, g, b)'.
            
        Returns
        -------
        None

        Raises
        ------
        AssertionError
            If ``custom_colors`` does not have one entry per keyword, an RGB
            entry does not have three components, or ``colorscale`` is not
            in the list above.
        ValueError
            If ``by`` is not one of the supported metrics.
        """
        if custom_colors is not None:
            assert len(custom_colors) == len(self.keywords)
            # len() check first so that an empty list does not hit [0]
            if len(custom_colors) > 0 and isinstance(custom_colors[0], str):
                self.fontcolors = custom_colors
            else:
                self.fontcolors = []
                for rgb in custom_colors:
                    assert len(rgb) == 3
                    # tuple() so that lists also render as 'rgb(r, g, b)'
                    # rather than 'rgb[r, g, b]'
                    self.fontcolors.append('rgb' + str(tuple(rgb)))

        elif by == 'random':
            # One random 3-colour sequential scale, then one random colour
            # of that scale per keyword
            palettes = cl.flipper()['seq']['3']
            tone = np.random.choice(list(palettes.keys()))
            self.fontcolors = np.random.choice(list(palettes[tone]),
                                               len(self.keywords))

        elif by == 'scores':

            scales = {**cl.scales['8']['div'], **cl.scales['8']['seq']}
            #Even though, currently all colorscales in 'scales.keys()' can be
            #used, only the ones listed in the doc can be used for creating a
            #colorbar in the plotly plot

            # NOTE(review): 'Bluered' passes this check but is presumably not
            # a key of the colorlover scales above, which would make the
            # lookup below raise KeyError - confirm.
            assert colorscale in [
                'Greys', 'YlGnBu', 'Greens', 'YlOrRd', 'Bluered', 'RdBu',
                'Reds', 'Blues'
            ]
            colors = scales[colorscale].copy()
            colors.reverse()

            #The keywords are binned based on their scores
            mn, mx = self.scores.min(), self.scores.max()
            bins = np.linspace(mn, mx, 8)
            indices = np.digitize(self.scores, bins) - 1

            self.fontcolors = [colors[i] for i in indices]

        elif by == 'pos_tag':
            # Five Set2 colours plus two near-white entries, one per tag
            c = cl.scales['5']['qual']['Set2'] + [
                'rgb(254,254,254)', 'rgb(254,254,254)'
            ]
            tags = ['NOUN', 'PROPN', 'ADJ', 'VERB', 'ADV', 'SYM', 'ADP']
            mapping = {tag: c[i] for i, tag in enumerate(tags)}
            # Tags outside the list above map to None
            self.fontcolors = list(map(mapping.get, self.pos_tags))

        elif by == 'clustering_criteria':
            # Embed the similarity matrix in three dimensions and rescale
            # the coordinates to [50, 255] so they can serve as RGB values
            mds = MDS(3, dissimilarity='precomputed').\
                                 fit_transform(self.similarity_matrix)
            mds = mds - mds.min()
            mds = mds * 205 / mds.max() + 50
            # Convert to built-in floats: the repr of NumPy scalars inside a
            # tuple is 'np.float64(...)' on NumPy 2, which is not a color.
            self.fontcolors = [
                'rgb' + str(tuple(float(v) for v in rgb)) for rgb in mds
            ]

        else:
            raise ValueError(
                "Unknown value for 'by': {!r}; expected 'random', 'scores', "
                "'pos_tag' or 'clustering_criteria'".format(by))

        #raise flag to indicate that the fontcolors have been modified
        self._flag_fontcolors = True
Beispiel #14
0
# Change CMPLNT_FR_TM to datetime.time type
# NOTE(review): `data` is used here but is only assigned by the read_csv call
# further down in this snippet - presumably the notebook cells were captured
# out of order; confirm the intended execution order.
data.CMPLNT_FR_TM = pd.to_datetime(data.CMPLNT_FR_TM, format='%H:%M:%S').dt.time
type(data.CMPLNT_FR_TM[0])

# Load the sampled crime data set from CSV
data = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/Criminal_Prediction/NYC_Crime_sampling.csv')

# Drop the 'Unnamed: 0' column
data = data.drop(columns=['Unnamed: 0'])

# Report the number of rows and columns, then preview the first rows
print('We have total {} observations and {} variables.'.format(data.shape[0], data.shape[1]))
print("\nHere are first 5 row of our sample data.\n")
data.head()

import colorlover as cl
from IPython.display import HTML
# Render the 3-colour sequential colorlover scales as HTML
HTML(cl.to_html( cl.flipper()['seq']['3'] ))

# Pick the 9-colour sequential 'GnBu' scale and render it
colors = cl.scales['9']['seq']['GnBu']
print('Color we chose in this notebook:\n')
HTML(cl.to_html(colors))

# Visualize number of crimes in NYC Boroughs
data.BORO_NM.value_counts().sort_index()

import plotly.graph_objs as go
import plotly.offline as py

trace1 = go.Bar(x = data.BORO_NM.value_counts().sort_index().index, 
                y = data.BORO_NM[data.LAW_CAT_CD == 'MISDEMEANOR'].value_counts().sort_index(),
                name = 'MISDEMEANOR', 
                text = (data.BORO_NM[data.LAW_CAT_CD == 'MISDEMEANOR'].value_counts()/data.BORO_NM.value_counts()).apply(lambda x: format(x, '.2%')).sort_index(),
Beispiel #15
0
def dic(c, small_location):
    """Build the plotly figure dictionaries for one company's dashboard.

    Reads the rating, stock, Yelp and search CSV files located relative to
    this module and assembles time-series traces into seven figure
    specifications that all share one layout (range selector + slider).

    Args:
        c: company code; used to select the rating files
            (``<c>_gdoor_employee_rate.csv``, ``<c>_gdoor_mgmt_rate.csv``,
            ``<c>_stock_rate.csv``, ``all_yelps_rates_<c>.csv``) and to look
            up the company's ticker in ``input_fields.csv``.
        small_location: name of one Yelp location column to plot next to the
            overall customer sentiment (compared case-insensitively).

    Returns:
        dict mapping ``"fig_overall"``, ``"fig_search"``, ``"fig_emp"``,
        ``"fig_mgm"``, ``"fig_sha"``, ``"fig_cus"`` and ``"fig_ben"`` to
        ``dict(data=[...], layout=...)`` figure specifications.

    Raises:
        ValueError: if ``input_fields.csv`` lists no company codes, so no
            benchmark can be computed.
    """
    from scipy import signal

    my_path = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(my_path, "../input_fields.csv")
    path_in = os.path.join(my_path, "../data/ratings/")
    # NOTE(review): drops the last 7 characters of the module directory -
    # presumably the name of the sub-folder this file lives in; confirm.
    path_in_search = os.path.join(my_path[:-7] + "/data/search/")

    input_fields = pd.read_csv(path)
    code = input_fields["code_or_ticker"]

    glassdoor = pd.read_csv(path_in + c + "_gdoor_employee_rate.csv")
    glassdoor_m = pd.read_csv(path_in + c + "_gdoor_mgmt_rate.csv")
    df_tick = pd.read_csv(path_in + c + "_stock_rate.csv")
    yelp = pd.read_csv(path_in + "all_yelps_rates_" + c + ".csv")

    # Ticker of the requested company. The previous code compared the column
    # with itself and therefore always picked the first row's ticker.
    matching_tickers = input_fields.loc[code == c, "ticker"]
    if matching_tickers.empty:
        # Fall back to the first listed ticker (the former behaviour) when
        # the code is not listed.
        matching_tickers = input_fields["ticker"]
    c_corr = matching_tickers.reset_index(drop=True)[0]

    def _faded_trace(frame, x_col, y_col, name):
        """Return a low-opacity sub-rating line in the 'Employees' group."""
        return go.Scatter(x=frame[x_col],
                          y=frame[y_col],
                          name=name,
                          line=dict(color='#17BECF'),
                          legendgroup='Employees',
                          opacity=0.2)

    # --- Employee ratings -------------------------------------------------
    trace_emp = go.Scatter(x=glassdoor["Review Date"],
                           y=glassdoor["Final_Rating"],
                           name="Employees Sentiment",
                           line=dict(color='#17BECF'),
                           opacity=0.8)
    trace_wlb = _faded_trace(glassdoor, "Review Date",
                             "Final_Work Life Balance", "Work Life Balance")
    trace_cop = _faded_trace(glassdoor, "Review Date",
                             "Final_Career Opportunities",
                             "Career Opportunities")
    trace_cbe = _faded_trace(glassdoor, "Review Date",
                             "Final_Comp Benefits", "Comp Benefits")
    trace_sma = _faded_trace(glassdoor, "Review Date",
                             "Final_Senior Management",
                             "Management Competence")

    # --- Management ratings -----------------------------------------------
    trace_mse = go.Scatter(x=glassdoor_m["date"],
                           y=glassdoor_m["trace_mse"],
                           name="Management Sentiment",
                           line=dict(color='green'),
                           opacity=0.8)
    trace_mwlb = _faded_trace(glassdoor_m, "date", "trace_mwlb",
                              "Work Life Balance")
    trace_mcop = _faded_trace(glassdoor_m, "date", "trace_mcop",
                              "Career Opportunities")
    trace_mcbe = _faded_trace(glassdoor_m, "date", "trace_mcbe",
                              "Comp Benefits")
    trace_msma = _faded_trace(glassdoor_m, "date", "trace_msma",
                              "Upper Management Competence")

    # --- Stock price --------------------------------------------------------
    trace_sto = go.Scatter(x=df_tick["date"],
                           y=df_tick["close"],
                           name="Stock Price",
                           line=dict(color='#7F7F7F'),
                           opacity=1)

    # --- Google search ------------------------------------------------------
    print(path_in_search)

    search_df = pd.read_csv(path_in_search + "correlate-" + c_corr + ".csv")
    rat = pd.read_csv(path_in_search + "rat_search.csv")
    # This file name has to be changed for new categories.
    search_dandas = pd.read_csv(path_in_search + "searches_BRJI_dandas.csv")

    # One colour per search category / rating type, assigned by position
    daf = ["red", "green", "blue", "violet", "purple", "grey"]

    search = []
    category_columns = search_dandas.drop(["date"], axis=1).columns
    for position, col in enumerate(category_columns):
        search.append(
            go.Scatter(x=search_dandas["date"],
                       y=search_dandas[col],
                       line=dict(color=daf[position]),
                       name=col,
                       legendgroup=col,
                       opacity=0.8))

    # Individual search terms: drawn very faintly in the colour of every
    # rating type that lists the term in its "0" column.
    for col in search_df.drop(["Date"], axis=1).columns:
        for position, g in enumerate(rat["type"].unique()):
            ban = daf[position]
            if col in rat[rat["type"] == g]["0"].values:
                search.append(
                    go.Scatter(x=search_df["Date"],
                               y=search_df[col],
                               line=dict(color=ban),
                               name=col,
                               legendgroup=g,
                               opacity=0.05))

    # --- Customer (Yelp) sentiment -----------------------------------------
    # Power transform of the overall Yelp series; the exponent is derived
    # from the gap between the last and the second observation.
    # NOTE(review): .iloc[1] is the second row - possibly .iloc[0] was
    # meant; confirm.
    gap = np.abs(yelp["all"].iloc[-1] - yelp["all"].iloc[1])
    exponent = 1 * (np.sqrt(np.abs(np.log(gap)))**3.5)
    yelp["new"] = yelp["all"]**exponent / 10000

    # Rescale so the transformed series ends at the original's last value ...
    yelp["new"] = (yelp["all"].iloc[-1] / yelp["new"].iloc[-1]) * yelp["new"]

    # ... and then so that it ends at the last management-sentiment value.
    multiplier = (glassdoor_m["trace_mse"].tail(1)
                  ).values[0] / yelp["new"].tail(1).values[0]
    yelp["new"] = yelp["new"] * multiplier

    trace_all_yelp = go.Scatter(x=yelp["date"],
                                y=yelp["new"],
                                line=dict(color='orange'),
                                name="Customer Sentiment Avg.",
                                legendgroup="yelps",
                                opacity=0.8)
    yep = [trace_all_yelp]
    for col in yelp.drop(["date", "all"], axis=1).columns:
        if col.lower() == small_location.lower():
            yep.append(
                go.Scatter(x=yelp["date"],
                           y=yelp[col],
                           line=dict(color='orange'),
                           name=col,
                           legendgroup="yelps",
                           opacity=0.50))

    # The search figure gets its own stock trace, labelled "Stock".
    trace_stock = go.Scatter(x=df_tick["date"],
                             y=df_tick["close"],
                             name="Stock",
                             line=dict(color='#7F7F7F'),
                             opacity=1)
    search.append(trace_stock)
    yep.append(trace_sto)

    # --- Shared layout and figures ------------------------------------------
    layout = dict(
        margin=dict(t=20, b=15),
        xaxis=dict(rangeselector=dict(buttons=[
            dict(count=1, label='1m', step='month', stepmode='backward'),
            dict(count=6, label='6m', step='month', stepmode='backward'),
            dict(step='all')
        ]),
                   rangeslider=dict(),
                   type='date'),
        hovermode="closest")

    fig_overall = dict(
        data=[trace_sto, trace_emp, trace_mse, trace_all_yelp],
        layout=layout)
    fig_search = dict(data=search, layout=layout)
    fig_emp = dict(
        data=[trace_sto, trace_emp, trace_wlb, trace_cop, trace_cbe,
              trace_sma],
        layout=layout)
    fig_mgm = dict(
        data=[trace_sto, trace_mse, trace_mwlb, trace_mcop, trace_mcbe,
              trace_msma],
        layout=layout)
    fig_sha = dict(data=[trace_sto], layout=layout)
    fig_cus = dict(data=yep, layout=layout)

    # --- Benefits: raw, smoothed, and smoothed benchmark ---------------------
    # Savitzky-Golay filter with window length 199 and polynomial order 3
    glassdoor["ben_smooth"] = signal.savgol_filter(
        glassdoor["Final_Comp Benefits"], 199, 3)

    trace_cbe_smoothed = go.Scatter(x=glassdoor["Review Date"],
                                    y=glassdoor["ben_smooth"],
                                    name="Smoothed",
                                    showlegend=False,
                                    legendgroup='Employees',
                                    opacity=0.8)

    trace_cbe_new = go.Scatter(x=glassdoor["Review Date"],
                               y=glassdoor["Final_Comp Benefits"],
                               name="Benefits",
                               legendgroup='Employees',
                               showlegend=False,
                               opacity=0.8)

    # Smoothed benefits rating of every listed company, outer-joined on the
    # review date. A separate loop variable keeps parameter ``c`` intact.
    full = None
    for company in code:
        peer = pd.read_csv(path_in + company + "_gdoor_employee_rate.csv")
        peer[company] = signal.savgol_filter(peer["Final_Comp Benefits"],
                                             199, 3)
        peer["Review Date"] = pd.to_datetime(peer["Review Date"])
        smoothed = peer[[company, "Review Date"]].set_index("Review Date")
        if full is None:
            full = smoothed
        else:
            full = pd.merge(full,
                            smoothed,
                            left_index=True,
                            right_index=True,
                            how="outer")

    if full is None:
        raise ValueError("input_fields.csv lists no company codes")

    # Fill gaps backwards first, then forwards for any trailing gaps.
    full = full.bfill().ffill()
    # Average across companies while the dates are still the index, so the
    # date column never takes part in the mean.
    full["Inds"] = full.mean(axis=1)
    full = full.reset_index()

    trace_cbe_smoothed_all = go.Scatter(x=full["Review Date"],
                                        y=full["Inds"],
                                        name="Smoothed Bench",
                                        showlegend=False,
                                        legendgroup='Employees',
                                        opacity=0.8)

    fig_ben = dict(
        data=[trace_cbe_new, trace_cbe_smoothed, trace_cbe_smoothed_all],
        layout=layout)

    return {
        "fig_overall": fig_overall,
        "fig_search": fig_search,
        "fig_emp": fig_emp,
        "fig_mgm": fig_mgm,
        "fig_sha": fig_sha,
        "fig_cus": fig_cus,
        "fig_ben": fig_ben,
    }
Beispiel #16
0
        results_dir,
        f'mppdc_price_change_deviation_case_transition_year_{transition_year}.pickle',
        'YEAR_CUMULATIVE_SCHEME_REVENUE',
        stage='stage_3_price_targeting',
        iteration='max')
    v_heuristic = analysis.extract_results(
        results_dir,
        f'heuristic_price_change_deviation_case_transition_year_{transition_year}.pickle',
        'YEAR_CUMULATIVE_SCHEME_REVENUE',
        stage='stage_3_price_targeting',
        iteration='max',
        model='primal')

    # Create figures
    c = cl.to_numeric(
        cl.flipper()['qual']['5']['Set1']
    )  # ['Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3'])
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)

    # Average prices
    # --------------
    # Dash spacing
    p_y = 10
    p_z = 0.05
    p_dash_length = p_y / (3 + (3 * p_z))
    p_dash_spacer = p_z * p_dash_length
    p_dash_spacing = (2 * p_dash_length) + (3 * p_dash_spacer)

    # p_t1 = (p_dash_spacer*6.5, (p_dash_length, p_dash_spacing))
    # p_t2 = (p_dash_length + (2 * p_dash_spacer), (p_dash_length, p_dash_spacing))
    # p_t3 = ((2 * p_dash_length) + (3 * p_dash_spacer), (p_dash_length, p_dash_spacing))
Beispiel #17
0
 def test_flipper(self):
     """The flipped lookup (type, count, name) must return the RdYlBu-3 scale."""
     expected_rdylbu = [
         'rgb(252,141,89)',
         'rgb(255,255,191)',
         'rgb(145,191,219)',
     ]
     diverging = cl.flipper()['div']
     flipped = diverging['3']['RdYlBu']
     self.assertEqual(flipped, expected_rdylbu)
Beispiel #18
0
def plot_average_prices(results_dir, output_dir):
    """Plot the average real price of each scheme and save the figure.

    Five price series are loaded through ``analysis.get_average_prices`` and
    drawn on a single axis. The figure is written to ``output_dir`` as
    ``average_prices.png`` and ``average_prices.pdf`` and then shown.
    """

    # Load the series (same order of calls as before).
    prices_bau = analysis.get_average_prices(
        results_dir, 'bau_case.pickle', None, 'PRICES', -1)
    prices_rep = analysis.get_average_prices(
        results_dir, 'rep_case.pickle', 'stage_2_rep', 'PRICES', -1)
    prices_tax = analysis.get_average_prices(
        results_dir, 'rep_case.pickle', 'stage_1_carbon_tax', 'PRICES', -1)
    prices_mppdc = analysis.get_average_prices(
        results_dir, 'mppdc_price_change_deviation_case.pickle',
        'stage_3_price_targeting', 'lamb', 1)
    prices_heuristic = analysis.get_average_prices(
        results_dir, 'heuristic_price_change_deviation_case.pickle',
        'stage_3_price_targeting', 'PRICES', -1)

    # Qualitative 5-colour 'Set1' palette converted to numeric tuples.
    palette = cl.to_numeric(cl.flipper()['qual']['5']['Set1'])
    fig, ax = plt.subplots()

    # (series, palette index, alpha) listed in drawing order; the legend
    # labels further down rely on this order.
    drawing_plan = (
        (prices_bau, 1, 0.7),
        (prices_tax, 0, 0.7),
        (prices_rep, 2, 0.7),
        (prices_mppdc, 3, 0.7),
        (prices_heuristic, 4, 0.6),
    )
    for series, palette_idx, alpha in drawing_plan:
        ax.plot(series.index.tolist(),
                series['average_price_real'].tolist(),
                color=scale_rgb(palette[palette_idx]),
                alpha=alpha,
                linewidth=0.9)

    fig.set_size_inches(3, 2.3)

    # Axis labels and tick layout.
    ax.set_ylabel('Average price ($/MWh)', fontsize=9, labelpad=-0.1)
    ax.set_xlabel('Year', fontsize=9)
    ax.tick_params(labelsize=8)
    for axis, major_step, minor_step in ((ax.xaxis, 5, 1), (ax.yaxis, 20, 5)):
        axis.set_major_locator(MultipleLocator(major_step))
        axis.set_minor_locator(MultipleLocator(minor_step))

    legend_labels = ['BAU', 'Tax', 'REP', 'MPPDC', 'Heuristic']
    ax.legend(legend_labels, fontsize=7, ncol=2, frameon=False)
    fig.subplots_adjust(left=0.16, bottom=0.18, top=0.98, right=0.98)

    for file_name in ('average_prices.png', 'average_prices.pdf'):
        fig.savefig(os.path.join(output_dir, file_name))

    plt.show()
Beispiel #19
0
def dic(c):
    """Build the Plotly figure dictionaries for one company's dashboard.

    The rating, stock-price and search CSV files whose names contain ``c``
    are loaded from directories relative to this module and turned into
    ``go.Scatter`` traces. Every returned figure shares the same layout
    object (a date x-axis with a range selector and a range slider).

    Side effects: prints the search-data directory and adds a
    ``"ben_smooth"`` column to the in-memory employee ratings frame.

    Args:
        c: File-name fragment identifying the company, e.g. the ``<c>`` in
            ``<c>_stock_rate.csv``.

    Returns:
        dict: keys ``fig_overall``, ``fig_search``, ``fig_emp``, ``fig_mgm``,
        ``fig_sha``, ``fig_cus`` and ``fig_ben``; each value is a
        ``dict(data=[traces], layout=layout)``.
    """
    from scipy import signal

    employee_blue = '#17BECF'
    stock_grey = '#7F7F7F'
    search_colors = ["red", "green", "blue", "violet", "purple", "grey"]

    def line_trace(frame, x_col, y_col, name, color, opacity, group=None):
        """Return a coloured ``go.Scatter`` over two columns of ``frame``."""
        extra = {} if group is None else {"legendgroup": group}
        return go.Scatter(x=frame[x_col],
                          y=frame[y_col],
                          name=name,
                          line=dict(color=color),
                          opacity=opacity,
                          **extra)

    # ------------------------------------------------------------------
    # Input files
    # ------------------------------------------------------------------
    my_path = os.path.abspath(os.path.dirname(__file__))
    path_in = os.path.join(my_path, "../data/ratings/")
    # NOTE(review): the slice drops the last 7 characters of the module
    # directory, presumably a fixed folder name -- confirm before moving
    # this file.
    path_in_search = os.path.join(my_path[:-7] + "/data/search/")

    glassdoor = pd.read_csv(path_in + c + "_gdoor_employee_rate.csv")
    glassdoor_m = pd.read_csv(path_in + c + "_gdoor_mgmt_rate.csv")
    df_tick = pd.read_csv(path_in + c + "_stock_rate.csv")
    yelp = pd.read_csv(path_in + "all_yelps_rates_" + c + ".csv")

    print(path_in_search)
    search_df = pd.read_csv(path_in_search + "correlate-" + c + ".csv")
    rat = pd.read_csv(path_in_search + "rat_search.csv")
    search_dandas = pd.read_csv(
        path_in_search + "searches_BRJI_dandas.csv"
    )  # This has to be changed for new categories.

    # ------------------------------------------------------------------
    # Headline traces
    # ------------------------------------------------------------------
    trace_sto = line_trace(df_tick, "date", "close", "Stock Price",
                           stock_grey, 1)
    trace_emp = line_trace(glassdoor, "Review Date", "Final_Rating",
                           "Employees Sentiment", employee_blue, 0.8)
    trace_mse = line_trace(glassdoor_m, "date", "trace_mse",
                           "Management Sentiment", 'green', 0.8)
    trace_all_yelp = line_trace(yelp, "date", "all", "Customer Sentiment",
                                'orange', 0.8, group="yelps")

    # Mean over the search series. The "date" column is dropped first:
    # summing a frame that still contains the date strings raises TypeError
    # on pandas >= 2.0, where non-numeric columns are no longer skipped.
    search_values = search_dandas.drop(["date"], axis=1)
    trace_search_all = go.Scatter(x=search_dandas["date"],
                                  y=search_values.sum(axis=1) /
                                  len(search_values.columns),
                                  name="Search Sentiment",
                                  opacity=0.8)

    # ------------------------------------------------------------------
    # Employee / management detail traces (faint lines behind the headline)
    # NOTE(review): "Culture Values" traces were built but never added to
    # any figure in the previous version; they are still left out here.
    # ------------------------------------------------------------------
    employee_details = [
        ("Final_Work Life Balance", "Work Life Balance"),
        ("Final_Career Opportunities", "Career Opportunities"),
        ("Final_Comp Benefits", "Comp Benefits"),
        ("Final_Senior Management", "Management Likability"),
    ]
    emp_data = [trace_sto, trace_emp] + [
        line_trace(glassdoor, "Review Date", column, label, employee_blue,
                   0.2, group='Employees')
        for column, label in employee_details
    ]

    management_details = [
        ("trace_mwlb", "Work Life Balance"),
        ("trace_mcop", "Career Opportunities"),
        ("trace_mcbe", "Comp Benefits"),
        ("trace_msma", "Management Likability"),
    ]
    mgm_data = [trace_sto, trace_mse] + [
        line_trace(glassdoor_m, "date", column, label, employee_blue, 0.2,
                   group='Employees')
        for column, label in management_details
    ]

    # ------------------------------------------------------------------
    # Search traces
    # ------------------------------------------------------------------
    # One opaque trace per aggregated search series; colours cycle so more
    # than six series no longer raise IndexError.
    search = [
        line_trace(search_dandas, "date", col, col,
                   search_colors[idx % len(search_colors)], 0.8, group=col)
        for idx, col in enumerate(search_values.columns)
    ]

    # Terms listed in column "0" of `rat` for each search type; looked up
    # once instead of once per correlate column.
    type_members = [
        (search_colors[idx % len(search_colors)], group,
         rat[rat["type"] == group]["0"].values)
        for idx, group in enumerate(rat["type"].unique())
    ]
    # One faint trace per (correlate column, matching type) pair.
    for col in search_df.drop(["Date"], axis=1).columns:
        for color, group, members in type_members:
            if col in members:
                search.append(
                    line_trace(search_df, "Date", col, col, color, 0.05,
                               group=group))

    # Stock price limited to dates before the last search date.
    df_rick = df_tick[df_tick["date"] < search_dandas["date"].max()]
    search.append(
        line_trace(df_rick, "date", "close", "Stock", stock_grey, 1))

    # ------------------------------------------------------------------
    # Customer (yelp) traces
    # ------------------------------------------------------------------
    yep = [trace_all_yelp]
    for col in yelp.drop(["date", "all"], axis=1).columns:
        yep.append(
            line_trace(yelp, "date", col, col, 'orange', 0.10,
                       group="yelps"))
    yep.append(trace_sto)

    # ------------------------------------------------------------------
    # Shared layout and figures
    # ------------------------------------------------------------------
    layout = dict(xaxis=dict(rangeselector=dict(buttons=[
        dict(count=1, label='1m', step='month', stepmode='backward'),
        dict(count=6, label='6m', step='month', stepmode='backward'),
        dict(step='all'),
    ]),
                             rangeslider=dict(),
                             type='date'),
                  hovermode="closest")

    # Benefits figure: raw series plus a Savitzky-Golay smoothed copy
    # (window length 199, polynomial order 3).
    glassdoor["ben_smooth"] = signal.savgol_filter(
        glassdoor["Final_Comp Benefits"], 199, 3)

    trace_cbe_smoothed = go.Scatter(x=glassdoor["Review Date"],
                                    y=glassdoor["ben_smooth"],
                                    name="Smoothed",
                                    showlegend=False,
                                    legendgroup='Employees',
                                    opacity=0.8)

    trace_cbe_new = go.Scatter(x=glassdoor["Review Date"],
                               y=glassdoor["Final_Comp Benefits"],
                               name="Benefits",
                               legendgroup='Employees',
                               showlegend=False,
                               opacity=0.8)

    overall_data = [
        trace_sto, trace_emp, trace_mse, trace_all_yelp, trace_search_all
    ]

    return {
        "fig_overall": dict(data=overall_data, layout=layout),
        "fig_search": dict(data=search, layout=layout),
        "fig_emp": dict(data=emp_data, layout=layout),
        "fig_mgm": dict(data=mgm_data, layout=layout),
        "fig_sha": dict(data=[trace_sto], layout=layout),
        "fig_cus": dict(data=yep, layout=layout),
        "fig_ben": dict(data=[trace_cbe_new, trace_cbe_smoothed],
                        layout=layout),
    }
def show_forecast(X,
                  y,
                  button_name_prefix,
                  graph_name_prefix,
                  chart_name,
                  anomaly=None,
                  timeinterval=None,
                  date_range=None):
    """Assemble a Plotly figure dict with one toggle button per group.

    Args:
        X: Mapping ``button -> graph -> x values``.
        y: Mapping ``button -> graph -> y values``; its keys define the
            buttons and the graphs that are drawn.
        button_name_prefix: Text put in front of ``str(button)`` to form the
            button label.
        graph_name_prefix: Text put in front of ``str(graph)`` to form the
            trace name.
        chart_name: Figure title.
        anomaly: Optional mapping ``button -> indexable of anomaly starts``.
            Each start is drawn as a filled band that ends at
            ``(start + timeinterval)[0]``.
        timeinterval: Offset added to every anomaly start; only used when
            ``anomaly`` is given. Presumably array-like, since the sum is
            indexed with ``[0]`` -- confirm against callers.
        date_range: Value passed through as the x-axis ``range``.

    Returns:
        dict: ``dict(data=[traces...], layout=layout)``. Traces are grouped
        per button, graph traces first and anomaly bands after them.

    Note:
        Line colours are picked with ``random`` and therefore differ from
        call to call unless the caller seeds the generator.
    """
    palettes = list(cl.flipper()['seq']['9'].values())
    traces_by_button = {}

    for button_idx, (button, graphs) in enumerate(y.items()):
        # Only the first button's traces are shown initially.
        button_visible = button_idx == 0

        # Anomalous values
        if anomaly is None:
            anomaly_traces = []
        else:
            starts = anomaly[button]
            # Indexed access is kept on purpose: the container type is not
            # known here and label-based lookups must behave as before.
            anomaly_traces = [
                go.Scatter(x=[starts[k], (starts[k] + timeinterval)[0]],
                           y=[1, 1],
                           fill='tozeroy',
                           fillcolor='rgba(190,127,188,0.5)',
                           line=dict(width=0),
                           mode='none',
                           legendgroup='anomaly',
                           name='anomaly',
                           visible=button_visible,
                           showlegend=(k == 0))
                for k in range(len(starts))
            ]

        # Actual values
        fact_traces = []
        visibility = button_visible
        for graph_idx, (graph, _values) in enumerate(graphs.items()):
            if graph_idx % 2 == 0:
                dash = 'solid'
            elif graph_idx % 3 == 0:
                dash = 'dash'
            elif graph_idx % 5 == 0:
                dash = 'dot'
            else:
                dash = 'longdash'

            # In the visible group only the first graph is drawn; the others
            # start as legend-only entries.
            if visibility is True and graph_idx != 0:
                visibility = 'legendonly'

            palette = palettes[random.randint(0, len(palettes) - 1)]
            shade = random.randint(2, 8)
            fact_traces.append(
                go.Scatter(name=graph_name_prefix + str(graph),
                           x=X[button][graph],
                           y=y[button][graph],
                           mode='lines',
                           line=dict(color=palette[shade],
                                     dash=dash,
                                     width=2),
                           visible=visibility))

        # `filter(None.__ne__, ...)` depended on NotImplemented being truthy,
        # which is deprecated since Python 3.9 and an error in 3.14.
        traces_by_button[button] = [
            trace for trace in [*fact_traces, *anomaly_traces]
            if trace is not None
        ]

    def visibility_mask(active_button):
        """Return the per-trace ``visible`` list for one pressed button."""
        mask = []
        for key, traces in traces_by_button.items():
            if key != active_button:
                mask.extend([False] * len(traces))
                continue
            n_graphs = len(y[key])
            # First graph shown, remaining graphs legend-only, anomaly bands
            # shown. min/max keep the mask aligned when a button has no
            # graphs at all.
            mask.extend([True] * min(n_graphs, 1))
            mask.extend(['legendonly'] * max(n_graphs - 1, 0))
            mask.extend([True] * (len(traces) - n_graphs))
        return mask

    updatemenus = [
        dict(type="buttons",
             x=-0.07,
             buttons=[
                 dict(label=button_name_prefix + str(button),
                      method='update',
                      args=[{'visible': visibility_mask(button)}])
                 for button in y.keys()
             ])
    ]

    layout = dict(
        title=chart_name,
        showlegend=True,
        updatemenus=updatemenus,
        xaxis=dict(range=date_range,
                   rangeselector=dict(buttons=[
                       dict(count=1,
                            label='1d',
                            step='day',
                            stepmode='backward'),
                       dict(count=7,
                            label='1w',
                            step='day',
                            stepmode='backward'),
                       dict(count=1,
                            label='1m',
                            step='month',
                            stepmode='backward'),
                       dict(step='all', stepmode='backward'),
                   ]),
                   rangeslider=dict(visible=True),
                   type='date'),
        yaxis=dict(ticks='outside', zeroline=False),
    )

    return dict(data=list(
        itertools.chain.from_iterable(traces_by_button.values())),
                layout=layout)
Beispiel #21
0
def drawTilePlot(pangenome, output, nocloud=False):
    """Write an interactive presence/absence tile plot of the pangenome.

    Organisms (columns) are ordered by the leaves of a single-linkage
    dendrogram computed from the family presence/absence matrix. Gene
    families (rows) are grouped by partition -- "P", then every partition
    starting with "S" in sorted order, then "C" -- and sorted by decreasing
    number of organisms inside each group. Coloured lines drawn next to the
    heatmap mark each group.

    Args:
        pangenome: Pangenome object; ``status``, ``organisms``,
            ``geneFamilies`` and ``getIndex()`` are read.
        output: Directory path; the plot is written to
            ``output + "/tile_plot.html"``.
        nocloud: When truthy, families whose partition starts with "C" are
            left out of the plot.

    Raises:
        Exception: If the pangenome has not been partitioned.
    """
    checkPangenomeInfo(pangenome,
                       needAnnotations=True,
                       needFamilies=True,
                       needGraph=True)
    if pangenome.status["partitionned"] == "No":
        raise Exception(
            "Cannot draw the tile plot as your pangenome has not been partitionned"
        )
    if len(pangenome.organisms) > 500 and nocloud is False:
        logging.getLogger().warning(
            "You asked to draw a tile plot for a lot of organisms (>500). Your browser will probably not be able to open it."
        )
    logging.getLogger().info("Drawing the tile plot...")

    if nocloud:
        families = {
            fam
            for fam in pangenome.geneFamilies
            if not fam.partition.startswith("C")
        }
    else:
        families = set(pangenome.geneFamilies)
    org_index = pangenome.getIndex()
    index2org = {index: org for org, index in org_index.items()}
    COLORS = {
        "pangenome": "black",
        "exact_accessory": "#EB37ED",
        "exact_core": "#FF2828",
        "soft_core": "#c7c938",
        "soft_accessory": "#996633",
        "shell": "#00D860",
        "persistent": "#F7A507",
        "cloud": "#79DEFF",
        "undefined": "#828282"
    }

    logging.getLogger().info("start with matrice")

    # Sparse families x organisms presence matrix in coordinate form.
    matrix_values = []
    matrix_rows = []
    matrix_cols = []
    for row, fam in enumerate(families):
        org_cols = [org_index[org] for org in fam.organisms]
        matrix_rows.extend([row] * len(org_cols))
        matrix_cols.extend(org_cols)
        matrix_values.extend([1.0] * len(org_cols))

    mat_p_a = csc_matrix((matrix_values, (matrix_rows, matrix_cols)),
                         shape=(len(families), len(pangenome.organisms)),
                         dtype='float')
    dist = pdist(1 - jaccard_similarities(mat_p_a, 0).todense())
    hc = linkage(dist, 'single')

    dendro = dendrogram(hc, no_plot=True)
    logging.getLogger().info(
        "done with making the dendrogram to order the organisms on the plot")

    order_organisms = [index2org[index] for index in dendro["leaves"]]

    # Group the families by partition and collect the shell sub-partitions.
    partitions_dict = defaultdict(list)
    shell_subs = set()
    for fam in families:
        partitions_dict[fam.partition].append(fam)
        if fam.partition.startswith("S"):
            # number of elements will tell the number of subpartitions
            shell_subs.add(fam.partition)

    def by_decreasing_size(fams):
        """Return ``fams`` sorted by decreasing number of organisms."""
        return sorted(fams, key=lambda n: len(n.organisms), reverse=True)

    persistent_nodes = by_decreasing_size(partitions_dict["P"])
    cloud_nodes = by_decreasing_size(partitions_dict["C"])

    # separators[k] is the y position where the k-th row group ends; group 0
    # is the persistent block and the last group is the cloud block.
    separators = [len(persistent_nodes) - 0.5]
    ordered_nodes = list(persistent_nodes)
    shell_NA = None
    for subpartition in sorted(shell_subs):
        if subpartition == "S_":
            # Index of the separator appended just below for this
            # sub-partition. The previous `len(separators) - 1` pointed at
            # the group before it, so the generic shell colour was applied
            # to the wrong band.
            shell_NA = len(separators)
        shell_nodes = by_decreasing_size(partitions_dict[subpartition])
        ordered_nodes += shell_nodes
        separators.append(separators[-1] + len(shell_nodes))
    ordered_nodes += cloud_nodes
    separators.append(separators[-1] + len(cloud_nodes))

    logging.getLogger().info(
        "Getting the gene name(s) and the number for each tile of the plot ..."
    )
    binary_data = []
    text_data = []
    fam_order = []
    for node in ordered_nodes:
        # The zero-width character is put in front of every label.
        fam_order.append('\u200c' + node.name)
        node_orgs = node.organisms
        binary_data.append([
            len(node.getGenesPerOrg(org)) if org in node_orgs else numpy.nan
            for org in order_organisms
        ])
        text_data.append([("\n".join(map(str, node.getGenesPerOrg(org))))
                          if org in node_orgs else numpy.nan
                          for org in order_organisms])

    xaxis_values = ['\u200c' + org.name for org in order_organisms]

    logging.getLogger().info(
        "Done extracting names and numbers. Making the heatmap ...")

    heatmap = go.Heatmap(z=binary_data,
                         x=xaxis_values,
                         y=fam_order,
                         text=text_data,
                         zauto=False,
                         zmin=1,
                         zmax=2,
                         autocolorscale=False,
                         colorscale=[[0.50, 'rgb(100, 15, 78)'],
                                     [1, 'rgb(59, 157, 50)']],
                         colorbar=dict(title='Presence/Absence',
                                       titleside='top',
                                       tickmode='array',
                                       tickvals=[1, 2],
                                       ticktext=['Presence', 'Multicopy'],
                                       ticks='outside'))

    # With several shell sub-partitions each one except "S_" gets its own
    # shade of green; "S_" keeps the generic shell colour.
    shell_color = None
    if len(shell_subs) > 1:
        nb_shades = len(shell_subs)
        if "S_" in shell_subs:
            nb_shades -= 1
        shell_color = cl.interp(cl.flipper()['seq']['9']['Greens'][1:7],
                                nb_shades)

    nb_organisms = len(pangenome.organisms)
    last_group = len(separators) - 1
    shapes = []
    sep_prec = 0
    for nb, sep in enumerate(separators):
        if nb == 0:
            color = COLORS["persistent"]
        elif nb == last_group:
            color = COLORS["cloud"]
        elif len(shell_subs) > 1:
            if shell_NA is not None and nb == shell_NA:
                color = COLORS["shell"]
            else:
                color = shell_color.pop()
        else:
            color = COLORS["shell"]
        # Thick coloured bars on both sides of the group ...
        for x_pos in (-1, nb_organisms):
            shapes.append(
                dict(type='line',
                     x0=x_pos,
                     x1=x_pos,
                     y0=sep_prec,
                     y1=sep,
                     line=dict(width=10, color=color)))
        # ... and a thin horizontal line where the group ends.
        shapes.append(
            dict(type='line',
                 x0=-1,
                 x1=nb_organisms,
                 y0=sep,
                 y1=sep,
                 line=dict(width=1, color=color)))
        sep_prec = sep

    layout = go.Layout(title="presence/absence matrix",
                       xaxis=go.layout.XAxis(ticktext=xaxis_values,
                                             title='organisms',
                                             tickvals=xaxis_values,
                                             automargin=True,
                                             tickfont=dict(size=10)),
                       yaxis=go.layout.YAxis(ticktext=fam_order,
                                             tickvals=fam_order,
                                             title='gene families',
                                             automargin=True,
                                             tickfont=dict(size=10)),
                       shapes=shapes,
                       plot_bgcolor='#ffffff')
    logging.getLogger().info("Drawing the figure itself...")
    out_plotly.plot(go.Figure(data=[heatmap], layout=layout),
                    filename=output + "/tile_plot.html",
                    auto_open=False)
    logging.getLogger().info(
        f"Done with the tile plot : '{output+'/tile_plot.html'}' ")
Beispiel #22
0
# One opaque trace per aggregated search series. `rit` must be initialised
# before this fragment; it selects the colour of each series from `daf`.
for col in search_dandas.drop(["date"], axis=1).columns:
    rit = rit + 1
    trace = go.Scatter(x=search_dandas["date"],
                       y=search_dandas[col],
                       line=dict(color=daf[rit]),
                       name=col,
                       legendgroup=col,
                       opacity=0.8)
    search.append(trace)

# Sequential colour scales keyed by position. The scale size is taken from
# the largest per-type row count in `rat`, plus one.
# `.iloc[0]` replaces `[0]`: an integer key on the string-labelled result
# relied on positional fallback, which pandas deprecated in 2.1.
scale_size = str(rat.groupby("type").count().max().iloc[0] + 1)
seq_scales = cl.flipper()['seq'][scale_size]
color_dict = {}
sam = -1
for i in ["Reds", "Greens", "Blues", "PuRd", "Purples", "Greys"]:
    sam = sam + 1
    dan = seq_scales[i]
    color_dict[sam] = dan

# One faint trace for every (column, search type) pair in which the column
# name is listed under that type in column "0" of `rat`.
for col in search_df.drop(["date"], axis=1).columns:
    tio = -1
    for g in rat["type"].unique():
        tio = tio + 1
        ban = daf[tio]
        if col in rat[rat["type"] == g]["0"].values:
            trace = go.Scatter(x=search_df["date"],
                               y=search_df[col],
                               line=dict(color=ban),
                               name=col,
                               legendgroup=g,
                               opacity=0.05)
            search.append(trace)
Beispiel #23
0
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 23 23:49:44 2017

@author: nmishra
"""

import numpy as np
import pandas as pd
from tabulate import tabulate
import matplotlib.pyplot as plt
from collections import defaultdict
import matplotlib
import colorlover as cl
from IPython.display import HTML
# NOTE(review): the HTML object is built but not assigned or displayed
# explicitly -- presumably meant for notebook cell output; confirm.
HTML(cl.to_html(cl.flipper()['seq']['3']))
matplotlib.style.use('ggplot')

Colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'beige',
          'bisque', 'black', 'dimgray', 'darkturquoise', 'blanchedalmond',
          'darkviolet']

deadline = ['0.60', '0.65', '0.70', '0.75', '0.80', '0.85', '0.90']
table = []
# All_data[mode][deadline] -> float array parsed from one result file.
All_data = defaultdict(dict)
for x in ['eff', 'lat']:
    for y in deadline:
        filename = '../results/single-app/dyn-' + x + '-' + y + '-v3.txt'
        # `with` closes the file even if parsing below raises.
        with open(filename, 'r') as f:
            # Keep the text before the first tab of each line, split on
            # whitespace. (The former `if 'AVERAGE'` filter was always true
            # and never removed a line.)
            table = [row.strip().split('\t')[0].split() for row in f]
        # Skip the first and last parsed rows and the first field of each
        # remaining row before converting to float.
        All_data[x][y] = np.array([row[1:] for row in table[1:-1]]).astype(float)
Beispiel #24
0
def multiplotDemandSimilarity(merged_ds):
    """Plot benchmark and data-model demand for every customer class.

    One subplot row is created per unique value of ``merged_ds['class']``.
    Each row holds a bar trace of ``'M_kw_mean'`` (data model), the linear
    regression of ``'M_kw_mean'`` on ``'YearsElectrified'`` as a line, and a
    bar trace of ``'Energy [kWh]'`` (benchmark).

    Args:
        merged_ds: DataFrame with the columns ``'class'``,
            ``'YearsElectrified'``, ``'Energy [kWh]'``, ``'M_kw_mean'`` and
            ``'M_kw_std'``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.
    """
    data = []
    lay = []
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    # Looked up once; previously rebuilt three times per class.
    seq_scales = cl.flipper()['seq']['3']
    classes = merged_ds['class'].unique()

    # generate existing and new model traces for each customer subclass
    for count, c in enumerate(classes, start=1):
        d = merged_ds.loc[(merged_ds['class'] == c)][[
            'YearsElectrified', 'Energy [kWh]', 'M_kw_mean', 'M_kw_std'
        ]]
        years = d['YearsElectrified'].values

        slope, intercept = stats.linregress(years, d['M_kw_mean'].values)[:2]
        line = slope * years + intercept

        # Palettes cycle, so more than len(clrs) classes no longer raise
        # IndexError.
        palette = seq_scales[clrs[(count - 1) % len(clrs)]]

        trace0 = go.Bar(
            x=years,
            y=d['Energy [kWh]'].values,
            xaxis='x' + str(count),
            yaxis='y' + str(count),
            marker=dict(color=palette[-1]),
            name=c + ' benchmark',
        )

        trace1 = go.Bar(
            x=years,
            y=d['M_kw_mean'].values,
            name=c + ' data model',
            marker=dict(color=palette[1]),
        )

        trace2 = go.Scatter(
            x=years,
            y=line,
            mode='lines',
            line=dict(color=palette[1], width=3),
            name=c + ' data lin_reg',
        )

        max_year = d.YearsElectrified.max()
        lay.append({
            'yaxis{}'.format(count):
            go.YAxis(type='linear',
                     title='annual mean monthly<br /> consumption (kWh)'),
            'xaxis{}'.format(count):
            go.XAxis(title='time electrified (years)',
                     ticktext=list(range(0, max_year + 1)),
                     tickvals=np.arange(0, max_year + 1, 1)),
        })

        data.extend([trace1, trace2, trace0])

    # create subplot graph objects: three traces per class, one row each
    rows = len(data) // 3
    fig = py.tools.make_subplots(rows=rows,
                                 cols=1,
                                 subplot_titles=list(classes),
                                 horizontal_spacing=0.1,
                                 print_grid=False)

    for i, trace in enumerate(data):
        fig.append_trace(trace, i // 3 + 1, 1)

    fig['layout'].update(title='Annual mean monthly demand model similarity')

    # update layout for all subplots
    for k in range(rows):
        fig['layout'].update(lay[k])

    return offline.iplot(fig,
                         filename=os.path.join(
                             image_dir, 'multiplot-demand-similarity.png'))
Beispiel #25
0
def plotBmHourlyHeatmap(customer_class,
                        year_list,
                        daytype='Weekday',
                        model_dir=dpet_dir):
    """
    Plot hourly load-profile heatmaps for one customer class and day type.

    One heatmap (hour of day on x, month on y, 'Mean [kVA]' as colour) is
    drawn for each entry in ``year_list``. The panels are laid out on a grid
    that is at most three columns wide. According to the original author the
    underlying data is based on the DPET model.

    Parameters
    ----------
    customer_class : str
        Value matched against the 'class' column; also used in the title.
    year_list : sequence
        Years since electrification, matched against the 'YearsElectrified'
        column. Must contain at least one entry.
    daytype : str
        Value matched against the 'daytype' column; also used in the title.
    model_dir :
        Passed unchanged to ``bmHourlyProfiles``.

    Returns
    -------
    Whatever ``offline.iplot`` returns for the assembled figure.

    Raises
    ------
    ValueError
        If ``year_list`` is empty.
    """
    import warnings  # local import: only used to report skipped panels

    n_years = len(year_list)
    if n_years == 0:
        raise ValueError('year_list must contain at least one year')

    df = bmHourlyProfiles(model_dir)
    # Take the maximum before filtering so that the colour scale is
    # consistent across classes and day types.
    maxdemand = df['Mean [kVA]'].max()
    df = df[(df['daytype'] == daytype) & (df['class'] == customer_class)]

    # Heatmap colours: five evenly spaced stops of the RdYlBu palette.
    colors = cl.flipper()['div']['5']['RdYlBu']
    scl = [[0, colors[0]], [0.25, colors[1]], [0.5, colors[2]],
           [0.75, colors[3]], [1, colors[4]]]

    # Subplot grid: at most three columns, as many rows as needed.
    ncol = min(n_years, 3)
    nrow = ceil(n_years / ncol)
    fig = py.tools.make_subplots(
        rows=nrow,
        cols=ncol,
        subplot_titles=['Year ' + str(x) for x in year_list],
        horizontal_spacing=0.1,
        print_grid=False)

    # Colorbar geometry depends only on the grid shape, so set it once.
    if nrow == 1:
        cblen, yanc = 1, 'middle'
    else:
        cblen, yanc = 0.5, 'bottom'

    for idx, yr in enumerate(year_list):
        row = idx // ncol + 1
        col = idx % ncol + 1
        show_scale = idx == 0  # only the first panel carries the colorbar

        # Building a panel is best-effort: a year that cannot be plotted is
        # reported and skipped so the remaining panels are still drawn.
        try:
            yr_data = df[df['YearsElectrified'] == yr]
            z = yr_data['Mean [kVA]'].reset_index(drop=True)
            x = yr_data['hour']
            y = yr_data.month
            hours = x.unique()
            hovertext = []
            # NOTE(review): the 24 * mi + hi lookup assumes 24 hourly rows
            # per month, ordered by month then hour - confirm against
            # bmHourlyProfiles.
            for mi, mm in enumerate(y.unique()):
                hover_row = []
                for hi, hh in enumerate(hours):
                    hover_row.append(
                        'hour: {}<br />month: {}<br />{:.3f} kVA'.format(
                            hh, mm, z[24 * mi + hi]))
                hovertext.append(hover_row)
            trace = go.Heatmap(z=z,
                               x=x,
                               y=y,
                               zmin=0,
                               zmax=maxdemand,
                               text=hovertext,
                               hoverinfo="text",
                               colorscale=scl,
                               reversescale=True,
                               showscale=show_scale,
                               colorbar=dict(title='kVA',
                                             len=cblen,
                                             yanchor=yanc))
            fig.append_trace(trace, row, col)
        except Exception as err:
            warnings.warn(
                'plotBmHourlyHeatmap: skipped year {} ({}: {})'.format(
                    yr, type(err).__name__, err))

    # "1, 2 and 3" for several years, just "5" for a single year.
    year_labels = [str(v) for v in year_list]
    if n_years == 1:
        years_phrase = year_labels[0]
    else:
        years_phrase = ', '.join(year_labels[:-1]) + ' and ' + year_labels[-1]

    fig['layout'].update(
        showlegend=False,
        title='<b>' + customer_class + '</b> mean estimated <b>' + daytype +
        '</b> energy demand (kVA) <br />' + years_phrase +
        ' years after electrification',
        height=350 + 300 * (nrow - 1))

    month_names = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
        'Nov', 'Dec'
    ]
    # NOTE(review): the range runs one index past the number of plotted
    # years; kept as in the original - confirm whether that is intended.
    for k in range(1, n_years + 2):
        fig['layout'].update({
            'yaxis{}'.format(k):
            dict(type='category',
                 ticktext=list(month_names),
                 tickvals=np.arange(1, 13, 1),
                 tickangle=-15,
                 tickwidth=0.5),
            'xaxis{}'.format(k):
            dict(title='Time of day (hours)', tickvals=np.arange(0, 24, 2))
        })

    return offline.iplot(fig, filename='testagain')
Beispiel #26
0
def plotDemandSimilarity(merged_ds):
    """
    Plot benchmark and data-model consumption for every customer class.

    For each distinct value in ``merged_ds['class']`` three traces are added
    to a single chart, all against 'YearsElectrified':

    * a bar of 'M_kw_mean' (named '<class> data model'),
    * a line from a linear regression of 'M_kw_mean' on 'YearsElectrified'
      (named '<class> data lin_reg'),
    * a semi-transparent bar of 'Energy [kWh]' (named '<class> benchmark').

    Each class gets its own colour palette; the palette list is cycled when
    there are more classes than palettes.

    Parameters
    ----------
    merged_ds :
        Table with the columns 'class', 'YearsElectrified', 'Energy [kWh]',
        'M_kw_mean' and 'M_kw_std' (the last one is selected but not plotted).

    Returns
    -------
    Whatever ``offline.iplot`` returns for the assembled figure.
    """
    classes = merged_ds['class'].unique()
    trcs = len(classes)
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    # Build the colour lookup once instead of once per trace.
    palettes = cl.flipper()['seq']['3']

    # Generate existing (benchmark) and new (data model) traces per class.
    data = []
    for count, c in enumerate(classes):
        d = merged_ds.loc[(merged_ds['class'] == c)][[
            'YearsElectrified', 'Energy [kWh]', 'M_kw_mean', 'M_kw_std'
        ]]
        years = d['YearsElectrified'].values
        model_mean = d['M_kw_mean'].values

        # Split 0.8 of an x-unit between the classes; each class is shifted
        # further left by its own bar width.
        wx = 0.8 / trcs
        ox = -wx * count

        # Only slope and intercept of the fit are needed for the trend line.
        slope, intercept = stats.linregress(years, model_mean)[:2]
        line = slope * years + intercept

        # Cycle through the palettes so more than len(clrs) classes still plot.
        shades = palettes[clrs[count % len(clrs)]]

        trace0 = go.Bar(
            x=years,
            y=d['Energy [kWh]'].values,
            marker=dict(color=shades[-1]),
            name=c + ' benchmark',
            opacity=0.6,
            width=wx,
            offset=ox,
        )

        trace1 = go.Bar(
            x=years,
            y=model_mean,
            name=c + ' data model',
            marker=dict(color=shades[1]),
            width=wx,
            offset=ox,
        )

        trace2 = go.Scatter(
            x=years,
            y=line,
            mode='lines',
            line=dict(color=shades[1], width=3),
            name=c + ' data lin_reg',
        )

        # Same trace order as before: data model, regression line, benchmark.
        data.extend([trace1, trace2, trace0])

    layout = go.Layout(
        title='Annual mean monthly demand model similarity',
        xaxis=dict(title='time electrified (years)',
                   tickvals=list(range(1, 16))),
        yaxis=dict(title='annual mean monthly consumption (kWh)'),
    )
    fig = go.Figure(data=data, layout=layout)

    return offline.iplot(
        fig, filename=os.path.join(image_dir, 'demand-similarity.png'))