Example #1
def plot_LDA(data, features):
    X = data[features]
    y = data["categoria"]

    X_std = StandardScaler().fit_transform(X)
    LDA = LinearDiscriminantAnalysis()
    Y = LDA.fit_transform(X_std, y)

    results = []

    for name in (2, 3, 13):
        result = go.Scatter(x=Y[y == name, 0],
                            y=Y[y == name, 1],
                            mode="markers",
                            name=name,
                            marker=go.Marker(size=8,
                                             line=go.Line(
                                                 color="rgba(225,225,225,0.2)",
                                                 width=0.5),
                                             opacity=0.75))
        results.append(result)

    data = go.Data(results)
    layout = go.Layout(xaxis=go.XAxis(title="CP1", showline=False),
                       yaxis=go.YAxis(title="CP2", showline=False))

    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig)

    return fig
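
A minimal sketch of how plot_LDA might be invoked, assuming the enclosing module imports plotly.graph_objs as go, plotly.offline as py, and scikit-learn's StandardScaler and LinearDiscriminantAnalysis, and runs on a Plotly version that still ships the legacy go.Marker/go.Line/go.Data/go.XAxis classes; the toy DataFrame and feature names are purely illustrative.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "feat_a": rng.normal(size=90),
    "feat_b": rng.normal(size=90),
    "feat_c": rng.normal(size=90),
    "categoria": [2, 3, 13] * 30,   # the three class labels the loop above expects
})
fig = plot_LDA(toy, features=["feat_a", "feat_b", "feat_c"])
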
Example #2
def hellinger_length_plot(hellinger_stats, filename):
    """
    :param hellinger_stats: path to the saved file with the Hellinger statistics from the calculate_hellinger_distance function
    :param filename: file name (with directory) where the results are to be stored (dtype: str)
    Description: Plots a scatter plot of the number of features activated for every neuron vs. the Hellinger distance
                 between the two models
    """

    with open(hellinger_stats, 'rb') as handle:
        hellinger_dict = pickle.load(handle)

    num_token_list, distance_list = ([] for i in range(2))
    for activation, (distance, num_tokens) in hellinger_dict.items():
        num_token_list.append(num_tokens)
        distance_list.append(distance)

    fig = px.scatter(x=num_token_list, y=distance_list)
    
    plot_title = str(len(hellinger_dict)) + " neurons activated"
    fig.update_layout(barmode='relative', 
                        title_text=plot_title,
                        xaxis_title="Log Hellinger length",
                        yaxis_title="Hellinger distance",
                        xaxis_type="log",
                        xaxis = go.XAxis(showticklabels=False),
                        yaxis = go.YAxis(showticklabels=False)
                        )
    
    plotly.offline.plot(fig, filename=filename, auto_open=False)
    fig.show()
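
A sketch of how this helper might be driven, assuming pickle, plotly, plotly.express as px and the legacy go.XAxis/go.YAxis classes are available in the module; the statistics written below are a toy stand-in for the real output of calculate_hellinger_distance.

import pickle

# Toy stats: {neuron_id: (hellinger_distance, number_of_activating_tokens)}
toy_stats = {0: (0.12, 35), 1: (0.48, 7), 2: (0.95, 120)}
with open("hellinger_stats.pickle", "wb") as handle:
    pickle.dump(toy_stats, handle)

hellinger_length_plot("hellinger_stats.pickle", filename="hellinger_length.html")
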
Example #3
def signup():
    version = randint(0, 1000000)
    login = LoginForm()
    address = AddressForm()
    if address.validate_on_submit():
        x, y = get_lambert(address.address.data)
        size = int(address.window.data)
        tif = GeoTIFF.get_tif_from_point(x, y).crop_location(x, y, size, size)
        if address.projection.data == "2D": tif.png()
        else:
            xaxis = go.XAxis(range=[0.2, 1],
                             showgrid=False,
                             zeroline=False,
                             visible=False)
            yaxis = go.YAxis(range=[0.2, 1],
                             showgrid=False,
                             zeroline=False,
                             visible=False)
            layout = go.Layout(xaxis=xaxis,
                               yaxis=yaxis,
                               paper_bgcolor='rgba(0,0,0,0)',
                               scene_aspectmode='manual',
                               scene_aspectratio=dict(x=1.5, y=1.5, z=0.5),
                               margin=dict(l=0, r=0, b=0, t=0))
            fig = go.Figure(data=[go.Surface(z=tif.arr)], layout=layout)
            fig.write_image(directory + "/app/static/plot.png")

    return render_template("geoloc.html",
                           version=version,
                           form={
                               "login": login,
                               "address": address
                           })
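
The 3D branch above boils down to rendering a go.Surface with all axis chrome hidden and exporting it to PNG. A standalone sketch of that pattern, with a random toy grid standing in for tif.arr and assuming the kaleido (or orca) image export backend is installed:

import numpy as np
import plotly.graph_objs as go

z = np.random.default_rng(0).random((50, 50))      # toy elevation grid in place of tif.arr
hidden_axis = dict(showgrid=False, zeroline=False, visible=False)
layout = go.Layout(xaxis=hidden_axis,
                   yaxis=hidden_axis,
                   paper_bgcolor='rgba(0,0,0,0)',
                   scene_aspectmode='manual',
                   scene_aspectratio=dict(x=1.5, y=1.5, z=0.5),
                   margin=dict(l=0, r=0, b=0, t=0))
fig = go.Figure(data=[go.Surface(z=z)], layout=layout)
fig.write_image("plot.png")                         # needs kaleido (or orca) installed
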
Example #4
def plotly_histogram2(X, columns, target):
    colors = {
        2: 'rgb(255,127,20)',
        3: 'rgb(31, 220, 120)',
        13: 'rgb(44, 50, 180)'
    }
    traces = []
    _targets = sorted(X[target].unique().tolist())

    legend = {2: True, 3: True, 13: True}

    for col in range(2):
        for key in range(len(_targets)):
            traces.append(
                go.Histogram(x=X[X[target] == _targets[key]][columns[col]],
                             opacity=0.7,
                             xaxis="x%s" % (col + 1),
                             marker=go.Marker(color=colors[_targets[key]]),
                             name=_targets[key],
                             showlegend=legend[_targets[key]]))
        legend = {2: False, 3: False, 13: False}

    data = go.Data(traces)
    layout = go.Layout(barmode="overlay",
                       xaxis=go.XAxis(domain=[0, 0.48], title=columns[0]),
                       xaxis2=go.XAxis(domain=[0.52, 1], title=columns[1]),
                       yaxis=go.YAxis(title="Numero de Defectos"),
                       title="Histograma caracteristicas")

    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig)

    return fig
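
A sketch of a call to plotly_histogram2, assuming plotly.graph_objs as go and plotly.offline as py are in scope on a Plotly version that still provides go.Marker/go.Data/go.XAxis; note that the target column must contain exactly the labels 2, 3 and 13 hard-coded in the colors dictionary.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "thickness": rng.normal(1.0, 0.2, 90),
    "pressure": rng.normal(5.0, 1.0, 90),
    "defectos": [2, 3, 13] * 30,     # hypothetical target column
})
fig = plotly_histogram2(toy, columns=["thickness", "pressure"], target="defectos")
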
Example #5
def update_clustering_figure(selectedData, value):
    print("c")
    selectedData = json.loads(selectedData)
    kmeans_val = value
    clickpoint = selectedData['selectedData']
    print(clickpoint)
    if clickpoint != 'null':
        clickpoint = json.loads(clickpoint)
        print("in here")
        clicked_type = clickpoint['points'][0]['label']
        # if prev_clicked_cluster_type != clicked_type:
        #     prev_clicked_cluster_type = clicked_type
        local_df = df[df.Type_1 == clicked_type]
        stats = local_df.iloc[:, 5:11]
        normalized_stats = stats.copy()
        for i in stats.columns:
            mini, maxi = stats[i].min(), stats[i].max()
            normalized_stats[i] = (stats[i] - mini) / (maxi - mini)
        pca = PCA(n_components=2).fit(normalized_stats)
        stats2d = pca.transform(normalized_stats)
        df_stats2d = pd.DataFrame(stats2d, index=local_df.index)

        model, z = cluster(kmeans_val, normalized_stats.iloc[:, 0:5])
        trace = go.Scatter(x=df_stats2d.iloc[:, 0],
                           y=df_stats2d.iloc[:, 1],
                           text=local_df['Name'],
                           name='',
                           mode='markers',
                           marker=go.Marker(opacity=0.5, color=z),
                           showlegend=False)
    else:
        print("error")
        stats = df.iloc[:, 5:11]
        normalized_stats = stats.copy()
        for i in stats.columns:
            mini, maxi = stats[i].min(), stats[i].max()
            normalized_stats[i] = (stats[i] - mini) / (maxi - mini)
        pca = PCA(n_components=2).fit(normalized_stats)
        stats2d = pca.transform(normalized_stats)
        model, z = cluster(kmeans_val, normalized_stats.iloc[:, 0:5])
        trace = go.Scatter(x=stats2d[:, 0],
                           y=stats2d[:, 1],
                           text=df['Name'],
                           name='',
                           mode='markers',
                           marker=go.Marker(opacity=0.5, color=z),
                           showlegend=False)
    layout = go.Layout(
        title='k-means clustering of catch_rate and total stats',
        xaxis=go.XAxis(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=go.YAxis(showgrid=False, zeroline=False, showticklabels=False),
        hovermode='closest')
    data = go.Data([trace])
    fig = go.Figure(data=data, layout=layout)

    return fig
Example #6
def mass_activation_plot(unsup_data, zero_shot_data, sup_data, data_dict):
    """
    :param unsup_data: unsupervised data (dtype: pandas dataframe)
    :param zero_shot_data: zero-shot data (dtype: pandas dataframe)
    :param sup_data: supervised data (dtype: pandas dataframe)
    :param data_dict: dictionary containing input instructions (dtype: dict)
    Plots the mass activation plot and saves it to data_dict["visualize"]["plot_directory"]
    """
    
    if not os.path.exists(data_dict["visualize"]["plot_directory"]):
        os.makedirs(data_dict["visualize"]["plot_directory"])
        
    zero_shot_neurons = list(zero_shot_data['max_activation_index'].unique())
    unsup_neurons = list(unsup_data['max_activation_index'].unique())
    sup_neurons = list(sup_data['max_activation_index'].unique())
    
    zero_shot_mass_dict, unsup_mass_dict, sup_mass_dict = ({} for i in range(3))
    
    for neuron in unsup_neurons:
        temp = unsup_data[unsup_data['max_activation_index']==neuron]
        unsup_mass_dict[neuron] = sum(temp['max_activations'])
    for neuron in zero_shot_neurons:
        temp = zero_shot_data[zero_shot_data['max_activation_index']==neuron]
        zero_shot_mass_dict[neuron] = sum(temp['max_activations'])
    for neuron in sup_neurons:
        temp = sup_data[sup_data['max_activation_index']==neuron]
        sup_mass_dict[neuron] = sum(temp['max_activations'])
        
    sup = [value[1] for value in sorted(sup_mass_dict.items(), key=operator.itemgetter(1), reverse=True)]
    unsup = [value[1] for value in sorted(unsup_mass_dict.items(), key=operator.itemgetter(1), reverse=True)]
    zshot = [value[1] for value in sorted(zero_shot_mass_dict.items(), key=operator.itemgetter(1), reverse=True)] 
    
    fig = go.Figure()
    fig.add_trace(go.Bar(y=sup, name="sup", marker_color=data_dict['visualize']['viz_colors']['sup_color']))
    fig.add_trace(go.Bar(y=unsup, name="unsup", marker_color=data_dict['visualize']['viz_colors']['unsup_color']))
    fig.add_trace(go.Bar(y=zshot, name="zshot", marker_color=data_dict['visualize']['viz_colors']['zero_shot_color']))
    
    fig.update_layout(barmode='relative', 
                    title_text='Mass activations for neurons',
                    xaxis_title="Neurons",
                    yaxis_title="Log mass Activations",
                    yaxis_type="log",
                    xaxis = go.XAxis(showticklabels=False),
                    yaxis = go.YAxis(showticklabels=False)
                    )

    # fig.write_image(os.path.join(data_dict["visualize"]["plot_directory"], "mass_activation_plot.pdf"))
    plotly.offline.plot(fig, filename = os.path.join(data_dict["visualize"]["plot_directory"], "mass_activation_plot.pdf"),
                        auto_open=False)
    fig.show()
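
A sketch of how mass_activation_plot might be exercised, assuming os, operator, plotly and the legacy go.XAxis/go.YAxis classes are importable in the module; the toy DataFrames only mimic the max_activation_index / max_activations columns the function reads, and every path and color is illustrative.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)

def toy_activations(n=500, n_neurons=50):
    # One row per token: which neuron fired the most and how strongly.
    return pd.DataFrame({
        "max_activation_index": rng.integers(0, n_neurons, size=n),
        "max_activations": rng.random(n),
    })

data_dict = {
    "visualize": {
        "plot_directory": "plots",
        "viz_colors": {"sup_color": "crimson",
                       "unsup_color": "royalblue",
                       "zero_shot_color": "seagreen"},
    }
}
mass_activation_plot(toy_activations(), toy_activations(), toy_activations(), data_dict)
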
Example #7
def index():
    version = randint(0, 1000000)

    if current_user.is_anonymous:
        return redirect(url_for("login"))

    login_form = LoginForm()
    geo_form = GeoForm()

    if login_form.validate_on_submit() and login_form.login.data:
        user = User.query.filter_by(username=login_form.username.data).first()
        if user is None or not user.check_password(login_form.password.data):
            return redirect(url_for("login"))
        login_user(user, remember=False)
        return redirect(url_for("index"))

    elif geo_form.validate_on_submit() and geo_form.plot.data:
        x, y = get_lambert(geo_form.address.data)
        size = int(geo_form.window.data)
        tif = GeoTIFF.get_containing_tif(x, y, size)
        tif = tif.crop_location(x, y, size, size)
        if geo_form.projection.data == "2D": tif.png()
        else:
            xaxis = go.XAxis(range=[0.2, 1],
                             showgrid=False,
                             zeroline=False,
                             visible=False)
            yaxis = go.YAxis(range=[0.2, 1],
                             showgrid=False,
                             zeroline=False,
                             visible=False)
            layout = go.Layout(xaxis=xaxis,
                               yaxis=yaxis,
                               paper_bgcolor='rgba(0,0,0,0)',
                               scene_aspectmode='manual',
                               scene_aspectratio=dict(x=1.5, y=1.5, z=0.5),
                               margin=dict(l=0, r=0, b=0, t=0))
            fig = go.Figure(data=[go.Surface(z=tif.arr)], layout=layout)
            fig.write_image(directory + "/app/static/plot.png")

    return render_template("geoloc.html",
                           version=version,
                           logged=current_user.is_authenticated,
                           login_form=login_form,
                           geo_form=geo_form)
Example #8
def plot_least_10_hellinger_neurons(hellinger_stats, model1_data, model2_data, color1, color2, modelname1, modelname2, 
                                  data_dict, foldername, n_tokens=0, process_data_flag=False):
    """
    :param hellinger_stats: path to the saved file with the Hellinger statistics from the calculate_hellinger_distance function
    :param model1_data: data from trained model 1 (dtype: dataframe)
    :param model2_data: data from trained model 2 (dtype: dataframe)
    :param color1: color for model 1 (dtype: str)
    :param color2: color for model 2 (dtype: str)
    :param modelname1: model 1 label (dtype: str)
    :param modelname2: model 2 label (dtype: str)
    :param data_dict: dictionary containing input instructions (dtype: dict)
    :param foldername: pickled file name and directory to store the results
    :param n_tokens: number of tokens you want to plot (dtype: int)
    :param process_data_flag: True if the pickle files need to be generated, False if you want to load the pickle
                              files.
    :Description: Generates plots for the 10 neurons with the smallest Hellinger distances in hellinger_stats
    """
    # removing the whitespaces
    model1_data['POS'] = model1_data['POS'].apply(lambda x:x.replace(" ",""))
    model2_data['POS'] = model2_data['POS'].apply(lambda x:x.replace(" ",""))
    
    # Getting all the POS tags activated in either model
    model1_pos = list(model1_data['POS'].unique())
    model2_pos = list(model2_data['POS'].unique())
    all_pos = set(model1_pos + model2_pos)
    # all_pos = [pos.strip() for pos in all_pos]
    
    # loading the Hellinger distance dictionary
    with open(hellinger_stats, 'rb') as handle:
        hellinger_dict = pickle.load(handle)
        
    least_10_neurons = heapq.nsmallest(10, hellinger_dict, key=hellinger_dict.get)
    for neuron in least_10_neurons:
        path = os.path.join(data_dict["visualize"]["plot_directory"],foldername,"least_10",str(neuron))
        
        if not os.path.exists(path):
            os.makedirs(path)
        
        model1_data_temp = model1_data[model1_data['max_activation_index']==neuron]
        model2_data_temp = model2_data[model2_data['max_activation_index']==neuron]
                
        # Getting the pos stats from all the dictionaries
        model1_pos_dict = dict(Counter(model1_data_temp['POS']))
        model2_pos_dict = dict(Counter(model2_data_temp['POS']))
        # Creating dataframe from the dictionaries
        model1_pos = pd.DataFrame.from_dict(model1_pos_dict, orient='index', columns=[modelname1])
        model2_pos = pd.DataFrame.from_dict(model2_pos_dict, orient='index', columns=[modelname2])
        # Normalizing the statistics
        model1_pos[modelname1] = model1_pos[modelname1].apply(lambda x: x/model1_pos[modelname1].sum())
        model2_pos[modelname2] = model2_pos[modelname2].apply(lambda x: x/model2_pos[modelname2].sum())
        # Merging dataframe
        data = [model1_pos[modelname1], model2_pos[modelname2]]
        df = pd.concat(data,axis=1)
        # Again converting the dataframe to dictionary for further computations.
        all_pos_stats = df.to_dict()
        
        # Getting all the pos stats into a dictionary
        for viz_data in all_pos_stats.keys():
            for tags in all_pos:
                if tags not in all_pos_stats[viz_data].keys():
                    all_pos_stats[viz_data][tags] = None
            
        # Converting pos stats to a dataframe
        # all_pos_stats = pd.DataFrame.from_dict(all_pos_stats)
        
        if process_data_flag == True:
            # Getting the data.
            model1_neurondata = model1_data[model1_data['max_activation_index']==neuron]
            model1_neurondata['POS'] = model1_neurondata['POS'].apply(lambda x: x.strip())
            model2_neurondata = model2_data[model2_data['max_activation_index']==neuron]
            model2_neurondata['POS'] = model2_neurondata['POS'].apply(lambda x: x.strip())
            
            # Converting the other pos tags to the least three ones
            model1_least_pos = choose_top_pos_from_data(model1_neurondata)
            model2_least_pos = choose_top_pos_from_data(model2_neurondata)
            
            model1_tokens = list(model1_neurondata['inputs'])
            model1_pos = list(model1_neurondata['POS'])
            model2_tokens = list(model2_neurondata['inputs'])
            model2_pos = list(model2_neurondata['POS'])

            for index, pos in enumerate(model1_pos):
                if pos not in model1_least_pos[model1_tokens[index]]:
                    model1_pos[index] = model1_least_pos[model1_tokens[index]][0]
            for index, pos in enumerate(model2_pos):
                if pos not in model2_least_pos[model2_tokens[index]]:
                    model2_pos[index] = model2_least_pos[model2_tokens[index]][0]
                    
            model1_neurondata['POS'] = model1_pos
            model2_neurondata['POS'] = model2_pos
            
            # Getting all the unique tokens
            model1_unique_tokens = model1_neurondata["inputs"].unique()
            model2_unique_tokens = model2_neurondata["inputs"].unique()
            
            model1_dict,model2_dict = ({} for i in range(2))
            
            # Generating model1 visualization
            # Getting mean for all the unique tokens
            for tokens in model1_unique_tokens:
                temp_df = model1_neurondata[model1_neurondata["inputs"] == tokens]
                pos = list(temp_df["POS"].unique())
                activation_temp = []
                for unique_pos in pos:
                    activation_temp.append(temp_df[temp_df['POS']==unique_pos]["max_activations"].mean())
                model1_dict[tokens] = {"POS":pos, "activation":activation_temp}
            
            # Getting the n_tokens tokens with the highest mean activations
            model1_least_20 = {}
            temp_activations, temp_tokens = ([] for i in range(2))
            for key, value in model1_dict.items():
                for index in range(len(value['POS'])):
                    temp_tokens.append(key)
                    temp_activations.append(value['activation'][index])      
            model1_least_20_activation_index = sorted(range(len(temp_activations)), key=lambda x: temp_activations[x])[-n_tokens:]
            for indexes in model1_least_20_activation_index:
                model1_least_20[temp_tokens[indexes]] = model1_dict[temp_tokens[indexes]]
            
            # Flipping the dictionary to get it in the order of {pos-tags:list(tuple(token,mean_activations))}
            model1_token_dict = defaultdict(list)
            for token,stats in model1_least_20.items():
                for index,value in enumerate(stats['POS']):
                    model1_token_dict[stats['POS'][index]].append((token,stats['activation'][index]))
            
            # Adding the null features for the tags not present
            for tags in all_pos:
                if tags not in model1_token_dict.keys():
                    model1_token_dict[tags].append((' ',0.0))

            # Sorting dict on the basis of the names
            sorted_model1_dict = {}
            for key in sorted(model1_token_dict.keys()):
                sorted_model1_dict[key] = model1_token_dict[key]
                
            with open(os.path.join(path,'model1_data.pickle'), 'wb') as handle:
                pickle.dump(sorted_model1_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
                
            # Generating model2 visualization
            # Getting mean for all the unique tokens
            for tokens in model2_unique_tokens:
                temp_df = model2_neurondata[model2_neurondata["inputs"] == tokens]
                pos = list(temp_df["POS"].unique())
                activation_temp = []
                for unique_pos in pos:
                    activation_temp.append(temp_df[temp_df['POS']==unique_pos]["max_activations"].mean())
                model2_dict[tokens] = {"POS":pos, "activation":activation_temp}
            
            # Getting the n_tokens tokens with the highest mean activations
            model2_least_20 = {}
            temp_activations, temp_tokens = ([] for i in range(2))
            for key, value in model2_dict.items():
                for index in range(len(value['POS'])):
                    temp_tokens.append(key)
                    temp_activations.append(value['activation'][index])      
            model2_least_20_activation_index = sorted(range(len(temp_activations)), key=lambda x: temp_activations[x])[-n_tokens:]
            for indexes in model2_least_20_activation_index:
                model2_least_20[temp_tokens[indexes]] = model2_dict[temp_tokens[indexes]]
            
            # Flipping the dictionary to get it in the order of {pos-tags:list(tuple(token,mean_activations))}
            model2_token_dict = defaultdict(list)
            for token,stats in model2_least_20.items():
                for index,value in enumerate(stats['POS']):
                    model2_token_dict[stats['POS'][index]].append((token,stats['activation'][index]))
            
            # Adding the null features for the tags not present
            for tags in all_pos:
                if tags not in model2_token_dict.keys():
                    model2_token_dict[tags].append((' ',0.0))

            # Sorting dict on the basis of the names
            sorted_model2_dict = {}
            for key in sorted(model2_token_dict.keys()):
                sorted_model2_dict[key] = model2_token_dict[key]
                
            with open(os.path.join(path,'model2_data.pickle'), 'wb') as handle:
                pickle.dump(sorted_model2_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
                
        else:
            # loading the dictionary
            with open(os.path.join(path,'model1_data.pickle'), 'rb') as handle:
                sorted_model1_dict = pickle.load(handle)
            with open(os.path.join(path,'model2_data.pickle'), 'rb') as handle:
                sorted_model2_dict = pickle.load(handle)
                
        fig = go.Figure()
        # Plotting the bar plot
        fig.add_trace(go.Bar(x=list(all_pos_stats[modelname1].keys()), y=list(all_pos_stats[modelname1].values()), 
                             name=modelname1, marker_color=color1, opacity=0.6))
        fig.add_trace(go.Bar(x=list(all_pos_stats[modelname2].keys()), y=list(all_pos_stats[modelname2].values()), 
                             name=modelname2, marker_color=color2, opacity=0.6))

        # Plotting the tokens on the bar plot
        pos_model1 = list(sorted_model1_dict.keys())
        values_model1 = list(sorted_model1_dict.values())

        pos_model2 = list(sorted_model2_dict.keys())
        values_model2 = list(sorted_model2_dict.values())
        model1_value = [[(value[0],np.nan) if value[1]==0.0 else (value[0],value[1]) for value in pairs] for pairs in values_model1]
        model2_value = [[(value[0],np.nan) if value[1]==0.0 else (value[0],value[1]) for value in pairs] for pairs in values_model2]

        model1_token = [[value[0] for value in pairs] for pairs in model1_value]
        model1_activations = [[value[1] for value in pairs] for pairs in model1_value]

        model2_token = [[value[0] for value in pairs] for pairs in model2_value]
        model2_activations = [[value[1] for value in pairs] for pairs in model2_value]

        pos_model1_list, activation_model1_list, token_model1_list = ([] for i in range(3))
        for index in range(len(pos_model1)):
            for activation_list_index, activation in enumerate(model1_activations[index]):
                pos_model1_list.append(pos_model1[index])
                activation_model1_list.append(activation)
                token_model1_list.append(model1_token[index][activation_list_index])
        fig.add_trace(go.Scatter(x=pos_model1_list, y=activation_model1_list, text=token_model1_list, 
                                 mode='markers+text', marker_color=color1, name=modelname1, 
                                 textfont={'color':color1}))

        pos_model2_list, activation_model2_list, token_model2_list = ([] for i in range(3))
        for index in range(len(pos_model2)):
            for activation_list_index, activation in enumerate(model2_activations[index]):
                pos_model2_list.append(pos_model2[index])
                activation_model2_list.append(activation)
                token_model2_list.append(model2_token[index][activation_list_index])
        fig.add_trace(go.Scatter(x=pos_model2_list, y=activation_model2_list, text=token_model2_list, 
                                 mode='markers+text', marker_color=color2, name=modelname2, 
                                 textfont={'color':color2}))
        
        fig.update_layout(title_text='Hellinger plot for ' + str(neuron) + "-neuron" ,
                    xaxis_title="POS-tags",
                    yaxis_title="Activation",
                    xaxis = go.XAxis(showticklabels=True),
                    yaxis = go.YAxis(showticklabels=True)
                    )
        
        plotly.offline.plot(fig, filename = os.path.join(path,str(neuron)+".pdf"), auto_open=False)
        fig.show()
Example #9
def _create_graph(features, x_axis, y_axis, z_axis, simulation):

    if simulation is not None:
        dataframe_to_plot = dict_of_df[simulation].copy()
        ctx = dash.callback_context
        input_ = ctx.triggered[0]['value']
        if input_ == 'False':
            input_ = False
        if input_ == 'True':
            input_ = True
        if (sum([1 for v in features if v is None]) >
                2) or (x_axis is None) or (y_axis is None) or (z_axis is None):
            raise dash.exceptions.PreventUpdate
        else:
            for tupla in names_and_values:
                if (tupla[0] != None) and (tupla[1] != None):
                    dataframe_to_plot = dataframe_to_plot[dataframe_to_plot[
                        tupla[0]] == tupla[1]]

            dataframe_to_plot = (dataframe_to_plot.groupby(
                [y_axis, x_axis])[z_axis].sum()).to_frame()
            dataframe_to_plot = dataframe_to_plot.pivot_table(columns=x_axis,
                                                              index=y_axis,
                                                              values=z_axis)
            dataframe_to_plot = dataframe_to_plot.round().astype('Int64')
            dataframe_to_plot.fillna(0, inplace=True)

            fig1 = ff.create_annotated_heatmap(
                x=dataframe_to_plot.columns.to_list(),
                y=dataframe_to_plot.index.to_list(),
                z=dataframe_to_plot.values,
                annotation_text=dataframe_to_plot.values,
                colorscale='greens',
                showscale=True)

            fig = plotly.subplots.make_subplots(rows=1,
                                                cols=1,
                                                subplot_titles=("HEATMAP",
                                                                'dfdf'))
            fig.add_trace(fig1.data[0], 1, 1)
            annot1 = list(fig1.layout.annotations)
            for k in range(len(annot1)):
                annot1[k]['xref'] = 'x1'
                annot1[k]['yref'] = 'y1'
                annot1[k]['align'] = 'center'
            for anno in annot1:
                fig.add_annotation(anno)

            fig['layout'].update(
                xaxis_title=x_axis,
                yaxis_title=y_axis,
                height=700,
                yaxis=go.YAxis(ticks='',
                               dtick=86400000.0 if y_axis == 'date' else ""),
                xaxis=go.XAxis(ticks='',
                               dtick=86400000.0 if x_axis == 'date' else ""),
                autosize=True,
                paper_bgcolor='#f9f9f9')
            stilo = {
                'position': 'fixed',
                'width': '54%',
                'top': '50px',
                'right': '0px',
                'backgroundColor': '#f9f9f9'
            }

            return [stilo, fig]
    else:
        raise dash.exceptions.PreventUpdate
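
The heatmap assembly above follows a reusable pattern: build the figure with ff.create_annotated_heatmap, move its single trace into a make_subplots grid, then copy the cell annotations over with their axis references repointed. A condensed sketch of just that pattern, with toy data and no Dash wiring:

import plotly.figure_factory as ff
import plotly.subplots

z = [[1, 2], [3, 4]]
heat = ff.create_annotated_heatmap(x=["col_a", "col_b"],
                                   y=["row_1", "row_2"],
                                   z=z,
                                   annotation_text=z,
                                   colorscale="greens",
                                   showscale=True)
fig = plotly.subplots.make_subplots(rows=1, cols=1, subplot_titles=("HEATMAP",))
fig.add_trace(heat.data[0], 1, 1)
# create_annotated_heatmap stores its cell labels as layout annotations,
# so they have to be copied over and pointed at the subplot's axes.
for anno in heat.layout.annotations:
    anno["xref"] = "x1"
    anno["yref"] = "y1"
    fig.add_annotation(anno)
fig.update_layout(height=700, paper_bgcolor="#f9f9f9")
fig.show()
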
Example #10
    def plotIndices(self, depth=400):
        import pandas as pd
        import plotly.express as px
        from sklearn import preprocessing
        import plotly.graph_objects as go
        import app.TA2 as TA2
        # fill the indices

        dji = yahoo2('^DJI', '1d', period=depth)
        ftse = yahoo2('^FTSE', '1d', period=depth)

        dji.Date = dji.Date.apply(lambda x: x[0:9])
        ftse.Date = ftse.Date.apply(lambda x: x[0:9])

        d, f = dji, ftse

        engine = create_engine('sqlite:///004.sqlite')
        conn = engine.connect()

        hsbc = pd.DataFrame.from_dict(
            conn.execute('select * from R72', as_dict=True))
        hsbc.columns = ['Close', 'Date', 'id']
        hsbc = hsbc.drop(['id'], axis=1)
        #print(hsbc)

        hsbc.Date = hsbc.Date.apply(
            lambda x: (datetime.strptime(x, '%Y-%m-%d').strftime("%d %b %y")))
        hsbc['RealMACD'] = TA2.MACD(hsbc, 12, 26)['MACD_12_26']

        conn.close()

        #shift = 90
        dji = TA2.MACD(dji, 12, 26).tail(depth)  #['MACD_12_26']
        dji = TA2.STO(dji, 10, 10, 3)
        ftse = TA2.MACD(ftse, 12, 26).tail(depth)  #['MACD_12_26']
        ftse = TA2.STO(ftse, 10, 10, 3)
        hsbc = hsbc.tail(depth)

        macd=ftse.merge(dji,on='Date',how='left',copy=False).\
         drop(['Open_x','Open_y','High_x','High_y','Low_x','Low_y','MACDsign_12_26_x',
           'MACDsign_12_26_y','MACDdiff_12_26_x','MACDdiff_12_26_y','Volume_x','Volume_y',
           'SO%d10_x','SO%d10_y'],axis=1)

        macd = macd.merge(hsbc, on='Date', how='left', copy=False)  #.dropna()
        macd.columns = [
            'Date', 'FTSE', 'FTSE_MACD', 'FTSE_K', 'DJI', 'DJI_MACD', 'DJI_K',
            'R72', 'R72_re_MACD'
        ]  #,'R72_im_MACD','R72_im']
        #macd = macd.dropna().drop_duplicates(subset=['Date'])

        min_max_scaler = preprocessing.MinMaxScaler()

        coef_x, coef_y = 0.76764361, 0.42319705

        for c in macd.columns:
            macd[c] = min_max_scaler.fit_transform(macd[c].values.reshape(
                -1, 1)) if c != 'Date' else macd[c]

            macd[
                'R72_im_MACD'] = macd.DJI_MACD * coef_x + macd.FTSE_MACD * coef_y
            macd.R72_im_MACD = min_max_scaler.fit_transform(
                macd.R72_im_MACD.values.reshape(-1, 1))
            macd['R72_im'] = macd.DJI * coef_x + macd.FTSE * coef_y
            macd.R72_im = min_max_scaler.fit_transform(
                macd.R72_im.values.reshape(-1, 1))
            macd['R72_im_K'] = macd.DJI_K * coef_x + macd.FTSE_K * coef_y
            macd.R72_im_MACD = min_max_scaler.fit_transform(
                macd.R72_im_MACD.values.reshape(-1, 1))

            macd = macd.tail(
                depth)  ##################################### cut the DF

            # https://mdipierro.github.io/Publications/2011-web2py-for-Scientific-Applications.pdf
            # https://www.quora.com/Is-there-a-way-to-use-Plotly-with-web2py

        plot = {}
        plot['macd'] = go.Figure({
            'data': [
                {
                    'y': macd.DJI_MACD.values.tolist(),
                    'type': 'scatter',
                    'name': 'DJI',
                    'x': macd.Date
                },
                {
                    'y': macd.FTSE_MACD.values.tolist(),
                    'type': 'scatter',
                    'name': 'FTSE',
                    'x': macd.Date
                },
                {
                    'y': macd.R72_im_MACD.values.tolist(),
                    'type': 'scatter',
                    'name': 'R72_im',
                    'x': macd.Date
                },
                {
                    'y': macd.R72_re_MACD.values.tolist(),
                    'type': 'scatter',
                    'name': 'R72_re',
                    'x': macd.Date
                },
            ],
            'layout':
            go.Layout(xaxis=go.XAxis(title='Date'),
                      yaxis=go.YAxis(title='MACD for main Indices'))
        })  #, include_plotlyjs=False, output_type='div')

        plot['sto'] = go.Figure({
            'data': [
                {
                    'y': macd.DJI_K.values.tolist(),
                    'type': 'scatter',
                    'name': 'DJI',
                    'x': macd.Date
                },
                {
                    'y': macd.FTSE_K.values.tolist(),
                    'type': 'scatter',
                    'name': 'FTSE',
                    'x': macd.Date
                },
                {
                    'y': macd.R72_im_K.values.tolist(),
                    'type': 'scatter',
                    'name': 'HSBC im',
                    'x': macd.Date
                },
                #{'y': macd.R72_re_K.values.tolist(), 'type': 'scatter', 'name': 'R72_re'},
            ],
            'layout':
            go.Layout(xaxis=go.XAxis(title='Date'),
                      yaxis=go.YAxis(title='STO for main Indices'))
        })  #, include_plotlyjs=False, output_type='div')

        plot['norm'] = go.Figure({
            'data': [
                {
                    'y': macd.DJI.tolist(),
                    'type': 'scatter',
                    'name': 'DJI',
                    'x': macd.Date
                },
                {
                    'y': macd.FTSE.tolist(),
                    'type': 'scatter',
                    'name': 'FTSE',
                    'x': macd.Date
                },
                {
                    'y': macd.R72_im.tolist(),
                    'type': 'scatter',
                    'name': 'HSBC im',
                    'x': macd.Date
                },
                {
                    'y': macd.R72.tolist(),
                    'type': 'scatter',
                    'name': 'real HSBC',
                    'x': macd.Date
                },
            ],
            'layout':
            go.Layout(
                xaxis=go.XAxis(title='Date'),
                yaxis=go.YAxis(title='Normalised values for main Indices'))
        })  #, include_plotlyjs=False, output_type='div')

        plot['r72'] = go.Figure({
            'data': [
                {
                    'y': hsbc.Close.tolist(),
                    'type': 'scatter',
                    'name': 'R72_re',
                    'x': hsbc.Date
                },
            ],
            'layout':
            go.Layout(xaxis=go.XAxis(title='Date'),
                      yaxis=go.YAxis(title='Real R72'))
        })  #, include_plotlyjs=False, output_type='div')

        plot['dji'] = go.Figure(data=[
            go.Candlestick(x=d['Date'],
                           open=d['Open'],
                           high=d['High'],
                           low=d['Low'],
                           close=d['Close'])
        ])

        plot['dji'].update_layout(xaxis_rangeslider_visible=False)

        plot['ftse'] = go.Figure(data=[
            go.Candlestick(x=f['Date'],
                           open=f['Open'],
                           high=f['High'],
                           low=f['Low'],
                           close=f['Close'])
        ])

        plot['ftse'].update_layout(xaxis_rangeslider_visible=False)

        return render_template('plot.html',
                               fig1=pio.to_html(plot['macd']),
                               fig2=pio.to_html(plot['sto']),
                               fig3=pio.to_html(plot['norm']),
                               fig4=pio.to_html(plot['r72']),
                               fig5=pio.to_html(plot['dji']),
                               fig6=pio.to_html(plot['ftse']))
Example #11
# Imports this snippet relies on (dash, pandas, plotly, PIL, skimage)
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output

import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from PIL import Image
from skimage import io  # any imread provider would do; only io.imread is used

df = pd.read_csv('sitedata3.csv')
img = io.imread('assets/factorysite.png')
fig = make_subplots(1, 1)

app = dash.Dash(__name__)
server = app.server

layout = go.Layout(
    showlegend=True,
    autosize=True,
    xaxis=go.XAxis(showticklabels=False),
    yaxis=go.YAxis(showticklabels=False),
    margin=dict(l=10, r=10, b=50, t=20),
)

fig_site = px.scatter(df,
                      x='Lat',
                      y='Lon',
                      animation_frame="Date",
                      size='Result',
                      color='Color')
fig_site.add_layout_image(
    dict(source=Image.open('example.png'),
         xref="x",
         yref="y",
         x=0,
         y=3,
Example #12
print('Valores propios en orden descendente:')
for ep in eigen_pairs:
    print(ep[0])

total_sum = sum(eig_vals)
var_exp = [(i/total_sum)*100 for i in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
print(f'var_exp => {var_exp}') # percentage of useful information each component contributes to the dataset
print(f'cum_var_exp => {cum_var_exp}') # cumulative sum

plot1 = ir.Bar(x=['CP %s'%i for i in range(1,5)], y=var_exp, showlegend=False)
plot2 = ir.Scatter(x=['CP %s'%i for i in range(1,5)], y=cum_var_exp, showlegend=True, name="% de Varianza Explicada Acumulada")

data = ir.Data([plot1, plot2])
layout = ir.Layout(xaxis=ir.XAxis(title='Componentes principales'), yaxis=ir.YAxis(title='Porcentaje de varianza explicada'), title='Porcentaje de variabilidad explicada por cada componente principal')

fig = ir.Figure(data=data, layout=layout)
fig.show()

# Point 4
W = np.hstack((eigen_pairs[0][1].reshape(4,1), eigen_pairs[1][1].reshape(4,1))) # Keeping the first 2 eigenvectors (the first explains ~74% of the variance, the second ~22%, about 96% combined)
print(f'W => {W}')

# Point 5
Y = X_std.dot(W)
print(f'Y =>\n{Y}')

results = []

for name in ('setosa', 'versicolor', 'virginica'):
Example #13
def get_band_html(vasprun_file, kpts_file):
    vasp = VaspRun(vasprun_file)
    rec_lat = vasp.recip_lat()
    eigval_origin = vasp.read_eigenvals()[0]
    origin_kpt = vasp.read_kpoints()
    kpts = np.dot(origin_kpt, rec_lat)
    kpt_path = np.zeros((np.shape(kpts)[0], 1))
    kpt_path[1:] = np.linalg.norm(kpts[1:] - kpts[:-1], axis=1).reshape(
        (-1, 1))
    kpt_path[1:] = np.cumsum(kpt_path[1:]).reshape((-1, 1))
    eigval_shape = np.shape(eigval_origin)
    eigval = np.zeros((eigval_shape[0], eigval_shape[1] * 2))
    for i in range(eigval_shape[1]):
        eigval[:, 2 * i:2 * i + 2] = eigval_origin[:, i, :]
    fermi, _, _ = vasp.read_dos()

    labels, high_kpts = read_kpoints(kpts_file)
    high_kpts = high_kpts @ rec_lat
    high_kpts_path = np.zeros((np.shape(high_kpts)[0], 1))
    high_kpts_path[1:] = np.linalg.norm(high_kpts[1:] - high_kpts[:-1],
                                        axis=1).reshape((-1, 1))
    high_kpts_path[1:] = np.cumsum(high_kpts_path[1:]).reshape((-1, 1))

    fig = go.Figure()
    for ii in range(eigval_shape[1]):
        # import pdb; pdb.set_trace()
        fig.add_trace(
            go.Scatter(x=kpt_path.reshape((len(kpt_path), )),
                       y=eigval[:, 2 * ii] - fermi,
                       mode='lines',
                       line=dict(color='blue', width=2)))

    annotations = []
    for i, label in enumerate(labels):
        annotations.append(
            go.Annotation(x=high_kpts_path[i][0],
                          y=-5,
                          xref="x1",
                          yref="y1",
                          text=label,
                          xanchor="center",
                          yanchor="top",
                          showarrow=False))
        fig.add_trace(
            go.Scatter(x=[high_kpts_path[i][0], high_kpts_path[i][0]],
                       y=[-5, 5],
                       mode='lines',
                       line=dict(color='black', width=1)))

    # In[4]:

    bandxaxis = go.XAxis(title="k-points",
                         range=[0, kpt_path[-1]],
                         showgrid=True,
                         showline=True,
                         ticks="",
                         showticklabels=False,
                         mirror=True,
                         linewidth=2)
    bandyaxis = go.YAxis(title=r"$E - E_f \quad / \quad \text{eV}$",
                         range=[-5, 5],
                         showgrid=True,
                         showline=True,
                         zeroline=True,
                         mirror="ticks",
                         ticks="inside",
                         linewidth=2,
                         tickwidth=2,
                         zerolinewidth=2)

    bandlayout = go.Layout(title="Bands diagram",
                           xaxis=bandxaxis,
                           yaxis=bandyaxis,
                           annotations=go.Annotations(annotations))
    fig.update_layout(bandlayout)
    fig.update(layout_showlegend=False)
    # fig.show()

    # graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
    # In[5]:

    # Get HTML representation of plotly.js and this figure
    plot_div = pplot(fig, output_type='div', include_plotlyjs=False)
    # Get id of html div element that looks like
    # <div id="301d22ab-bfba-4621-8f5d-dc4fd855bb33" ... >
    res = re.search('<div id="([^"]*)"', plot_div)
    div_id = res.groups()[0]

    # Build JavaScript callback for handling clicks
    # and opening the URL in the trace's customdata
    js_callback = """
    <script>
    var plot_element = document.getElementById("{div_id}");
    plot_element.on('plotly_click', function(data){{
        console.log(data);
        var point = data.points[0];
        if (point) {{
            console.log(point.customdata);
            window.open(point.customdata);
        }}
    }})
    </script>
    """.format(div_id=div_id)

    # Build HTML string
    html_str = """
    <html>
    <body>
    {plot_div}
    {js_callback}
    <script type="text/javascript" async
          src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG">
        </script>
    </body>
    </html>
    """.format(plot_div=plot_div, js_callback=js_callback)
    return html_str, plot_div
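
A sketch of how the return values might be consumed, assuming vasprun.xml and KPOINTS files are available and that pplot above refers to plotly.offline.plot; the output filename is illustrative.

html_str, plot_div = get_band_html("vasprun.xml", "KPOINTS")

# The div was generated with include_plotlyjs=False, so html_str expects the
# hosting page (or a <script> tag you add) to load plotly.js itself.
with open("bands.html", "w") as fh:
    fh.write(html_str)
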
Example #14
def main():
    st.header("The Economist election model simulation")
    st.write(
        "The R version of the code is kindly provided by G. Elliot Morris from The Economist"
    )
    st.write(
        "https://gist.github.com/elliottmorris/c70fd4d32049c9986a45e2dfc07fb4f0\n"
    )
    st.write(
        'The code takes in The Economist election prediction model and allows the user to run simulations based on the true election outcome. The adjustable parameters are the states won by each candidate and the lower/upper bound of the vote share in each state.'
    )

    mu, Sigma, ev = read_file()

    biden_states = []
    trump_states = []
    biden_share_list = {}

    col0, col1, col2, col3 = st.beta_columns(4)
    sort_states_keys = sorted(ev.keys())

    box_0, box_1, box_2, box_3 = [[[] for i in range(14)] for j in range(4)]
    slider_0, slider_1, slider_2, slider_3 = [[[] for i in range(14)]
                                              for j in range(4)]
    for i in range(14):

        box_0[i] = col0.selectbox(str(sort_states_keys[i]),
                                  ('None', 'Trump', 'Biden'),
                                  key='box_' + str(sort_states_keys[i]))
        box_1[i] = col1.selectbox(str(sort_states_keys[14 + i]),
                                  ('None', 'Trump', 'Biden'),
                                  key='box_' + str(sort_states_keys[14 + i]))
        box_2[i] = col2.selectbox(str(sort_states_keys[28 + i]),
                                  ('None', 'Trump', 'Biden'),
                                  key='box_' + str(sort_states_keys[28 + i]))
        box_3[i] = col3.selectbox(str(sort_states_keys[42 + i]),
                                  ('None', 'Trump', 'Biden'),
                                  key='box_' + str(sort_states_keys[42 + i]))

        slider_0[i] = col0.slider('Biden share%',
                                  min_value=0,
                                  max_value=100,
                                  value=(5, 95),
                                  key='slider_' + str(sort_states_keys[i]))
        slider_1[i] = col1.slider('Biden share%',
                                  min_value=0,
                                  max_value=100,
                                  value=(5, 95),
                                  key='slider_' +
                                  str(sort_states_keys[14 + i]))
        slider_2[i] = col2.slider('Biden share%',
                                  min_value=0,
                                  max_value=100,
                                  value=(5, 95),
                                  key='slider_' +
                                  str(sort_states_keys[28 + i]))
        slider_3[i] = col3.slider('Biden share%',
                                  min_value=0,
                                  max_value=100,
                                  value=(5, 95),
                                  key='slider_' +
                                  str(sort_states_keys[42 + i]))

    for i, box_group in enumerate([box_0, box_1, box_2, box_3]):
        for j, box in enumerate(box_group):
            if box == 'Trump':
                trump_states.append(sort_states_keys[14 * i + j])
            if box == 'Biden':
                biden_states.append(sort_states_keys[14 * i + j])

    for i, slider_group in enumerate([slider_0, slider_1, slider_2, slider_3]):
        for j, slider in enumerate(slider_group):
            biden_share_list[sort_states_keys[14 * i +
                                              j]] = [slider[0], slider[1]]

    with st.spinner('Sampling from simulation please wait...'):
        try:
            state_win, p, sd, ev_dist = update_prob(mu,
                                                    Sigma,
                                                    ev,
                                                    biden_states=biden_states,
                                                    trump_states=trump_states,
                                                    biden_scores_list=None)

            st.write(
                pd.DataFrame({
                    'Trump state win %':
                    round(100 * (1 - state_win), 1),
                    '':
                    ''
                }).T)
            trump_win_chance = 100 * len(ev_dist[ev_dist < 269]) / float(
                len(ev_dist))
            st.write("Trump State: {}".format(trump_states))
            st.write("Biden State: {}".format(biden_states))
            st.write('')
            st.write("Trump win = {:.1f}%".format(trump_win_chance))
            layout = go.Layout(title='Simulation of electoral vote',
                               xaxis=go.XAxis(title='Electoral Votes'),
                               yaxis=go.YAxis(showticklabels=False))
            # fig = px.histogram(pd.DataFrame({'Electoral votes':ev_dist}), histnorm='probability density')
            fig = go.Figure(layout=layout)
            fig.add_trace(
                go.Histogram(x=ev_dist[ev_dist > 269],
                             name='Biden win',
                             xbins=dict(start=0, end=538, size=1),
                             marker_color='#0000ff'))
            fig.add_trace(
                go.Histogram(x=ev_dist[ev_dist < 269],
                             name='Trump win',
                             xbins=dict(start=0, end=538, size=1),
                             marker_color='#ff0000'))
            fig.add_trace(
                go.Histogram(x=ev_dist[ev_dist == 269],
                             name='Draw',
                             xbins=dict(start=0, end=538, size=1),
                             marker_color='#bfbfbf'))
            # fig.update_traces(,marker_color='#FF0000')

            plot = st.plotly_chart(fig, use_container_width=True)

        except ValueError:
            st.warning(
                'More than 99.99% of the samples are rejected; you should relax some constraints.'
            )
Example #15
               color='PERSON_SEX',
               title="Crashes dependent on age and gender",
               labels={"PERSON_SEX": ""})
fig1.update_layout(xaxis_title="Age", title={'x': 0.5, 'xanchor': 'center'})

data = [
    go.Scatter(x=injuryplot['Year'],
               y=injuryplot["NUMBER OF PERSONS INJURED"],
               name='Injured'),
    go.Scatter(x=injuryplot['Year'],
               y=injuryplot["NUMBER OF PERSONS KILLED"],
               name='Killed',
               yaxis='y2')
]
# settings for the new y axis
y1 = go.YAxis(title='Injured', titlefont=go.Font(color='Blue'))
y2 = go.YAxis(title='Killed', titlefont=go.Font(color='Red'))
y2.update(overlaying='y', side='right')
# adding the second y axis
layout = go.Layout(yaxis1=y1, yaxis2=y2)
fig2 = go.Figure(data=data, layout=layout)
fig2.update_layout(title={
    'text': "Number of killed and injured persons",
    'x': 0.5,
    'xanchor': 'center'
},
                   xaxis_title='Year')

fig3 = px.line(df_ratios,
               x="hour_of_the_week",
               y="Crash/Volume Ratio",