Beispiel #1
0
def make_race(drug_name=base_drug):
    """
    Create a bar plot of the victims' race distribution for a given drug.

    Parameters
    ----------
    drug_name : str
        Value chosen from the dropdown menu. Either the name of a column of
        ``pivoted_data`` (rows where that column equals 1 are kept) or
        ``'Everything'`` to plot every row.

    Returns
    -------
    altair.Chart
        Bar chart counting records per ``Race`` category.

    Examples
    --------
    >>> make_race(drug_name='Heroin')  # doctest: +SKIP
    """
    if drug_name == 'Everything':
        subset = pivoted_data
    else:
        # Boolean indexing rather than a string-built ``query`` expression:
        # it also works for column names that are not valid Python
        # identifiers and never evaluates the dropdown value as code.
        subset = pivoted_data[pivoted_data[drug_name] == 1]
    race = alt.Chart(subset).mark_bar().encode(
        # ``alt.Axis`` is the per-encoding axis class; ``alt.AxisConfig`` is
        # meant for chart-level ``configure`` calls.
        x=alt.X("Race:N", title="Race", axis=alt.Axis(labelAngle=-45)),
        y='count()',
        color=alt.Color('Race:N',
                        scale=alt.Scale(scheme='viridis'),
                        legend=None)).properties(
                            title='Race distribution for ' + drug_name,
                            width=400,
                            height=180)
    return race
    def build_chart(self):
        """
        Assemble the layered Altair chart from the entries of
        ``self.spectrum_dict`` and, when present, ``self.lines``, then apply
        the grid, legend and chart-property settings stored on this object.

        Returns
        -------

        The interactive, layered Altair Chart
        """
        layers = []

        # One layer per spectrum; spectra that return None are skipped.
        for item in self.spectrum_dict.values():
            spectrum_layer = item.build_chart(
                wavelength_axis_label=self._get_wavelength_title(),
                flux_axis_label=self._get_flux_title(),
                wavelength_unit=self.wavelength_unit)
            if spectrum_layer is None:
                continue
            layers.append(spectrum_layer)

        if self.lines is not None:
            # Restrict the lines to the wavelength range reported for the
            # spectra before asking for their layers.
            _unused, lower, upper = self.get_all_spectrum_chart_data()
            self.lines.set_wavelength_limits(wavelength_min=lower,
                                             wavelength_max=upper)
            layers.extend(
                self.lines.build_chart(
                    wavelength_axis_label=self._get_wavelength_title(),
                    wavelength_unit=self.wavelength_unit))

        layered = alt.layer(*layers)
        configured = layered.configure(
            axis=alt.AxisConfig(grid=self._show_grid),
            legend=alt.LegendConfig(disable=not self._show_legend),
        )
        sized = configured.properties(**self._chart_properties)
        return sized.interactive()
Beispiel #3
0
def make_race(drug_name="Amphet"):
    """
    Create a bar plot of the race distribution for one drug.

    Parameters
    ----------
    drug_name : str
        Name of a column of ``data``; only rows where that column equals 1
        are plotted.

    Returns
    -------
    altair.Chart
        Bar chart counting records per ``Race`` category.
    """
    # Boolean indexing rather than a string-built ``query`` expression: it
    # also works for column names that are not valid Python identifiers and
    # never evaluates ``drug_name`` as code.
    subset = data[data[drug_name] == 1]
    race = alt.Chart(subset).mark_bar().encode(
        # ``alt.Axis`` is the per-encoding axis class; ``alt.AxisConfig`` is
        # meant for chart-level ``configure`` calls.
        x=alt.X("Race:N", title="Race", axis=alt.Axis(labelAngle=45)),
        y='count()',
        color=alt.Color('Race:N',
                        scale=alt.Scale(scheme='viridis'))).properties(
                            title='Race distribution for ' + drug_name,
                            width=400,
                            height=180)
    return race
Beispiel #4
0
def concat(plot_1, plot_2, dash_name):
    """
    Stack two charts vertically, apply shared legend/axis/header/title
    sizing, and save the result as ``app/templates/plot/<dash_name>.html``.

    Parameters
    ----------
    plot_1, plot_2 : charts passed to ``alt.vconcat`` (top, then bottom)
    dash_name : base name (without extension) of the HTML file to write

    Returns
    -------
    None
    """
    stacked = alt.vconcat(plot_1, plot_2, spacing=60)

    legend_cfg = alt.LegendConfig(labelFontSize=16,
                                  titleFontSize=16,
                                  symbolSize=100,
                                  labelFont='Lato')
    axis_cfg = alt.AxisConfig(labelFontSize=16,
                              tickSize=16,
                              labels=True,
                              titleFontSize=16)
    header_cfg = alt.HeaderConfig(titleFontSize=20, labelFontSize=20)

    styled = stacked.configure(legend=legend_cfg,
                               axis=axis_cfg,
                               header=header_cfg)
    styled = styled.configure_title(fontSize=24)

    styled.save(f"app/templates/plot/{dash_name}.html")
Beispiel #5
0
def make_demographics(drug_name=base_drug):
    """
    Create side-by-side bar plots of the victims' age and sex for a drug.

    As a side effect, registers and enables the ``'mds_special'`` Altair
    theme on every call.

    Parameters
    ----------
    drug_name : str
        Value chosen from the dropdown menu. Either the name of a column of
        ``pivoted_data`` (rows where that column equals 1 are kept) or
        ``'Everything'`` to use every row. In both cases only rows whose
        ``Sex`` is ``'Male'`` or ``'Female'`` are plotted.

    Returns
    -------
    altair.HConcatChart
        The age histogram and the sex bar chart, concatenated horizontally
        (``age | gender``).

    Examples
    --------
    >>> make_demographics(drug_name='Heroin')  # doctest: +SKIP
    """
    def mds_special():
        """Return the config dictionary of the custom 'mds_special' theme."""
        font = "Arial"
        axisColor = "#000000"
        gridColor = "#DEDDDD"
        return {
            "config": {
                "title": {
                    "fontSize": 24,
                    "font": font,
                    "anchor": "start",  # equivalent of left-aligned.
                    # The title colour property is "color"; the previous
                    # "fontColor" key is not a title property and was
                    # silently ignored.
                    "color": "#000000"
                },
                'view': {
                    "height": 300,
                    "width": 400
                },
                "axisX": {
                    "domain": True,
                    "gridColor": gridColor,
                    "domainWidth": 1,
                    "grid": False,
                    "labelFont": font,
                    "labelFontSize": 12,
                    "labelAngle": 0,
                    "tickColor": axisColor,
                    # default, including it just to show you can change it
                    "tickSize": 5,
                    "titleFont": font,
                    "titleFontSize": 16,
                    # guessing, not specified in styleguide
                    "titlePadding": 10,
                    "title": "X Axis Title (units)",
                },
                "axisY": {
                    "domain": False,
                    "grid": True,
                    "gridColor": gridColor,
                    "gridWidth": 1,
                    "labelFont": font,
                    "labelFontSize": 14,
                    "labelAngle": 0,
                    "titleFont": font,
                    "titleFontSize": 16,
                    # guessing, not specified in styleguide
                    "titlePadding": 10,
                    "title": "Y Axis Title (units)",
                },
            }
        }

    # Register the custom theme under a chosen name
    alt.themes.register('mds_special', mds_special)

    # Enable the newly registered theme
    alt.themes.enable('mds_special')

    # Create plots
    sub_data = pivoted_data[pivoted_data["Sex"].isin(["Male", "Female"])]
    if drug_name == 'Everything':
        subset = sub_data
    else:
        # Boolean indexing rather than a string-built ``query`` expression:
        # it also works for column names that are not valid Python
        # identifiers and never evaluates the dropdown value as code.
        subset = sub_data[sub_data[drug_name] == 1]
    chart = alt.Chart(subset)
    # ``alt.Axis`` is the per-encoding axis class; ``alt.AxisConfig`` is
    # meant for chart-level ``configure`` calls.
    age = chart.mark_bar(color="#3f7d4e").encode(
        x=alt.X("Age:Q",
                title="Age",
                bin=alt.Bin(maxbins=10),
                axis=alt.Axis(labelAngle=-45)),
        y='count()').properties(title='Age distribution for ' + drug_name,
                                width=290,
                                height=200)
    gender = chart.mark_bar().encode(
        x=alt.X("Sex:N", title="Sex", axis=alt.Axis(labelAngle=-45)),
        y='count()',
        color=alt.Color('Sex:N',
                        scale=alt.Scale(scheme='viridis'),
                        legend=None)).properties(
                            title='Gender distribution for ' + drug_name,
                            width=190,
                            height=200)
    return (age | gender)
Beispiel #6
0
def main(input, output):
    """
    Run the feature-selection analysis and save three diagnostic plots.

    Steps:

    1. Read ``X_train.csv``, ``y_train.csv``, ``X_test.csv`` and
       ``y_test.csv`` from ``input`` and standardize the features.
    2. For 1 to 10 selected features, run RFE with a linear regression and
       record the train/test errors returned by ``fit_and_report``; save the
       error curves to ``ranked_features.png``.
    3. Refit with 6 selected features, print the errors and the feature
       weights, and save the weights to ``feature_weight_plot.png``.
    4. Save a box plot of predicted vs. actual values to
       ``prediction_result.png``.

    Parameters
    ----------
    input : str
        Directory containing the four CSV files.
    output : str
        Directory the PNG files are written to.

    Notes
    -----
    The parameter names shadow Python builtins; they are kept so existing
    callers (including keyword callers) keep working.
    """
    X_train = pd.read_csv(f"{input}/X_train.csv")
    y_train = pd.read_csv(f"{input}/y_train.csv")
    X_test = pd.read_csv(f"{input}/X_test.csv")
    y_test = pd.read_csv(f"{input}/y_test.csv")

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Collect one record per feature count and build the frame once;
    # ``DataFrame.append`` was removed in pandas 2.0.
    records = []
    for n in range(1, 11):
        lr = LinearRegression()
        rfe = RFE(estimator=lr, n_features_to_select=n)
        # Rank features on the standardized data: RFE compares coefficient
        # magnitudes, which are only comparable across features on a common
        # scale, and the downstream model is trained on the scaled arrays.
        rfe.fit(X_train_scaled, y_train)
        X_train_sel = X_train_scaled[:, rfe.support_]
        X_test_sel = X_test_scaled[:, rfe.support_]
        errors = fit_and_report(lr,
                                X_train_sel,
                                y_train,
                                X_test_sel,
                                y_test,
                                mode='regression')
        records.append({
            'n_features_to_select': n,
            'Train_error': errors[0],
            'Test_error': errors[1],
        })

    df = pd.DataFrame(
        records,
        columns=['n_features_to_select', 'Train_error', 'Test_error'])

    df = pd.melt(df,
                 id_vars=['n_features_to_select'],
                 value_vars=['Train_error', 'Test_error'],
                 var_name='error_type',
                 value_name='value')

    # ``alt.Axis`` is the per-encoding axis class; ``alt.AxisConfig`` is
    # meant for chart-level ``configure`` calls.
    feature_plot = alt.Chart(df).mark_line().encode(
        x=alt.X('n_features_to_select:N',
                title='Number of features to select',
                axis=alt.Axis(labelAngle=0)),
        y='value:Q',
        # The sort key must match the melted value exactly ('Train_error').
        color=alt.Color('error_type', sort=['Train_error']))

    # The ``configure_*`` methods return a new chart and ``configure()``
    # replaces the whole config, so everything is chained into the chart
    # that is actually saved instead of being applied to discarded copies.
    feature_plot.properties(
        title="The relationship between MSE and number of features",
        width=800,
        height=400).configure_scale(round=True).configure_header(
            titleFontSize=80,
            labelFontSize=80).save(f"{output}/ranked_features.png")

    # Final model with a fixed number of selected features.
    n_selected = 6
    lr = LinearRegression()
    rfe = RFE(estimator=lr, n_features_to_select=n_selected)
    rfe.fit(X_train_scaled, y_train)

    X_train_sel = X_train_scaled[:, rfe.support_]
    X_test_sel = X_test_scaled[:, rfe.support_]

    lr.fit(X_train_sel, y_train)
    errors = fit_and_report(lr,
                            X_train_sel,
                            y_train,
                            X_test_sel,
                            y_test,
                            mode='regression')
    print(errors)

    # ``coef_`` is 2-D when the target is a one-column frame and 1-D when it
    # is a plain vector; take the single target's weights in both cases.
    coefs = lr.coef_
    weights = coefs[0] if coefs.ndim > 1 else coefs
    relevant_features_list = pd.DataFrame({
        "features": list(X_train.columns[rfe.support_]),
        "weights": weights,
    })

    # Sanity check on the selection size.
    assert len(relevant_features_list) == n_selected, (
        'The number of selected features is wrong')

    print(relevant_features_list)

    feature_weight_plot = alt.Chart(relevant_features_list).mark_bar().encode(
        alt.Y('features:N',
              sort=alt.EncodingSortField(field="features",
                                         op="count",
                                         order='ascending')),
        alt.X('weights:Q'))

    feature_weight_plot.properties(
        title="The feature weights", width=800,
        height=400).configure_header(
            titleFontSize=80,
            labelFontSize=80).save(f"{output}/feature_weight_plot.png")

    y_pred_df = pd.DataFrame(lr.predict(X_test_sel))
    y_pred_df = y_pred_df.rename(columns={0: "predicted"})
    y_true_df = y_test.rename(columns={'quality': "actual"})

    result_df = pd.concat([y_pred_df, y_true_df], axis=1)

    plot_result = alt.Chart(result_df).mark_boxplot().encode(
        alt.X('actual:O',
              scale=alt.Scale(zero=False),
              axis=alt.Axis(labelAngle=0)),
        alt.Y('predicted', scale=alt.Scale(zero=False)))

    plot_result.properties(
        title="The prediction result", width=800,
        height=400).configure(numberFormat="0.4f").configure_header(
            titleFontSize=80,
            labelFontSize=80).save(f"{output}/prediction_result.png")

    print('This is the end of analysis, Hooray!!!')