def make_race(drug_name=base_drug):
    """
    Create a bar plot of the victims' race for a specific drug.

    Parameters
    ----------
    drug_name : str
        Drug chosen from the dropdown menu. The special value
        'Everything' plots every row of ``pivoted_data``; any other value
        is treated as a column name and only rows where that column
        equals 1 are plotted.

    Returns
    -------
    Altair chart
        Bar chart with one bar per ``Race`` value, bar height being the
        record count.

    Examples
    --------
    >>> make_race(drug_name='Heroin')
    """
    if drug_name == 'Everything':
        query = pivoted_data
    else:
        # drug_name is spliced into a pandas query expression, so it must
        # name a column that is usable as a bare identifier there.
        query = pivoted_data.query(drug_name + ' == 1')

    race = alt.Chart(query).mark_bar().encode(
        # alt.Axis is the per-channel axis class; alt.AxisConfig is meant
        # for chart-level .configure(axis=...) settings.
        x=alt.X("Race:N", title="Race", axis=alt.Axis(labelAngle=-45)),
        y='count()',
        color=alt.Color('Race:N',
                        scale=alt.Scale(scheme='viridis'),
                        legend=None)).properties(
                            title='Race distribution for ' + drug_name,
                            width=400,
                            height=180)
    return race
def build_chart(self):
    """
    Assemble the layered Altair chart from the selected spectra and lines.

    Every entry of ``self.spectrum_dict`` is asked for a layer; entries
    that return ``None`` are skipped. When ``self.lines`` is set, its
    wavelength limits are updated from the second and third values
    returned by ``get_all_spectrum_chart_data`` and the layers it builds
    are appended after the spectrum layers. The configured grid, legend
    and chart properties are then applied.

    Returns
    -------
    The Altair Chart
    """
    spectrum_layers = (
        spectrum.build_chart(
            wavelength_axis_label=self._get_wavelength_title(),
            flux_axis_label=self._get_flux_title(),
            wavelength_unit=self.wavelength_unit)
        for spectrum in self.spectrum_dict.values())
    layers = [layer for layer in spectrum_layers if layer is not None]

    if self.lines is not None:
        _, lower_limit, upper_limit = self.get_all_spectrum_chart_data()
        self.lines.set_wavelength_limits(wavelength_min=lower_limit,
                                         wavelength_max=upper_limit)
        line_layers = self.lines.build_chart(
            wavelength_axis_label=self._get_wavelength_title(),
            wavelength_unit=self.wavelength_unit)
        layers.extend(line_layers)

    layered = alt.layer(*layers)
    configured = layered.configure(
        axis=alt.AxisConfig(grid=self._show_grid),
        legend=alt.LegendConfig(disable=not self._show_legend))
    return configured.properties(**self._chart_properties).interactive()
def make_race(drug_name="Amphet"):
    """
    Create a bar plot of the race distribution for a specific drug.

    Parameters
    ----------
    drug_name : str
        Column of ``data`` to filter on; only rows where that column
        equals 1 are plotted.

    Returns
    -------
    Altair chart
        Bar chart with one bar per ``Race`` value, bar height being the
        record count.
    """
    # drug_name is spliced into a pandas query expression, so it must name
    # a column that is usable as a bare identifier there.
    query = data.query(drug_name + ' == 1')

    race = alt.Chart(query).mark_bar().encode(
        # alt.Axis is the per-channel axis class; alt.AxisConfig is meant
        # for chart-level .configure(axis=...) settings.
        x=alt.X("Race:N", title="Race", axis=alt.Axis(labelAngle=45)),
        y='count()',
        color=alt.Color('Race:N',
                        scale=alt.Scale(scheme='viridis'))).properties(
                            title='Race distribution for ' + drug_name,
                            width=400,
                            height=180)
    return race
def concat(plot_1, plot_2, dash_name):
    """
    Stack two plots vertically, apply shared font sizing and save as HTML.

    Parameters
    ----------
    plot_1 : first chart passed to ``alt.vconcat``
    plot_2 : second chart passed to ``alt.vconcat``
    dash_name : str
        Base name of the output file, written to
        ``app/templates/plot/<dash_name>.html``.
    """
    stacked = alt.vconcat(plot_1, plot_2, spacing=60)

    legend_settings = alt.LegendConfig(labelFontSize=16,
                                       titleFontSize=16,
                                       symbolSize=100,
                                       labelFont='Lato')
    axis_settings = alt.AxisConfig(labelFontSize=16,
                                   tickSize=16,
                                   labels=True,
                                   titleFontSize=16)
    header_settings = alt.HeaderConfig(titleFontSize=20, labelFontSize=20)

    styled = stacked.configure(legend=legend_settings,
                               axis=axis_settings,
                               header=header_settings)
    plot = styled.configure_title(fontSize=24)
    plot.save(f"app/templates/plot/{dash_name}.html")
def make_demographics(drug_name=base_drug):
    """
    Create side-by-side age and sex bar plots for a specific drug.

    Registers and enables the ``mds_special`` Altair theme on every call,
    then builds both charts from the rows of ``pivoted_data`` whose
    ``Sex`` is 'Male' or 'Female'.

    Parameters
    ----------
    drug_name : str
        Drug chosen from the dropdown menu. The special value
        'Everything' keeps all of those rows; any other value is treated
        as a column name and only rows where that column equals 1 are
        plotted.

    Returns
    -------
    Altair chart
        Horizontal concatenation ``age | gender``.

    Examples
    --------
    >>> make_demographics(drug_name='Heroin')
    """
    def mds_special():
        font = "Arial"
        axisColor = "#000000"
        gridColor = "#DEDDDD"
        return {
            "config": {
                "title": {
                    "fontSize": 24,
                    "font": font,
                    "anchor": "start",  # equivalent of left-aligned.
                    "fontColor": "#000000"
                },
                'view': {
                    "height": 300,
                    "width": 400
                },
                "axisX": {
                    "domain": True,
                    "gridColor": gridColor,
                    "domainWidth": 1,
                    "grid": False,
                    "labelFont": font,
                    "labelFontSize": 12,
                    "labelAngle": 0,
                    "tickColor": axisColor,
                    "tickSize": 5,  # default, including it just to show you can change it
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding": 10,  # guessing, not specified in styleguide
                    "title": "X Axis Title (units)",
                },
                "axisY": {
                    "domain": False,
                    "grid": True,
                    "gridColor": gridColor,
                    "gridWidth": 1,
                    "labelFont": font,
                    "labelFontSize": 14,
                    "labelAngle": 0,
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding": 10,  # guessing, not specified in styleguide
                    "title": "Y Axis Title (units)",
                },
            }
        }

    # Register the custom theme under a chosen name
    alt.themes.register('mds_special', mds_special)
    # Enable the newly registered theme
    alt.themes.enable('mds_special')

    # Create plots
    sub_data = pivoted_data.query("Sex == 'Male' | Sex == 'Female'")
    if drug_name == 'Everything':
        query = sub_data
    else:
        # drug_name is spliced into a pandas query expression, so it must
        # name a column that is usable as a bare identifier there.
        query = sub_data.query(drug_name + ' == 1')

    chart = alt.Chart(query)

    # alt.Axis is the per-channel axis class; alt.AxisConfig is meant for
    # chart-level .configure(axis=...) settings.
    age = chart.mark_bar(color="#3f7d4e").encode(
        x=alt.X("Age:Q",
                title="Age",
                bin=alt.Bin(maxbins=10),
                axis=alt.Axis(labelAngle=-45)),
        y='count()').properties(title='Age distribution for ' + drug_name,
                                width=290,
                                height=200)

    gender = chart.mark_bar().encode(
        x=alt.X("Sex:N", title="Sex", axis=alt.Axis(labelAngle=-45)),
        y='count()',
        color=alt.Color('Sex:N',
                        scale=alt.Scale(scheme='viridis'),
                        legend=None)).properties(
                            title='Gender distribution for ' + drug_name,
                            width=190,
                            height=200)

    return (age | gender)
def _select_with_rfe(X_train, y_train, X_train_scaled, X_test_scaled,
                     n_features):
    """Run RFE with a new LinearRegression and slice the scaled matrices.

    Returns the estimator handed to RFE, the fitted RFE object, and the
    scaled train/test arrays restricted to the columns RFE selected.
    """
    lr = LinearRegression()
    rfe = RFE(estimator=lr, n_features_to_select=n_features)
    # NOTE(review): RFE is fitted on the unscaled frame while the selected
    # columns are taken from the scaled arrays; kept as-is so results do
    # not change -- confirm whether ranking on scaled data was intended.
    rfe.fit(X_train, y_train)
    X_train_sel = X_train_scaled[:, rfe.support_]
    X_test_sel = X_test_scaled[:, rfe.support_]
    return lr, rfe, X_train_sel, X_test_sel


def main(input, output):
    """
    Select features with RFE, fit a linear regression and save three plots.

    Parameters
    ----------
    input : str
        Directory containing ``X_train.csv``, ``y_train.csv``,
        ``X_test.csv`` and ``y_test.csv``.
    output : str
        Directory that receives ``ranked_features.png``,
        ``feature_weight_plot.png`` and ``prediction_result.png``.
    """
    n_selected = 6  # number of features kept for the final model

    X_train = pd.read_csv(f"{input}/X_train.csv")
    y_train = pd.read_csv(f"{input}/y_train.csv")
    X_test = pd.read_csv(f"{input}/X_test.csv")
    y_test = pd.read_csv(f"{input}/y_test.csv")

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Collect one row per feature count and build the frame once;
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for n in range(1, 11):
        lr, _, X_train_sel, X_test_sel = _select_with_rfe(
            X_train, y_train, X_train_scaled, X_test_scaled, n)
        errors = fit_and_report(lr,
                                X_train_sel,
                                y_train,
                                X_test_sel,
                                y_test,
                                mode='regression')
        rows.append({
            'n_features_to_select': n,
            'Train_error': errors[0],
            'Test_error': errors[1]
        })
    df = pd.DataFrame(
        rows, columns=['n_features_to_select', 'Train_error', 'Test_error'])

    df = pd.melt(df,
                 id_vars=['n_features_to_select'],
                 value_vars=['Train_error', 'Test_error'],
                 var_name='error_type',
                 value_name='value')

    feature_plot = alt.Chart(df).mark_line().encode(
        x=alt.X('n_features_to_select:N',
                title='Number of features to select',
                axis=alt.Axis(labelAngle=0)),
        y='value:Q',
        # The melted values are 'Train_error' / 'Test_error'; the previous
        # sort key 'Train error' matched neither of them.
        color=alt.Color('error_type',
                        sort=['Train_error'])).configure_scale(round=True)
    # The earlier configure_header(...) statements discarded their return
    # value (Altair configure methods return a new chart), so they never
    # affected the saved figure and are omitted here.
    feature_plot.properties(
        title="The relationship between MSE and number of features",
        width=800,
        height=400).save(output + "/ranked_features.png")

    # Final model on the chosen number of features.
    lr, rfe, X_train_sel, X_test_sel = _select_with_rfe(
        X_train, y_train, X_train_scaled, X_test_scaled, n_selected)
    lr.fit(X_train_sel, y_train)
    errors = fit_and_report(lr,
                            X_train_sel,
                            y_train,
                            X_test_sel,
                            y_test,
                            mode='regression')
    print(errors)

    relevant_features_list = pd.DataFrame({
        "features": list(X_train.columns[rfe.support_]),
        # coef_[0]: first row of the fitted coefficient array
        "weights": lr.coef_[0],
    })

    # Sanity check on the number of selected features.
    assert len(relevant_features_list) == n_selected, \
        'The dimension of relevant_features_list is wrong'
    print(relevant_features_list)

    feature_weight_plot = alt.Chart(relevant_features_list).mark_bar().encode(
        alt.Y('features:N',
              sort=alt.EncodingSortField(field="features",
                                         op="count",
                                         order='ascending')),
        alt.X('weights:Q'))
    feature_weight_plot.properties(
        title="The feature weights", width=800,
        height=400).save(output + "/feature_weight_plot.png")

    y_pred_df = pd.DataFrame(lr.predict(X_test_sel))
    y_pred_df = y_pred_df.rename(columns={0: "predicted"})
    y_true_df = y_test.rename(columns={'quality': "actual"})
    result_df = pd.concat([y_pred_df, y_true_df], axis=1)

    plot_result = alt.Chart(result_df).mark_boxplot().encode(
        alt.X('actual:O',
              scale=alt.Scale(zero=False),
              axis=alt.Axis(labelAngle=0)),
        alt.Y('predicted', scale=alt.Scale(zero=False)))
    plot_result.configure(numberFormat="0.4f").properties(
        title="The prediction result", width=800,
        height=400).save(output + "/prediction_result.png")

    print('This is the end of analysis, Hooray!!!')