def salary_v_rating_scatter(df_chart, legend, separate=False):
    """
    Description: Create an altair chart for a salary vs rating scatterplot.

    Each listing's average salary, ``(Min_Salary + Max_Salary) / 2``, is
    plotted on the x axis against its ``Rating`` on the y axis, and points
    are colored by the ``legend`` column.

    Side effect: an ``Avg_Salary`` column is added to ``df_chart`` in place.

    :param df_chart: Dataframe of job listings with both salaries and ratings
        that will be used to make chart object
    :type df_chart: pandas.DataFrame
    :param legend: How we want plot to be organized i.e. by industry
    :type legend: str in cols
    :param separate: When truthy, the plot is additionally split into one
        column per ``legend`` value and colored with the 'dark2' scheme
    :return: The configured scatter chart
    :raises AssertionError: If ``df_chart`` is not a DataFrame, ``legend`` is
        not one of the known columns, or any salary is not strictly positive
    :author: Jake Kim
    """
    cols = [
        'Job_title', 'Company', 'State', 'City', 'Min_Salary', 'Max_Salary',
        'Job_Desc', 'Industry', 'Rating', 'Date_Posted', 'Valid_until',
        'Job_Type'
    ]
    assert isinstance(df_chart, pd.DataFrame), \
        'df_chart must be a pandas DataFrame'
    assert legend in cols, 'legend must be one of the job listing columns'

    # Work on whole columns instead of looping over df_chart.loc[i, ...]:
    # label lookups by position break (KeyError) or silently misalign rows
    # whenever the index is not exactly 0..n-1 in order, e.g. after a
    # filter or a sort without reset_index().
    max_salary = df_chart['Max_Salary']
    min_salary = df_chart['Min_Salary']
    assert (max_salary > 0).all(), 'Max_Salary must be positive in every row'
    assert (min_salary > 0).all(), 'Min_Salary must be positive in every row'
    df_chart['Avg_Salary'] = (max_salary + min_salary) / 2

    points = Chart(df_chart).mark_point()
    x_axis = alt.X('Avg_Salary', title='Avg Salary (USD)')
    if separate:
        chart = points.encode(
            x=x_axis,
            y='Rating',
            column=legend,
            color=alt.Color(legend, scale=alt.Scale(scheme='dark2')))
    else:
        chart = points.encode(x=x_axis, y='Rating', color=legend)

    chart = chart.properties(title='Salary vs. Company Ratings',
                             height=700,
                             width=900)
    chart = chart.configure_header(titleFontSize=18, labelFontSize=18)
    chart = chart.configure_axis(titleFontSize=18,
                                 labelFontSize=18,
                                 tickCount=10)
    chart = chart.configure_title(fontSize=18)
    chart = chart.configure_point(size=75)
    chart = chart.configure_legend(titleFontSize=18, labelFontSize=16)
    return chart
def _plot_with_data_bar(source: alt.Chart,
                        line: alt.Chart,
                        type_: ValueType,
                        width: Optional[int] = None,
                        height: Optional[int] = None) -> alt.Chart:
    """Layer a mouse-over read-out on top of ``line``.

    A single-value selection keyed on the ``days`` field follows the mouse
    and picks the nearest point. It drives three extra layers: a point
    marker on the line, a text label showing the ``type_.value`` field
    formatted with ".0f", and a gray vertical rule at the selected x
    position.

    :param source: data handed to ``alt.Chart`` for the invisible selector
        layer and the rule layer
    :param line: the base chart; the point and text layers are derived
        from it
    :param type_: its ``value`` attribute names the quantitative field
        shown in the text label
    :param width: chart width, 800 when omitted
    :param height: chart height, 400 when omitted
    :return: the layered chart with width and height applied
    """
    if width is None:
        width = 800
    if height is None:
        height = 400

    # TODO: Add validation that the x and y series names are correct

    # Selection that tracks the point nearest to the cursor by its x value.
    hover = alt.selection(
        type='single',
        nearest=True,
        on='mouseover',
        fields=['days'],
        empty='none',
    )

    # Fully transparent points spanning the chart; they carry the selection
    # and therefore report the x value under the cursor.
    invisible_marks = alt.Chart(source).mark_point()
    hover_targets = invisible_marks.encode(
        x='days:Q',
        opacity=alt.value(0),
    ).add_selection(hover)

    # Markers on the line, visible only for the selected point.
    marker_opacity = alt.condition(hover, alt.value(1), alt.value(0))
    hover_points = line.mark_point().encode(opacity=marker_opacity)

    # Value label next to the selected point, blank everywhere else.
    label_text = alt.condition(hover,
                               f'{type_.value}:Q',
                               alt.value(' '),
                               format=".0f")
    hover_labels = line.mark_text(align='left', dx=5, dy=10).encode(
        text=label_text)

    # Vertical rule drawn at the selected x position.
    rule_marks = alt.Chart(source).mark_rule(color='gray')
    cursor_rule = rule_marks.encode(x='days:Q').transform_filter(hover)

    # Stack the five layers and apply the requested size.
    layered = alt.layer(line, hover_targets, hover_points, cursor_rule,
                        hover_labels)
    return layered.properties(width=width, height=height)
def _show_correlation_heatmap(frame, columns):
    """Draw and show an annotated heatmap of the correlations of ``columns``."""
    sns.heatmap(frame[columns].corr(), annot=True, annot_kws={"size": 20})
    plt.show()


def plot_capita_map(CA_data, path):
    """
    Plot per-county accident rates against population density.

    County population and area are read from
    'data/CA_County_Pop_Miles.csv' (first column used as the index), the
    number of rows of ``CA_data`` per 'County' is joined on that index, and
    two views are produced:

    1. accidents per square mile vs population density
    2. accidents per capita vs population density (log-scaled x axis)

    Each view is a seaborn correlation heatmap followed by an Altair
    scatter plot.

    :param CA_data: accident records; must provide 'County' and 'ID' columns
    :type CA_data: pandas.DataFrame
    :param path: not used by this function
        NOTE(review): possibly meant as the CSV location or an output
        path - confirm with callers
    :return: None
    """
    alt.renderers.enable('default')

    # The file was previously read twice; only the index_col=0 result was
    # ever used.
    countyinfo = pd.read_csv('data/CA_County_Pop_Miles.csv', index_col=0)

    # Assignment aligns on the county index, so counties that do not occur
    # in CA_data end up with NaN counts.
    countyinfo['Count'] = CA_data.groupby('County')['ID'].count()
    countyinfo['County'] = countyinfo.index
    countyinfo['Accidents_Per_Capita'] = (countyinfo['Count'] /
                                          countyinfo['pop2020'])
    countyinfo['Accidents_Per_Square_Mile'] = (countyinfo['Count'] /
                                               countyinfo['Square_Miles'])
    countyinfo['Pop_Density'] = (countyinfo['pop2020'] /
                                 countyinfo['Square_Miles'])

    # The scatter plots used to be built and then discarded, followed by a
    # plt.show() that has no effect on Altair charts. They are now rendered
    # explicitly.
    # NOTE(review): Chart.display() relies on IPython - confirm this function
    # is only run from a notebook.
    _show_correlation_heatmap(countyinfo,
                              ['Pop_Density', 'Accidents_Per_Square_Mile'])
    Chart(countyinfo).mark_point().encode(
        x=alt.X('Pop_Density', axis=alt.Axis(title='Population Density')),
        y=alt.Y('Accidents_Per_Square_Mile',
                axis=alt.Axis(title='Accidents Per Square Mile')),
    ).properties(
        title='Accidents Per Sq Mile vs Population Density',
    ).interactive().display()

    _show_correlation_heatmap(countyinfo,
                              ['Pop_Density', 'Accidents_Per_Capita'])
    Chart(countyinfo).mark_point().encode(
        x=alt.X('Pop_Density',
                scale=alt.Scale(type='log'),
                axis=alt.Axis(title='Population Density (log(x))')),
        y=alt.Y('Accidents_Per_Capita',
                axis=alt.Axis(title='Accidents_Per_Capita')),
    ).properties(
        title='Accidents Per Capita vs Population Density',
    ).interactive().display()
def salary_v_listings_scatter(df_chart, legend, separate=False):
    """
    Description: Create an altair chart for a salary vs job listings
    scatterplot

    For every row, ``Listings`` is the number of rows that share that row's
    ``legend`` value, and ``Avg_Salary`` is
    ``(Min_Salary + Max_Salary) / 2``. ``Listings`` is plotted on the x axis
    against ``Avg_Salary`` on the y axis, colored by ``legend``.

    Side effect: ``Listings`` and ``Avg_Salary`` columns are added to
    ``df_chart`` in place.

    :param df_chart: Dataframe of job listings with both salaries and
        listings that will be used to make chart object
    :type df_chart: pandas.DataFrame
    :param legend: How we want plot to be organized i.e. by industry
    :type legend: str in cols
    :param separate: When truthy, the plot is additionally split into one
        column per ``legend`` value
    :return: The configured scatter chart
    :raises AssertionError: If ``legend`` is not one of the known columns,
        ``df_chart`` is not a DataFrame, or any salary is not strictly
        positive
    :author: Jake Kim
    """
    cols = [
        'Job_title', 'Company', 'State', 'City', 'Min_Salary', 'Max_Salary',
        'Job_Desc', 'Industry', 'Rating', 'Date_Posted', 'Valid_until',
        'Job_Type'
    ]
    assert legend in cols, 'legend must be one of the job listing columns'
    assert isinstance(df_chart, pd.DataFrame), \
        'df_chart must be a pandas DataFrame'

    # Count rows per legend value and map the counts back onto the rows.
    # Column-wise operations stay aligned with the frame's own index; the
    # previous df_chart.loc[i, ...] loops assumed labels 0..n-1 in order and
    # broke or misassigned rows on filtered / sorted frames.
    group_sizes = df_chart[legend].value_counts()
    df_chart['Listings'] = df_chart[legend].map(group_sizes)

    max_salary = df_chart['Max_Salary']
    min_salary = df_chart['Min_Salary']
    assert (max_salary > 0).all(), 'Max_Salary must be positive in every row'
    assert (min_salary > 0).all(), 'Min_Salary must be positive in every row'
    df_chart['Avg_Salary'] = (max_salary + min_salary) / 2

    points = Chart(df_chart).mark_point()
    y_axis = alt.Y('Avg_Salary', title='Avg Salary (USD)')
    if separate:
        chart = points.encode(x='Listings',
                              y=y_axis,
                              color=legend,
                              column=legend)
    else:
        chart = points.encode(x='Listings', y=y_axis, color=legend)

    # Title typo fixed ("Lstings" -> "Listings").
    chart = chart.properties(title='Salary vs. # of Job Listings',
                             height=700,
                             width=900)
    chart = chart.configure_header(titleFontSize=14, labelFontSize=14)
    chart = chart.configure_axis(titleFontSize=14,
                                 labelFontSize=14,
                                 tickCount=10)
    chart = chart.configure_title(fontSize=14)
    chart = chart.configure_legend(titleFontSize=14, labelFontSize=12)
    chart = chart.configure_point(size=75)
    return chart
prices_predict = compute_all_y(array, coefficients) MSE2 = compute_mse(prices_actual, prices_predict) if MSE2 > MSE1: coefficients[1] = coefficients[1] - 0.02 prices_predict = compute_all_y(array, coefficients) MSE3 = compute_mse(prices_actual, prices_predict) if MSE3 < MSE1: MSE1 = MSE3 else: MSE1 = MSE2 coefficients[2] = coefficients[2] + 0.01 prices_predict = compute_all_y(array, coefficients) MSE2 = compute_mse(prices_actual, prices_predict) if MSE2 > MSE1: coefficients[2] = coefficients[2] - 0.02 prices_predict = compute_all_y(array, coefficients) MSE3 = compute_mse(prices_actual, prices_predict) if MSE3 < MSE1: MSE1 = MSE3 else: MSE1 = MSE2 MSE_list.append(MSE1) count += 1 attempt.append(count) data = Data(attempt=attempt, MSE_list=MSE_list) chart = Chart(data) mark = chart.mark_point() enc = mark.encode(x='attempt:Q', y='MSE_list:Q', ) enc.display()