Ejemplo n.º 1
0
def salary_v_rating_scatter(df_chart, legend, separate=False):
    """
    Description:    Create an altair chart for a salary vs rating scatterplot.

    The average salary of every listing, (Min_Salary + Max_Salary) / 2, is
    plotted on the x axis against 'Rating' on the y axis and colored by
    ``legend``. The average is added to a copy of ``df_chart``, so the
    caller's dataframe is not modified, and the computation does not depend
    on the dataframe's index labels.

    :param df_chart:   Dataframe of job listings with both salaries and ratings that will be used to make chart object
    :type df_chart:    pandas.DataFrame
    :param legend:     How we want plot to be organized i.e. by industry
    :type legend:      str in cols
    :param separate:   If True, also facet the chart into one column per ``legend`` value and color with the 'dark2' scheme
    :type separate:    bool
    :return:           The configured chart object
    :raises AssertionError: if ``df_chart`` is not a DataFrame, ``legend`` is not in cols, or any Min_Salary / Max_Salary value is not > 0
    :author: Jake Kim
    """
    cols = [
        'Job_title', 'Company', 'State', 'City', 'Min_Salary', 'Max_Salary',
        'Job_Desc', 'Industry', 'Rating', 'Date_Posted', 'Valid_until',
        'Job_Type'
    ]
    assert isinstance(df_chart, pd.DataFrame)
    assert legend in cols

    max_salary = df_chart['Max_Salary']
    min_salary = df_chart['Min_Salary']
    # Column-wise checks work for any index (e.g. a filtered frame), unlike
    # positional .loc[i, ...] lookups. NaN > 0 is False, so missing salaries
    # are rejected as well.
    assert (max_salary > 0).all(), 'Max_Salary must be > 0 for every row'
    assert (min_salary > 0).all(), 'Min_Salary must be > 0 for every row'

    # assign() returns a new frame, leaving the caller's dataframe untouched.
    data = df_chart.assign(Avg_Salary=(max_salary + min_salary) / 2)

    points = Chart(data).mark_point()
    x_axis = alt.X('Avg_Salary', title='Avg Salary (USD)')

    if separate:
        chart = points.encode(
            x=x_axis,
            y='Rating',
            column=legend,
            color=alt.Color(legend, scale=alt.Scale(scheme='dark2')))
    else:
        chart = points.encode(x=x_axis, y='Rating', color=legend)

    chart = chart.properties(title='Salary vs. Company Ratings',
                             height=700,
                             width=900)
    chart = chart.configure_header(titleFontSize=18, labelFontSize=18)
    chart = chart.configure_axis(titleFontSize=18,
                                 labelFontSize=18,
                                 tickCount=10)
    chart = chart.configure_title(fontSize=18)
    chart = chart.configure_point(size=75)
    chart = chart.configure_legend(titleFontSize=18, labelFontSize=16)

    return chart
Ejemplo n.º 2
0
def _plot_with_data_bar(source: alt.Chart,
                        line: alt.Chart,
                        type_: ValueType,
                        width: Optional[int] = None,
                        height: Optional[int] = None) -> alt.Chart:
    """Layer a hover readout (point, value label and vertical rule) over ``line``.

    A single "nearest" selection keyed on the ``days`` field follows the
    mouse; the extra layers are only visible for the selected x-value.

    :param source: data handed to ``alt.Chart(...)`` for the invisible
        selector layer and for the rule layer
    :param line: base chart; also reused to derive the point and text layers
    :param type_: its ``value`` attribute names the quantitative field shown
        in the text label
    :param width: chart width, 800 when omitted
    :param height: chart height, 400 when omitted
    :return: layered chart of line, selectors, points, rules and text
    """
    if width is None:
        width = 800
    if height is None:
        height = 400

    # TODO: Add validation that the x and y series names are correct

    # Selection that snaps to the closest point along x while hovering
    hover = alt.selection(
        type='single',
        nearest=True,
        on='mouseover',
        fields=['days'],
        empty='none',
    )

    # Fully transparent points spanning the chart; they carry the selection
    # and so report the x-value under the cursor
    invisible_points = alt.Chart(source).mark_point().encode(
        x='days:Q', opacity=alt.value(0))
    hover_targets = invisible_points.add_selection(hover)

    # Markers on the line, shown only for the selected x-value
    marker_opacity = alt.condition(hover, alt.value(1), alt.value(0))
    highlighted = line.mark_point().encode(opacity=marker_opacity)

    # Value label next to the highlighted marker
    label_text = alt.condition(
        hover, f'{type_.value}:Q', alt.value(' '), format=".0f")
    labels = line.mark_text(align='left', dx=5, dy=10).encode(text=label_text)

    # Vertical rule at the selected x-value
    guide = alt.Chart(source).mark_rule(color='gray').encode(x='days:Q')
    guide = guide.transform_filter(hover)

    # Stack the five layers and apply the requested size
    layered = alt.layer(line, hover_targets, highlighted, guide, labels)
    return layered.properties(width=width, height=height)
Ejemplo n.º 3
0
def plot_capita_map(CA_data, path):
    """Show how accident rates relate to population density per county.

    Reads 'data/CA_County_Pop_Miles.csv' (first column used as the index),
    attaches the number of rows of ``CA_data`` per 'County', derives
    accidents per capita, accidents per square mile and population density,
    and then displays, for each of the two accident rates, a seaborn
    correlation heatmap followed by an Altair scatter plot against
    population density.

    :param CA_data: dataframe with at least 'County' and 'ID' columns; its
        'County' values are aligned with the index of the CSV
    :param path: currently unused; kept so existing callers keep working
    :return: None
    """
    alt.renderers.enable('default')

    # The CSV must provide 'pop2020' and 'Square_Miles' columns.
    countyinfo = pd.read_csv('data/CA_County_Pop_Miles.csv', index_col=0)

    # Index-aligned assignment: counties absent from CA_data get NaN.
    countyinfo['Count'] = CA_data.groupby('County')['ID'].count()
    countyinfo['County'] = countyinfo.index
    countyinfo['Accidents_Per_Capita'] = countyinfo['Count'] / countyinfo['pop2020']
    countyinfo['Accidents_Per_Square_Mile'] = countyinfo['Count'] / countyinfo['Square_Miles']
    countyinfo['Pop_Density'] = countyinfo['pop2020'] / countyinfo['Square_Miles']

    # --- Accidents per square mile vs population density ---
    corr_area = countyinfo[['Pop_Density', 'Accidents_Per_Square_Mile']].corr()
    sns.heatmap(corr_area, annot=True, annot_kws={"size": 20})
    plt.show()

    area_scatter = Chart(countyinfo).mark_point().encode(
        x=alt.X('Pop_Density', axis=alt.Axis(title='Population Density')),
        y=alt.Y('Accidents_Per_Square_Mile',
                axis=alt.Axis(title='Accidents Per Square Mile')),
    ).properties(
        title='Accidents Per Sq Mile vs Population Density'
    ).interactive()
    # An Altair chart built inside a function is not rendered unless it is
    # displayed explicitly; plt.show() only affects matplotlib figures.
    area_scatter.display()

    # --- Accidents per capita vs population density ---
    corr_capita = countyinfo[['Pop_Density', 'Accidents_Per_Capita']].corr()
    sns.heatmap(corr_capita, annot=True, annot_kws={"size": 20})
    plt.show()

    capita_scatter = Chart(countyinfo).mark_point().encode(
        x=alt.X('Pop_Density',
                scale=alt.Scale(type='log'),
                axis=alt.Axis(title='Population Density (log(x))')),
        y=alt.Y('Accidents_Per_Capita',
                axis=alt.Axis(title='Accidents_Per_Capita')),
    ).properties(
        title='Accidents Per Capita vs Population Density'
    ).interactive()
    capita_scatter.display()
Ejemplo n.º 4
0
def salary_v_listings_scatter(df_chart, legend, separate=False):
    """
    Description:    Create an altair chart for a salary vs job listings scatterplot

    For every row, 'Listings' is the number of rows sharing that row's
    ``legend`` value and 'Avg_Salary' is (Min_Salary + Max_Salary) / 2.
    Both are added to a copy of ``df_chart``, so the caller's dataframe is
    not modified, and neither computation depends on the dataframe's index
    labels.

    :param df_chart:   Dataframe of job listings with both salaries and listings that will be used to make chart object
    :type df_chart:    pandas.DataFrame
    :param legend:     How we want plot to be organized i.e. by industry
    :type legend:      str in cols
    :param separate:   If True, also facet the chart into one column per ``legend`` value
    :type separate:    bool
    :return:           The configured chart object
    :raises AssertionError: if ``legend`` is not in cols, ``df_chart`` is not a DataFrame, or any Min_Salary / Max_Salary value is not > 0
    :author: Jake Kim
    """
    from collections import Counter

    cols = [
        'Job_title', 'Company', 'State', 'City', 'Min_Salary', 'Max_Salary',
        'Job_Desc', 'Industry', 'Rating', 'Date_Posted', 'Valid_until',
        'Job_Type'
    ]
    assert legend in cols
    assert isinstance(df_chart, pd.DataFrame)

    # Number of listings per legend value, looked up for every row. Working
    # on the plain list of values makes this independent of the index.
    legend_values = df_chart[legend].tolist()
    listings_per_value = Counter(legend_values)
    listings = [listings_per_value[value] for value in legend_values]

    max_salary = df_chart['Max_Salary']
    min_salary = df_chart['Min_Salary']
    # NaN > 0 is False, so missing salaries are rejected as well.
    assert (max_salary > 0).all(), 'Max_Salary must be > 0 for every row'
    assert (min_salary > 0).all(), 'Min_Salary must be > 0 for every row'

    # assign() returns a new frame, leaving the caller's dataframe untouched.
    data = df_chart.assign(Listings=listings,
                           Avg_Salary=(max_salary + min_salary) / 2)

    points = Chart(data).mark_point()
    y_axis = alt.Y('Avg_Salary', title='Avg Salary (USD)')

    if separate:
        chart = points.encode(x='Listings',
                              y=y_axis,
                              color=legend,
                              column=legend)
    else:
        chart = points.encode(x='Listings', y=y_axis, color=legend)

    chart = chart.properties(title='Salary vs. # of Job Listings',
                             height=700,
                             width=900)
    chart = chart.configure_header(titleFontSize=14, labelFontSize=14)
    chart = chart.configure_axis(titleFontSize=14,
                                 labelFontSize=14,
                                 tickCount=10)
    chart = chart.configure_title(fontSize=14)
    chart = chart.configure_legend(titleFontSize=14, labelFontSize=12)
    chart = chart.configure_point(size=75)

    return chart
Ejemplo n.º 5
0
    prices_predict = compute_all_y(array, coefficients)
    MSE2 = compute_mse(prices_actual, prices_predict)
    if MSE2 > MSE1:
        coefficients[1] = coefficients[1] - 0.02
        prices_predict = compute_all_y(array, coefficients)
        MSE3 = compute_mse(prices_actual, prices_predict)
        if MSE3 < MSE1:
            MSE1 = MSE3
    else:
        MSE1 = MSE2
    coefficients[2] = coefficients[2] + 0.01
    prices_predict = compute_all_y(array, coefficients)
    MSE2 = compute_mse(prices_actual, prices_predict)
    if MSE2 > MSE1:
        coefficients[2] = coefficients[2] - 0.02
        prices_predict = compute_all_y(array, coefficients)
        MSE3 = compute_mse(prices_actual, prices_predict)
        if MSE3 < MSE1:
            MSE1 = MSE3
    else:
        MSE1 = MSE2
    MSE_list.append(MSE1)
    count += 1
    attempt.append(count)

# Plot the recorded MSE values against the attempt counter as a point chart.
# `attempt` and `MSE_list` are the lists filled by the preceding loop.
data = Data(attempt=attempt, MSE_list=MSE_list)
chart = Chart(data)
mark = chart.mark_point()
# Both fields are encoded as quantitative (":Q") channels.
enc = mark.encode(x='attempt:Q', y='MSE_list:Q', )
# Render the chart explicitly (presumably in a notebook front end -- confirm).
enc.display()