Ejemplo n.º 1
0
def emp_dist(values):
    """
    Build an empirical distribution from an array of observations.

    The observations are grouped by distinct value, and each group's count
    is divided by the total number of observations.

    Parameters
    ----------
    values : array
        Observations to be grouped into the distribution.

    Returns
    -------
    Table
        A distribution: the distinct values alongside a 'Proportion' column.

    Examples
    --------
    >>> x = make_array(1, 1, 1, 1, 1, 2, 3, 3, 3, 4)
    >>> emp_dist(x)
    Value | Proportion
    1     | 0.5
    2     | 0.1
    3     | 0.3
    4     | 0.1
    """
    n_observations = len(values)

    # group(0) yields one row per distinct value: column 0 holds the value,
    # column 1 holds how many times it occurred.
    grouped = Table().with_column('position', values).group(0)
    distinct_values = grouped.column(0)
    proportions = grouped.column(1) / n_observations

    # NOTE(review): Table().values(...) is not defined in this excerpt;
    # presumably a project extension that creates the value column -- confirm.
    distribution = Table().values(distinct_values)
    return distribution.with_column('Proportion', proportions)
Ejemplo n.º 2
0
def sanitize_dataframe(df: Table):
    """Sanitize a Table to prepare it for serialization.

    Adapted from the ipyvega project (which operates on DataFrames). The
    input is never modified; all conversions are applied to a copy:

    * Convert categoricals to strings.
    * Convert np.bool_ dtypes to Python bool objects
    * Convert np.int dtypes to Python int objects
    * Convert floats to objects and replace NaNs/infs with None.
    * Convert DateTime dtypes into appropriate string representations
    * Convert numpy arrays stored in object columns to lists and replace
      null entries with None.

    Parameters
    ----------
    df : Table or None
        The table to sanitize.

    Returns
    -------
    Table or None
        A sanitized copy of ``df``, or None when ``df`` is None.
    """
    import numpy as np

    if df is None:
        return None

    df = df.copy()

    def to_list_if_array(val):
        if isinstance(val, np.ndarray):
            return val.tolist()
        return val

    # otypes=[object] is required: without it np.vectorize infers the output
    # dtype from the first element, so an object column that starts with a
    # string is coerced to a unicode array (turning None into 'None' before
    # the null mask is computed), and an empty column raises ValueError.
    arrays_to_lists = np.vectorize(to_list_if_array, otypes=[object])

    for col_name in df.labels:
        dtype = df.column(col_name).dtype
        if str(dtype) == 'category':
            # XXXX: work around bug in to_json for categorical types
            # https://github.com/pydata/pandas/issues/10778
            df[col_name] = df[col_name].astype(str)
        elif str(dtype) == 'bool':
            # convert numpy bools to objects; np.bool is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.integer):
            # convert integers to objects; np.int is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.floating):
            # For floats, convert to Python float: np.float is not JSON serializable
            # Also convert NaN/inf values to null, as they are not JSON serializable
            col = df[col_name]
            bad_values = np.isnan(col) | np.isinf(col)
            df[col_name] = np.where(bad_values, None, col).astype(object)
        elif str(dtype).startswith('datetime'):
            # Convert datetimes to strings
            # astype(str) will choose the appropriate resolution
            new_column = df[col_name].astype(str)
            new_column[new_column == 'NaT'] = ''
            df[col_name] = new_column
        elif dtype == object:
            # Convert numpy arrays saved as objects to lists
            # Arrays are not JSON serializable
            col = arrays_to_lists(df[col_name])
            # NOTE(review): notnull is not defined in this block; presumably
            # pandas.notnull imported at module level -- confirm.
            df[col_name] = np.where(notnull(col), col, None).astype(object)
    return df
Ejemplo n.º 3
0
from newsapi import NewsApiClient
from flask import Flask, url_for, render_template, request, jsonify, session
import weight, os, menuScraping, graph, weightranking
from datetime import datetime
from datascience import Table

import matplotlib.pyplot as plt
import io
import base64
from graph import build_graph

# Load the bias ratings, keeping only the source name and its horizontal rank.
bias = Table().read_table("bias.csv").select("News Source", "Horizontal Rank")
news_sources = bias.column("News Source")
news_rankings = bias.column("Horizontal Rank")

# Map each news source to its horizontal rank; pairing the two columns
# directly avoids indexing both by position.
news_dict = dict(zip(news_sources, news_rankings))


app = Flask(__name__)

# Credentials should not live in source control: take the key from the
# NEWSAPI_KEY environment variable when it is set. The previously hard-coded
# key is kept only as a fallback so existing deployments keep working.
# TODO: rotate this key and remove the fallback.
newsapi = NewsApiClient(
    api_key=os.environ.get('NEWSAPI_KEY', '672b5745f9aa4ecbbc044a0025fc28d3'))

# Comma-separated source identifiers passed to the news API, plus the same
# identifiers split into a list.
sources = "cnn, the-new-york-times, bbc-news, the-guardian-uk, associated-press, usa-today, the-economist, the-hill, fortune"
sourcesarray = sources.split(", ")

def get_news_by_category(category):
  top_news = []
  if category == 'economy':
    top_news.append(newsapi.get_top_headlines(q='econ', sources=sources))
    top_news.append(newsapi.get_top_headlines(q='money', sources=sources))
    top_news.append(newsapi.get_top_headlines(q='monetary', sources=sources))
Ejemplo n.º 4
0
def slope(tbl, col_x, col_y):
    """Return find_r(tbl, col_x, col_y) * SD(col_y) / SD(col_x).

    ``tbl`` must expose ``column(label)``; ``col_x`` and ``col_y`` identify
    the two columns. find_r is defined elsewhere -- presumably it returns
    the correlation coefficient, which makes this the regression slope.
    """
    r_value = find_r(tbl, col_x, col_y)
    sd_y = np.std(tbl.column(col_y))
    sd_x = np.std(tbl.column(col_x))
    return r_value * sd_y / sd_x


def intercept(tbl, col_x, col_y):
    """Return mean(col_y) - slope(tbl, col_x, col_y) * mean(col_x).

    ``tbl`` must expose ``column(label)``; ``col_x`` and ``col_y`` identify
    the two columns.
    """
    mean_y = np.mean(tbl.column(col_y))
    fitted_slope = slope(tbl, col_x, col_y)
    mean_x = np.mean(tbl.column(col_x))
    return mean_y - fitted_slope * mean_x


# Visualizing the Comparison of Hispanic Percentages to African American Wages in 2000 and 2017

# scatterplot2000
# Fitted values: slope * x + intercept, evaluated at every x in the table.
line_2000 = (
    slope(tbl_2000, "Hispanic Percent 2000", "African American Wages 2000")
    * tbl_2000.column("Hispanic Percent 2000")
) + intercept(
    tbl_2000, "Hispanic Percent 2000", "African American Wages 2000"
)
tbl_2000.scatter(
    "Hispanic Percent 2000",
    "African American Wages 2000",
    fit_line=True,
)

# scatterplot2017
# NOTE(review): columns are addressed by index here; assumes columns 0 and 1
# of tbl_2017 are the Hispanic-percent and wage columns -- TODO confirm.
line_2017 = (
    slope(tbl_2017, 0, 1) * tbl_2017.column(0)
) + intercept(tbl_2017, 0, 1)
tbl_2017.scatter(
    "Hispanic Percent 2017",
    "African American Wages 2017",
    fit_line=True,
)

# Creating Bootstraps By Resampling

#bootstrap2000