Example #1
# Module-level imports assumed by this snippet:
#   import geonamescache
#   from geonamescache.mappers import country
def __init__(self, mag_affiliations, max_workers=2):
    self.aff = mag_affiliations
    self.gc = geonamescache.GeonamesCache()
    cities = self.gc.get_cities()
    countries = self.gc.get_countries()
    self.cities = {v['name'] for v in cities.values()}
    self.countries = {v['name'] for v in countries.values()}
    self._max_workers = max_workers
    # Map ISO alpha-2 country codes to country names.
    self.mapper = country(from_key='iso', to_key='name')
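All of the examples on this page revolve around geonamescache's country mapper factory. As a quick orientation, here is a minimal standalone sketch; the printed values assume current geonamescache data, and the behavior on unknown keys (a falsy result rather than an exception) is worth verifying against your installed version:

from geonamescache.mappers import country

# country() returns a callable that translates one country attribute to another.
iso_to_name = country(from_key='iso', to_key='name')
print(iso_to_name('DE'))  # 'Germany'
name_to_iso3 = country(from_key='name', to_key='iso3')
print(name_to_iso3('Germany'))  # 'DEU'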
Example #2
# Requires: from geonamescache.mappers import country
def __init__(self, data):
    self.id = data.key.lower()
    mapper = country(from_key='iso', to_key='name')
    try:
        self.title = mapper(data.key)
    except AttributeError:
        # If we get a country code with no name mapping, skip it to prevent a 500.
        pass
    self.count = data.doc_count
Example #3
def serve_layout():
    # Links to time series datasets on github:
    url_confirmed = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
    url_deaths = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
    # Create dataframes from datasets:
    df_confirmed = pd.read_csv(url_confirmed)
    df_deaths = pd.read_csv(url_deaths)
    # Replace null values with zeroes:
    df_confirmed[df_confirmed.columns[4:]] = df_confirmed[
        df_confirmed.columns[4:]].fillna(0, downcast='infer')
    df_deaths[df_deaths.columns[4:]] = df_deaths[df_deaths.columns[4:]].fillna(
        0, downcast='infer')

    # Try today's date. If not yet updated use yesterday's date for daily reports:
    try:
        date = datetime.now().strftime('%m-%d-%Y')
        url_daily_reports = f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{date}.csv'
        df_daily_reports = pd.read_csv(url_daily_reports,
                                       dtype={'FIPS': object})
        df_daily_reports['FIPS'] = df_daily_reports['FIPS'].str.zfill(5)
    except Exception:
        date = (datetime.now() - timedelta(days=1)).strftime('%m-%d-%Y')
        url_daily_reports = f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{date}.csv'
        df_daily_reports = pd.read_csv(url_daily_reports,
                                       dtype={'FIPS': object})
        df_daily_reports['FIPS'] = df_daily_reports['FIPS'].str.zfill(5)

    # Subsets of confirmed cases:
    df_china = df_confirmed[df_confirmed['Country/Region'] == 'China']
    df_other = df_confirmed[df_confirmed['Country/Region'] != 'China']

    # Add ISO3 codes to daily updating df
    mapper = country(from_key='name', to_key='iso3')

    country_index = {}
    country_index['West Bank and Gaza'] = 'PSE'
    country_index['Taiwan*'] = 'TWN'
    country_index['Timor-Leste'] = 'TLS'
    country_index['Holy See'] = 'VAT'
    country_index['Republic of the Congo'] = 'COG'
    country_index['Congo (Brazzaville)'] = 'COG'
    country_index['Congo (Kinshasa)'] = 'COD'

    df_confirmed['ISO3'] = df_confirmed['Country/Region'].apply(
        lambda x: country_index.get(x, mapper(x)))

    # Reformat for global choropleth:
    df_global = df_confirmed.groupby(['ISO3',
                                      'Country/Region']).sum().reset_index()
    # Convert date columns to rows:
    df_global = pd.melt(df_global,
                        id_vars=['ISO3', 'Country/Region', 'Lat', 'Long'],
                        value_vars=list(
                            df_global.select_dtypes(include='int64')),
                        var_name='Date',
                        value_name='Confirmed Cases')

    # Setup df containing states with most cases:
    df_us = df_daily_reports[df_daily_reports['Country_Region'] == 'US']
    leading_states = df_us.groupby('Province_State')['Confirmed'].sum(
    ).sort_values(ascending=False)[0:10].index
    df_us_leading_states = df_us[df_us['Province_State'].isin(
        leading_states)].groupby('Province_State').sum().sort_values(
            by=['Confirmed'], ascending=False).reset_index()
    df_us_leading_states[
        'Active'] = df_us_leading_states['Confirmed'] - df_us_leading_states[
            'Recovered'] - df_us_leading_states['Deaths']

    # Setup df containing states with most deaths:
    leading_states_deaths = df_us.groupby('Province_State')['Deaths'].sum(
    ).sort_values(ascending=False)[0:10].index
    df_us_leading_states_deaths = df_us[df_us['Province_State'].isin(
        leading_states_deaths)].groupby('Province_State').sum().sort_values(
            by=['Deaths'], ascending=False).reset_index()

    # Setup df containing countries with most cases:
    leading_countries = df_daily_reports.groupby(
        'Country_Region')['Confirmed'].sum().sort_values(
            ascending=False)[0:10].index
    df_leading_countries = df_daily_reports[
        df_daily_reports['Country_Region'].isin(leading_countries)].groupby(
            'Country_Region').sum().sort_values(by=['Confirmed'],
                                                ascending=False).reset_index()
    df_leading_countries[
        'Active'] = df_leading_countries['Confirmed'] - df_leading_countries[
            'Recovered'] - df_leading_countries['Deaths']

    # Setup df containing countries with most deaths:
    leading_countries_deaths = df_daily_reports.groupby(
        'Country_Region')['Deaths'].sum().sort_values(
            ascending=False)[0:10].index
    df_leading_countries_deaths = df_daily_reports[df_daily_reports[
        'Country_Region'].isin(leading_countries_deaths)].groupby(
            'Country_Region').sum().sort_values(by=['Deaths'],
                                                ascending=False).reset_index()

    # df for US choropleth:
    df_us_choro = df_us.groupby('Province_State').sum().reset_index()

    # Add dict for state abbreviations for US choropleth:
    us_state_abbrev = {
        'Alabama': 'AL',
        'Alaska': 'AK',
        'Arizona': 'AZ',
        'Arkansas': 'AR',
        'California': 'CA',
        'Colorado': 'CO',
        'Connecticut': 'CT',
        'Delaware': 'DE',
        'District of Columbia': 'DC',
        'Florida': 'FL',
        'Georgia': 'GA',
        'Hawaii': 'HI',
        'Idaho': 'ID',
        'Illinois': 'IL',
        'Indiana': 'IN',
        'Iowa': 'IA',
        'Kansas': 'KS',
        'Kentucky': 'KY',
        'Louisiana': 'LA',
        'Maine': 'ME',
        'Maryland': 'MD',
        'Massachusetts': 'MA',
        'Michigan': 'MI',
        'Minnesota': 'MN',
        'Mississippi': 'MS',
        'Missouri': 'MO',
        'Montana': 'MT',
        'Nebraska': 'NE',
        'Nevada': 'NV',
        'New Hampshire': 'NH',
        'New Jersey': 'NJ',
        'New Mexico': 'NM',
        'New York': 'NY',
        'North Carolina': 'NC',
        'North Dakota': 'ND',
        'Northern Mariana Islands': 'MP',
        'Ohio': 'OH',
        'Oklahoma': 'OK',
        'Oregon': 'OR',
        'Palau': 'PW',
        'Pennsylvania': 'PA',
        'Puerto Rico': 'PR',
        'Rhode Island': 'RI',
        'South Carolina': 'SC',
        'South Dakota': 'SD',
        'Tennessee': 'TN',
        'Texas': 'TX',
        'Utah': 'UT',
        'Vermont': 'VT',
        'Virgin Islands': 'VI',
        'Virginia': 'VA',
        'Washington': 'WA',
        'West Virginia': 'WV',
        'Wisconsin': 'WI',
        'Wyoming': 'WY',
    }

    df_us_choro['Abbrev'] = df_us_choro['Province_State'].map(
        us_state_abbrev).fillna(df_us_choro['Province_State'])
    df_us_choro = df_us_choro[df_us_choro['Abbrev'].apply(
        lambda x: len(x) < 3)]

    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

    ## TIME SERIES

    fig_time = go.Figure()
    # Confirmed cases in mainland China
    fig_time.add_trace(
        go.Scatter(
            x=[i[:-3] for i in list(df_other.select_dtypes(include='int64'))],
            y=list(df_china.select_dtypes(include='int64').sum()),
            name='China',
            line_color='#7f7f7f'))
    # Confirmed cases for the rest of the world
    fig_time.add_trace(
        go.Scatter(
            x=[i[:-3] for i in list(df_other.select_dtypes(include='int64'))],
            y=list(df_other.select_dtypes(include='int64').sum()),
            name='Rest of World',
            line_color='#ff7f0e'))
    # Worldwide deaths
    fig_time.add_trace(
        go.Scatter(
            x=[i[:-3] for i in list(df_other.select_dtypes(include='int64'))],
            y=list(df_deaths.select_dtypes(include='int64').sum()),
            name='Worldwide Deaths',
            line_color='#d62728'))

    for trace in fig_time.data:
        trace.hovertemplate = '%{x}<br>%{y}'

    fig_time.update_yaxes(hoverformat=',f')
    fig_time.update_layout(
        title_text='Coronavirus over Time',
        legend={
            'x': 0.02,
            'y': 0.55
        },
        legend_bgcolor='rgba(0,0,0,0.1)',
        height=350,
        margin={
            'r': 10,
            't': 50,
            'l': 10,
            'b': 70
        },
        annotations=[
            dict(xshift=10,
                 yshift=-10,
                 x=0,
                 y=1.0,
                 showarrow=False,
                 text='Total Cases: ' +
                 f'{sum(df_daily_reports["Confirmed"]):,}' +
                 '<br>Total Deaths: ' + f'{sum(df_daily_reports["Deaths"]):,}',
                 xref='paper',
                 yref='paper',
                 font=dict(size=16, color='#ffffff'),
                 align='left',
                 bordercolor='rgba(0,0,0,0.1)',
                 borderwidth=2,
                 borderpad=4,
                 bgcolor='#ff7f0e')
        ])

    ## GLOBAL CHOROPLETH

    fig_global = px.choropleth(
        df_global,
        locations='ISO3',
        color='Confirmed Cases',
        hover_name='Country/Region',
        hover_data=['Date'],
        projection='natural earth',
        animation_frame='Date',
        range_color=(0, df_global['Confirmed Cases'].max()),
        color_continuous_scale=[
            [0, 'rgb(250, 250, 250)'],  #0
            [1 / 10000, 'rgb(250, 175, 100)'],  #10
            [1 / 1000, 'rgb(250, 125, 0)'],  #100
            [1 / 100, 'rgb(200, 100, 0)'],  #1000
            [1 / 10, 'rgb(250, 50, 50)'],  #10000
            [1, 'rgb(100, 0, 0)'],  #100000
        ])

    # Must loop through traces AND frames to format hovertemplate
    for trace in fig_global.data:
        trace.hovertemplate = '<b>%{hovertext}</b> (%{customdata[0]})<br>%{z:,f}'
    for frame in fig_global.frames:
        frame.data[
            0].hovertemplate = '<b>%{hovertext}</b> (%{customdata[0]})<br>%{z:,f}'
    # Animation speed and slider/button locations
    fig_global.layout.updatemenus[0].buttons[0].args[1]['frame'][
        'duration'] = 50
    fig_global.layout.updatemenus[0].pad = {'l': 10, 't': 0}
    fig_global.layout.sliders[0].pad = {'b': 10, 't': -20, 'l': 10}
    fig_global.layout.sliders[0].currentvalue = {'prefix': 'Date = '}
    fig_global.layout.coloraxis.colorbar.title.text = 'Confirmed<br>Cases'

    fig_global.update_layout(
        title='Global Time Series',
        margin={
            'r': 0,
            't': 50,
            'l': 0,
            'b': 10
        },
    )

    ## US CHOROPLETH

    fig_us = px.choropleth(
        df_daily_reports,
        geojson=counties,  # county-level GeoJSON loaded elsewhere in the app
        locations='FIPS',
        scope='usa',
        color='Confirmed',
        hover_name='Admin2',
        hover_data=['Province_State'],
        range_color=(0, df_daily_reports[df_daily_reports['Country_Region'] ==
                                         'US']['Confirmed'].max()),
        color_continuous_scale=[
            [0, 'rgb(250, 250, 250)'],  #0
            [1 / 10000, 'rgb(250, 175, 100)'],  #10
            [1 / 1000, 'rgb(250, 125, 0)'],  #100
            [1 / 100, 'rgb(200, 100, 0)'],  #1000
            [1 / 10, 'rgb(250, 50, 50)'],  #10000
            [1, 'rgb(100, 0, 0)'],  #100000
        ])

    for trace in fig_us.data:
        trace.hovertemplate = '<b>%{hovertext}</b> (%{customdata[0]})<br>%{z:,f}'

    fig_us.layout.coloraxis.colorbar.title.text = 'Confirmed<br>Cases'

    fig_us.update_traces(marker_line_width=0.1)

    fig_us.update_layout(
        title=f'US Counties ({date})',
        margin={
            'r': 0,
            't': 50,
            'l': 0,
            'b': 30
        },
    )

    ## MOST AFFECTED

    trace_glob_c = go.Bar(x=df_leading_countries['Country_Region'],
                          y=df_leading_countries['Confirmed'],
                          marker={'color': 'rgb(250, 175, 100)'},
                          visible=True)
    trace_glob_d = go.Bar(x=df_leading_countries_deaths['Country_Region'],
                          y=df_leading_countries_deaths['Deaths'],
                          marker={'color': 'rgb(250, 50, 50)'},
                          visible=False)
    trace_us_c = go.Bar(x=df_us_leading_states['Province_State'],
                        y=df_us_leading_states['Confirmed'],
                        marker={'color': 'rgb(250, 175, 100)'},
                        visible=True)
    trace_us_d = go.Bar(x=df_us_leading_states_deaths['Province_State'],
                        y=df_us_leading_states_deaths['Deaths'],
                        marker={'color': 'rgb(250, 50, 50)'},
                        visible=False)

    fig_most_affected = make_subplots(rows=1, cols=2)

    fig_most_affected.append_trace(trace_glob_c, 1, 1)
    fig_most_affected.append_trace(trace_glob_d, 1, 1)
    fig_most_affected.append_trace(trace_us_c, 1, 2)
    fig_most_affected.append_trace(trace_us_d, 1, 2)

    for trace in fig_most_affected.data:
        trace.name = ''
        trace.hovertemplate = '%{x}<br>%{y}'
    fig_most_affected.update_yaxes(hoverformat=',f')

    fig_most_affected.update_layout(
        title=f'Leading Countries and US States ({date})',
        showlegend=False,
        height=350,
        margin={
            'r': 10,
            't': 50,
            'l': 40,
            'b': 10
        },
        updatemenus=[
            dict(pad={
                'r': 10,
                't': 10
            },
                 x=1.0,
                 y=1.0,
                 active=0,
                 buttons=list([
                     dict(label='Confirmed',
                          method='update',
                          args=[{
                              'visible': [True, False, True, False]
                          }]),
                     dict(label='Deaths',
                          method='update',
                          args=[{
                              'visible': [False, True, False, True]
                          }]),
                 ]))
        ])

    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

    return html.Div(children=[
        html.Div(
            [
                html.H3('Coronavirus Dashboard'),
                html.Div([
                    html.P(f'Updated: {date}', style={'font-style': 'italic'}),
                ],
                         style={'display': 'inline-block'}),
                html.Div(
                    [
                        dcc.Markdown(
                            '''Source: [Johns Hopkins University CSSE](https://github.com/CSSEGISandData/COVID-19)''',
                            style={'font-style': 'italic'})
                    ],
                    style={
                        'display': 'inline-block',
                        'float': 'right',
                        'color': '#ff7f0e'
                    }),
            ],
            style={
                'color': 'white',
                'paddingLeft': '10px',
                'background':
                'linear-gradient(to right, #ff7f0e 25%, 50%, white)'
            }),
        html.Div(children=[
            html.Div([dcc.Graph(figure=fig_time, )],
                     style={'margin': '0'},
                     className='five columns'),
            html.Div([dcc.Graph(figure=fig_most_affected, )],
                     style={'margin': '0'},
                     className='seven columns'),
        ],
                 className='twelve columns'),
        html.Div(children=[
            html.Div([dcc.Graph(figure=fig_us, )],
                     style={'margin': '0'},
                     className='six columns'),
            html.Div([dcc.Graph(figure=fig_global, )],
                     style={'margin': '0'},
                     className='six columns')
        ],
                 className='twelve columns'),
        html.Div([
            html.Div([
                dcc.Markdown(
                    '''If you find this dashboard helpful, please share it and consider donating to a charity on the frontlines 
                of COVID-19, such as [Doctors Without Borders](https://donate.doctorswithoutborders.org/onetime.cfm).  \nCreated 
                and maintained by [John Larson](https://www.linkedin.com/in/johnlarson2016/).'''
                ),
            ],
                     style={
                         'paddingLeft': '10px',
                         'paddingTop': '20px'
                     }),
        ],
                 className='twelve columns')
    ])
Example #4
from geonamescache.mappers import country
import csv

mapper = country(from_key='name', to_key='continentcode')

# Append a continent-code column to the meteorite CSV. Uses Python 3 csv
# handling: text mode with newline='' instead of binary mode, and next()
# instead of the Python 2 .next() method.
with open("meteor-fell-before.csv", newline='') as csvdata, \
        open("meteor-fell.csv", "w", newline='') as outfile:
    reader = csv.reader(csvdata, delimiter=',')
    writer = csv.writer(outfile, delimiter=',')

    header = next(reader)
    header.append('Continent')
    writer.writerow(header)
    index = header.index("Country")

    for row in reader:
        row.append(mapper(row[index]))
        writer.writerow(row)
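For reference, geonames continent codes are two-letter values (AF, AN, AS, EU, NA, OC, SA); a quick sanity check of the mapper built above, assuming current geonamescache data:

print(mapper('France'))  # 'EU'
print(mapper('Brazil'))  # 'SA'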
Example #5
"""
Introduction to Web Science
Assignment 5
Question 3
Team: golf

Script used to extract data from the article-per-line file and process it,
finally writing it to a csv file.
"""
import pandas as pd
from geonamescache import GeonamesCache
from geonamescache.mappers import country

gc = GeonamesCache()  # we use the GeonamesCache to get the name of countries

# creating a mapper between the iso3 code and the country name
mapper = country(from_key='name', to_key='iso3')
countries = list(gc.get_dataset_by_key(
    gc.get_countries(),
    'name',
).keys())
# for the US we are going to use the states
states = list(gc.get_us_states_by_names())
#print(countries)
# any of these key words could indicate that we are reading about a star
key_words = ['movie', 'film', 'TV', 'television', 'actor', 'actress']
articles = []
dataset = {}

with open('article-per-line.txt', 'r', encoding="utf8") as f:
    articles = f.read().splitlines()
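The snippet stops after loading the article lines. A minimal sketch of how the scan described in the docstring might continue, counting keyword-bearing articles per mentioned country; this continuation (including the output filename) is an assumption, not the original code:

# Hypothetical continuation: tally articles about film/TV stars per country.
for article in articles:
    if any(word in article for word in key_words):
        for name in countries:
            if name in article:
                dataset[name] = dataset.get(name, 0) + 1

pd.DataFrame(sorted(dataset.items()), columns=['country', 'articles']).to_csv(
    'stars_per_country.csv', index=False)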
Example #6
def test_country_name_iso3_mapper(self):
    mapper = country(from_key='name', to_key='iso3')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(mapper('Burma'), 'MMR')
    self.assertEqual(mapper('South Korea'), 'KOR')
    self.assertEqual(mapper('The Netherlands'), 'NLD')
    self.assertEqual(mapper('USA'), 'USA')
Example #7
@classmethod
def convert_iso3_to_iso2(cls, iso3):
    # Map an ISO alpha-3 country code to its alpha-2 equivalent.
    mapper = country(from_key='iso3', to_key='iso')
    return mapper(iso3)
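A quick usage note, assuming standard geonamescache data (SomeClass stands in for the snippet's unnamed class):

# SomeClass.convert_iso3_to_iso2('DEU')  -> 'DE'
# SomeClass.convert_iso3_to_iso2('USA')  -> 'US'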
Example #8
# Requires: import json, pandas as pd, country_converter as coco, and
# from geonamescache.mappers import country; `storage` is an external upload
# client (e.g. a pyrebase storage handle) configured elsewhere.
def parse():
    # Combining locations into an array:
    with open('compiled_data_ourworld.json') as f:
        our_world_data = json.load(f)

    countries_dict = our_world_data['location']
    tests_dict = our_world_data['tests']
    date_dict = our_world_data['dt']

    #zeros in on values in dictionary
    countries_list = list(countries_dict.values())
    tests_list = list(tests_dict.values())
    date_list = list(date_dict.values())

    # Convert tests list into integers; 'N/A' becomes -1:
    for i in range(len(tests_list)):
        tests_list[i] = tests_list[i].replace(',', '')
        if tests_list[i].isdigit():
            tests_list[i] = int(tests_list[i])
        else:
            tests_list[i] = -1

    # Create ISO3 list via country_converter:
    ISO_3_list = coco.convert(names=countries_list, to='ISO3')

    # Create ISO2 list (lowercased to match the lat/long JSON keys):
    ISO_2_list = coco.convert(names=ISO_3_list, to='ISO2')
    ISO_2_list = [code.lower() for code in ISO_2_list]

    # Mapper converts an ISO3 code to population:
    mapper = country(from_key='iso3', to_key='population')
    population_list = []

    # Remove local data points or countries with no aggregate figures (-1).
    # Delete by index with `del`: list.remove() drops the first matching
    # value, which corrupts the parallel lists when values repeat.
    index = 0
    while index < len(countries_list):
        if "–" in countries_list[index]:
            del countries_list[index]
            del ISO_3_list[index]
            del ISO_2_list[index]
            del tests_list[index]
            del date_list[index]
            continue
        index += 1

    index = 0
    while index < len(tests_list):
        if tests_list[index] == -1:
            del countries_list[index]
            del ISO_3_list[index]
            del ISO_2_list[index]
            del tests_list[index]
            del date_list[index]
            continue
        index += 1

    # Create population list via the iso3 -> population mapper:
    for iso3_code in ISO_3_list:
        population_list.append(mapper(iso3_code))


    #fix duplicates - add tests in same country together
    #
    # l=0
    # m=1
    # length = len(ISO_3_list)
    # while (m < length):
    #     if(ISO_3_list[l] == ISO_3_list[m]):
    #         total = tests_list[l] + tests_list[m]
    #         tests_list[l] = total
    #         tests_list[m] = total
    #         length -= 1
    #     l += 1
    #     m += 1

    # tpc - tests per million people = (tests / population) * 1,000,000
    tpc_list = []
    for tests, population in zip(tests_list, population_list):
        tpc_list.append(round(tests / population * 1000000))

    # #fix duplicates - remove duplicate entries
    # n=0
    # o=1
    # length = len(ISO_3_list)
    # while (o < length):
    #     if(ISO_3_list[n] == ISO_3_list[o]):
    #         del ISO_3_list[n]
    #         del population_list[n]
    #         del tests_list[n]
    #         del tpc_list[n]
    #         length -= 1
    #     n += 1
    #     o += 1

    # Create latitude and longitude lists:
    location_lat=[]
    location_long=[]
    with open('countrycode-latlong.json') as g:
        lat_long_data = json.load(g)

    for iso2_code in ISO_2_list:
        location_lat.append(lat_long_data[iso2_code]["lat"])
        location_long.append(lat_long_data[iso2_code]["long"])


    #create dataframe and export
    compiled_data = pd.DataFrame(
        {
            "location": countries_list,
            "location_code": ISO_3_list,
            "location_lat": location_lat,
            "location_long": location_long,
            "location_pop": population_list,
            "tests": tests_list,
            "tests_per_million": tpc_list,
            "date": date_list
        })

    print(compiled_data)
    compiled_data.to_json('./our_world_parsed.json', orient='records')
    storage.child('/')
    storage.child('our_world_parsed.json').put('./our_world_parsed.json')
Example #9
    # Tail of a tzinfo `Zone` subclass; the rest of the class is truncated here.
    def tzname(self, dt):
        return self.name


INFLUX_HOST = 'localhost'
INFLUX_DB = 'covid19'
INFLUX_MEASUREMENT = 'covid19_JHU'
INFLUX_DBPORT = 8086
INFLUX_USER = ''
INFLUX_PASS = ''
INFLUX_DROPMEASUREMENT = True
client = InfluxDBClient(INFLUX_HOST, INFLUX_DBPORT, INFLUX_USER, INFLUX_PASS,
                        INFLUX_DB)
GMT = Zone(0, False, 'GMT')
mapperPop = country(from_key='name', to_key='population')
mapperISO3 = country(from_key='name', to_key='iso3')
#Direct links to the 3 CSV files maintained by JHU CSSE
inputfiles = {
    "confirmed":
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "deaths":
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    "recovered":
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
}
measurements = []
measurements_hash = {}
#Iterate through each Source File and build hash table
for i in sorted(inputfiles.keys()):
    field = i
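The example is cut off right after the loop header. A minimal sketch of how the hash-table build might continue, assuming the standard JHU wide CSV layout (one column per date) and an `import pandas as pd` at module level; the melt-and-key approach is an assumption, not the original code:

for field in sorted(inputfiles.keys()):
    df = pd.read_csv(inputfiles[field])
    # Wide -> long: one row per (country, province, date).
    long_df = df.melt(
        id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
        var_name='date', value_name=field)
    for _, row in long_df.iterrows():
        key = (row['Country/Region'], row['Province/State'], row['date'])
        measurements_hash.setdefault(key, {})[field] = row[field]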
Example #10
def test_country_name_iso3_mapper(self):
    mapper = country(from_key='name', to_key='iso3')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(mapper('Burma'), 'MMR')
    self.assertEqual(mapper('South Korea'), 'KOR')
    self.assertEqual(mapper('The Netherlands'), 'NLD')
    self.assertEqual(mapper('USA'), 'USA')
Example #11
def main():

    fy.setup_fiscal_calendar(start_month=9)
    now = fy.FiscalDateTime.now()
    start_date = now.prev_quarter.start.strftime('%Y-%m-%d')
    end_date = now.prev_quarter.end.strftime('%Y-%m-%d')
    print(start_date)
    print(end_date)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Retrieve Google Analytics data.")
    parser.add_argument("-d", "--debug",
        help="Enable debugging messages", action="store_true")
    parser.add_argument("output_file", metavar="OUTPUT_FILE",
        nargs="?",
        default="sessions.csv",
        help="Output CSV file")
    parser.add_argument("-s", "--start-date",
        default=start_date,
        help="Start date")
    parser.add_argument("-e", "--end-date",
        default=end_date,
        help="End date")
    args = parser.parse_args()

    if args.start_date != start_date:
        args.start_date = parse_date(args.start_date)
        print(args.start_date)

    if args.end_date != end_date:
        args.end_date = parse_date(args.end_date)
        print(args.end_date)

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_colwidth', None)
    # pd.set_option('display.float_format', '{:,.0f}'.format)

    gc = GeonamesCache()
    global countries
    countries = gc.get_countries()
    # 'ZZ' is a user-assigned (non-country) code; add a placeholder iso3 entry.
    countries['ZZ'] = {'iso3': 'ZZZ'}

    mapper = country(from_key='iso', to_key='iso3')

    scope = ['https://www.googleapis.com/auth/analytics.readonly']

    # Authenticate and construct service.
    service = get_service('analytics', 'v3', scope, 'client_secrets.json')

    profile_ids = get_profile_ids(service)
    pprint.pprint(profile_ids)

    total = pd.DataFrame()

    for profile_id in profile_ids:
        results = get_results(service, profile_id,
            args.start_date, args.end_date)
        df = create_dataframe(results)
        with pd.option_context('display.max_rows', None,
                'display.max_columns', None):
            print(df)
        total = total.add(df, fill_value=0)

    total.index = [conv_iso_2_to_3(i) for i in total.index]
    total.index.name = 'iso3'
    total.columns = [re.sub(r'^ga:', '', col) for col in total.columns]
    set_int(total)

    total.to_csv(args.output_file)
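conv_iso_2_to_3 is referenced above but not shown. Given the iso -> iso3 mapper and the patched global `countries` table, a plausible reconstruction (an assumption, not the original helper):

def conv_iso_2_to_3(iso2):
    # Hypothetical helper: prefer the geonamescache mapper, then fall back to
    # the patched `countries` table (which is how 'ZZ' -> 'ZZZ' gets used).
    iso3 = country(from_key='iso', to_key='iso3')(iso2)
    return iso3 if iso3 else countries.get(iso2, {}).get('iso3', iso2)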