Example No. 1
def write_network_table(update=True):
    '''
    Writes network-level downloads table
    https://stackoverflow.com/questions/23103962/how-to-write-dataframe-to-postgres-table
    '''
    engine = create_engine(dns)

    # Normal update table with last day's data
    if update:
        start_date = (datetime.datetime.now()).date()
        dat = json.loads(
            getSimplecastResponse(
                f'/analytics/downloads?account={account_id}&start_date={start_date}'
            ))
        df = pd.DataFrame(dat['by_interval'])
        print(df)
        df.to_sql('network_level', con=engine, if_exists='append')

    # Write all-time data to db
    else:
        dat = json.loads(
            getSimplecastResponse(
                f'/analytics/downloads?account={account_id}'))
        df = pd.DataFrame(dat['by_interval'])
        df.to_sql('network_level', engine)
    print('Database Network table updated!')
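The examples on this page all lean on a module-level getSimplecastResponse helper plus account_id and dns values defined elsewhere in the repo. A minimal sketch of that assumed context, using requests against the Simplecast API with a bearer token (the environment variable names here are illustrative, not the project's real ones):

import os
import requests

# Hypothetical module-level setup assumed by the examples on this page
account_id = os.environ.get('SIMPLECAST_ACCOUNT_ID')
dns = os.environ.get('DATABASE_URL')  # SQLAlchemy / psycopg2 connection string

API_BASE = 'https://api.simplecast.com'


def getSimplecastResponse(query):
    '''Return the raw JSON body for a query string like
    "/analytics/downloads?account=...".'''
    headers = {'Authorization': f"Bearer {os.environ['SIMPLECAST_TOKEN']}"}
    r = requests.get(API_BASE + query, headers=headers)
    r.raise_for_status()
    return r.text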
Example No. 2
def network_pod_table():
    '''
    Builds the network-level podcast table.

    Outputs a table (CSV for now) with the following cols:
    ['Podcast Title', 'Total Downloads', 'Total Listeners', 'Average Downloads', 'Podcast ID']
    '''
    pod_ids = [x['value'] for x in podIDs()]

    print('Getting Network Table Data')
    podcasts = []
    for p in pod_ids:
        # Dictionary to hold 4 values: Title, # Downloads, # Listeners, Avg Downloads
        pod_data = {}

        # Getting Pod Title; this should be simpler
        title = json.loads(getSimplecastResponse(f'/podcasts/{p}'))['title']
        pod_data['Podcast Title'] = title

        # Getting total downloads for pod
        downloads = json.loads(
            getSimplecastResponse(
                f'/analytics/downloads?podcast={p}'))['total']
        pod_data['Total Downloads'] = downloads

        # Getting listener data
        listeners = json.loads(
            getSimplecastResponse(
                f'/analytics/episodes/listeners?podcast={p}'))['total']
        pod_data['Total Listeners'] = listeners

        # Getting Avg Download data
        avg_downloads = json.loads(
            getSimplecastResponse(
                f'/analytics/episodes/average_downloads?podcast={p}'))['total']
        pod_data['Average Downloads'] = avg_downloads

        # Setting up pod ID to be included
        pod_data['Podcast ID'] = p

        print('Pod Data:', pod_data)
        podcasts.append(pod_data)

        print('######################')

    df = pd.DataFrame(podcasts)
    print('Final Dataframe:\n', df)
    csv_path = os.path.join('.', 'db', 'podcast-table.csv')
    df.to_csv(csv_path, index=False)
    return df
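network_pod_table (and get_listeners further down) also assume a podIDs() helper that is not shown here. Judging from the [x['value'] for x in podIDs()] usage it returns dropdown-style dicts; a plausible sketch, with the endpoint and field names as assumptions:

def podIDs():
    '''Hypothetical helper: one {'label': title, 'value': id} dict per podcast
    in the account, matching how the examples index into it with ['value'].'''
    res = json.loads(
        getSimplecastResponse(f'/podcasts?account={account_id}&limit=1000'))
    return [{'label': p['title'], 'value': p['id']} for p in res['collection']]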
Example No. 3
def format_response(query, tag):
    '''
    Format Simplecast response into proper data format for update_db
    Should return data as a list of tuples;
    the length of each tuple should match the # of datatable columns for that stat.

    query: str, Simplecast API call, passed as a param to getSimplecastResponse
    tag: str, key name in the Simplecast response, typically 'by_interval' or 'collection' depending on type
    '''
    res = json.loads(getSimplecastResponse(query))
    data = res[tag]

    # print('Response data:', data)
    new_data = []
    n = 1
    # Looping through items in data response
    for d in data:
        dlist = list(d.values())
        dlist.insert(0, n)
        new_data.append(tuple(dlist))

        n += 1

    print(new_data[0:10], '...')
    return new_data
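A hedged usage sketch for format_response: the docstring says the tuples feed update_db, so an executemany insert is the likely consumer. This reuses the module-level psycopg2 import and dns string; the table and column names below are illustrative, not taken from the real schema:

rows = format_response(
    f'/analytics/downloads?account={account_id}', 'by_interval')

conn = psycopg2.connect(dns)
cur = conn.cursor()
# One placeholder per tuple element: (row number, interval, downloads_total, downloads_percent)
cur.executemany(
    'INSERT INTO network_level (id, interval, downloads_total, downloads_percent) '
    'VALUES (%s, %s, %s, %s)',
    rows)
conn.commit()
cur.close()
conn.close()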
Example No. 4
def get_network_downloads():
    '''
    Grabs network download data.
    Returns a df with columns ['interval', 'downloads_total', 'downloads_percent'] plus the total downloads for the network.
    '''

    response = getSimplecastResponse(
        f'/analytics/downloads?account={account_id}')  # &limit=1000

    total_downloads = json.loads(response)['total']
    df = pd.DataFrame(json.loads(response)['by_interval'])
    print(type(total_downloads))

    df.to_csv(os.path.join('.', 'db', 'network-downloads.csv'), index=False)
    return df, total_downloads
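Hypothetical usage of get_network_downloads, charting the history it returns (the column names come from the docstring; the output path is illustrative and df.plot needs matplotlib installed):

df, total = get_network_downloads()
print(f'Network total downloads: {total}')

ax = df.plot(x='interval', y='downloads_total', title='Network downloads by day')
ax.figure.savefig(os.path.join('.', 'db', 'network-downloads.png'))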
Example No. 5
def network_pull():
    '''
    Gets network data with one pull from our BW Simplecast account.
    From Lem's email: add '/current_user' to the query string.
    '''
    # Figure out how to set this as an env variable

    res = getSimplecastResponse(
        f'/analytics/podcasts?account={account_id}&limit=1000')
    res_dict = json.loads(res)
    # print(res_dict['collection'])
    # print(res_dict.keys(), len(res_dict['collection']))

    df = pd.DataFrame(res_dict['collection'])
    print(df)

    return df
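A possible follow-up for network_pull, ranking the network's shows by downloads. The 'title' and 'downloads_total' field names are assumptions about the /analytics/podcasts collection, not confirmed from the API:

pods = network_pull()
top = pods.sort_values('downloads_total', ascending=False)
print(top[['title', 'downloads_total']].head(10))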
Example No. 6
def get_listeners():
    '''
    Gets unique listener data for each pod.
    '''
    listener_dfs = []
    for p in pod_ids:

        listener_dat = json.loads(
            getSimplecastResponse(f'/analytics/listeners?podcast={p}'))
        print(listener_dat['by_interval'])
        listener_dfs.append(pd.DataFrame(listener_dat['by_interval']))

    listeners = pd.concat(listener_dfs)

    listener_path = os.path.join('.', 'db', 'network-listeners-by-date.csv')
    listeners = listeners.groupby('interval', as_index=False)['total'].sum()
    print('Listeners by date:\n', listeners)
    listeners.to_csv(listener_path, index=False)
    return listeners
Example No. 7
def update_podcast_table():
    '''
    Updates the podcasts_by_interval table.
    '''
    for p in pod_ids:
        print(p)
    res = json.loads(
        getSimplecastResponse(
            f'/analytics/podcasts?account={account_id}&limit=1000'))
    # One SQL statement per entry; each is executed one by one below
    commands = (
        """
        ALTER TABLE podcasts_by_interval
        ADD COLUMN IF NOT EXISTS interval DATE NOT NULL,
        ADD COLUMN IF NOT EXISTS downloads_total INT NOT NULL
        """,
    )
    conn = None

    try:
        # https://stackoverflow.com/questions/61022590/psycopg2-cant-execute-an-empty-query
        print('Connecting to db...')
        # connect to the PostgreSQL server
        conn = psycopg2.connect(dns)  #, sslmode='require')
        cur = conn.cursor()
        print('DB connection established!')
        # run each ALTER/CREATE statement one by one
        for command in commands:
            cur.execute(command)
            print('Table cols created!')

        # TODO: insert the rows from res['collection'] into podcasts_by_interval

        cur.close()
        # commit the changes
        conn.commit()

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
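The insert step above is still stubbed out. A minimal sketch of how that loop might be completed, assuming each row in res['collection'] carries fields matching the two columns added to podcasts_by_interval (adjust the keys to the real payload):

def insert_podcast_rows(cur, collection):
    '''Hypothetical helper: bulk-insert Simplecast rows via an open cursor.'''
    rows = [(d.get('interval'), d.get('downloads_total')) for d in collection]
    cur.executemany(
        'INSERT INTO podcasts_by_interval (interval, downloads_total) '
        'VALUES (%s, %s)',
        rows)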
Example No. 8
def network_data():
    '''
    Gets network-level data (# pods, total # downloads, total # episodes).
    Should be a scheduled run (i.e. runs once per day/hour).
    Updates a db/file for our app to pull from.
    '''
    # pod_ids = [x['value'] for x in podIDs()]
    # Getting download and episode totals for each pod in network
    episodes = []
    downloads = []

    downloads_by_interval = []

    for p in pod_ids:
        print(f'Getting data for podcast: {p}')

        # Download data grab
        download_dat = json.loads(
            getSimplecastResponse(f'/analytics/downloads?podcast={p}'))
        print(type(download_dat['by_interval']), download_dat)

        downloads_by_interval.append(pd.DataFrame(download_dat['by_interval']))
        pod_downloads = download_dat['total']
        print(f'Total # of downloads: {pod_downloads}')

        # Episode data grab
        episode_data = json.loads(
            getSimplecastResponse(f'/podcasts/{p}/episodes'))
        n_episodes = episode_data['count']
        print(f'Number of episodes: {n_episodes}')

        # listener data for each pod
        # listener_dat = json.loads(getSimplecastResponse(f'/analytics/episodes/listeners?podcast={p}'))
        # unique_listeners = listener_dat['total']
        # print('LISTENERS:',listener_dat.keys())

        print('#########################')
        # Writing data to list, then a file
        downloads.append(pod_downloads)
        episodes.append(n_episodes)

    # Getting Network totals
    downloads_total = np.sum(downloads)
    episodes_total = np.sum(episodes)

    # Setting up a DF with network stats

    network_stats = pd.DataFrame.from_dict({
        'Number of Podcasts': [len(pod_ids)],
        'Total Downloads': [downloads_total],
        'Total Episodes': [episodes_total]
    })

    print('#########################')
    print('NETWORK TOTALS')
    print(f'Total Downloads: {downloads_total}')
    print(f'Total Episodes: {episodes_total}', '\n')

    # Getting network downloads table -- sum downloads grouped on common interval values
    network_downloads = pd.concat(downloads_by_interval)
    network_downloads_grouped = network_downloads.groupby(
        'interval', as_index=False)['downloads_total'].sum()
    print('NETWORK DOWNLOADS BY DAY:')
    print(network_downloads_grouped, network_downloads_grouped.columns,
          type(network_downloads_grouped))

    # Writing to a csv for both network stats and downloads
    network_downloads_grouped.to_csv(os.path.join('.', 'db',
                                                  'network-downloads.csv'),
                                     index=False)
    network_stats.to_csv(os.path.join('.', 'db', 'network-stats.csv'),
                         index=False)
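The docstring asks for a scheduled run. One dependency-free way that could look, as a sketch only (a cron job or a proper scheduler would be more robust in production):

import time

REFRESH_SECONDS = 24 * 60 * 60  # once per day, per the docstring

if __name__ == '__main__':
    while True:
        network_data()
        time.sleep(REFRESH_SECONDS)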