def get_all():
    data = _get_spreadsheet_data(google_spreadsheet_url,
                                 config.get('spreadsheet', 'worksheet'))
    people = []
    for row in data:
        first_name = row[1]
        last_name = row[2]
        full_name = row[0]
        city = row[6]
        state = row[7]
        population = row[8]
        cause = row[9]
        sex = row[3]
        date_of_death = row[4]
        age = row[5]
        person = {
            'full_name': full_name,
            'first_name': first_name,
            'last_name': last_name,
            'sex': sex,
            'date_of_death': date_of_death,
            'age': age,
            'city': city,
            'state': state,
            'cause': cause,
            'population': population
        }
        people.append(person)
    return people
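# Usage sketch (hedged): get_all() assumes the worksheet columns are ordered
# full_name, first_name, last_name, sex, date_of_death, age, city, state,
# population, cause (indexes 0-9, exactly as read above). Note that the loop
# never skips the header row (get_query_adjustments() below does), so if the
# worksheet has one, callers must drop it themselves:
#
#   people = get_all()
#   for person in people[1:]:  # people[0] would be the header row
#       print(person['full_name'], person['date_of_death'])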
def get_query_adjustments():
    google_worksheet_name = config.get('spreadsheet',
                                       'query_adjustement_worksheet')
    all_data = _get_spreadsheet_worksheet(google_spreadsheet_url,
                                          google_worksheet_name)
    log.info("  loaded %d rows" % len(all_data))
    all_data = iter(all_data)
    next(all_data)
    adjustment_map = {}  # full name to keyword query terms
    for row in all_data:
        full_name = row[0]
        custom_query = row[4]
        if len(custom_query) > 0:
            adjustment_map[full_name] = custom_query
    log.info("  Found %d query keyword adjustments " % len(adjustment_map))
    return adjustment_map
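# Usage sketch (hedged): the returned map swaps hand-tuned search keywords in
# for people whose names alone make poor news queries. Column positions (full
# name in 0, custom query in 4) are exactly as read above; the fallback here
# is illustrative:
#
#   adjustments = get_query_adjustments()
#   query = adjustments.get(person['full_name'], '"%s"' % person['full_name'])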
# DEPRECATED - USE INCIDENTSV4.PY, WHICH USES GOOGLE SHEETS APIv4

import requests, gspread, unicodecsv, logging, os
from oauth2client.client import GoogleCredentials

from mpv import basedir, config

log = logging.getLogger(__name__)

google_spreadsheet_url = config.get('spreadsheet', 'url')

def _get_spreadsheet_worksheet(google_sheets_url, google_worksheet_name):
    all_data = None
    log.info("Loading spreadsheet/"+google_worksheet_name+" data from url")
    credentials = GoogleCredentials.get_application_default()
    credentials = credentials.create_scoped(['https://spreadsheets.google.com/feeds'])
    gc = gspread.authorize(credentials)
    # Needed to share the document with the app-generated email in the credentials JSON file for discovery/access to work
    sh = gc.open_by_url(google_sheets_url)
    worksheet = sh.worksheet(google_worksheet_name)
    all_data = worksheet.get_all_values()
    return all_data

def _get_spreadsheet_data(google_sheets_url, google_worksheet_name):
    all_data = _get_spreadsheet_worksheet(google_sheets_url, google_worksheet_name)
    log.info("  loaded %d rows" % len(all_data))
    # write it to a local csv for inspection and storage
    outfile = open(os.path.join(basedir, 'data', 'mpv-input-data.csv'), 'wb')
    outcsv = unicodecsv.writer(outfile, encoding='utf-8')
    for row in all_data:
        outcsv.writerow(row)
    outfile.close()
    return all_data  # get_all() above iterates over this return value
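
# Running this deprecated module requires application-default credentials to
# be discoverable, e.g. (this JSON filename is copied from a sibling script
# in this listing and is illustrative):
#
#   export GOOGLE_APPLICATION_CREDENTIALS=./GoogleSpreadsheetAccess-be765243bfb4.json
#
# and, per the comment in _get_spreadsheet_worksheet() above, the spreadsheet
# must be shared with the service account's email address.
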
from googleapiclient import discovery
import oauth2client
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage

from mpv import basedir, config

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None
    
YEAR = int(config.get('spreadsheet','year')) # SET THIS TO THE YEAR OF DATA YOU WANT

# IDs for google spreadsheets of each year
SPREADSHEET_IDS = {2013: '1ArisyAjhUE1eeuA490-rPPI1nfft2cJIyDpaeOBqyj8',
                   2014: '1699_rxlNIK3KSNzqpoczw0ehiwTp4IKEaEP_dfWo6vM',
                   2015: '1HoG8jdioarEbxVI_IbuqRwQFCFqbUxzCHc6T2SymRUY',
                   2016: '19wsyttAqa4jbPnqmxQWbu79rwzp3eq_EHbzzsRiomTU'}
                   
# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/sheets.googleapis.com-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/spreadsheets.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'mapping police violence'

def _get_credentials():
    """Gets valid user credentials from storage.
import logging, os, sys, time, json, datetime, copy
import requests, gspread, unicodecsv
import mediacloud
from mpv import basedir, config, mc, mca, cache, incidentsv4, dest_dir
from mpv.util import build_mpv_daterange

CONTROVERSY_ID = config.get("mediacloud", "controversy_id")

# set up logging
logging.basicConfig(
    filename=os.path.join(basedir, "logs", config.get("spreadsheet", "year") + "list-all-stories.log"),
    level=logging.DEBUG,
)
log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")
start_time = time.time()
requests_logger = logging.getLogger("requests")
requests_logger.setLevel(logging.INFO)
mc_logger = logging.getLogger("mediacloud")
mc_logger.setLevel(logging.INFO)

log.info("Using redis db %s as a cache" % config.get("cache", "redis_db_number"))

log.info("Working from controversy %s" % CONTROVERSY_ID)

results = mc.storyCount("{~ topic:" + CONTROVERSY_ID + "}")
log.info("  %s total stories" % results)

data = incidentsv4.get_all()
custom_query_keywords = incidentsv4.get_query_adjustments()
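
# What the script does next is elided in this listing; a hedged sketch of a
# per-person listing loop. mc.storyList() pagination follows the mediacloud
# client this codebase appears to use, but build_mpv_daterange's exact
# signature and the name quoting are assumptions:
#
#   for person in data:
#       query = custom_query_keywords.get(person['full_name'],
#                                         '"%s"' % person['full_name'])
#       date_filter = build_mpv_daterange(person['date_of_death'])
#       last_id = 0
#       while True:
#           stories = mc.storyList(query, date_filter, last_id, rows=100)
#           if not stories:
#               break
#           last_id = stories[-1]['processed_stories_id']
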
import logging, os, sys, time, json, datetime, copy, unicodecsv
from oauth2client.client import GoogleCredentials
import mediacloud
from mpv import basedir, config, mc, incidentsv4, cache, dest_dir
from mpv.util import build_mpv_daterange

# turn off the story counting, useful if you just want to generate the giant query files
WRITE_STORY_COUNT_CSVS = True

# set up logging
logging.basicConfig(
    filename=os.path.join(basedir, 'logs',
                          config.get('spreadsheet', 'year') + 'count-story-totals.log'),
    level=logging.DEBUG)
log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")
log.info("Writing output to %s" % dest_dir)
start_time = time.time()
requests_logger = logging.getLogger('requests')
requests_logger.setLevel(logging.INFO)
mc_logger = logging.getLogger('mediacloud')
mc_logger.setLevel(logging.INFO)

data = incidentsv4.get_all()
custom_query_keywords = incidentsv4.get_query_adjustments()

# set up a csv to record all the story urls
if WRITE_STORY_COUNT_CSVS:
    story_count_csv_file = open(
        os.path.join(dest_dir, 'mpv-total-story-counts.csv'), 'wb')  # 'wb' for windows
    fieldnames = ['full_name', 'date_of_death', 'total_stories',
                  'stories_about_person', 'normalized_stories_about_person',
                  'query', 'filter']
    story_count_csv = unicodecsv.DictWriter(story_count_csv_file,
                                            fieldnames=fieldnames,
                                            extrasaction='ignore',
                                            encoding='utf-8')
    story_count_csv.writeheader()
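
# The counting loop itself is elided here; a hedged sketch of what would fill
# the columns declared above. mc.storyCount() is taken from the sibling
# scripts in this listing; treating its result as a dict with a 'count' key,
# the '*:*' match-all query, and build_mpv_daterange's signature are
# assumptions:
#
#   for person in data:
#       query = custom_query_keywords.get(person['full_name'],
#                                         '"%s"' % person['full_name'])
#       date_filter = build_mpv_daterange(person['date_of_death'])
#       total = mc.storyCount('*:*', date_filter)['count']
#       about = mc.storyCount(query, date_filter)['count']
#       story_count_csv.writerow({
#           'full_name': person['full_name'],
#           'date_of_death': person['date_of_death'],
#           'total_stories': total,
#           'stories_about_person': about,
#           'normalized_stories_about_person': float(about) / total if total else 0,
#           'query': query,
#           'filter': date_filter,
#       })
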
# `export GOOGLE_APPLICATION_CREDENTIALS=./GoogleSpreadsheetAccess-be765243bfb4.json`
import logging, os, sys, time, json, datetime, copy
import requests, gspread, unicodecsv
import mediacloud
from mpv import basedir, config, mc, cache, incidentsv4, dest_dir
from mpv.util import build_mpv_daterange

CONTROVERSY_ID = config.get('mediacloud','controversy_id')

YEAR = config.get('spreadsheet','year')

# set up logging
logging.basicConfig(
    filename=os.path.join(basedir, 'logs', YEAR + 'count-coverage.log'),
    level=logging.DEBUG)
log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")
start_time = time.time()
requests_logger = logging.getLogger('requests')
requests_logger.setLevel(logging.INFO)
mc_logger = logging.getLogger('mediacloud')
mc_logger.setLevel(logging.INFO)

log.info("Using redis db %s as a cache" % config.get('cache','redis_db_number'))

log.info("Working from controversy %s" % CONTROVERSY_ID)

controversy_filter = "{~ topic:"+CONTROVERSY_ID+"}"
results = mc.storyCount(controversy_filter)
log.info("  %s total stories" % CONTROVERSY_ID)

# load the queries we wrote already