def get_all():
    data = _get_spreadsheet_data(google_spreadsheet_url, config.get('spreadsheet', 'worksheet'))
    people = []
    for row in data:
        # worksheet column order: full_name, first_name, last_name, sex,
        # date_of_death, age, city, state, population, cause
        full_name = row[0]
        first_name = row[1]
        last_name = row[2]
        sex = row[3]
        date_of_death = row[4]
        age = row[5]
        city = row[6]
        state = row[7]
        population = row[8]
        cause = row[9]
        person = {
            'full_name': full_name,
            'first_name': first_name,
            'last_name': last_name,
            'sex': sex,
            'date_of_death': date_of_death,
            'age': age,
            'city': city,
            'state': state,
            'cause': cause,
            'population': population
        }
        people.append(person)
    return people
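# A minimal usage sketch (ours, not part of the original module): each dict
# that get_all() returns carries the worksheet columns keyed by name.
people = get_all()
for person in people[:5]:  # peek at the first few rows
    print('%(full_name)s, age %(age)s, died %(date_of_death)s in %(city)s, %(state)s' % person)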
def get_query_adjustments():
    google_worksheet_name = config.get('spreadsheet', 'query_adjustement_worksheet')
    all_data = _get_spreadsheet_worksheet(google_spreadsheet_url, google_worksheet_name)
    log.info("  loaded %d rows" % len(all_data))
    all_data = iter(all_data)
    next(all_data)  # skip the header row
    adjustment_map = {}  # full name -> custom keyword query terms
    for row in all_data:
        full_name = row[0]
        custom_query = row[4]
        if len(custom_query) > 0:
            adjustment_map[full_name] = custom_query
    log.info("  Found %d query keyword adjustments" % len(adjustment_map))
    return adjustment_map
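# Sketch of how downstream scripts are meant to consume the adjustment map:
# prefer the hand-tuned keywords from the worksheet, fall back to an
# exact-phrase search on the person's name. The helper name query_for is
# ours (hypothetical); the fallback pattern is what the map exists for.
def query_for(person, adjustments):
    full_name = person['full_name']
    if full_name in adjustments:
        return adjustments[full_name]  # custom keywords from the worksheet
    return '"%s"' % full_name  # default: exact-phrase query on the name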
# DEPRECATED - USE INCIDENTSV4.PY, WHICH USES GOOGLE SHEETS APIv4

import requests, gspread, unicodecsv, logging, os
from oauth2client.client import GoogleCredentials

from mpv import basedir, config

log = logging.getLogger(__name__)

google_spreadsheet_url = config.get('spreadsheet', 'url')


def _get_spreadsheet_worksheet(google_sheets_url, google_worksheet_name):
    log.info("Loading spreadsheet/" + google_worksheet_name + " data from url")
    credentials = GoogleCredentials.get_application_default()
    credentials = credentials.create_scoped(['https://spreadsheets.google.com/feeds'])
    gc = gspread.authorize(credentials)
    # The document must be shared with the app-generated email in the
    # credentials JSON file for discovery/access to work
    sh = gc.open_by_url(google_sheets_url)
    worksheet = sh.worksheet(google_worksheet_name)
    all_data = worksheet.get_all_values()
    return all_data


def _get_spreadsheet_data(google_sheets_url, google_worksheet_name):
    all_data = _get_spreadsheet_worksheet(google_sheets_url, google_worksheet_name)
    log.info("  loaded %d rows" % len(all_data))
    # write it to a local csv for inspection and storage
    outfile = open(os.path.join(basedir, 'data', 'mpv-input-data.csv'), 'wb')
    outcsv = unicodecsv.writer(outfile, encoding='utf-8')
    for row in all_data:
        outcsv.writerow(row)
    outfile.close()
    return all_data
from googleapiclient import discovery
import oauth2client
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage

from mpv import basedir, config

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

YEAR = int(config.get('spreadsheet', 'year'))  # SET THIS TO THE YEAR OF DATA YOU WANT

# IDs for google spreadsheets of each year
SPREADSHEET_IDS = {2013: '1ArisyAjhUE1eeuA490-rPPI1nfft2cJIyDpaeOBqyj8',
                   2014: '1699_rxlNIK3KSNzqpoczw0ehiwTp4IKEaEP_dfWo6vM',
                   2015: '1HoG8jdioarEbxVI_IbuqRwQFCFqbUxzCHc6T2SymRUY',
                   2016: '19wsyttAqa4jbPnqmxQWbu79rwzp3eq_EHbzzsRiomTU'}

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/sheets.googleapis.com-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/spreadsheets.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'mapping police violence'


def _get_credentials():
    """Gets valid user credentials from storage.
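# The module is cut off above, mid-docstring. Here is a hedged sketch of how
# _get_credentials() typically continues, following Google's Sheets API
# quickstart pattern (the same ~/.credentials path the SCOPES comment above
# mentions). It is a reconstruction under that assumption, not necessarily
# the author's exact code.
import os  # needed by this sketch; not imported in the fragment above

def _get_credentials():
    """Gets valid user credentials from storage, running the OAuth2 flow
    if nothing valid has been stored yet (quickstart-style sketch)."""
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    credential_path = os.path.join(credential_dir,
                                   'sheets.googleapis.com-python-quickstart.json')
    store = Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        credentials = tools.run_flow(flow, store, flags)
    return credentials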
import logging, os, sys, time, json, datetime, copy
import requests, gspread, unicodecsv

import mediacloud
from mpv import basedir, config, mc, mca, cache, incidentsv4, dest_dir
from mpv.util import build_mpv_daterange

CONTROVERSY_ID = config.get('mediacloud', 'controversy_id')

# set up logging
logging.basicConfig(filename=os.path.join(basedir, 'logs',
                    config.get('spreadsheet', 'year') + 'list-all-stories.log'),
                    level=logging.DEBUG)
log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")

start_time = time.time()

requests_logger = logging.getLogger('requests')
requests_logger.setLevel(logging.INFO)
mc_logger = logging.getLogger('mediacloud')
mc_logger.setLevel(logging.INFO)

log.info("Using redis db %s as a cache" % config.get('cache', 'redis_db_number'))
log.info("Working from controversy %s" % CONTROVERSY_ID)

results = mc.storyCount("{~ topic:" + CONTROVERSY_ID + "}")
log.info("  %s total stories" % results)

data = incidentsv4.get_all()
custom_query_keywords = incidentsv4.get_query_adjustments()
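# A hedged sketch of the per-person listing loop this script builds toward:
# page through matching stories using last_processed_stories_id, the paging
# pattern of the mediacloud Python client. The mc.storyList signature and
# build_mpv_daterange(date_of_death) usage are assumptions inferred from the
# imports above, not taken verbatim from this file.
for person in data:
    query = custom_query_keywords.get(person['full_name'], '"%s"' % person['full_name'])
    date_filter = build_mpv_daterange(person['date_of_death'])  # assumed signature
    last_id = 0
    while True:
        stories = mc.storyList(query, date_filter, last_processed_stories_id=last_id, rows=100)
        if len(stories) == 0:
            break
        for story in stories:
            log.debug("  story %s: %s" % (story['stories_id'], story['url']))
        last_id = stories[-1]['processed_stories_id']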
import logging, os, sys, time, json, datetime, copy, unicodecsv
from oauth2client.client import GoogleCredentials

import mediacloud
from mpv import basedir, config, mc, incidentsv4, cache, dest_dir
from mpv.util import build_mpv_daterange

# set to False to turn off story counting, useful if you just want to
# generate the giant query files
WRITE_STORY_COUNT_CSVS = True

# set up logging
logging.basicConfig(filename=os.path.join(basedir, 'logs',
                    config.get('spreadsheet', 'year') + 'count-story-totals.log'),
                    level=logging.DEBUG)
log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")
log.info("Writing output to %s" % dest_dir)

start_time = time.time()

requests_logger = logging.getLogger('requests')
requests_logger.setLevel(logging.INFO)
mc_logger = logging.getLogger('mediacloud')
mc_logger.setLevel(logging.INFO)

data = incidentsv4.get_all()
custom_query_keywords = incidentsv4.get_query_adjustments()

# set up a csv to record all the story counts
if WRITE_STORY_COUNT_CSVS:
    story_count_csv_file = open(os.path.join(dest_dir, 'mpv-total-story-counts.csv'), 'wb')  # 'wb' for windows
    fieldnames = ['full_name', 'date_of_death', 'total_stories', 'stories_about_person',
                  'normalized_stories_about_person', 'query', 'filter']
    story_count_csv = unicodecsv.DictWriter(story_count_csv_file, fieldnames=fieldnames,
                                            extrasaction='ignore', encoding='utf-8')
    story_count_csv.writeheader()
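# A hedged sketch of the counting loop the csv columns above imply: for each
# person, count total stories and stories matching their query within a date
# window around the death, then normalize. mc.storyCount returning a dict
# with a 'count' key matches the mediacloud client; the build_mpv_daterange
# call and the '*:*' match-all query are assumptions.
for person in data:
    query = custom_query_keywords.get(person['full_name'], '"%s"' % person['full_name'])
    date_filter = build_mpv_daterange(person['date_of_death'])  # assumed signature
    total_stories = mc.storyCount('*:*', date_filter)['count']
    stories_about_person = mc.storyCount(query, date_filter)['count']
    if WRITE_STORY_COUNT_CSVS:
        story_count_csv.writerow({
            'full_name': person['full_name'],
            'date_of_death': person['date_of_death'],
            'total_stories': total_stories,
            'stories_about_person': stories_about_person,
            'normalized_stories_about_person':
                float(stories_about_person) / total_stories if total_stories > 0 else 0,
            'query': query,
            'filter': date_filter,
        })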
# `export GOOGLE_APPLICATION_CREDENTIALS=./GoogleSpreadsheetAccess-be765243bfb4.json`

import logging, os, sys, time, json, datetime, copy
import requests, gspread, unicodecsv

import mediacloud
from mpv import basedir, config, mc, cache, incidentsv4, dest_dir
from mpv.util import build_mpv_daterange

CONTROVERSY_ID = config.get('mediacloud', 'controversy_id')
YEAR = config.get('spreadsheet', 'year')

# set up logging
logging.basicConfig(filename=os.path.join(basedir, 'logs', YEAR + 'count-coverage.log'),
                    level=logging.DEBUG)
log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")

start_time = time.time()

requests_logger = logging.getLogger('requests')
requests_logger.setLevel(logging.INFO)
mc_logger = logging.getLogger('mediacloud')
mc_logger.setLevel(logging.INFO)

log.info("Using redis db %s as a cache" % config.get('cache', 'redis_db_number'))
log.info("Working from controversy %s" % CONTROVERSY_ID)

controversy_filter = "{~ topic:" + CONTROVERSY_ID + "}"
results = mc.storyCount(controversy_filter)
log.info("  %s total stories" % results)

# load the queries we wrote already