def main():
    parser = get_argparser()
    args = parser.parse_args()

    if args.backfill:
        backfill(args.lang, args.project, args.backfill, args.update)
        return

    if not args.date:
        input_date = date.today()
    else:
        input_date = datetime.strptime(args.date, '%Y%m%d').date()

    if args.poll:
        poll_td = parse_timedelta(args.poll)
        if args.poll_interval:
            poll_interval = parse_timedelta(args.poll_interval)
        else:
            poll_interval = POLL_INTERVAL
        # if args.poll % POLL_INCR_MINS:
        #     raise ValueError('poll time must be in increments of %r minutes'
        #                      % POLL_INCR_MINS)
        err_write = sys.stderr.write
        count = 0
        max_time = datetime.now() + poll_td
        while True:
            count += 1
            try:
                save_traffic_stats(args.lang, args.project, input_date)
                break
            except (urllib2.HTTPError, urllib2.URLError) as he:
                # tried to be nice but the API gives back all sorts of statuses
                # if he.getcode() != 404:
                #     raise
                try:
                    status_code = he.getcode()
                except AttributeError:  # urllib2.URLError has no getcode()
                    status_code = he
                if (datetime.now() + poll_interval) <= max_time:
                    if count == 1:
                        err_write('# ' + datetime.now().isoformat())
                        err_write(' - got %s - polling every %r mins until %s.\n'
                                  % (status_code,
                                     poll_interval.total_seconds() / 60.0,
                                     max_time.isoformat()))
                    time.sleep(poll_interval.total_seconds())
                else:
                    err_write('\n!! - ')
                    err_write(datetime.now().isoformat())
                    err_write(' - no results after %r attempts and %r minutes,'
                              ' exiting.\n\n'
                              % (count, poll_td.total_seconds() / 60))
                    break
    else:
        save_traffic_stats(args.lang, args.project, input_date)

    if args.update:
        print update_charts(input_date, args.lang, args.project)
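# NOTE: illustrative sketch, not part of the original module. main()
# above relies on parse_timedelta(), which is imported from elsewhere
# in this repo and not shown in this excerpt. Assuming it maps suffixed
# duration strings ('10m', '2h', '-3d') to datetime.timedelta objects,
# a minimal stand-in could look like this:

import re
from datetime import timedelta

_SKETCH_UNITS = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'}


def _parse_timedelta_sketch(text):
    # '10m' -> timedelta(minutes=10); '-3d' -> timedelta(days=-3)
    match = re.match(r'^(-?\d+(?:\.\d+)?)([smhd])$', text.strip())
    if not match:
        raise ValueError('expected a duration like "10m", not %r' % (text,))
    value, unit = match.groups()
    return timedelta(**{_SKETCH_UNITS[unit]: float(value)})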
def _date_param(text):
    '''Parse a date parameter: either a negative offset relative to the
    current UTC time (e.g., "-3d"), or an ISO 8601 date/datetime.'''
    text = text.strip()
    if text.startswith('-'):
        td = parse_timedelta(text)
        return datetime.datetime.utcnow() + td  # adding a negative
    return isoparse(text)
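# Example usage of _date_param (illustrative; assumes parse_timedelta
# accepts negative offsets like '-3d' and that isoparse is
# dateutil.parser's ISO 8601 parser):
#
#   _date_param('-3d')         # -> three days before the current UTC time
#   _date_param('2019-07-01')  # -> datetime.datetime(2019, 7, 1, 0, 0)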
def from_path(cls, path, auto_start_state=True):
    # load and normalize the campaign config
    with open(path + '/config.yaml', 'rb') as f:
        config_data = yaml.safe_load(f)
    kwargs = dict(config_data)
    kwargs['article_list_config'] = dict(kwargs.pop('article_list'))
    kwargs['base_path'] = path
    if kwargs.get('save_frequency'):
        kwargs['save_frequency'] = parse_timedelta(kwargs['save_frequency'])
    if kwargs.get('fetch_frequency'):
        kwargs['fetch_frequency'] = parse_timedelta(kwargs['fetch_frequency'])
    ret = cls(**kwargs)

    needs_backfill = False
    with tlog.info('load_start_state') as _act:
        try:
            start_state = PTCampaignState.from_timestamp(
                ret, ret.campaign_start_date)
        except StateNotFound as snf:
            if not auto_start_state:
                raise
            needs_backfill = True
            _act.failure('start state not found (got {0!r}), backfilling...',
                         snf)
    if needs_backfill:
        with tlog.critical('backfill_start_state', verbose=True):
            ret.load_article_list()
            start_state = PTCampaignState.from_api(ret,
                                                   ret.campaign_start_date)
            start_state.save()
    ret.start_state = start_state
    return ret
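# For reference, a config.yaml that from_path() could load might look
# like the following (hypothetical keys and values; the only keys the
# code above actually requires are article_list, and optionally
# save_frequency and fetch_frequency -- everything else is passed
# through to the constructor as keyword arguments):
#
#   article_list:
#     type: category
#     name: Some_category
#   save_frequency: 1h
#   fetch_frequency: 10m
#   campaign_start_date: 2019-01-01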
from build_page import update_charts
from word_filter import word_filter
from common import (DATA_PATH_TMPL,
                    PERMALINK_TMPL,
                    DATE_PERMALINK_TMPL,
                    TOP_API_URL,
                    MW_API_URL,
                    TOTAL_TRAFFIC_URL,
                    DEBUG,
                    PREFIXES,
                    LOCAL_LANG_MAP,
                    STRINGS_PATH_TMPL,
                    DEFAULT_PROJECT,
                    DEFAULT_LANG)
import crisco
from log import tlog

DEFAULT_LIMIT = 100
DEFAULT_IMAGE = ('https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/'
                 'Wikipedia%27s_W.svg/400px-Wikipedia%27s_W.svg.png')
DEFAULT_SUMMARY = None
DEFAULT_GROUP_SIZE = 20
POLL_INTERVAL = parse_timedelta('10m')


@tlog.wrap('critical')
def get_wiki_info(lang, project):
    '''\
    Get the mainpage title and local namespace map.
    '''
    url = MW_API_URL.format(lang=lang, project=project)
    params = {'action': 'query',
              'meta': 'siteinfo',
              'format': 'json',
              'siprop': 'general|namespaces'}
    # MW_API_URL is assumed to end with '?' (or '&') so the query
    # string can be appended directly
    resp = urllib2.urlopen(url + urlencode(params))
    data = json.loads(resp.read())
    mainpage = data['query']['general']['mainpage'].replace(' ', '_')
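# The slice of the MediaWiki siteinfo response that get_wiki_info()
# consumes looks roughly like this (illustrative, heavily trimmed):
#
#   {"query": {"general": {"mainpage": "Main Page", ...},
#              "namespaces": {"0": {...}, "1": {...}, ...}}}
#
# so for lang='en', project='wikipedia', mainpage above becomes
# 'Main_Page' after the space-to-underscore replacement.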