Example #1
0
def main():
    parser = get_argparser()
    args = parser.parse_args()
    if args.backfill:
        backfill(args.lang, args.project, args.backfill, args.update)
        return

    if not args.date:
        input_date = date.today()
    else:
        input_date = datetime.strptime(args.date, '%Y%m%d').date()

    if args.poll:
        poll_td = parse_timedelta(args.poll)
        if args.poll_interval:
            poll_interval = parse_timedelta(args.poll_interval)
        else:
            poll_interval = POLL_INTERVAL

        # if args.poll % POLL_INCR_MINS:
        #     raise ValueError('poll time must be in increments of %r minutes'
        #                      % POLL_INCR_MINS)
        err_write = sys.stderr.write
        count = 0
        max_time = datetime.now() + poll_td
        while 1:
            count += 1
            try:
                save_traffic_stats(args.lang, args.project, input_date)
                break
            except (urllib2.HTTPError, urllib2.URLError) as he:
                # tried to be nice but the API gives back all sorts of statuses
                # if he.getcode() != 404:
                #     raise
                try:
                    status_code = he.getcode()
                except:
                    status_code = he
                if (datetime.now() + poll_interval) <= max_time:
                    if count == 1:
                        err_write('# ' + datetime.now().isoformat())
                        err_write(' - got %s - polling every %r mins until %s.\n'
                                  % (status_code,
                                     poll_interval.total_seconds() / 60.0,
                                     max_time.isoformat()))
                    time.sleep(poll_interval.total_seconds())
                else:
                    err_write('\n!! - ')
                    err_write(datetime.now().isoformat())
                    err_write(' - no results after %r attempts and %r minutes,'
                              ' exiting.\n\n' % (count,
                                                 poll_td.total_seconds() / 60))
                    break
    else:
        save_traffic_stats(args.lang, args.project, input_date)
    if args.update:
        print update_charts(input_date, args.lang, args.project)
Example #2
0
def main():
    parser = get_argparser()
    args = parser.parse_args()
    if args.backfill:
        backfill(args.lang, args.project, args.backfill, args.update)
        return

    if not args.date:
        input_date = date.today()
    else:
        input_date = datetime.strptime(args.date, '%Y%m%d').date()

    if args.poll:
        poll_td = parse_timedelta(args.poll)
        if args.poll_interval:
            poll_interval = parse_timedelta(args.poll_interval)
        else:
            poll_interval = POLL_INTERVAL

        # if args.poll % POLL_INCR_MINS:
        #     raise ValueError('poll time must be in increments of %r minutes'
        #                      % POLL_INCR_MINS)
        err_write = sys.stderr.write
        count = 0
        max_time = datetime.now() + poll_td
        while 1:
            count += 1
            try:
                save_traffic_stats(args.lang, args.project, input_date)
                break
            except (urllib2.HTTPError, urllib2.URLError) as he:
                # tried to be nice but the API gives back all sorts of statuses
                # if he.getcode() != 404:
                #     raise
                try:
                    status_code = he.getcode()
                except:
                    status_code = he
                if (datetime.now() + poll_interval) <= max_time:
                    if count == 1:
                        err_write('# ' + datetime.now().isoformat())
                        err_write(
                            ' - got %s - polling every %r mins until %s.\n' %
                            (status_code, poll_interval.total_seconds() / 60.0,
                             max_time.isoformat()))
                    time.sleep(poll_interval.total_seconds())
                else:
                    err_write('\n!! - ')
                    err_write(datetime.now().isoformat())
                    err_write(' - no results after %r attempts and %r minutes,'
                              ' exiting.\n\n' %
                              (count, poll_td.total_seconds() / 60))
                    break
    else:
        save_traffic_stats(args.lang, args.project, input_date)
    if args.update:
        print update_charts(input_date, args.lang, args.project)
Example #3
0
def _date_param(text):
    """Parse a date CLI parameter into a datetime.

    Accepts either a negative timedelta expression (e.g. '-2d'), which is
    interpreted relative to the current UTC time, or an ISO 8601 date string.
    """
    stripped = text.strip()
    if not stripped.startswith('-'):
        return isoparse(stripped)
    offset = parse_timedelta(stripped)
    # offset is negative, so this steps back from "now"
    return datetime.datetime.utcnow() + offset
Example #4
0
    def from_path(cls, path, auto_start_state=True):
        """Construct a campaign instance from a directory holding config.yaml.

        Reads and normalizes the YAML config, builds the instance, then loads
        the campaign's start state. If the start state is missing and
        auto_start_state is true, it is backfilled from the API and saved;
        otherwise StateNotFound propagates.
        """
        # Close the config file promptly instead of leaking the handle
        with open(path + '/config.yaml', 'rb') as config_file:
            config_data = yaml.safe_load(config_file)

        kwargs = dict(config_data)
        kwargs['article_list_config'] = dict(kwargs.pop('article_list'))
        kwargs['base_path'] = path

        # Frequencies arrive as text (e.g. '10m'); convert to timedeltas
        if kwargs.get('save_frequency'):
            kwargs['save_frequency'] = parse_timedelta(
                kwargs['save_frequency'])
        if kwargs.get('fetch_frequency'):
            kwargs['fetch_frequency'] = parse_timedelta(
                kwargs['fetch_frequency'])

        ret = cls(**kwargs)

        needs_backfill = False
        with tlog.info('load_start_state') as _act:
            try:
                start_state = PTCampaignState.from_timestamp(
                    ret, ret.campaign_start_date)
            except StateNotFound as snf:
                if not auto_start_state:
                    raise
                needs_backfill = True
                _act.failure(
                    'start state not found (got {0!r}), backfilling...', snf)

        if needs_backfill:
            with tlog.critical('backfill_start_state', verbose=True):
                ret.load_article_list()
                start_state = PTCampaignState.from_api(ret,
                                                       ret.campaign_start_date)
                start_state.save()

        ret.start_state = start_state

        return ret
Example #5
0
                    PREFIXES,
                    LOCAL_LANG_MAP,
                    STRINGS_PATH_TMPL,
                    DEFAULT_PROJECT,
                    DEFAULT_LANG)
import crisco
from log import tlog


# Module-level defaults; presumably overridden per-call by callers — confirm.
DEFAULT_LIMIT = 100
# NOTE(review): looks like a fallback Wikipedia "W" thumbnail for entries
# without an image of their own — confirm against usage.
DEFAULT_IMAGE = ('https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/'
                 'Wikipedia%27s_W.svg/400px-Wikipedia%27s_W.svg.png')
DEFAULT_SUMMARY = None
DEFAULT_GROUP_SIZE = 20

# Default wait between retry attempts when polling the API
POLL_INTERVAL = parse_timedelta('10m')


@tlog.wrap('critical')
def get_wiki_info(lang, project):
    '''\
    Get the mainpage title and local namespace map.
    '''
    url = MW_API_URL.format(lang=lang, project=project)
    params = {'action': 'query',
              'meta': 'siteinfo',
              'format': 'json',
              'siprop': 'general|namespaces'}
    resp = urllib2.urlopen(url + urlencode(params))
    data = json.loads(resp.read())
    mainpage = data['query']['general']['mainpage'].replace(' ', '_')
Example #6
0
from build_page import update_charts
from word_filter import word_filter
from common import (DATA_PATH_TMPL, PERMALINK_TMPL, DATE_PERMALINK_TMPL,
                    TOP_API_URL, MW_API_URL, TOTAL_TRAFFIC_URL, DEBUG,
                    PREFIXES, LOCAL_LANG_MAP, STRINGS_PATH_TMPL,
                    DEFAULT_PROJECT, DEFAULT_LANG)
import crisco
from log import tlog

# Module-level defaults; presumably overridden per-call by callers — confirm.
DEFAULT_LIMIT = 100
# NOTE(review): looks like a fallback Wikipedia "W" thumbnail for entries
# without an image of their own — confirm against usage.
DEFAULT_IMAGE = ('https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/'
                 'Wikipedia%27s_W.svg/400px-Wikipedia%27s_W.svg.png')
DEFAULT_SUMMARY = None
DEFAULT_GROUP_SIZE = 20

# Default wait between retry attempts when polling the API
POLL_INTERVAL = parse_timedelta('10m')


@tlog.wrap('critical')
def get_wiki_info(lang, project):
    '''\
    Get the mainpage title and local namespace map.
    '''
    url = MW_API_URL.format(lang=lang, project=project)
    params = {
        'action': 'query',
        'meta': 'siteinfo',
        'format': 'json',
        'siprop': 'general|namespaces'
    }
    resp = urllib2.urlopen(url + urlencode(params))