Ejemplo n.º 1
0
def get_v1_params():
    """Build the request parser for the v1 endpoint.

    Arguments are registered in this order: ``source``, ``target``,
    ``count``, ``seed``, ``include_pageviews``, ``search``,
    ``rank_method`` and ``campaign``.  Only ``source`` and ``target``
    are mandatory; every other argument is optional.

    Returns:
        The populated ``reqparse.RequestParser``.
    """
    parser = reqparse.RequestParser()

    # The two mandatory arguments share the same declaration.
    for mandatory in ('source', 'target'):
        parser.add_argument(mandatory, type=str, required=True)

    # ``count`` is bounded above and defaulted by the 'api' config section.
    count_max = configuration.get_config_int('api', 'count_max')
    count_type = inputs.int_range(low=0, high=count_max)
    count_default = configuration.get_config_int('api', 'count_default')
    parser.add_argument('count',
                        type=count_type,
                        required=False,
                        default=count_default)

    # Accepts the empty string or non-empty segments separated by '|'.
    seed_type = inputs.regex(r'^([^|]+(\|[^|]+)*)?$')
    parser.add_argument('seed', type=seed_type, required=False)

    parser.add_argument('include_pageviews',
                        type=inputs.boolean,
                        required=False,
                        default=True)

    search_choices = ['morelike', 'wiki', 'related_articles', 'category']
    parser.add_argument('search',
                        type=str,
                        required=False,
                        default='morelike',
                        choices=search_choices)

    rank_choices = ['default', 'sitelinks']
    parser.add_argument('rank_method',
                        type=str,
                        required=False,
                        default='default',
                        choices=rank_choices)

    parser.add_argument('campaign', type=str, required=False, default='')

    return parser
Ejemplo n.º 2
0
def get_legacy_params():
    """Build the request parser for the legacy endpoint.

    The legacy API uses short argument names; each one is stored under a
    longer destination name via ``dest``:

        ``s`` -> ``source``, ``t`` -> ``target``, ``n`` -> ``count``,
        ``article`` -> ``seed``, ``pageviews`` -> ``include_pageviews``.

    ``search`` keeps its own name and only allows ``morelike`` or ``wiki``.

    Returns:
        The populated ``reqparse.RequestParser``.
    """
    parser = reqparse.RequestParser()

    # Mandatory short names and the destination each one is stored under.
    for short_name, destination in (('s', 'source'), ('t', 'target')):
        parser.add_argument(short_name,
                            type=str,
                            dest=destination,
                            required=True)

    # ``n`` is bounded above and defaulted by the 'api' config section.
    count_max = configuration.get_config_int('api', 'count_max')
    count_type = inputs.int_range(low=0, high=count_max)
    count_default = configuration.get_config_int('api', 'count_default')
    parser.add_argument('n',
                        type=count_type,
                        dest='count',
                        required=False,
                        default=count_default)

    # Accepts the empty string or non-empty segments separated by '|'.
    seed_type = inputs.regex(r'^([^|]+(\|[^|]+)*)?$')
    parser.add_argument('article',
                        type=seed_type,
                        dest='seed',
                        required=False)

    parser.add_argument('pageviews',
                        type=inputs.boolean,
                        dest='include_pageviews',
                        required=False,
                        default=True)

    search_choices = ['morelike', 'wiki']
    parser.add_argument('search',
                        type=str,
                        required=False,
                        default='morelike',
                        choices=search_choices)

    return parser
def get_pageview_query_url(source, title):
    """Return the pageview query string for a single article.

    The query template and the start/end day offsets are read from the
    ``single_article_pageviews`` config section.  Each offset is passed
    through ``get_relative_timestamp`` and the results fill the
    ``start`` and ``end`` placeholders of the template, alongside
    ``source`` and ``title``.
    """
    section = 'single_article_pageviews'
    start_days = configuration.get_config_int(section, 'start_days')
    end_days = configuration.get_config_int(section, 'end_days')
    template = configuration.get_config_value(section, 'query')

    return template.format(
        source=source,
        title=title,
        start=get_relative_timestamp(start_days),
        end=get_relative_timestamp(end_days),
    )
Ejemplo n.º 4
0
def get_pageview_query_url(source, title):
    """Return the pageview query string for a single article.

    Reads the start/end day offsets and the query template from the
    ``single_article_pageviews`` config section, converts both offsets
    with ``get_relative_timestamp`` and substitutes ``source``,
    ``title``, ``start`` and ``end`` into the template.
    """
    section = 'single_article_pageviews'

    # Offsets are read start-first, matching the placeholder order below.
    day_offsets = [
        configuration.get_config_int(section, key)
        for key in ('start_days', 'end_days')
    ]
    template = configuration.get_config_value(section, 'query')

    start, end = (get_relative_timestamp(offset) for offset in day_offsets)
    return template.format(source=source, title=title, start=start, end=end)
Ejemplo n.º 5
0
def get_most_popular_articles(source, campaign=''):
    """Fetch the most viewed articles for ``source`` from the pageview API.

    The query template, the date format and the number of days to look
    back are read from the ``popular_pageviews`` config section.  The
    queried date is the current UTC time minus that many days.

    Args:
        source: Value substituted for ``{source}`` in the query template.
        campaign: Accepted for interface compatibility; not used here.

    Returns:
        A list of ``{'title': ..., 'pageviews': ...}`` dicts, or an empty
        list when the request raises ``ValueError`` or the response does
        not have the expected ``items[0]['articles']`` layout.
    """
    section = 'popular_pageviews'
    days = configuration.get_config_int(section, 'days')
    date_format = configuration.get_config_value(section, 'date_format')
    template = configuration.get_config_value(section, 'query')

    target_day = datetime.datetime.utcnow() - datetime.timedelta(days=days)
    url = template.format(source=source,
                          date=target_day.strftime(date_format))

    try:
        data = get(url)
    except ValueError:
        log.info('pageview query failed')
        return []

    # The response must carry a non-empty 'items' list whose first entry
    # has an 'articles' key; anything else is treated as unknown.
    has_articles = ('items' in data
                    and len(data['items']) >= 1
                    and 'articles' in data['items'][0])
    if not has_articles:
        log.info('pageview data is not in a known format')
        return []

    return [
        {'title': entry['article'], 'pageviews': entry['views']}
        for entry in data['items'][0]['articles']
    ]
def get_v1_articles_params():
    """Build the request parser for the v1 articles endpoint.

    Registers, in order: a mandatory ``source``, an optional ``count``
    bounded and defaulted by the ``api`` config section, and a mandatory
    ``seed`` made of one or more non-empty segments separated by ``|``.

    Returns:
        The populated ``reqparse.RequestParser``.
    """
    parser = reqparse.RequestParser()

    parser.add_argument('source', type=str, required=True)

    count_max = configuration.get_config_int('api', 'count_max')
    count_type = inputs.int_range(low=0, high=count_max)
    count_default = configuration.get_config_int('api', 'count_default')
    parser.add_argument('count',
                        type=count_type,
                        required=False,
                        default=count_default)

    # Unlike the optional seed of other parsers, this one may not be empty.
    seed_type = inputs.regex(r'^[^|]+(\|[^|]+)*$')
    parser.add_argument('seed', type=seed_type, required=True)

    return parser
def chunk_query_for_parameter(params, parameter, values):
    """
    Run ``query`` once per chunk of ``values`` and concatenate the results.

    ``values`` is split into consecutive chunks whose size comes from the
    ``external_api_parameters`` / ``wikidata_chunk_size`` config entry.
    For every chunk a copy of ``params`` is made and ``parameter`` is set
    on it to the chunk's items joined with ``|``.

    Ex:
        chunk_query_for_parameter(
            {'foo': 'bar'},
            'additional',
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])

        with a chunk size of 3 issues these queries:

        query({'foo': 'bar', 'additional': 'a|b|c'})
        query({'foo': 'bar', 'additional': 'd|e|f'})
        query({'foo': 'bar', 'additional': 'g|h|i'})
        query({'foo': 'bar', 'additional': 'j'})

    The queries run in a process pool with one worker per chunk, and
    the per-chunk results are chained together in chunk order.  An empty
    list is returned when there is nothing to query.
    """
    chunk_size = configuration.get_config_int('external_api_parameters',
                                              'wikidata_chunk_size')

    # Handing zip_longest the same iterator ``chunk_size`` times makes each
    # output tuple consume that many consecutive items; the final tuple is
    # padded with None, which is filtered out again when joining.
    shared_iterator = iter(values)
    batches = itertools.zip_longest(*([shared_iterator] * chunk_size))

    payloads = []
    for batch in batches:
        payload = params.copy()
        payload[parameter] = '|'.join(
            value for value in batch if value is not None)
        payloads.append(payload)

    if not payloads:
        return []

    with multiprocessing.Pool(processes=len(payloads)) as pool:
        per_chunk_results = pool.map(query, payloads)
    return list(itertools.chain.from_iterable(per_chunk_results))
    def query_pageviews(self, s):
        """
        Query pageview API and parse results

        The query template, date format and number of days to look back
        come from the ``popular_pageviews`` config section; the queried
        date is the current UTC time minus that many days.

        Args:
            s: Value substituted for ``{source}`` in the query template.

        Returns:
            A list of ``(article, views)`` tuples.  The list is empty when
            the request raises ``ValueError``, and holds whatever was
            parsed so far when the response has an unexpected layout.
        """
        days = configuration.get_config_int('popular_pageviews', 'days')
        date_format = configuration.get_config_value('popular_pageviews', 'date_format')
        query = configuration.get_config_value('popular_pageviews', 'query')
        date = (datetime.datetime.utcnow() - datetime.timedelta(days=days)).strftime(date_format)
        query = query.format(source=s, date=date)
        try:
            data = data_fetcher.get(query)
        except ValueError:
            return []

        article_pv_tuples = []

        try:
            for d in data['items'][0]['articles']:
                article_pv_tuples.append((d['article'], d['views']))
        except (KeyError, IndexError, TypeError):
            # Only lookup failures caused by an unexpected payload are
            # treated as "no data".  A bare ``except`` here would also
            # swallow KeyboardInterrupt/SystemExit and hide real bugs.
            log.info('Could not get most popular articles for %s from pageview API. Try using a seed article.', s)

        return article_pv_tuples
        args = get_v1_articles_params().parse_args()
        recs = process_request(args)
        if len(recs) == 0:
            abort_no_candidates()
        return recs


# Record type for one recommended item.  The namedtuple's type name is
# 'Item' (so ``ItemSpec.__name__ == 'Item'``) and it has two fields,
# ``wikidata_id`` and ``score``.
ItemSpec = collections.namedtuple('Item', ['wikidata_id', 'score'])

# Request parser for the v1 items endpoint, built once at import time.
v1_items_params = reqparse.RequestParser()

# ``seed`` is mandatory; ``count`` is optional, bounded above by the
# configured 'api'/'count_max' and defaulting to 'api'/'count_default'.
v1_items_params.add_argument('seed', type=str, required=True)
v1_items_params.add_argument(
    'count',
    type=inputs.int_range(low=0,
                          high=configuration.get_config_int(
                              'api', 'count_max')),
    required=False,
    default=configuration.get_config_int('api', 'count_default'))

# Response model registered on ``v1`` under the name 'Item'.  An ItemSpec
# is filled with field descriptors instead of values and converted with
# ``_asdict()`` so the model's keys always match the namedtuple's fields.
v1_items_model = v1.model(
    ItemSpec.__name__,
    ItemSpec(wikidata_id=fields.String(description='wikidata_id',
                                       required=True),
             score=fields.Float(description='score', required=True))._asdict())

# Human-readable description and per-parameter help text for the endpoint.
# NOTE(review): presumably passed to the API's doc decorator — confirm at
# the usage site.
v1_items_doc = dict(
    description=
    'Gets recommendations of Wikidata items that are related to a seed item',
    params=dict(seed='Seed Wikidata item',
                count='Number of recommendations to fetch'))