Example #1
0
def api_explorer_demo_compare_words():
    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None

    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        compared_sample_queries = sample_searches[search_id]['queries']
        results = []
        for cq in compared_sample_queries:
            solr_q, solr_fq = parse_query_with_keywords(cq)
            word_count_result = query_wordcount(solr_q, solr_fq)
            results.append(word_count_result)
    else:
        compared_queries = request.args['compared_queries[]'].split(',')
        results = []
        for cq in compared_queries:
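            # each compared query arrives as a URL-style string ("?key=value&..."), so strip the
            # leading "?" and split the key=value pairs into a dict before parsing keywords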
            dictq = {
                x[0]: x[1]
                for x in [x.split("=") for x in cq[1:].split("&")]
            }
            solr_q, solr_fq = parse_query_with_keywords(dictq)
            word_count_result = query_wordcount(solr_q, solr_fq)
            results.append(word_count_result)

    return jsonify({"results": results})
def api_explorer_story_split_count_csv():
    filename = 'stories-over-time'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
        filename = filename  # don't have this info + current_query['q']
        SAMPLE_SEARCHES = load_sample_searches()
        queries = SAMPLE_SEARCHES[data['searchId']]['queries']
    else:
        queries = json.loads(data['queries'])
    label = " ".join([q['label'] for q in queries])
    filename = file_name_for_download(label, filename)
    # now compute total attention for all results
    story_count_results = []
    for q in queries:
        solr_q, solr_fq = parse_query_with_keywords(q)
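        # build an unfiltered ('*') query over the same sources and collections so the matching count can be normalized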
        solr_open_query = concatenate_query_for_solr(solr_seed_query='*', media_ids=q['sources'],
                                                     tags_ids=q['collections'])
        story_counts = apicache.normalized_and_story_count(solr_q, solr_fq, solr_open_query)
        story_count_results.append({
            'date': q['startDate'],
            'query': q['label'],
            'matching_stories': story_counts['total'],
            'total_stories': story_counts['normalized_total'],
            'ratio': float(story_counts['total']) / float(story_counts['normalized_total'])
        })
    props = ['date', 'query', 'matching_stories', 'total_stories', 'ratio']
    return csv.stream_response(story_count_results, props, filename)
def api_explorer_story_sample():
    solr_q, solr_fq = parse_query_with_keywords(request.args)
    story_sample_result = apicache.random_story_list(solr_q, solr_fq, SAMPLE_STORY_COUNT)

    for story in story_sample_result:
        story["media"] = server.views.apicache.media(story["media_id"])
    return jsonify({"results": story_sample_result})
def api_explorer_story_sample():
    solr_q, solr_fq = parse_query_with_keywords(request.args)
    story_sample_result = apicache.random_story_list(solr_q, solr_fq, SAMPLE_STORY_COUNT)

    for story in story_sample_result:
        story["media"] = apicache.media(story["media_id"])
    return jsonify(story_sample_result)
Example #5
0
def api_explorer_sentences_list():
    around_word = 'word' in request.form
    if only_queries_reddit(request.form):
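        # reddit-only queries are served by the Pushshift provider rather than Solr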
        start_date, end_date = parse_query_dates(request.form)
        provider = RedditPushshiftProvider()
        results = provider.samples(query=request.args['q'],
                                   start_date=start_date,
                                   end_date=end_date,
                                   subreddits=NEWS_SUBREDDITS)
        results = [{
            'sentence': r['title'],
            'publish_date': r['publish_date'],
            'story': r,
        } for r in results]
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.form)
        # so we can support large samples or just a few to show
        rows = int(request.form['rows']) if 'rows' in request.form else 10
        results = apicache.sentence_list(solr_q,
                                         solr_fq,
                                         rows=rows,
                                         include_stories=(not around_word))
    if around_word:
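        # a focus word was provided, so return just the sentence fragment surrounding that word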
        word = request.form['word']
        results = [
            _sentence_fragment_around(word, s['sentence']) for s in results
            if s['sentence'] is not None
        ]
        results = [s for s in results if s is not None]
    return jsonify({'results': results})
def api_explorer_demo_sentences_count():
    two_weeks_before_now = datetime.datetime.now() - datetime.timedelta(
        days=14)
    start_date = two_weeks_before_now.strftime("%Y-%m-%d")
    end_date = datetime.datetime.now().strftime("%Y-%m-%d")
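    # default to the most recent two weeks unless a sample search below supplies its own date range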

    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None
    index = int(request.args['index']) if 'index' in request.args else None

    if isinstance(search_id, int) and search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_query = parse_query_with_args_and_sample_search(
            request.args, current_search)

        if index < len(current_search):
            start_date = current_search[index]['startDate']
            end_date = current_search[index]['endDate']
    else:
        solr_query = parse_query_with_keywords(request.args)
    # why is this call fundamentally different than the cache call???
    sentence_count_result = mc.sentenceCount(solr_query=solr_query,
                                             split_start_date=start_date,
                                             split_end_date=end_date,
                                             split=True)
    results = cached_by_query_sentence_counts(solr_query, start_date, end_date)

    return jsonify(results)
Example #7
0
def explorer_story_count_csv():
    filename = u'total-story-count'
    data = request.form
    if 'searchId' in data:
        # TODO: don't load this query twice because that is kind of dumb
        sample_searches = load_sample_searches()
        queries = sample_searches[data['searchId']]['queries']
    else:
        queries = json.loads(data['queries'])
    label = " ".join([q['label'] for q in queries])
    filename = file_name_for_download(label, filename)
    # now compute total attention for all results
    story_count_results = []
    for q in queries:
        solr_q, solr_fq = parse_query_with_keywords(q)
        solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                     media_ids=q['sources'],
                                                     tags_ids=q['collections'])
        story_counts = apicache.normalized_and_story_count(
            solr_q, solr_fq, solr_open_query)
        story_count_results.append({
            'query': q['label'],
            'matching_stories': story_counts['total'],
            'total_stories': story_counts['normalized_total'],
            'ratio': float(story_counts['total']) / float(story_counts['normalized_total'])
        })
    props = ['query', 'matching_stories', 'total_stories', 'ratio']
    return csv.stream_response(story_count_results, props, filename)
def explorer_story_count_csv():
    filename = 'total-story-count'
    data = request.form
    if 'searchId' in data:
        queries = SAMPLE_SEARCHES[data['searchId']]['queries']
    else:
        queries = json.loads(data['queries'])
    label = " ".join([q['label'] for q in queries])
    filename = file_name_for_download(label, filename)
    # now compute total attention for all results
    story_count_results = []
    for q in queries:
        if (len(q['collections']) == 0) and only_queries_reddit(q['sources']):
            start_date, end_date = parse_query_dates(q)
            story_counts = pushshift.reddit_submission_normalized_and_split_story_count(query=q['q'],
                                                                                        start_date=start_date,
                                                                                        end_date=end_date,
                                                                                        subreddits=pushshift.NEWS_SUBREDDITS)
        else:
            solr_q, solr_fq = parse_query_with_keywords(q)
            solr_open_query = concatenate_query_for_solr(solr_seed_query='*', media_ids=q['sources'],
                                                         tags_ids=q['collections'])
            story_counts = apicache.normalized_and_story_count(solr_q, solr_fq, solr_open_query)
        story_count_results.append({
            'query': q['label'],
            'matching_stories': story_counts['total'],
            'total_stories': story_counts['normalized_total'],
            'ratio': float(story_counts['total']) / float(story_counts['normalized_total'])
        })
    props = ['query', 'matching_stories', 'total_stories', 'ratio']
    return csv.stream_response(story_count_results, props, filename)
def explorer_story_count_csv():
    filename = 'total-story-count'
    data = request.form
    if 'searchId' in data:
        # TODO: don't load this query twice because that is kind of dumb
        sample_searches = load_sample_searches()
        queries = sample_searches[data['searchId']]['queries']
    else:
        queries = json.loads(data['queries'])
    label = " ".join([q['label'] for q in queries])
    filename = file_name_for_download(label, filename)
    # now compute total attention for all results
    story_count_results = []
    for q in queries:
        solr_q, solr_fq = parse_query_with_keywords(q)
        solr_open_query = concatenate_query_for_solr(solr_seed_query='*', media_ids=q['sources'],
                                                     tags_ids=q['collections'])
        story_counts = apicache.normalized_and_story_count(solr_q, solr_fq, solr_open_query)
        story_count_results.append({
            'query': q['label'],
            'matching_stories': story_counts['total'],
            'total_stories': story_counts['normalized_total'],
            'ratio': float(story_counts['total']) / float(story_counts['normalized_total'])
        })
    props = ['query', 'matching_stories', 'total_stories', 'ratio']
    return csv.stream_response(story_count_results, props, filename)
Example #10
0
def stream_story_count_csv(fn, search_id_or_query_list):
    '''
    Helper method to stream a list of story counts back to the client as a csv. Any args you pass in
    will simply be passed on to the underlying story count call.
    '''
    # if we have a search id, we load the samples from our sample searches file
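    # otherwise the argument is a JSON-encoded list of custom queries, handled in the except branch below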
    filename = ''
    story_count_results = []
    SAMPLE_SEARCHES = load_sample_searches()
    try:
        search_id = int(search_id_or_query_list)
        if search_id >= 0:
            SAMPLE_SEARCHES = load_sample_searches()

            sample_queries = SAMPLE_SEARCHES[search_id]['queries']

            for query in sample_queries:
                solr_query = prep_simple_solr_query(query)
                storyList = cached_story_count(solr_query)
                query_and_story_count = {'query': query['label'], 'count': storyList['count']}
                story_count_results.append(query_and_story_count)

    except Exception as e:
        custom_queries = json.loads(search_id_or_query_list)

        for query in custom_queries:
            solr_query = parse_query_with_keywords(query)
            filename = fn + query['q']

            storyList = cached_story_count(solr_query)
            query_and_story_count = {'query': query['label'], 'count': storyList['count']}
            story_count_results.append(query_and_story_count)
    
    props = ['query', 'count']
    return csv.stream_response(story_count_results, props, filename)
Example #11
0
def _get_word_count():
    sample_size = int(
        request.form['sampleSize']
    ) if 'sampleSize' in request.form else WORD_COUNT_SAMPLE_SIZE
    solr_q, solr_fq = parse_query_with_keywords(request.form)
    word_data = query_wordcount(solr_q, solr_fq, sample_size=sample_size)
    # return combined data
    return jsonify({"results": word_data, "sample_size": str(sample_size)})
Example #12
0
def api_explorer_compare_words():
    compared_queries = request.args['compared_queries[]'].split(',')
    results = []
    for cq in compared_queries:
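        # each entry is a URL-style query string; drop the leading "?" and turn the key=value pairs into a dict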
        dictq = {x[0]: x[1] for x in [x.split("=") for x in cq[1:].split("&")]}
        solr_q, solr_fq = parse_query_with_keywords(dictq)
        word_count_result = query_wordcount(solr_q, solr_fq)
        results.append(word_count_result)
    return jsonify({"list": results})  
Example #13
0
def api_explorer_compare_words():
    compared_queries = request.args['compared_queries[]'].split(',')
    results = []
    for cq in compared_queries:
        dictq = {x[0]: x[1] for x in [x.split("=") for x in cq[1:].split("&")]}
        solr_q, solr_fq = parse_query_with_keywords(dictq)
        word_count_result = query_wordcount(solr_q, solr_fq)
        results.append(word_count_result)
    return jsonify({"list": results})
Example #14
0
def explorer_geo_csv():
    filename = 'sampled-geographic-coverage'
    data = request.form
    query_object = json.loads(data['q'])
    solr_q, solr_fq = parse_query_with_keywords(query_object)
    filename = file_name_for_download(query_object['label'], filename)
    data = apicache.top_tags_with_coverage(solr_q, solr_fq, tags.GEO_TAG_SET)
    props = ['tags_id', 'label', 'count', 'pct']
    return csv.stream_response(data['results'], props, filename)
Example #15
0
def _get_word_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    sample_size = int(request.args['sampleSize']) if 'sampleSize' in request.args else WORD_COUNT_SAMPLE_SIZE
    if search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    word_data = query_wordcount(solr_q, solr_fq, sample_size=sample_size)
    # return combined data
    return jsonify({"results": word_data, "sample_size": str(sample_size)})
Example #16
0
def api_explorer_demo_geotag_count():
    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    data = apicache.top_tags_with_coverage(solr_q, solr_fq, tags.GEO_TAG_SET)
    data['results'] = _filter_for_countries(data['results'])
    return jsonify(data)
Example #17
0
def api_explorer_sentences_list():
    solr_q, solr_fq = parse_query_with_keywords(request.args)
    rows = int(request.args['rows']) if 'rows' in request.args else 10   # so we can support large samples or just a few to show
    around_word = 'word' in request.args
    results = apicache.sentence_list(solr_q, solr_fq, rows=rows, include_stories=(not around_word))
    if around_word:
        word = request.args['word']
        results = [_sentence_fragment_around(word, s['sentence']) for s in results if s['sentence'] is not None]
        results = [s for s in results if s is not None]
    return jsonify({'results': results})
Example #18
0
def demo_top_tags_with_coverage(tag_sets_id):
    # parses the query for you
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_query_with_args_and_sample_search(request.args, current_search)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    return apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id)
Example #19
0
def demo_top_tags_with_coverage(tag_sets_id):
    # parses the query for you
    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None
    query_index = int(
        request.args['index']) if 'index' in request.args else None
    if (query_index is None) and (search_id not in [None, -1]):
        solr_q, solr_fq = parse_as_sample(search_id, request.args)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    return apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id)
Example #20
0
def explorer_entities_csv(tag_sets_id):
    tag_set = base_apicache.tag_set(tag_sets_id)
    filename = 'sampled-{}'.format(tag_set['label'])
    data = request.form
    query_object = json.loads(data['q'])
    solr_q, solr_fq = parse_query_with_keywords(query_object)
    filename = file_name_for_download(query_object['label'], filename)
    top_tag_counts = apicache.top_tags_with_coverage(
        solr_q, solr_fq, tag_sets_id, TAG_COUNT_DOWNLOAD_LENGTH)['results']
    return csv.stream_response(top_tag_counts, ENTITY_DOWNLOAD_COLUMNS,
                               filename)
Example #21
0
def api_explorer_demo_geotag_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_q, solr_fq = parse_query_with_args_and_sample_search(request.args, current_search)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    data = apicache.top_tags_with_coverage(solr_q, solr_fq, tags.GEO_TAG_SET)
    data['results'] = _filter_for_countries(data['results'])
    return jsonify(data)
Example #22
0
def demo_top_tags_with_coverage(tag_sets_id):
    # parses the query for you
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    query_index = int(request.args['index']) if 'index' in request.args else None
    if (query_index is None) and (search_id not in [None, -1]):
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    return apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id)
Example #23
0
def api_explorer_demo_compare_words():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        compared_sample_queries = sample_searches[search_id]['queries']
        results = []
        for cq in compared_sample_queries:
            solr_q, solr_fq = parse_query_with_keywords(cq)
            word_count_result = query_wordcount(solr_q, solr_fq)
            results.append(word_count_result)
    else:
        compared_queries = request.args['compared_queries[]'].split(',')
        results = []
        for cq in compared_queries:
            dictq = {x[0]: x[1] for x in [x.split("=") for x in cq[1:].split("&")]}
            solr_q, solr_fq = parse_query_with_keywords(dictq)
            word_count_result = query_wordcount(solr_q, solr_fq)
            results.append(word_count_result)

    return jsonify({"results": results})
Example #24
0
def explorer_wordcount_csv():
    data = request.form
    ngram_size = data['ngramSize'] if 'ngramSize' in data else 1    # default to words (ngram size 1) if not specified
    sample_size = data['sample_size'] if 'sample_size' in data else WORD_COUNT_SAMPLE_SIZE
    filename = 'sampled-{}-ngrams-{}'.format(sample_size, ngram_size)
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    return stream_wordcount_csv(filename, solr_q, solr_fq, ngram_size, sample_size)
Example #25
0
def explorer_entities_csv(tag_sets_id):
    tag_set = apicache.tag_set(tag_sets_id)
    filename = 'sampled-{}'.format(tag_set['label'])
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    top_tag_counts = apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id, TAG_COUNT_DOWNLOAD_LENGTH)['results']
    return csv.stream_response(top_tag_counts, ENTITY_DOWNLOAD_COLUMNS, filename)
Example #26
0
def explorer_wordcount_csv():
    data = request.form
    ngram_size = data[
        'ngramSize'] if 'ngramSize' in data else 1  # default to words (ngram size 1) if not specified
    sample_size = data[
        'sample_size'] if 'sample_size' in data else WORD_COUNT_SAMPLE_SIZE
    filename = 'sampled-{}-ngrams-{}'.format(sample_size, ngram_size)
    query_object = json.loads(data['q'])
    solr_q, solr_fq = parse_query_with_keywords(query_object)
    filename = file_name_for_download(query_object['label'], filename)
    return stream_wordcount_csv(filename, solr_q, solr_fq, ngram_size,
                                sample_size)
Example #27
0
def api_explorer_demo_story_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_query = parse_query_with_args_and_sample_search(request.args, current_search)
    else:
        solr_query = parse_query_with_keywords(request.args)

    story_count_result = cached_story_count(solr_query)
    # maybe check admin role before we run this?
    return jsonify(story_count_result)  # give them back new data, so they can update the client
Example #28
0
def api_explorer_demo_story_sample():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    
    if search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_query = parse_query_with_args_and_sample_search(request.args, current_search)
    else:
        solr_query = parse_query_with_keywords(request.args)
 
    story_count_result = cached_story_samples(solr_query)
    return jsonify(story_count_result)  
Example #29
0
def explorer_wordcount_csv():
    data = request.form
    ngram_size = data[
        'ngramSize'] if 'ngramSize' in data else 1  # default to words (ngram size 1) if not specified
    filename = u'sampled-ngrams-{}'.format(ngram_size)
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    return stream_wordcount_csv(filename, solr_q, solr_fq, ngram_size)
def api_explorer_demo_story_sample():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)

    story_sample_result = apicache.random_story_list(solr_q, solr_fq, SAMPLE_STORY_COUNT)
    for story in story_sample_result:
        story["media"] = server.views.apicache.media(story["media_id"])
    return jsonify({"results": story_sample_result})
Example #31
0
def get_word_count():
    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_query_with_args_and_sample_search(
            request.args, current_search)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    word_data = query_wordcount(solr_q, solr_fq)
    # return combined data
    return jsonify({"list": word_data})
Example #32
0
def explorer_geo_csv():
    filename = u'sampled-geographic-coverage'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    data = apicache.top_tags_with_coverage(solr_q, solr_fq, tags.GEO_TAG_SET)
    data['results'] = _filter_for_countries(data['results'])
    props = ['label', 'count', 'pct', 'alpha3', 'iso-a2', 'geonamesId', 'tags_id', 'tag']
    return csv.stream_response(data['results'], props, filename)
Example #33
0
def _get_word_count():
    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None
    sample_size = int(
        request.args['sampleSize']
    ) if 'sampleSize' in request.args else WORD_COUNT_SAMPLE_SIZE
    if search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    word_data = query_wordcount(solr_q, solr_fq, sample_size=sample_size)
    # return combined data
    return jsonify({"results": word_data, "sample_size": str(sample_size)})
Example #34
0
def explorer_wordcount_csv():
    
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    
    if search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_query = parse_query_with_args_and_sample_search(request.args, current_search)
    else:
        solr_query = parse_query_with_keywords(request.args)
        # TODO what about other params: date etc for demo..

    return stream_wordcount_csv(mc, 'wordcounts-Explorer', solr_query)
def api_explorer_demo_story_sample():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_query_with_args_and_sample_search(request.args, current_search)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)

    story_sample_result = apicache.random_story_list(solr_q, solr_fq, SAMPLE_STORY_COUNT)
    for story in story_sample_result:
        story["media"] = apicache.media(story["media_id"])
    return jsonify(story_sample_result)
Example #36
0
def api_explorer_story_sample():
    if only_queries_reddit(request.form):
        start_date, end_date = parse_query_dates(request.form)
        provider = RedditPushshiftProvider()
        results = provider.sample(query=request.form['q'],
                                  start_date=start_date, end_date=end_date,
                                  subreddits=NEWS_SUBREDDITS)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.form)
        results = base_cache.story_list(None, solr_q, solr_fq, rows=SAMPLE_STORY_COUNT,
                                        sort=MediaCloud.SORT_RANDOM)
        for story in results:  # add in media info so we can show it to user if they click into the drill-down
            story["media"] = base_cache.media(story["media_id"])
    return jsonify({"results": results})
def explorer_stories_csv():
    filename = 'sampled-stories'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['uid'])
        filename = filename  # don't have this info + current_query['q']
        # for demo users we only download 100 random stories (ie. not all matching stories)
        return _stream_story_list_csv(filename, solr_q, solr_fq, 100, MediaCloud.SORT_RANDOM, 1)
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
        # now page through all the stories and download them
        return _stream_story_list_csv(filename, solr_q, solr_fq)
def explorer_stories_csv():
    filename = u'sampled-stories'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
        filename = filename  # don't have this info + current_query['q']
        # for demo users we only download 100 random stories (ie. not all matching stories)
        return _stream_story_list_csv(filename, solr_q, solr_fq, 100, MediaCloud.SORT_RANDOM, 1)
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
        # now page through all the stories and download them
        return _stream_story_list_csv(filename, solr_q, solr_fq)
def api_explorer_demo_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None

    # parse dates up front so both branches have them for the split-count call below
    start_date, end_date = parse_query_dates(request.args)
    if isinstance(search_id, int) and search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    # why is this call fundamentally different than the cache call???
    solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                 media_ids=[],
                                                 tags_ids=DEFAULT_COLLECTION_IDS)
    results = apicache.normalized_and_story_split_count(solr_q, solr_open_query, start_date, end_date)

    return jsonify({'results': results})
def api_explorer_demo_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None

    if isinstance(search_id, int) and search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    # why is this call fundamentally different than the cache call???
    solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                 media_ids=[],
                                                 tags_ids=DEFAULT_COLLECTION_IDS)
    results = apicache.normalized_and_story_split_count(solr_q, solr_fq, solr_open_query)

    return jsonify({'results': results})
def _get_word_count():
    search_id = int(
        request.args['search_id']) if 'search_id' in request.args else None
    sample_size = int(
        request.args['sample_size']
    ) if 'sample_size' in request.args else WORD_COUNT_SAMPLE_SIZE
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_query_with_args_and_sample_search(
            request.args, current_search)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    word_data = query_wordcount(solr_q, solr_fq, sample_size=sample_size)
    # return combined data
    return jsonify({"list": word_data, "sample_size": str(sample_size)})
Example #42
0
def stream_geo_csv(fn, search_id_or_query, index):
    filename = ''

    # TODO: there is duplicate code here...
    SAMPLE_SEARCHES = load_sample_searches()
    try:
        search_id = int(search_id_or_query)
        if search_id >= 0:
            SAMPLE_SEARCHES = load_sample_searches()
            current_search = SAMPLE_SEARCHES[search_id]['queries']
            solr_query = parse_query_with_args_and_sample_search(
                search_id, current_search)

            if int(index) < len(current_search):
                start_date = current_search[int(index)]['startDate']
                end_date = current_search[int(index)]['endDate']
                filename = fn + current_search[int(index)]['q']
    except Exception as e:
        # so far, we will only be fielding one keyword csv query at a time, so we can use index of 0
        query = json.loads(search_id_or_query)
        current_query = query[0]
        solr_query = parse_query_with_keywords(current_query)
        filename = fn + current_query['q']

    res = cached_geotags(solr_query)
    res = [
        r for r in res
        if int(r['tag'].split('_')[1]) in COUNTRY_GEONAMES_ID_TO_APLHA3.keys()
    ]
    for r in res:
        geonamesId = int(r['tag'].split('_')[1])
        if geonamesId not in COUNTRY_GEONAMES_ID_TO_APLHA3.keys():  # only include countries
            continue
        r['geonamesId'] = geonamesId
        r['alpha3'] = COUNTRY_GEONAMES_ID_TO_APLHA3[geonamesId]
        # WTF: why is the API returning this as a string and not a number?
        r['count'] = float(r['count']) / float(tag_utl.GEO_SAMPLE_SIZE)
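        # match against the Highcharts country metadata to pick up the two-letter ISO code the frontend map expects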
        for hq in HIGHCHARTS_KEYS:
            if hq['properties']['iso-a3'] == r['alpha3']:
                r['iso-a2'] = hq['properties']['iso-a2']
                r['value'] = r['count']

    props = ['label', 'count']
    return csv.stream_response(res, props, filename)
Example #43
0
def api_explorer_story_split_count():
    start_date, end_date = parse_query_dates(request.form)
    if only_queries_reddit(request.form):
        provider = RedditPushshiftProvider()
        results = provider.normalized_count_over_time(query=request.form['q'],
                                                      start_date=start_date, end_date=end_date,
                                                      subreddits=NEWS_SUBREDDITS)
    else:
        # get specific stories by keyword
        solr_q, _solr_fq = parse_query_with_keywords(request.form)
        # get all the stories (no keyword) so we can support normalization
        solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                     media_ids=request.form['sources'],
                                                     tags_ids=request.form['collections'],
                                                     custom_ids=request.form['searches'])
        results = apicache.normalized_and_story_split_count(solr_q, solr_open_query, start_date, end_date)
    return jsonify({'results': results})
def api_explorer_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    index = int(request.args['index']) if 'index' in request.args else None

    #get specific stories by keyword
    if isinstance(search_id, int) and search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)

    # get all the stories (no keyword)
    solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                 media_ids=request.args['sources'],
                                                 tags_ids=request.args['collections'])
    results = apicache.normalized_and_story_split_count(solr_q, solr_fq, solr_open_query)

    return jsonify({'results': results})
Example #45
0
def top_entities_people():
    solr_q, solr_fq = parse_query_with_keywords(request.args)
    results = apicache.top_tags_with_coverage(solr_q, solr_fq, CLIFF_PEOPLE)
    return jsonify(results)
Example #46
0
def top_entities_organizations():
    solr_q, solr_fq = parse_query_with_keywords(request.args)
    results = apicache.top_tags_with_coverage(solr_q, solr_fq, CLIFF_ORGS)
    return jsonify(results)
Example #47
0
def top_themes():
    solr_q, solr_fq = parse_query_with_keywords(request.args)
    results = apicache.top_tags_with_coverage(solr_q, solr_fq, NYT_LABELS_TAG_SET_ID)

    return jsonify(results)