Example 1
def query_network():
    """Handle API request '/network'.

    API Request Parameters
    ----------------------
        ids : list of int
        nodes_limit : int
        edges_limit : int
        include_user_mentions : bool

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        edges : dict
            canonical_url : string
            date_published : string formatted datetime
            domain : string
            from_user_id : string
            from_user_screen_name : string
            id : int
            is_mention : bool
            site_type : {'claim', 'fact_checking'}
            title : string
            to_user_id : string
            to_user_screen_name : string
            tweet_created_at : string formatted datetime
            tweet_id : string
            tweet_type : {'origin', 'retweet', 'quote', 'reply'}
    """
    lucene.getVMEnv().attachCurrentThread()
    q_network_schema = Schema({
        'ids': Use(flask.json.loads),
        Optional('nodes_limit', default=1000): And(Use(int), lambda i: i > 0),
        Optional('edges_limit', default=12500): And(Use(int), lambda i: i > 0),
        Optional('include_user_mentions', default=True): And(
            # accept 'true' / 'false' (case-insensitive) and convert to bool
            str, Use(lambda s: s.lower()),
            lambda s: s in ('true', 'false'),
            Use(lambda s: s == 'true')),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_network_schema.validate(q_kwargs)
        df = db_query_network(engine, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No edge could be built!')
        response = dict(status='OK',
                        num_of_entries=len(df),
                        edges=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
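
The docstring above lists the request parameters and response keys for the '/network' endpoint. Below is a minimal client-side sketch, assuming the service runs on a hypothetical http://localhost:5000 and that the route is '/network' (both the base URL and route are assumptions, not taken from the example):

import json

import requests

BASE_URL = 'http://localhost:5000'  # hypothetical host and port

params = {
    # 'ids' is decoded server-side with flask.json.loads, so send a JSON list
    'ids': json.dumps([1, 2, 3]),
    'nodes_limit': 1000,
    'edges_limit': 12500,
    # validated as the string 'true' or 'false' before being cast to bool
    'include_user_mentions': 'true',
}
resp = requests.get(BASE_URL + '/network', params=params)
data = resp.json()
if data['status'] == 'OK':
    print(data['num_of_entries'], 'edge records returned')
    # each record carries the keys documented above, e.g. 'from_user_id',
    # 'to_user_id', 'tweet_type', 'tweet_created_at', ...
else:
    print('query failed:', data.get('error'))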
Example 2
def test_api_flow(n1=N1,
                  n2=N2,
                  min_score=MIN_SCORE,
                  min_date_published=STRAMING_START_AT,
                  query='',
                  sort_by='relevant',
                  use_lucene_syntax=False):
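    """Benchmark the query pipeline: Lucene search, disabled-site filtering,
    tweet-share counting, and network building with the old and new APIs.

    Returns a dict of per-stage elapsed times in seconds, or None on failure.
    """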
    try:
        t0 = time.time()
        logger.debug('Starting lucene query %r at %r', query, t0)
        n, df = searcher.search(
            n1=n1,
            n2=n2,
            min_score_of_recent_sorting=min_score,
            min_date_published=min_date_published,
            query=query,
            sort_by=sort_by,
            use_lucene_syntax=use_lucene_syntax)
        t1 = time.time()
        logger.debug('Starting filtering disabled sites at %r', t1)
        df = db_query_filter_disabled_site(engine, df)
        t2 = time.time()
        logger.debug('Starting querying number tweets sharing at %r', t2)
        df = db_query_twitter_shares(engine, df)
        if len(df) == 0:
            raise APINoResultError('No article found!')
        # sort dataframe by 'number_of_tweets'
        df = df.sort_values('number_of_tweets', ascending=False)
        t3 = time.time()
        ids = df.iloc[:10].id.tolist()
        logger.debug('Starting querying network %s by old api at %r', ids, t3)
        df = db_query_network_old(engine, ids=ids)
        t4 = time.time()
        logger.debug('Starting querying network %s by new api at %r', ids, t4)
        df = db_query_network(engine, ids=ids)
        t5 = time.time()
    except Exception as e:
        logger.error(e)
        return None
    return dict(
        t0_lucene_query=(t1 - t0),
        t1_article_filtering=(t2 - t1),
        t2_article_sharing=(t3 - t2),
        t3_network_building_old=(t4 - t3),
        t4_network_building_new=(t5 - t4))
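
The function above returns per-stage timings. A small driver sketch that averages them over several runs, assuming test_api_flow is defined as above and using purely illustrative query strings:

def summarize_timings(queries):
    """Average the per-stage timings returned by test_api_flow over queries."""
    results = [test_api_flow(query=q) for q in queries]
    results = [r for r in results if r is not None]  # drop failed runs
    if not results:
        return {}
    return {k: sum(r[k] for r in results) / len(results) for k in results[0]}

# Illustrative queries only; replace with terms relevant to your corpus.
# print(summarize_timings(['vaccine', 'climate change', 'election']))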
Example 3
from hoaxy.database import ENGINE as engine
from hoaxy.ir.search import db_query_network

import logging

# emit INFO messages to the console when run as a script
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

gids = [1, 2, 3, 4, 5]

df1 = db_query_network(engine, ids=gids)
# df2 = db_query_network_old(engine, ids=gids)

# number of retweet edges
unweighted_retweet_cols = ['tweet_id', 'from_user_id', 'to_user_id']
logger.info('New api, number of unweighted retweet edges is %s',
            len(df1[unweighted_retweet_cols].drop_duplicates()))
# logger.info('Old api, number of unweighted retweet edges is %s',
#         len(df2[unweighted_retweet_cols].drop_duplicates()))

cols = list(df1.columns.values)
cols.remove('url_id')

s1 = set(tuple(x) for x in df1[cols].values)
# s2 = set(tuple(x) for x in df2[cols].values)
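
The commented lines suggest the script was meant to compare the new result set s1 against the old API. A sketch of how that comparison could be completed, assuming db_query_network_old is importable from hoaxy.ir.search (an assumption; the old-API calls are commented out above, presumably because it may not be available):

try:
    from hoaxy.ir.search import db_query_network_old  # assumed import path
except ImportError:
    db_query_network_old = None

if db_query_network_old is not None:
    df2 = db_query_network_old(engine, ids=gids)
    logger.info('Old api, number of unweighted retweet edges is %s',
                len(df2[unweighted_retweet_cols].drop_duplicates()))
    s2 = set(tuple(x) for x in df2[cols].values)
    # edges present in one result set but not in the other
    logger.info('Edges only in the new API: %s', len(s1 - s2))
    logger.info('Edges only in the old API: %s', len(s2 - s1))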