Example #1
0
def query_network():
    """Serve the '/network' API endpoint.

    The request arguments are validated against a schema, forwarded to
    `db_query_network`, and the returned rows are serialized into the JSON
    response.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON encoded, required.
        nodes_limit : int
            Positive integer, defaults to 1000.
        edges_limit : int
            Positive integer, defaults to 12500.
        include_user_mentions : bool
            'true' or 'false' (case-insensitive), defaults to True.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise 'ERROR' or 'No result error'.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        edges : dict
            canonical_url : string
            date_published : string formatted datetime
            domain : string
            from_user_id : string
            from_user_screen_name : string
            id : int
            is_mention : bool
            site_type : {'claim', 'fact_checking'}
            title : string
            to_user_id : string
            to_user_screen_name : string
            tweet_created_at : string formatted datetime
            tweet_id: string
            tweet_type: {'origin', 'retweet', 'quote', 'reply'}
    """
    lucene.getVMEnv().attachCurrentThread()

    # Turns the textual flag 'true'/'false' (any case) into a real boolean.
    bool_flag = And(
        unicode,
        Use(lambda s: s.lower()),
        lambda s: s in ('true', 'false'),
        Use(lambda s: s == 'true'))
    schema = Schema({
        'ids': Use(flask.json.loads),
        Optional('nodes_limit', default=1000):
        And(Use(int), lambda i: i > 0),
        Optional('edges_limit', default=12500):
        And(Use(int), lambda i: i > 0),
        Optional('include_user_mentions', default=True): bool_flag,
    })
    raw_args = copy_req_args(request.args)
    try:
        params = schema.validate(raw_args)
        edges_df = db_query_network(engine, **params)
        n_rows = len(edges_df)
        if n_rows == 0:
            raise APINoResultError('No edge could be built!')
        response = {
            'status': 'OK',
            'num_of_entries': n_rows,
            'edges': flask.json.loads(edges_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        # Invalid request arguments.
        response = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        response = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Anything unexpected is logged and hidden behind a generic message.
        logger.exception(err)
        response = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(response)
Example #2
0
def query_top_spreaders():
    """Handle API request '/top-user'.

    Validates the request arguments, delegates to `db_query_top_spreaders`
    and wraps the returned rows into a JSON response.

    API Request Parameters
    ----------------------
        upper_day : string formatted datetime
            Must match yyyy-mm-dd; defaults to yesterday (UTC).
        most_recent : bool
            'true' or 'false' (case-insensitive); defaults to True.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise 'ERROR' or 'No result error'.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        spreaders : dict
            bot_score : float
            number_of_tweets : int
            site_type : {'claim', 'fact_checking'}
            spreading_type : {'active', 'influencial'}
            upper_day : string formatted datetime
            user_id : int
            user_raw_id : string
            user_screen_name : string

    """
    lucene.getVMEnv().attachCurrentThread()
    yesterday = datetime.utcnow().date() - timedelta(days=1)
    yesterday = yesterday.strftime('%Y-%m-%d')

    # NOTE(review): a supplied `upper_day` is converted to a datetime by
    # `dateutil.parser.parse`, whereas the default stays a 'yyyy-mm-dd'
    # string (schema defaults presumably bypass the validators) -- confirm
    # that `db_query_top_spreaders` accepts both types.
    q_top_spreaders_schema = Schema({
        Optional('upper_day', default=yesterday):
        # Raw string so that `\d` is a regex token, not a string escape.
        And(Regex(r'^\d{4}-\d{2}-\d{2}$'),
            Use(dateutil.parser.parse),
            error='Invalid date, should be yyyy-mm-dd format'),
        Optional('most_recent', default=True):
        And(unicode,
            Use(lambda s: s.lower()), lambda s: s in ('true', 'false'),
            Use(lambda s: s == 'true')),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_top_spreaders_schema.validate(q_kwargs)
        df = db_query_top_spreaders(engine, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No top spreader found!')
        response = dict(
            status='OK',
            num_of_entries=len(df),
            spreaders=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        # Invalid request arguments.
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Unexpected failure: log the traceback, return a generic message.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
Example #3
0
def query_top_articles():
    """Handle API request 'top-articles'

    Validates the request arguments, delegates to `db_query_top_articles`
    and wraps the returned rows into a JSON response.

    API Request Parameters
    ----------------------
        upper_day : string formatted datetime
            Must match yyyy-mm-dd; defaults to yesterday (UTC).
        most_recent : bool
            'true' or 'false' (case-insensitive); defaults to True.
        exclude_tags : string
            A Python literal (e.g. a list) describing the tags to exclude;
            defaults to an empty list. Only literals are accepted, arbitrary
            expressions are rejected.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise 'ERROR' or 'No result error'.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        articles : dict
            canonical_url : string
            date_captured : string formatted datetime
            number_of_tweets : int
            site_type : {'claim', 'fact_checking'}
            title : string
            upper_day : string formatted datetime
    """
    # Local import: `ast` is only needed to parse `exclude_tags` safely.
    import ast

    lucene.getVMEnv().attachCurrentThread()
    yesterday = datetime.utcnow().date() - timedelta(days=1)
    yesterday = yesterday.strftime('%Y-%m-%d')
    # NOTE(review): a supplied `upper_day` is converted to a datetime by
    # `dateutil.parser.parse`, whereas the default stays a 'yyyy-mm-dd'
    # string (schema defaults presumably bypass the validators) -- confirm
    # that `db_query_top_articles` accepts both types.
    q_top_article_schema = Schema({
        Optional('upper_day', default=yesterday):
        # Raw string so that `\d` is a regex token, not a string escape.
        And(Regex(r'^\d{4}-\d{2}-\d{2}$'),
            Use(dateutil.parser.parse),
            error='Invalid date, should be yyyy-mm-dd format'),
        Optional('most_recent', default=True):
        And(unicode,
            Use(lambda s: s.lower()), lambda s: s in ('true', 'false'),
            Use(lambda s: s == 'true')),
        # SECURITY: the value comes straight from the HTTP request, so it
        # must never be passed to `eval`. `ast.literal_eval` accepts the
        # same literal syntax (lists, tuples, strings, numbers) without
        # executing code.
        Optional('exclude_tags', default=[]):
        And(Use(ast.literal_eval),
            error='Invalid exclude_tags input format'),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_top_article_schema.validate(q_kwargs)
        df = db_query_top_articles(engine, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No top article found!')
        response = dict(
            status='OK',
            num_of_entries=len(df),
            articles=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        # Invalid request arguments.
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Unexpected failure: log the traceback, return a generic message.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
Example #4
0
def query_tweets():
    """Serve the '/tweets' API endpoint.

    The `ids` request argument is decoded from JSON, checked to be
    non-empty and handed to `db_query_tweets`; the returned rows are
    serialized into the JSON response.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON encoded, required, must not be empty.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise 'ERROR' or 'No result error'.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        tweets : dict
            canonical_url : string
            domain : string
            id : int
            date_published : string formatted datetime
            site_type : {'claim', 'fact_checking'}
            title : string
            tweet_created_at : string formatted datetime
            tweet_id : string
    """
    lucene.getVMEnv().attachCurrentThread()
    ids_validator = And(
        Use(flask.json.loads, error="Format error of `ids`"),
        lambda s: len(s) > 0,
        error='Empty of `ids`')
    schema = Schema({'ids': ids_validator})
    raw_args = copy_req_args(request.args)
    try:
        params = schema.validate(raw_args)
        tweets_df = db_query_tweets(engine, params['ids'])
        n_rows = len(tweets_df)
        if n_rows == 0:
            raise APINoResultError('No tweet found!')
        response = {
            'status': 'OK',
            'num_of_entries': n_rows,
            'tweets': flask.json.loads(tweets_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        # Invalid request arguments.
        response = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        response = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Anything unexpected is logged and hidden behind a generic message.
        logger.exception(err)
        response = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(response)
Example #5
0
def query_timeline():
    """Handle API '/timeline'.

    Fetches the tweets for the requested `ids` through `db_query_tweets`,
    counts the distinct tweets per time bucket separately for fact-checking
    and claim articles, and returns both series as cumulative volumes over
    a shared set of timestamps.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON encoded, required, must not be empty.
        resolution : character in 'HDWM'
            Exactly one of 'H', 'D', 'W' or 'M' (case-insensitive);
            defaults to 'D'. Used as the pandas resampling rule '1<char>'.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise 'ERROR' or 'No result error'.
        error : string
            Only present when `status` is not 'OK'.
        timeline : dict
            claim : dict
                timestamp : list of string formatted datetime
                volume : list of int
            fact_checking : dict
                timestamp : list of string formatted datetime
                volume : list of int
    """
    lucene.getVMEnv().attachCurrentThread()
    q_timeline_schema = Schema({
        'ids':
        And(Use(flask.json.loads, error="Format error of `ids`"),
            lambda s: len(s) > 0,
            error='Empty of `ids`'),
        Optional('resolution', default='D'):
        And(Use(lambda s: s.upper()),
            # Membership in a tuple, not in the string 'HDWM': a substring
            # test would also accept '', 'HD', 'DW', ... which are not
            # valid resampling rules.
            lambda s: s in ('H', 'D', 'W', 'M')),
    })

    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_timeline_schema.validate(q_kwargs)
        # Resampling rule, e.g. '1D' for daily buckets.
        rule = '1' + q_kwargs.pop('resolution')
        df = db_query_tweets(engine, q_kwargs['ids'])
        if len(df) == 0:
            raise APINoResultError('No tweet found!')
        df = df.set_index('tweet_created_at')
        # Distinct fact-checking tweets per bucket.
        df1 = df.loc[df['site_type'] == N_FACT_CHECKING]
        s1 = df1['tweet_id'].drop_duplicates()
        s1 = s1.resample(rule).count()
        # Distinct claim tweets per bucket.
        df2 = df.loc[df['site_type'] == N_CLAIM]
        s2 = df2['tweet_id'].drop_duplicates()
        s2 = s2.resample(rule).count()
        # Put both series on the union of their buckets (missing -> 0)
        # before accumulating, so the two timelines share timestamps.
        s1, s2 = s1.align(s2, join='outer', fill_value=0)
        s1 = s1.cumsum()
        s2 = s2.cumsum()
        response = dict(
            status='OK',
            timeline=dict(
                fact_checking=dict(
                    timestamp=s1.index.strftime('%Y-%m-%dT%H:%M:%SZ').tolist(),
                    volume=s1.tolist()),
                claim=dict(
                    timestamp=s2.index.strftime('%Y-%m-%dT%H:%M:%SZ').tolist(),
                    volume=s2.tolist())))
    except SchemaError as e:
        # Invalid request arguments.
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Unexpected failure: log the traceback, return a generic message.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
Example #6
0
def query_latest_articles():
    """Serve the '/latest-articles' API endpoint.

    The request arguments are validated and forwarded, together with the
    configured `selected_fake_domains_path`, to `db_query_latest_articles`;
    the returned rows are serialized into the JSON response.

    API Request Parameters
    ----------------------
    past_hours : int
        Positive number of hours, counted back from now, that defines the
        "latest" window. Required.
    domains : object
        If omitted (None), return all articles in the latest hours;
        If str, should be one of {'fact_checking', 'claim', 'fake'}:
            if 'fact_checking', return fact checking articles,
            if 'claim', return claim articles,
            if 'fake', return selected fake articles, which is a subset of
               claim, which is selected by us.
        Otherwise a JSON encoded array of domains; return articles
        belonging to these domains.

    Notes
    -----
    `domains_file` is not a request parameter: it is read from the `api`
    section of the configuration (`selected_fake_domains_path`) and passed
    to `db_query_latest_articles`. Presumably it lists the domains used
    when `domains` is 'fake', with `claim` domains as the fallback when the
    file is missing -- verify against `db_query_latest_articles`.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise one of 'Parameter error',
            'Invalide query', 'No result error' or 'ERROR'.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                site_type : {'claim', 'fact_checking'}
                title : string
    """
    lucene.getVMEnv().attachCurrentThread()
    # Request validation rules.
    past_hours_rule = And(
        Use(int), lambda x: x > 0, error='Invalid value of `past_hours`')
    domains_rule = Or(
        lambda s: s in ('fact_checking', 'claim', 'fake'),
        Use(flask.json.loads,
            error='Not valid values nor JSON string of `domains`'))
    schema = Schema({
        'past_hours': past_hours_rule,
        Optional('domains', default=None): domains_rule,
    })
    raw_args = copy_req_args(request.args)
    try:
        params = schema.validate(raw_args)
        fake_domains_path = CONF['api'].get('selected_fake_domains_path')
        articles_df = db_query_latest_articles(
            engine, domains_file=fake_domains_path, **params)
        n_rows = len(articles_df)
        if n_rows == 0:
            raise APINoResultError('No articles found!')
        response = {
            'status': 'OK',
            'num_of_entries': n_rows,
            'articles': flask.json.loads(
                articles_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        response = {'status': 'Parameter error', 'error': str(err)}
    except APIParseError as err:
        response = {'status': 'Invalide query', 'error': str(err)}
    except APINoResultError as err:
        response = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Anything unexpected is logged and hidden behind a generic message.
        logger.exception(err)
        response = {'status': 'ERROR', 'error': 'Server error, query failed!'}
    return flask.jsonify(response)
Example #7
0
def query_articles():
    """Serve the '/articles' API endpoint.

    The validated request arguments are passed to `searcher.search`; the
    hits are then run through `db_query_filter_disabled_site` and
    `db_query_twitter_shares`, sorted by `number_of_tweets` in descending
    order and serialized into the JSON response.

    API Request Parameters
    ----------------------
        query : string
            Required, must not be empty.
        sort_by : {'relevant', 'recent'}
            Defaults to 'relevant'.
        use_lucene_syntax : bool
            'true' or 'false' (case-insensitive); defaults to True.

    API Response Keys
    -----------------
        status : string
            'OK' on success, otherwise one of 'Parameter error',
            'Invalide query', 'No result error' or 'ERROR'.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        total_hits : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                number_of_tweets : int
                score : float
                site_type : {'claim', 'fact_checking'}
                title : string

    """
    lucene.getVMEnv().attachCurrentThread()
    # Request validation rules.
    bool_flag = And(
        unicode,
        Use(lambda s: s.lower()),
        lambda s: s in ('true', 'false'),
        Use(lambda s: s == 'true'))
    schema = Schema({
        'query': lambda s: len(s) > 0,
        Optional('sort_by', default='relevant'):
        And(unicode, lambda s: s in ('relevant', 'recent')),
        Optional('use_lucene_syntax', default=True): bool_flag,
    })
    raw_args = copy_req_args(request.args)
    try:
        params = schema.validate(raw_args)
        total_hits, hits_df = searcher.search(
            n1=N1,
            n2=N2,
            min_score_of_recent_sorting=MIN_SCORE,
            min_date_published=STRAMING_START_AT,
            **params)
        hits_df = db_query_filter_disabled_site(engine, hits_df)
        hits_df = db_query_twitter_shares(engine, hits_df)
        if len(hits_df) == 0:
            raise APINoResultError('No article found!')
        # Most shared articles first.
        ranked_df = hits_df.sort_values('number_of_tweets', ascending=False)
        response = {
            'status': 'OK',
            'num_of_entries': len(ranked_df),
            'total_hits': total_hits,
            'articles': flask.json.loads(
                ranked_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        response = {'status': 'Parameter error', 'error': str(err)}
    except APIParseError as err:
        response = {'status': 'Invalide query', 'error': str(err)}
    except APINoResultError as err:
        response = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Anything unexpected is logged and hidden behind a generic message.
        logger.exception(err)
        response = {'status': 'ERROR', 'error': 'Server error, query failed!'}
    return flask.jsonify(response)