Beispiel #1
0
def query_tweets():
    """Serve the '/tweets' API endpoint.

    Validates the `ids` request argument, fetches the matching tweets
    with `db_query_tweets` and returns them as a JSON response.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON-encoded; must decode to a non-empty value.

    API Response Keys
    -----------------
        status : string
            'OK' on success, 'No result error' when no tweet is found,
            'ERROR' for validation or server failures.
        error : string
            Only present when `status` is not 'OK'.
        num_of_entries : int
        tweets : dict
            canonical_url : string
            domain : string
            id : int
            date_published : string formatted datetime
            site_type : {'claim', 'fact_checking'}
            title : string
            tweet_created_at : string formatted datetime
            tweet_id : string
    """
    # Attach the current thread to the Lucene JVM before handling the request.
    lucene.getVMEnv().attachCurrentThread()

    # `ids` arrives as a JSON string: decode it first, then reject empties.
    ids_rule = And(
        Use(flask.json.loads, error="Format error of `ids`"),
        lambda s: len(s) > 0,
        error='Empty of `ids`')
    schema = Schema({'ids': ids_rule})

    raw_args = copy_req_args(request.args)
    try:
        checked_args = schema.validate(raw_args)
        tweets_df = db_query_tweets(engine, checked_args['ids'])
        n_rows = len(tweets_df)
        if n_rows == 0:
            raise APINoResultError('No tweet found!')
        # Round-trip through JSON text so the frame becomes plain Python data.
        tweets_payload = flask.json.loads(tweets_df.to_json(**TO_JSON_KWARGS))
        result = {
            'status': 'OK',
            'num_of_entries': n_rows,
            'tweets': tweets_payload,
        }
    except SchemaError as err:
        # Invalid request arguments: report the validator's message.
        result = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        result = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Anything unexpected is logged in full but reported generically.
        logger.exception(err)
        result = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(result)
Beispiel #2
0
def query_timeline():
    """Handle API '/timeline'.

    Builds two cumulative tweet-count time series, one for fact-checking
    articles and one for claim articles, from the tweets returned by
    `db_query_tweets` for the requested `ids`.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON-encoded; must decode to a non-empty value.
        resolution : character in 'HDWM'
            Exactly one of 'H', 'D', 'W', 'M' (case-insensitive),
            defaults to 'D'. Used as the pandas resample rule
            ``'1' + resolution``.

    API Response Keys
    -----------------
        status : string
            'OK' on success, 'No result error' when no tweet is found,
            'ERROR' for validation or server failures.
        error : string
            Only present when `status` is not 'OK'.
        timeline : dict
            claim : dict
                timestamp : list of string formatted datetime
                volume : list of int
            fact_checking : dict
                timestamp : list of string formatted datetime
                volume : list of int
    """
    # Attach the current thread to the Lucene JVM before handling the request.
    lucene.getVMEnv().attachCurrentThread()
    q_timeline_schema = Schema({
        'ids':
        And(Use(flask.json.loads, error="Format error of `ids`"),
            lambda s: len(s) > 0,
            error='Empty of `ids`'),
        # Test membership against a tuple, not the string 'HDWM': on a
        # string `in` is a substring test, so '', 'HD' or 'DWM' would pass
        # validation and only fail later inside `resample`.
        Optional('resolution', default='D'):
        And(Use(lambda s: s.upper()),
            lambda s: s in ('H', 'D', 'W', 'M'),
            error='Invalid `resolution`, must be one of H, D, W, M'),
    })

    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_timeline_schema.validate(q_kwargs)
        # e.g. 'D' -> '1D': one bucket per day.
        rule = '1' + q_kwargs.pop('resolution')
        df = db_query_tweets(engine, q_kwargs['ids'])
        if len(df) == 0:
            raise APINoResultError('No tweet found!')
        # Resampling needs the tweet creation time as the index.
        df = df.set_index('tweet_created_at')

        # Per site type: keep each tweet_id once, then count per bucket.
        fc_ids = df.loc[df['site_type'] == N_FACT_CHECKING, 'tweet_id']
        fc_volume = fc_ids.drop_duplicates().resample(rule).count()
        claim_ids = df.loc[df['site_type'] == N_CLAIM, 'tweet_id']
        claim_volume = claim_ids.drop_duplicates().resample(rule).count()

        # Give both series the same buckets (missing ones count as 0)
        # before accumulating, so the two timelines line up.
        fc_volume, claim_volume = fc_volume.align(
            claim_volume, join='outer', fill_value=0)
        fc_volume = fc_volume.cumsum()
        claim_volume = claim_volume.cumsum()

        # NOTE(review): the literal 'Z' suffix presumes `tweet_created_at`
        # is in UTC -- confirm against `db_query_tweets`.
        ts_format = '%Y-%m-%dT%H:%M:%SZ'
        response = dict(
            status='OK',
            timeline=dict(
                fact_checking=dict(
                    timestamp=fc_volume.index.strftime(ts_format).tolist(),
                    volume=fc_volume.tolist()),
                claim=dict(
                    timestamp=claim_volume.index.strftime(ts_format).tolist(),
                    volume=claim_volume.tolist())))
    except SchemaError as e:
        # Invalid request arguments: report the validator's message.
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Anything unexpected is logged in full but reported generically.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)