def query_tweets():
    """Handle API '/tweets'.

    Looks up tweets for the requested ids and returns them as JSON.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON-encoded, must not be empty.

    API Response Keys
    -----------------
        status : string
            'OK' on success, 'No result error' when nothing matched,
            'ERROR' for invalid parameters or an unexpected failure.
        num_of_entries : int
        tweets : dict
            canonical_url : string
            domain : string
            id : int
            date_published : string formatted datetime
            site_type : {'claim', 'fact_checking'}
            title : string
            tweet_created_at : string formatted datetime
            tweet_id : string
        error : string
            Only present when `status` is not 'OK'.
    """
    # NOTE(review): presumably needed because this request thread may touch
    # Lucene through the JVM further down the call chain -- confirm.
    lucene.getVMEnv().attachCurrentThread()

    ids_validator = And(
        Use(flask.json.loads, error="Format error of `ids`"),
        lambda s: len(s) > 0,
        error='Empty of `ids`')
    schema = Schema({'ids': ids_validator})
    params = copy_req_args(request.args)

    # Everything, including building the payload, stays inside the `try` so
    # that any unexpected failure is logged and reported as a server error.
    try:
        params = schema.validate(params)
        tweets_df = db_query_tweets(engine, params['ids'])
        if not len(tweets_df):
            raise APINoResultError('No tweet found!')
        tweets_json = tweets_df.to_json(**TO_JSON_KWARGS)
        payload = {
            'status': 'OK',
            'num_of_entries': len(tweets_df),
            'tweets': flask.json.loads(tweets_json),
        }
    except SchemaError as err:
        payload = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        payload = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        logger.exception(err)
        payload = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(payload)
def query_timeline():
    """Handle API '/timeline'.

    Builds cumulative tweet-volume timelines, one for fact-checking
    articles and one for claim articles, for the requested ids.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON-encoded, must not be empty.
        resolution : character in 'HDWM'
            Optional, case-insensitive, defaults to 'D'. Exactly one
            letter is accepted; it is prefixed with '1' and used as the
            resampling rule.

    API Response Keys
    -----------------
        status : string
            'OK' on success, 'No result error' when nothing matched,
            'ERROR' for invalid parameters or an unexpected failure.
        timeline : dict
            claim : dict
                timestamp : list of string formatted datetime
                volume : list of int (cumulative count of distinct tweets)
            fact_checking : dict
                timestamp : list of string formatted datetime
                volume : list of int (cumulative count of distinct tweets)
        error : string
            Only present when `status` is not 'OK'.
    """
    # NOTE(review): presumably needed because this request thread may touch
    # Lucene through the JVM further down the call chain -- confirm.
    lucene.getVMEnv().attachCurrentThread()

    # A tuple, not the string 'HDWM': `s in 'HDWM'` is a substring test, so
    # '', 'HD', 'DW', ... used to pass validation and then produced invalid
    # resample rules that surfaced as a generic server error.
    valid_resolutions = ('H', 'D', 'W', 'M')
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

    q_timeline_schema = Schema({
        'ids': And(
            Use(flask.json.loads, error="Format error of `ids`"),
            lambda s: len(s) > 0,
            error='Empty of `ids`'),
        Optional('resolution', default='D'): And(
            Use(lambda s: s.upper()),
            lambda s: s in valid_resolutions,
            error='Invalid `resolution`, must be one of H, D, W, M'),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_timeline_schema.validate(q_kwargs)
        rule = '1' + q_kwargs.pop('resolution')
        df = db_query_tweets(engine, q_kwargs['ids'])
        if len(df) == 0:
            raise APINoResultError('No tweet found!')
        df = df.set_index('tweet_created_at')

        # Count each tweet once per site type, bucketed by `rule`.
        df1 = df.loc[df['site_type'] == N_FACT_CHECKING]
        s1 = df1['tweet_id'].drop_duplicates()
        s1 = s1.resample(rule).count()
        df2 = df.loc[df['site_type'] == N_CLAIM]
        s2 = df2['tweet_id'].drop_duplicates()
        s2 = s2.resample(rule).count()

        # Put both series on the same set of timestamps (missing buckets
        # count as 0) before accumulating, so the two timelines line up.
        s1, s2 = s1.align(s2, join='outer', fill_value=0)
        s1 = s1.cumsum()
        s2 = s2.cumsum()
        response = dict(
            status='OK',
            timeline=dict(
                fact_checking=dict(
                    timestamp=s1.index.strftime(timestamp_format).tolist(),
                    volume=s1.tolist()),
                claim=dict(
                    timestamp=s2.index.strftime(timestamp_format).tolist(),
                    volume=s2.tolist())))
    except SchemaError as e:
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)