def get_post_fetcher(topic_seed_query: dict) -> Optional[AbstractPostFetcher]:
    """Return a post fetcher matching the seed query, or None if none matches.

    The fetcher is selected from the ``'source'`` and ``'platform'`` entries
    of ``topic_seed_query``. Recognized (source, platform) pairs:

    * ``('brandwatch', 'twitter')``   -> ``BrandwatchTwitterPostFetcher``
    * ``('postgres', 'generic_post')`` -> ``PostgresPostFetcher``
    * ``('csv', 'generic_post')``      -> ``CSVStaticPostFetcher``
    * ``('pushshift', 'reddit')``      -> ``PushshiftRedditPostFetcher``

    :param topic_seed_query: dict that must contain 'source' and 'platform'.
    :return: a newly constructed fetcher instance, or None for any other pair.
    :raises KeyError: if 'source' or 'platform' is missing from the dict.
    """
    # Read both keys up front so a missing key fails before any fetcher is built.
    selector = (topic_seed_query['source'], topic_seed_query['platform'])

    if selector == ('brandwatch', 'twitter'):
        return BrandwatchTwitterPostFetcher()
    if selector == ('postgres', 'generic_post'):
        return PostgresPostFetcher()
    if selector == ('csv', 'generic_post'):
        return CSVStaticPostFetcher()
    if selector == ('pushshift', 'reddit'):
        return PushshiftRedditPostFetcher()

    # Unknown combination: callers are expected to handle the None.
    return None
def test_pushshift_query_builder() -> None:
    """Check the query dict produced by ``prpf._pushshift_query_builder``.

    Builds a randomized query for a fixed search term, size and date window,
    then verifies the size, the random seed type, the ``created_utc`` range
    bounds and the ``simple_query_string`` settings of the result.
    """
    search_terms = "trump"
    requested_size = 100
    window_start = datetime.datetime(2019, 1, 1, 0, 0)
    window_end = datetime.datetime(2019, 7, 1, 0, 0)

    es_query = prpf._pushshift_query_builder(
        query=search_terms,
        size=requested_size,
        randomize=True,
        start_date=window_start,
        end_date=window_end,
    )

    # The size parameter must be present and equal to the requested size.
    assert 'size' in es_query
    assert es_query['size'] == requested_size

    # A randomized query must carry an integer random seed.
    scored = es_query['query']['function_score']
    assert isinstance(scored['random_score']['seed'], int)

    must_clauses = scored['query']['bool']['must']

    # First pass: date range bounds. Only clauses that actually contain a
    # 'range' entry are inspected; each such clause is checked for 'gte'
    # first and for 'lt' only when 'gte' is absent.
    # NOTE(review): the datetimes are naive, so timestamp() interprets them
    # in local time -- confirm the builder converts them the same way.
    for clause in must_clauses:
        if 'range' not in clause:
            continue
        bounds = clause['range']['created_utc']
        if 'gte' in bounds:
            assert bounds['gte'] == window_start.timestamp()
        elif 'lt' in bounds:
            assert bounds['lt'] == window_end.timestamp()

    # Second pass: full-text clause settings.
    for clause in must_clauses:
        if 'simple_query_string' in clause:
            text_search = clause['simple_query_string']

            # Both the title and the selftext fields must be searched.
            for field_name in ('selftext', 'title'):
                assert field_name in text_search['fields']

            # Terms are combined with AND by default.
            assert text_search['default_operator'] == 'and'

            # The query string is exactly the requested search terms.
            assert text_search['query'] == search_terms
def test_epoch_conversion():
    """Check that ``prpf._convert_epoch_to_iso8601`` renders a known epoch.

    Epoch 1540000000 is 2018-10-20 01:46:40 in UTC, which is the string the
    converter is expected to return.
    """
    expected_rendering = "2018-10-20 01:46:40"

    converted = prpf._convert_epoch_to_iso8601(1540000000)
    # Echo the value so it shows up in the captured output on failure.
    print(converted)

    assert converted == expected_rendering