def test_search(self, mock_paging):
        # Items yielded by _search must mirror the paged test data one-for-one,
        # and the paging helper must be called exactly once with the search URL
        # and an empty payload.
        thing_kind = "TestKind"
        search_url = "https://test-domain.pushshift.io/reddit/{}/search".format(
            thing_kind)
        mock_paging.return_value = self._search_test_data

        client = PushshiftAPIMinimal(
            detect_local_tz=False,
            domain="test-domain",
            rate_limit_per_minute=self._rate_limit,
        )
        found = client._search(thing_kind)

        for page in self._search_test_data:
            for raw in page["data"]:
                wrapped = next(found)

                # The wrapper mentions its kind in str() and exposes the raw
                # record both as a dict (d_) and attribute-by-attribute.
                self.assertIn(thing_kind, str(wrapped))
                self.assertEqual(raw["created_utc"], wrapped.created)
                self.assertDictEqual(raw, wrapped.d_)
                for field in raw:
                    self.assertEqual(raw[field], getattr(wrapped, field))

        mock_paging.assert_called_once_with(search_url, {})

        # Once every record was consumed the generator must be exhausted
        try:
            next(found)
        except StopIteration:
            pass
        else:
            self.fail("Expected StopIteration")
    def test_limited(self):
        # _limited must be False for the regular arguments (see
        # https://pushshift.io/api-parameters/) and True for every entry of
        # PushshiftAPIMinimal._limited_args.
        is_limited = PushshiftAPIMinimal._limited

        for name in self._pushshift_args:
            payload = {name: True}
            self.assertFalse(is_limited(payload))

        for name in PushshiftAPIMinimal._limited_args:
            payload = {name: True}
            self.assertTrue(is_limited(payload))
    def test_handle_paging_no_limit(self, mock_get):
        # With no "limit" in the payload, _handle_paging keeps yielding pages
        # for as long as it is asked to; each request carries a limit of
        # max_results_per_request (10 here).
        expected_last_timestamp = 1530047819
        test_url = "example.com/route"
        records = (
            (1530046703, "e1ccvn7", 1),
            (1530047319, "e1ccvn8", 2),
            (1530047619, "e1ccvn9", -3),
            (1530047719, "e1ccvna", 5),
            (expected_last_timestamp, "e1ccvnb", 8),
        )
        page = {
            "data": [{
                "created_utc": created,
                "id": ident,
                "score": score
            } for created, ident, score in records]
        }
        mock_get.return_value = page

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  max_results_per_request=10)
        pages = api._handle_paging(test_url, {})

        # The very first request is checked separately: it is the only one
        # that is sent without a "before" timestamp.
        self.assertEqual(page, next(pages))
        self.assertEqual(expected_last_timestamp, api._last_timestamp)
        mock_get.assert_called_once()
        mock_get.assert_called_with(test_url, {"limit": 10})

        # Paging would never end on its own, so only calls 2 through 14 are
        # exercised here.
        follow_up_payload = {"limit": 10, "before": expected_last_timestamp}
        for expected_calls in range(2, 15):
            self.assertEqual(page, next(pages))
            self.assertEqual(expected_calls, mock_get.call_count)
            self.assertEqual(expected_last_timestamp, api._last_timestamp)
            mock_get.assert_called_with(test_url, follow_up_payload)
    def test_search_stop_cond_batch(self, mock_paging):
        # With return_batch=True, _search yields whole batches; a batch is cut
        # short at the first item matching stop_condition and nothing is
        # yielded afterwards.
        cutoff = 1530049619
        thing_kind = "TestKind"
        search_url = "https://test-domain.pushshift.io/reddit/{}/search".format(
            thing_kind)
        mock_paging.return_value = self._search_test_data

        client = PushshiftAPIMinimal(
            detect_local_tz=False,
            domain="test-domain",
            rate_limit_per_minute=self._rate_limit,
        )
        batches = client._search(
            thing_kind,
            return_batch=True,
            stop_condition=lambda item: item.created > cutoff)

        for page in self._search_test_data:
            raw_items = page["data"]
            # Only records at or before the cutoff survive the stop condition
            kept = [raw for raw in raw_items if raw["created_utc"] <= cutoff]
            batch = next(batches)

            self.assertEqual(len(kept), len(batch))

            for pos, raw in enumerate(kept):
                wrapped = batch[pos]

                self.assertIn(thing_kind, str(wrapped))
                self.assertEqual(raw["created_utc"], wrapped.created)
                self.assertDictEqual(raw, wrapped.d_)
                for field in raw:
                    self.assertEqual(raw[field], getattr(wrapped, field))

            # A shortened batch means the stop condition fired on this page
            if len(kept) < len(raw_items):
                break

        mock_paging.assert_called_once_with(search_url, {})

        # Nothing more may be produced after the stop condition was hit
        try:
            next(batches)
        except StopIteration:
            pass
        else:
            self.fail("Expected StopIteration")
    def test_utc_offset_secs(self):
        # utc_offset_secs must be 0 when local timezone detection is off, and
        # must match the process-local UTC offset for every common timezone
        # when detection is on.
        api = PushshiftAPIMinimal(detect_local_tz=False,
                                  rate_limit_per_minute=self._rate_limit)
        self.assertEqual(0, api.utc_offset_secs)

        api = PushshiftAPIMinimal(detect_local_tz=True,
                                  rate_limit_per_minute=self._rate_limit)

        # The loop below rewrites the process-wide TZ setting. Remember the
        # current value so it can be put back even when an assertion fails;
        # otherwise the last timezone leaks into every test that runs later.
        saved_tz = os.environ.get("TZ")
        try:
            for timezone in pytz.common_timezones:
                # Reset the stored offset so it is derived again for this zone
                api._utc_offset_secs = None
                os.environ["TZ"] = timezone
                time.tzset()

                expected_secs = dt.utcnow().astimezone().utcoffset(
                ).total_seconds()
                actual_secs = api.utc_offset_secs

                self.assertEqual(expected_secs, actual_secs)
        finally:
            if saved_tz is None:
                os.environ.pop("TZ", None)
            else:
                os.environ["TZ"] = saved_tz
            time.tzset()
    def test_handle_paging_low_limit(self, mock_get):
        # A requested limit (5) below max_results_per_request (10) is passed
        # through unchanged and satisfied by a single request.
        expected_last_timestamp = 1530047819
        test_url = "example.com/route"
        records = (
            (1530046703, "e1ccvn7", 1),
            (1530047319, "e1ccvn8", 2),
            (1530047619, "e1ccvn9", -3),
            (1530047719, "e1ccvna", 5),
            (expected_last_timestamp, "e1ccvnb", 8),
        )
        page = {
            "data": [{
                "created_utc": created,
                "id": ident,
                "score": score
            } for created, ident, score in records]
        }
        mock_get.return_value = page

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  max_results_per_request=10)
        pages = api._handle_paging(test_url, {"limit": 5})

        self.assertEqual(page, next(pages))
        self.assertEqual(expected_last_timestamp, api._last_timestamp)
        mock_get.assert_called_once_with(test_url, {"limit": 5})

        # The limit is used up after one page, so paging must stop here
        try:
            next(pages)
        except StopIteration:
            pass
        else:
            self.fail("Expected StopIteration")
    def test_epoch_utc_to_local(self):
        # _epoch_utc_to_local must return the timestamp unchanged when local
        # timezone detection is off, and the timestamp minus the process-local
        # UTC offset when detection is on.
        timestamps = [
            1429981843,
            1519981843,
            1528981843,
            1529781843,
            1529881843,
            1529931843,
            1529981843,
        ]

        # The inner loop rewrites the process-wide TZ setting. Remember the
        # current value so it can be put back even when an assertion fails;
        # otherwise the last timezone leaks into every test that runs later.
        saved_tz = os.environ.get("TZ")
        try:
            for timestamp in timestamps:
                api = PushshiftAPIMinimal(
                    detect_local_tz=False,
                    rate_limit_per_minute=self._rate_limit)
                self.assertEqual(timestamp,
                                 api._epoch_utc_to_local(timestamp))

                api = PushshiftAPIMinimal(
                    detect_local_tz=True,
                    rate_limit_per_minute=self._rate_limit)
                for timezone in pytz.common_timezones:
                    # Reset the stored offset so it is derived again for
                    # this zone
                    api._utc_offset_secs = None
                    os.environ["TZ"] = timezone
                    time.tzset()

                    expected_secs = (
                        timestamp -
                        dt.utcnow().astimezone().utcoffset().total_seconds())
                    actual_secs = api._epoch_utc_to_local(timestamp)

                    self.assertEqual(expected_secs, actual_secs)
        finally:
            if saved_tz is None:
                os.environ.pop("TZ", None)
            else:
                os.environ["TZ"] = saved_tz
            time.tzset()
    def test_get(self, mock_get, mock_rate_limit):
        # A 200 response is decoded and returned after exactly one request and
        # exactly one rate-limit check.
        test_url = "example.com/route"
        expected_result = "test_text"
        max_retries = 7

        response = MockResponse(status_code=200,
                                text=json.dumps(expected_result))
        mock_get.return_value = response

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  max_retries=max_retries)

        self.assertEqual(expected_result, api._get(test_url))

        # A successful call must not trigger any retries
        mock_get.assert_called_with(test_url, params={})
        self.assertEqual(1, mock_get.call_count)

        # The rate limit is imposed once, with argument 0
        self.assertEqual(1, mock_rate_limit.call_count)
        mock_rate_limit.assert_has_calls([mock.call(0)])

        response.raise_for_status.assert_called_once()
    def test_wrap_thing(self):
        # _wrap_thing must expose every key of the source dict as an
        # attribute, keep the dict itself on d_, mirror created_utc on
        # .created, and mention the kind in its string form.
        thing_kind = "TestKind"
        raw = {
            "created_utc": dt.utcnow().timestamp(),
            "some": 12,
            "arbitrary": True,
            "Set": "of random",
            "keys": "to",
            "test": 15.0,
        }

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  detect_local_tz=False)
        thing = api._wrap_thing(raw, thing_kind)

        self.assertIn(thing_kind, str(thing))
        self.assertEqual(raw["created_utc"], thing.created)
        self.assertDictEqual(raw, thing.d_)

        for field in raw:
            self.assertEqual(raw[field], getattr(thing, field))
    def test_get_raise_for_status(self, mock_get, mock_rate_limit):
        # For each of the listed error codes, _get must use up all retries and
        # then surface an HTTPError whose message names the code.
        test_url = "example.com/route"
        expected_result = "test_text"
        max_retries = 7
        failing_codes = [400, 401, 403, 404, 405, 500, 502, 503, 504]
        # One rate-limit call per attempt, numbered from 0
        backoff_calls = [mock.call(attempt) for attempt in range(max_retries)]

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  max_retries=max_retries)

        for round_no, status_code in enumerate(failing_codes, start=1):
            response = MockResponse(status_code=status_code,
                                    text=json.dumps(expected_result))
            mock_get.return_value = response

            try:
                api._get(test_url)
            except HTTPError as exc:
                error_class = "Server" if status_code >= 500 else "Client"
                self.assertIn(
                    "{} {} Error".format(status_code, error_class),
                    str(exc),
                )
            else:
                self.fail("call failed to trigger expected exception")

            # The mocks are shared across rounds, so counts accumulate
            expected_calls = max_retries * round_no

            # Every retry must have been spent on this status code
            mock_get.assert_called_with(test_url, params={})
            self.assertEqual(expected_calls, mock_get.call_count)

            # Each attempt must have gone through the rate limiter
            self.assertEqual(expected_calls, mock_rate_limit.call_count)
            mock_rate_limit.assert_has_calls(backoff_calls)

            response.raise_for_status.assert_called_once()
    def test_get_429(self, mock_get, mock_rate_limit):
        # A persistent 429 is retried max_retries times; unlike other error
        # codes, raise_for_status is never called on the response.
        test_url = "example.com/route"
        expected_result = "test_text"
        max_retries = 7

        response = MockResponse(status_code=429,
                                text=json.dumps(expected_result))
        mock_get.return_value = response

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  max_retries=max_retries)

        self.assertEqual(expected_result, api._get(test_url))

        # Every available retry must have been used
        mock_get.assert_called_with(test_url, params={})
        self.assertEqual(max_retries, mock_get.call_count)

        # Each attempt must have gone through the rate limiter, numbered
        # from 0
        self.assertEqual(max_retries, mock_rate_limit.call_count)
        backoff_calls = [mock.call(attempt) for attempt in range(max_retries)]
        mock_rate_limit.assert_has_calls(backoff_calls)

        # What sets 429 apart from the other error codes
        response.raise_for_status.assert_not_called()
    def test_apply_timestamp(self):
        # _apply_timestamp leaves the payload alone while no timestamp is
        # stored; otherwise it adds "before" (default and sort=desc) or
        # "after" (sort=asc).
        stamp = 12307501
        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit)

        # No stored timestamp: payload comes back unchanged
        api._last_timestamp = None
        self.assertDictEqual({"rand_field": "rand_val"},
                             api._apply_timestamp({"rand_field": "rand_val"}))

        api._last_timestamp = stamp
        # (payload handed in, payload expected back)
        cases = [
            (
                {"rand_field": "rand_val"},
                {"rand_field": "rand_val", "before": stamp},
            ),
            (
                {"rand_field": "rand_val", "sort": "desc"},
                {"rand_field": "rand_val", "sort": "desc", "before": stamp},
            ),
            (
                {"rand_field": "rand_val", "sort": "asc"},
                {"rand_field": "rand_val", "sort": "asc", "after": stamp},
            ),
        ]
        for given, expected in cases:
            self.assertDictEqual(expected, api._apply_timestamp(given))
 def test_init(self):
     # Build an instance from the shared base kwargs and hand it to the
     # common init checks.
     self._test_base_init(PushshiftAPIMinimal(**self._base_init_kwargs))
 def test_base_url(self):
     # base_url must embed the configured domain and keep the {endpoint}
     # placeholder unformatted.
     expected_url = "https://test-domain.pushshift.io/{endpoint}"
     client = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  domain="test-domain")
     self.assertEqual(expected_url, client.base_url)
    def test_impose_rate_limit(self, mock_sleep):
        # Walks _impose_rate_limit through four states of the rate-limit
        # cache and checks the sleep duration requested in each.
        sleep_cap = 69
        backoff_step = 11
        rate_cache = mock.NonCallableMock(blocked=False, interval=13)

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  backoff=backoff_step,
                                  max_sleep=sleep_cap)
        api._rlcache = rate_cache

        # Cache not blocked: no sleeping at all
        api._impose_rate_limit()
        mock_sleep.assert_not_called()

        # Cache blocked: sleep for the cache's interval
        rate_cache.blocked = True
        api._impose_rate_limit()
        mock_sleep.assert_called_with(13)

        # Interval above max_sleep: sleep is capped at max_sleep
        rate_cache.interval = 87
        api._impose_rate_limit()
        mock_sleep.assert_called_with(sleep_cap)

        # Zero interval with argument 6: sleep is 6 times the backoff
        rate_cache.interval = 0
        api._impose_rate_limit(6)
        mock_sleep.assert_called_with(6 * backoff_step)
    def test_handle_paging_high_limit(self, mock_get):
        # A requested limit (25) above max_results_per_request (10) is split
        # into requests of 10, 10 and 5, each later one carrying the previous
        # page's last timestamp as "before".
        test_url = "example.com/route"

        def build_page(rows):
            # Expand (created_utc, id, score) tuples into a response dict
            return {
                "data": [{
                    "created_utc": created,
                    "id": ident,
                    "score": score
                } for created, ident, score in rows]
            }

        test_data = [
            build_page([
                (1530046703, "e1ccvn7", 1),
                (1530047319, "e1ccvn8", 2),
                (1530047619, "e1ccvn9", -3),
                (1530047719, "e1ccvna", 5),
                (1530047819, "e1ccvnb", 8),
            ]),
            build_page([
                (1530048703, "e1cdvn7", -1),
                (1530049319, "e1cdvn8", -2),
                (1530049619, "e1cdvn9", 3),
                (1530049719, "e1cdvna", -5),
                (1530049819, "e1cdvnb", -8),
            ]),
            build_page([
                (1530148703, "e1cdvn7", -1),
                (1530149319, "e1cdvn8", -2),
                (1530149619, "e1cdvn9", 3),
                (1530149719, "e1cdvna", -5),
                (1530149819, "e1cdvnb", -8),
            ]),
        ]
        mock_get.side_effect = test_data

        api = PushshiftAPIMinimal(rate_limit_per_minute=self._rate_limit,
                                  max_results_per_request=10)
        pages = api._handle_paging(test_url, {"limit": 25})

        # Per request: (payload expected on the wire, last timestamp after it)
        expectations = [
            ({"limit": 10}, 1530047819),
            ({"limit": 10, "before": 1530047819}, 1530049819),
            ({"limit": 5, "before": 1530049819}, 1530149819),
        ]
        for call_no, (page, (payload, last_seen)) in enumerate(
                zip(test_data, expectations), start=1):
            self.assertEqual(page, next(pages))
            self.assertEqual(last_seen, api._last_timestamp)
            self.assertEqual(call_no, mock_get.call_count)
            mock_get.assert_called_with(test_url, payload)

        # All 25 requested results were served, so paging must stop
        try:
            next(pages)
        except StopIteration:
            pass
        else:
            self.fail("Expected StopIteration")
    def test_raise_for_unpageable(self):
        # _raise_for_unpageable must accept the payloads in `pageable` and
        # raise NotImplementedError, with a specific message, for those in
        # `unpageable`.
        max_results_per_request = 10

        # Sorted by created_utc (or unsorted), or limit within the per-request
        # maximum
        pageable = [
            dict(),
            dict(sort_type="created_utc"),
            dict(sort_type="created_utc", sort="desc"),
            dict(sort_type="created_utc", sort="asc"),
            dict(sort_type="score", sort="desc", limit=2),
            dict(sort_type="num_comments", sort="asc", limit=5),
            dict(sort_type="whatever", sort="desc", limit=8),
            dict(sort_type="seriously_whatever", sort="desc", limit=10),
        ]

        # Other sort types with no limit, or with a limit above the
        # per-request maximum
        unpageable = [
            dict(sort_type="score"),
            dict(sort_type="num_comments", sort="desc"),
            dict(sort_type="whatever", sort="asc"),
            dict(sort_type="score", sort="desc", limit=11),
            dict(sort_type="num_comments", sort="asc", limit=15),
            dict(sort_type="whatever", sort="desc", limit=18),
            dict(sort_type="seriously_whatever", sort="desc", limit=110),
        ]

        api = PushshiftAPIMinimal(
            rate_limit_per_minute=self._rate_limit,
            max_results_per_request=max_results_per_request,
        )

        # None of these may raise
        for payload in pageable:
            api._raise_for_unpageable(payload)

        for payload in unpageable:
            try:
                api._raise_for_unpageable(payload)
            except NotImplementedError as exc:
                msg = str(exc)

                # The shared paging error text is always present
                self.assertIn(PushshiftAPIMinimal._page_error_msg, msg)

                # The detail depends on whether a limit was supplied
                if "limit" not in payload:
                    self.assertIn("must provide a limit", msg)
                else:
                    self.assertIn(
                        "queries require limit <= max_results_per_request",
                        msg)
            else:
                self.fail("Expected exception failed to trigger")
 def test_init_none_rate_limit(self, mock_get):
     # With rate_limit_per_minute=None, the value reported under
     # "server_ratelimit_per_minute" by the mocked call must end up as the
     # rate-limit cache's max_storage.
     server_limit = 420
     mock_get.return_value = {"server_ratelimit_per_minute": server_limit}

     client = PushshiftAPIMinimal(rate_limit_per_minute=None)

     self.assertEqual(server_limit, client._rlcache.max_storage)
    def test_add_nec_args(self):
        # _add_nec_args must leave limited payloads untouched, add the default
        # limit otherwise, and turn any "filter" into a list that contains
        # "created_utc".
        max_results_per_request = 127
        api = PushshiftAPIMinimal(
            rate_limit_per_minute=self._rate_limit,
            max_results_per_request=max_results_per_request,
        )

        # A payload made only of limited arguments comes back unchanged
        limited_payload = dict.fromkeys(PushshiftAPIMinimal._limited_args,
                                        True)
        self.assertDictEqual(
            dict.fromkeys(PushshiftAPIMinimal._limited_args, True),
            api._add_nec_args(limited_payload),
        )

        # A payload without a limit gets max_results_per_request
        self.assertDictEqual(
            {
                "arbitrary": "value",
                "limit": max_results_per_request
            },
            api._add_nec_args({"arbitrary": "value"}),
        )

        # (filter handed in, filter expected back)
        filter_cases = [
            # created_utc is appended to an empty filter
            ([], ["created_utc"]),
            # a string filter becomes a list
            ("some_string", ["some_string", "created_utc"]),
            # an iterable that is not a list becomes a list
            (set(range(3)), [0, 1, 2, "created_utc"]),
            # a "created_utc" string filter becomes a one-element list
            ("created_utc", ["created_utc"]),
        ]
        for given_filter, expected_filter in filter_cases:
            self.assertDictEqual(
                {
                    "more_arbitrary": "more_value",
                    "limit": max_results_per_request,
                    "filter": expected_filter,
                },
                api._add_nec_args({
                    "more_arbitrary": "more_value",
                    "filter": given_filter
                }),
            )