def test_dataset_can_be_searched(self):
    qs = CrunchbaseQueryset(dataset_uri=self.dataset_uri)
    # We're gonna search for the first item's name, and we expect to get a list containing that item
    item = self.sample_list_data['items'][0]
    results = qs.search(item['name'])
    self.assertGreaterEqual(len(results), 1)
    self.assertIn(item['name'], [x['name'] for x in results])
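# A minimal sketch of the kind of matching search() is expected to perform for the
# test above - a case-insensitive substring match over item names. This helper is an
# assumption for illustration; the real CrunchbaseQueryset.search() may differ.
def _matching_items(items, query):
    # Keep every item whose name contains the query string
    return [item for item in items if query.lower() in item['name'].lower()]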
def test_dataset_items_search_detail_for_extra_information(self):
    qs = CrunchbaseQueryset(dataset_uri=self.dataset_uri)
    item = qs[0]
    self.assertTrue(item['properties__short_description'])
    # We're gonna start by matching exactly the requirements we used for the original list() implementation
    detail = CrunchbaseEndpoint(CrunchbaseQuery.ENDPOINTS['companies']).detail(item['path'])
    self.assertEqual(detail['data']['properties']['short_description'],
                     item['properties__short_description'])
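# A sketch of where the flattened 'properties__short_description' key could come
# from: nested detail properties folded into the list item under double-underscore
# keys. The helper name and the flattening rule are assumptions inferred from the
# key format, not read from the real list() implementation.
def _flatten_item(raw_item, detail_properties):
    item = {'name': raw_item['name'], 'path': raw_item['path']}
    for key, value in detail_properties.items():
        item['properties__%s' % key] = value
    return item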
def test_items_from_following_pages_are_fetched_correctly(self):
    import copy  # local import; a shallow .copy() would share (and mutate) the fixture's nested paging dict
    qs = CrunchbaseQueryset(dataset=self.sample_list_json, dataset_uri=self.dataset_uri)
    with mock.patch('crunchbase.views.requests', autospec=True) as req:
        resp = mock.Mock()
        resp.json.return_value = self.sample_list_json
        req.get.return_value = resp
        item = qs[0]
        # Now, we're going to try to fetch an item with an index greater than the available items,
        # so a new page has to be requested
        new_page_json = copy.deepcopy(self.sample_list_json)
        new_page_json['data']['paging']['current_page'] = 2
        resp.json.return_value = new_page_json
        item = qs[1001]
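# A minimal sketch, assuming a fixed page size of 1000 (consistent with index 1001
# landing on page 2 above), of how an item index could be mapped onto Crunchbase's
# 1-based paging; the helper and the page size are assumptions:
def _page_for_index(index, page_size=1000):
    # divmod splits the index into a 0-based page and an offset within that page
    page, offset = divmod(index, page_size)
    return page + 1, offset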
def test_data_is_fetched_from_cb_on_evaluate(self):
    # We're going with a lazy implementation - data is only fetched when the length or the items are requested
    with mock.patch('crunchbase.views.requests', autospec=True) as req:
        # To avoid pickling errors, we're going to mock the cache too
        with mock.patch('crunchbase.views.cache', cache=mock.Mock()) as c:
            c.get = mock.Mock(return_value=None)
            resp = mock.Mock()
            resp.json.return_value = self.sample_list_json
            req.get.return_value = resp
            qs = CrunchbaseQueryset(dataset_uri=self.dataset_uri)
            self.assertEqual(req.get.call_count, 0)
            len(qs)
            self.assertEqual(req.get.call_count, 1)
            item = qs[0]
            self.assertEqual(req.get.call_count, 1)  # The fetched page is reused, no extra request
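# A minimal sketch of the lazy pattern this test pins down: no HTTP traffic on
# construction, a single fetch the first time the dataset is actually needed, and
# reuse of the stored payload afterwards. _LazyDataset is a hypothetical stand-in,
# not the real CrunchbaseQueryset:
class _LazyDataset(object):
    def __init__(self, fetch):
        self._fetch = fetch  # callable performing the actual GET
        self._data = None

    def _evaluate(self):
        # Fetch once; later accesses reuse the stored payload
        if self._data is None:
            self._data = self._fetch()
        return self._data

    def __len__(self):
        return len(self._evaluate()['data']['items'])

    def __getitem__(self, index):
        return self._evaluate()['data']['items'][index]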
def test_dataset_contains_paging_and_metadata_as_properties(self):
    qs = CrunchbaseQueryset(dataset_uri=self.dataset_uri)
    with mock.patch('crunchbase.views.requests', autospec=True) as req:
        with mock.patch('crunchbase.views.cache', cache=mock.Mock()) as c:
            c.get = mock.Mock(return_value=None)
            c.set = mock.Mock(side_effect=lambda *args, **kwargs: cache.set(*args, **kwargs))
            req.get.return_value = self.page1
            self.assertEqual(req.get.call_count, 0)
            self.assertDictEqual(qs.paging, self.page1.json()['data']['paging'])
            self.assertEqual(req.get.call_count, 1)
            self.assertDictEqual(qs.metadata, self.page1.json()['metadata'])
            self.assertEqual(req.get.call_count, 1)  # We really expect the cache to work here
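# A sketch of how paging and metadata could be exposed as properties that trigger
# (at most) one fetch between them, matching the call counts asserted above; the
# class and attribute layout are assumptions:
class _DatasetInfo(object):
    def __init__(self, evaluate):
        self._evaluate = evaluate  # callable returning the full JSON payload

    @property
    def paging(self):
        return self._evaluate()['data']['paging']

    @property
    def metadata(self):
        return self._evaluate()['metadata']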
@classmethod
def setUpClass(cls):
    super(CBQuerysetTest, cls).setUpClass()
    cls.cbqs = CrunchbaseQueryset(cls.sample_list_json)
    cls.dataset_uri = CrunchbaseEndpoint.BASE_URI + 'organizations'
    # Real API responses are fetched once and kept in the cache to speed up repeated test runs
    cls.page1 = cache.get('test_page1')
    if cls.page1 is None:
        cls.page1 = requests.get(cls.dataset_uri, params={'user_key': settings.CRUNCHBASE_USER_KEY})
        cache.set('test_page1', cls.page1, timeout=None)
    cls.page2 = cache.get('test_page2')
    if cls.page2 is None:
        cls.page2 = requests.get(cls.dataset_uri,
                                 params={'user_key': settings.CRUNCHBASE_USER_KEY, 'page': 2})
        cache.set('test_page2', cls.page2, timeout=None)
def test_data_is_fetched_when_not_present_in_current_page(self):
    qs = CrunchbaseQueryset(dataset=self.sample_list_json, dataset_uri=self.dataset_uri)
    with mock.patch('crunchbase.views.requests', autospec=True) as req:
        with mock.patch('crunchbase.views.cache', cache=mock.Mock()) as c:
            c.get = mock.Mock(return_value=None)
            self.assertEqual(req.get.call_count, 0)
            len(qs)
            self.assertEqual(req.get.call_count, 0)  # The dataset is already present, so no call is needed
            item = qs[0]
            self.assertEqual(req.get.call_count, 0)  # As above
            # Now, we're going to try to fetch an item with an index greater than the available items,
            # so the next page has to be requested
            item = qs[1001]
            req.get.assert_called_once_with(self.dataset_uri,
                                            params={'user_key': settings.CRUNCHBASE_USER_KEY, 'page': 2})
def test_dataset_can_be_sliced(self):
    def pick_page(*args, **kwargs):
        # The queryset calls requests.get(uri, params={...}), so the page number lives
        # inside the params dict rather than directly in kwargs
        page = kwargs.get('params', {}).get('page')
        if page is not None:
            if page == 2:
                return self.page2
            return requests.get(self.dataset_uri,
                                params={'user_key': settings.CRUNCHBASE_USER_KEY, 'page': page})
        return self.page1

    qs = CrunchbaseQueryset(dataset_uri=self.dataset_uri)
    with mock.patch('crunchbase.views.requests', autospec=True) as req:
        req.get = mock.Mock(side_effect=pick_page)
        self.assertEqual(len(qs[:50]), 50)
        self.assertEqual(len(qs[100:200]), 100)
        self.assertEqual(len(qs[1050:1100]), 50)  # Second page
        # I have decided that slicing across pages should not be permitted - I think I have a working
        # solution, but the test output doesn't smell good to me, so I'll just go with an exception instead
        self.assertRaises(IndexError, lambda: len(qs[:2500]))
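# A sketch of the cross-page guard the last assertion relies on, assuming a fixed
# page size of 1000: a slice is rejected when its endpoints fall on different
# pages. The helper is hypothetical, not the real __getitem__ slice handling:
def _check_slice_within_page(start, stop, page_size=1000):
    first_page = (start or 0) // page_size
    last_page = (stop - 1) // page_size  # page of the last included index
    if first_page != last_page:
        raise IndexError('Slicing across page boundaries is not supported')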
def test_dataset_is_cached(self):
    qs = CrunchbaseQueryset(dataset_uri=self.dataset_uri)
    with mock.patch('crunchbase.views.requests', autospec=True) as req:
        with mock.patch('crunchbase.views.cache', cache=mock.Mock()) as c:
            c.get = mock.Mock(return_value=None)
            c.set = mock.Mock(side_effect=lambda *args, **kwargs: cache.set(*args, **kwargs))
            req.get.return_value = self.page1
            item = qs[0]
            self.assertEqual(req.get.call_count, 1)
            # When retrieving from the second page, we have to make another GET
            req.get.return_value = self.page2
            item = qs[1002]
            self.assertEqual(req.get.call_count, 2)
            # Now, if we go back to page 1, we should not have to request again - it should be cached
            # (our mocked set() delegates to the real cache)
            req.get.return_value = self.page1  # This should not be called anyway
            item = qs[1]
            self.assertEqual(req.get.call_count, 2)
            # Going back to page 2...
            item = qs[1005]
            self.assertEqual(req.get.call_count, 2)
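# A sketch of the per-page caching behaviour these assertions pin down: each
# fetched page is stored under a page-keyed cache entry, so revisiting a page
# never triggers another GET. The key format and the fetch_page helper are
# assumptions, not the real implementation:
def _get_cached_page(page, fetch_page):
    key = 'crunchbase_page_%d' % page
    data = cache.get(key)
    if data is None:
        data = fetch_page(page)  # hypothetical helper performing the actual GET
        cache.set(key, data)
    return data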