def test_parse_base_vessel(self, selector):
    """The registry parser should extract all base vessel attributes."""
    # GIVEN
    expected_vessel = Vessel({
        'build_year': 2017,
        'call_sign': 'SVCO6',
        'dead_weight': 158871,
        'flag_name': 'Greece',
        'gross_tonnage': 81349,
        'imo': '9745263',
        'mmsi': '241492000',
        'name': 'AEGEAN FIGHTER',
        'status': 'Launched',
        # TODO not required in VesselRegistry model,
        # clarify with analysts on criticality
        # 'status_date': '2016-12-31T00:00:00',
        'type': 'Crude Oil Tanker',
        'reported_date': '2017-03-28T00:00:00',
    })

    # WHEN
    parsed = parser._parse_base_vessel(selector)

    # THEN
    self.assertEqual(
        strip_meta_fields(parsed),
        strip_meta_fields(expected_vessel),
    )
def test_retrieve_positions(self):
    """Extended-message parsing should yield the expected position item."""
    fixture = os.path.join(FIXTURE_PATH, 'position-and-eta.xml')
    response = FakeXmlResponse(fixture)
    spider = MarineTrafficSpider(fleet_name='MT_API', msgtype='extended')
    expected = [{
        'aisType': 'T-AIS',
        'master_imo': '7357452',
        'master_mmsi': '205194000',
        'master_callsign': 'ONAE',
        'master_name': 'METHANIA',
        # 'master_shipType': 'tankships',
        'nextDestination_aisType': 'T-AIS',
        'nextDestination_destination': 'MARSEILLE',
        'nextDestination_eta': '2015-12-16T10:00:00',
        'position_aisType': 'T-AIS',
        'position_course': '245',
        'position_draught': '9.5',
        'position_lat': '43.332330',
        'position_lon': '5.342667',
        'position_navState': None,
        'position_speed': 0.1,
        'position_timeReceived': '2015-06-12T10:46:46',
        'provider_id': 'MT_API',
    }]

    last_idx = None
    for last_idx, item in enumerate(spider.parse(response)):
        bones = strip_meta_fields(item)
        for field, value in bones.items():
            self.assertEqual(value, expected[last_idx].get(field))
        # Both sides must expose exactly the same set of fields.
        self.assertEqual(len(bones), len(expected[last_idx]))
    self.assertIsNotNone(last_idx)  # Ensures the loop iterated at least once
def test_response_with_voyage_only(self):
    """An ETA-only SOAP payload should yield a complete voyage item.

    Guards the loop with ``idx = None`` / ``assertIsNotNone`` (matching the
    other position tests in this file): without it, a parser that yields
    nothing would let this test pass vacuously.
    """
    response = FakeSoapResponse('eta-only.xml')
    spider = VesselTrackerSpider(username='******', password='******')
    responses = [{
        'aisType': 'VT',
        'master_imo': '7400704',
        'master_mmsi': '605106030',
        'master_callsign': '7TJC',
        'master_name': 'MOURAD DIDOUCHE',
        'master_dimA': '224',
        'master_dimB': '50',
        'master_dimC': '11',
        'master_dimD': '31',
        'master_shipType': 'tankships',
        'nextDestination_aisType': 'VT',
        'nextDestination_destination': 'ARZEW',
        'nextDestination_eta': '2015-06-10T08:00:00+02:00',
        'nextDestination_timeUpdated': '2015-06-10T03:41:32.047+02:00',
        'position_draught': '10.8',
        'provider_id': 'VT',
    }]
    idx = None
    for idx, item in enumerate(spider._parse_response(response)):
        item_bones = strip_meta_fields(item)
        for key in item_bones.keys():
            self.assertEqual(item_bones.get(key), responses[idx][key])
        # Ensure the item and our test dict have the same number of keys.
        self.assertEqual(len(item_bones), len(responses[idx]))
    self.assertIsNotNone(idx)  # Ensures the loop iterated at least once
def test_response_with_position_only(self):
    """A position-only SOAP payload should yield a complete position item.

    Guards the loop with ``idx = None`` / ``assertIsNotNone`` (matching the
    other position tests in this file): without it, a parser that yields
    nothing would let this test pass vacuously.
    """
    response = FakeSoapResponse('position-only.xml')
    spider = VesselTrackerSpider(username='******', password='******')
    responses = [{
        'aisType': 'SAT',
        'master_imo': '7400704',
        'master_mmsi': '605106030',
        'master_callsign': '7TJC',
        'master_name': 'MOURAD DIDOUCHE',
        'master_dimA': '224',
        'master_dimB': '50',
        'master_dimC': '11',
        'master_dimD': '31',
        'master_shipType': 'tankships',
        'position_aisType': 'SAT',
        'position_course': '206.0',
        'position_heading': None,
        'position_lat': '36.26815',
        'position_lon': '0.03751666666666666',
        'position_navState': None,
        'position_speed': None,
        'position_timeReceived': '2015-06-10T03:54:40.778+02:00',
        'provider_id': 'VT',
    }]
    idx = None
    for idx, item in enumerate(spider._parse_response(response)):
        item_bones = strip_meta_fields(item)
        for key in item_bones.keys():
            self.assertEqual(item_bones.get(key), responses[idx][key])
        # Ensure the item and our test dict have the same number of keys.
        self.assertEqual(len(item_bones), len(responses[idx]))
    self.assertIsNotNone(idx)  # Ensures the loop iterated at least once
def test_ship_finder_valid_response_with_vessel_twice(self):
    """A response repeating the same MMSI should still match the fixture item."""
    fixture = os.path.join(
        FIXTURE_PATH, 'response_twice_mmsi_477744900.json')
    resp = response_factory(
        fixture,
        klass=FakeTextResponse,
        meta={'vessels': self._vessel_dict},
    )
    spider = ShipFinderSpider()
    collection = ForcedCollection(self._list_of_two_vessels)
    with patch(VESSELS_MODULE, collection):
        for item in spider.parse(resp):
            for field in strip_meta_fields(item):
                self.assertEqual(item[field], self._item[field])
def test_response_data_when_missing_key(self, response):
    """A payload missing a key should yield an item without that field.

    Fix: the original popped ``master_mmsi`` directly from
    ``self.responses[0]``, mutating the shared fixture in place and
    leaking the change into sibling tests. Copy the dict before mutating.
    """
    DateTimeWithChosenNow.chosen_now = datetime(2015, 6, 12, 11, 0, 0)
    spider = VesselFinderApi(apikey='key')
    idx = None
    for idx, item in enumerate(spider.retrieve_positions(response)):
        pass  # To force consumption of the generator.
    self.assertEqual(idx, 0)

    # Shallow copy preserves key order and protects the shared fixture.
    expected_response = dict(self.responses[0])
    expected_response.pop('master_mmsi')
    self.assertListEqual(
        list(strip_meta_fields(item).keys()),
        list(expected_response.keys()))
def process_item(self, item, spider):
    """Export *item* to the drive exporter unless custom export is enabled.

    Always returns the item unchanged so downstream pipelines still run.
    """
    # NOTE `CsvItemExporter` expects bytes, not unicodes
    # NOTE if custom data exporting is enabled, don't export yielded items automatically
    custom_export = spider.settings.get('KP_DRIVE_CUSTOM_EXPORT')
    if not custom_export:
        self.stats.inc_value(self._namespace('items_stored'))
        exported = item if self.include_meta else strip_meta_fields(item)
        self.exporter.export_item(exported)
    # Jobs on scrapinghub still persist items in their own database;
    # the point of this pipeline is to give analysts an easy interface
    # to view and edit data if needed.
    return item
def test_parse_base_vessel_no_mmsi(self, selector):
    """Vessels without an MMSI should still parse all remaining attributes."""
    # GIVEN
    expected_vessel = Vessel({
        'build_year': 1983,
        'call_sign': 'LAMW4',
        'dead_weight': 56174,
        'flag_name': 'Norway NIS',
        'gross_tonnage': 50699,
        'imo': '8016809',
        'name': 'BERGE FROST',
        'status': 'Broken Up',
        # TODO not required in VesselRegistry model,
        # clarify with analysts on criticality
        # 'status_date': '2011-03-04T00:00:00',
        'type': 'LPG Tanker',
        'reported_date': '2016-11-04T00:00:00',
    })

    # WHEN
    parsed = parser._parse_base_vessel(selector)

    # THEN
    self.assertEqual(
        strip_meta_fields(parsed),
        strip_meta_fields(expected_vessel),
    )
def test_retrieve_sattelite_position(self):
    """Satellite (S-AIS) simple messages should yield the expected item.

    Fixes the failure diagnostic: it previously built ``ikeys`` from the
    raw ``item.keys()`` (which includes meta fields, so meta fields were
    reported as "extraneous") and hard-coded ``responses[0]`` instead of
    ``responses[idx]``. Also drops dead commented-out parsing code.
    """
    response = FakeXmlResponse(
        os.path.join(FIXTURE_PATH, 'position-v5-sattelite.xml'))
    spider = MarineTrafficSpider(fleet_name='MT_API', msgtype='simple')
    responses = [{
        'aisType': 'S-AIS',
        'master_mmsi': '205194000',
        # 'master_shipType': 'tankships',
        'position_aisType': 'S-AIS',
        'position_course': '11',
        'position_heading': 329,
        'position_lat': '43.332330',
        'position_lon': '5.342667',
        'position_navState': '5',
        'position_speed': 0.0,
        'position_timeReceived': '2015-09-17T06:49:55',
        'provider_id': 'MT_API',
    }]
    idx = None
    for idx, item in enumerate(spider.parse(response)):
        relevant_fields = list(strip_meta_fields(item).keys())
        for key in relevant_fields:
            self.assertEqual(item.get(key), responses[idx].get(key))
        # Ensure the item and our test dict have the same number of keys.
        # Compare meta-stripped keys so meta fields are never reported as
        # extraneous, and index the expectation with `idx`.
        ikeys = set(relevant_fields)
        dkeys = set(responses[idx])
        self.assertEqual(
            len(relevant_fields), len(responses[idx]),
            'Missing keys: {}, Extraneous keys: {}'.format(
                ', '.join(sorted(dkeys - ikeys)),
                ', '.join(sorted(ikeys - dkeys))),
        )
    self.assertIsNotNone(idx)  # Ensures the loop iterated at least once
def _check_ais_msg(self, spider, response):
    """Assert every position yielded by *spider* matches ``self.responses``."""
    last_seen = None
    for last_seen, item in enumerate(spider.retrieve_positions(response)):
        expected = self.responses[last_seen]
        for field, value in strip_meta_fields(item).items():
            self.assertEqual(value, expected[field])
    self.assertIsNotNone(last_seen)  # Ensures the loop iterated at least once