def fetch_wikicar_text_manually(self, language: str, title: str):
    """Fetch the plain-text extract of a Wikipedia page via the MediaWiki API.

    Args:
        language: Subdomain inserted into ``https://{}.wikipedia.org``.
        title: Page title; its whitespace-separated words are joined with
            ``%20`` before being placed into the query string.

    Returns:
        The elements that ``find_values_in_json`` yields for ``extract``,
        joined with single spaces and stripped. An empty string is returned
        when no extract is found or a non-connection error occurs. After a
        connection error the call waits and returns the result of the retry.
    """
    # Joining the split words gives '' for an empty title; the previous
    # hand-rolled loop left the value as None and queried the page "None".
    reformatted_title = '%20'.join(title.split())
    url = 'https://{}.wikipedia.org/w/api.php?action=query&prop=extracts&titles={}&explaintext=1&format=json'.format(
        language, reformatted_title)
    try:
        json_response: str = get_response(url=url).text
        texts = list(find_values_in_json(id='extract', json_repr=json_response))
        if not texts:
            # Explicit "nothing found" path instead of relying on
            # None.strip() raising into the generic handler below.
            print("Could not access or find json data")
            return ""
        return ' '.join(texts).strip()
    except Exception as err:
        # Exact type match is kept on purpose so that subclasses of these
        # errors are not retried.
        retryable = (
            requests.exceptions.ConnectionError,
            urllib3.exceptions.MaxRetryError,
            urllib3.exceptions.NewConnectionError,
        )
        if type(err) in retryable:
            # Wait two minutes (with a progress bar) before retrying.
            for _ in tqdm(range(0, 120), unit=' Wikipedia overloaded. Waiting a little'):
                time.sleep(1)
            # Propagate the retry's result; it used to be thrown away.
            # NOTE(review): retries are unbounded while the connection keeps failing.
            return self.fetch_wikicar_text_manually(language, title)
        print(err)
        print("Could not access or find json data")
        return ""
def fetch(cls, track_id):
    """Download and process the statistics of one track.

    Does nothing when ``crawl_test`` is truthy and ``cls.counter`` has
    already reached ``crawl_amount``. Otherwise the statistics are requested,
    handed to ``cls.process`` together with ``track_id``, and ``cls.counter``
    is incremented.
    """
    limit_reached = crawl_test and cls.counter >= crawl_amount
    if limit_reached:
        return

    endpoint = '/tracks/{}/statistics'.format(track_id)
    response = file_management.get_response(url + endpoint)
    cls.process(track_id, response.json())
    cls.counter += 1
def fetch(cls, sensor):
    """Download the statistics of one sensor and store the processed result.

    The processed statistics are stored in ``cls.sensor_statistics`` under
    the key ``sensor`` and the module-level ``counter`` is incremented.
    A ``ValueError`` or ``AttributeError`` raised while fetching or
    processing is printed and otherwise ignored (best effort).
    """
    global counter
    statistics_url = url + '/sensors/{}/statistics'.format(sensor)
    try:
        statistics = file_management.get_response(statistics_url).json()
        cls.sensor_statistics[sensor] = cls.process(statistics)
        counter += 1
    # A tuple is required here: `ValueError or AttributeError` evaluates to
    # just `ValueError`, so AttributeError used to escape this handler.
    except (ValueError, AttributeError) as err:
        print(err)
def repair_tracks(sensors, tracks):
    """Replace sensor ids in ``tracks`` that are missing from ``sensors``.

    For every track whose ``'sensor_id'`` is not contained in ``sensors``,
    the sensor is requested from ``url + '/sensors/' + <sensor_id>`` and the
    track's ``'sensor_id'`` is overwritten with ``['properties']['id']`` of
    the response.

    Args:
        sensors: Container of known sensor ids (only membership is tested).
        tracks: Mapping of track key to a dict holding ``'sensor_id'``;
            it is modified in place.

    Returns:
        The same ``tracks`` object that was passed in.
    """
    print("Checking and repairing tracks")
    missing_sensor = 0
    for track in tqdm(tracks, total=len(tracks), unit=' Repairing Envirocar Tracks'):
        track_sensor = tracks[track]['sensor_id']
        if track_sensor not in sensors:
            sensor = file_management.get_response(url=url + '/sensors/' + track_sensor).json()
            tracks[track]['sensor_id'] = sensor['properties']['id']
            missing_sensor += 1
    # Report the real number of inspected tracks; the former counter was
    # never incremented, so the summary always read "of 0 Tracks".
    print("Successfully repaired {} of {} Tracks".format(missing_sensor, len(tracks)))
    return tracks
def fetch(self, sensor_url):
    """Request ``sensor_url`` and merge the processed sensors into ``self.sensors``.

    The ``'sensors'`` entry of the JSON response is passed to
    ``self.process`` and the result is merged via ``self.sensors.update``.
    """
    response = file_management.get_response(sensor_url)
    payload = response.json()
    raw_sensors = payload['sensors']
    self.sensors.update(self.process(raw_sensors))
def fetch(cls, track_url):
    """Request ``track_url`` and merge every processed track into ``cls.tracks``.

    Each element of the ``'tracks'`` entry in the JSON response is passed to
    ``cls.process`` and the result is merged via ``cls.tracks.update``.
    """
    response = get_response(url=track_url)
    track_entries = response.json()['tracks']
    for entry in track_entries:
        processed = cls.process(entry)
        cls.tracks.update(processed)