def geocode_reverse(self, block: str, street: str) -> Optional[Geocode]:
    """
    Geocodes the street name, and reverse geocodes for a building around it.
    Hopefully the building is found (but very unlikely)

    Args:
        block (str): The block number
        street (str): The street name
    Returns:
        Optional[Geocode]: Geocode result, or None when the street query
            reports a total of 0 or the building number returned by the
            reverse query differs from `block`
    """
    # both queries go through the same local HTML page
    html_loc = path.join(ProjUtils.get_curr_folder_path(),
                         SDirectory._HTML_LOC)

    # get street lat/long
    res = self._submit_query('file:///{}?g={}'.format(html_loc, street))
    if res[0]['total'] == 0:
        return None
    # res[0] carries the result count; the entry after it supplies x/y
    street_lat = float(res[1]['y'])
    street_lng = float(res[1]['x'])

    # get buildings around it
    res = self._submit_query(
        'file:///{}?r={},{}'.format(html_loc, street_lat, street_lng))

    # identify the right building
    if res['no.'] != block:
        return None
    return self._json2geocode(res)
def main() -> None:
    """
    Main function

    Walks every town and every flat type reported by Hdb. For each
    (town, flat type) pair that _is_completed does not report as done, all
    blocks are scraped and handed to _serialize_blocks; progress is printed
    along the way.
    """
    ProjUtils.set_project_cwd()
    hdb = Hdb()

    # for each Town
    towns = hdb.get_towns()
    print(towns)
    for i, town in enumerate(towns):
        print('------------------------------------')
        i_percent = float(i) / len(towns)
        print('Running town - {} ({:.0%})\n'.format(town, i_percent))

        # for each Flat Type
        flat_types = hdb.get_flat_types(town)
        print('\t{}'.format(flat_types))
        for j, flat_type in enumerate(flat_types):
            print('\t- - - - - - - - - - - - - - - - - -')
            j_percent = float(j) / len(flat_types)
            print('\tRunning flat type - {} ({:.0%}) + ({:.0%})\n'.format(
                flat_type, i_percent, j_percent))

            # skip this Town and Flat Type if it is scraped already
            if _is_completed(town, flat_type):
                print('\t\t Already done, skipping ...\n')
                continue

            # Scraping each block
            blocks = []
            block_codes = hdb.get_blocks(town, flat_type)
            print('\t\t{}'.format(block_codes))
            for k, block_code in enumerate(block_codes):
                k_percent = float(k) / len(block_codes)
                print('\t\tScraping {} ({:.0%}) + ({:.0%}) + ({:.0%}) ...'.
                      format(block_code, i_percent, j_percent, k_percent))

                start_time = time.time()
                block = hdb.get_block_details(town, flat_type, block_code)
                blocks.append(block)
                # '.2f' (two decimals); the previous '2f' was only a
                # minimum width and printed six decimals
                print('\t\tDone in {:.2f} secs\n'.format(
                    time.time() - start_time))

            # NOTE(review): assumes serialization and throttling run once per
            # (town, flat type), after all of its blocks - confirm against
            # the intended nesting
            _serialize_blocks(town, flat_type, blocks)

            # Throttle the scraping
            time.sleep(random() * THROTTLE)
def _save_cache(cls) -> None:
    """
    Writes the class-level geocache to its file using pickle
    """
    base_dir = ProjUtils.get_curr_folder_path()
    target = path.join(base_dir, cls._GEOCACHE_LOC)
    # binary mode is required by pickle
    with open(target, 'wb') as out_file:
        pickle.dump(cls._geocache, out_file)
def _load_cache(cls) -> None:
    """
    Fills the class-level geocache from its pickle file, or with an empty
    dict when that file does not exist
    """
    base_dir = ProjUtils.get_curr_folder_path()
    cache_file = path.join(base_dir, cls._GEOCACHE_LOC)

    # nothing saved yet: start with an empty cache
    if not path.exists(cache_file):
        cls._geocache = {}
        return

    # NOTE: the file is unpickled as-is, so it must come from a trusted source
    with open(cache_file, 'rb') as in_file:
        cls._geocache = pickle.load(in_file)
def __init__(self) -> None:
    """
    Constructor

    Claims the class-wide single-instance flag, then records the sqlite,
    json and schema locations and resets the cached ids to 0.

    Raises:
        ConnectionRefusedError: if SqliteImporter._instantiated is already
            set
    """
    # Singleton: refuse a second instance while the flag is set
    already_open = SqliteImporter._instantiated
    if already_open:
        raise ConnectionRefusedError(
            'Close the previous instance of SqliteImporter first')
    SqliteImporter._instantiated = True

    # set paths
    sqlite_loc = path.join(ProjUtils.get_project_path(), 'data',
                           'database.sqlite')
    json_loc = path.join(ProjUtils.get_project_path(), 'data', 'json')
    schema_loc = path.join(ProjUtils.get_curr_folder_path(), 'schema.sql')
    self._sqlite_loc = sqlite_loc
    self._json_loc = json_loc
    self._schema_loc = schema_loc

    # cached data
    self._cached_date_id = self._cached_block_id = self._cached_apt_id = 0
def main() -> None:
    """
    Main Function

    Runs the JSON clean-up steps in order, calls the property check, and
    finally imports the JSON data through MongoImporter.
    """
    ProjUtils.set_project_cwd()

    # Cleaning up (order of the steps is kept as-is)
    RootJsonFix.run(_JSON_LOC, _LOG_LOC)
    _null_fix()
    _expand_address_acronym()
    _date_to_dict()
    ComputeLease.run(_JSON_LOC, _LOG_LOC)
    AddGeo.run(_JSON_LOC, _LOG_LOC)
    _change_types()

    # Check properties (called with the flag switched off)
    _check_properties(False)

    # Import Data
    MongoImporter(_JSON_LOC).run()
def get_token(cls) -> str:
    """
    Returns an access token, requesting a new one when no old token is
    available

    The old token is whatever cls._get_old_token returns for the token file.
    When that is falsy, the stored auth JSON is posted to
    cls._ONEMAP_AUTH_API, the reply is saved to the token file and its
    'access_token' is returned.

    Returns:
        str: The access token
    Raises:
        ConnectionError: if the reply is not valid JSON or does not contain
            an 'access_token'
    """
    token_loc = path.join(ProjUtils.get_curr_folder_path(), cls._TOKEN_LOC)
    token = cls._get_old_token(token_loc)
    if token:
        return token

    # load auth
    auth_loc = path.join(ProjUtils.get_curr_folder_path(), cls._AUTH_LOC)
    with open(auth_loc, 'r') as fstream:
        auth = json.load(fstream)

    # submit auth; the timeout keeps an unresponsive server from blocking
    # the caller indefinitely
    headers = {'cache-control': 'no-cache'}
    res = requests.post(cls._ONEMAP_AUTH_API, json=auth, headers=headers,
                        timeout=30)
    try:
        token_result = json.loads(res.text)
    except ValueError as err:
        # a non-JSON reply is reported like any other failed authentication
        raise ConnectionError('Error in authentication!') from err
    if 'access_token' not in token_result:
        raise ConnectionError('Error in authentication!')

    # save auth
    with open(token_loc, 'w') as fstream:
        json.dump(token_result, fstream)
    return token_result['access_token']
def geocode(self, address: str) -> Optional[Geocode]:
    """
    Obtains the lat and long of an address

    The query results are searched with three preferences, in order, and the
    first result satisfying the earliest matching preference is used:
    1. its 'pc' ends with the block taken from the address
    2. it has 'a' and 't', and 't' contains 'HDB'
    3. it has 'a' and 't'

    Args:
        address (str): Human readable address
    Returns:
        Optional[Geocode]: Geocode result, or None when no result satisfies
            any of the preferences
    """
    uri = 'file:///{}?g={}'.format(
        path.join(ProjUtils.get_curr_folder_path(), SDirectory._HTML_LOC),
        address)
    res = self._submit_query(uri)

    # the block is the first space-separated token, minus one trailing
    # letter; the emptiness check keeps an empty or space-prefixed address
    # from raising IndexError
    block = address.split(' ')[0]
    if block and block[-1].isalpha():
        block = block[:-1]
    # NOTE(review): a block shorter than three characters cannot equal a
    # three-character 'pc' suffix - confirm whether zero-padding is intended

    preferences = (
        # try to favor the right postal code
        lambda entry: 'pc' in entry and entry['pc'][-3:] == block,
        # try to favor results that contain the word 'HDB'
        lambda entry: 'a' in entry and 't' in entry and 'HDB' in entry['t'],
        # otherwise take anything that has both 'a' and 't'
        lambda entry: 'a' in entry and 't' in entry,
    )
    for accepts in preferences:
        target_bldg = next((entry for entry in res if accepts(entry)), None)
        if target_bldg is not None:
            return self._json2geocode(target_bldg)
    return None
def __init__(self) -> None:
    """
    Constructor

    Creates the PhantomJS web driver from the executable located at
    SDirectory._PHANTOMJS_PATH under the project path.
    """
    self._driver = webdriver.PhantomJS(
        executable_path=path.join(ProjUtils.get_project_path(),
                                  SDirectory._PHANTOMJS_PATH))
import DatabaseUtils import DirectoryUtils import ProjUtils import json import math from dateutil import parser db = DatabaseUtils.DB_Helper('127.0.0.1', 'gisdb', 'postgres', '****') converter = ProjUtils.WGS84_WebMercatorConverter() #print (db.checkConnection()) tweetFields = {'id': 'id', \ 'tweet':'text', \ 'hourslot':'created_at', \ 'favourite_count' : 'favorite_count', \ 'retweet_count' : 'retweet_count', \ 'in_reply_to' : 'in_reply_to_status_id', \ 'quoted_status_id' : 'quoted_status_id', \ 'retweeted_status_id' : 'retweeted_status', \ 'user_id': 'user'} directory = DirectoryUtils.directoryHelper('/root/Downloads/TFM-BigData/') def GetMercatorCoordinates(coordinates): if (coordinates is not None): longitude = coordinates['coordinates'][0] latitude = coordinates['coordinates'][1] try: point = converter.Transform(longitude, latitude) return point