Exemple #1
0
    def geocode_reverse(self, block: str, street: str) -> Optional[Geocode]:
        """
        Geocodes the street name, and reverse geocodes for a building around it.
        Hopefully the building is found (but very unlikely)

        Args:
            block (str): The block number
            street (str): The street name

        Returns:
            Optional[Geocode]: Geocode result, or None when the street query
                reports a total of 0 or when the 'no.' field of the reverse
                query result differs from `block`
        """
        # get street lat/long
        uri = 'file:///{}?g={}'.format(
            path.join(ProjUtils.get_curr_folder_path(), SDirectory._HTML_LOC),
            street)
        res = self._submit_query(uri)
        # res[0] holds the result count, res[1] the entry whose coordinates
        # are used below
        if res[0]['total'] == 0:
            return None
        street_lat = float(res[1]['y'])
        street_lng = float(res[1]['x'])

        # get buildings around it (query takes "lat,lng")
        uri = 'file:///{}?r={},{}'.format(
            path.join(ProjUtils.get_curr_folder_path(), SDirectory._HTML_LOC),
            street_lat, street_lng)
        res = self._submit_query(uri)

        # identify the right building
        # NOTE(review): the reverse query result is indexed like a single
        # mapping here, unlike the sequence returned for the street query -
        # confirm against _submit_query
        if res['no.'] != block:
            return None
        return self._json2geocode(res)
Exemple #2
0
def main() -> None:
    """
    Main function

    Iterates over every town, every flat type of that town and every block
    of that flat type, scraping the details of each block. The blocks of one
    (town, flat type) pair are serialized together, and pairs that
    `_is_completed` reports as done are skipped. Progress is printed along
    the way.
    """
    ProjUtils.set_project_cwd()

    hdb = Hdb()
    # for each Town
    towns = hdb.get_towns()
    print(towns)
    for i, town in enumerate(towns):
        print('------------------------------------')
        i_percent = float(i) / len(towns)
        print('Running town - {} ({:.0%})\n'.format(town, i_percent))

        # for each Flat Type
        flat_types = hdb.get_flat_types(town)
        print('\t{}'.format(flat_types))
        for j, flat_type in enumerate(flat_types):
            print('\t- - - - - - - - - - - - - - - - - -')
            j_percent = float(j) / len(flat_types)
            print('\tRunning flat type - {} ({:.0%}) + ({:.0%})\n'.format(
                flat_type, i_percent, j_percent))

            # skip this Town and Flat Type if it is scraped already
            if _is_completed(town, flat_type):
                print('\t\t Already done, skipping ...\n')
                continue

            blocks = []

            # Scraping each block
            block_codes = hdb.get_blocks(town, flat_type)
            print('\t\t{}'.format(block_codes))
            for k, block_code in enumerate(block_codes):
                k_percent = float(k) / len(block_codes)
                print('\t\tScraping {} ({:.0%}) + ({:.0%}) + ({:.0%}) ...'.
                      format(block_code, i_percent, j_percent, k_percent))

                start_time = time.time()
                block = hdb.get_block_details(town, flat_type, block_code)
                blocks.append(block)
                # '{:.2f}' gives two decimals; the previous '{:2f}' was only
                # a minimum width and printed six decimals
                print('\t\tDone in {:.2f} secs\n'.format(time.time() -
                                                         start_time))
            _serialize_blocks(town, flat_type, blocks)

            # Throttle the scraping (random pause of up to THROTTLE seconds
            # after each scraped town / flat type pair)
            time.sleep(random() * THROTTLE)
Exemple #3
0
 def _save_cache(cls) -> None:
     """
     Pickles the class-level geocache to the geocache file located in the
     current folder
     """
     cache_file = path.join(ProjUtils.get_curr_folder_path(),
                            cls._GEOCACHE_LOC)
     with open(cache_file, 'wb') as out:
         pickle.dump(cls._geocache, out)
Exemple #4
0
 def _load_cache(cls) -> None:
     """
     Fills the class-level geocache from the geocache file, or resets it to
     an empty dict when that file does not exist
     """
     cache_file = path.join(ProjUtils.get_curr_folder_path(),
                            cls._GEOCACHE_LOC)

     # nothing cached yet: start from an empty cache
     if not path.exists(cache_file):
         cls._geocache = {}
         return

     # NOTE(review): unpickling trusts the file contents completely; this
     # presumably only ever reads a file written by _save_cache - confirm
     with open(cache_file, 'rb') as src:
         cls._geocache = pickle.load(src)
    def __init__(self) -> None:
        """
        Constructor

        Marks the class as instantiated, resolves the file locations used by
        the importer and zeroes the cached ids.

        Raises:
            ConnectionRefusedError: If SqliteImporter is already flagged as
                instantiated
        """
        # Singleton: refuse a second instance while the flag is set
        if SqliteImporter._instantiated:
            raise ConnectionRefusedError(
                'Close the previous instance of SqliteImporter first')
        SqliteImporter._instantiated = True

        # set paths: database and json live under <project>/data, the schema
        # sits in the current folder
        data_dir = path.join(ProjUtils.get_project_path(), 'data')
        self._sqlite_loc = path.join(data_dir, 'database.sqlite')
        self._json_loc = path.join(data_dir, 'json')
        self._schema_loc = path.join(ProjUtils.get_curr_folder_path(),
                                     'schema.sql')

        # cached data
        self._cached_date_id = self._cached_block_id = self._cached_apt_id = 0
Exemple #6
0
def main() -> None:
    """
    Main Function

    Runs the clean-up steps over the json data in a fixed order, calls the
    property check with its flag switched off, then imports the data
    through MongoImporter.
    """
    ProjUtils.set_project_cwd()

    # Cleaning up (the steps run in this exact order)
    RootJsonFix.run(_JSON_LOC, _LOG_LOC)
    _null_fix()
    _expand_address_acronym()
    _date_to_dict()
    ComputeLease.run(_JSON_LOC, _LOG_LOC)
    AddGeo.run(_JSON_LOC, _LOG_LOC)
    _change_types()

    # Check properties (called with False)
    _check_properties(False)

    # Import Data
    MongoImporter(_JSON_LOC).run()
Exemple #7
0
    def get_token(cls) -> str:
        """
        Returns an access token, reusing the stored one when possible

        The value from `cls._get_old_token` is returned as is when it is
        truthy. Otherwise the content of the auth file is posted to the auth
        API, the whole response is written to the token file and its
        'access_token' entry is returned.

        Returns:
            str: The access token

        Raises:
            ConnectionError: If the auth response has no 'access_token'
        """
        auth_loc = path.join(ProjUtils.get_curr_folder_path(), cls._AUTH_LOC)
        token_loc = path.join(ProjUtils.get_curr_folder_path(), cls._TOKEN_LOC)

        stored = cls._get_old_token(token_loc)
        if stored:
            return stored

        # load auth
        with open(auth_loc, 'r') as auth_file:
            credentials = json.load(auth_file)

        # submit auth
        response = requests.post(cls._ONEMAP_AUTH_API,
                                 json=credentials,
                                 headers={'cache-control': 'no-cache'})
        payload = json.loads(response.text)
        if 'access_token' not in payload:
            raise ConnectionError('Error in authentication!')

        # save auth
        with open(token_loc, 'w') as token_file:
            json.dump(payload, token_file)
        return payload['access_token']
Exemple #8
0
    def geocode(self, address: str) -> Optional[Geocode]:
        """
        Obtains the lat and long of an address

        The query results are searched in up to three passes and the first
        hit wins:
            1. an entry whose 'pc' value ends with the block number
            2. an entry with both 'a' and 't' whose 't' contains 'HDB'
            3. any entry with both 'a' and 't'

        Args:
            address (str): Human readable address

        Returns:
            Optional[Geocode]: Geocode result, or None when no pass matches
        """
        uri = 'file:///{}?g={}'.format(
            path.join(ProjUtils.get_curr_folder_path(), SDirectory._HTML_LOC),
            address)
        res = self._submit_query(uri)

        # The block number is the first space-separated token with a trailing
        # letter removed. The emptiness guard keeps an empty address (or one
        # starting with a space) from raising IndexError on block[-1].
        block = address.split(' ')[0]
        if block and block[-1].isalpha():
            block = block[:-1]

        # try to favor the right postal code
        target_bldg = next(
            (_ for _ in res if 'pc' in _ and _['pc'][-3:] == block), None)
        if target_bldg is None:
            # try to favor results that contain the word 'HDB'
            target_bldg = next(
                (_ for _ in res
                 if 'a' in _ and 't' in _ and 'HDB' in _['t']), None)
        if target_bldg is None:
            # settle for the first result carrying both 'a' and 't'
            target_bldg = next(
                (_ for _ in res if 'a' in _ and 't' in _), None)

        if target_bldg is None:
            return None
        return self._json2geocode(target_bldg)
Exemple #9
0
 def __init__(self) -> None:
     """
     Creates the PhantomJS webdriver from the executable located under the
     project path
     """
     phantomjs_bin = path.join(ProjUtils.get_project_path(),
                               SDirectory._PHANTOMJS_PATH)
     self._driver = webdriver.PhantomJS(executable_path=phantomjs_bin)
Exemple #10
0
import DatabaseUtils
import DirectoryUtils
import ProjUtils
import json
import math
from dateutil import parser

# Module-level helpers shared by the functions below.
# NOTE(review): the DB_Helper arguments look like host, database, user and
# password - confirm against DatabaseUtils.DB_Helper
db = DatabaseUtils.DB_Helper('127.0.0.1', 'gisdb', 'postgres', '****')
converter = ProjUtils.WGS84_WebMercatorConverter()
#print (db.checkConnection())

# NOTE(review): presumably maps each stored field name (key) to the tweet
# attribute it is read from (value) - confirm against the code that uses it
tweetFields = {
    'id': 'id',
    'tweet': 'text',
    'hourslot': 'created_at',
    'favourite_count': 'favorite_count',
    'retweet_count': 'retweet_count',
    'in_reply_to': 'in_reply_to_status_id',
    'quoted_status_id': 'quoted_status_id',
    'retweeted_status_id': 'retweeted_status',
    'user_id': 'user',
}

directory = DirectoryUtils.directoryHelper('/root/Downloads/TFM-BigData/')


def GetMercatorCoordinates(coordinates):
    if (coordinates is not None):
        longitude = coordinates['coordinates'][0]
        latitude = coordinates['coordinates'][1]
        try:
            point = converter.Transform(longitude, latitude)
            return point