예제 #1
0
class JsonPost:
    """Wraps the JSON of a post for duplicate detection.

    The original JSON is kept untouched in ``json``; ``mod_json`` holds a
    deep copy with the keys removed that must not influence a comparison.
    ``is_duplicate`` starts out as ``False``.
    """

    logger = LoggerFactory.get_enhancement_logger()

    def __init__(self, json):
        """Store the raw JSON and derive the stripped comparison copy."""

        self.json = json
        # Work on a deep copy so the stripping never touches the original.
        comparison_copy = deepcopy(json)
        self.mod_json = self.__remove_not_needed_data(comparison_copy)
        self.is_duplicate = False

    @staticmethod
    def __remove_not_needed_data(json):
        """Delete the keys that should not be used in the comparison.

        The passed object is modified in place and returned afterwards.
        """
        JsonPost.logger.debug("__remove_not_needed_data()")

        for ignored_key in ('link', 'source', 'id', 'post_struct'):
            if ignored_key in json:
                del json[ignored_key]

        return json
    def __init__(self, data, domain_name):
        """Constructor of Enhancer.

        Stores the given data and domain name, fetches the enhancement
        logger and builds the lookup table that maps a domain name to the
        method holding that domain's specific enhancement.

        Args:
            data: The data to enhance (presumably the scraped posts of the
                domain; verify against the caller).
            domain_name: Name of the domain the data belongs to; expected to
                match a key of the function map below when a domain specific
                enhancement exists.
        """
        self.__data = data
        self.__domain_name = domain_name
        self.logger = LoggerFactory.get_enhancement_logger()

        # Register domain specific enhancement functions here, using the form
        #     'domain': self.__enhance_domain_function_name
        # Store the bound method itself, i.e. write
        # 'self.__enhance_domain_function_name' without trailing brackets;
        # 'self.__enhance_domain_function_name()' would call the function
        # immediately instead of storing a reference to it.
        # The three 'gutetat_*' domains share one enhancement method.
        self.__function_map = {
            'ehrenamt_hessen': self.__enhance_ehrenamt_hessen,
            'weltwaerts': self.__enhance_weltwaerts,
            'gutetat_berlin': self.__enhance_gute_tat,
            'gutetat_hamburg': self.__enhance_gute_tat,
            'gutetat_munich': self.__enhance_gute_tat,
            'ein_jahr_freiwillig': self.__enhance_ein_jahr_freiwillig,
            'bundesfreiwilligendienst':
            self.__enhance_bundesfreiwilligendienst,
        }
예제 #3
0
from shared.LoggerFactory import LoggerFactory

# Module-level enhancement logger, obtained once at import time and used by
# the functions defined below.
logger = LoggerFactory.get_enhancement_logger()


def add_map_address(data):
    """Placeholder enhancement: give every post an empty ``map_address``.

    Each element of ``data`` must provide a ``post_struct`` mapping; its
    ``map_address`` entry is set to the empty string in place.
    """
    logger.debug("add_map_address()")

    for entry in data:
        struct = entry['post_struct']
        struct['map_address'] = ''
class LatLonEnhancer:
    """Class handling the enhancement of posts by adding geo data.

    Coordinates that were resolved once are kept in a CSV file
    (``dict_file``) and mirrored in ``lat_lon_dict``. The geocoder is only
    queried for request strings that have no entry in that local storage.
    """

    logger = LoggerFactory.get_enhancement_logger()
    # NOTE(review): evaluated when the class is defined; if the ROOT_DIR
    # environment variable is unset, os.getenv returns None and os.path.join
    # raises a TypeError.
    dict_file = os.path.join(os.getenv('ROOT_DIR'), 'data_enhancement',
                             'enhancement_location', 'geocoder_lat_lon.csv')
    # Class attribute: the in-memory cache is shared by all instances.
    lat_lon_dict = {}

    def __init__(self):
        """Initializes the enhancer."""
        self.__setup()
        self.geo_locator = Nominatim(user_agent="einander-helfen.org")
        self.__load_local_storage()

    def __setup(self):
        """Checks if the local storage file exists and creates it if it is missing"""
        LatLonEnhancer.logger.debug("__setup()")

        if not os.path.exists(self.dict_file):
            # Logger.warn is a deprecated alias (removed in Python 3.13).
            LatLonEnhancer.logger.warning(
                f"Create missing geocoder_lat_lon.csv as {self.dict_file}")
            # "x" creates the file exclusively; the context manager closes
            # the handle right away instead of leaking it.
            with open(self.dict_file, "x", encoding='utf-8'):
                pass

    def enhance(self, post):
        """Adds latitude and longitude to a given post if they are missing.

        The post is modified in place: when ``post['geo_location']`` is
        ``None``, both ``post['geo_location']`` and
        ``post['post_struct']['geo_location']`` are set to the looked-up
        location, or to ``None`` when no location could be determined.
        Posts that already carry a geo location are left untouched.
        Nothing is returned.
        """
        LatLonEnhancer.logger.debug("enhance()")

        # Posts that already have a geo location need no enhancement.
        if post['geo_location'] is not None:
            return

        request_string = LatLonEnhancer.get_api_request_string(post)

        lat_lon = self.__check_local_storage(request_string)

        if lat_lon is None:
            LatLonEnhancer.logger.info(f"enhancing lat lon for {post}")
            lat_lon = self.__handle_api_requests(request_string)
            # Only successful lookups are persisted in the local storage.
            if lat_lon:
                self.__add_new_entry(request_string, lat_lon)

        post['geo_location'] = lat_lon
        post['post_struct']['geo_location'] = lat_lon

    def __check_local_storage(self, request_string):
        """Checks if local storage contains a result for the query. If it does, the geo_location object is returned.
           Returns None if local storage doesn't contain a result for the request"""
        LatLonEnhancer.logger.debug("__check_local_storage()")

        return self.lat_lon_dict.get(request_string)

    def __load_local_storage(self):
        """Reads local storage file (.csv) into class attribute"""
        LatLonEnhancer.logger.debug("__load_local_storage()")

        # Initialize the file, if it is not: in append mode tell() is 0 only
        # for an empty file, which then receives the header row.
        with open(self.dict_file, 'a', newline='',
                  encoding='utf-8') as csvfile:
            if not csvfile.tell():
                fieldnames = ['request', 'lat', 'lon']
                writer = csv.DictWriter(csvfile, fieldnames)
                writer.writeheader()

        # Read the file
        with open(self.dict_file, newline='', encoding='utf-8') as csvfile:
            geocoder_lat_lon = csv.reader(csvfile, delimiter=',')
            for row in geocoder_lat_lon:
                # Skip blank lines and the header row.
                if row and row[0] != 'request':
                    # row[0]: request string, row[1]: lat, row[2]: lon
                    self.lat_lon_dict[row[0]] = {
                        'lat': float(row[1]),
                        'lon': float(row[2])
                    }

    def __add_new_entry(self, request_string, geo_location):
        """Adds new entry to local storage (in-memory dictionary and csv file)"""
        LatLonEnhancer.logger.debug("__add_new_entry()")

        self.lat_lon_dict[request_string] = geo_location
        with open(self.dict_file, 'a', newline='',
                  encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                request_string,
                str(geo_location['lat']),
                str(geo_location['lon'])
            ])

        LatLonEnhancer.logger.info(
            f'Added geo location of \'{request_string}\' to the dictionary')

    def __handle_api_requests(self, request_string):
        """Executes the API request.

        Returns a dict with the keys 'lat' and 'lon', or None if the request
        string is empty or the geocoder found no location.
        """
        LatLonEnhancer.logger.debug(f"__handle_api_requests({request_string})")

        if request_string == "":
            return None

        location = self.geo_locator.geocode(request_string)
        # NOTE(review): presumably throttles the requests to respect the
        # rate limit of the Nominatim service - confirm.
        time.sleep(1)

        if not location:
            return None
        return {'lat': location.latitude, 'lon': location.longitude}

    @staticmethod
    def get_api_request_string(post):
        """Build the API request string.

        The string is built from the first of the structured fields
        'location', 'contact' and 'organization' (in this order) that yields
        a non-empty result. The non-empty 'street', 'zipcode', 'city' and
        'country' values of that field are joined by single spaces.
        Returns an empty string if no field provides address data.
        """
        LatLonEnhancer.logger.debug("get_api_request_string()")

        struct_data = post['post_struct']

        # Try to build request string from:
        # 1. structured location 2. structured address of contact 3. structured address of organisation
        for field in ('location', 'contact', 'organization'):
            if field not in struct_data or not struct_data[field]:
                continue

            address = struct_data[field]
            parts = [
                address[key]
                for key in ('street', 'zipcode', 'city', 'country')
                if key in address and address[key]
            ]
            request_string = ' '.join(parts).strip()
            # The first field that produces any text wins.
            if request_string:
                return request_string

        return ""