Example #1
0
def fill_price_range_calculated_table():
    util.setup_logging()
    city_mode_list = {}

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    restaurants_without_price = db.fetch_entity_where('Restaurant', True, price_range=None)
    logger.info('Found {0} restaurants without Price'.format(len(restaurants_without_price)))

    for restaurant_without_price in restaurants_without_price:
        price_range_calculated = PriceRangeCalculated()

        restaurant_id = restaurant_without_price.id
        price_range_calculated.restaurant_id = restaurant_id

        city_name = restaurant_without_price.city
        if city_name:
            if city_name not in city_mode_list:
                logger.info('Calculating mode for {0}'.format(city_name))
                price_range_list = []
                restaurants_of_city = db.fetch_entity_where('Restaurant', True, city=city_name)
                logger.info('Found {0} restaurants for {1}'.format(len(restaurants_of_city), city_name))
                for restaurant_of_city in restaurants_of_city:
                    price_range = restaurant_of_city.price_range
                    if price_range:
                        price_range_list.append(price_range)
                if len(price_range_list) > 0:
                    try:
                        price_range_mode = mode(price_range_list)
                        logger.info('Mode for {0}: {1}'.format(city_name, price_range_mode))
                    except StatisticsError:
                        price_range_mode = '-1'
                        logger.info('Multiple modes found for {0}'.format(city_name))
                else:
                    price_range_mode = '-2'
                    logger.info('No price_range attribute found in {0}'.format(city_name))
                city_mode_list[city_name] = price_range_mode
            else:
                price_range_mode = city_mode_list[city_name]
                logger.info('Found price_range {0} for {1}'.format(price_range_mode, city_name))
            price_range_calculated.price_range = price_range_mode
            db.insert(price_range_calculated)

    logger.info('Calculated {0} price_range mode(s)'.format(len(city_mode_list)))

    db.commit_session()
    db.close_session()
def fill_buying_power_calculated_table():
    util.setup_logging()

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    cities_without_buying_power = db.fetch_entity_where('City', True, buying_power=None)
    logger.info('Found {0} cities without Buying Power'.format(len(cities_without_buying_power)))

    buying_power_average = get_germany_buying_power_average()
    logger.info('Buying Power Germany: {0}'.format(buying_power_average))

    for city_without_buying_power in cities_without_buying_power:
        buying_power_calculated = BuyingPowerCalculated()

        city_id = city_without_buying_power.id
        buying_power_calculated.city_id = city_id
        buying_power_calculated.buying_power = buying_power_average

        db.insert(buying_power_calculated)

    db.commit_session()
    db.close_session()
Example #3
0
def main():
    rents = []
    util.setup_logging()

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    result = db.fetch_entity_where('City', True, True, rent_avg=None)

    for city in result:
        rents.append(city.rent_avg)

    rent_avg = int(numpy.mean(rents))

    result = db.fetch_entity_where('City', True, False, rent_avg=None)

    for city in result:
        rent_avg_calculated = RentAvgCalculated()
        rent_avg_calculated.city_id = city.id
        rent_avg_calculated.rent_avg = rent_avg
        db.insert(rent_avg_calculated)

    db.commit_session()
    db.close_session()
Example #4
0
def check_price_range_availability_and_update():
    yelp_helper = YelpHelper()
    restaurants = []
    util.setup_logging()

    not_available_count = 0

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    result = db.fetch_entity_where('Restaurant', True, price_range=None)
    logger.info('Found {0} restaurants'.format(str(len(result))))

    try:
        for restaurant in result:
            restaurant_id = restaurant.id
            business, status_code = yelp_helper.get_business(restaurant_id)
            if 'error' not in business:
                if 'price' in business:
                    price_range = business['price']
                    if price_range:
                        restaurant.price_range = price_range
                    else:
                        logger.info('Price Range is null')
                    db.insert(restaurant)
                else:
                    not_available_count += 1
            else:
                raise YelpError(business['error']['code'], business['error']['description'])
            logger.info(not_available_count)
    except YelpError as error:
        logger.exception(error)
        logger.info('Adding {0} updated restaurants to DB...'.format(len(restaurants)))
    finally:
        db.commit_session()
        db.close_session()
class Transporter(ABC, threading.Thread):

    database = None
    source_entity = None
    target_entity = None
    source_db = None
    test_mode = None
    source_entity_id = None
    city_name = None
    zip_codes = []
    top_how_much = None

    logger = logging.getLogger(__name__)

    def __init__(self, database, source_entity, test_mode, city_name):
        super(Transporter, self).__init__()
        self.logger.info(
            'Creating Transporter for Datastore Entity: {0}'.format(
                source_entity))
        self.database = database
        self.source_entity = source_entity
        self.source_db = DatastoreHelper()
        self.target_db = SqlHelper(self.database)
        self.test_mode = test_mode
        self.city_name = city_name

        if self.city_name:
            self._fetch_zip_codes_from_database()

    def run(self):
        results = []
        self.logger.info('Starting transport...')
        self.target_db.create_session()
        total = self._get_entities(None, None, True)
        self.logger.info('Found a total of %s entries in Google Datastore',
                         str(total))
        offset = 0
        while offset < total:
            result = self._transport(offset)
            results.append(result)
            offset += constants.GCP_FETCH_LIMIT
            # i dont know why but google datastore doesn't allow a offset greater than 2500
            if offset == 2500:
                self.logger.info('Resetting offset...')
                offset = 0
                total = self._get_entities(None, None, True)
                self.logger.info(
                    'Found a total of %s entries in Google Datastore',
                    str(total))
        for result in results:
            self.logger.info(result)
        self.logger.info('Done transporting Restaurants...')

    def _transport(self, offset):
        result = Result()
        limit = constants.GCP_FETCH_LIMIT
        source_entities = self._get_entities(limit, offset, False)
        if source_entities:
            for datastore_entity in source_entities:
                self.logger.info('Starting mapping...')
                entities = self.map(datastore_entity)
                entity_length = len(entities)
                self.logger.info('Mapped {0} entities...'.format(
                    str(entity_length)))
                if not self.test_mode:
                    if entity_length > 0:
                        try:
                            for entity in entities:
                                if entity:
                                    self.logger.info('Saving in database...')
                                    self.target_db.insert(entity)
                            self.logger.info('Commiting DB entries')
                            self.target_db.commit_session()
                            result.set_success(True)
                            result.set_message(
                                'Fetched entries from offset {0} with limit {1}'
                                .format(str(offset), str(limit)))
                        except SQLAlchemyError as err:
                            result.set_success(False)
                            result.set_message(err.code)
                            self.logger.exception('An SQLAlchemyError occured')
                        finally:
                            self.target_db.close_session()
                    else:
                        result.set_success(True)
                        result.set_message(
                            'There are no mapped entities that can be saved in database'
                        )
                    self.source_db.set_transported(datastore_entity, True)
                else:
                    result.set_success(True)
                    result.set_message('Test Mode active')
        else:
            result.set_success(False)
            result.set_message(self.source_entity +
                               ' could not be found in Google Datastore')
        return result

    def _fetch_zip_codes_from_database(self):
        sql = SqlHelper(constants.SQL_DATABASE_NAME)

        sql.create_session()
        city_from_db = sql.fetch_city_by_name(self.city_name)
        # get zip codes and close session afterwards
        zip_codes = city_from_db.zip_codes
        sql.close_session()

        for zip_code_obj in zip_codes:
            self.zip_codes.append(zip_code_obj.zip_code)

    def _fetch_entities_by_zip_code(self, entity_name, limit, offset,
                                    only_keys):
        result_all = []

        for zip_code in self.zip_codes:
            result = self.source_db.fetch_entity(entity_name,
                                                 limit=limit,
                                                 offset=offset,
                                                 only_keys=only_keys,
                                                 operator='=',
                                                 zip_code=str(zip_code),
                                                 transported=False)
            result_all += result
        return result_all

    def _get_entities(self, limit, offset, only_total):
        if not self.zip_codes:
            content = self.source_db.fetch_entity(self.source_entity,
                                                  limit,
                                                  offset,
                                                  only_total,
                                                  '=',
                                                  transported=False)
        else:
            content = self._fetch_entities_by_zip_code(self.source_entity,
                                                       limit, offset,
                                                       only_total)

        if only_total:
            result = len(content)
        else:
            result = content

        return result

    # maps target and source structure and returns a list of entities to save in db
    @abstractmethod
    def map(self, datastore_entity) -> List:
        pass