def fill_price_range_calculated_table(): util.setup_logging() city_mode_list = {} db = SqlHelper(constants.SQL_DATABASE_NAME) db.create_session() restaurants_without_price = db.fetch_entity_where('Restaurant', True, price_range=None) logger.info('Found {0} restaurants without Price'.format(len(restaurants_without_price))) for restaurant_without_price in restaurants_without_price: price_range_calculated = PriceRangeCalculated() restaurant_id = restaurant_without_price.id price_range_calculated.restaurant_id = restaurant_id city_name = restaurant_without_price.city if city_name: if city_name not in city_mode_list: logger.info('Calculating mode for {0}'.format(city_name)) price_range_list = [] restaurants_of_city = db.fetch_entity_where('Restaurant', True, city=city_name) logger.info('Found {0} restaurants for {1}'.format(len(restaurants_of_city), city_name)) for restaurant_of_city in restaurants_of_city: price_range = restaurant_of_city.price_range if price_range: price_range_list.append(price_range) if len(price_range_list) > 0: try: price_range_mode = mode(price_range_list) logger.info('Mode for {0}: {1}'.format(city_name, price_range_mode)) except StatisticsError: price_range_mode = '-1' logger.info('Multiple modes found for {0}'.format(city_name)) else: price_range_mode = '-2' logger.info('No price_range attribute found in {0}'.format(city_name)) city_mode_list[city_name] = price_range_mode else: price_range_mode = city_mode_list[city_name] logger.info('Found price_range {0} for {1}'.format(price_range_mode, city_name)) price_range_calculated.price_range = price_range_mode db.insert(price_range_calculated) logger.info('Calculated {0} price_range mode(s)'.format(len(city_mode_list))) db.commit_session() db.close_session()
def fill_buying_power_calculated_table(): util.setup_logging() db = SqlHelper(constants.SQL_DATABASE_NAME) db.create_session() cities_without_buying_power = db.fetch_entity_where('City', True, buying_power=None) logger.info('Found {0} cities without Buying Power'.format(len(cities_without_buying_power))) buying_power_average = get_germany_buying_power_average() logger.info('Buying Power Germany: {0}'.format(buying_power_average)) for city_without_buying_power in cities_without_buying_power: buying_power_calculated = BuyingPowerCalculated() city_id = city_without_buying_power.id buying_power_calculated.city_id = city_id buying_power_calculated.buying_power = buying_power_average db.insert(buying_power_calculated) db.commit_session() db.close_session()
def main(): rents = [] util.setup_logging() db = SqlHelper(constants.SQL_DATABASE_NAME) db.create_session() result = db.fetch_entity_where('City', True, True, rent_avg=None) for city in result: rents.append(city.rent_avg) rent_avg = int(numpy.mean(rents)) result = db.fetch_entity_where('City', True, False, rent_avg=None) for city in result: rent_avg_calculated = RentAvgCalculated() rent_avg_calculated.city_id = city.id rent_avg_calculated.rent_avg = rent_avg db.insert(rent_avg_calculated) db.commit_session() db.close_session()
def check_price_range_availability_and_update(): yelp_helper = YelpHelper() restaurants = [] util.setup_logging() not_available_count = 0 db = SqlHelper(constants.SQL_DATABASE_NAME) db.create_session() result = db.fetch_entity_where('Restaurant', True, price_range=None) logger.info('Found {0} restaurants'.format(str(len(result)))) try: for restaurant in result: restaurant_id = restaurant.id business, status_code = yelp_helper.get_business(restaurant_id) if 'error' not in business: if 'price' in business: price_range = business['price'] if price_range: restaurant.price_range = price_range else: logger.info('Price Range is null') db.insert(restaurant) else: not_available_count += 1 else: raise YelpError(business['error']['code'], business['error']['description']) logger.info(not_available_count) except YelpError as error: logger.exception(error) logger.info('Adding {0} updated restaurants to DB...'.format(len(restaurants))) finally: db.commit_session() db.close_session()
class Transporter(ABC, threading.Thread): database = None source_entity = None target_entity = None source_db = None test_mode = None source_entity_id = None city_name = None zip_codes = [] top_how_much = None logger = logging.getLogger(__name__) def __init__(self, database, source_entity, test_mode, city_name): super(Transporter, self).__init__() self.logger.info( 'Creating Transporter for Datastore Entity: {0}'.format( source_entity)) self.database = database self.source_entity = source_entity self.source_db = DatastoreHelper() self.target_db = SqlHelper(self.database) self.test_mode = test_mode self.city_name = city_name if self.city_name: self._fetch_zip_codes_from_database() def run(self): results = [] self.logger.info('Starting transport...') self.target_db.create_session() total = self._get_entities(None, None, True) self.logger.info('Found a total of %s entries in Google Datastore', str(total)) offset = 0 while offset < total: result = self._transport(offset) results.append(result) offset += constants.GCP_FETCH_LIMIT # i dont know why but google datastore doesn't allow a offset greater than 2500 if offset == 2500: self.logger.info('Resetting offset...') offset = 0 total = self._get_entities(None, None, True) self.logger.info( 'Found a total of %s entries in Google Datastore', str(total)) for result in results: self.logger.info(result) self.logger.info('Done transporting Restaurants...') def _transport(self, offset): result = Result() limit = constants.GCP_FETCH_LIMIT source_entities = self._get_entities(limit, offset, False) if source_entities: for datastore_entity in source_entities: self.logger.info('Starting mapping...') entities = self.map(datastore_entity) entity_length = len(entities) self.logger.info('Mapped {0} entities...'.format( str(entity_length))) if not self.test_mode: if entity_length > 0: try: for entity in entities: if entity: self.logger.info('Saving in database...') self.target_db.insert(entity) self.logger.info('Commiting DB entries') self.target_db.commit_session() result.set_success(True) result.set_message( 'Fetched entries from offset {0} with limit {1}' .format(str(offset), str(limit))) except SQLAlchemyError as err: result.set_success(False) result.set_message(err.code) self.logger.exception('An SQLAlchemyError occured') finally: self.target_db.close_session() else: result.set_success(True) result.set_message( 'There are no mapped entities that can be saved in database' ) self.source_db.set_transported(datastore_entity, True) else: result.set_success(True) result.set_message('Test Mode active') else: result.set_success(False) result.set_message(self.source_entity + ' could not be found in Google Datastore') return result def _fetch_zip_codes_from_database(self): sql = SqlHelper(constants.SQL_DATABASE_NAME) sql.create_session() city_from_db = sql.fetch_city_by_name(self.city_name) # get zip codes and close session afterwards zip_codes = city_from_db.zip_codes sql.close_session() for zip_code_obj in zip_codes: self.zip_codes.append(zip_code_obj.zip_code) def _fetch_entities_by_zip_code(self, entity_name, limit, offset, only_keys): result_all = [] for zip_code in self.zip_codes: result = self.source_db.fetch_entity(entity_name, limit=limit, offset=offset, only_keys=only_keys, operator='=', zip_code=str(zip_code), transported=False) result_all += result return result_all def _get_entities(self, limit, offset, only_total): if not self.zip_codes: content = self.source_db.fetch_entity(self.source_entity, limit, offset, only_total, '=', transported=False) else: content = self._fetch_entities_by_zip_code(self.source_entity, limit, offset, only_total) if only_total: result = len(content) else: result = content return result # maps target and source structure and returns a list of entities to save in db @abstractmethod def map(self, datastore_entity) -> List: pass