Ejemplo n.º 1
0
def fetch_reviews_from_postgres(with_categories):
    """Load all reviews joined with their location data into a DataFrame.

    The statement joins ``review`` with ``restaurant``, ``zip_code`` and
    ``city``.

    Args:
        with_categories: When truthy, ``food_category`` is joined as well and
            its name is selected as the ``category`` column.

    Returns:
        The DataFrame returned by ``pd.read_sql_query`` for the statement.
    """
    # ``language`` lives at module level and is only read here (log line).
    global language

    select_clause = ('SELECT r.rating, r.text, r.language, zip.zip_code, '
                     'city.name as city')
    from_clause = ('FROM review AS r '
                   'JOIN restaurant AS rest '
                   'ON (r.restaurant_id = rest.id) '
                   'JOIN zip_code AS zip '
                   'ON (rest.zip_code = zip.zip_code) '
                   'JOIN city '
                   'ON (zip.city_id = city.id)')
    if with_categories:
        select_clause += ', fc.name as category'
        from_clause += (' JOIN food_category AS fc'
                        ' ON (r.restaurant_id = fc.restaurant_id)')
    query = select_clause + ' ' + from_clause + ';'

    connection = SqlHelper(constants.SQL_DATABASE_NAME).get_connection()
    reviews = pd.read_sql_query(sql=query, con=connection)
    logger.info('Found {0} Reviews in {1}'.format(reviews.shape[0], language))

    return reviews
 def _fetch_top_city_from(self, top_how_much, table_name):
     """Return the ``city`` values of the first rows of a table.

     Args:
         top_how_much: Number of leading rows to keep (used as a slice end,
             so ``None`` keeps every row).
         table_name: Table handed to ``SqlHelper.fetch_table_as_dataframe``.

     Returns:
         A plain list with the ``city`` column of the selected rows.
     """
     db = SqlHelper(constants.SQL_DATABASE_NAME)
     db.create_session()
     try:
         df = db.fetch_table_as_dataframe(table_name)
         self.logger.info('Fetching Top {0}'.format(top_how_much))
         # ``columns`` must be a list: current pandas rejects a set here.
         cities_dataframe = pd.DataFrame(data=df.iloc[:top_how_much],
                                         columns=['city'])
         return cities_dataframe['city'].values.tolist()
     finally:
         # The session was opened only for this lookup; always release it.
         db.close_session()
    def run(self):
        """Scrape the selected cities and store the restaurants found.

        The cities come from ``_select_cities``; the scraping and saving is
        done by ``_scrape_cities``, which logs (and does not re-raise)
        errors raised while scraping.  The SQL session opened here is closed
        in every case, including a failure while selecting the cities.
        """
        db = SqlHelper(constants.SQL_DATABASE_NAME)
        db.create_session()
        try:
            cities = self._select_cities(db)
            self._scrape_cities(cities)
        finally:
            db.close_session()

    def _select_cities(self, db):
        """Return the ``[state, city]`` pairs to scrape, read through ``db``.

        * ``self.top_how_much`` set: the first N rows of ``top_cities``;
        * otherwise, without ``self.city_name``: every ``TopCities`` entity;
        * otherwise: the ``TopCities`` entities matching ``self.city_name``.
        """
        if self.top_how_much is not None:
            df = db.fetch_table_as_dataframe('top_cities')
            # An ordered list is essential: rows are consumed by position as
            # [state, city]; a set has no defined order (and current pandas
            # rejects a set for ``columns`` altogether).
            cities_dataframe = pd.DataFrame(data=df.iloc[:self.top_how_much],
                                            columns=['state', 'city'])
            cities = cities_dataframe.values.tolist()
            self.logger.debug('Top cities: %s', cities)
            return cities

        if self.city_name is None:
            city_objects = db.fetch_entity_where('TopCities')
        else:
            city_objects = db.fetch_entity_where('TopCities',
                                                 True,
                                                 False,
                                                 city=self.city_name)
        return [[city.state, city.city] for city in city_objects]

    def _scrape_cities(self, cities):
        """Run the spider for each ``[state, city]`` pair; return summaries.

        Unless ``self.test_mode`` is set, every restaurant of a successful
        spider result is saved via ``self._save`` under the id
        ``<city>$<restaurant id>``.  An ``HTTPError`` or any other
        ``Exception`` stops the loop and is logged.
        """
        all_results = []
        try:
            for state, city_name in cities:
                self.current_city = city_name
                self.current_state = state
                result = {
                    'city': self.current_city,
                    'total': None,
                    'restaurants': []
                }
                self.logger.info('Starting to scrape {0}'.format(
                    self.current_city))
                spider = SpeisekarteSpider(self.current_city)
                spider.run()
                spider_result = spider.get_result()
                if spider_result.get_success() and not self.test_mode:
                    data = spider_result.get_data()
                    result['total'] = data['total']
                    for restaurant in data['restaurants']:
                        restaurant_id = restaurant['id']
                        entity_id = self.current_city + '$' + restaurant_id
                        datastore_entity = self._create_datastore_entity(
                            restaurant)
                        saved = self._save(entity_id, datastore_entity)
                        result['restaurants'].append({
                            'success': saved,
                            'content': restaurant_id
                        })
                all_results.append(result)
        except HTTPError as error:
            self.logger.exception(
                'Encountered HTTP error %s on %s:\nAbort program.', error.code,
                error.url)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.
            self.logger.exception('Something went wrong')
        return all_results
Ejemplo n.º 4
0
def reset_postgres():
    """Set ``requested`` to ``False`` on the fetched ``ZipCode`` entities.

    The entities are those returned by
    ``SqlHelper.fetch_entity_where('ZipCode', True)``; the change is committed
    in one go.  The session is closed even when fetching or committing fails.
    """
    sql = SqlHelper(constants.SQL_DATABASE_NAME)

    sql.create_session()
    try:
        for sql_entity in sql.fetch_entity_where('ZipCode', True):
            sql_entity.requested = False
        sql.commit_session()
    finally:
        sql.close_session()
Ejemplo n.º 5
0
    def __init__(self, database, source_entity, test_mode, city_name):
        """Set up the source (Datastore) and target (SQL) helpers.

        Args:
            database: Name handed to ``SqlHelper`` for the target database.
            source_entity: Name of the Datastore entity kind to transport.
            test_mode: Stored on the instance as ``self.test_mode``.
            city_name: Optional city; when truthy, its zip codes are loaded
                into ``self.zip_codes`` right away.
        """
        super(Transporter, self).__init__()
        self.logger.info(
            'Creating Transporter for Datastore Entity: {0}'.format(
                source_entity))
        self.database = database
        self.source_entity = source_entity
        self.source_db = DatastoreHelper()
        self.target_db = SqlHelper(self.database)
        self.test_mode = test_mode
        self.city_name = city_name
        # Bind a fresh list per instance: appending to a list that is only
        # defined on the class would share zip codes between all instances.
        self.zip_codes = []

        if self.city_name:
            self._fetch_zip_codes_from_database()
Ejemplo n.º 6
0
 def run(self):
     """Fetch and store Yelp search results for zip codes not requested yet.

     Cities are either all rows of ``constants.SQL_TABLE_CITY`` or, when
     ``self.city_name`` is set, the matching ``City`` entities.  For every
     zip code whose ``requested`` flag is unset, the search result pages are
     fetched and saved one by one; the zip code is flagged (and committed)
     only when no save reported failure.

     Fixes compared to the previous version:
       * every ``get_search`` call is unpacked as ``(content, status_code)``
         (the paging call used the raw tuple as content);
       * the first page is no longer fetched and stored twice, and the
         offset advances by exactly ``YELP_SEARCH_LIMIT`` so no result is
         skipped; stored entity ids now carry the offset of their own page;
       * in test mode nothing is stored and the zip code stays unflagged
         (previously a ``KeyError`` was raised on ``content['error']``).

     ``HTTPError`` and ``YelpError`` are logged and end the run; the SQL
     session is always closed.
     """
     db = SqlHelper(constants.SQL_DATABASE_NAME)
     yelp_helper = YelpHelper()
     db.create_session()
     try:
         if self.city_name is None:
             cities = db.fetch_all(constants.SQL_TABLE_CITY)
         else:
             cities = db.fetch_entity_where('City', True, False, name=self.city_name)
         for city in cities:
             name = city.name
             for zip_code in city.zip_codes:
                 if zip_code.requested:
                     continue
                 self.location = str(zip_code.zip_code) + ', ' + str(name) + ', Deutschland'
                 self.offset = 0
                 content, _status_code = yelp_helper.get_search(self.location, self.offset)
                 if 'error' in content:
                     raise YelpError(content['error']['code'], content['error']['description'])
                 if self.test_mode:
                     # Nothing was stored, so leave the zip code open for a real run.
                     continue
                 total = content['total']
                 save_success = self._save_search_page(content)
                 zip_completed = save_success is not False
                 self.logger.info(u'Found {0} Entries...'.format(total))
                 self.offset += constants.YELP_SEARCH_LIMIT
                 # The original code caps paging at offset + limit <= 1000.
                 while self.offset < total \
                         and (self.offset + constants.YELP_SEARCH_LIMIT <= 1000) \
                         and save_success is True:
                     content, _status_code = yelp_helper.get_search(self.location, self.offset)
                     if 'error' in content:
                         raise YelpError(content['error']['code'], content['error']['description'])
                     save_success = self._save_search_page(content)
                     if save_success is False:
                         zip_completed = False
                     self.offset += constants.YELP_SEARCH_LIMIT
                 if zip_completed is True:
                     zip_code.requested = True
                     db.commit_session()
     except HTTPError as error:
         self.logger.exception('Encountered HTTP error %s on %s:\nAbort program.', error.code, error.url)
     except YelpError as err:
         self.logger.exception(err)
     finally:
         db.close_session()

 def _save_search_page(self, content):
     """Store one search response under an id built from path, location and current offset."""
     entity_id = str(self.current_path) + str(self.location) + str(self.offset)
     datastore_entity = self._create_datastore_entity(content)
     return self._save(entity_id, datastore_entity)
Ejemplo n.º 7
0
def check_city(city_name):
    """Prompt until ``city_name`` names a city known to the SQL database.

    Args:
        city_name: First candidate; replaced by console input for as long as
            ``SqlHelper.fetch_city_by_name`` returns ``None`` for it.

    Returns:
        The first name for which a city was found.
    """
    logger.info('Checking if city is available in PostgreSQL...')
    helper = SqlHelper(constants.SQL_DATABASE_NAME)
    helper.create_session()

    while helper.fetch_city_by_name(city_name) is None:
        prompt = "City {0} not available in database. Try again!".format(
            city_name)
        city_name = str(input(prompt))

    helper.close_session()
    return city_name
Ejemplo n.º 8
0
    def _fetch_zip_codes_from_database(self):
        """Append the zip codes of ``self.city_name`` to ``self.zip_codes``.

        Raises:
            ValueError: If ``SqlHelper.fetch_city_by_name`` finds no city for
                ``self.city_name`` (previously an ``AttributeError`` on
                ``None``).
        """
        sql = SqlHelper(constants.SQL_DATABASE_NAME)

        sql.create_session()
        try:
            city_from_db = sql.fetch_city_by_name(self.city_name)
            if city_from_db is None:
                raise ValueError(
                    'City {0} not available in database'.format(
                        self.city_name))
            # Copy the plain values while the session is still open, so no
            # ORM object has to be touched after it is closed.
            zip_codes = [zip_code_obj.zip_code
                         for zip_code_obj in city_from_db.zip_codes]
        finally:
            sql.close_session()

        self.zip_codes.extend(zip_codes)
Ejemplo n.º 9
0
def fetch_zip_codes_from_database():
    """Return the ``zip_codes`` of the city named by global ``city_string``.

    While no city is found for ``city_string``, a new name is read from the
    console and written back to the global.  The session is closed after the
    ``zip_codes`` attribute has been read.
    """
    global city_string
    helper = SqlHelper(constants.SQL_DATABASE_NAME)
    helper.create_session()

    while True:
        city = helper.fetch_city_by_name(city_string)
        if city is not None:
            break
        prompt = "City {0} not available in database. Try again!".format(
            city_string)
        city_string = str(input(prompt))

    # Read the zip codes first, then release the session.
    found_zip_codes = city.zip_codes
    helper.close_session()

    return found_zip_codes
Ejemplo n.º 10
0
def main():
    """Refresh ``review_count`` for restaurants stored with a count of 0.

    Each restaurant is looked up through ``YelpHelper.get_business``:
      * status 403 or 404: the count is set to 0;
      * an ``error`` payload: the error is logged and the loop stops;
      * otherwise a non-``None`` ``review_count`` from Yelp is stored.
    All changes are committed once after the loop.
    """
    util.setup_logging()

    helper = SqlHelper(constants.SQL_DATABASE_NAME)
    yelp = YelpHelper()
    helper.create_session()

    restaurants = helper.fetch_entity_where('Restaurant', True, False,
                                            review_count=0)
    logger.info('Found {0} Restaurants with 0 Review Count'.format(
        len(restaurants)))

    for restaurant in restaurants:
        logger.info('Old Review Count Value: {0}'.format(
            restaurant.review_count))
        name = restaurant.name
        business_id = restaurant.id
        logger.info(name)
        business, status_code = yelp.get_business(business_id, 0)

        if status_code in (403, 404):
            restaurant.review_count = 0
            logger.info('New Review Count Value: {0}'.format(
                restaurant.review_count))
            continue

        if 'error' in business:
            logger.error('{0}: {1}'.format(business['error']['code'],
                                           business['error']['description']))
            break

        review_count = business['review_count']
        if review_count is not None:
            restaurant.review_count = review_count
            logger.info('New Review Count Value: {0}'.format(
                restaurant.review_count))

    helper.commit_session()
    helper.close_session()
Ejemplo n.º 11
0
def fill_buying_power_calculated_table():
    """Insert a ``BuyingPowerCalculated`` row per city lacking buying power.

    Every city fetched with ``buying_power=None`` receives the value returned
    by ``get_germany_buying_power_average()``; all rows are committed
    together at the end.
    """
    util.setup_logging()

    helper = SqlHelper(constants.SQL_DATABASE_NAME)
    helper.create_session()

    cities = helper.fetch_entity_where('City', True, buying_power=None)
    logger.info('Found {0} cities without Buying Power'.format(len(cities)))

    average = get_germany_buying_power_average()
    logger.info('Buying Power Germany: {0}'.format(average))

    for city in cities:
        record = BuyingPowerCalculated()
        record.city_id = city.id
        record.buying_power = average
        helper.insert(record)

    helper.commit_session()
    helper.close_session()
Ejemplo n.º 12
0
def fill_price_range_calculated_table():
    """Insert a ``PriceRangeCalculated`` row per restaurant without a price.

    The value is the mode of the ``price_range`` values of all restaurants
    in the same city, computed once per city and cached.  Two marker values
    are used instead of a mode: ``'-1'`` when ``mode`` raises
    ``StatisticsError`` and ``'-2'`` when the city has no ``price_range``
    value at all.  Restaurants without a city get no row.
    """
    util.setup_logging()
    mode_by_city = {}

    helper = SqlHelper(constants.SQL_DATABASE_NAME)
    helper.create_session()

    unpriced = helper.fetch_entity_where('Restaurant', True, price_range=None)
    logger.info('Found {0} restaurants without Price'.format(len(unpriced)))

    for restaurant in unpriced:
        record = PriceRangeCalculated()
        record.restaurant_id = restaurant.id

        city_name = restaurant.city
        if not city_name:
            continue

        if city_name in mode_by_city:
            city_mode = mode_by_city[city_name]
            logger.info('Found price_range {0} for {1}'.format(city_mode, city_name))
        else:
            logger.info('Calculating mode for {0}'.format(city_name))
            city_restaurants = helper.fetch_entity_where('Restaurant', True, city=city_name)
            logger.info('Found {0} restaurants for {1}'.format(len(city_restaurants), city_name))
            known_ranges = [value
                            for value in (r.price_range for r in city_restaurants)
                            if value]
            if not known_ranges:
                city_mode = '-2'
                logger.info('No price_range attribute found in {0}'.format(city_name))
            else:
                try:
                    city_mode = mode(known_ranges)
                except StatisticsError:
                    city_mode = '-1'
                    logger.info('Multiple modes found for {0}'.format(city_name))
                else:
                    logger.info('Mode for {0}: {1}'.format(city_name, city_mode))
            mode_by_city[city_name] = city_mode

        record.price_range = city_mode
        helper.insert(record)

    logger.info('Calculated {0} price_range mode(s)'.format(len(mode_by_city)))

    helper.commit_session()
    helper.close_session()
import pandas as pd


def calc(price, priceintervaltype, totalfloorspace, seats):
    """Return the budget left after paying for and furnishing a property.

    Computes ``BUDGET - price * factor
    - FURNISH_COST_PER_SQUARE_METER * totalfloorspace
    - seats * FURNISH_COST_PER_SEAT`` where ``factor`` is
    ``constants.BUY_FACTOR`` if ``'ONE_TIME_CHARGE'`` occurs in the price
    interval type and ``constants.RENT_FACTOR`` otherwise.

    Args:
        price: Price as a scalar or a pandas Series.
        priceintervaltype: Interval type as a scalar or a pandas Series; a
            Series is evaluated row by row.
        totalfloorspace: Floor space as a scalar or a pandas Series.
        seats: Number of seats to furnish.

    Returns:
        The remaining budget; a Series when Series inputs are given.
    """
    if isinstance(priceintervaltype, pd.Series):
        # ``str(series)`` renders the whole column as one string, so a single
        # ONE_TIME_CHARGE row would select the buy factor for every row.
        # Decide per element instead.
        is_one_time = priceintervaltype.astype(str).str.contains(
            'ONE_TIME_CHARGE', regex=False)
        multiplier = is_one_time.map({True: constants.BUY_FACTOR,
                                      False: constants.RENT_FACTOR})
    elif 'ONE_TIME_CHARGE' in str(priceintervaltype):
        multiplier = constants.BUY_FACTOR
    else:
        multiplier = constants.RENT_FACTOR
    rest_budget = (constants.BUDGET
                   - price * multiplier
                   - constants.FURNISH_COST_PER_SQUARE_METER * totalfloorspace
                   - seats * constants.FURNISH_COST_PER_SEAT)
    return rest_budget


if __name__ == '__main__':
    from main.helper.db_helper import SqlHelper

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    session = db.get_connection()
    immo_df = pd.read_sql_table(table_name=constants.SQL_TABLE_IMMOSCOUT, con=session)
    for index, row in immo_df.iterrows():
        print(str(index + 1) + ". " + row['city'])
    # Zero price means, you have to ask the advertiser
    filter_price_zero = immo_df[immo_df['price'] >= 0]

    # Calcualting min/max rest_budget for seats: 52 (100 %) to 65 (125 %)
    result = filter_price_zero.assign(min = lambda x: calc(x['price'], x['priceintervaltype'], x['totalfloorspace'], constants.SEATS_MAX),
                                      max = lambda x: calc(x['price'], x['priceintervaltype'], x['totalfloorspace'], constants.SEATS_MIN))

    result = result[result['min'] >= 0]

    print('city: ' + str(result['city']) +
          ' min_rest_budget: ' + str(result['min']) +
Ejemplo n.º 14
0
from config import constants
from main.helper import util
from main.helper.db_helper import DatastoreHelper, SqlHelper
import pandas as pd

from main.helper.text_analyzer import TextAnalyzer

# Scratch script: opens the helpers, then runs one sample review through
# ``TextAnalyzer.text_process`` and prints the outcome.
datastore = DatastoreHelper()
sql = SqlHelper(constants.SQL_DATABASE_NAME)

sql.create_session()

# Earlier experiments, kept for reference:
# df = sql.fetch_table_as_dataframe('top_cities')
# cities = pd.DataFrame(data=df.iloc[0:10], columns={'city', 'state'})
# cities = cities.values.tolist()

# city_objects = sql.fetch_entity_where('TopCities')
# cities = [[city.state, city.city] for city in city_objects]
# print(cities[1])
# print(cities[0])

analyzer = TextAnalyzer('english', True, False, '../../data/tree_tagger')

review = ('Yes, the 5 stars are deserved: here you can drink and buy the best '
          'coffee in Bochum (and maybe in the Ruhr area?). '
          'I usually only take the roasted coffee')

text = analyzer.text_process(review)

print(text)
#
# menu_item_improved = util.convert_list_to_string(analyzer.text_process(text))
# if menu_item_improved:
    def run(self):
        """Collect Yelp reviews for the pending restaurants and store them.

        For every restaurant returned by ``_fetch_all_restaurants`` the
        reviews are requested once per locale (``en_US``, ``de_DE``).  A
        non-empty review list is turned into a datastore entity and, unless
        ``self.test_mode`` is set, saved under
        ``<city>@<zip>@<restaurant id>@<locale>``; each successful save is
        followed by a SQL commit.  When the zip code changes between two
        restaurants, the previous zip code is flagged ``review_collected``.

        Fixes compared to the previous version:
          * zip codes are compared by value (``!=``) instead of identity;
          * a failed save is what marks the result as failed; before, a
            ``for ... else`` marked every completed run as failed;
          * the SQL session is closed even when ``YelpError`` is raised.

        Returns:
            The ``Result`` describing the run.  Once a save has failed the
            result stays failed.

        Raises:
            YelpError: If Yelp answers with an ``error`` payload.
        """
        result = Result()
        result.set_success(True)

        sql = SqlHelper(constants.SQL_DATABASE_NAME)
        sql.create_session()
        try:
            if self.current_city:
                self.cities.append(self.current_city)
            elif self.top_how_much:
                self.cities = self._fetch_top_city_from(self.top_how_much,
                                                        'top_cities')

            restaurants = self._fetch_all_restaurants()
            locale_list = ['en_US', 'de_DE']
            if not restaurants:
                result.set_success(True)
                result.set_message(
                    'No Restaurants left to collect reviews from! Nice Job')

            for restaurant in restaurants or []:
                self.current_restaurant_id = restaurant.id
                self.logger.debug('Restaurant Zip: {0}'.format(
                    restaurant.zip_code))
                # A change of zip code means the previous one is done:
                # flag it as collected.
                if self.current_zip_code and restaurant.zip_code != self.current_zip_code:
                    sql.update_entity('ZipCode', 'zip_code',
                                      str(self.current_zip_code),
                                      'review_collected', True)
                self.current_zip_code = restaurant.zip_code
                self.current_city = restaurant.city

                for locale in locale_list:
                    self.current_locale = locale
                    yelp_entity, status_code = self.yelp.get_reviews(
                        self.current_restaurant_id, locale)

                    if status_code == 403:
                        self.logger.debug(
                            'No Reviews found for restaurant {0} in {1}'.
                            format(restaurant.name, self.current_city))
                        continue

                    if 'error' in yelp_entity:
                        message = yelp_entity['error']['description']
                        result.set_success(False)
                        result.set_message(message)
                        raise YelpError(yelp_entity['error']['code'],
                                        message)

                    if not yelp_entity['reviews']:
                        self.logger.debug(
                            'No Reviews found for restaurant {0} in {1}'
                            .format(restaurant.name, self.current_city))
                        continue

                    datastore_entity = self._create_datastore_entity(
                        yelp_entity)
                    entity_id = self.current_city + '@' + \
                                str(self.current_zip_code) + '@' + \
                                str(self.current_restaurant_id) + '@' + \
                                locale
                    if self.test_mode:
                        continue

                    if self._save(entity_id, datastore_entity):
                        sql.commit_session()
                    else:
                        result.set_success(False)
                        result.set_message(
                            'Failure when saving Review Entity to Datastore')
        finally:
            sql.close_session()

        self.logger.info(result)
        return result
Ejemplo n.º 16
0
from main.helper.db_helper import SqlHelper


# Print every city of the 'fonethd' database followed by its zip codes.
db = SqlHelper('fonethd')
db.create_session()
try:
    entries = db.fetch_all('city')
    for row in entries:
        zip_codes = row.zip_codes
        name = row.name
        # The label was missing the space before the city name.
        print('Zip Codes for ' + str(name))
        for zip_code in zip_codes:
            print(zip_code.zip_code)
finally:
    # Release the session even if fetching or printing fails.
    db.close_session()
Ejemplo n.º 17
0
class Transporter(ABC, threading.Thread):
    """Thread that moves not-yet-transported Datastore entities into SQL.

    Entities of kind ``source_entity`` are read in pages from the Datastore
    helper, converted by the subclass' :meth:`map` and inserted through the
    SQL helper.  When a city name is given, only entities whose zip code
    belongs to that city are read.
    """

    database = None
    source_entity = None
    target_entity = None
    source_db = None
    test_mode = None
    source_entity_id = None
    city_name = None
    # Class-level default only; __init__ binds a separate list per instance.
    zip_codes = []
    top_how_much = None

    logger = logging.getLogger(__name__)

    def __init__(self, database, source_entity, test_mode, city_name):
        """Set up the source (Datastore) and target (SQL) helpers.

        Args:
            database: Name handed to ``SqlHelper`` for the target database.
            source_entity: Name of the Datastore entity kind to transport.
            test_mode: When truthy, mapped entities are not written and the
                source entity is not marked as transported.
            city_name: Optional city; when truthy, its zip codes are loaded
                and used to filter the source entities.
        """
        super(Transporter, self).__init__()
        self.logger.info(
            'Creating Transporter for Datastore Entity: {0}'.format(
                source_entity))
        self.database = database
        self.source_entity = source_entity
        self.source_db = DatastoreHelper()
        self.target_db = SqlHelper(self.database)
        self.test_mode = test_mode
        self.city_name = city_name
        # Per-instance list: appending to the class-level list would leak
        # the zip codes of one transporter into every other one.
        self.zip_codes = []

        if self.city_name:
            self._fetch_zip_codes_from_database()

    def run(self):
        """Transport all pending entities page by page and log the results."""
        results = []
        self.logger.info('Starting transport...')
        self.target_db.create_session()
        total = self._get_entities(None, None, True)
        self.logger.info('Found a total of %s entries in Google Datastore',
                         str(total))
        offset = 0
        while offset < total:
            result = self._transport(offset)
            results.append(result)
            offset += constants.GCP_FETCH_LIMIT
            # Per the original author, Datastore does not accept offsets
            # beyond 2500, so start over and count again.  ``>=`` also
            # covers fetch limits that do not divide 2500 evenly.
            if offset >= 2500:
                self.logger.info('Resetting offset...')
                offset = 0
                total = self._get_entities(None, None, True)
                self.logger.info(
                    'Found a total of %s entries in Google Datastore',
                    str(total))
        for result in results:
            self.logger.info(result)
        self.logger.info('Done transporting Restaurants...')

    def _transport(self, offset):
        """Map and store one page of source entities.

        Args:
            offset: Offset of the page within the pending source entities.

        Returns:
            A ``Result`` reflecting the last entity handled on this page, or
            a failure when the page is empty.
        """
        result = Result()
        limit = constants.GCP_FETCH_LIMIT
        source_entities = self._get_entities(limit, offset, False)
        if source_entities:
            for datastore_entity in source_entities:
                self.logger.info('Starting mapping...')
                entities = self.map(datastore_entity)
                entity_length = len(entities)
                self.logger.info('Mapped {0} entities...'.format(
                    str(entity_length)))
                if not self.test_mode:
                    if entity_length > 0:
                        try:
                            for entity in entities:
                                if entity:
                                    self.logger.info('Saving in database...')
                                    self.target_db.insert(entity)
                            self.logger.info('Commiting DB entries')
                            self.target_db.commit_session()
                            result.set_success(True)
                            result.set_message(
                                'Fetched entries from offset {0} with limit {1}'
                                .format(str(offset), str(limit)))
                        except SQLAlchemyError as err:
                            result.set_success(False)
                            result.set_message(err.code)
                            self.logger.exception('An SQLAlchemyError occured')
                        finally:
                            # NOTE(review): this closes the session after
                            # every source entity while the loop goes on
                            # using it - confirm SqlHelper supports that.
                            self.target_db.close_session()
                    else:
                        result.set_success(True)
                        result.set_message(
                            'There are no mapped entities that can be saved in database'
                        )
                    # NOTE(review): also reached after an SQLAlchemyError, so
                    # a failed entity is still marked as transported.
                    self.source_db.set_transported(datastore_entity, True)
                else:
                    result.set_success(True)
                    result.set_message('Test Mode active')
        else:
            result.set_success(False)
            result.set_message(self.source_entity +
                               ' could not be found in Google Datastore')
        return result

    def _fetch_zip_codes_from_database(self):
        """Append the zip codes of ``self.city_name`` to ``self.zip_codes``.

        Raises:
            ValueError: If no city is found for ``self.city_name``
                (previously an ``AttributeError`` on ``None``).
        """
        sql = SqlHelper(constants.SQL_DATABASE_NAME)

        sql.create_session()
        try:
            city_from_db = sql.fetch_city_by_name(self.city_name)
            if city_from_db is None:
                raise ValueError(
                    'City {0} not available in database'.format(
                        self.city_name))
            # Copy the plain values while the session is still open.
            zip_codes = [zip_code_obj.zip_code
                         for zip_code_obj in city_from_db.zip_codes]
        finally:
            sql.close_session()

        self.zip_codes.extend(zip_codes)

    def _fetch_entities_by_zip_code(self, entity_name, limit, offset,
                                    only_keys):
        """Fetch untransported entities for each zip code in ``self.zip_codes``.

        ``limit`` and ``offset`` are applied to every per-zip-code query; the
        per-query results are concatenated in zip code order.
        """
        result_all = []

        for zip_code in self.zip_codes:
            result = self.source_db.fetch_entity(entity_name,
                                                 limit=limit,
                                                 offset=offset,
                                                 only_keys=only_keys,
                                                 operator='=',
                                                 zip_code=str(zip_code),
                                                 transported=False)
            result_all += result
        return result_all

    def _get_entities(self, limit, offset, only_total):
        """Return the pending source entities, or only how many there are.

        Args:
            limit: Page size passed on to the Datastore helper.
            offset: Page offset passed on to the Datastore helper.
            only_total: When truthy, return the number of fetched entities
                instead of the entities; the flag is also passed on to the
                Datastore helper.

        Returns:
            ``len(content)`` when ``only_total`` is truthy, else the content.
        """
        if not self.zip_codes:
            content = self.source_db.fetch_entity(self.source_entity,
                                                  limit,
                                                  offset,
                                                  only_total,
                                                  '=',
                                                  transported=False)
        else:
            content = self._fetch_entities_by_zip_code(self.source_entity,
                                                       limit, offset,
                                                       only_total)

        if only_total:
            result = len(content)
        else:
            result = content

        return result

    @abstractmethod
    def map(self, datastore_entity) -> List:
        """Convert one source entity into the list of entities to store in SQL."""
        pass
Ejemplo n.º 18
0
def main():
    """Insert a ``RentAvgCalculated`` row for every city of the second query.

    The value stored is the integer-truncated mean of ``rent_avg`` over the
    cities returned by the first ``fetch_entity_where`` call.
    NOTE(review): the two calls differ only in their third positional flag;
    presumably it switches between cities with and without ``rent_avg`` -
    confirm against ``SqlHelper.fetch_entity_where``.
    """
    util.setup_logging()

    helper = SqlHelper(constants.SQL_DATABASE_NAME)
    helper.create_session()

    reference_cities = helper.fetch_entity_where('City', True, True,
                                                 rent_avg=None)
    rents = [city.rent_avg for city in reference_cities]
    rent_avg = int(numpy.mean(rents))

    target_cities = helper.fetch_entity_where('City', True, False,
                                              rent_avg=None)
    for city in target_cities:
        record = RentAvgCalculated()
        record.city_id = city.id
        record.rent_avg = rent_avg
        helper.insert(record)

    helper.commit_session()
    helper.close_session()
    def run(self):
        """Search Immoscout24 for restaurant properties and store the hits.

        The first ``self.top_how_much`` cities of ``top_cities`` are resolved
        to geo ids via ``self.get_geo_id``; for each geo id the gastronomy
        search is queried for restaurants within the configured seat and
        floor-space ranges.  Every hit is converted by ``self.transform_df``,
        and the collected frame is saved as JSON records via ``self._save``.

        Returns:
            A ``Result`` whose success flag is the outcome of ``self._save``.
        """
        result = Result()
        db = SqlHelper(constants.SQL_DATABASE_NAME)
        df = db.fetch_table_as_dataframe('top_cities')
        # ``columns`` takes a list: current pandas rejects a set.
        cities = pd.DataFrame(data=df.iloc[0:self.top_how_much],
                              columns=['city'])
        for index, row in cities.iterrows():
            self.logger.debug(str(index + 1) + ". " + row['city'])

        # Manual test input: cities = {'city': ['Heidelberg', 'Karlsruhe']}
        city_for_search = pd.DataFrame(cities, columns=['city'])

        immo_oauth = OAuth1(
            constants.IMMOSCOUT_CLIENT_KEY,
            client_secret=constants.IMMOSCOUT_CLIENT_SECRET,
            resource_owner_key=constants.IMMOSCOUT_RESOURCE_OWNER_KEY,
            resource_owner_secret=constants.IMMOSCOUT_RESOURCE_OWNER_SECRET)

        # Empty frame with a fixed column order (a set has no order).
        geo_df = pd.DataFrame(columns=['geoId', 'city'])
        # Resolve the geo ids through the Immoscout24 API.
        geo_df = self.get_geo_id(city_for_search, geo_df, immo_oauth)

        # Restaurant floor space, shares according to
        # https://se909eeccf1caa559.jimcontent.com/download/version/1507517357/module/11096440527/name/AuszugDiplomarbeit_13.03.2006.pdf
        #   guest area = 40 %, technical area = 12 %, staff area = 8 %
        # The total is extrapolated from the guest area (40 % of the total).
        total_floor_space_min = constants.FLOOR_SPACE_GUEST * constants.SEATS_MIN / 40 * 100.0
        total_floor_space_max = constants.FLOOR_SPACE_GUEST * constants.SEATS_MAX / 40 * 100.0

        restaurant_df = pd.DataFrame()
        # Query the Immoscout24 objects per geo code.
        for index, row in geo_df.iterrows():
            params = {
                'realestatetype': 'gastronomy',
                'geocodes': str(row['geoId']),
                'gastronomytypes': 'restaurant',
                'channel': 'is24',
                'numberofseats':
                str(constants.SEATS_MIN) + '-' + str(constants.SEATS_MAX),
                'pagesize': '200',
                'totalfloorspace':
                str(total_floor_space_min) + '-' + str(total_floor_space_max)
            }
            immo_search_response = requests.request(
                method='GET',
                url=constants.IMMOSCOUT_SEARCH_URL,
                params=params,
                headers=constants.IMMOSCOUT_HEADERS,
                auth=immo_oauth)
            immo_search_json = pd.read_json(immo_search_response.text)
            hits = immo_search_json['resultlist.resultlist'][0]['numberOfHits']
            self.logger.info("Hits: " + str(hits) + " for city: " +
                             str(row['city']) + "\r\n")
            if hits < 1:
                self.logger.info('No object found for city: ' +
                                 str(row['city']))
                continue

            entries = immo_search_json['resultlist.resultlist'][1][0][
                'resultlistEntry']
            if hits == 1:
                # A single hit is delivered as one object, not as a list.
                entries = [entries]
            # Iterate over what was actually returned instead of
            # ``range(hits)``: ``hits`` may exceed the requested page size.
            for entry in entries:
                immo_object = entry['resultlist.realEstate']
                real_estate_id = immo_object['@id']
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # switch to pd.concat once the return type of transform_df
                # is confirmed.
                restaurant_df = restaurant_df.append(self.transform_df(
                    immo_object, real_estate_id),
                                                     ignore_index=True,
                                                     sort=True)
        self.logger.info(restaurant_df)
        result_json = restaurant_df.to_json(orient='records')
        attributes = self._create_datastore_entity(result_json)
        success = self._save(self.entity_id, attributes)
        result.set_success(success)
        self.logger.info(result)
        return result
Ejemplo n.º 20
0
    id = Column(Integer, primary_key=True, autoincrement=False)
    title = Column(String)
    city = Column(String)
    quarter = Column(String)
    postcode = Column(Integer)
    price = Column(Integer)
    currency = Column(String)
    marketingtype = Column(String)
    priceintervaltype = Column(String)
    totalfloorspace = Column(Numeric)

    def __str__(self):
        """Return a one-line summary built from id, name, updated_at and city."""
        # NOTE(review): ``name`` and ``updated_at`` are not among the columns
        # visible next to this method - confirm the model defines them.
        template = 'id: {0}, name: {1}, updated at: {2} immoscout: {3}'
        return template.format(self.id, self.name, self.updated_at,
                               self.city)


class TopCities(Base):
    """ORM model for the table named by ``constants.SQL_TABLE_TOP_CITY``."""
    __tablename__ = constants.SQL_TABLE_TOP_CITY

    # City name; serves as the primary key (no auto-increment).
    city = Column(String, primary_key=True, autoincrement=False)
    # State stored alongside the city.
    state = Column(String)
    # NOTE(review): presumably the score the cities are ranked by - confirm.
    potential = Column(Numeric)


if __name__ == '__main__':
    # Run as a script: hand the SqlHelper connection to
    # ``Base.metadata.create_all``.
    from main.helper.db_helper import SqlHelper

    Base.metadata.create_all(
        SqlHelper(constants.SQL_DATABASE_NAME).get_connection())
Ejemplo n.º 21
0
def check_price_range_availability_and_update():
    """Fill in ``price_range`` from Yelp for restaurants that lack one.

    Every restaurant fetched with ``price_range=None`` is looked up through
    ``YelpHelper.get_business``.  A non-empty ``price`` is written to the
    restaurant; restaurants whose answer has no ``price`` key are counted as
    not available.  A ``YelpError`` (raised on an ``error`` payload) is
    logged and stops the loop.  In every case the number of updated
    restaurants is logged and the session is committed and closed.
    """
    yelp_helper = YelpHelper()
    util.setup_logging()

    # The old ``restaurants`` list was never filled, so the summary log
    # always said 0; count the real updates instead.
    updated_count = 0
    not_available_count = 0

    db = SqlHelper(constants.SQL_DATABASE_NAME)
    db.create_session()

    result = db.fetch_entity_where('Restaurant', True, price_range=None)
    logger.info('Found {0} restaurants'.format(str(len(result))))

    try:
        for restaurant in result:
            restaurant_id = restaurant.id
            business, status_code = yelp_helper.get_business(restaurant_id)
            if 'error' in business:
                raise YelpError(business['error']['code'], business['error']['description'])
            if 'price' in business:
                price_range = business['price']
                if price_range:
                    restaurant.price_range = price_range
                    updated_count += 1
                else:
                    logger.info('Price Range is null')
                db.insert(restaurant)
            else:
                not_available_count += 1
            logger.info(not_available_count)
    except YelpError as error:
        logger.exception(error)
    finally:
        logger.info('Adding {0} updated restaurants to DB...'.format(updated_count))
        db.commit_session()
        db.close_session()