def run(self):
        """Scrape restaurants for the selected cities and save each of them.

        City selection, in order of precedence:

        * ``self.top_how_much`` is set: the first ``top_how_much`` rows of
          the 'top_cities' table.
        * ``self.city_name`` is None: every 'TopCities' entity.
        * otherwise: the 'TopCities' entities matching ``self.city_name``.

        For every city a ``SpeisekarteSpider`` is run. When the spider
        reports success and ``self.test_mode`` is false, each restaurant is
        converted with ``_create_datastore_entity`` and stored with ``_save``
        under the id ``'<city>$<restaurant id>'``.

        Errors raised while scraping are logged and not re-raised. The
        database session is closed in every case, including when the city
        lookup itself fails (that error still propagates to the caller).
        """
        db = SqlHelper(constants.SQL_DATABASE_NAME)
        db.create_session()
        try:
            if self.top_how_much is not None:
                df = db.fetch_table_as_dataframe('top_cities')
                # Use an ordered list, not a set: the loop below relies on
                # each row being [state, city], and a set gives no ordering
                # guarantee (recent pandas rejects sets here altogether).
                cities_dataframe = pd.DataFrame(
                    data=df.iloc[:self.top_how_much],
                    columns=['state', 'city'])
                cities = cities_dataframe.values.tolist()
                self.logger.debug('Selected cities: %s', cities)
            elif self.city_name is None:
                city_objects = db.fetch_entity_where('TopCities')
                cities = [[city.state, city.city] for city in city_objects]
            else:
                city_objects = db.fetch_entity_where('TopCities',
                                                     True,
                                                     False,
                                                     city=self.city_name)
                cities = [[city.state, city.city] for city in city_objects]

            # NOTE(review): all_results is collected but not returned or
            # stored anywhere in this method - confirm whether callers
            # expect it.
            all_results = []
            try:
                for state, city_name in cities:
                    self.current_city = city_name
                    self.current_state = state
                    result = {
                        'city': self.current_city,
                        'total': None,
                        'restaurants': []
                    }
                    self.logger.info('Starting to scrape %s',
                                     self.current_city)
                    spider = SpeisekarteSpider(self.current_city)
                    spider.run()
                    spider_result = spider.get_result()
                    if spider_result.get_success() and not self.test_mode:
                        data = spider_result.get_data()
                        result['total'] = data['total']
                        for restaurant in data['restaurants']:
                            restaurant_id = restaurant['id']
                            # format() also copes with non-string ids.
                            entity_id = '{0}${1}'.format(
                                self.current_city, restaurant_id)
                            datastore_entity = self._create_datastore_entity(
                                restaurant)
                            saved = self._save(entity_id, datastore_entity)
                            result['restaurants'].append({
                                'success': saved,
                                'content': restaurant_id
                            })
                    all_results.append(result)
            except HTTPError as error:
                self.logger.exception(
                    'Encountered HTTP error %s on %s:\nAbort program.',
                    error.code, error.url)
            except Exception:
                # Deliberately broad: one failing city must not crash the
                # caller. KeyboardInterrupt/SystemExit still propagate.
                self.logger.exception('Something went wrong')
        finally:
            db.close_session()
 def _fetch_top_city_from(self, top_how_much, table_name):
     """Return the 'city' values of the first rows of a database table.

     Args:
         top_how_much: Number of leading rows to take from the table.
         table_name: Name of the table to load as a dataframe.

     Returns:
         A list with the 'city' column values of the selected rows.
     """
     db = SqlHelper(constants.SQL_DATABASE_NAME)
     db.create_session()
     try:
         df = db.fetch_table_as_dataframe(table_name)
         self.logger.info('Fetching Top {0}'.format(top_how_much))
         # columns must be an ordered collection; recent pandas rejects a
         # set here.
         cities_dataframe = pd.DataFrame(data=df.iloc[:top_how_much],
                                         columns=['city'])
         return cities_dataframe['city'].values.tolist()
     finally:
         # Release the session on every path; it was previously left open.
         db.close_session()
    def run(self):
        """Search Immoscout24 for restaurant listings in the top cities.

        Steps:

        1. Load the first ``self.top_how_much`` rows of the 'top_cities'
           table and keep their 'city' column.
        2. Resolve a geo id per city through ``self.get_geo_id``.
        3. Query ``constants.IMMOSCOUT_SEARCH_URL`` once per geo id for
           gastronomy objects of type restaurant, filtered by seat count
           and total floor space.
        4. Convert every returned listing with ``self.transform_df`` and
           collect the pieces into one dataframe.
        5. Serialise the dataframe to JSON records, wrap it with
           ``_create_datastore_entity`` and store it with ``_save`` under
           ``self.entity_id``.

        Only the first result page (page size 200) is read per city.

        Returns:
            A ``Result`` whose success flag is the return value of ``_save``.
        """
        # Local import: pandas deprecated passing literal JSON text to
        # read_json, so the response body is wrapped in a file-like object.
        from io import StringIO

        result = Result()
        db = SqlHelper(constants.SQL_DATABASE_NAME)
        df = db.fetch_table_as_dataframe('top_cities')
        # columns must be an ordered collection; recent pandas rejects sets.
        cities = pd.DataFrame(data=df.iloc[0:self.top_how_much],
                              columns=['city'])
        for index, row in cities.iterrows():
            self.logger.debug(str(index + 1) + ". " + row['city'])

        # cities = {'city': ['Heidelberg', 'Karlsruhe']}
        city_for_search = pd.DataFrame(cities, columns=['city'])

        immo_oauth = OAuth1(
            constants.IMMOSCOUT_CLIENT_KEY,
            client_secret=constants.IMMOSCOUT_CLIENT_SECRET,
            resource_owner_key=constants.IMMOSCOUT_RESOURCE_OWNER_KEY,
            resource_owner_secret=constants.IMMOSCOUT_RESOURCE_OWNER_SECRET)

        # Create an empty geo_df (a list keeps the column order stable).
        geo_df = pd.DataFrame(columns=['geoId', 'city'])
        # Get the geo id from the Immoscout24 API.
        geo_df = self.get_geo_id(city_for_search, geo_df, immo_oauth)

        # Restaurant floor area breakdown, taken from:
        # https://se909eeccf1caa559.jimcontent.com/download/version/1507517357/module/11096440527/name/AuszugDiplomarbeit_13.03.2006.pdf
        #   guest area = 40 %
        #   technical  = 12 %
        #   staff      =  8 %
        # The guest area is 40 % of the total, so total = guest area / 40 * 100.
        total_floor_space_min = constants.FLOOR_SPACE_GUEST * constants.SEATS_MIN / 40 * 100.0
        total_floor_space_max = constants.FLOOR_SPACE_GUEST * constants.SEATS_MAX / 40 * 100.0

        seats_range = str(constants.SEATS_MIN) + '-' + str(constants.SEATS_MAX)
        floor_space_range = (str(total_floor_space_min) + '-' +
                             str(total_floor_space_max))

        listing_frames = []
        # Get the Immoscout24 objects by geocode.
        for index, row in geo_df.iterrows():
            params = {
                'realestatetype': 'gastronomy',
                'geocodes': str(row['geoId']),
                'gastronomytypes': 'restaurant',
                'channel': 'is24',
                'numberofseats': seats_range,
                'pagesize': '200',
                'totalfloorspace': floor_space_range
            }
            immo_search_response = requests.request(
                method='GET',
                url=constants.IMMOSCOUT_SEARCH_URL,
                params=params,
                headers=constants.IMMOSCOUT_HEADERS,
                auth=immo_oauth)
            immo_search_json = pd.read_json(
                StringIO(immo_search_response.text))
            resultlist = immo_search_json['resultlist.resultlist']
            # Explicit positional access: the first row carries the hit
            # count, the second the result entries. Plain [0]/[1] relied on
            # a deprecated positional fallback.
            hits = resultlist.iloc[0]['numberOfHits']
            self.logger.info("Hits: " + str(hits) + " for city: " +
                             str(row['city']) + "\r\n")
            if hits < 1:
                self.logger.info('No object found for city: ' +
                                 str(row['city']))
                continue

            entries = resultlist.iloc[1][0]['resultlistEntry']
            # A single hit is delivered as one object instead of a list.
            if isinstance(entries, dict):
                entries = [entries]
            # Iterate over the entries actually returned rather than
            # range(hits): the hit count can exceed what one page holds,
            # which used to raise an IndexError.
            for entry in entries:
                immo_object = entry['resultlist.realEstate']
                listing = self.transform_df(immo_object, immo_object['@id'])
                # DataFrame.append also accepted a dict or Series as one
                # row; keep that working with concat.
                if isinstance(listing, (dict, pd.Series)):
                    listing = pd.DataFrame([listing])
                listing_frames.append(listing)

        # DataFrame.append was removed from pandas; concatenate once instead.
        if listing_frames:
            restaurant_df = pd.concat(listing_frames,
                                      ignore_index=True,
                                      sort=True)
        else:
            restaurant_df = pd.DataFrame()

        self.logger.info(restaurant_df)
        result_json = restaurant_df.to_json(orient='records')
        attributes = self._create_datastore_entity(result_json)
        success = self._save(self.entity_id, attributes)
        result.set_success(success)
        self.logger.info(result)
        return result