Beispiel #1
0
 def __init__(self, db, **kwargs):
     """Initialise the base generator, the Yelp client and the lookup state.

     :param db: database handle forwarded to ``BaseFeatureGenerator``.
     :param kwargs: extra keyword arguments forwarded unchanged to
         ``BaseFeatureGenerator.__init__``.
     """
     BaseFeatureGenerator.__init__(self, db, **kwargs)
     self._yelp_api = YelpAPI()
     # Hard-coded reference points, one coordinate pair per entry.
     self._center_places = [
         (37.787925, -122.407515),  # union square
         (41.892406, -87.632629),  # river north
     ]
     # Starts empty; filled elsewhere in the class (not visible in this snippet).
     self._api_restaurnt_dict = dict()
Beispiel #2
0
    def __init__(self, db):
        """Keep the database handle and set up config, API client and limits.

        :param db: database handle stored on the instance as ``_db``.
        """
        self._db = db

        # External collaborators, created in the same order as before.
        self._config_parser = getConfig()
        self._yelp_api = YelpAPI()

        # Numeric settings: commit batch size, lookup radius, print interval.
        self._commit_threshold, self._radius, self._print_threshold = 500, 100, 30
Beispiel #3
0
class Abstruct_Restaurant_Importer():
    """Base importer that loads restaurant rows from a CSV file into the DB.

    Every CSV row is converted to an author object by ``parse_row`` (not
    defined in this class; it has to be supplied by a subclass), matched to
    a Yelp id through ``YelpAPI`` and, when valid and not seen before in the
    current file, handed to ``db.add_authors``.
    """

    def __init__(self, db):
        """Store the DB handle and create the config parser and API client.

        :param db: database handle; ``add_authors`` and ``commit`` are
            called on it during the import.
        """
        self._db = db
        self._config_parser = getConfig()
        # Number of valid rows between two intermediate ``add_authors`` calls.
        self._commit_threshold = 500

        self._yelp_api = YelpAPI()
        # Radius handed to the Yelp id lookup (unit defined by YelpAPI -
        # TODO confirm).
        self._radius = 100
        # Minimum number of seconds between two progress log lines.
        self._print_threshold = 30

    def abstract_parse_csv(self, delimiter, file_path):
        """Import all valid restaurants found in a CSV file.

        A row is skipped when ``parse_row`` returns ``None``, when no usable
        coordinates can be extracted, when no Yelp id is found, or when the
        Yelp id was already seen in this file. Errors raised while handling
        a single row are logged and the row is skipped; errors outside the
        row handling are logged and end the import without re-raising.

        :param delimiter: field delimiter passed to ``csv.DictReader``.
        :param file_path: path of the CSV file; it is opened as ASCII and
            undecodable bytes are ignored.
        """
        # Make sure the attribute exists even if reading fails before the
        # first row, so the outer error handler can always log it.
        self._row = None
        logging.info("open csv with encoding='ascii', mode='r', errors='ignore' ")
        with codecs.open(file_path, encoding='ascii', mode='r', errors='ignore') as f:
            logging.info("IMPORT CSV %s" % f)
            try:
                reader = csv.DictReader(f, delimiter=delimiter)
                logging.info("opened file")
                # Valid authors of this file keyed by Yelp id (used for dedup).
                rest_dict = {}
                for row in reader:
                    try:
                        self._row = row
                        self._print_info()
                        author = self.parse_row(row)
                        if author is None:
                            self._skip_row()
                            continue
                        lat, lon = self.extact_coardinates(author)
                        if lat is None or lon is None:
                            self._skip_row()
                            continue
                        # Positional order is (name, longitude, latitude, radius).
                        yelp_id = self._yelp_api.get_restaurants_id(
                            author.name, lon, lat, self._radius)
                        if yelp_id is None:
                            self._skip_row()
                            continue
                        author.author_guid = yelp_id
                        if author.author_guid in rest_dict:
                            self._skip_row()
                            continue
                        author.domain = 'Restaurant'
                        rest_dict[author.author_guid] = author
                        self._valid_counter += 1
                        if self._valid_counter % self._commit_threshold == 0:
                            logging.info("valid restaurants processed: " + str(self._valid_counter))
                            # The complete accumulated list is passed each time.
                            self._db.add_authors(list(rest_dict.values()))
                    except Exception as exc:
                        self._skip_row()
                        logging.info(exc)
                        logging.info("encounter unknown error")
                self._db.add_authors(list(rest_dict.values()))
                self._db.commit()

            except Exception as e:
                logging.exception(e)
                logging.exception("error with row: " + str(self._row))

    def extact_coardinates(self, coordinates_text):
        """Parse ``(lat, lon)`` from the ``geo_enabled`` text of an object.

        :param coordinates_text: object whose ``geo_enabled`` attribute holds
            text of the form ``"(<lat>, <lon>)"``.
        :return: ``(lat, lon)`` as floats, or ``(None, None)`` when the text
            cannot be parsed or either coordinate equals 0.
        """
        try:
            cleaned = coordinates_text.geo_enabled.replace('(', "").replace(')', "")
            parts = cleaned.split(', ')
            lat = float(parts[0])
            lon = float(parts[1])
        except Exception:
            # Best effort: any malformed or missing value means "no location".
            return None, None
        if lat == 0 or lon == 0:
            return None, None
        return lat, lon

    def _skip_row(self):
        """Count one more skipped row."""
        self._skipped_rows += 1

    def _print_info(self):
        """Advance the row counter and log progress at most every
        ``_print_threshold`` seconds.

        The counters are created lazily: the first call (when the counter
        attributes do not exist yet) initialises them instead of counting.
        """
        try:
            self._row_counter += 1
            new_time = calendar.timegm(time.gmtime())
            if new_time - self._old_time > self._print_threshold:
                logging.info(
                    " processed rows: " + str(self._row_counter-1) + " valid rows: " + str(self._valid_counter) + " skipped_rows: " + str(
                        self._skipped_rows))
                self._old_time = new_time
        except AttributeError:
            # A missing attribute means the counters were never set up.
            self._row_counter = 1
            self._valid_counter = 0
            self._skipped_rows = 0
            self._old_time = calendar.timegm(time.gmtime())
            logging.info("initialized counters")

    def setUp(self):
        """Hook with no work to do in this class."""
        pass

    def is_well_defined(self):
        """Always report the importer as well defined."""
        return True
Beispiel #4
0
class Yelp_Feature_Generator(BaseFeatureGenerator):
    """Feature generator computing restaurant features with the Yelp API.

    Every public feature method expects the restaurant as the ``author``
    keyword argument. API results are cached per ``author_guid`` for the
    lifetime of the instance.
    """

    def __init__(self, db, **kwargs):
        """Initialise the base generator, the Yelp client and the cache.

        :param db: database handle forwarded to ``BaseFeatureGenerator``.
        :param kwargs: forwarded unchanged to ``BaseFeatureGenerator``.
        """
        BaseFeatureGenerator.__init__(self, db, **kwargs)
        self._yelp_api = YelpAPI()
        sf_center_location = (37.787925, -122.407515)  # union square
        cg_center_location = (41.892406, -87.632629)  # river north
        self._center_places = [sf_center_location, cg_center_location]
        # Cache of API results keyed by the restaurant's author_guid.
        self._api_restaurnt_dict = {}

    def cleanUp(self):
        """Hook with no work to do in this class."""
        pass
#region api features

    def distance_from_center_point(self, **kwargs):
        """Return the distance to the nearest center point, capped at 100.

        :param kwargs: must contain ``author``, whose ``geo_enabled`` text
            holds the restaurant location.
        :raises Exception: when ``author`` is not passed.
        """
        if 'author' not in kwargs:
            raise Exception('Author object was not passed as parameter')
        restaurant = kwargs['author']
        # NOTE(review): eval() runs arbitrary code found in geo_enabled.
        # If this value can come from untrusted input, switch to a safe
        # parser (e.g. ast.literal_eval) - left unchanged to keep behaviour.
        location = eval(restaurant.geo_enabled)
        min_dist = 100
        for center in self._center_places:
            distance = abs(commons.distance_calculator(center, location))
            min_dist = min(min_dist, distance)
        return min_dist

    def price_level(self, **kwargs):
        """Return the ``price`` field of the API result, or -1 if missing."""
        api_restaurant = self._get_restaurant(**kwargs)
        return self._get_info_from_resturant_dict(api_restaurant, 'price')

    def review_count(self, **kwargs):
        """Return the ``review_count`` field of the API result, or -1 if missing."""
        api_restaurant = self._get_restaurant(**kwargs)
        return self._get_info_from_resturant_dict(api_restaurant,
                                                  'review_count')

    def number_of_categories(self, **kwargs):
        """Return the number of entries in the API result's ``categories``.

        NOTE(review): unlike the sibling features this does not fall back to
        -1; it raises when the API lookup failed or the field is absent.
        """
        api_restaurant = self._get_restaurant(**kwargs)
        return len(api_restaurant['categories'])

    def reviews_rating(self, **kwargs):
        """Return the ``rating`` field of the API result, or -1 if missing."""
        api_restaurant = self._get_restaurant(**kwargs)
        return self._get_info_from_resturant_dict(api_restaurant, 'rating')

#endregion

#region private helpers

    def _get_info_from_resturant_dict(self, restaurant_dict, field):
        """Return ``restaurant_dict[field]`` or -1 when it cannot be read.

        Failures are logged. This also covers a ``None`` result (failed API
        lookup) and results without an ``id``, which previously raised from
        inside the error handler instead of returning -1.
        """
        try:
            return restaurant_dict[field]
        except Exception:
            try:
                restaurant_id = restaurant_dict['id']
            except Exception:
                restaurant_id = None
                logging.info('no id in dict, here is what there is- ' +
                             str(restaurant_dict))
            logging.info("skipped line: " + str(restaurant_id) +
                         " field: " + field)
            return -1

    def _get_restaurant(self, **kwargs):
        """Return the API result for the restaurant passed as ``author``.

        :raises Exception: when ``author`` is not passed.
        """
        if 'author' not in kwargs:
            raise Exception('Author object was not passed as parameter')
        return self._get_api_restaurant(kwargs['author'])

    def _price_to_int(self, price):
        """Return the length of ``price``."""
        return len(price)

    def _get_api_restaurant(self, restaurant):
        """Return the (cached) API result for ``restaurant``.

        :return: the API result, or ``None`` when the lookup raised; the
            failure is logged and nothing is cached in that case.
        """
        try:
            guid = restaurant.author_guid
            if guid in self._api_restaurnt_dict:
                return self._api_restaurnt_dict[guid]
            lat, lon = self._extact_coardinates(restaurant)
            api_result = self._yelp_api.get_restaurant(
                restaurant_name=restaurant.name,
                longitude=lon,
                latitude=lat,
                radius=100)
            self._api_restaurnt_dict[guid] = api_result
            return api_result
        except Exception:
            logging.info("Problem with getting restaurant by id: " +
                         str(restaurant.author_guid))
            return None

    def _extact_coardinates(self, coordinates_text):
        """Parse ``(lat, lon)`` from the ``geo_enabled`` text of an object.

        :param coordinates_text: object whose ``geo_enabled`` attribute holds
            text of the form ``"(<lat>, <lon>)"``.
        :return: ``(lat, lon)`` as floats, or ``(None, None)`` when the text
            cannot be parsed or either coordinate equals 0.
        """
        try:
            cleaned = coordinates_text.geo_enabled.replace('(', "").replace(')', "")
            parts = cleaned.split(', ')
            lat = float(parts[0])
            lon = float(parts[1])
        except Exception:
            # Best effort: any malformed or missing value means "no location".
            return None, None
        if lat == 0 or lon == 0:
            return None, None
        return lat, lon