Ejemplo n.º 1
0
def _guessYelpId(placeName, lat, lon):
    """Guess the Yelp business id for a named place at given coordinates.

    The result is cached in ``db`` under ``eventsTable`` at
    ``cache/<md5 hex digest of placeName>``. On a cache miss, Yelp is asked
    for businesses around ``(lat, lon)`` and the one whose coordinates are
    closest to that point (per ``geo.distance``) is chosen, handed to
    ``researchVenue`` and written to the cache.

    Args:
        placeName: Name of the place; used for the cache key and for
            logging only (it is not sent to Yelp as a search term).
        lat: Latitude of the place.
        lon: Longitude of the place.

    Returns:
        The cached id on a cache hit, the chosen business's ``id`` on a
        successful search, or ``None`` when Yelp returns no business with
        usable coordinates.
    """
    # md5 only accepts bytes, so encode text names first. Byte strings are
    # hashed unchanged, which keeps existing cache keys valid.
    nameBytes = placeName
    if not isinstance(nameBytes, bytes):
        nameBytes = nameBytes.encode("utf-8")
    safePlaceId = hashlib.md5(nameBytes).hexdigest()

    cachedId = db.child(eventsTable).child("cache/" + safePlaceId).get().val()
    if cachedId:
        return cachedId

    opts = {
        # 'term' is deliberately not passed: Yelp does a bad job with term
        # searching, so we rely on coordinates alone.
        'limit': 20,
        # NOTE(review): presumably 1 selects Yelp's "sort by distance"
        # mode -- confirm against the client library in use.
        'sort': 1,
    }
    r = yelpClient.search_by_coordinates(lat, lon, **opts)

    # Only businesses that actually carry coordinates can be ranked by
    # distance. Build a real list so emptiness can be tested.
    candidates = [
        b for b in r.businesses
        if b.location is not None and b.location.coordinate is not None
    ]
    if not candidates:
        # Covers both "no results" and "results without coordinates";
        # the latter used to crash min() with ValueError.
        log.info("Can't find %s" % placeName)
        return None

    location = (lat, lon)
    biz = min(
        candidates,
        key=lambda b: geo.distance(
            location,
            (b.location.coordinate.latitude, b.location.coordinate.longitude)))
    log.debug("%s --> %s" % (placeName, biz.name))
    researchVenue(biz)

    # Add bizId to cache
    record = {"cache/" + safePlaceId: str(biz.id)}
    db.child(eventsTable).update(record)

    return biz.id
Ejemplo n.º 2
0
def searchLocation(lat, lng, radius):
    """Fetch the venues around a point and write them out.

    Venues come from ``search.getVenuesFromIndex(lat, lng, radius)`` and are
    passed as one batch to ``writeYelpRecords``. Returns nothing; progress is
    reported through ``log``.
    """
    venues = search.getVenuesFromIndex(lat, lng, radius)
    log.debug("Writing venues...")
    writeYelpRecords(venues)

    venueCount = len(venues)
    log.info("Wrote %d venues" % venueCount)
Ejemplo n.º 3
0
def _getVenuesFromIndex(lat, lon, radius, sortOrder):
    """Collect all businesses the search API reports around a point.

    One query is issued for the circle at ``(lat, lon)``. If the reported
    total fits within ``YELP_MAX_PER_SEARCH`` (or the radius is already at or
    below ``YELP_MIN_SEARCH_RADIUS``), the remaining result pages are fetched
    and everything is returned as a single list. Otherwise the area is split
    into four quadrants that are crawled recursively on a thread pool.

    Returns:
        A list of businesses, or ``None`` if any quadrant crawl raised.
    """
    firstPage = _singleSearchAPIQuery(lat, lon, radius * YELP_RADIUS_FACTOR,
                                      sortOrder, 0)
    log.debug("Crawling point: (%s, %s), radius: %s meters" %
              (lat, lon, radius))

    mustSubdivide = (firstPage.total > YELP_MAX_PER_SEARCH
                     and radius > YELP_MIN_SEARCH_RADIUS)

    if not mustSubdivide:
        # The result set is small enough (or the area cannot shrink any
        # further): walk every page of this single search.
        collected = firstPage.businesses
        page = firstPage
        nextOffset = YELP_MAX_PER_PAGE
        while nextOffset < page.total:
            page = _singleSearchAPIQuery(lat, lon,
                                         radius * YELP_RADIUS_FACTOR,
                                         sortOrder, nextOffset)
            collected += page.businesses
            nextOffset += YELP_MAX_PER_PAGE
        return collected

    # Too many results for one search. We cover the square inscribed in the
    # current circle: each quadrant of that square is centred radius/sqrt(8)
    # away from the centre along both axes, and the smallest circle that
    # contains a quadrant has half the current radius.
    axisOffset = radius / sqrt(8)
    latStep = axisOffset / geofire.g_METERS_PER_DEGREE_LATITUDE
    lonStep = geofire.metersToLongitudeDegrees(axisOffset, lat)

    def crawlQuadrant(signs):
        # Failures are reported as None so the caller can abort the crawl.
        try:
            latSign, lonSign = signs
            return _getVenuesFromIndex(lat + latSign * latStep,
                                       lon + lonSign * lonStep,
                                       radius / 2., sortOrder)
        except Exception:
            traceback.print_exc()
            return None

    quadrantSigns = [(1, 1), (-1, 1), (-1, -1), (1, -1)]
    workers = ThreadPool(4)
    perQuadrant = workers.map(crawlQuadrant, quadrantSigns)
    workers.close()
    workers.join()

    # Abort if any child threw an exception.
    if None in perQuadrant:
        return None

    # Each quadrant produced its own list; merge them into one flat list.
    return [venue for quadrantVenues in perQuadrant
            for venue in quadrantVenues]
Ejemplo n.º 4
0
def getVenueIdentifiers(yelpID):
    """Map a Yelp business id to its identifiers in other namespaces.

    The Yelp URL is looked up in the Factual crosswalk to find a Factual id,
    then every crosswalk entry for that Factual id is added to the mapping
    under its namespace.

    Returns:
        A ``(mapping, ok)`` tuple. ``mapping`` always contains ``id``,
        ``version`` and ``yelp``; the remaining keys depend on how far the
        lookup got. ``ok`` is ``False`` only when the lookup raised.
    """
    yelpURL = "https://yelp.com/biz/%s" % yelpID
    mapping = {
        "id": yelpID,
        "version": CROSSWALK_CACHE_VERSION,
        "yelp": {"url": yelpURL},
    }

    try:
        # Production uses a dedicated crosswalk table; other deployments use
        # the client's default crosswalk.
        crosswalk = (factualClient.table("crosswalk-us")
                     if deployment == "production/"
                     else factualClient.crosswalk())

        yelpMatches = crosswalk.filters({"url": yelpURL}).data()
        if len(yelpMatches) == 0:
            # Not an error: Factual simply does not know this Yelp URL.
            log.debug("Crosswalk empty for Yelp -> Factual " + yelpID)
            return mapping, True

        factualID = yelpMatches[0]["factual_id"]
        mapping["factualID"] = factualID
        mapping["factual"] = {"id": factualID}

        linkedEntries = crosswalk.filters({"factual_id": factualID}).data()
        if len(linkedEntries) == 0:
            log.warn("Crosswalk empty for Factual -> * " + yelpID + " " +
                     factualID)

        for entry in linkedEntries:
            # The namespace becomes the key; drop the redundant fields from
            # the stored value.
            namespace = entry["namespace"]
            entry.pop("factual_id")
            entry.pop("namespace")
            mapping[namespace] = entry
    except APIException:
        log.error("Factual API failed again")
        return mapping, False
    except Exception:
        log.exception("Factual problem " + yelpID)
        return mapping, False

    return mapping, True
Ejemplo n.º 5
0
def searchLocation(lat, lng, radius, maxNum):
    """Research the venues and events around a point, once per area.

    If ``findSearchRecord`` already has a record within ``searchCacheRadius``
    of ``(lat, lng)``, nothing is done. Otherwise a search record is written,
    venues are fetched with ``search.getVenuesFromIndex`` and each is passed
    to ``researchVenue``, and events from ``events.fetchEventsFromLocation``
    are passed to ``researchEvent``. Both run on a pool of five threads.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.
        radius: Venue search radius, forwarded to the venue search.
        maxNum: Forwarded to ``search.getVenuesFromIndex``.

    Returns:
        None. The venue research results are only logged.
    """
    import json

    # Fetch locations
    searchRecord = findSearchRecord((lat, lng), searchCacheRadius)
    if searchRecord is not None:
        log.debug("searchRecord: %s" % searchRecord)
        return
    writeSearchRecord(lat, lng)

    yelpVenues = search.getVenuesFromIndex(lat, lng, radius, maxNum)

    pool = ThreadPool(5)
    try:
        res = pool.map(researchVenue, yelpVenues)

        # Fetch events from Eventful
        eventListings = events.fetchEventsFromLocation(lat, lng)
        pool.map(researchEvent, eventListings)
    finally:
        # Always release the worker threads, even when a research call or
        # the event fetch raises.
        pool.close()
        pool.join()

    log.info("Found %d: %s" % (len(res), json.dumps(res)))
Ejemplo n.º 6
0
def searchLocation(lat, lng, radius=None):
    """Research the venues and events around a point, once per area.

    If ``findSearchRecord`` already has a record within ``searchCacheRadius``
    of ``(lat, lng)``, nothing is done. Otherwise a search record is written,
    all result pages of the venue search are collected and each venue is
    passed to ``researchVenue``, and events from
    ``events.fetchEventsFromLocation`` are passed to ``researchEvent``. Both
    run on a pool of five threads.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.
        radius: Venue search radius; defaults to ``venueSearchRadius`` when
            ``None``.

    Returns:
        None. The venue research results are only logged.
    """
    import json

    # Fetch locations
    searchRecord = findSearchRecord((lat, lng), searchCacheRadius)
    if searchRecord is not None:
        log.debug("searchRecord: %s" % searchRecord)
        return
    writeSearchRecord(lat, lng)

    if radius is None:
        radius = venueSearchRadius

    # Page through the search results, using the number of venues collected
    # so far as the offset of the next page.
    total = 1
    offset = 0
    yelpVenues = []
    while offset < total:
        locality = search._getVenuesFromIndex(lat,
                                              lng,
                                              offset=offset,
                                              radius=radius)
        total = locality.total
        if not locality.businesses:
            # The reported total can exceed what the API will actually
            # return. An empty page would leave the offset unchanged and
            # repeat the same request forever, so stop here.
            break
        yelpVenues += locality.businesses
        offset = len(yelpVenues)

    pool = ThreadPool(5)
    try:
        res = pool.map(researchVenue, yelpVenues)

        # Fetch events from Eventful
        eventListings = events.fetchEventsFromLocation(lat, lng)
        pool.map(researchEvent, eventListings)
    finally:
        # Always release the worker threads, even when a research call or
        # the event fetch raises.
        pool.close()
        pool.join()

    log.info("Finished: " + json.dumps(res))
Ejemplo n.º 7
0
def updateRecord(yelpID, **details):
    """Build the per-provider section of a venue record.

    Args:
        yelpID: Yelp id of the venue; used only in log messages.
        **details: Raw provider payloads keyed by provider name. Recognised
            keys are ``yelp3``, ``wikipedia``, ``tripadvisor``,
            ``foursquare``, ``google`` and ``factual``; others are ignored.

    Returns:
        ``{"providers": {...}}`` with one entry per recognised provider that
        was supplied. A TripAdvisor payload that lacks a required key is
        logged and skipped.

    Raises:
        KeyError: if a ``yelp3``, ``wikipedia`` or ``foursquare`` payload
            lacks a key that is read unconditionally (e.g. ``url``), or a
            TripAdvisor review has no ``text``.
    """
    providers = {}

    # Yelp v3.
    if "yelp3" in details:
        info = details["yelp3"]
        categories = None
        if "categories" in info:
            categories = [
                c["alias"] for c in info["categories"]
                if "alias" in c
            ]
        providers["yelp3"] = {
            "images": _imageRecords(info.get("photos", []), info["url"]),
            "hours": _yelpHoursRecord(info.get("hours", None)),
            "categories": categories,
        }

    # Wikipedia.
    if "wikipedia" in details:
        info = details["wikipedia"]
        providers["wikipedia"] = {
            "url": info["url"],
            "description": info["summary"],
            "images": _imageRecords(info["images"], info["url"]),
        }

    # TripAdvisor
    if "tripadvisor" in details:
        info = details["tripadvisor"]
        reviews = info.get("reviews", [])
        firstReview = ""
        if len(reviews) > 0:
            firstReview = reviews[0]["text"]

        try:
            providers["tripAdvisor"] = {
                # This is the aggregate rating, and can be empty.
                "rating": float(info["rating"]) if info["rating"] else None,
                "totalReviewCount": int(info["num_reviews"]),
                # The rating of this review is not included.
                "description": firstReview,
                "url": info["web_url"],
            }
        except KeyError:
            # Incomplete payload: skip the provider rather than fail the
            # whole record. The only identifier in scope is yelpID.
            log.debug("TripAdvisor weird for %s" % yelpID)

    # Foursquare
    if "foursquare" in details:
        info = details["foursquare"]
        providers["foursquare"] = {
            "images": _imageRecords(info["images"], info["url"]),
        }

    # Google
    if "google" in details:
        info = details["google"]
        # No error, just no relevant website info
        providers["google"] = {"website": info.get("website", None)}

    # Factual Places
    if "factual" in details:
        info = details["factual"]
        providers["factual"] = {
            "url": info.get("website", None),
        }

    return {
        "providers": providers,
    }
Ejemplo n.º 8
0
def venueRecord(biz, **details):
    """Build the stored venue record from a Yelp business plus provider data.

    Args:
        biz: Response object from the Yelp Search API. The attributes read
            here are ``id``, ``name``, ``display_phone``, ``location`` and,
            when ``"yelp"`` is in ``details``, ``rating``, ``review_count``,
            ``snippet_text`` and ``url``.
        **details: Raw provider payloads keyed by provider name. Recognised
            keys are ``yelp``, ``wikipedia``, ``tripadvisor``,
            ``foursquare`` and ``factual``.

    Returns:
        A dict with the venue's identity, descriptions, url, phone, address,
        coordinates, categories, per-provider data, images (in random order)
        and hours. ``address`` and ``coordinates`` are ``None`` when the
        business has no location (or no coordinate, for ``coordinates``).
    """
    from collections import OrderedDict

    # h is derived from the providers, but for the main body of the record.
    # h is for header.
    h = {
        "url": None,
        "description": [],
        # Ordered so categories keep the order in which they are added.
        "categories": OrderedDict(),
        "images": [],
        "hours": [],
    }
    providers = {}

    # Yelp.
    if "yelp" in details:
        info = details["yelp"]
        providers["yelp"] = {
            "rating": biz.rating,
            "totalReviewCount": biz.review_count,
            "ratingMax": 5,
            "description": biz.snippet_text,
            "url": biz.url
        }
        h["description"].append(_descriptionRecord("yelp", biz.snippet_text))
        if "categories" in info:
            h["categories"].update([(c["title"],
                                     _categoryRecord(c["alias"], c["title"]))
                                    for c in info["categories"]
                                    if "title" in c])
        h["images"] += _imageRecords("yelp", info.get("photos", []), biz.url)
        h["hours"] = _yelpHoursRecord(info.get("hours", None))

    # Wikipedia.
    if "wikipedia" in details:
        info = details["wikipedia"]
        providers["wikipedia"] = {
            "url": info["url"],
            "description": info["summary"]
        }
        h["description"].append(
            _descriptionRecord("wikipedia", info["summary"]))
        h["images"] += _imageRecords("wikipedia", info["images"], info["url"])

    # TripAdvisor
    if "tripadvisor" in details:
        info = details["tripadvisor"]
        reviews = info.get("reviews", [])
        firstReview = ""
        if len(reviews) > 0:
            firstReview = reviews[0]["text"]
            h["description"].append(
                _descriptionRecord("tripadvisor", firstReview))

        try:
            providers["tripAdvisor"] = {
                # This is the aggregate rating. It can be empty, in which
                # case float() would raise, so store None instead.
                "rating": float(info["rating"]) if info["rating"] else None,
                "totalReviewCount": int(info["num_reviews"]),
                # The rating of this review is not included.
                "description": firstReview,
                "url": info["web_url"]
            }
        except KeyError:
            log.debug("TripAdvisor weird for " + biz.id)

    # Foursquare
    if "foursquare" in details:
        info = details["foursquare"]
        h["images"] += _imageRecords("foursquare", info["images"], info["url"])

    # Factual Places
    if "factual" in details:
        info = details["factual"]
        h["url"] = info.get("website", None)

    # Shuffle the images so no single provider always comes first.
    images = h["images"]
    h["images"] = random.sample(images, len(images))

    # A business may come back without a location; address and coordinates
    # are then both unknown.
    address = None
    coord = None
    location = biz.location
    if location is not None:
        address = location.display_address
        if location.coordinate is not None:
            coord = {
                "lat": location.coordinate.latitude,
                "lng": location.coordinate.longitude
            }

    return {
        "version": 1,
        "id": biz.id,
        "name": biz.name,
        "description": h["description"],
        "url": h["url"],
        "phone": biz.display_phone,
        "address": address,
        "coordinates": coord,
        "categories": list(h["categories"].values()),
        "providers": providers,
        "images": h["images"],
        "hours": h["hours"],
    }