Esempio n. 1
0
  def _load_full_data(self, listing):
    """Fetch the listing's detail page and fill in the remaining fields.

    Does nothing when the listing is already fully loaded, or when the page
    reports that the unit is no longer listed. Otherwise sets the blurb
    (description followed by amenities), posting timestamp, broker and
    brokerage on `listing` and saves it to the database.
    """
    if listing.is_fully_loaded():
      return
    s = open_page(self._br, listing.url)
    if 'This unit is not currently listed on StreetEasy' in s:
      return

    (days, s) = html_helper.advance_and_find(s, '<h6>Days On Market</h6>', 'p>', ' day')
    post_timestamp = self._get_post_timestamp(days)

    (brokerage, broker) = ('', '')
    (broker_stuff, s) = html_helper.find_in_between(s, 'Listed at', "<div class='closer'></div>")
    if broker_stuff is not None:
      broker_stuff = html_helper.strip_tags(broker_stuff.replace('\n', ' '))
      # partition() never raises: without a ' by ' separator the whole text is
      # kept as the brokerage, and any extra ' by ' stays in the broker name.
      (brokerage, _, broker) = broker_stuff.partition(' by ')

    # find_in_between can come back with None (see the broker check above), so
    # fall back to an empty string instead of failing on a missing section.
    (descr, s) = html_helper.find_in_between(s, '<h2>Description</h2>', '</section>')
    descr = html_helper.strip_tags(descr) if descr is not None else ''

    (amenities, s) = html_helper.find_in_between(s, '<h2>Amenities</h2>', '</section>')
    if amenities is None:
      amenities = ''
    else:
      # Turn list items and sub-headings into punctuation before the tags are
      # stripped, so the amenities read as sentences.
      amenities = amenities.replace('</li>', ', ').replace('</h6>', ': ').replace('<h6>', '\n')
      amenities = html_helper.strip_tags(amenities)
      # Remove the inline ad script text left behind after tag stripping.
      amenities = amenities.replace("googletag.cmd.push(function() {googletag.display('ad_amenity');});", '')
      amenities = amenities.replace(' \n', ' ').replace(', \n', '. ')

    listing.set_blurb(descr + '\n\n' + amenities)
    listing.set_posting_timestamp(post_timestamp)
    listing.set_broker(broker)
    listing.set_brokerage(brokerage)
    listing.save_to_db()
Esempio n. 2
0
  def _load_more_listing_data(self, listing):
    """Fetch the listing's detail page and fill in the remaining fields.

    Reads, in page order, the square footage, address, brokerage, broker and
    comments, sets whichever are found on `listing`, saves it to the database
    and returns it. A listing that is already fully loaded is returned
    untouched. A field whose marker is missing from the page is skipped.
    """
    if listing.is_fully_loaded():
      return listing
    s = open_page(self._br, listing.url)

    def column_after(text, marker):
      # Return (column value, remaining text) for the column following
      # `marker`. When the marker is absent the text is left as it is:
      # slicing with find()'s -1 would keep only the last character and
      # lose the rest of the page.
      pos = text.find(marker)
      if pos < 0:
        return (None, text)
      return html_helper.find_in_between(text[pos:], COL_START, COL_END)

    (sqft, s) = column_after(s, SQFT_MARKER)
    if sqft:
      listing.set_sqft(int(sqft))

    (address, s) = column_after(s, BLDG_MARKER)
    if address is not None:
      self._set_formatted_address(address, listing)

    (brokerage, s) = column_after(s, BRKG_MARKER)
    if brokerage:
      brokerage = html_helper.strip_tags(brokerage)
      listing.set_brokerage(brokerage)

    (broker, s) = column_after(s, BRKR_MARKER)
    if broker:
      broker = html_helper.strip_tags(broker)
      listing.set_broker(broker)

    pos = s.find(COMMENTS_MARKER)
    if pos >= 0:
      (blurb, s) = html_helper.find_in_between(s[pos:], ':', '<div class="cleanbreakdiv">')
      if blurb is not None:
        listing.set_blurb(html_helper.strip_tags(blurb))

    listing.save_to_db()
    return listing
Esempio n. 3
0
  def _load_details(self, listing):
    """Fetch the listing's detail page and fill in the remaining fields.

    Sets the fee flag, blurb (features followed by the description),
    location, broker and brokerage on `listing`, then saves it to the
    database. Does nothing when the listing is already fully loaded.
    """
    if listing.is_fully_loaded():
      return
    s = open_page(self._br, listing.url)

    (broker, brokerage) = ('', '')
    if 'Brokerage:' in s:
      (broker, s) = html_helper.advance_and_find(s, 'Save to Favorites', '<span class="bold">', '</span>')
      (brokerage, s) = html_helper.advance_and_find(s, 'Brokerage: ', '<span class="bold"', '</span>')
      if brokerage:
        # The start marker above stops short of the tag's end, so the match
        # begins with one leftover character (presumably the '>'); drop it.
        brokerage = brokerage[1:]

    (features, s) = html_helper.find_in_between(s, 'Features &amp; Amenities', '<div style="width: 640px')
    blurb = ''
    if features is not None:
      # Put every table cell on its own line so 'No Fee' can be matched as a
      # whole line below.
      blurb = html_helper.strip_tags(features.replace('<td', '\n<td'))
      listing.set_has_fee('No Fee\n' not in blurb)
    # With no features section the fee flag is left as it was rather than
    # guessed.

    (desc, s) = html_helper.find_in_between(s, '  Description', '<div id="panels"')
    if desc is not None:
      blurb += '\n\n' + html_helper.strip_tags(desc)
    listing.set_blurb(blurb)

    # The page's own address string is skipped over only to advance `s`; the
    # address that gets stored is the one looked up from the coordinates.
    (_, s) = html_helper.find_in_between(s, "var report_listing_address = '", "'")
    (lng, s) = html_helper.find_in_between(s, "longitude = '", "'")
    (lat, s) = html_helper.find_in_between(s, "latitude = '", "'")
    address = get_address(lat, lng)
    listing.set_location(lat, lng, address)
    listing.set_broker(broker)
    listing.set_brokerage(brokerage)
    listing.save_to_db()
Esempio n. 4
0
def prepare_text(text):
    """Normalise `text` into a single line of space-separated stemmed words.

    The text is converted to unicode, stripped of HTML tags, flattened onto
    one line, passed through `punctuationTable`, and split on spaces; each
    non-empty word is stemmed and the results are joined with single spaces.
    """
    text = unicode(text)
    text = html_helper.strip_tags(text)
    text = ' '.join(text.splitlines())
    text = text.translate(punctuationTable)
    # Splitting on a literal space leaves empty strings between repeated
    # spaces; the `if word` filter drops them.
    words = [stemmer.stemWord(word) for word in text.split(' ') if word]
    # str.join replaces the deprecated string.join(), with the same result.
    return ' '.join(words)
Esempio n. 5
0
 def _get_menu_page(self, s):
     """Return the text of every menu item heading found in `s`.

     Repeatedly looks for an <h3> heading following a 'class="media-story"'
     marker, continuing from where the previous match ended, until no
     further item is found. Each heading is stripped of tags and of
     surrounding whitespace.
     """
     items = []
     while True:
         (item, s) = html_helper.advance_and_find(s, 'class="media-story"', '<h3>', '</h3>')
         if item is None:
             # No further item in the remaining text.
             break
         items.append(html_helper.strip_tags(item).strip())
     return items
Esempio n. 6
0
  def _load_more_data(self, listing):
    """Fill in the listing's blurb from its detail page and save it.

    The blurb is the tag-stripped text found between SECTION_MARKER and
    SECTION_END. A listing that is already fully loaded is left untouched.
    """
    if not listing.is_fully_loaded():
      page = open_page(self._browser, listing.url)
      (raw_section, _rest) = html_helper.find_in_between(page, SECTION_MARKER, SECTION_END)
      listing.set_blurb(html_helper.strip_tags(raw_section))
      listing.save_to_db()
Esempio n. 7
0
  def _find_listing(self, s):
    """Parse the next listing out of `s`.

    Returns a tuple `(listing, rest)`, where `rest` is the text remaining
    after the parsed listing. `listing` is None when no URL, title or price
    can be found.
    """
    import time  # local import: only needed for the epoch conversion below

    (url, s) = html_helper.advance_and_find(s, TITLE_PLACE_MARKER, 'href="', '"')
    (title, s) = html_helper.find_in_between(s, '>', '<')
    if url is None or title is None:
      return (None, s)
    title = html_helper.strip_tags(title)

    (price, s) = html_helper.advance_and_find(s, 'color-fg-green', '$', '<')
    if price is None:
      # Treat a missing price like a missing URL/title instead of failing on
      # None.strip().
      return (None, s)
    # Prices may carry thousands separators and decimals, e.g. '2,500.00'.
    price = int(float(price.strip().replace(',', '')))

    # Skip past the next table cell; its contents are not used.
    (_, s) = html_helper.advance_and_find(s, '<td', '', '<div')
    (recency, s) = html_helper.advance_and_find(s, '"bold font-size-100"', '>', '</div')
    recency = html_helper.strip_tags(recency).lower()
    dt = self._understand_recency(recency, url)

    listing = Apartment(SOURCE, title, price, url)
    # strftime('%s') is a platform-specific extension that is not available
    # everywhere; mktime() yields the same local-time epoch seconds portably.
    # NOTE(review): assumes _understand_recency returns a datetime - confirm.
    listing.set_posting_timestamp(str(int(time.mktime(dt.timetuple()))))
    return (listing, s)
Esempio n. 8
0
def FeaturedStreamsMenu(sender, page=None):
    """Build the "Featured Streams" menu.

    Requests up to PAGE_LIMIT featured streams and returns a MediaContainer
    holding one WebVideoItem per stream.
    """
    container = MediaContainer(viewGroup="List", title2="Featured Streams")
    featured_url = "%s?limit=%s" % (TWITCH_FEATURED_STREAMS, PAGE_LIMIT)
    response = JSON.ObjectFromURL(featured_url, cacheTime=CACHE_INTERVAL)

    for entry in response['featured']:
        live = entry['stream']
        # Game name on the first line, viewer count on the second.
        subtitle = "%s\n%s Viewers" % (live['game'], live['viewers'])
        summary = strip_tags(entry['text'])
        channel = live['channel']
        player_url = "%s&channel=%s" % (TWITCH_LIVE_PLAYER, channel['name'])
        item = WebVideoItem(
            player_url,
            title=channel['display_name'],
            subtitle=subtitle,
            summary=summary,
            thumb=live['preview'],
        )
        container.Append(item)

    return container
Esempio n. 9
0
def FeaturedStreamsMenu(sender, page=None):
    """Build the "Featured" menu.

    Requests up to PAGE_LIMIT featured streams and returns an ObjectContainer
    holding one VideoClipObject per stream, pointing at the channel's URL.
    """
    container = ObjectContainer(title2='Featured')
    featured_url = "%s?limit=%s" % (TWITCH_FEATURED_STREAMS, PAGE_LIMIT)
    response = JSON.ObjectFromURL(featured_url, cacheTime=CACHE_INTERVAL)

    for entry in response['featured']:
        live = entry['stream']
        # Game name on the first line, viewer count on the second.
        subtitle = "%s\n%s Viewers" % (live['game'], live['viewers'])
        summary = strip_tags(entry['text'])
        channel = live['channel']
        clip = VideoClipObject(
            url=channel['url'],
            title=channel['display_name'],
            summary=summary,
            source_title=subtitle,
            thumb=live['preview']['large'],
        )
        container.add(clip)

    return container
Esempio n. 10
0
  def _set_formatted_address(self, address, listing):
    """Clean up a raw HTML address and store it on `listing`.

    The cleaned address is looked up with get_latlong(). When a location
    comes back it is stored together with the address; otherwise only the
    address is stored and an error line is printed.
    """
    # Strip off first line if it has a building in it (>2 lines)
    num_breaks = address.count(BREAK)
    if num_breaks > 1:
      address = address[address.index(BREAK) + len(BREAK) :]

    # Strip out HTML tags, add NY if it's not there, fix extra spacing
    address = html_helper.strip_tags(address).replace('( map )', ', ')
    if num_breaks == 0:
      address += ', NY'
    # Strings are immutable, so the result of replace() has to be assigned
    # back; otherwise the newlines are never turned into separators.
    address = address.replace('\n', ', ')
    address = html_helper.fix_spaces(address)

    location = get_latlong(address)
    if location is not None:
      listing.set_location(location[0], location[1], address)
    else:
      listing.set_address(address)
      # Single pre-formatted argument: prints the same line whether print is
      # a statement or a function.
      print('error getting %s %s' % (address, listing.url))
Esempio n. 11
0
def FollowedMenu(sender, page=None):
    """Build the "Followed" menu for the user named in the preferences.

    Fetches the channels that user follows, requests the stream list for
    those channel names, and returns an ObjectContainer holding one
    VideoClipObject per returned stream.
    """
    container = ObjectContainer(title2="Followed")
    follows_url = TWITCH_FOLLOWED_STREAMS % Prefs['username']

    followed = JSON.ObjectFromURL(follows_url, cacheTime=CACHE_INTERVAL)
    channel_names = [entry['channel']['name'] for entry in followed['follows']]

    # All followed channels go into a single comma-separated query parameter.
    query = urllib.urlencode({'channel': ','.join(channel_names)})
    streams = JSON.ObjectFromURL(TWITCH_LIST_STREAMS + "?" + query)

    for live in streams['streams']:
        # Game name on the first line, viewer count on the second.
        subtitle = "%s\n%s Viewers" % (live['game'], live['viewers'])
        channel = live['channel']
        summary = strip_tags(channel['status'])
        clip = VideoClipObject(
            url=channel['url'],
            title=channel['display_name'],
            summary=summary,
            source_title=subtitle,
            thumb=live['preview']['large'],
        )
        container.add(clip)

    return container