Example #1
0
  def post(self):
    """Creates a new FeaturedArtwork from the posted JSON payload.

    Request params:
      json: artwork metadata (title, byline, publishDate in epoch millis,
          imageUri, detailsUri, optional thumbUri/attribution).
      crop: JSON list of floats describing the crop rectangle.

    Aborts with 409 when an artwork is already scheduled for the target date.
    """
    artwork_json = json.loads(self.request.get('json'))

    # publishDate arrives as epoch milliseconds; reduce it to a UTC date.
    publish_date = (datetime.datetime
        .utcfromtimestamp(artwork_json['publishDate'] / 1000)
        .date())
    # BUG FIX: datastore filter strings take the operator after a space
    # ('publish_date ='); without the space the '=' is folded into the
    # property name (see the correct usage elsewhere in this file), breaking
    # the duplicate-date check.
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() is not None:
      webapp2.abort(409, message='Artwork already exists for this date.')

    crop_tuple = tuple(float(x) for x in json.loads(self.request.get('crop')))

    new_image_url, new_thumb_url = maybe_process_image(
        artwork_json['imageUri'],
        crop_tuple,
        publish_date.strftime('%Y%m%d') + ' '
            + artwork_json['title'] + ' '
            + artwork_json['byline'])

    # Fall back to the client-supplied thumbnail if processing produced none.
    if not new_thumb_url and 'thumbUri' in artwork_json:
      new_thumb_url = artwork_json['thumbUri']
    new_artwork = FeaturedArtwork(
        title=artwork_json['title'],
        byline=artwork_json['byline'],
        attribution=artwork_json.get('attribution'),
        image_url=new_image_url,
        thumb_url=new_thumb_url,
        details_url=artwork_json['detailsUri'],
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
Example #2
0
  def post(self):
    """Stores a new FeaturedArtwork described by the posted JSON payload."""
    payload = json.loads(self.request.get('json'))
    crop = tuple(float(part) for part in json.loads(self.request.get('crop')))
    # publishDate arrives as epoch milliseconds; reduce it to a UTC date.
    day = datetime.datetime.utcfromtimestamp(
        payload['publishDate'] / 1000).date()

    image_name = '%s %s %s' % (
        day.strftime('%Y%m%d'), payload['title'], payload['byline'])
    image_url, thumb_url = maybe_process_image(
        payload['imageUri'], crop, image_name)

    # Prefer the client-supplied thumbnail when processing yielded none.
    if not thumb_url:
      thumb_url = payload.get('thumbUri', thumb_url)
    FeaturedArtwork(
        title=payload['title'],
        byline=payload['byline'],
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=payload['detailsUri'],
        publish_date=day).save()
    self.response.set_status(200)
Example #3
0
  def get(self):
    """Schedules artwork for upcoming dates that lack one, picked at random
    from the bundled lt-artworks.json catalog.

    Skips dates that already have artwork, avoids repeating any of the latest
    300 featured artworks, honors ?dry-run=true (no writes), and optionally
    renders the chosen set as HTML via ?output=html.
    """
    ARTWORKS = json.loads(open(os.path.join(os.path.split(__file__)[0], 'lt-artworks.json')).read())

    # Fetch latest 300 artworks (for blacklisting)
    latest_artworks = (FeaturedArtwork.all()
        .order('-publish_date')
        .fetch(300))

    # List dates for which artwork exists
    dates_with_existing_art = set(a.publish_date for a in latest_artworks)

    # List target dates that we want artwork for, but for which no artwork exists
    target_dates = [date.today() + timedelta(days=n) for n in range(-1, LOOKAHEAD_DAYS)]
    target_dates = [d for d in target_dates if d not in dates_with_existing_art]

    # Create a blacklist of keys to avoid repeats
    blacklist = set(artwork_key(a.details_url) for a in latest_artworks)

    logging.debug('starting blacklist size: %d' % len(blacklist))

    chosen_artworks = []

    for target_date in target_dates:
      # Pick from available artworks, excluding artwork in the blacklist
      random_artwork = None
      while True:
        if len(ARTWORKS) == 0:
          logging.error('Ran out of artworks to choose from, cannot continue')
          return

        random_artwork = random.choice(ARTWORKS)
        key = artwork_key(random_artwork['detailsUri'])
        # BUG FIX: remove every sampled candidate from the pool, not just the
        # chosen ones.  Blacklisted entries previously stayed in ARTWORKS
        # forever, so once every remaining artwork was blacklisted this loop
        # could never reach the len() == 0 guard and spun indefinitely.
        # (Blacklisted entries can never be chosen, so dropping them is safe.)
        ARTWORKS.remove(random_artwork)
        if key not in blacklist:
          chosen_artworks.append(random_artwork)
          break

      target_details_url = str(random_artwork['detailsUri'])
      logging.debug('%(date)s: setting to %(url)s' % dict(url=target_details_url, date=target_date))

      # Store the new artwork
      if self.request.get('dry-run', '') != 'true':
        new_artwork = FeaturedArtwork(
            title=random_artwork['title'],
            byline=random_artwork['byline'],
            attribution=random_artwork['attribution'],
            image_url=random_artwork['imageUri'],
            thumb_url=random_artwork['thumbUri'],
            details_url=random_artwork['detailsUri'],
            publish_date=target_date)
        new_artwork.save()

    if self.request.get('output', '') == 'html':
      self.response.out.write(get_html(artworks_json=json.dumps(chosen_artworks)))

    # Finish up
    logging.debug('done')
Example #4
0
    def post(self):
        """Creates a FeaturedArtwork from the posted JSON payload.

        Aborts with 409 when an artwork is already scheduled for the
        payload's publish date.
        """
        artwork_json = json.loads(self.request.get("json"))

        # publishDate is epoch milliseconds; reduce it to a UTC calendar date.
        publish_date = datetime.datetime.utcfromtimestamp(artwork_json["publishDate"] / 1000).date()
        # BUG FIX: datastore filter strings take the operator after a space
        # ("publish_date ="); with "publish_date=" the operator is folded into
        # the property name, breaking the duplicate-date check.
        if FeaturedArtwork.all().filter("publish_date =", publish_date).get() is not None:
            webapp2.abort(409, message="Artwork already exists for this date.")

        crop_tuple = tuple(float(x) for x in json.loads(self.request.get("crop")))

        new_image_url, new_thumb_url = backroomarthelper.maybe_process_image(
            artwork_json["imageUri"],
            crop_tuple,
            publish_date.strftime("%Y%m%d") + " " + artwork_json["title"] + " " + artwork_json["byline"],
        )

        # Fall back to the client-supplied thumbnail if processing produced none.
        if not new_thumb_url and "thumbUri" in artwork_json:
            new_thumb_url = artwork_json["thumbUri"]
        new_artwork = FeaturedArtwork(
            title=artwork_json["title"],
            byline=artwork_json["byline"],
            attribution=artwork_json.get("attribution"),
            image_url=new_image_url,
            thumb_url=new_thumb_url,
            details_url=artwork_json["detailsUri"],
            publish_date=publish_date,
        )
        new_artwork.save()
        self.response.set_status(200)
Example #5
0
  def process_html(self, url, html):
    """Scrapes title/author/byline/image from an artwork page and stores a
    FeaturedArtwork for the requested publish date.

    Responds 500 when the required fields cannot be parsed from the HTML.
    """
    soup = BeautifulSoup(html)

    # BUG FIX: re.sub's fourth positional argument is *count*, not *flags*;
    # passing re.I | re.S there merely capped the number of substitutions.
    # The flags must be passed by keyword.
    details_url = re.sub(r'#.+', '', url, flags=re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
    title = soup.find(itemprop='name').get_text()
    author = soup.find(itemprop='author').get_text()
    completion_year_el = soup.find(itemprop='dateCreated')
    byline = author + ((', ' + completion_year_el.get_text()) if completion_year_el else '')
    image_url = soup.find(id='paintingImage')['href']

    if not title or not author or not image_url:
      self.response.out.write('Could not parse HTML')
      self.response.set_status(500)
      return

    # publishDate request param is epoch milliseconds.
    publish_date = (datetime.datetime
        .utcfromtimestamp(int(self.request.get('publishDate')) / 1000)
        .date())
    image_url, thumb_url = maybe_process_image(image_url,
        NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)

    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
Example #6
0
    def process_html(self, url, html):
        """Parses a painting details page and stores it as a FeaturedArtwork
        for the requested publish date.

        Responds 500 when the required fields cannot be parsed from the HTML.
        """
        soup = BeautifulSoup(html)

        # BUG FIX: re.sub's fourth positional argument is *count*, not *flags*;
        # passing re.I | re.S there limited the substitution count instead of
        # enabling the flags.  Pass them by keyword.
        details_url = re.sub(r"#.+", "", url, flags=re.I | re.S) + "?utm_source=Muzei&utm_campaign=Muzei"
        title = soup.select("h1 span")[0].get_text()
        author = soup.find(itemprop="author").get_text()
        completion_year_el = soup.find(itemprop="dateCreated")
        byline = author + ((", " + completion_year_el.get_text()) if completion_year_el else "")
        image_url = soup.find(id="paintingImage")["href"]

        if not title or not author or not image_url:
            self.response.out.write("Could not parse HTML")
            self.response.set_status(500)
            return

        # publishDate request param is epoch milliseconds.
        publish_date = datetime.datetime.utcfromtimestamp(int(self.request.get("publishDate")) / 1000).date()
        image_url, thumb_url = maybe_process_image(
            image_url, NO_CROP_TUPLE, publish_date.strftime("%Y%m%d") + " " + title + " " + byline
        )

        # create the artwork entry
        new_artwork = FeaturedArtwork(
            title=title,
            byline=byline,
            image_url=image_url,
            thumb_url=thumb_url,
            details_url=details_url,
            publish_date=publish_date,
        )
        new_artwork.save()
        self.response.set_status(200)
Example #7
0
 def post(self):
   """Inserts a FeaturedArtwork straight from the posted JSON payload,
   with no image processing or duplicate checking."""
   payload = json.loads(self.request.get('json'))
   # publishDate is epoch milliseconds; reduce it to a UTC calendar date.
   publish_date = datetime.datetime.utcfromtimestamp(
       payload['publishDate'] / 1000).date()
   new_artwork = FeaturedArtwork(
       title=payload['title'],
       byline=payload['byline'],
       image_url=payload['imageUri'],
       thumb_url=payload.get('thumbUri'),
       details_url=payload['detailsUri'],
       publish_date=publish_date)
   new_artwork.save()
   self.response.set_status(200)
Example #8
0
    def post(self):
        """Updates an existing FeaturedArtwork (looked up by id) from the
        posted JSON payload, re-processing its image with the given crop."""
        artwork_id = long(self.request.get("id"))
        payload = json.loads(self.request.get("json"))
        crop = tuple(float(part) for part in json.loads(self.request.get("crop")))
        artwork = FeaturedArtwork.get_by_id(artwork_id)
        if not artwork:
            self.response.set_status(404)
            return

        artwork.title = payload["title"]
        artwork.byline = payload["byline"]

        # Re-run image processing against the (possibly new) crop rectangle.
        image_name = "%s %s %s" % (
            artwork.publish_date.strftime("%Y%m%d"), payload["title"], payload["byline"])
        image_url, thumb_url = maybe_process_image(payload["imageUri"], crop, image_name)
        if not thumb_url:
            thumb_url = payload.get("thumbUri", thumb_url)

        artwork.image_url = image_url
        artwork.thumb_url = thumb_url
        artwork.details_url = payload["detailsUri"]
        artwork.save()
        self.response.set_status(200)
Example #9
0
 def move_artwork(self, artwork, publish_date, initial_artwork_id):
     """Moves *artwork* to *publish_date*, cascading any displaced artwork.

     If another artwork already occupies publish_date, it is first pushed
     recursively one day later.  initial_artwork_id stops the cascade from
     looping back onto the artwork that started the move.
     """
     # cascade moves
     current_artwork_at_date = FeaturedArtwork.all().filter("publish_date =", publish_date).get()
     if current_artwork_at_date and current_artwork_at_date.key().id() != initial_artwork_id:
         # 24 hours == one calendar day here, since publish_date is a date.
         self.move_artwork(current_artwork_at_date, publish_date + datetime.timedelta(hours=24), initial_artwork_id)
     artwork.publish_date = publish_date
     artwork.save()
Example #10
0
  def post(self):
    """Edits an existing artwork (by id) and echoes the updated record
    back to the client as JSON."""
    artwork_id = long(self.request.get('id'))
    payload = json.loads(self.request.get('json'))
    crop = tuple(float(part) for part in json.loads(self.request.get('crop')))
    artwork = FeaturedArtwork.get_by_id(artwork_id)
    if not artwork:
      webapp2.abort(404)

    artwork.title = payload['title']
    artwork.byline = payload['byline']
    artwork.attribution = payload.get('attribution')

    # Re-process the image with the submitted crop rectangle.
    image_name = '%s %s %s' % (
        artwork.publish_date.strftime('%Y%m%d'), payload['title'], payload['byline'])
    image_url, thumb_url = maybe_process_image(payload['imageUri'], crop, image_name)
    if not thumb_url:
      thumb_url = payload.get('thumbUri', thumb_url)

    artwork.image_url = image_url
    artwork.thumb_url = thumb_url
    artwork.details_url = payload['detailsUri']
    artwork.save()

    self.response.set_status(200)
    self.response.out.write(json.dumps(artwork_dict(artwork)))
Example #11
0
  def post(self):
    """Updates the identified artwork's metadata and re-processes its image
    with the submitted crop rectangle."""
    artwork_id = long(self.request.get('id'))
    payload = json.loads(self.request.get('json'))
    crop = tuple(float(part) for part in json.loads(self.request.get('crop')))
    artwork = FeaturedArtwork.get_by_id(artwork_id)
    if not artwork:
      self.response.set_status(404)
      return

    artwork.title = payload['title']
    artwork.byline = payload['byline']

    # Re-run image processing; the stored publish date names the image.
    image_name = '%s %s %s' % (
        artwork.publish_date.strftime('%Y%m%d'), payload['title'], payload['byline'])
    image_url, thumb_url = maybe_process_image(payload['imageUri'], crop, image_name)
    if not thumb_url:
      thumb_url = payload.get('thumbUri', thumb_url)

    artwork.image_url = image_url
    artwork.thumb_url = thumb_url
    artwork.details_url = payload['detailsUri']
    artwork.save()
    self.response.set_status(200)
Example #12
0
    def post(self):
        """Updates the identified artwork from the posted JSON payload and
        echoes the updated record back as JSON."""
        artwork_id = long(self.request.get("id"))
        payload = json.loads(self.request.get("json"))
        crop = tuple(float(part) for part in json.loads(self.request.get("crop")))
        artwork = FeaturedArtwork.get_by_id(artwork_id)
        if not artwork:
            webapp2.abort(404)

        artwork.title = payload["title"]
        artwork.byline = payload["byline"]
        artwork.attribution = payload.get("attribution")

        # Re-process the image with the submitted crop rectangle.
        image_name = "%s %s %s" % (
            artwork.publish_date.strftime("%Y%m%d"), payload["title"], payload["byline"])
        image_url, thumb_url = backroomarthelper.maybe_process_image(
            payload["imageUri"], crop, image_name)
        if not thumb_url:
            thumb_url = payload.get("thumbUri", thumb_url)

        artwork.image_url = image_url
        artwork.thumb_url = thumb_url
        artwork.details_url = payload["detailsUri"]
        artwork.save()

        self.response.set_status(200)
        self.response.out.write(json.dumps(artwork_dict(artwork)))
Example #13
0
  def get(self):
    """Backfills artwork for upcoming empty dates from lt-artworks.json,
    writing an HTML progress log to the response.

    Avoids repeating any of the latest 300 featured artworks (and anything
    chosen earlier in this run) via the blacklist.
    """
    ARTWORKS = json.loads(open(os.path.join(os.path.split(__file__)[0], 'lt-artworks.json')).read())

    # Fetch latest 300 artworks (for blacklisting)
    latest_artworks = (FeaturedArtwork.all()
        .order('-publish_date')
        .fetch(300))

    # List dates for which artwork exists
    dates_with_existing_art = set(a.publish_date for a in latest_artworks)

    # List target dates that we want artwork for, but for which no artwork exists
    target_dates = [date.today() + timedelta(days=n) for n in range(-1, LOOKAHEAD_DAYS)]
    target_dates = [d for d in target_dates if d not in dates_with_existing_art]

    # Create a blacklist of keys to avoid repeats
    blacklist = set(artwork_key(a.details_url) for a in latest_artworks)

    self.response.out.write('starting blacklist size: %d<br>' % len(blacklist))

    for target_date in target_dates:
      # BUG FIX: the previous rejection-sampling loop (random.choice until a
      # non-blacklisted key appears) never terminated once every artwork was
      # blacklisted.  Choosing uniformly from the filtered candidates is the
      # same distribution and lets us detect exhaustion.
      candidates = [a for a in ARTWORKS
                    if artwork_key(a['detailsUri']) not in blacklist]
      if not candidates:
        self.response.out.write('ran out of artworks to choose from<br>')
        return
      random_artwork = random.choice(candidates)
      # Once chosen, add to the blacklist to avoid repeats within the lookahead
      blacklist.add(artwork_key(random_artwork['detailsUri']))

      target_details_url = str(random_artwork['detailsUri'])
      self.response.out.write('%(date)s: setting to <b>%(url)s</b><br>' % dict(url=target_details_url, date=target_date))

      # Store the new artwork
      new_artwork = FeaturedArtwork(
          title=random_artwork['title'],
          byline=random_artwork['byline'],
          attribution=random_artwork['attribution'],
          image_url=random_artwork['imageUri'],
          thumb_url=random_artwork['thumbUri'],
          details_url=random_artwork['detailsUri'],
          publish_date=target_date)
      new_artwork.save()

    # Finish up
    self.response.out.write('done<br>')
Example #14
0
  def process_html(self, url, html):
    """Parses an artwork details page (wikiart.org or metmuseum.org), extracts
    its metadata, and stores a FeaturedArtwork for the requested publish date.

    Writes a JSON dump of the stored artwork on success; responds 500 when
    the URL is unrecognized or the page cannot be parsed.
    """
    soup = BeautifulSoup(html)

    if re.search(r'wikiart.org', url, re.I):
      # BUG FIX: re.sub's fourth positional argument is *count*, not *flags*;
      # passing re.I | re.S there capped the substitution count instead of
      # enabling the flags.  Pass them by keyword (here and below).
      details_url = re.sub(r'#.+', '', url, flags=re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
      title = soup.select('h1 span')[0].get_text()
      author = soup.find(itemprop='author').get_text()
      completion_year_el = soup.find(itemprop='dateCreated')
      byline = author + ((', ' + completion_year_el.get_text()) if completion_year_el else '')
      image_url = soup.find(id='paintingImage')['href']
    elif re.search(r'metmuseum.org', url, re.I):
      details_url = re.sub(r'[#?].+', '', url, flags=re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
      title = soup.find('h2').get_text()
      author = unicode(soup.find(text='Artist:').parent.next_sibling).strip()
      # Strip parenthesized life dates from the author name.
      author = re.sub(r'\s*\(.*', '', author)
      completion_year_el = unicode(soup.find(text='Date:').parent.next_sibling).strip()
      byline = author + ((', ' + completion_year_el) if completion_year_el else '')
      image_url = soup.find('a', class_='download').attrs['href']
    else:
      self.response.out.write('Unrecognized URL')
      self.response.set_status(500)
      return

    if not title or not author or not image_url:
      self.response.out.write('Could not parse HTML')
      self.response.set_status(500)
      return

    # publishDate request param is epoch milliseconds.
    publish_date = (datetime.datetime
        .utcfromtimestamp(int(self.request.get('publishDate')) / 1000)
        .date())
    image_url, thumb_url = maybe_process_image(image_url,
        NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)

    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()

    self.response.set_status(200)
    self.response.out.write(json.dumps(artwork_dict(new_artwork)))
Example #15
0
  def render_with_headers(self, callback):
    """Builds the response body for the currently-featured artwork plus the
    HTTP cache headers to serve alongside it.

    Args:
      callback: optional JSONP callback name; when truthy the JSON body is
          wrapped as ``callback(body)``.

    Returns:
      (body, headers) tuple: *body* is the serialized JSON/JSONP string and
      *headers* a dict of header name -> value.
    """
    now = datetime.utcnow()
    headers = {}
    current = None

    # Get up to 5 artworks published earlier than 2 days from now, ordered by latest first
    latest_artworks = (FeaturedArtwork.all()
        .filter('publish_date <=', date.today() + timedelta(days=2))
        .order('-publish_date')
        .fetch(5))

    # Pick out the first artwork in that set that has actually been published
    # (an artwork goes live at START_TIME on its publish_date).
    for artwork in latest_artworks:
      if now >= datetime.combine(artwork.publish_date, START_TIME):
        current = artwork
        break

    ret_obj = dict()
    if current is not None:
      # Found the next featured artwork
      ret_obj = dict(
          title=current.title.strip(),
          byline=current.byline.strip(),
          imageUri=current.image_url,
          detailsUri=current.details_url)
      # thumbUri and attribution are optional in the serialized form.
      if current.thumb_url:
        ret_obj['thumbUri'] = current.thumb_url
      if current.attribution:
        ret_obj['attribution'] = current.attribution

      # The next update time is the next START_TIME
      next_start_time = datetime.combine(date.today(), START_TIME)
      while next_start_time < now:
        next_start_time += timedelta(hours=24)

      ret_obj['nextTime'] = _serialize_datetime(next_start_time + NEXT_PADDING)

      # Caches expire in an hour, but no later than the next start time minus padding
      cache_expire_time = min(
          now + MAX_HTTP_CACHE_AGE,
          next_start_time)
      expire_seconds = max(0, (cache_expire_time - now).total_seconds())

      # Note that this max-age header will be cached, so max-age may be off by the memcache
      # cache time which is set above to 60 seconds
      headers['Cache-Control'] = 'max-age=%d, must-revalidate, public' % expire_seconds
      headers['Expires'] = cache_expire_time.strftime('%a, %d %b %Y %H:%M:%S GMT')
      headers['Pragma'] = 'public'

    else:
      # Found no featured artwork; hopefully this is temporary; don't cache this response
      headers['Cache-Control'] = 'max-age=0, no-cache, no-store'
      headers['Pragma'] = 'no-cache'

    # sort_keys makes the body byte-stable, so downstream caches see
    # identical payloads for identical state.
    body = json.dumps(ret_obj, sort_keys=True)
    if callback:
      body = '%s(%s)' % (callback, body)

    return (body, headers)
Example #16
0
    def post(self):
        """Deletes the FeaturedArtwork named by the "id" request parameter,
        aborting with 404 when it does not exist."""
        artwork = FeaturedArtwork.get_by_id(long(self.request.get("id")))
        if not artwork:
            webapp2.abort(404)

        artwork.delete()
        self.response.set_status(200)
Example #17
0
 def post(self):
   """Deletes the artwork with the given id; responds 404 when absent."""
   artwork = FeaturedArtwork.get_by_id(long(self.request.get('id')))
   if artwork is None:
     self.response.set_status(404)
     return
   artwork.delete()
   self.response.set_status(200)
Example #18
0
 def render(self):
   """Serializes the artwork queue as JSON, starting two weeks before the
   first day of the month *after* the requested month/year."""
   month_after = int(self.request.get('month')) + 1
   start = datetime.date(
       day=1, month=month_after, year=int(self.request.get('year')))
   start -= datetime.timedelta(weeks=2)
   rows = (FeaturedArtwork.all()
       .filter('publish_date >=', start)
       .order('publish_date')
       .fetch(1000))
   return json.dumps([artwork_dict(entry) for entry in rows])
Example #19
0
    def post(self):
        """Reschedules the identified artwork to the requested publishDate
        (epoch millis), cascading any artwork already occupying that slot."""
        artwork_id = long(self.request.get("id"))
        millis = long(self.request.get("publishDate"))
        publish_date = datetime.datetime.utcfromtimestamp(millis / 1000).date()
        target_artwork = FeaturedArtwork.get_by_id(artwork_id)
        if not target_artwork:
            webapp2.abort(404)

        # shift other artworks over
        self.move_artwork(target_artwork, publish_date, target_artwork.key().id())
        self.response.set_status(200)
Example #20
0
  def post(self):
    """Moves the identified artwork to the requested publish date (epoch
    millis), cascading any artwork already scheduled there."""
    artwork_id = long(self.request.get('id'))
    millis = long(self.request.get('publishDate'))
    publish_date = datetime.datetime.utcfromtimestamp(millis / 1000).date()
    target = FeaturedArtwork.get_by_id(artwork_id)
    if target is None:
      self.response.set_status(404)
      return

    # shift other artworks over
    self.move_artwork(target, publish_date, target.key().id())
    self.response.set_status(200)
Example #21
0
 def render(self):
   """Serializes the last 30 days (and all future) of scheduled artwork
   as a JSON array."""
   window_start = datetime.date.today() - datetime.timedelta(days=30)
   rows = (FeaturedArtwork.all()
       .filter('publish_date >=', window_start)
       .order('publish_date')
       .fetch(1000))
   serialized = []
   for entry in rows:
     serialized.append(dict(
         id=entry.key().id(),
         title=entry.title,
         byline=entry.byline,
         imageUri=entry.image_url,
         thumbUri=entry.thumb_url,
         detailsUri=entry.details_url,
         publishDate=date_to_timestamp(entry.publish_date)))
   return json.dumps(serialized)
Example #22
0
    def post(self):
        """Creates a FeaturedArtwork from the posted JSON payload and crop."""
        payload = json.loads(self.request.get("json"))
        crop = tuple(float(part) for part in json.loads(self.request.get("crop")))
        # publishDate is epoch milliseconds; reduce it to a UTC calendar date.
        day = datetime.datetime.utcfromtimestamp(payload["publishDate"] / 1000).date()

        image_name = "%s %s %s" % (day.strftime("%Y%m%d"), payload["title"], payload["byline"])
        image_url, thumb_url = maybe_process_image(payload["imageUri"], crop, image_name)

        # Fall back to the caller-supplied thumbnail if processing produced none.
        if not thumb_url:
            thumb_url = payload.get("thumbUri", thumb_url)
        FeaturedArtwork(
            title=payload["title"],
            byline=payload["byline"],
            image_url=image_url,
            thumb_url=thumb_url,
            details_url=payload["detailsUri"],
            publish_date=day,
        ).save()
        self.response.set_status(200)
Example #23
0
  def post(self):
    """Updates artwork metadata directly from the posted JSON (no image
    processing), deriving a thumbnail URL when none is supplied."""
    artwork_id = long(self.request.get('id'))
    payload = json.loads(self.request.get('json'))
    artwork = FeaturedArtwork.get_by_id(artwork_id)
    if artwork is None:
      self.response.set_status(404)
      return

    artwork.title = payload['title']
    artwork.byline = payload['byline']
    artwork.image_url = payload['imageUri']
    if 'thumbUri' in payload:
      artwork.thumb_url = payload['thumbUri']
    else:
      # No explicit thumbnail: derive one by appending the '!BlogSmall.jpg'
      # rendition suffix to the image URI.
      artwork.thumb_url = payload['imageUri'] + '!BlogSmall.jpg'
    artwork.details_url = payload['detailsUri']
    artwork.save()
    self.response.set_status(200)
Example #24
0
File: main.py Project: GggXp/muzei
  def render(self, callback):
    """Renders the currently-featured artwork as JSON (or JSONP when
    *callback* is given) and sets cache headers on the response.

    Args:
      callback: optional JSONP callback name.

    Returns:
      The serialized body string.
    """
    now = datetime.datetime.utcnow()
    current = None

    # Get up to 5 artworks published earlier than 2 days from now, ordered by latest first
    latest_artworks = (FeaturedArtwork.all()
        .filter('publish_date <=', datetime.date.today() + datetime.timedelta(days=2))
        .order('-publish_date')
        .fetch(5))

    # Pick out the first artwork in that set that has actually been published
    # (an artwork goes live at START_TIME on its publish_date).
    for artwork in latest_artworks:
      if now >= datetime.datetime.combine(artwork.publish_date, START_TIME):
        current = artwork
        break

    ret_obj = dict()
    if current is not None:
      featured = dict(
          title=current.title,
          byline=current.byline,
          imageUri=current.image_url,
          detailsUri=current.details_url)
      # thumbUri is optional in the serialized form.
      if current.thumb_url:
        featured['thumbUri'] = current.thumb_url

      # The next update time is at START_TIME tomorrow
      next_time = datetime.datetime.combine(datetime.date.today() \
          + datetime.timedelta(days=1), START_TIME) + NEXT_PADDING
      featured['nextTime'] = _serialize_datetime(next_time)

      # Caches expire in an hour, but no later than the next start time minus 5 minutes
      # NOTE(review): this mixes datetime.now() (local clock) with *now*
      # (utcnow) in the subtraction below; on a non-UTC serving clock the
      # computed max-age drifts by the UTC offset — confirm the server runs UTC.
      cache_expire_time = min(
          datetime.datetime.now() + datetime.timedelta(hours=1),
          next_time - datetime.timedelta(minutes=5))
      expire_seconds = max(0, (cache_expire_time - now).total_seconds())
      self.response.headers['Cache-Control'] = 'max-age=%d, must-revalidate, public' % expire_seconds
      self.response.headers['Expires'] = cache_expire_time.strftime('%a, %d %b %Y %H:%M:%S GMT')

      ret_obj = featured

    # sort_keys keeps the serialized body stable for identical state.
    s = json.dumps(ret_obj, sort_keys=True)
    if callback:
      return '%s(%s)' % (callback, s)
    else:
      return s
Example #25
0
 def render(self):
   """JSON-serializes all artwork scheduled on or after the first day of
   the requested month/year."""
   first_of_month = datetime.date(
       day=1,
       month=int(self.request.get('month')),
       year=int(self.request.get('year')))
   rows = (FeaturedArtwork.all()
       .filter('publish_date >=', first_of_month)
       .order('publish_date')
       .fetch(1000))
   serialized = []
   for entry in rows:
     serialized.append(dict(
         id=entry.key().id(),
         title=entry.title,
         byline=entry.byline,
         imageUri=entry.image_url,
         thumbUri=entry.thumb_url,
         detailsUri=entry.details_url,
         publishDate=date_to_timestamp(entry.publish_date)))
   return json.dumps(serialized)
Example #26
0
 def render(self):
     """Returns the artwork queue as a JSON array, starting two weeks before
     the first day of the month after the requested month/year."""
     anchor = datetime.date(
         day=1,
         month=int(self.request.get("month")) + 1,
         year=int(self.request.get("year")))
     anchor -= datetime.timedelta(weeks=2)
     rows = (FeaturedArtwork.all()
         .filter("publish_date >=", anchor)
         .order("publish_date")
         .fetch(1000))
     items = []
     for artwork in rows:
         items.append(dict(
             id=artwork.key().id(),
             title=artwork.title,
             byline=artwork.byline,
             imageUri=artwork.image_url,
             thumbUri=artwork.thumb_url,
             detailsUri=artwork.details_url,
             publishDate=date_to_timestamp(artwork.publish_date)))
     return json.dumps(items)
Example #27
0
  def post(self):
    """Updates an existing artwork's metadata and re-processed image URLs.

    Responds 404 when no artwork exists for the given id.
    """
    id = long(self.request.get('id'))
    artwork_json = json.loads(self.request.get('json'))
    target_artwork = FeaturedArtwork.get_by_id(id)
    if not target_artwork:
      self.response.set_status(404)
      return

    target_artwork.title = artwork_json['title']
    target_artwork.byline = artwork_json['byline']

    # NOTE(review): other call sites pass (image_uri, crop_tuple, name) to
    # maybe_process_image; this one passes only two arguments and no crop —
    # confirm this matches the helper's signature in this revision.
    new_image_url, new_thumb_url = maybe_process_image(
        artwork_json['imageUri'],
        artwork_json['title'] + ' ' + artwork_json['byline'])
    # Fall back to the client-supplied thumbnail if processing produced none.
    if not new_thumb_url and 'thumbUri' in artwork_json:
      new_thumb_url = artwork_json['thumbUri']

    target_artwork.image_url = new_image_url
    target_artwork.thumb_url = new_thumb_url
    target_artwork.details_url = artwork_json['detailsUri']
    target_artwork.save()
    self.response.set_status(200)
Example #28
0
  def get(self):
    """Fills upcoming empty dates by recycling an older previously-featured
    artwork (wikiart/wikipaintings/metmuseum URLs only), re-adding it via
    backroomarthelper, and logging HTML progress to the response.
    """
    # Fetch latest 1000 artworks
    latest_artworks = (FeaturedArtwork.all()
        .order('-publish_date')
        .fetch(1000))

    # List dates for which artwork exists
    dates_with_existing_art = set(a.publish_date for a in latest_artworks)

    # List target dates that we want artwork for, but for which no artwork exists
    target_dates = [date.today() + timedelta(days=n) for n in range(-1, 9)]
    target_dates = [d for d in target_dates if d not in dates_with_existing_art]

    for target_date in target_dates:
      self.response.out.write('looking for artwork for date ' + str(target_date) + '<br>')

      # Create a blacklist of the most recent 200 artwork
      # (don't want to repeat one of the last 200!)
      blacklist_artwork_keys = set(sanitized_artwork_key(a) for a in latest_artworks[:200])
      if len(blacklist_artwork_keys) < 5:
        blacklist_artwork_keys = set() # should never happen, but just in case of a reset

      # Pick from one of the oldest 500, excluding artwork in the blacklist
      # NOTE(review): latest_artworks[500:] is empty when fewer than 501
      # artworks exist, so random.choice raises IndexError; and if no artwork
      # in that slice passes the domain/blacklist checks this loop never
      # terminates.  Consider a guard before changing this handler.
      random_artwork = None
      while True:
        random_artwork = random.choice(latest_artworks[500:])
        key = sanitized_artwork_key(random_artwork)
        if 'wikiart.org' in key or 'wikipaintings.org' in key or 'metmuseum.org' in key:
          if key not in blacklist_artwork_keys:
            break

      target_details_url = str(random_artwork.details_url)
      self.response.out.write('recycling ' + target_details_url + ' for date ' + str(target_date) + '<br>')

      backroomarthelper.add_art_from_external_details_url(
          target_date,
          target_details_url)

    self.response.out.write('done<br>')
Example #29
0
  def get(self):
    now = datetime.datetime.utcnow()
    if self.request.get('datetime'):
      now = datetime.datetime.strptime(self.request.get('datetime'), '%Y-%m-%dT%H:%M:%S')

    current_date = (now.date() if now.time() > START_TIME
                    else now.date() - datetime.timedelta(days=1))
    current_month = current_date.month

    # list the expected archives up until this point, starting with current month's archive
    expected_archives = []

    if current_date > ARCHIVE_START_DATE:
      if (current_date + datetime.timedelta(days=1)).month != current_month:
        # end of the month
        expected_archives.append((current_date.year, current_date.month))
      else:
        # partial month for this month
        expected_archives.append((current_date.year, current_date.month, current_date.day))

    # list all other months
    if current_date.month != ARCHIVE_START_DATE.month or current_date.year != ARCHIVE_START_DATE.year:
      current_date = current_date.replace(day=1)
      while True:
        current_date -= datetime.timedelta(days=1) # previous month
        current_date = current_date.replace(day=1)
        expected_archives.append((current_date.year, current_date.month))
        if current_date <= ARCHIVE_START_DATE:
          break

    # at this point expected_archives has a list of all archives that should be built

    # list current archive items to determine which archives are missing
    current_archives = []
    current_archive_files = gcs.listbucket(CLOUD_STORAGE_ARCHIVE_PATH)
    self.response.out.write('<h1>current archives</h1>')
    for archive_file in current_archive_files:
      m = re.search(r'((?:\d){4})((?:\d){2})((?:\d){2})?\.txt', archive_file.filename)
      if m:
        if m.group(3):
          archive = (int(m.group(1)), int(m.group(2)), int(m.group(3)))
        else:
          archive = (int(m.group(1)), int(m.group(2)))
        current_archives.append(archive)
        self.response.out.write(repr(archive) + '<br>')
      #self.response.out.write(archivemeta.filename + '\n')
    current_archives = set(current_archives)
    expected_archives = set(expected_archives)
    missing_archives = expected_archives.difference(current_archives)

    # generate the missing archives
    self.response.out.write('<h1>building missing archives</h1>')
    for archive in missing_archives:
      self.response.out.write('<h2>' + repr(archive) + '</h2>')
      # when building an archive, try to start from an existing archive
      # find the latest archive from this month as a starting point
      other_archives_from_month = filter(
          lambda x: len(x) == 3 and x[0] == archive[0] and x[1] == archive[1],
          current_archives)
      latest_current_archive_from_month = None
      latest_archive_gcs_path = None

      archive_metadata = []
      archive_image_blobs = []

      if other_archives_from_month:
        latest_current_archive_from_month = reduce(
            lambda x, y: (x[0], x[1], max(x[2], y[2])), other_archives_from_month)
        self.response.out.write('starting from archive ' + repr(latest_current_archive_from_month) + '<br>')

        existing_archive_name = '%04d%02d%02d' % latest_current_archive_from_month
        try:
          latest_archive_gcs_path = CLOUD_STORAGE_ARCHIVE_PATH + '/' + existing_archive_name + '.txt'
          existing_archive = gcs.open(latest_archive_gcs_path)
          content = gzip_decompress(existing_archive.read())
          existing_archive_lines = content.split('\n')
          existing_archive.close()
          archive_metadata = json.loads(existing_archive_lines[0])
          archive_image_blobs = filter(lambda x: len(x) > 0, existing_archive_lines[1:])
        except:
          self.response.out.write('error reading from existing archive, starting from scratch<br>')
          latest_current_archive_from_month = None
          latest_archive_gcs_path = None

      # construct the query
      query_from = None
      if latest_current_archive_from_month:
        # get everything after the latest archive this month
        query_from = datetime.date(*latest_current_archive_from_month) + datetime.timedelta(days=1)
      else:
        # get everything from this month
        query_from = datetime.date(
            archive[0], archive[1], archive[2] if len(archive) == 3 else 1).replace(day=1)
      query_from = max(ARCHIVE_START_DATE, query_from)

      query_to = None
      archive_name = None
      if len(archive) == 3:
        # partial month archive
        archive_name = '%04d%02d%02d' % archive
        query_to = datetime.date(*archive)
      else:
        # full month archive
        archive_name = '%04d%02d' % archive
        next_month = datetime.date(archive[0], archive[1], 1)
        if next_month.month == 12:
          next_month = next_month.replace(year=next_month.year + 1, month=1)
        else:
          next_month = next_month.replace(month=next_month.month + 1)
        query_to = next_month - datetime.timedelta(days=1)

      # fetch artworks that match this query
      artwork_objs = (FeaturedArtwork.all()
          .order('publish_date')
          .filter('publish_date >=', query_from)
          .filter('publish_date <=', query_to)
          .fetch(1000))
      for artwork_obj in artwork_objs:
        metadata_item = dict(
            publish_date=artwork_obj.publish_date.isoformat(),
            title=artwork_obj.title,
            byline=artwork_obj.byline,
            thumb_url=artwork_obj.thumb_url,
            details_url=artwork_obj.details_url,)

        # fetch the image
        image_result = urlfetch.fetch(artwork_obj.thumb_url)
        if image_result.status_code < 200 or image_result.status_code >= 300:
          raise IOError('Error downloading image: HTTP %d.' % image_result.status_code)

        # resize and crop thumb
        thumb = images.Image(image_result.content)
        if thumb.width > thumb.height:
          thumb.resize(width=4000, height=ARCHIVE_IMAGE_SIZE)
          thumb.crop(
              (float(thumb.width - thumb.height) / thumb.width) / 2, 0.,
              1 - (float(thumb.width - thumb.height) / thumb.width) / 2, 1.)
        else:
          thumb.resize(width=ARCHIVE_IMAGE_SIZE, height=4000)
          thumb.crop(
              0., (float(thumb.height - thumb.width) / thumb.height) / 2,
              1., 1 - (float(thumb.height - thumb.width) / thumb.height) / 2)

        # compute average color
        histogram = thumb.histogram()
        avg_color = tuple([int(x) for x in img_weighed_average(histogram)])
        avg_color_hex = "#%0.2X%0.2X%0.2X" % avg_color
        metadata_item['color'] = avg_color_hex

        # export thumb
        thumb_data_uri = 'data:image/jpeg;base64,' + base64.b64encode(
            thumb.execute_transforms(output_encoding=images.JPEG, quality=40))

        # append the metadata
        archive_metadata.append(metadata_item)
        archive_image_blobs.append(thumb_data_uri)

      self.response.out.write('query: from ' + repr(query_from) + ' to ' + repr(query_to) + '<br>')
      self.response.out.write('artworks: ' + str(len(artwork_objs)) + '<br>')
      #self.response.out.write('<pre>' + json.dumps(archive_metadata, indent=2) + '</pre>')

      # create the archive contents
      s = json.dumps(archive_metadata) + '\n'
      for blob in archive_image_blobs:
        s += blob + '\n'

      # gzip and write the archive
      gcs_path = CLOUD_STORAGE_ARCHIVE_PATH + '/' + archive_name + '.txt'
      self.response.out.write('writing to: ' + gcs_path + '<br>')
      gcsf = gcs.open(gcs_path, 'w',
          content_type='text/plain', options={'content-encoding':'gzip'})
      gcsf.write(gzip_compress(s))
      gcsf.close()

      # delete the previous archive
      if latest_archive_gcs_path:
        gcs.delete(latest_archive_gcs_path)
Example #30
0
  def post(self):
    """Create a FeaturedArtwork by scraping an external artwork page.

    Request params:
      publishDate: publish date as epoch milliseconds.
      externalArtworkUrl: URL of a wikiart.org or metmuseum.org artwork page.

    Aborts with 409 if an artwork already exists for the date, 400 on
    fetch errors or unrecognized URLs, and 500 if the page HTML could not
    be parsed. On success writes the new artwork as JSON with status 200.
    """
    publish_date = (datetime.datetime
        .utcfromtimestamp(int(self.request.get('publishDate')) / 1000)
        .date())
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() != None:
      webapp2.abort(409, message='Artwork already exists for this date.')

    url = self.request.get('externalArtworkUrl')
    result = urlfetch.fetch(url)
    if result.status_code < 200 or result.status_code >= 300:
      webapp2.abort(400, message='Error processing URL: HTTP %d. Content: %s'
          % (result.status_code, result.content))

    soup = BeautifulSoup(result.content)
    attribution = None

    if re.search(r'wikiart.org', url, re.I):
      attribution = 'wikiart.org'
      # Strip any URL fragment before appending the tracking query string.
      # NOTE: re.sub's 4th positional argument is 'count', not 'flags' --
      # flags must be passed by keyword or they are silently ignored.
      details_url = re.sub(r'#.+', '', url, flags=re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
      title = soup.select('h1 span')[0].get_text()
      author = soup.find(itemprop='author').get_text()
      completion_year_el = soup.find(itemprop='dateCreated')
      byline = author + ((', ' + completion_year_el.get_text()) if completion_year_el else '')
      image_url = soup.find(id='paintingImage')['href']
    elif re.search(r'metmuseum.org', url, re.I):
      attribution = 'metmuseum.org'
      # Strip any fragment or existing query string before appending ours.
      details_url = re.sub(r'[#?].+', '', url, flags=re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
      title = soup.find('h2').get_text()
      author = ''
      try:
        author = unicode(soup.find(text='Artist:').parent.next_sibling).strip()
      except AttributeError:
        # 'Artist:' label not found on the page; leave author empty and let
        # the parse-failure check below handle it.
        pass
      # Drop any trailing parenthesized qualifier, e.g. "(French, 1840-1926)".
      author = re.sub(r'\s*\(.*', '', author)
      completion_year_el = None
      try:
        completion_year_el = unicode(soup.find(text='Date:').parent.next_sibling).strip()
      except AttributeError:
        # 'Date:' label not found; byline will omit the year.
        pass
      byline = author + ((', ' + completion_year_el) if completion_year_el else '')
      image_url = soup.find('a', class_='download').attrs['href']
    else:
      webapp2.abort(400, message='Unrecognized URL')

    if not title or not author or not image_url:
      webapp2.abort(500, message='Could not parse HTML')

    image_url, thumb_url = maybe_process_image(image_url,
        NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)

    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        attribution=attribution,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()

    self.response.set_status(200)
    self.response.out.write(json.dumps(artwork_dict(new_artwork)))