Exemple #1
0
def create_transcription(comicnum=1):
  """Fetch a qwantz.com comic page and return its transcription XML.

  Downloads the page for ``comicnum``, extracts the comic image URL, the
  image's mouseover text, the RSS title hidden in an HTML comment and the
  subject of the "contact" link, then downloads the image and passes
  everything to ``Dinocr``.

  Args:
    comicnum: value appended to the ``?comic=`` query of the page URL.

  Returns:
    The result of ``Dinocr.string_new_xml()``, or ``None`` when the page
    does not have the expected structure (no ``img.comic`` element, no
    ``src`` attribute, an image URL that does not match
    ``comic2-<digits>.png``, or a malformed "contact" link).
  """
  comic_url = 'http://www.qwantz.com/index.php?comic=' + str(comicnum)
  # Strip the leading 'http://www.' (11 characters).
  xml_url = comic_url[11:]
  todayparsed = fromstring(urlopen(comic_url).read())
  subject_text = 'unknown'
  comic_title = 'unknown'
  try:
    # Look the image element up once and read both attributes from it.
    comic_img = todayparsed.cssselect('img.comic')[0]
    imageurl = comic_img.attrib['src']
    # A comic without mouseover text keeps the 'unknown' placeholder
    # instead of aborting with a KeyError.
    mouseover_text = comic_img.attrib.get('title', 'unknown')
    # Only pages whose image looks like a real comic are transcribed.
    if not re.search(r'comic2-([0-9]+)\.png', imageurl):
      return None
    # The title lives inside an HTML comment; the last comment that
    # contains an '.rss-title' element wins.
    for element in todayparsed.iter():
      if not isinstance(element, HtmlComment):
        continue
      titles = fromstring(element.text_content()).cssselect('.rss-title')
      if titles:
        comic_title = titles[0].text_content()
    # The subject is whatever follows the first '=' in the contact link.
    for item in todayparsed.cssselect('li'):
      if item.text_content() == 'contact':
        subject_text = item[0].attrib['href'].split('=')[1]
  except (IndexError, KeyError):
    return None
  img = StringIO(urlopen(imageurl).read())

  d = Dinocr(img,title=comic_title,url=xml_url,subject_text=subject_text,mouseover_text=mouseover_text)
  return d.string_new_xml()
Exemple #2
0
def compare_with_old_transcription(comicnum=1):
  """Transcribe one comic and compare the result with the stored answer.

  Downloads the qwantz.com page for ``comicnum``, saves the comic image to
  ``/tmp/comic.png``, runs ``Dinocr`` on it, prints the comic and the
  shortened URL, and passes ``d.old_xml`` together with
  ``answers[xml_url]`` to ``xml_compare``.

  Args:
    comicnum: value appended to the ``?comic=`` query of the page URL.

  Returns:
    None.
  """
  comic_url = 'http://www.qwantz.com/index.php?comic=' + str(comicnum)
  # Strip the leading 'http://www.' (11 characters); this is the key used
  # to look up the expected transcription in ``answers``.
  xml_url = comic_url[11:]
  todayparsed = fromstring(urlopen(comic_url).read())
  comic_title = 'unknown'
  try:
    imageurl = todayparsed.cssselect('img.comic')[0].attrib['src']
    # Validation only: indexing raises IndexError unless the image URL
    # looks like 'comic2-<digits>.png'.
    re.findall(r'comic2-([0-9]+)\.png',imageurl)[0]
    # The title lives inside an HTML comment; the last comment that
    # contains an '.rss-title' element wins.
    for element in todayparsed.iter():
      if not isinstance(element, HtmlComment):
        continue
      titles = fromstring(element.text_content()).cssselect('.rss-title')
      if titles:
        comic_title = titles[0].text_content()
  except IndexError:
    error_out("failed parsing qwantz.com",False)
    # error_out is defined elsewhere; in case it returns, stop here rather
    # than continue with imageurl unbound.
    return
  # Download with urlopen, as the sibling functions do, instead of
  # interpolating a URL scraped from the page into a shell command.
  image_data = urlopen(imageurl).read()
  with open('/tmp/comic.png', 'wb') as image_file:
    image_file.write(image_data)
  d = Dinocr('/tmp/comic.png',title=comic_title,url=xml_url)
  d.print_comic()
  d.generate_old_xml()
  print(xml_url)
  xml_compare(d.old_xml, answers[xml_url])
def main():
  """Pick a random trigram from the current qwantz.com comic.

  Fetches the site's index page, downloads the comic image it references
  into an in-memory file, runs ``Dinocr`` on it and returns the trigram
  chosen by ``Dinocr.choose_random_trigram()``.

  ``error_out`` is called when more than 2000 pixels were erased or when
  the trigram contains a word with no alphanumeric character at all.

  Returns:
    The chosen trigram.

  Raises:
    Exception: whatever went wrong while locating the comic image on the
      page; the failure is printed first and then re-raised, so the
      function no longer continues with ``comicurl`` unbound.
  """
  today_comic = urlopen('http://www.qwantz.com/index.php')
  todayparsed = fromstring(today_comic.read())
  try:
    comicurl = todayparsed.cssselect('img.comic')[0].attrib['src']
    # Only accept an image that looks like a real comic.
    if not re.search(r'comic2-([0-9]+)\.png', comicurl):
      raise ValueError('unexpected comic image url: %s' % comicurl)
  except Exception as e:
    # str(e) instead of the deprecated e.message, which is empty for many
    # exception types.
    print("failed parsing qwantz.com: " + str(e))
    raise
  comic_image = urlopen(comicurl).read()
  tmpfilep = StringIO()
  tmpfilep.write(comic_image)
  # Rewind so Dinocr reads the image from the start.
  tmpfilep.seek(0)
  logging.debug("wrote a file of length %d", len(comic_image))
  d = Dinocr(tmpfilep)
  if d.erased_pixels > 2000:
    error_out('large amount of erases in a new comic (%d erases, %d uncertainty)' % (d.erased_pixels,d.uncertainty),True)
  trigram = d.choose_random_trigram()
  # don't accept a trigram that has a non-alphanumeric word in it
  if any(not any(ch.isalnum() for ch in word) for word in trigram.split()):
    error_out('words in this trigram are weird: %s' % trigram,True)
  return trigram
Exemple #4
0
  # Body fragment: the enclosing ``def`` line is not part of this excerpt.
  # Fetches a qwantz.com comic page, picks a trigram from the comic image,
  # passes it to post_to_twitter and records the comic number on disk.
  #
  # With exactly one command-line argument that argument selects the comic;
  # otherwise the site's index page is fetched.
  comicid = ''
  if len(sys.argv) == 2:
    comicid = '?comic=' + str(sys.argv[1])
  today_comic = urlopen('http://www.qwantz.com/index.php' + comicid)
  result = today_comic.read()
  todayparsed = fromstring(result)
  try:
    # The comic number is parsed out of the image file name
    # ('comic2-<digits>.png').
    comicurl = todayparsed.cssselect('img.comic')[0].attrib['src']
    comicnum = int(re.findall(r'comic2-([0-9]+)\.png',comicurl)[0])
  except Exception as e:
    # NOTE(review): if error_out returns, comicurl/comicnum may be unbound
    # below -- confirm that error_out terminates the program.
    error_out("failed parsing qwantz.com: " + e.message,True)
  # Sequence checks only apply when no explicit comic was requested.
  if comicid == '':
    # presumably update_comicnum returns the previously seen comic number;
    # verify against its definition.
    prev_comic = update_comicnum(comicnum)
    if comicnum == prev_comic:
      error_out("same comic")
    # NOTE(review): when comicnum == prev_comic this condition is true as
    # well, so it is only skipped if the error_out call above does not return.
    if comicnum != prev_comic + 1:
      error_out("unexpected comic number: previous was %d, this was %d" % (prev_comic,comicnum),True)
  # NOTE(review): comicurl comes from the fetched page and is interpolated
  # into a shell command without escaping.
  system('curl %s > /tmp/comic.png' % comicurl)
  d = Dinocr('/tmp/comic.png')
  if d.erased_pixels > 2000:
    error_out('large amount of erases in a new comic (%d erases, %d uncertainty)' % (d.erased_pixels,d.uncertainty),True)
  trigram = d.choose_random_trigram()
  # Not read anywhere in the visible part of this fragment.
  anything = False
  # don't accept a trigram that has a non-alphanumeric word in it
  # (i.e. a word in which every character is non-alphanumeric)
  if any([all([not x.isalnum() for x in word]) for word in trigram.split()]):
    error_out('words in this trigram are weird: %s' % trigram,True)
  # The return value is stored but not used in the visible lines.
  result = post_to_twitter(trigram)
  # Overwrite trigramosaurus.txt with the number of the comic just handled.
  infofile = open('trigramosaurus.txt','w')
  infofile.write(str(comicnum))
  infofile.close()