import getopt
import sys

import MySQLdb

import reddit

USAGE = ("Usage: redditCrawler.py -h <host> -u <user> -p <password> "
         "-d <db_name> -l <limit> <subreddit1> <subreddit2> <...>")


def main(argv):
    try:
        opts, args = getopt.getopt(argv, "h:u:p:d:l:",
                                   ["host=", "user=", "pass=", "db=", "limit="])
    except getopt.GetoptError:
        print USAGE
        sys.exit(2)

    # All five options and at least one subreddit are required.
    if len(opts) < 5 or not args:
        print USAGE
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--host"):
            host = arg
        elif opt in ("-u", "--user"):
            user = arg
        elif opt in ("-p", "--pass"):
            password = arg
        elif opt in ("-d", "--db"):
            db = arg
        elif opt in ("-l", "--limit"):
            limit = arg

    # Whatever getopt did not consume is the list of subreddits to crawl.
    subNames = args

    conn = MySQLdb.connect(host=host, user=user, passwd=password, db=db,
                           charset='utf8')
    cur = conn.cursor()

    # Remember the current row count so AUTO_INCREMENT can be reset after
    # INSERT IGNORE has skipped any duplicate rows.
    cur.execute("SELECT COUNT(*) FROM images")
    count = cur.fetchone()[0]

    for sub in subNames:
        print "Searching %s ..." % sub
        result = reddit.parse(sub, limit)
        print "Found %s items in %s" % (len(result), sub)
        for item in result:
            # print "[%s, %s]" % (item.sub, item.url)
            cur.execute(
                "INSERT IGNORE INTO images (sub, title, author, permalink, link) "
                "VALUES (%s,%s,%s,%s,%s)",
                (item.sub, item.title, item.author, item.permalink, item.url))

    cur.execute("ALTER TABLE images AUTO_INCREMENT = %s", (count,))
    conn.commit()
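# The crawler above assumes an `images` table already exists. Its schema is not
# part of this file; the sketch below is only an assumption inferred from the
# INSERT IGNORE columns (IGNORE needs a UNIQUE key to deduplicate against) and
# from the AUTO_INCREMENT reset, which implies an auto-increment id column.
SCHEMA_SKETCH = """
CREATE TABLE IF NOT EXISTS images (
    id        INT UNSIGNED NOT NULL AUTO_INCREMENT,
    sub       VARCHAR(64)  NOT NULL,
    title     VARCHAR(512) NOT NULL,
    author    VARCHAR(64)  NOT NULL,
    permalink VARCHAR(512) NOT NULL,
    link      VARCHAR(512) NOT NULL,
    PRIMARY KEY (id),
    UNIQUE KEY uq_link (link(255))
) DEFAULT CHARSET=utf8;
"""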
    def _run(self, msg):
        """Reply in-channel with the colorized text of any reddit links in the message."""
        urls = self.reg.findall(' '.join(msg[1].arguments))
        LOG.debug("Found reddit urls: %s" % urls)
        for url in urls:
            # gigimon, fix this ASAP: findall returns group tuples, so take the
            # first capture group as the actual URL.
            url = url[0]
            LOG.info("Processing %s" % url)
            try:
                reddit_json = reddit.parse(url)
                # Submissions carry their text in 'selftext', comments in 'body'.
                try:
                    post_text = reddit_json['selftext'].replace("\n", " ")
                except KeyError:
                    post_text = reddit_json['body'].replace("\n", " ")
                pretty_text = self.colorize(reddit_json['author'], post_text,
                                            ups=reddit_json['ups'],
                                            downs=reddit_json['downs'])
                # Long replies are split into IRC-sized chunks before sending.
                for text in self.split(pretty_text, colorized=True):
                    msg[0].privmsg(msg[1].target, text)
            except Exception as e:
                LOG.warning("Problem in parsing page: %s" % e)
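# For reference, `reddit.parse(url)` is treated above as returning a flat dict.
# The helper itself is defined elsewhere; the example payloads below are
# assumptions inferred only from the keys the method reads ('selftext' for
# submissions, 'body' for comments, plus 'author', 'ups' and 'downs').
EXAMPLE_SUBMISSION = {
    'author': 'some_redditor',   # hypothetical value
    'selftext': 'Post body text...',
    'ups': 42,
    'downs': 3,
}
EXAMPLE_COMMENT = {
    'author': 'some_redditor',   # hypothetical value
    'body': 'Comment text...',   # comments expose 'body' instead of 'selftext'
    'ups': 7,
    'downs': 0,
}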
import sys
import time

import html2text

# Project-local helpers: RSS fetching/parsing, text-to-speech, and video output.
import reddit
import tts
import output

if len(sys.argv) != 3:
    print("Usage: python3 src <post count> <skip>")
    sys.exit(1)

count = int(sys.argv[1])
skip = int(sys.argv[2])

intro = ("This video was generated by the automatic video generation tool "
         "built by Evan Pratten.")

## Video Start ##
tts.write(intro)
tts.play()

# Pull the subreddit's RSS feed and drop the first <skip> entries.
feed = reddit.getRss("prorevenge")["entries"][skip:]

for i in range(count):
    # Parse each entry once and reuse the result for both title and body.
    post = reddit.parse(feed[i])
    title = post["title"]
    body = html2text.html2text(post["body"])

    tts.write(title)
    output.writeTitle(title)
    tts.play()

    tts.write(body)
    tts.play()

    output.clearAll()
    time.sleep(1)
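# Example invocation (the numbers are placeholders): narrate five posts,
# skipping the two newest entries in the feed.
#
#     python3 src 5 2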
# Excerpt: tail of the page-building function. The opening of the function and
# the definitions of `now`, `STYLE`, `content`, `args` and `kwargs` appear
# earlier in the original file and are not shown here.
    page_title = 'TIS-100 Comparison'
    content_type = E.meta(
        {'http-equiv': 'Content-Type'},
        content='text/html;charset=utf-8')
    head = E.head(content_type, E.title(page_title))
    head.append(E.style(STYLE))
    head.append(E.meta(
        name='viewport',
        content='width=device-width initial-scale=1'))

    body = E.body()
    body.append(E.h1(page_title))
    body.append(content.table(*args, **kwargs))
    body.append(E.p('This page was generated\n' +
                    now.strftime('on %A, %d %B %Y at %I:%M:%S %p\n')))

    page = E.html(head, body)
    doctype = '<!DOCTYPE html>'
    return etree.tostring(
        page, doctype=doctype, method='html', pretty_print=True)


if __name__ == '__main__':
    # Assume save.dat or a symlink to it is in the directory with
    # this script. Assume the reddit thread has already been
    # fetched as "reddit.html" in the current directory.
    path = os.path.join(os.path.dirname(__file__), 'reddit.html')
    puzzles = reddit.parse(open(path).read())

    path = os.path.join(os.path.dirname(__file__), 'save.dat')
    records = save.parse(open(path).read(), puzzles=puzzles)

    outpath = os.path.join(os.path.dirname(__file__), 'comparison.html')
    with open(outpath, 'w') as f:
        f.write(makepage(puzzles, records, os.getenv('USER')))