import getopt
import sys

import MySQLdb

import reddit  # project-local module providing parse(subreddit, limit)

USAGE = ("Usage: redditCrawler.py -h <host> -u <user> -p <password> "
         "-d <db_name> -l <limit> <subreddit1> <subreddit2> <...>")


def main(argv):
    try:
        opts, args = getopt.getopt(
            argv, "h:u:p:d:l:", ["host=", "user=", "pass=", "db=", "limit="])
    except getopt.GetoptError:
        print USAGE
        sys.exit(2)
    if len(argv) < 9:
        print USAGE
        sys.exit(2)
        
    for opt, arg in opts:
        if opt in ("-h", "--host"):
            host = arg
        elif opt in ("-u", "--user"):
            user = arg
        elif opt in ("-p", "--pass"):
            password = arg
        elif opt in ("-d", "--db"):
            db = arg
        elif opt in ("-l", "--limit"):
            limit = int(arg)  # the fetch limit is numeric
    # getopt has already split the positional subreddit names into `args`
    subNames = args
    conn = MySQLdb.connect(host=host, user=user, passwd=password, db=db,
                           charset='utf8')
    cur = conn.cursor()
    cur.execute("SELECT COUNT(*) FROM images")
    count = cur.fetchone()[0]
    for sub in subNames:
        print "Searching %s ..." % sub 
        result = reddit.parse(sub,limit)
        print "Found %s items in %s" % (len(result),sub)
        for item in result:
            #print "[%s, %s]" % (item.sub,item.url)
            cur.execute("INSERT IGNORE INTO images (sub, title, author, permalink, link) VALUES (%s,%s,%s,%s,%s)",(item.sub,item.title,item.author,item.permalink,item.url))
            cur.execute("ALTER TABLE images AUTO_INCREMENT = %s",count)
    conn.commit()
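
A hypothetical invocation, taken from the usage string above (host, credentials, and database name are placeholders):

    python redditCrawler.py -h localhost -u crawler -p secret -d redditdb -l 25 pics aww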
Example #2
    def _run(self, msg):
        urls = self.reg.findall(' '.join(msg[1].arguments))
        LOG.debug("Found reddit urls: %s" % urls)

        for url in urls:
            # self.reg contains capture groups, so findall() returns tuples;
            # take the full URL from the first group (gigimon: fix this ASAP)
            url = url[0]
            LOG.info("Processing %s" % url)

            try:
                reddit_json = reddit.parse(url)

                # self posts carry their text in 'selftext'; comments use 'body'
                try:
                    post_text = reddit_json['selftext'].replace("\n", " ")
                except KeyError:
                    post_text = reddit_json['body'].replace("\n", " ")

                pretty_text = self.colorize(reddit_json['author'], post_text,
                        ups=reddit_json['ups'], downs=reddit_json['downs'])
                for text in self.split(pretty_text, colorized=True):
                    msg[0].privmsg(msg[1].target, text)
            except Exception as e:
                LOG.warning("Problem parsing page: %s" % e)
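
Why url[0] above: when a compiled pattern contains capture groups, re.findall() returns a list of group tuples rather than whole-match strings. A minimal sketch, with a hypothetical pattern standing in for self.reg:

import re

# stand-in pattern; group 1 captures the full URL
reg = re.compile(r'(https?://(www\.)?reddit\.com/\S+)')
matches = reg.findall("see https://reddit.com/r/python/comments/abc123")
print(matches)        # [('https://reddit.com/r/python/comments/abc123', '')]
print(matches[0][0])  # the full URL, hence url = url[0]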
Example #3
import sys
import time

import html2text

# project-local modules from the video-generation tool
import reddit
import tts
import output

if len(sys.argv) != 3:
    print("Usage: python3 src <post count> <skip>")
    sys.exit(1)

count = int(sys.argv[1])
skip = int(sys.argv[2])


intro = "This video was generated by the automatic video generation tool built by Evan Pratten."

## Video Start ##
tts.write(intro)
tts.play()

feed = reddit.getRss("prorevenge")["entries"][skip:]
for i in range(count):
    post = reddit.parse(feed[i])  # parse once and reuse for title and body
    title = post["title"]
    body = html2text.html2text(post["body"])

    tts.write(title)
    output.writeTitle(title)
    tts.play()

    tts.write(body)
    tts.play()

    output.clearAll()
    time.sleep(1)
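
A hypothetical run that narrates three posts without skipping any (the module layout is assumed from the usage string):

    python3 src 3 0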
Example #4

import datetime
import os

from lxml import etree
from lxml.html import builder as E  # E is assumed to be lxml's HTML ElementMaker

# project-local modules
import content
import reddit
import save


def makepage(*args, **kwargs):
    # The signature and `now` are reconstructed from the call in __main__
    # and the body below; STYLE is a CSS string defined elsewhere.
    now = datetime.datetime.now()
    page_title = 'TIS-100 Comparison'
    content_type = E.meta(
        {'http-equiv': 'Content-Type'},
        content='text/html;charset=utf-8')
    head = E.head(content_type, E.title(page_title))
    head.append(E.style(STYLE))
    head.append(E.meta(
        name='viewport', content='width=device-width initial-scale=1'))
    body = E.body()
    body.append(E.h1(page_title))
    body.append(content.table(*args, **kwargs))
    body.append(E.p('This page was generated\n' +
                now.strftime('on %A, %d %B %Y at %I:%M:%S %p\n')))
    page = E.html(head, body)
    doctype = '<!DOCTYPE html>'
    return etree.tostring(
        page, doctype=doctype, method='html', pretty_print=True)


if __name__ == '__main__':
    # Assume save.dat or a symlink to it is in the directory with
    # this script.  Assume the reddit thread has already been
    # fetched as "reddit.html" in the current directory.
    path = os.path.join(os.path.dirname(__file__), 'reddit.html')
    with open(path) as f:
        puzzles = reddit.parse(f.read())
    path = os.path.join(os.path.dirname(__file__), 'save.dat')
    with open(path) as f:
        records = save.parse(f.read(), puzzles=puzzles)
    outpath = os.path.join(os.path.dirname(__file__), 'comparison.html')
    # etree.tostring() returns bytes, so write in binary mode
    with open(outpath, 'wb') as f:
        f.write(makepage(puzzles, records, os.getenv('USER')))