Beispiel #1
0
def main():
    """Mine every subreddit listed in ``subreddits.txt`` using worker threads.

    The file is read as one subreddit name per line. Names are processed in
    batches of four: one thread running ``mine(db, mined_from=<name>)`` is
    started per name, with a ``sleep(randint(3, 5))`` pause after each start.
    Every thread of a batch is joined before the next batch begins.
    """
    batch_size = 4
    db = setup_db()

    with open("subreddits.txt") as f:
        # Drop blank lines (e.g. the empty string left by a trailing newline)
        # so no worker is started with an empty subreddit name.
        subreddits = [line.strip() for line in f if line.strip()]

    for start in range(0, len(subreddits), batch_size):
        batch = subreddits[start:start + batch_size]
        threads = []

        for subreddit in batch:
            thread = Thread(target=mine, args=(db,), kwargs={"mined_from": subreddit})
            thread.start()
            threads.append(thread)
            # Stagger thread start-up; presumably to avoid hitting the remote
            # service with simultaneous requests -- TODO confirm.
            sleep(randint(3, 5))

        for subreddit, thread in zip(batch, threads):
            # Join first so the message is only logged once the thread has
            # really terminated.
            thread.join()
            logging.info("Finished thread: %s", subreddit)
Beispiel #2
0
from time import sleep
from random import shuffle

from helpers.db import setup_db
from helpers.EntryParser import EntryParser
from os import listdir


# Collect the entries of every JSON file found in ./json into a single list
# and then put them in random order.
parsed = []
for filename in listdir('./json'):
    with open('./json/%s' % filename) as handle:
        parsed += json.load(handle)
shuffle(parsed)


db = setup_db()
entry_parser = EntryParser()


logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
                    filename='miner.log', level=logging.DEBUG)

# Store every parsed entry whose reddit_id is not yet in the 'entries' table,
# after fetching the content behind its link.
for entry in parsed:
    saved_entry = db['entries'].find_one(reddit_id=entry['reddit_id'])
    if saved_entry is not None:
        # Already stored; nothing to do.
        continue

    entry['article'] = None
    try:
        entry['article'] = entry_parser.get_content(entry['link'])
        db['entries'].insert(entry)
    except Exception as error:
        # Best effort: a failure on one entry (fetch or insert) is logged and
        # the loop carries on with the next entry.
        logging.error('Error: %s %s %s', entry['mined_from'], entry['link'], error)