Ejemplo n.º 1
0
def run_all_worker():
    try:
        # Producer is on !!!
        url_jobs = mp.JoinableQueue()
        topic_results = mp.JoinableQueue()
        article_results = mp.JoinableQueue()
        for _ in range(6):  # different process, different speed
            parse_article_proc = mp.Process(target=wxarticle_generator,
                                            args=(url_jobs, article_results))
            parse_article_proc.daemon = True
            parse_article_proc.start()

        write_topic_proc = mp.Process(target=topic_db_writer,
                                      args=(topic_results, ))
        write_topic_proc.daemon = True
        write_topic_proc.start()

        for _ in range(6):  #  4 processes to write article info into db
            write_article_proc = mp.Process(target=article_db_writer,
                                            args=(article_results, ))
            write_article_proc.daemon = True
            write_article_proc.start()

        try:
            seven_days_ago = (dt.today() - timedelta(6)).strftime("%Y-%m-%d")
            cp = mp.current_process()
            print dt.now().strftime(
                "%Y-%m-%d %H:%M:%S"), "Run All Word Process pid is %d" % (
                    cp.pid)
            conn = connect_database()
            if not conn:
                return False
            list_of_kw = read_topics_from_db(conn.cursor(),
                                             seven_days_ago)[::-1]
            wxurl_generator(list_of_kw, url_jobs, topic_results)
            topic_results.join()
            article_results.join()
            url_jobs.join()
            # if url_jobs.empty():
            #    print "-"*20, "url_jobs is empty ..."
            # if topic_results.empty():
            #    print "-"*20, "topic_results is empty ..."
            # if article_results.empty():
            #    print "-"*20, "article_results is empty ..."
        except mdb.OperationalError as e:
            traceback.print_exc()
            print dt.now().strftime("%Y-%m-%d %H:%M:%S")
        except Exception as e:
            traceback.print_exc()
        finally:
            conn.close()
            return True
    except Exception as e:
        traceback.print_exc()
        print dt.now().strftime(
            "%Y-%m-%d %H:%M:%S"), "Exception raise in Rn all Work"
    except KeyboardInterrupt:
        print dt.now().strftime(
            "%Y-%m-%d %H:%M:%S"
        ), "Interrupted by you and quit in force, but save the results"
def topic_db_writer(topic_results):
    """
    Consummer for topics
    """
    cp = mp.current_process()
    while True:
        print dt.now().strftime("%Y-%m-%d %H:%M:%S"), "Write Topics Process pid is %d" % (cp.pid)
        with connect_database() as cursor:
            topic_record = topic_results.get()
            write_status = write_hotest_into_db(cursor, topic_record)
            topic_results.task_done()
def test_parse_baidu_results():
    try:
        conn = connect_database()
        if not conn:
            return False
        list_of_kw = read_topics_from_db(conn)
        for kw in list_of_kw:
            for dr in DATE_ERANGES:  # do 3 times search, look whether solve the problem: day>week>month
                baidu_result = parse_baidu_search_page(kw, dr)
                print baidu_result['data']['search_url'], baidu_result['data'][
                    'hit_num']
    except Exception as e:
        traceback.print_exc()
    finally:
        conn.close()
Ejemplo n.º 4
0
def run_all_worker(concurrency=5):
    try:
        # Producer is on !!!
        conn = connect_database()
        list_of_kw = read_topics_from_db(conn)[:100]
        pool = MPool(concurrency)  # Processes pool
        pool.map(wxurl_generator, list_of_kw)  # Keep up generate keywords
        pool.close()
        pool.join()  # why join
    except KeyboardInterrupt:
        print "Interrupted by you and quit in force, but save the results"
    # Consummer followes
    wxarticle_generator()
    article_db_writer()
    topic_db_writer()
Ejemplo n.º 5
0
def article_db_writer(article_results):
    """
    Consummer for articles
    """
    cp = mp.current_process()
    while True:
        print dt.now().strftime(
            "%Y-%m-%d %H:%M:%S"), "Write Article Process pid is %d" % (cp.pid)
        with connect_database() as cursor:
            # using try-with-recources, auto-commit
            article_record = article_results.get()
            write_status = write_article_into_db(cursor, article_record)
            article_results.task_done()
    print dt.now().strftime(
        "%Y-%m-%d %H:%M:%S"), "Write Article Process %d finished" % (cp.pid)
Ejemplo n.º 6
0
def topic_db_writer(topic_results):
    """
    Consummer for topics
    """
    cp = mp.current_process()
    while True:
        print dt.now().strftime(
            "%Y-%m-%d %H:%M:%S"), "Write Topics Process pid is %d" % (cp.pid)
        try:
            conn = connect_database()
            topic_record = topic_results.get()
            write_status = write_baidu_topic_into_db(conn, topic_record)
            # print topic_record.items()
            topic_results.task_done()
        except Exception as e:
            traceback.print_exc()
        finally:
            conn.close()
Ejemplo n.º 7
0
def add_topic_jobs(target, start_date):
    todo = 0
    try:
        conn = connect_database()
        if not conn:
            return False
        list_of_kw = read_topics_from_db(conn.cursor(), start_date)
        for kw in list_of_kw:
            todo += 1
            target.put(kw)
    except mdb.OperationalError as e:
        traceback.print_exc()
        print dt.now().strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        traceback.print_exc()
    finally:
        conn.close()
        return todo
Ejemplo n.º 8
0
def add_topic_jobs(target):
    todo = 0
    one_week_ago = (dt.today() - timedelta(6)).strftime("%Y-%m-%d")
    try:
        conn = connect_database()
        if not conn:
            return False
        list_of_kw = read_topics_from_db(conn, one_week_ago)
        for kw in list_of_kw:
            todo += 1
            target.put(kw)
    except mdb.OperationalError as e:
        traceback.print_exc()
        print dt.now().strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        traceback.print_exc()
    finally:
        conn.close()
        return todo