Beispiel #1
0
    def __init__(self, query_, page):
        """Remember the page and query, connect to the DB and cache a count.

        ``query_`` is stored UTF-8 encoded in ``self.query``; ``page`` is kept
        unchanged. A connection from ``db_conn.db_conn()`` is wrapped in a
        ``model.model`` and the result of ``tb_detail_count`` for the encoded
        query is stored in ``self.cnt``.
        """
        self.page = page
        self.query = query_.encode("utf-8")

        connection = db_conn.db_conn()
        self.conn = connection

        database = model.model(connection)
        self.db = database

        self.cnt = database.tb_detail_count(self.query)
Beispiel #2
0
def ypim_crawler_inven3():
    """Start an ``inven_find`` worker per stored URL and field name.

    The URLs are the ``href`` values of the rows returned by
    ``tb_url_detail_select("inven", 3)`` (the ``3`` is presumably the part
    number -- verify against the model). Every URL is processed once for
    "subjcont" and then once for "nicname".
    """
    connection = db_conn.db_conn()
    model_ = model.model(connection)

    rows = model_.tb_url_detail_select("inven", 3)
    hrefs = [row['href'] for row in rows]

    # Outer loop is the field name, so all URLs are started for "subjcont"
    # before any is started for "nicname".
    for field_name in ("subjcont", "nicname"):
        for href in hrefs:
            worker = inven_find(model_, href, field_name)
            worker.start()
Beispiel #3
0
def ypim_crawler_inven2():
    """Start an ``inven_find`` worker per stored URL and field name.

    The URLs are the ``href`` values of the rows returned by
    ``tb_url_detail_select("inven", 2)`` (the ``2`` is presumably the part
    number -- verify against the model). Every URL is processed once for
    "subjcont" and then once for "nicname".
    """
    connection = db_conn.db_conn()
    model_ = model.model(connection)

    hrefs = [
        row['href'] for row in model_.tb_url_detail_select("inven", 2)
    ]

    # Field names form the outer loop: every URL is started for "subjcont"
    # first, then every URL again for "nicname".
    for field_name in ("subjcont", "nicname"):
        for href in hrefs:
            inven_find(model_, href, field_name).start()
Beispiel #4
0
def init(query_):
    """Populate the module-level ``conn``, ``query`` and ``lock`` globals.

    ``conn`` receives a fresh ``db_conn.db_conn()`` result, ``query`` is set
    to ``query_`` unchanged, and ``lock`` becomes a new ``threading.Lock``.
    """
    global conn
    global query
    global lock

    # The connection is created first; if that fails none of the globals
    # are (re)assigned.
    conn, query = db_conn.db_conn(), query_
    lock = threading.Lock()
Beispiel #5
0
def web_site(query, web_site):
    conn = db_conn.db_conn()
    model_ = model.model(conn)
    print query, web_site
    return jsonify(model_.tb_detail_group(query, web_site))
Beispiel #6
0
def groups(query):
    """Return ``tb_detail_groups_cnt(query)`` wrapped by ``jsonify``."""
    connection = db_conn.db_conn()
    detail_model = model.model(connection)

    group_counts = detail_model.tb_detail_groups_cnt(query)
    return jsonify(group_counts)
Beispiel #7
0
def isfirst(query):
    """Return the first matching row's ``que_seqno`` as a jsonified string.

    ``query`` is UTF-8 encoded before being passed to ``tb_query_select``;
    the ``que_seqno`` of the first returned row is converted with ``str`` and
    handed to ``jsonify``. An empty result raises ``IndexError``.
    """
    connection = db_conn.db_conn()
    query_model = model.model(connection)

    rows = query_model.tb_query_select(query.encode("utf-8"))
    first_seqno = rows[0]['que_seqno']

    return jsonify(str(first_seqno))
Beispiel #8
0
def inven_get_come_idx():
    """Collect inven board links from the site menus and store them in the DB.

    Steps:

    1. For every ``<a>`` found under the items yielded by ``inven()``, start
       a ``YPIMcrawler_nonlogin`` worker on its ``href`` and collect the
       value the worker puts on its queue.
    2. For each collected element, walk the children of its parent and keep
       the links whose ``href`` mentions ``come_idx`` (normalised by
       ``_inven_board_url``).
    3. Call ``tb_url_insert("inven")``, read back its ``url_seqno`` and insert
       every distinct link with ``tb_url_detail_insert``, 100 links per part
       number (part numbers start at 1).

    Returns nothing. Any exception raised along the way is printed and
    swallowed, so a failure never propagates to the caller.
    """
    menu_results = []
    board_urls = []

    try:
        for menu in inven():
            for anchor in menu.findAll('a'):
                queue = Queue.Queue()
                print(anchor['href'])
                crawler = YPIMcrawler_nonlogin(queue, anchor['href'], None,
                                               "li", "class", "firstMenuItem")
                crawler.start()
                # Blocks until the worker has put its result on the queue.
                menu_results.append(queue.get())

        for result in menu_results:
            for element in result:
                for sibling in element.parent:
                    try:
                        href = sibling.a['href']
                    except (AttributeError, KeyError, TypeError):
                        # Best effort: nodes without an <a href=...> child
                        # (text nodes, tags lacking a link) are skipped.
                        continue

                    board_url = _inven_board_url(href)
                    if board_url is not None:
                        board_urls.append(board_url)

        # NOTE(review): other call sites pass the result of
        # db_conn.db_conn() straight to model.model(); here .db_conn() is
        # called a second time on that result. Confirm this is intended.
        db = db_conn.db_conn()
        conn = db.db_conn()

        model_ = model.model(conn)

        model_.tb_url_insert("inven")
        url_seqno = model_.tb_url_select("inven")[0]['url_seqno']

        lock = threading.Lock()
        # set() removes duplicate links; its iteration order is arbitrary, so
        # which link lands in which part is not stable between runs.
        for position, board_url in enumerate(set(board_urls)):
            part_num = position // 100 + 1
            # The context manager releases the lock even if the insert raises.
            with lock:
                model_.tb_url_detail_insert(url_seqno, part_num, board_url)

    except Exception as e:
        print(e)


def _inven_board_url(href):
    """Return the board URL to store for *href*, or ``None`` to skip it.

    * No ``come_idx`` in *href*: ``None``.
    * ``come_idx`` together with ``category``: *href* is returned unchanged.
    * Otherwise the numeric id after ``come_idx=`` is extracted and a
      canonical ``powerbbs.php?come_idx=<id>`` URL is returned. The id may
      have any number of digits; if no digits follow ``come_idx=``, a message
      is printed and ``None`` is returned.
    """
    import re

    if "come_idx" not in href:
        return None
    if "category" in href:
        return href

    match = re.search(r"come_idx=([0-9]+)", href)
    if match is None:
        print("no numeric come_idx in href: %s" % href)
        return None

    # int() drops leading zeros so equal ids always yield the same URL.
    return ("http://www.inven.co.kr/board/powerbbs.php?come_idx="
            + str(int(match.group(1))))