Code example #1
from BigSpider_app.DataBase import mongodb_options


def saveToSqlite(spider_info, id):
    # Extract the JD goods id from the spider_info dict
    jingdong_good_id = spider_info['goods_id']
    site_id = id
    # Connect to MongoDB and insert the goods id for this site
    mongodb_conn = mongodb_options.mongodb_init_uestc()
    print "-------------->"
    print id
    mongodb_options.insert_goods_id(mongodb_conn, jingdong_good_id, site_id)
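The `mongodb_options` helpers called here live elsewhere in the project and are not shown on this page. A minimal sketch of what `mongodb_init_uestc` and `insert_goods_id` might look like on top of pymongo; the connection URI, database name, and collection name are assumptions, not taken from the project:

import pymongo


def mongodb_init_uestc():
    # Assumed: a local MongoDB instance and a database named 'uestc'
    client = pymongo.MongoClient('mongodb://localhost:27017/')
    return client['uestc']


def insert_goods_id(conn, goods_id, site_id):
    # Store one crawled JD goods id together with the site it belongs to
    # (the 'jingdong_goods' collection name is an assumption)
    conn['jingdong_goods'].insert_one({'goods_id': goods_id,
                                       'site_id': site_id})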
Code example #2
from BigSpider_app.DataBase import mongodb_options


def saveToSqlite(spider_info, id):
    # Extract the post fields from the spider_info dict
    title = spider_info['title']
    context = spider_info['context']
    url = "tieba.baidu.com" + spider_info['url']
    auther = spider_info['auther']  # key is spelled 'auther' in the crawl data
    create_time = spider_info['create_time']
    site_id = id
    # Connect to MongoDB and insert the crawled post
    mongodb_conn = mongodb_options.mongodb_init_uestc()
    print "-------------->"
    print id
    result1 = mongodb_options.insert_crawlinginfo(mongodb_conn, title, context,
                                                  url, auther, create_time, id)
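`insert_crawlinginfo` is likewise project code. A hedged sketch of what it might do, reusing the hypothetical connection from the sketch above; the collection name is an assumption, and the field names mirror the call:

def insert_crawlinginfo(conn, title, context, url, auther, create_time, site_id):
    # Store one crawled Tieba post ('crawling_info' collection name is assumed)
    return conn['crawling_info'].insert_one({
        'title': title,
        'context': context,
        'url': url,
        'auther': auther,  # the key is spelled 'auther' throughout the project
        'create_time': create_time,
        'site_id': site_id,
    })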
Code example #3
from BigSpider_app.DataBase import mongodb_options


def saveToSqlite(spider_info, id):
    # Extract the comment fields from the spider_info dict
    usr_id = spider_info["usr_id"]
    goods_id = spider_info["goods_id"]
    referenceName = spider_info["referenceName"]
    content = spider_info["content"]
    score = spider_info["score"]
    creationTime = spider_info["creationTime"]
    site_id = id
    # Connect to MongoDB and insert the goods comment
    mongodb_conn = mongodb_options.mongodb_init_uestc()
    print "-------------->"
    print id
    mongodb_options.insert_goods_comments(mongodb_conn, usr_id, goods_id,
                                          referenceName, content, score,
                                          creationTime, site_id)
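For illustration, a hypothetical call showing the dict shape this function expects; the keys are the ones read above, the values are made up:

spider_info = {
    'usr_id': 'u123456',
    'goods_id': '100012043978',
    'referenceName': 'Example product',
    'content': 'Sample review text',
    'score': 5,
    'creationTime': '2016-01-01 12:00:00',
}
saveToSqlite(spider_info, 1)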
Code example #4
File: spiderserver.py  Project: xiaoleo911/Spider
from BigSpider_app.DataBase import mongodb_options
# baidutieba_spider, jingdonggoods_spider and jingdongcomments_spider are
# spider modules from the same project (their import path is not shown here)


def spider_server(site_data, id):
    if site_data[3] == 'baidutieba':  # crawl Baidu Tieba
        baidutieba_spider.startGrab(site_data[1], id)
    elif site_data[3] == 'jingdongpinglun':  # crawl JD product comments
        # 1. Crawl the goods list
        jingdonggoods_spider.startGrab(site_data[2], id)
        # 2. Crawl the comments for the goods in that list
        # 2.1 Fetch the goods ids stored by step 1
        mongodb_conn = mongodb_options.mongodb_init_uestc()
        jingdong_goods_id_list = mongodb_options.jingdonggoods_find_all(
            mongodb_conn, id)
        print "00000000000000000000000000000000"
        print len(jingdong_goods_id_list)
        # debug: list the goods ids that were found
        for jingdong_goods_iid in jingdong_goods_id_list:
            goods_iid = jingdong_goods_iid['goods_id']
            print goods_iid
        print "1111111111111111111111111111111"
        # 2.2 Crawl the comments for each goods id
        for jingdong_goods_id in jingdong_goods_id_list:
            goods_id = jingdong_goods_id['goods_id']
            jingdongcomments_spider.startGrab(goods_id, id)

    return
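The indexing implies a row-like `site_data` where index 3 selects the spider type and indexes 1 and 2 carry the crawl targets. A hedged invocation sketch; only the role of index 3 and which index feeds which spider come from the code above, the rest is assumed:

# Hypothetical row: (site id, Tieba target, JD goods query, spider type)
site_data = (1, 'some-tieba-name', 'some-goods-keyword', 'baidutieba')
spider_server(site_data, 1)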
Code example #5
#! /usr/bin/env python
# coding=utf-8
import os
import uuid

from django.http import HttpResponseRedirect
from django.shortcuts import render
from BigSpider_app.DataBase import mysql_options, redis_options, mongodb_options

db = mongodb_options.mongodb_init_spider_ms()
uestc = mongodb_options.mongodb_init_uestc()
uestc_redis = redis_options.redis_init()

# mysql_conn = mysql_options.mysql_init()


# urls.py ties together the URL name, the view name here, and the HTML template name
def pre_index(request):
    return render(request, 'pre_index.html')


# User page view
def index(request):
    flag = False
    if "username" in request.session:
        username = request.session['username']
        flag = True
    if flag:
        # Read the current crawler queue length (URLs being crawled) from redis
        crawling_queue = redis_options.crawling_queue(uestc_redis)
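`redis_options.crawling_queue` and `redis_init` are project code not shown on this page. A minimal sketch of what they might do with redis-py, assuming the crawl frontier is kept in a Redis list whose key name is made up here:

import redis


def redis_init():
    # Assumed: a local Redis instance, default database
    return redis.StrictRedis(host='localhost', port=6379, db=0)


def crawling_queue(conn):
    # Length of the pending-URL list used by the crawler
    # (the 'crawling:start_urls' key name is an assumption)
    return conn.llen('crawling:start_urls')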