Esempio n. 1
0
def proxyList(request):
    """Render the spider/proxy overview page.

    Builds one summary dict per configured spider plus the log/task list
    (sorted newest-first) and renders ``proxy/proxyList.html``.
    """
    db = MongoDBCli()
    spider_list = db.getAllSpider()
    content = {
        "spiderTasks": [],
        "taskList": [],
    }

    for spider in spider_list:
        # Parse once and reuse (original parsed the same URL twice).
        parsed = urlparse(spider["config"]["url"])
        content["spiderTasks"].append({
            "name": spider["name"],
            # Keep only scheme://host of the target URL.
            "url": parsed.scheme + "://" + parsed.netloc,
            "description": spider["description"],
            "recentTime": datetime.datetime.now(),
            "spiderName": spider["config"]["name"],
            "useRange": spider["config"]["useRange"],
            "startIndex": spider["startIndex"],
            "endIndex": spider["endIndex"],
        })
    content["taskList"] = getLogInfoList()
    # Newest entries first, keyed on each log record's "time" field.
    content["taskList"].sort(key=lambda entry: entry["time"], reverse=True)
    return render(request, "proxy/proxyList.html", context=content)
Esempio n. 2
0
 def post(self, request):
     """Store a spider definition sent as JSON in the request body.

     Returns a JsonResponse with a ``flag`` (success) and a ``massage``
     text ("massage" key kept as-is for front-end compatibility).
     """
     receive_data = checkJson(request)
     # checkJson returns False for a malformed/invalid JSON body; use an
     # identity check so a falsy-but-valid payload is not misclassified.
     if receive_data is False:
         return JsonResponse({
             "flag": False,
             "massage": "spider info 参数错误",
         })
     db = MongoDBCli()
     db_ret = db.setOneSpider(receive_data)
     if db_ret is not None:
         return JsonResponse({"flag": True, "massage": "OK"})
     return JsonResponse({"flag": False, "massage": "编辑错误"})
Esempio n. 3
0
 def get(self, request, spider_name):
     """Render the config-edit page for the spider named *spider_name*.

     Falls back to the 404 template when no such spider exists in the DB.
     """
     db = MongoDBCli()
     spider = db.getOneSpiderFromSpiderName(spider_name)
     if spider is None:
         return render(request, '404.html')
     content = {
         "spider": spider,
         # Serialized config for the client-side JSON editor.
         "spider_config_json": json.dumps(spider["config"]),
     }
     return render(request, 'proxy/editSpiderConfig.html', context=content)
Esempio n. 4
0
def runSpider(request):
    """Queue a Celery task that launches the requested spider.

    Expects a POST field ``spiderName``. Returns the async task handle as
    the response body, or an error message for a missing/unknown spider.
    """
    spider_name = request.POST.get("spiderName")
    if spider_name is None:
        return HttpResponse("错误的请求")
    db = MongoDBCli()
    spider_config = db.getOneSpiderFromSpiderName(spider_name)
    if spider_config is None:
        return HttpResponse("没有这个爬虫")
    result = task_runSpider.delay(
        spider_config["config"]["name"],
        getRandomLogFileName(spider_config["config"]["name"]),
        # Start/end index forwarded as extra scrapy -a arguments.
        "-a si={} -a ei={}".format(
            spider_config["startIndex"],
            spider_config["endIndex"],
        ))
    return HttpResponse(result)
Esempio n. 5
0
def task_runSpider(spider_name, log_file=None, param=""):
    """Launch a ``scrapy crawl`` for *spider_name* in a child process.

    When *log_file* is None (the periodic/beat invocation path) a log file
    name is generated and the start/end index parameters are loaded from
    the spider's DB config. Returns the (relative) log file name.
    """
    if log_file is None:
        log_file = getRandomLogFileName(spider_name + "-beat")
        db = MongoDBCli()
        spider_config = db.getOneSpiderFromSpiderName(spider_name)
        param = "-a si={} -a ei={}".format(
            spider_config["startIndex"],
            spider_config["endIndex"],
        )
    os.chdir(PROXY_SPIDER_DIR)
    log_file_abs = os.path.join(PROXY_SPIDER_LOG_DIR, log_file)
    cmd = 'scrapy crawl genericSpider -a cn={}  -s LOG_FILE={} {}'.format(
        spider_name, log_file_abs, param)
    print(cmd)
    # Pass an argv list (shell=False): Popen with a bare string treats the
    # whole string as one executable name on POSIX and fails to start.
    # No argument here contains quoting, so a plain split is sufficient.
    subprocess.Popen(cmd.split())
    return log_file
Esempio n. 6
0
 def post(self, request, spider_name):
     """Update the existing spider named *spider_name* from a JSON body.

     Returns a JsonResponse with ``flag``/``massage`` ("massage" key kept
     as-is for front-end compatibility).
     """
     db = MongoDBCli()
     spider = db.getOneSpiderFromSpiderName(spider_name)
     if spider is None:
         return JsonResponse({
             "flag": False,
             "massage": "没有这个爬虫",
         })
     receive_data = checkJson(request)
     # checkJson returns False for a malformed/invalid JSON body; use an
     # identity check so a falsy-but-valid payload is not misclassified.
     if receive_data is False:
         return JsonResponse({
             "flag": False,
             "massage": "spider info 参数错误",
         })
     db_ret = db.setOneSpider(receive_data)
     if db_ret is not None:
         return JsonResponse({"flag": True, "massage": "OK"})
     return JsonResponse({"flag": False, "massage": "编辑错误"})
        "useRange": True,
        "header": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Host": "ip.jiangxianli.com",
            "Pragma": "no-cache",
            "Referer": "https://ip.jiangxianli.com/?page=1",
            "Upgrade-Insecure-Requests": "1",
        },
        "oneLine": ["//tbody/tr", ],
        "ip": "td[1]/text()",
        "port": "td[2]/text()"
    },

]
# Module-level spider registry, loaded once at import time from MongoDB.
db = MongoDBCli()
SPIDER_INFO_LIST = list(db.getAllSpiderConfig())

# Names of all configured spiders, in DB order.
SPIDER_NAME_LIST = [config["name"] for config in SPIDER_INFO_LIST]

if __name__ == "__main__":
    print(SPIDER_NAME_LIST)
    print(SPIDER_INFO_LIST)
    # Fixed NameError: original referenced undefined SPIDER_INFO_LIST1.
    # NOTE(review): assumes at least 3 configs exist — confirm, else guard.
    print(json.dumps(SPIDER_INFO_LIST[2]))