Example #1
def task_verifyIP(log_file=None):
    """Celery task: launch the 'verify' Scrapy spider as a detached process and return its log file name."""
    print("task_verifyIP received log_file:", log_file)
    if log_file is None:
        log_file = getRandomLogFileName("verifyIP-beat")
    os.chdir(PROXY_SPIDER_DIR)
    log_file_abs = os.path.join(PROXY_SPIDER_LOG_DIR, log_file)
    cmd = 'scrapy crawl verify -s LOG_FILE={}'.format(log_file_abs)
    print(cmd)
    # os.system(cmd)
    subprocess.Popen(cmd)
    # cmdline.execute(cmd.split())
    return log_file
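A note on the Popen call above: passing the command as a single string is only interpreted as a full command line on Windows; on POSIX systems subprocess.Popen would need shell=True or a pre-split argument list. A minimal portable sketch, reusing the cmd string and PROXY_SPIDER_DIR from the example above:

import shlex
import subprocess

# Split the command into an argv list and pass the working directory explicitly
# instead of relying on os.chdir(); behaves the same on Windows and POSIX.
proc = subprocess.Popen(shlex.split(cmd), cwd=PROXY_SPIDER_DIR)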
Example #2
def runVerifyIP(request):
    logger.info("Received runVerifyIP request")
    result = None
    try:
        lf = getRandomLogFileName("verifyIP")
        print("Dispatching task_verifyIP with log file:", lf)
        result = task_verifyIP.delay(lf)
    except Exception as e:
        print(e)
        logger.error(e)
    if result is None:
        return JsonResponse({"flag": False, "message": "failed to start"})
    return JsonResponse({"flag": True, "message": "started successfully"})
Example #3
def runSpider(request):
    print(request.POST)
    spider_name = request.POST.get("spiderName", None)

    if spider_name is None:
        return HttpResponse("bad request")
    db = MongoDBCli()
    spider_config = db.getOneSpiderFromSpiderName(spider_name)
    if spider_config is None:
        return HttpResponse("no such spider")
    result = task_runSpider.delay(
        spider_config["config"]["name"],
        getRandomLogFileName(spider_config["config"]["name"]),
        "-a si={} -a ei={}".format(
            spider_config["startIndex"],
            spider_config["endIndex"],
        ))
    return HttpResponse(result)
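The view returns HttpResponse(result), where result is the Celery AsyncResult, so the response body renders as the task id. A minimal client-side sketch, assuming a local development server and a spider name that exists in MongoDB (both are assumptions):

import requests

# Hypothetical endpoint and spider name, not taken from the source project.
resp = requests.post("http://localhost:8000/runSpider/",
                     data={"spiderName": "exampleSpider"})
print(resp.text)  # the Celery task id returned by the view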
Example #4
def task_runSpider(spider_name, log_file=None, param=""):
    # No log file given (e.g. a scheduled beat run): generate one and load the
    # spider's start/end indices from MongoDB.
    if log_file is None:
        log_file = getRandomLogFileName(spider_name + "-beat")
        db = MongoDBCli()
        spider_config = db.getOneSpiderFromSpiderName(spider_name)
        param = "-a si={} -a ei={}".format(
            spider_config["startIndex"],
            spider_config["endIndex"],
        )
    os.chdir(PROXY_SPIDER_DIR)
    log_file_abs = os.path.join(PROXY_SPIDER_LOG_DIR, log_file)
    print(log_file_abs)
    cmd = 'scrapy crawl genericSpider -a cn={}  -s LOG_FILE={} {}'.format(
        spider_name, log_file_abs, param)
    print(cmd)
    # cmdline.execute(cmd.split())
    # subprocess.Popen("notepad")
    # os.system(cmd)
    subprocess.Popen(cmd)
    return log_file
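The "-beat" suffix used when log_file is None suggests this task is also triggered periodically by Celery beat. A minimal sketch of such a schedule entry, assuming standard Celery settings (the task path, interval, and spider name are assumptions, not from the source project):

from celery.schedules import crontab

CELERY_BEAT_SCHEDULE = {
    "run-generic-spider": {
        "task": "tasks.task_runSpider",             # assumed module path
        "schedule": crontab(minute=0, hour="*/6"),  # assumed interval: every 6 hours
        "args": ("exampleSpider",),                 # log_file omitted -> "-beat" name is generated
    },
}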
Example #5
def clearIP(log_file=None):
    if log_file is None:
        log_file = getRandomLogFileName("clearIP-beat")

    proxys = Proxy.objects.filter(https=0, http=0)
    log_file_abs = os.path.join(PROXY_SPIDER_LOG_DIR, log_file)
    with open(log_file_abs, 'w', encoding="utf-8") as fp:
        fp.write("\t %-15s   %-6s  %-10s %-10s %-34s %-10s %-10s %-10s\n" %
                 ("ip", "port", "protocol", "anonymity", "verify_time", "http",
                  "https", "source"))
        fp.write("\t" + ("-" * 112) + "\n")
        for proxy in proxys:
            fp.write(
                "\t %-15s   %-6s  %-10s %-10s %-34s %-10s %-10s %-10s\n" %
                (proxy.ip, proxy.port, proxy.protocol, proxy.anonymity,
                 proxy.verify_time, proxy.http, proxy.https, proxy.source))
    try:
        proxys.delete()
    except Exception:
        return "delete failed"
    return "delete succeeded"
Example #6
def post(self, request):
    print("Dispatching clearIP task")
    result = clearIP.delay(getRandomLogFileName("clearIP"))
    print(result)
    return JsonResponse({"flag": True, "message": "started successfully"})