Beispiel #1
0
def readConfig():
    """Load the three spider counts from ``conf.json``.

    Reads ``conf.json`` through ``readFile`` under the ``weibo_spider`` root
    and returns the values stored under ``blogSpiderNumber``,
    ``bloggerSpiderNumber`` and ``imageSpiderNumber``.

    Returns:
        A 3-tuple ``(blogSpiderNumber, bloggerSpiderNumber,
        imageSpiderNumber)``.
    """
    conf = readFile('weibo_spider', ['conf.json'])
    return (
        conf['blogSpiderNumber'],
        conf['bloggerSpiderNumber'],
        conf['imageSpiderNumber'],
    )
Beispiel #2
0
 def __getNew(self):
     """Load a fresh task JSON once the current tasks are used up.

     Replaces ``self.jsonContent`` with newly loaded content, then converts
     that content into intermediate tasks and hands them on to be turned
     into executable tasks and queued.
     """
     # 1. Prepare the request parameters.
     # NOTE: these three values are only consumed by the remote request that
     # is commented out below; they are kept so the remote path can be
     # re-enabled by uncommenting a single line.
     url = self.__buildPullUrl()
     limit = LIMIT
     crawler = CRAWLER
     # result = helper.sendRequest(url, limit, crawler)
     # The tasks are currently read from the local content.json file instead
     # of being requested remotely.
     localTasks = readFile(rootName='weibo_spider',
                           fileList=['content.json'])
     # 2. Update the stored data with the serialized JSON string.
     self.jsonContent = json.dumps(localTasks)
     # 3. Turn the stored string into a batch of intermediate tasks.
     middleTasks = self.__toMiddleTask()
     # 4. Turn the intermediate tasks into executable tasks and store them
     #    in their corresponding queues.
     self.__toTasksAndSaveToQueue(middleTasks)
from conf import PROJECT_NAME
from utils.fileHelper import readFile

# Load conf.json once at import time; every constant below is read from it.
data = readFile(PROJECT_NAME, ['conf.json'])

# All values in this module come from the "mysql" section of the file.
_mysqlConf = data['mysql']

FEED_EXPORT_ENCODING = _mysqlConf['FEED_EXPORT_ENCODING']
MYSQL_HOST = _mysqlConf['MYSQL_HOST']
MYSQL_PORT = _mysqlConf['MYSQL_PORT']
MYSQL_USERNAME = _mysqlConf['MYSQL_USERNAME']
MYSQL_PASSWORD = _mysqlConf['MYSQL_PASSWORD']
MYSQL_DATABASE = _mysqlConf['MYSQL_DATABASE']
Beispiel #4
0
 def add(value: str):
     """Append ``value`` to the list stored at ``PROXY_FILE_PATH``.

     The stored content is read, extended with ``value`` and written back
     through ``writeFile``.

     Args:
         value: The entry to add.
     """
     # Assumes readFile returns a mutable list of entries — TODO confirm.
     content = readFile(rootName=PROJECT_NAME, fileList=PROXY_FILE_PATH)
     # list.append mutates in place and returns None, so it must not be
     # chained onto the readFile call: that would write None to the file.
     content.append(value)
     writeFile(rootName=PROJECT_NAME,
               fileList=PROXY_FILE_PATH,
               content=content)
Beispiel #5
0
 def isEnough(minCount: int):
     """Tell whether enough entries are stored at ``PROXY_FILE_PATH``.

     Args:
         minCount: The minimum number of entries required.

     Returns:
         ``True`` when the content read from the file has at least
         ``minCount`` entries, ``False`` otherwise.
     """
     available = readFile(rootName=PROJECT_NAME, fileList=PROXY_FILE_PATH)
     return len(available) >= minCount
Beispiel #6
0
def getProxyFromFile():
    """Return one randomly chosen entry from the file at ``PROXY_FILE_PATH``.

    Returns:
        The chosen entry.

    Raises:
        ValueError: If the file holds no entries, or the chosen entry is
            itself empty.
    """
    message = '待选proxy值不足1,请补充!'
    candidates = readFile(rootName=PROJECT_NAME, fileList=PROXY_FILE_PATH)
    # random.choice raises IndexError on an empty sequence, so emptiness is
    # checked first; otherwise the intended ValueError is never reached.
    if not candidates:
        raise ValueError(message)
    val = random.choice(candidates)
    # Keep the original guard against picking a blank entry.
    if not val:
        raise ValueError(message)
    return val
Beispiel #7
0
def getCookieFromFile():
    """Return one randomly chosen entry from the file at ``COOKIE_FILE_PATH``.

    Returns:
        The chosen entry.

    Raises:
        ValueError: If the file holds no entries, or the chosen entry is
            itself empty.
    """
    message = '待选cookie值不足1,请补充!'
    candidates = readFile(rootName=PROJECT_NAME, fileList=COOKIE_FILE_PATH)
    # random.choice raises IndexError on an empty sequence, so emptiness is
    # checked first; otherwise the intended ValueError is never reached.
    if not candidates:
        raise ValueError(message)
    val = random.choice(candidates)
    # Keep the original guard against picking a blank entry.
    if not val:
        raise ValueError(message)
    return val
# -*- coding: utf-8 -*-
from conf import PROJECT_NAME
from utils import fileHelper

# Load conf.json once at import time; every constant below is read from it.
data = fileHelper.readFile(PROJECT_NAME, ["conf.json"])

# Access key pair (presumably used to sign requests — confirm with callers).
ACCESSKEY = data["ACCESSKEY"]
ACCESSSEC = data["ACCESSSEC"]

# Remote endpoints and request parameters.
GET_URL = data["GET-URL"]
SAVE_URL = data["SAVE-URL"]
HOST = data["HOST"]
CRAWLER = data["CRAWLER"]
LIMIT = data["LIMIT"]

# Values that look like HTTP header settings — verify against the request
# code that consumes them.
CONTENT_TYPE = data["CONTENT-TYPE"]
CONTENT_ENCODING = data["CONTENT-ENCODING"]
USER_AGENT = data["USER_AGENT"]
ACCEPT_ENCODING = data["ACCEPT-ENCODING"]

# Task source selection.
TASK_FROM = data["TASK-FROM"]

# Default task JSON and local server settings.
DEFAULT_TASK_JSON = data["DEFAULT-TASK-JSON"]
LOCAL_SERVER_HOST = data["LOCAL-SERVER-HOST"]
LOCAL_SERVER_PORT = data["LOCAL-SERVER-PORT"]
LOCAL_SERVER_URL = data["LOCAL-SERVER-URL"]