Example #1
def main():
    logging.info("taskflow receiver is running")
    # connect to the Redis-backed message queue
    redisdb = RedisDB()
    while True:
        data = redisdb.pop_msg_queue()
        if data is not None:
            message_process(int(data))
        time.sleep(1)
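RedisDB is an application wrapper that none of these examples define. A minimal sketch of the queue methods that Examples #1 and #6 rely on, assuming a plain Redis list and the redis-py client (the key name is hypothetical):

import redis

class RedisDB(object):
    # hypothetical key; the real wrapper's queue name is not shown anywhere
    QUEUE_KEY = 'taskflow:msg_queue'

    def __init__(self, host='localhost', port=6379, db=0):
        self._conn = redis.Redis(host=host, port=port, db=db)

    def push_msg_queue(self, value):
        # producer side (Example #6): append to the left of the list
        self._conn.lpush(self.QUEUE_KEY, value)

    def pop_msg_queue(self):
        # consumer side (Example #1): take from the right; None when empty
        return self._conn.rpop(self.QUEUE_KEY)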
Example #2
def getdata():
    try:
        r = RedisDB('localhost', priorities)
        r.cleanup()
        data = r.getvalues()
        if not data:
            # a bare string cannot be raised in Python 3
            raise ValueError('No data')
    except Exception:
        return jsonify(status='no data'), 404
    return data, 200
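The handler above is presumably registered on a Flask app that the snippet omits; a hypothetical registration for completeness (route path and method are assumptions):

from flask import Flask

app = Flask(__name__)
app.add_url_rule('/getdata', view_func=getdata, methods=['GET'])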
Example #3
def message_process(flow_instance_id):
    try:
        redisdb = RedisDB()
        # get the module that needs to be run
        output_filename = settings.TASK_RUN_LOG_FORMAT % flow_instance_id
        logging.debug("output_filename:%s", output_filename)
        logging.debug("task_run_filename:%s", settings.TASK_RUN_FILE)
        logging.debug("task python bin location:%s", settings.PYTHONBIN)
        with open(output_filename, "a") as outfile:
            pm = subprocess.Popen([
                settings.PYTHONBIN, "-u", settings.TASK_RUN_FILE, "-i",
                str(flow_instance_id)
            ],
                                  close_fds=True,
                                  stdout=outfile,
                                  stderr=subprocess.STDOUT)
            json_data = {
                "worker_process_id": pm.pid,
                "worker_hostname": socket.gethostname(),
                "flow_instance_id": flow_instance_id,
                "start_time": time.time()
            }
            redisdb.add_running_instance(flow_instance_id, json_data)
            redisdb.close()
    except Exception:
        logging.error('message_process err \n %s', traceback.format_exc())
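add_running_instance (and the matching remove_running_instance used in Example #7) is not defined in these snippets. A minimal sketch, assuming running instances are tracked as fields of a single Redis hash (the hash key is hypothetical):

import json
import redis

class RedisDB(object):
    RUNNING_KEY = 'taskflow:running_instances'  # assumed key name

    def __init__(self, host='localhost', port=6379, db=0):
        self._conn = redis.Redis(host=host, port=port, db=db)

    def add_running_instance(self, flow_instance_id, json_data):
        # one hash field per running instance, JSON-encoded
        self._conn.hset(self.RUNNING_KEY, flow_instance_id,
                        json.dumps(json_data))

    def remove_running_instance(self, flow_instance_id):
        self._conn.hdel(self.RUNNING_KEY, flow_instance_id)

    def close(self):
        self._conn.close()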
Example #4
def setdata():
    # parse the request body once; silent=True yields None on bad JSON
    jdata = request.get_json(silent=True)
    cnt = 0
    for prio in priorities:
        try:
            assert jdata['newalert'][prio][0]['text']
        except Exception:
            cnt = cnt + 1

    if len(priorities) == cnt:
        return jsonify(status='failed to parse data'), 400

    r = RedisDB('localhost', priorities)
    # '%s' is a platform-specific strftime code; int(time.time()) is the
    # portable equivalent
    current_timestamp = int(
        datetime.datetime.strftime(datetime.datetime.now(), '%s'))
    for prio in priorities:
        try:
            for j in jdata['newalert'][prio]:
                r.setvalues(prio, j['text'],
                            current_timestamp + int(j['duration']))
        except Exception:
            pass
    r.cleanup()
    return jsonify(status='success'), 200
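setvalues, getvalues, and cleanup are likewise undefined. Given the expiry timestamps passed above, one plausible reading keeps a Redis sorted set per priority, scored by expiry time; a sketch under that assumption, not the project's actual implementation:

import time
import redis

class RedisDB(object):
    def __init__(self, host, priorities, port=6379):
        self._conn = redis.Redis(host=host, port=port)
        self._priorities = priorities

    def setvalues(self, prio, text, expires_at):
        # score each alert with its absolute expiry timestamp
        self._conn.zadd(prio, {text: expires_at})

    def getvalues(self):
        return {p: [m.decode() for m in self._conn.zrange(p, 0, -1)]
                for p in self._priorities}

    def cleanup(self):
        # drop every alert whose expiry timestamp is already in the past
        now = int(time.time())
        for p in self._priorities:
            self._conn.zremrangebyscore(p, 0, now)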
Example #5
class Spider(object):
    def __init__(self, config=None):
        # instance attributes instead of shared class-level state
        self.downloaders = list()
        self.instanceConf = config
        self.redisDB = RedisDB(config=self.instanceConf)
        # seed the crawl frontier with the configured start URLs
        for url in config.START_URL:
            self.redisDB.Enqueue(url, depth=0, title='', trycount=0)
        self.startDownloader()

    def startDownloader(self):
        downloader = Downloader(self.redisDB, config=self.instanceConf)
        downloader.start()
Example #6
def main():
    logging.info("taskflow sender is running")
    taskflowdb = TaskFlowDB()
    redisdb = RedisDB()
    while True:
        data = taskflowdb.get_undo_instances()
        if len(data) == 0:
            time.sleep(30)
            continue
        sended_ids = []
        try:
            for item in data:
                redisdb.push_msg_queue(item["id"])
                sended_ids.append(item["id"])
        except Exception:
            logging.warning("push redis err \n %s", traceback.format_exc())
        if len(sended_ids):
            for item in sended_ids:
                taskflowdb.save_instance_status(item, 'running')

        if len(sended_ids) != len(data):
            logging.error("redis data error: sended len not equal data len")
            raise Exception("redis data error")
        time.sleep(2)
Example #7
def main(flow_instance_id):
    try:
        taskflowdb = TaskFlowDB()
        # fetch the basic instance data
        instance_data = taskflowdb.get_instance(flow_instance_id)
        flow_id = instance_data["flowid"]
        step_num = instance_data["curstepnum"]
        flow_step_data = taskflowdb.get_flow_step(flow_id, step_num)

        module_name = flow_step_data["modulename"]

        module_data = taskflowdb.get_module(module_name)
        arguments_definition = json.loads(module_data["arguments"])

        # dynamically import the module to run
        inner_module = importlib.import_module("modules.%s" % module_name)
        inner_method = getattr(inner_module, "main")

        # assemble the argument data
        # arguments supplied with the instance
        dict_instance_arguments = json.loads(instance_data["arguments"])
        # arguments produced while the flow was running
        dict_instance_run_data = taskflowdb.get_instance_run_data(
            flow_instance_id)
        inner_kwargs = {}

        # resolve input-argument aliases and set the module's run arguments
        input_argment_alias = json.loads(flow_step_data["inputargalias"])
        for arg_item in arguments_definition:
            key_name = arg_item["name"]
            input_key_name = input_argment_alias.get(key_name, key_name)
            if key_name in dict_instance_arguments:
                inner_kwargs[key_name] = dict_instance_arguments.get(
                    input_key_name)
            elif key_name in dict_instance_run_data:
                inner_kwargs[key_name] = dict_instance_run_data.get(
                    input_key_name)
            else:
                inner_kwargs[key_name] = None
        inner_kwargs["sys_taskflow_instance"] = instance_data

        # record the instance_steps row
        step_name = flow_step_data["stepname"]
        json_data = json.dumps(inner_kwargs, cls=CustomJSONEncoder)
        worker_name = socket.gethostname()
        instance_step_id = taskflowdb.add_instance_step(
            flow_instance_id, step_num, step_name, json_data, worker_name,
            'running', '')

        # close the connection for now to free it; DB connections are scarce
        taskflowdb.close()
        # run the module
        result = True
        message = ""
        return_data = {}
        try:
            logging.info("----------run module: %s start----------" %
                         module_name)
            ret = inner_method(**inner_kwargs)
            logging.info("----------run module: %s finish----------" %
                         module_name)
            if ret is not None:
                if type(ret) is bool:
                    result = ret
                elif type(ret) is tuple:
                    len_ret = len(ret)
                    if len_ret > 0:
                        result = bool(ret[0])
                    if len_ret > 1:
                        message = str(ret[1])
                    if len_ret > 2:
                        return_data = dict(ret[2])
        except Exception:
            result = False
            message = traceback.format_exc()
            logging.error("run module err \n %s", message)
        exec_status = u'success' if result else u'fail'
        # reopen the DB connection
        taskflowdb = TaskFlowDB()
        # update the instance_steps row
        taskflowdb.save_instance_step_status(instance_step_id, exec_status,
                                             message)
        # handle the execution result
        if result:
            # execution succeeded:
            # resolve output-argument aliases and persist the run data
            output_argment_alias = json.loads(flow_step_data["outputargalias"])
            for key, value in return_data.items():
                new_key_name = output_argment_alias.get(key, key)
                new_value = value
                if type(value) in [tuple, set]:
                    new_value = list(value)
                if type(value) in [list, set, dict, tuple]:
                    key_type = 'object'
                else:
                    key_type = 'simple'
                if 'object' == key_type:
                    new_value = json.dumps(new_value, cls=CustomJSONEncoder)
                taskflowdb.set_instance_run_data(flow_instance_id, key_type,
                                                 new_key_name, new_value)
            # has the whole flow finished?
            if step_num >= instance_data["stepcount"]:
                taskflowdb.save_instance_status(flow_instance_id, exec_status)
            else:
                # should the next step pause?
                nextstep_waitseconds = int(
                    flow_step_data["nextstep_waitseconds"])
                curstepnum = step_num + 1
                curstepruncount = 0
                if nextstep_waitseconds == -1:
                    exec_status = 'pause'
                    next_runtime = datetime.datetime.now()
                else:
                    exec_status = 'standby'
                    # delay execution of the next step
                    next_runtime = datetime.datetime.now(
                    ) + datetime.timedelta(seconds=nextstep_waitseconds)
                taskflowdb.save_instance_status(flow_instance_id, exec_status,
                                                curstepnum, curstepruncount,
                                                next_runtime)
        else:
            # on failure, decide whether to retry
            failed_retrycounts = int(flow_step_data["failed_retrycounts"])
            curstepruncount = int(instance_data["curstepruncount"]) + 1
            # no retry configured, or retry count exceeded
            if failed_retrycounts == 0 or curstepruncount > failed_retrycounts:
                taskflowdb.save_instance_status(
                    flow_instance_id,
                    exec_status,
                    cur_step_runcount=curstepruncount)
            else:
                exec_status = 'standby'
                # retry after one minute by default
                next_runtime = datetime.datetime.now() + datetime.timedelta(
                    seconds=60)
                taskflowdb.save_instance_status(
                    flow_instance_id,
                    exec_status,
                    cur_step_runcount=curstepruncount,
                    next_runtime=next_runtime)
    except Exception:
        logging.error("task run err \n %s", traceback.format_exc())
    try:
        # remove running flow_instance_id
        redisdb = RedisDB()
        redisdb.remove_running_instance(flow_instance_id)
        redisdb.close()
    except Exception:
        logging.error("task run remove redis running key err \n %s",
                      traceback.format_exc())
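The runner above accepts a module main() that returns None, a bool, or a tuple of up to (result, message, return_data). A minimal module conforming to that contract (module and field names hypothetical):

# modules/echo_demo.py -- hypothetical step module for the runner above
def main(sys_taskflow_instance=None, **kwargs):
    # real work would go here; report success, a message, and run data
    # that later steps can pick up through get_instance_run_data
    return True, "processed %d arguments" % len(kwargs), {"arg_count": len(kwargs)}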
Example #8
def SMEMBERS(peer_id):
    # poll until the key holds something other than the 'empty' marker
    while True:
        KEY = "PPFC_{0}".format(peer_id)
        VALUE = RedisDB.get(KEY)
        if VALUE[1:6] != 'empty':
            break
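The loop above spins at full speed until the key stops reading 'empty'. A variant that yields the CPU between polls (interval chosen arbitrarily):

import time

def SMEMBERS(peer_id):
    key = "PPFC_{0}".format(peer_id)
    while True:
        value = RedisDB.get(key)  # same assumed accessor as above
        if value[1:6] != 'empty':
            return value
        time.sleep(0.1)  # avoid a busy-wait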
Example #9
    logger.critical("API file '{}' is not found.".format(API_FILE))
    sys.exit(1)
except PermissionError:
    logger.critical("API file '{}' is not readable.".format(API_FILE))
    sys.exit(1)

# Redis backend for storing data
redis_host = CONFIG.get("redis", {}).get("host", "127.0.0.1")
redis_port = CONFIG.get("redis", {}).get("port", 6379)
redis_db = CONFIG.get("redis", {}).get("db", 0)
redis_prefix = CONFIG.get("redis", {}).get("data_prefix", "ipvisualizator")
initial_users = CONFIG.get("users", [])

try:
    db = RedisDB(host=redis_host,
                 port=redis_port,
                 db=redis_db,
                 data_prefix=redis_prefix,
                 initial_users=initial_users)
except redis.exceptions.ConnectionError as error:
    logger.critical("Can't connect to redis {}:{}: {}".format(
        redis_host, redis_port, error))
    sys.exit(1)

# Expose app for WSGI applications
application = app.app

# Run Flask development server
if __name__ == "__main__":
    app.run(port=CONFIG["app"]["port"])
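For reference, the .get() calls above imply a CONFIG mapping shaped roughly as follows (every value is an illustrative default; only app.port has no fallback in the code):

CONFIG = {
    "redis": {
        "host": "127.0.0.1",
        "port": 6379,
        "db": 0,
        "data_prefix": "ipvisualizator",
    },
    "users": [],
    "app": {"port": 8080},  # hypothetical port
}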
Example #10
sys.path.append("../")
from db_config import dmpMysqlConfig, dmpRedisConfig, bendiRedisConfig
from mysqldb import MysqlDB
from redisdb import RedisDB
# import join
import json
import time
import binascii
import joblib

environment = 'dev'
# environment = 'prod'

redis_config = dmpRedisConfig(environment)
redis_config['db'] = 7
redis_conn = RedisDB(config=redis_config).getRedisConn()

mysqlConfig = dmpMysqlConfig(environment)
mysqlConfig['db'] = 'dmp_spider'
mysql_conn = MysqlDB(config=mysqlConfig).getClient()
cur = mysql_conn.cursor()


def get_data():
    """58data"""
    try:
        count = 0
        url_list = []
        sql = "select source_url from house_five8_spider where house_city=''"
        cur.execute(sql)
        data_list = cur.fetchall()