Esempio n. 1
0
def search_match_files(directory):
    """Recursively scan ``directory`` and index the files found in it.

    Sub-directories are descended into. When ``ENABLE_FOLDER_RULE`` is set,
    only sub-directories whose lower-cased name is a key of
    ``PLATE_MAP_CONFIG`` or ``USER_MAP_CONFIG`` are visited.

    For every file the digest returned by ``Utils.md5sum`` is looked up in
    ``redis_md5sum``. On a hit the file is stored as a ``Surplus`` record
    and logged as skipped; otherwise a new ``Attachment`` row is committed
    and its digest -> id pair is written to ``redis_md5sum``.

    :parameter directory: directory to scan
    """

    # Both lookup sets are loop-invariant, so build them once per call
    # instead of once per directory entry.
    known_folders = None
    if ENABLE_FOLDER_RULE:
        known_folders = set(PLATE_MAP_CONFIG.keys())
        known_folders.update(USER_MAP_CONFIG.keys())

    ignored_names = set()
    if SKIP_README_FILE:
        ignored_names = set(x.lower() for x in IGNORE_FILE_LIST)

    for entry in os.listdir(directory):
        sub_path = os.path.join(directory, entry)

        if os.path.isdir(sub_path):
            # Skip folders that match no configured plate or author.
            if known_folders is not None:
                folder_name = os.path.basename(sub_path).lower()
                if folder_name not in known_folders:
                    continue

            search_match_files(sub_path)
            continue

        # Skip files named on the ignore list.
        if os.path.basename(sub_path).lower() in ignored_names:
            continue

        # Resolve author and plate from the path; without the folder rule
        # the defaults are author='' and plate=0.
        if ENABLE_FOLDER_RULE:
            author, plate = Utils.get_info_by_path(sub_path)[:2]
            plate = PLATE_MAP_CONFIG.get(plate)
        else:
            author, plate = '', 0

        # A digest already present in the cache marks a duplicate file:
        # record it separately and move on.
        md5sum = Utils.md5sum(sub_path)
        fid = redis_md5sum.get(md5sum)
        if fid:
            Surplus(sub_path,
                    plate=plate,
                    author=author,
                    md5sum=md5sum,
                    fid=fid).__save(robot_session)
            record_info.info("skipping: %s ==> %s" % (author, sub_path))
            continue

        record_info.info("indexing: %s ==> %s" % (author, sub_path))

        suffix = Utils.get_info_by_path(sub_path)[2]
        # ``UUID.hex`` is available on Python 2 and 3, whereas the
        # ``get_hex()`` method only exists on Python 2.
        key_name = ''.join((uuid.uuid4().hex, suffix))
        entity = Attachment(sub_path,
                            key_name,
                            plate=plate,
                            author=author,
                            md5sum=md5sum)
        robot_session.add(entity)
        robot_session.commit()
        redis_md5sum.set(entity.md5sum, entity.id)
Esempio n. 2
0
def spread_match_files(limit=5):
    """Post indexed attachment records to the forum.

    Loads up to ``limit`` ``Attachment`` rows whose ``status`` equals 1
    (ordered by id) and calls ``spread_info`` for each of them. When a
    thread id and a post id come back, the row's status is set to 2 and a
    ``Thread`` record is committed with it. When the query returns no rows
    the function sleeps for five minutes instead.

    :parameter limit: maximum number of rows fetched per call
    """

    def forum_account(real_name):
        """Return ``(account id, account name)`` for a configured author.

        The ``USER_MAP_CONFIG`` entry is a ``|``-separated string; field 0
        is converted to ``int`` and field 2 is returned unchanged.

        :parameter real_name: key into ``USER_MAP_CONFIG``
        """

        fields = USER_MAP_CONFIG.get(real_name).split("|")
        return int(fields[0]), fields[2]

    query = robot_session.query(Attachment)
    query = query.filter(Attachment.status == 1)
    pending = query.order_by(Attachment.id).limit(limit).all()

    if not pending:
        # Nothing to post: stay quiet for five minutes.
        time.sleep(5 * 60)
        return

    for record in pending:
        # Build the thread subject, the post body and the attachment name.
        base_name = os.path.basename(record.file_name)
        title = os.path.splitext(base_name)[0]
        poster = forum_account(record.author)
        forum_id = record.plate

        post_info.info("=" * 80)
        post_info.info("正在发帖:%s" % base_name)

        tid, pid, aid = spread_info(
            title, title, poster, forum_id,
            file_name=base_name,
            attachment=record.key_name)

        if not (tid and pid):
            post_info.info("发帖失败: Error.")
            continue

        try:
            # Mark the row as posted and keep a record of the new thread.
            record.status = 2
            thread_record = Thread(tid, pid, forum_id, aid, record.id)

            robot_session.add(record)
            robot_session.add(thread_record)
            robot_session.commit()
            post_info.info("发帖成功: OK.")
        except Exception as ex:
            robot_session.rollback()
            post_info.exception(ex)
        finally:
            robot_session.close()
Esempio n. 3
0
def search_match_files(directory):
    """Recursively scan ``directory`` and index the files found in it.

    Sub-directories are descended into. When ``ENABLE_FOLDER_RULE`` is set,
    only sub-directories whose lower-cased name is a key of
    ``PLATE_MAP_CONFIG`` or ``USER_MAP_CONFIG`` are visited.

    For every file the digest returned by ``Utils.md5sum`` is looked up in
    ``redis_md5sum``. On a hit the file is stored as a ``Surplus`` record
    and logged as skipped; otherwise a new ``Attachment`` row is committed
    and its digest -> id pair is written to ``redis_md5sum``.

    :parameter directory: directory to scan
    """

    # Both lookup sets are loop-invariant, so build them once per call
    # instead of once per directory entry.
    known_folders = None
    if ENABLE_FOLDER_RULE:
        known_folders = set(PLATE_MAP_CONFIG.keys())
        known_folders.update(USER_MAP_CONFIG.keys())

    ignored_names = set()
    if SKIP_README_FILE:
        ignored_names = set(x.lower() for x in IGNORE_FILE_LIST)

    for entry in os.listdir(directory):
        sub_path = os.path.join(directory, entry)

        if os.path.isdir(sub_path):
            # Skip folders that match no configured plate or author.
            if known_folders is not None:
                folder_name = os.path.basename(sub_path).lower()
                if folder_name not in known_folders:
                    continue

            search_match_files(sub_path)
            continue

        # Skip files named on the ignore list.
        if os.path.basename(sub_path).lower() in ignored_names:
            continue

        # Resolve author and plate from the path; without the folder rule
        # the defaults are author='' and plate=0.
        if ENABLE_FOLDER_RULE:
            author, plate = Utils.get_info_by_path(sub_path)[:2]
            plate = PLATE_MAP_CONFIG.get(plate)
        else:
            author, plate = '', 0

        # A digest already present in the cache marks a duplicate file:
        # record it separately and move on.
        md5sum = Utils.md5sum(sub_path)
        fid = redis_md5sum.get(md5sum)
        if fid:
            Surplus(sub_path, plate=plate, author=author, md5sum=md5sum,
                    fid=fid).__save(robot_session)
            record_info.info("skipping: %s ==> %s" % (author, sub_path))
            continue

        record_info.info("indexing: %s ==> %s" % (author, sub_path))

        suffix = Utils.get_info_by_path(sub_path)[2]
        # ``UUID.hex`` is available on Python 2 and 3, whereas the
        # ``get_hex()`` method only exists on Python 2.
        key_name = ''.join((uuid.uuid4().hex, suffix))
        entity = Attachment(
            sub_path, key_name, plate=plate, author=author, md5sum=md5sum)
        robot_session.add(entity)
        robot_session.commit()
        redis_md5sum.set(entity.md5sum, entity.id)
Esempio n. 4
0
def spread_match_files(limit=5):
    """Post indexed attachment records to the forum.

    Loads up to ``limit`` ``Attachment`` rows whose ``status`` equals 1
    (ordered by id) and calls ``spread_info`` for each of them. When a
    thread id and a post id come back, the row's status is set to 2 and a
    ``Thread`` record is committed with it. When the query returns no rows
    the function sleeps for five minutes instead.

    :parameter limit: maximum number of rows fetched per call
    """

    def forum_account(real_name):
        """Return ``(account id, account name)`` for a configured author.

        The ``USER_MAP_CONFIG`` entry is a ``|``-separated string; field 0
        is converted to ``int`` and field 2 is returned unchanged.

        :parameter real_name: key into ``USER_MAP_CONFIG``
        """

        fields = USER_MAP_CONFIG.get(real_name).split("|")
        return int(fields[0]), fields[2]

    query = robot_session.query(Attachment)
    query = query.filter(Attachment.status == 1)
    pending = query.order_by(Attachment.id).limit(limit).all()

    if not pending:
        # Nothing to post: stay quiet for five minutes.
        time.sleep(5 * 60)
        return

    for record in pending:
        # Build the thread subject, the post body and the attachment name.
        base_name = os.path.basename(record.file_name)
        title = os.path.splitext(base_name)[0]
        poster = forum_account(record.author)
        forum_id = record.plate

        post_info.info("=" * 80)
        post_info.info("正在发帖:%s" % base_name)

        tid, pid, aid = spread_info(
            title, title, poster, forum_id,
            file_name=base_name,
            attachment=record.key_name)

        if not (tid and pid):
            post_info.info("发帖失败: Error.")
            continue

        try:
            # Mark the row as posted and keep a record of the new thread.
            record.status = 2
            thread_record = Thread(tid, pid, forum_id, aid, record.id)

            robot_session.add(record)
            robot_session.add(thread_record)
            robot_session.commit()
            post_info.info("发帖成功: OK.")
        except Exception as ex:
            robot_session.rollback()
            post_info.exception(ex)
        finally:
            robot_session.close()
Esempio n. 5
0
def map_handler(_attachment):
    """Upload one attachment and persist the outcome (``map`` worker).

    A fresh ``uuid4`` based key (plus the file's suffix) is generated and
    the file is sent through ``put_up_datum``. When the returned ``key``
    equals the generated one, the record returned by
    ``_attachment.after_upload_action("")`` is committed and the file is
    handed to ``fileFinished.batch_move``. Upload, commit and move failures
    are logged, never raised.

    :parameter _attachment: attachment record describing the file
    """

    _suffix = Utils.get_info_by_path(_attachment.file_name)[2]
    # ``UUID.hex`` is available on Python 2 and 3, whereas the
    # ``get_hex()`` method only exists on Python 2.
    _key_name = ''.join((uuid.uuid4().hex, _suffix))

    upload_info.info("=" * 80)
    upload_info.info("正在上传:%s" % _attachment.file_name)

    try:
        # Upload the file to Qiniu.
        _ret, _info = put_up_datum(key=_key_name,
                                   kind="file",
                                   file_path=_attachment.file_name,
                                   progress_handler=progress_handler)
    except Exception as ex:
        upload_info.exception(ex)
        return

    upload_info.info(_ret)
    upload_info.info(_info)

    # Only a response echoing our key counts as a successful upload.
    if not (_ret and _ret["key"] == _key_name):
        return

    try:
        attachment = _attachment.after_upload_action("")
        # Persist the post-upload state of the record.
        robot_session.add(attachment)
        robot_session.commit()
    except Exception as ex:
        robot_session.rollback()
        upload_info.exception(ex)
        # The upload itself succeeded; log it separately so the record
        # can be reconciled later.
        upload_error.info(
            upload_only_log %
            (_attachment.upload_datetime, _attachment.id))
    else:
        # Move the successfully uploaded file away.
        file_name_list = [attachment.file_name]
        try:
            fileFinished.batch_move(file_name_list)
        except Exception as ex:
            upload_info.exception(ex)
    finally:
        robot_session.close()
Esempio n. 6
0
def fake_post(gen_data_count=1):
    """Reply to forum threads with generated posts.

    Each entity produced by ``FakePost().generate(gen_data_count)`` is sent
    through ``spread_post``. When a post id comes back, the thread's
    last-poster fields are updated through ``forum_session`` and a ``Post``
    record is stored through ``robot_session``. Failures while recording
    are logged, never raised.

    :parameter gen_data_count: number of replies to generate
    """

    for entity in FakePost().generate(gen_data_count):
        uid, tid, fid = entity["uid"], entity["tid"], entity["fid"]
        username, message = entity["username"], entity["message"]

        faker_post_info.info("=" * 80)
        faker_post_info.info("message = %s" % message)
        faker_post_info.info("(%s)正在回帖(%s)" % (username, tid))

        pid = spread_post(uid, tid, fid, username, message)

        if not pid:
            faker_post_info.info("回帖失败: Error.")
            continue

        try:
            # Update the thread's "last reply" information.
            forum_thread = forum_session.query(ForumThread).filter(
                ForumThread.__tid == tid).first()
            forum_thread.__lastposter = username
            forum_thread.__lastpost = int(time.time())
            forum_session.add(forum_thread)
            forum_session.commit()

            post = Post(uid, tid, pid, fid)
            robot_session.add(post)
            robot_session.commit()
        except Exception as ex:
            # Either session may be the one that failed. Roll back both so
            # neither is left in a failed transaction for the next reply.
            robot_session.rollback()
            forum_session.rollback()
            faker_post_info.exception(ex)
            faker_post_info.info("回帖成功但记录失败: OK.")
            # The reply itself was posted; log it separately so the
            # missing record can be reconciled later.
            time_now = datetime.datetime.now().strftime("%Y-%m-%d %X")
            faker_post_error.info(faker_post_only %
                                  (uid, tid, pid, time_now))
        else:
            faker_post_info.info("回帖成功: OK.")
        finally:
            robot_session.close()
Esempio n. 7
0
def fake_post(gen_data_count=1):
    """Reply to forum threads with generated posts.

    Each entity produced by ``FakePost().generate(gen_data_count)`` is sent
    through ``spread_post``. When a post id comes back, the thread's
    last-poster fields are updated through ``forum_session`` and a ``Post``
    record is stored through ``robot_session``. Failures while recording
    are logged, never raised.

    :parameter gen_data_count: number of replies to generate
    """

    for entity in FakePost().generate(gen_data_count):
        uid, tid, fid = entity["uid"], entity["tid"], entity["fid"]
        username, message = entity["username"], entity["message"]

        faker_post_info.info("=" * 80)
        faker_post_info.info("message = %s" % message)
        faker_post_info.info("(%s)正在回帖(%s)" % (username, tid))

        pid = spread_post(uid, tid, fid, username, message)

        if not pid:
            faker_post_info.info("回帖失败: Error.")
            continue

        try:
            # Update the thread's "last reply" information.
            forum_thread = forum_session.query(ForumThread).filter(
                ForumThread.__tid == tid).first()
            forum_thread.__lastposter = username
            forum_thread.__lastpost = int(time.time())
            forum_session.add(forum_thread)
            forum_session.commit()

            post = Post(uid, tid, pid, fid)
            robot_session.add(post)
            robot_session.commit()
        except Exception as ex:
            # Either session may be the one that failed. Roll back both so
            # neither is left in a failed transaction for the next reply.
            robot_session.rollback()
            forum_session.rollback()
            faker_post_info.exception(ex)
            faker_post_info.info("回帖成功但记录失败: OK.")
            # The reply itself was posted; log it separately so the
            # missing record can be reconciled later.
            time_now = datetime.datetime.now().strftime("%Y-%m-%d %X")
            faker_post_error.info(
                faker_post_only % (uid, tid, pid, time_now))
        else:
            faker_post_info.info("回帖成功: OK.")
        finally:
            robot_session.close()
Esempio n. 8
0
def map_handler(_attachment):
    """Upload one attachment and persist the outcome (``map`` worker).

    A fresh ``uuid4`` based key (plus the file's suffix) is generated and
    the file is sent through ``put_up_datum``. When the returned ``key``
    equals the generated one, the record returned by
    ``_attachment.after_upload_action("")`` is committed and the file is
    handed to ``fileFinished.batch_move``. Upload, commit and move failures
    are logged, never raised.

    :parameter _attachment: attachment record describing the file
    """

    _suffix = Utils.get_info_by_path(_attachment.file_name)[2]
    # ``UUID.hex`` is available on Python 2 and 3, whereas the
    # ``get_hex()`` method only exists on Python 2.
    _key_name = ''.join((uuid.uuid4().hex, _suffix))

    upload_info.info("=" * 80)
    upload_info.info("正在上传:%s" % _attachment.file_name)

    try:
        # Upload the file to Qiniu.
        _ret, _info = put_up_datum(key=_key_name,
                                   kind="file",
                                   file_path=_attachment.file_name,
                                   progress_handler=progress_handler)
    except Exception as ex:
        upload_info.exception(ex)
        return

    upload_info.info(_ret)
    upload_info.info(_info)

    # Only a response echoing our key counts as a successful upload.
    if not (_ret and _ret["key"] == _key_name):
        return

    try:
        attachment = _attachment.after_upload_action("")
        # Persist the post-upload state of the record.
        robot_session.add(attachment)
        robot_session.commit()
    except Exception as ex:
        robot_session.rollback()
        upload_info.exception(ex)
        # The upload itself succeeded; log it separately so the record
        # can be reconciled later.
        upload_error.info(upload_only_log % (
            _attachment.upload_datetime, _attachment.id))
    else:
        # Move the successfully uploaded file away.
        file_name_list = [attachment.file_name]
        try:
            fileFinished.batch_move(file_name_list)
        except Exception as ex:
            upload_info.exception(ex)
    finally:
        robot_session.close()
Esempio n. 9
0
def update_name_files(limit=20):
    """Assign a unique ``key_name`` to indexed rows that lack one.

    Fetches up to ``limit`` ``Attachment`` rows with an empty ``key_name``
    and ``status`` 0 (ordered by id), gives each one a ``uuid4`` hex name
    followed by the file's suffix, and commits the batch. Commit errors
    are printed and rolled back, not raised.

    :parameter limit: maximum number of rows handled per call
    :return: ``True`` when the query found no rows, ``False`` otherwise
        (whether or not the commit succeeded)
    """

    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.key_name == "",
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()

    if not attachment_entities:
        return True

    for attachment in attachment_entities:
        suffix = Utils.get_info_by_path(attachment.file_name)[2]

        # Regenerate until the name is absent from the cache, guarding
        # against a clash with a name that was handed out earlier.
        while True:
            # ``UUID.hex`` is available on Python 2 and 3, whereas the
            # ``get_hex()`` method only exists on Python 2.
            key_name = ''.join((uuid.uuid4().hex, suffix))
            fid = redis_md5sum.get(key_name)
            if not fid:
                break
        attachment.key_name = key_name

        # Remember the name in the cache for later clash checks.
        redis_md5sum.set(key_name, 1)

    try:
        robot_session.add_all(attachment_entities)
        robot_session.commit()
    except Exception as ex:
        print(ex)
        robot_session.rollback()
    else:
        print("OK")
    finally:
        robot_session.close()

    return False
Esempio n. 10
0
def update_name_files(limit=20):
    """Assign a unique ``key_name`` to indexed rows that lack one.

    Fetches up to ``limit`` ``Attachment`` rows with an empty ``key_name``
    and ``status`` 0 (ordered by id), gives each one a ``uuid4`` hex name
    followed by the file's suffix, and commits the batch. Commit errors
    are printed and rolled back, not raised.

    :parameter limit: maximum number of rows handled per call
    :return: ``True`` when the query found no rows, ``False`` otherwise
        (whether or not the commit succeeded)
    """

    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.key_name == "", Attachment.status == 0).order_by(
        Attachment.id).limit(limit).all()

    if not attachment_entities:
        return True

    for attachment in attachment_entities:
        suffix = Utils.get_info_by_path(attachment.file_name)[2]

        # Regenerate until the name is absent from the cache, guarding
        # against a clash with a name that was handed out earlier.
        while True:
            # ``UUID.hex`` is available on Python 2 and 3, whereas the
            # ``get_hex()`` method only exists on Python 2.
            key_name = ''.join((uuid.uuid4().hex, suffix))
            fid = redis_md5sum.get(key_name)
            if not fid:
                break
        attachment.key_name = key_name

        # Remember the name in the cache for later clash checks.
        redis_md5sum.set(key_name, 1)

    try:
        robot_session.add_all(attachment_entities)
        robot_session.commit()
    except Exception as ex:
        print(ex)
        robot_session.rollback()
    else:
        print("OK")
    finally:
        robot_session.close()

    return False
Esempio n. 11
0
def fake_member(gen_data_count=1):
    """Create generated forum accounts.

    For each entity from ``FakeMember().generate(gen_data_count)`` a
    ``CommonMember``, a ``CenterMember`` and a ``CommonMemberStatus`` row
    are committed through ``forum_session`` and a ``Member`` row through
    ``robot_session``. On success the member is also passed to
    ``CacheService.cache_data_insert_model``. Failures are logged and both
    sessions are rolled back.

    Keep ``gen_data_count`` small: registrations should not grow in jumps
    at a single point in time.

    :parameter gen_data_count: number of accounts to generate
    """

    member_status_data = FakeMemberStatus().generate(gen_data_count)
    member_status_list = list(member_status_data)

    for index, entity in enumerate(FakeMember().generate(gen_data_count)):
        username = entity["username"].lower()

        # The plain password is 6-20 characters drawn at random from the
        # generated password plus the assist number.
        length = random.randint(6, 20)
        random_string = ''.join(
            (entity["password"], str(entity["assist_number"])))
        random_string = [random.choice(random_string) for _ in range(length)]
        password = ''.join(random_string)

        # Salted hash of the real password, stored in the user-center table.
        salt = "".join([
            random.choice(string.ascii_lowercase + string.digits)
            for _ in range(6)
        ])
        hash_password = Utils.dz_uc_md5(password, salt)

        # Placeholder password hash stored in the member table. The lower
        # bound is 10 ** 9 (the original ``10 * 9`` was a typo), so the
        # hashed number always has ten digits.
        fake_password = Utils.md5(str(random.randint(10 ** 9, 10 ** 10 - 1)))

        faker_user_info.info("=" * 80)
        faker_user_info.info("正在注册账户:%s" % username)

        try:
            common_member = CommonMember(__groupid=10,
                                         __username=username,
                                         __password=fake_password,
                                         __email=entity["email"],
                                         __regdate=int(time.time()))
            forum_session.add(common_member)
            # Flush so the new row's uid can be read before the commit.
            forum_session.flush()
            uid = common_member.__uid

            center_member = CenterMember(__salt=salt,
                                         __username=username,
                                         __password=hash_password,
                                         __email=entity["email"],
                                         __regdate=int(time.time()),
                                         __uid=uid)
            forum_session.add(center_member)

            status_data = member_status_list[index]
            member_status = CommonMemberStatus(__uid=uid,
                                               __regip=status_data['reg_ip'],
                                               __lastip=status_data['last_ip'],
                                               __lastvisit=int(time.time()),
                                               __lastactivity=int(time.time()))
            forum_session.add(member_status)
            forum_session.commit()

            member = Member(username, password, entity["email"], uid)
            robot_session.add(member)
            robot_session.commit()
        except Exception as ex:
            faker_user_info.exception(ex)
            faker_user_info.info("注册账户失败: Error.")
            forum_session.rollback()
            robot_session.rollback()
        else:
            faker_user_info.info("注册账户成功: OK.")
            CacheService.cache_data_insert_model("common_member", member)
        finally:
            forum_session.close()
            robot_session.close()
Esempio n. 12
0
def upload_match_files(limit=5, loops=True):
    """Upload indexed attachment files and persist the outcome.

    Loads up to ``limit`` ``Attachment`` rows whose ``status`` equals 0
    (ordered by id) and sends each file through ``put_up_datum`` under the
    row's ``key_name``. When the returned ``key`` matches, the record
    returned by ``after_upload_action("")`` is committed and the file is
    handed to ``fileFinished.batch_move``. Any exception along the way is
    logged and triggers ``media_instance.play()``.

    When the query returns no rows the function sleeps for five minutes
    and, if ``loops`` is true, rescans ``SEEK_DIRECTORY``.

    :parameter limit: maximum number of rows fetched per call
    :parameter loops: whether to rescan the directory once no rows remain
    """

    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()

    if not attachment_entities:
        # Nothing to upload: stay quiet for five minutes.
        time.sleep(5 * 60)
        if loops:
            search_match_files(SEEK_DIRECTORY)
        return

    for attachment in attachment_entities:
        errors = False
        upload_info.info("=" * 80)
        upload_info.info("正在上传:%s" % attachment.file_name)

        try:
            # Upload the file to Qiniu.
            ret, info = put_up_datum(key=attachment.key_name,
                                     kind="file",
                                     file_path=attachment.file_name,
                                     progress_handler=progress_handler)
        except Exception as ex:
            errors = True
            upload_info.exception(ex)
        else:
            upload_info.info(ret)
            upload_info.info(info)
            # Only a response echoing our key counts as a success.
            if ret and ret["key"] == attachment.key_name:
                try:
                    attachment = attachment.after_upload_action("")
                    # Persist the post-upload state of the record.
                    robot_session.add(attachment)
                    robot_session.commit()
                except Exception as ex:
                    errors = True
                    robot_session.rollback()
                    upload_info.exception(ex)
                    # ``Logger.log`` needs a level as its first argument,
                    # so the original one-argument call raised inside
                    # this handler; use ``info`` as ``map_handler`` does.
                    upload_error.info(
                        upload_only_log %
                        (attachment.upload_datetime, attachment.id))
                else:
                    # Move the successfully uploaded file away.
                    file_name_list = [attachment.file_name]
                    try:
                        fileFinished.batch_move(file_name_list)
                    except Exception as ex:
                        errors = True
                        upload_info.exception(ex)
                finally:
                    robot_session.close()

        # On any error sound the alarm, then carry on with the next file.
        if errors:
            media_instance.play()
Esempio n. 13
0
def fake_member(gen_data_count=1):
    """Create generated forum accounts.

    For each entity from ``FakeMember().generate(gen_data_count)`` a
    ``CommonMember``, a ``CenterMember`` and a ``CommonMemberStatus`` row
    are committed through ``forum_session`` and a ``Member`` row through
    ``robot_session``. On success the member is also passed to
    ``CacheService.cache_data_insert_model``. Failures are logged and both
    sessions are rolled back.

    Keep ``gen_data_count`` small: registrations should not grow in jumps
    at a single point in time.

    :parameter gen_data_count: number of accounts to generate
    """

    member_status_data = FakeMemberStatus().generate(gen_data_count)
    member_status_list = list(member_status_data)

    for index, entity in enumerate(FakeMember().generate(gen_data_count)):
        username = entity["username"].lower()

        # The plain password is 6-20 characters drawn at random from the
        # generated password plus the assist number.
        length = random.randint(6, 20)
        random_string = ''.join(
            (entity["password"], str(entity["assist_number"])))
        random_string = [random.choice(random_string) for _ in range(length)]
        password = ''.join(random_string)

        # Salted hash of the real password, stored in the user-center table.
        salt = "".join(
            [random.choice(string.ascii_lowercase + string.digits) for _ in
             range(6)])
        hash_password = Utils.dz_uc_md5(password, salt)

        # Placeholder password hash stored in the member table. The lower
        # bound is 10 ** 9 (the original ``10 * 9`` was a typo), so the
        # hashed number always has ten digits.
        fake_password = Utils.md5(str(random.randint(10 ** 9, 10 ** 10 - 1)))

        faker_user_info.info("=" * 80)
        faker_user_info.info("正在注册账户:%s" % username)

        try:
            common_member = CommonMember(__groupid=10,
                                         __username=username,
                                         __password=fake_password,
                                         __email=entity["email"],
                                         __regdate=int(time.time()))
            forum_session.add(common_member)
            # Flush so the new row's uid can be read before the commit.
            forum_session.flush()
            uid = common_member.__uid

            center_member = CenterMember(__salt=salt,
                                         __username=username,
                                         __password=hash_password,
                                         __email=entity["email"],
                                         __regdate=int(time.time()),
                                         __uid=uid)
            forum_session.add(center_member)

            status_data = member_status_list[index]
            member_status = CommonMemberStatus(__uid=uid,
                                               __regip=status_data['reg_ip'],
                                               __lastip=status_data['last_ip'],
                                               __lastvisit=int(time.time()),
                                               __lastactivity=int(time.time()))
            forum_session.add(member_status)
            forum_session.commit()

            member = Member(username, password, entity["email"], uid)
            robot_session.add(member)
            robot_session.commit()
        except Exception as ex:
            faker_user_info.exception(ex)
            faker_user_info.info("注册账户失败: Error.")
            forum_session.rollback()
            robot_session.rollback()
        else:
            faker_user_info.info("注册账户成功: OK.")
            CacheService.cache_data_insert_model("common_member", member)
        finally:
            forum_session.close()
            robot_session.close()
Esempio n. 14
0
def upload_match_files(limit=5, loops=True):
    """Upload indexed attachment files and persist the outcome.

    Loads up to ``limit`` ``Attachment`` rows whose ``status`` equals 0
    (ordered by id) and sends each file through ``put_up_datum`` under the
    row's ``key_name``. When the returned ``key`` matches, the record
    returned by ``after_upload_action("")`` is committed and the file is
    handed to ``fileFinished.batch_move``. Any exception along the way is
    logged and triggers ``media_instance.play()``.

    When the query returns no rows the function sleeps for five minutes
    and, if ``loops`` is true, rescans ``SEEK_DIRECTORY``.

    :parameter limit: maximum number of rows fetched per call
    :parameter loops: whether to rescan the directory once no rows remain
    """

    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()

    if not attachment_entities:
        # Nothing to upload: stay quiet for five minutes.
        time.sleep(5 * 60)
        if loops:
            search_match_files(SEEK_DIRECTORY)
        return

    for attachment in attachment_entities:
        errors = False
        upload_info.info("=" * 80)
        upload_info.info("正在上传:%s" % attachment.file_name)

        try:
            # Upload the file to Qiniu.
            ret, info = put_up_datum(
                key=attachment.key_name,
                kind="file",
                file_path=attachment.file_name,
                progress_handler=progress_handler)
        except Exception as ex:
            errors = True
            upload_info.exception(ex)
        else:
            upload_info.info(ret)
            upload_info.info(info)
            # Only a response echoing our key counts as a success.
            if ret and ret["key"] == attachment.key_name:
                try:
                    attachment = attachment.after_upload_action("")
                    # Persist the post-upload state of the record.
                    robot_session.add(attachment)
                    robot_session.commit()
                except Exception as ex:
                    errors = True
                    robot_session.rollback()
                    upload_info.exception(ex)
                    # ``Logger.log`` needs a level as its first argument,
                    # so the original one-argument call raised inside
                    # this handler; use ``info`` as ``map_handler`` does.
                    upload_error.info(upload_only_log % (
                        attachment.upload_datetime, attachment.id))
                else:
                    # Move the successfully uploaded file away.
                    file_name_list = [attachment.file_name]
                    try:
                        fileFinished.batch_move(file_name_list)
                    except Exception as ex:
                        errors = True
                        upload_info.exception(ex)
                finally:
                    robot_session.close()

        # On any error sound the alarm, then carry on with the next file.
        if errors:
            media_instance.play()