Example #1
0
    def __init__(self, logdir, experimental_name, *, save_model_with_input=None):
        """ Create the VisualDL log writers for one experiment.

        :param logdir: root directory under which the log sub-directories live
        :param experimental_name: experiment name; '_train' and '_val' are
            appended to it to form the two sub-directory names
        :param save_model_with_input: stored unchanged on the instance. The
            original note says the model structure is not saved by default;
            how the value is consumed is not visible in this method.
        """
        from pyxllib.prog.pupil import check_install_package
        check_install_package('visualdl')
        from visualdl import LogWriter

        super().__init__()

        # The odd-looking suffixes make each experiment's train logs sort
        # before its eval logs in dictionary order.
        # This method does not delete an already existing directory first.
        train_dir = XlPath(logdir) / (experimental_name + '_train')
        val_dir = XlPath(logdir) / (experimental_name + '_val')

        self.write = LogWriter(logdir=str(train_dir))
        self.eval_writer = LogWriter(logdir=str(val_dir))
        self.eval_times = 0
        self.save_model_with_input = save_model_with_input
Example #2
0
def ensure_content(ob=None, encoding=None):
    """ Return text content for several kinds of input.

    :param ob:
        None: read everything from stdin
        object with a callable ``read`` attribute: return ``ob.read()``
        name of an existing file: return the content of that file
            .docx: text is extracted with textract
            .doc, .pdf: not supported yet
            anything else: read as a plain text file via ``readtext``
        any other value: returned unchanged
    :param encoding: force a specific encoding when reading a plain text file
    :return: the content as described above
    :raises NotImplementedError: for an existing .doc or .pdf file
    """
    if ob is None:
        return sys.stdin.read()  # input ends at EOF (Ctrl + D in a terminal)

    # File-like objects: the documented contract says their read() result is
    # returned, so handle them before treating the input as a file name.
    reader = getattr(ob, 'read', None)
    if callable(reader):
        return reader()

    # Known issue noted by the original author: a directory name may end up
    # raising PermissionError here.
    if File(ob):
        # Use the string form for suffix checks so path objects work too.
        name = str(ob)
        if name.endswith('.docx'):
            # Install notes: https://blog.csdn.net/code4101/article/details/79328636
            check_install_package('textract')
            import textract  # imported lazily, only after the install check
            text = textract.process(name)
            return text.decode('utf8', errors='ignore')
        if name.endswith('.doc'):
            raise NotImplementedError
        if name.endswith('.pdf'):
            raise NotImplementedError
        # Everything else is read as an ordinary text file.
        return readtext(ob, encoding)

    # Anything that cannot be classified is treated as literal content.
    return ob
Example #3
0
    def __init__(self, file, mode=None):
        """ Open an archive and record which format it uses.

        :param file: the archive to process
        :param mode: archive format; when empty it is inferred from the
            extension of ``file``
            'rar': opened with ``unrar.rarfile.RarFile``
            'zip': opened with ``zipfile.ZipFile``; '.docx' files are
                treated as zip by default
        :raises ValueError: if the format cannot be inferred from the
            extension, or ``mode`` is neither 'zip' nor 'rar'
        """
        # 1 Decide the archive format.
        # The extension is only inspected when no mode was given, so an
        # explicit mode no longer requires ``file`` to be a path string.
        if not mode:
            ext = os.path.splitext(file)[1].lower()
            if ext in ('.docx', '.zip'):
                mode = 'zip'
            elif ext == '.rar':
                mode = 'rar'
            else:
                dprint(ext)  # the extension does not reveal the archive format
                raise ValueError(
                    f'cannot infer the archive format from extension {ext!r}')
        self.mode = mode

        # 2 Pick the extraction "engine".
        if mode == 'zip':
            self.proc = zipfile.ZipFile(file)
        elif mode == 'rar':
            # Install notes: https://blog.csdn.net/code4101/article/details/79328636
            check_install_package('unrar')
            from unrar.rarfile import RarFile
            self.proc = RarFile(file)
        else:
            # Fail here instead of leaving self.proc undefined.
            raise ValueError(
                f"unsupported archive mode {mode!r}, expected 'zip' or 'rar'")

        # 3 Extraction folder; None means nothing has been extracted yet.
        self.tempfolder = None
Example #4
0
    def to_pdf(cls, docx_file, pdf_file=None):
        """ Convert a docx file to pdf using docx2pdf.

        :param docx_file: source document; must provide ``with_suffix`` when
            ``pdf_file`` is omitted
        :param pdf_file: output path; defaults to ``docx_file`` with its
            suffix replaced by '.pdf'
        :return: the pdf path that was handed to the converter
        """
        # If the installation fails, adding the pip option --user may help.
        check_install_package('docx2pdf')
        import docx2pdf

        target = docx_file.with_suffix('.pdf') if pdf_file is None else pdf_file
        docx2pdf.convert(str(docx_file), str(target))
        return target
Example #5
0
def type_text(text):
    """ Type the given text through pynput's keyboard controller.

    Per the original author, unlike pyautogui.write this supports Chinese
    and other unicode text.

    The clipboard can usually serve the same need, but the author found it
    not silent enough.
    """
    check_install_package('pynput')
    from pynput import keyboard

    keyboard.Controller().type(text)
Example #6
0
    def to_docx(self, docx_file=None):
        """ Convert the pdf at ``self.src_file`` to docx using pdf2docx.

        :param docx_file: output path; defaults to ``self.src_file`` with its
            suffix replaced by '.docx'
        :return: the docx path that was handed to the converter, so callers
            can locate the result when the default name was used
        """
        check_install_package('pdf2docx')
        from pdf2docx import parse

        pdf_file = self.src_file
        if docx_file is None:
            docx_file = pdf_file.with_suffix('.docx')

        # Note from the original author: progress is shown through logging,
        # not printed output.
        parse(str(pdf_file), str(docx_file))
        return docx_file
Example #7
0
    def f1_score(self, average='weighted'):
        """ F1 score for a multi-class task, see https://zhuanlan.zhihu.com/p/64315175

        Computed by sklearn from ``self.gt`` and ``self.pred`` and rounded to
        4 decimal places.

        :param average: (descriptions below are the original author's notes)
            weighted: F1 per class, then averaged weighted by sample count
            macro: F1 per class, then a plain mean (with unbalanced samples a
                class that appears only once still has a large influence)
            micro: F1 over all samples computed in the binary fashion,
                equivalent to accuracy
            all: the author's own extension, returns a dict with the three
                results under the keys 'f1_weighted', 'f1_macro', 'f1_micro'
        :return: a rounded score, or a dict of scores when ``average`` is 'all'
        """
        check_install_package('sklearn', 'scikit-learn')
        from sklearn.metrics import f1_score as sklearn_f1_score

        if average != 'all':
            score = sklearn_f1_score(self.gt, self.pred, average=average)
            return round(score, 4)

        results = {}
        for kind in ('weighted', 'macro', 'micro'):
            results[f'f1_{kind}'] = self.f1_score(kind)
        return results
Example #8
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2020/06/15
"""
oss2 · PyPI: https://pypi.org/project/oss2/
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('oss2')

import oss2

from pyxllib.file.specialist import File


class OssBucket:
    def __init__(self, bucket_name, endpoint, access_key_id, access_key_secret):
        """ Create the underlying ``oss2.Bucket`` and keep it as ``self.bucket``.

        :param bucket_name: passed to ``oss2.Bucket`` as the bucket name
        :param endpoint: passed to ``oss2.Bucket`` as the endpoint
        :param access_key_id: first argument of ``oss2.Auth``
        :param access_key_secret: second argument of ``oss2.Auth``
        """
        auth = oss2.Auth(access_key_id, access_key_secret)
        self.bucket = oss2.Bucket(auth, endpoint, bucket_name)

    def upload(self, key, localfile, if_exists='replace', force=False):
        """ 如果云端已存在,默认会进行覆盖

        :param key: 上传后存储的文件名
        :param localfile: 本地文件
        :param if_exists:
            replace, 如果oss上已存在也替换掉
Example #9
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/06/06 17:00

from pyxllib.prog.pupil import check_install_package

# 拼写检查库,即词汇库
#   spellchecker模块主要有两个类,SpellChecker和WordFrequency
#       WordFrequency是一个词频类
#       一般导入SpellChecker就行了:from spellchecker import SpellChecker
check_install_package('pyspellchecker')

from spellchecker import SpellChecker

from pyxllib.debug.pupil import dprint


class MySpellChecker(SpellChecker):
    """
    拼写检查
    190923周一21:54,源自 完形填空ocr 识别项目
    """
    def __init__(self,
                 language="en",
                 local_dictionary=None,
                 distance=2,
                 tokenizer=None,
                 case_sensitive=False,
                 df=None):
Example #10
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/04/04 17:03
""" 专门给utools的快捷命令扩展的一系列python工具库
"""
from pyxllib.prog.pupil import check_install_package

check_install_package('fire')
check_install_package('humanfriendly')
check_install_package('pandas')
check_install_package('pyautogui',
                      'PyAutoGui')  # 其实pip install不区分大小写,不过官方这里安装是驼峰名

import pathlib
import pyperclip
import re
import datetime
import json
import os

import fire
from humanfriendly import format_timespan
import pandas as pd
import pyautogui

from pyxllib.robot.autogui import type_text, clipboard_decorator
from pyxllib.file.specialist import File, Dir
from pyxllib.debug.specialist import browser, TicToc, parse_datetime
Example #11
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/08/31 09:56

from pyxllib.prog.pupil import check_install_package

check_install_package('win32com', 'pypiwin32')

import json
import os
import re

import pythoncom
from win32com.client import constants
import win32com.client as win32

from pyxllib.prog.newbie import RunOnlyOnce
from pyxllib.prog.pupil import DictTool, EnchantBase, EnchantCvt
from pyxllib.text.pupil import strwidth
from pyxllib.debug.specialist import File, Dir, get_etag, browser


def __docx():
    """ Wrappers related to python-docx.

    The function itself does nothing; it appears to serve only as a section
    marker in the file.
    """


class DocxTools:
Example #12
0
代码中,gt指ground truth,真实标注
    dt指detection,模型检测出来的结果

除了 label.py 中定义的
    CocoGtData 专门处理 gt 格式数据
    CocoData 同时处理 gt dt 格式数据
这里对外有两个类
    CocoEval 计算coco指标
    CocoMatch 进行一些高级的结果分析
        生成的结果可以用 xllabelme 打开 (pip install xllabelme)
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('xlcocotools')

from collections import ChainMap, defaultdict, Counter
import copy
import json
import os
import pathlib
import random
import sys

import pandas as pd
from PIL import Image
from tqdm import tqdm

from pyxllib.stdlib.zipfile import ZipFile
from pyxllib.prog.newbie import round_int
Example #13
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2020/06/02 16:06

from pyxllib.prog.pupil import check_install_package

check_install_package('fitz', 'PyMuPdf>=1.18.17')

import json
import os
import pprint
import re

import fitz

from pyxllib.prog.newbie import round_int, RunOnlyOnce, decode_bitflags
from pyxllib.prog.pupil import DictTool, EnchantBase, EnchantCvt
from pyxllib.algo.newbie import round_unit
from pyxllib.algo.pupil import get_number_width
from pyxllib.file.specialist import File, Dir, writefile, get_etag
from pyxllib.debug.pupil import dprint
from pyxllib.debug.specialist import browser
from pyxllib.cv.expert import xlcv, xlpil
from pyxllib.data.labelme import LabelmeDict


def __fitz():
    """ Print the docstring of the imported ``fitz`` module. """
    module_doc = fitz.__doc__
    print(module_doc)
Example #14
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/06/03 14:26
"""
并查集相关功能
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('disjoint_set', 'disjoint-set==0.6.3')

from itertools import combinations

from disjoint_set import DisjointSet


def disjoint_set(items, join_checker):
    """ 按照一定的相连规则分组

    :param items: 项目清单
    :param join_checker: 检查任意两个对象是否相连,进行分组
    :return:

    算法:因为会转成下标,按照下标进行分组合并,所以支持items里有重复值,或者unhashable对象

    >>> disjoint_set([-1, -2, 2, 0, 0, 1], lambda x, y: x*y>0)
    [[-1, -2], [2, 1], [0], [0]]
    """
Example #15
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/05/26 17:24

from pyxllib.prog.pupil import check_install_package

check_install_package('qtpy', 'QtPy')

import json
import os.path as osp
import sys
import time

from PyQt5.QtCore import pyqtSignal
from qtpy import QtWidgets
from qtpy import QtGui
from qtpy.QtWidgets import QFrame, QInputDialog, QApplication

from pyxllib.prog.newbie import CvtType

here = osp.dirname(osp.abspath(__file__))


class QHLine(QFrame):
    """ A QFrame configured as a horizontal line with a sunken shadow.

    https://stackoverflow.com/questions/5671354/how-to-programmatically-make-a-horizontal-line-in-qt
    """

    def __init__(self):
        super().__init__()
        # Frame shape and shadow together give the separator-line look.
        self.setFrameShape(QFrame.HLine)
        self.setFrameShadow(QFrame.Sunken)
Example #16
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2020/06/06

from pyxllib.prog.pupil import check_install_package

check_install_package('pyautogui')
check_install_package('keyboard')
check_install_package('klembord')

from collections import defaultdict
import json
import os
import time

import numpy as np
from pandas.api.types import is_list_like
import pyautogui
import pyscreeze  # NOQA pyautogui安装的时候会自动安装依赖的pyscreeze

from pyxllib.prog.newbie import first_nonnone, round_int
from pyxllib.prog.pupil import xlwait, DictTool, check_install_package
from pyxllib.algo.geo import ComputeIou, ltrb2xywh, xywh2ltrb
from pyxllib.algo.shapelylib import ShapelyPolygon
from pyxllib.file.specialist import File, Dir
from pyxllib.debug.specialist import TicToc
from pyxllib.cv.expert import xlcv, xlpil
from pyxllib.data.labelme import LabelmeDict
Example #17
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/06/06 17:01

from pyxllib.prog.pupil import check_install_package

# 这个需要C++14编译器 https://download.microsoft.com/download/5/f/7/5f7acaeb-8363-451f-9425-68a90f98b238/visualcppbuildtools_full.exe
# 在需要的时候安装,防止只是想用pyxllib很简单的功能,但是在pip install阶段处理过于麻烦
# MatchSimString计算编辑距离需要
check_install_package('Levenshtein', 'python-Levenshtein')

import Levenshtein
import pandas as pd

from pyxllib.text.pupil import briefstr
from pyxllib.debug.specialist.common import dataframe_str


class MatchSimString:
    """匹配近似字符串

    mss = MatchSimString()

    # 1 添加候选对象
    mss.append_candidate('福州+厦门2018初数暑假讲义-请录入-快乐学习\初一福厦培优-测试用')
    mss.append_candidate('2018_快乐数学_六年级_秋季_第01讲_圆柱与圆锥_教案(教师版)')
    mss.append_candidate('删除所有标签中间多余的空白')

    # 2 需要匹配的对象1
Example #18
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/06/08 22:53
"""
TODO 写一些图片相似度相关功能
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('imagehash', 'ImageHash')

import imagehash
Example #19
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2020/05/30 21:14

"""
百度人工智能API接口
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('aip', 'baidu-aip')

import aip
import base64
import cv2

from pyxllib.prog.pupil import is_url
from pyxllib.prog.specialist import XlOsEnv
from pyxllib.debug.specialist import TicToc
from pyxllib.cv.expert import xlcv


class AipOcr(aip.AipOcr):
    """
    封装该类
        目的1:合并输入文件和url的识别
        目的2:带透明底的png百度api识别不了,要先转成RGB格式
    """
Example #20
0
使用gitpython库,在python调用git进行一些版本分析的功能

Git
    list_commits,输出仓库的commit历史记录
    bcompare,对比一个文件在不同版本的内容,也会输出这个文件的历史commit清单
        show,获得一个文件某个版本的文本

TODO 清单
1、输入一个sha,分析某一次commit的细节(GUI有相应功能,不紧急)
2、按照周几、24小时制、时间轴等判断提交频率,结合files_changed、insertions、deletions判断工作量(不紧急)
3、将数据以图片的直观形式展现
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('git', 'gitpython')

import os
import re

import git
import pandas as pd

from pyxllib.prog.newbie import swap_rowcol
from pyxllib.text.pupil import digit2weektag
from pyxllib.file.specialist import Dir, File, filesmatch
from pyxllib.debug.pupil import dprint
from pyxllib.debug.specialist import dataframe_str, bcompare


class Git:
Example #21
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/06/06 16:57

from pyxllib.prog.pupil import check_install_package

check_install_package('ahocorasick', 'pyahocorasick')

from collections import Counter
import re

import ahocorasick


def make_automaton(words):
    """ Build an Aho-Corasick automaton from a sequence of word patterns.

    Each word is registered with the value ``(index, word)``, where index is
    the word's position in ``words``.

    :param words: iterable of patterns to add
    :return: the ``ahocorasick.Automaton`` after ``make_automaton()`` was called
    """
    automaton = ahocorasick.Automaton()
    for position, pattern in enumerate(words):
        payload = (position, pattern)
        automaton.add_word(pattern, payload)
    automaton.make_automaton()
    return automaton


def count_words(content, word, scope=2, exclude=None):
    # 1 统计所有词汇出现次数
    c = Counter()
    c += Counter(re.findall(f'.{{,{scope}}}{word}.{{,{scope}}}', content))
    # 2 排除掉不处理的词 (注意因为这里每句话都已经是被筛选过的,所以处理比较简单,并不需要复杂到用区间集处理)
    if exclude:
Example #22
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2020/06/03 09:52

from pyxllib.prog.pupil import check_install_package

check_install_package('bidict')
check_install_package('sqlalchemy')
check_install_package('mysqlclient')

import math

from bidict import bidict
import pandas as pd
import sqlalchemy

from pyxllib.file.specialist import File

SQL_LIB_ACCOUNT_FILE = File(__file__).parent / 'sqllibaccount.pkl'


def create_account_df(file='sqllibaccount.pkl'):
    """请在这里设置您个人的账户密码,并在运行完后,销毁明文信息"""
    df = pd.DataFrame.from_records(
        [
            ['ckz', 'rm.sbsql.rds.aliyuncs.com', '', '', 'dddddd'],
            ['ckzlocal', '0.0.0.0', '', '', 'eeeeee'],
        ],
        columns=['index_name', 'host', 'port', 'user', 'passwd'])
Example #23
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2020/06/02
"""
扩展了些自己的openpyxl工具
"""

from pyxllib.prog.pupil import check_install_package

check_install_package('openpyxl')
check_install_package('premailer')
check_install_package('xlrd2')
check_install_package('yattag')

import re

import openpyxl
from openpyxl import Workbook
from openpyxl.cell.cell import MergedCell
from openpyxl.styles import Font
from openpyxl.utils.cell import get_column_letter
import pandas as pd

from pyxllib.prog.newbie import RunOnlyOnce
from pyxllib.prog.pupil import EnchantBase, EnchantCvt
from pyxllib.algo.specialist import product
from pyxllib.debug.pupil import dprint
from pyxllib.debug.specialist import browser
Example #24
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 陈坤泽
# @Email  : [email protected]
# @Date   : 2021/06/03 20:41

from pyxllib.prog.pupil import check_install_package

check_install_package('paramiko')
check_install_package('scp')
# 对 paramiko 进一步封装的库
# check_install_package('fabric')

import os
import re
import pathlib

import paramiko
from tqdm import tqdm
import scp as scplib
import humanfriendly

from pyxllib.algo.pupil import natural_sort
from pyxllib.file.specialist import XlPath
from pyxllib.debug.specialist import get_xllog

logger = get_xllog('location')


class SshCommandError(Exception):
    """ Exception subclass for ssh command errors; adds nothing to Exception. """