Beispiel #1
0

    def sql_name(self):
        """Return the patent and ruling counters of this instance as a dict.

        Returns:
            dict: ``{"patent_count": ..., "caiding_count": ...}`` filled from
            the attributes of the same names.
        """
        return {
            "patent_count": self.patent_count,
            "caiding_count": self.caiding_count,
        }

    def run(self):
        """Call ``com_true`` and hand its result back unchanged."""
        return self.com_true()

# Script entry point: walks every row of jugement_table_0807 and keeps a set
# of counters.  Only the start of the loop body is visible in this fragment.
if __name__ == '__main__':
    # Two separate connections are opened; only ``db`` is used in the lines
    # shown here (``db1`` is presumably for writes further down - unverified).
    db = OracleOperation()
    db1 = OracleOperation()
    # Total row count, fetched for the (currently disabled) progress bar.
    db.execute_sql('select count(*) from jugement_table_0807')
    count_bar = db.get_one()[0]
    # _bar = tqdm(total=count_bar)
    # Re-issue the query so that get_one() now streams the actual rows.
    db.execute_sql('select * from jugement_table_0807')
    # Counters, all starting at zero; only all_count is updated in this fragment.
    null_count = 0
    panjue_count = 0
    all_count = 0
    caiding_count = 0
    patent_count = 0
    print('开始--------')
    while True :
        data = db.get_one()
        # A falsy row ends the scan.
        if not data:
            break
        # _bar.update(1)
        all_count+=1
Beispiel #2
0
# coding:utf-8
from tools.shujuku_conn.oracle_conn import OracleOperation
import re

db = OracleOperation()
DATA = db.execute_sql(
    "SELECT case_id,clear_about_patent FROM jugement_table_0728_re_qx where identify='专利号'"
)
data = db.get_data()
num = 0
#57687
for concent in data:
    case_id = concent[0]
    try:
        fmt_content_n = concent[1]
        data_num = []
        num_1_1 = re.findall(
            '[ZZzzCCcc][LLllNNnn](8[5-9][1-3]\d{5})\.?[\dXxXx×]?',
            fmt_content_n)
        num_1_2 = re.findall(
            '专利号为?是?\s?:?[ZZzzCCcc]?[LLllNNnn]?(8[5-9][1-3]\d{5})\.?[\dXxXx×]?',
            fmt_content_n)
        num_1_3 = re.findall(
            '申请号为?是?\s?:?[ZZzzCCcc]?[LLllNNnn]?(8[5-9][1-3]\d{5})\.?[\dXxXx×]?',
            fmt_content_n)
        num_1_4 = re.findall(
            '[ZZzzCCcc][LLllNNnn](9[0-9][1-3]\d{5})\.?[\dXxXx×]?',
            fmt_content_n)
        num_1_5 = re.findall(
            '专利号为?是?\s?:?[ZZzzCCcc]?[LLllNNnn]?(9[0-9][1-3]\d{5})\.?[\dXxXx×]?',
            fmt_content_n)
Beispiel #3
0
class Patent_num(object):
    """Second-pass extractor for patent numbers and patent names.

    Rows of ``jugement_table_0807_re_qx`` are read through ``self.db``; the
    text in the fourth column is run through the regex extractors below, and
    the row plus the extracted value is inserted into a target table through
    ``self.db1``.  ``OracleOperation``, ``tqdm`` and ``re`` must be provided
    by the surrounding module.
    """

    # Characters accepted wherever a digit is expected (presumably masked or
    # check characters in the source documents - confirm with the data owner).
    _MASK = 'XxXx×'
    # Longest number any pattern is meant to yield.
    _MAX_NUMBER_LEN = 12
    # case_ids whose insert failed are appended to this file.
    _FAILED_LOG = '0807.txt'
    # A run of text between quotation marks / book-title marks.
    _QUOTED = '[”‘“"《]([^”"’“‘,;。《]+)[“’”"》]'

    # Compiled pattern caches, filled on first use.
    _number_regexes = None
    _name_regexes = None

    def __init__(self):
        self.db = OracleOperation()   # connection used for reading
        self.db1 = OracleOperation()  # connection used for inserting
        self.case_id = ''
        self.clear_one = ''
        self.clear_one_name = ''
        self.case_reason = ''
        self.case_name = ''
        self.num = 0                  # number of rows inserted successfully

    @staticmethod
    def _sql_text(value):
        """Render *value* for use inside a single-quoted SQL text literal."""
        # A doubled quote is how SQL spells a quote inside a text literal;
        # without this any value containing ' breaks the statement.
        return str(value).replace("'", "''")

    def _reextract(self, identify, target_table, text_attr, extract):
        """Copy every row tagged *identify* into *target_table*.

        Args:
            identify: value of the ``identify`` column selecting the rows.
            target_table: table receiving the row plus the extracted value.
            text_attr: attribute of ``self`` that receives the row's text
                before *extract* is called.
            extract: zero-argument callable returning the extracted string.

        Rows whose insert fails are recorded by case_id in ``_FAILED_LOG``
        and the scan continues.
        """
        source = "jugement_table_0807_re_qx where identify='{}'".format(identify)
        self.db.execute_sql("select count(*)  from " + source)
        progress = tqdm(total=self.db.get_one()[0])
        self.db.execute_sql("select * from " + source)
        quote = self._sql_text
        try:
            while True:
                row = self.db.get_one()
                if not row:
                    break
                # Count only real rows so the bar ends exactly at its total.
                progress.update(1)
                self.case_id = row[0]
                self.case_reason = row[1]
                self.case_name = row[2]
                setattr(self, text_attr, row[3])
                extracted = extract()
                # NOTE(review): the statement is still built as text because
                # the OracleOperation API is not visible here; switch to bind
                # variables if execute_sql supports them.
                statement = (
                    "insert into  {} values ('{}','{}','{}','{}','{}','{}')"
                ).format(target_table, quote(self.case_id),
                         quote(self.case_reason), quote(self.case_name),
                         quote(row[3]), identify, quote(extracted))
                try:
                    self.db1.execute_sql(statement)
                    self.num += 1
                except Exception:
                    # Best effort: remember the failing case and carry on.
                    # (Not a bare ``except`` so Ctrl-C still stops the run.)
                    with open(self._FAILED_LOG, 'a', encoding='utf-8') as log:
                        log.write('{}\n'.format(self.case_id))
        finally:
            progress.close()

    def _sqlexe_num(self):
        """Re-extract patent numbers for every row tagged '专利号'."""
        self._reextract('专利号', 'jugement_table_0807_re_qx_bei',
                        'clear_one', self.re_num)

    def _sqlexe_name(self):
        """Re-extract patent names for every row tagged '专利名称'."""
        self._reextract('专利名称', 'jugement_table_0807_re_qx_name',
                        'clear_one_name', self.re_name)

    @classmethod
    def _number_patterns(cls):
        """Return the compiled patent-number patterns, building them once.

        Every pattern has exactly one capture group holding the number
        without prefix and without the optional ``.d`` suffix.
        """
        if cls._number_regexes is None:
            mask = cls._MASK

            def one_of(chars):
                # Character class for *chars* that also accepts the mask chars.
                return '[' + chars + mask + ']'

            digit = one_of(r'\d')
            check = r'\.?' + digit + '?'          # optional ".d" suffix
            prefix = '[ZZzzCCcc][LLllNNnn]'        # ZL / CN style prefix
            opt_prefix = '[ZZzzCCcc]?[LLllNNnn]?'
            # Character that may precede a bare number.
            boundary = r'[^\dLLllNNnn' + mask + ']'
            short_tail = one_of('12389') + digit + '{5}'
            long_tail = one_of('12389') + digit + '{7}'
            bodies = (
                # 8-character numbers starting 85-89, 9x and 00-03
                one_of('8') + one_of('5-9') + short_tail,
                one_of('9') + one_of('0-9') + short_tail,
                one_of('0') + one_of('0-3') + short_tail,
                # 12-character numbers starting 2003-2009, 201x and 2020
                one_of('2') + one_of('0') + one_of('0') + one_of('3-9') + long_tail,
                one_of('2') + one_of('0') + one_of('1') + one_of('0-9') + long_tail,
                one_of('2') + one_of('0') + one_of('2') + one_of('0') + long_tail,
            )
            patterns = []
            for body in bodies:
                patterns.append('(^' + body + ')' + check)          # at text start
                patterns.append(prefix + '(' + body + ')' + check)  # after ZL/CN
                # The boundary character stays outside the group so it never
                # leaks into the extracted number.
                patterns.append(boundary + '(' + body + ')' + check)

            # Catch-all: any 8-12 character number after a known label.
            generic = '(' + digit + '{8,12})' + check
            for label in ('专利号?', '申请号?', '外观设计', '发明', '实用新型', '涉案'):
                patterns.append(label + r'为?是?\s?:?' + opt_prefix + generic)
            patterns.append('[ZZzzCCcc]?[LLllNNnn]' + generic)
            patterns.append(boundary + generic)
            cls._number_regexes = tuple(re.compile(p) for p in patterns)
        return cls._number_regexes

    @classmethod
    def _name_patterns(cls):
        """Return the compiled patent-name patterns, building them once.

        Every pattern has exactly one capture group holding the name.
        """
        if cls._name_regexes is None:
            opening = '[”‘“"《]'
            closing = '[“’”"》]'
            # "strict" stops at any quotation mark, "loose" only at ”.
            strict = opening + '([^”"’“‘,;。《]+)' + closing
            loose = opening + '([^”,;。《]+)' + closing
            after_quote = r'[,、]?\w{0,2}'
            unquoted = '([^。”;"’“‘]+)' + r'[的、,((]?\w{0,2}'
            kinds_after = ('专利', '发明', '实用新型', '外观', '申请')
            kinds_before = ('发明', '专利', '实用新型', '外观设?计?', '申请')
            kinds_unquoted = ('发明', '专利', '实用新型', '外观', '申请')

            patterns = []
            # quoted name followed by a patent-kind word
            for quoted in (strict, loose):
                patterns.extend(quoted + after_quote + kind
                                for kind in kinds_after)
            # patent-kind word (+ optional 名称为) followed by a quoted name
            for quoted in (strict, loose):
                patterns.extend(kind + '名?称?为?' + quoted
                                for kind in kinds_before)
            patterns.append('名称为?' + strict)
            patterns.append('名称为?' + loose)
            # unquoted name introduced by 名称为 / 名为, ended by a kind word
            for lead in ('名称为?', '名为'):
                patterns.extend(lead + unquoted + kind
                                for kind in kinds_unquoted)
            cls._name_regexes = tuple(re.compile(p) for p in patterns)
        return cls._name_regexes

    def re_num(self):
        """Extract patent numbers from ``self.clear_one``.

        Returns:
            str: the distinct numbers in first-seen order, followed by every
            quoted phrase found in the text, joined with ``;``.  An empty
            string is returned when no number is found (quoted phrases alone
            do not count) or when the text is empty/``None``.
        """
        text = self.clear_one or ''
        found = []
        for regex in self._number_patterns():
            found.extend(regex.findall(text))
        if not found:
            return ''
        limit = self._MAX_NUMBER_LEN
        # dict.fromkeys removes duplicates but keeps a stable order.
        numbers = list(dict.fromkeys(number[:limit] for number in found))
        return ';'.join(numbers + re.findall(self._QUOTED, text))

    def re_name(self):
        """Extract patent names from ``self.clear_one_name``.

        Returns:
            str: the distinct names in first-seen order joined with ``;``,
            or an empty string when nothing matches.
        """
        text = self.clear_one_name or ''
        found = []
        for regex in self._name_patterns():
            found.extend(regex.findall(text))
        return ';'.join(name for name in dict.fromkeys(found) if name != '')

    def run(self):
        """Run the name extraction pass and report how many rows were written."""
        # self._sqlexe_num()
        self._sqlexe_name()
        print('已完成共', self.num, '条')
Beispiel #4
0
# coding:utf-8
import re
from caiduanwenshu.just_about_patent import just_about_patent
from tools.shujuku_conn.oracle_conn import OracleOperation
import re

if __name__ == '__main__':
    db = OracleOperation()
    db1 = OracleOperation()
    db.execute_sql(
        "select * from jugement_table_0807 where case_id='{}'".format(
            '258329cf5c82cef3e8ee6ba60c4c28f00'))
    null_count = 0
    panjue_count = 0
    all_count = 0
    caiding_count = 0
    patent_count = 0
    print('开始--------')
    while True:
        data = db.get_one()
        if not data:
            break
        all_count += 1
        case_id = data[0]
        case_name = data[1]
        if case_name == None:
            case_name = ''
        try:
            case_reason = data[2]
            if case_reason == None:
                case_reason = ''
Beispiel #5
0
class just_about_patent(object):
    """Extract patent numbers, names and publication numbers from judgments.

    Rows of ``jugement_table_0807`` are read through ``self.db``; documents
    whose case name or case reason mentions a patent-related keyword are run
    through the regex extractors below and the result is inserted into
    ``jugement_table_0807_re1`` through ``self.db1``.  ``OracleOperation``,
    ``tqdm`` and ``re`` must be provided by the surrounding module.
    """

    # case_ids whose insert failed are appended to this file.
    _FAILED_LOG = '0807.txt'
    # A document is processed when any of these occurs in name or reason.
    _KEYWORDS = ('知识产权', '专利', '发明', '实用新型', '外观设计', '设计人')
    # Replacements applied in this order to undo HTML entities in the text.
    _ENTITIES = (
        ('&amp;', '&'), ('&times;', '×'), ('&ldquo;', '“'),
        ('&rdquo;', '”'), ('&lt;', '<'), ('&gt;', '>'), ('&quot;', '“'),
        ('&hellip;', '…'), ('&middot;', '·'), ('&mdash;', '—'),
        ('&nbsp;', ' '), ('01lydyh01', '"'),
    )
    # Headings whose line (heading to end of line) is removed when it occurs
    # after the first 35% of a document.
    _TRAILING_HEADINGS = ('参考文献', '附件', '对比文件', '现有技术抗辩', '证据')
    # Characters accepted wherever a digit is expected (presumably masked or
    # check characters in the source documents - confirm with the data owner).
    _MASK = 'XxXx×'
    _PUBLIC_NUMBER = r'[CcCc][NnNn][1-3]\d{5,9}[ABUSCDY]?[1-9]?'

    # Compiled pattern caches, filled on first use.
    _number_regexes = None
    _name_regexes = None

    def __init__(self):
        self.fmt_content_n = ''
        self.case_id = ''
        self.case_name = ''
        self.case_reason = ''
        self.db = OracleOperation()   # connection used for reading
        self.db1 = OracleOperation()  # connection used for inserting
        self.patent_count = 0
        self.caiding_count = 0
        self.null_count = 0
        self.panjue_count = 0
        self.all_count = 0

    @staticmethod
    def _sql_text(value):
        """Render *value* for use inside a single-quoted SQL text literal."""
        # A doubled quote is how SQL spells a quote inside a text literal;
        # without this any value containing ' breaks the statement.
        return str(value).replace("'", "''")

    @staticmethod
    def _join_unique(matches):
        """Join the distinct non-empty *matches* with ``;`` in first-seen order."""
        return ';'.join(m for m in dict.fromkeys(matches) if m != '')

    def com_true(self):
        """Run all extractors if the current document is patent related.

        The document counts as patent related when any keyword occurs in
        ``self.case_name`` or ``self.case_reason``.

        Returns:
            dict | None: ``{"patent_num", "patent_name", "public_num"}`` for a
            patent-related document, otherwise ``None``.
        """
        related = any(key in self.case_name or key in self.case_reason
                      for key in self._KEYWORDS)
        if not related:
            return None
        self.patent_count += 1
        self.re_name(self.case_name, self.fmt_content_n)
        return {
            "patent_num": self.re_patent_num(),
            "patent_name": self.re_patent_name(),
            "public_num": self.re_public_num(),
        }

    def re_name(self, case_name, fmt_content):
        """Normalise *fmt_content* and store it in ``self.fmt_content_n``.

        HTML entities are replaced first.  Unless *case_name* contains '裁定',
        every line fragment that starts with one of ``_TRAILING_HEADINGS``
        and lies after the first 35% of the text is then removed; for '裁定'
        documents ``self.caiding_count`` is incremented and the text is kept
        whole.
        """
        text = fmt_content
        for entity, char in self._ENTITIES:
            text = text.replace(entity, char)

        if '裁定' in case_name:
            self.caiding_count += 1
            self.fmt_content_n = text
            return

        cut = int(round(len(text) * 0.35, 0))
        tail = text[cut:]
        # Collect all fragments before removing any, then remove them one by
        # one: joining several matches into one string (as was done before)
        # produces text that does not occur in the document, so nothing was
        # removed whenever a heading appeared on more than one line.
        fragments = []
        for heading in self._TRAILING_HEADINGS:
            fragments.extend(re.findall(heading + '.*', tail))
        for fragment in fragments:
            text = text.replace(fragment, '')
        self.fmt_content_n = text

    @classmethod
    def _number_patterns(cls):
        """Return the compiled patent-number patterns, building them once."""
        if cls._number_regexes is None:
            mask = cls._MASK

            def one_of(chars):
                # Character class for *chars* that also accepts the mask chars.
                return '[' + chars + mask + ']'

            digit = one_of(r'\d')
            check = r'\.?' + digit + '?'          # optional ".d" suffix
            prefix = '[ZZzzCCcc][LLllNNnn]'        # ZL / CN style prefix
            opt_prefix = '[ZZzzCCcc]?[LLllNNnn]?'
            # Character that may precede a bare number.
            boundary = r'[^\dLLllNNnn' + mask + ']'
            quoted = '[”‘“"《][^”"’“‘,;。《]+[“’”"》]'
            short_tail = one_of('12389') + digit + '{5}'
            long_tail = one_of('12389') + digit + '{7}'
            bodies = (
                # 8-character numbers starting 85-89, 9x and 00-03
                one_of('8') + one_of('5-9') + short_tail,
                one_of('9') + one_of('0-9') + short_tail,
                one_of('0') + one_of('0-3') + short_tail,
                # 12-character numbers starting 2003-2009, 201x and 2020
                one_of('2') + one_of('0') + one_of('0') + one_of('3-9') + long_tail,
                one_of('2') + one_of('0') + one_of('1') + one_of('0-9') + long_tail,
                one_of('2') + one_of('0') + one_of('2') + one_of('0') + long_tail,
            )
            patterns = []
            for body in bodies:
                number = body + check
                # bare number
                patterns.append(boundary + '(' + number + ')')
                # number with ZL/CN prefix (whole match is returned)
                patterns.append(prefix + number)
                # number followed by its quoted name
                patterns.append(r'[^\d](' + opt_prefix + number +
                                '号?、?名?称?为?' + quoted + ')')
                # quoted name followed by "第<number>号"
                patterns.append(quoted + '的?第' + opt_prefix + number + '号')

            # Catch-all: any 8-12 character number after a known label.
            any_number = digit + '{8,12}' + check
            lead = r'为?是?\s?:?'
            # NOTE(review): these two have no capture group, so the label
            # itself is part of the returned text, unlike the four below.
            patterns.append('专利号?' + lead + opt_prefix + any_number)
            patterns.append('申请号?' + lead + opt_prefix + any_number)
            for label in ('外观设计', '发明', '实用新型', '涉案'):
                patterns.append(label + lead + '(' + opt_prefix + any_number + ')')
            patterns.append('[^a-zA-Z](' + prefix + any_number + ')')
            cls._number_regexes = tuple(re.compile(p) for p in patterns)
        return cls._number_regexes

    @classmethod
    def _name_patterns(cls):
        """Return the compiled patent-name patterns, building them once.

        None of the patterns has a capture group, so each match is returned
        together with its surrounding cue words and quotation marks.
        """
        if cls._name_regexes is None:
            opening = '[”‘“"《]'
            closing = '[“’”"》]'
            # "strict" stops at any quotation mark, "loose" only at ”.
            strict = opening + '[^”"’“‘,;。《]+' + closing
            loose = opening + '[^”,;。《]+' + closing
            after_quote = r'[,、]?\w{0,2}'
            unquoted = r'\w[^。”;"’“‘]+[的、,((]?\w{0,2}'
            kinds_after = ('专利', '发明', '实用新型', '外观', '申请')
            kinds_before = ('发明', '专利', '实用新型', '外观设?计?', '申请')
            kinds_unquoted = ('发明', '专利', '实用新型', '外观', '申请')

            patterns = []
            # quoted name followed by a patent-kind word
            for quoted in (strict, loose):
                patterns.extend(quoted + after_quote + kind
                                for kind in kinds_after)
            # patent-kind word (+ optional 名称为) followed by a quoted name
            for quoted in (strict, loose):
                patterns.extend(kind + '名?称?为?' + quoted
                                for kind in kinds_before)
            patterns.append('名称为?' + strict)
            patterns.append('名称为?' + loose)
            # unquoted name introduced by 名称为 / 名为, ended by a kind word
            for lead in ('名称为?', '名为'):
                patterns.extend(lead + unquoted + kind
                                for kind in kinds_unquoted)
            cls._name_regexes = tuple(re.compile(p) for p in patterns)
        return cls._name_regexes

    def _find_all(self, regexes):
        """Return every match of *regexes* in ``self.fmt_content_n``."""
        text = self.fmt_content_n
        found = []
        for regex in regexes:
            found.extend(regex.findall(text))
        return found

    def re_patent_num(self):
        """Return the distinct patent numbers of the document joined with ``;``.

        The order is first-seen and therefore stable between runs; an empty
        string is returned when nothing matches.
        """
        return self._join_unique(self._find_all(self._number_patterns()))

    def re_patent_name(self):
        """Return the distinct patent-name matches joined with ``;``.

        The order is first-seen and therefore stable between runs; an empty
        string is returned when nothing matches.
        """
        return self._join_unique(self._find_all(self._name_patterns()))

    def re_public_num(self):
        """Return the distinct CN publication numbers joined with ``;``."""
        return self._join_unique(
            re.findall(self._PUBLIC_NUMBER, self.fmt_content_n))

    def _sqlexe(self):
        """Scan ``jugement_table_0807`` and store the extraction results.

        Counters updated per row: ``all_count`` always, ``null_count`` when
        the document column is NULL, ``panjue_count`` when the document is
        not patent related.  Rows whose insert fails are recorded by case_id
        in ``_FAILED_LOG`` and the scan continues.
        """
        self.db.execute_sql('select count(*) from jugement_table_0807')
        progress = tqdm(total=self.db.get_one()[0])
        self.db.execute_sql('select * from jugement_table_0807')
        quote = self._sql_text
        try:
            while True:
                row = self.db.get_one()
                if not row:
                    break
                # Count only real rows so the bar ends exactly at its total.
                progress.update(1)
                self.all_count += 1
                self.case_id = row[0]
                self.case_name = '' if row[1] is None else row[1]
                self.case_reason = '' if row[2] is None else row[2]
                document = row[3]
                if document is None:
                    self.null_count += 1
                    continue
                # The document column is read through .read() (LOB-style).
                self.fmt_content_n = document.read()
                extracted = self.com_true()
                if extracted is None:
                    self.panjue_count += 1
                    continue
                # NOTE(review): the statement is still built as text because
                # the OracleOperation API is not visible here; switch to bind
                # variables if execute_sql supports them.
                sql = (
                    "insert into jugement_table_0807_re1 values "
                    "('{}','{}','{}','{}','{}','{}')"
                ).format(quote(self.case_id), quote(self.case_reason),
                         quote(extracted["patent_num"]),
                         quote(extracted["patent_name"]),
                         quote(extracted["public_num"]),
                         quote(self.case_name))
                try:
                    self.db1.execute_sql(sql)
                except Exception:
                    # Best effort: remember the failing case and carry on.
                    # (Not a bare ``except`` so Ctrl-C still stops the run.)
                    with open(self._FAILED_LOG, 'a', encoding='utf-8') as log:
                        log.write('{}\n'.format(self.case_id))
        finally:
            progress.close()

    def run(self):
        """Process the whole table and print the final counters."""
        self._sqlexe()
        print('共', self.all_count, '条', '--', '专利文书数量', self.patent_count,
              '--', '裁定文书数量', self.caiding_count, '--', '判决文书等数量',
              self.panjue_count, '--', '文书为空数量', self.null_count)
Beispiel #6
0
# coding:utf-8
from tools.shujuku_conn.oracle_conn import OracleOperation
import re


# Fill the empty clear_two column of caiduanwenshu_0728_finally with the
# 6-9 digit numbers found in clear_one, for rows tagged '商标号和商标名称'.
db = OracleOperation()
DATA = db.execute_sql("SELECT * FROM caiduanwenshu_0728_finally where clear_two is null and identify='商标号和商标名称'")
data = db.get_data()
num = 0  # rows handled so far (updated rows and failed rows alike)
#43741
for concent in data:
    case_id = concent[0]
    try:
        clear_one = concent[1]
        identify = concent[3]
        # dict.fromkeys removes duplicates but keeps first-seen order, so the
        # stored value is the same on every run (a set gave a random order).
        clear_two = list(dict.fromkeys(re.findall(r'\d{6,9}', clear_one)))
        if not clear_two:
            continue
        test_str_clear_two = ';'.join(clear_two)
        # clear_one is free text: double its single quotes so it stays a valid
        # SQL text literal instead of failing the statement.
        # NOTE(review): prefer bind variables if execute_sql supports them.
        sql = "update caiduanwenshu_0728_finally set clear_two='{}' where case_id='{}' and clear_one='{}' and clear_two is null and identify='{}'".format(
            test_str_clear_two, case_id, clear_one.replace("'", "''"), identify)
        db.execute_sql(sql)
        num += 1
        print('已清洗', num, '条')
    except Exception:
        # Best effort: record the failing case_id and continue with the rest.
        num += 1
        with open('log_null.txt', 'a', encoding='utf-8') as aa:
            aa.write(case_id)
            aa.write('\n')
# coding:utf-8
from tools.shujuku_conn.oracle_conn import OracleOperation
import re

db = OracleOperation()
DATA = db.execute_sql(
    "SELECT case_id,fmt_content FROM shangbiao_zhengju_position where case_id = '2ede79fa6ec19b40d67b1f8a8fa594190' "
)
data = db.get_data()
num = 0

for concent in data:
    case_id = concent[0]
    try:
        find_name = '证据'
        fmt_content_clob = concent[1]
        fmt_content = fmt_content_clob.read()
        # 匹配证据的个数
        find_count = fmt_content.count(find_name)
        #匹配到的第一个证据所占的位置百分比
        if len(fmt_content) > 0 or find_count == None:
            if find_count > 0:
                fmt_content_len = len(fmt_content)
                find_name_one_position = fmt_content.find(find_name)
                baifenbi_position_one = int(find_name_one_position) / int(
                    fmt_content_len) * 100
                found = round(baifenbi_position_one, 2)
                baifenbi_position_one_y = str(found) + '%'
                if find_count > 1:
                    #匹配到的第二个证据所占的位置百分比
                    find_name_two_position = fmt_content.find(
Beispiel #8
0
from tools.shujuku_conn.oracle_conn import OracleOperation
import re

db = OracleOperation()
sql ="select case_id from jugement_table_0807_re_qxxx "
db.execute_sql(sql)
data = db.get_data()


for datas in data:
    case_id = datas[0]
    sql1 = "select clear_two from jugement_table_0807_re_qxxx where case_id='{}' and identify='专利名称'".format(case_id)
    db.execute_sql(sql)
    clear_two  = db.get_data()
    num = 0
    for i in name:

Beispiel #9
0
# coding:utf-8
from tools.shujuku_conn.oracle_conn import OracleOperation
import re

db = OracleOperation()
DATA = db.execute_sql(
    'SELECT case_id,case_reason,fmt_content FROM jugement_table_0728')
data = db.get_data()
num = 0

for concent in data:
    case_id = concent[0]
    try:
        case_reason = concent[1]
        fmt_content_clob = concent[2]  # clob
        fmt_content = fmt_content_clob.read()
        if fmt_content == None:
            sql = "insert into jugement_table_0728_re values ('{}','{}','{}','{}','{}','{}','{}')".format(
                case_id, case_reason, '', '', '', '', '')
            db.execute_sql(sql)
            num += 1
            print('已筛选', num, '条')
            continue
        fmt_content_r = fmt_content.replace('&amp;', '&').replace('&times;', '×').replace('&ldquo;', '“').\
            replace('&rdquo;', '”').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '“').\
            replace('&hellip;','…').replace('&middot;', '·').replace('&mdash;', '—').replace('&nbsp;', ' ')
        '''
        现有技术抗辩,参考文献,附件,一般都在文章后边,不限定位置,直接.*删除
        证据需要限定位置,位置还需要讨论
        对比文件需要限定位置,
        还有一种情况是文章中同事出现两种或两种以上