def __init__(self):
    """Create two ``OracleOperation`` handles and reset the working fields."""
    # Two separate handles are created, in this order.
    self.db = OracleOperation()
    self.db1 = OracleOperation()

    # Text fields start out as empty strings.
    self.case_id = self.clear_one = self.clear_one_name = ''
    self.case_reason = self.case_name = ''

    # Counter starts at zero.
    self.num = 0
def __init__(self):
    """Reset the text fields, create two ``OracleOperation`` handles, zero the counters."""
    # Text fields start out as empty strings.
    self.fmt_content_n = self.case_name = self.case_reason = ''

    # Two separate handles are created, in this order.
    self.db = OracleOperation()
    self.db1 = OracleOperation()

    # All statistics counters start at zero.
    self.patent_count = self.caiding_count = self.null_count = 0
    self.panjue_count = self.all_count = 0
# coding:utf-8 from tools.shujuku_conn.oracle_conn import OracleOperation import re db = OracleOperation() DATA = db.execute_sql( "SELECT case_id,clear_about_patent FROM jugement_table_0728_re_qx where identify='专利号'" ) data = db.get_data() num = 0 #57687 for concent in data: case_id = concent[0] try: fmt_content_n = concent[1] data_num = [] num_1_1 = re.findall( '[ZZzzCCcc][LLllNNnn](8[5-9][1-3]\d{5})\.?[\dXxXx×]?', fmt_content_n) num_1_2 = re.findall( '专利号为?是?\s?:?[ZZzzCCcc]?[LLllNNnn]?(8[5-9][1-3]\d{5})\.?[\dXxXx×]?', fmt_content_n) num_1_3 = re.findall( '申请号为?是?\s?:?[ZZzzCCcc]?[LLllNNnn]?(8[5-9][1-3]\d{5})\.?[\dXxXx×]?', fmt_content_n) num_1_4 = re.findall( '[ZZzzCCcc][LLllNNnn](9[0-9][1-3]\d{5})\.?[\dXxXx×]?', fmt_content_n) num_1_5 = re.findall( '专利号为?是?\s?:?[ZZzzCCcc]?[LLllNNnn]?(9[0-9][1-3]\d{5})\.?[\dXxXx×]?', fmt_content_n)
return test_str_public_num_patent def sql_name(self): data_count = {} data_count["patent_count"] = self.patent_count data_count["caiding_count"] = self.caiding_count return data_count def run(self): data = self.com_true() return data if __name__ == '__main__': db = OracleOperation() db1 = OracleOperation() db.execute_sql('select count(*) from jugement_table_0807') count_bar = db.get_one()[0] # _bar = tqdm(total=count_bar) db.execute_sql('select * from jugement_table_0807') null_count = 0 panjue_count = 0 all_count = 0 caiding_count = 0 patent_count = 0 print('开始--------') while True : data = db.get_one() if not data: break
# coding:utf-8 import re from caiduanwenshu.just_about_patent import just_about_patent from tools.shujuku_conn.oracle_conn import OracleOperation import re if __name__ == '__main__': db = OracleOperation() db1 = OracleOperation() db.execute_sql( "select * from jugement_table_0807 where case_id='{}'".format( '258329cf5c82cef3e8ee6ba60c4c28f00')) null_count = 0 panjue_count = 0 all_count = 0 caiding_count = 0 patent_count = 0 print('开始--------') while True: data = db.get_one() if not data: break all_count += 1 case_id = data[0] case_name = data[1] if case_name == None: case_name = '' try: case_reason = data[2] if case_reason == None: case_reason = ''
def _pn_compile_number_patterns():
    """Compile the regexes ``Patent_num.re_num`` uses to capture patent numbers.

    Every pattern has exactly one capture group holding the number itself;
    the optional ``.`` + check digit that may follow is matched but not
    captured.
    """
    # NOTE(review): the duplicated letters in these classes (e.g. ``XxXx``)
    # look like full-width variants that were lost somewhere - confirm.
    x = 'XxXx×'                       # characters used to mask a digit
    d = r'[\d' + x + ']'              # one digit or one masking character
    tail = r'\.?' + d + '?'           # optional ".<check digit>"
    zc = '[ZZzzCCcc]'
    ln = '[LLllNNnn]'
    boundary = r'[^\dLLllNNnn' + x + ']'   # char that cannot belong to a number

    def one_of(chars):
        return '[' + chars + x + ']'

    short = one_of('12389') + d + '{5}'    # type digit + 5-digit serial
    long_ = one_of('12389') + d + '{7}'    # type digit + 7-digit serial
    bodies = (
        one_of('8') + one_of('5-9') + short,                            # 85-89
        one_of('9') + one_of('0-9') + short,                            # 90-99
        one_of('0') + one_of('0-3') + short,                            # 00-03
        one_of('2') + one_of('0') + one_of('0') + one_of('3-9') + long_,  # 2003-2009
        one_of('2') + one_of('0') + one_of('1') + one_of('0-9') + long_,  # 2010-2019
        one_of('2') + one_of('0') + one_of('2') + one_of('0') + long_,    # 2020
    )

    sources = []
    for body in bodies:
        sources.append('(^' + body + ')' + tail)                # at start of text
        sources.append(zc + ln + '(' + body + ')' + tail)       # after ZL / CN
        # The delimiter stays outside the group so it is not captured
        # (the "9x" variant used to capture it by mistake).
        sources.append(boundary + '(' + body + ')' + tail)

    # Catch-all patterns: any 8-12 character number after a known keyword.
    generic = '(' + d + '{8,12})' + tail
    for lead in ('专利号?', '申请号?', '外观设计', '发明', '实用新型', '涉案'):
        sources.append(lead + r'为?是?\s?:?' + zc + '?' + ln + '?' + generic)
    sources.append(zc + '?' + ln + generic)
    sources.append(boundary + generic)
    return tuple(re.compile(source) for source in sources)


def _pn_compile_name_patterns():
    """Compile the regexes ``Patent_num.re_name`` uses to capture patent names.

    Every pattern has exactly one capture group holding the name.
    """
    opener = '[”‘“"《]'
    closer = '[“’”"》]'
    strict = opener + '([^”"’“‘,;。《]+)' + closer   # no quote chars inside
    loose = opener + '([^”,;。《]+)' + closer        # inner quotes tolerated
    bare = '([^。”;"’“‘]+)[的、,((]?' + r'\w{0,2}'    # unquoted name
    trailing = ('专利', '发明', '实用新型', '外观', '申请')
    leading = ('发明名?称?为?', '专利名?称?为?', '实用新型名?称?为?',
               '外观设?计?名?称?为?', '申请名?称?为?')

    sources = []
    # <quoted name> followed by a patent keyword
    for quoted in (strict, loose):
        sources.extend(quoted + r'[,、]?\w{0,2}' + word for word in trailing)
    # patent keyword followed by <quoted name>
    for quoted in (strict, loose):
        sources.extend(lead + quoted for lead in leading)
    sources.extend('名称为?' + quoted for quoted in (strict, loose))
    # "名称为 / 名为" + unquoted name + patent keyword
    for lead in ('名称为?', '名为'):
        sources.extend(lead + bare + word for word in trailing)
    return tuple(re.compile(source) for source in sources)


_PN_NUMBER_PATTERNS = _pn_compile_number_patterns()
_PN_NAME_PATTERNS = _pn_compile_name_patterns()
# Any quoted / book-title-marked text; appended after the numbers by re_num.
_PN_QUOTED_TEXT = re.compile('[”‘“"《]([^”"’“‘,;。《]+)[“’”"》]')


class Patent_num(object):
    """Extract patent numbers / patent names from rows of jugement_table_0807_re_qx.

    Rows are read through ``self.db``; one result row per source row is
    inserted through ``self.db1``.  ``self.num`` counts successful inserts.
    """

    def __init__(self):
        self.db = OracleOperation()
        self.db1 = OracleOperation()
        self.case_id = ''
        self.clear_one = ''
        self.clear_one_name = ''
        self.case_reason = ''
        self.case_name = ''
        self.num = 0

    def _copy_rows(self, identify, insert_sql, snippet_attr, extractor):
        """Shared loop of ``_sqlexe_num`` / ``_sqlexe_name``.

        identify     -- value of the ``identify`` column selecting the rows
        insert_sql   -- ``str.format`` template with five positional slots
        snippet_attr -- attribute that receives column 3 of each row
        extractor    -- zero-argument callable returning the extracted text
        """
        source = "jugement_table_0807_re_qx where identify='{}'".format(identify)
        self.db.execute_sql("select count(*) from " + source)
        progress = tqdm(total=self.db.get_one()[0])
        self.db.execute_sql("select * from " + source)
        try:
            while True:
                row = self.db.get_one()
                if not row:
                    break
                # Tick only for rows that exist, so the bar ends at 100 %.
                progress.update(1)
                self.case_id = row[0]
                self.case_reason = row[1]
                self.case_name = row[2]
                setattr(self, snippet_attr, row[3])
                extracted = extractor()
                try:
                    # NOTE(review): the statement is built by string
                    # formatting, so a quote inside any value breaks it and
                    # the row lands in 0807.txt; switch to bind variables if
                    # OracleOperation supports them.
                    self.db1.execute_sql(insert_sql.format(
                        self.case_id, self.case_reason, self.case_name,
                        row[3], extracted))
                    self.num += 1
                except Exception:
                    # Best effort: remember the failed case and keep going.
                    with open('0807.txt', 'a', encoding='utf-8') as failed:
                        failed.write(self.case_id)
                        failed.write('\n')
        finally:
            progress.close()

    def _sqlexe_num(self):
        """Process every '专利号' row into jugement_table_0807_re_qx_bei."""
        self._copy_rows(
            '专利号',
            "insert into jugement_table_0807_re_qx_bei values ('{}','{}','{}','{}','专利号','{}')",
            'clear_one',
            self.re_num)

    def _sqlexe_name(self):
        """Process every '专利名称' row into jugement_table_0807_re_qx_name."""
        self._copy_rows(
            '专利名称',
            "insert into jugement_table_0807_re_qx_name values ('{}','{}','{}','{}','专利名称','{}')",
            'clear_one_name',
            self.re_name)

    def re_num(self):
        """Return the patent numbers found in ``self.clear_one``.

        The result is a ';'-joined string: the distinct numbers in
        first-seen order, followed by every quoted text found in the
        snippet.  When no number is found the result is '' - quoted texts
        alone are not reported.
        """
        text = self.clear_one
        numbers = []
        for pattern in _PN_NUMBER_PATTERNS:
            numbers.extend(pattern.findall(text))
        if not numbers:
            return ''
        # Keep at most 12 characters per number (the original computed this
        # truncation but threw the result away), then de-duplicate while
        # keeping the order stable.
        distinct = list(dict.fromkeys(number[:12] for number in numbers))
        return ';'.join(distinct + _PN_QUOTED_TEXT.findall(text))

    def re_name(self):
        """Return the distinct patent names found in ``self.clear_one_name``.

        Names are ';'-joined in first-seen order; '' when nothing matches.
        """
        text = self.clear_one_name
        names = []
        for pattern in _PN_NAME_PATTERNS:
            names.extend(pattern.findall(text))
        return ';'.join(name for name in dict.fromkeys(names) if name != '')

    def run(self):
        """Run the name extraction pass and print the number of inserted rows."""
        # self._sqlexe_num()
        self._sqlexe_name()
        print('已完成共', self.num, '条')
def _jap_compile_number_patterns():
    """Compile the regexes ``just_about_patent.re_patent_num`` searches with.

    Patterns either have one capture group or none; ``findall`` therefore
    yields one string per hit (the group, or the whole match).
    """
    # NOTE(review): the duplicated letters in these classes (e.g. ``XxXx``)
    # look like full-width variants that were lost somewhere - confirm.
    x = 'XxXx×'                       # characters used to mask a digit
    d = r'[\d' + x + ']'              # one digit or one masking character
    tail = r'\.?' + d + '?'           # optional ".<check digit>"
    zc = '[ZZzzCCcc]'
    ln = '[LLllNNnn]'
    boundary = r'[^\dLLllNNnn' + x + ']'
    quoted = '[”‘“"《][^”"’“‘,;。《]+[“’”"》]'

    def one_of(chars):
        return '[' + chars + x + ']'

    short = one_of('12389') + d + '{5}'
    long_ = one_of('12389') + d + '{7}'
    bodies = (
        one_of('8') + one_of('5-9') + short,                            # 85-89
        one_of('9') + one_of('0-9') + short,                            # 90-99
        one_of('0') + one_of('0-3') + short,                            # 00-03
        one_of('2') + one_of('0') + one_of('0') + one_of('3-9') + long_,  # 2003-2009
        one_of('2') + one_of('0') + one_of('1') + one_of('0-9') + long_,  # 2010-2019
        one_of('2') + one_of('0') + one_of('2') + one_of('0') + long_,    # 2020
    )

    sources = []
    for body in bodies:
        number = body + tail
        sources.append(boundary + '(' + number + ')')            # bare number
        sources.append(zc + ln + number)                         # ZL/CN + number
        # number followed by its quoted name
        sources.append(r'[^\d](' + zc + '?' + ln + '?' + number
                       + '号?、?名?称?为?' + quoted + ')')
        # quoted name followed by "第<number>号"
        sources.append(quoted + '的?第' + zc + '?' + ln + '?' + number + '号')

    # Catch-all patterns: any 8-12 character number after a known keyword.
    generic = zc + '?' + ln + '?' + d + '{8,12}' + tail
    for lead in ('专利号?', '申请号?'):
        sources.append(lead + r'为?是?\s?:?' + generic)
    for lead in ('外观设计', '发明', '实用新型', '涉案'):
        sources.append(lead + r'为?是?\s?:?(' + generic + ')')
    sources.append('[^a-zA-Z](' + zc + ln + d + '{8,12}' + tail + ')')
    return tuple(re.compile(source) for source in sources)


def _jap_compile_name_patterns():
    """Compile the regexes ``just_about_patent.re_patent_name`` searches with.

    None of the patterns has a capture group, so ``findall`` yields the
    whole matched phrase.
    """
    opener = '[”‘“"《]'
    closer = '[“’”"》]'
    strict = opener + '[^”"’“‘,;。《]+' + closer     # no quote chars inside
    loose = opener + '[^”,;。《]+' + closer          # inner quotes tolerated
    bare = r'\w[^。”;"’“‘]+[的、,((]?\w{0,2}'         # unquoted name
    trailing = ('专利', '发明', '实用新型', '外观', '申请')
    leading = ('发明名?称?为?', '专利名?称?为?', '实用新型名?称?为?',
               '外观设?计?名?称?为?', '申请名?称?为?')

    sources = []
    for quoted in (strict, loose):
        sources.extend(quoted + r'[,、]?\w{0,2}' + word for word in trailing)
    for quoted in (strict, loose):
        sources.extend(lead + quoted for lead in leading)
    sources.extend('名称为?' + quoted for quoted in (strict, loose))
    for lead in ('名称为?', '名为'):
        sources.extend(lead + bare + word for word in trailing)
    return tuple(re.compile(source) for source in sources)


_JAP_NUMBER_PATTERNS = _jap_compile_number_patterns()
_JAP_NAME_PATTERNS = _jap_compile_name_patterns()
_JAP_PUBLIC_NUMBER = re.compile(r'[CcCc][NnNn][1-3]\d{5,9}[ABUSCDY]?[1-9]?')
# Case name / case reason must contain one of these to be treated as a patent case.
_JAP_CASE_KEYWORDS = ('知识产权', '专利', '发明', '实用新型', '外观设计', '设计人')
# Section markers; text from a marker to the end of its line is dropped.
_JAP_SECTION_MARKERS = ('参考文献', '附件', '对比文件', '现有技术抗辩', '证据')


def _jap_join_distinct(hits):
    """Join the distinct, non-empty hits with ';' in first-seen order."""
    return ';'.join(hit for hit in dict.fromkeys(hits) if hit != '')


class just_about_patent(object):
    """Pull patent numbers, patent names and publication numbers out of judgments.

    ``_sqlexe`` reads jugement_table_0807 through ``self.db`` and writes one
    row per patent-related document to jugement_table_0807_re1 through
    ``self.db1``; the ``*_count`` attributes collect statistics for ``run``.
    """

    def __init__(self):
        self.fmt_content_n = ''
        self.case_id = ''
        self.case_name = ''
        self.case_reason = ''
        self.db = OracleOperation()
        self.db1 = OracleOperation()
        self.patent_count = 0
        self.caiding_count = 0
        self.null_count = 0
        self.panjue_count = 0
        self.all_count = 0

    def com_true(self):
        """Extract the patent data of the current document.

        Returns None when neither ``case_name`` nor ``case_reason`` contains
        a patent keyword; otherwise a dict with the keys ``patent_num``,
        ``patent_name`` and ``public_num`` (each a ';'-joined string).
        """
        is_patent_case = any(
            word in self.case_name or word in self.case_reason
            for word in _JAP_CASE_KEYWORDS)
        if not is_patent_case:
            return None
        self.patent_count += 1
        self.re_name(self.case_name, self.fmt_content_n)
        return {
            "patent_num": self.re_patent_num(),
            "patent_name": self.re_patent_name(),
            "public_num": self.re_public_num(),
        }

    def re_name(self, case_name, fmt_content):
        """Normalise ``fmt_content`` and store the result in ``self.fmt_content_n``.

        Unless ``case_name`` contains '裁定', every section marker found
        after the first 35 % of the text is removed together with the rest
        of its line.  Documents whose name contains '裁定' are kept whole
        and counted in ``caiding_count``.
        """
        # Replace special symbols.
        # NOTE(review): most of these pairs are identical as written; they
        # look like HTML entities whose left-hand side was decoded at some
        # point - confirm against the intended input.
        fmt_content_r = fmt_content.replace('&', '&').replace('×', '×').replace('“', '“'). \
            replace('”', '”').replace('<', '<').replace('>', '>').replace('"', '“'). \
            replace('…', '…').replace('·', '·').replace('—', '—').replace(' ', ' ').\
            replace('01lydyh01','"')

        if '裁定' in case_name:
            self.caiding_count += 1
            self.fmt_content_n = fmt_content_r
            return

        # Only the text after the first 35 % is cleaned.
        cut = int(round(len(fmt_content_r) * 0.35, 0))
        head, tail = fmt_content_r[:cut], fmt_content_r[cut:]
        # Remove each occurrence on its own.  The previous code joined all
        # hits of a marker into one string and replaced that, which removed
        # nothing as soon as a marker occurred more than once.
        for marker in _JAP_SECTION_MARKERS:
            tail = re.sub(marker + '.*', '', tail)
        self.fmt_content_n = head + tail

    def re_patent_num(self):
        """Return the distinct patent-number hits in ``self.fmt_content_n``, ';'-joined."""
        text = self.fmt_content_n
        hits = []
        for pattern in _JAP_NUMBER_PATTERNS:
            hits.extend(pattern.findall(text))
        return _jap_join_distinct(hits)

    def re_patent_name(self):
        """Return the distinct patent-name phrases in ``self.fmt_content_n``, ';'-joined."""
        text = self.fmt_content_n
        hits = []
        for pattern in _JAP_NAME_PATTERNS:
            hits.extend(pattern.findall(text))
        return _jap_join_distinct(hits)

    def re_public_num(self):
        """Return the distinct CN publication numbers in ``self.fmt_content_n``, ';'-joined."""
        return _jap_join_distinct(_JAP_PUBLIC_NUMBER.findall(self.fmt_content_n))

    def _sqlexe(self):
        """Process every row of jugement_table_0807 and store the extracted data."""
        self.db.execute_sql('select count(*) from jugement_table_0807')
        progress = tqdm(total=self.db.get_one()[0])
        self.db.execute_sql('select * from jugement_table_0807')
        try:
            while True:
                row = self.db.get_one()
                if not row:
                    break
                # Tick only for rows that exist, so the bar ends at 100 %.
                progress.update(1)
                self.all_count += 1
                self.case_id = row[0]
                self.case_name = '' if row[1] is None else row[1]
                self.case_reason = '' if row[2] is None else row[2]
                content = row[3]
                if content is None:
                    self.null_count += 1
                    continue
                # Column 3 is read through ``.read()`` (a LOB-like object).
                self.fmt_content_n = content.read()
                extracted = self.com_true()
                if extracted is None:
                    self.panjue_count += 1
                    continue
                # NOTE(review): the statement is built by string formatting,
                # so a quote inside any value breaks it and the row lands in
                # 0807.txt; switch to bind variables if OracleOperation
                # supports them.
                sql = "insert into jugement_table_0807_re1 values ('{}','{}','{}','{}','{}','{}')"\
                    .format(self.case_id, self.case_reason,
                            extracted["patent_num"], extracted["patent_name"],
                            extracted["public_num"], self.case_name)
                try:
                    self.db1.execute_sql(sql)
                except Exception:
                    # Best effort: remember the failed case and keep going.
                    with open('0807.txt', 'a', encoding='utf-8') as failed:
                        failed.write(self.case_id)
                        failed.write('\n')
        finally:
            progress.close()

    def run(self):
        """Run the extraction over the whole table and print the statistics."""
        self._sqlexe()
        print('共', self.all_count, '条', '--', '专利文书数量', self.patent_count, '--',
              '裁定文书数量', self.caiding_count, '--', '判决文书等数量',
              self.panjue_count, '--', '文书为空数量', self.null_count)
# coding:utf-8
from tools.shujuku_conn.oracle_conn import OracleOperation
import re

# Back-fill script: for every '商标号和商标名称' row whose clear_two is still
# null, collect the distinct 6-9 digit runs found in clear_one and store
# them, ';'-joined, in clear_two.
db = OracleOperation()
DATA = db.execute_sql("SELECT * FROM caiduanwenshu_0728_finally where clear_two is null and identify='商标号和商标名称'")
data = db.get_data()
num = 0  #43741

for concent in data:
    case_id = concent[0]
    try:
        clear_one, identify = concent[1], concent[3]
        clear_two = re.findall(r'\d{6,9}', clear_one)
        if clear_two:
            # Distinct hits only; the order is whatever the set yields.
            test_str_clear_two = ';'.join(
                digits for digits in set(clear_two) if digits != '')
            sql = "update caiduanwenshu_0728_finally set clear_two='{}' where case_id='{}' and clear_one='{}' and clear_two is null and identify='{}'".format(
                test_str_clear_two, case_id, clear_one, identify)
            db.execute_sql(sql)
            num += 1
            print('已清洗', num, '条')
    except Exception:
        # A failed row is still counted; its case id goes to the log file.
        num += 1
        with open('log_null.txt', 'a', encoding='utf-8') as log_file:
            log_file.write(case_id)
            log_file.write('\n')
# coding:utf-8 from tools.shujuku_conn.oracle_conn import OracleOperation import re db = OracleOperation() DATA = db.execute_sql( "SELECT case_id,fmt_content FROM shangbiao_zhengju_position where case_id = '2ede79fa6ec19b40d67b1f8a8fa594190' " ) data = db.get_data() num = 0 for concent in data: case_id = concent[0] try: find_name = '证据' fmt_content_clob = concent[1] fmt_content = fmt_content_clob.read() # 匹配证据的个数 find_count = fmt_content.count(find_name) #匹配到的第一个证据所占的位置百分比 if len(fmt_content) > 0 or find_count == None: if find_count > 0: fmt_content_len = len(fmt_content) find_name_one_position = fmt_content.find(find_name) baifenbi_position_one = int(find_name_one_position) / int( fmt_content_len) * 100 found = round(baifenbi_position_one, 2) baifenbi_position_one_y = str(found) + '%' if find_count > 1: #匹配到的第二个证据所占的位置百分比 find_name_two_position = fmt_content.find(
from tools.shujuku_conn.oracle_conn import OracleOperation import re db = OracleOperation() sql ="select case_id from jugement_table_0807_re_qxxx " db.execute_sql(sql) data = db.get_data() for datas in data: case_id = datas[0] sql1 = "select clear_two from jugement_table_0807_re_qxxx where case_id='{}' and identify='专利名称'".format(case_id) db.execute_sql(sql) clear_two = db.get_data() num = 0 for i in name:
# coding:utf-8 from tools.shujuku_conn.oracle_conn import OracleOperation import re db = OracleOperation() DATA = db.execute_sql( 'SELECT case_id,case_reason,fmt_content FROM jugement_table_0728') data = db.get_data() num = 0 for concent in data: case_id = concent[0] try: case_reason = concent[1] fmt_content_clob = concent[2] # clob fmt_content = fmt_content_clob.read() if fmt_content == None: sql = "insert into jugement_table_0728_re values ('{}','{}','{}','{}','{}','{}','{}')".format( case_id, case_reason, '', '', '', '', '') db.execute_sql(sql) num += 1 print('已筛选', num, '条') continue fmt_content_r = fmt_content.replace('&', '&').replace('×', '×').replace('“', '“').\ replace('”', '”').replace('<', '<').replace('>', '>').replace('"', '“').\ replace('…','…').replace('·', '·').replace('—', '—').replace(' ', ' ') ''' 现有技术抗辩,参考文献,附件,一般都在文章后边,不限定位置,直接.*删除 证据需要限定位置,位置还需要讨论 对比文件需要限定位置, 还有一种情况是文章中同事出现两种或两种以上