def import_from_xls(file):
    rows = excel_table_byindex(file)
    for row in rows:
        patent_data = []
        cnt = 100
        while (cnt > 0):
            cnt -= 1
            patent_data.append(0)
        for key in row:
            print key, row[key]
            if len(row[key]) > 0:
                patent_data[dic[key]] = row[key]
        #record = excute_record()
        temp_patent = patent(  # 对应的执行记录
            #record=patent_data[1],
            # 申请号
            apply_number=patent_data[2],
            #名称
            name=patent_data[3],
            #主分类号
            main_classify_code=patent_data[4],
            #分类号
            classify_code=patent_data[5],
            #申请(专利权)人
            apply_man=patent_data[6],
            #发明(设计)人
            invente_man=patent_data[7],
            #公开(公告)日
            publicity_date=patent_data[8],
            #公开(公告)号
            publicity_code=patent_data[9],
            # 专利代理机构
            patent_agent=patent_data[10],
            # 代理人
            agent=patent_data[11],
            # 申请日
            aplly_date=patent_data[12],
            # 地址
            address=patent_data[13],
            # 优先权
            priority=patent_data[14],
            # 国省代码
            province_code=patent_data[15],
            # 摘要
            abstract=patent_data[16],
            # 主权项
            main_right=patent_data[17],
            # 国际申请
            international_apply=patent_data[18],
            # 国际公布
            international_publicity=patent_data[19],
            # 进入国家日期
            enter_country_date=patent_data[20],
            right_demand=patent_data[22],
            valid_state=patent_data[23],
            state_code=patent_data[24])
        temp_patent.save()
def import_from_xls(file):
    rows = excel_table_byindex(file)
    for row in rows:
        patent_data = []
        cnt =100
        while (cnt>0):
            cnt-=1
            patent_data.append(0)
        for key in row:
            print key, row[key]
            if len(row[key])>0:
                patent_data[dic[key]] = row[key]
        #record = excute_record()
        temp_patent = patent(  # 对应的执行记录
                               #record=patent_data[1],
                               # 申请号
                               apply_number=patent_data[2],
                               #名称
                               name=patent_data[3],
                               #主分类号
                               main_classify_code=patent_data[4],
                               #分类号
                               classify_code=patent_data[5],
                               #申请(专利权)人
                               apply_man=patent_data[6],
                               #发明(设计)人
                               invente_man=patent_data[7],
                               #公开(公告)日
                               publicity_date=patent_data[8],
                               #公开(公告)号
                               publicity_code=patent_data[9],
                               # 专利代理机构
                               patent_agent=patent_data[10],
                               # 代理人
                               agent=patent_data[11],
                               # 申请日
                               aplly_date=patent_data[12],
                               # 地址
                               address=patent_data[13],
                               # 优先权
                               priority=patent_data[14],
                               # 国省代码
                               province_code=patent_data[15],
                               # 摘要
                               abstract=patent_data[16],
                               # 主权项
                               main_right=patent_data[17],
                               # 国际申请
                               international_apply=patent_data[18],
                               # 国际公布
                               international_publicity=patent_data[19],
                               # 进入国家日期
                               enter_country_date=patent_data[20],
                               right_demand = patent_data[22],
                               valid_state=patent_data[23],
                               state_code=patent_data[24])
        temp_patent.save()
# Example no. 3
# 0
# (patent attribute, column index in a downloaded spreadsheet row).
# Column 19 is not mapped to any attribute.
_PATENT_COLUMNS = (
    ('apply_number', 0),              # application number
    ('name', 1),                      # title
    ('main_classify_code', 2),        # main classification code
    ('classify_code', 3),             # classification code
    ('apply_man', 4),                 # applicant (patentee)
    ('invente_man', 5),               # inventor (designer)
    ('publicity_date', 6),            # publication (announcement) date
    ('publicity_code', 7),            # publication (announcement) number
    ('patent_agent', 8),              # patent agency
    ('agent', 9),                     # agent
    ('aplly_date', 10),               # application date
    ('address', 11),                  # address
    ('priority', 12),                 # priority
    ('province_code', 13),            # country/province code
    ('abstract', 14),                 # abstract
    ('main_right', 15),               # main claim
    ('international_apply', 16),      # international application
    ('international_publicity', 17),  # international publication
    ('enter_country_date', 18),       # national phase entry date
    ('right_demand', 20),             # claims
    ('valid_state', 21),              # legal status
    ('state_code', 22),               # patent status code
    ('type', 23),                     # patent type
)


def _get_or_create_record(item, time_stamp):
    """Return the excute_record for (item, time_stamp), saving a new one if none matches."""
    records = excute_record.objects.filter(expression=item, time_stamp=time_stamp)
    if len(records) > 0:
        return records[0]
    record = excute_record(expression=item, time_stamp=time_stamp)
    record.save()
    return record


def _store_patent_row(item, row):
    """Insert or update the patent described by one spreadsheet row.

    The application number in column 0 decides between updating an existing
    patent and creating a new one.  A failing save is logged and swallowed in
    both cases, so one bad row does not abort the rest of the scrape.
    """
    apply_num = row[0]
    # Duplicate check on the application number.
    matches = patent.objects.filter(apply_number=apply_num)
    if len(matches) > 0:
        logger.log("{0} update!".format(apply_num))
        p = matches[0]
    else:
        logger.log(apply_num)
        p = None

    # Column 6 (publication date) doubles as the record's time stamp.
    record = _get_or_create_record(item, row[6])
    values = [(name, row[index]) for name, index in _PATENT_COLUMNS]
    if p is None:
        p = patent(record=record, **dict(values))
    else:
        p.record = record
        for name, value in values:
            setattr(p, name, value)

    try:
        p.save()
    except Exception as e:
        logger.log(str(e), flush=True)
        logger.log('failed to save patent!', flush=True)


def scrap(start_day=None, end_day=None, start=1, end=20):
    """Run the stored expressions through the spider and save the patents found.

    Args:
        start_day: forwarded to ``logger.begin`` and to the spider's
            ``get_xls_by_expression``.
        end_day: forwarded to the spider's ``get_xls_by_expression``.
        start: lower bound of the ``id__range`` used to select expressions.
        end: upper bound of that range; ``None`` selects up to id 3000.

    For every selected expression the spider is asked for a spreadsheet.
    When it returns a path, the file is parsed and then deleted, and every
    row that differs from the first (header) row is written to the database.
    """
    logger.clear()
    logger.begin(start_day, end_day, start)
    logger.log("Try to get expressions...", flush=True)
    last_id = end if end is not None else 3000
    expressions = expression.objects.filter(id__range=(start, last_id)).order_by('id')

    s = spider()
    logger.log("Try to login...", flush=True)
    browser = s.login()
    for item in expressions:
        logger.log(u"第" + str(item.id) + u"个表达式:" + item.name, count=item.id, flush=True)
        # Verify the session is still logged in; otherwise wait and log in again.
        check_login = s.check_login(browser)
        if not json.loads(check_login)['success']:
            logger.log('check is not login , sleep 100s ,then try login again')
            time.sleep(100)
            browser = s.login()

        file_path = s.get_xls_by_expression(item.content, browser, start_day, end_day)
        if file_path is None:
            continue

        file_path = os.path.normpath(file_path)
        rows = excel_table_byindex(file_path, include_name=False)
        # The spreadsheet is no longer needed once it has been parsed.
        os.remove(file_path)
        for row in rows:
            # Skip the first row (and any row equal to it).
            if row == rows[0]:
                continue
            _store_patent_row(item, row)
# Example no. 4
# 0
def begin(item=None, file_path='./test.xls'):
    """Import the rows of a local spreadsheet into the patent table.

    Args:
        item: expression object that the execution records are attached to.
            The previous body used ``item`` without defining it, so it only
            worked if the module happened to define a global of that name;
            that global is still used as a fallback when no argument is given.
        file_path: spreadsheet to import; defaults to the previously
            hard-coded ``'./test.xls'``.

    Raises:
        ValueError: if a data row has to be imported but no ``item`` was
            passed and no module-level ``item`` exists.

    Every row that differs from the first (header) row is matched against
    existing patents by application number and then updated or inserted.
    A failing save is logged and does not stop the import.
    """
    # (patent attribute, column index in a spreadsheet row); column 19 is unused.
    columns = (
        ('apply_number', 0),              # application number
        ('name', 1),                      # title
        ('main_classify_code', 2),        # main classification code
        ('classify_code', 3),             # classification code
        ('apply_man', 4),                 # applicant (patentee)
        ('invente_man', 5),               # inventor (designer)
        ('publicity_date', 6),            # publication (announcement) date
        ('publicity_code', 7),            # publication (announcement) number
        ('patent_agent', 8),              # patent agency
        ('agent', 9),                     # agent
        ('aplly_date', 10),               # application date
        ('address', 11),                  # address
        ('priority', 12),                 # priority
        ('province_code', 13),            # country/province code
        ('abstract', 14),                 # abstract
        ('main_right', 15),               # main claim
        ('international_apply', 16),      # international application
        ('international_publicity', 17),  # international publication
        ('enter_country_date', 18),       # national phase entry date
        ('right_demand', 20),             # claims
        ('valid_state', 21),              # legal status
        ('state_code', 22),               # patent status code
        ('type', 23),                     # patent type
    )

    rows = excel_table_byindex(file_path, include_name=False)
    for row in rows:
        # Skip the first row (and any row equal to it).
        if row == rows[0]:
            continue

        if item is None:
            # Backward compatibility with the old implicit global lookup.
            item = globals().get('item')
            if item is None:
                raise ValueError(
                    'begin() needs the expression `item` the rows belong to')

        apply_num = row[0]
        # Duplicate check on the application number.
        matches = patent.objects.filter(apply_number=apply_num)
        if len(matches) > 0:
            print("{0} update!".format(apply_num))
            p = matches[0]
        else:
            logger.log(apply_num)
            p = None

        # Column 6 (publication date) doubles as the record's time stamp.
        records = excute_record.objects.filter(expression=item, time_stamp=row[6])
        if len(records) > 0:
            record = records[0]
        else:
            record = excute_record(expression=item, time_stamp=row[6])
            record.save()

        values = [(name, row[index]) for name, index in columns]
        if p is None:
            p = patent(record=record, **dict(values))
        else:
            p.record = record
            for name, value in values:
                setattr(p, name, value)

        try:
            p.save()
        except Exception as e:
            logger.log(str(e), flush=True)
            logger.log('failed to save patent!', flush=True)
# Example no. 5
# 0
# Spreadsheet column index for each patent attribute.  Column 19 is skipped.
_ROW_LAYOUT = {
    'apply_number': 0,              # application number
    'name': 1,                      # title
    'main_classify_code': 2,        # main classification code
    'classify_code': 3,             # classification code
    'apply_man': 4,                 # applicant (patentee)
    'invente_man': 5,               # inventor (designer)
    'publicity_date': 6,            # publication (announcement) date
    'publicity_code': 7,            # publication (announcement) number
    'patent_agent': 8,              # patent agency
    'agent': 9,                     # agent
    'aplly_date': 10,               # application date
    'address': 11,                  # address
    'priority': 12,                 # priority
    'province_code': 13,            # country/province code
    'abstract': 14,                 # abstract
    'main_right': 15,               # main claim
    'international_apply': 16,      # international application
    'international_publicity': 17,  # international publication
    'enter_country_date': 18,       # national phase entry date
    'right_demand': 20,             # claims
    'valid_state': 21,              # legal status
    'state_code': 22,               # patent status code
    'type': 23,                     # patent type
}


def _record_for(item, time_stamp):
    """Fetch the excute_record matching (item, time_stamp); create and save it if absent."""
    found = excute_record.objects.filter(expression=item, time_stamp=time_stamp)
    if len(found) > 0:
        return found[0]
    created = excute_record(expression=item, time_stamp=time_stamp)
    created.save()
    return created


def _upsert_patent(item, row):
    """Write one spreadsheet row to the patent table, updating on a known application number.

    Errors raised by ``save()`` are logged instead of propagated, for updates
    as well as inserts, so the remaining rows are still processed.
    """
    apply_num = row[0]
    # Duplicate check on the application number.
    known = patent.objects.filter(apply_number=apply_num)
    target = None
    if len(known) > 0:
        logger.log("{0} update!".format(apply_num))
        target = known[0]
    else:
        logger.log(apply_num)

    # row[6] is the publication date and is used as the record's time stamp.
    record = _record_for(item, row[6])
    fields = dict((attr, row[col]) for attr, col in _ROW_LAYOUT.items())
    if target is None:
        target = patent(record=record, **fields)
    else:
        target.record = record
        for attr in fields:
            setattr(target, attr, fields[attr])

    try:
        target.save()
    except Exception as e:
        logger.log(str(e), flush=True)
        logger.log('failed to save patent!', flush=True)


def scrap(start_day=None, end_day=None, start=1, end=20):
    """Fetch a spreadsheet per stored expression and persist its patent rows.

    Args:
        start_day: passed to ``logger.begin`` and ``get_xls_by_expression``.
        end_day: passed to ``get_xls_by_expression``.
        start: first value of the ``id__range`` filter on expressions.
        end: second value of that filter; 3000 is used when it is ``None``.

    Before each request the login state is checked; if the check reports
    failure the function sleeps 100 seconds and logs in again.  A returned
    spreadsheet is parsed, removed from disk, and every row other than the
    header row is inserted or updated.
    """
    logger.clear()
    logger.begin(start_day, end_day, start)
    logger.log("Try to get expressions...", flush=True)
    upper = 3000 if end is None else end
    expressions = expression.objects.filter(
        id__range=(start, upper)).order_by('id')

    s = spider()
    logger.log("Try to login...", flush=True)
    browser = s.login()
    for item in expressions:
        logger.log(u"第" + str(item.id) + u"个表达式:" + item.name,
                   count=item.id,
                   flush=True)
        # Verify the login is still valid before requesting data.
        check_login = s.check_login(browser)
        if not json.loads(check_login)['success']:
            logger.log('check is not login , sleep 100s ,then try login again')
            time.sleep(100)
            browser = s.login()

        file_path = s.get_xls_by_expression(item.content, browser, start_day,
                                            end_day)
        if file_path is not None:
            file_path = os.path.normpath(file_path)
            rows = excel_table_byindex(file_path, include_name=False)
            # Delete the file once its rows are in memory.
            os.remove(file_path)
            for row in rows:
                # Ignore the header row (any row equal to the first one).
                if row != rows[0]:
                    _upsert_patent(item, row)