def mention_to_entity(mention):
    """Mention -> entity: query the knowledge-graph ambiguity endpoint.

    :param mention: the name (mention) to look up
    :return: the ``.text`` of the response returned by ``get`` for the
        ambiguity query built from ``mention``
    """
    from urllib.parse import quote

    # Percent-encode the value so characters such as '&', '#', '+' or spaces
    # cannot truncate or split the query string.
    encoded_mention = quote(str(mention), safe='')
    url = f'https://api.ownthink.com/kg/ambiguous?mention={encoded_mention}'  # knowledge-graph API: ambiguity relations
    return get(url).text
def entity_to_knowledge(entity):
    """Entity -> knowledge: query the knowledge-graph knowledge endpoint.

    :param entity: the entity name
    :return: the ``.text`` of the response returned by ``get`` for the
        knowledge query built from ``entity``
    """
    from urllib.parse import quote

    # Percent-encode the value so characters such as '&', '#', '+' or spaces
    # cannot truncate or split the query string.
    encoded_entity = quote(str(entity), safe='')
    url = f'https://api.ownthink.com/kg/knowledge?entity={encoded_entity}'  # knowledge-graph API: entity knowledge
    return get(url).text
def entity_attribute_value(entity, attribute):
    """Entity & attribute -> attribute value: query the knowledge-graph EAV endpoint.

    :param entity: the entity name
    :param attribute: the attribute name
    :return: the ``.text`` of the response returned by ``get`` for the
        query built from ``entity`` and ``attribute``
    """
    from urllib.parse import quote

    # Percent-encode both values; otherwise an '&' or '#' inside ``entity``
    # would be parsed as a parameter separator / fragment and corrupt the
    # ``attribute`` parameter that follows it.
    encoded_entity = quote(str(entity), safe='')
    encoded_attribute = quote(str(attribute), safe='')
    url = (
        'https://api.ownthink.com/kg/eav'
        f'?entity={encoded_entity}&attribute={encoded_attribute}'
    )  # knowledge-graph API: attribute value
    return get(url).text
def _get_start_end():
    """Return the first and the latest draw numbers.

    The start number is the constant ``3001``; the end number is read from
    the ``value`` attribute of the ``end`` input field on the history page.

    :return: tuple ``(3001, end)`` where ``end`` is a string taken from the page
    """
    history_url = 'https://datachart.500.com/ssq/history/history.shtml'
    page = get(history_url).content.decode('gbk')
    match = re.search('<input id="end" name="end" value="(.+?)" size="10" />', page)
    # The capture group is exactly the text between the fixed 34-character
    # prefix and 14-character suffix of the pattern.
    latest = match.group(1)
    return 3001, latest
def get_title(self):
    """Fetch ``self.url`` and extract the document type and title from the page.

    :return: tuple ``(doc_type, title, page)`` where ``page`` is the
        gbk-decoded response body
    """
    page = get(self.url).content.decode('gbk')
    type_matches = re.findall(r'\'docType\': \'\w+\'', page)
    title_matches = re.findall(r'\'title\': \'.*\'', page)
    # Each match still contains its key: drop the leading "'docType': '"
    # (12 chars) / "'title': '" (10 chars) and the closing quote.
    doc_type = type_matches[0][12:-1]
    doc_title = title_matches[0][10:-1]
    return doc_type, doc_title, page
def math_tex(tex, file_path=None):
    """Render a TeX formula to an SVG file via the Jianshu math endpoint.

    TeX syntax reference: https://blog.csdn.net/qfire/article/details/81382048

    :param tex: the formula, written in TeX
    :param file_path: where to save the vector image; the name should end
        in ``.svg``. When omitted (or empty) the file is written to
        ``./<hash(tex)>.svg``.
    :return: always ``True``; the SVG text is written to disk, not returned
    """
    svg_text = get(f'https://math.jianshu.com/math?formula={quote(tex)}').text
    if not file_path:
        # NOTE: hash() of a str is salted per interpreter process unless
        # PYTHONHASHSEED is fixed, so this default name differs between runs.
        file_path = './' + str(hash(tex)) + '.svg'
    # The context manager closes the handle even if the write fails; the
    # explicit encoding keeps the output independent of the platform locale.
    with open(file_path, 'w', encoding='utf-8') as svg_file:
        svg_file.write(svg_text)
    return True
def _ppt(self, dirs, save_path, title):
    """Download a PPT-format document page by page and bundle it into a PDF.

    Each page image is saved into ``dirs`` as ``<page>.jpg``; ``image_pdf``
    is then called on that directory and ``dirs`` is removed afterwards.

    :param dirs: temporary directory that receives the page images; it is
        deleted after ``image_pdf`` returns
    :param save_path: directory for the resulting document
    :param title: document name without extension
        (presumably ``image_pdf`` appends ``.pdf`` -- confirm against that helper)
    :return: ``True`` after the download and conversion have completed
    """
    content_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + self.id + "&pn=1&rn=99999&type=ppt"
    print(content_url)
    content = get(content_url).content.decode('gbk')
    pages = json.loads(content)
    total = len(pages)
    pdf_base = save_path + os.sep + title
    started = time.time()
    for done, img in enumerate(pages, 1):
        # end='' keeps the cursor on the same line so the leading '\r'
        # redraws the bar in place instead of printing one line per page.
        print('\r[下载进度]:%s%.2f%%' % ('>' * int(done * 50 / total),
                                     float(done / total * 100)),
              end='', flush=True)
        page, zoom = img['page'], img['zoom']
        urlretrieve(zoom, filename=dirs + os.sep + str(page) + '.jpg')
    image_pdf(file_dir=dirs, pdf_address=pdf_base)
    shutil.rmtree(dirs)
    finished = time.time()
    # The leading '\n' terminates the in-place progress line.
    print('\n下载成功,保存地址:', pdf_base + '.pdf', '一共耗时:', finished - started, '秒')
    print('删除临时文件夹成功!')
    return True
def double_data_chart(start=None, end=None):
    """Scrape double-colour-ball (SSQ) lottery history as a 2-D list.

    The first row of the result is the header; every following row holds
    one draw.

    :param start: first draw number; defaults to the start value returned
        by ``_get_start_end()``
    :param end: last draw number; defaults to the end value returned by
        ``_get_start_end()``
    :return: list of rows; row 0 is the header. In data rows, cells that
        contain ``'-'`` are kept as strings and all other cells are
        converted to ``int`` after removing thousands separators.
    :raises ValueError: if a cell without ``'-'`` is not an integer
    """
    # Fill each missing bound independently, so passing only one of them
    # no longer sends the literal text "None" in the query string.
    if start is None or end is None:
        default_start, default_end = _get_start_end()
        if start is None:
            start = default_start
        if end is None:
            end = default_end

    header = ['期号', '红球1', '红球2', '红球3', '红球4', '红球5', '红球6', '篮球', '奖池',
              '一等奖注数', '一等奖奖金', '二等奖注数', '二等奖奖金', '总投注额', '开奖日期']
    rows = [header]
    url = f'https://datachart.500.com/ssq/history/newinc/history.php?start={start}&end={end}'
    response = get(url).content.decode('utf-8')
    for row_html in re.findall(r'<tr class="t_tr1">.+?</tr>', response):
        cells = re.findall('<td.*?>(.+?)</td>', row_html)
        # Cell 9 and the first cell have no counterpart in ``header``
        # (presumably extra columns on the page -- confirm against the markup).
        cells.pop(9)
        rows.append([
            cell if '-' in cell else int(cell.replace(',', ''))
            for cell in cells[1:]
        ])
    return rows