예제 #1
0
def mention_to_entity(mention):
    """Query the knowledge-graph API for the entities a mention may refer to.

    :param mention: the input name (surface form) to disambiguate
    :return: the ``.text`` of the response to the ambiguity query
    """
    from urllib.parse import quote  # function-scope import keeps the block self-contained

    # Percent-encode the value so characters such as '&', '#', '+' or spaces
    # cannot split or truncate the query string.
    encoded_mention = quote(str(mention), safe='')
    url = f'https://api.ownthink.com/kg/ambiguous?mention={encoded_mention}'  # knowledge-graph API: ambiguity relations
    return get(url).text
예제 #2
0
def entity_to_knowledge(entity):
    """Query the knowledge-graph API for the knowledge attached to an entity.

    :param entity: the entity name
    :return: the ``.text`` of the response to the entity-knowledge query
    """
    from urllib.parse import quote  # function-scope import keeps the block self-contained

    # Percent-encode the value so characters such as '&', '#', '+' or spaces
    # cannot split or truncate the query string.
    encoded_entity = quote(str(entity), safe='')
    url = f'https://api.ownthink.com/kg/knowledge?entity={encoded_entity}'  # knowledge-graph API: entity knowledge
    return get(url).text
예제 #3
0
def entity_attribute_value(entity, attribute):
    """Query the knowledge-graph API for the value of an entity's attribute.

    :param entity: the entity name
    :param attribute: the attribute name
    :return: the ``.text`` of the response to the entity/attribute query
    """
    from urllib.parse import quote  # function-scope import keeps the block self-contained

    # Both values are percent-encoded independently; an unescaped '&' or '='
    # inside either one would otherwise be read as a parameter separator.
    encoded_entity = quote(str(entity), safe='')
    encoded_attribute = quote(str(attribute), safe='')
    url = (
        'https://api.ownthink.com/kg/eav'
        f'?entity={encoded_entity}&attribute={encoded_attribute}'
    )  # knowledge-graph API: attribute value
    return get(url).text
예제 #4
0
def _get_start_end():
    """Return the default ``(start, end)`` issue numbers.

    The start is the fixed number 3001. The end is the text found in the
    ``value`` attribute of the ``end`` input field on the history page.
    """
    history_url = 'https://datachart.500.com/ssq/history/history.shtml'
    page_html = get(history_url).content.decode('gbk')
    found = re.search('<input id="end" name="end" value="(.+?)" size="10" />', page_html)
    # The capture group is exactly the text between the fixed prefix and
    # suffix of the pattern, i.e. the attribute value itself.
    latest_issue = found.group(1)
    return 3001, latest_issue
예제 #5
0
 def get_title(self):
     """
     Fetch the page at ``self.url`` and extract the document type and title.

     :return: tuple ``(doc_type, title, page_text)``, where ``page_text`` is
         the GBK-decoded page content
     """
     page_text = get(self.url).content.decode('gbk')
     # First match of  'docType': '<word>'  -- drop the 12-character prefix
     # and the closing quote to keep only the word.
     doc_type_entry = re.findall(r'\'docType\': \'\w+\'', page_text)[0]
     doc_type = doc_type_entry[12:-1]
     # First match of  'title': '<text>'  -- drop the 10-character prefix
     # and the closing quote to keep only the text.
     title_entry = re.findall(r'\'title\': \'.*\'', page_text)[0]
     doc_title = title_entry[10:-1]
     return doc_type, doc_title, page_text
예제 #6
0
def math_tex(tex, file_path=None):
    """Render a TeX formula to an SVG file.

    TeX syntax reference: https://blog.csdn.net/qfire/article/details/81382048

    :param tex: TeX source of the formula
    :param file_path: where to save the SVG; the name should end in ``.svg``.
        When omitted (or empty), the file is written to ``./<hash(tex)>.svg``.
    :return: always True, after the SVG text has been written to the file
    """
    encoded_tex = quote(tex)
    response = get(f'https://math.jianshu.com/math?formula={encoded_tex}')
    svg_text = response.text
    if not file_path:
        file_path = './' + str(hash(tex)) + '.svg'
    # The context manager closes the file even when the write fails, and the
    # explicit UTF-8 keeps the output independent of the platform's default
    # text encoding.
    with open(file_path, 'w', encoding='utf-8') as svg_file:
        svg_file.write(svg_text)
    return True
예제 #7
0
 def _ppt(self, dirs, save_path, title):
     """Download a ppt-type document page by page as images and merge them.

     :param dirs: working directory that receives the page images; it is
         removed after ``image_pdf`` has run
     :param save_path: directory part of the output address
     :param title: name part of the output address (the success message
         reports it with a ``.pdf`` suffix)
     :return: True
     """
     content_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + self.id + "&pn=1&rn=99999&type=ppt"
     print(content_url)
     raw_listing = get(content_url).content.decode('gbk')
     pages = json.loads(raw_listing)
     started = time.time()
     total = len(pages)
     for done, entry in enumerate(pages, 1):
         # Progress bar of up to 50 '>' characters followed by the percentage.
         bar = '>' * int((done * 50 / total))
         percent = float(done / total * 100)
         print('\r[下载进度]:%s%.2f%%' % (bar, percent))
         page = entry['page']
         zoom = entry['zoom']
         # ``zoom`` is used as the image URL; each page is saved as <page>.jpg.
         urlretrieve(zoom, filename=dirs + os.sep + str(page) + '.jpg')
     pdf_target = save_path + os.sep + title
     image_pdf(file_dir=dirs, pdf_address=pdf_target)
     shutil.rmtree(dirs)
     finished = time.time()
     print('\n下载成功,保存地址:', pdf_target + '.pdf', '一共耗时:',
           finished - started, '秒')
     print('删除临时文件夹成功!')
     return True
예제 #8
0
def double_data_chart(start=None, end=None):
    """Scrape double-color-ball lottery draw data; the first row is the header.

    :param start: first issue number; when None, the start returned by
        ``_get_start_end()`` is used
    :param end: last issue number; when None, the end returned by
        ``_get_start_end()`` is used
    :return: two-dimensional list: the header row followed by one row per
        matched table row
    """
    if start is None or end is None:
        # Fill in only the missing bound(s). Previously the defaults were
        # applied only when both were missing, so a single missing bound was
        # sent to the server as the literal text "None".
        default_start, default_end = _get_start_end()
        if start is None:
            start = default_start
        if end is None:
            end = default_end
    header = ['期号', '红球1', '红球2', '红球3', '红球4', '红球5', '红球6', '篮球', '奖池',
              '一等奖注数', '一等奖奖金', '二等奖注数', '二等奖奖金', '总投注额', '开奖日期']
    rows = [header]
    url = f'https://datachart.500.com/ssq/history/newinc/history.php?start={start}&end={end}'
    response = get(url).content.decode('utf-8')
    for chart_row in re.findall(r'<tr class="t_tr1">.+?</tr>', response):
        cells = re.findall('<td.*?>(.+?)</td>', chart_row)
        # NOTE(review): presumably the tenth captured cell has no matching
        # header column -- confirm against the page markup.
        cells.pop(9)
        # The leading cell is skipped. Values containing '-' are kept as
        # text; everything else is parsed as an integer after removing the
        # ',' separators.
        rows.append([
            cell if '-' in cell else int(cell.replace(',', ''))
            for cell in cells[1:]
        ])
    return rows