コード例 #1
0
def down(url: str):
    """Download one page and save its content as a text file.

    The page is fetched with ``HTMLSession``. The title comes from the first
    ``.bookname h1`` element and the body from the first ``#content``
    element. The output path is ``saveDir + title + '.txt'``; an existing
    file is never overwritten. Progress and failures are reported with
    ``print``.

    Args:
        url: Address of the page to download.

    Raises:
        IndexError: If the page has no ``.bookname h1`` or ``#content``
            element, or the title element is empty.
    """
    # Fetch the page.
    page = HTMLSession().get(url).html

    # Title: when the second word contains '(', keep only the first word plus
    # the part of the second word before the parenthesis (any further words
    # are dropped); otherwise keep every word, single-space separated.
    words = page.find('.bookname h1')[0].text.split()
    if len(words) > 1:
        paren = words[1].find('(')
        if paren != -1:
            title = words[0] + ' ' + words[1][:paren]
        else:
            title = ' '.join(words)
    else:
        title = words[0]

    # Content.
    text = page.find('#content')[0].text

    # Write the file.
    path = saveDir + title + '.txt'
    if os.path.exists(path):
        print(title + ' 文件已存在!')
        return

    try:
        # Explicit UTF-8 so the scraped text is written the same way on every
        # platform instead of depending on the locale's default encoding.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(text)
    except (OSError, UnicodeError):
        # Only I/O and encoding failures are reported here; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        print('文件写入错误!')
    else:
        print(title + ' 写入成功!')
コード例 #2
0
def retrieveCWEURLFromCVE(cve_full_url=None, cve_id=None):
    """
    Follow a CVE page to its linked detail page and return the CWE link.

    Please specify either cve_full_url OR cve_id
    Priority non-null arg: cve_full_url

    Examples:
    cve_full_url=https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-7589
    OR
    cve_id=CVE-2015-7589

    When only ``cve_id`` is given, the page address is
    ``CVE_BASE_PATH + cve_id``. When ``cve_full_url`` is given, the CVE id
    used in log output is extracted from it (falling back to the whole url).

    Returns:
        ``None`` if neither argument is given; otherwise the ``href`` of the
        first ``div#vulnTechnicalDetailsDiv td a`` element on the detail
        page, or ``''`` if either page lacks the expected link.

    Raises:
        ValueError: If ``cve_full_url`` does not start with
            ``https://cve.mitre.org/``.
    """

    if cve_full_url is not None and not cve_full_url.startswith(
            'https://cve.mitre.org/'):
        raise ValueError('Invalid CVE url')

    if cve_full_url is None and cve_id is None:
        return None

    if cve_full_url is not None:
        url = cve_full_url
        id_match = re.search(r"CVE-[0-9]+-[0-9]+", url)
        cve_id = id_match.group() if id_match else url
    else:
        url = CVE_BASE_PATH + cve_id

    print("[retrieveCWEURLFromCVE] Parsing CVE: " + cve_id + ", from url: " +
          url)

    # One session for both requests. The original built a throwaway
    # HTMLSession for the first request and then used an undefined local
    # name ``session`` for the second.
    session = HTMLSession()

    cwe_url = ''
    detail_link = session.get(url).html.find(
        "div#GeneratedTable .ltgreybackground .larger a", first=True)

    # find(..., first=True) yields None when nothing matches, so guard before
    # dereferencing instead of failing with AttributeError.
    if detail_link is not None:
        detail_page = session.get(detail_link.attrs['href']).html
        cwe_link = detail_page.find("div#vulnTechnicalDetailsDiv td a",
                                    first=True)
        if cwe_link is not None:
            cwe_url = cwe_link.attrs['href']

    if PRINT_DEBUG is True:
        print("[retrieveCWEURLFromCVE] CWE Url of " + cve_id + ": " + cwe_url)

    return cwe_url
コード例 #3
0
ファイル: classes.py プロジェクト: markgyalus/444crawl
    def get_full_article(self):
        """Fetch ``self.url`` and return the article body as plain text.

        The text of every ``p`` element inside the first ``article`` element
        is concatenated, each followed by a newline.

        Returns:
            The joined paragraph text, or ``None`` if anything fails while
            fetching or parsing (the error is logged via ``article_logger``).
        """
        try:
            response = HTMLSession().get(url=self.url)
            article = response.html.find('article', first=True)
            return ''.join(
                paragraph.text + '\n' for paragraph in article.find('p'))

        except Exception as e:
            article_logger.error(f'Probléma a cikk kinyerésekor:\n{e}')
            return None
コード例 #4
0
def _parse_data_lists(html: HTMLSession):
    """Extract one code per ``article`` element of the page.

    For each ``article`` element, take the ``href`` of its first ``a``
    element and keep the part after the last ``=``.

    Returns:
        A list of the extracted strings, in document order.
    """
    return [
        article.find('a')[0].attrs["href"].split("=")[-1]
        for article in html.find('article')
    ]