Python findMatchedTexts Examples

Programming Language: Python

Namespace/Package Name: libs.patternMatcher

Method/Function: findMatchedTexts

Examples at hotexamples.com: 3

Python findMatchedTexts - 3 examples found. These are the top-rated real-world Python examples of libs.patternMatcher.findMatchedTexts, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: dailySamCrawler.py Project: ydj515/python_crawler

def parse(pageString):
    """Pull the fields of interest out of a crawled HTML page.

    Args:
        pageString: HTML markup, parsed with BeautifulSoup's "html.parser".

    Returns:
        dict with the keys ``box2Content``, ``content`` and ``bx2``; the keys
        ``date`` and ``addr`` are added only when the ``qtDay`` div could be
        processed without an error.

    Raises:
        AttributeError: if the ``box2Content``, ``content`` or ``bx2`` div is
            not found (``find`` yields ``None`` in that case).
        IndexError: if the ``content`` div has fewer than five ``<p>`` tags.
    """
    result = {}
    bsObj = BeautifulSoup(pageString, "html.parser")

    qtDayText2 = bsObj.find("div", {"id": "qtDay"})
    try:
        # Raw strings keep the regex escapes (\s, \S, \( and \)) out of
        # Python's own string-escape handling; the patterns are unchanged.
        extDat = findMatchedTexts(qtDayText2.text, r"201[\s\S]+")
        # The first match is split on a CRLF followed by eight spaces; the two
        # leading pieces are joined back with a single space.
        ss = extDat[0].split("\r\n        ")
        ss2 = "{} {}".format(ss[0].replace("\n", ". "), ss[1])
        result['date'] = ss2
        # First parenthesised span of the same text is handed to getAddr.
        res = findMatchedTexts(qtDayText2.text, r"\(.+\)")
        result['addr'] = getAddr(res[0])
    except Exception as e:
        # Best effort: a missing or unexpectedly shaped qtDay div only costs
        # the 'date'/'addr' keys; the remaining fields are still extracted.
        print(e)

    box2Content = bsObj.find("div", {"class": "box2Content"})
    result['box2Content'] = box2Content.text

    content = bsObj.find("div", {"id": "content"})
    ps = content.findAll("p")
    # Only the fifth paragraph of the content div is kept.
    result['content'] = ps[4].text

    bx2 = bsObj.find("div", {"class": "bx2"})

    guideText = bx2.text
    result['bx2'] = addLine(guideText)

    return result

Example #2

Show file

def get_row(tr):
    """Convert one table row element into a flat dict.

    Each extraction step is best effort: if it fails, a marker line is
    printed and the corresponding field keeps its empty default.

    Args:
        tr: row element exposing ``find_all('td')``; presumably a
            BeautifulSoup ``<tr>`` tag -- confirm against the caller.

    Returns:
        dict with the keys ``api_id``, ``title``, ``subtitle``, ``count`` and
        ``service_types``.

    Raises:
        IndexError: if the row has no cells (the first cell is read outside
            any ``try``) or fewer than four cells (``count`` is read from the
            fourth cell).
    """
    tds = tr.find_all('td')
    # Split the serialized first anchor at the opening of its title span:
    # index 0 is the markup before the span, index 1 (if any) starts with the
    # span's content.
    atag = str(tds[0].find('a')).split('<span class="tit_info">')

    first = ''
    try:
        # Drop every tab-delimited run, then take the second line.
        first = re.compile('\t.*\t').sub('', atag[0]).split('\n')[1]
        first = first.replace('R&amp;amp;amp;amp;D ', '')
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit.
        print('----------')

    second = ''
    try:
        second = atag[1].split('</span>')[0]
        second = second.replace('R&amp;amp;D ', '')
    except Exception:
        print('---------')

    api_id = ''
    try:
        id_a = tds[0].find('h4').find('a')['href']
        # Raw string so that ``\(`` reaches the regex engine untouched.
        api_id = findMatchedTexts(id_a, r"javascript:view\('[0-9]+")[0]
        # Strip the literal prefix, leaving only the digits.
        api_id = api_id.replace("javascript:view('", "")
    except Exception:
        print('----api id exception -----')

    service_types = []
    try:
        service_types_spans = tds[5].find('div', {
            'class': 'datatype'
        }).find_all('span')
        service_types = [span.text for span in service_types_spans]
    except Exception:
        print('----- serivce types exception -------')

    return {
        'api_id': api_id,
        'title': first,
        'subtitle': second,
        'count': tds[3].text,
        'service_types': service_types
    }

Example #3

Show file

File: network_tab.py Project: hiswordsini/Python_Web_Crawling

from libs.crawler import crawl
from bs4 import BeautifulSoup
from libs.patternMatcher import findMatchedTexts

# Auto-complete endpoint; the textCrpNm query parameter carries the
# URL-encoded search term (it decodes to the same Korean prefix used in the
# pattern below).
url = "http://dart.fss.or.kr/corp/searchAutoComplete.do?textCrpNm=%EC%85%80%ED%8A%B8%EB%A6%AC%EC%98%A8&_=1561171426973"

pageString = crawl(url)

bsObj = BeautifulSoup(pageString, "html.parser")

# Match the prefix followed by any run of Hangul syllables, digits or ASCII
# letters. The class previously ended in ``A-z``, which also admits the
# punctuation between 'Z' and 'a' ([ \ ] ^ _ `); ``A-Z`` is what was meant.
names = findMatchedTexts(bsObj.text, "셀트리온[가-힣0-9a-zA-Z]*")

# Print the collection as a whole first, then one entry per line.
print(names)

for name in names:
    print(name)