예제 #1
0
def parse(publishedFormat, titleElement, mainElement):
    """Look for published info in the elements between the title and main element.

    Starting from ``lxmlutil.getFullNext(titleElement)``, each successive
    element whose ``sourceline`` is smaller than ``mainElement.sourceline`` is
    handed to ``_getPublishedInside``; the first truthy result wins.

    Args:
        publishedFormat: passed through unchanged to ``_getPublishedInside``.
        titleElement: element after which the search starts.
        mainElement: element whose source line is the exclusive upper bound.

    Returns:
        The first truthy value returned by ``_getPublishedInside``. ``None`` if
        the title is not on an earlier source line than the main element or
        no element was examined; otherwise the last (falsy) result obtained.
    """
    if titleElement.sourceline >= mainElement.sourceline:
        return None
    endLine = mainElement.sourceline
    publishedResult = None
    element = lxmlutil.getFullNext(titleElement)
    # Stop when getFullNext runs out of elements (returns None) instead of
    # failing with AttributeError on ``None.sourceline``.
    while element is not None and element.sourceline < endLine:
        publishedResult = _getPublishedInside(publishedFormat, element)
        if publishedResult:
            break
        element = lxmlutil.getFullNext(element)
    return publishedResult
예제 #2
0
def _getNextText(element):
    """Return the first stripped text found starting at ``element``.

    For every element in the chain produced by ``lxmlutil.getFullNext``, the
    element's own ``text`` is returned when ``lxmlutil.isVisibleElement``
    accepts the element and the text is non-empty; otherwise its ``tail`` is
    returned when non-empty. The value is stripped before being returned, so
    it can be an empty string. Returns ``None`` when the chain ends first.
    """
    current = element
    while True:
        if current is None:
            return None
        # An element's own text only counts when the element is visible.
        hasOwnText = lxmlutil.isVisibleElement(current) and current.text
        if hasOwnText:
            return current.text.strip()
        if current.tail:
            return current.tail.strip()
        current = lxmlutil.getFullNext(current)
예제 #3
0
def parse(url, contentElement, titleElement, mainElement):
    """Collect parsed images located between the title and the end of main.

    All ``img`` elements under ``contentElement`` are selected with pyquery.
    Images on a source line before ``titleElement`` are skipped, and iteration
    stops at the first image on a source line after the element returned by
    ``lxmlutil.getFullNext(mainElement)`` (no upper bound if there is none).

    Args:
        url: passed through unchanged to ``_parseImg``.
        contentElement: root handed to ``pyquery.PyQuery`` for the ``img`` query.
        titleElement: element whose source line is the lower bound.
        mainElement: element whose successor's source line is the upper bound.

    Returns:
        A list of the truthy values returned by ``_parseImg``.
    """
    firstLine = titleElement.sourceline
    following = lxmlutil.getFullNext(mainElement)
    # -1 is the "no upper bound" marker used by the range check below.
    lastLine = following.sourceline if following is not None else -1

    images = []
    for imgElement in pyquery.PyQuery(contentElement)('img'):
        line = imgElement.sourceline
        if line < firstLine:
            continue
        if lastLine > 0 and line > lastLine:
            break
        parsed = _parseImg(url, imgElement)
        if parsed:
            images.append(parsed)
    return images