def crawDailyMetalComments(link, webNet, rowCount=8):
    """Collect comment entries from the table rows found at *link*.

    The content returned by ``SYMetalNetSpiderUtils.returnStartContext`` for
    the ``<tbody>`` marker is consumed one ``<tr>...</tr>`` row at a time.
    For each row the second ``<td>`` cell is taken as the publication date,
    and the anchor after it supplies the link and the title. The linked page
    is then passed to ``crawDescriptContext`` to obtain the description.

    Args:
        link: Passed unchanged to ``returnStartContext``; presumably the URL
            of the listing page (confirm against the helper).
        webNet: String prepended to every extracted ``href`` value;
            presumably the site's base URL.
        rowCount: Number of ``<tr>`` rows to read. Defaults to 8, the value
            that was previously hard-coded.

    Returns:
        A list with one 7-item list per row:
        ``[uuid1 string, linkUrl, title, pubDate, descriptContext,
        'METAL', 'SYNET']``.

    NOTE(review): nothing here guards against the table holding fewer than
    ``rowCount`` rows; the outcome then depends on how the
    ``SYMetalNetSpiderUtils`` helpers treat a missing marker.
    """
    remainingContext = SYMetalNetSpiderUtils.returnStartContext(link, '<tbody>')
    comments = []
    for _ in range(rowCount):
        row = SYMetalNetSpiderUtils.divisionTarget(
            remainingContext, '<tr>', '</tr>')
        remainingContext = row['nextContext']
        rowContext = row['targetContext']

        # The first <td> cell is not part of the result, so move straight
        # past its closing tag instead of extracting a value nobody reads.
        afterFirstCell = SYMetalNetSpiderUtils.filterAfterContext(
            rowContext, '</td>')

        pubDate = SYMetalNetSpiderUtils.removeSpecialCharacter(
            SYMetalNetSpiderUtils.filterContextByTarget(
                afterFirstCell, '<td>', '</td>'))

        afterSecondCell = SYMetalNetSpiderUtils.filterAfterContext(
            afterFirstCell, '</td>')

        # The extracted href is appended to webNet as-is.
        linkUrl = webNet + SYMetalNetSpiderUtils.filterContextByTarget(
            afterSecondCell, '<a href="', '">')
        title = SYMetalNetSpiderUtils.removeSpecialCharacter(
            SYMetalNetSpiderUtils.filterContextByTarget(
                afterSecondCell, '">', '</a>'))

        descriptContext = crawDescriptContext(linkUrl)
        comments.append([
            str(uuid.uuid1()),
            linkUrl,
            title,
            pubDate,
            descriptContext,
            'METAL',
            'SYNET',
        ])
    return comments
def crawDailyMetalComments(link, webNet, rowCount=8):
    """Collect comment entries from the table rows found at *link*.

    The content returned by ``SYMetalNetSpiderUtils.returnStartContext`` for
    the ``<tbody>`` marker is consumed one ``<tr>...</tr>`` row at a time.
    For each row the second ``<td>`` cell is taken as the publication date,
    and the anchor after it supplies the link and the title. The linked page
    is then passed to ``crawDescriptContext`` to obtain the description.

    Args:
        link: Passed unchanged to ``returnStartContext``; presumably the URL
            of the listing page (confirm against the helper).
        webNet: String prepended to every extracted ``href`` value;
            presumably the site's base URL.
        rowCount: Number of ``<tr>`` rows to read. Defaults to 8, the value
            that was previously hard-coded.

    Returns:
        A list with one 7-item list per row:
        ``[uuid1 string, linkUrl, title, pubDate, descriptContext,
        "METAL", "SYNET"]``.

    NOTE(review): nothing here guards against the table holding fewer than
    ``rowCount`` rows; the outcome then depends on how the
    ``SYMetalNetSpiderUtils`` helpers treat a missing marker.
    """
    remainingContext = SYMetalNetSpiderUtils.returnStartContext(link, "<tbody>")
    comments = []
    for _ in range(rowCount):
        row = SYMetalNetSpiderUtils.divisionTarget(remainingContext, "<tr>", "</tr>")
        remainingContext = row["nextContext"]
        rowContext = row["targetContext"]

        # The first <td> cell is not part of the result, so move straight
        # past its closing tag instead of extracting a value nobody reads.
        afterFirstCell = SYMetalNetSpiderUtils.filterAfterContext(rowContext, "</td>")

        pubDate = SYMetalNetSpiderUtils.filterContextByTarget(afterFirstCell, "<td>", "</td>")
        pubDate = SYMetalNetSpiderUtils.removeSpecialCharacter(pubDate)

        afterSecondCell = SYMetalNetSpiderUtils.filterAfterContext(afterFirstCell, "</td>")

        # The extracted href is appended to webNet as-is.
        href = SYMetalNetSpiderUtils.filterContextByTarget(afterSecondCell, '<a href="', '">')
        linkUrl = webNet + href

        title = SYMetalNetSpiderUtils.filterContextByTarget(afterSecondCell, '">', "</a>")
        title = SYMetalNetSpiderUtils.removeSpecialCharacter(title)

        descriptContext = crawDescriptContext(linkUrl)
        comments.append(
            [str(uuid.uuid1()), linkUrl, title, pubDate, descriptContext, "METAL", "SYNET"]
        )
    return comments