def downloadAndClassifyUrl(url_and_params):
    """Download a URL, classify its text, and extract its outgoing edges.

    Args:
        url_and_params: A 2-tuple ``(url, params)``. ``url`` is given
            without a scheme ('http://' is prepended before downloading);
            ``params`` is passed through unchanged to ``classifyText``.

    Returns:
        A 5-tuple ``(flag, text, kept_edges, edges, stats)`` where
        ``flag`` is 1 if ``classifyText`` reported the text as kept and 0
        otherwise, ``text`` is the downloaded content, ``kept_edges`` is
        ``edges`` when kept and an empty list otherwise, ``edges`` is the
        result of ``snowcrawl.findEdges``, and ``stats`` is the download,
        classify and edge stats combined with ``+`` in that order.
    """
    # Tuple parameters (``def f((a, b))``) were removed in Python 3
    # (PEP 3113). Unpacking here keeps the call convention -- one
    # positional 2-tuple -- while running on both Python 2 and 3.
    url, params = url_and_params

    text, download_stats = snowcrawl.downloadUrl('http://' + url)
    kept, classify_stats = classifyText(text, params)
    # Edges are extracted using the scheme-less url, as passed in.
    edges, edge_stats = snowcrawl.findEdges(url, text)

    stats = download_stats + classify_stats + edge_stats
    if kept:
        return (1, text, edges, edges, stats)
    return (0, text, [], edges, stats)
def myUrlProcesser(url_and_params):
    """Download a URL and extract its edges without classifying the text.

    The classification result is hard-coded to "not kept" (``kept = 0``
    with empty classify stats), so the third element of the result is
    always an empty list.
    NOTE(review): presumably a template to be customized with a real
    classification step -- confirm intended use.

    Args:
        url_and_params: A 2-tuple ``(url, params)``. ``url`` is given
            without a scheme ('http://' is prepended before downloading);
            ``params`` is accepted but not used by this function.

    Returns:
        A 5-tuple ``(kept, text, kept_edges, edges, stats)`` where
        ``kept`` is 0, ``text`` is the downloaded content, ``kept_edges``
        is an empty list, ``edges`` is the result of
        ``snowcrawl.findEdges``, and ``stats`` is the download, classify
        (empty list) and edge stats combined with ``+`` in that order.
    """
    # Tuple parameters (``def f((a, b))``) were removed in Python 3
    # (PEP 3113). Unpacking here keeps the call convention -- one
    # positional 2-tuple -- while running on both Python 2 and 3.
    url, params = url_and_params

    text, download_stats = snowcrawl.downloadUrl('http://' + url)
    # No classifier is run here: nothing is kept and no stats are added.
    kept, classify_stats = 0, []
    # Edges are extracted using the scheme-less url, as passed in.
    edges, edge_stats = snowcrawl.findEdges(url, text)

    stats = download_stats + classify_stats + edge_stats
    kept_edges = edges if kept else []
    return (kept, text, kept_edges, edges, stats)