def parse_urls(cls, urlList):
    """Fetch each URL as UTF-8, feed the cleaned HTML into a single parser
    instance, and return every happening it accumulated across all pages."""
    parser = cls()
    for page_url in urlList:
        raw_page = webutils.read_url(page_url, "utf-8")
        parser.feed(webutils.clean_html(raw_page))
        # close() is called after each page, mirroring the original flow;
        # the same parser instance keeps accumulating across pages.
        parser.close()
    return parser.get_happenings()
def parse_urls(cls, urlList):
    """Fetch each URL, feed the cleaned HTML into one parser instance,
    and return all happenings collected across the pages.

    This variant decodes pages as ISO-8859-2 (Central European), unlike
    the UTF-8 siblings — presumably the target site serves that charset.
    """
    parser = cls()
    for url in urlList:
        # ISO-8859-2 is deliberate for this source; do not "fix" to UTF-8.
        content = webutils.clean_html(webutils.read_url(url, "iso-8859-2"))
        # Debug prints (full page dumps) removed; they flooded stdout.
        parser.feed(content)
        parser.close()
    return parser.get_happenings()
def parse_urls(cls, urlList):
    """Fetch each URL as UTF-8, feed the cleaned HTML into one parser
    instance, register the source URL via fit_url, and return all
    happenings collected across the pages.
    """
    parser = cls()
    for url in urlList:
        content = webutils.clean_html(webutils.read_url(url, "utf-8"))
        parser.feed(content)
        # Associates the parsed data with its source URL (parser-defined).
        parser.fit_url(url)
        parser.close()
    # Removed: leftover debug print and a commented-out catch-all
    # try/except that silently printed errors — dead code.
    return parser.get_happenings()
def obtain_happening_details(cls, url):
    """Fetch and parse a single happening's detail page.

    Returns the parsed happening dict, an empty dict on parse failure,
    or None when `url` is falsy (empty/None).
    """
    if not url:
        return
    # read_url is called without an explicit encoding here (its default),
    # unlike the list-page parsers — presumably intentional; confirm.
    content = webutils.clean_html(webutils.read_url(url))
    happening_parser = cls()
    happening_parser.feed(content)
    happening_parser.close()
    happening = {}
    try:
        happening = happening_parser.get_happening()
    except Exception:
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed. Best-effort: log and return {}.
        print("Błąd parsowania " + url)
    return happening
def obtain_happening_details(cls, url, counter):
    """Fetch and parse a happening's detail page, following linked pages
    up to a recursion depth of 3 (tracked via `counter`).

    Returns the parsed happening dict, {} on parse failure, or None when
    `url` is falsy.
    """
    counter += 1
    if not url:
        return
    content = webutils.clean_html(webutils.read_url(url))
    happening_parser = cls()
    # NOTE(review): the instance is also passed explicitly as the first
    # argument — verify feed_parsers' signature; this looks like a
    # double-self mistake but may be intentional. Left unchanged.
    happening_parser.feed_parsers(happening_parser, content)
    urls = happening_parser.extract_nexts(content)
    # Depth guard: only follow extracted links while fewer than 3 levels deep.
    if counter < 3:
        # TODO(review): `extract` is not a list method (likely `extend`?)
        # and `obtain_happenings_details` (plural) differs from this
        # method's name — confirm both exist on the parser; if this was
        # meant to recurse here, `counter` must also be threaded through.
        # Left unchanged pending confirmation.
        happening_parser.happenings.extract(
            happening_parser.obtain_happenings_details(urls))
    # Initialize before the try so the exception path cannot raise
    # NameError on `return happening` (bug in the original).
    happening = {}
    try:
        happening = happening_parser.get_happening()
    except Exception:
        # Was a bare `except:` — narrowed; best-effort: log and return {}.
        print("Błąd parsowania " + url)
    return happening