        # Tail of ettoday_content_processor (def is outside this chunk).
        # Collect the unescaped anchor text for each related-article link.
        # NOTE(review): indentation reconstructed from a whitespace-mangled
        # paste — the append presumably sits inside a loop over anchors; confirm
        # against the full function.
        links_descs.append(html.unescape(a.get_text().strip()))
    # Attach the related-link URLs and their descriptions to the result.
    res_dict['news_related_url'] = links
    res_dict['news_related_url_desc'] = links_descs
    # Join the accumulated paragraph fragments into the article body.
    content = '\n'.join(temp_content).strip()
    if content:
        res_dict['news'] = html.unescape(content)
    # If nothing usable was extracted, log the failure, record the
    # (rss_id, url) pair for the error email, and return None.
    if not res_dict or 'news' not in res_dict:
        content_parser.logger.error(
            'Ettoday url: {} did not process properly'.format(url))
        content_parser.errors['process_empty_content_(rss_id)'].append(
            [rss_id, url])
        return
    return res_dict


# --- ETtoday driver: query unprocessed rows, run the processor, report ---
content_parser = ContentParser('ETtoday')
# Query the data with source name
unprocessed_data = content_parser.content_query()
content_parser.content_processor(unprocessed_data, ettoday_content_processor)
# Send a summary email only when the processor recorded errors.
if content_parser.errors:
    content_parser.sent_error_email()
# Release the DB cursor and connection owned by the parser.
content_parser.encoding_cursor.close()
content_parser.mydb.close()
# `start` is set elsewhere in the file — presumably time.time() before the
# query; TODO confirm.
content_parser.logger.info(
    "Processed Ettoday {} examples in {} seconds".format(
        len(unprocessed_data), time.time() - start))
        # Tail of yahoo_content_processor (def is outside this chunk).
        # NOTE(review): nesting reconstructed from a whitespace-mangled paste —
        # this `else` pairs with an out-of-view `if` that sets a non-empty
        # `prefix`; confirm against the full function.
        else:
            prefix = ''
        # Build the article body from the accumulated fragments.
        content = prefix + '\n'.join(temp_content)  #.replace('。 ', '。\n')
        res_dict['news'] = html.unescape(content)
        return res_dict
    else:
        # Extraction failed: log and record the (rss_id, url) pair.
        content_parser_1.logger.error(
            'Yahoo url: {} did not process properly'.format(url))
        # NOTE(review): this appends to `content_parser.errors` (the ETtoday
        # parser) while logging via `content_parser_1` — Yahoo failures likely
        # end up on the wrong parser, so `content_parser_1.errors` may stay
        # empty and no error email is sent. The processor is also shared with
        # `content_parser_2`, so a hard-coded global is suspect either way —
        # verify intent before fixing.
        content_parser.errors['process_empty_content_(rss_id)'].append(
            [rss_id, url])
        return


# --- Yahoo Source 1 driver: query, process, report, clean up ---
start = time.time()
content_parser_1 = ContentParser('Yahoo Source 1')
unprocessed_data_1 = content_parser_1.content_query()
content_parser_1.content_processor(unprocessed_data_1, yahoo_content_processor)
if content_parser_1.errors:
    content_parser_1.sent_error_email()
content_parser_1.encoding_cursor.close()
content_parser_1.mydb.close()
content_parser_1.logger.info(
    "Processed Yahoo Source 1 {} examples in {} seconds".format(
        len(unprocessed_data_1), time.time() - start))

# --- Yahoo奇摩新聞 driver: same pipeline, second source, shared processor ---
start = time.time()
content_parser_2 = ContentParser('Yahoo奇摩新聞')
unprocessed_data_2 = content_parser_2.content_query()
content_parser_2.content_processor(unprocessed_data_2, yahoo_content_processor)
# Body of this `if` continues beyond the visible chunk.
if content_parser_2.errors: