def read_feeds(self):
    """Read the feed list and work out each feed's latest update time.

    Each line of FEED_LIST_FILE is split into whitespace-separated tokens
    (``#`` comments are stripped): ``url [filter [date]]``. The filter
    defaults to ``'.*'`` when it is omitted. The last update time is taken
    from HISTORY_FILE (lines of ``url timestamp``) when the URL is recorded
    there; otherwise the optional third token is parsed with
    INTERNAL_DATE_FORMAT and converted to a timestamp.

    Returns ``([], None)`` when FEED_LIST_FILE does not exist.
    """
    if not os.path.exists(FEED_LIST_FILE):
        # NOTE(review): if `log` is a stdlib logging.Logger, `warn` is a
        # deprecated alias of `warning` -- confirm and switch.
        log.warn("feeds.list not found")
        return ([], None)

    # url -> timestamp (float) of the last recorded update
    histories = {}
    if os.path.exists(HISTORY_FILE):
        for parts in line_token_iterator(HISTORY_FILE):
            histories[parts[0]] = float(parts[1])

    feeds = []
    for parts in line_token_iterator(FEED_LIST_FILE, support_comment=True):
        total_parts = len(parts)
        url = parts[0]
        filter = parts[1] if total_parts > 1 else '.*'

        # The recorded history wins over the date written in the feed list.
        last_update_time = histories.get(url)
        if last_update_time is None and total_parts > 2:
            try:
                tmp_time_struct = datetime.strptime(parts[2], INTERNAL_DATE_FORMAT).timetuple()
                # mktime interprets the parsed date as local time
                last_update_time = time.mktime(tmp_time_struct)
            except ValueError as e:
                # Best effort: a malformed date leaves the feed without an
                # update time instead of aborting the whole read.
                log.error('invalid date: %s (%s)', parts[2], e)
        # NOTE(review): the visible body ends here without appending to
        # `feeds` or returning a result; the remainder of this function
        # appears to be missing from this view.
def line_token_iterator(file, separator=None, support_comment=False, comment_char='#'):
    """Yield the tokens of each line of a text file as a tuple.

    Args:
        file: path of the file to read.
        separator: passed to ``str.split``; ``None`` splits on runs of
            whitespace, so blank lines produce no tokens and are skipped.
            With an explicit separator ``str.split`` keeps empty fields and
            the trailing newline stays on the last token.
        support_comment: when true, everything from the first occurrence of
            ``comment_char`` to the end of the line is discarded before
            splitting.
        comment_char: the string that starts a comment.

    Yields:
        A tuple of tokens for every line that has at least one token.

    An IOError while opening or reading the file is logged, not raised; the
    iteration simply ends.
    """
    try:
        # `with` closes the handle when the generator finishes or is closed
        with open(file) as handle:
            for line in handle:
                if support_comment:
                    com_pos = line.find(comment_char)
                    if com_pos >= 0:
                        line = line[:com_pos]
                parts = line.split(separator)
                if parts:
                    yield tuple(parts)
    except IOError as e:
        log.error('error reading file %s: %s', file, e)