Python FeedStorage 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: feed_storage

클래스/타입: FeedStorage

hotexamples.com에서의 예제들: 2

Python FeedStorage - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 feed_storage.FeedStorage에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

FeedStorage(1)

feeds(1)

예제 #1

파일 보기

    def __init__(self,config_parser):
        # Connect to engine
        database_path  = get_from_config_parser(config_parser,'Database','path','database')
        database_debug = get_boolean_from_config_parser(config_parser,'Database','debug',False)
        dir = os.path.dirname(database_path)
        if not os.path.exists(dir):
            mkdir(dir)
        sys.stderr.write('Connecting to database at "%s"\n' % database_path)
        self._engine = create_engine('sqlite:///%s' % database_path,echo=database_debug)

        # Start session
        Session = sessionmaker(bind=self._engine)
        self._session = Session()
        # Initialize feed storage
        self._feed_storage = FeedStorage(self._engine,self._session)
        # Initialize item storage
        self._item_storage = ItemStorage(self._engine,self._session)
        # A list of subprocess.Popen processes that will be maintained
        # by the Coffer object.
        self._external_processes = []
        # File storage (data dump)
        file_storage_path = get_from_config_parser(config_parser,'FileStorage','path','datadump')
        max_block_size    = get_int_from_config_parser(config_parser,'FileStorage','max-block-size',
                                                       file_storage.DEFAULT_MAX_BLOCK_SIZE)
        bzip2_path = get_from_config_parser(config_parser,'FileStorage','bzip2-path','/usr/bin/bzip2')
        self._file_storage = FileStorage(self._external_processes,file_storage_path,
                                         max_block_size,bzip2_path)
        # Content fetcher configuration
        self._fetcher = Fetcher(config_parser)

예제 #2

파일 보기

class Coffer:
    def __init__(self,config_parser):
        # Connect to engine
        database_path  = get_from_config_parser(config_parser,'Database','path','database')
        database_debug = get_boolean_from_config_parser(config_parser,'Database','debug',False)
        dir = os.path.dirname(database_path)
        if not os.path.exists(dir):
            mkdir(dir)
        sys.stderr.write('Connecting to database at "%s"\n' % database_path)
        self._engine = create_engine('sqlite:///%s' % database_path,echo=database_debug)

        # Start session
        Session = sessionmaker(bind=self._engine)
        self._session = Session()
        # Initialize feed storage
        self._feed_storage = FeedStorage(self._engine,self._session)
        # Initialize item storage
        self._item_storage = ItemStorage(self._engine,self._session)
        # A list of subprocess.Popen processes that will be maintained
        # by the Coffer object.
        self._external_processes = []
        # File storage (data dump)
        file_storage_path = get_from_config_parser(config_parser,'FileStorage','path','datadump')
        max_block_size    = get_int_from_config_parser(config_parser,'FileStorage','max-block-size',
                                                       file_storage.DEFAULT_MAX_BLOCK_SIZE)
        bzip2_path = get_from_config_parser(config_parser,'FileStorage','bzip2-path','/usr/bin/bzip2')
        self._file_storage = FileStorage(self._external_processes,file_storage_path,
                                         max_block_size,bzip2_path)
        # Content fetcher configuration
        self._fetcher = Fetcher(config_parser)

    def clone_db_session(self):
        clone_session = sessionmaker(bind=self._engine)
        return clone_session()

    def finish(self):
        '''
        Waits for all external processes started by coffer to finish.
        '''
        sys.stderr.write('Waiting for sub-processes to finish..\n')
        for process in self._external_processes:
            process.wait()
        sys.stderr.write('  ..finished.\n\n')

    def check_processes(self):
        '''
        Checks if some of the external processes have finished and
        removes them from the external-process list if they have.
        '''
        end_i = len(self._external_processes)
        i     = 0
        while i < end_i:
            if self._external_processes[i].poll() is not None:
                del self._external_processes[i]
                end_i -= 1
            else:
                i += 1

    def run_command_shell(self):
        shell = CommandShell(self)
        shell.cmdloop()

    def get_feed_info(self,url):
        '''
        Obtain information on an RSS feed, given its URL. The
        information will be obtained directly from the URL,
        not from our database. This works for feeds regardless
        of whether they are stored in our database.
        '''
        feed_results = feedparser.parse(url)
        sys.stderr.write(str(feed_results))
        if 'title' in feed_results.feed:
            return feed_results.feed.title
        else:
            return None

    def current_items_feed(self,
                           session,
                           feed,
                           enable_ad_filter = False,
                           check_existence  = False,
                           debug_enabled    = False):
        '''
        Returns a generator for the list of current items, i.e. the
        current list of fresh items returned by all known feeds.
        @param enable_ad_filter: if True, advertisements will be filtered out
                       using the predefined regex
        @param check_existence: if True, only entries that are not already
                       stored in the items database will be returned.
        '''
        if enable_ad_filter and len(feed.ad_filters) > 0:
            exclude_pattern = re.compile(u'|'.join(feed.ad_filters))
        feed_results = feedparser.parse(feed.get_url())
        for entry in feed_results.entries:
            if 'link' not in entry.keys():
                sys.stderr.write((u'No link found in this item: "%s"\n' \
                                  % entry.title).encode('utf-8'))
                if debug_enabled:
                    sys.stderr.write('Keys:\n%s\n' % str(entry.keys()))
                continue
            if 'id' not in entry.keys():
                if debug_enabled:
                    sys.stderr.write((u'No entry id found in this item: "%s"\n' \
                                      % entry.title).encode('utf-8'))
                entry_id = entry.link
                if debug_enabled:
                    sys.stderr.write('Keys:\n%s\n' % str(entry.keys()))
                    sys.stderr.write((u'Using link [%s] instead of id.\n' \
                                      % entry_id).encode('utf-8'))
            else:
                entry_id = entry.id
            if check_existence:
                if self._item_storage.exists_in_session(session,entry_id):
                    continue
            if (not enable_ad_filter) or (len(feed.ad_filters) == 0) \
                   or (not exclude_pattern.search(entry.title)):
                yield (feed.get_id(),entry_id,entry)

    def current_items_in_session(self,
                                 session,
                                 enable_ad_filter = False,
                                 check_existence  = False,
                                 debug_enabled    = False):
        '''
        Returns a generator for the list of current items, i.e. the
        current list of fresh items returned by all known feeds.
        @param enable_ad_filter: if True, advertisements will be filtered out
                       using the predefined regex
        @param check_existence: if True, only entries that are not already
                       stored in the items database will be returned.
        '''
        for feed in self._feed_storage.feeds():
            for item in self.current_items_feed(session,feed,enable_ad_filter,check_existence,debug_enabled):
                yield item

    def current_items(self,
                      enable_ad_filter = False,
                      check_existence  = False,
                      debug_enabled    = False):
        '''
        Returns a generator for the list of current items, i.e. the
        current list of fresh items returned by all known feeds.
        @param enable_ad_filter: if True, advertisements will be filtered out
                       using the predefined regex
        @param check_existence: if True, only entries that are not already
                       stored in the items database will be returned.
        '''
        for feed in self._feed_storage.feeds():
            for item in self.current_items_feed(self._session,feed,enable_ad_filter,check_existence,debug_enabled):
                yield item

    def fetch_and_store(self,targets):
        '''
        Download target URLs and store them in the file storage.
        @param targets: A list of (feed-id,URL) pairs.
        '''
        text_objs_dict = self._fetcher.fetch(targets)
        self._file_storage.store_all(text_objs_dict)