Code example #1
File: AppDelegate.py  Project: gdelfresno/Notipy
    def checkItems_(self, sender):

        NSLog("Checking items")
        if self.user is None or self.password is None:
            return
        
        # Retrieve all items
        ca = ClientAuthMethod(self.user,self.password)
        reader = GoogleReader(ca)
        reader.makeSpecialFeeds()
        specials = reader.getSpecialFeed(ReaderUrl.READING_LIST)
        specials.loadItems()
        
        if self.lastId is None:
            # First run: remember the newest item and notify about it.
            item = specials.items[0]

            self.lastId = item.id
            self.notifyNewItem(item)
        else:
            if len(specials.items) > 0:
                # Notify every item newer than the last one seen,
                # then remember the newest id for the next check.
                lastItem = specials.items[0].id
                for item in specials.items:
                    if item.id == self.lastId:
                        break
                    self.notifyNewItem(item)
                self.lastId = lastItem
        NSLog("Finished Checking items")
Code example #2
    def test_reading_list_exists(self):
        ca = ClientAuthMethod(username,password)
        reader = GoogleReader(ca)
        reader.makeSpecialFeeds()
        feeds = reader.getFeedContent(reader.getSpecialFeed(ReaderUrl.READING_LIST))

        self.assertEqual(dict, type(feeds))

        list_match = re.search('reading list in Google Reader', feeds['title'])
        self.assertTrue(list_match)
Code example #3
    def test_reading_list_exists(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)
        reader.makeSpecialFeeds()
        feeds = reader.getFeedContent(
            reader.getSpecialFeed(ReaderUrl.READING_LIST))

        self.assertEqual(dict, type(feeds))

        list_match = re.search('reading list in Google Reader', feeds['title'])
        self.assertTrue(list_match)
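Both tests above exercise the same flow: authenticate, build the special feeds, and read back the reading list. Here is a standalone sketch of that flow, assuming libgreader exposes these names at package level and using placeholder credentials (ClientLogin itself has long been retired, so treat this purely as an API illustration):

from libgreader import ClientAuthMethod, GoogleReader, ReaderUrl

username = 'user@example.com'   # placeholder
password = 'secret'             # placeholder

ca = ClientAuthMethod(username, password)
reader = GoogleReader(ca)
reader.makeSpecialFeeds()

reading_list = reader.getSpecialFeed(ReaderUrl.READING_LIST)

# getFeedContent() returns the raw API response as a dict;
# the tests above check its 'title' key.
content = reader.getFeedContent(reading_list)
print content['title']

# loadItems() populates reading_list.items, as used in code example #1.
reading_list.loadItems()
for item in reading_list.items:
    print item.title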
Code example #4
File: gr_import.py  Project: dolanor-galaxy/1flow
def import_google_reader_begin(user_id, access_token):

    auth = OAuth2Method(settings.GOOGLE_OAUTH2_CLIENT_ID,
                        settings.GOOGLE_OAUTH2_CLIENT_SECRET)
    auth.authFromAccessToken(access_token)
    reader = GoogleReader(auth)

    django_user, mongo_user = get_user_from_dbs(user_id)
    username = django_user.username

    try:
        user_infos = reader.getUserInfo()

    except TypeError:
        LOGGER.exception(u'Could not start Google Reader import for user %s.',
                         username)
        # Don't refresh, it's now done by a dedicated periodic task.
        # If we failed, it means the problem is quite serious.
        #       import_google_reader_trigger(user_id, refresh=True)
        return

    GR_MAX_FEEDS = config.GR_MAX_FEEDS

    LOGGER.info(u'Starting Google Reader import for user %s.', username)

    gri = GoogleReaderImport(user_id)

    # Take note of user information now that we have it.
    gri.start(user_infos=user_infos)

    reader.buildSubscriptionList()

    total_reads, reg_date = reader.totalReadItems(without_date=False)
    total_starred, star1_date = reader.totalStarredItems(without_date=False)
    total_feeds = len(reader.feeds) + 1  # +1 for 'starred'

    gri.reg_date(pytime.mktime(reg_date.timetuple()))
    gri.star1_date(pytime.mktime(star1_date.timetuple()))
    gri.total_reads(total_reads)
    gri.total_starred(total_starred)

    LOGGER.info(
        u'Google Reader import for user %s: %s feed(s) and %s read '
        u'article(s) to go…', username, total_feeds, total_reads)

    if total_feeds > GR_MAX_FEEDS and not settings.DEBUG:
        mail_admins(
            'User {0} has more than {1} feeds: {2}!'.format(
                username, GR_MAX_FEEDS, total_feeds),
            u"\n\nThe GR import will be incomplete.\n\n"
            u"Just for you to know…\n\n")

    # We launch the starred feed import first. Launching it after the
    # standard feeds would delay it until the world's end.
    reader.makeSpecialFeeds()
    starred_feed = reader.getSpecialFeed(ReaderUrl.STARRED_LIST)
    import_google_reader_starred.apply_async((user_id, username, starred_feed),
                                             queue='low')

    processed_feeds = 1
    feeds_to_import = []

    for gr_feed in reader.feeds[:GR_MAX_FEEDS]:

        try:
            feed = create_feed(gr_feed, mongo_user)

        except Feed.DoesNotExist:
            LOGGER.exception(
                u'Could not create feed “%s” for user %s, '
                u'skipped.', gr_feed.title, username)
            continue

        processed_feeds += 1
        feeds_to_import.append((user_id, username, gr_feed, feed))

        LOGGER.info(u'Imported feed “%s” (%s/%s) for user %s…', gr_feed.title,
                    processed_feeds, total_feeds, username)

    # We need to clamp the total, else the task won't finish
    # when the user has more feeds than allowed.
    #
    gri.total_feeds(min(processed_feeds, GR_MAX_FEEDS))

    for feed_args in feeds_to_import:
        import_google_reader_articles.apply_async(feed_args, queue='low')

    LOGGER.info(
        u'Imported %s/%s feeds in %s. Articles import already '
        u'started with limits: date: %s, %s waves of %s articles, '
        u'max articles: %s, reads: %s, starred: %s.', processed_feeds,
        total_feeds, naturaldelta(now() - gri.start()),
        naturaltime(max([gri.reg_date(), GR_OLDEST_DATE])),
        config.GR_WAVE_LIMIT, config.GR_LOAD_LIMIT, config.GR_MAX_ARTICLES,
        total_reads, total_starred)
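Note the calling convention on gri: each field is called with a value to store it (gri.total_reads(total_reads)) and with no argument to read it back (gri.reg_date() in the final log line). The following is a purely illustrative sketch of that getter/setter style, not the actual GoogleReaderImport class from 1flow:

class CallableField(object):
    # Illustration only: call with an argument to set, without one to get,
    # mirroring how the gri.* accessors are used above.
    def __init__(self, default=None):
        self._value = default

    def __call__(self, value=None):
        if value is None:
            return self._value
        self._value = value
        return value


class ImportStatusSketch(object):
    # Hypothetical stand-in for GoogleReaderImport's external interface.
    def __init__(self, user_id):
        self.user_id = user_id
        self.total_reads = CallableField(0)
        self.total_feeds = CallableField(0)
        self.reg_date = CallableField()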
Code example #5
class HooverReader(object):
    '''
    Export everything that was saved in Google Reader as JSON objects. Keep
    as much information as possible, but especially ID (useful for
    cross-referencing), title, url, notes (probably gone) and read status.
    Each file should contain entries for just one category/tag. The list of
    categories (folders in Google Reader) should be stored in categories.json.

    The script has no memory and will always fetch everything (it doesn't do
    incremental updates). It will NOT save the list of feeds, since those can
    be exported as OPML from Google Reader.

    If it hits the rate limit, it will pause for up to half an hour before
    giving up.

    DILEMMAS:
    - Should we save feed contents? How far back?
    - Should we save category contents? How far back?
        (probably; categories can contain entries labeled only with the
         category label, which are not otherwise tagged and hence would not
         be backed up)

    Save:
    - all tagged entries (labeled feeds; categories that don't contain feeds)
    - list of categories with feeds they contain

    Algorithm:
    - fetch a list of categories
    - fetch a list of all labels (which includes categories)
    - for every label which is not a category:
        - loadItems
        - execute loadMoreItems until items count remains same (or error)
        - dump data as JSON to file
    - for every category fetch a list of feeds it contains
    - dump the list of categories with feeds as JSON to a file
    '''
    def __init__(self, username, password):
        self.auth = ClientAuthMethod(username, password)
        self.reader = GoogleReader(self.auth)
        self.reader.makeSpecialFeeds()  # Fetches list of special feeds like starred
        self.reader.buildSubscriptionList()  # Fetches list of feeds AND categories

        self.categories = self.reader.categories
        self.feeds = self.reader.feeds[:]  # Make a copy so lib calls don't fill it with crap
        self.specialFeeds = self.reader.specialFeeds.copy()

    def __create_feed_filename(self, feed_label):
        return "{0}.json".format(feed_label)

    def get_tags(self):
        tags_json = self.reader.httpGet(
            'https://www.google.com/reader/api/0/tag/list',
            {'output': 'json'})
        tags = json.loads(tags_json)
        tags_list = tags['tags']
        self.tags = tags_list

    def load_items(self, feed):
        fetch_size = 1000
        tryagain = 0
        feed.loadItems(loadLimit=fetch_size)
        while (feed.lastLoadLength > 0 and feed.lastLoadLength == fetch_size) \
                or (tryagain > 0 and tryagain < 5):
            feed.loadMoreItems(loadLimit=fetch_size)
            if not feed.lastLoadOk:
                print "Error fetching items for feed '{0}'".format(
                    feed.title)
                # Double the pause time on each consecutive failure
                pause_for = PAUSE_INTERVAL * (2 ** tryagain)
                print "Pausing for {0} minute(s)...".format(pause_for / 60)
                time.sleep(pause_for)
                tryagain += 1
            else:
                tryagain = 0
        return feed.items

    def process_item(self, item):
        values = {}
        keys = ('id', 'title', 'content', 'read', 'starred', 'shared', 'url')
        for key in keys:
            values[key] = getattr(item, key, u'')
        values['origin'] = getattr(item, 'origin', {})
        return values

    def get_feed_info(self, feed):
        feed_obj = {
            'feed_id': feed.id,
            'title': feed.title,
            'site_url': getattr(feed, "siteUrl", ""),
            'feed_url': getattr(feed, "feedUrl", ""),
            'last_updated': feed.lastUpdated,  # Unix timestamp; updated when feed is fetched
        }
        return feed_obj

    def save_to_file(self, filename, obj, subdir=None):
        save_dir = BACKUP_DIR
        if subdir:
            save_dir = join(BACKUP_DIR, subdir)
        if not os.path.exists(save_dir):
            try:
                os.makedirs(save_dir)
            except:  # Could not create it
                print 'Could not create backup directory {0}. Exiting.'.format(
                    save_dir)
                sys.exit(1)

        obj_json = toJSON(obj)
        fname = join(save_dir, filename)
        with open(fname, 'w') as f:
            f.write(obj_json)

    def save_feed(self, feed, subdir=None):
        items = []

        print 'Saving:', feed.title.encode('utf-8')
        try:
            raw_items = self.load_items(feed)
        except:
            print 'Failed. Moving on...'
            print
            return
        for item in raw_items:
            items.append(self.process_item(item))
        feed_obj = self.get_feed_info(feed)
        feed_obj['items'] = items
        feed_obj['items_count'] = len(items)
        self.save_to_file(self.__create_feed_filename(feed.title), feed_obj, subdir)

    def process_category(self, category):
        cat = {
            'id': category.id,
            'title': category.label,
        }
        cat['feeds'] = [self.get_feed_info(feed) for feed in category.feeds]
        return cat

    def save_tag(self, tag):
        cat = {
            'id': tag.id,
            'title': tag.label,
        }
        print 'Saving:', tag.label.encode('utf-8')
        cat['items'] = [self.process_item(item) for item in
                        self.load_items(tag)]
        cat['items_count'] = len(cat['items'])
        self.save_to_file(self.__create_feed_filename(cat['title']), cat, 'tags')

    def save_categories(self):
        categories = {
            'title': 'Google Reader Categories'
        }
        categories['categories'] = [self.process_category(cat) for cat in
                                    self.categories]
        if len(categories['categories']):
            self.save_to_file("categories.json", categories)
        else:
            print 'There are no categories to save.'

    def save_feed_list(self):
        feeds = {
            'title': 'Google Reader List of Feeds'
        }
        feeds_list = []
        for feed in self.feeds:
            feeds_list.append(self.get_feed_info(feed))
        feeds['feeds'] = feeds_list
        if len(feeds['feeds']):
            self.save_to_file("feeds.json", feeds)
        else:
            print 'There are no feeds to save.'

    def backup(self):
        if getattr(settings, 'SAVE_TAGS', True):
            print "Saving tags..."
            self.get_tags()
            for tag in self.tags:
                # Tag is really a category
                try:
                    label = tag['id'].rsplit('label/')[1]
                except:
                    # Special feeds (state/); skip, they are handled separately
                    continue
                ctag = Category(self.reader, label, tag['id'])
                self.save_tag(ctag)

        if getattr(settings, 'SAVE_FEEDS', False):
            print "Saving feeds..."
            for feed in self.feeds:
                self.save_feed(feed, 'feeds')

        print "Saving special feeds..."
        if getattr(settings, 'SAVE_SPECIAL_FEEDS_ALL', False):
            sf_keys = self.specialFeeds.keys()
        else:
            sf_keys = ('starred', )
        for feed_name in sf_keys:
            feed = self.specialFeeds[feed_name]
            self.save_feed(feed, 'special')

        if getattr(settings, 'SAVE_CATEGORIES', True):
            print "Saving list of feeds and categories..."
            self.save_feed_list()
            self.save_categories()
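A minimal driver for the class above, assuming the module-level names it relies on (BACKUP_DIR, PAUSE_INTERVAL, settings, toJSON) are defined elsewhere in the same script, and using placeholder credentials:

if __name__ == '__main__':
    # Placeholder credentials; ClientLogin no longer works, so this only
    # shows how the class was meant to be driven.
    hoover = HooverReader('user@example.com', 'secret')
    hoover.backup()  # writes tags/, special/ and the JSON index files under BACKUP_DIR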
Code example #6
File: tasks.py  Project: EliotBerriot/1flow
def import_google_reader_begin(user_id, access_token):

    auth = OAuth2Method(settings.GOOGLE_OAUTH2_CLIENT_ID,
                        settings.GOOGLE_OAUTH2_CLIENT_SECRET)
    auth.authFromAccessToken(access_token)
    reader = GoogleReader(auth)

    django_user, mongo_user = get_user_from_dbs(user_id)
    username = django_user.username

    try:
        user_infos = reader.getUserInfo()

    except TypeError:
        LOGGER.exception(u'Could not start Google Reader import for user %s.',
                         username)
        # Don't refresh, it's now done by a dedicated periodic task.
        # If we failed, it means the problem is quite serious.
        #       import_google_reader_trigger(user_id, refresh=True)
        return

    GR_MAX_FEEDS = config.GR_MAX_FEEDS

    LOGGER.info(u'Starting Google Reader import for user %s.', username)

    gri = GoogleReaderImport(user_id)

    # Take note of user information now that we have it.
    gri.start(user_infos=user_infos)

    reader.buildSubscriptionList()

    total_reads, reg_date     = reader.totalReadItems(without_date=False)
    total_starred, star1_date = reader.totalStarredItems(without_date=False)
    total_feeds               = len(reader.feeds) + 1  # +1 for 'starred'

    gri.reg_date(pytime.mktime(reg_date.timetuple()))
    gri.star1_date(pytime.mktime(star1_date.timetuple()))
    gri.total_reads(total_reads)
    gri.total_starred(total_starred)

    LOGGER.info(u'Google Reader import for user %s: %s feed(s) and %s read '
                u'article(s) to go…', username, total_feeds, total_reads)

    if total_feeds > GR_MAX_FEEDS and not settings.DEBUG:
        mail_admins('User {0} has more than {1} feeds: {2}!'.format(
                    username, GR_MAX_FEEDS, total_feeds),
                    u"\n\nThe GR import will be incomplete.\n\n"
                    u"Just for you to know…\n\n")

    # We launch the starred feed import first. Launching it after the
    # standard feeds would delay it until the world's end.
    reader.makeSpecialFeeds()
    starred_feed = reader.getSpecialFeed(ReaderUrl.STARRED_LIST)
    import_google_reader_starred.apply_async((user_id, username, starred_feed),
                                             queue='low')

    processed_feeds = 1
    feeds_to_import = []

    for gr_feed in reader.feeds[:GR_MAX_FEEDS]:

        try:
            feed = create_feed(gr_feed, mongo_user)

        except Feed.DoesNotExist:
            LOGGER.exception(u'Could not create feed “%s” for user %s, '
                             u'skipped.', gr_feed.title, username)
            continue

        processed_feeds += 1
        feeds_to_import.append((user_id, username, gr_feed, feed))

        LOGGER.info(u'Imported feed “%s” (%s/%s) for user %s…',
                    gr_feed.title, processed_feeds, total_feeds, username)

    # We need to clamp the total, else the task won't finish
    # when the user has more feeds than allowed.
    #
    gri.total_feeds(min(processed_feeds, GR_MAX_FEEDS))

    for feed_args in feeds_to_import:
        import_google_reader_articles.apply_async(feed_args, queue='low')

    LOGGER.info(u'Imported %s/%s feeds in %s. Articles import already '
                u'started with limits: date: %s, %s waves of %s articles, '
                u'max articles: %s, reads: %s, starred: %s.',
                processed_feeds, total_feeds,
                naturaldelta(now() - gri.start()),
                naturaltime(max([gri.reg_date(), GR_OLDEST_DATE])),
                config.GR_WAVE_LIMIT, config.GR_LOAD_LIMIT,
                config.GR_MAX_ARTICLES, total_reads, total_starred)