Example #1
    def checkItems_(self, sender):

        NSLog("Checking items")
        if self.user is None or self.password is None:
            return
        
        #Retrieve all items
        ca = ClientAuthMethod(self.user,self.password)
        reader = GoogleReader(ca)
        reader.makeSpecialFeeds()
        specials = reader.getSpecialFeed(ReaderUrl.READING_LIST)
        specials.loadItems()
        
        if self.lastId is None:
            item = specials.items[0]

            self.lastId = item.id
            self.notifyNewItem(item)
        else:
            if len(specials.items) > 0:
                lastItem = specials.items[0].id
                for item in specials.items:
                    if item.id == self.lastId:
                        break
                    self.notifyNewItem(item)
                self.lastId = lastItem
        NSLog("Finished Checking items")
    def test_add_remove_single_feed_tag(self):
        ca = ClientAuthMethod(username,password)
        reader = GoogleReader(ca)
        container = SpecialFeed(reader, ReaderUrl.READING_LIST)
        container.loadItems()

        tag_name = 'test-single-tag'
        feed_1 = container.items[0]

        # assert tag doesn't exist yet
        self.assertFalse(any([tag_name in x for x in feed_1.data['categories']]))

        # add tag
        reader.addItemTag(feed_1, 'user/-/label/' + tag_name)

        #reload now
        container.clearItems()
        container.loadItems()
        feed_2 = container.items[0]

        # assert tag is in new
        self.assertTrue(any([tag_name in x for x in feed_2.data['categories']]))

        # remove tag
        reader.removeItemTag(feed_2, 'user/-/label/' + tag_name)

        #reload now
        container.clearItems()
        container.loadItems()
        feed_3 = container.items[0]

        # assert tag is removed
        self.assertFalse(any([tag_name in x for x in feed_3.data['categories']]))
Example #3
 def __init__(self, username, password, method='kmeans'):
     auth = ClientAuthMethod(username, password)
     self.reader = GoogleReader(auth)
     self.reader.buildSubscriptionList()
     self.categories = self.reader.getCategories()
     self.corpus = Corpus()
     self.method = method
Example #4
def main():
    auth = OAuthMethod(oauth_key, oauth_secret)
    auth.setRequestToken()
    print auth.buildAuthUrl()
    raw_input()
    auth.setAccessToken()
    reader = GoogleReader(auth)
    print reader.getUserInfo()
    def test_reading_list_exists(self):
        ca = ClientAuthMethod(username,password)
        reader = GoogleReader(ca)
        reader.makeSpecialFeeds()
        feeds = reader.getFeedContent(reader.getSpecialFeed(ReaderUrl.READING_LIST))

        self.assertEqual(dict, type(feeds))

        list_match = re.search('reading list in Google Reader', feeds['title'])
        self.assertTrue(list_match)
Example #6
def getGoogleFeeds(auth):
    try:
        google = GoogleReader(auth)
    except IOError:
        #handle username/password error
        #redirect to homepage with error?
        return None  # avoid a NameError on 'google' below when auth fails

    if google.buildSubscriptionList():
        return google.getFeeds()
Example #7
    def test_full_auth_process_without_callback(self):
        auth = OAuthMethod(oauth_key, oauth_secret)
        auth.setRequestToken()
        auth_url = auth.buildAuthUrl()
        response = automated_oauth_approval(auth_url)
        auth.setAccessToken()
        reader = GoogleReader(auth)

        info = reader.getUserInfo()
        self.assertEqual(dict, type(info))
        self.assertEqual(firstname, info['userName'])
Example #9
    def test_full_auth_and_access_userdata(self):
        auth = OAuth2Method(client_id, client_secret)
        auth.setRedirectUri(redirect_url)
        url = auth.buildAuthUrl()
        token = automated_oauth2_approval(url)
        auth.code = token
        auth.setAccessToken()

        reader = GoogleReader(auth)
        info = reader.getUserInfo()
        self.assertEqual(dict, type(info))
        self.assertEqual(firstname, info['userName'])
def login():

    try:
        username, password = open('auth.txt').read().strip().split('\n')
    except:
        username = raw_input('username? ')
        password = getpass.getpass('password? ')
        open('auth.txt','w').write("{0}\n{1}\n".format(username,password))

    auth = ClientAuthMethod(username, password)
    reader = GoogleReader(auth)
    reader.buildSubscriptionList()
    return reader
def login():
    """
        requires an 'auth.txt' to exist in the current directory, this
        file contains two lines, the first line is your username, the
        second line is your password.
    """
    username, password = open('auth.txt').read().strip().split('\n')
    auth = ClientAuthMethod(username, password)
    reader = GoogleReader(auth)
    a = reader.buildSubscriptionList()
    if not a:
        return False
    else:
        return reader
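A minimal usage sketch for the login() helper above, assuming a two-line auth.txt as described in its docstring; the loop over the subscription list is illustrative only:

# hypothetical usage of login(); auth.txt must exist as described above
reader = login()
if reader:
    for feed in reader.getSubscriptionList():
        print feed.title, feed.id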
    def test_transaction_add_feed_tags(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)
        container = SpecialFeed(reader, ReaderUrl.READING_LIST)
        container.loadItems()

        tags = ['test-transaction%s' % x for x in range(5)]
        feed_1 = container.items[0]

        reader.beginAddItemTagTransaction()
        for tag in tags:
            reader.addItemTag(feed_1, 'user/-/label/' + tag)
        reader.commitAddItemTagTransaction()

        #reload now
        container.clearItems()
        container.loadItems()
        feed_2 = container.items[0]

        # figure out if all tags were returned
        tags_exist = [
            any(map(lambda tag: tag in x, tags))
            for x in feed_2.data['categories']
        ]
        tag_exist_count = sum([1 for x in tags_exist if x])
        self.assertEqual(5, tag_exist_count)
Example #15
    def run(self, user, loadLimit, *args, **kwargs):
        # user.credential should always be valid when doing oauth2
        if user.credential:
            credential = user.credential
            auth = OAuth2Method(credential.client_id, credential.client_secret)
            auth.authFromAccessToken(credential.access_token)
            auth.setActionToken()
        # username/password auth method, should only be used by our tests
        elif len(args) == 2:
            auth = ClientAuthMethod(args[0], args[1])

        reader = GoogleReader(auth)

        try:
            reader.buildSubscriptionList()
        except TypeError, exc:
            SyncFromReaderAPITask().retry(exc=exc)
Example #16
    def test_loading_item_count(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)
        container = SpecialFeed(reader, ReaderUrl.READING_LIST)
        container.loadItems(loadLimit=5)

        self.assertEqual(5, len(container.items))
        self.assertEqual(5, container.countItems())
    def __init__(self, username, password):
        self.auth = ClientAuthMethod(username, password)
        self.reader = GoogleReader(self.auth)
        self.reader.makeSpecialFeeds()  # Fetches list of special feeds like starred
        self.reader.buildSubscriptionList()  # Fetches list of feeds AND categories

        self.categories = self.reader.categories
        self.feeds = self.reader.feeds[:]  # Make a copy so lib calls don't fill it with crap
        self.specialFeeds = self.reader.specialFeeds.copy()
Example #18
    def test_marking_read(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)
        container = SpecialFeed(reader, ReaderUrl.READING_LIST)
        container.loadItems()

        feed_item = container.items[0]
        self.assertTrue(feed_item.markRead())
        self.assertTrue(feed_item.isRead())
Example #19
	def get(self):
		user = users.get_current_user()
		
		if user:
			storage = StorageByKeyName(Credentials, user.user_id(), 'credentials')
			credentials = storage.get()
			
			auth = GAPDecoratorAuthMethod(credentials)
			reader = GoogleReader(auth)
			reader.buildSubscriptionList()
			
			feeds = reader.getFeeds()
			template_values = {
				'feeds': feeds
			}
			path = os.path.join(os.path.dirname(__file__), 'templates/template_slist.html')
			self.response.out.write(template.render(path, template_values))
		else:
			self.redirect(users.create_login_url(self.request.uri))
Example #20
    def test_full_auth_process_with_callback(self):
        auth = OAuthMethod(oauth_key, oauth_secret)
        #must be a working callback url for testing
        auth.setCallback("http://www.asktherelic.com")
        token, token_secret = auth.setAndGetRequestToken()
        auth_url = auth.buildAuthUrl()

        #callback section
        #get response, which is a redirect to the callback url
        response = automated_oauth_approval(auth_url)
        query_string = urlparse.urlparse(response.geturl()).query
        #grab the verifier token from the callback url query string
        token_verifier = urlparse.parse_qs(query_string)['oauth_verifier'][0]

        auth.setAccessTokenFromCallback(token, token_secret, token_verifier)
        reader = GoogleReader(auth)

        info = reader.getUserInfo()
        self.assertEqual(dict, type(info))
        self.assertEqual(firstname, info['userName'])
def main():
    if len(sys.argv) <= 1 or len(sys.argv) > 3:
        print("Usage: %s username [password]" % (sys.argv[0]))
        return 1

    username = sys.argv[1]
    if len(sys.argv) == 2:
        sys.stderr.write('Password for %s: ' % username)
        password = raw_input()
    else:
        password = sys.argv[2]

    auth = ClientAuthMethod(username, password)
    reader = GoogleReader(auth)

    root = ET.Element('opml')
    head = ET.SubElement(root, 'head')
    ET.SubElement(head, 'title').text = \
        '%s subscriptions in Google Reader' % username
    body = ET.SubElement(root, 'body')

    category_els = {}

    reader.buildSubscriptionList()
    for feed in reader.getSubscriptionList():
        if feed.getCategories():
            for category in feed.getCategories():
                # Create category element
                if not category.id in category_els:
                    category_el = ET.SubElement(body, 'outline')
                    category_el.set('text', category.label)
                    category_el.set('title', category.label)
                    category_els[category.id] = category_el
                make_feed_el(feed, category_els[category.id])
        else:
            make_feed_el(feed, body)

    tree = ET.ElementTree(root)
    tree.write(sys.stdout, xml_declaration=True)
def main():
    if len(sys.argv) <= 1 or len(sys.argv) > 3:
        print ("Usage: %s username [password]" % (sys.argv[0]))
        return 1

    username = sys.argv[1]
    if len(sys.argv) == 2:
        sys.stderr.write("Password for %s: " % username)
        password = raw_input()
    else:
        password = sys.argv[2]

    auth = ClientAuthMethod(username, password)
    reader = GoogleReader(auth)

    root = ET.Element("opml")
    head = ET.SubElement(root, "head")
    ET.SubElement(head, "title").text = "%s subscriptions in Google Reader" % username
    body = ET.SubElement(root, "body")

    category_els = {}

    reader.buildSubscriptionList()
    for feed in reader.getSubscriptionList():
        if feed.getCategories():
            for category in feed.getCategories():
                # Create category element
                if not category.id in category_els:
                    category_el = ET.SubElement(body, "outline")
                    category_el.set("text", category.label)
                    category_el.set("title", category.label)
                    category_els[category.id] = category_el
                make_feed_el(feed, category_els[category.id])
        else:
            make_feed_el(feed, body)

    tree = ET.ElementTree(root)
    tree.write(sys.stdout, xml_declaration=True)
Example #24
    def test_reading_list_exists(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)
        reader.makeSpecialFeeds()
        feeds = reader.getFeedContent(
            reader.getSpecialFeed(ReaderUrl.READING_LIST))

        self.assertEqual(dict, type(feeds))

        list_match = re.search('reading list in Google Reader', feeds['title'])
        self.assertTrue(list_match)
Example #25
    def test_subscribe_unsubscribe(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)

        slashdot = 'feed/http://rss.slashdot.org/Slashdot/slashdot'

        # unsubscribe always returns true; revert feedlist state
        self.assertTrue(reader.unsubscribe(slashdot))

        # now subscribe
        self.assertTrue(reader.subscribe(slashdot))

        # wait for server to update
        time.sleep(1)
        reader.buildSubscriptionList()

        # test subscribe successful
        self.assertIn(slashdot, [x.id for x in reader.getSubscriptionList()])
Example #27
    def test_oauth_subscribe(self):
        auth = OAuth2Method(client_id, client_secret)
        auth.setRedirectUri(redirect_url)
        url = auth.buildAuthUrl()
        token = automated_oauth2_approval(url)
        auth.code = token
        auth.setAccessToken()
        auth.setActionToken()

        reader = GoogleReader(auth)

        slashdot = 'feed/http://rss.slashdot.org/Slashdot/slashdot'
        # unsubscribe always returns true; revert feedlist state
        self.assertTrue(reader.unsubscribe(slashdot))
        # now subscribe
        self.assertTrue(reader.subscribe(slashdot))
        # wait for server to update
        import time
        time.sleep(1)
        reader.buildSubscriptionList()
        # test subscribe successful
        self.assertIn(slashdot, [x.id for x in reader.getSubscriptionList()])
Example #28
    def test_transaction_add_feed_tags(self):
        ca = ClientAuthMethod(username,password)
        reader = GoogleReader(ca)
        container = SpecialFeed(reader, ReaderUrl.READING_LIST)
        container.loadItems()

        tags = ['test-transaction%s' % x for x in range(5)]
        feed_1 = container.items[0]

        reader.beginAddItemTagTransaction()
        for tag in tags:
            reader.addItemTag(feed_1, 'user/-/label/' + tag)
        reader.commitAddItemTagTransaction()

        #reload now
        container.clearItems()
        container.loadItems()
        feed_2 = container.items[0]

        # figure out if all tags were returned
        tags_exist = [any(map(lambda tag: tag in x, tags)) for x in feed_2.data['categories']]
        tag_exist_count = sum([1 for x in tags_exist if x])
        self.assertEqual(5, tag_exist_count)
    def test_add_remove_single_feed_tag(self):
        ca = ClientAuthMethod(username, password)
        reader = GoogleReader(ca)
        container = SpecialFeed(reader, ReaderUrl.READING_LIST)
        container.loadItems()

        tag_name = 'test-single-tag'
        feed_1 = container.items[0]

        # assert tag doesn't exist yet
        self.assertFalse(
            any([tag_name in x for x in feed_1.data['categories']]))

        # add tag
        reader.addItemTag(feed_1, 'user/-/label/' + tag_name)

        #reload now
        container.clearItems()
        container.loadItems()
        feed_2 = container.items[0]

        # assert tag is in new
        self.assertTrue(any([tag_name in x
                             for x in feed_2.data['categories']]))

        # remove tag
        reader.removeItemTag(feed_2, 'user/-/label/' + tag_name)

        #reload now
        container.clearItems()
        container.loadItems()
        feed_3 = container.items[0]

        # assert tag is removed
        self.assertFalse(
            any([tag_name in x for x in feed_3.data['categories']]))
Example #31
def import_google_reader_begin(user_id, access_token):

    auth = OAuth2Method(settings.GOOGLE_OAUTH2_CLIENT_ID,
                        settings.GOOGLE_OAUTH2_CLIENT_SECRET)
    auth.authFromAccessToken(access_token)
    reader = GoogleReader(auth)

    django_user, mongo_user = get_user_from_dbs(user_id)
    username = django_user.username

    try:
        user_infos = reader.getUserInfo()

    except TypeError:
        LOGGER.exception(u'Could not start Google Reader import for user %s.',
                         username)
        # Don't refresh, it's now done by a dedicated periodic task.
        # If we failed, it means the problem is quite serious.
        #       import_google_reader_trigger(user_id, refresh=True)
        return

    GR_MAX_FEEDS = config.GR_MAX_FEEDS

    LOGGER.info(u'Starting Google Reader import for user %s.', username)

    gri = GoogleReaderImport(user_id)

    # take note of user information now that we have it.
    gri.start(user_infos=user_infos)

    reader.buildSubscriptionList()

    total_reads, reg_date     = reader.totalReadItems(without_date=False)
    total_starred, star1_date = reader.totalStarredItems(without_date=False)
    total_feeds               = len(reader.feeds) + 1  # +1 for 'starred'

    gri.reg_date(pytime.mktime(reg_date.timetuple()))
    gri.star1_date(pytime.mktime(star1_date.timetuple()))
    gri.total_reads(total_reads)
    gri.total_starred(total_starred)

    LOGGER.info(u'Google Reader import for user %s: %s feed(s) and %s read '
                u'article(s) to go…', username, total_feeds, total_reads)

    if total_feeds > GR_MAX_FEEDS and not settings.DEBUG:
        mail_admins('User {0} has more than {1} feeds: {2}!'.format(
                    username, GR_MAX_FEEDS, total_feeds),
                    u"\n\nThe GR import will be incomplete.\n\n"
                    u"Just for you to know…\n\n")

    # We launch the starred feed import first. Launching it after the
    # standard feeds would delay it until the world's end.
    reader.makeSpecialFeeds()
    starred_feed = reader.getSpecialFeed(ReaderUrl.STARRED_LIST)
    import_google_reader_starred.apply_async((user_id, username, starred_feed),
                                             queue='low')

    processed_feeds = 1
    feeds_to_import = []

    for gr_feed in reader.feeds[:GR_MAX_FEEDS]:

        try:
            feed = create_feed(gr_feed, mongo_user)

        except Feed.DoesNotExist:
            LOGGER.exception(u'Could not create feed “%s” for user %s, '
                             u'skipped.', gr_feed.title, username)
            continue

        processed_feeds += 1
        feeds_to_import.append((user_id, username, gr_feed, feed))

        LOGGER.info(u'Imported feed “%s” (%s/%s) for user %s…',
                    gr_feed.title, processed_feeds, total_feeds, username)

    # We need to clamp the total, else task won't finish in
    # the case where the user has more feeds than allowed.
    #
    gri.total_feeds(min(processed_feeds, GR_MAX_FEEDS))

    for feed_args in feeds_to_import:
        import_google_reader_articles.apply_async(feed_args, queue='low')

    LOGGER.info(u'Imported %s/%s feeds in %s. Articles import already '
                u'started with limits: date: %s, %s waves of %s articles, '
                u'max articles: %s, reads: %s, starred: %s.',
                processed_feeds, total_feeds,
                naturaldelta(now() - gri.start()),
                naturaltime(max([gri.reg_date(), GR_OLDEST_DATE])),
                config.GR_WAVE_LIMIT, config.GR_LOAD_LIMIT,
                config.GR_MAX_ARTICLES, total_reads, total_starred)
Example #32
        subscription.itemsById = {}
        subscription.loadMoreItems(loadLimit=load)
    except Exception:
        logging.warning(sys.exc_info()[0])
        print "Continuing..."
        sys.exc_clear()
        loadMoreUntilSuccessful(subscription, load)


couch = couchdb.Server('http://localhost:5984')
try:
    db = couch[db_name]
except Exception:
    db = couch.create(db_name)
auth = ClientAuthMethod(username, password)
reader = GoogleReader(auth)
ll = 100
reader.buildSubscriptionList()
l = reader.getSubscriptionList()

for li in l:
    print li.title.encode('ascii', 'xmlcharrefreplace')
    loadUntilSuccessful(li, ll)
    while li.lastLoadLength > 0:
        bulk_upload = []
        for i in li.getItems():
            doc = {}
            doc = i.data
            if len(li.categories) > 0:
                doc["label"] = li.categories[0].label
            bulk_upload.append(doc)
Example #33
    'redirect_server_port')

#########################
# TODO: move this into a method (a sketch follows this block)
if not os.path.isfile(CACHED_CA_FILE) or not os.path.isfile(
        CACHED_READER_FILE):
    topickle = True
elif ((time.time() - os.path.getmtime(CACHED_CA_FILE)) / 60) > 10:
    # older than 10 mins so refresh
    topickle = True
else:
    topickle = False

if topickle:
    ca = ClientAuth(greader_user, greader_pass)
    reader = GoogleReader(ca)
    reader.buildSubscriptionList()
    pickle.dump(ca, open(CACHED_CA_FILE, 'wb'), -1)
    pickle.dump(reader, open(CACHED_READER_FILE, 'wb'), -1)
else:
    ca = pickle.load(open(CACHED_CA_FILE))
    reader = pickle.load(open(CACHED_READER_FILE))
#########################
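# The TODO above suggests extracting this cache handling into a helper.
# A minimal, hypothetical sketch (not part of the original add-on); it reuses
# the CACHED_* paths, ClientAuth, and credentials already defined above:
def load_cached_reader(max_age_minutes=10):
    """Return (ca, reader), rebuilding and re-pickling them when the cache is stale."""
    stale = (not os.path.isfile(CACHED_CA_FILE)
             or not os.path.isfile(CACHED_READER_FILE)
             or ((time.time() - os.path.getmtime(CACHED_CA_FILE)) / 60) > max_age_minutes)
    if stale:
        ca = ClientAuth(greader_user, greader_pass)
        reader = GoogleReader(ca)
        reader.buildSubscriptionList()
        pickle.dump(ca, open(CACHED_CA_FILE, 'wb'), -1)
        pickle.dump(reader, open(CACHED_READER_FILE, 'wb'), -1)
    else:
        ca = pickle.load(open(CACHED_CA_FILE))
        reader = pickle.load(open(CACHED_READER_FILE))
    return ca, reader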

params = get_params()
url = None
id = None
name = None
mode = None
try:
    url = urllib.unquote_plus(params["url"])
Example #34
from libgreader import GoogleReader, ClientAuthMethod, Feed
auth = ClientAuthMethod('acpigeon', 'Katana666')
reader = GoogleReader(auth)
print reader.getUserInfo()

Example #35
 def test_reader_user_info(self):
     ca = ClientAuthMethod(username,password)
     reader = GoogleReader(ca)
     info = reader.getUserInfo()
     self.assertEqual(dict, type(info))
     self.assertEqual(firstname, info['userName'])
Example #36
USE_REDIRECT_SERVER = __settings__.getSetting('use_redirect_server')
REDIRECT_SERVER_URL = 'http://127.0.0.1:' + __settings__.getSetting('redirect_server_port')

#########################
# TODO: move this into a method
if not os.path.isfile(CACHED_CA_FILE) or not os.path.isfile(CACHED_READER_FILE):
    topickle = True
elif ((time.time() - os.path.getmtime(CACHED_CA_FILE))/60) > 10:
    # older than 10 mins so refresh
    topickle = True
else:
    topickle = False

if topickle:
    ca = ClientAuth( greader_user, greader_pass )
    reader = GoogleReader( ca )
    reader.buildSubscriptionList()
    pickle.dump(ca, open(CACHED_CA_FILE,'wb'), -1)
    pickle.dump(reader, open(CACHED_READER_FILE,'wb'), -1)
else:
    ca = pickle.load(open(CACHED_CA_FILE))
    reader = pickle.load(open(CACHED_READER_FILE))
#########################

params=get_params()
url=None
id=None
name=None
mode=None
try:
    url=urllib.unquote_plus(params["url"])
Example #37
class DM_GReader():
    def __init__(self, username, password, method='kmeans'):
        auth = ClientAuthMethod(username, password)
        self.reader = GoogleReader(auth)
        self.reader.buildSubscriptionList()
        self.categories = self.reader.getCategories()
        self.corpus = Corpus()
        self.method = method

    def import_category(self,
                        category_id=0,
                        path=None,
                        local=False,
                        max_articles=2000,
                        days=3):
        """Import the specific category to a Pattern Corpus for future calculation.
        category_id: the integer indicates which category to use.
        cont: the integer tells how many queries to issue to continuously crawl the GReader.
        path: the location for storing the pickle of the Pattern Corpus.
        local: to use the local stored corpus?
        max_articles: the number of max articles we try to crawl if one day's subscriptions is too much."""

        if path is None:
            print "Please provide with a path to store/load local pickle file."
            return

        if local:
            self.corpus = Corpus.load(path)
            return

        self.target_category = self.categories[category_id]
        continuation = None

        # Crawl only the data from the last `days` days
        time_threadshold = calendar.timegm(
            (datetime.date.today() -
             datetime.timedelta(days=days)).timetuple())

        i = 1

        while 1 and i < (max_articles / 20):

            self.target_category_content = self.reader.getCategoryContent(
                self.target_category, continuation=continuation)
            feeds = self.target_category_content[u'items']

            if self.target_category_content['updated'] < time_threadshold:
                break

            feeds_docs = []
            for feed in feeds:
                doc_name = feed[u'id'][-16:]
                for content in [u'content', u'summary']:
                    if content in feed:
                        feed_soup = BeautifulSoup(feed[content][u'content'])
                        feed_text = feed_soup.get_text()
                        feeds_docs.append(
                            Document(feed_text, stemmer=LEMMA, name=doc_name))
                        break

            self.corpus.extend(feeds_docs)

            if u'continuation' in self.target_category_content and self.target_category_content[
                    u'continuation'] is not None:
                continuation = self.target_category_content[u'continuation']
            else:
                print 'Finished!'
                break

            print 'Retrieving %d articles...' % (i * 20)
            i = i + 1

        self.corpus.save(path, update=True)

    def _generate_clusters(self, k=10, p=0.8, maxlevel=10):
        """Use KMEANS method by default, and choose the initial k values by KMPP method.
        k is the number of clusters.
        p is to control the error of KMEANS, when p=1.0 is faster with small error.
        """
        if self.method == "kmeans":

            from pattern.vector import KMEANS, KMPP
            self.clusters = self.corpus.cluster(method=KMEANS,
                                                k=k,
                                                seed=KMPP,
                                                p=p,
                                                iterations=10)
            doc_list = []
            # For each cluster, compute the centroid and find the doc (vector) nearest to it.
            for cluster in self.clusters:
                c = centroid(cluster)
                d_min = distance(cluster[0].vector, c)
                doc_min = cluster[0]
                for doc in cluster:
                    d = distance(doc.vector, c)
                    if d < d_min:
                        d_min = d
                        doc_min = doc
                doc_list.append(doc_min)
            self.centroids = [i.name for i in doc_list]
            self.clusters = [[i.name for i in cluster]
                             for cluster in self.clusters]

        elif self.method == 'covertree':

            def mydistance(doc_name1, doc_name2):
                v1 = self.corpus.document(doc_name1).vector
                v2 = self.corpus.document(doc_name2).vector
                return distance(v1, v2)

            self.covertree = Covertree(mydistance, maxlevel)

            for i, doc in enumerate(self.corpus):
                tree_node = myTree(doc.name)
                self.covertree.insert(tree_node, self.covertree.ct, 0)

            self.covertree.merge_levels()
            self.centroids, self.clusters = self.covertree.clustering_from_ct(
                k)

    def generate_repr_ids(self, k):
        """
        For each cluster, we choose an arbitary article as the cluster's representative.

        Return the ids of the article, here the document name is the article's id.
        Google Reader is using "i=http://www.google.com/reader/api/0/stream/items/contents" to get the content of a specific data.
        Now we use the centroid to represent the documents

        """
        self._generate_clusters(k)
        return self.centroids

    def cost(self):
        cost = 0
        for i, center in enumerate(self.centroids):
            for doc in self.clusters[i]:
                cost += distance(
                    self.corpus.document(doc).vector,
                    self.corpus.document(center).vector)

        return cost

    def get_article_content(self, ids):
        """
        Use the ids to find the content of the articles through google web content API
        """
        url = 'http://www.google.com/reader/api/0/stream/items/contents'
        id_handle = 'tag:google.com,2005:reader/item/%s'

        contents = []
        for _id in ids:
            r = requests.post(url, data={'i': (id_handle % _id)})
            contents.append(r.json)
        return contents

    def generate_htmls(self, k, ids):
        """
        Use the ids and k to generate htmls
        """
        htmls = {}
        for i in self.get_article_content(ids):
            feed = i['items'][0]
            for content in [u'content', u'summary']:
                if content in feed:
                    title = feed['title']
                    url = feed['alternate'][0]['href']
                    htmls[title] = url
        return htmls
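A hedged usage sketch for the DM_GReader class above; the credentials, pickle path, and cluster count below are placeholders, not values from the original source:

# hypothetical driver for DM_GReader; all arguments are illustrative
dm = DM_GReader('user@example.com', 'secret', method='kmeans')
dm.import_category(category_id=0, path='corpus.pickle', days=3)
repr_ids = dm.generate_repr_ids(k=10)  # one representative article id per cluster
for title, url in dm.generate_htmls(10, repr_ids).items():
    print title, url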
Example #38
def import_google_reader_begin(user_id, access_token):

    auth = OAuth2Method(settings.GOOGLE_OAUTH2_CLIENT_ID,
                        settings.GOOGLE_OAUTH2_CLIENT_SECRET)
    auth.authFromAccessToken(access_token)
    reader = GoogleReader(auth)

    django_user, mongo_user = get_user_from_dbs(user_id)
    username = django_user.username

    try:
        user_infos = reader.getUserInfo()

    except TypeError:
        LOGGER.exception(u'Could not start Google Reader import for user %s.',
                         username)
        # Don't refresh, it's now done by a dedicated periodic task.
        # If we failed, it means the problem is quite serious.
        #       import_google_reader_trigger(user_id, refresh=True)
        return

    GR_MAX_FEEDS = config.GR_MAX_FEEDS

    LOGGER.info(u'Starting Google Reader import for user %s.', username)

    gri = GoogleReaderImport(user_id)

    # take note of user information now that we have it.
    gri.start(user_infos=user_infos)

    reader.buildSubscriptionList()

    total_reads, reg_date = reader.totalReadItems(without_date=False)
    total_starred, star1_date = reader.totalStarredItems(without_date=False)
    total_feeds = len(reader.feeds) + 1  # +1 for 'starred'

    gri.reg_date(pytime.mktime(reg_date.timetuple()))
    gri.star1_date(pytime.mktime(star1_date.timetuple()))
    gri.total_reads(total_reads)
    gri.total_starred(total_starred)

    LOGGER.info(
        u'Google Reader import for user %s: %s feed(s) and %s read '
        u'article(s) to go…', username, total_feeds, total_reads)

    if total_feeds > GR_MAX_FEEDS and not settings.DEBUG:
        mail_admins(
            'User {0} has more than {1} feeds: {2}!'.format(
                username, GR_MAX_FEEDS, total_feeds),
            u"\n\nThe GR import will be incomplete.\n\n"
            u"Just for you to know…\n\n")

    # We launch the starred feed import first. Launching it after the
    # standard feeds would delay it until the world's end.
    reader.makeSpecialFeeds()
    starred_feed = reader.getSpecialFeed(ReaderUrl.STARRED_LIST)
    import_google_reader_starred.apply_async((user_id, username, starred_feed),
                                             queue='low')

    processed_feeds = 1
    feeds_to_import = []

    for gr_feed in reader.feeds[:GR_MAX_FEEDS]:

        try:
            feed = create_feed(gr_feed, mongo_user)

        except Feed.DoesNotExist:
            LOGGER.exception(
                u'Could not create feed “%s” for user %s, '
                u'skipped.', gr_feed.title, username)
            continue

        processed_feeds += 1
        feeds_to_import.append((user_id, username, gr_feed, feed))

        LOGGER.info(u'Imported feed “%s” (%s/%s) for user %s…', gr_feed.title,
                    processed_feeds, total_feeds, username)

    # We need to clamp the total, else task won't finish in
    # the case where the user has more feeds than allowed.
    #
    gri.total_feeds(min(processed_feeds, GR_MAX_FEEDS))

    for feed_args in feeds_to_import:
        import_google_reader_articles.apply_async(feed_args, queue='low')

    LOGGER.info(
        u'Imported %s/%s feeds in %s. Articles import already '
        u'started with limits: date: %s, %s waves of %s articles, '
        u'max articles: %s, reads: %s, starred: %s.', processed_feeds,
        total_feeds, naturaldelta(now() - gri.start()),
        naturaltime(max([gri.reg_date(), GR_OLDEST_DATE])),
        config.GR_WAVE_LIMIT, config.GR_LOAD_LIMIT, config.GR_MAX_ARTICLES,
        total_reads, total_starred)
Example #40
 def test_reader(self):
     ca = ClientAuthMethod(username,password)
     reader = GoogleReader(ca)
     self.assertNotEqual(reader, None)
Example #41
class DM_GReader():

    def __init__(self, username, password, method='kmeans'):
        auth = ClientAuthMethod(username, password)
        self.reader = GoogleReader(auth)
        self.reader.buildSubscriptionList()
        self.categories = self.reader.getCategories()
        self.corpus = Corpus()
        self.method = method

    def import_category(self, category_id=0, path=None, local=False, max_articles=2000, days=3):
        """Import the specific category to a Pattern Corpus for future calculation.
        category_id: the integer indicates which category to use.
        cont: the integer tells how many queries to issue to continuously crawl the GReader.
        path: the location for storing the pickle of the Pattern Corpus.
        local: to use the local stored corpus?
        max_articles: the number of max articles we try to crawl if one day's subscriptions is too much."""

        if path is None:
            print "Please provide with a path to store/load local pickle file."
            return

        if local:
            self.corpus = Corpus.load(path)
            return

        self.target_category = self.categories[category_id]
        continuation = None

        # Crawl only the data from the last `days` days
        time_threadshold = calendar.timegm((datetime.date.today() - datetime.timedelta(days=days)).timetuple())

        i = 1

        while 1 and i < (max_articles / 20):

            self.target_category_content = self.reader.getCategoryContent(self.target_category, continuation=continuation)
            feeds = self.target_category_content[u'items']

            if self.target_category_content['updated'] < time_threadshold:
                break

            feeds_docs = []
            for feed in feeds:
                doc_name = feed[u'id'][-16:]
                for content in [u'content', u'summary']:
                    if content in feed:
                        feed_soup = BeautifulSoup(feed[content][u'content'])
                        feed_text = feed_soup.get_text()
                        feeds_docs.append(Document(feed_text, stemmer=LEMMA, name=doc_name))
                        break

            self.corpus.extend(feeds_docs)

            if u'continuation' in self.target_category_content and self.target_category_content[u'continuation'] is not None:
                continuation = self.target_category_content[u'continuation']
            else:
                print 'Finished!'
                break

            print 'Retrieving %d articles...' % (i * 20)
            i = i + 1

        self.corpus.save(path, update=True)

    def _generate_clusters(self, k=10, p=0.8, maxlevel=10):
        """Use KMEANS method by default, and choose the initial k values by KMPP method.
        k is the number of clusters.
        p is to control the error of KMEANS, when p=1.0 is faster with small error.
        """
        if self.method == "kmeans":

            from pattern.vector import KMEANS, KMPP
            self.clusters = self.corpus.cluster(method=KMEANS, k=k, seed=KMPP, p=p, iterations=10)
            doc_list = []
            # For each cluster, compute the centroid and find the doc (vector) nearest to it.
            for cluster in self.clusters:
                c = centroid(cluster)
                d_min = distance(cluster[0].vector, c)
                doc_min = cluster[0]
                for doc in cluster:
                    d = distance(doc.vector, c)
                    if d < d_min:
                        d_min = d
                        doc_min = doc
                doc_list.append(doc_min)
            self.centroids = [i.name for i in doc_list]
            self.clusters = [[i.name for i in cluster] for cluster in self.clusters]

        elif self.method == 'covertree':

            def mydistance(doc_name1, doc_name2):
                v1 = self.corpus.document(doc_name1).vector
                v2 = self.corpus.document(doc_name2).vector
                return distance(v1, v2)

            self.covertree = Covertree(mydistance, maxlevel)

            for i, doc in enumerate(self.corpus):
                tree_node = myTree(doc.name)
                self.covertree.insert(tree_node, self.covertree.ct, 0)

            self.covertree.merge_levels()
            self.centroids, self.clusters = self.covertree.clustering_from_ct(k)

    def generate_repr_ids(self, k):
        """
        For each cluster, we choose an arbitary article as the cluster's representative.

        Return the ids of the article, here the document name is the article's id.
        Google Reader is using "i=http://www.google.com/reader/api/0/stream/items/contents" to get the content of a specific data.
        Now we use the centroid to represent the documents

        """
        self._generate_clusters(k)
        return self.centroids

    def cost(self):
        cost = 0
        for i, center in enumerate(self.centroids):
            for doc in self.clusters[i]:
                cost += distance(self.corpus.document(doc).vector, self.corpus.document(center).vector)

        return cost

    def get_article_content(self, ids):
        """
        Use the ids to find the content of the articles through google web content API
        """
        url = 'http://www.google.com/reader/api/0/stream/items/contents'
        id_handle = 'tag:google.com,2005:reader/item/%s'

        contents = []
        for _id in ids:
            r = requests.post(url, data={'i': (id_handle % _id)})
            contents.append(r.json)
        return contents

    def generate_htmls(self, k, ids):
        """
        Use the ids and k to generate htmls
        """
        htmls = {}
        for i in self.get_article_content(ids):
            feed = i['items'][0]
            for content in [u'content', u'summary']:
                if content in feed:
                    title = feed['title']
                    url = feed['alternate'][0]['href']
                    htmls[title] = url
        return htmls
class HooverReader(object):
    '''
    Export everything that was saved in Google Reader as JSON objects. Keep
    as much information as possible, but especially ID (useful for
    cross-referencing), title, url, notes (probably gone) and read status.
    Each file should contain entries for just one category/tag. List of
    categories (folders in Google Reader) should be stored in categories.json.

    The script has no memory and will always fetch everything (it doesn't do
    incremental updates). It will NOT save the list of feeds since those can
    be exported as OPML from Google Reader.

    If it hits the rate limit, it will pause for up to half an hour before
    giving up.

    DILEMMAS:
    - Should we save feeds contents? How far?
    - Should we save categories contents? How far?
        (probably; categories can contain entries labeled with category label
         that are not otherwise tagged and hence not backed up)

    Save:
    - all tagged entries (labeled feeds; categories that don't contain feeds)
    - list of categories with feeds they contain

    Algorithm:
    - fetch a list of categories
    - fetch a list of all labels (which includes categories)
    - for every label which is not a category:
        - loadItems
        - execute loadMoreItems until items count remains same (or error)
        - dump data as JSON to file
    - for every category fetch a list of feeds it contains
    - dump the list of categories with feeds as JSON to a file
    '''
    def __init__(self, username, password):
        self.auth = ClientAuthMethod(username, password)
        self.reader = GoogleReader(self.auth)
        self.reader.makeSpecialFeeds()  # Fetches list of special feeds like starred
        self.reader.buildSubscriptionList()  # Fetches list of feeds AND categories

        self.categories = self.reader.categories
        self.feeds = self.reader.feeds[:]  # Make a copy so lib calls don't fill it with crap
        self.specialFeeds = self.reader.specialFeeds.copy()

    def __create_feed_filename(self, feed_label):
        return "{0}.json".format(feed_label)

    def get_tags(self):
        tags_json = self.reader.httpGet(
            'https://www.google.com/reader/api/0/tag/list',
            {'output': 'json'})
        tags = json.loads(tags_json)
        tags_list = tags['tags']
        self.tags = tags_list

    def load_items(self, feed):
        fetch_size = 1000
        tryagain = 0
        feed.loadItems(loadLimit=fetch_size)
        while (feed.lastLoadLength > 0 and feed.lastLoadLength == fetch_size) \
                or (tryagain > 0 and tryagain < 5):
            feed.loadMoreItems(loadLimit=fetch_size)
            if not feed.lastLoadOk:
                print "Error fetching items for feed '{0}'".format(
                    feed.title)
                pause_for = PAUSE_INTERVAL * (2 ** tryagain)
                print "Pausing for a {0} minute(s)...".format(pause_for / 60)
                # Double time to sleep on each iteration
                time.sleep(pause_for)
                tryagain += 1
            else:
                tryagain = 0
        return feed.items

    def process_item(self, item):
        values = {}
        keys = ('id', 'title', 'content', 'read', 'starred', 'shared', 'url')
        for key in keys:
            values[key] = getattr(item, key, u'')
        values['origin'] = getattr(item, 'origin', {})
        return values

    def get_feed_info(self, feed):
        feed_obj = {
            'feed_id': feed.id,
            'title': feed.title,
            'site_url': getattr(feed, "siteUrl", ""),
            'feed_url': getattr(feed, "feedUrl", ""),
            'last_updated': feed.lastUpdated,  # Unix timestamp; updated when feed is fetched
        }
        return feed_obj

    def save_to_file(self, filename, obj, subdir=None):
        save_dir = BACKUP_DIR
        if subdir:
            save_dir = join(BACKUP_DIR, subdir)
        if not os.path.exists(save_dir):
            try:
                os.makedirs(save_dir)
            except:  # Could not create it
                print 'Could not create backup directory {0}. Exiting.'.format(
                    save_dir)
                sys.exit(1)

        obj_json = toJSON(obj)
        fname = join(save_dir, filename)
        with open(fname, 'w') as f:
            f.write(obj_json)

    def save_feed(self, feed, subdir=None):
        items = []

        print 'Saving:', feed.title.encode('utf-8')
        try:
            raw_items = self.load_items(feed)
        except:
            print 'Failed. Moving on...'
            print
            return
        for item in raw_items:
            items.append(self.process_item(item))
        feed_obj = self.get_feed_info(feed)
        feed_obj['items'] = items
        feed_obj['items_count'] = len(items)
        self.save_to_file(self.__create_feed_filename(feed.title), feed_obj, subdir)

    def process_category(self, category):
        cat = {
            'id': category.id,
            'title': category.label,
        }
        cat['feeds'] = [self.get_feed_info(feed) for feed in category.feeds]
        return cat

    def save_tag(self, tag):
        cat = {
            'id': tag.id,
            'title': tag.label,
        }
        print 'Saving:', tag.label.encode('utf-8')
        cat['items'] = [self.process_item(item) for item in
                        self.load_items(tag)]
        cat['items_count'] = len(cat['items'])
        self.save_to_file(self.__create_feed_filename(cat['title']), cat, 'tags')

    def save_categories(self):
        categories = {
            'title': 'Google Reader Categories'
        }
        categories['categories'] = [self.process_category(cat) for cat in
                                    self.categories]
        if len(categories['categories']):
            self.save_to_file("categories.json", categories)
        else:
            print 'There are no categories to save.'

    def save_feed_list(self):
        feeds = {
            'title': 'Google Reader List of Feeds'
        }
        feeds_list = []
        for feed in self.feeds:
            feeds_list.append(self.get_feed_info(feed))
        feeds['feeds'] = feeds_list
        if len(feeds['feeds']):
            self.save_to_file("feeds.json", feeds)
        else:
            print 'There are no feeds to save.'

    def backup(self):
        if getattr(settings, 'SAVE_TAGS', True):
            print "Saving tags..."
            self.get_tags()
            for tag in self.tags:
                # Tag is really a category
                try:
                    label = tag['id'].rsplit('label/')[1]
                except:
                    # Special feeds (state/); skip, they are handled separately
                    continue
                ctag = Category(self.reader, label, tag['id'])
                self.save_tag(ctag)

        if getattr(settings, 'SAVE_FEEDS', False):
            print "Saving feeds..."
            for feed in self.feeds:
                self.save_feed(feed, 'feeds')

        print "Saving special feeds..."
        if getattr(settings, 'SAVE_SPECIAL_FEEDS_ALL', False):
            sf_keys = self.specialFeeds.keys()
        else:
            sf_keys = ('starred', )
        for feed_name in sf_keys:
            feed = self.specialFeeds[feed_name]
            self.save_feed(feed, 'special')

        if getattr(settings, 'SAVE_CATEGORIES', True):
            print "Saving list of feeds and categories..."
            self.save_feed_list()
            self.save_categories()
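A minimal sketch of driving the backup flow described in the HooverReader docstring above; the credentials are placeholders, and BACKUP_DIR plus the SAVE_* settings are assumed to be configured elsewhere in the script:

if __name__ == '__main__':
    # hypothetical entry point; username/password are placeholders
    hoover = HooverReader('user@example.com', 'secret')
    hoover.backup()  # writes tag/feed/category JSON files under BACKUP_DIR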
Example #43
from libgreader import GoogleReader, ClientAuthMethod

username = "******"
password = "******"

ca = ClientAuthMethod(username, password)

reader = GoogleReader(ca)

info = reader.getUserInfo()
print info

x = reader.buildSubscriptionList()
if x == "True":
    print ("Subscription list built.")
else:
    print ("Something went wrong with building the subscription list.")

print ("Printing feed objects...")
print reader.feeds

# reader.feeds is a list, so we can play around with individual objects
print reader.feeds[0]

# Each list item has some properties associated with it
print reader.feeds[0].id
print reader.feeds[0].title

# List comprehension for returning feed id
print [x.id for x in reader.feeds]