def get_cached_entries():
    blogger_service = service.GDataService()
    blogger_service.service = 'blogger'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ssl = False
    query = service.Query()
    query.feed = '/feeds/6752139154038265086/posts/default'
    query.max_results = 500
    entries = []
    i = 0
    # Page through the feed 500 entries at a time; a short page signals
    # the end of the feed.
    while True:
        query.start_index = i * 500 + 1
        feed = blogger_service.Get(query.ToUri())
        logging.info('%d entries fetched, fetch number %d'
                     % (len(feed.entry), i + 1))
        entries.extend(feed.entry)
        if len(feed.entry) == 500:
            i += 1
        else:
            break
    logging.info('retrieved %d entries total' % len(entries))
    return tuple(format_entry(e) for e in entries)
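# A minimal usage sketch for get_cached_entries above, assuming the
# module-level format_entry() it references is defined elsewhere and
# that the gdata `service` module is already imported.
import logging

logging.basicConfig(level=logging.INFO)
cached = get_cached_entries()
print 'cached %d formatted entries' % len(cached)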
def updateLabels(self, labels, newLabels):
    query = service.Query()
    query.feed = self.postsUri
    query.max_results = 10000
    if labels:
        query.categories = labels
    feed = self.gdService.Get(query.ToUri())
    # Build the replacement category list from the new labels.
    newCategory = []
    if newLabels:
        for label in newLabels:
            category = atom.Category(scheme=blogger.LABEL_SCHEME, term=label)
            newCategory.append(category)
    for entry in feed.entry:
        # Drop the existing labels, attach the new set, and save.
        while len(entry.category) > 0:
            entry.category.pop()
        entry.category = newCategory
        editUri = entry.GetEditLink().href
        self.gdService.Put(entry, editUri)
    return newCategory
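# A hedged example of calling updateLabels above: replace the labels on
# every post currently tagged 'draft-notes' with a single 'archive'
# label.  `client` is a hypothetical instance exposing the postsUri and
# logged-in gdService attributes the method requires.
new_cats = client.updateLabels(labels=['draft-notes'], newLabels=['archive'])
print 'applied %d label(s) across the matched posts' % len(new_cats)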
def handle_import(self, options):
    """
    Gets posts from Forumger.
    """
    forum_id = options.get("forum_id")
    if forum_id is None:
        raise CommandError("Usage is import_forumger %s" % self.args)
    try:
        from gdata import service
    except ImportError:
        raise CommandError("Could not import the gdata library.")
    forumger = service.GDataService()
    forumger.service = "forumger"
    forumger.server = "www.forumger.com"
    query = service.Query()
    query.feed = "/feeds/%s/posts/full" % forum_id
    query.max_results = 500
    try:
        feed = forumger.Get(query.ToUri())
    except service.RequestError as err:
        message = ("There was a service error. The response was: "
                   "%(status)s %(reason)s - %(body)s" % err.message)
        raise CommandError(message, forumger.server + query.feed,
                           err.message["status"])
def get_feeds(self, page_id, page_items):
    query = service.Query()
    query.feed = ('http://www.blogger.com/feeds/' + self.blog_id +
                  '/posts/default')
    query.max_results = page_items
    query.start_index = ((page_id - 1) * page_items) + 1
    feed = self.blogger_service.Get(query.ToUri())
    feeds = []
    for entry in feed.entry:
        feeds.append({
            'id': self.get_post_id(entry),
            'title': entry.title.text,
            'link': self.get_post_link(entry),
            'published': parser.parse(entry.published.text),
            'updated': entry.updated.text,
            'author': entry.author[0].name.text,
            'content': entry.content.text,
            'comments': self.get_comments(entry),
        })
    blog = {
        'page_id': page_id,
        'post_id': None,
        'posts': feeds,
        'next_link': self.get_next_link(page_id, feed),
        'prev_link': self.get_prev_link(page_id, feed),
        'singlepost': False,
    }
    return blog
def PrintPostsInDateRange(self, start_time, end_time):
    """Displays the title and modification time for any posts created or
    updated between start_time and end_time.

    Builds the query, submits it to the GDataService, and displays the
    results.  start_time is inclusive and end_time is exclusive, so an
    end_time of '2007-07-01' includes posts up until 2007-06-30
    11:59:59PM.
    """
    # Create query and submit a request.
    query = service.Query()
    query.feed = '/feeds/' + self.blog_id + '/posts/default'
    query.updated_min = start_time
    query.updated_max = end_time
    query.orderby = 'updated'
    feed = self.service.Get(query.ToUri())

    # Print the results.
    print feed.title.text + " posts between " + start_time + " and " + end_time
    for entry in feed.entry:
        if not entry.title.text:
            print "\tNo Title"
        else:
            print "\t" + entry.title.text
    print
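# Hypothetical call to PrintPostsInDateRange above; `client` stands in
# for an instance of the class defining it.  Dates use the YYYY-MM-DD
# form the Blogger query parameters accept, and because updated_max is
# exclusive this covers all of June 2007.
client.PrintPostsInDateRange('2007-06-01', '2007-07-01')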
def PrintAllPosts(blogger_service, blog_id, max_results='99999'):
    query = service.Query()
    query.feed = '/feeds/' + blog_id + '/posts/default'
    query.max_results = max_results
    feed = blogger_service.Get(query.ToUri())
    # Recreate a clean backup directory named after the blog.
    backup_dir = os.path.join(os.getcwd(), feed.title.text + " Backup")
    if os.path.exists(backup_dir):
        shutil.rmtree(backup_dir)
    os.makedirs(os.path.join(backup_dir, "images"))
    for entry in feed.entry:
        print "Parsing images in " + entry.title.text
        entry_dir = os.path.join(backup_dir, "images", entry.title.text)
        os.makedirs(entry_dir)
        # Pull every <img> out of the post body and download it.
        html = BeautifulSoup(entry.content.text)
        images = html.findAll('img')
        for i, image in enumerate(images):
            urllib.urlretrieve(
                image["src"],
                os.path.join(entry_dir, image["src"].split('/')[-1]))
            print "Downloaded %d out of %d" % (i + 1, len(images))
def get_hrefs():
    blogger_service = service.GDataService()
    blogger_service.service = 'blogger'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ssl = False
    query = service.Query()
    query.feed = '/feeds/6752139154038265086/posts/default'
    query.max_results = 500
    allhrefs = []
    i = 0
    # Page through the feed, collecting the last link of each entry.
    while True:
        query.start_index = i * 500 + 1
        feed = blogger_service.Get(query.ToUri())
        logging.info('%d urls fetched, fetch number %d'
                     % (len(feed.entry), i + 1))
        allhrefs.extend(entry.link[-1].href for entry in feed.entry)
        if len(feed.entry) == 500:
            i += 1
        else:
            break
    logging.info('retrieved %d urls total' % len(allhrefs))
    return allhrefs
def get_post(self, post_id):
    query = service.Query()
    query.feed = ('http://www.blogger.com/feeds/' + self.blog_id +
                  '/posts/default/' + post_id)
    entry = self.blogger_service.Get(query.ToUri())
    feeds = [{
        'id': entry.id,
        'title': entry.title.text,
        'link': self.get_post_link(entry),
        'published': parser.parse(entry.published.text),
        'updated': entry.updated.text,
        'author': entry.author[0].name.text,
        'content': entry.content.text,
        'comments': self.get_comments(entry),
    }]
    blog = {
        'page_id': -1,
        'post_id': post_id,
        'posts': feeds,
        'next_link': None,
        'prev_link': None,
        'singlepost': True,
    }
    return blog
def getIndividualPost(self, blogID, postID):
    # Create query and submit a request.
    query = service.Query()
    query.feed = '/feeds/' + blogID + '/posts/default/' + postID
    feed = self.service.Get(query.ToUri())
    return feed
def PrintUserBlogTitles(blogger_service):
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = blogger_service.Get(query.ToUri())
    print feed.title.text
    for entry in feed.entry:
        print "\t" + entry.title.text, "blog_id:", \
            entry.GetSelfLink().href.split("/")[-1]
def PrintUserBlogTitles(self):
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = self.service.Get(query.ToUri())
    # Print the results.
    print feed.title.text
    for entry in feed.entry:
        print "\t" + entry.title.text
def GetBlogByTitle(self, title):
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = self.blogger_service.Get(query.ToUri())
    for entry in feed.entry:
        if entry.title.text == title:
            self.blog_id = entry.GetSelfLink().href.split("/")[-1]
            return entry
    print("Can't find blog with title : {0}".format(title))
    sys.exit(1)  # exit non-zero: the lookup failed
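# Sketch of how GetBlogByTitle above might be used; `wrapper` is a
# hypothetical instance whose blogger_service has already logged in.
# On a match it caches blog_id on the instance and returns the entry.
entry = wrapper.GetBlogByTitle('My Holiday Photos')
print("resolved blog_id: {0}".format(wrapper.blog_id))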
def PrintAllPosts(blogger_service, blog_id, max_results='99999'):
    '''Grab the blogger URL feed and get ready for backup.'''
    query = service.Query()
    query.feed = '/feeds/' + blog_id + '/posts/default'
    query.max_results = max_results
    feed = blogger_service.Get(query.ToUri())
    #DownloadImages(feed)
    DownloadArticles(feed)
def PrintUserBlogTitles(self):
    """Prints a list of all the user's blogs."""
    # Request the feed.
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = self.service.Get(query.ToUri())
    # Print the results.
    print feed.title.text
    for entry in feed.entry:
        print "\t" + entry.title.text
    print
def updatePost(self, user, password, startTime, endTime, newContent):
    self.blogerLogin(user, password)
    self.getOneBlog(0)
    # Find the entry to update.
    query = service.Query()
    query.feed = '/feeds/' + self.blogId + '/posts/default'
    query.published_min = startTime
    query.published_max = endTime
    feed = self.service.Get(query.ToUri())
    # Should only have one entry; in any case, take the first.
    theEntry = feed.entry[0]
    theEntry.content = atom.Content(content_type='html', text=newContent)
    self.gdService.Put(theEntry, theEntry.GetEditLink().href)
def getListPost(self, idBlog, iMaxNumPost, allDraft=None):
    # If allDraft is True, return only drafts; if it is False, return
    # only published (non-draft) entries; otherwise return everything.
    query = service.Query()  # Build the query.
    query.feed = '/feeds/' + idBlog + '/posts/default'
    query.max_results = iMaxNumPost
    feed = self.service.GetFeed(query.ToUri())  # Run the query.
    #feed = self.service.Get('/feeds/' + idBlog + '/posts/default')
    misEntradas = []
    if allDraft is not None:
        for post in feed.entry:
            if allDraft and blogger.is_draft(post):
                misEntradas.append(post)
            elif (not allDraft) and (not blogger.is_draft(post)):
                misEntradas.append(post)
    else:
        misEntradas = list(feed.entry)
    return misEntradas
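# Example use of getListPost above: fetch up to 50 entries from a blog
# and keep only the drafts.  `client` is a hypothetical instance with a
# logged-in self.service.
drafts = client.getListPost('6752139154038265086', 50, allDraft=True)
print '%d draft(s) found' % len(drafts)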
def run(self):
    blogs = []
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = self.service.Get(query.ToUri())
    for entry in feed.entry:
        blog_dict = {
            'id': entry.id.text,
            'title': entry.title.text,
            'updated': entry.updated.text,
        }
        # Record every link by its rel suffix; the self link also yields
        # the numeric blog ID.
        for link in entry.link:
            rel = link.rel.split('#')[-1]
            href = link.href
            blog_dict[rel] = href
            if rel == 'self':
                blog_dict['blog_id'] = href.split('/')[-1]
        blogs.append(blog_dict)
    self.queue.put_nowait(blogs)
def PrintPostsInLastWeek(self):
    """Displays the title and modification time for any posts created or
    updated in the last week, then dumps them as JSON and writes an HTML
    newsletter.  Note that updated_min is inclusive while updated_max is
    exclusive.
    """
    # Create query and submit a request.
    query = service.Query()
    query.feed = '/feeds/' + self.blog_id + '/posts/default'
    end_time = datetime.strftime(datetime.now(), "%Y-%m-%d")
    start_time = datetime.strftime(datetime.now() + timedelta(days=-7),
                                   "%Y-%m-%d")
    print start_time, end_time
    query.updated_min = start_time
    query.updated_max = end_time
    query.orderby = 'updated'
    feed = self.service.Get(query.ToUri())
    try:
        import simplejson as json
    except ImportError:
        import json
    out = [{'selected': False,
            'updated_text': entry.updated.text or "",
            'updated_summary': entry.summary.text or "",
            'article_body': entry.content.text or "",
            'article_title': entry.title.text or ""}
           for entry in feed.entry]
    print json.dumps(out)
    html = ""
    html += template.render(templatepath + 'newsletter_start.html', {})
    html += template.render(templatepath + 'newsletter_js.html', {})
    # Embed the JSON payload built above in the table's data attribute.
    html += ('<table width="500" class=\'out\' border="1" '
             'data-dynamic="%s">' % json.dumps(out))
    f = open('c:/xampp/htdocs/newsletter.php', 'w')
    f.write(html)
    f.close()
def GetPostByTitle(self, title):
    '''
    Fetch a single post which matches the title most.  If "all" or
    "recent" is given, fetch all or recent posts instead.
    '''
    posts = list()
    if title != "all":
        feed = self.blogger_service.GetFeed(
            '/feeds/' + self.blog_id + '/posts/default')
        for entry in feed.entry:
            if entry.title.text:
                if title != "recent":
                    match = difflib.SequenceMatcher(
                        None, entry.title.text, title).ratio()
                    if match > 0.7:
                        print(" |- Found with title : {0} ".format(
                            entry.title.text))
                        posts.append(entry)
                        return posts
                else:
                    # We want all recent posts.
                    posts.append(entry)
            # Titleless posts are skipped.
        print("== Total {0} posts fetched . ".format(len(posts)))
        return posts
    else:
        # Fetch all posts.
        query = service.Query()
        query.feed = '/feeds/' + self.blog_id + '/posts/default'
        query.published_min = '1980-01-01'
        query.published_max = time.strftime('%Y-%m-%d')
        feed = self.blogger_service.Get(query.ToUri())
        print(feed.title.text + " posts between " + query.published_min +
              " and " + query.published_max)
        for entry in feed.entry:
            if entry.title.text:
                posts.append(entry)
        return posts
def getPosts(self, labels=None, publishedDate=None, orderby=None,
             maxResults=25):
    posts = []
    query = service.Query()
    query.feed = self.postsUri
    # Default sort option is 'updated'.
    query.orderby = 'updated'
    if orderby:
        query.orderby = orderby
    # Add labels to the query.
    if labels:
        query.categories = labels
    # Add a publication range to the query.
    if publishedDate:
        # The time format should look like: 2008-02-09T08:00:00-08:00
        query.published_min = publishedDate[0]
        query.published_max = publishedDate[1]
    query.max_results = maxResults
    feed = self.gdService.Get(query.ToUri())
    for entry in feed.entry:
        posts.append((entry.title.text,
                      entry.GetSelfLink().href,
                      entry.GetAlternateLink().href))
    return posts
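# Example invocation of getPosts above, assuming `bot` is an instance
# with postsUri and gdService set up.  The published range uses the
# RFC 3339 timestamp format noted in the comment inside the method.
recent = bot.getPosts(labels=['python'],
                      publishedDate=('2008-01-01T00:00:00-08:00',
                                     '2008-02-09T08:00:00-08:00'),
                      orderby='published',
                      maxResults=10)
for title, self_href, alt_href in recent:
    print title, alt_href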
def __init__(self, email, password):
    # Authenticate using ClientLogin.
    self.service = service.GDataService(email, password)
    self.service.source = 'Blogger_Python_Sample-1.0'
    self.service.service = 'blogger'
    self.service.server = 'www.blogger.com'
    self.service.ProgrammaticLogin()
    self.blog_id = 0
    # Get the blog ID for http://pythonjobs.blogspot.com
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = self.service.Get(query.ToUri())
    for entry in feed.entry:
        print "\t" + entry.title.text
        print entry.link[0].href
        # if entry.link[0].href == 'http://pythonjobs.blogspot.com/':
        if entry.link[0].href == ('http://www.blogger.com/feeds/'
                                  '18362312542208032325/blogs/'
                                  '5503040385101187323'):
            self_link = entry.GetSelfLink()
            self.blog_id = self_link.href.split('/')[-1]
            break
def handle_import(self, options):
    """
    Gets posts from Blogger.
    """
    blog_id = options.get("blog_id")
    if blog_id is None:
        raise CommandError("Usage is import_blogger %s" % self.args)
    try:
        from gdata import service
    except ImportError:
        raise CommandError("Could not import the gdata library.")
    blogger = service.GDataService()
    blogger.service = "blogger"
    blogger.server = "www.blogger.com"
    start_index = 1
    processed_posts = []
    new_posts = 1
    while new_posts:
        new_posts = 0
        query = service.Query()
        query.feed = "/feeds/%s/posts/full" % blog_id
        query.max_results = 500
        query.start_index = start_index
        try:
            feed = blogger.Get(query.ToUri())
        except service.RequestError as err:
            message = ("There was a service error. The response was: "
                       "%(status)s %(reason)s - %(body)s" % err.message)
            raise CommandError(message, blogger.server + query.feed,
                               err.message["status"])
        for (i, entry) in enumerate(feed.entry):
            # Get the unique post ID from the self link URL by pulling
            # the ID off the end.
            post_id = entry.GetSelfLink().href.split("/")[-1]
            # Skip duplicate posts. Important for the last query.
            if post_id in processed_posts:
                continue
            title = entry.title.text
            content = entry.content.text
            # Strip the time zone info off the end, as we want UTC.
            clean_date = entry.published.text[:re.search(
                r"\.\d{3}", entry.published.text).end()]
            published_date = self.parse_datetime(clean_date)
            # TODO - issues with content not generating correct <P> tags
            tags = [tag.term for tag in entry.category]
            post = self.add_post(title=title, content=content,
                                 pub_date=published_date, tags=tags)
            # Get the comments from the post feed and add them to the
            # post details.
            comment_url = "/feeds/%s/%s/comments/full?max-results=1000"
            comments = blogger.Get(comment_url % (blog_id, post_id))
            for comment in comments.entry:
                email = comment.author[0].email.text
                author_name = comment.author[0].name.text
                # Strip the time zone info off the end, as we want UTC.
                clean_date = comment.published.text[:re.search(
                    r"\.\d{3}", comment.published.text).end()]
                comment_date = self.parse_datetime(clean_date)
                website = ""
                if comment.author[0].uri:
                    website = comment.author[0].uri.text
                body = comment.content.text
                # Add the comment as a dict to the end of the comments list.
                self.add_comment(
                    post=post,
                    name=author_name,
                    email=email,
                    body=body,
                    website=website,
                    pub_date=comment_date,
                )
            processed_posts.append(post_id)
            new_posts += 1
        start_index += 500
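# handle_import above is written as a management-command handler (note
# CommandError and self.add_post), so it is normally reached through the
# command line rather than called directly.  Invocation would look
# roughly like the following; the exact command and option names depend
# on how the surrounding command class declares them:
#
#     python manage.py import_blogger --blog-id=6752139154038265086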
def PrintPostsInLastWeek(self):
    """Displays the title and modification time for any posts created or
    updated in the last week, writing them into an HTML form template.
    Note that updated_min is inclusive while updated_max is exclusive.
    """
    # Create query and submit a request.
    query = service.Query()
    query.feed = '/feeds/' + self.blog_id + '/posts/default'
    end_time = datetime.strftime(datetime.now(), "%Y-%m-%d")
    start_time = datetime.strftime(datetime.now() + timedelta(days=-7),
                                   "%Y-%m-%d")
    print start_time, end_time
    query.updated_min = start_time
    query.updated_max = end_time
    query.orderby = 'updated'
    feed = self.service.Get(query.ToUri())
    fd = open('c:/xampp/htdocs/template.html', 'r')
    content = fd.read()
    fd.close()
    fContent = ""
    rowContent = ""
    count = 0
    for entry in feed.entry:
        count += 1
        try:
            article_update = entry.updated.text
        except AttributeError:
            article_update = ""
        try:
            article_summary = entry.summary.text
        except AttributeError:
            article_summary = ""
        try:
            article_body = entry.content.text
        except AttributeError:
            article_body = ""
        try:
            article_title = entry.title.text
        except AttributeError:
            article_title = ""
        rowContent += """<table>
<tr>
<td><input type="checkbox" name="articleList[%d]['selOpt']" value="on" /></td>
<td><input type="text" name="<?php echo "articleList[%d]['date']"; ?>" value="%s" /></td>
<td><input type="text" name="<?php echo "articleList[%d]['title']"; ?>" value="%s" /></td>
<td><input type="text" name="<?php echo "articleList[%d]['body']"; ?>" value="%s" /></td>
</tr>
</table>""" % (count, count, article_update, count, article_title,
               count, article_body)
    # contentPhp is assumed to be defined elsewhere in the module.
    fContent += content % (rowContent, contentPhp)
    print fContent
    f = open('c:/xampp/htdocs/newsletter.php', 'w')
    f.write(fContent)
    f.close()
# Log in.
selfservice = service.GDataService('user', 'password')
selfservice.source = 'Blogger_Python_Sample-1.0'
selfservice.service = 'blogger'
selfservice.server = 'www.blogger.com'
selfservice.ProgrammaticLogin()

# Get the blog ID for the first blog.
feed = selfservice.Get('/feeds/default/blogs')
self_link = feed.entry[0].GetSelfLink()
if self_link:
    selfblog_id = self_link.href.split('/')[-1]

# Find the entry to update.
query = service.Query()
query.feed = '/feeds/' + selfblog_id + '/posts/default'
query.published_min = "2008-02-17T14:00:00-08:00"
query.published_max = "2008-02-18T00:00:00-08:00"
#query.orderby = 'published'
feed = selfservice.Get(query.ToUri())
print query.ToUri()
print feed.title.text
print feed.entry
for entry in feed.entry:
    print '\t' + entry.title.text

# Should have only one entry.
theEntry = feed.entry[0]
# Update the post on the server side.
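# The snippet above stops at updating the post on the server side.  A
# minimal sketch of that step, following the same Put pattern used by
# updatePost earlier in this section; new_content is a placeholder for
# the replacement post body.
theEntry.content = atom.Content(content_type='html', text=new_content)
selfservice.Put(theEntry, theEntry.GetEditLink().href)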
def handle_import(self, options):
    """
    Gets posts from Blogger.
    """
    blog_id = options.get("blog_id")
    if blog_id is None:
        raise CommandError("Usage is import_blogger %s" % self.args)
    try:
        from gdata import service
    except ImportError:
        raise CommandError("Could not import the gdata library.")
    blogger = service.GDataService()
    blogger.service = "blogger"
    blogger.server = "www.blogger.com"
    query = service.Query()
    query.feed = "/feeds/%s/posts/full" % blog_id
    query.max_results = 500
    try:
        feed = blogger.Get(query.ToUri())
    except service.RequestError as err:
        message = ("There was a service error. The response was: "
                   "%(status)s %(reason)s - %(body)s" % err.message)
        raise CommandError(message, blogger.server + query.feed,
                           err.message["status"])
    for (i, entry) in enumerate(feed.entry):
        # Get the unique post ID from the self link URL by pulling the
        # ID off the end.
        post_id = entry.GetSelfLink().href.split("/")[-1]
        title = entry.title.text
        content = entry.content.text
        # Strip the time zone info off the end, as we want UTC.
        published_date = datetime.strptime(
            entry.published.text[:-6],
            "%Y-%m-%dT%H:%M:%S.%f") - timedelta(seconds=timezone)
        # TODO - issues with content not generating correct <P> tags
        tags = [tag.term for tag in entry.category]
        post = self.add_post(title=title, content=content,
                             pub_date=published_date, tags=tags)
        # Get the comments from the post feed and add them to the post
        # details.
        ids = (blog_id, post_id)
        comment_url = "/feeds/%s/%s/comments/full?max-results=1000" % ids
        comments = blogger.Get(comment_url)
        for comment in comments.entry:
            email = comment.author[0].email.text
            author_name = comment.author[0].name.text
            # Strip the time zone info off the end, as we want UTC.
            comment_date = datetime.strptime(
                comment.published.text[:-6],
                "%Y-%m-%dT%H:%M:%S.%f") - timedelta(seconds=timezone)
            website = ""
            if comment.author[0].uri:
                website = comment.author[0].uri.text
            body = comment.content.text
            # Add the comment as a dict to the end of the comments list.
            self.add_comment(post=post, name=author_name, email=email,
                             body=body, website=website,
                             pub_date=comment_date)