def assimilateNewUrls(self, urls):
        """adds urls to the UnvisitedURLs table if we don't have information on them
           in any of our tables"""
           
        # First, 'clean' the new urls - remove duplicates, and trailing slashes, etc.
        urls = HttpLib.cleanLinks(urls)
           
        print "\nAssimilating %s new urls\n" % len(urls)
        if len(urls) <= 0:
            return
            
        s = Session()
        
        
        # For each URL, 
        for url in urls:
            
            # Query the database to find out whether the url is already in it.
            # We only care about entries that are fully processed - if they're
            # not, we'll still need to scrape them!
            tables_to_query = [Blog, Post, UnvisitedURLs]
            to_save = True
            for table in tables_to_query:
                entry = s.query(table).filter_by(url=url, processed=True).first()

                # If we found an entry, don't add the url to our 'unvisited urls' table
                if entry is not None:
                    to_save = False
                    break
                
            if not to_save:
                continue
            
            # If we didn't break by this point, queue the url to be added to
            # the UnvisitedURLs table
            u = UnvisitedURLs(url=url)
            u.url_type = self.getUrlType(url)
            s.add(u)
        
            
        print "committing all unsaved urls to the database"
        s.commit()
        print "%s urls now in unvisitedURLs DB\n" % s.query(func.count(UnvisitedURLs))
        urls = []
        self.unvisited_urls = []         
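
# HttpLib.cleanLinks is used above but lives elsewhere in the project. As a
# rough, hypothetical sketch of what it is assumed to do - deduplicate and
# strip trailing slashes, per the comment in assimilateNewUrls - it might
# look something like this (an illustration, not the project's actual code):
def _clean_links_sketch(urls):
    """Hypothetical: deduplicate urls and strip trailing slashes/whitespace."""
    cleaned, seen = [], set()
    for url in urls:
        url = url.strip().rstrip('/')
        if url and url not in seen:
            seen.add(url)
            cleaned.append(url)
    return cleaned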
def getBlogPosts(self, content, blogname):
        """Returns the urls of the posts on a page, but only the ones that
           link back to the 'mother blog' - not to other blogs. Essentially
           a page's internal post links.

           Args:
               blogname: the title of the blog (no url), ie. 'naivemelody'"""
        soup = BeautifulSoup(content)
        post_urls = soup.findAll('post')

        posts = []  # the post urls that we'll insert into the DB

        for post in post_urls:
            # Only save posts that belong to the 'mother blog'
            url = HttpLib.cleanLinks([post["url"]])[0]
            if self.extractBlogName(url) == blogname:
                posts.append(url)

        return posts
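
# getBlogPosts relies on self.extractBlogName to map a post url back to its
# blog's name. A minimal, hypothetical sketch, assuming tumblr-style urls of
# the form http://<blogname>.tumblr.com/post/<id> (an illustration, not the
# project's actual extractBlogName):
def _extract_blog_name_sketch(url):
    """Hypothetical: pull the blog name out of a tumblr-style url."""
    from urlparse import urlparse  # Python 2; use urllib.parse on Python 3
    hostname = urlparse(url).hostname or ''
    # 'http://naivemelody.tumblr.com/post/123' -> 'naivemelody'
    return hostname.split('.')[0]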