'wordpress': process_wordpress_post,
	'typepad': process_typepad_post
#	'livejournal': process_livejournal_post,
#	'newsvine': process_newsvine_post
}


# API:
#	def process_[blog_type]_blog( file_path )
#		#Find the HTML file for each blog post within a blog of type blog_type stored at file_path
#		#Return a list of filenames
#
#	def process_[blog_type]_post( file_name )
#		#Parse an HTML blog post into XML
#		#Return the XML for the blog post
#
#	Design note: the directory separator (/) is supplied as part of the input path
#	(e.g. a trailing '/') rather than being appended by the code.
#

if __name__ == "__main__":
	# Manual test harness: this section runs only when the module is executed
	# directly as a script, never on import.

	# Earlier experiment, left disabled (uses Python 2 print syntax):
	#files = process_newsvine_blog( '/scratch/unmirrored5/agong/blog_panel_crawl/' + 'vanessa-wilson73.newsvine.com/')
	#print len( files )
	#print files[:5]

	# NOTE(review): the path built below appends a newsvine blog directory after
	# a single blogspot post's ".html/" path while the blog type is "blogger".
	# This looks like a leftover from editing -- confirm the intended target.
	profiler3.testBlogParser(
		'/scratch/unmirrored5/agong/blog_panel_crawl/'
		+ 'witchdoctorrepellent.blogspot.com/2005/12/no-tears-for-monster.html/'
		+ 'vanessa-wilson73.newsvine.com/',
		"blogger",
		sample=20
	)


Code example #2
0
	'blogger':	process_blogger_post,
	'wordpress': process_wordpress_post,
	'typepad': process_typepad_post,
	'livejournal': process_livejournal_post,
	'newsvine': process_newsvine_post
}


# API:
#	def process_[blog_type]_blog( file_path )
#		#Find the HTML file for each blog post within a blog of type blog_type stored at file_path
#		#Return a list of filenames
#
#	def process_[blog_type]_post( file_name )
#		#Parse an HTML blog post into XML
#		#Return the XML for the blog post
#
#	Design note: the directory separator (/) is supplied as part of the input path
#	(e.g. a trailing '/') rather than being appended by the code.
#

if __name__ == "__main__":
	# Manual test harness: this section runs only when the module is executed
	# directly as a script, never on import.

	# Earlier experiment, left disabled (uses Python 2 print syntax):
	#files = process_newsvine_blog( '/scratch/unmirrored5/agong/blog_panel_crawl/' + 'vanessa-wilson73.newsvine.com/')
	#print len( files )
	#print files[:5]

	# NOTE(review): unlike the disabled example above, this path has neither the
	# crawl root directory nor a trailing '/' -- confirm testBlogParser accepts
	# a bare blog name in this form.
	profiler3.testBlogParser(
		'carloz.newsvine.com',
		"newsvine",
		sample=20
	)