Example #1
0
def dataset_blocks(url):
  """Parse a stored page into blocks and store the result next to it.

  Reads the record stored under ``url``, parses the ``"*"`` field of the
  first entry of its ``"dataset"`` list with ``mw``, and writes the parser's
  blocks and structure back under the key ``"<url>/blocks"``.

  Args:
    url: key of the page record to read; also used as the prefix of the key
      the result is written to.

  Returns:
    The dict that was written: ``{"structure": ..., "blocks": ...}``.
  """
  # Single-argument, parenthesised print: same output as the old print
  # statement on Python 2, and valid syntax on Python 3.
  print("blocks: %s" % url)

  # NOTE(review): port 27017 suggests a MongoDB-backed Dataset -- confirm.
  d = Dataset("%s:27017" % mongodb_host)

  page = d.read(url)

  # NOTE(review): assumes the record always has a non-empty "dataset" list
  # whose first entry carries the raw text under "*" -- verify against the
  # writer of these records.
  txt = mw(page["dataset"][0]["*"])

  blocks, structure = txt.get_blocks()

  key = "%s/blocks" % url
  value = {"structure": structure, "blocks": blocks}

  d.write(key, value)

  return value
from  wekeypedia.parser.dataset import Dataset

# Placeholder text that gets parsed when no page/revision pair is supplied on
# the command line.
mw_content = "yo"

pp = pprint.PrettyPrinter(indent=2)

# Usage: <script> <page> <revision>
# With both arguments present, the revision content replaces the placeholder.
if len(sys.argv) > 2:
  d = Dataset("/Users/tk/datasets/wicrimea")

  page = sys.argv[1]
  revision = sys.argv[2]

  mw_content = d.get_revision_content(page, revision)

  # Debugging aid:
  #  print(mw_content)

txt = mw(mw_content)

# Debugging aids:
#print(unicode(txt.text))
#pp.pprint(txt.text.nodes)

headings = txt.get_headings()

for h in headings:
  # Indent two spaces per level below 2; level 2 and shallower get no indent
  # because multiplying a string by zero or a negative count yields "".
  # The whole expression sits inside one pair of print parentheses: written as
  # `print (...) + str(...)`, Python 3 would print only the indent and then
  # fail adding None to a str.
  print(("  " * (h.level - 2)) + str(h.title))

# Visual separator between the heading outline and the block dump.
print("—" * 10)

blocks = txt.get_blocks()

print(blocks)