コード例 #1
0
				print n[1]
				millis = convertTime(n[1])
				addEvent["starting_time"] = millis

			if n[0].isdigit():
				millis = convertTime(n[0])
				addEvent["starting_time"] = millis
				 
	
			addEvent["description"] = n[2]	
			
			if addEvent["description"]!="description" and addEvent["starting_time"]!=1:
				addEvent["color"]='orange'
				print 'addingEvent'
				print addEvent
				timeline["times"].append(addEvent)

				addEvent={"color":"blue", "description":"description", "starting_time": 1}
				
	outerMost.append(timeline)
	return outerMost


if __name__ == '__main__':
	# Script entry point. The helpers run in sequence:
	# readCsv() -> reformat() -> webToJson(), and the final value is handed
	# to convertToFile() together with the string 'usTimeline.json'
	# (presumably the output path -- confirm in convertToFile).
	csvRows = readCsv()
	timelineJson = webToJson(reformat(csvRows))
	convertToFile('usTimeline.json', timelineJson)


コード例 #2
0
# NOTE: Python 2 code (it relies on the `unicode` builtin). `soup`,
# `invalid_tags`, `webToJson` and `convertToFile` are defined outside this
# section of the file.

# Remove every tag whose text is empty.
for x in soup.find_all():
    if len(x.text) == 0:
        x.extract()

# Replace each tag named in invalid_tags with its own children. find_all()
# and unwrap() are the BeautifulSoup 4 names of the legacy findAll() /
# replaceWithChildren() spellings, matching the find_all() calls around them.
for tag in invalid_tags:
    for match in soup.find_all(tag):
        match.unwrap()

# Collect the first child of every non-empty <span> found inside the
# elements whose class is "MsoNormal".
timelineBox = soup.find_all(class_="MsoNormal")
timeline = []
for stuff in timelineBox:
    timelineJunk = stuff.find_all("span")
    for junk in timelineJunk:
        # len() of a tag counts its children; skip spans with none so that
        # contents[0] below cannot raise IndexError.
        if len(junk) > 0:
            toClean = junk.contents[0]

            # Drop non-ASCII characters *before* printing. Printing the raw
            # unicode value raises UnicodeEncodeError under Python 2 whenever
            # stdout has no usable encoding (for example when piped).
            clean = unicode(toClean).encode('ascii', 'ignore')
            # Single-argument print(...) is also a valid Python 2 statement.
            print(clean)
            timeline.append(clean)

print(timeline)
results = webToJson(timeline)
convertToFile('timeline1.json', results)


		

コード例 #3
0
	outerMost = []


	for n in soup:
		if n.isdigit():
			millis = convertTime(n)
			addEvent["starting_time"] = millis
			
		else:
			addEvent["description"] = n	
					
		if addEvent["description"]!="description" and addEvent["starting_time"]!=1:
			
			addEvent["color"]='green'
			timeline["times"].append(addEvent)

			addEvent={"color":"blue", "description":"description", "starting_time": 1}
			
	outerMost.append(timeline)
	return outerMost

if __name__ == '__main__':
	# Script entry point. The chronology page URL goes through parsePage(),
	# its result through webToJson(), and the outcome is handed to
	# convertToFile() together with the string 'timeline2.json'
	# (presumably the output path -- confirm in convertToFile).
	sourceUrl = "http://tgmaa.weebly.com/chronology.html"
	timelineData = webToJson(parsePage(sourceUrl))
	convertToFile('timeline2.json', timelineData)
	
	


コード例 #4
0
    "http://library.howard.edu/content.php?pid=257155&sid=2164686")
# NOTE: Python 2 code (it relies on the `unicode` builtin). `r` is the
# response object created by the statement that ends just above this
# section; `webToJson` and `convertToFile` are defined elsewhere.
data = r.text
# NOTE(review): no parser is named here, so BeautifulSoup picks whichever one
# is installed; consider pinning one once the expected output is confirmed.
soup = BeautifulSoup(data)
invalid_tags = ['b', 'i', 'u']

# Remove every tag whose text is empty.
for x in soup.find_all():
    if len(x.text) == 0:
        x.extract()

# Replace each <b>/<i>/<u> tag with its own children. find_all() and unwrap()
# are the BeautifulSoup 4 names of the legacy findAll() /
# replaceWithChildren() spellings, matching the find_all() calls around them.
for tag in invalid_tags:
    for match in soup.find_all(tag):
        match.unwrap()

# Collect the first child of every non-empty <span> found inside the
# elements whose class is "MsoNormal".
timelineBox = soup.find_all(class_="MsoNormal")
timeline = []
for stuff in timelineBox:
    timelineJunk = stuff.find_all("span")
    for junk in timelineJunk:
        # len() of a tag counts its children; skip spans with none so that
        # contents[0] below cannot raise IndexError.
        if len(junk) > 0:
            toClean = junk.contents[0]

            # Drop non-ASCII characters *before* printing. Printing the raw
            # unicode value raises UnicodeEncodeError under Python 2 whenever
            # stdout has no usable encoding (for example when piped).
            clean = unicode(toClean).encode('ascii', 'ignore')
            # Single-argument print(...) is also a valid Python 2 statement.
            print(clean)
            timeline.append(clean)

print(timeline)
results = webToJson(timeline)
convertToFile('timeline1.json', results)