def correctFileName(max, start=57):
    """Rename numbered HTML files after the heading each one contains.

    For every ``n`` from ``start`` through ``max`` (inclusive), the file
    ``<n>.html`` in the current working directory is parsed, the text of
    its first ``<h2>`` element is stripped and title-cased, and the file is
    renamed to ``Volume <n>, <word 1> <word 2>.html``, where the second word
    is first passed through ``roman_int.roman_to_english`` (presumably a
    Roman-numeral-to-English converter; confirm against that module).

    Args:
        max: Last volume number to process (inclusive).  The name shadows
            the builtin but is kept so existing keyword callers still work.
        start: First volume number to process.  Defaults to 57, the value
            that was previously hard-coded.

    Raises:
        AttributeError: If a file contains no ``<h2>`` element.
        IndexError: If the heading has fewer than two space-separated words.
    """
    for n in range(start, max + 1):
        old_name = str(n) + '.html'

        # Read the heading inside a with-block so the handle is released
        # before the rename (an open handle can block renaming on Windows)
        # and is not leaked if parsing raises.
        with open(old_name, 'r') as page:
            soup = BeautifulSoup(page)
            # The heading is prettified and re-parsed before its text is
            # extracted; kept as-is so the resulting text is unchanged.
            heading = BeautifulSoup(soup.find('h2').prettify())
            title = heading.find('h2').get_text().strip().title()

        words = title.split(" ")
        # Single-argument print(...) prints the same on Python 2 and 3.
        print(words)

        new_title = ("Volume " + str(n) + ", " + words[0] + " "
                     + roman_int.roman_to_english(words[1]))
        print(new_title)
        os.rename(old_name, new_title + '.html')
# NOTE(review): a bare ``file.close()`` used to precede this function. It sat
# outside any function, where ``file`` is not an open handle, so it was
# dropped; confirm nothing upstream relied on it.
def strartHTML(style):
    """Return the opening markup of an HTML document.

    The function name is misspelled ("strart") but is kept unchanged so
    existing callers continue to work.

    Args:
        style: Indexable collection whose first element is converted with
            ``str()`` and placed inside ``<head>``.

    Returns:
        The ``<html>``/``<head>`` section followed by an opening ``<body>``
        tag, each part terminated by a newline.
    """
    return '<html>\n<head>\n' + str(style[0]) + '\n</head>\n<body>\n'


def endHTML():
    """Return the closing ``</body>`` and ``</html>`` tags."""
    return '</body>\n</html>'


def correctFileName(max, start=57):
    """Rename numbered HTML files after the heading each one contains.

    For every ``n`` from ``start`` through ``max`` (inclusive), the file
    ``<n>.html`` in the current working directory is parsed, the text of
    its first ``<h2>`` element is stripped and title-cased, and the file is
    renamed to ``Volume <n>, <word 1> <word 2>.html``, where the second word
    is first passed through ``roman_int.roman_to_english`` (presumably a
    Roman-numeral-to-English converter; confirm against that module).

    Args:
        max: Last volume number to process (inclusive).  The name shadows
            the builtin but is kept so existing keyword callers still work.
        start: First volume number to process.  Defaults to 57, the value
            that was previously hard-coded.

    Raises:
        AttributeError: If a file contains no ``<h2>`` element.
        IndexError: If the heading has fewer than two space-separated words.
    """
    for n in range(start, max + 1):
        old_name = str(n) + '.html'

        # Read the heading inside a with-block so the handle is released
        # before the rename (an open handle can block renaming on Windows)
        # and is not leaked if parsing raises.
        with open(old_name, 'r') as page:
            soup = BeautifulSoup(page)
            # The heading is prettified and re-parsed before its text is
            # extracted; kept as-is so the resulting text is unchanged.
            heading = BeautifulSoup(soup.find('h2').prettify())
            title = heading.find('h2').get_text().strip().title()

        words = title.split(" ")
        # Single-argument print(...) prints the same on Python 2 and 3.
        print(words)

        new_title = ("Volume " + str(n) + ", " + words[0] + " "
                     + roman_int.roman_to_english(words[1]))
        print(new_title)
        os.rename(old_name, new_title + '.html')


if __name__ == '__main__':
    print(roman_int.roman_to_english('X'))
    correctFileName(57)
    #breakHTMLDocument('Emma, by Jane Austen.html','<a name')