예제 #1
0
class Part(object):
    """A page fetched from a URL and indexed into a table of contents and subparts.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup
    (``lxml`` parser).  Every ``<li>`` found under the first ``<ul>`` of the
    ``<div class="main">`` element is classified as a table-of-contents link,
    a subpart heading, or a definition belonging to the most recently seen
    subpart.

    Attributes:
        url: The URL the page was fetched from.
        partPage: The ``requests`` response for ``url``.
        html: The parsed document.
        standard_number: Text of the ``<strong>`` inside ``<div class="blackTen">``.
        mainIndex: The ``<div class="main">`` element.
        subpartArray: Every ``SubPart`` found, in document order.
        subpart: The most recently found ``SubPart`` (``None`` until one is seen).
    """

    def __init__(self, url):
        """Download *url*, parse it, and index its list items.

        Args:
            url: Address of the page to scrape.
        """
        self.url = url
        self.partPage = requests.get(url)
        self.html = BeautifulSoup(self.partPage.text, "lxml")

        # Results are kept under names that do NOT collide with the accessor
        # methods below; assigning to ``self.subparts`` or
        # ``self.table_of_contents`` would shadow those methods on the instance.
        self._table_of_contents = None
        self.subpartArray = []
        self.subpart = None

        self.standard_number = self.find_standard_number()
        self.mainIndex = self.html.find("div", class_="main")

        self.setupPart()

    def table_of_contents(self):
        """Return the ``ToC`` found on the page, or ``None`` if there was none."""
        return self._table_of_contents

    def subparts(self):
        """Return the list of every ``SubPart`` found on the page."""
        return self.subpartArray

    def setupPart(self):
        """Classify each ``<li>`` of the main index and record what it holds."""
        lineItems = self.mainIndex.find('ul').find_all('li')

        for li in lineItems:
            if self.find_toc(li):
                print('found toc')
            elif self.find_subparts(li):
                print('foundsubpart')
            else:
                # Neither a ToC link nor a subpart heading: treat the item as
                # a definition of the subpart seen most recently.
                print('foundsubpart def')
                self.find_subpartdefinition(li)

    def find_standard_number(self):
        """Return the text of the ``<strong>`` inside ``<div class="blackTen">``."""
        return self.html.find("div", class_="blackTen").find('strong').text

    def find_toc(self, li):
        """Record the table of contents if *li* links to it.

        An item counts as the table of contents when the ``title`` attribute
        of its second ``<a>`` contains ``'Table'``.

        Args:
            li: A list-item element exposing ``find_all('a')``.

        Returns:
            ``True`` if the item was the table of contents, else ``False``.
        """
        anchors = li.find_all('a')
        # Items with fewer than two links, or without a title, are simply not
        # the table of contents -- they must not abort the whole scan.
        if len(anchors) < 2 or 'Table' not in (anchors[1].get('title') or ''):
            return False
        self._table_of_contents = ToC(anchors[1]['href'])
        return True

    def find_subparts(self, li):
        """Record a new subpart if *li* is a subpart heading.

        An item counts as a subpart heading when the ``name`` attribute of its
        first ``<a>`` contains ``'Subpart'``; the second ``<a>`` supplies the
        subpart's link.

        Args:
            li: A list-item element exposing ``find_all('a')``.

        Returns:
            ``True`` if a subpart was created and stored, else ``False``.
        """
        anchors = li.find_all('a')
        if len(anchors) < 2 or 'Subpart' not in (anchors[0].get('name') or ''):
            return False
        # Append to the list created once in __init__; re-creating the list
        # here would discard every subpart found earlier.
        self.subpart = SubPart(anchors[1]['href'])
        self.subpartArray.append(self.subpart)
        return True

    def find_subpartdefinition(self, li):
        """Attach the link in *li* to the most recently found subpart.

        Only the given item is used.  Items that appear before any subpart
        heading, or that carry no second link, are skipped.

        Args:
            li: A list-item element exposing ``find_all('a')``.
        """
        if self.subpart is None:
            return
        anchors = li.find_all('a')
        if len(anchors) < 2:
            return
        href = anchors[1].get('href')
        if href:
            self.subpart.append_subpartdef(href)
예제 #2
0
 def find_subparts(self, li):
     """Record a new subpart if *li* is a subpart heading.

     An item counts as a subpart heading when the ``name`` attribute of its
     first ``<a>`` contains ``'Subpart'``; the second ``<a>`` supplies the
     link passed to ``SubPart``.

     Args:
         li: A list-item element exposing ``find_all('a')``.

     Returns:
         ``True`` if a subpart was created and appended to ``self.subparts``,
         else ``False``.
     """
     # Create the list only when it does not exist yet; resetting it on
     # every call would drop the subparts collected by earlier calls.
     if not isinstance(getattr(self, 'subparts', None), list):
         self.subparts = []
     anchors = li.find_all('a')
     # Items without two links or without a ``name`` are not subpart
     # headings and must not raise.
     if len(anchors) < 2 or 'Subpart' not in (anchors[0].get('name') or ''):
         return False
     self.subpart = SubPart(anchors[1]['href'])
     self.subparts.append(self.subpart)
     return True