class Extractor(object):
    """Reads an HTML file from disk and parses it into a data dictionary."""

    def __init__(self):
        """Set up the reader, parser, and validator collaborators."""
        self._readerHelper = Helper()
        self._parser = Parser()
        self._validator = Validator()

    def extract(self, fileName):
        """Public entry point; delegates straight to extractData."""
        return self.extractData(fileName)

    def extractData(self, fileName):
        """Read the raw file content and hand it to the parser."""
        rawContent = self._readerHelper.readContentFromFile(fileName)
        return self._parser.parse(rawContent, fileName)
class Extractor(object): def __init__(self): self._readerHelper = Helper() self._parser = Parser() self._validator = Validator() def extract(self, url): if self._validator.urlValidator(url): return self.extractData(url) print "ERROR:: Validation error!! URL: ", url return None def extractData(self, url): content = self._readerHelper.readContentFromUrl(url) return self._parser.parse(content, url)
def __init__(self):
    """Set up the reader, parser, and validator collaborators.

    Instantiation order is preserved from the original in case any of
    the constructors has side effects.
    """
    self._readerHelper = Helper()
    self._parser = Parser()
    self._validator = Validator()
#this is to extract the state code '##' state = cityStateText[-2:] return state def getBikeName(self, soup): bikeNameDiv = soup.find_all("div", class_='grid_8 margin-top10') bikeName = self.getStringFromSoupElement(bikeNameDiv[0].h1) bikeName = bikeName.replace('Used, ', '') return bikeName def unicodeToString(self, unicode): if unicode is None: return unicode else: return unicode.encode('ascii','ignore') def getStringFromSoupElement(self, element): str = self.unicodeToString(element.get_text()) if str is not None: str = " ".join(str.split()) return str if __name__ == '__main__': instance = Extractor() daoInstance = Dao() readerHelper = Helper() for fileName in readerHelper.getAllHtmlFileNames(): dict = instance.extract(fileName) if bool(dict): daoInstance.populateAndExecute(dict)