class Extractor(object):

    def __init__(self):
        self._readerHelper = Helper()
        self._parser = Parser()
        self._validator = Validator()

    def extract(self, fileName):
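        # File-based entry point; file names come from Helper.getAllHtmlFileNames().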
        return self.extractData(fileName)

    def extractData(self, fileName):
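        # Read the saved page content and let the parser turn it into a field dictionary.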
        content = self._readerHelper.readContentFromFile(fileName)
        return self._parser.parse(content, fileName)

    def extractFromUrl(self, url):
        # URL-based entry point: only fetch pages whose URL passes validation.
        if self._validator.urlValidator(url):
            return self.extractDataFromUrl(url)
        print "ERROR:: URL failed validation:", url
        return None

    def extractDataFromUrl(self, url):
        content = self._readerHelper.readContentFromUrl(url)
        return self._parser.parse(content, url)

    def getState(self, cityStateText):
        # The state code is the two-letter abbreviation '##' at the end of the city/state text.
        state = cityStateText[-2:]
        return state

    def getBikeName(self, soup):
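        # The listing title is the <h1> inside the first 'grid_8 margin-top10' div; drop the 'Used, ' prefix.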
        bikeNameDiv = soup.find_all("div", class_='grid_8 margin-top10')
        bikeName = self.getStringFromSoupElement(bikeNameDiv[0].h1)
        bikeName = bikeName.replace('Used, ', '')
        return bikeName

    def unicodeToString(self, unicodeText):
        # Drop any non-ASCII characters; None is returned unchanged.
        if unicodeText is None:
            return unicodeText
        else:
            return unicodeText.encode('ascii', 'ignore')

    def getStringFromSoupElement(self, element):
        # Pull the element's text, convert it to a plain string, and collapse runs of whitespace.
        text = self.unicodeToString(element.get_text())
        if text is not None:
            text = " ".join(text.split())
        return text

if __name__ == '__main__':
    instance = Extractor()
    daoInstance = Dao()
    readerHelper = Helper()
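    # Extract every saved HTML listing and persist any non-empty result.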
    for fileName in readerHelper.getAllHtmlFileNames():
        extractedData = instance.extract(fileName)
        if extractedData:
            daoInstance.populateAndExecute(extractedData)