# Keys whose text is an integer written with thousands separators ("12,500").
_COMMA_SEPARATED_INT_KEYS = ('kilometer_done', 'price')

# Keys whose integer is embedded in a larger string: key -> (pattern, group).
# 'model_year' needs three letters, a hyphen, then text ending in a digit
# (a value shaped like "Jan-2014"); 'profile_id' needs an "S" followed by
# text ending in a digit (a value shaped like "S12345").
_EMBEDDED_INT_PATTERNS = {
    'model_year': (r'([A-Za-z]{3})(-)(.+\d)', 3),
    'profile_id': (r'(S)(.+\d)', 2),
}


def _extract_embedded_int(key, value):
    """Return the integer embedded in *value* according to the pattern for *key*.

    Raises:
        ValueError: if *value* does not match the pattern registered for
            *key*, or the captured text is not a valid integer.
    """
    pattern, group = _EMBEDDED_INT_PATTERNS[key]
    match = re.search(pattern, value)
    if match is None:
        # Without this check the failure would be an AttributeError on
        # ``None.group`` that names neither the field nor the value.
        raise ValueError('cannot parse %s from %r' % (key, value))
    return int(match.group(group))


def _clean_stage_row(row):
    """Normalise one stage row in place and return it.

    Every non-None value is stripped of surrounding whitespace. Values under
    the comma-separated keys and the embedded-integer keys are additionally
    converted to ``int``. ``None`` values are left untouched.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for key in row:
        value = row[key]
        if value is None:
            continue
        value = value.strip()
        if key in _COMMA_SEPARATED_INT_KEYS:
            value = int(value.replace(',', ''))
        elif key in _EMBEDDED_INT_PATTERNS:
            value = _extract_embedded_int(key, value)
        row[key] = value
    return row


def tranfer_data_from_stage_to_temp(dao=None):
    """Clean every stage row and hand it to the DAO's temp-table insert.

    Rows come from ``dao.get_entries_from_stage()``; each one is cleaned by
    ``_clean_stage_row`` and then passed to
    ``dao.populateAndExecuteIntoTemp``.

    Args:
        dao: object providing ``get_entries_from_stage()`` and
            ``populateAndExecuteIntoTemp(row)``. When omitted a new ``Dao()``
            is created, which is the original behaviour.

    Raises:
        ValueError: if a numeric field cannot be parsed.
    """
    if dao is None:
        dao = Dao()
    for row in dao.get_entries_from_stage():
        dao.populateAndExecuteIntoTemp(_clean_stage_row(row))


# Correctly spelled alias; the original (misspelled) name is kept for callers.
transfer_data_from_stage_to_temp = tranfer_data_from_stage_to_temp
__author__ = 'aliHitawala'

from DataModels.webcrawler.bikewale.DataExtractor import Extractor
from DataModels.persistence.bikewale.BikeWaleDaoImpl import Dao

# File read by the script entry point, one page URL per line. It is a relative
# path, so it is resolved against the current working directory.
URL_LIST_FILENAME = 'html_pages_url'


def read_all_urls(path):
    """Return the non-blank lines of the file at *path*, whitespace-stripped.

    Stripping matters because raw lines keep their trailing newline, which
    would otherwise be passed on as part of each URL. The file is closed
    before returning.
    """
    with open(path, 'r') as url_file:
        stripped_lines = (line.strip() for line in url_file)
        return [url for url in stripped_lines if url]


def main():
    """Extract a record from every listed URL and persist the non-empty ones.

    Each URL is passed to ``Extractor.extract``; a truthy result is printed
    and handed to ``Dao.populateAndExecute``. Falsy results are skipped
    (presumably pages where nothing could be extracted - confirm against
    ``Extractor``).
    """
    extractor = Extractor()
    dao = Dao()
    for url in read_all_urls(URL_LIST_FILENAME):
        record = extractor.extract(url)
        if record:
            # Parenthesised form prints the same on Python 2 and is valid on 3.
            print(record)
            dao.populateAndExecute(record)


if __name__ == '__main__':
    main()