def parse():
    """Parse the gathered raw data and save the result as CSV.

    Logs ``"parse"``, reads the raw data through a ``Persistor``, runs it
    through ``Parser.parse_object`` and passes the parsed result to
    ``Persistor.save_csv``.
    """
    logger.info("parse")
    persistor = Persistor()
    converter = Parser()

    gathered = persistor.read_raw_data()
    parsed = converter.parse_object(gathered)
    persistor.save_csv(parsed)
def parse():
    """Parse the scraped file item by item and save the result as CSV.

    Logs ``"parse"``, reads ``SCRAPPED_FILE`` through a ``Persistor``,
    pre-processes the raw data with ``Parser.process_rawdata``, parses every
    item it yields with ``Parser.parse_object`` and saves the resulting list
    to ``TABLE_FORMAT_FILE``.
    """
    logger.info("parse")
    persistor = Persistor()
    converter = Parser()

    scraped = persistor.read_raw_data(SCRAPPED_FILE)
    # Pre-process the raw data into an iterable of items.
    items = converter.process_rawdata(scraped)
    # Parse each item on its own; the results are collected in order.
    parsed = list(map(converter.parse_object, items))
    # Persist everything in one call.
    persistor.save_csv(parsed, TABLE_FORMAT_FILE)
def parse():
    """Parse the gathered data year by year and save the result as CSV.

    Logs ``"parse"``, then for every year from 1903 up to (but not
    including) the current calendar year reads that year's raw data, parses
    it and appends the parsed result to the ``Persistor``. Once all years
    are processed the accumulated data is saved to ``TABLE_FORMAT_FILE``.
    """
    logger.info("parse")
    persistor = Persistor(SCRAPPED_FILE)
    converter = Parser()

    # range() excludes its upper bound, so the current year is not processed.
    stop_year = int(datetime.datetime.now().year)
    for yr in range(1903, stop_year):
        yearly_raw = persistor.read_raw_data(yr)
        persistor.append_data(converter.parse_object(yearly_raw))

    persistor.save_csv(TABLE_FORMAT_FILE)
def parse():
    """Parse the gathered raw data and save the result as CSV.

    Logs ``"parse"``, builds a ``Persistor`` from ``SCRAPPED_FILE`` and
    ``TABLE_FORMAT_FILE``, reads the raw data, parses it as a single object
    with ``Parser.parse_object`` and passes the result to
    ``Persistor.save_csv``.
    """
    logger.info("parse")
    persistor = Persistor(SCRAPPED_FILE, TABLE_FORMAT_FILE)
    converter = Parser()

    gathered = persistor.read_raw_data()
    # NOTE: the raw data is parsed as one object. A per-item variant
    # (parse_object applied to each element of the raw data) existed here
    # but was disabled.
    parsed = converter.parse_object(gathered)
    persistor.save_csv(parsed)
def _extract_table_rows(raw_html):
    """Return the ``<tr>...</tr>`` fragments of the first sortable wikitable.

    Args:
        raw_html: HTML text to search.

    Returns:
        A list of strings, one per complete ``<tr ...>...</tr>`` span found
        between the first ``table class="wikitable sortable"`` marker and the
        next ``</table>``. The list is empty when the marker is absent.
    """
    table_marker = 'table class=\"wikitable sortable\"'
    closing_tag = '</table>'

    start = raw_html.find(table_marker)
    if start == -1:
        # str.find reports "not found" as -1; slicing with it would keep only
        # the last character of the document, so bail out explicitly instead.
        return []
    table_html = raw_html[start:]

    end = table_html.find(closing_tag)
    if end != -1:
        table_html = table_html[:end + len(closing_tag)]
    # When the closing tag is missing (e.g. truncated download), keep the
    # remainder: the pattern below only matches rows that are complete.

    # DOTALL lets '.' span newlines inside a row. The previous '[^^]' character
    # class did the same but silently dropped any row containing a '^'.
    return re.findall(r'<tr.*?</tr>', table_html, re.DOTALL)


def parse():
    """Parse the rows of the scraped wikitable and save them as CSV.

    Logs ``"parse"``, reads the raw HTML through a ``Persistor`` built from
    ``SCRAPPED_FILE``, extracts every table row of the first
    ``wikitable sortable`` table, parses each row with
    ``Parser.parse_object`` and saves the resulting list to
    ``TABLE_FORMAT_FILE``. If no rows are found a warning is logged and an
    empty list is saved.
    """
    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE)
    parser = Parser()

    raw_data = storage.read_raw_data()
    all_rows = _extract_table_rows(raw_data)
    if not all_rows:
        logger.warning("parse: no table rows found in raw data")

    parsed_files = [parser.parse_object(raw) for raw in all_rows]
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)