Ejemplo n.º 1
0
def parse():
    """Parse the gathered raw data and save the result as CSV.

    Reads the raw data through a ``Persistor``, passes it to
    ``Parser.parse_object`` and hands the parsed result to
    ``Persistor.save_csv``.
    """
    logger.info("parse")

    persistor = Persistor()
    data_parser = Parser()

    gathered = persistor.read_raw_data()
    parsed = data_parser.parse_object(gathered)
    persistor.save_csv(parsed)
Ejemplo n.º 2
0
def parse():
    """Process the scraped file, parse every object in it and save a CSV.

    The raw content of ``SCRAPPED_FILE`` is first run through
    ``Parser.process_rawdata``; each element of that result is then parsed
    with ``Parser.parse_object`` and the list of parsed objects is written
    to ``TABLE_FORMAT_FILE``.
    """
    logger.info("parse")

    persistor = Persistor()
    data_parser = Parser()

    scraped = persistor.read_raw_data(SCRAPPED_FILE)
    records = data_parser.process_rawdata(scraped)
    parsed_records = [data_parser.parse_object(record) for record in records]
    persistor.save_csv(parsed_records, TABLE_FORMAT_FILE)
Ejemplo n.º 3
0
def parse():
    """Parse the gathered data year by year and save everything as CSV.

    For every year from 1903 up to (but not including) the current year,
    the raw data for that year is read, parsed and appended to the
    persistor; the accumulated data is finally saved to
    ``TABLE_FORMAT_FILE``.
    """
    logger.info("parse")

    persistor = Persistor(SCRAPPED_FILE)
    data_parser = Parser()

    # The range end is exclusive, so the current year itself is never read.
    # NOTE(review): confirm that skipping the current year is intended.
    current_year = datetime.datetime.now().year
    for season in range(1903, current_year):
        yearly_raw = persistor.read_raw_data(season)
        persistor.append_data(data_parser.parse_object(yearly_raw))

    persistor.save_csv(TABLE_FORMAT_FILE)
Ejemplo n.º 4
0
def parse():
    """Parse the gathered raw data and save the result as CSV.

    The ``Persistor`` is constructed with both ``SCRAPPED_FILE`` and
    ``TABLE_FORMAT_FILE``, so reading and saving take no file arguments
    here.
    """
    logger.info("parse")

    persistor = Persistor(SCRAPPED_FILE, TABLE_FORMAT_FILE)
    data_parser = Parser()

    # The raw data is parsed as a single object; an earlier variant (left
    # disabled in the original) parsed each element of it separately.
    gathered = persistor.read_raw_data()
    parsed = data_parser.parse_object(gathered)
    persistor.save_csv(parsed)
Ejemplo n.º 5
0
def parse():
    """Extract the rows of the first sortable wikitable and save them as CSV.

    The raw text read via ``Persistor(SCRAPPED_FILE)`` is narrowed to the
    span that starts at the first ``table class="wikitable sortable"``
    marker and ends with the next ``</table>``. Every ``<tr ...</tr>``
    fragment inside that span is passed to ``Parser.parse_object`` and the
    list of results is saved to ``TABLE_FORMAT_FILE``.

    If the table marker or its closing tag cannot be found, a warning is
    logged and an empty result is saved.
    """
    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE)
    parser = Parser()

    raw_data = storage.read_raw_data()

    table_marker = 'table class="wikitable sortable"'
    closing_tag = '</table>'

    # str.find returns -1 when nothing matches; slicing with that value
    # would silently produce a meaningless fragment, so check explicitly.
    ind_start = raw_data.find(table_marker)
    ind_end = raw_data.find(closing_tag, ind_start) if ind_start != -1 else -1

    if ind_end == -1:
        logger.warning("parse: sortable wikitable not found or not closed")
        table_html = ''
    else:
        table_html = raw_data[ind_start:ind_end + len(closing_tag)]

    # DOTALL lets '.' span newlines, so multi-line rows match and rows that
    # contain a literal '^' are no longer skipped.
    all_rows = re.findall(r'<tr.*?</tr>', table_html, re.DOTALL)

    parsed_files = [parser.parse_object(raw) for raw in all_rows]
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)