Example #1
def gather():
    # scrape raw data for every year from 1903 up to, but not including, the current year
    logger.info("gather")
    storage = Persistor(SCRAPPED_FILE)
    scrapper = Scraper(storage)
    for year in range(1903, datetime.datetime.now().year):
        scrapper.scrape(year)
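The Persistor and Scraper classes these examples call are not shown in the listing. A minimal sketch of what they might look like for this call pattern is given below; the class bodies, the file name, and the placeholder page content are assumptions for illustration only, not the projects' actual code.

import datetime
import logging

logger = logging.getLogger(__name__)
SCRAPPED_FILE = "scrapped.txt"  # assumed file name


class Persistor:
    # assumed behaviour: append each raw payload to one text file
    def __init__(self, path):
        self.path = path

    def append_data(self, data):
        with open(self.path, "a", encoding="utf-8") as fh:
            fh.write(data + "\n")


class Scraper:
    # assumed behaviour: fetch one year's page and hand it to the persistor
    def __init__(self, storage):
        self.storage = storage

    def scrape(self, year):
        body = f"<html>placeholder data for {year}</html>"  # a real project would fetch a URL here
        self.storage.append_data(body)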
Example #2
File: main.py  Project: Anuushik/ML_G3_2020
def parse():
    # parse gathered data and save as csv

    logger.info("parse")
    storage = Persistor()
    parser = Parser()

    raw_data = storage.read_raw_data()
    parsed_files = parser.parse_object(raw_data)
    storage.save_csv(parsed_files)
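In this variant read_raw_data() takes no argument and parse_object() is applied to the whole payload at once, so a matching Persistor/Parser pair could look roughly like the sketch below. The file names, the tab-separated record format, and the CSV layout are assumptions made only to keep the snippet self-contained.

import csv
import logging

logger = logging.getLogger(__name__)


class Persistor:
    # assumed behaviour: read the raw dump, write parsed rows out as CSV
    def __init__(self, raw_path="scrapped.txt", csv_path="table.csv"):
        self.raw_path = raw_path
        self.csv_path = csv_path

    def read_raw_data(self):
        with open(self.raw_path, encoding="utf-8") as fh:
            return fh.read()

    def save_csv(self, rows):
        with open(self.csv_path, "w", newline="", encoding="utf-8") as fh:
            csv.writer(fh).writerows(rows)


class Parser:
    # assumed behaviour: one record per line, fields separated by tabs
    def parse_object(self, raw_data):
        return [line.split("\t") for line in raw_data.splitlines() if line]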
Example #3
def parse():

    logger.info("parse")
    storage = Persistor()
    parser = Parser()

    raw_data = storage.read_raw_data(SCRAPPED_FILE)
    data = parser.process_rawdata(raw_data)  # preprocess the raw data
    parsed_files = [parser.parse_object(file)
                    for file in data]  # parse every object
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)  # save the parsed data
Example #4
def parse():
    # parse gathered data and save as csv

    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE)
    parser = Parser()
    for year in range(1903, datetime.datetime.now().year):
        raw_data = storage.read_raw_data(year)
        parsed_file = parser.parse_object(raw_data)
        storage.append_data(parsed_file)
    storage.save_csv(TABLE_FORMAT_FILE)
Example #5
def parse():
    # parse gathered data and save as csv

    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE, TABLE_FORMAT_FILE)
    parser = Parser()

    raw_data = storage.read_raw_data()
    parsed_file = parser.parse_object(raw_data)
    # parsed_files = [parser.parse_object(file) for file in raw_data]
    storage.save_csv(parsed_file)
Example #6
def parse():

    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE)
    parser = Parser()

    raw_data = storage.read_raw_data()

    # cut the page down to the sortable wikitable only
    ind_start = raw_data.find('table class="wikitable sortable"')
    raw_data = raw_data[ind_start:]
    ind_end = raw_data.find('</table>')
    raw_data = raw_data[:ind_end + len('</table>')]

    # [^^] matches any character, including newlines, so each full <tr>...</tr> row is captured
    all_rows = re.findall('<tr[^^]*?</tr>', raw_data)

    parsed_files = [parser.parse_object(raw) for raw in all_rows]
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)
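The [^^] character class in the pattern above is a compact way to match any character, newlines included, without passing re.DOTALL. The throwaway snippet below, using a made-up two-row table, shows the same trimming and row extraction on its own.

import re

html = '''
<table class="wikitable sortable">
<tr>
  <th>Year</th><th>Winner</th>
</tr>
<tr>
  <td>1903</td><td>Boston Americans</td>
</tr>
</table>
'''

# trim to the table, then pull every <tr>...</tr> block;
# the non-greedy [^^]*? keeps rows that span several lines
start = html.find('table class="wikitable sortable"')
end = html.find('</table>', start)
table = html[start:end + len('</table>')]
rows = re.findall('<tr[^^]*?</tr>', table)
print(len(rows))  # prints 2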
Example #7
def gather():
    logger.info("gather")
    storage = Persistor()

    scrapper = Scraper(storage)
    scrapper.scrape(SCRAPPED_FILE)
Example #8
def __init__(self):
    self.persistor = Persistor()