Example #1
class Scraper:
    def __init__(self):
        self.persistor = Persistor()

    def scrape(self):
        # Fetch the raw HTML of the TV listing page on zigzag.am.
        url = 'https://www.zigzag.am/am/tv-audio-video/tvs.html'

        # Open the page and read the response body
        # (uReq is presumably urllib.request.urlopen, imported at module level).
        data = uReq(url)
        page_html = data.read()
        data.close()

        # Persist the raw HTML for later parsing.
        self.persistor.save_raw_data(page_html, "tvs.html")
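The scrape step only stores the page; a commented-out draft in the project's scraper hints at the extraction step. A minimal runnable version of that idea, assuming each product sits in a div with class item_category holding an a.item_name title link (the class names come from that draft, everything else is a guess):

from bs4 import BeautifulSoup

def extract_names(page_html):
    # Parse the saved page and collect product names.
    # Hypothetical helper; class names "item_category" / "item_name"
    # come from the project's draft, the rest is an assumption.
    page_soup = BeautifulSoup(page_html, "html.parser")
    names = []
    for category in page_soup.find_all("div", {"class": "item_category"}):
        title = category.find("a", {"class": "item_name"})
        if title is not None:
            names.append(title.text.strip())
    return names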
Example #2
File: main.py Project: Anuushik/ML_G3_2020
def parse():
    # Parse the gathered data and save it as CSV.

    logger.info("parse")
    storage = Persistor()
    parser = Parser()

    raw_data = storage.read_raw_data()
    parsed_files = parser.parse_object(raw_data)
    storage.save_csv(parsed_files)
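None of these examples show Persistor itself. A minimal sketch of the storage interface the calls imply; the method names and argument shapes come from the examples, while the bodies, defaults, and paths are assumptions:

import csv


class Persistor:
    # Hypothetical sketch: only the names and signatures are taken
    # from the examples; the bodies are guesses.
    def __init__(self, raw_path="raw.html", csv_path="data.csv"):
        self.raw_path = raw_path
        self.csv_path = csv_path

    def save_raw_data(self, page_html, file_name=None):
        # Store the scraped bytes for the parse step.
        with open(file_name or self.raw_path, "wb") as f:
            f.write(page_html)

    def read_raw_data(self, file_name=None):
        # Hand the stored page back to the parser as text.
        with open(file_name or self.raw_path, encoding="utf-8") as f:
            return f.read()

    def save_csv(self, rows, file_name=None):
        # Write the parsed rows out as a CSV table.
        with open(file_name or self.csv_path, "w", newline="") as f:
            csv.writer(f).writerows(rows)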
Example #3
def parse():

    logger.info("parse")
    storage = Persistor()
    parser = Parser()

    raw_data = storage.read_raw_data(SCRAPPED_FILE)
    data = parser.process_rawdata(raw_data)  # split the raw data into records
    parsed_files = [parser.parse_object(file)
                    for file in data]  # parse every record
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)  # save the table as CSV
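Unlike Example #2, this version first splits the raw page into per-record chunks with process_rawdata and parses each chunk separately. That method is not shown; a hypothetical stand-in consistent with the comprehension above (the '<tr' split marker is purely illustrative):

def process_rawdata(self, raw_data):
    # Hypothetical: cut the raw HTML into one fragment per table row
    # so each fragment can be passed to parse_object on its own.
    return ['<tr' + chunk for chunk in raw_data.split('<tr')[1:]]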
Example #4
def parse():
    # Parse the gathered data and save it as CSV.

    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE, TABLE_FORMAT_FILE)
    parser = Parser()

    raw_data = storage.read_raw_data()
    parsed_file = parser.parse_object(raw_data)
    #parsed_files = [parser.parse_object(file) for file in raw_data]
    storage.save_csv(parsed_file)
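Compared with Example #2, this variant injects both file names through the Persistor constructor, so read_raw_data and save_csv take no path arguments; the commented-out line preserves an earlier attempt that parsed each raw file separately.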
Example #5
def gather():
    logger.info("gather")
    storage = Persistor(SCRAPPED_FILE)
    scrapper = Scraper(storage)
    # Scrape one page per year, starting from 1903.
    for year in range(1903, datetime.datetime.now().year):
        scrapper.scrape(year)
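Since range excludes its upper bound, the loop stops at the year before the current one. If the current year were meant to be scraped as well, the loop would need:

for year in range(1903, datetime.datetime.now().year + 1):
    scrapper.scrape(year)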
Example #6
def parse():

    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE)
    parser = Parser()

    raw_data = storage.read_raw_data()

    # Cut the page down to just the sortable wikitable.
    ind_start = raw_data.find('table class="wikitable sortable"')
    raw_data = raw_data[ind_start:]
    ind_end = raw_data.find('</table>')
    raw_data = raw_data[:ind_end + len('</table>')]

    # Grab each table row; [^^] matches any character except "^",
    # including newlines, so a row may span several lines.
    all_rows = re.findall('<tr[^^]*?</tr>', raw_data)

    parsed_files = [parser.parse_object(raw) for raw in all_rows]
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)
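The [^^] character class is a workaround: "." does not match newlines by default, whereas "any character except ^" does, so the lazy quantifier can span multi-line rows. The conventional spelling of the same pattern:

all_rows = re.findall(r'<tr.*?</tr>', raw_data, flags=re.DOTALL)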
Example #7
def parse():
    # Parse the gathered data, one stored page per year, and save it as CSV.

    logger.info("parse")
    storage = Persistor(SCRAPPED_FILE)
    parser = Parser()
    for year in range(1903, datetime.datetime.now().year):
        raw_data = storage.read_raw_data(year)
        parsed_file = parser.parse_object(raw_data)
        storage.append_data(parsed_file)
    storage.save_csv(TABLE_FORMAT_FILE)
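Here save_csv receives only the output file name, which implies Persistor buffers rows internally through append_data. A minimal sketch of that accumulate-then-flush pair (an assumption matching this example's signatures only):

import csv


class Persistor:
    def __init__(self, raw_path):
        self.raw_path = raw_path
        self.rows = []  # parsed records collected by append_data

    def append_data(self, parsed):
        # Buffer one parsed record in memory.
        self.rows.append(parsed)

    def save_csv(self, file_name):
        # Flush everything buffered so far into a single CSV file.
        with open(file_name, "w", newline="") as f:
            csv.writer(f).writerows(self.rows)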
Example #8
def gather():
    logger.info("gather")
    storage = Persistor()

    scrapper = Scraper(storage)
    scrapper.scrape(SCRAPPED_FILE)
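In this variant the output file name is handed to scrape rather than to Persistor, the mirror image of Example #1, where the scraper hard-codes the file name itself.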
Example #9
def __init__(self):
    self.persistor = Persistor()