# The methods below assume this module's top-level imports and helpers:
#   from bs4 import BeautifulSoup
#   import time as timeout      # `time` is reused as a list name below,
#                               # so the stdlib module needs an alias
#   import helpers, dbtools     # project-local date parsing / DB layer
# plus a plain `Object` container class and a Selenium webdriver on
# self.driver (the driver calls use the Selenium 3 `find_element_by_*` API).


def scrap_source4(self):
    self.driver.get('https://it-events.com/hackathons?type=upcoming')
    html = self.driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    rows = list(
        soup.select(
            "body > div.container > div:nth-of-type(2) > div.col-10 > section > div"
        ))
    print("got rows")
    titles = []
    locations = []
    preview = []
    time = []
    refs = []
    source = "https://it-events.com"
    for row in rows:
        temp_title = row.find("a", {"class": "event-list-item__title"})
        temp_location = row.find(
            "div",
            {"class": "event-list-item__info event-list-item__info_location"})
        temp_time = row.find("div", {"class": "event-list-item__info"})
        # Strip non-ASCII characters from the href before building the link.
        temp_ref = source + "/events" + row.find(
            "a", href=True)['href'].encode('ascii', 'ignore').decode('utf-8')
        titles.append(temp_title.text.strip() if temp_title is not None else "")
        locations.append(
            temp_location.text.strip() if temp_location is not None else "")
        # This source exposes no preview text; keep the column for
        # consistency with the other scrapers.
        preview.append("")
        if temp_time is not None:
            time.append(helpers.format_date_source_4(temp_time.text.strip()))
        else:
            time.append("")
        refs.append(temp_ref)
    print("got data")
    data = []
    for i in range(len(titles)):
        item = Object()
        item.title = titles[i]
        item.location = locations[i]
        item.preview = preview[i]
        item.time = time[i]
        item.ref = refs[i]
        item.area = ""
        item.source = source
        data.append(item)
    dbtools.insert_data(data)
    print("transferred data to database handler")
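
# NOTE: every scraper in this file ends with the same assemble-and-insert
# loop. A minimal sketch of a shared helper that could replace those loops,
# assuming only what this file already uses (the `Object` container and
# dbtools.insert_data); the name `_assemble_and_store` is hypothetical, not
# part of the original code.
def _assemble_and_store(self, source, titles, locations, preview, time, refs):
    data = []
    for title, location, prev, when, ref in zip(titles, locations, preview,
                                                time, refs):
        item = Object()
        item.title = title
        item.location = location
        item.preview = prev
        item.time = when
        item.ref = ref
        item.area = ""          # no scraped source provides an area yet
        item.source = source
        data.append(item)
    dbtools.insert_data(data)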
def scrap_source3(self):
    self.driver.get('https://rb.ru/list/hack-spring/')
    html = self.driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    te = soup.select('#post-text')
    temp_rows = list(te[0].contents)
    # Events are laid out as an <h4> title followed, two siblings later,
    # by a <p> with the date, location and preview text; collect the pairs.
    # (The original used temp_rows.index(row), which returns the first
    # equal element and so breaks on duplicate nodes; enumerate does not.)
    rows = []
    for idx, row in enumerate(temp_rows):
        if idx + 2 < len(temp_rows):
            if (row.name == 'h4' and temp_rows[idx + 2].name == 'p'
                    and row.text != ''):
                rows.append(row)
                rows.append(temp_rows[idx + 2])
    print("got rows")
    titles = []
    locations = []
    preview = []
    time = []
    refs = []
    source = "https://rb.ru/list/hack-spring/"
    # Walk the (title, info) pairs collected above.
    for title_row, info_row in zip(rows[0::2], rows[1::2]):
        titles.append(title_row.text.strip())
        refs.append(title_row.find("a", href=True)['href'])
        # The date and location live in the last non-empty <strong> tag.
        strong_text = ''
        for s in info_row.find_all('strong'):
            if s.text != '':
                strong_text = s.text
        date_and_location = helpers.format_date_and_location_source3(
            strong_text)
        time.append(date_and_location['date'])
        locations.append(date_and_location['location'])
        preview.append(info_row.text)
    print("got data")
    data = []
    for i in range(len(titles)):
        item = Object()
        item.title = titles[i]
        item.location = locations[i]
        item.preview = preview[i]
        item.time = time[i]
        item.ref = str(refs[i])
        item.area = ""
        item.source = source
        data.append(item)
    dbtools.insert_data(data)
    print("transferred data to database handler")
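
# For reference: scrap_source3 only relies on the helper returning a dict
# with 'date' and 'location' keys. A minimal illustrative sketch, assuming
# the <strong> text has the shape "<date>, <location>"; the real
# helpers.format_date_and_location_source3 lives in helpers.py and may
# well differ.
def _format_date_and_location_source3_sketch(strong_text):
    parts = strong_text.split(',', 1)
    return {
        'date': parts[0].strip(),
        'location': parts[1].strip() if len(parts) > 1 else '',
    }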
def scrap_source2(self):
    self.driver.get('https://hackevents.co/hackathons')
    html = self.driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    rows = []
    # Scrape every results page: collect the current page's rows, then
    # follow the "next page" link until it is no longer present.
    while True:
        rows.extend(soup.select('#main > div.hackathons > div > div'))
        el = self.driver.find_elements_by_css_selector(
            '#main > div.hackathons > ul > li.next_page > a')
        if len(el) != 1:
            break
        el[0].click()
        timeout.sleep(2)
        self.driver.get(self.driver.current_url)
        html = self.driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
    print("got rows")
    titles = []
    locations = []
    preview = []
    time = []
    refs = []
    source = "https://hackevents.co/hackathons"
    for row in rows:
        temp_title = row.find("a", {"class": "title"})
        city = row.find("span", {"class": "city"}).text
        country = row.find("span", {"class": "country"}).text
        temp_location = city + ', ' + country
        temp_time = row.find("span", {"class": "info-date"})
        temp_ref = row.find("a", {"class": "title"},
                            href=True)['href'].encode('ascii',
                                                      'ignore').decode('utf-8')
        # Normalise everything to ASCII-only str up front; the original
        # stored bytes here and decoded later, which crashed on the
        # empty-string fallback.
        if temp_title is not None:
            titles.append(temp_title.text.strip().encode(
                'ascii', 'ignore').decode('utf-8'))
        else:
            titles.append("")
        # This source exposes no preview text; keep the column for
        # consistency with the other scrapers.
        preview.append("")
        if temp_time is not None:
            # helpers.format_date_source2 is assumed to accept str; the
            # original passed ASCII-filtered bytes.
            time.append(
                helpers.format_date_source2(temp_time.text.strip().encode(
                    'ascii', 'ignore').decode('utf-8')))
        else:
            time.append("")
        refs.append('https://hackevents.co' + temp_ref)
        locations.append(temp_location.strip().encode(
            'ascii', 'ignore').decode('utf-8'))
    print("got data")
    data = []
    for i in range(len(titles)):
        item = Object()
        item.title = titles[i]
        item.location = locations[i]
        item.preview = preview[i]
        item.time = time[i]
        item.ref = refs[i]
        item.area = ""
        item.source = source
        data.append(item)
    dbtools.insert_data(data)
    print("transferred data to database handler")
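
# Every scraper in this file repeats the same
# encode('ascii', 'ignore')/decode dance to strip non-ASCII characters. A
# small helper could centralise it; this is a sketch, and the name
# `_ascii_only` is not in the original code.
def _ascii_only(text):
    # Drop every character outside the ASCII range and return a plain str.
    return text.strip().encode('ascii', 'ignore').decode('utf-8')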
def scrap_source0(self):
    self.driver.get('https://devpost.com/hackathons')
    # Click the "load more" link until it is hidden or empties, so the
    # full results list is rendered before parsing.
    el = self.driver.find_element_by_xpath(
        '//*[@id="container"]/div/div/div/div[1]/div[2]/a')
    while el.is_displayed():
        if el.text == "":
            break
        el.click()
        timeout.sleep(1)
    html = self.driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    rows = list(
        soup.select(
            "#container > div > div > div > div.results > div.challenge-results > div"
        ))
    print("got rows")
    titles = []
    locations = []
    preview = []
    time = []
    refs = []
    source = "https://devpost.com/hackathons"
    for row in rows:
        temp_title = row.find("h2", {"class": "title"})
        temp_location = row.find("p", {"class": "challenge-location"})
        temp_preview = row.find("p", {"class": "challenge-description"})
        temp_time = row.find("span", {"class": "value date-range"})
        # Normalise to ASCII-only str up front; the original stored bytes
        # and decoded some (not all) fields later, crashing on the
        # empty-string fallbacks.
        temp_ref = row.find("a", href=True)['href'].encode(
            'ascii', 'ignore').decode('utf-8')
        if temp_title is not None:
            titles.append(temp_title.text.strip().encode(
                'ascii', 'ignore').decode('utf-8'))
        else:
            titles.append("")
        if temp_location is not None:
            locations.append(temp_location.text.strip().encode(
                'ascii', 'ignore').decode('utf-8'))
        else:
            locations.append("")
        if temp_preview is not None:
            preview.append(temp_preview.text.strip().encode(
                'ascii', 'ignore').decode('utf-8'))
        else:
            preview.append("")
        if temp_time is not None:
            # helpers.format_date is assumed to accept str; the original
            # passed ASCII-filtered bytes.
            time.append(
                helpers.format_date(temp_time.text.strip().encode(
                    'ascii', 'ignore').decode('utf-8')))
        else:
            time.append("")
        refs.append(temp_ref)
    print("got data")
    data = []
    for i in range(len(titles)):
        item = Object()
        item.title = titles[i]
        item.location = locations[i]
        item.preview = preview[i]
        item.time = time[i]
        item.ref = refs[i]
        item.area = ""
        item.source = source
        data.append(item)
    dbtools.insert_data(data)
    print("transferred data to database handler")
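
# The "load more" loop in scrap_source0 re-uses one element reference
# across clicks, which can go stale once Devpost re-renders the list. A
# sketch of a more defensive variant using the same Selenium 3 API as the
# rest of this file; the stale/missing-element handling is an addition,
# not original behaviour, and the function name is hypothetical.
def _click_load_more_sketch(driver, xpath):
    from selenium.common.exceptions import (NoSuchElementException,
                                            StaleElementReferenceException)
    while True:
        try:
            # Re-locate the link on every iteration instead of caching it.
            el = driver.find_element_by_xpath(xpath)
            if not el.is_displayed() or el.text == "":
                break
            el.click()
        except (NoSuchElementException, StaleElementReferenceException):
            break
        timeout.sleep(1)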
def read_info(self, url):
    self.driver.get(url)
    html = self.driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    rows = list(soup.find_all("div", {"class": "event-wrapper"}))
    titles = []
    locations = []
    preview = []
    time = []
    refs = []
    source = url
    for row in rows:
        temp_title = row.find("h3", {"itemprop": "name"})
        temp_location = row.find("div", {"itemprop": "address"})
        # Build the date range from the page's schema.org microdata tags.
        temp_time = (
            row.find("meta", {"itemprop": "startDate"}).get("content") + "-" +
            row.find("meta", {"itemprop": "endDate"}).get("content"))
        temp_ref = row.find("a", href=True)['href'].encode(
            'ascii', 'ignore').decode('utf-8')
        if temp_title is not None:
            titles.append(temp_title.text.strip().encode(
                'ascii', 'ignore').decode('utf-8'))
        else:
            titles.append("")
        if temp_location is not None:
            locations.append(temp_location.text.strip().encode(
                'ascii', 'ignore').decode('utf-8'))
        else:
            locations.append("")
        # This page exposes no preview text; keep the column for
        # consistency with the other scrapers.
        preview.append("")
        time.append(temp_time)
        refs.append(temp_ref)
    data = []
    for i in range(len(titles)):
        item = Object()
        item.title = titles[i]
        item.location = locations[i]
        item.preview = preview[i]
        item.time = time[i]
        item.ref = refs[i]
        item.area = ""
        item.source = source
        data.append(item)
    dbtools.insert_data(data)
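
# Usage sketch: the class name and driver setup are not shown in this
# excerpt, so both are assumptions here (`Scraper` is a placeholder for
# whatever class these methods belong to).
#
# from selenium import webdriver
#
# driver = webdriver.Chrome()
# scraper = Scraper(driver)        # hypothetical constructor
# scraper.scrap_source0()          # devpost.com
# scraper.scrap_source2()          # hackevents.co
# scraper.scrap_source3()          # rb.ru
# scraper.scrap_source4()          # it-events.com
# driver.quit()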