Ejemplo n.º 1
0
    def parse(self, response):
        """Parse the water-disruption notice table on the SAINS page.

        Walks every table row of the response, and for each row that has
        both a date (in a <strong>) and a body (<p>), builds a content
        record, wraps it in the SAINS category envelope and persists it
        as JSON via ``save_to_DB``.
        """
        def create_item(date, text, url, ctr):
            # One content record per table row. `url` is the page the
            # row was scraped from (fix: the original ignored this
            # parameter and read response.url from the closure instead).
            return {
                'title': 'Maklumat Gangguan Bekalan Air (' + str(
                    ctr) + ') ' + date,
                'text': text,
                'page_link': url,
                'file_link': '',
                'date': date,
            }

        ctr = 0
        for tr in response.xpath("//table/tr[*]"):
            # Extract once per row instead of probing and re-extracting.
            date = tr.xpath("td[*]/strong/text()").extract_first()
            text = tr.xpath("td[*]/p").extract_first()
            # Only rows carrying both a date and a notice body are items.
            if date is None or text is None:
                continue
            ctr += 1
            main = {
                'category': 'SAINS',
                'cat_desc': 'Syarikat Air Negeri Sembilan',
                'content': create_item(date=date, text=text,
                                       url=response.url, ctr=ctr),
            }
            jsonstr = json.dumps(main)

            # send content to Cache
            save_to_DB(jsonstr)
Ejemplo n.º 2
0
    def parse_full_article(self, response):
        """Scrape one full article page.

        Joins the first text node of every paragraph under the article
        body, strips newlines/tabs from the <h1> title, wraps the result
        in the SAINS category envelope and persists it via ``save_to_DB``.
        """
        # Collect paragraph texts in a list and join once at the end --
        # repeated `text = text + ...` is quadratic in the worst case.
        pieces = []
        for subcontent in response.xpath("//article/div/p"):
            textincontent = subcontent.xpath("text()").extract()
            if textincontent:
                pieces.append(textincontent[0])
        text = ''.join(pieces)

        # extract_first() returns None when the heading is missing; fall
        # back to '' so re.sub does not raise TypeError.
        raw_title = response.xpath("//article/h1/text()").extract_first() or ''
        title = re.sub(pattern=r'\n+|\t+', repl='', string=raw_title)

        item = {
            'title': title,
            'text': text,
            'page_link': response.url,
            'file_link': '',
            'date': '',
        }

        main = {
            'category': 'SAINS',
            'cat_desc': 'Syarikat Air Negeri Sembilan',
            'content': item,
        }
        jsonstr = json.dumps(main)

        # send content to Cache
        save_to_DB(jsonstr)
Ejemplo n.º 3
0
# Every <article> on the page; the page title is the text of the
# first paragraph inside the first article.
articles = soup.find_all('article')
first_article = articles[0]
title = str(first_article.p.get_text())

# article content


def get_content(soupObject):
    """Return *soupObject* pretty-printed with HTML entity escaping,
    collapsed onto a single line (all newlines removed).
    """
    pretty = soupObject.prettify(formatter="html")
    return pretty.replace('\n', '')


# Build the single content record for this page as one literal.
item = {
    'date': '',
    'title': title,
    'text': get_content(articles[0]),
    'file_link': '',
    'page_link': urlSAP,
}
contents = [item]

# wrap to JSON
for content in contents:
    main = {
        'category': 'SAP',
        'cat_desc': 'Syarikat Air Perlis',
        'content': content,
    }
    jsonstr = json.dumps(main)

    # send content to Cache
    save_to_DB(jsonstr)