Python aboutEthereum Exemples, spider_functions.aboutEthereum Python Exemples

Exemple #1

0

Afficher le fichier

 def parse(self, response):
     """Scrape one article page and store its cleaned body on its News row.

     The article HTML is read from the ``content_box`` div, cut down to the
     part after the first ``</span></div></div>`` and before the first
     inline ``<script>`` tag, stripped of a known ad snippet, passed through
     ``fun.textPreprocessing`` and written to the ``News`` row whose
     ``link`` equals ``response.url``.

     If the page has no ``content_box`` div, nothing is stored and the
     method returns early.
     """
     text = response.xpath('//div[@id = "content_box"]').extract_first()
     if text is None:
         # extract_first() gives None when the selector matches nothing.
         # Previously this was swallowed by a bare except and then crashed
         # on the unguarded split below.
         print('error merkle')
         return
     pieces = text.split('</span></div></div>')
     if len(pieces) > 1:
         text = pieces[1]
     else:
         # Marker not present: keep the whole div and carry on.
         print('error merkle')
     text = text.split('<script type="text/javascript">')[0]
     # text is known to be a string here, so replace() needs no try/except.
     text = text.replace(
         'freestar.queue.push(function () { googletag.display(\'TheMerkle_728x90_320x50_BTF\'); });',
         '')
     # text processing
     text = fun.textPreprocessing(text)
     # NOTE(review): the text is preprocessed a second time. This looks
     # redundant, but it is kept because it is not visible here whether
     # fun.textPreprocessing is idempotent -- confirm, then drop one call.
     try:
         text = fun.textPreprocessing(text)
     except Exception:
         print('error processing')
     # Same ad snippet, in the form it presumably has after preprocessing.
     try:
         text = text.replace(
             'freestar queue push function googletag display TheMerkle_728x90_320x50_BTF',
             ' ')
     except (AttributeError, TypeError):
         # Only reachable if preprocessing returned a non-string.
         print('error 2 replacing')
     try:
         News.update(
             body=text,
             bitcoinBoolean=fun.aboutBitcoin(text),
             ethereumBoolean=fun.aboutEthereum(text),
             finished=True).where(News.link == response.url).execute()
     except Exception:
         # Best effort: a failed write must not abort the crawl, but do not
         # swallow KeyboardInterrupt/SystemExit as the bare except did.
         print('error storing')

Exemple #2

0

Afficher le fichier

 def parse(self, response):
     """Store the preprocessed article body on the matching News row.

     Takes the first ``post-full-text contents`` div, passes it through
     ``fun.textPreprocessing`` and updates the ``News`` row whose ``link``
     equals ``response.url``, setting the bitcoin/ethereum flags and
     ``finished=True``.
     """
     selection = response.xpath('//div[@class = "post-full-text contents"]')
     # text processing
     body = fun.textPreprocessing(selection.extract_first())
     about_bitcoin = fun.aboutBitcoin(body)
     about_ethereum = fun.aboutEthereum(body)
     query = News.update(
         body=body,
         bitcoinBoolean=about_bitcoin,
         ethereumBoolean=about_ethereum,
         finished=True,
     )
     query.where(News.link == response.url).execute()

Exemple #3

0

Afficher le fichier

Fichier : spider_bitcoinmagazine_articles.py Projet : mjuchli/ctc-news-aggregator

 def parse(self, response):
     """Store the preprocessed article body on the matching News row.

     Takes the first ``rich-text`` div, drops everything from the
     ``<p class="tagline">`` element onwards, passes the rest through
     ``fun.textPreprocessing`` and updates the ``News`` row whose ``link``
     equals ``response.url``.

     If the page has no ``rich-text`` div, a warning is logged and nothing
     is stored.
     """
     import logging

     text = response.xpath('//div[@class = "rich-text"]').extract_first()
     if text is None:
         # extract_first() gives None when nothing matches; the split below
         # would otherwise fail with an opaque AttributeError.
         logging.getLogger(__name__).warning(
             'no rich-text div found on %s', response.url)
         return
     text = text.split('<p class="tagline">')[0]
     # text processing
     text = fun.textPreprocessing(text)
     News.update(body=text,
                 bitcoinBoolean=fun.aboutBitcoin(text),
                 ethereumBoolean=fun.aboutEthereum(text),
                 finished=True).where(News.link == response.url).execute()

Exemple #4

0

Afficher le fichier

Fichier : spider_cointelegraph_explained.py Projet : mjuchli/ctc-news-aggregator

 def parse(self, response):
     """Store the preprocessed article body on the matching News row.

     Concatenates every ``name`` div followed by every ``clearfix content``
     div, passes the result through ``fun.textPreprocessing`` and updates
     the ``News`` row whose ``link`` equals ``response.url``.
     """
     name_parts = response.xpath('//div[@class = "name"]').extract()
     content_parts = response.xpath(
         '//div[@class = "clearfix content"]').extract()
     # extract() already returns text strings, so join them directly rather
     # than calling the Python-2-only unicode() on each part in a loop.
     text = ''.join(name_parts + content_parts)
     text = fun.textPreprocessing(text)
     News.update(body=text,
                 bitcoinBoolean=fun.aboutBitcoin(text),
                 ethereumBoolean=fun.aboutEthereum(text),
                 finished=True).where(News.link == response.url).execute()

Exemple #5

0

Afficher le fichier

    def parse(self, response):
        """Store the preprocessed article body on the matching News row.

        Takes the first ``entry-content`` div, cuts it at the first
        ``Disclaimer`` and at a known ad marker, passes the rest through
        ``fun.textPreprocessing`` and updates the ``News`` row whose
        ``link`` equals ``response.url``.

        If the page has no ``entry-content`` div, a warning is logged and
        nothing is stored.
        """
        import logging

        text = response.xpath(
            '//div[@class = "entry-content"]').extract_first()
        if text is None:
            # extract_first() gives None when nothing matches; the splits
            # below would otherwise fail with an opaque AttributeError.
            logging.getLogger(__name__).warning(
                'no entry-content div found on %s', response.url)
            return
        text = text.split('Disclaimer')[0]
        # NOTE(review): this marker looks like already-preprocessed text
        # (no punctuation), yet it is applied to the raw HTML; it may need
        # to run after textPreprocessing instead -- confirm.
        text = text.split('CDATA id15 Content Ad 2 OA_show 15 ')[0]
        # text processing
        text = fun.textPreprocessing(text)

        News.update(body=text,
                    bitcoinBoolean=fun.aboutBitcoin(text),
                    ethereumBoolean=fun.aboutEthereum(text),
                    finished=True).where(News.link == response.url).execute()

Exemple #6

0

Afficher le fichier

 def parse(self, response):
     """Store the preprocessed, trimmed article body on the matching News row.

     Takes the first ``article-content-container noskimwords`` div, passes
     it through ``fun.textPreprocessing``, cuts the result at the first
     occurrence of each trailing marker in turn, and updates the ``News``
     row whose ``link`` equals ``str(response.url)``.
     """
     raw_html = response.xpath(
         '//div[@class = "article-content-container noskimwords"]'
     ).extract_first()
     # text processing
     body = fun.textPreprocessing(raw_html)
     # Markers are applied in this order; each keeps only what precedes it.
     trailing_markers = (
         'function e t r n c a l',
         'image via ',
         'Image via ',
         'via Shutter',
     )
     for marker in trailing_markers:
         body = body.split(marker)[0]
     about_bitcoin = fun.aboutBitcoin(body)
     about_ethereum = fun.aboutEthereum(body)
     query = News.update(
         body=body,
         bitcoinBoolean=about_bitcoin,
         ethereumBoolean=about_ethereum,
         finished=True,
     )
     query.where(News.link == str(response.url)).execute()

Exemple #7

0

Afficher le fichier

 def parse(self, response):
     """Store the preprocessed article body on the matching News row.

     Takes the first ``post-info`` div and locates the article start: the
     text after the second ``<!--Content Ad -->`` marker or, failing that,
     the text after the reddit share button. If neither marker is present
     the whole div is kept and an error is printed. The text is then cut at
     the footer and at ``Header image``, passed through
     ``fun.textPreprocessing`` and written to the ``News`` row whose
     ``link`` equals ``response.url``.

     If the page has no ``post-info`` div, nothing is stored and the method
     returns early.
     """
     text = response.xpath('//div[@class = "post-info"]').extract_first()
     if text is None:
         # extract_first() gives None when nothing matches. Previously the
         # bare excepts hid this and the split on the footer then crashed.
         print('error livebitcoinnews')
         return
     ad_parts = text.split('<!--Content Ad -->')
     if len(ad_parts) > 2:
         text = ad_parts[2]
     else:
         share_parts = text.split('<li class="sm-share reddit">')
         if len(share_parts) > 1:
             text = share_parts[1]
         else:
             # Neither marker found: continue with the untrimmed div.
             print('error livebitcoinnews')
     text = text.split('<footer class=')[0]
     text = text.split('Header image')[0]
     # text processing
     text = fun.textPreprocessing(text)
     News.update(body=text,
                 bitcoinBoolean=fun.aboutBitcoin(text),
                 ethereumBoolean=fun.aboutEthereum(text),
                 finished=True).where(News.link == response.url).execute()