def parse(self, response):
    """Extract a TheMerkle article body, clean it and store it on its News row.

    The ``content_box`` div is reduced to the second piece produced by
    splitting on ``</span></div></div>`` (when that marker exists), cut at the
    first ``<script type="text/javascript">`` tag, stripped of the ad-loader
    snippet, run through ``fun.textPreprocessing`` and finally written to the
    ``News`` row(s) whose ``link`` equals ``response.url`` together with the
    bitcoin/ethereum flags and ``finished=True``.

    Most steps are best-effort: a failure is reported with ``print`` and the
    following steps still run.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None. When the page has no ``content_box`` div nothing is stored.
    """
    text = response.xpath('//div[@id = "content_box"]').extract_first()
    if text is None:
        # The XPath matched nothing, so there is no text to clean or store;
        # calling str methods on None would only raise AttributeError.
        print('error merkle: no content_box in %s' % response.url)
        return

    try:
        # IndexError means the marker is absent; the whole div is kept then.
        text = text.split('</span></div></div>')[1]
    except IndexError:
        print('error merkle')
    text = text.split('<script type="text/javascript">')[0]

    # str.replace cannot fail on a string, so this step needs no guard.
    ad_snippet = (
        'freestar.queue.push(function () { '
        'googletag.display(\'TheMerkle_728x90_320x50_BTF\'); });')
    text = text.replace(ad_snippet, '')

    # NOTE(review): the preprocessing is applied twice and only the second
    # call is guarded. Both calls are kept because it is not visible from here
    # whether fun.textPreprocessing is idempotent -- confirm and drop one.
    text = fun.textPreprocessing(text)
    try:
        text = fun.textPreprocessing(text)
    except Exception:
        print('error processing')

    try:
        # Same ad snippet, in the form it appears to take after preprocessing.
        text = text.replace(
            'freestar queue push function googletag display '
            'TheMerkle_728x90_320x50_BTF', ' ')
    except (AttributeError, TypeError):
        # Only reachable if the preprocessing returned a non-string value.
        print('error 2 replacing')

    try:
        News.update(
            body=text,
            bitcoinBoolean=fun.aboutBitcoin(text),
            ethereumBoolean=fun.aboutEthereum(text),
            finished=True).where(News.link == response.url).execute()
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print('error storing')
def parse(self, response):
    """Store the preprocessed ``post-full-text contents`` div of a page.

    The first matching div (or None when there is no match) is handed
    straight to ``fun.textPreprocessing``; the result becomes the ``body`` of
    the ``News`` row(s) whose ``link`` equals ``response.url``, along with the
    bitcoin/ethereum flags and ``finished=True``.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None.
    """
    matches = response.xpath('//div[@class = "post-full-text contents"]')
    body = fun.textPreprocessing(matches.extract_first())

    update_query = News.update(
        body=body,
        bitcoinBoolean=fun.aboutBitcoin(body),
        ethereumBoolean=fun.aboutEthereum(body),
        finished=True)
    update_query.where(News.link == response.url).execute()
def parse(self, response):
    """Store the preprocessed ``rich-text`` div of a page on its News row.

    Everything from the first ``<p class="tagline">`` onwards is discarded,
    the remainder is run through ``fun.textPreprocessing`` and written to the
    ``News`` row(s) whose ``link`` equals ``response.url`` together with the
    bitcoin/ethereum flags and ``finished=True``.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None. When the page has no ``rich-text`` div nothing is stored.
    """
    text = response.xpath('//div[@class = "rich-text"]').extract_first()
    if text is None:
        # extract_first() gives None when the XPath matches nothing; splitting
        # it would raise AttributeError, so report and leave the row untouched.
        print('error: no rich-text div in %s' % response.url)
        return

    text = text.split('<p class="tagline">')[0]
    text = fun.textPreprocessing(text)

    News.update(
        body=text,
        bitcoinBoolean=fun.aboutBitcoin(text),
        ethereumBoolean=fun.aboutEthereum(text),
        finished=True).where(News.link == response.url).execute()
def parse(self, response):
    """Store the joined ``name`` and ``clearfix content`` divs of a page.

    All ``name`` divs followed by all ``clearfix content`` divs are
    concatenated in that order, run through ``fun.textPreprocessing`` and
    written to the ``News`` row(s) whose ``link`` equals ``response.url``
    together with the bitcoin/ethereum flags and ``finished=True``.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None.
    """
    parts = response.xpath('//div[@class = "name"]').extract()
    parts = parts + response.xpath(
        '//div[@class = "clearfix content"]').extract()

    # A single join replaces the repeated ``text + unicode(part)``: it avoids
    # quadratic concatenation and the Python-2-only ``unicode`` builtin.
    # Assumes extract() yields text strings, for which unicode() was a no-op.
    text = fun.textPreprocessing(''.join(parts))

    News.update(
        body=text,
        bitcoinBoolean=fun.aboutBitcoin(text),
        ethereumBoolean=fun.aboutEthereum(text),
        finished=True).where(News.link == response.url).execute()
def parse(self, response):
    """Store the preprocessed ``entry-content`` div of a page on its News row.

    The div is cut at the first ``Disclaimer`` and at the first ad marker,
    run through ``fun.textPreprocessing`` and written to the ``News`` row(s)
    whose ``link`` equals ``response.url`` together with the bitcoin/ethereum
    flags and ``finished=True``.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None. When the page has no ``entry-content`` div nothing is stored.
    """
    text = response.xpath(
        '//div[@class = "entry-content"]').extract_first()
    if text is None:
        # extract_first() gives None when the XPath matches nothing; splitting
        # it would raise AttributeError, so report and leave the row untouched.
        print('error: no entry-content div in %s' % response.url)
        return

    text = text.split('Disclaimer')[0]
    # NOTE(review): this marker reads like already-preprocessed text, yet it
    # is applied to the raw HTML -- confirm that it can match at this point.
    text = text.split('CDATA id15 Content Ad 2 OA_show 15 ')[0]
    text = fun.textPreprocessing(text)

    News.update(
        body=text,
        bitcoinBoolean=fun.aboutBitcoin(text),
        ethereumBoolean=fun.aboutEthereum(text),
        finished=True).where(News.link == response.url).execute()
def parse(self, response):
    """Store the preprocessed article container of a page on its News row.

    The first ``article-content-container noskimwords`` div is run through
    ``fun.textPreprocessing``; the result is then truncated, in order, at the
    first occurrence of each trailing marker below. The remaining text is
    written to the ``News`` row(s) whose ``link`` equals
    ``str(response.url)`` together with the bitcoin/ethereum flags and
    ``finished=True``.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None.
    """
    container = response.xpath(
        '//div[@class = "article-content-container noskimwords"]')
    cleaned = fun.textPreprocessing(container.extract_first())

    # Applied one after another, each on the output of the previous cut.
    cut_markers = (
        'function e t r n c a l',
        'image via ',
        'Image via ',
        'via Shutter',
    )
    for marker in cut_markers:
        cleaned = cleaned.split(marker)[0]

    update_query = News.update(
        body=cleaned,
        bitcoinBoolean=fun.aboutBitcoin(cleaned),
        ethereumBoolean=fun.aboutEthereum(cleaned),
        finished=True)
    update_query.where(News.link == str(response.url)).execute()
def parse(self, response):
    """Store the preprocessed ``post-info`` div of a livebitcoinnews page.

    The article start is located with one of two markers: the third piece
    after splitting on ``<!--Content Ad -->`` if there are at least three
    pieces, otherwise the second piece after splitting on the reddit share
    button; if neither marker is present the whole div is kept and an error
    is printed. The text is then cut at the first ``<footer class=`` and at
    the first ``Header image``, run through ``fun.textPreprocessing`` and
    written to the ``News`` row(s) whose ``link`` equals ``response.url``
    together with the bitcoin/ethereum flags and ``finished=True``.

    Args:
        response: object exposing ``xpath(...)`` and ``url``.

    Returns:
        None. When the page has no ``post-info`` div nothing is stored.
    """
    text = response.xpath('//div[@class = "post-info"]').extract_first()
    if text is None:
        # extract_first() gives None when the XPath matches nothing; the
        # unguarded splits below would raise AttributeError on it.
        print('error livebitcoinnews: no post-info div in %s' % response.url)
        return

    # Explicit length checks replace the nested bare ``except:`` blocks, which
    # used IndexError as control flow and also trapped KeyboardInterrupt.
    ad_pieces = text.split('<!--Content Ad -->')
    if len(ad_pieces) > 2:
        text = ad_pieces[2]
    else:
        share_pieces = text.split('<li class="sm-share reddit">')
        if len(share_pieces) > 1:
            text = share_pieces[1]
        else:
            print('error livebitcoinnews')

    text = text.split('<footer class=')[0]
    text = text.split('Header image')[0]
    text = fun.textPreprocessing(text)

    News.update(
        body=text,
        bitcoinBoolean=fun.aboutBitcoin(text),
        ethereumBoolean=fun.aboutEthereum(text),
        finished=True).where(News.link == response.url).execute()