class GoogleFinanceScrapper:
    """Scrape quarterly revenue figures from Google Finance for each ticker
    symbol listed (one per line) in the input file, writing one CSV row per
    result to google_finance.csv."""

    # Set to True (e.g. from another thread/UI) to abort the scrape loop early.
    isFinished = False

    def __init__(self, filename):
        # filename: path to a text file containing one ticker symbol per line.
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'https://www.google.com/finance?'
        self.main_url = 'https://www.google.com'
        self.csvWriter = Csv('google_finance.csv')
        csvDataHeader = ['Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        """Run the full scrape, then flush/close the CSV writer."""
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        """Read ticker symbols from self.filename and scrape each one.

        Stops early if isFinished becomes True. Any exception is printed
        and logged; the scrape is best-effort and does not re-raise.
        """
        try:
            # `with` guarantees the handle is closed (the original leaked it);
            # also avoid shadowing the builtin name `file`.
            with open(self.filename, 'rb') as inputFile:
                for line in inputFile.readlines():
                    if self.isFinished:
                        return
                    # Normalize line endings and collapse whitespace.
                    line = self.regex.replaceData('\r+', '', line)
                    line = self.regex.reduceNewLine(line)
                    line = self.regex.reduceBlankSpace(line)
                    line = line.strip()
                    # Skip blank lines (consistent with TopsyScrapper).
                    if len(line) > 0:
                        params = urllib.urlencode({'q': line})
                        url = self.url + params
                        self.scrapBykeyword(url, line)
        except Exception as x:  # `as` form works on Python 2.6+ and 3.x
            print(x)
            # str(x) instead of the deprecated/removed x.message attribute.
            self.logger.error('Error: ' + str(x))
class TopsyScrapper:
    """Scrape tweet counts and sentiment scores from Topsy for each keyword
    listed (one per line) in the input file, writing rows to topsy.csv."""

    # Set to True (e.g. from another thread/UI) to abort the scrape loop early.
    isFinished = False

    def __init__(self, filename):
        # filename: path to a text file containing one search keyword per line.
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'http://topsy.com/s?'
        self.csvWriter = Csv('topsy.csv')
        csvDataHeader = ['Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape']
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        """Run the full scrape, then flush/close the CSV writer."""
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        """Read keywords from self.filename and scrape Topsy for each.

        Stops early if isFinished becomes True. Any exception is printed
        and logged; the scrape is best-effort and does not re-raise.
        """
        try:
            # `with` guarantees the handle is closed (the original leaked it);
            # also avoid shadowing the builtin name `file`.
            with open(self.filename, 'rb') as inputFile:
                for line in inputFile.readlines():
                    if self.isFinished:
                        return
                    # Normalize line endings and collapse whitespace.
                    line = self.regex.replaceData('\r+', '', line)
                    line = self.regex.reduceNewLine(line)
                    line = self.regex.reduceBlankSpace(line)
                    line = line.strip()
                    if len(line) > 0:
                        # window=m limits results to the last month of tweets.
                        params = urllib.urlencode({'q': line, 'window': 'm', 'type': 'tweet'})
                        url = self.url + params
                        self.scrapBrowserData(url, line)
        except Exception as x:  # `as` form works on Python 2.6+ and 3.x
            print(x)
            # Log as well as print, consistent with GoogleFinanceScrapper.
            self.logger.error('Error: ' + str(x))
class TopsyScrapper:
    """Scrape tweet counts and sentiment scores from Topsy for each keyword
    listed (one per line) in the input file, writing rows to topsy.csv.

    NOTE(review): this class duplicates an identical TopsyScrapper definition
    earlier in the file; one of the two copies should likely be removed.
    """

    # Set to True (e.g. from another thread/UI) to abort the scrape loop early.
    isFinished = False

    def __init__(self, filename):
        # filename: path to a text file containing one search keyword per line.
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'http://topsy.com/s?'
        self.csvWriter = Csv('topsy.csv')
        csvDataHeader = [
            'Keyword', 'Tweets in last 30 days', 'Topsy Sentiment Score', ' Date of scrape'
        ]
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        """Run the full scrape, then flush/close the CSV writer."""
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        """Read keywords from self.filename and scrape Topsy for each.

        Stops early if isFinished becomes True. Any exception is printed
        and logged; the scrape is best-effort and does not re-raise.
        """
        try:
            # `with` guarantees the handle is closed (the original leaked it);
            # also avoid shadowing the builtin name `file`.
            with open(self.filename, 'rb') as inputFile:
                for line in inputFile.readlines():
                    if self.isFinished:
                        return
                    # Normalize line endings and collapse whitespace.
                    line = self.regex.replaceData('\r+', '', line)
                    line = self.regex.reduceNewLine(line)
                    line = self.regex.reduceBlankSpace(line)
                    line = line.strip()
                    if len(line) > 0:
                        # window=m limits results to the last month of tweets.
                        params = urllib.urlencode({
                            'q': line, 'window': 'm', 'type': 'tweet'
                        })
                        url = self.url + params
                        self.scrapBrowserData(url, line)
        except Exception as x:  # `as` form works on Python 2.6+ and 3.x
            print(x)
            # Log as well as print, consistent with GoogleFinanceScrapper.
            self.logger.error('Error: ' + str(x))
class GoogleFinanceScrapper:
    """Scrape quarterly revenue figures from Google Finance for each ticker
    symbol listed (one per line) in the input file, writing one CSV row per
    result to google_finance.csv.

    NOTE(review): this class duplicates an identical GoogleFinanceScrapper
    definition earlier in the file; one of the two copies should likely be
    removed.
    """

    # Set to True (e.g. from another thread/UI) to abort the scrape loop early.
    isFinished = False

    def __init__(self, filename):
        # filename: path to a text file containing one ticker symbol per line.
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        self.filename = filename
        self.url = 'https://www.google.com/finance?'
        self.main_url = 'https://www.google.com'
        self.csvWriter = Csv('google_finance.csv')
        csvDataHeader = [
            'Ticker Symbol', 'Quarter End', 'Revenue', 'Total Revenue', 'Date of Scrape'
        ]
        self.csvWriter.writeCsvRow(csvDataHeader)

    def run(self):
        """Run the full scrape, then flush/close the CSV writer."""
        self.scrapData()
        self.csvWriter.closeWriter()

    def scrapData(self):
        """Read ticker symbols from self.filename and scrape each one.

        Stops early if isFinished becomes True. Any exception is printed
        and logged; the scrape is best-effort and does not re-raise.
        """
        try:
            # `with` guarantees the handle is closed (the original leaked it);
            # also avoid shadowing the builtin name `file`.
            with open(self.filename, 'rb') as inputFile:
                for line in inputFile.readlines():
                    if self.isFinished:
                        return
                    # Normalize line endings and collapse whitespace.
                    line = self.regex.replaceData('\r+', '', line)
                    line = self.regex.reduceNewLine(line)
                    line = self.regex.reduceBlankSpace(line)
                    line = line.strip()
                    # Skip blank lines (consistent with TopsyScrapper).
                    if len(line) > 0:
                        params = urllib.urlencode({'q': line})
                        url = self.url + params
                        self.scrapBykeyword(url, line)
        except Exception as x:  # `as` form works on Python 2.6+ and 3.x
            print(x)
            # str(x) instead of the deprecated/removed x.message attribute.
            self.logger.error('Error: ' + str(x))