Example #1
import json
from configparser import ConfigParser

# CsvReader, Request, and Kinesis are project-local helpers; their imports
# depend on the package layout and are omitted here.


def main():
    parser = ConfigParser()  # SafeConfigParser is a deprecated alias in Python 3
    parser.read("config.ini")

    capacity = parser.getint('stocks', 'number')

    folder = parser.get('csv', 'folder')
    csv_file = parser.get('csv', 'filename')
    csv = CsvReader(folder, csv_file, capacity)
    stocks = iter(csv.get_stocks())

    url = parser.get('stocks', 'url')
    request = Request(url, stocks, capacity)
    stock_info = request.get_url_info()

    stream = parser.get('kinesis', 'stream_name')
    shards = json.loads(parser.get('kinesis', 'shards'))
    kinesis = Kinesis(stream, shards)
    responses = kinesis.stream_stock(stock_info)

    print('{},{},{},{},{},{}'.format('Timestamp', 'StatusCode', 'ShardId',
                                     'Sequence Number', 'Stock',
                                     'Stock Price'))
    # One CSV row per (stock, Kinesis response) pair.
    for s, r in zip(stock_info, responses):
        t = s + r
        print(','.join(str(i) for i in t))
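For reference, a config.ini shaped like the one below would satisfy every parser.get call above; the concrete values are illustrative assumptions, not taken from the original project (shards is read with json.loads, so it can be any JSON value):

[stocks]
number = 10
url = https://example.com/quotes

[csv]
folder = data
filename = stocks.csv

[kinesis]
stream_name = stock-stream
shards = 2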
Example #2
    def list_parse(self, res):
        soup = BeautifulSoup(res.text, 'lxml')

        # Extract each entry on the list page
        for li in soup.select('div.listNews.whtPD.columns'):
            print(li.find('h2', 'titleNews').get_text(strip=True))
            print(urljoin(res.url, li.a['href']))
            print()
            break  # only sample the first entry on each page
        
        # Next page: follow the pagination link if one exists
        next_page = soup.select('.navigation-page a.jp-last')
        if next_page:
            url = urljoin(res.url, next_page[0]['href'])
            yield Request(url, cb=self.list_parse)
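The same extraction logic can be run outside the spider framework; below is a minimal standalone sketch using requests, assuming the Jakarta Post index page that Examples #5 and #6 crawl (the framework's Request/cb machinery is replaced by a plain loop, and the selectors may drift if the site's markup changes):

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

url = 'https://www.thejakartapost.com/index'
while url:
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'lxml')
    for li in soup.select('div.listNews.whtPD.columns'):
        print(li.find('h2', 'titleNews').get_text(strip=True))
        print(urljoin(res.url, li.a['href']))
    # follow the pagination link if one exists, otherwise stop
    next_page = soup.select('.navigation-page a.jp-last')
    url = urljoin(res.url, next_page[0]['href']) if next_page else None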
Example #3
    def start(self):
        keyword = 'Trump'
        # Twitter's legacy search-timeline endpoint; the response is JSON
        # whose items_html field is parsed in list_parse (Example #4).
        yield Request('https://twitter.com/i/search/timeline',
                      params={
                          'vertical': 'news',
                          'q': keyword,
                          'src': 'typd',
                          'include_available_features': '1',
                          'include_entities': '1',
                          'reset_error_state': 'false'
                      },
                      headers={
                          'accept-language':
                          'zh-CN,zh-HK;q=0.9,zh;q=0.8,zh-TW;q=0.7'
                      },
                      cb=self.list_parse)
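Outside the framework, the same call can be made directly with requests; Example #4 shows that the response is JSON whose items_html field carries rendered tweet markup. A rough equivalent follows (this legacy, unofficial Twitter endpoint may require cookies or have been retired since):

import requests

res = requests.get(
    'https://twitter.com/i/search/timeline',
    params={
        'vertical': 'news',
        'q': 'Trump',
        'src': 'typd',
        'include_available_features': '1',
        'include_entities': '1',
        'reset_error_state': 'false',
    },
    headers={'accept-language': 'zh-CN,zh-HK;q=0.9,zh;q=0.8,zh-TW;q=0.7'})
items_html = res.json()['items_html']  # rendered tweets, as parsed in Example #4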
Example #4
    def list_parse(self, res):
        # Tweets arrive as rendered HTML inside the JSON payload.
        soup = BeautifulSoup(res.json()['items_html'], 'lxml')
        for li in soup.select('li.stream-item'):
            data = self.information(li, res.url)
            if data['footer']['reply_count'] > 0:
                # Follow the conversation page to collect the replies.
                url = 'https://twitter.com/i/%s/conversation/%s' % (
                    data['user']['username'], data['id'])
                yield Request(url,
                              params={
                                  'include_available_features': '1',
                                  'include_entities': '1',
                                  'max_position': '',
                                  'reset_error_state': 'false'
                              },
                              headers={
                                  'accept-language':
                                  'zh-CN,zh-HK;q=0.9,zh;q=0.8,zh-TW;q=0.7'
                              },
                              cb=self.reply_parse,
                              item=data)
            else:
                yield TwitterItem(data)
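Tweets that have replies are re-queued as conversation-page requests handled by reply_parse, with the already-parsed tweet carried along in item; tweets without replies are emitted immediately as TwitterItem objects.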
Example #5
    def start(self):
        # Seed pages 1 through 20 of the article index.
        for i in range(1, 21):
            url = 'https://www.thejakartapost.com/index/page/%d' % i
            yield Request(url, cb=self.list_parse)
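This seeds https://www.thejakartapost.com/index/page/1 through page/20; each response is then handled by a list_parse like the one in Example #2.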
Example #6
    def start(self):
        yield Request('http://www.thejakartapost.com/index', cb=self.list_parse)