def test_main_server_unexpected_behavior(monkeypatch, requests_mock, caplog):
    """main() must raise RuntimeError and log an error when the server
    responds with an empty JSON payload for a valid album id."""
    monkeypatch.setattr("album.validate_input", lambda: MOCK_ALBUM_ID)
    requests_mock.get(URL.format(MOCK_ALBUM_ID), json=[])
    with pytest.raises(RuntimeError, match=SERVER_ERROR_MESSAGE):
        album.main()
    expected_record = ("root", logging.ERROR, SERVER_ERROR_MESSAGE)
    assert caplog.record_tuples == [expected_record]
def test_main_expected_behavior_with_loop(monkeypatch, requests_mock, capsys):
    """main() must re-prompt after an invalid id (-1) and then print the
    expected output once the second, valid id succeeds."""
    fake_inputs = iter([-1, MOCK_ALBUM_ID])
    monkeypatch.setattr("album.validate_input", lambda: next(fake_inputs))
    requests_mock.get(URL.format(MOCK_ALBUM_ID), json=MOCK_JSON)
    album.main()
    assert capsys.readouterr().out == MOCK_OUTPUT
def get_unique_tweets(self, data_dict):
    # TODO: Implement filter to check if Tweet text starts with 'RT'
    """
    Sentiment-score a non-retweet tweet and push per-company scores and
    texts to Pusher channels, deduplicating tweets by their matched URL.

    :param data_dict: decoded tweet payload; its 'text' entry is read and,
        for a new (unseen) URL, overwritten with the matched URL
        (NOTE(review): assumes the Twitter streaming JSON schema — confirm
        against the caller)
    :return: None
    """
    flag = False
    try:
        # Normalize to ASCII lower-case for URL matching and scoring.
        text = data_dict['text'].encode('ascii', 'ignore').lower()
        # Check for 'retweeted_status' in metadata field to determine
        # if tweet is a retweet (1st check)
        if 'retweeted_status' not in data_dict:
            url_match = URL.match(text)
            # Check if link contains url
            if url_match:
                match_group = url_match.group()
                if len(self.key_list) > 0:
                    # Duplicate if any previously seen key contains this URL.
                    if any(match_group in item for item in self.key_list):
                        flag = True
                if flag is False:
                    data_dict['text'] = match_group
                    print "Inserted text: " + data_dict['text'] + '\n'
                    # Remember the URL so later duplicates are skipped.
                    self.key_list.append(match_group)
                    # VADER compound score in [-1, 1].
                    sid = SentimentIntensityAnalyzer()
                    ss = sid.polarity_scores(text)
                    print ss['compound']
                    score = ss['compound']
                    # Amplify negative sentiment: score + 3*score = 4x.
                    if score < 0:
                        score += (3 * score)
                    # Assign the score to whichever company the text
                    # mentions; `>= 0` means "not already set this round"
                    # (prices are reset to 0 below after each push).
                    for w in GOOGLE:
                        if w in text and self.google_price >= 0:
                            self.google_price = score
                            self.google_text = text
                    for w in MICROSOFT:
                        if w in text and self.microsoft_price >= 0:
                            self.microsoft_price = score
                            self.microsoft_text = text
                    for w in FACEBOOK:
                        if w in text and self.facebook_price >= 0:
                            self.facebook_price = score
                            self.facebook_text = text
                    # Publish scores and texts on their Pusher channels.
                    p.trigger('test_channel', 'my_event',
                              {'google': self.google_price,
                               'microsoft': self.microsoft_price,
                               'facebook': self.facebook_price})
                    p.trigger('tweet_channel', 'my_event',
                              {'google_text': self.google_text,
                               'microsoft_text': self.microsoft_text,
                               'facebook_text': self.facebook_text})
                    # Reset scores for the next tweet.
                    self.google_price = 0
                    self.microsoft_price = 0
                    self.facebook_price = 0
                else:
                    # Duplicate URL: record it but do not re-score/push.
                    self.key_list.append(url_match.group())
    except TypeError, e:
        print >> sys.stderr, e
        self.log_error(str(e))
def get_unique_tweets(self, data_dict):
    # TODO: Implement filter to check if Tweet text starts with 'RT'
    """
    Store a non-retweet tweet in the MongoDB collection (and optionally a
    JSON lines file), deduplicating URL-bearing tweets by matched URL.

    :param data_dict: decoded tweet payload; its 'text' entry is read and,
        for a new URL-bearing tweet, overwritten with the matched URL
        (NOTE(review): assumes the Twitter streaming JSON schema — confirm
        against the caller)
    :return: None
    """
    flag = False
    try:
        # Normalize to ASCII lower-case for URL matching.
        text = data_dict['text'].encode('ascii', 'ignore').lower()
        # Check for 'retweeted_status' in metadata field to determine
        # if tweet is a retweet (1st check)
        if 'retweeted_status' not in data_dict:
            print "Number of tweets in collection: " + \
                str(self.stream_filter.collection.count())
            url_match = URL.match(text)
            # Check if link contains url
            if url_match:
                match_group = url_match.group()
                if len(self.key_list) > 0:
                    # Duplicate if any previously seen key contains this URL.
                    if any(match_group in item for item in self.key_list):
                        flag = True
                if flag is False:
                    data_dict['text'] = match_group
                    print "Inserted text: " + data_dict['text'] + '\n'
                    # Remember the URL so later duplicates are skipped.
                    self.key_list.append(match_group)
                    self.stream_filter.collection.insert(data_dict)
                    # `wtf` flag: also append the text to a JSON lines file
                    # (append if it exists, otherwise create it).
                    if self.wtf is True:
                        if os.path.isfile(self.filename):
                            with open(self.filename, 'a') as outfile:
                                json.dump(data_dict['text'], outfile)
                                outfile.write('\n')
                        else:
                            with open(self.filename, 'w') as outfile:
                                json.dump(data_dict['text'], outfile)
                                outfile.write('\n')
                else:
                    # Duplicate URL: record it but do not insert again.
                    self.key_list.append(url_match.group())
            else:
                # No URL in the tweet: insert it unconditionally.
                print "Inserted text: " + text
                self.stream_filter.collection.insert(data_dict)
    except TypeError, e:
        print >> sys.stderr, e
        self.log_error(str(e))
def run():
    """Scrape user-agent strings for every OS in OS_LIST into ua_list.

    For each OS, pages START_PAGE through END_PAGE are fetched in order;
    fetching stops early for an OS when a page yields fewer than 50
    entries (the site's full-page size), meaning the last page was reached.
    """
    # Renamed the loop variable from `os` — it shadowed the stdlib module.
    for os_name in OS_LIST:
        for page in range(START_PAGE, END_PAGE + 1):
            # Build the listing URL for this OS/page.
            url = URL.format(os=os_name, page=page)
            res = requests.get(url)
            print(f'Fetch: {url}')
            root = html.fromstring(res.content)
            rs = root.xpath('//td[@class="useragent"]/a/text()')
            ua_list.extend(rs)
            # A short page means this was the last page for this OS.
            if len(rs) < 50:
                break
def make_req(kind, i):
    """Build an unsent async GET request for episode *i* of *kind*,
    with the request metadata attached via set_meta."""
    meta = {"kind": kind, "i": i}
    target = URL.format(kind=kind, episode=i)
    return grequests.get(target, callback=set_meta(meta))
def make_req(season, episode, act):
    """Build an unsent async GET request for one act of an episode,
    with the request metadata attached via set_meta."""
    meta = {"season": season, "episode": episode, "act": act}
    target = URL.format(season=season, episode=episode, act=act)
    return grequests.get(target, callback=set_meta(meta))