def search_npr(url_num):
    """Query the NPR story API for ``url_num`` and wrap each story in an Item.

    Args:
        url_num: NPR id appended to the query string (converted with ``str``).

    Returns:
        A list holding one ``Item`` per story found in the JSON response.
        Each item has ``name``, ``url`` and ``duration`` set and carries the
        tag "world".

    Raises:
        KeyError / IndexError: if the JSON payload lacks the expected
            ``list -> story -> title/link`` structure.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    url = 'http://api.npr.org/query?apiKey='
    key = 'MDEyNTYyODg1MDEzODM5ODAxNTIzYjc2ZQ001'
    url = url + key
    url += '&numResults=1&format=json&id='
    url += str(url_num)

    # Open our url and load the JSON; close the connection even if parsing
    # fails so the socket is not leaked.
    response = urlopen(url)
    try:
        json_obj = load(response)
    finally:
        response.close()

    # Parse our stories.
    items = []
    for story in json_obj['list']['story']:
        title = story['title']['$text']
        link = story['link'][0]['$text']

        # Build a fresh Item per story so list entries never alias one object.
        item = Item()
        item.name = title
        item.url = link
        print(link)
        item.add_tag("world")

        # NOTE(review): the duration is computed from the API query URL, not
        # from the story ``link`` -- confirm this is intended.
        item.duration = durationfunctionstupid(requests.get(url).text)
        items.append(item)
    return items
def nytimes_urls(category):
    """Build an Item for every entry of ``response["results"]`` in *category*.

    ``response`` is not a parameter or local of this function; it is resolved
    from the enclosing scope at call time (presumably a module-level payload
    -- verify against the rest of the file).

    Args:
        category: value compared for equality against each entry's
            ``"section"`` field; also used as the tag of every Item created.

    Returns:
        A list of ``Item`` objects, one per matching entry, with ``name`` and
        ``url`` set to the UTF-8 encoded title and URL, and ``duration`` set
        to the result of ``durationfunctionstupid`` on the fetched page text.
    """
    matches = []
    for entry in response["results"]:
        # Guard clause: ignore entries that belong to other sections.
        if entry["section"] != category:
            continue

        story = Item()
        story.name = entry["title"].encode('utf-8')

        # Encode the link once and reuse it for the download and the Item.
        link = entry["url"].encode('utf-8')
        page = requests.get(link).text
        story.url = link
        story.duration = durationfunctionstupid(page)
        story.add_tag(category)
        matches.append(story)
    return matches
def get_yahoo_top(keyword, number):
    """Fetch up to ``number`` entries of the Yahoo News RSS feed for ``keyword``.

    The feed is requested through the YQL endpoint as JSON; each entry's link
    is then downloaded so that ``durationfunctionstupid`` can be applied to
    the page text.

    Args:
        keyword: feed name inserted verbatim into the already percent-encoded
            query URL, and used as the tag of every item created.
        number: requested item count; passed through ``n_sanity_check``
            before use (assumed to yield an int -- TODO confirm).

    Returns:
        A list of the objects returned by ``create_item(duration, name, tag,
        url)``. The list is shorter than ``number`` when the feed has fewer
        entries, and empty when the response carries no results.
    """
    url1 = "http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20rss%20where%20url%3D%22http%3A%2F%2Frss.news.yahoo.com%2Frss%2F"
    url2 = "%22&format=json&callback="
    number = n_sanity_check(number)
    response = requests.get(url1 + keyword + url2)
    data = response.json()

    # Nothing requested: return without touching the payload structure.
    if number <= 0:
        return []

    # A null/empty "results" value means the query matched nothing.
    results = data["query"]["results"]
    entries = results["item"] if results else []

    items = []
    # Slicing caps the loop at the entries actually present, so a short feed
    # no longer raises IndexError.
    for entry in entries[:number]:
        url = entry["link"]
        name = entry["title"]
        duration = durationfunctionstupid(requests.get(url).text)
        items.append(create_item(duration, name, keyword, url))
    return items