def run(request):
    """Django view: scrobble each user's recent xiami plays to Last.fm.

    Iterates all registered users, scrapes their recently played tracks
    from xiami, submits anything new to Last.fm, and records the newest
    scrobble time. Returns an HttpResponse so it can serve as a
    cron/keep-alive endpoint.
    """
    # Read the user list from the database.
    users = database.get_user()
    for user in users:
        # Read currently/recently played songs from xiami.
        titles, artists, track_times, record_time = scrobble.xiami(user)
        if titles:
            # BUG FIX: original used the Python-2-only print statement;
            # the parenthesized form works on both Python 2 and 3.
            print('titles: %s, artists: %s ' % (titles, artists))
            scrobble.lastfm(titles, artists, track_times, user)
            # Persist the newest scrobble time for this user.
            database.modify_user(user[0], record_time)
    return HttpResponse('running!')
def xiami(user):
    """Scrape a user's recently played tracks from xiami.com.

    Args:
        user: DB row — presumably user[0] is the xiami user id and user[2]
            is the last recorded scrobble time as '%Y-%m-%d %H:%M:%S'
            (TODO confirm against database.get_user()).

    Returns:
        (titles, artists, track_times, record_time), where track_times are
        Unix timestamps and record_time is a formatted datetime string;
        all four are None when there is nothing new to scrobble.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:27.0) Gecko/20100101 Firefox/27.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    # proxies = {'http': 'http://122.226.122.201:8080'}
    xiami_url = 'http://www.xiami.com/space/charts-recent/u/%s' % (user[0])
    r = requests.get(xiami_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html5lib')
    last_time = datetime.strptime(user[2], '%Y-%m-%d %H:%M:%S')
    # BUG FIX: timedelta.seconds holds only the sub-day remainder, so any
    # gap longer than 24h wrapped around; total_seconds() counts the full
    # elapsed time.
    minutes = int((datetime.now() - last_time).total_seconds() // 60)
    track_times = soup.findAll('td', class_='track_time')
    # Keep only entries expressed as "N 分钟前" ("N minutes ago");
    # raw strings so \d is a literal regex escape on Python 3.
    track_times = [
        re.search(r'\d+', track_time.text).group()
        for track_time in track_times
        if re.search(u'分钟前', track_time.text)
    ]
    # The newest entry may instead read "…秒前" (seconds ago) or
    # "刚刚" (just now).
    second_html = soup.find('td', class_='track_time')
    if second_html:
        second_exist = re.search(u'秒前|刚刚', second_html.text)
    else:
        second_exist = False
    if track_times or second_exist:
        # Tracks played within the last 10 minutes — used to refresh the
        # user record even when nothing is newer than the stored timestamp.
        exists_times = [
            int(track_time) for track_time in track_times
            if int(track_time) < 10
        ]
        # Tracks newer than the last recorded scrobble time.
        track_times = [
            int(track_time) for track_time in track_times
            if int(track_time) < minutes
        ]
        record_time = None
        if track_times:
            record_time = datetime.now() - timedelta(minutes=track_times[0])
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')
            # Convert relative "minutes ago" into absolute Unix timestamps.
            track_times = [
                int(time.time() - track_time * 60)
                for track_time in track_times
            ]
        if second_exist:
            record_time = datetime.now()
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')
            track_times.insert(0, int(time.time()))
        track_number = len(track_times)
        if record_time:
            track_htmls = soup.findAll('tr', id=re.compile(r'track_\d+'),
                                       limit=track_number)
            upper_htmls = [
                track_html.find('td', class_='song_name')
                for track_html in track_htmls
            ]
            # First <a> in the cell is the song link; the rest are artists.
            artists_html = [
                artist_html.findAll('a')[1:] for artist_html in upper_htmls
            ]
            artists = []
            for artist in artists_html:
                # Skip personal-space links; BUG FIX: dots escaped so '.'
                # matches literally instead of any character.
                all_artists = [
                    one_artist.text for one_artist in artist
                    if not re.search(r'http://i\.xiami\.com',
                                     one_artist['href'])
                ]
                all_artist = '&'.join(all_artists)
                artists.append(all_artist)
            title_htmls = soup.findAll('a', href=re.compile(r'/song/\d+'),
                                       limit=track_number)
            titles = [title['title'] for title in title_htmls]
            return (titles, artists, track_times, record_time)
        elif exists_times:
            # Nothing new, but something played <10 min ago: keep the
            # stored timestamp fresh without scrobbling.
            database.modify_user(user[0], user[2])
            return (None, None, None, None)
        else:
            database.not_listening(user[0])
            return (None, None, None, None)
    else:
        database.not_listening(user[0])
        return (None, None, None, None)
def xiami(user):
    """Scrape a user's recently played tracks from xiami.com.

    Args:
        user: DB row — presumably user[0] is the xiami user id and user[2]
            is the last recorded scrobble time as '%Y-%m-%d %H:%M:%S'
            (TODO confirm against database.get_user()).

    Returns:
        (titles, artists, track_times, record_time), where track_times are
        Unix timestamps and record_time is a formatted datetime string;
        all four are None when there is nothing new to scrobble.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:27.0) Gecko/20100101 Firefox/27.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    # proxies = {'http': 'http://122.226.122.201:8080'}
    xiami_url = 'http://www.xiami.com/space/charts-recent/u/%s' % (user[0])
    r = requests.get(xiami_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html5lib')
    last_time = datetime.strptime(user[2], '%Y-%m-%d %H:%M:%S')
    # BUG FIX: timedelta.seconds holds only the sub-day remainder, so any
    # gap longer than 24h wrapped around; total_seconds() counts the full
    # elapsed time.
    minutes = int((datetime.now() - last_time).total_seconds() // 60)
    track_times = soup.findAll('td', class_='track_time')
    # Keep only entries expressed as "N 分钟前" ("N minutes ago");
    # raw strings so \d is a literal regex escape on Python 3.
    track_times = [
        re.search(r'\d+', track_time.text).group()
        for track_time in track_times
        if re.search(u'分钟前', track_time.text)
    ]
    # The newest entry may instead read "…秒前" (seconds ago) or
    # "刚刚" (just now).
    second_html = soup.find('td', class_='track_time')
    if second_html:
        second_exist = re.search(u'秒前|刚刚', second_html.text)
    else:
        second_exist = False
    if track_times or second_exist:
        # Tracks played within the last 10 minutes — used to refresh the
        # user record even when nothing is newer than the stored timestamp.
        exists_times = [
            int(track_time) for track_time in track_times
            if int(track_time) < 10
        ]
        # Tracks newer than the last recorded scrobble time.
        track_times = [
            int(track_time) for track_time in track_times
            if int(track_time) < minutes
        ]
        record_time = None
        if track_times:
            record_time = datetime.now() - timedelta(minutes=track_times[0])
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')
            # Convert relative "minutes ago" into absolute Unix timestamps.
            track_times = [
                int(time.time() - track_time * 60)
                for track_time in track_times
            ]
        if second_exist:
            record_time = datetime.now()
            record_time = record_time.strftime('%Y-%m-%d %H:%M:%S')
            track_times.insert(0, int(time.time()))
        track_number = len(track_times)
        if record_time:
            track_htmls = soup.findAll('tr', id=re.compile(r'track_\d+'),
                                       limit=track_number)
            upper_htmls = [
                track_html.find('td', class_='song_name')
                for track_html in track_htmls
            ]
            # First <a> in the cell is the song link; the rest are artists.
            artists_html = [
                artist_html.findAll('a')[1:] for artist_html in upper_htmls
            ]
            artists = []
            for artist in artists_html:
                # Skip personal-space links; BUG FIX: dots escaped so '.'
                # matches literally instead of any character.
                all_artists = [
                    one_artist.text for one_artist in artist
                    if not re.search(r'http://i\.xiami\.com',
                                     one_artist['href'])
                ]
                all_artist = '&'.join(all_artists)
                artists.append(all_artist)
            title_htmls = soup.findAll('a', href=re.compile(r'/song/\d+'),
                                       limit=track_number)
            titles = [title['title'] for title in title_htmls]
            return (titles, artists, track_times, record_time)
        elif exists_times:
            # Nothing new, but something played <10 min ago: keep the
            # stored timestamp fresh without scrobbling.
            database.modify_user(user[0], user[2])
            return (None, None, None, None)
        else:
            database.not_listening(user[0])
            return (None, None, None, None)
    else:
        database.not_listening(user[0])
        return (None, None, None, None)