def get_songs_from_list(self, url):
    '''Scrape a Baidu Music list page and return its songs as SongInfo.'''
    doc = parse_document_from_requests(url, self.session)
    items = doc.xpath(
        '//*[contains(concat(" ", normalize-space(@class), " "), " song-item ")]')
    result = []
    for item in items:
        title_links = item.xpath('./span[@class="song-title"]/a')
        if not title_links:
            # some lists contain empty items...
            # e.g. index 30 of this:
            # http://music.baidu.com/search/song?key=70%E5%90%8E&start=20&size=20
            continue
        link = title_links[0]
        href = link.get('href')
        sid = href.rsplit('/', 1)[-1]
        title = link.text_content()
        artists = tuple(
            artist_link.text_content()
            for artist_link in item.xpath('./span[@class="singer"]/span/a'))
        album_links = item.xpath('./span[@class="album-title"]/a')
        if album_links:
            album = album_links[0].text_content().strip()
            # strip the Chinese book-title quotes around the album name
            album = album.lstrip('《').rstrip('》')
        else:
            album = None
        result.append(SongInfo(sid, title, href, artists, album, None))
    return result
def get_user_topic_ids(self, user_id):
    '''Return the topic ids listed on the user's topics search page.'''
    res = self.request(
        '/search.php?action=show_user_topics&user_id=%d' % user_id)
    doc = parse_document_from_requests(res)
    anchors = doc.xpath('//td[@class="tcl"]/div[@class="tclcon"]/div//a')
    # hrefs end with '...=<topic_id>'; take the part after '='
    return [int(a.get('href').split('=', 1)[-1]) for a in anchors]
def download_official_pkgbuild(name: str) -> List[str]:
    '''Download the PKGBUILD and related files for an official Arch package.

    Files are written into the current working directory; the list of
    downloaded file names is returned.
    '''
    url = 'https://www.archlinux.org/packages/search/json/?name=' + name
    logger.info('download PKGBUILD for %s.', name)
    info = s.get(url).json()
    # Filter out [testing] entries ONCE and reuse the first match; the
    # original built the same filtered list twice (for repo/arch and again
    # for pkgbase).
    results = [r for r in info['results'] if r['repo'] != 'testing']
    pkg = results[0]
    repo = pkg['repo']
    arch = pkg['arch']
    pkgbase = pkg['pkgbase']
    # core/extra live in the "packages" git repo, the rest in "community"
    if repo in ('core', 'extra'):
        gitrepo = 'packages'
    else:
        gitrepo = 'community'
    tree_url = 'https://projects.archlinux.org/svntogit/%s.git/tree/repos/%s-%s?h=packages/%s' % (
        gitrepo, repo, arch, pkgbase)
    doc = parse_document_from_requests(tree_url, s)
    blobs = doc.xpath(
        '//div[@class="content"]//td/a[contains(concat(" ", normalize-space(@class), " "), " ls-blob ")]')
    files = [x.text for x in blobs]
    for filename in files:
        blob_url = 'https://projects.archlinux.org/svntogit/%s.git/plain/repos/%s-%s/%s?h=packages/%s' % (
            gitrepo, repo, arch, filename, pkgbase)
        with open(filename, 'wb') as f:
            logger.debug('download file %s.', filename)
            f.write(s.get(blob_url).content)
    return files
def search(self, q):
    '''Search Xiami for *q* and return the result rows as SongInfo.'''
    url = 'http://www.xiami.com/search?key=' + q
    doc = parse_document_from_requests(url, self.session)
    # skip the header row of the result table
    result_rows = doc.xpath('//table[@class="track_list"]//tr')[1:]
    songs = []
    for row in result_rows:
        # 没有 target 属性的是用于展开的按钮
        anchors = row.xpath('td[@class="song_name"]/a[@target]')
        extra = anchors[1].text_content() if len(anchors) == 2 else None
        name = anchors[0].text_content()
        href = anchors[0].get('href')
        # '/text()' in XPath get '.text', not '.text_content()'
        artist = row.xpath(
            'td[@class="song_artist"]/a')[0].text_content().strip()
        album = row.xpath(
            'td[@class="song_album"]/a')[0].text_content().strip()
        # strip the Chinese book-title quotes around the album name
        album = album.lstrip('《').rstrip('》')
        sid = href.rsplit('/', 1)[-1]
        songs.append(SongInfo(sid, name, href, (artist,), album, extra))
    return songs
def delete_unverified_users(self, doc=None, *, msg=None, since=None):
    '''delete unverified users in first page

    doc can be given if you have that page's parsed content already.

    return False if no such users are found.
    '''
    if doc is None:
        url = '/admin_users.php?find_user=&' \
            'order_by=username&direction=ASC&user_group=0&p=1'
        if since:
            # was '®istered_before' (the '&reg' of '&registered_before'
            # got turned into the ® entity) and '%s' (glibc Unix
            # timestamp) instead of '%S' (seconds)
            url += '&registered_before=' + since.strftime('%Y-%m-%d %H:%M:%S')
        res = self.request(url)
        doc = parse_document_from_requests(res)
    trs = doc.xpath('//div[@id="users2"]//tbody/tr')
    if not trs:
        return False
    # checkbox names look like 'users[<id>]'; slice out the id part
    users = [tr.xpath('td/input[@type="checkbox"]/@name')[0][6:-1]
             for tr in trs]
    users = ','.join(users)
    post = {
        'delete_users_comply': 'delete',
        'delete_posts': '1',
        'users': users,
    }
    res = self.request('/admin_users.php', data=post)
    res.text  # force the response body to be read
    return True
def get_user_topic_ids(self, user_id):
    '''Return the topic ids shown on the user's topics search page.'''
    res = self.request(
        '/search.php?action=show_user_topics&user_id=%d' % user_id)
    doc = parse_document_from_requests(res)
    tids = []
    for a in doc.xpath('//td[@class="tcl"]/div[@class="tclcon"]/div/strong/a'):
        # hrefs end with '...=<topic_id>'; take the part after '='
        tids.append(int(a.get('href').split('=', 1)[-1]))
    return tids
def download_official_pkgbuild(name):
    '''Download the PKGBUILD and related files for an official Arch package.

    Files are written into the current working directory; the list of
    downloaded file names is returned.
    '''
    url = 'https://www.archlinux.org/packages/search/json/?name=' + name
    logger.info('download PKGBUILD for %s.', name)
    info = s.get(url).json()
    # Filter out [testing] entries ONCE and reuse the first match; the
    # original built the same filtered list twice (for repo/arch and again
    # for pkgbase).
    results = [r for r in info['results'] if r['repo'] != 'testing']
    pkg = results[0]
    repo = pkg['repo']
    arch = pkg['arch']
    pkgbase = pkg['pkgbase']
    # core/extra live in the "packages" git repo, the rest in "community"
    if repo in ('core', 'extra'):
        gitrepo = 'packages'
    else:
        gitrepo = 'community'
    tree_url = 'https://projects.archlinux.org/svntogit/%s.git/tree/repos/%s-%s?h=packages/%s' % (
        gitrepo, repo, arch, pkgbase)
    doc = parse_document_from_requests(tree_url, s)
    blobs = doc.xpath(
        '//div[@class="content"]//td/a[contains(concat(" ", normalize-space(@class), " "), " ls-blob ")]'
    )
    files = [x.text for x in blobs]
    for filename in files:
        blob_url = 'https://projects.archlinux.org/svntogit/%s.git/plain/repos/%s-%s/%s?h=packages/%s' % (
            gitrepo, repo, arch, filename, pkgbase)
        with open(filename, 'wb') as f:
            logger.debug('download file %s.', filename)
            f.write(s.get(blob_url).content)
    return files
def get_songs_from_list(self, url):
    '''Parse a Baidu Music list page into a list of SongInfo.'''
    doc = parse_document_from_requests(url, self.session)
    song_nodes = doc.xpath(
        '//*[contains(concat(" ", normalize-space(@class), " "), " song-item ")]')
    songs = []
    for node in song_nodes:
        anchors = node.xpath('./span[@class="song-title"]/a')
        if not anchors:
            # some lists contain empty items...
            # e.g. index 30 of this:
            # http://music.baidu.com/search/song?key=70%E5%90%8E&start=20&size=20
            continue
        title_a = anchors[0]
        href = title_a.get('href')
        sid = href.rsplit('/', 1)[-1]
        title = title_a.text_content()
        artists = tuple(
            singer.text_content()
            for singer in node.xpath('./span[@class="singer"]/span/a'))
        album_anchors = node.xpath('./span[@class="album-title"]/a')
        if album_anchors:
            # strip the Chinese book-title quotes around the album name
            album = album_anchors[0].text_content().strip()
            album = album.lstrip('《').rstrip('》')
        else:
            album = None
        songs.append(SongInfo(sid, title, href, artists, album, None))
    return songs
def get_post_ids_from_topic(self, topic_id):
    '''Return the post ids of every post in the given topic.'''
    res = self.request('/viewtopic.php?id=%d' % topic_id)
    doc = parse_document_from_requests(res)
    anchors = doc.xpath('//div[@id]/h2//a')
    # hrefs end with '#p<post_id>'; take after '#' and drop the leading 'p'
    return [int(a.get('href').split('#', 1)[-1][1:]) for a in anchors]
def delete_unverified_users(self, doc=None, *, msg=None, since=None):
    '''delete unverified users in first page

    doc can be given if you have that page's parsed content already.

    return False if no such users are found.
    '''
    if doc is None:
        url = '/admin_users.php?find_user=&' \
            'order_by=username&direction=ASC&user_group=0&p=1'
        if since:
            # was '®istered_before' (the '&reg' of '&registered_before'
            # got turned into the ® entity) and '%s' (glibc Unix
            # timestamp) instead of '%S' (seconds)
            url += '&registered_before=' + since.strftime(
                '%Y-%m-%d %H:%M:%S')
        res = self.request(url)
        doc = parse_document_from_requests(res)
    trs = doc.xpath('//div[@id="users2"]//tbody/tr')
    if not trs:
        return False
    # checkbox names look like 'users[<id>]'; slice out the id part
    users = [
        tr.xpath('td/input[@type="checkbox"]/@name')[0][6:-1]
        for tr in trs
    ]
    users = ','.join(users)
    post = {
        'delete_users_comply': 'delete',
        'delete_posts': '1',
        'users': users,
    }
    res = self.request('/admin_users.php', data=post)
    body = res.text  # force the response body to be read
    return True
def block_user(self, user_id):
    '''Move the user into group 4 ("blocked") and ban them with reason "spam".'''
    profile_url = '/profile.php?section=admin&id=%d' % user_id
    r = self.request(profile_url)
    r.content  # consume the body
    data = {
        'form_sent': '1',
        'group_id': '4',
        'ban': '阻止用户',
    }
    r = self.request(profile_url, data=data)
    doc = parse_document_from_requests(r)
    # open the "add ban" page and submit its first form with a message
    r = self.request('/admin_bans.php?add_ban=%d' % user_id)
    doc = parse_document_from_requests(r)
    ban_form = doc.forms[0]
    ban_form.fields['ban_message'] = 'spam'
    r = self.request(ban_form.action, data=dict(ban_form.fields))
    r.content  # consume the body
def get_login_things(self):
    '''Fetch the login page; return (once token, username field name,
    password field name).'''
    res = self.request(self.login_url)
    doc = parse_document_from_requests(res)
    once = doc.xpath('//input[@name="once"]')[0].get('value')
    # the login form is the last form on the page
    login_form = doc.xpath('//form')[-1]
    username_field = login_form.xpath(
        './/input[@type="text"]')[0].get('name')
    password_field = login_form.xpath(
        './/input[@type="password"]')[0].get('name')
    return once, username_field, password_field
def get_login_things(self):
    '''Return the (once token, username field name, password field name)
    needed to submit the login form.'''
    res = self.request(self.login_url)
    doc = parse_document_from_requests(res)
    once = doc.xpath('//input[@name="once"]')[0].get('value')
    # the login form is the last form on the page
    form = doc.xpath('//form')[-1]
    text_input = form.xpath('.//input[@type="text"]')[0]
    password_input = form.xpath('.//input[@type="password"]')[0]
    return once, text_input.get('name'), password_input.get('name')
def _get_aur_packager(name: str) -> Tuple[Optional[str], str]:
    '''Scrape the AUR package page; return (maintainer, last_packager).

    maintainer is None when the package is orphaned (the page shows "None").
    '''
    page_url = f'https://aur.archlinux.org/packages/{name}/'
    doc = parse_document_from_requests(page_url, s)
    maintainer_text = str(
        doc.xpath('//th[text()="Maintainer: "]/following::td[1]/text()')[0])
    last_packager = str(
        doc.xpath('//th[text()="Last Packager: "]/following::td[1]/text()')[0])
    maintainer: Optional[str]
    if maintainer_text == 'None':
        maintainer = None
    else:
        maintainer = maintainer_text
    return maintainer, last_packager
def edit_post(self, post_id, body, *, subject=None, sticky=False):
    '''Edit an existing post; True when the forum answers with a redirect
    page (success).'''
    res = self.request('/viewtopic.php?pid=%s' % post_id)
    page = parse_document_from_requests(res)
    # keep the current subject (from the breadcrumbs) unless one is given
    old_subject = page.xpath('//ul[@class="crumbs"]/li/strong/a')[0].text
    payload = {
        'form_sent': '1',
        'req_message': body,
        'req_subject': subject if subject else old_subject,
        'stick_topic': '1' if sticky else '0',
    }
    edit_url = '/edit.php?id=%s&action=edit' % post_id
    res = self.request(edit_url, data=payload)
    return b'http-equiv="refresh"' in res.content
def daily_mission(self):
    '''Claim the V2EX daily login bonus.

    Raises NotLoggedIn, MissionNotAvailable, or V2EXFailure.
    '''
    res = self.request(self.daily_url)
    if 'href="/signin"' in res.text:
        raise NotLoggedIn
    doc = parse_document_from_requests(res)
    claim_buttons = doc.xpath('//input[@value = "领取 X 铜币"]')
    if not claim_buttons:
        raise MissionNotAvailable
    # the claim URL is embedded in the button's onclick JavaScript
    onclick = claim_buttons[0].get('onclick')
    claim_path = onclick.split("'")[1]
    res = self.request(urljoin(self.index_url, claim_path))
    if '已成功领取每日登录奖励' not in res.text:
        raise V2EXFailure('daily mission failed', res)
def check_login(self):
    '''check if we have logged in already (by cookies)'''
    res = self.request('/')
    doc = parse_document_from_requests(res)
    welcome_items = doc.xpath('//div[@id="brdwelcome"]/*[@class="conl"]/li')
    return len(welcome_items) > 0
def get_once_value(self):
    '''Return the "once" token from the login page's hidden input.'''
    res = self.request(self.login_url)
    doc = parse_document_from_requests(res)
    once_input = doc.xpath('//input[@name="once"]')[0]
    return once_input.get('value')
def check_login(self):
    '''check if we have logged in already (by cookies)'''
    res = self.request('/')
    doc = parse_document_from_requests(res)
    items = doc.xpath('//div[@id="brdwelcome"]/*[@class="conl"]/li')
    return len(items) > 0