def sab(self):
    """
    Process every album id listed in the comma-separated ``album`` argument.

    Each id is looked up with ``api.get_album_detail``; ids whose lookup
    returns ``None`` are skipped. Every detail found is announced on the log
    and handed to ``album_mo.parse_model`` together with the download types
    returned by ``self.parse_download()``.

    :return: None
    """
    from NXSpider.bin.models import album_mo

    command_name = sys._getframe().f_code.co_name
    if self.param_check(['album'], command_name) is False:
        return

    download_type = self.parse_download()
    for album_id in self.app.pargs.album.split(','):
        detail = api.get_album_detail(album_id)
        if detail is None:
            continue

        title = "<" + detail['name'] + ">"
        log.print_info(u"{} artist:{}".format(title, detail['artist']['name']))

        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        album_mo.parse_model(
            detail,
            download_type=download_type,
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def sur_pls(self):
    """
    Process the playlists of the user given in the ``user`` argument.

    The playlist list is fetched with ``api.user_playlist`` (``offset``
    defaults to 0 and ``limit`` to 50 when the arguments are unset), printed
    with ``print_playlist``, and then every playlist whose detail lookup
    returns something truthy is handed to ``playlist_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import playlist_mo

    if self.param_check(['user'], sys._getframe().f_code.co_name) is False:
        return

    download_type = self.parse_download()
    pargs = self.app.pargs
    user_playlists = api.user_playlist(
        pargs.user,
        offset=pargs.offset or 0,
        limit=pargs.limit or 50)

    log.print_info("playlists bellow will be crawled")
    print_playlist(user_playlists)

    for entry in user_playlists:
        detail = api.get_playlist_detail(entry['id'])
        if not detail:
            continue

        log.print_info(u"<{}> author:{}".format(
            detail['name'], detail['creator']['nickname']))

        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        playlist_mo.parse_model(
            detail,
            download_type=download_type,
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def login_smv(self): from NXSpider.bin.models import no_rec_mv_mo if self.param_check(['lu'], sys._getframe().f_code.co_name) is False: return plaintext_pwd = self.app.pargs.lp or None if plaintext_pwd is None: import getpass plaintext_pwd = getpass.getpass("Please input your password:"******"none"))) exit() mvs = api.my_mvs(session) mvs = [api.get_mv_detail(d['id']) for d in mvs] mvs = [d for d in mvs if d] for mv in mvs: no_rec_mv_mo.parse_model( mv, download_type=['mv'], file_check=Config().get_file_check(), shortcuts_stack=[] if Config().get_shortcut() else None) log.print_info("spider complete!~") pass
def spls(self):
    """
    Process every playlist id listed in the comma-separated ``playlist`` argument.

    For each id the detail is fetched with ``api.get_playlist_detail``; ids
    whose lookup is falsy are skipped. Each detail found is logged and passed
    to ``playlist_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import playlist_mo

    command_name = sys._getframe().f_code.co_name
    if self.param_check(['playlist'], command_name) is False:
        return

    download_type = self.parse_download()
    for playlist_id in self.app.pargs.playlist.split(','):
        detail = api.get_playlist_detail(playlist_id)
        if not detail:
            continue

        log.print_info(u"<{}> author:{}".format(
            detail['name'], detail['creator']['nickname']))

        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        playlist_mo.parse_model(
            detail,
            download_type=download_type,
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def sar_top_mp3(self):
    """
    Process the hot songs of every artist id in the comma-separated ``artist`` argument.

    ``api.get_artists_songs`` is called per id; a ``None`` result skips the
    id. The ``hotSongs`` list of the result is stored on the artist detail
    under the ``mp3`` key before the detail is given to
    ``artist_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import artist_mo

    command_name = sys._getframe().f_code.co_name
    if self.param_check(['artist'], command_name) is False:
        return

    download_type = self.parse_download()
    for artist_id in self.app.pargs.artist.split(','):
        songs = api.get_artists_songs(artist_id)
        if songs is None:
            continue

        artist = songs['artist']
        artist['mp3'] = songs['hotSongs']
        log.print_info(u"<{}>".format(artist['name']))

        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        artist_mo.parse_model(
            artist,
            download_type=download_type,
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def download_file(self, doc):
    """
    Download the file described by ``doc`` and save it under the configured path.

    The content comes from ``self.request_file(doc)`` and is written to
    ``Config().get_path()`` joined with ``self.download_relative_path(doc)``.
    After a successful write the file is tagged via ``self.download_file_tag``
    and the download is recorded via ``self.download_log``.

    :param doc: document describing the file to download
    :return: True when the file was written, tagged and logged; False when
        the request returned ``None`` or saving raised an exception
    :rtype: bool
    """
    file_relative_path = self.download_relative_path(doc)
    path = Config().get_path()
    content = self.request_file(doc)
    if content is None:
        log.print_err(u"file download failed : %s" % file_relative_path)
        return False

    try:
        file_name = os.path.join(path, file_relative_path)

        # Create the target directory without a separate existence check, so
        # a directory created concurrently between check and creation cannot
        # make the save fail. An empty dirname means "current directory".
        dir_name = os.path.dirname(file_name)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        # file write
        with open(file_name, "wb") as code:
            code.write(content)

        self.download_file_tag(file_name, doc)
        log.print_info(u"file download complete: %s" % file_relative_path)
        self.download_log(doc)
        return True
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        log.print_err("file save failed : %s, err: %s" % (file_relative_path, e))
        return False
def attach_mp4_tag(doc, file):
    """
    Write title/artist (and optionally a "163 key" comment) tags into an MP4 file.

    :type doc: Mp4Model
    :param doc: document providing ``name``, ``artists``, ``publishTime`` and
        ``download_video_r``
    :param file: value handed to ``MP4(...)`` to open the media file
    :return: True when the tags were saved, False when opening, tagging or
        saving raised an exception
    """
    tags = {
        'title': doc['name'],
        'artist': u",".join(artist['name'] for artist in doc.artists),
    }

    if Config().get_media_tag_163():
        first_artist = doc.artists[0]
        template = (u'mv:{"title":"%s","mvId":%d,"artistId":%d,'
                    u'"artistName":"%s","pubTime":"%s","bitrate":%d}')
        plaintext = template % (
            doc['name'], doc.id, first_artist.id, first_artist['name'],
            doc['publishTime'], doc['download_video_r'])
        # The comment is the fixed prefix followed by the AES-ECB output.
        tags['comment'] = "163 key(Don't modify):" + aes_ecb(plaintext, aes_code).decode()

    try:
        media = MP4(file)
        for key, value in tags.items():
            # Only keys that have an MP4 atom mapping are written.
            if key in mugagen_mp4_key_map:
                media[mugagen_mp4_key_map[key]] = value
        media.save()
    except Exception:
        return False
    return True
def sur_pls(self):
    """
    Process all playlists of the user given in the ``user`` argument.

    The playlists returned by ``api.user_playlist`` are rendered as an ASCII
    table (ID, Name, User, PlayCount) and printed; afterwards every playlist
    whose detail lookup is truthy is passed to ``playlist_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import playlist_mo

    if self.param_check(['user'], sys._getframe().f_code.co_name) is False:
        return

    download_type = self.parse_download()
    user_playlists = api.user_playlist(self.app.pargs.user)

    from terminaltables import AsciiTable
    overview = AsciiTable([["ID", "Name", "User", "PlayCount"]])
    rows = []
    for entry in user_playlists:
        rows.append([
            str(entry['id']),
            entry['name'],
            entry['creator']['nickname'],
            str(entry['playCount']),
        ])
    overview.table_data.extend(rows)

    log.print_info("playlists bellow will be crawled")
    print(overview.table)

    for entry in user_playlists:
        detail = api.get_playlist_detail(entry['id'])
        if not detail:
            continue

        log.print_info(u"<{}> author:{}".format(
            detail['name'], detail['creator']['nickname']))
        playlist_mo.parse_model(
            detail,
            download_type=download_type,
            file_check=Config().get_file_check())

    log.print_info("spider complete!~")
def scls_pls(self):
    """
    Process the top playlists of the category given in the ``cls`` argument.

    The category name is decoded with ``py2_decoding``. Unless it equals
    ``u"全部"``, its ``py2_encoding`` form must be contained in
    ``api.ALL_CLASSES``; otherwise an error is logged and nothing is done.
    Playlists come from ``api.get_top_playlists`` (``offset`` defaults to 0,
    ``limit`` to 50) and each one with a truthy detail is passed to
    ``playlist_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import playlist_mo

    if self.param_check(['cls'], sys._getframe().f_code.co_name) is False:
        return

    download_type = self.parse_download()
    pargs = self.app.pargs
    category = py2_decoding(pargs.cls)

    matches_everything = category == u"全部"
    if not matches_everything and py2_encoding(category) not in api.ALL_CLASSES:
        log.print_err(
            "class name is wrong, pls check by run : nxspider sw-pl-classes")
        return

    top_playlists = api.get_top_playlists(
        category=category,
        offset=pargs.offset or 0,
        limit=pargs.limit or 50)  # type: list

    for entry in top_playlists:
        detail = api.get_playlist_detail(entry['id'])
        if not detail:
            continue

        log.print_info(u"<{}> author:{}".format(
            detail['name'], detail['creator']['nickname']))
        playlist_mo.parse_model(
            detail,
            download_type=download_type,
            file_check=Config().get_file_check())

    log.print_info("spider complete!~")
def create_shortcut(self, doc, shortcuts_stack):
    """
    Create symlinks to the downloaded file of ``doc`` below ``<path>/shortcuts``.

    Nothing happens unless ``doc`` is marked as downloaded, its recorded
    download path exists on disk and ``self.download_filename(doc)`` is
    truthy. One link is created in the directory named by
    ``shortcuts_stack`` (when it is non-empty) and one in every directory
    returned by ``self.shortcut_self_path(doc)``.

    :param doc: document whose downloaded file should be linked
    :param shortcuts_stack: path components, relative to the shortcut root,
        of the directory that receives the link; falsy to skip it
    :type shortcuts_stack: list[str]
    :return: None
    """
    from NXSpider.utility.shortcut import symlink

    if not (hasattr(doc, model_is_download) and doc[model_is_download]
            and hasattr(doc, model_download_path)
            and os.path.exists(doc[model_download_path])
            and self.download_filename(doc)):
        return

    file_path = doc[model_download_path]
    shortcut_root = os.path.join(Config().get_path(), 'shortcuts')
    file_name = self.download_filename(doc)

    # A literal True is excluded explicitly: it is truthy but not a path.
    if not file_path or file_path is True:
        return

    def link_into(directory):
        # ``directory`` already starts with shortcut_root, so the link path
        # must NOT be joined with shortcut_root a second time: for a relative
        # root that doubled the prefix and pointed at a non-existent folder.
        os.makedirs(directory, exist_ok=True)
        symlink(file_path, os.path.join(directory, file_name))

    if shortcuts_stack:
        link_into(os.path.join(shortcut_root, *shortcuts_stack))

    for k in self.shortcut_self_path(doc):
        link_into(os.path.join(shortcut_root, k))
def config_check(self):
    """
    Show the current configuration and run ``Config().config_test()``.

    A success message is logged when the test returns a truthy value; an
    error message is logged when building the config or testing it raises.

    :return: None
    """
    self.config_show()
    try:
        config = Config()
        if config.config_test():
            log.print_info('config check complete, all is well done!')
    except Exception:
        # Only real errors count as a failed check; KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        log.print_err('config check failed, pls re config')
def stop_mvs(self):
    """
    Process the top MVs.

    The list comes from ``api.top_mvs`` (``offset`` defaults to 0, ``limit``
    to 50). The detail of every entry is fetched first; afterwards each
    truthy detail is handed to ``no_rec_mv_mo.parse_model`` with the
    download type fixed to ``['mv']``.

    :return: None
    """
    from NXSpider.bin.models import no_rec_mv_mo

    pargs = self.app.pargs
    top_entries = api.top_mvs(offset=pargs.offset or 0,
                              limit=pargs.limit or 50)

    # All details are fetched before any of them is processed.
    fetched = [api.get_mv_detail(entry['id']) for entry in top_entries]

    for mv_detail in fetched:
        if not mv_detail:
            continue
        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        no_rec_mv_mo.parse_model(
            mv_detail,
            download_type=['mv'],
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def debug_save_json(self, obj):
    """
    Dump ``obj`` as UTF-8 JSON into ``<path>/<file_type>.debug/<file_type>_<id>.json``.

    The whole operation runs inside ``tools.ignored(Exception)``
    (presumably suppressing any exception raised while building the path or
    writing -- verify against ``tools``).

    :param obj: JSON-serialisable mapping that has an ``id`` key
    :return: None
    """
    with tools.ignored(Exception):
        debug_dir = self.__file_type__ + '.debug'
        json_name = self.__file_type__ + "_" + str(obj['id']) + '.json'
        relative = os.path.join(debug_dir, json_name)
        target = os.path.join(Config().get_path(), relative)

        with codecs.open(target, "wb", encoding='utf8') as out:
            # ensure_ascii=False keeps non-ASCII characters readable
            out.write(json.dumps(obj, ensure_ascii=False))
def login_spls(self): if self.param_check(['lu', 'lp'], sys._getframe().f_code.co_name) is False: return from NXSpider.bin.models import playlist_mo plaintext_pwd = self.app.pargs.lp or None if plaintext_pwd is None: import getpass plaintext_pwd = getpass.getpass("Please input your password:"******"none"))) exit() user_id = res['account']['id'] download_type = self.parse_download() playlists = api.user_playlist(user_id, offset=self.app.pargs.offset or 0, limit=self.app.pargs.limit or 1000) log.print_info("playlists bellow will be crawled") print_playlist(playlists) for pl_obj in playlists: playlist_detail = api.get_playlist_detail(pl_obj['id']) if playlist_detail: log.print_info(u"<{}> author:{}".format( playlist_detail['name'], playlist_detail['creator']['nickname'], )) playlist_mo.parse_model( playlist_detail, download_type=download_type, file_check=Config().get_file_check(), shortcuts_stack=[] if Config().get_shortcut() else None) log.print_info("spider complete!~") pass
def config_spider(self):
    """
    Apply the spider-related command-line options to the stored configuration.

    Handled options (each only when it is not ``None``):

    * ``path_download`` -- comma-separated download paths. The first
      occurrence of ``default_path_key`` is replaced by
      ``default_download_dir``; paths that cannot be created are dropped with
      a warning; if none is left the default directory is used.
    * ``mv_resolution`` -- stored only when it is one of ``mv_resolutions``;
      any other value is skipped with a warning.
    * ``media_tag`` / ``media_tag_163`` -- ``'true'`` (any case) or ``'1'``
      mean True, everything else False.

    The config file is saved when at least one option was applied, and the
    resulting configuration is always shown at the end.

    :raises Exception: any error raised while interpreting the options is
        logged and re-raised
    :return: None
    """
    config = Config()
    config_dict = config.config  # type: dict
    pargs = self.app.pargs
    is_config = False

    try:
        if pargs.path_download is not None:
            paths = pargs.path_download.split(',')  # type: list
            if default_path_key in paths:
                paths[paths.index(default_path_key)] = default_download_dir

            final_paths = []
            for p in paths:
                try:
                    # some error need pass
                    if os.path.isdir(p) is False:
                        os.mkdir(p)
                    final_paths.append(p)
                except Exception:
                    log.print_warn("path may be wrong and be deleted: {}".format(p))

            if not final_paths:
                final_paths.append(default_download_dir)

            log.print_info('path will be set as: ' + ','.join(final_paths))
            config_dict['download_path'] = final_paths
            is_config = True

        if pargs.mv_resolution is not None:
            r = int(pargs.mv_resolution)
            if r in mv_resolutions:
                config_dict['mv_def_resolution'] = r
                is_config = True
            else:
                # The warning announces a skip, so the invalid value must
                # not be written to the configuration.
                log.print_warn("-mvr resolution config skip, value must be 240,480,720,1080")

        if pargs.media_tag is not None:
            config_dict['media_tag'] = (pargs.media_tag.lower() == 'true'
                                        or pargs.media_tag == '1')
            is_config = True

        if pargs.media_tag_163 is not None:
            config_dict['media_tag_163'] = (pargs.media_tag_163.lower() == 'true'
                                            or pargs.media_tag_163 == '1')
            is_config = True
    except Exception:
        log.print_err("input error, pls check")
        raise

    if is_config:
        config.save_config_file()
        log.print_info("config success")
    self.config_show()
def smp3s(self):
    """
    Process every mp3 id listed in the comma-separated ``mp3`` argument.

    All ids are resolved in one ``api.get_mp3_details`` call; each returned
    detail is logged by name and passed to ``dw_mp3_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import dw_mp3_mo

    command_name = sys._getframe().f_code.co_name
    if self.param_check(['mp3'], command_name) is False:
        return

    download_type = self.parse_download()
    requested_ids = self.app.pargs.mp3.split(',')  # type: list
    found = api.get_mp3_details(requested_ids)

    for _, mp3_detail in found.items():
        log.print_info(u"<{}>".format(mp3_detail['name']))

        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        dw_mp3_mo.parse_model(
            mp3_detail,
            download_type=download_type,
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def url_load(self, doc):
    """
    Resolve the video link for ``doc``.

    The resolution is chosen by ``get_target_r`` from the configured MV
    resolution, stored on ``doc`` under ``download_video_r`` and then used
    to request the link via ``get_video_link``.

    :param doc: document providing ``id`` and the data ``get_target_r`` needs
    :return: the value returned by ``get_video_link``, or None when any step
        raised an exception
    :rtype: str
    """
    try:
        target_r = get_target_r(doc, Config().get_mv_resolution())
        doc['download_video_r'] = target_r
        return get_video_link(doc['id'], target_r)
    except Exception:
        # Best effort: a failed lookup yields None. KeyboardInterrupt and
        # SystemExit are not caught, so the user can still abort the lookup.
        return None
def sar_albums(self):
    """
    Process the albums of every artist id in the comma-separated ``artist`` argument.

    Per artist, ``api.get_artist_album`` is queried (``offset`` defaults to
    0, ``limit`` to 50); a ``None`` result skips the artist. The detail of
    each entry in ``hotAlbums`` is fetched, falsy details are dropped, and
    the remaining list is stored on the artist detail under ``albums``,
    printed with ``print_albums`` and passed on to
    ``artist_album_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import artist_album_mo

    command_name = sys._getframe().f_code.co_name
    if self.param_check(['artist'], command_name) is False:
        return

    download_type = self.parse_download()
    pargs = self.app.pargs

    for artist_id in pargs.artist.split(','):
        listing = api.get_artist_album(artist_id,
                                       offset=pargs.offset or 0,
                                       limit=pargs.limit or 50)
        if listing is None:
            continue

        artist = listing['artist']
        fetched = [api.get_album_detail(album['id'])
                   for album in listing['hotAlbums']]
        artist['albums'] = [album for album in fetched if album]

        log.print_info(u"<{}>".format(artist['name']))
        log.print_info("albums bellow will be crawled")
        print_albums(artist['albums'])

        file_check = Config().get_file_check()
        # shortcuts_stack is [] when the shortcut option is set, otherwise None
        shortcuts = [] if Config().get_shortcut() else None
        artist_album_mo.parse_model(
            artist,
            download_type=download_type,
            file_check=file_check,
            shortcuts_stack=shortcuts)

    log.print_info("spider complete!~")
def attach_mp3_idv3(doc, file):
    """
    Write ID3 tags describing ``doc`` into the mp3 file ``file``.

    Title, artist, album, album artist and track number are always written;
    a "163 key" comment is added when ``Config().get_media_tag_163()`` is
    truthy. Tags are loaded and saved with ``v2_version=3``.

    :type doc: Mp3Model
    :param doc: document providing ``name``, ``artists``, ``album`` and ``no``
    :param file: value handed to ``ID3(...)`` to open the media file
    :return: True when the tags were saved, False when opening, tagging or
        saving raised an exception
    """
    # str.join copes with an empty artist list (yielding ''), whereas
    # reduce() without an initial value raises TypeError on it.
    artists_str = ",".join('["%s",%d]' % (x['name'], x.id) for x in doc.artists)
    authors = ','.join(x['name'] for x in doc.artists)

    data = {
        'title': doc['name'],
        'artist': authors,
        'album': doc['album']['name'],
        'album_artist': authors,
        'track_num': str(doc['no']),
    }

    if Config().get_media_tag_163():
        comment_plaintext = u'music:{"musicId":%d,"musicName":"%s","bitrate":320000,' \
                            u'"albumId":%d,"album":"%s", "artist":[%s]}' \
                            % (doc.id, doc['name'], doc.album.id, doc.album['name'], artists_str)
        # The comment is the fixed prefix followed by the AES-ECB output.
        comment = "163 key(Don't modify):" + aes_ecb(comment_plaintext, aes_code).decode()
        data['comment'] = comment

    try:
        mp3 = ID3(file, v2_version=3)
        for k, v in data.items():
            if k not in mutagen_idv3_key_map:
                continue

            # Comments need a COMM frame with a language; other keys map to
            # a frame class looked up by name on the id3 module.
            if k == 'comment':
                mp3.add(id3.COMM(lang='XXX', text=v))
                continue

            attr_type = getattr(id3, mutagen_idv3_key_map[k], None)
            if attr_type:
                mp3.add(attr_type(text=v))
        mp3.save(v2_version=3)
    except Exception:
        return False
    return True
def download_check(self, doc, check_file=False):
    """
    Tell whether ``doc`` has already been downloaded.

    Without ``check_file`` only the downloaded flag stored on ``doc`` is
    consulted. With ``check_file`` every path from ``Config().get_paths()``
    is searched for the document's relative file path; the outcome is
    recorded through ``self.download_log``.

    :param doc: document to check
    :param check_file: look for the file on disk instead of trusting the flag
    :type doc: DynamicDocument
    :return: with ``check_file``: the first existing file path, or False when
        none exists; otherwise the result of the flag test on ``doc``
    """
    if not check_file:
        return hasattr(doc, model_is_download) and doc[model_is_download]

    relative = self.download_relative_path(doc)
    candidates = (os.path.join(root, relative)
                  for root in Config().get_paths())
    found = next((c for c in candidates if os.path.exists(c)), None)

    if found is None:
        self.download_log(doc, downloaded=False)
        return False

    self.download_log(doc, download_path=found)
    return found
def sar_albums(self):
    """
    Process the albums of every artist id in the comma-separated ``artist`` argument.

    Per artist, ``api.get_artist_album`` is queried (``offset`` defaults to
    0, ``limit`` to 50); a ``None`` result skips the artist. The truthy
    details of the ``hotAlbums`` entries are stored under ``albums`` on the
    artist detail, printed as an ASCII table (ID, Album, Artist, ArtistID)
    and passed on to ``artist_album_mo.parse_model``.

    :return: None
    """
    from NXSpider.bin.models import artist_album_mo

    command_name = sys._getframe().f_code.co_name
    if self.param_check(['artist'], command_name) is False:
        return

    download_type = self.parse_download()
    pargs = self.app.pargs

    for artist_id in pargs.artist.split(','):
        listing = api.get_artist_album(artist_id,
                                       offset=pargs.offset or 0,
                                       limit=pargs.limit or 50)
        if listing is None:
            continue

        artist = listing['artist']
        fetched = [api.get_album_detail(album['id'])
                   for album in listing['hotAlbums']]
        artist['albums'] = [album for album in fetched if album]

        from terminaltables import AsciiTable
        overview = AsciiTable([["ID", "Album", "Artist", "ArtistID"]])
        rows = []
        for album in artist['albums']:
            rows.append([
                str(album['id']),
                album['name'],
                ','.join([ar['name'] for ar in album['artists']]),
                ','.join([str(ar['id']) for ar in album['artists']]),
            ])
        overview.table_data.extend(rows)

        log.print_info(u"<{}>".format(artist['name']))
        log.print_info("albums bellow will be crawled")
        print(overview.table)

        artist_album_mo.parse_model(
            artist,
            download_type=download_type,
            file_check=Config().get_file_check())

    log.print_info("spider complete!~")
def config_mongo(self):
    """
    Apply the MongoDB-related command-line options to the stored configuration.

    ``mhost``, ``mport`` (converted to int), ``muser``, ``mpassword`` and
    ``mdbname`` are written into the ``mongo`` section; setting ``mhost``
    also sets ``no_mongo`` to False. ``nomongo`` sets ``no_mongo`` to True
    for ``'true'`` (any case) or ``'1'`` and to False otherwise. Options that
    are ``None`` are left untouched.

    The config file is saved when at least one option was applied, and the
    resulting configuration is always shown at the end. Any error raised
    while interpreting the options is logged and re-raised.

    :return: None
    """
    config = Config()
    config_dict = config.config  # type: dict
    mongo_key = 'mongo'
    pargs = self.app.pargs
    is_config = False

    # (argument name, key inside the mongo section, converter)
    plain_options = (
        ('mport', 'port', int),
        ('muser', 'username', None),
        ('mpassword', 'password', None),
        ('mdbname', 'name', None),
    )

    try:
        if pargs.mhost is not None:
            config_dict[mongo_key]['host'] = pargs.mhost
            config_dict['no_mongo'] = False
            is_config = True

        for arg_name, section_key, convert in plain_options:
            raw = getattr(pargs, arg_name)
            if raw is None:
                continue
            config_dict[mongo_key][section_key] = convert(raw) if convert else raw
            is_config = True

        if pargs.nomongo is not None:
            config_dict['no_mongo'] = (pargs.nomongo.lower() == 'true'
                                       or pargs.nomongo == '1')
            is_config = True
    except:
        log.print_err("input error, pls check")
        raise

    if is_config:
        config.save_config_file()
        log.print_info("config success")
    self.config_show()
def get_target_r(obj, limit_r=None):
    """
    Pick the resolution to download: the best one available, capped by ``limit_r``.

    :param obj: mapping whose ``resolutions`` entry is a non-empty iterable
        of mappings, each with a ``resolution`` value
    :param limit_r: upper bound for the resolution; when omitted (``None``)
        the configured MV resolution is read at call time, so configuration
        changes made after import are honoured
    :return: ``limit_r`` when the highest available resolution exceeds it,
        otherwise the highest available resolution
    :raises ValueError: when ``obj['resolutions']`` is empty
    """
    if limit_r is None:
        # Resolved per call: a default expression in the signature would be
        # evaluated only once, when the function is defined.
        limit_r = Config().get_mv_resolution()

    max_valid = max(x['resolution'] for x in obj['resolutions'])
    return min(limit_r, max_valid)
def download_file_tag(self, filename, doc):
    """
    Attach media tags to ``filename`` when tagging applies.

    Tagging is done only if this object's ``__file_type__`` is ``mp3``,
    ``mp4`` or ``mv`` and ``Config().get_media_tag()`` is truthy.

    :param filename: path of the downloaded file
    :param doc: document describing the media, passed to ``attach_media_tag``
    :return: None
    """
    taggable = self.__file_type__ in ['mp3', 'mp4', 'mv']
    if not taggable:
        return
    if Config().get_media_tag():
        attach_media_tag(doc, filename)
def config_clear(self):
    """
    Reset the configuration via ``Config().config_reset()`` and show the result.

    :return: None
    """
    config = Config()
    config.config_reset()
    log.print_info("config has been reset, u need re-config from beginning pls")
    self.config_show()
#!/usr/bin/env python # -*- coding: utf-8 -*- # # created by Lipson on 2018/6/20. # email to [email protected] # from NXSpider.common.config import Config if Config().get_no_mongo(): from NXSpider.model.dict_model import * else: from NXSpider.model.mongo_model import * model_download_url = 'download_url' model_is_download = 'downloaded' model_download_path = 'download_path' __all__ = [ 'ConfigModel', 'UserModel', 'AlbumModel', 'PlaylistModel', 'Mp4Model', 'VideoModel', 'ArtistModel', 'Mp3Model', 'AuthorModel', 'update_dynamic_doc', 'model_download_url', 'model_is_download', 'get_one_model_by_key',
import os import shutil from NXSpider.common.config import Config from NXSpider.model.mongo_model import Mp3Model, Mp4Model from NXSpider.spider.mp3 import Mp3 from NXSpider.spider.mv import MV from NXSpider.common import tools, log, constant from NXSpider.spider.base_driver import Music163Obj from NXSpider.utility.media_tag import attach_media_tag media_types = { 'mp3': [Mp3Model, Mp3], 'mp4': [Mp4Model, MV], } paths = Config().get_paths() def attach_media_idv3_by_db(): """ attach media idv3 by paths in config :return: """ for suffix, type_setting in media_types: model, driver = type_setting # type: Mp3Model or Mp4Model, Music163Obj objs = model.objects(downloaded=True) for obj in objs: file_path = driver.download_check(obj, check_file=True) if not file_path: continue
# # created by Lipson on 2018/4/19. # email to [email protected] # from datetime import datetime import pymongo from mongoengine import connect, DynamicDocument, Document, signals from mongoengine.fields import * from pymongo.errors import ServerSelectionTimeoutError from NXSpider.common import log from NXSpider.common.config import Config from NXSpider.model.export import * mongodb_conf = Config().get_mongo() try: client = pymongo.MongoClient(host=mongodb_conf['host'], port=mongodb_conf['port'], connectTimeoutMS=3000, serverSelectionTimeoutMS=3000) test_connect = client.database.test.count() del client except ServerSelectionTimeoutError as e: log.print_err("mongodb server config error") exit() model_download_url = 'download_url' model_is_download = 'downloaded'
def config_show(self):
    """
    Print the current configuration as indented JSON.

    :return: None
    """
    current = Config().config
    log.print_info("config will be show fellow:")
    # ensure_ascii=False keeps non-ASCII characters readable in the output
    rendered = json.dumps(current, ensure_ascii=False, indent=1)
    print(rendered)