Example #1
0
    def download_file(self, doc):
        """
        Fetch the file that belongs to ``doc`` and store it on disk.

        The bytes come from ``self.request_file``; they are written to the
        relative path given by ``self.download_relative_path`` below
        ``Config().get_path()``.  After writing, the file is passed to
        ``self.download_file_tag`` and the download is recorded through
        ``self.download_log``.

        :param doc: object handed to the helper methods above
        :return: True when the file was written and post-processed,
                 False when nothing was downloaded or saving failed
        :rtype: bool
        """
        relative_path = self.download_relative_path(doc)
        base_path = Config().get_path()
        payload = self.request_file(doc)

        # no payload, nothing to save
        if payload is None:
            log.print_err(u"file download failed : %s" % relative_path)
            return False

        try:
            target = os.path.join(base_path, relative_path)

            # create the parent directory on first use
            parent = os.path.dirname(target)
            if not os.path.exists(parent):
                os.makedirs(parent)

            # dump the raw bytes
            with open(target, "wb") as out:
                out.write(payload)

            self.download_file_tag(target, doc)

            log.print_info(u"file download complete: %s" % relative_path)
            self.download_log(doc)
        except Exception as err:
            log.print_err("file save failed : %s, err: %s" %
                          (relative_path, err))
            return False
        return True
Example #2
0
    def search(self):
        """
        Search by exactly one type and print the matches.

        The search type is the key of ``search_types`` whose command line
        argument is set; ``'mp3'`` is used when none is set.  Asking for more
        than one type is rejected with an error message and nothing is
        searched.  Results are printed by the function registered for the
        type in ``PRINT_ATTR_FUNC_MAP``.

        :return: None
        """
        requested = [k for k in search_types
                     if getattr(self.app.pargs, k, None)]
        if len(requested) > 1:
            # refuse instead of silently picking one of the requested types
            log.print_err("it could search by only one type")
            return
        search_key = requested[0] if requested else 'mp3'

        # input must be decoded in python2
        search_value = py2_decoding(getattr(self.app.pargs, search_key))

        res = api.search(search_value, stype=search_key,
                         offset=self.app.pargs.offset or 0,
                         limit=self.app.pargs.limit or 50)

        if not res:
            log.print_info("nothing found!")
            return

        if search_key in PRINT_ATTR_FUNC_MAP:
            # each entry holds (key of the result list, print function)
            entry = PRINT_ATTR_FUNC_MAP[search_key]
            printer = entry[1]   # type: function
            items = res.get(entry[0], [])   # type: list
            printer(items)
Example #3
0
    def scls_pls(self):
        """
        Crawl the top playlists of one playlist class.

        Needs the ``cls`` argument.  The class must either be the catch-all
        class or be listed in ``api.ALL_CLASSES``; otherwise an error is
        printed and nothing is crawled.  Every playlist whose detail can be
        fetched is passed to ``playlist_mo.parse_model``.
        """
        from NXSpider.bin.models import playlist_mo

        if self.param_check(['cls'], sys._getframe().f_code.co_name) is False:
            return

        download_type = self.parse_download()
        class_name = py2_decoding(self.app.pargs.cls)

        # the catch-all class is always accepted, any other must be known
        known_class = (class_name == u"全部"
                       or py2_encoding(class_name) in api.ALL_CLASSES)
        if not known_class:
            log.print_err(
                "class name is wrong, pls check by run : nxspider sw-pl-classes"
            )
            return

        playlists = api.get_top_playlists(
            category=class_name,
            offset=self.app.pargs.offset or 0,
            limit=self.app.pargs.limit or 50,
        )  # type: list

        for entry in playlists:
            detail = api.get_playlist_detail(entry['id'])
            if not detail:
                # detail could not be fetched, move on to the next playlist
                continue
            log.print_info(u"<{}> author:{}".format(
                detail['name'],
                detail['creator']['nickname'],
            ))
            playlist_mo.parse_model(detail,
                                    download_type=download_type,
                                    file_check=Config().get_file_check())
        log.print_info("spider complete!~")
Example #4
0
    def login_smv(self):
        # Crawl the MVs listed by api.my_mvs for a session: fetch the detail of
        # each one and hand it to no_rec_mv_mo.parse_model with
        # download_type=['mv'].
        from NXSpider.bin.models import no_rec_mv_mo

        # abort when the required 'lu' argument is missing
        if self.param_check(['lu'], sys._getframe().f_code.co_name) is False:
            return

        # fall back to an interactive prompt when 'lp' was not given
        plaintext_pwd = self.app.pargs.lp or None
        if plaintext_pwd is None:
            import getpass
            # NOTE(review): the next line looks truncated/redacted in this copy;
            # it does not parse, and `session` used below is never assigned in
            # the visible code. Restore the login call from the upstream source.
            plaintext_pwd = getpass.getpass("Please input your password:"******"none")))
            exit()

        # list the MVs, then load the detail of each one
        mvs = api.my_mvs(session)
        mvs = [api.get_mv_detail(d['id']) for d in mvs]
        # drop the entries whose detail came back empty
        mvs = [d for d in mvs if d]

        for mv in mvs:
            no_rec_mv_mo.parse_model(
                mv,
                download_type=['mv'],
                file_check=Config().get_file_check(),
                # a fresh stack is passed only when shortcuts are enabled
                shortcuts_stack=[] if Config().get_shortcut() else None)

        log.print_info("spider complete!~")

        pass
Example #5
0
 def config_check(self):
     """
     Show the current config, run ``Config.config_test`` on it and report.

     A success line is printed when the test returns a true value; an error
     line is printed when anything is raised while testing.
     """
     self.config_show()
     try:
         if Config().config_test():
             log.print_info('config check complete, all is well done!')
     except:  # noqa: E722 -- bare on purpose: also traps SystemExit/KeyboardInterrupt
         log.print_err('config check failed, pls re config')
Example #6
0
    def config_spider(self):
        """
        Update the spider settings from the command line and persist them.

        Handled arguments (each one is ignored when it is ``None``):

        * ``path_download``: comma separated download directories.  The first
          ``default_path_key`` entry is replaced by ``default_download_dir``;
          directories that neither exist nor can be created are dropped, and
          ``default_download_dir`` is used when none is left.
        * ``mv_resolution``: stored only when it is one of ``mv_resolutions``;
          otherwise a warning is printed and the stored value is kept.
        * ``media_tag`` / ``media_tag_163``: ``'true'`` (any case) or ``'1'``
          mean True, everything else False.

        The config file is saved only when at least one setting was changed;
        the resulting config is shown at the end.

        :raises Exception: an error hit while reading the arguments is logged
            and re-raised
        """
        config = Config()
        config_dict = config.config  # type: dict
        is_config = False
        try:
            pargs = self.app.pargs

            if pargs.path_download is not None:
                paths = pargs.path_download.split(',')  # type: list
                if default_path_key in paths:
                    # swap the placeholder for the real default directory
                    paths[paths.index(default_path_key)] = default_download_dir

                final_paths = []
                for p in paths:
                    try:
                        if not os.path.isdir(p):
                            os.mkdir(p)
                        final_paths.append(p)
                    except Exception:
                        # best effort: an unusable path is dropped, not fatal
                        log.print_warn("path may be wrong and be deleted: {}".format(p))

                if not final_paths:
                    final_paths.append(default_download_dir)

                log.print_info('path will be set as: ' + ','.join(final_paths))

                config_dict['download_path'] = final_paths
                is_config = True

            if pargs.mv_resolution is not None:
                r = int(pargs.mv_resolution)
                if r in mv_resolutions:
                    config_dict['mv_def_resolution'] = r
                    is_config = True
                else:
                    # really skip it, as the warning says
                    log.print_warn("-mvr resolution config skip, value must be 240,480,720,1080")

            if pargs.media_tag is not None:
                config_dict['media_tag'] = pargs.media_tag.lower() in ('true', '1')
                is_config = True

            if pargs.media_tag_163 is not None:
                config_dict['media_tag_163'] = pargs.media_tag_163.lower() in ('true', '1')
                is_config = True

        except Exception:
            log.print_err("input error, pls check")
            raise
        if is_config:
            config.save_config_file()
        log.print_info("config success")
        self.config_show()
Example #7
0
def api_request(url,
                data=None,
                method="get",
                json=True,
                session=None,
                headers=headers,
                encrypt=True,
                https=False):
    """
    Send one API request and return the decoded response.

    :param url: path appended to ``base_url`` (or ``base_https_url``)
    :param data: request payload; never modified in place
    :param method: ``"get"`` is only kept for a request without payload,
        every other combination is sent as POST
    :param json: parse the response as JSON when true, return the text otherwise
    :param session: session to send the request with; the ``requests`` module
        itself is used when omitted
    :type session:  requests.Session
    :param headers: request headers
    :param encrypt: pass the payload through ``encrypted_request`` first
    :param https: use ``base_https_url`` instead of ``base_url``
    :return: parsed JSON or response text; ``{}`` when the request fails or
        the body is not valid JSON
    """
    url = base_https_url + url if https else base_url + url
    request_obj = session or requests

    # forward the csrf token stored in the session cookies
    if isinstance(request_obj, requests.Session):
        for cookie in request_obj.cookies:
            if cookie.name == '__csrf':
                # copy first: `data` may be None, and the caller's dict must
                # not be changed behind its back
                data = dict(data) if data else {}
                data['csrf_token'] = cookie.value
                break

    # encrypt
    if encrypt:
        data = encrypted_request(data)

    method = 'get' if not data and method == 'get' else 'post'
    request_method = getattr(request_obj, method, None) or request_obj.get
    try:
        req = request_method(url, data=data, headers=headers, timeout=10)
        req.encoding = "UTF-8"
        return req.json() if json else req.text
    except ValueError:
        # raised by req.json() when the body is not JSON
        log.print_err("api do not return a valuable json")
        return {}
    except requests.exceptions.RequestException:
        log.print_warn("request error: %s" % url)
        return {}
Example #8
0
    def login_spls(self):
        # Crawl the playlists of a user: list them with api.user_playlist,
        # print them, then hand the detail of each one to
        # playlist_mo.parse_model.

        # abort when one of the required arguments 'lu' / 'lp' is missing
        if self.param_check(['lu', 'lp'],
                            sys._getframe().f_code.co_name) is False:
            return

        from NXSpider.bin.models import playlist_mo

        # fall back to an interactive prompt when 'lp' is empty
        plaintext_pwd = self.app.pargs.lp or None
        if plaintext_pwd is None:
            import getpass
            # NOTE(review): the next line looks truncated/redacted in this copy;
            # it does not parse, and `res` used below is never assigned in the
            # visible code. Restore the login call from the upstream source.
            plaintext_pwd = getpass.getpass("Please input your password:"******"none")))
            exit()

        user_id = res['account']['id']
        download_type = self.parse_download()
        playlists = api.user_playlist(user_id,
                                      offset=self.app.pargs.offset or 0,
                                      limit=self.app.pargs.limit or 1000)

        log.print_info("playlists bellow will be crawled")
        print_playlist(playlists)

        for pl_obj in playlists:
            playlist_detail = api.get_playlist_detail(pl_obj['id'])
            # playlists whose detail came back empty are skipped
            if playlist_detail:
                log.print_info(u"<{}> author:{}".format(
                    playlist_detail['name'],
                    playlist_detail['creator']['nickname'],
                ))
                playlist_mo.parse_model(
                    playlist_detail,
                    download_type=download_type,
                    file_check=Config().get_file_check(),
                    # a fresh stack is passed only when shortcuts are enabled
                    shortcuts_stack=[] if Config().get_shortcut() else None)
        log.print_info("spider complete!~")
        pass
Example #9
0
def get_one_model_by_key(model, model_id):
    """
    Fetch the document with the given id, or build a new unsaved one.

    :type model: DynamicDocument
    :param model: document class to query and, if needed, instantiate
    :param model_id: id to look up
    :return: ``(doc, is_new)``; ``(None, True)`` when the lookup raised
    :rtype: (DynamicDocument, boolean)
    """
    try:
        existing = model.objects(id=model_id).first()
        is_new = existing is None
        # nothing stored under this id yet: start from an empty document
        doc = model(id=model_id) if is_new else existing
    except Exception as err:
        log.print_err('load a doc err: %s' % err)
        return None, True
    return doc, is_new
Example #10
0
    def param_check(self, params, func_name):
        """
        Verify that every required command line parameter was supplied.

        On the first parameter whose value is ``None`` an error is logged,
        followed by the help text of the command when there is one.  The help
        text is read from the ``__cement_meta__`` of the method named
        ``func_name``.

        :param params: names of the required parameters
        :param func_name: name of the command method, used to find its help
        :return: True when all parameters are set, False otherwise
        """
        handler = getattr(self, func_name, None)
        meta = getattr(handler, '__cement_meta__', None) if handler else None
        usage = meta['help'] if meta else None

        for name in params:
            if getattr(self.app.pargs, name, None) is not None:
                continue
            log.print_err("param {} miss, see help:".format(name))
            if usage:
                print(usage)
            return False
        return True
Example #11
0
    def config_mongo(self):
        """
        Update the mongo settings from the command line and persist them.

        ``mhost``, ``mport``, ``muser``, ``mpassword`` and ``mdbname`` are
        written into the ``mongo`` section (the port as an int); ``nomongo``
        sets the ``no_mongo`` flag.  Arguments that are ``None`` are ignored.
        The config file is saved only when something was set, and the
        resulting config is shown at the end.

        Any error hit while reading the arguments is logged and re-raised.
        """
        config = Config()
        settings = config.config  # type: dict
        changed = False
        # (command line argument, key inside the mongo section, converter)
        mongo_options = (
            ('mhost', 'host', None),
            ('mport', 'port', int),
            ('muser', 'username', None),
            ('mpassword', 'password', None),
            ('mdbname', 'name', None),
        )
        try:
            pargs = self.app.pargs
            for arg_name, mongo_attr, convert in mongo_options:
                raw = getattr(pargs, arg_name)
                if raw is None:
                    continue
                settings['mongo'][mongo_attr] = convert(raw) if convert else raw
                if arg_name == 'mhost':
                    # setting a host also clears the no_mongo flag
                    settings['no_mongo'] = False
                changed = True

            # handled last so an explicit value wins over the host default
            flag = pargs.nomongo
            if flag is not None:
                settings['no_mongo'] = flag.lower() == 'true' or flag == '1'
                changed = True
        except:  # noqa: E722 -- everything is logged, then re-raised untouched
            log.print_err("input error, pls check")
            raise
        if changed:
            config.save_config_file()
        log.print_info("config success")
        self.config_show()
Example #12
0
def crawl_playlist_by_page(page,
                           dtype="全部",
                           download_type=None,
                           save=True,
                           file_check=True):
    """
    Crawl one page of the hot playlist listing and parse every playlist on it.

    :param page: page number; it is multiplied by 35 to get the listing offset
    :param dtype: playlist category put into the ``cat`` query parameter
    :param download_type: passed on to ``Playlist.parse_model``;
        ``['mp3', 'mv']`` when omitted
    :param save: passed on to ``Playlist.parse_model``
    :param file_check: passed on to ``Playlist.parse_model``
    :return: titles of the playlists that were parsed
    :rtype: list
    :raises Exception: a crawl error is logged and re-raised
    """
    if download_type is None:
        # built per call so the default list is never shared between calls
        download_type = ['mp3', 'mv']

    play_url = "http://music.163.com/discover/playlist/?order=hot&cat={}&limit=35&offset={}"
    play_url = play_url.format(dtype, page * 35)
    titles = []
    try:
        acmsk = {'class': 'msk'}
        dcu = {'class': 'u-cover u-cover-1'}
        ucm = {'class': 'm-cvrlst f-cb'}
        data = tools.curl(play_url, headers, type=RETURE_HTML)
        lst = data.find('ul', ucm)
        for play in lst.find_all('div', dcu):
            anchor = play.find('a', acmsk)
            title = anchor['title']
            link = anchor['href'].replace("/playlist?id=", "")

            playlist_detail = get_playlist_detail(link)
            if not playlist_detail:
                # no detail for this playlist: skip it, keep crawling the page
                continue

            # the log line is cosmetic, never let it stop the crawl
            with tools.ignored(Exception):
                log.print_info("%s author:%s" % (
                    "<" + playlist_detail['name'] + ">",
                    tools.encode(playlist_detail['creator']['nickname']),
                ))

            playlist_mo = Playlist()
            playlist_mo.parse_model(playlist_detail,
                                    save=save,
                                    download_type=download_type,
                                    file_check=file_check)
            titles.append(title)

        return titles
    except Exception as e:
        log.print_err("crawl html error:{} type:{} page:{}".format(
            e, dtype, page))
        raise
Example #13
0
from NXSpider.common import log
from NXSpider.common.config import Config
from NXSpider.model.export import *

mongodb_conf = Config().get_mongo()

# Probe the configured MongoDB server at import time and stop the program
# when it cannot be reached within the 3 second timeouts.
client = None
try:
    client = pymongo.MongoClient(host=mongodb_conf['host'],
                                 port=mongodb_conf['port'],
                                 connectTimeoutMS=3000,
                                 serverSelectionTimeoutMS=3000)
    # `ping` forces a round trip to the server; Collection.count(), used here
    # before, no longer exists in PyMongo 4
    test_connect = client.admin.command('ping')
except ServerSelectionTimeoutError:
    log.print_err("mongodb server config error")
    exit()
finally:
    # release the probe connection whether or not the check succeeded
    if client is not None:
        client.close()
del client

# names of the document attributes used for download bookkeeping
model_download_url = 'download_url'
model_is_download = 'downloaded'


def field_value(field, value):
    """
    Converts a supplied value to the type required by the field.
    If the field requires a EmbeddedDocument the EmbeddedDocument
    is created and updated using the supplied data.
    :param field:
    :param value:
    :return:
    """
Example #14
0
def create_params_by_dict(obj):
    """
    Serialize ``obj`` to JSON and pass the text to ``create_params_text``.

    :param obj: JSON serializable object
    :return: result of ``create_params_text``, or None when anything failed
    """
    try:
        serialized = json.dumps(obj)
        return create_params_text(serialized)
    except Exception as err:
        log.print_err('create params error: %s' % err)
    return None
Example #15
0
    def parse_model(self,
                    crawl_dict,
                    download_type=None,
                    file_check=False,
                    save=True,
                    debug=False,
                    shortcuts_stack=None):
        """
        Turn a crawled dict into a document: load or create it, update it,
        try the download and optionally save it.

        Processing order:

        1. load the document for ``crawl_dict['id']`` (or create a new one)
        2. copy the crawled attributes, skipping those in
           ``__model_rfilter__`` and converting/renaming those registered in
           ``__attrs_replace_fucs__`` / ``__attrs_replace_map__``
        3. parse nested objects with the parsers in ``__parse_recursion__``
        4. write the result into the document, call ``pre_save`` and
           ``try_download``, handle shortcuts, save

        :param crawl_dict: crawled data, must have an ``id`` key
        :param download_type: passed to ``try_download`` and to nested parsers
        :param file_check: passed to ``try_download`` and to nested parsers
        :param save: call ``doc.save()`` at the end
        :param debug: call ``debug_print`` on the crawled data at the end
        :param shortcuts_stack: stack of shortcuts path, shared with nested
            parsers; ``None`` disables shortcut handling
        :type crawl_dict: dict
        :type shortcuts_stack: list[str]
        :type save: bool
        :return: the document, or None when the id is missing or the
            document could not be loaded
        :rtype: DynamicDocument
        """
        if debug:
            # self.debug_save_json(crawl_dict)
            pass

        # the id is mandatory, it is the lookup key of the document
        if 'id' not in crawl_dict:
            log.print_err(u"can not load id by json obj %s" %
                          json.dumps(crawl_dict))
            return None
        doc_id = crawl_dict['id']

        # load the stored document, or get a fresh one for this id
        doc, is_new_doc = get_one_model_by_key(self.__model_name__, doc_id)
        if doc is None:
            log.print_err(u"can not load a doc by obj %s_%d" %
                          (self.__file_type__, doc_id))
            return None

        # push this document's shortcut name while its children are parsed
        # NOTE(review): the name is computed before the doc is updated below,
        # while the matching pop re-evaluates it after the update -- confirm
        # both calls always agree
        if shortcuts_stack is not None and isinstance(shortcuts_stack, list) \
                and self.shortcut_relative_name(doc):
            shortcuts_stack.append(self.shortcut_relative_name(doc))

        # if is_new_doc:
        # build the attribute dict: filter, convert and rename
        obj = dict()
        for k, v in crawl_dict.items():
            # filtered attributes are dropped entirely
            if k in self.__model_rfilter__:
                continue
            obj[k] = v

            # attributes without a replace function are copied as they are
            if k not in self.__attrs_replace_fucs__:
                continue

            # convert the value, element by element for lists
            if isinstance(v, list):
                v = [self.__attrs_replace_fucs__[k](self, x) for x in v]
            else:
                v = self.__attrs_replace_fucs__[k](self, v)

            # store the converted value under its new key name
            del obj[k]
            obj[self.__attrs_replace_map__[k]] = v

        # recursively parse nested objects with their own model parser
        for k, v in self.__parse_recursion__.items():
            if k not in obj:
                continue

            if isinstance(obj[k], list):
                obj[k] = [
                    v.parse_model(x,
                                  save=save,
                                  download_type=download_type,
                                  file_check=file_check,
                                  debug=debug,
                                  shortcuts_stack=shortcuts_stack)
                    for x in obj[k]
                ]
            elif isinstance(obj[k], dict):
                obj[k] = v.parse_model(obj[k],
                                       save=save,
                                       download_type=download_type,
                                       file_check=file_check,
                                       debug=debug,
                                       shortcuts_stack=shortcuts_stack)

        # update json to doc, this must be after recursion
        update_dynamic_doc(doc, obj)

        # let the subclass adjust the document before it is stored
        self.pre_save(doc, crawl_dict)

        # try download
        self.try_download(doc, download_type, file_check)

        # skipped when the stack is None or empty; otherwise take this
        # document's own name off the stack and create its shortcut relative
        # to what is left
        if shortcuts_stack:
            if self.shortcut_relative_name(doc):
                shortcuts_stack.pop()
            self.create_shortcut(doc, shortcuts_stack)

        # save document, only for objects that actually offer save()
        if save and callable(getattr(doc, 'save', None)):
            doc.save()

        if debug:
            self.debug_print(crawl_dict)

        return doc
Example #16
0
    def config_test(self):
        """
        Validate the loaded config and repair what can be repaired.

        Steps, in order:

        * when mongo is enabled: check the mongo keys, import the mongo model
          module (per the original note its import connects to the server and
          exits on a wrong server config) and check that the spider keys exist
        * reset every value of the wrong type to its default
        * drop download paths that neither exist nor can be created
        * reset an unsupported ``mv_def_resolution`` to its default

        The config file is rewritten when something had to be repaired.

        :return: True when nothing was wrong; False when a problem was found
                 or the check itself raised
        :rtype: bool
        """
        result = True
        try:

            # check mongodb config
            if self.config['no_mongo'] is False:
                log.print_info('check mongodb config')
                mongo = self.config['mongo']
                for k in ['name', 'host', 'port']:
                    if k not in mongo:
                        log.print_err(
                            "mongo config error, key mongo.{} is not set yet".
                            format(k))
                        result = False

                # try import model, which will connect to server and exit if server config wrong
                import NXSpider.model.mongo_model

                for k in [
                        'download_path', 'mv_def_resolution', 'media_tag',
                        'media_tag_163'
                ]:
                    if k not in self.config:
                        log.print_err(
                            "config error, key {} is not set yet".format(k))
                        result = False

            # check type
            type_check = {
                'download_path': list,
                'mv_def_resolution': int,
                'media_tag': bool,
                'media_tag_163': bool,
                'download_file_check': bool,
                'no_mongo': bool,
            }

            need_save = False
            for k, v in type_check.items():
                if not isinstance(self.config[k], v):
                    log.print_err("config error, {} is not a require type, "
                                  "and is reset to default value: {}".format(
                                      k, self.default_config[k]))
                    self.config[k] = self.default_config[k]
                    need_save = True
                    result = False

            # download path check
            final_paths = []
            for p in self.config['download_path']:
                try:
                    if os.path.isdir(p) is False:
                        os.mkdir(p)
                    final_paths.append(p)
                except Exception:
                    # best effort: an unusable path is dropped, but Ctrl-C and
                    # interpreter exits are no longer swallowed here
                    log.print_warn(
                        "download path may be wrong and be deleted: {}".format(
                            p))
                    need_save = True
                    result = False

            # mv resolution check
            if self.config['mv_def_resolution'] not in mv_resolutions:
                log.print_warn(
                    "mv_def_resolution will be reset to default: {}".format(
                        self.default_config['mv_def_resolution']))
                self.config['mv_def_resolution'] = self.default_config[
                    'mv_def_resolution']
                need_save = True
                result = False

            if need_save:
                # persist the repaired values together with the cleaned paths
                self.config['download_path'] = final_paths
                self.save_config_file()

            return result
        except Exception as e:
            log.print_err(e)

        return False
Example #17
0
    def search(self):
        """
        Search by exactly one type and print the matches as an ascii table.

        The search type is the key of ``search_types`` whose command line
        argument is set; ``'mp3'`` is used when none is set.  Asking for more
        than one type is rejected with an error message.  Nothing is printed
        but a notice when the search returns no result or the result does not
        hold the list expected for the type.

        :return: None
        """
        requested = [k for k in search_types
                     if getattr(self.app.pargs, k, None)]
        if len(requested) > 1:
            # refuse instead of silently picking one of the requested types
            log.print_err("it could search by only one type")
            return
        search_key = requested[0] if requested else 'mp3'

        # input must be decoded in python2
        search_value = py2_decoding(getattr(self.app.pargs, search_key))

        res = api.search(search_value,
                         stype=search_key,
                         offset=self.app.pargs.offset or 0,
                         limit=self.app.pargs.limit or 50)

        if not res:
            # stop here: the membership tests below need a real result
            log.print_info("nothing found!")
            return

        header = None
        rows = []
        if search_key == 'mp3' and 'songs' in res:
            header = ["ID", "Name", "Album", "AlbumID", "Artist", "ArtistID"]
            rows = [[
                str(item['id']),
                item['name'],
                item['album']['name'],
                item['album']['id'],
                ','.join([ar['name'] for ar in item['artists']]),
                ','.join([str(ar['id']) for ar in item['artists']]),
            ] for item in res['songs']]
        elif search_key == 'playlist' and 'playlists' in res:
            header = ["ID", "Name", "User", "PlayCount", "FavoriteCount"]
            rows = [[
                str(item['id']),
                item['name'],
                item['creator']['nickname'],
                str(item['playCount']),
                str(item['bookCount']),
            ] for item in res['playlists']]
        elif search_key == 'user' and 'userprofiles' in res:
            header = ["ID", "Name", "Signature"]
            rows = [[
                str(item['userId']),
                item['nickname'],
                item['signature'],
            ] for item in res['userprofiles']]
        elif search_key == 'artist' and 'artists' in res:
            header = ["ID", "Name", "AlbumNum", "MVNum"]
            rows = [[
                str(item['id']),
                item['name'],
                str(item['albumSize']),
                str(item['mvSize']),
            ] for item in res['artists']]
        elif search_key == 'album' and 'albums' in res:
            header = ["ID", "Album", "Artist", "ArtistID"]
            rows = [[
                str(item['id']),
                item['name'],
                ','.join([ar['name'] for ar in item['artists']]),
                ','.join([str(ar['id']) for ar in item['artists']]),
            ] for item in res['albums']]
        elif search_key == 'mv' and 'mvs' in res:
            header = ["ID", "Name", "Artist", "ArtistID", "Duration",
                      "PlayCount"]
            rows = [[
                str(item['id']),
                item['name'],
                item['artistName'],
                item['artistId'],
                # duration is divided by 1000, then shown as minutes:seconds
                '%02d:%02d' % divmod(int(item['duration'] / 1000), 60),
                item['playCount'],
            ] for item in res['mvs']]

        if header is None:
            log.print_err('nothing found')
            return

        table = AsciiTable([header])
        table.table_data.extend(rows)
        print(table.table)