예제 #1
0
class PixivCrawler:
    """Download Pixiv illustrations and record them via ``PixivCursor``.

    Files are stored under ``<work_path>/<YYYYMMDD>/`` where the date is
    taken from the work's ``created_time``.
    """

    KKRTAG = ['弦巻こころ']

    def __init__(
            self,
            auth,
            work_path=os.path.abspath('../pixiv/'),
    ):
        """Log in to Pixiv and remember the download root.

        Args:
            auth: Positional arguments forwarded to ``PixivAPI.login``
                (presumably ``(username, password)`` -- confirm).
            work_path: Root directory for downloads. The default is
                resolved against the working directory once, when the
                class is defined.
        """
        self._api = PixivAPI()
        self._api.login(*auth)
        self._wd = work_path

    def fetch_work(self, work_id, tag):
        """Download every image of one work and store its metadata.

        Args:
            work_id: Pixiv work ID passed to ``PixivAPI.works``.
            tag: Label stored in the record's ``mode`` field.

        Returns:
            True if at least one image was reported as downloaded by
            ``PixivAPI.download``, otherwise False.
        """
        ri = self._api.works(work_id)
        try:
            work = ri.response[0]
        except (AttributeError, IndexError, KeyError, TypeError):
            # No usable ``response`` in the reply (e.g. an error payload).
            work = None
        if not work:
            return False

        if work.metadata:
            # Multi-page work: one URL per page.
            urls = [page.image_urls.large for page in work.metadata.pages]
        else:
            urls = [work.image_urls.large]

        # 'YYYY-MM-DD...' -> 'YYYYMMDD', used as the per-day folder name.
        created_time = work.created_time[:10].replace('-', '')
        day_dir = os.path.join(self._wd, created_time)
        # makedirs also creates a missing work root and tolerates an
        # already existing day folder.
        os.makedirs(day_dir, exist_ok=True)

        got = False
        fns = []
        for url in urls:
            fn = os.path.basename(url)
            _logger.info('getting %s to %s', url, day_dir)
            try:
                if self._api.download(url, fname=fn, path=day_dir):
                    got = True
                    # NOTE(review): fresh files are renamed to
                    # '<name>.download' -- presumably picked up by a later
                    # processing step; confirm.
                    shutil.move(os.path.join(day_dir, fn),
                                os.path.join(day_dir, fn + '.download'))
                # Recorded even when download() returned a falsy value.
                fns.append(os.path.join(created_time, fn))
            except Exception:
                # Best effort: log the failure and go on with the next page.
                _logger.exception('failed to download %s', url)

        if fns:
            PixivCursor.insert_update_one({
                'work_id': work_id,
                'mode': tag,
                'user': work.user.id,
                'fn': fns,
                'meta': json.dumps(work),
            })
        return got

    def get_by_tag(self, search_tag='', filter_tag=None, num=30, save_tag=''):
        """Fetch up to ``num`` illustrations found by a tag search.

        Args:
            search_tag: Tag sent to the Pixiv search. When empty, the first
                entry of ``filter_tag`` is used instead.
            filter_tag: Tags (compared stripped and lower-cased) that a work
                must all carry to be fetched.
            num: Maximum number of works to fetch.
            save_tag: Label stored with each work; defaults to the search
                tag.

        Returns:
            The number of works for which ``fetch_work`` returned True, or
            None when neither ``search_tag`` nor ``filter_tag`` is given.
        """
        if not search_tag and not filter_tag:
            return None

        wanted = [x.strip().lower() for x in filter_tag or ()]
        if not search_tag:
            search_tag, wanted = wanted[0], wanted[1:]
        if not save_tag:
            save_tag = search_tag
        required = set(wanted)
        _logger.info('search: %s filter: %s', search_tag, required)

        ret = 0
        page = 1
        while ret < num:
            r = self._api.search_works(search_tag,
                                       mode='tag',
                                       page=page,
                                       per_page=30)
            try:
                works = r.response
            except AttributeError:
                works = None
            if not works:
                break

            _logger.info('get %d illusts', len(works))
            for work in works:
                if work.type != 'illustration':
                    continue
                tags = {x.strip().lower() for x in work.tags}
                if not required <= tags:
                    continue
                if self.fetch_work(work.id, save_tag):
                    ret += 1
                # Stop exactly at ``num`` (same rule as get_rank).
                if ret >= num:
                    break
            page += 1

        return ret

    def get_rank(self, mode='daily', num=30):
        """Fetch up to ``num`` illustrations from a Pixiv ranking.

        Args:
            mode: Ranking mode passed to ``PixivAPI.ranking_all``; also
                stored as the label of each fetched work.
            num: Maximum number of works to fetch.

        Returns:
            The number of works for which ``fetch_work`` returned True.
        """
        ret = 0
        page = 1
        while ret < num:
            r = self._api.ranking_all(mode=mode, page=page, per_page=30)
            try:
                entries = r.response[0].works
            except (AttributeError, IndexError, KeyError, TypeError):
                entries = None
            if not entries:
                break

            _logger.info('get %d ranking illust', len(entries))
            for entry in entries:
                if entry.work.type != 'illustration':
                    continue
                if self.fetch_work(entry.work.id, mode):
                    ret += 1
                if ret >= num:
                    break
            page += 1
        return ret
예제 #2
0
class Pixiv(Thread):
    """Thread subclass that searches Pixiv and re-hosts previews on imgur."""

    search_on = 0.0  # time of the last query
    client = None

    def __init__(self):
        Thread.__init__(self)

    def __connect(self):
        """Create and log in the shared API client on first use.

        Returns:
            True once a logged-in client is available.

        Raises:
            Whatever ``PixivAPI()`` or ``login`` raises. The client is only
            cached after a successful login, so a failed attempt is retried
            on the next call instead of reusing an unauthenticated client.
        """
        if self.client is None:
            client = PixivAPI()
            client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])
            self.client = client
        return True

    def run(self):
        """Thread entry point; does nothing."""
        pass

    def search(self, key, number=30):
        """Search Pixiv and return imgur links to the most viewed results.

        Args:
            key: A tag to search for, or ``'@<user id>'`` to list the works
                of that user.
            number: Results requested from the tag search (capped at 1000).

        Returns:
            A list with up to four uploaded image URLs followed by a Pixiv
            search URL, or an error message string when nothing is found.
        """
        # Local import: the module's import block is not visible from here.
        import os

        if not self.__connect():
            return 'Pixiv模組發生錯誤 暫時不能使用'

        number = min(number, 1000)

        # startswith() also copes with an empty key (key[0] would raise).
        if key.startswith('@'):
            result = self.client.users_works(int(key[1:]))
        else:
            result = self.client.search_works(
                key,
                page=1,
                per_page=number,
                # text: title, tag: tag, exact_tag: exact tag,
                # caption: description
                mode='tag',
                # all: everything, day/week/month: within that period
                period='all',
                # desc: newest first, asc: oldest first
                order='desc',
                sort='date',
            )

        if result.status == 'failure':
            return '找不到 <%s>' % (key)

        # Most viewed first; sorted() is stable, so works with equal view
        # counts keep their response order.
        ranked = sorted(result.response,
                        key=lambda work: work.stats.views_count,
                        reverse=True)

        reply = []
        for work in ranked[:4]:
            # Other sizes seen for image_urls: px_128x128, px_480mw.
            self.client.download(work.image_urls.px_480mw,
                                 path=cfg['temp_dir'],
                                 name=str(work.id))
            # Build the path the same way download() is told to save it,
            # instead of hard-coding a backslash separator.
            local_path = os.path.join(cfg['temp_dir'], str(work.id))
            print(local_path)
            reply.append(imgur.upload(local_path))

        # NOTE(review): 'word=123' is hard-coded and looks like a placeholder
        # for the search key -- confirm before changing.
        reply.append(
            'https://www.pixiv.net/search.php?word=123&s_mode=s_tag_full')
        return reply

    def rss(self):
        """Return an error message if connecting fails, otherwise None."""
        if not self.__connect():
            return 'Pixiv模組錯誤'
예제 #3
0
class pixivImage:
    """A Pixiv illustration identified by its numeric ID or page URL.

    Metadata is only loaded when ``importIllustJSON`` / ``importUserJSON``
    (or ``importJSON``) is called.
    """

    def __init__(self, *args):
        """Set ``ID`` and ``URL`` from each argument that can be parsed.

        Args:
            *args: Illustration IDs (int or digit string) and/or URLs that
                carry an ``illust_id=<digits>`` query parameter. A later
                valid argument overrides an earlier one. Arguments that
                cannot be parsed are reported with ``print`` and skipped,
                in which case ``ID``/``URL`` may stay unset.
        """
        baseURL = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id="
        self.image_URLs = []
        self.directories = []
        self.caption = ""
        self.userTags = []
        self.userImported = False
        for arg in args:
            illust_id = self._parse_illust_id(arg)
            if illust_id is None:
                if isinstance(arg, str) and "://" in arg:
                    print("URL is malformed")
                else:
                    print("URL OR ID is wrong or in bad format")
                continue
            self.ID = illust_id
            # Always store the canonical URL, which also repairs slightly
            # malformed input URLs.
            self.URL = baseURL + str(illust_id)

    @staticmethod
    def _parse_illust_id(arg):
        """Return the illustration ID found in *arg* as an int, or None."""
        text = str(arg).strip()
        if "illust_id=" in text:
            # Keep only the value of the illust_id query parameter.
            text = text.partition("illust_id=")[2]
            text = text.split("&", 1)[0].split("#", 1)[0]
        if not text.isdigit():
            return None
        try:
            return int(text)
        except ValueError:
            # isdigit() accepts a few characters int() rejects.
            return None

    def __get__(self, obj, objtype):
        """Descriptor hook; logic kept as found.

        NOTE(review): ``self.attr`` and ``self.item`` are not assigned
        anywhere in this class, so the first lookup always raises
        AttributeError and the import fallbacks run. This looks like it was
        meant to be a lazy ``__getattr__`` -- confirm the intent before
        relying on it.
        """
        try:
            return getattr(obj, self.attr)
        except AttributeError:
            try:
                self.importIllustJSON()
                return self.item
            except AttributeError:
                try:
                    self.importUserJSON()
                except AttributeError:
                    print("Image does not have that attribute")

    def setCustomTags(self, tags):
        """Replace the user-defined tag list."""
        self.userTags = tags

    def setCaption(self, caption):
        """Set the caption text."""
        self.caption = caption

    def importIllustJSON(self):
        """Log in with ``PixivAPI`` and load the illustration metadata.

        Fills ``JSON``, ``manga``, ``account``, ``name``, ``user_ID``,
        ``user_URL``, ``title``, ``tags``, ``pages`` and rebuilds
        ``image_URLs`` with the large image URL of every page.
        """
        self.api = PixivAPI()
        self.api.login(pixivLogin["pixivusername"],
                       pixivLogin["pixivpassword"])
        # NOTE(review): this prefix does not look like a valid Pixiv profile
        # URL ('member.php?id=' was probably intended) -- confirm.
        userURL = "https://www.pixiv.net/member_id="
        self.JSON = self.api.works(self.ID)['response'][0]
        self.manga = self.JSON['is_manga']
        user = self.JSON['user']
        self.account = user['account']
        self.name = user['name']
        self.user_ID = user['id']
        self.user_URL = userURL + str(self.user_ID)
        self.title = self.JSON['title']
        self.tags = self.JSON['tags']
        self.pages = self.JSON['page_count']
        # Rebuild instead of append so a repeated import does not duplicate
        # entries, and include every page (the last one used to be dropped).
        if self.pages > 1:
            self.image_URLs = [page["image_urls"]['large']
                               for page in self.JSON['metadata']["pages"]]
        else:
            self.image_URLs = [self.JSON['image_urls']['large']]

    def importUserJSON(self):
        """Load the artist profile through ``AppPixivAPI`` (no login call).

        Requires ``user_ID``, which ``importIllustJSON`` sets.
        """
        aapi = AppPixivAPI()
        self.userJSON = aapi.user_detail(self.user_ID)
        profile = self.userJSON['profile']
        self.webpage = profile['webpage']
        self.twitter_name = profile['twitter_account']
        self.twitter_URL = profile['twitter_url']
        self.pawoo_URL = profile['pawoo_url']
        self.userImported = True

    def importJSON(self):
        """Load illustration metadata, then the artist profile."""
        self.importIllustJSON()
        self.importUserJSON()

    def download(self, directory=None):
        """Download every URL in ``image_URLs`` into *directory*.

        Args:
            directory: Target folder, created if missing. Defaults to a
                ``temp`` folder next to this source file.

        The local path of each downloaded file is appended to
        ``self.directories``. Does nothing when ``image_URLs`` is empty
        (for example before ``importIllustJSON`` has been called).
        """
        if not self.image_URLs:
            return
        if directory is None:
            directory = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "temp")
        os.makedirs(directory, exist_ok=True)
        for URL in self.image_URLs:
            # ``path`` selects the folder; ``prefix`` would only be
            # prepended to the file name.
            self.api.download(URL, path=directory)
            self.directories.append(
                os.path.join(directory, os.path.basename(URL)))