Ejemplo n.º 1
0
 def __init__(self, queue):
     '''
     create the database handle, keep the task queue and reset the counters
     :param queue: queue object stored as self._taskQueue
     '''
     self._database = DBUtil()
     # the producer flag starts out True, both progress counters start at zero
     self._taskQueue, self._queueProducerStatus = queue, True
     self.completed = self.totalDownload = 0
Ejemplo n.º 2
0
    def __init__(self):
        '''
        create the aria2 client and the database handle, reset both counters
        '''
        # connection settings come from the ARIA2_* names visible in this module
        aria2Options = {
            "host": ARIA2_HOST,
            "port": ARIA2_PORT,
            "token": ARIA2_TOKEN,
            "link_type": ARIA2_LINK_TYPE,
        }
        self._pyaria2 = PyAria2(**aria2Options)
        self._database = DBUtil()
        self._completed = self._totalDownload = 0
Ejemplo n.º 3
0
class Video(GeneralTool):
    '''
    passes the magnet links of not yet downloaded "torrent" rows to aria2
    '''

    def __init__(self):
        '''
        create the aria2 client and the database handle, reset both counters
        '''
        aria2Options = {
            "host": ARIA2_HOST,
            "port": ARIA2_PORT,
            "token": ARIA2_TOKEN,
            "link_type": ARIA2_LINK_TYPE,
        }
        self._pyaria2 = PyAria2(**aria2Options)
        self._database = DBUtil()
        self._completed = self._totalDownload = 0

    def torrentResultDictGenerator(self):
        '''
        query the "torrent" table for rows whose "downloaded" value is 0,
        remember how many there are and yield them one by one
        :return: generator over the queried rows
        '''
        pendingRows = self._database.query("torrent", {"downloaded": 0})
        self._totalDownload = len(pendingRows)
        pendingMessage = "not downloaded torrent count {0}".format(
            len(pendingRows))
        logger.info(pendingMessage)
        for pendingRow in pendingRows:
            yield pendingRow

    def getSavePath(self, torrentResultDict):
        '''
        build the aria2 save path for one row
        :param torrentResultDict: row that carries a "detailsPageUrl" value
        :return: ARIA2_SAVE_PATH/video/<category>/<year>/<month>/<day> run
                 through checkAria2SavePath, None when ARIA2_SAVE_PATH is
                 not set
        '''
        # the category is resolved before the setting is checked
        categoryName = self.getCategoryFromDatailsPageUrl(
            torrentResultDict["detailsPageUrl"])
        if not ARIA2_SAVE_PATH:
            return None
        datedPath = os.path.join(ARIA2_SAVE_PATH, "video", categoryName,
                                 self.year(), self.month(), self.day())
        return self.checkAria2SavePath(datedPath)

    def addToAria2(self, torentResultDict):
        '''
        pass the magnet link of one row to aria2 and count it as completed
        :param torentResultDict: row with "magnet", "title" and
                                 "detailsPageUrl" values
        :return:
        '''
        magnetLink = torentResultDict["magnet"]
        targetDir = self.getSavePath(torentResultDict)
        logMessage = "add to aria2, torrent file: {0}".format(
            torentResultDict["title"])
        logger.info(logMessage)
        self._pyaria2.addUrls(magnetLink, targetDir)
        self._completed += 1

    def downloadScheduler(self):
        '''
        add every pending row to aria2, then set "downloaded" to "1" for the
        row with the same md5
        :return:
        '''
        for pendingRow in self.torrentResultDictGenerator():
            self.addToAria2(pendingRow)
            markDownloaded = {
                "key_values": {"downloaded": "1"},
                "postions": {"md5": pendingRow["md5"]},
            }
            self._database.update("torrent", markDownloaded)
Ejemplo n.º 4
0
class Image(GeneralTool):
    '''
    producer/consumer pair for images: the producer puts details page
    responses on the task queue, the consumer saves the images referenced by
    each response and records them in the "image" table
    '''

    # seconds consumer() waits on an empty queue before it re-checks the
    # producer flag
    _QUEUE_POLL_SECONDS = 1

    def __init__(self, queue):
        '''
        :param queue: task queue shared by producer() and consumer()
        '''
        self._database = DBUtil()
        self._taskQueue = queue
        self._queueProducerStatus = True
        self.completed = 0
        self.totalDownload = 0

    def generatorListPage(self):
        '''
        request the list pages (fid=16) from page 1 up to the tracking count
        :return: response list as returned by AsyncRequests.map
        '''
        # use 100 pages when the setting is not an int, and never more than 100
        trackingPageCount = TRACKING_PAGE_COUNT
        if not isinstance(trackingPageCount, int) or trackingPageCount > 100:
            trackingPageCount = 100

        # cookies are only sent when COOKIES is set
        requestKwargs = {"headers": self.headers}
        if COOKIES:
            requestKwargs["cookies"] = COOKIES
        requestsList = [
            AsyncRequests.get(
                "https://www.t66y.com/thread0806.php?fid=16&search=&page={0}".format(str(trackingNumber)),
                **requestKwargs)
            for trackingNumber in range(1, trackingPageCount + 1)
        ]
        return AsyncRequests.map(requestsList, size=DOWNLOAD_CONCURRENT_COUNT)

    def getDetailsPageUrl(self, responseList):
        '''
        collect the details page urls found on the list pages, request them
        and put every details page response on the task queue
        :param responseList: list page responses, entries may be None
        :return:
        '''
        if not responseList:
            logger.error("get response list is empty in details page function")
            # nothing to parse, so nothing is queued
            return
        detailsPageUrlList = []
        try:
            for response in responseList:
                if response is None or response.status_code != 200:
                    continue
                response.encoding = "gbk"
                soup = bs(response.text, "lxml")
                for tr in soup.find_all("tr"):
                    tdList = tr.find_all("td")
                    # only rows with exactly five cells are considered
                    if len(tdList) != 5:
                        continue
                    urlTagList = tdList[1].select('a[href]')
                    if not urlTagList:
                        continue
                    url = urlTagList[0]["href"]
                    # urlInExclued is a substring test, so it also covers an
                    # exact match against excludeUrlList
                    if not self.urlInExclued(url):
                        detailsPageUrlList.append(self.domains + url.strip())
            logger.info("get details page response ...")
            detailsPageResponseList = self.urlToResponse(detailsPageUrlList)
            # the total is set before anything is queued
            self.totalDownload = len(detailsPageResponseList)
            logger.info("present craw count:{0}".format(str(len(detailsPageResponseList))))
            for detailsPageResponse in detailsPageResponseList:
                self._taskQueue.put(detailsPageResponse)
        except Exception:
            logger.error(traceback.format_exc())

    def computeDetailsImageCount(self, detailsResponse):
        '''
        count the input tags of a details page that carry a data-src attribute
        :param detailsResponse: details page response
        :return: the count, 0 when the response is unusable or parsing fails
        '''
        try:
            if detailsResponse is None or detailsResponse.status_code != 200:
                logger.error("detalisResponse is None or status code not 200 in the computeDetailsImageCount function")
                return 0
            detailsResponse.encoding = "gbk"
            soup = bs(detailsResponse.text, "lxml")
            return sum(1 for inputTag in soup.find_all("input")
                       if inputTag.has_attr("data-src"))
        except Exception:
            logger.error(traceback.format_exc())
            return 0

    def getImageDownloadUrlGenerator(self, detailsResponse):
        '''
        generator over the images of a details page that are not in the
        "image" table yet; the image itself is requested before the result
        object is yielded
        :param detailsResponse: details page response
        :return: generator of ImageResultClass objects
        '''
        # Exception instead of BaseException: closing this generator raises
        # GeneratorExit at the yield, which must not be logged as an error
        try:
            if detailsResponse is None or detailsResponse.status_code != 200:
                return
            detailsResponse.encoding = "gbk"
            soup = bs(detailsResponse.text, "lxml")
            title = soup.head.title.text
            detailsPageUrl = str(detailsResponse.url)
            # computed on first use only, the value is the same for every
            # image of the page
            detailsPageImageCount = None
            for inputTag in soup.find_all("input"):
                imageDownloadUrl = inputTag.get("data-src")
                if not imageDownloadUrl:
                    continue
                if self.existInDatabase("image", {"imageDownloadUrl": imageDownloadUrl}):
                    continue
                if not self.isImageDownloadPageUrl(imageDownloadUrl):
                    continue
                if detailsPageImageCount is None:
                    detailsPageImageCount = str(self.computeDetailsImageCount(detailsResponse))
                imageResultClass = ImageResultClass()
                imageResultClass.setCrawDate(self.formatDate())
                imageResultClass.setDetailsPageUrl(detailsPageUrl)
                imageResultClass.setDetailsPageImageCount(detailsPageImageCount)
                imageResultClass.setImageDownloadUrl(imageDownloadUrl)
                imageResultClass.setResponse(self.urlToResponse([imageDownloadUrl])[0])
                imageResultClass.setTitle(self.clearTitle(title))
                yield imageResultClass
        except Exception:
            logger.error(traceback.format_exc())

    def _saveImage(self, imageResultClass):
        '''
        write the image bytes of one result object to disk and store the md5
        and the save path on it
        :param imageResultClass: result object that holds the image response
        :return: True when the file was written, False when the response is
                 None or its status code is not 200
        '''
        response = imageResultClass.getResponse()
        if response is None or response.status_code != 200:
            return False
        imageByte = response.content
        imageResultClass.setMd5(self.computeMD5ByFile(imageByte))
        imageName = os.path.split(imageResultClass.getImageDownloadUrl())[1]
        # checkDirExist creates the directory and returns the file path
        imageSavePath = self.checkDirExist(os.path.join(SAVE_PATH,
                                                        "image",
                                                        self.year(),
                                                        self.month(),
                                                        self.day(),
                                                        imageResultClass.getTitle(),
                                                        imageName))
        imageResultClass.setSavePath(imageSavePath)
        with open(imageSavePath, "wb") as fo:
            fo.write(imageByte)
        return True

    def saveImageGenerator(self, imageResultClassGenerator):
        '''
        save the image of every result object and yield the objects whose
        image was written
        :param imageResultClassGenerator: iterable of ImageResultClass objects
        :return: generator of the saved ImageResultClass objects
        '''
        if imageResultClassGenerator is None:
            logger.error("imageResultClass generator is None")
            return
        for imageResultClass in imageResultClassGenerator:
            if isinstance(imageResultClass, str):
                continue
            # the yield stays outside the try block so that closing this
            # generator is never caught; a failed image is logged and skipped
            # instead of ending the whole page
            try:
                saved = self._saveImage(imageResultClass)
            except Exception:
                logger.error(traceback.format_exc())
                continue
            if saved:
                yield imageResultClass

    def imageResultClassPreprocessing(self, imageResultClass):
        '''
        turn a result object into the dict that is written to the database
        :param imageResultClass: saved ImageResultClass object
        :return: dict with the "image" row values, None on failure
        '''
        if imageResultClass is None:
            logger.error("imageResultClass is None in the imageResultClassPreprocessign function")
            return None
        try:
            return {
                "id": None,
                "title": imageResultClass.getTitle(),
                "detailsPageUrl": imageResultClass.getDetailsPageUrl(),
                "detailsPageImageCount": imageResultClass.getDetailsPageImageCount(),
                "imageDownloadUrl": imageResultClass.getImageDownloadUrl(),
                "savePath": imageResultClass.getSavePath(),
                "crawData": imageResultClass.getCrawDate(),
                "md5": imageResultClass.getMd5(),
            }
        except Exception:
            logger.error(traceback.format_exc())
            return None

    def writeToDatabase(self, imageResultDict):
        '''
        insert the row into the "image" table unless a row with the same md5
        already exists
        :param imageResultDict: dict built by imageResultClassPreprocessing
        :return:
        '''
        if imageResultDict is None:
            logger.error("imageResultDict is None")
            return None
        try:
            result = self._database.queryIsExist("image", {"md5": imageResultDict["md5"]})
            progressBar = self.computeProgressBar(self.completed, self.totalDownload)
            if not result:
                logger.info("Image completed: {progressBar: <10}Title:{title}".format(
                    progressBar=progressBar, title=imageResultDict['title']),
                    level="ALL")
                self._database.insert("image", imageResultDict)
        except Exception:
            logger.error(traceback.format_exc())
            logger.error("An error occurred in the function ---> wirteToDataBase")
            return None

    def producer(self):
        '''
        used to produce details responses
        :return:
        '''
        self._queueProducerStatus = True
        try:
            self.getDetailsPageUrl(self.generatorListPage())
        finally:
            # always cleared, otherwise consumer() would wait forever after a
            # failure in generatorListPage
            self._queueProducerStatus = False

    def _consumeDetailsResponse(self, detailsResponse):
        '''
        save every new image of one details page and write it to the database
        :param detailsResponse: details page response taken from the queue
        :return:
        '''
        imageDownloadUrlGenerator = self.getImageDownloadUrlGenerator(detailsResponse)
        for imageResultClass in self.saveImageGenerator(imageDownloadUrlGenerator):
            if imageResultClass is None:
                continue
            imageResultDict = self.imageResultClassPreprocessing(imageResultClass)
            if imageResultDict is None:
                continue
            self.writeToDatabase(imageResultDict)

    def consumer(self):
        '''
        used to consume details responses
        take a details page response from the queue, save its images to the
        local dir and write them to the database; runs until the queue is
        empty and the producer flag is cleared
        :return:
        '''
        # assumes the task queue follows the standard library Queue API
        # (get(timeout=...) raising queue.Empty) - TODO confirm against the
        # code that creates the queue
        from queue import Empty

        while not self._taskQueue.empty() or self._queueProducerStatus:
            try:
                # a get() without timeout would block forever when the
                # producer finishes without queueing anything else
                detailsResponse = self._taskQueue.get(timeout=self._QUEUE_POLL_SECONDS)
            except Empty:
                continue
            if detailsResponse is not None:
                self._consumeDetailsResponse(detailsResponse)
            # None entries are part of totalDownload, so they count as well
            self.completed += 1
            progressBar = self.computeProgressBar(self.completed, self.totalDownload)
            logger.info("Image completed:{0}".format(progressBar))
Ejemplo n.º 5
0
class Torrent(GeneralTool):
    '''
    producer/consumer pair for torrents: the producer puts details page
    responses on the task queue, the consumer resolves the download page,
    saves the torrent file and records it in the "torrent" table
    '''

    # seconds consumer() waits on an empty queue before it re-checks the
    # producer flag
    _QUEUE_POLL_SECONDS = 1

    def __init__(self, queue):
        '''
        :param queue: task queue shared by producer() and consumer()
        '''
        self._database = DBUtil()
        self._taskQueue = queue
        self._queueProducerStatus = True
        self.completed = 0
        self.totalDownload = 0

    def generatorListPage(self):
        '''
        request the list pages of every category in BT_DOWNLOAD_CATEGORY,
        from page 1 up to the tracking count
        :return: response list as returned by AsyncRequests.map
        '''
        # use 100 pages when the setting is not an int, and never more than 100
        trackingPageCount = TRACKING_PAGE_COUNT
        if not isinstance(trackingPageCount, int) or trackingPageCount > 100:
            trackingPageCount = 100

        # cookies are only sent when COOKIES is set
        requestKwargs = {"headers": self.headers}
        if COOKIES:
            requestKwargs["cookies"] = COOKIES
        requestsList = []
        for categoryName in BT_DOWNLOAD_CATEGORY:
            categoryNumber = Category.categoryNameToNumberDict[categoryName]
            for trackingNumber in range(1, trackingPageCount + 1):
                url = "https://www.t66y.com/thread0806.php?fid={0}&search=&page={1}".format(
                    str(categoryNumber), str(trackingNumber))
                requestsList.append(AsyncRequests.get(url, **requestKwargs))
        return AsyncRequests.map(requestsList, size=DOWNLOAD_CONCURRENT_COUNT)

    def getDetailsPageUrl(self, responseList):
        '''
        collect the details page urls found on the list pages, request them
        and put every details page response on the task queue
        :param responseList: list page responses, entries may be None
        :return:
        '''
        if not responseList:
            logger.error("get response list is empty in details page function")
            # nothing to parse, so nothing is queued
            return
        detailsPageUrlList = []
        try:
            for response in responseList:
                if response is None or response.status_code != 200:
                    continue
                response.encoding = "gbk"
                soup = bs(response.text, "lxml")
                for tr in soup.find_all("tr"):
                    tdList = tr.find_all("td")
                    # only rows with exactly five cells are considered
                    if len(tdList) != 5:
                        continue
                    urlTagList = tdList[1].select('a[href]')
                    if not urlTagList:
                        continue
                    url = urlTagList[0]["href"]
                    if url not in self.excludeUrlList:
                        detailsPageUrlList.append(self.domains + url.strip())
            logger.info("get details page response ...")
            detailsPageResponseList = self.urlToResponse(detailsPageUrlList)
            logger.info("present craw count:{0}".format(str(len(detailsPageResponseList))))
            # the total is set before anything is queued
            self.totalDownload = len(detailsPageResponseList)
            for detailsPageResponse in detailsPageResponseList:
                self._taskQueue.put(detailsPageResponse)
        except Exception:
            logger.error(traceback.format_exc())

    def getDownloadPageUrl(self, detailsResponse):
        '''
        look for download page links in the link texts of a details page and
        request them
        :param detailsResponse: details page response
        :return: TorrentResultClass (left empty when the response is unusable
                 or no link matches), None when an error occurs
        '''
        try:
            torrentResultClass = TorrentResultClass()
            if detailsResponse is not None and detailsResponse.status_code == 200:
                detailsResponse.encoding = "gbk"
                soup = bs(detailsResponse.text, "lxml")
                title = soup.head.title.text
                detailsPageUrl = str(detailsResponse.url)
                for aTag in soup.find_all("a"):
                    # the link text, not the href, holds the download page url
                    downloadPageUrl = aTag.string
                    if not downloadPageUrl or not self.isDownloadPageUrl(downloadPageUrl):
                        continue
                    # every matching link is requested; values of a later
                    # match overwrite those of an earlier one
                    torrentResultClass.setCategory(self.getCategoryFromDatailsPageUrl(detailsPageUrl))
                    torrentResultClass.setCrawData(self.formatDate())
                    torrentResultClass.setDetailsPageUrl(detailsPageUrl)
                    torrentResultClass.setResponse(self.urlToResponse([downloadPageUrl.strip()])[0])
                    torrentResultClass.setTitle(self.clearTitle(title))
            return torrentResultClass
        except Exception:
            logger.error(traceback.format_exc())
            return None

    def getTorrentDownloadUrl(self, torrentResultClass):
        '''
        get the torrent download url from the download page response
        :param torrentResultClass: result object filled by getDownloadPageUrl
        :return: the same object with the download page url and the torrent
                 download url set, None when the response is unusable
        '''
        if torrentResultClass is None:
            logger.error("get download page url failed, because torrentResultClass is None")
            return None
        try:
            response = torrentResultClass.getResponse()
            # the stored response has been observed to be a string; the
            # reason is unknown, such results are dropped
            if isinstance(response, str):
                return None
            if response is None or response.status_code != 200:
                return None
            response.encoding = "utf-8"
            downloadUrl = self.torrentDownloadUrl(response.text)
            torrentResultClass.setDownloadPageUrl(str(response.url))
            torrentResultClass.setTorrentDownloadUrl(downloadUrl)
            return torrentResultClass
        except Exception:
            logger.error(traceback.format_exc())
            return None

    def downloadTorrentFile(self, torrentResultClass):
        '''
        download the torrent file, save it below SAVE_PATH and store the save
        path, md5, magnet link and downloaded=0 on the result object
        :param torrentResultClass: result object filled by getTorrentDownloadUrl
        :return: the same object, None when a value is missing or the
                 download fails
        '''
        if torrentResultClass is None:
            logger.error("torrentResultClass is None in the downloadTorrentFile function")
            return None
        torrentName = self.filterTorrentName(self.clearTitle(torrentResultClass.getTitle()))
        torrentDownloadUrl = torrentResultClass.getTorrentDownloadUrl()
        detailsPageUrl = torrentResultClass.getDetailsPageUrl()
        if not torrentName:
            logger.error("get torrent name failed")
            return None
        if not torrentDownloadUrl:
            logger.error("get torrent download url failed")
            return None
        if not detailsPageUrl:
            logger.error("get details page url failed")
            return None
        try:
            categoryName = self.getCategoryFromDatailsPageUrl(detailsPageUrl)
            # NOTE(review): this request has no timeout, a stalled server
            # blocks the consumer - consider passing one
            torrentResponse = requests.get(torrentDownloadUrl, headers=self.headers)
            if torrentResponse.status_code != 200:
                # the body of a failed request is not a torrent file
                logger.error("download torrent file failed, status code: {0}".format(
                    torrentResponse.status_code))
                return None
            torrentByte = torrentResponse.content
            torrentMd5 = self.computeMD5ByFile(torrentByte)
            # checkDirExist creates the directory and returns the file path
            torrentPath = self.checkDirExist(
                os.path.join(SAVE_PATH,
                             "torrent",
                             categoryName,
                             self.year(),
                             self.month(),
                             self.day(),
                             torrentName + ".torrent"))
            with open(torrentPath, "wb") as torrentFile:
                torrentFile.write(torrentByte)
            torrentResultClass.setSavePath(torrentPath)
            torrentResultClass.setCrawData(self.formatDate())
            torrentResultClass.setMd5(torrentMd5)
            torrentResultClass.setMagnet(self.torrentToMagnet(torrentByte))
            torrentResultClass.setDownloaded(0)
            return torrentResultClass
        except Exception:
            logger.error(traceback.format_exc())
            return None

    def torrentResultClassPreprocessing(self, torrentResultClass):
        '''
        turn a result object into the dict that is written to the database
        :param torrentResultClass: result object filled by downloadTorrentFile
        :return: dict with the "torrent" row values, None on failure
        '''
        if torrentResultClass is None:
            logger.error("torrentResultClass is None in the torrentResultClassPreprocessign function")
            return None
        try:
            return {
                "id": None,
                "category": torrentResultClass.getCategory(),
                "title": torrentResultClass.getTitle(),
                "detailsPageUrl": torrentResultClass.getDetailsPageUrl(),
                "downloadPageUrl": torrentResultClass.getDownloadPageUrl(),
                "torrentDownloadUrl": torrentResultClass.getTorrentDownloadUrl(),
                "savePath": torrentResultClass.getSavePath(),
                "crawData": torrentResultClass.getCrawData(),
                "md5": torrentResultClass.getMd5(),
                "magnet": torrentResultClass.getMagnet(),
                "downloaded": torrentResultClass.getDownloaded(),
            }
        except Exception:
            logger.error(traceback.format_exc())
            return None

    def writeToDatabase(self, torrentResultDict):
        '''
        insert the row into the "torrent" table unless a row with the same
        md5 already exists
        :param torrentResultDict: dict built by torrentResultClassPreprocessing
        :return:
        '''
        if torrentResultDict is None:
            logger.error("torrentResultDict is None")
            return None
        try:
            result = self._database.queryIsExist("torrent", {"md5": torrentResultDict["md5"]})
            progressBar = self.computeProgressBar(self.completed, self.totalDownload)
            if result:
                logger.info("Torrent completed:{progressBar: <5} torrent already exist database.".format(
                    progressBar=progressBar))
            else:
                logger.info("Torrent completed: {progressBar: <10}".format(progressBar=progressBar) +
                            "category: {category: <20}".format(category=torrentResultDict['category']) +
                            "Title:{title}".format(title=torrentResultDict['title']),
                            level="ALL")
                self._database.insert("torrent", torrentResultDict)
        except Exception:
            logger.error(traceback.format_exc())
            logger.error("An error occurred in the function ---> wirteToDataBase")
            return None

    def producer(self):
        '''
        used to produce details responses
        :return:
        '''
        self._queueProducerStatus = True
        try:
            self.getDetailsPageUrl(self.generatorListPage())
        finally:
            # always cleared, otherwise consumer() would wait forever after a
            # failure in generatorListPage
            self._queueProducerStatus = False

    def _buildTorrentResultDict(self, detailsResponse):
        '''
        run one details page response through all processing stages
        :param detailsResponse: details page response taken from the queue
        :return: dict for the database, None as soon as a stage yields None
        '''
        stageList = (
            self.getDownloadPageUrl,
            self.getTorrentDownloadUrl,
            self.downloadTorrentFile,
            self.torrentResultClassPreprocessing,
        )
        result = detailsResponse
        for stage in stageList:
            if result is None:
                return None
            result = stage(result)
        return result

    def consumer(self):
        '''
        used to consume details responses
        take a response from the queue, resolve the download page and the
        torrent download url, save the torrent file and write the result to
        the database; runs until the queue is empty and the producer flag is
        cleared
        :return:
        '''
        # assumes the task queue follows the standard library Queue API
        # (get(timeout=...) raising queue.Empty) - TODO confirm against the
        # code that creates the queue
        from queue import Empty

        while not self._taskQueue.empty() or self._queueProducerStatus:
            try:
                # a get() without timeout would block forever when the
                # producer finishes without queueing anything else
                detailsResponse = self._taskQueue.get(timeout=self._QUEUE_POLL_SECONDS)
            except Empty:
                continue
            torrentResultDict = self._buildTorrentResultDict(detailsResponse)
            # every response taken from the queue counts, whether it
            # produced a row or not
            self.completed += 1
            if torrentResultDict is not None:
                self.writeToDatabase(torrentResultDict)
Ejemplo n.º 6
0
class GeneralTool():
    '''
    constants and helper class methods shared by the crawler classes
    '''

    _database = DBUtil()

    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"
    }

    excludeUrlList = [
        'htm_data/16/1106/524942.html', 'htm_data/16/1808/344501.html',
        'htm_data/16/1707/2519480.html', 'htm_data/2/1111/30611.html',
        'htm_data/16/1706/2424348.html', 'htm_data/16/1110/622028.html',
        'htm_data/16/0805/136474.html', 'htm_data/16/1109/594741.html',
        'read.php?tid=5877', 'read.php?tid', 'htm_data/4/1106/524586.html',
        'htm_data/5/1707/2519502.html', 'htm_data/5/1106/517566.html',
        'htm_data/4/1206/756654.html', '344501.html'
    ]

    domains = "https://www.t66y.com/"
    xpath = "//td[@class='tal']/h3/a/@href"

    @classmethod
    def urlInExclued(cls, url):
        '''
        :param url: url to check
        :return: True when any entry of excludeUrlList is a substring of url
        '''
        return any(excluedUrl in url for excluedUrl in cls.excludeUrlList)

    @classmethod
    def checkSystemType(cls):
        '''
        :return: "windows", "linux" or "unknown", derived from
                 platform.platform()
        '''
        platformName = str(platform.platform()).lower()
        for systemType in ("windows", "linux"):
            if systemType in platformName:
                return systemType
        return "unknown"

    @classmethod
    def checkDirExist(cls, originPath):
        '''
        make sure the directory part of a file path exists
        :param originPath: file path, the last component is the file name
        :return: full path of the file to save
        '''
        dirPath, fileName = os.path.split(originPath)
        if cls.checkSystemType() == "windows" and ":" not in dirPath:
            # no drive letter: the path is taken relative to the parent
            # directory of the current working directory
            saveDirPath = os.path.join(os.path.split(os.getcwd())[0], dirPath)
            os.makedirs(saveDirPath, exist_ok=True)
            return os.path.join(saveDirPath, fileName)
        # exist_ok avoids the gap between an isdir() check and makedirs()
        os.makedirs(dirPath, exist_ok=True)
        return originPath

    @classmethod
    def checkAria2SavePath(cls, savePath):
        '''
        use the path separator style of ARIA2_SAVE_PATH in savePath
        :param savePath: path to convert, falsy values are returned unchanged
        :return: savePath with "/" separators when ARIA2_SAVE_PATH contains
                 "/", with "\\" separators otherwise
        '''
        if not savePath:
            return savePath
        if "/" in ARIA2_SAVE_PATH:
            return savePath.replace("\\", "/")
        return savePath.replace("/", "\\")

    @classmethod
    def computeMD5(cls, title, url, dataStr):
        '''
        :return: upper-case md5 hex digest of the lower-cased concatenation
                 of title, url and dataStr
        '''
        string = str(title).lower() + str(url).lower() + str(dataStr).lower()
        return hashlib.md5(string.encode(encoding='utf-8')).hexdigest().upper()

    @classmethod
    def computeMD5ByFile(cls, torrentFileByte):
        '''
        :param torrentFileByte: file content as bytes
        :return: upper-case md5 hex digest of the content
        '''
        return hashlib.md5(torrentFileByte).hexdigest().upper()

    @classmethod
    def torrentToMagnet(cls, torrentByte):
        '''
        build a magnet link from the content of a torrent file
        :param torrentByte: torrent file content as bytes
        :return: magnet link made of the base32 sha1 of the bencoded info
                 dict, "" when the content cannot be processed
        '''
        try:
            metadata = bencodepy.decode(torrentByte)
            hashcontents = bencodepy.encode(metadata[b'info'])
            digest = hashlib.sha1(hashcontents).digest()
            b32hash = base64.b32encode(digest).decode()
            return 'magnet:?' + 'xt=urn:btih:' + b32hash
        except Exception:
            # a bare except would also swallow KeyboardInterrupt/SystemExit;
            # the failure is logged instead of being dropped silently
            logger.error(traceback.format_exc())
            return ""

    @classmethod
    def formatDate(cls):
        '''
        :return: current local time formatted as 'YYYY-mm-dd HH:MM:SS'
        '''
        return datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    @classmethod
    def year(cls):
        '''
        :return: current year as a string
        '''
        return datetime.now().strftime('%Y')

    @classmethod
    def month(cls):
        '''
        :return: current month as a two digit string
        '''
        return datetime.now().strftime('%m')

    @classmethod
    def day(cls):
        '''
        :return: current day of the month as a two digit string
        '''
        return datetime.now().strftime('%d')

    @classmethod
    def isDownloadPageUrl(cls, url):
        '''
        is download page url
        :param url:
        :return: True when the url contains "rmdown.com"
        '''
        return "rmdown.com" in url

    @classmethod
    def isImageDownloadPageUrl(cls, url):
        '''
        :return: always True, no filtering is implemented
        '''
        return True

    @classmethod
    def torrentDownloadUrl(cls, responseText):
        '''
        get torrent download url from download page
        :param responseText: response text not response
        :return: download url built from the "reff" and "ref" input values,
                 None when one of them is missing or empty
        '''
        soup = bs(responseText, "lxml")
        reff = ""
        ref = ""
        for inputTag in soup.find_all("input"):
            # .get() instead of [] because input tags without a name or
            # value attribute would raise KeyError
            inputName = inputTag.get("name")
            if inputName == "reff":
                reff = str(inputTag.get("value", ""))
            elif inputName == "ref":
                ref = str(inputTag.get("value", ""))
        if reff and ref:
            return "http://www.rmdown.com/download.php?reff={0}&ref={1}".format(
                reff, ref)
        return None

    @classmethod
    def urlToResponse(cls, urlList, cookies=None, timeOut=TIME_OUT):
        '''
        request all urls through AsyncRequests
        :param urlList: urls to request
        :param cookies: cookies to send, left out when falsy
        :param timeOut: timeout passed to every request
        :return: response list as returned by AsyncRequests.map
        '''
        requestKwargs = {"headers": cls.headers, "timeout": timeOut}
        if cookies:
            requestKwargs["cookies"] = cookies
        requestsList = [AsyncRequests.get(url=url, **requestKwargs)
                        for url in urlList]
        return AsyncRequests.map(requestsList, size=DOWNLOAD_CONCURRENT_COUNT)

    @classmethod
    def filterTorrentName(cls, torrentName):
        '''
        remove the characters \\ / : * ? " “ < > | from a name
        :param torrentName: name to clean
        :return: cleaned name, the input unchanged when it cannot be processed
        '''
        try:
            # one character class covers what the two former passes removed
            return re.sub(r'[\\/:*?"“<>|]', '', torrentName)
        except Exception:
            logger.error(traceback.format_exc())
            return torrentName

    @classmethod
    def removeDepulicates(cls, inputList):
        '''
        :param inputList: iterable of elements, they do not need to be hashable
        :return: new list without duplicates, first occurrences keep their order
        '''
        newList = []
        for element in inputList:
            if element not in newList:
                newList.append(element)
        return newList

    @classmethod
    def getCategoryFromDatailsPageUrl(cls, detailsPageUrl):
        '''
        :param detailsPageUrl: details page url; the fifth "/"-separated part
                               is used as the category number
        :return: category name, "unknown" when it cannot be resolved
        '''
        try:
            categoryNumber = str(detailsPageUrl.split("/")[4])
            return Category.categoryNumberToNameDict[categoryNumber]
        except Exception:
            logger.error(traceback.format_exc())
            return "unknown"

    @classmethod
    def clearTitle(cls, title):
        '''
        :param title: page title
        :return: the part before the first " - " without "\\xa0" characters,
                 the input unchanged when it cannot be processed
        '''
        try:
            return title.split(" - ")[0].replace("\xa0", "")
        except Exception:
            logger.error(traceback.format_exc())
            return title

    @classmethod
    def computeProgressBar(cls, currentValue, maxValue):
        '''
        :param currentValue: finished item count
        :param maxValue: total item count
        :return: percentage string such as "12.34%", "0.0%" when maxValue is 0
        '''
        total = float(maxValue)
        if total == 0:
            # nothing to do yet, avoid the ZeroDivisionError
            return "0.0%"
        ratio = float(currentValue) / total
        # the string is cut to five characters before the percent sign
        return str(round(ratio, 4) * 100)[:5] + "%"

    @classmethod
    def existInDatabase(cls, tableName, position):
        '''
        :param tableName: table to query
        :param position: condition passed to queryIsExist
        :return: True when queryIsExist gives a truthy result, False
                 otherwise or when the query raises
        '''
        try:
            # the query runs inside the try block so that the handler below
            # really covers it
            return bool(cls._database.queryIsExist(tableName, position))
        except Exception:
            logger.error(traceback.format_exc())
            return False

    @classmethod
    def testCode(cls):
        '''
        run a fixed query against the "video" table, the result is discarded
        '''
        cls._database.query(
            "video", {"md5": "7034677AC329006F5B2F54D69180F1E1"})