def __init__(self): # Provider Init TorrentProvider.__init__(self, 'AlphaRatio') # Credentials self.username = None self.password = None # URLs self.url = 'http://alpharatio.cc' self.urls = { 'login': urljoin(self.url, 'login.php'), 'search': urljoin(self.url, 'torrents.php'), } # Proper Strings self.proper_strings = ['PROPER', 'REPACK'] # Torrent Stats self.minseed = None self.minleech = None # Miscellaneous Options # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'TokyoToshokan') # Credentials self.public = True # URLs self.url = 'http://tokyotosho.info/' self.urls = { 'search': urljoin(self.url, 'search.php'), 'rss': urljoin(self.url, 'rss.php'), } # Proper Strings # Miscellaneous Options self.supports_absolute_numbering = True self.anime_only = True # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self, min_time=15) # only poll TokyoToshokan every 15 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'ABNormal') # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # URLs self.url = 'https://abnormal.ws' self.urls = { 'login': urljoin(self.url, 'login.php'), 'search': urljoin(self.url, 'torrents.php'), } # Proper Strings self.proper_strings = ['PROPER'] # Cache self.cache = tvcache.TVCache(self, min_time=30)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'HDSpace') # Credentials self.username = None self.password = None # URLs self.url = 'https://hd-space.org' self.urls = { 'login': urljoin(self.url, 'index.php?page=login'), 'search': urljoin(self.url, 'index.php'), } # Proper Strings # Miscellaneous Options # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self, min_time=10) # only poll HDSpace every 10 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'TorrentDay') # Credentials self.username = None self.password = None # URLs self.url = 'https://classic.torrentday.com' self.urls = { 'login': urljoin(self.url, '/torrents/'), 'search': urljoin(self.url, '/V3/API/API.php'), 'download': urljoin(self.url, '/download.php/') } # Proper Strings # Miscellaneous Options self.freeleech = False self.enable_cookies = True self.cookies = '' self.categories = {'Season': {'c14': 1}, 'Episode': {'c2': 1, 'c26': 1, 'c7': 1, 'c24': 1}, 'RSS': {'c2': 1, 'c26': 1, 'c7': 1, 'c24': 1, 'c14': 1}} # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self, min_time=10) # Only poll TorrentDay every 10 minutes max
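Several of the provider snippets above pass a leading slash as the second argument to urljoin (e.g. '/torrents/' or '/t'), which replaces the base URL's path rather than appending to it. A minimal stdlib-only sketch of that behaviour; the example.org base in the second pair is illustrative only:

from urllib.parse import urljoin

base = 'https://classic.torrentday.com'
print(urljoin(base, '/torrents/'))       # https://classic.torrentday.com/torrents/
print(urljoin(base + '/', 'torrents/'))  # https://classic.torrentday.com/torrents/
# The difference only matters once the base URL already carries a path:
print(urljoin('https://example.org/api/', 'jobs'))   # https://example.org/api/jobs
print(urljoin('https://example.org/api/', '/jobs'))  # https://example.org/jobs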
def __init__(self): # Provider Init TorrentProvider.__init__(self, "TorrentLeech") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # URLs self.url = "https://torrentleech.org" self.urls = { "login": urljoin(self.url, "user/account/login/"), "search": urljoin(self.url, "torrents/browse"), } # Proper Strings self.proper_strings = ["PROPER", "REPACK"] # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "TransmitTheNet") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None self.freeleech = None # URLs self.url = 'https://transmithe.net/' self.urls = { 'login': urljoin(self.url, '/login.php'), 'search': urljoin(self.url, '/torrents.php'), } # Proper Strings # Cache self.cache = tvcache.TVCache(self)
def __init__(self): """Initialize the class.""" super(AnimeTorrentsProvider, self).__init__('AnimeTorrents') # Credentials self.username = None self.password = None # URLs self.url = 'http://animetorrents.me' self.urls = { 'login': urljoin(self.url, 'login.php'), 'search_ajax': urljoin(self.url, 'ajax/torrents_data.php'), } # Miscellaneous Options self.supports_absolute_numbering = True self.anime_only = True self.categories = { 2: 'Anime Series', 7: 'Anime Series HD', } # Proper Strings self.proper_strings = [] # Cache self.cache = tv.Cache(self, min_time=20)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'MoreThanTV') # Credentials self.username = None self.password = None self._uid = None self._hash = None # URLs self.url = 'https://www.morethan.tv/' self.urls = { 'login': urljoin(self.url, 'login.php'), 'search': urljoin(self.url, 'torrents.php'), } # Proper Strings self.proper_strings = ['PROPER', 'REPACK'] # Miscellaneous Options # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'ILoveTorrents') # Credentials self.username = None self.password = None # URLs self.url = 'https://www.ilovetorrents.me/' self.urls = { 'login': urljoin(self.url, 'takelogin.php'), 'search': urljoin(self.url, 'browse.php'), 'download': urljoin(self.url, '{link}'), } # Proper Strings self.proper_strings = ['PROPER', 'REPACK', 'REAL'] # Miscellaneous Options # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "ThePirateBay") # Credentials self.public = True # Torrent Stats self.minseed = None self.minleech = None self.confirmed = True # URLs self.url = "https://thepiratebay.se" self.urls = { "rss": urljoin(self.url, "browse/200"), "search": urljoin(self.url, "s/"), # Needs trailing / } self.custom_url = None # Proper Strings # Cache self.cache = tvcache.TVCache(self, min_time=30) # only poll ThePirateBay every 30 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, "AlphaRatio") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # URLs self.url = "http://alpharatio.cc" self.urls = { "login": urljoin(self.url, "login.php"), "search": urljoin(self.url, "torrents.php"), } # Proper Strings self.proper_strings = ["PROPER", "REPACK"] # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "Speedcd") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None self.freeleech = False # URLs self.url = 'https://speed.cd' self.urls = { 'login': urljoin(self.url, 'take.login.php'), 'search': urljoin(self.url, 'browse.php'), } # Proper Strings self.proper_strings = ['PROPER', 'REPACK'] # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'YggTorrent') # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # URLs self.custom_url = None self.url = 'https://www.yggtorrent.is/' self.urls = { 'login': urljoin(self.url, 'user/login'), 'search': urljoin(self.url, 'engine/search') } # Proper Strings self.proper_strings = ['PROPER'] # Cache self.cache = tvcache.TVCache(self, min_time=30)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'HoundDawgs') # Credentials self.username = None self.password = None # URLs self.url = 'https://hounddawgs.org' self.urls = { 'base_url': self.url, 'search': urljoin(self.url, 'torrents.php'), 'login': urljoin(self.url, 'login.php'), } # Proper Strings # Miscellaneous Options self.freeleech = None self.ranked = None # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "T411") # Credentials self.username = None self.password = None self.token = None self.tokenLastUpdate = None # URLs self.url = 'https://api.t411.ch' self.urls = { 'base_url': 'http://www.t411.ch/', 'search': urljoin(self.url, 'torrents/search/%s*?cid=%s&limit=100'), 'rss': urljoin(self.url, 'torrents/top/today'), 'login_page': urljoin(self.url, 'auth'), 'download': urljoin(self.url, 'torrents/download/%s'), } # Proper Strings # Miscellaneous Options self.headers.update({'User-Agent': USER_AGENT}) self.subcategories = [433, 637, 455, 639] self.confirmed = False # Torrent Stats self.minseed = 0 self.minleech = 0 # Cache self.cache = tvcache.TVCache(self, min_time=10) # Only poll T411 every 10 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, "ThePirateBay") # Credentials self.public = True # Torrent Stats self.minseed = None self.minleech = None self.confirmed = True # URLs self.url = "https://thepiratebay.se" self.urls = { "rss": [urljoin(self.url, "browse/208/0/4/0"), urljoin(self.url, "browse/205/0/4/0")], "search": urljoin(self.url, "search"), } self.custom_url = None # Proper Strings # Cache self.cache = tvcache.TVCache(self, min_time=30) # only poll ThePirateBay every 30 minutes max self.magnet_regex = re.compile(r'magnet:\?xt=urn:btih:\w{32,40}(:?&dn=[\w. %+-]+)*(:?&tr=(:?tcp|https?|udp)[\w%. +-]+)*')
def __init__(self): # Provider Init TorrentProvider.__init__(self, "TorrentBytes") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None self.freeleech = False # URLs self.url = "https://www.torrentbytes.net" self.urls = { "login": urljoin(self.url, "takelogin.php"), "search": urljoin(self.url, "browse.php") } # Proper Strings self.proper_strings = ["PROPER", "REPACK"] # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'ArcheTorrent') # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # Freelech self.freeleech = False # URLs self.url = 'https://www.archetorrent.com/' self.urls = { 'login': urljoin(self.url, 'account-login.php'), 'search': urljoin(self.url, 'torrents-search.php'), 'download': urljoin(self.url, 'download.php'), } # Proper Strings self.proper_strings = ['PROPER'] # Cache self.cache = tvcache.TVCache(self, min_time=15)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'TorrentDay') # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None self.freeleech = False # URLs self.custom_url = None self.url = 'https://www.torrentday.com' self.urls = { 'login': urljoin(self.url, '/t'), 'search': urljoin(self.url, '/t.json'), 'download': urljoin(self.url, '/download.php/') } self.categories = { 'Season': {'14': 1}, 'Episode': {'2': 1, '26': 1, '7': 1, '24': 1, '34': 1}, 'RSS': {'2': 1, '26': 1, '7': 1, '24': 1, '34': 1, '14': 1} } self.enable_cookies = True # Cache self.cache = tvcache.TVCache(self, min_time=10) # Only poll TorrentDay every 10 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'HDBits') # Credentials self.username = None self.passkey = None # URLs self.url = 'https://hdbits.org' self.urls = { 'search': urljoin(self.url, '/api/torrents'), 'rss': urljoin(self.url, '/api/torrents'), 'download': urljoin(self.url, '/download.php'), } # Proper Strings # Miscellaneous Options # Torrent Stats # Cache self.cache = HDBitsCache(self, min_time=15) # only poll HDBits every 15 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'Danishbits') # Credentials self.username = None self.password = None # Torrent Stats self.minseed = 0 self.minleech = 0 self.freeleech = True # URLs self.url = 'https://danishbits.org' self.urls = { 'login': urljoin(self.url, 'login.php'), 'search': urljoin(self.url, 'torrents.php'), } # Proper Strings # Miscellaneous Options # Torrent Stats # Cache self.cache = tvcache.TVCache(self, min_time=10) # Only poll Danishbits every 10 minutes max
def __init__(self): # Provider Init TorrentProvider.__init__(self, "FileList") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # URLs self.url = "http://filelist.ro" self.urls = { "login": urljoin(self.url, "takelogin.php"), "search": urljoin(self.url, "browse.php"), } # Proper Strings self.proper_strings = ["PROPER", "REPACK"] # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "PhxBit") # Credentials self.username = None self.password = None # Torrent Stats self.minseed = None self.minleech = None # URLs self.url = 'https://phxbit.com' self.urls = { 'login': urljoin(self.url, '/connect.php'), 'search': urljoin(self.url, '/sphinx.php') } # Proper Strings self.proper_strings = ['PROPER'] # Cache self.cache = tvcache.TVCache(self, min_time=30)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "Immortalseed") # Credentials self.username = None self.password = None self.passkey = None # Torrent Stats self.minseed = None self.minleech = None self.freeleech = None # URLs self.url = 'https://immortalseed.me/' self.urls = { 'login': urljoin(self.url, 'takelogin.php'), 'search': urljoin(self.url, 'browse.php'), 'rss': urljoin(self.url, 'rss.php'), } # Proper Strings self.proper_strings = ['PROPER', 'REPACK'] # Cache self.cache = ImmortalseedCache(self, min_time=20)
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'TorrentLeech') # Credentials self.username = None self.password = None # URLs self.url = 'https://torrentleech.org' self.urls = { 'login': urljoin(self.url, 'user/account/login/'), 'search': urljoin(self.url, 'torrents/browse'), } # Proper Strings self.proper_strings = ['PROPER', 'REPACK'] # Miscellaneous Options # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self)
def __init__(self): # Provider Init TorrentProvider.__init__(self, "ABNormal") # Credentials self.username = None self.password = None # Torrent Stats self.ratio = None self.minseed = None self.minleech = None # URLs self.url = "https://abnormal.ws" self.urls = { "login": urljoin(self.url, "login.php"), "search": urljoin(self.url, "torrents.php"), } # Proper Strings self.proper_strings = ["PROPER"] # Cache self.cache = tvcache.TVCache(self, min_time=30)
def crawler(self): parser = argparse.ArgumentParser() parser.add_argument("keyword", help="keyword to search for") parser.add_argument("-n", "--number", help="number of search results (default: 5)", type=int, default=5) parser.add_argument("-p", "--page", help="result page to parse (default: 1)", type=int, default=1) args = parser.parse_args() url_req = urljoin(self.yt_url, "results?search_query={}&page={}".format(args.keyword, args.page)) page = requests.get(url_req) collection = [] counter = args.number tree = html.fromstring(page.text) for subtree in tree.xpath('//div[@class="yt-lockup-content"]'): if counter > 0: item = {} item['title'] = subtree.xpath('./h3[@class="yt-lockup-title "]/a/text()')[0] item['link'] = urljoin(self.yt_url, subtree.xpath('./h3[@class="yt-lockup-title "]/a/@href')[0]) item['desc'] = "" if len(subtree.xpath('./div/text()')) == 0 else subtree.xpath('./div/text()')[0] detail = self.__getdetail(item['link']) item.update(detail) short_url = self.__getshorturl(item['link']) item.update(short_url) collection.append(YTSearchResult(**item)) counter -= 1 else: break return collection
def __init__(self): # Provider Init TorrentProvider.__init__(self, 'TorrentBytes') # Credentials self.username = None self.password = None # URLs self.url = 'https://www.torrentbytes.net' self.urls = { 'login': urljoin(self.url, 'takelogin.php'), 'search': urljoin(self.url, 'browse.php') } # Proper Strings self.proper_strings = ['PROPER', 'REPACK'] # Miscellaneous Options self.freeleech = False # Torrent Stats self.minseed = None self.minleech = None # Cache self.cache = tvcache.TVCache(self)
def __init__(self): """Initialize the class.""" super(ShazbatProvider, self).__init__('Shazbat.tv') # Credentials self.passkey = None # URLs self.url = 'http://www.shazbat.tv' self.urls = { 'login': urljoin(self.url, 'login'), 'rss_recent': urljoin(self.url, 'rss/recent'), # 'rss_queue': urljoin(self.url, 'rss/download_queue'), # 'rss_followed': urljoin(self.url, 'rss/followed') } # Proper Strings # Miscellaneous Options self.supports_backlog = False self.options = None # Torrent Stats # Cache self.cache = ShazbatCache(self, min_time=20)
def get_actual_slot(self): actual_slots = {} response = requests.get(urljoin(self.url, self.api_beacon_head)).json() actual_slots['actual_slot'] = int(response['headSlot']) actual_slots['finalized_slot'] = int(response['finalizedSlot']) return actual_slots
def send_message(self, chat_id, text): params = {"chat_id": chat_id, "text": text} return requests.post(urljoin(self.api_url, "sendMessage"), data=params)
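A hedged, standalone sketch of the same call, assuming a Telegram-style Bot API whose base URL ends with a trailing slash (e.g. 'https://api.telegram.org/bot<token>/'); the explicit keyword arguments, timeout and error check are additions for illustration, not part of the original method:

import requests
from urllib.parse import urljoin

def send_message(api_url, chat_id, text, timeout=10):
    # Send the payload as a form body, mirroring the positional call above,
    # and fail loudly instead of silently returning an error response.
    response = requests.post(urljoin(api_url, 'sendMessage'),
                             data={'chat_id': chat_id, 'text': text},
                             timeout=timeout)
    response.raise_for_status()
    return response.json()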
def _get_result(self, device, execution_id, num_retries=3000, interval=1, verbose=False): # Job status job_status_url = ('Network/ibm-q/Groups/open/Projects/main/Jobs/' + execution_id) if verbose: print("Waiting for results. [Job ID: {}]".format(execution_id)) original_sigint_handler = signal.getsignal(signal.SIGINT) def _handle_sigint_during_get_result(*_): # pragma: no cover raise Exception( "Interrupted. The ID of your submitted job is {}.".format( execution_id)) try: signal.signal(signal.SIGINT, _handle_sigint_during_get_result) for retries in range(num_retries): # STEP5: WAIT FOR THE JOB TO BE RUN json_step5 = { 'allow_redirects': True, 'timeout': (self.timeout, None) } request = super(IOP, self).get(urljoin(_API_URL, job_status_url), **json_step5) request.raise_for_status() r_json = request.json() acceptable_status = ['VALIDATING', 'VALIDATED', 'RUNNING'] if r_json['status'] == 'COMPLETED': # STEP6: Get the endpoint to get the result json_step6 = { 'allow_redirects': True, 'timeout': (self.timeout, None) } request = super(IOP, self).get( urljoin(_API_URL, job_status_url + '/resultDownloadUrl'), **json_step6) request.raise_for_status() r_json = request.json() # STEP7: Get the result json_step7 = { 'allow_redirects': True, 'params': { 'access_token': None }, 'timeout': (self.timeout, None) } request = super(IOP, self).get(r_json['url'], **json_step7) r_json = request.json() result = r_json['results'][0] # STEP8: Confirm the data was downloaded json_step8 = { 'data': None, 'json': None, 'timeout': (5.0, None) } request = super(IOP, self).post( urljoin(_API_URL, job_status_url + '/resultDownloaded'), **json_step8) r_json = request.json() return result # Note: if the job stays stuck in 'Validating' mode, then something went # wrong in step 3 if r_json['status'] not in acceptable_status: raise Exception( "Error while running the code. Last status: {}.". format(r_json['status'])) time.sleep(interval) if self.is_online(device) and retries % 60 == 0: self.get_list_devices() if not self.is_online(device): raise DeviceOfflineError( "Device went offline. The ID of " "your submitted job is {}.".format(execution_id)) finally: if original_sigint_handler is not None: signal.signal(signal.SIGINT, original_sigint_handler) raise Exception("Timeout. The ID of your submitted job is {}.".format( execution_id))
def _run(self, info, device): """ Run the quantum code on the IOP machine. Update since September 2020: only protocol available is what they call 'object storage' where a job request via the POST method gets in return a url link to which to send the json data. A final http call validates the data communication. Args: info (dict): dictionary sent by the backend containing the code to run device (str): name of the ibm device to use Returns: (str): Execution Id """ # STEP1: Obtain most of the URLs for handling communication with # quantum device # Connect to the device json_step1 = { 'data': None, 'json': { 'backend': { 'name': device }, 'allowObjectStorage': True, 'shareLevel': 'none' }, 'timeout': (self.timeout, None) } request = super(IOP, self).post( urljoin(_API_URL, 'Network/ibm-q/Groups/open/Projects/main/Jobs'), **json_step1) request.raise_for_status() r_json = request.json() upload_url = r_json['objectStorageInfo']['uploadUrl'] execution_id = r_json['id'] # STEP2: WE UPLOAD THE CIRCUIT DATA n_classical_reg = info['nq'] # hack: easier to restrict labels to measured qubits n_qubits = n_classical_reg # self.backends[device]['nq'] instructions = info['json'] maxcredit = info['maxCredits'] c_label = [["c", i] for i in range(n_classical_reg)] q_label = [["q", i] for i in range(n_qubits)] # hack: the data value in the json quantum code is a string instruction_str = str(instructions).replace('\'', '\"') data = '{"qobj_id": "' + str(uuid.uuid4()) + '", ' data += '"header": {"backend_name": "' + device + '", ' data += ('"backend_version": "' + self.backends[device]['version'] + '"}, ') data += '"config": {"shots": ' + str(info['shots']) + ', ' data += '"max_credits": ' + str(maxcredit) + ', "memory": false, ' data += ('"parameter_binds": [], "memory_slots": ' + str(n_classical_reg)) data += (', "n_qubits": ' + str(n_qubits) + '}, "schema_version": "1.2.0", ') data += '"type": "QASM", "experiments": [{"config": ' data += '{"n_qubits": ' + str(n_qubits) + ', ' data += '"memory_slots": ' + str(n_classical_reg) + '}, ' data += ('"header": {"qubit_labels": ' + str(q_label).replace('\'', '\"') + ', ') data += '"n_qubits": ' + str(n_classical_reg) + ', ' data += '"qreg_sizes": [["q", ' + str(n_qubits) + ']], ' data += '"clbit_labels": ' + str(c_label).replace('\'', '\"') + ', ' data += '"memory_slots": ' + str(n_classical_reg) + ', ' data += '"creg_sizes": [["c", ' + str(n_classical_reg) + ']], ' data += ('"name": "circuit0", "global_phase": 0}, "instructions": ' + instruction_str + '}]}') json_step2 = { 'data': data, 'params': { 'access_token': None }, 'timeout': (5.0, None) } request = super(IOP, self).put(upload_url, **json_step2) request.raise_for_status() # STEP3: CONFIRM UPLOAD json_step3 = { 'data': None, 'json': None, 'timeout': (self.timeout, None) } upload_data_url = urljoin( _API_URL, 'Network/ibm-q/Groups/open/Projects/main/Jobs/' + str(execution_id) + '/jobDataUploaded') request = super(IOP, self).post(upload_data_url, **json_step3) request.raise_for_status() return execution_id
def get_genesis(self): return int( requests.get(urljoin( self.url, self.api_genesis)).json()['data']['genesis_time'])
def get_finalized_epoch(self): return int( requests.get( urljoin(self.url, self.api_beacon_head_finality_checkpoints)).json() ['data']['finalized']['epoch'])
def __init__(self, url, slots_per_epoch): self.url = url self.slots_per_epoch = slots_per_epoch self.version = requests.get(urljoin(url, self.api_version)).json()
def parse(self, data, mode): """ Parse search results for items. :param data: The raw response from a search :param mode: The current mode used to search, e.g. RSS :return: A dict with a list of items found and whether there's a next page to search """ def process_column_header(td): ret = '' if td.a and td.a.img: ret = td.a.img.get('title', td.a.get_text(strip=True)) if not ret: ret = td.get_text(strip=True) return ret items = [] has_next_page = False with BS4Parser(data, 'html5lib') as html: torrent_table = html.find('table', id='torrent_table') torrent_rows = torrent_table('tr') if torrent_table else [] # ignore next page in RSS mode has_next_page = mode != 'RSS' and html.find( 'a', class_='pager_next') is not None log.debug('Are there more pages? {0}'.format(has_next_page)) # Continue only if at least one Release is found if len(torrent_rows) < 2: log.debug( 'Data returned from provider does not contain any torrents' ) return {'has_next_page': has_next_page, 'items': []} # '', '', 'Name /Year', 'Files', 'Time', 'Size', 'Snatches', 'Seeders', 'Leechers' labels = [ process_column_header(label) for label in torrent_rows[0]('td') ] group_title = '' # Skip column headers for result in torrent_rows[1:]: cells = result('td') result_class = result.get('class') # When "Grouping Torrents" is enabled, the structure of the table changes group_index = -2 if 'group_torrent' in result_class else 0 try: title = result.select( 'a[href^="torrents.php?id="]')[0].get_text() title = re.sub( r'\s+', ' ', title).strip() # clean empty lines and multiple spaces if 'group' in result_class or 'torrent' in result_class: # get international title if available title = re.sub(r'.* \[(.*?)\](.*)', r'\1\2', title) if 'group' in result_class: group_title = title continue for serie in self.absolute_numbering: if serie in title: # remove season from title when it's in absolute format title = re.sub(r'S\d{2}E(\d{2,4})', r'\1', title) break download_url = urljoin( self.url, result.select('a[href^="torrents.php?action=download"]' )[0]['href']) if not all([title, download_url]): continue seeders = try_int(cells[labels.index('Seeders') + group_index].get_text(strip=True)) leechers = try_int(cells[labels.index('Leechers') + group_index].get_text(strip=True)) # Filter unseeded torrent if seeders < self.minseed: if mode != 'RSS': log.debug( "Discarding torrent because it doesn't meet the" ' minimum seeders: {0}. Seeders: {1}', title, seeders) continue torrent_details = None if 'group_torrent' in result_class: # torrents belonging to a group torrent_details = title title = group_title elif 'torrent' in result_class: # standalone/ungrouped torrents torrent_details = cells[labels.index('Nome/Ano')].find( 'div', class_='torrent_info').get_text() torrent_details = torrent_details.replace( '[', ' ').replace(']', ' ').replace('/', ' ') torrent_details = torrent_details.replace( 'Full HD ', '1080p').replace('HD ', '720p') torrent_size = cells[labels.index('Tamanho') + group_index].get_text(strip=True) size = convert_size(torrent_size) or -1 torrent_name = '{0} {1}'.format( title, torrent_details.strip()).strip() torrent_name = re.sub(r'\s+', ' ', torrent_name) items.append({ 'title': torrent_name, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'pubdate': None }) if mode != 'RSS': log.debug( 'Found result: {0} with {1} seeders and {2} leechers' .format(torrent_name, seeders, leechers)) except (AttributeError, TypeError, KeyError, ValueError, IndexError): log.exception('Failed parsing provider.') return {'has_next_page': has_next_page, 'items': items}
def query(self, rel_url, params=None, sse=False): abs_url = urljoin(self.horizon_uri, rel_url) reply = self._query(abs_url, params, sse) return check_horizon_reply(reply) if not sse else reply
def get_finalized_epoch(self): finalized_epoch = int( requests.get(urljoin( self.url, self.api_beacon_head)).json()['finalizedEpoch']) return finalized_epoch
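The beacon-chain getters above (get_actual_slot, get_genesis, get_finalized_epoch) all index straight into requests.get(...).json(). A hedged sketch of a small shared helper they could use instead; the helper name and the 15-second timeout are assumptions, not part of the original client:

import requests
from urllib.parse import urljoin

def _get_json(base_url, endpoint, timeout=15):
    # Fetch an endpoint relative to the beacon node URL, failing loudly on
    # HTTP errors instead of raising a KeyError later when indexing the JSON.
    response = requests.get(urljoin(base_url, endpoint), timeout=timeout)
    response.raise_for_status()
    return response.json()

# e.g. finalized_epoch = int(_get_json(self.url, self.api_beacon_head)['finalizedEpoch'])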
def get_capabilities(self, just_params=False): """ Use the provider url and apikey to get the capabilities. Makes use of the default newznab caps param. e.g. http://yourznab/api?t=caps&apikey=skdfiw7823sdkdsfjsfk Returns a tuple with (success or not, array with dicts [{'id': '5070', 'name': 'Anime'}, {'id': '5080', 'name': 'Documentary'}, {'id': '5020', 'name': 'Foreign'}...etc}], error message) """ Capabilities = namedtuple('Capabilities', 'success categories params message') categories = params = [] if not self._check_auth(): message = 'Provider requires auth and your key is not set' return Capabilities(False, categories, params, message) url_params = {'t': 'caps'} if self.needs_auth and self.api_key: url_params['apikey'] = self.api_key response = self.session.get(urljoin(self.url, 'api'), params=url_params) if not response or not response.text: message = 'Error getting caps xml for: {0}'.format(self.name) log.warning(message) return Capabilities(False, categories, params, message) with BS4Parser(response.text, 'html5lib') as html: if not html.find('categories'): message = 'Error parsing caps xml for: {0}'.format(self.name) log.warning(message) return Capabilities(False, categories, params, message) self.set_caps(html.find('searching')) params = self.cap_tv_search if just_params: message = 'Success getting params for: {0}'.format(self.name) return Capabilities(True, categories, params, message) for category in html('category'): category_name = category.get('name', '') if 'TV' in category_name and category.get('id'): categories.append({ 'id': category['id'], 'name': category['name'] }) for subcat in category('subcat'): if subcat.get('name', '') and subcat.get('id'): categories.append({ 'id': subcat['id'], 'name': subcat['name'] }) # Some providers have the subcat `Anime` in the `Other` category elif category_name == 'Other' and category.get('id'): for subcat in category('subcat'): if subcat.get('name', '') == 'Anime' and subcat.get('id'): categories.append({ 'id': subcat['id'], 'name': subcat['name'] }) break message = 'Success getting categories and params for: {0}'.format( self.name) return Capabilities(True, categories, params, message)
def parse(self, data, mode): """ Parse search results for items. :param data: The raw response from a search :param mode: The current mode used to search, e.g. RSS :return: A list of items found """ def process_column_header(td): result = '' if td.a: result = td.a.get('title') if not result: result = td.get_text(strip=True) return result items = [] with BS4Parser(data, 'html5lib') as html: torrent_table = html.find('table', id='torrenttable') torrent_rows = torrent_table('tr') if torrent_table else [] # Continue only if at least one release is found if len(torrent_rows) < 2: log.debug( 'Data returned from provider does not contain any torrents' ) return items labels = [ process_column_header(label) for label in torrent_rows[0]('th') ] # Skip column headers for row in torrent_rows[1:]: try: title = row.find( 'td', class_='name').find('a').get_text(strip=True) download_url = urljoin( self.url, row.find('td', class_='quickdownload').find('a')['href']) if not all([title, download_url]): continue seeders = try_int( row.find('td', class_='seeders').get_text(strip=True)) leechers = try_int( row.find('td', class_='leechers').get_text(strip=True)) # Filter unseeded torrent if seeders < min(self.minseed, 1): if mode != 'RSS': log.debug( "Discarding torrent because it doesn't meet the" " minimum seeders: {0}. Seeders: {1}", title, seeders) continue torrent_size = row('td')[labels.index('Size')].get_text() size = convert_size(torrent_size) or -1 item = { 'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'pubdate': None, } if mode != 'RSS': log.debug( 'Found result: {0} with {1} seeders and {2} leechers', title, seeders, leechers) items.append(item) except (AttributeError, TypeError, KeyError, ValueError, IndexError): log.error('Failed parsing provider. Traceback: {0!r}', traceback.format_exc()) return items
class Torrentday(TorrentProvider): id = "torrentday" name = "Torrentday" fields = {"cookies": "Cookies"} url = 'https://www.torrentday.com' urls = { 'login': urljoin(url, '/t'), 'search': urljoin(url, '/V3/API/API.php'), 'download': urljoin(url, '/download.php/'), 'details': urljoin(url, "/details.php?id=") } categories = { 'Season': { 'c14': 1 }, 'Episode': { 'c2': 1, 'c26': 1, 'c7': 1, 'c24': 1, 'c34': 1 }, 'Movie': { 'c11': 1, 'c5': 1, 'c48': 1, 'c44': 1 }, 'RSS': { 'c2': 1, 'c26': 1, 'c7': 1, 'c24': 1, 'c34': 1, 'c14': 1 } } enable_cookies = True @staticmethod def convert_size(size, default=None, use_decimal=False, **kwargs): result = None try: sep = kwargs.pop('sep', ' ') scale = kwargs.pop('units', ['B', 'KB', 'MB', 'GB', 'TB', 'PB']) default_units = kwargs.pop('default_units', scale[0]) if sep: size_tuple = size.strip().split(sep) scalar, units = size_tuple[0], size_tuple[1:] units = units[0].upper() if units else default_units else: regex_scalar = re.search(r'([\d. ]+)', size, re.I) scalar = regex_scalar.group() if regex_scalar else -1 units = size.strip(scalar) if scalar != -1 else 'B' scalar = float(scalar) scalar *= (1024 if not use_decimal else 1000)**scale.index(units) result = scalar # TODO: Make sure fallback methods obey default units except AttributeError: result = size if size is not None else default except ValueError: result = default finally: try: if result != default: result = max(int(result), 0) except (TypeError, ValueError): pass return result @staticmethod def search(imdbid, term): search_url = Torrentday.urls['search'] post_data = { '/browse.php?': None, 'cata': 'yes', 'jxt': 8, 'jxw': 'b', 'search': imdbid } post_data.update(Torrentday.categories['Movie']) headers = {"Cookie": Torrentday.getConfig('torrentday')['cookie']} response = Url.open(search_url, post_data=post_data, headers=headers) try: parsed_json = json.loads(response.text) except JSONDecodeError: logging.error( "Torrentday did not get a proper response. Check cookies") return [] if not parsed_json: logging.debug('No data returned from provider') return [] try: items = parsed_json.get('Fs', [])[0].get('Cn', {}).get('torrents', []) except Exception: logging.debug( 'Data returned from provider does not contain any torrents') return [] return Torrentday.parse(items, imdbid) @staticmethod def get_rss(): logging.info('Fetching latest RSS from Torrentday.') return [] @staticmethod def parse(data, imdbid=None, term=None): logging.info('Parsing Torrentday results.') results = [] for i in data: result = {} try: title = re.sub(r'\[.*\=.*\].*\[/.*\]', '', i['name']) if i['name'] else None torrent_url = urljoin( Torrentday.urls['download'], '{0}/{1}'.format( i['id'], i['fname'])) if i['id'] and i['fname'] else None result['score'] = 0 result['size'] = Torrentday.convert_size(i['size']) or -1 result['status'] = 'Available' result['pubdate'] = None result['title'] = title result['imdbid'] = imdbid result['indexer'] = 'TorrentDay' result['info_link'] = urljoin(Torrentday.urls['details'], "?id=" + str(i['id'])) result['torrentfile'] = torrent_url result['guid'] = md5("torrentday:{}".format(i['id']).encode('utf-8')).hexdigest() result['type'] = 'torrent' result['downloadid'] = None result['freeleech'] = i['free'] result['download_client'] = None result['seeders'] = i['seed'] results.append(result) except Exception as e: logging.error('Error parsing Torrentday json.', exc_info=True) continue logging.info('Found {} results from Torrentday.'.format(len(results))) return results
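A quick, hedged sanity check of the convert_size helper above; the import path is hypothetical, and the expected values simply follow from the default binary (1024-based) scale used in the code:

from torrentday import Torrentday  # hypothetical module path for the class above

assert Torrentday.convert_size('1 KB') == 1024
assert Torrentday.convert_size('1.5 GB') == 1610612736
assert Torrentday.convert_size('2 GB', use_decimal=True) == 2000000000
assert Torrentday.convert_size('not a size', default=-1) == -1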
def search(self, search_strings, age=0, ep_obj=None, force_query=False, manual_search=False, **kwargs): """ Search a provider and parse the results. :param search_strings: A dict with mode (key) and the search value (value) :param age: Not used :param ep_obj: Not used :param force_query: Newznab will by default search using the tvdb/tmdb/imdb id for a show. As a backup it can also search using a query string, like the showtitle with the season/episode number. The force_query parameter can be passed to force a search using the query string. :param manual_search: If the search is started through a manual search, we're utilizing the force_query param. :returns: A list of search results (structure) """ results = [] if not self._check_auth(): return results # For providers that don't have caps, or for which the t=caps is not working. if not self.params and all( provider not in self.url for provider in self.providers_without_caps): self.get_capabilities(just_params=True) # Search Params search_params = { 't': 'search', 'limit': 100, 'offset': 0, 'cat': ','.join(self.cat_ids), 'maxage': app.USENET_RETENTION } for mode in search_strings: log.debug('Search mode: {0}', mode) if self.needs_auth and self.api_key: search_params['apikey'] = self.api_key if mode != 'RSS': match_indexer = self._match_indexer() if match_indexer and not force_query: search_params['t'] = 'tvsearch' search_params.update(match_indexer) if ep_obj.series.air_by_date or ep_obj.series.sports: date_str = str(ep_obj.airdate) search_params['season'] = date_str.partition('-')[0] search_params['ep'] = date_str.partition( '-')[2].replace('-', '/') else: search_params['season'] = ep_obj.scene_season search_params['ep'] = ep_obj.scene_episode else: search_params['t'] = 'search' if mode == 'Season': search_params.pop('ep', '') for search_string in search_strings[mode]: if mode != 'RSS': # If its a PROPER search, need to change param to 'search' # so it searches using 'q' param if any(proper_string in search_string for proper_string in self.proper_strings): search_params['t'] = 'search' log.debug( 'Search show using {search}', { 'search': 'search string: {search_string}'.format( search_string=search_string if search_params['t'] != 'tvsearch' else 'indexer_id: {indexer_id}'.format( indexer_id=match_indexer)) }) if search_params['t'] != 'tvsearch': search_params['q'] = search_string response = self.session.get(urljoin(self.url, 'api'), params=search_params) if not response or not response.text: log.debug('No data returned from provider') continue results += self.parse(response.text, mode) # Since we aren't using the search string, # break out of the search string loop if any(param in search_params for param in itervalues(INDEXERS_PARAM)): break # Reprocess but now use force_query = True if there are no results if not results and not force_query: return self.search(search_strings, ep_obj=ep_obj, force_query=True) return results
def parse(self, data, mode): """ Parse search results for items. :param data: The raw response from a search :param mode: The current mode used to search, e.g. RSS :return: A list of items found """ items = [] with BS4Parser(data, 'html5lib') as html: # Continue only if at least one release is found empty = html.find('h2', text='No .torrents fit this filter criteria') if empty: log.debug('Data returned from provider does not contain any torrents') return items torrent_table = html.find('table', attrs={'style': 'border: none; width: 100%;'}) torrent_rows = torrent_table('tr', class_='browse') if torrent_table else [] for row in torrent_rows: cells = row('td') try: title = cells[1].find('a').get('title') torrent_url = cells[2].find('a').get('href') download_url = urljoin(self.url, torrent_url) if not all([title, torrent_url]): continue seeders = try_int(cells[9].get_text(), 1) leechers = try_int(cells[10].get_text()) # Filter unseeded torrent if seeders < min(self.minseed, 1): if mode != 'RSS': log.debug("Discarding torrent because it doesn't meet the" ' minimum seeders: {0}. Seeders: {1}', title, seeders) continue torrent_size = self._norm_size(cells[7].get_text(strip=True)) size = convert_size(torrent_size) or -1 item = { 'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'pubdate': None, } if mode != 'RSS': log.debug('Found result: {0} with {1} seeders and {2} leechers', title, seeders, leechers) items.append(item) except (AttributeError, TypeError, KeyError, ValueError, IndexError): log.exception('Failed parsing provider.') return items
def parse(self, data, mode): """ Parse search results for items. :param data: The raw response from a search :param mode: The current mode used to search, e.g. RSS :return: A list of items found """ def process_column_header(td): return td.get_text(strip=True).lower() items = [] with BS4Parser(data, 'html5lib') as html: # We need to store the post url, to be used with every result later on. post_url = html.find('form', {'method': 'post'})['action'] table = html.find('table', class_='xMenuT') rows = table('tr') if table else [] row_offset = 1 if not rows or not len(rows) - row_offset: log.debug('Data returned from provider does not contain any torrents') return items headers = rows[0]('th') # 0, 1, subject, poster, group, age labels = [process_column_header(header) or idx for idx, header in enumerate(headers)] # Skip column headers rows = rows[row_offset:] for row in rows: try: col = dict(list(zip(labels, row('td')))) nzb_id_input = col[0 if mode == 'RSS' else 1].find('input') if not nzb_id_input: continue nzb_id = nzb_id_input['name'] # Try and get the the article subject from the weird binsearch format title = self.clean_title(col['subject'].text, mode) except AttributeError: log.debug('Parsing rows, that may not always have useful info. Skipping to next.') continue if not all([title, nzb_id]): continue # Obtain the size from the 'description' size_field = BinSearchProvider.size_regex.search(col['subject'].text) if size_field: size_field = size_field.group(1) size = convert_size(size_field, sep='\xa0') or -1 size = int(size) download_url = urljoin(self.url, '{post_url}|nzb_id={nzb_id}'.format(post_url=post_url, nzb_id=nzb_id)) # For future use # detail_url = 'https://www.binsearch.info/?q={0}'.format(title) human_time = True date = col['age' if mode != 'RSS' else 'date'].get_text(strip=True).replace('-', ' ') if mode == 'RSS': human_time = False pubdate_raw = date pubdate = self.parse_pubdate(pubdate_raw, human_time=human_time) item = { 'title': title, 'link': download_url, 'size': size, 'pubdate': pubdate, } if mode != 'RSS': log.debug('Found result: {0}', title) items.append(item) return items
def run(self, res): for a in res.meta['html'].cssselect(self.css_selector): new_link = urljoin(res.request.url, a.attrib['href']) yield Resource(Request('GET', new_link), None, res.session, {})
def search(self, search_strings, age=0, ep_obj=None): results = [] if not self.login(): return results for mode in search_strings: items = [] if mode != 'RSS': logger.log("Search Mode: {0}".format(mode), logger.DEBUG) for search_string in search_strings[mode]: if mode != 'RSS': logger.log( "Search string: {0}".format( search_string.decode("utf-8")), logger.DEBUG) search_url = self.urls['search'] % (quote(search_string), self.categories[mode]) try: data = self.get_url(search_url, returns='text') time.sleep(cpu_presets[sickbeard.CPU_PRESET]) except Exception as e: logger.log( "Unable to fetch data. Error: {0}".format(repr(e)), logger.WARNING) if not data: continue with BS4Parser(data, 'html5lib') as html: torrent_table = html.find('table', id='torrents-table') torrent_rows = torrent_table('tr') if torrent_table else [] # Continue only if at least one Release is found if len(torrent_rows) < 2: logger.log( "Data returned from provider does not contain any torrents", logger.DEBUG) continue for result in torrent_table('tr')[1:]: try: link = result.find('td', class_='ttr_name').find('a') url = result.find('td', class_='td_dl').find('a') title = link.string if re.search(r'\.\.\.', title): data = self.get_url(urljoin( self.url, link['href']), returns='text') if data: with BS4Parser(data) as details_html: title = re.search( '(?<=").+(?<!")', details_html.title.string).group(0) download_url = self.urls['download'] % url['href'] seeders = int( result.find('td', class_='ttr_seeders').string) leechers = int( result.find('td', class_='ttr_leechers').string) torrent_size = result.find( 'td', class_='ttr_size').contents[0] size = convert_size(torrent_size) or -1 except (AttributeError, TypeError): continue if not all([title, download_url]): continue # Filter unseeded torrent if seeders < self.minseed or leechers < self.minleech: if mode != 'RSS': logger.log( "Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})" .format(title, seeders, leechers), logger.DEBUG) continue item = { 'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'hash': '' } if mode != 'RSS': logger.log( "Found result: {0} with {1} seeders and {2} leechers" .format(title, seeders, leechers), logger.DEBUG) items.append(item) # For each search mode sort all the items by seeders if available items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True) results += items return results
def search(self, search_strings, age=0, ep_obj=None): # pylint: disable=too-many-branches, too-many-locals, too-many-statements results = [] anime = (self.show and self.show.anime) or (ep_obj and ep_obj.show and ep_obj.show.anime) or False search_params = { "q": "", "field": "seeders", "sorder": "desc", "rss": 1, "category": ("tv", "anime")[anime] } for mode in search_strings: items = [] logger.log("Search Mode: {0}".format(mode), logger.DEBUG) for search_string in search_strings[mode]: search_params["q"] = search_string if mode != "RSS" else "" search_params["field"] = "seeders" if mode != "RSS" else "time_add" if mode != "RSS": logger.log("Search string: {0}".format (search_string.decode("utf-8")), logger.DEBUG) search_url = self.urls["search"] % ("usearch" if mode != "RSS" else search_string) if self.custom_url: if not validators.url(self.custom_url): logger.log("Invalid custom url: {0}".format(self.custom_url), logger.WARNING) return results search_url = urljoin(self.custom_url, search_url.split(self.url)[1]) data = self.get_url(search_url, params=search_params, returns="text") if not data: logger.log("URL did not return results/data, if the results are on the site maybe try a custom url, or a different one", logger.DEBUG) continue if not data.startswith("<?xml"): logger.log("Expected xml but got something else, is your mirror failing?", logger.INFO) continue with BS4Parser(data, "html5lib") as html: for item in html("item"): try: title = item.title.get_text(strip=True) # Use the torcache link kat provides, # unless it is not torcache or we are not using blackhole # because we want to use magnets if connecting direct to client # so that proxies work. download_url = item.enclosure["url"] if sickbeard.TORRENT_METHOD != "blackhole" or "torcache" not in download_url: download_url = item.find("torrent:magneturi").next.replace("CDATA", "").strip("[!]") + self._custom_trackers if not (title and download_url): continue seeders = try_int(item.find("torrent:seeds").get_text(strip=True)) leechers = try_int(item.find("torrent:peers").get_text(strip=True)) # Filter unseeded torrent if seeders < self.minseed or leechers < self.minleech: if mode != "RSS": logger.log("Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format (title, seeders, leechers), logger.DEBUG) continue verified = bool(try_int(item.find("torrent:verified").get_text(strip=True))) if self.confirmed and not verified: if mode != "RSS": logger.log("Found result " + title + " but that doesn't seem like a verified result so I'm ignoring it", logger.DEBUG) continue torrent_size = item.find("torrent:contentlength").get_text(strip=True) size = convert_size(torrent_size) or -1 info_hash = item.find("torrent:infohash").get_text(strip=True) item = {'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'hash': info_hash} if mode != "RSS": logger.log("Found result: {0} with {1} seeders and {2} leechers".format(title, seeders, leechers), logger.DEBUG) items.append(item) except (AttributeError, TypeError, KeyError, ValueError): continue # For each search mode sort all the items by seeders if available items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True) results += items return results
def map_download(request, mapid, template='maps/map_download.html'): """ Download all the layers of a map as a batch XXX To do, remove layer status once progress id done This should be fix because """ map_obj = _resolve_map( request, mapid, 'base.download_resourcebase', _PERMISSION_MSG_VIEW) map_status = dict() if request.method == 'POST': def perm_filter(layer): return request.user.has_perm( 'base.view_resourcebase', obj=layer.get_self_resource()) mapJson = map_obj.json(perm_filter) # we need to remove duplicate layers j_map = json.loads(mapJson) j_layers = j_map["layers"] for j_layer in j_layers: if j_layer["service"] is None: j_layers.remove(j_layer) continue if (len([l for l in j_layers if l == j_layer])) > 1: j_layers.remove(j_layer) mapJson = json.dumps(j_map) if 'geonode.geoserver' in settings.INSTALLED_APPS \ and ogc_server_settings.BACKEND == 'geonode.geoserver': # TODO the url needs to be verified on geoserver url = "%srest/process/batchDownload/launch/" % ogc_server_settings.LOCATION elif 'geonode.qgis_server' in settings.INSTALLED_APPS \ and ogc_server_settings.BACKEND == 'geonode.qgis_server': url = urljoin(settings.SITEURL, reverse("qgis_server:download-map", kwargs={'mapid': mapid})) # qgis-server backend stop here, continue on qgis_server/views.py return redirect(url) # the path to geoserver backend continue here resp, content = http_client.request(url, 'POST', body=mapJson) status = int(resp.status) if status == 200: map_status = json.loads(content) request.session["map_status"] = map_status else: raise Exception( 'Could not start the download of %s. Error was: %s' % (map_obj.title, content)) locked_layers = [] remote_layers = [] downloadable_layers = [] for lyr in map_obj.layer_set.all(): if lyr.group != "background": if not lyr.local: remote_layers.append(lyr) else: ownable_layer = Layer.objects.get(alternate=lyr.name) if not request.user.has_perm( 'download_resourcebase', obj=ownable_layer.get_self_resource()): locked_layers.append(lyr) else: # we need to add the layer only once if len( [l for l in downloadable_layers if l.name == lyr.name]) == 0: downloadable_layers.append(lyr) return render(request, template, context={ "geoserver": ogc_server_settings.PUBLIC_LOCATION, "map_status": map_status, "map": map_obj, "locked_layers": locked_layers, "remote_layers": remote_layers, "downloadable_layers": downloadable_layers, "site": settings.SITEURL })
def resolve_redirects(self, resp, req, stream=False, timeout=None, verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs): """Receives a Response. Returns a generator of Responses or Requests.""" hist = [] # keep track of history url = self.get_redirect_target(resp) while url: prepared_request = req.copy() # Update history and keep track of redirects. # resp.history must ignore the original request in this loop hist.append(resp) resp.history = hist[1:] # Consume socket so it can be released resp.content if self.max_redirects <= len(resp.history): raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp) # Release the connection resp.close() # Handle redirection without scheme (see: RFC 1808 Section 4) if url.startswith('//'): parsed_rurl = urlparse(resp.url) url = '%s:%s' % (to_native_string(parsed_rurl.scheme), url) # The scheme should be lower case... parsed = urlparse(url) url = parsed.geturl() # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. if not parsed.netloc: url = urljoin(resp.url, requote_uri(url)) else: url = requote_uri(url) prepared_request.url = to_native_string(url) self.rebuild_method(prepared_request, resp) # https://github.com/requests/requests/issues/1084 if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): # https://github.com/requests/requests/issues/3490 purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding') for header in purged_headers: prepared_request.headers.pop(header, None) prepared_request.body = None headers = prepared_request.headers try: del headers['Cookie'] except KeyError: pass # Extract any cookies sent on the response to the cookiejar # in the new request. Because we've mutated our copied prepared # request, use the old one that we haven't yet touched. prepared_request._cookies.extract_cookies( MockResponse(HTTPHeaderDict(resp.headers)), MockRequest(req)) merge_cookies(prepared_request._cookies, self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) # Rebuild auth and proxy information. proxies = self.rebuild_proxies(prepared_request, proxies) self.rebuild_auth(prepared_request, resp) # Override the original request. req = prepared_request req.adapt_prepare() if yield_requests: yield req else: resp = self.send(req, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies, allow_redirects=False, **adapter_kwargs) yield resp while not resp.done(): yield resp resp = resp.result() self.cookies.extract_cookies( MockResponse(HTTPHeaderDict(resp.headers)), MockRequest(prepared_request)) # extract redirect url, if any, for the next loop url = self.get_redirect_target(resp)
def mocked_requests_get(*args, **kwargs): # Accessing status of device. Return online. status_url = 'Backends/ibmqx4/queue/status' if args[0] == urljoin(_api_url_status, status_url): return MockResponse({"state": True}, 200)
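A hedged sketch of how a stub like mocked_requests_get is typically wired into a test with unittest.mock; MockResponse is a minimal stand-in defined here for the example, and the _api_url_status value is an assumed placeholder rather than the project's real constant:

import requests
from unittest import mock
from urllib.parse import urljoin

_api_url_status = 'https://example.invalid/api/'  # assumed placeholder base URL

class MockResponse(object):
    def __init__(self, json_data, status_code):
        self.json_data = json_data
        self.status_code = status_code

    def json(self):
        return self.json_data

def mocked_requests_get(*args, **kwargs):
    # Accessing status of device. Return online.
    status_url = 'Backends/ibmqx4/queue/status'
    if args[0] == urljoin(_api_url_status, status_url):
        return MockResponse({"state": True}, 200)
    return MockResponse({}, 404)  # unmatched URLs fall through to a 404 stub

@mock.patch('requests.get', side_effect=mocked_requests_get)
def test_queue_status_reports_online(mock_get):
    response = requests.get(urljoin(_api_url_status, 'Backends/ibmqx4/queue/status'))
    assert response.json()["state"] is True
    assert mock_get.call_count == 1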
def search(self, search_strings, age=0, ep_obj=None): # pylint: disable=too-many-locals, too-many-branches, too-many-statements results = [] """ 205 = SD, 208 = HD, 200 = All Videos https://pirateproxy.pl/s/?q=Game of Thrones&type=search&orderby=7&page=0&category=200 """ # orderby is 7 in browse for seeders, but 8 in search! search_params = { "q": "", "type": "search", "orderby": 8, "page": 0, "category": 200 } # Units units = ["B", "KIB", "MIB", "GIB"] def process_column_header(th): text = "" if th.a: text = th.a.get_text(strip=True) if not text: text = th.get_text(strip=True) return text for mode in search_strings: items = [] logger.log("Search Mode: {0}".format(mode), logger.DEBUG) for search_string in search_strings[mode]: search_urls = (self.urls["search"], self.urls["rss"])[mode == "RSS"] if not isinstance(search_urls, list): search_urls = [search_urls] for search_url in search_urls: if self.custom_url: if not validators.url(self.custom_url): logger.log( "Invalid custom url: {0}".format( self.custom_url), logger.WARNING) return results search_url = urljoin(self.custom_url, search_url.split(self.url)[1]) if mode != "RSS": search_params["q"] = search_string logger.log( "Search string: {}".format( search_string.decode("utf-8")), logger.DEBUG) # Prevents a 302 redirect, since there is always a 301 from .se to the best mirror having an extra # redirect is excessive on the provider and spams the debug log unnecessarily search_url, params = self.convert_url( search_url, search_params) data = self.get_url(search_url, params=params, returns="text") else: data = self.get_url(search_url, returns="text") if not data: logger.log( "URL did not return data, maybe try a custom url, or a different one", logger.DEBUG) continue with BS4Parser(data, "html5lib") as html: torrent_table = html.find("table", id="searchResult") torrent_rows = torrent_table( "tr") if torrent_table else [] # Continue only if at least one Release is found if len(torrent_rows) < 2: logger.log( "Data returned from provider does not contain any torrents", logger.DEBUG) continue labels = [ process_column_header(label) for label in torrent_rows[0]("th") ] # Skip column headers for result in torrent_rows[1:]: try: cells = result("td") # Funky js on page messing up titles, this fixes that title = result.find( class_="detLink")['title'].split( 'Details for ', 1)[-1] download_url = result.find( title="Download this torrent using magnet" )["href"] + self._custom_trackers if not self.magnet_regex.match(download_url): logger.log( "Got an invalid magnet: {0}".format( download_url)) logger.log( "Invalid ThePirateBay proxy please try another one", logger.DEBUG) continue if not all([title, download_url]): continue seeders = try_int( cells[labels.index("SE")].get_text( strip=True)) leechers = try_int( cells[labels.index("LE")].get_text( strip=True)) # Filter unseeded torrent if seeders < self.minseed or leechers < self.minleech: if mode != "RSS": logger.log( "Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})" .format(title, seeders, leechers), logger.DEBUG) continue # Accept Torrent only from Good People for every Episode Search if self.confirmed and not result.find( alt=re.compile(r"VIP|Trusted")): if mode != "RSS": logger.log( "Found result: {0} but that doesn't seem like a trusted result so I'm ignoring it" .format(title), logger.DEBUG) continue # Convert size after all possible skip scenarios torrent_size = re.sub( r".*Size ([\d.]+).+([KMGT]iB).*", r"\1 \2", result.find(class_="detDesc").get_text(strip=True)) size = convert_size(torrent_size, units=units) or -1 item = { 'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'hash': '' } if mode != "RSS": logger.log( "Found result: {0} with {1} seeders and {2} leechers" .format(title, seeders, leechers), logger.DEBUG) items.append(item) except StandardError: continue # For each search mode sort all the items by seeders if available items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True) results += items return results
def search(self, search_strings, age=0, ep_obj=None): results = [] if not self.login(): return results # Search Params search_params = { "searchstr": "", "filter_cat[1]": 1, "filter_cat[2]": 1, "filter_cat[3]": 1, "filter_cat[4]": 1, "filter_cat[5]": 1 } # Units units = ["B", "KB", "MB", "GB", "TB", "PB"] def process_column_header(td): result = "" if td.a and td.a.img: result = td.a.img.get("title", td.a.get_text(strip=True)) if not result: result = td.get_text(strip=True) return result for mode in search_strings: items = [] logger.log("Search Mode: {0}".format(mode), logger.DEBUG) for search_string in search_strings[mode]: if mode != "RSS": logger.log( "Search string: {0}".format( search_string.decode("utf-8")), logger.DEBUG) search_params["searchstr"] = search_string search_url = self.urls["search"] data = self.get_url(search_url, params=search_params, returns="text") if not data: logger.log("No data returned from provider", logger.DEBUG) continue with BS4Parser(data, "html5lib") as html: torrent_table = html.find("table", id="torrent_table") torrent_rows = torrent_table("tr") if torrent_table else [] # Continue only if at least one Release is found if len(torrent_rows) < 2: logger.log( "Data returned from provider does not contain any torrents", logger.DEBUG) continue # "", "", "Name /Year", "Files", "Time", "Size", "Snatches", "Seeders", "Leechers" labels = [ process_column_header(label) for label in torrent_rows[0]("td") ] # Skip column headers for result in torrent_rows[1:]: cells = result("td") if len(cells) < len(labels): continue try: title = cells[labels.index("Name /Year")].find( "a", dir="ltr").get_text(strip=True) download_url = urljoin( self.url, cells[labels.index("Name /Year")].find( "a", title="Download")["href"]) if not all([title, download_url]): continue seeders = try_int( cells[labels.index("Seeders")].get_text( strip=True)) leechers = try_int( cells[labels.index("Leechers")].get_text( strip=True)) # Filter unseeded torrent if seeders < self.minseed or leechers < self.minleech: if mode != "RSS": logger.log( "Discarding torrent because it doesn't meet the" " minimum seeders or leechers: {0} (S:{1} L:{2})" .format(title, seeders, leechers), logger.DEBUG) continue torrent_size = cells[labels.index( "Size")].get_text(strip=True) size = convert_size(torrent_size, units=units) or -1 item = { 'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'hash': '' } if mode != "RSS": logger.log( "Found result: {0} with {1} seeders and {2} leechers" .format(title, seeders, leechers), logger.DEBUG) items.append(item) except StandardError: continue # For each search mode sort all the items by seeders if available items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True) results += items return results
def search(self, search_strings, age=0, ep_obj=None):  # pylint: disable=too-many-locals, too-many-branches
    results = []
    if not self.login():
        return results

    # Search Params
    search_params = {
        'tags_type': 1,
        'order_by': 'time',
        'order_way': 'desc',
        'action': 'basic',
        'searchsubmit': 1,
        'searchstr': ''
    }

    # Units
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

    def process_column_header(td):
        result = ''
        if td.a and td.a.img:
            result = td.a.img.get('title', td.a.get_text(strip=True))
        if not result:
            result = td.get_text(strip=True)
        return result

    for mode in search_strings:
        items = []
        logger.log("Search Mode: {0}".format(mode), logger.DEBUG)

        for search_string in search_strings[mode]:
            if mode != 'RSS':
                logger.log("Search string: {0}".format(search_string.decode("utf-8")), logger.DEBUG)

            search_params['searchstr'] = search_string
            data = self.get_url(self.urls['search'], params=search_params, returns='text')
            if not data:
                logger.log("No data returned from provider", logger.DEBUG)
                continue

            with BS4Parser(data, 'html5lib') as html:
                torrent_table = html.find('table', class_='torrent_table')
                torrent_rows = torrent_table('tr') if torrent_table else []

                # Continue only if at least one Release is found
                if len(torrent_rows) < 2:
                    logger.log("Data returned from provider does not contain any torrents", logger.DEBUG)
                    continue

                labels = [process_column_header(label) for label in torrent_rows[0]('td')]

                # Skip column headers
                for result in torrent_rows[1:]:
                    try:
                        # skip if torrent has been nuked due to poor quality
                        if result.find('img', alt='Nuked'):
                            continue

                        title = result.find('a', title='View torrent').get_text(strip=True)
                        download_url = urljoin(self.url, result.find('span', title='Download').parent['href'])
                        if not all([title, download_url]):
                            continue

                        cells = result('td')
                        seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                        leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                        # Filter unseeded torrent
                        if seeders < self.minseed or leechers < self.minleech:
                            if mode != 'RSS':
                                logger.log("Discarding torrent because it doesn't meet the"
                                           " minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers),
                                           logger.DEBUG)
                            continue

                        torrent_size = cells[labels.index('Size')].get_text(strip=True)
                        size = convert_size(torrent_size, units=units) or -1

                        item = {'title': title, 'link': download_url, 'size': size,
                                'seeders': seeders, 'leechers': leechers, 'hash': ''}
                        if mode != 'RSS':
                            logger.log("Found result: {0} with {1} seeders and {2} leechers".format(
                                title, seeders, leechers), logger.DEBUG)

                        items.append(item)
                    except StandardError:
                        continue

        # For each search mode sort all the items by seeders if available
        items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True)
        results += items

    return results
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    # Units
    units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', class_='mainblockcontenttt')
        torrent_rows = torrent_table('tr') if torrent_table else []

        if not torrent_rows or torrent_rows[2].find('td', class_='lista'):
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Cat., Active, Filename, Dl, Wl, Added, Size, Uploader, S, L, C
        labels = [label.a.get_text(strip=True) if label.a else label.get_text(strip=True)
                  for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            try:
                cells = row.findChildren('td')[:len(labels)]
                if len(cells) < len(labels):
                    continue

                title = cells[labels.index('Filename')].a
                title = title.get_text(strip=True) if title else None
                link = cells[labels.index('Dl')].a
                link = link.get('href') if link else None
                download_url = urljoin(self.url, link) if link else None
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('S')].get_text(strip=True))
                leechers = try_int(cells[labels.index('L')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < min(self.minseed, 1):
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text()
                size = convert_size(torrent_size, units=units) or -1

                pubdate_raw = cells[labels.index('Added')].get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
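# Context sketch only: in this search/parse split, parse() is normally driven by a
# search() that fetches one page per search string and mode, roughly as below. The
# query parameter name and the use of get_url()/urls['search'] are assumptions made
# for this sketch, not this provider's exact API.
def search(self, search_strings, age=0, ep_obj=None):
    results = []
    for mode in search_strings:
        for search_string in search_strings[mode]:
            data = self.get_url(self.urls['search'],
                                params={'search': search_string},  # assumed parameter name
                                returns='text')
            if not data:
                continue
            results += self.parse(data, mode)
    return results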
def url(self, path):
    return urljoin(self.r.config.permalink_url, path)
def search(self, search_strings, age=0, ep_obj=None):  # pylint: disable=too-many-branches, too-many-locals, too-many-statements
    results = []
    if not self.login():
        return results

    for mode in search_strings:
        items = []
        for search_string in search_strings[mode]:
            if mode != 'RSS':
                logger.log("Search string: {0}".format(search_string.decode("utf-8")), logger.DEBUG)

            search_params = {
                'searchtext': search_string,
                'filter_freeleech': (0, 1)[self.freeleech is True],
                'order_by': ('seeders', 'time')[mode == 'RSS'],
                "order_way": "desc"
            }

            if not search_string:
                del search_params['searchtext']

            data = self.get_url(self.urls['search'], params=search_params, returns='text')
            if not data:
                logger.log("No data returned from provider", logger.DEBUG)
                continue

            try:
                with BS4Parser(data, 'html5lib') as html:
                    torrent_table = html.find('table', {'id': 'torrent_table'})
                    if not torrent_table:
                        logger.log("Data returned from {0} does not contain any torrents".format(self.name), logger.DEBUG)
                        continue

                    labels = [x.get_text(strip=True) or x.a.img.get('alt')
                              for x in torrent_table.find('tr', class_='colhead').find_all('td')]
                    torrent_rows = torrent_table('tr', class_='torrent')

                    # Continue only if one Release is found
                    if not torrent_rows:
                        logger.log("Data returned from {0} does not contain any torrents".format(self.name), logger.DEBUG)
                        continue

                    for torrent_row in torrent_rows:
                        freeleech = torrent_row.find('img', alt="Freeleech") is not None
                        if self.freeleech and not freeleech:
                            continue

                        # Normal Download Link
                        download_item = torrent_row.find('a', {'title': 'Download Torrent'})

                        if not download_item:
                            # If the user has downloaded it
                            download_item = torrent_row.find('a', {'title': 'Previously Grabbed Torrent File'})
                        if not download_item:
                            # If the user is seeding
                            download_item = torrent_row.find('a', {'title': 'Currently Seeding Torrent'})
                        if not download_item:
                            # If the user is leeching
                            download_item = torrent_row.find('a', {'title': 'Currently Leeching Torrent'})
                        if not download_item:
                            # If there are none
                            continue

                        download_url = urljoin(self.url, download_item['href'])

                        temp_anchor = torrent_row.find('a', {"data-src": True})
                        title = temp_anchor['data-src'].rsplit('.', 1)[0]
                        if not all([title, download_url]):
                            continue

                        cells = torrent_row('td')
                        seeders = try_int(cells[labels.index('Seeders')].text.strip())
                        leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                        # Filter unseeded torrent
                        if seeders < self.minseed or leechers < self.minleech:
                            if mode != 'RSS':
                                logger.log("Discarding torrent because it doesn't meet the"
                                           " minimum seeders or leechers: {0} (S:{1} L:{2})".format(title, seeders, leechers),
                                           logger.DEBUG)
                            continue

                        size = temp_anchor['data-filesize'] or -1

                        item = {'title': title, 'link': download_url, 'size': size,
                                'seeders': seeders, 'leechers': leechers, 'hash': ''}
                        if mode != 'RSS':
                            logger.log("Found result: {0} with {1} seeders and {2} leechers".format(
                                title, seeders, leechers), logger.DEBUG)

                        items.append(item)

            except Exception:
                logger.log("Failed parsing provider. Traceback: {0}".format(traceback.format_exc()), logger.ERROR)

        # For each search mode sort all the items by seeders
        items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True)
        results += items

    return results
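# Restructuring suggestion only (not the provider's code): the four fallback find()
# calls above can be collapsed into a loop over the candidate anchor titles.
DOWNLOAD_TITLES = (
    'Download Torrent',
    'Previously Grabbed Torrent File',
    'Currently Seeding Torrent',
    'Currently Leeching Torrent',
)

def find_download_anchor(torrent_row):
    """Return the first matching download anchor in torrent_row, or None."""
    for title in DOWNLOAD_TITLES:
        download_item = torrent_row.find('a', {'title': title})
        if download_item:
            return download_item
    return None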
def httpbin(*suffix):
    """Returns url for HTTPBIN resource."""
    return urljoin(HTTPBIN, '/'.join(suffix))
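# Reference note: urljoin(), used throughout these helpers and providers, resolves a
# relative path against the last directory of the base URL, while a leading slash
# resets the path to the site root. The example URLs below are illustrative only.
from urllib.parse import urljoin  # the providers themselves import a Python 2/3 compatible urljoin

print(urljoin('https://example.org/api/', 'torrents.php'))  # https://example.org/api/torrents.php
print(urljoin('https://example.org/api', 'torrents.php'))   # https://example.org/torrents.php
print(urljoin('https://example.org/api/', '/torrents/'))    # https://example.org/torrents/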
def map_download(request, mapid, template='maps/map_download.html'):
    """
    Download all the layers of a map as a batch.

    XXX To do: remove layer status once progress is done.
    This should be fixed because
    """
    try:
        map_obj = _resolve_map(
            request,
            mapid,
            'base.download_resourcebase',
            _PERMISSION_MSG_VIEW)
    except PermissionDenied:
        return HttpResponse(_("Not allowed"), status=403)
    except Exception:
        raise Http404(_("Not found"))
    if not map_obj:
        raise Http404(_("Not found"))

    map_status = dict()
    if request.method == 'POST':

        def perm_filter(layer):
            return request.user.has_perm(
                'base.view_resourcebase',
                obj=layer.get_self_resource())

        mapJson = map_obj.json(perm_filter)

        # we need to remove duplicate layers
        j_map = json.loads(mapJson)
        j_layers = j_map["layers"]
        for j_layer in j_layers:
            if j_layer["service"] is None:
                j_layers.remove(j_layer)
                continue
            if (len([_l for _l in j_layers if _l == j_layer])) > 1:
                j_layers.remove(j_layer)
        mapJson = json.dumps(j_map)

        if check_ogc_backend(qgis_server.BACKEND_PACKAGE):
            url = urljoin(settings.SITEURL,
                          reverse("qgis_server:download-map", kwargs={'mapid': mapid}))
            # qgis-server backend stops here; processing continues in qgis_server/views.py
            return redirect(url)

        # the path to the geoserver backend continues here
        resp, content = http_client.request(url, 'POST', body=mapJson)
        status = int(resp.status_code)

        if status == 200:
            map_status = json.loads(content)
            request.session["map_status"] = map_status
        else:
            raise Exception(
                'Could not start the download of %s. Error was: %s' %
                (map_obj.title, content))

    locked_layers = []
    remote_layers = []
    downloadable_layers = []

    for lyr in map_obj.layer_set.all():
        if lyr.group != "background":
            if not lyr.local:
                remote_layers.append(lyr)
            else:
                ownable_layer = Layer.objects.get(alternate=lyr.name)
                if not request.user.has_perm(
                        'download_resourcebase',
                        obj=ownable_layer.get_self_resource()):
                    locked_layers.append(lyr)
                else:
                    # we need to add the layer only once
                    if len([_l for _l in downloadable_layers if _l.name == lyr.name]) == 0:
                        downloadable_layers.append(lyr)

    site_url = settings.SITEURL.rstrip('/') if settings.SITEURL.startswith('http') else settings.SITEURL

    register_event(request, EventType.EVENT_DOWNLOAD, map_obj)

    return render(request, template, context={
        "geoserver": ogc_server_settings.PUBLIC_LOCATION,
        "map_status": map_status,
        "map": map_obj,
        "locked_layers": locked_layers,
        "remote_layers": remote_layers,
        "downloadable_layers": downloadable_layers,
        "site": site_url
    })
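# Note on the duplicate-removal loop above: calling j_layers.remove() while iterating
# over j_layers skips the element that shifts into the removed slot, so service-less
# or duplicate layers can survive. A safer standalone sketch is below; dedupe_layers
# is a hypothetical helper written for illustration, not part of GeoNode.
import json

def dedupe_layers(map_json):
    """Drop layers with no service and keep only the first copy of each duplicate."""
    j_map = json.loads(map_json)
    deduped = []
    for j_layer in j_map["layers"]:
        if j_layer["service"] is None or j_layer in deduped:
            continue
        deduped.append(j_layer)
    j_map["layers"] = deduped
    return json.dumps(j_map)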