def urlencode(self, safe=None):
    output = []
    if safe:
        encode = lambda k, v: '%s=%s' % (quote(k, safe), quote(v, safe))
    else:
        encode = lambda k, v: urlencode({k: v})
    for k, list_ in self.lists():
        k = smart_str(k, self.encoding)
        output.extend(
            [encode(k, smart_str(v, self.encoding)) for v in list_])
    return '&'.join(output)
def test_field_with_two_filters(self):
    req = make_drf_request_with_version(version='2.0')
    project = factories.ProjectFactory()
    node = factories.NodeFactory(parent=project)
    data = self.BasicNodeSerializer(node, context={'request': req}).data['data']
    field = data['relationships']['field_with_filters']['links']
    assert_in(quote('filter[target]=hello', safe='?='), field['related']['href'])
    assert_in(quote('filter[woop]=yea', safe='?='), field['related']['href'])
def test_successful_external_login_cas_redirect(self, mock_service_validate, mock_get_user_from_cas_resp):
    service_url = 'http://*****:*****@'), safe='@')
    assert_in('username={}'.format(username_quoted), resp.location)
    assert_in('verification_key={}'.format(user.verification_key), resp.location)
def track(self, message, event_name='event'):
    with open(self.log_file, 'a') as file:
        s = '{time} - {event} - {message}\n'
        file.write(s.format(time=time.time(), event=event_name, message=message))
    try:
        uid = message.chat_id
    except AttributeError:
        self.logger.warn('No chat_id in message')
        return False
    data = message.to_json()
    try:
        url = self.url_template.format(token=str(self.token), uid=str(uid), name=quote(event_name))
        request = Request(url, data=data.encode(), headers={'Content-Type': 'application/json'})
        urlopen(request)
        return True
    except HTTPError as error:
        self.logger.warn('Botan track error ' + str(error.code) + ':' +
                         error.read().decode('utf-8'))
        return False
    except URLError as error:
        self.logger.warn('Botan track error ' + str(error.reason))
        return False
def getObject(container_name, object_name):
    """ returns (a proxy to) the object with the given name in the given container

    raises an exception if no such object or container exist
    """
    container = getContainer(container_name, validate=False)
    return container.getObject(quote(object_name))
def __init__(self, name):
    """
    name: name of the container (must be unique)
    """
    assert "/" not in name
    self._name = name
    # all the sockets are in the same directory so it's independent from the PWD
    self.ipc_name = BASE_DIRECTORY + "/" + quote(name) + ".ipc"

    if not os.path.isdir(BASE_DIRECTORY + "/."):  # + "/." to check it's readable
        logging.error(
            "Directory " + BASE_DIRECTORY + " is not accessible, "
            "which is needed for creating the container %s", name)
    elif os.path.exists(self.ipc_name):
        try:
            os.remove(self.ipc_name)
            logging.warning(
                "The file '%s' was deleted to create container '%s'.",
                self.ipc_name, name)
        except OSError:
            logging.error(
                "Impossible to delete file '%s', needed to create container '%s'.",
                self.ipc_name, name)

    Pyro4.Daemon.__init__(self, unixsocket=self.ipc_name, interface=ContainerObject)

    # To be set by the user of the container
    self.rootId = None  # objectId of a "Root" component
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % quote(
            name.encode('utf-8'), safe=b':/~?=&%'
        )

    log.debug('search url: %s' % url)

    html = requests.get(url).text
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning(
                    'Error converting seed value (%s) from newtorrents to integer.' % seed
                )
                seed = 0

        # TODO: also parse content_size and peers from results
        torrents.append(
            Entry(
                title=release_name,
                url=torrent_url,
                torrent_seeds=seed,
                torrent_availability=torrent_availability(seed, 0),
            )
        )
    # sort with seed number Reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('torrent_availability', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug(
                'search result contains multiple matches, sorted %s by most seeders' % torrents
            )
        return torrents
def handleCreate(self, confInfo):
    confInfo.addDeprecationMsg()

    location = self.callerArgs.id
    force = False
    if FORCE in self.callerArgs:
        force = bundle_paths.parse_boolean(self.callerArgs[FORCE][0])

    try:
        bundle, status = appbuilder.installApp(location, force)
    except splunk.RESTException as e:
        raise admin.InternalException(e.msg)

    upgraded = (status == bundle_paths.BundleInstaller.STATUS_UPGRADED)

    appName = bundle.name(raw=True) or ''
    confInfo[appName].append('name', appName)
    confInfo[appName].append('location', bundle.location() or '')
    confInfo[appName].append('status', 'upgraded' if upgraded else 'installed')
    confInfo[appName].append('source_location', location)

    if not upgraded:
        reloader = 'apps/local/_reload'
    else:
        reloader = 'apps/local/%s/_reload' % urllib_parse.quote(bundle.name())
    rest.simpleRequest(reloader, sessionKey=self.getSessionKey())
def _redirect_to_local(self, b):
    url = splunk.mergeHostPath()
    for part in self.pathParts[:(self.BASE_DEPTH - 1)]:
        url += '/' + part
    url += '/' + 'local'
    url += '/' + quote(b.prettyname())
    return url
def parsenetloc(netloc, defaultport=None):
    """
    Parses a network location (e.g., from a URL) and returns a
    ``(host, port)`` pair for that location, where the host is a
    percent-quoted UTF-8 byte string.

    If a port number is omitted in the network location,
    ``defaultport`` is used.
    """
    m = _NETLOC_RE.search(netloc.decode("utf_8"))
    if not m:
        raise NetLocParseError("unable to parse netloc {!r}".format(netloc))
    # From <https://docs.python.org/2/library/itertools.html#recipes>;
    # group 0 is the entire matched string; captured groups start at
    # index 1
    zip_args = [iter(m.groups()[1:])] * 2
    for host, port in zip(*zip_args):
        if host is not None:
            port = int(port) if port else defaultport
            host_quoted = url_parse.quote(host, safe=b":")
            # This works properly in Python 2 because str maps to
            # :class:`future.types.newstr` (it *may* also work in
            # Python 2 with the native `str` type, but that's only
            # because Python 2 perversely allows one to call `encode`
            # on a byte string; but the results would be
            # unpredictable if the URL was already encoded)
            if isinstance(host_quoted, str):
                host_quoted = host_quoted.encode("utf_8")
            return host_quoted, port
def handle_node(path, size_handler, is_dir):
    """
    Generic helper function for handling a remote file system node
    """
    if is_dir and files_only:
        return

    url = urljoin(url_prefix, quote(sftp.normalize(path)))
    title = remotepath.basename(path)

    entry = Entry(title, url)

    if get_size:
        try:
            size = size_handler(path)
        except Exception as e:
            log.error('Failed to get size for %s (%s)' % (path, e))
            size = -1
        entry['content_size'] = size

    if private_key:
        entry['private_key'] = private_key
        if private_key_pass:
            entry['private_key_pass'] = private_key_pass

    entries.append(entry)
def get_videos(params):
    category = params.get('category')
    if category in ['Match Highlights', 'Match Replays']:
        data_url = config.TOPICS_URL.format(
            quote(config.CATEGORY_LOOKUP[category]))
    else:
        data_url = config.VIDEO_URL
    tree = ET.fromstring(fetch_url(data_url))
    listing = []
    for section in tree.findall('MediaSection'):
        for item in section:
            if not item.attrib['Type'] == 'V':
                continue
            v = classes.Video()
            v.desc = item.find('Description').text
            v.title = item.find('Title').text
            v.time = item.find('Timestamp').text
            video_id = item.find('Video')
            if video_id is not None:
                v.video_id = video_id.attrib.get('Id')
                v.policy_key = video_id.attrib.get('PolicyKey')
                v.account_id = video_id.attrib.get('AccountId')
            v.thumb = item.find('FullImageUrl').text
            v.link_id = item.find('Id').text
            listing.append(v)
    return listing
def search(self, task, entry, config=None):
    """
    Search for name from piratebay.
    """
    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")

        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                size = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    log.error(
                        'Malformed search result? Title: "%s", No size? %s',
                        entry['title'],
                        size_text,
                    )

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def build_query(domain, month_range=0):
    """Build the domain query list based on *domain*.

    *domain* is a string value representing the name of the site to
    search. Domain names are case sensitive.

    Returns: dictionary that captures the required `TrafficHistory`
    AWIS action's query parameters for the last month.
    """
    today = datetime.date.today()
    first_day = today.replace(day=1)
    last_month = first_day - datetime.timedelta(days=1)
    if month_range:
        last_month = last_month - relativedelta(months=month_range)
    last_day = calendar.monthrange(last_month.year, last_month.month)[1]
    start = '{}01'.format(last_month.strftime('%Y%m'))

    url_params = {
        'Url': quote(domain),
        'ResponseGroup': 'History',
        'Range': last_day,
        'Start': start,
    }
    return url_params
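# Illustrative usage sketch (not part of the original source): with the default
# month_range=0 the query covers the most recent complete calendar month, so if
# "today" were 2024-03-15 the result would be:
#
#   >>> build_query('example.com')
#   {'Url': 'example.com', 'ResponseGroup': 'History', 'Range': 29, 'Start': '20240201'}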
def _execute_request(self, method, url, params, **kwargs):
    """Function to execute and handle a request."""
    # Execute Request
    try:
        if method == "GET":
            if six.PY2:
                items = params.iteritems()
            else:
                items = params.items()
            encoded_params = "&".join("%s=%s" % (key, quote(str(value))) for key, value in items)
            resp = self._session.get(url, params=encoded_params, timeout=self._timeout, **kwargs)
        elif method == "POST":
            resp = self._session.post(url, params=params, timeout=self._timeout, **kwargs)

        self._debuglog("Request url: " + resp.url)
        self._debuglog("Request status_code: " + str(resp.status_code))
        self._debuglog("Request headers: " + str(resp.headers))

        if resp.status_code == 200:
            # We got a DSM response
            return resp.json()

        # We got a 400, 401 or 404 ...
        raise RequestException(resp)
    except (RequestException, JSONDecodeError) as exp:
        raise SynologyDSMRequestException(exp)
def search(self, task, entry, config=None):
    from flexget.utils.template import environment

    search_strings = [
        quote(normalize_unicode(s).encode('utf-8'))
        for s in entry.get('search_strings', [entry['title']])
    ]
    rss_plugin = plugin.get_plugin_by_name('rss')
    entries = set()
    rss_config = rss_plugin.instance.build_config(config)
    try:
        template = environment.from_string(rss_config['url'])
    except TemplateSyntaxError as e:
        raise plugin.PluginError('Invalid jinja template as rss url: %s' % e)
    rss_config['all_entries'] = True
    for search_string in search_strings:
        rss_config['url'] = template.render({'search_term': search_string})
        # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
        try:
            results = rss_plugin.phase_handlers['input'](task, rss_config)
        except plugin.PluginError as e:
            log.error('Error attempting to get rss for %s: %s', rss_config['url'], e)
        else:
            entries.update(results)
    return entries
def safe_classpath(classpath, synthetic_jar_dir, custom_name=None):
    """Bundles classpath into one synthetic jar that includes original classpath in its manifest.

    This is to ensure classpath length never exceeds platform ARG_MAX.

    :param list classpath: Classpath to be bundled.
    :param string synthetic_jar_dir: directory to store the synthetic jar, if `None` a temp
      directory will be provided and cleaned up upon process exit. Otherwise the synthetic jar
      will remain in the supplied directory, only for debugging purposes.
    :param custom_name: filename of the synthetic jar to be created.

    :returns: A classpath (singleton list with just the synthetic jar).
    :rtype: list of strings
    """
    if synthetic_jar_dir:
        safe_mkdir(synthetic_jar_dir)
    else:
        synthetic_jar_dir = safe_mkdtemp()

    # Quote the paths so that paths containing reserved characters can be safely
    # passed to the JVM classloader.
    bundled_classpath = [parse.quote(cp) for cp in relativize_classpath(classpath, synthetic_jar_dir)]
    manifest = Manifest()
    manifest.addentry(Manifest.CLASS_PATH, ' '.join(bundled_classpath))

    with temporary_file(root_dir=synthetic_jar_dir, cleanup=False, suffix='.jar') as jar_file:
        with open_zip(jar_file, mode='w', compression=ZIP_STORED) as jar:
            jar.writestr(Manifest.PATH, manifest.contents())
        if custom_name:
            custom_path = os.path.join(synthetic_jar_dir, custom_name)
            safe_concurrent_rename(jar_file.name, custom_path)
            return [custom_path]
        else:
            return [jar_file.name]
def broadcast_to_sharejs(action, sharejs_uuid, node=None, wiki_name='home', data=None):
    """
    Broadcast an action to all documents connected to a wiki.

    Actions include 'lock', 'unlock', 'redirect', and 'delete'
    'redirect' and 'delete' both require a node to be specified
    'unlock' requires data to be a list of contributors with write permission
    """
    url = 'http://{host}:{port}/{action}/{id}/'.format(
        host=wiki_settings.SHAREJS_HOST,
        port=wiki_settings.SHAREJS_PORT,
        action=action,
        id=sharejs_uuid
    )

    if action == 'redirect' or action == 'delete':
        redirect_url = quote(
            node.web_url_for('project_wiki_view', wname=wiki_name, _guid=True),
            safe='',
        )
        url = os.path.join(url, redirect_url)

    try:
        requests.post(url, json=data)
    except requests.ConnectionError:
        pass  # Assume sharejs is not online
def __init__(self, name, parent=None, children=None, dependencies=None, daemon=None):
    """
    name (string): unique name used to identify the component
    parent (Component): the parent of this component, that will be in .parent
    children (dict str -> Component): the children of this component, that will
        be in .children. Objects not instance of Component are skipped.
    dependencies (dict str -> Component): the dependencies of this component,
        that will be in .dependencies.
    daemon (Pyro4.daemon): daemon via which the object will be registered.
        default=None => not registered
    """
    ComponentBase.__init__(self)
    self._name = name
    if daemon:
        # registered under its name
        daemon.register(self, quote(name))

    self._parent = None
    self.parent = parent  # calls the setter, which updates ._parent

    dependencies = dependencies or {}
    children = children or {}

    for dep, c in dependencies.items():
        if not isinstance(c, ComponentBase):
            raise ValueError("Dependency %s is not a component: %s" % (dep, c))
    cd = set(dependencies.values())

    # It's up to the sub-class to set correctly the .parent of the children
    cc = set(c for c in children.values() if isinstance(c, ComponentBase))

    # Note the only way to ensure the VA notifies changes is to set a
    # different object at every change.
    self.dependencies = _vattributes.VigilantAttribute(cd)
    self.children = _vattributes.VigilantAttribute(cc)
def search(self, task, entry, config=None):
    """
    Search for name from piratebay.
    """
    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes
        query = query.replace('-', ' ')
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = 'http://thepiratebay.%s/search/%s%s' % (
            CUR_TLD, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = requests.get(url).content
        soup = get_soup(page)

        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD, link.get('href'))
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches'])

            # Parse content_size
            size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
            size = re.search(r'Size ([\.\d]+)\xa0([GMK])iB', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config=None):
    """
    Search for name from torrentleech.
    """
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/',
                                   data=data, headers=request_headers, allow_redirects=True)
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)

        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']

            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config):
    """
    Search for entries on 1337x
    """
    if not isinstance(config, dict):
        config = {}

    order_by = ''
    sort_order = ''
    if isinstance(config.get('order_by'), str):
        if config['order_by'] != 'leechers':
            order_by = '/{0}/desc'.format(config['order_by'])
            sort_order = 'sort-'

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = '{0}search/{1}{2}/1/'.format(
            sort_order, quote(search_string.encode('utf8')), order_by)
        log.debug('Using search params: %s; ordering by: %s', search_string, order_by or 'default')
        try:
            page = task.requests.get(self.base_url + query)
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('1337x request failed: %s', e)
            continue

        soup = get_soup(page.content)
        if soup.find('div', attrs={'class': 'tab-detail'}) is not None:
            for link in soup.find('div', attrs={'class': 'tab-detail'}).findAll(
                    'a', href=re.compile('^/torrent/')):

                li = link.parent.parent.parent

                title = str(link.text).replace('...', '')
                info_url = self.base_url + str(link.get('href'))[1:]
                seeds = int(li.find('span', class_='green').string)
                leeches = int(li.find('span', class_='red').string)
                size = str(li.find('div', class_='coll-4').string)

                size = parse_filesize(size)

                e = Entry()

                e['url'] = info_url
                e['title'] = title
                e['torrent_seeds'] = seeds
                e['torrent_leeches'] = leeches
                e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])
                e['content_size'] = size

                entries.add(e)

    return entries
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me', 'ch', 'in']:
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (
                domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try more
                # if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError(
                'No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url)

        rss = feedparser.parse(r.content)

        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')

        for item in rss.entries:
            m = re.search(
                r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def get_file(self, owner, dataset_id, file_name, format='json'):
    resp = self._session.get(
        to_endpoint_url('/file_download/{}/{}/{}'.format(
            owner, dataset_id, quote(file_name, safe='')
        ))
    )
    resp.raise_for_status()
    return self._decode_response(resp, format)
def sign_url(url, media_auth_token):
    headers = {'authorization': 'JWT{0}'.format(media_auth_token)}
    data = json.loads(
        fetch_url(config.SIGN_URL.format(quote(url)), headers=headers))
    if data.get('message') == 'SUCCESS':
        return str(data.get('url'))
    else:
        raise Exception('error in signing url')
def search(self, task, entry, config=None):
    """
    Search for entries on SceneAccess
    """
    session = task.requests

    if 'sceneaccess.eu' not in session.domain_limiters:
        session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare queries...
    base_urls = list()
    entries = set()
    for category in self.process_categories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).text
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)

    return entries
def _handle_path(self, entries, ftp, baseurl, path='', mlst_supported=False, files_only=False,
                 recursive=False, get_size=True, encoding=None):
    dirs = self.list_directory(ftp, path)

    for p in dirs:
        if encoding:
            p = native_str_to_text(p, encoding=encoding)

        # Clean file list when subdirectories are used
        p = p.replace(path + '/', '')

        mlst = {}
        if mlst_supported:
            mlst_output = ftp.sendcmd('MLST ' + path + '/' + p)
            clean_mlst_output = [line.strip().lower() for line in mlst_output.splitlines()][1]
            mlst = self.parse_mlst(clean_mlst_output)
        else:
            element_is_directory = self.is_directory(ftp, path + '/' + p)
            if element_is_directory:
                mlst['type'] = 'dir'
                log.debug('%s is a directory', p)
            else:
                mlst['type'] = 'file'
                log.debug('%s is a file', p)

        if recursive and mlst.get('type') == 'dir':
            self._handle_path(entries, ftp, baseurl, path + '/' + p, mlst_supported,
                              files_only, recursive, get_size, encoding)

        if not files_only or mlst.get('type') == 'file':
            url = baseurl + quote(path) + '/' + quote(p)
            log.debug("Encoded URL: " + url)
            title = os.path.basename(p)
            log.info('Accepting entry "%s" [%s]' % (path + '/' + p, mlst.get('type') or "unknown",))
            entry = Entry(title, url)
            if get_size and 'size' not in mlst:
                if mlst.get('type') == 'file':
                    entry['content_size'] = old_div(ftp.size(path + '/' + p), (1024 * 1024))
                    log.debug('(FILE) Size = %s', entry['content_size'])
                elif mlst.get('type') == 'dir':
                    entry['content_size'] = self.get_folder_size(ftp, path, p)
                    log.debug('(DIR) Size = %s', entry['content_size'])
            elif get_size:
                entry['content_size'] = old_div(float(mlst.get('size')), (1024 * 1024))
            entries.append(entry)
def urlquote(*args, **kwargs):
    new_kwargs = dict(kwargs)
    if not PY3:
        if 'encoding' in new_kwargs:
            del new_kwargs['encoding']
        if 'errors' in kwargs:
            del new_kwargs['errors']
    return quote(*args, **new_kwargs)
def result(*args, **kwargs):
    url = url_root
    for component in args:
        if not url.endswith("/"):
            url += "/"
        url += quote("%s" % component, safe="")
    if kwargs:
        url += "?" + urlencode(kwargs)
    return url
def _build_uri(self, name=None, query=None):
    """
    Create uri for kvstore request

    :param name: name after collection, usually _key
    :param query: query params
    :return: uri for kvstore request
    """
    qs = dict(output_mode='json')
    base_uri = self.base_uri()
    if query is not None:
        qs.update(query)

    if name is not None:
        return '%s/storage/collections/data/%s/%s?%s' % (
            base_uri, quote(self.collection), quote(name), urlencode(qs))
    else:
        return '%s/storage/collections/data/%s?%s' % (
            base_uri, quote(self.collection), urlencode(qs))
def quote_folder_paths(state, schema):
    try:
        NodeSettings = state.get_model('addons_googledrive', 'nodesettings')
        targets = NodeSettings.objects.filter(folder_path__isnull=False)
    except LookupError:
        return
    for obj in targets:
        obj.folder_path = quote(obj.folder_path.encode('utf-8'))
    bulk_update(targets, update_fields=['folder_path'])
def search(self, task, entry, config):
    search_strings = [normalize_unicode(s).lower()
                      for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        search_string = clean_title(search_string)
        search_string_url_fragment = search_string
        params = {'rss': 1}
        if config.get('verified'):
            search_string_url_fragment += ' verified:1'
        url = 'https://kat.cr/usearch/%s/' % quote(search_string_url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            params['category'] = config['category']

        sorters = [{'field': 'time_add', 'sorder': 'desc'},
                   {'field': 'seeders', 'sorder': 'desc'}]
        for sort in sorters:
            params.update(sort)

            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url, params=params, raise_status=False)
            except RequestException as e:
                log.warning('Search resulted in: %s' % e)
                continue
            if not r.content:
                log.debug('No content returned from search.')
                continue
            elif r.status_code != 200:
                log.warning('Search returned %s response code' % r.status_code)
                continue

            rss = feedparser.parse(r.content)

            ex = rss.get('bozo_exception', False)
            if ex:
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'is']:
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand
            url = 'http://torrentz2.%s/%s?f=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try more
                # if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError(
                'No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url
            )

        rss = feedparser.parse(r.content)

        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')

        for item in rss.entries:
            m = re.search(
                r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                item.description,
                re.IGNORECASE,
            )
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def encode_url_utf8(url):
    """Encode the path component of url to percent-encoded UTF8."""
    (scheme, netloc, path, params, query, fragment) = urlparse(url)

    # Assume that the path is already encoded if there seems to be
    # percent encoded entities.
    if re.search(r'%[0-9A-Fa-f]{2}', path) is None:
        path = quote(path.encode('UTF8'), '/+')

    return urlunparse((scheme, netloc, path, params, query, fragment))
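# Illustrative, standalone sketch (not part of the original source): the same
# "encode only the path, and skip paths that already look percent-encoded" idea
# shown with just the standard library, to make the behaviour of encode_url_utf8
# above concrete.
from re import search
from urllib.parse import quote, urlparse, urlunparse

parts = urlparse('http://example.com/añejo/page?x=1')
path = parts.path
if search(r'%[0-9A-Fa-f]{2}', path) is None:
    # no %XX escapes found, so the path is assumed to be raw and gets encoded
    path = quote(path.encode('UTF8'), '/+')
print(urlunparse(parts._replace(path=path)))
# -> http://example.com/a%C3%B1ejo/page?x=1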
def url_encode(dict_value):
    if isinstance(dict_value, unicode) or isinstance(dict_value, str):
        return quote(dict_value.encode('utf8'), safe='')
    elif isinstance(dict_value, dict):
        for k, v in list(dict_value.items()):
            dict_value[k] = url_encode(v)
    elif isinstance(dict_value, list):
        for i in range(len(dict_value)):
            dict_value[i] = url_encode(dict_value[i])
    return dict_value
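# Illustrative usage sketch (not part of the original source; assumes an
# environment where `unicode` is defined, i.e. Python 2 or `unicode = str` on
# Python 3): url_encode percent-encodes every string value in a nested structure
# in place, while keys and the container shape stay untouched.
#
#   >>> url_encode({'name': 'a b', 'tags': ['c/d', 'e']})
#   {'name': 'a%20b', 'tags': ['c%2Fd', 'e']}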
def test_resolve_guid_download_file_export(self):
    pp = PreprintFactory(finish=True)

    res = self.app.get(pp.url + 'download?format=asdf')
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3Faction%3Ddownload'.format(
        quote(WATERBUTLER_URL), pp._id, pp.primary_file.provider, pp.primary_file.path) in res.location

    res = self.app.get(pp.url + 'download/?format=asdf')
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3Faction%3Ddownload'.format(
        quote(WATERBUTLER_URL), pp._id, pp.primary_file.provider, pp.primary_file.path) in res.location

    res = self.app.get('/{}/download?format=asdf'.format(pp.primary_file.get_guid(create=True)._id))
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3Faction%3Ddownload'.format(
        quote(WATERBUTLER_URL), pp._id, pp.primary_file.provider, pp.primary_file.path) in res.location

    res = self.app.get('/{}/download/?format=asdf'.format(pp.primary_file.get_guid(create=True)._id))
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3Faction%3Ddownload'.format(
        quote(WATERBUTLER_URL), pp._id, pp.primary_file.provider, pp.primary_file.path) in res.location

    pp.primary_file.create_version(
        creator=pp.creator,
        location={u'folder': u'osf', u'object': u'deadbe', u'service': u'cloud'},
        metadata={u'contentType': u'img/png', u'size': 9001}
    )
    pp.primary_file.save()

    res = self.app.get(pp.url + 'download/?format=asdf')
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3F'.format(
        quote(WATERBUTLER_URL), pp._id, pp.primary_file.provider, pp.primary_file.path) in res.location
    quarams = res.location.split('%3F')[1].split('%26')
    assert 'action%3Ddownload' in quarams
    assert 'version%3D2' in quarams
    assert 'direct' in quarams

    res = self.app.get(pp.url + 'download/?format=asdf&version=1')
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3F'.format(
        quote(WATERBUTLER_URL), pp._id, pp.primary_file.provider, pp.primary_file.path) in res.location
    quarams = res.location.split('%3F')[1].split('%26')
    assert 'action%3Ddownload' in quarams
    assert 'version%3D1' in quarams
    assert 'direct' in quarams

    unpub_pp = PreprintFactory(project=self.node, is_published=False)
    res = self.app.get(unpub_pp.url + 'download?format=asdf', auth=unpub_pp.creator.auth)
    assert res.status_code == 302
    assert '{}/export?format=asdf&url='.format(MFR_SERVER_URL) in res.location
    assert '{}/v1/resources/{}/providers/{}{}%3F'.format(
        quote(WATERBUTLER_URL), unpub_pp._id, unpub_pp.primary_file.provider,
        unpub_pp.primary_file.path) in res.location
    quarams = res.location.split('%3F')[1].split('%26')
    assert 'action%3Ddownload' in quarams
    assert 'version%3D1' in quarams
    assert 'direct' in quarams
def detect(text):
    """
    Detect the language of a text.

    Basic usage:
        >>> from googletrans import translator
        >>> translator.detect('이 문장은 한글로 쓰여졌습니다.')
        <Detected lang=ko confidence=0.27041003>
        >>> translator.detect('この文章は日本語で書かれました。')
        <Detected lang=ja confidence=0.64889508>
        >>> translator.detect('This sentence is written in English.')
        <Detected lang=en confidence=0.22348526>
        >>> translator.detect('Tiu frazo estas skribita en Esperanto.')
        <Detected lang=eo confidence=0.10538048>

    Advanced usage:
        >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français'])
        >>> for lang in langs:
        ...     print(lang.lang, lang.confidence)
        ko 1
        ja 0.92929292
        en 0.96954316
        fr 0.043500196

    :param text: the text you want to detect.

    :rtype: Detected
    :rtype: list (when list is passed)
    """
    if isinstance(text, list):
        result = []
        for item in text:
            lang = detect(item)
            result.append(lang)
        return result

    result = ''
    sess = agent()  # acquire requests session
    origin = text
    text = quote(text)
    url = urls.DETECT.format(query=text)
    r = sess.get(url, headers=__headers)
    data = format_json(r.text)

    # actual source language that will be recognized by Google Translator when the
    # src passed is equal to auto.
    src = ''
    confidence = 0.0
    try:
        src = ''.join(data[-1][0])
        confidence = data[-1][-1][0]
    except:
        pass
    result = Detected(lang=src, confidence=confidence)

    return result
def do_search_tvsearch(self, arg_entry, task, config=None):
    log.info('Searching for %s' % (arg_entry['title']))
    # normally this should be used with next_series_episodes who has provided season and episodenumber
    if ('series_name' not in arg_entry or 'series_season' not in arg_entry or
            'series_episode' not in arg_entry):
        return []

    if arg_entry.get('tvrage_id'):
        lookup = '&rid=%s' % arg_entry.get('tvrage_id')
    else:
        lookup = '&q=%s' % quote(arg_entry['series_name'])

    url = config['url'] + lookup + '&season=%s&ep=%s' % (
        arg_entry['series_season'], arg_entry['series_episode'])
    return self.fill_entries_for_url(url, task)
def get_scrape_url(tracker_url, info_hash):
    if 'announce' in tracker_url:
        v = urlsplit(tracker_url)
        result = urlunsplit([v.scheme, v.netloc, v.path.replace('announce', 'scrape'),
                             v.query, v.fragment])
    else:
        log.debug('`announce` not contained in tracker url, guessing scrape address.')
        result = tracker_url + '/scrape'

    result += '&' if '?' in result else '?'
    result += 'info_hash=%s' % quote(binascii.unhexlify(info_hash))
    return result
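# Illustrative usage sketch (not part of the original source): the announce URL
# is rewritten to its scrape counterpart and the raw (binary) info_hash is
# percent-encoded into the query string.
#
#   >>> get_scrape_url('http://tracker.example.org/announce',
#   ...                '0123456789abcdef0123456789abcdef01234567')
#   'http://tracker.example.org/scrape?info_hash=%01%23Eg%89%AB%CD%EF%01%23Eg%89%AB%CD%EF%01%23Eg'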
def setUp(self):
    super().setUp()
    self.url = BaseUrl.fromString(b'unix://<sockpath>/<path>')
    sockpath = './integrations/node/http.sock'
    self.url.netloc = url_parse.quote(sockpath.encode('utf_8'), safe=b'').encode('ascii')
    self.close_d = t_defer.Deferred()

    def _close_handler(event):
        _LOGGER.debug('_close_handler() called with %r', event)
        return reactor.callLater(0, self.close_d.callback, None)

    self.close_handler = _close_handler
def search(self, task, entry, config=None):
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {
            'username': config['username'],
            'password': config['password'],
            'keeplogged': '1',
            'login': '******'
        }
        session.post(URL + 'login.php', data=params)

    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find(
                'a', href=re.compile(r'torrents\.php\?action=download')).get('href')
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, task, entry, config):
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(name.encode('utf-8')),
            CATEGORIES[config['category']],
            FILTERS.index(config['filter']),
        )

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
        if status >= 400:
            continue

        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['torrent_seeds'] = int(item.nyaa_seeders)
            entry['torrent_leeches'] = int(item.nyaa_leechers)
            entry['torrent_info_hash'] = item.nyaa_infohash
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            if item.nyaa_size:
                entry['content_size'] = parse_filesize(item.nyaa_size)

            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    if not isinstance(config, dict):
        config = {}

    category = CATEGORIES.get(config.get('category', 'all'), None)
    category_query = '&cid=%d' % category if category else ''

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        search_query = '&search=%s' % quote(query.encode('utf-8'))

        url = 'http://extratorrent.cc/rss.xml?type=search%s%s' % (category_query, search_query)
        log.debug('Using %s as extratorrent search url' % url)

        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
        if not status or status >= 400:
            continue

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(item.size) / 1024 / 1024
            entry['torrent_info_hash'] = item.info_hash

            if isinstance(item.seeders, int):
                entry['torrent_seeds'] = int(item.seeders)
            if isinstance(item.leechers, int):
                entry['torrent_leeches'] = int(item.leechers)

            entries.add(entry)

    return entries
def search(self, task, entry, config):
    # urllib.quote will crash if the unicode string has non ascii characters,
    # so encode in utf-8 beforehand
    optionlist = ['misc', 'movies', 'audio', 'tv', 'games', 'apps', 'pics',
                  'anime', 'comics', 'books', 'music video', 'unclassified', 'all']
    entries = set()
    search_strings = [normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])]
    for search_string in search_strings:
        url = 'http://isohunt.com/js/rss/%s?iht=%s&noSL' % (
            quote(search_string.encode('utf-8')), optionlist.index(config))

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s' % status)

        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link

            m = re.search(r'Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            else:
                log.debug('regexp found size(%s), Seeds(%s) and Leeches(%s)' %
                          (m.group(1), m.group(2), m.group(3)))

                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2))
                entry['torrent_leeches'] = int(m.group(3))
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])

            entries.add(entry)

    return entries
def search(self, task, entry, config):
    if not isinstance(config, dict):
        config = {"category": config}
    config.setdefault("category", "anime eng")
    config.setdefault("filter", "all")
    entries = set()
    for search_string in entry.get("search_strings", [entry["title"]]):
        name = normalize_unicode(search_string)
        url = "http://www.nyaa.eu/?page=rss&cats=%s&filter=%s&term=%s" % (
            CATEGORIES[config["category"]],
            FILTERS.index(config["filter"]),
            quote(name.encode("utf-8")),
        )

        log.debug("requesting: %s" % url)
        rss = feedparser.parse(url)

        status = rss.get("status", False)
        if status != 200:
            log.debug("Search result not 200 (OK), received %s" % status)
        if status >= 400:
            continue

        ex = rss.get("bozo_exception", False)
        if ex:
            log.error("Got bozo_exception (bad feed) on %s" % url)
            continue

        for item in rss.entries:
            entry = Entry()
            entry["title"] = item.title
            entry["url"] = item.link
            # TODO: parse some shit
            # entry['torrent_seeds'] = int(item.seeds)
            # entry['torrent_leeches'] = int(item.leechs)
            # entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # entry['content_size'] = int(item.size) / 1024 / 1024

            entries.add(entry)

    return entries
def __call__(self, *args, **kwargs):
    """Generate a URL.

    Assemble a URL using the positional arguments as URL components
    and the keyword arguments as the query string. The URL will be
    relative to the root given to the constructor.

    args ([object]): the path components (will be cast to strings).
    kwargs ({str: object}): the query parameters (values will be cast
        to strings).

    return (str): the desired URL.
    """
    url = self.url_root
    for component in args:
        if not url.endswith("/"):
            url += "/"
        url += quote("%s" % component, safe="")
    if kwargs:
        url += "?" + urlencode(kwargs)
    return url
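# Illustrative usage sketch (not part of the original source; the wrapper class
# name is hypothetical, only self.url_root is assumed): each path component is
# percent-encoded individually and keyword arguments become the query string.
#
#   >>> make_url = UrlBuilder('https://example.org/api')
#   >>> make_url('contest', 'my entry', page=2)
#   'https://example.org/api/contest/my%20entry?page=2'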
def url_rewrite(self, task, entry):
    entry['url'] = entry['url'].replace('details.php?id=', 'download.php?id=')
    entry['url'] += '&name=%s.torrent' % (quote(entry['title'], safe=''))