Example #1
    def search(self, what, cat='all'):
        """
        Method called by nova2.

        `what` is the already escaped search string, while `cat` restricts in
        which category the search should be performed.

        For each parsed line of the result, we put it in a dictionary and
        pass it to the prettyPrinter function.
        """
        data = retrieve_url('http://psychocydd.co.uk/torrents.php?search=%s'
                            % what)

        soup = BeautifulSoup(data, 'html.parser')
        res = soup.find_all('table', attrs={'width': '100%', 'class': 'lista'})
        rows = res[5].find_all('tr')  # by inspection, we want res[5]

        for row in rows[2:]:  # by inspection, we want rows[2:]
            cells = row.find_all('td')

            # Columns of interest, all determined by inspection
            info = {
                'name': cells[1].a.text,
                'link': self.url + '/' + cells[4].a['href'],
                'size': cells[6].text,
                'seeds': cells[7].text,
                'leech': cells[8].text,
                'engine_url': self.url,
                }
            prettyPrinter(info)
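
All of the snippets on this page feed prettyPrinter the same dictionary shape. A minimal sketch of that contract, inferred from the examples below (the key names come from the snippets themselves; the sample values are placeholders, and desc_link is omitted by some examples):

    # Hypothetical result dictionary; values are illustrative only.
    result = {
        'name': 'Some Torrent Name',              # torrent title
        'link': 'magnet:?xt=urn:btih:...',        # download or magnet link
        'size': '700 MB',                         # human-readable size, or -1 if unknown
        'seeds': 42,                              # seeder count, or -1 if unknown
        'leech': 7,                               # leecher count, or -1 if unknown
        'engine_url': 'https://example.org',      # base URL of the engine
        'desc_link': 'https://example.org/t/1',   # details page (optional)
    }
    prettyPrinter(result)
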
Example #2
	def search(self, what):
		i = 1
		while True:
			res = 0
			dat = urllib.urlopen(self.url+'/search/%s/pg-%i'%(what,i)).read().decode('utf8', 'replace')
			# I know it's not very readable, but the SGML parser chokes on this page
			section_re = re.compile(r"(?s)href='/torrent.*?<tr>")
			torrent_re = re.compile(r"(?s)href='/torrent.*?>(?P<name>.*?)</a>.*?"
			r"title='(?P<seeds>\d+)\sseeders.*?"
			r",\s(?P<leech>\d+)\sdownloaders.*?"
			r"href='(?P<link>.*?[^']+)'><img.*?src='/images/download.*?")
			for match in section_re.finditer(dat):
				txt = match.group(0)
				m = torrent_re.search(txt)
				if m:
					torrent_infos = m.groupdict()
					torrent_infos['engine_url'] = self.url
					torrent_infos['link'] = self.url+torrent_infos['link']
					# This is a hack to return -1
					# Size is not provided by shareTV
					torrent_infos['size'] = -1
					prettyPrinter(torrent_infos)
					res = res + 1
			if res == 0:
				break
			i = i + 1
Example #3
	def search(self, what, cat='all'):
		# Remove {} since isohunt does not seem
		# to handle those very well
		what = what.replace('{', '').replace('}', '')
		i = 1
		while i < 11:
			res = 0
			dat = retrieve_url(self.url+'/torrents.php?ihq=%s&iht=%s&ihp=%s&ihs1=2&iho1=d'%(what, self.supported_categories[cat],i))
			# I know it's not very readable, but the SGML parser chokes on this page
			section_re = re.compile(r'(?s)id=link.*?</tr><tr')
			torrent_re = re.compile(r'(?s)torrent_details/(?P<link>.*?[^/]+).*?'
			r'>(?P<name>.*?)</a>.*?'
			r'>(?P<size>[\d,\.]+\s+MB)</td>.*?'
			r'>(?P<seeds>\d+)</td>.*?'
			r'>(?P<leech>\d+)</td>')
			for match in section_re.finditer(dat):
				txt = match.group(0)
				m = torrent_re.search(txt)
				if m:
					torrent_infos = m.groupdict()
					torrent_infos['name'] = re.sub('<.*?>', '', torrent_infos['name'])
					torrent_infos['engine_url'] = self.url
					torrent_code = torrent_infos['link']
					torrent_infos['link'] = self.url + '/download/' + torrent_code
					torrent_infos['desc_link'] = self.url + '/torrent_details/' + torrent_code + '/dvdrip?tab=summary'
					prettyPrinter(torrent_infos)
					res = res + 1
			if res == 0:
				break
			i = i + 1
Example #4
        def handle_data(self, data):
            if self.td_counter == 0:
                if "name" not in self.current_item:
                    self.current_item["name"] = ""
                self.current_item["name"] += data
            elif self.td_counter == 4:
                if "size" not in self.current_item:
                    self.current_item["size"] = data.strip()
                    if self.current_item["size"] == "Pending":
                        self.current_item["size"] = ""
            elif self.td_counter == 5:
                if "seeds" not in self.current_item:
                    self.current_item["seeds"] = data.strip().replace(",", "")
                    if not self.current_item["seeds"].isdigit():
                        self.current_item["seeds"] = 0
            elif self.td_counter == 6:
                if "leech" not in self.current_item:
                    self.current_item["leech"] = data.strip().replace(",", "")
                    if not self.current_item["leech"].isdigit():
                        self.current_item["leech"] = 0

                # display item
                self.td_counter = None
                self.current_item["engine_url"] = self.url
                if self.current_item["name"].find(u" \xbb"):
                    self.current_item["name"] = self.current_item["name"].split(u" \xbb")[0]
                self.current_item["link"] += "&" + urlencode({"dn": self.current_item["name"].encode("utf-8")})

                prettyPrinter(self.current_item)
                self.results.append("a")
Example #5
    def search(self, what, cat='all'):
        req = urllib.unquote(what)
        i = 0
        results = 0
        while i < 3:
            data = retrieve_url('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.urlencode(dict(q = req, p = i)))
            for line in data.splitlines():
                if line.startswith('#'):
                    continue

                info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
                name = name.replace('|', '')

                res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote(name.encode('utf8'))),
                           name = name,
                           size = size,
                           seeds = int(dl),
                           leech = int(dl),
                           engine_url = self.url,
                           desc_link = '%s/search?%s' % (self.url, urllib.urlencode(dict(info_hash = info_hash, q = req))))

                prettyPrinter(res)
                results += 1

            if results == 0:
                break
            i += 1
Example #6
    def parse_search(self, what, start=0, first_page=True):
        """Search for what starting on specified page. Defaults to first page of results."""
        logging.debug("parse_search({}, {}, {})".format(what, start, first_page))
        # Search.
        parser = self.Parser(self.download_url, first_page)
        try:
            response = self.opener.open('{}?nm={}&start={}'.format(self.search_url, quote(what), start))
            # Only continue if response status is OK.
            if response.getcode() != 200:
                raise HTTPError(response.geturl(), response.getcode(), "HTTP request to {} failed with status: {}".format(response.geturl(), response.getcode()), response.info(), None)
        except (URLError, HTTPError) as e:
            logging.error(e)
            return
        
        data = response.read().decode('cp1251')
        parser.feed(data)
        parser.close()
        
        # PrettyPrint each torrent found.
        for torrent in parser.results:
            torrent['engine_url'] = self.url
            if __name__ != "__main__": # This is just to avoid printing when I debug.
                prettyPrinter(torrent)

        # If no torrents were found, stop immediately
        if parser.tr_counter == 0:
            return
            
        # Else return number of torrents found
        return (parser.tr_counter, parser.other_pages)
Example #7
  def search(self, what, cat='all'):
    ret = []
    i = 1
    while i < 11:
      results = []
      json_data = retrieve_url(self.url+'/json.php?q=%s&page=%d'%(what, i))
      try:
        json_dict = json.loads(json_data)
      except:
        i += 1
        continue
      if int(json_dict['total_results']) <= 0: return
      results = json_dict['list']
      for r in results:
        try:
          if cat != 'all' and self.supported_categories[cat] != r['category']: continue
          res_dict = dict()
          res_dict['name'] = r['title']
          res_dict['size'] = str(r['size'])
          res_dict['seeds'] = r['seeds']
          res_dict['leech'] = r['leechs']
          res_dict['link'] = r['torrentLink']
          res_dict['desc_link'] = r['link']
          res_dict['engine_url'] = self.url
          prettyPrinter(res_dict)
        except:
          pass
      i += 1
Example #8
    def search(self, what, cat='all'):
        req = urllib.parse.unquote(what)
        what_list = req.split()
        i = 0
        results = 0
        while i < 3:
            u = urllib.request.urlopen('https://api.btdigg.org/api/public-8e9a50f8335b964f/s01?%s' % urllib.parse.urlencode(dict(q = req, p = i)))
            for line in u:
                try:
                    line = line.decode('utf8')
                    if line.startswith('#'):
                        continue

                    info_hash, name, files, size, dl, seen = line.strip().split('\t')[:6]
                    name = name.replace('|', '')
                    # BTDigg returns unrelated results, we need to filter
                    if not all(word in name.lower() for word in what_list):
                        continue

                    res = dict(link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.parse.quote(name)),
                               name = name,
                               size = size,
                               seeds = int(dl),
                               leech = int(dl),
                               engine_url = self.url,
                               desc_link = '%s/search?%s' % (self.url, urllib.parse.urlencode(dict(info_hash = info_hash, q = req))))

                    prettyPrinter(res)
                    results += 1
                except:
                    pass

            if results == 0:
                break
            i += 1
Example #9
    def search(self, what, cat="all"):
        json_data = retrieve_url(
            "".join(
                (
                    self.url,
                    "api/v2/torrents/search/?phrase=",
                    what,
                    "&category=",
                    self.supported_categories.get(cat, ""),
                )
            )
        )
        json_dict = json.loads(json_data)

        if json_dict["results"] < 1:
            return

        for r in json_dict["torrents"]:
            r_dict = {
                "link": r["magnet_uri"],
                "name": r["torrent_title"],
                "size": str(r["size"]) + "B",
                "seeds": r["seeds"],
                "leech": r["leeches"],
                "desc_link": r["page"],
                "engine_url": self.url,
            }
            prettyPrinter(r_dict)
Example #10
 def search(self, what, cat='all'):
     try:
         self._sign_in()
         opener = self._opener
         data = opener.open(self.url + '/forum/tracker.php?nm=%s' % (urllib.quote(what.decode('utf8').encode('cp1251'))))\
                      .read()
         document = lxml.html.document_fromstring(data)
         info = {'engine_url': self.url}
         for t in document.cssselect('tr.tCenter'):
             try:
                 a = t.xpath('.//a[contains(@href,"dl.php?t=")]')[0]
                 info.update(
                     name = (self.prefix
                             + t.xpath('.//a[contains(@href,"tracker.php?f=")]')[0].text_content()
                             + ' - '
                             + t.xpath('.//a[contains(@href,"viewtopic.php?t=")]')[0].text_content()),
                     link = self.download_url + a.attrib['href'],
                     size = a.text_content().replace(u'\xa0', ' ').replace(u' \u2193', ''),
                     seeds = t.xpath('.//td[contains(@class,"seed")]')[0].text_content(),
                     leech = t.xpath('.//td[contains(@class,"leech")]')[0].text_content()
                 )
                 prettyPrinter(info)
             except IndexError:
                 pass
     except Exception:
         try:
             with open(self.exc_log, 'a') as fo:
                 fo.write(traceback.format_exc())
         except Exception:
             pass
Example #11
 def handle_data(self, data):
     if self.current_item is not None:
         if self.size_found:
             # with utf-8 you get something like: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
             temp = data.split()
             if 'Size' in temp:
                 sizeIn = temp.index('Size')
                 self.current_item['size'] = temp[sizeIn + 1]
                 self.size_found = False
                 self.unit_found = True
         elif self.unit_found:
             temp = data.split()
             self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
             self.unit_found = False
         elif self.seed_found:
             self.current_item['seeds'] += data.rstrip()
         elif self.leech_found:
             self.current_item['leech'] += data.rstrip()
             self.current_item['engine_url'] = self.url
             prettyPrinter(self.current_item)
             PREVIOUS_IDS.add(self.current_item['id'])
             self.results.append('a')
             self.current_item = None
             self.size_found = False
             self.unit_found = False
             self.seed_found = False
             self.leech_found = False
Example #12
 def search(self, what, cat='all'):
     i = 1
     while i < 11:
         results = []
         url = self.url+'/api/list.json?sort=seeds&limit=50&keywords=%s&set=%s&genre=%s'%(what, i, self.supported_categories[cat])
         json_data = retrieve_url(url)
         try:
             json_dict = json.loads(json_data)
         except:
             i += 1
             continue
         try:
             results = json_dict['MovieList']
         except KeyError:
             return
         else:
             for r in results:
                 res_dict = dict()
                 res_dict['name'] = r['MovieTitle']
                 res_dict['size'] = r['Size']
                 res_dict['seeds'] = r['TorrentSeeds']
                 res_dict['leech'] = r['TorrentPeers']
                 res_dict['link'] = r['TorrentUrl']
                 res_dict['desc_link'] = r['MovieUrl']
                 res_dict['engine_url'] = self.url
                 prettyPrinter(res_dict)
         i += 1
Example #13
        def handle_starttag(self, tag, attrs):
            params = dict(attrs)
            if tag == 'a' and 'href' in params:
                if 'en/details/' in params['href'] and (self.td_counter is None or self.td_counter > 5):
                    self.current_item = {}
                    self.td_counter = 0
                    self.current_item['desc_link'] = params['href']
                elif params['href'].startswith('http://torrents.sumotorrent.sx/download/'):
                    parts = params['href'].strip().split('/')
                    self.current_item['link'] = self.url + '/torrent_download/'+parts[-3]+'/'+parts[-2]+'/'+quote(parts[-1]).replace('%20', '+')

            elif tag == 'td' and isinstance(self.td_counter,int):
                self.td_counter += 1
                if self.td_counter > 6:
                    # Display item
                    self.td_counter = None

                    self.current_item['engine_url'] = self.url
                    if not self.current_item['seeds'].isdigit():
                        self.current_item['seeds'] = 0
                    if not self.current_item['leech'].isdigit():
                        self.current_item['leech'] = 0

                    self.current_item['name'] = self.current_item['name'].strip()
                    try: #python2
                        self.current_item['name'] = self.current_item['name'].decode('utf8')
                    except:
                        pass

                    prettyPrinter(self.current_item)
                    self.results.append('a')
Example #14
        def handle_endtag(self, tag): 
            if tag == "script":
                return
            if tag == "div":
                if self.meta_data_grabbing > 0:
                    
                    self.torrent_no_files = self.meta_data_array[2] # Not used
                    self.torrent_date_added = self.meta_data_array[4] # Not used
                    self.torrent_popularity = self.meta_data_array[6] # Not used

                    self.current_item["size"] = self.meta_data_array[0]
                    self.current_item["name"] = self.torrent_name
                    self.current_item["engine_url"] = self.url
                    self.current_item["link"] = self.mangnet_link
                    self.current_item["desc_link"] = self.desc_link
                    self.current_item["seeds"] = -1
                    self.current_item["leech"] = -1

                    prettyPrinter(self.current_item)
                    self.results.append('a')
                    self.current_item = {}

                    self.meta_data_grabbing = 0
                    self.meta_data_array = []
                    self.mangnet_link = ""
                    self.desc_link = ""
                    self.torrent_name = ""
Example #15
	def search(self, what):
		i = 1
		while True:
			res = 0
			dat = urllib.urlopen(self.url+'/index.php?q=%s&p=%d'%(what,i)).read().decode('utf8', 'replace')
			print "url is "+self.url+'/index.php?q=%s&p=%d'%(what,i)
			# I know it's not very readable, but the SGML parser chokes on this page
			section_re = re.compile(r'(?s)<a class="search_a_news".*?</li>')
			torrent_re = re.compile(r'(?s)<a class="search_a_news" href="(?P<link>.*?[^"]+).*?'
			r'Titre : (?P<name>.*?)- Comm.*?'
			r'Taille : (?P<size>.*?)</p></li>')
			for match in section_re.finditer(dat):
				txt = match.group(0)
				m = torrent_re.search(txt)
				if m:
					torrent_infos = m.groupdict()
					torrent_infos['name'] = re.sub('</?span.*?>', '', torrent_infos['name'])
					torrent_infos['engine_url'] = self.url
					torrent_infos['seeds'] = -1
					torrent_infos['leech'] = -1
					prettyPrinter(torrent_infos)
					res = res + 1
			if res == 0:
				break
			i = i + 1
Example #16
        def handle_data(self, data):
            if self.td_counter == 0:
                if 'name' not in self.current_item:
                    self.current_item['name'] = ''
                self.current_item['name'] += data
            elif self.td_counter == 3:
                if 'size' not in self.current_item:
                    self.current_item['size'] = data.strip()
                    if self.current_item['size'] == 'Pending':
                        self.current_item['size'] = ''
            elif self.td_counter == 4:
                if 'seeds' not in self.current_item:
                    self.current_item['seeds'] = data.strip().replace(',', '')
                    if not self.current_item['seeds'].isdigit():
                        self.current_item['seeds'] = 0
            elif self.td_counter == 5:
                if 'leech' not in self.current_item:
                    self.current_item['leech'] = data.strip().replace(',', '')
                    if not self.current_item['leech'].isdigit():
                        self.current_item['leech'] = 0

                # display item
                self.td_counter = None
                self.current_item['engine_url'] = self.url
                if ' »' in self.current_item['name']:
                    self.current_item['name'] = self.current_item['name'].split(' »')[0]
                self.current_item['link'] += '&' + urlencode({'dn' : self.current_item['name']})

                prettyPrinter(self.current_item)
                self.results.append('a')
Example #17
 def search(self, what, cat='all'):
     """Search for what on the search engine."""
     # Instantiate parser
     self.parser = self.Parser(self)
     
     # Decode search string
     what = unquote(what)
     logging.info("Searching for {}...".format(what))
     
     # Search on first page.
     logging.info("Parsing page 1.")
     self.parser.search(what)
         
     # If multiple pages of results have been found, repeat search for each page.
     logging.info("{} pages of results found.".format(len(self.parser.other_pages)+1))
     for start in self.parser.other_pages:
         logging.info("Parsing page {}.".format(int(start)//50+1))
         self.parser.search(what, start)
     
     # PrettyPrint each torrent found, ordered by most seeds
     self.parser.results.sort(key=lambda torrent:torrent['seeds'], reverse=True)
     for torrent in self.parser.results:
         torrent['engine_url'] = 'https://rutracker.org' # Kludge, see #15
         if __name__ != "__main__": # This is just to avoid printing when I debug.
             prettyPrinter(torrent)
     
     self.parser.close()
     logging.info("{} torrents found.".format(len(self.parser.results)))
Example #18
 def start_td(self,attr):
     if isinstance(self.td_counter,int):
       self.td_counter += 1
       if self.td_counter > 3:
         self.td_counter = None
         self.current_item["engine_url"] = self.url
         prettyPrinter(self.current_item)
         self.results.append("a")
Example #19
 def handle_endtag(self, tag):
     if tag == 'tr' and 'link' in self.current_item:
         # display item
         self.td_counter = None
         self.current_item['engine_url'] = self.url
         self.current_item['size'] = ''
         self.current_item['name'] = self.current_item['name'].strip()
         prettyPrinter(self.current_item)
         self.results.append('a')
Example #20
 def search(self, what, cat='all'):
     start = 0
     while True:
         ds = list(self.search_page(what, cat, start))
         if not ds:
             break
         for d in ds:
             prettyPrinter(d)
         start += 25
Example #21
 def handle_endtag(self, tag):
     """ Parser's end tag handler """
     if tag == "tr" and self.current_item:
         self.current_item["engine_url"] = self.url
         prettyPrinter(self.current_item)
         self.current_item = None
     elif self.cur_item_name:
         if tag == "a" or tag == "td":
             self.cur_item_name = None
Example #22
 def search(self, what, cat='all'):
     start = 0
     f = True
     while f and start < 51:
         f = False
         for d in self.search_page(what, cat, start):
             prettyPrinter(d)
             f = True
         start += 1
Example #23
 def handle_endtag(self, tag):
     if tag == "article":
         self.article_found = False
     elif self.item_name and (tag == "a" or tag == "td"):
         self.item_name = None
     elif self.item_found and tag == "tr":
         self.item_found = False
         if not self.item_bad:
             prettyPrinter(self.current_item)
         self.current_item = {}
Example #24
 def search(self, what, cat='all'):
     json_data = retrieve_url(self.url + 'api/v2/torrents/search/?phrase=' + what +
                              '&category=' + self.supported_categories.get(cat, ''))
     json_dict = json.loads(json_data)
     if json_dict['results'] < 1:
         return
     for r in json_dict['torrents']:
         r_dict = {'link': r['magnet_uri'], 'name': r['torrent_title'], 'size': str(r['size']) + 'B',
                   'seeds': r['seeds'], 'leech': r['leeches'], 'desc_link': r['page'], 'engine_url': self.url}
         prettyPrinter(r_dict)
Example #25
 def search(self, what, cat='all'):
     start = 1
     f = True
     while f and start < 21:
         page_results = self.search_page(what, cat, start)
         for d in page_results:    
             prettyPrinter(d)
         if len(page_results) < 24:
             f = False
         start += 1
Example #26
 def search(self, what, cat='all'):
     start = 0
     f = True
     while f:
         f = False
         for d in self.search_page(what, cat, start):
             if __name__ != "__main__":
                 prettyPrinter(d)
             f = True
         start += 1
Example #27
  def search(self, what, cat='all'):
    # Get token
    baseURL = "https://torrentapi.org/pubapi_v2.php?%s"
    params = urlencode({'get_token': 'get_token', 'app_id' : 'qbittorrent'})
    response = retrieve_url(baseURL % params)
    j = json.loads(response)
    token = j['token']

    sleep(2.1)

    # get JSON

    what = unquote(what)
    category = self.supported_categories[cat]
    params = urlencode({
                        'mode': 'search',
                        'search_string': what,
                        'ranked': 0,
                        'category': category,
                        'limit': 100,
                        'sort': 'seeders',
                        'format': 'json_extended',
                        'token': token,
                        'app_id' : 'qbittorrent'
                        })

    response = retrieve_url(baseURL % params)
    j = json.loads(response)

    for i in j['torrent_results']:

      tbytes = float(i['size'])
      size = "-1"

      if tbytes > 1024 * 1024 * 1024:
        size = "%.1f GB" % (tbytes / (1024 * 1024 * 1024))

      elif tbytes > 1024 * 1024:
        size = "%.1f MB" % (tbytes / (1024 * 1024))

      elif tbytes > 1024:
        size = "%.1f KB" % (tbytes / 1024)

      else:
        size = "%.1f B" % (tbytes)

      res = dict(link=i['download'],
                 name=i['title'],
                 size=size,
                 seeds=i['seeders'],
                 leech=i['leechers'],
                 engine_url=self.url,
                 desc_link=i['info_page'])

      prettyPrinter(res)
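
The size bucketing above (bytes to a human-readable string) can be factored into a small helper. A minimal sketch using the same thresholds; the name format_size is illustrative, not part of any plugin API:

    def format_size(nbytes):
        # Pick the largest unit the value exceeds, mirroring the if/elif chain above.
        for factor, unit in ((1024 ** 3, "GB"), (1024 ** 2, "MB"), (1024, "KB")):
            if nbytes > factor:
                return "%.1f %s" % (nbytes / factor, unit)
        return "%.1f B" % nbytes

With this helper, the loop body would reduce to size = format_size(float(i['size'])).
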
Example #28
 def end_tr(self):
   if self.td_counter == 5:
     self.td_counter = None
     # Display item
     if self.current_item and 'link' in self.current_item:
       self.current_item['engine_url'] = self.url
       if not self.current_item['seeds'].isdigit():
         self.current_item['seeds'] = 0
       if not self.current_item['leech'].isdigit():
         self.current_item['leech'] = 0
       prettyPrinter(self.current_item)
       self.results.append('a')
Example #29
 def start_td(self,attr):
     if isinstance(self.td_counter,int):
       self.td_counter += 1
       if self.td_counter > 7:
         self.td_counter = None
         if self.current_item:
           self.current_item['engine_url'] = self.url
           if not self.current_item['seeds'].isdigit():
             self.current_item['seeds'] = 0
           if not self.current_item['leech'].isdigit():
             self.current_item['leech'] = 0
           prettyPrinter(self.current_item)
           self.results.append('a')
Example #30
 def handle_endtag(self, tag):
   if self.insideRow:
     if tag == "div":
       self.torrentrowDepth -= 1
       if self.torrentrowDepth < 0:
         self.insideRow = False
         self.crtTorrent["name"] = ("__FREELEECH__" if self.isFree else "") + self.torrentRow["c2"]
         self.crtTorrent["size"] = str(int(round(float(self.torrentRow["c7"]) * 1024 * 1024)))
         self.crtTorrent["seeds"] = self.torrentRow["c9"]
         self.crtTorrent["leech"] = self.torrentRow["c10"]
         self.crtTorrent["engine_url"] = self.url
         prettyPrinter(self.crtTorrent)
         self.results.append('a')
Example #31
        def handle_endtag(self, tag):
            if self.item_name == 'name' and tag == self.SPAN:
                self.find_data = True
                self.end_name = True

            if self.inside_tr and tag == self.TR:
                self.inside_tr = False
                self.item_name = None
                self.find_data = False
                self.seed_found = False
                self.leech_found = False
                array_length = len(self.current_item)
                if array_length < 1:
                    return
                prettyPrinter(self.current_item)
                self.current_item = {}
Example #32
 def handle_data(self, data):
     """ Parser's data handler """
     if self.save_data:
         if self.save_data == "name":
             # names with special characters like '&' are split into several pieces
             if 'name' not in self.current_item:
                 self.current_item['name'] = ''
             self.current_item['name'] += data
         else:
             self.current_item[self.save_data] = data
             self.save_data = None
         if len(self.current_item) == 7:
             self.current_item["size"] = self.size_repl.sub(
                 "", self.current_item["size"])
             prettyPrinter(self.current_item)
             self.current_item = None
Example #33
 def handle_endtag(self, tag):
     if tag == 'td':
         self.insideTd = False
         self.insideDataTd = False
     if tag == 'tr':
         self.tdCount = -1
         if len(self.singleResData) > 0:
             # ignore trash stuff
             if self.singleResData['name'] != '-1':
                 # ignore entries whose link and desc_link both equal -1
                 if (self.singleResData['desc_link'] != '-1'
                         or self.singleResData['link'] != '-1'):
                     prettyPrinter(self.singleResData)
                     self.pageRes.append(self.singleResData)
                     self.fullResData.append(self.singleResData)
             self.singleResData = self.getSingleData()
Example #34
    def pretty_print_results(self, results):
        for result in results:
            temp_result = {
                'name': result['title'],
                'size': result['size'],
                'seeds': result['seed'],
                'leech': result['leech'],
                'engine_url': self.url,
                'desc_link': result['pageLink']
            }
            try:
                temp_result['link'] = result['magnetLink']
            except KeyError:
                temp_result['link'] = str(-1)

            prettyPrinter(temp_result)
Example #35
 def handle_data(self, data):
     if self.handle_that_data:
         if self.save_data == "name":
             if 'name' not in self.current_item:
                 self.current_item["name"] = ""
             self.current_item["name"] += data
         else:
             self.current_item[self.save_data] = data
         # all data collected
         if len(self.current_item) == 7:
             # remove preceding whitespaces
             self.current_item["name"] = self.name_repl.sub(
                 "", self.current_item["name"])
             prettyPrinter(self.current_item)
             self.current_item = None
             self.save_data = None
Example #36
 def __get_dic_lis(self, desc_link):
     url = desc_link[0]
     response = self.__urlGet(url)
     msgs = findall(
         r'<td colspan="6">.*?<a href="([^"]+)".*?/>([^>]+)</a>.*?<td> *(\d+) *次</td>\s*<td class="grey">(.*?)</td>',
         response, S)
     for i in msgs:
         link, leech, name = i[0], i[2], f'[更新:{i[-1]}]{i[1]}'  # '更新' means 'updated'
         try:
             size = search(r'\d+\.?\d* ?(?:G|M|K)(?=B?]?)', desc_link[1])[0] + 'B'
         except:
             size = '-1'
         link = quote(link.replace('dialog', 'download').replace('-ajax-1', ''), safe='/:')
         dic = {'name': name, 'seeds': '-1', 'leech': leech, 'size': size, 'link': link, 'desc_link': url,
                'engine_url': self.url}
         prettyPrinter(dic)
Example #37
 def handle_endtag(self, tag):
     if tag == 'li':
         self.titleFound = False
         self.tagCount = -1
         if len(self.singleResData) > 0:
             # ignore trash stuff
             if self.singleResData['name'] != '':
                 # ignore entries whose desc_link equals -1
                 if self.singleResData['desc_link'] != '-1':
                     # remove trash from name
                     self.singleResData['name'] = self.clearName(
                         self.singleResData['name'])
                     prettyPrinter(self.singleResData)
                     self.pageRes.append(self.singleResData)
                     self.fullResData.append(self.singleResData)
             self.singleResData = self.getSingleData()
Example #38
    def draw(self, html: str):
        torrents = RE_TORRENTS.findall(html)
        for tor in torrents:
            local = time.strftime("%y.%m.%d", time.localtime(int(tor[5])))
            torrent_date = f"[{local}] " if config['torrentDate'] else ""

            prettyPrinter({
                "engine_url": self.url,
                "desc_link": self.url + "viewtopic.php?t=" + tor[0],
                "name": torrent_date + unescape(tor[1]),
                "link": self.url_dl + tor[0],
                "size": tor[2],
                "seeds": max(0, int(tor[3])),
                "leech": tor[4]
            })
        del torrents
Example #39
    def draw(self, html: str):
        torrents = re.findall(PATTERNS[1], html, re.S)
        for tor in torrents:
            local = time.strftime("%y.%m.%d", time.localtime(int(tor[6])))
            torrent_date = f"[{local}] " if config['torrentDate'] else ""

            prettyPrinter({
                "engine_url": self.url,
                "desc_link": self.url + tor[0],
                "name": torrent_date + unescape(tor[1]),
                "link": self.url + tor[2],
                "size": unescape(tor[3]),
                "seeds": tor[4] if tor[4].isdigit() else '0',
                "leech": tor[5]
            })
        del torrents
Example #40
    def search(self, what, cat='all'):
        base_url = "https://torrentapi.org/pubapi_v2.php?%s"

        # get token
        params = urlencode({'get_token': 'get_token', 'app_id': 'qbittorrent'})
        response = retrieve_url(base_url % params)
        j = json.loads(response)
        token = j['token']
        time.sleep(2.1)

        # get response json
        what = unquote(what)
        category = self.supported_categories[cat]
        params = urlencode({
            'mode': 'search',
            'search_string': what,
            'ranked': 0,
            'category': category,
            'limit': 100,
            'sort': 'seeders',
            'format': 'json_extended',
            'token': token,
            'app_id': 'qbittorrent'
        })
        response = retrieve_url(base_url % params)
        j = json.loads(response)

        # parse results
        for result in j['torrent_results']:
            res = {
                'link': result['download'],
                'name': result['title'],
                'size': str(result['size']) + " B",
                'seeds': result['seeders'],
                'leech': result['leechers'],
                'engine_url': self.url,
                'desc_link': "%s&%s" % (result['info_page'],
                                        urlencode({'app_id': 'qbittorrent'}))
            }
            prettyPrinter(res)
Example #41
    def search(self, what, cat='all'):
        """ Performs search """
        if cat != 'all':
            return

        search_url = "http://academictorrents.com/browse.php?search={what}"

        url = search_url.format(what=what)

        while url:
            response = retrieve_url(url)
            parser = MyHTMLParser()
            parser.feed(response)
            results = parser.get_results()
            for match in results["torrents"]:
                prettyPrinter(match)
            url = results["next_page"]
Example #42
 def search(self, what, cat="all"):
     """ Performs search """
     page = 1
     while page < 11:
         query = "".join((self.url, "/search?q=", what, "+category%3A",
                          self.supported_categories[cat], "&fmt=rss"))
         if page > 1:
             query = query + "&pg=" + str(page)
         response = retrieve_url_nodecode(query)
         xmldoc = minidom.parseString(response)
         itemlist = xmldoc.getElementsByTagName('item')
         if len(itemlist) == 0:
             return
         for item in itemlist:
              zooqle_dict = {"engine_url": self.url}
             zooqle_dict['name'] = (
                 item.getElementsByTagName('title')[0].childNodes[0].data)
             zooqle_dict["size"] = (item.getElementsByTagName(
                 'enclosure')[0].attributes['length'].childNodes[0].data)
             if zooqle_dict["size"] == '0':
                 zooqle_dict["link"] = (item.getElementsByTagName(
                     'torrent:magnetURI')[0].childNodes[0].data)
             else:
                 zooqle_dict["link"] = (item.getElementsByTagName(
                     'enclosure')[0].attributes['url'].value)
             zooqle_dict["desc_link"] = (
                 item.getElementsByTagName('link')[0].childNodes[0].data)
             zooqle_dict["leech"] = (item.getElementsByTagName(
                 'torrent:peers')[0].childNodes[0].data)
             if not zooqle_dict["leech"].isdigit():
                 zooqle_dict["leech"] = ''
             zooqle_dict["seeds"] = (item.getElementsByTagName(
                 'torrent:seeds')[0].childNodes[0].data)
             if not zooqle_dict["seeds"].isdigit():
                 zooqle_dict["seeds"] = ''
             prettyPrinter(zooqle_dict)
         totalResultVal = (xmldoc.getElementsByTagName(
             'opensearch:totalResults')[0].childNodes[0].data)
         startIndex = (xmldoc.getElementsByTagName('opensearch:startIndex')
                       [0].childNodes[0].data)
         itemsPerPage = (xmldoc.getElementsByTagName(
             'opensearch:itemsPerPage')[0].childNodes[0].data)
         if (int(startIndex) + int(itemsPerPage)) > int(totalResultVal):
             return
         page += 1
     return
Example #43
    def search(self, what, cat='all'):
        """ Performs search """
        # prepare query
        cat = self.supported_categories[cat.lower()]
        query = "".join((self.url, "/files/?category=", cat, "&subcategory=All&quality=All&seeded=2&external=2&query=",
                         what, "&to=1&uid=0&sort=S"))

        data = retrieve_url(query)
        add_res_list = re_compile("/files.*page=[0-9]+")
        try:
            data = self.torrent_list.search(data).group(0)
        except AttributeError:
            if 'You must be logged in to to that!' in data:
                prettyPrinter({
                    'seeds': -1,
                    'size': -1,
                    'leech': -1,
                    'engine_url': self.url,
                    'link': self.url,
                    'desc_link': query,
                    'name': 'It seems demonoid.pw is private at the moment. / ' + what
                })
            return
        list_results = add_res_list.findall(data)

        parser = self.MyHtmlParseWithBlackJack(self.url)
        parser.feed(data)

        del data

        if list_results:
            # handle each result page in parallel, so we don't wait on requests serially
            # for 10 pages this cuts run time from 6.5s to 1.9s
            threads = []
            search_queries = islice(
                (add_res_list.search(result).group(0) for result in list_results[1].split(" | ")), 0, 10)
            for search_query in search_queries:
                t = threading.Thread(target=self.handle_page, args=(search_query,))
                threads.append(t)
                t.start()

            # search method needs to stay alive until all threads are done
            for t in threads:
                t.join()
        return
Example #44
 def search(self, what, cat='all'):
     # what is a string with the search tokens, already escaped (e.g. "Ubuntu+Linux")
     # cat is the name of a search category in ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
     # q - query, f - filter, c - category
     base_url = 'https://sukebei.nyaa.si/?q=%s&f=0&c=0_0'
     base_url_with_query = base_url % what
     response = retrieve_url(base_url_with_query)
     soup = BeautifulSoup(response, 'html.parser')
     pagination_info = soup.find('div', {'class': 'pagination-page-info'})
     PATTERN = r'Displaying results 1-(\d+) out of (\d+) results'
     parsed_pattern = re.search(PATTERN, pagination_info.text)
     item_per_pages = parsed_pattern.group(1)
     total_page = parsed_pattern.group(2)
     number_of_page = math.ceil(float(total_page) / float(item_per_pages))
     for i in range(0, int(number_of_page)):
         base_url_with_query_and_page = base_url_with_query + '&p=%s' % str(
             i + 1)
         response = retrieve_url(base_url_with_query_and_page)
         soup = BeautifulSoup(response, 'html.parser')
         table = soup.find('table')
         table_body = table.find('tbody')
         rows = table_body.find_all('tr')
         for row in rows:
             tds = row.find_all('td')
             ref = tds[1].find('a').get('href')
             title = tds[1].find('a').text
             link = tds[2].find_all('a')[-1].get('href')
             _size = tds[3].text
             size = _size[:-3]
             unit = _size[-3:]
             sizeInBytes = 0
             if unit == "GiB":
                 sizeInBytes = float(size) * 1073741824
             elif unit == "MiB":
                 sizeInBytes = float(size) * 1000000
             seeders = tds[5].text
             leechers = tds[6].text
             res = dict(link=link,
                        name=title,
                        size=str(sizeInBytes),
                        seeds=seeders,
                        leech=leechers,
                        engine_url=self.url,
                        desc_link=self.url + ref)
             prettyPrinter(res)
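
The unit handling above only recognizes GiB and MiB. A sketch of a uniform IEC conversion, assuming the size column always ends in a three-letter IEC suffix; size_to_bytes is an illustrative name, not part of the plugin:

    IEC_FACTORS = {'KiB': 1024, 'MiB': 1024 ** 2, 'GiB': 1024 ** 3, 'TiB': 1024 ** 4}

    def size_to_bytes(size_text):
        # "3.5 GiB" -> 3758096384.0; falls back to 0 for unknown units,
        # matching the sizeInBytes default above.
        value, unit = size_text[:-3], size_text[-3:]
        return float(value) * IEC_FACTORS.get(unit, 0)
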
Example #45
    def search(self, what, cat='all'):
        search_url = "{}/service/search?size=300&q={}".format(self.url, what)
        desc_url = "{}/#/search/torrent/{}/1".format(self.url, what)

        # get response json
        response = retrieve_url(search_url)
        response_json = json.loads(response)

        # parse results
        for result in response_json:
            res = {'link': self.download_link(result),
                   'name': result['name'],
                   'size': str(result['size_bytes']) + " B",
                   'seeds': result['seeders'],
                   'leech': result['leechers'],
                   'engine_url': self.url,
                   'desc_link': desc_url}
            prettyPrinter(res)
Example #46
    def search(self, what, cat='all'):
        query = "https://small-games.info/?go=search&go=search&search_text=" + what
        data = self.get_url(query).decode('utf-8', 'replace')
        match = re.compile(
            r'<a title="(.*?)"\shref="/.*?i=(\d*).*?Скачать\sигру\s\((.{2,11})\)'
        )
        results = match.findall(data)
        name_clean = re.compile('[A-Za-z0-9].*')

        for res in results:
            self.result['name'] = name_clean.findall(res[0])[0]
            self.result['link'] = self.url + "getTorrent.php?direct=1&gid=" + res[1]
            self.result['desc_link'] = self.url + "?go=game&c=61&i=" + res[1]
            # it is always MB, but the 'М' in the string is Cyrillic,
            # so prettyPrinter will not recognize it
            self.result['size'] = res[2][:-3] + 'MB'
            prettyPrinter(self.result)
Example #47
 def start_td(self,attr):
     if isinstance(self.td_counter,int):
         self.td_counter += 1
         if self.td_counter > 3:
             self.td_counter = None
             # add item to results
             if self.current_item:
                 # TorrentReactor returns unrelated results, we need to filter
                 if not all(word in self.current_item['name'].lower() for word in self.what_list):
                     return
                 self.current_item['engine_url'] = self.url
                 if not self.current_item['seeds'].isdigit():
                     self.current_item['seeds'] = 0
                 if not self.current_item['leech'].isdigit():
                     self.current_item['leech'] = 0
                 prettyPrinter(self.current_item)
                 self.has_results = True
                 self.results.append('a')
Example #48
 def search(self, what, cat='all'):
     json_data = retrieve_url(self.url + 'api/v2/torrents/search/?phrase=' +
                              what + '&category=' +
                              self.supported_categories.get(cat, ''))
     json_dict = json.loads(json_data)
     if json_dict['results'] < 1:
         return
     for r in json_dict['torrents']:
         r_dict = {
             'link': r['magnet_uri'],
             'name': r['torrent_title'],
             'size': str(r['size']) + 'B',
             'seeds': r['seeds'],
             'leech': r['leeches'],
             'desc_link': r['page'],
             'engine_url': self.url
         }
         prettyPrinter(r_dict)
Example #49
    def draw(self, html: str):
        torrents = re.findall(
            r'd\stopic.+?href="(.+?)".+?<b>(.+?)</b>.+?href="(d.+?)"'
            r'.+?/u>\s(.+?)<.+?b>(\d+)</.+?b>(\d+)<', html, re.S)

        for tor in torrents:
            torrent = {
                "engine_url": self.url,
                "desc_link": self.url + tor[0],
                "name": tor[1],
                "link": self.url + tor[2],
                "size": tor[3].replace(',', '.'),
                "seeds": tor[4],
                "leech": tor[5]
            }

            prettyPrinter(torrent)
        del torrents
Example #50
        def handle_endtag(self, tag):
            # detect that the torrent row is closed and print all collected data
            if self.torrent_row and tag == 'tr':
                self.torrent["engine_url"] = self.url
                logging.debug('torrent row: ' + str(self.torrent))
                prettyPrinter(self.torrent)
                self.torrent = {key: '' for key in self.torrent}
                self.index_td = 0
                self.torrent_row = False
                self.found_torrents += 1

            # detect that the results table is closed
            if self.result_table and tag == 'table':
                self.result_table = False

            # detect that all pagination links have been found
            if self.paginator and tag == 'span':
                self.paginator = False
Example #51
 def handle_endtag(self, tag):
     if not self.pageComplete:
         if tag == 'div':
             self.insideDataDiv = False
             self.spanCount = -1
             if len(self.singleResData) > 0:
                 # ignore trash stuff
                 if self.singleResData['name'] != '-1' and self.singleResData['size'] != '-1' \
                         and self.singleResData['name'].lower() != 'nome':
                     # ignore those with link and desc_link equals to -1
                     if self.singleResData['desc_link'] != '-1' or self.singleResData['link'] != '-1':
                         try:
                             prettyPrinter(self.singleResData)
                         except:
                             print(self.singleResData)
                         self.pageRes.append(self.singleResData)
                         self.fullResData.append(self.singleResData)
                 self.singleResData = self.getSingleData()
Example #52
 def start_td(self, attr):
     if isinstance(self.td_counter, int):
         self.td_counter += 1
         if self.td_counter > 3:
             self.td_counter = None
             # Display item
             if self.current_item:
                 if self.current_item['id'] in PREVIOUS_IDS:
                     self.results = []
                     self.reset()
                     return
                 self.current_item['engine_url'] = self.url
                 if not self.current_item['seeds'].isdigit():
                     self.current_item['seeds'] = 0
                 if not self.current_item['leech'].isdigit():
                     self.current_item['leech'] = 0
                 prettyPrinter(self.current_item)
                 PREVIOUS_IDS.add(self.current_item['id'])
                 self.results.append('a')
Example #53
    def parse_search(self, what, start=0, first_page=True):
        """Search for what starting on specified page. Defaults to first page of results."""
        logging.debug("parse_search({}, {}, {})".format(
            what, start, first_page))
        # Search.
        parser = self.SimpleSGMLParser(self.download_url, first_page)
        page = self.opener.open('{}?nm={}&start={}'.format(
            self.search_url, urllib.parse.quote(what), start))
        data = page.read().decode('cp1251')
        parser.feed(data)
        parser.close()

        # PrettyPrint each torrent found.
        for torrent in parser.results:
            torrent['engine_url'] = self.url
            if __name__ != "__main__":  # This is just to avoid printing when I debug.
                prettyPrinter(torrent)

        return (parser.tr_counter, parser.other_pages)
Example #54
    def handle_endtag(self, tag):
        # we are exiting the table body
        # no data will be processed after this.
        if tag == self.TBODY:
            self.inside_tbody = False

        # exiting the table data and maybe moving td or tr element
        elif self.inside_tbody and self.inside_row and tag == self.TD:
            self.inside_row = False
            self.current_item = None

        # exiting the tr element, which means all necessary data
        # for a torrent has been extracted, we should save it
        # and clean the object's state.
        elif self.inside_tbody and tag == self.TR:
            self.current_result['leech'] = self.current_result['leeches']
            prettyPrinter(self.current_result)
            self.current_result = {}
            self.current_item = None
Example #55
 def feed(self, html):
     self.pageResSize = 0
     torrents = self.__findTorrents(html)
     resultSize = len(torrents)
     if resultSize == 0:
         return
     else:
         self.pageResSize = resultSize
     for torrent in torrents:
         data = {
             'link': torrent[0],
             'name': torrent[1],
             'size': torrent[2],
             'seeds': torrent[3],
             'leech': torrent[4],
             'engine_url': self.url,
             'desc_link': urllib.parse.unquote(torrent[0])
         }
         prettyPrinter(data)
Example #56
 def handle_endtag(self, tag):
     if self.insideRow:
         if tag == "div":
             self.torrentrowDepth -= 1
             if self.torrentrowDepth < 0:
                 self.insideRow = False
                 self.crtTorrent["name"] = ("__FREELEECH__"
                                            if self.isFree else
                                            "") + self.torrentRow["c2"]
                 self.crtTorrent["size"] = str(
                     int(
                         round(
                             float(self.torrentRow["c7"]) * 1024 *
                             1024)))
                 self.crtTorrent["seeds"] = self.torrentRow["c9"]
                 self.crtTorrent["leech"] = self.torrentRow["c10"]
                 self.crtTorrent["engine_url"] = self.url
                 prettyPrinter(self.crtTorrent)
                 self.results.append('a')
Example #57
    def draw(self, html: str):
        torrents = RE_TORRENTS.findall(html)

        for tor in torrents:
            torrent_date = ""
            if config['torrentDate']:
                _loc = time.localtime(int(tor[6]))
                torrent_date = f'[{time.strftime("%y.%m.%d", _loc)}] '

            prettyPrinter({
                "engine_url": self.url,
                "desc_link": self.url + tor[0],
                "name": torrent_date + unescape(tor[1]),
                "link": self.url + tor[2],
                "size": tor[3],
                "seeds": tor[4],
                "leech": tor[5]
            })
        del torrents
Example #58
 def _parse_document(self, data):
     document = lxml.html.document_fromstring(data)
     info = {'engine_url': self.url}
     for t in document.cssselect('tr.tCenter'):
         try:
             a = t.xpath('.//a[contains(@href,"dl.php?t=")]')[0]
             info.update(
                 name = (self.prefix
                         + t.xpath('.//a[contains(@href,"tracker.php?f=")]')[0].text_content()
                         + ' - '
                         + t.xpath('.//a[contains(@href,"viewtopic.php?t=")]')[0].text_content()),
                 link = self.download_url + a.attrib['href'],
                 size = a.text_content().replace(u'\xa0', ' ').replace(u' \u2193', ''),
                 seeds = t.xpath('.//td[contains(@class,"seed")]')[0].text_content(),
                 leech = t.xpath('.//td[contains(@class,"leech")]')[0].text_content()
             )
             prettyPrinter(info)
         except IndexError:
             pass
Example #59
 def start_li(self, attr):
     if isinstance(self.li_counter, int):
         self.li_counter += 1
         if self.li_counter > 3:
             self.li_counter = None
             # Display item
             if self.current_item:
                 self.current_item['engine_url'] = self.url
                 if not self.current_item['seeds'].isdigit():
                     self.current_item['seeds'] = 0
                 if not self.current_item['leech'].isdigit():
                     self.current_item['leech'] = 0
                 # Search should use AND operator as a default
                 tmp = self.current_item['name'].upper()
                 if self.what is not None:
                     for w in self.what:
                         if tmp.find(w) < 0: return
                 prettyPrinter(self.current_item)
                 self.results.append('a')
Example #60
    def draw(self, html: str):
        torrents = re.findall(PATTERNS[1], html, re.S)

        for tor in torrents:
            torrent_date = ""
            if config['torrentDate']:
                _loc = time.localtime(int(tor[6]))
                torrent_date = f'[{time.strftime("%y.%m.%d", _loc)}] '

            prettyPrinter({
                "engine_url": self.url,
                "desc_link": self.url + tor[0],
                "name": torrent_date + tor[1],
                "link": self.url + tor[2],
                "size": tor[3].replace(',', '.'),
                "seeds": tor[4],
                "leech": tor[5]
            })
        del torrents