def parse(self):
    """Walk the netload.in free-download flow and set ``self.source``.

    Steps, in order: fetch ``self.link``, follow the "Free_dl" anchor,
    honor the page countdown, solve the captcha image, post the captcha
    form, honor the second countdown and finally "click" the
    "Orange_Link" anchor. The value returned by ``self.click`` is stored
    in ``self.source``; nothing is returned.

    NOTE(review): ``get_match`` results are used unconditionally, so it
    presumably signals a missing match itself using the message passed as
    its third argument -- confirm against its definition.
    """
    # The file id is either an explicit query parameter or the path
    # segment that follows "netload.in/datei".
    if "file_id" in self.link:  # most likely not.
        file_id = self.link.split("file_id=")[-1].split("&")[0]
    else:
        tail = self.link.split("netload.in/datei")[-1]
        file_id = tail.split("/")[0].split(".")[0]

    page = self.get_page(self.link)

    # Regex literals are raw strings so that "\(" is not an invalid
    # string escape; the pattern text itself is unchanged.
    free_dl_pattern = r'Free_dl"><a href="(?P<link>[^"]+)'
    m = self.get_match(free_dl_pattern, page, "Download not found")
    free_dl_url = BASE_URL + "/" + m.group('link').replace("&amp;", "&")
    page = self.get_page(free_dl_url)

    cn_pattern = r'>countdown\((?P<count>[^,]+)'
    # presumably 600 is the wait limit and 30 the fallback wait -- confirm
    # against countdown()'s definition.
    self.countdown(cn_pattern, page, 600, 30)

    # Captcha image; this pattern may not work.
    captcha_pattern = r'src="(?P<link>[^"]+)"[^"]+"Sicherheitsbild'
    m = self.get_match(captcha_pattern, page, "Captcha not found")
    captcha_url = BASE_URL + "/" + m.group('link')
    captcha_result = tesseract.get_solved_captcha(
        captcha_url, self.cookie, self.filter)

    form = [
        ("file_id", file_id),
        ("captcha_check", captcha_result),
        ("start", ""),
    ]
    captcha_form_url = BASE_URL + "/" + "index.php?id=10"
    page = self.get_page(captcha_form_url, form=form)
    self.countdown(cn_pattern, page, 600, 30)

    s_pattern = r'class="Orange_Link" href="(?P<link>[^"]+)'
    self.source = self.click(s_pattern, page, False)
Exemple #2
0
 def parse(self):
     """Walk the netload.in free-download flow and set ``self.source``.

     Fetches ``self.link``, follows the "Free_dl" anchor, honors the
     countdown, solves the captcha, posts the captcha form, honors the
     second countdown and stores the result of ``self.click`` on the
     "Orange_Link" anchor in ``self.source``.

     If either the free-download anchor or the captcha image cannot be
     matched (``get_match`` returns ``None``), the method returns without
     touching ``self.source``.
     """
     # The file id is either an explicit query parameter or the path
     # segment that follows "netload.in/datei".
     if "file_id" in self.link:  # most likely not.
         file_id = self.link.split("file_id=")[-1].split("&")[0]
     else:
         tail = self.link.split("netload.in/datei")[-1]
         file_id = tail.split("/")[0].split(".")[0]

     page = self.get_page(self.link)

     # Regex literals are raw strings so that "\(" is not an invalid
     # string escape; the pattern text itself is unchanged.
     free_dl_pattern = r'Free_dl"><a href="(?P<link>[^"]+)'
     m = self.get_match(free_dl_pattern, page)
     if m is None:
         return  # download link not found

     free_dl_url = BASE_URL + "/" + m.group('link').replace("&amp;", "&")
     page = self.get_page(free_dl_url)
     cn_pattern = r'>countdown\((?P<count>[^,]+)'
     # presumably 600 is the wait limit and 30 the fallback wait -- confirm
     # against countdown()'s definition.
     self.countdown(cn_pattern, page, 600, 30)

     # Captcha image; this pattern may not work.
     captcha_pattern = r'src="(?P<link>[^"]+)"[^"]+"Sicherheitsbild'
     m = self.get_match(captcha_pattern, page)
     if m is None:
         return  # captcha not found

     captcha_url = BASE_URL + "/" + m.group('link')
     captcha_result = tesseract.get_solved_captcha(
         captcha_url, self.cookie, self.filter)
     form = [
         ("file_id", file_id),
         ("captcha_check", captcha_result),
         ("start", ""),
     ]
     captcha_form_url = BASE_URL + "/" + "index.php?id=10"
     page = self.get_page(captcha_form_url, form=form)
     self.countdown(cn_pattern, page, 600, 30)

     s_pattern = r'class="Orange_Link" href="(?P<link>[^"]+)'
     self.source = self.click(s_pattern, page, False)
    def add(self):
        """Resolve ``self.link`` on netload.in and open the final file link.

        The method rewrites ``self.link`` to the ``index.php?id=10`` form,
        follows the "Free_dl" anchor, waits for the countdown, solves the
        captcha, posts the captcha form, waits again and opens the
        "Orange_Link" target starting at ``self.content_range``.

        Returns:
            A ``(link, source, err_msg)`` tuple.

            * When ``self.wait_func`` returns a true value the method
              returns early with ``(self.link, None, None)``.
            * Otherwise it returns ``(link_file, source, err_msg)``. On the
              success path ``err_msg`` is the ``FileLinkFoundException``
              instance that is raised to leave the nested ``with`` blocks;
              on failure ``link_file``/``source`` may be ``None`` and
              ``err_msg`` is the caught exception.

        All exceptions are caught and reported through ``err_msg``. A
        countdown above 600 is reported as ``LimitExceededException`` and
        also flags the limit via ``self.set_limit_exceeded(True)``. Missing
        page markers are reported as ``LinkErrorException`` /
        ``CaptchaException`` instead of failing on an unbound local.

        TODO: Refactor.
        """
        link_file = None
        err_msg = None
        source = None
        wait = WAITING

        def parse_countdown(line):
            """Return the wait taken from a ``>countdown(N,`` line.

            Falls back to ``WAITING`` (and logs) when N is not an integer.
            """
            try:
                # The page value is scaled down by 100 (original note:
                # "ms"); presumably it is in hundredths of a second.
                return int(line.split(">countdown(")[-1].split(",")[0]) / 100
            except Exception as err:
                logger.exception(err)
                return WAITING

        try:
            cookie = cookielib.CookieJar()
            opener = URLOpen(cookie)  # cookielib

            # The file id is either an explicit query parameter or the
            # path segment that follows "netload.in/datei".
            if "file_id" in self.link:  # most likely not.
                file_id = self.link.split("file_id=")[-1].split("&")[0]
            else:
                tail = self.link.split("netload.in/datei")[-1]
                file_id = tail.split("/")[0].split(".")[0]
            self.link = BASE_URL + "/" + "index.php?id=10&file_id=" + file_id

            with URLClose(opener.open(self.link)) as s1:
                if self.wait_func():
                    return self.link, None, err_msg

                # Both values come from the same "Free_dl" line; they stay
                # None when the page does not contain it.
                page_id = None
                url = None
                for line in s1:
                    if 'class="Free_dl' in line:
                        page_id = line.split("?id=")[-1].split("&")[0]
                        href = line.split('href="')[-1].split('"')[0]
                        url = BASE_URL + "/" + href.replace("&amp;", "&")
                        break
                if url is None:
                    raise LinkErrorException("Link not found")

                with URLClose(opener.open(url)) as s2:
                    captcha_url = None
                    for line in s2:
                        if "captcha.php" in line:
                            captcha_url = BASE_URL + "/" + line.split(
                                'src="')[-1].split('"')[0]
                        elif ">countdown(" in line:
                            wait = parse_countdown(line)
                    if self.wait_func(wait + 1):
                        return self.link, None, err_msg
                    if captcha_url is None:
                        raise CaptchaException("Captcha not found")

                    captcha_result = tesseract.get_solved_captcha(
                        captcha_url, cookie, self.filter)
                    form = urllib.urlencode([
                        ("file_id", file_id),
                        ("captcha_check", captcha_result),
                        ("start", ""),
                    ])
                    captcha_form_url = (
                        BASE_URL + "/" + "index.php?id=" + page_id)

                    with URLClose(opener.open(captcha_form_url, form)) as s3:
                        # If this page has no countdown, ``wait`` keeps the
                        # value read from the previous page.
                        for line in s3:
                            if ">countdown(" in line:
                                wait = parse_countdown(line)
                            elif 'class="Orange_Link' in line:
                                link_file = line.split('href="')[-1].split(
                                    '"')[0]
                        if wait > 600:  # 10 minutes
                            raise LimitExceededException("Limit exceeded")
                        if self.wait_func(wait + 1):
                            return self.link, None, err_msg
                        if link_file is None:
                            raise LinkErrorException("Link not found")

                        # always_close=False: the opened response is handed
                        # back to the caller as ``source``.
                        with URLClose(opener.open(link_file,
                                                  range=(self.content_range,
                                                         None)),
                                      always_close=False) as s4:
                            source = s4
                        # Raised on success to unwind the nested blocks;
                        # handled below like the other plugin exceptions.
                        raise FileLinkFoundException()

        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
        except (FileLinkFoundException, LimitExceededException,
                LinkErrorException, CaptchaException) as err:
            if isinstance(err, LimitExceededException):
                self.set_limit_exceeded(True)
            err_msg = err
            logger.info(err)
        except Exception as err:
            logger.exception(err)
            err_msg = err

        return link_file, source, err_msg