Python HTTP.request Examples

Programming Language: Python

Namespace/Package Name: svtplay_dl.utils.http

Class/Type: HTTP

Method/Function: request

Examples at hotexamples.com: 8

Python HTTP.request - 8 examples found. These are the top rated real world Python examples of svtplay_dl.utils.http.HTTP.request extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

HTTP(5)

request(5)

get(3)

Frequently Used Methods

HTTP (5)

request (5)

get (3)

Example #1

Show file

class VideoRetriever(object):
    def __init__(self, config, url, bitrate=0, **kwargs):
        self.config = config
        self.url = url
        self.bitrate = int(bitrate)
        self.kwargs = kwargs
        self.http = HTTP(config)
        self.finished = False
        self.audio = kwargs.pop("audio", None)
        self.files = kwargs.pop("files", None)
        self.keycookie = kwargs.pop("keycookie", None)
        self.authorization = kwargs.pop("authorization", None)
        self.output = kwargs.pop("output", None)
        self.segments = kwargs.pop("segments", None)
        self.output_extention = None

    def __repr__(self):
        return "<Video(fetcher={}, bitrate={}>".format(self.__class__.__name__,
                                                       self.bitrate)

    @property
    def name(self):
        pass

    def _download_url(self, url, audio=False, total_size=None):
        cookies = self.kwargs["cookies"]
        data = self.http.request("get",
                                 url,
                                 cookies=cookies,
                                 headers={'Range': 'bytes=0-8192'})
        if not total_size:
            try:
                total_size = data.headers['Content-Range']
                total_size = total_size[total_size.find("/") + 1:]
                total_size = int(total_size)
            except KeyError:
                raise KeyError("Can't get the total size.")

        bytes_so_far = 8192
        if audio:
            file_d = output(copy.copy(self.output), self.config, "m4a")
        else:
            file_d = output(self.output, self.config, "mp4")

        if file_d is None:
            return
        file_d.write(data.content)
        eta = ETA(total_size)
        while bytes_so_far < total_size:

            if not self.config.get("silent"):
                eta.update(bytes_so_far)
                progressbar(total_size, bytes_so_far,
                            ''.join(["ETA: ", str(eta)]))

            old = bytes_so_far + 1
            bytes_so_far = total_size

            bytes_range = "bytes={0}-{1}".format(old, bytes_so_far)

            data = self.http.request("get",
                                     url,
                                     cookies=cookies,
                                     headers={'Range': bytes_range})
            file_d.write(data.content)

        file_d.close()
        progressbar(bytes_so_far, total_size, "ETA: complete")
        # progress_stream.write('\n')
        self.finished = True

Example #2

Show file

class Service:
    supported_domains = []
    supported_domains_re = []

    def __init__(self, config, _url, http=None):
        self._url = _url
        self._urldata = None
        self._error = False
        self.subtitle = None
        self.cookies = {}
        self.auto_name = None
        self.output = {
            "title": None,
            "season": None,
            "episode": None,
            "episodename": None,
            "id": None,
            "service": self.__class__.__name__.lower(),
            "tvshow": None,
            "title_nice": None,
            "showdescription": None,
            "episodedescription": None,
            "showthumbnailurl": None,
            "episodethumbnailurl": None,
            "publishing_datetime": None,
        }

        #  Config
        if config.get("configfile") and os.path.isfile(config.get("configfile")):
            self.config = merge(
                readconfig(setup_defaults(), config.get("configfile"), service=self.__class__.__name__.lower()).get_variable(),
                config.get_variable(),
            )
        else:
            self.config = config

        if not http:
            self.http = HTTP(self.config)
        else:
            self.http = http

        logging.debug("service: {}".format(self.__class__.__name__.lower()))

    @property
    def url(self):
        return self._url

    def get_urldata(self):
        if self._urldata is None:
            self._urldata = self.http.request("get", self.url).text
        return self._urldata

    @classmethod
    def handles(cls, url):
        urlp = urlparse(url)

        # Apply supported_domains_re regexp to the netloc. This
        # is meant for 'dynamic' domains, e.g. containing country
        # information etc.
        for domain_re in [re.compile(x) for x in cls.supported_domains_re]:
            if domain_re.match(urlp.netloc):
                return True

        if urlp.netloc in cls.supported_domains:
            return True

        # For every listed domain, try with www.subdomain as well.
        if urlp.netloc in ["www." + x for x in cls.supported_domains]:
            return True

        return False

    def get_subtitle(self, options):
        pass

    # the options parameter is unused, but is part of the
    # interface, so we don't want to remove it. Thus, the
    # pylint ignore.
    def find_all_episodes(self, options):  # pylint: disable-msg=unused-argument
        logging.warning("--all-episodes not implemented for this service")
        return [self.url]

Example #3

Show file

class VideoRetriever:
    def __init__(self, config, url, bitrate, output, **kwargs):
        self.config = config
        self.url = url
        self.bitrate = int(bitrate) if bitrate else 0
        self.kwargs = kwargs
        self.http = HTTP(config)
        self.finished = False
        self.audio = kwargs.pop("audio", None)
        self.files = kwargs.pop("files", None)
        self.keycookie = kwargs.pop("keycookie", None)
        self.authorization = kwargs.pop("authorization", None)
        self.output = output
        self.segments = kwargs.pop("segments", None)
        self.output_extention = None
        channels = kwargs.pop("channels", None)
        codec = kwargs.pop("codec", "h264")
        self.format = f"{codec}-{channels}" if channels else codec

    def __repr__(self):
        return f"<Video(fetcher={self.__class__.__name__}, bitrate={self.bitrate} format={self.format}>"

    @property
    def name(self):
        pass

    def _download_url(self, url, audio=False, total_size=None):
        cookies = self.kwargs["cookies"]
        data = self.http.request("get",
                                 url,
                                 cookies=cookies,
                                 headers={"Range": "bytes=0-8192"})
        if not total_size:
            try:
                total_size = data.headers["Content-Range"]
                total_size = total_size[total_size.find("/") + 1:]
                total_size = int(total_size)
            except KeyError:
                raise KeyError("Can't get the total size.")

        bytes_so_far = 8192
        if audio:
            file_d = output(copy.copy(self.output), self.config, "m4a")
        else:
            file_d = output(self.output, self.config, "mp4")

        if file_d is None:
            return
        file_d.write(data.content)
        eta = ETA(total_size)
        while bytes_so_far < total_size:

            if not self.config.get("silent"):
                eta.update(bytes_so_far)
                progressbar(total_size, bytes_so_far,
                            "".join(["ETA: ", str(eta)]))

            old = bytes_so_far + 1
            bytes_so_far = total_size

            bytes_range = f"bytes={old}-{bytes_so_far}"

            data = self.http.request("get",
                                     url,
                                     cookies=cookies,
                                     headers={"Range": bytes_range})
            file_d.write(data.content)

        file_d.close()
        progressbar(bytes_so_far, total_size, "ETA: complete")
        # progress_stream.write('\n')
        self.finished = True

Example #4

Show file

File: __init__.py Project: toran4/svtplay-dl

class subtitle:
    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        self.bom = False
        self.output = kwargs.pop("output", None)
        self.kwargs = kwargs

    def __repr__(self):
        return f"<Subtitle(type={self.subtype}, url={self.url}>"

    def download(self):
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            logging.warning("Can't download subtitle file")
            return

        data = None
        if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "tt":
            data = self.tt(subdata)
        if self.subtype == "json":
            data = self.json(subdata)
        if self.subtype == "sami":
            data = self.sami(subdata)
        if self.subtype == "smi":
            data = self.smi(subdata)
        if self.subtype == "wrst":
            if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
                self.bom = True
            subdata.encoding = subdata.apparent_encoding
            data = self.wrst(subdata)
        if self.subtype == "wrstsegment":
            data = self.wrstsegment(subdata)
        if self.subtype == "raw":
            data = self.raw(subdata)
        if self.subtype == "stpp":
            data = self.stpp(subdata)

        if self.subfix:
            if self.config.get("get_all_subtitles"):
                if self.output["episodename"]:
                    self.output["episodename"] = "{}-{}".format(
                        self.output["episodename"], self.subfix)
                else:
                    self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            subdata = self.raw(subdata)
            self.save_file(subdata, self.subtype)

        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        file_d = output(self.output,
                        self.config,
                        subtype,
                        mode="w",
                        encoding="utf-8")
        if hasattr(file_d, "read") is False:
            return
        file_d.write(data)
        file_d.close()

    def raw(self, subdata):
        return subdata.text

    def tt(self, subdata):
        i = 1
        subs = subdata.text
        return self._tt(subs, i)

    def _tt(self, subs, i):
        data = ""
        subdata = re.sub(' xmlns="[^"]+"', "", subs, count=1)
        tree = ET.XML(subdata)
        xml = tree.find("body").find("div")
        plist = list(xml.findall("p"))
        for node in plist:
            tag = norm(node.tag)
            if tag == "p" or tag == "span":
                begin = node.attrib["begin"]
                if not ("dur" in node.attrib):
                    if "end" not in node.attrib:
                        duration = node.attrib["duration"]
                else:
                    duration = node.attrib["dur"]
                if not ("end" in node.attrib):
                    begin2 = begin.split(":")
                    duration2 = duration.split(":")
                    try:
                        sec = float(begin2[2]) + float(duration2[2])
                    except ValueError:
                        sec = 0.000
                    end = "%02d:%02d:%06.3f" % (int(begin2[0]), int(
                        begin2[1]), sec)
                else:
                    end = node.attrib["end"]
                data += "{}\n{} --> {}\n".format(i, begin.replace(".", ","),
                                                 end.replace(".", ","))
                data = tt_text(node, data)
                data += "\n"
                i += 1

        return data

    def json(self, subdata):
        data = json.loads(subdata.text)
        number = 1
        subs = ""
        for i in data:
            subs += "{}\n{} --> {}\n".format(number,
                                             timestr(int(i["startMillis"])),
                                             timestr(int(i["endMillis"])))
            subs += "%s\n\n" % i["text"]
            number += 1

        return subs

    def sami(self, subdata):
        text = subdata.text
        text = re.sub(r"&", "&amp;", text)
        tree = ET.fromstring(text)
        allsubs = tree.findall(".//Subtitle")
        subs = ""
        increase = 0
        for sub in allsubs:
            try:
                number = int(sub.attrib["SpotNumber"])
            except ValueError:
                number = int(
                    re.search(r"(\d+)", sub.attrib["SpotNumber"]).group(1))
                increase += 1
            n = number + increase

            texts = sub.findall(".//Text")
            all = ""
            for text in texts:
                line = ""
                for txt in text.itertext():
                    line += f"{txt}"
                all += "{}\n".format(decode_html_entities(line.lstrip()))
            subs += "{}\n{} --> {}\n{}\n".format(
                n, timecolon(sub.attrib["TimeIn"]),
                timecolon(sub.attrib["TimeOut"]), all)
        subs = re.sub("&amp;", r"&", subs)
        return subs

    def smi(self, subdata):
        if requests_version < 0x20300:
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        ssubdata = StringIO(subdata)
        timea = 0
        number = 1
        data = None
        subs = ""
        TAG_RE = re.compile(r"<(?!\/?i).*?>")
        bad_char = re.compile(r"\x96")
        for i in ssubdata.readlines():
            i = i.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", i)
            if sync:
                if int(sync.group(1)) != int(timea):
                    if data and data != "&nbsp;":
                        subs += "{}\n{} --> {}\n".format(
                            number, timestr(timea), timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub("",
                                                   data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", i)
            if text:
                data = text.group(1)
        recomp = re.compile(r"\r")
        text = bad_char.sub("-", recomp.sub("", subs))
        return text

    def wrst(self, subdata):
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False

        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r"(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)",
                    i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    matchx = re.search(
                        r"(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)",
                        i)
                    hour1 = 0
                    hour2 = 0
                time = "{:02d}:{}:{} --> {:02d}:{}:{}\n".format(
                    hour1,
                    matchx.group("m1"),
                    matchx.group("s1").replace(".", ","),
                    hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ","),
                )
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    colors = {
                        "30": "#000000",
                        "31": "#ff0000",
                        "32": "#00ff00",
                        "33": "#ffff00",
                        "34": "#0000ff",
                        "35": "#ff00ff",
                        "36": "#00ffff",
                        "37": "#ffffff",
                        "c.black": "#000000",
                        "c.red": "#ff0000",
                        "c.green": "#00ff00",
                        "c.yellow": "#ffff00",
                        "c.blue": "#0000ff",
                        "c.magenta": "#ff00ff",
                        "c.cyan": "#00ffff",
                        "c.gray": "#ffffff",
                    }
                    sub = i
                    for tag, color in colors.items():
                        regex1 = "<" + tag + ">"
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    sub = re.sub("</.+>", "</font>", sub)
                else:
                    sub = re.sub("<[^>]*>", "", i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt

    def wrstsegment(self, subdata):
        time = 0
        subs = []
        for i in self.kwargs["m3u8"].media_segment:
            itemurl = get_full_url(i["URI"], self.url)
            cont = self.http.get(itemurl)
            if "cmore" in self.url:
                cont.encoding = "utf-8"
            if "mtgx" in self.url:
                cont.encoding = "utf-8"
            text = cont.text.split("\n")
            for t in text:  # is in text[1] for tv4play, but this should be more future proof
                if "X-TIMESTAMP-MAP=MPEGTS" in t:
                    time = float(
                        re.search(r"X-TIMESTAMP-MAP=MPEGTS:(\d+)",
                                  t).group(1)) / 90000 - 10
            text = text[3:len(text) - 2]
            itmes = []
            if len(text) > 1:
                for n in text:
                    if n:  # don't get the empty lines.
                        itmes.append(n)

            several_items = False
            skip = False
            pre_date_skip = True
            sub = []

            for x in range(len(itmes)):
                item = itmes[x]
                if strdate(item) and len(subs) > 0 and itmes[x +
                                                             1] == subs[-1][1]:
                    ha = strdate(subs[-1][0])
                    ha3 = strdate(item)
                    second = str2sec(ha3.group(2)) + time
                    subs[-1][0] = "{} --> {}".format(ha.group(1),
                                                     sec2str(second))
                    skip = True
                    pre_date_skip = False
                    continue
                has_date = strdate(item)
                if has_date:
                    if several_items:
                        subs.append(sub)
                        sub = []
                    skip = False
                    first = str2sec(has_date.group(1)) + time
                    second = str2sec(has_date.group(2)) + time
                    sub.append("{} --> {}".format(sec2str(first),
                                                  sec2str(second)))
                    several_items = True
                    pre_date_skip = False
                elif has_date is None and skip is False and pre_date_skip is False:
                    sub.append(item)

            if sub:
                subs.append(sub)
        string = ""
        nr = 1
        for sub in subs:
            string += "{}\n{}\n\n".format(nr, "\n".join(sub))
            nr += 1

        return string

    def stpp(self, subdata):
        nr = 1
        entries = []

        for i in self.kwargs["files"]:
            res = self.http.get(i)
            start = res.content.find(b"mdat") + 4
            if start > 3:
                _data = self._tt(res.content[start:].decode(), nr)
                if _data:
                    entries.append(_data.split("\n\n"))
                    nr += 1

        new_entries = []
        for entry in entries:
            for i in entry:
                if i:
                    new_entries.append(i.split("\n"))

        entries = new_entries
        changed = True
        while changed:
            changed, entries = _resolv(entries)

        nr = 1
        data = ""
        for entry in entries:
            for item in entry:
                data += f"{item}\n"
            data += "\n"

        return data

Example #5

Show file

class subtitle(object):
    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        self.bom = False
        self.output = kwargs.pop("output", None)
        self.kwargs = kwargs

    def __repr__(self):
        return "<Subtitle(type={}, url={}>".format(self.subtype, self.url)

    def download(self):
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            log.warning("Can't download subtitle file")
            return

        data = None
        if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "tt":
            data = self.tt(subdata)
        if self.subtype == "json":
            data = self.json(subdata)
        if self.subtype == "sami":
            data = self.sami(subdata)
        if self.subtype == "smi":
            data = self.smi(subdata)
        if self.subtype == "wrst":
            if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
                subdata.encoding = "utf-8"
                self.bom = True
            if "dplay" in self.url:
                subdata.encoding = "utf-8"
            data = self.wrst(subdata)
        if self.subtype == "wrstsegment":
            data = self.wrstsegment(subdata)
        if self.subtype == "raw":
            data = self.raw(subdata)

        if self.subfix:
            if self.config.get("get_all_subtitles"):
                if self.output["episodename"]:
                    self.output["episodename"] = "{}-{}".format(
                        self.output["episodename"], self.subfix)
                else:
                    self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            subdata = self.raw(subdata)
            self.save_file(subdata, self.subtype)

        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        if platform.system() == "Windows":
            file_d = output(self.output,
                            self.config,
                            subtype,
                            mode="wt",
                            encoding="utf-8")
        else:
            file_d = output(self.output, self.config, subtype, mode="wt")
        if hasattr(file_d, "read") is False:
            return
        file_d.write(data)
        file_d.close()

    def raw(self, subdata):
        return subdata.text

    def tt(self, subdata):
        i = 1
        data = ""
        subs = subdata.text

        subdata = re.sub(' xmlns="[^"]+"', '', subs, count=1)
        tree = ET.XML(subdata)
        xml = tree.find("body").find("div")
        plist = list(xml.findall("p"))
        for node in plist:
            tag = norm(node.tag)
            if tag == "p" or tag == "span":
                begin = node.attrib["begin"]
                if not ("dur" in node.attrib):
                    duration = node.attrib["duration"]
                else:
                    duration = node.attrib["dur"]
                if not ("end" in node.attrib):
                    begin2 = begin.split(":")
                    duration2 = duration.split(":")
                    try:
                        sec = float(begin2[2]) + float(duration2[2])
                    except ValueError:
                        sec = 0.000
                    end = "%02d:%02d:%06.3f" % (int(begin2[0]), int(
                        begin2[1]), sec)
                else:
                    end = node.attrib["end"]
                data += '%s\n%s --> %s\n' % (i, begin.replace(
                    ".", ","), end.replace(".", ","))
                data = tt_text(node, data)
                data += "\n"
                i += 1

        return data

    def json(self, subdata):
        data = json.loads(subdata.text)
        number = 1
        subs = ""
        for i in data:
            subs += "%s\n%s --> %s\n" % (number, timestr(int(
                i["startMillis"])), timestr(int(i["endMillis"])))
            subs += "%s\n\n" % i["text"]
            number += 1

        return subs

    def sami(self, subdata):
        text = subdata.text
        text = re.sub(r'&', '&amp;', text)
        tree = ET.fromstring(text)
        subt = tree.find("Font")
        subs = ""
        n = 0
        for i in subt.getiterator():
            if i.tag == "Subtitle":
                n = i.attrib["SpotNumber"]

                if i.attrib["SpotNumber"] == "1":
                    subs += "%s\n%s --> %s\n" % (
                        i.attrib["SpotNumber"], timecolon(i.attrib["TimeIn"]),
                        timecolon(i.attrib["TimeOut"]))
                else:
                    subs += "\n%s\n%s --> %s\n" % (
                        i.attrib["SpotNumber"], timecolon(i.attrib["TimeIn"]),
                        timecolon(i.attrib["TimeOut"]))
            else:
                if int(n) > 0 and i.text:
                    subs += "%s\n" % decode_html_entities(i.text)

        subs = re.sub('&amp;', r'&', subs)
        return subs

    def smi(self, subdata):
        if requests_version < 0x20300:
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        ssubdata = StringIO(subdata)
        timea = 0
        number = 1
        data = None
        subs = ""
        TAG_RE = re.compile(r'<(?!\/?i).*?>')
        bad_char = re.compile(r'\x96')
        for i in ssubdata.readlines():
            i = i.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", i)
            if sync:
                if int(sync.group(1)) != int(timea):
                    if data and data != "&nbsp;":
                        subs += "%s\n%s --> %s\n" % (number, timestr(timea),
                                                     timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub('',
                                                   data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", i)
            if text:
                data = text.group(1)
        recomp = re.compile(r'\r')
        text = bad_char.sub('-', recomp.sub('', subs))
        return text

    def wrst(self, subdata):
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        if self.bom:
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)',
                    i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    matchx = re.search(
                        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)',
                        i)
                    hour1 = 0
                    hour2 = 0
                time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"),
                    matchx.group("s1").replace(".", ","), hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ","))
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    colors = {
                        '30': '#000000',
                        '31': '#ff0000',
                        '32': '#00ff00',
                        '33': '#ffff00',
                        '34': '#0000ff',
                        '35': '#ff00ff',
                        '36': '#00ffff',
                        '37': '#ffffff'
                    }
                    sub = i
                    for tag, color in colors.items():
                        regex1 = '<' + tag + '>'
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    sub = re.sub('</.+>', '</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt

    def wrstsegment(self, subdata):
        time = 0
        subs = []
        for i in self.kwargs["m3u8"].media_segment:
            itemurl = get_full_url(i["URI"], self.url)
            cont = self.http.get(itemurl)
            if "cmore" in self.url:
                cont.encoding = "utf-8"
            text = cont.text.split("\n")
            for t in text:  # is in text[1] for tv4play, but this should be more future proof
                if 'X-TIMESTAMP-MAP=MPEGTS' in t:
                    time = float(
                        re.search(r"X-TIMESTAMP-MAP=MPEGTS:(\d+)",
                                  t).group(1)) / 90000 - 10
            text = text[3:len(text) - 2]
            if len(text) > 1:
                itmes = []
                for n in text:
                    if n:
                        itmes.append(n)
                    else:
                        if len(subs) > 1 and itmes[1] == subs[-1][
                                1]:  # This will happen when  there is two sections in file
                            ha = strdate(subs[-1][0])
                            ha3 = strdate(itmes[0])
                            second = str2sec(ha3.group(2)) + time
                            subs[-1][0] = "{} --> {}".format(
                                ha.group(1), sec2str(second))
                            itmes = []
                        else:
                            ha = strdate(itmes[0])
                            first = str2sec(ha.group(1)) + time
                            second = str2sec(ha.group(2)) + time
                            itmes[0] = "{} --> {}".format(
                                sec2str(first), sec2str(second))
                            subs.append(itmes)
                            itmes = []
                if itmes:
                    if len(subs) > 0 and itmes[1] == subs[-1][1]:
                        ha = strdate(subs[-1][0])
                        ha3 = strdate(itmes[0])
                        second = str2sec(ha3.group(2)) + time
                        subs[-1][0] = "{} --> {}".format(
                            ha.group(1), sec2str(second))
                    else:
                        ha = strdate(itmes[0])
                        first = str2sec(ha.group(1)) + time
                        second = str2sec(ha.group(2)) + time
                        itmes[0] = "{} --> {}".format(sec2str(first),
                                                      sec2str(second))
                        subs.append(itmes)

        string = ""
        nr = 1
        for sub in subs:
            string += "{}\n{}\n\n".format(nr, '\n'.join(sub))
            nr += 1

        return string

Example #6

Show file

File: __init__.py Project: olof/debian-svtplay-dl

class subtitle(object):
    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        self.bom = False
        self.output = kwargs.pop("output", None)
        self.kwargs = kwargs

    def __repr__(self):
        return "<Subtitle(type={}, url={}>".format(self.subtype, self.url)

    def download(self):
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            log.warning("Can't download subtitle file")
            return

        data = None
        if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "tt":
            data = self.tt(subdata)
        if self.subtype == "json":
            data = self.json(subdata)
        if self.subtype == "sami":
            data = self.sami(subdata)
        if self.subtype == "smi":
            data = self.smi(subdata)
        if self.subtype == "wrst":
            if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
                subdata.encoding = "utf-8"
                self.bom = True
            if "dplay" in self.url:
                subdata.encoding = "utf-8"
            data = self.wrst(subdata)
        if self.subtype == "wrstsegment":
            data = self.wrstsegment(subdata)
        if self.subtype == "raw":
            data = self.raw(subdata)

        if self.subfix:
            if self.config.get("get_all_subtitles"):
                if self.output["episodename"]:
                    self.output["episodename"] = "{}-{}".format(self.output["episodename"], self.subfix)
                else:
                    self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            subdata = self.raw(subdata)
            self.save_file(subdata, self.subtype)

        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        if platform.system() == "Windows":
            file_d = output(self.output, self.config, subtype, mode="wt", encoding="utf-8")
        else:
            file_d = output(self.output, self.config, subtype, mode="wt")
        if hasattr(file_d, "read") is False:
            return
        file_d.write(data)
        file_d.close()

    def raw(self, subdata):
        return subdata.text

    def tt(self, subdata):
        i = 1
        data = ""
        subs = subdata.text

        subdata = re.sub(' xmlns="[^"]+"', '', subs, count=1)
        tree = ET.XML(subdata)
        xml = tree.find("body").find("div")
        plist = list(xml.findall("p"))
        for node in plist:
            tag = norm(node.tag)
            if tag == "p" or tag == "span":
                begin = node.attrib["begin"]
                if not ("dur" in node.attrib):
                    duration = node.attrib["duration"]
                else:
                    duration = node.attrib["dur"]
                if not ("end" in node.attrib):
                    begin2 = begin.split(":")
                    duration2 = duration.split(":")
                    try:
                        sec = float(begin2[2]) + float(duration2[2])
                    except ValueError:
                        sec = 0.000
                    end = "%02d:%02d:%06.3f" % (int(begin2[0]), int(begin2[1]), sec)
                else:
                    end = node.attrib["end"]
                data += '%s\n%s --> %s\n' % (i, begin.replace(".", ","), end.replace(".", ","))
                data = tt_text(node, data)
                data += "\n"
                i += 1

        return data

    def json(self, subdata):
        data = json.loads(subdata.text)
        number = 1
        subs = ""
        for i in data:
            subs += "%s\n%s --> %s\n" % (number, timestr(int(i["startMillis"])), timestr(int(i["endMillis"])))
            subs += "%s\n\n" % i["text"]
            number += 1

        return subs

    def sami(self, subdata):
        text = subdata.text
        text = re.sub(r'&', '&amp;', text)
        tree = ET.fromstring(text)
        allsubs = tree.findall(".//Subtitle")
        subs = ""
        increase = 0
        for sub in allsubs:
            try:
                number = int(sub.attrib["SpotNumber"])
            except ValueError:
                number = int(re.search(r"(\d+)", sub.attrib["SpotNumber"]).group(1))
                increase += 1
            n = number + increase

            texts = sub.findall(".//Text")
            all = ""
            for text in texts:
                line = ""
                for txt in text.itertext():
                    line += "{}".format(txt)
                all += "{}\n".format(decode_html_entities(line.lstrip()))
            subs += "{}\n{} --> {}\n{}\n".format(n, timecolon(sub.attrib["TimeIn"]), timecolon(sub.attrib["TimeOut"]), all)
        subs = re.sub('&amp;', r'&', subs)
        return subs

    def smi(self, subdata):
        if requests_version < 0x20300:
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        ssubdata = StringIO(subdata)
        timea = 0
        number = 1
        data = None
        subs = ""
        TAG_RE = re.compile(r'<(?!\/?i).*?>')
        bad_char = re.compile(r'\x96')
        for i in ssubdata.readlines():
            i = i.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", i)
            if sync:
                if int(sync.group(1)) != int(timea):
                    if data and data != "&nbsp;":
                        subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", i)
            if text:
                data = text.group(1)
        recomp = re.compile(r'\r')
        text = bad_char.sub('-', recomp.sub('', subs))
        return text

    def wrst(self, subdata):
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        if self.bom:
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                    hour1 = 0
                    hour2 = 0
                time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                                                                      hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    colors = {
                        '30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00', '34': '#0000ff',
                        '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff', 'c.black': '#000000', 'c.red': '#ff0000',
                        'c.green': '#00ff00', 'c.yellow': '#ffff00', 'c.blue': '#0000ff', 'c.magneta': '#ff00ff',
                        'c.cyan': '#00ffff', 'c.gray': '#ffffff',
                    }
                    sub = i
                    for tag, color in colors.items():
                        regex1 = '<' + tag + '>'
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    sub = re.sub('</.+>', '</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt

    def wrstsegment(self, subdata):
        time = 0
        subs = []
        for i in self.kwargs["m3u8"].media_segment:
            itemurl = get_full_url(i["URI"], self.url)
            cont = self.http.get(itemurl)
            if "cmore" in self.url:
                cont.encoding = "utf-8"
            text = cont.text.split("\n")
            for t in text:  # is in text[1] for tv4play, but this should be more future proof
                if 'X-TIMESTAMP-MAP=MPEGTS' in t:
                    time = float(re.search(r"X-TIMESTAMP-MAP=MPEGTS:(\d+)", t).group(1)) / 90000 - 10
            text = text[3:len(text) - 2]
            if len(text) > 1:
                itmes = []
                for n in text:
                    if n:
                        itmes.append(n)
                    else:
                        if len(subs) > 1 and len(itmes) < 2:  # Ignore empty lines in unexpected places
                            pass
                        elif len(subs) > 1 and itmes[1] == subs[-1][1]:  # This will happen when there are two sections in file
                            ha = strdate(subs[-1][0])
                            ha3 = strdate(itmes[0])
                            second = str2sec(ha3.group(2)) + time
                            subs[-1][0] = "{} --> {}".format(ha.group(1), sec2str(second))
                            itmes = []
                        else:
                            ha = strdate(itmes[0])
                            first = str2sec(ha.group(1)) + time
                            second = str2sec(ha.group(2)) + time
                            itmes[0] = "{} --> {}".format(sec2str(first), sec2str(second))
                            subs.append(itmes)
                            itmes = []
                if itmes:
                    if len(subs) > 0 and itmes[1] == subs[-1][1]:
                        ha = strdate(subs[-1][0])
                        ha3 = strdate(itmes[0])
                        second = str2sec(ha3.group(2)) + time
                        subs[-1][0] = "{} --> {}".format(ha.group(1), sec2str(second))
                    else:
                        ha = strdate(itmes[0])
                        first = str2sec(ha.group(1)) + time
                        second = str2sec(ha.group(2)) + time
                        itmes[0] = "{} --> {}".format(sec2str(first), sec2str(second))
                        subs.append(itmes)

        string = ""
        nr = 1
        for sub in subs:
            string += "{}\n{}\n\n".format(nr, '\n'.join(sub))
            nr += 1

        return string

Example #7

Show file

File: __init__.py Project: olof/debian-svtplay-dl

class VideoRetriever(object):
    def __init__(self, config, url, bitrate=0, **kwargs):
        self.config = config
        self.url = url
        self.bitrate = int(bitrate)
        self.kwargs = kwargs
        self.http = HTTP(config)
        self.finished = False
        self.audio = kwargs.pop("audio", None)
        self.files = kwargs.pop("files", None)
        self.keycookie = kwargs.pop("keycookie", None)
        self.authorization = kwargs.pop("authorization", None)
        self.output = kwargs.pop("output", None)
        self.segments = kwargs.pop("segments", None)
        self.output_extention = None

    def __repr__(self):
        return "<Video(fetcher={}, bitrate={}>".format(self.__class__.__name__, self.bitrate)

    @property
    def name(self):
        pass

    def _download_url(self, url, audio=False, total_size=None):
        cookies = self.kwargs["cookies"]
        data = self.http.request("get", url, cookies=cookies, headers={'Range': 'bytes=0-8192'})
        if not total_size:
            try:
                total_size = data.headers['Content-Range']
                total_size = total_size[total_size.find("/") + 1:]
                total_size = int(total_size)
            except KeyError:
                raise KeyError("Can't get the total size.")

        bytes_so_far = 8192
        if audio:
            file_d = output(copy.copy(self.output), self.config, "m4a")
        else:
            file_d = output(self.output, self.config, "mp4")

        if file_d is None:
            return
        file_d.write(data.content)
        eta = ETA(total_size)
        while bytes_so_far < total_size:

            if not self.config.get("silent"):
                eta.update(bytes_so_far)
                progressbar(total_size, bytes_so_far, ''.join(["ETA: ", str(eta)]))

            old = bytes_so_far + 1
            bytes_so_far = total_size

            bytes_range = "bytes={0}-{1}".format(old, bytes_so_far)

            data = self.http.request("get", url, cookies=cookies, headers={'Range': bytes_range})
            file_d.write(data.content)

        file_d.close()
        progressbar(bytes_so_far, total_size, "ETA: complete")
        # progress_stream.write('\n')
        self.finished = True

Example #8

Show file

File: __init__.py Project: olof/debian-svtplay-dl

class Service(object):
    supported_domains = []
    supported_domains_re = []

    def __init__(self, config, _url, http=None):
        self._url = _url
        self._urldata = None
        self._error = False
        self.subtitle = None
        self.cookies = {}
        self.auto_name = None
        self.output = {"title": None, "season": None, "episode": None, "episodename": None,
                       "id": None, "service": self.__class__.__name__.lower(),
                       "tvshow": None, "title_nice": None, "showdescription": None,
                       "episodedescription": None, "showthumbnailurl": None,
                       "episodethumbnailurl": None, "publishing_datetime": None}
        if not http:
            self.http = HTTP(config)
        else:
            self.http = http

        #  Config
        if os.path.isfile(config.get("configfile")):
            self.config = merge(readconfig(setup_defaults(), config.get("configfile"),
                                           service=self.__class__.__name__.lower()).get_variable(), config.get_variable())
        else:
            self.config = config
        logging.debug("service: {}".format(self.__class__.__name__.lower()))

    @property
    def url(self):
        return self._url

    def get_urldata(self):
        if self._urldata is None:
            self._urldata = self.http.request("get", self.url).text
        return self._urldata

    @classmethod
    def handles(cls, url):
        urlp = urlparse(url)

        # Apply supported_domains_re regexp to the netloc. This
        # is meant for 'dynamic' domains, e.g. containing country
        # information etc.
        for domain_re in [re.compile(x) for x in cls.supported_domains_re]:
            if domain_re.match(urlp.netloc):
                return True

        if urlp.netloc in cls.supported_domains:
            return True

        # For every listed domain, try with www.subdomain as well.
        if urlp.netloc in ['www.' + x for x in cls.supported_domains]:
            return True

        return False

    def get_subtitle(self, options):
        pass

    # the options parameter is unused, but is part of the
    # interface, so we don't want to remove it. Thus, the
    # pylint ignore.
    def find_all_episodes(self, options):  # pylint: disable-msg=unused-argument
        logging.warning("--all-episodes not implemented for this service")
        return [self.url]