Python split 예제들, helper.split Python 예제들

예제 #1

0

파일 보기

파일: ert.py 프로젝트: eminga/simplEPG

def grab(channel, timespan):
    """Scrape the ERT programme listing for one channel.

    One listing page is downloaded per day, starting today (or yesterday
    when the local Athens time is before 07:00) and running through 13 days
    ahead.  Scraping stops after the first show that starts more than
    ``timespan`` hours from now; that show is still included in the result.

    Args:
        channel: Channel id as a string; it is concatenated into the
            ``id`` query parameter of the listing URL.
        timespan: Number of hours, counted from now, to collect shows for.

    Returns:
        A list of dicts, each with a timezone-aware ``"start"`` datetime
        and a ``"title"``, plus ``"sub-title"`` and ``"details-url"`` when
        the page provides them.
    """
    tz = pytz.timezone("Europe/Athens")
    now = datetime.datetime.now(tz)
    charset = "windows-1253"
    shows = []
    # Before 07:00 the previous day's page is fetched as well.
    first_day = -1 if now.hour < 7 else 0
    for i in range(first_day, 14):
        date = now + datetime.timedelta(days=i)
        text = helper.download(
            "https://program.ert.gr/Ert1/index.asp?id=" + channel +
            "&pdate=" + date.strftime("%d/%m/%Y"),
            encoding=charset)
        if text is None:
            continue

        sections = helper.split(
            text, "<td width=\"50\" align=\"center\" class=\"table\">",
            "</tr></table>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)
        for section in sections:
            start_match = re.search(r"(\d\d):(\d\d)", section)
            link_match = re.search(
                r'<a class="black".*href="(.*)">(.*)</a>', section)
            if start_match is None or link_match is None:
                # Section without a start time or a title link: skip it
                # rather than failing on ``None.group``.
                continue

            show = {}
            show["start"] = date.replace(hour=int(start_match.group(1)),
                                         minute=int(start_match.group(2)),
                                         second=0,
                                         microsecond=0)
            if show["start"] < laststart:
                # The clock time went backwards, i.e. the listing crossed
                # midnight: this and all later rows are on the next day.
                date += datetime.timedelta(days=1)
                show["start"] += datetime.timedelta(days=1)

            lastshow = (show["start"] - now).total_seconds() / 3600 > timespan
            laststart = show["start"]

            show["title"] = link_match.group(2)

            subtitle = helper.cut(
                section, "<td width=\"3\"></td><td><font color=\"#6e6868\">",
                "</font>")
            if subtitle:
                show["sub-title"] = subtitle

            link = link_match.group(1)
            # startswith() is safe for an empty href, unlike ``link[0]``.
            if link.startswith("/"):
                link = "https://program.ert.gr" + link
            if link:
                show["details-url"] = link

            shows.append(show)
            if lastshow:
                return shows
    return shows

예제 #2

0

파일 보기

파일: ert.py 프로젝트: eminga/simplEPG

def grabdetails(url):
    """Download a show's detail page and extract its description and credits.

    Args:
        url: Address of the detail page, passed to ``helper.download``.

    Returns:
        ``None`` when the download yields ``None``; otherwise a dict that
        may contain ``"desc"``, ``"director"``, ``"presenter"``,
        ``"producer"`` and ``"writer"`` (only the fields found on the page).
    """
    page = helper.download(url, encoding="windows-1253")
    if page is None:
        return None

    details = {}

    # The description is the concatenation of every justified "black" div.
    description = "".join(
        helper.split(page, "<div align=\"justify\" class=\"black\">",
                     "</div>"))
    if description:
        details["desc"] = helper.cleanup(description)

    # (result key, pattern) pairs; each pattern captures the text that
    # follows the Greek label up to the next newline or <br>.
    credit_patterns = (
        ("director", "Σκηνοθεσία</b>: (.*?)(?:\n|<br>)"),
        ("presenter", "Παρουσίαση</b>: (.*?)(?:\n|<br>)"),
        ("producer", "Οργάνωση παραγωγής: (.*?)(?:\n|<br>)"),
        ("writer", "Αρχισυνταξία: (.*?)(?:\n|<br>)"),
    )
    for field, pattern in credit_patterns:
        found = re.search(pattern, page)
        if found is not None:
            details[field] = helper.cleanup(found.group(1))
    return details

예제 #3

0

파일 보기

def channellist():
    """Return the channels advertised on the ARD programme start page.

    Returns:
        A list of ``(sender_id, name, name)`` tuples, one per
        "Tagesprogramm::" link whose ``sender`` query parameter could be
        parsed.  The list is empty when the page could not be downloaded.
    """
    text = helper.download("http://programm.ard.de/")
    if text is None:
        # The grabbers in this file treat None as a failed download.
        return []

    result = []
    for channel in helper.split(text, "Tagesprogramm::", "</a>"):
        # Group 1: channel name up to the closing quote.
        # Group 2: sender id without an optional leading minus sign.
        match = re.search(r'Tagesprogramm::(.*?)".*\?sender=-?(.*?)&',
                          channel)
        if match is None:
            # Link without a sender parameter: nothing to report.
            continue
        name = match.group(1)
        result.append((match.group(2), name, name))
    return result

예제 #4

0

파일 보기

def channellist():
    """Return the channels listed on ishow.gr's "TV now" page.

    Returns:
        A list of ``(cid, name, name)`` tuples sorted by the numeric value
        of ``cid``.  The list is empty when the page could not be
        downloaded.
    """
    text = helper.download("http://www.ishow.gr/tvNow.asp")
    if text is None:
        # The grabbers in this file treat None as a failed download.
        return []

    result = []
    for channel in helper.split(text, "<b><a style=\"color:#E1D8BE\"",
                                "</a>"):
        # Group 1: value of the ``cid`` query parameter; group 2: link text.
        match = re.search(r'\?cid=(.*?)">(.*)</a>', channel)
        if match is None:
            continue
        name = match.group(2)
        result.append((match.group(1), name, name))
    result.sort(key=lambda r: int(r[0]))
    return result

예제 #5

0

파일 보기

def grab(channel, timespan):
    """Scrape the ishow.gr programme listing for one channel.

    One page is downloaded per day offset, starting at 0 (or -1 when the
    local Athens time is before 04:00) and ending at 5.  Scraping stops
    after the first show that starts more than ``timespan`` hours from
    now; that show is still included in the result.

    Args:
        channel: Channel id as a string; it is concatenated into the
            ``cid`` query parameter.
        timespan: Number of hours, counted from now, to collect shows for.

    Returns:
        A list of dicts, each with a timezone-aware ``"start"`` datetime
        and a ``"title"``, plus ``"sub-title"`` and ``"details-url"`` when
        the page provides them.
    """
    tz = pytz.timezone("Europe/Athens")
    now = datetime.datetime.now(tz)
    shows = []
    # Before 04:00 the previous day's page is fetched as well.
    first_day = -1 if now.hour < 4 else 0
    for i in range(first_day, 6):
        date = now + datetime.timedelta(days=i)
        text = helper.download(
            "http://ishow.gr/showTodayChannelProgramm.asp?cid=" + channel +
            "&gotoDay=" + str(i))
        if text is None:
            continue

        sections = helper.split(text, "<tr id=\"progTr", "</tr>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)
        for section in sections:
            start_match = re.search(
                r'<td class="progTd progTdTime".*?>(\d\d):(\d\d)', section)
            title_match = re.search(
                r'<div class="grandTitle".*>(.+)\s*?</div>', section)
            if start_match is None or title_match is None:
                # Row without a start time or a title: skip it rather than
                # failing on ``None.group``.
                continue

            show = {}
            show["start"] = date.replace(hour=int(start_match.group(1)),
                                         minute=int(start_match.group(2)),
                                         second=0,
                                         microsecond=0)
            if show["start"] < laststart:
                # The clock time went backwards, i.e. the listing crossed
                # midnight: this and all later rows are on the next day.
                date += datetime.timedelta(days=1)
                show["start"] += datetime.timedelta(days=1)

            lastshow = (show["start"] - now).total_seconds() / 3600 > timespan
            laststart = show["start"]

            show["title"] = helper.cleanup(title_match.group(1))

            subtitle = helper.cut(section, "<div class=\"subTitle\">",
                                  "</div>")
            if subtitle:
                show["sub-title"] = helper.cleanup(subtitle)

            link_match = re.search(
                r'<div class="grandTitle">.*?href="(.*?)"', section)
            if link_match is not None:
                show["details-url"] = "http://ishow.gr" + link_match.group(1)

            shows.append(show)
            if lastshow:
                return shows
    return shows

예제 #6

0

파일 보기

파일: learner.py 프로젝트: 116014/countOR

def getConstraintsForAll(dataTensor, variables, orderingNotImp):
    """Build a table of sum / consecutive-run bounds for dimension splits.

    For every ``(m, s)`` pair produced by ``helper.split`` over the indices
    of ``variables``, the bounds returned by ``helper.tensorSum`` and, when
    ``s`` holds a single dimension that is not listed in ``orderingNotImp``,
    by ``helper.tensorConsZero`` are collected.  A bound that is not below
    the product of the sizes of the ``s`` dimensions is stored as 0.

    Args:
        dataTensor: Data passed through to ``helper.tensorIndicator``.
        variables: Sequence of per-dimension value collections; only their
            lengths are read here.
        orderingNotImp: Dimension indices for which the consecutive-run
            bounds are not computed.

    Returns:
        A dict keyed by ``"<m indices>:<s indices>"`` (comma separated)
        whose values are dicts with the keys ``minSum``, ``maxSum``,
        ``minConsZero``, ``maxConsZero``, ``minConsNonZero`` and
        ``maxConsNonZero``.
    """
    repeatDim = ()
    dims = {v for v in range(len(variables)) if v not in repeatDim}
    constraints = {}
    for m, s in helper.split(dims, (), repeatDim):
        newset = m + s

        # This value is used to filter the bounds below: anything that is
        # not strictly smaller is recorded as 0.
        maxPossible = 1
        for dim in s:
            maxPossible *= len(variables[dim])

        idTensor = helper.tensorIndicator(dataTensor, newset, variables)
        sumSet = range(len(m), len(newset))

        sumTensor_max, sumTensor_min = helper.tensorSum(
            idTensor, sumSet, np.array(variables)[list(newset)], 0)

        single_ordered_dim = (len(set(s)) == 1
                              and not (set(orderingNotImp) & set(s)))
        if single_ordered_dim:
            cons = helper.tensorConsZero(
                idTensor, sumSet, np.array(variables)[list(newset)])
            minConsZero, maxConsZero, minConsNonZero, maxConsNonZero = cons
        else:
            minConsZero = maxConsZero = minConsNonZero = maxConsNonZero = 0

        bounds = (
            ("minSum", sumTensor_min),
            ("maxSum", sumTensor_max),
            ("minConsZero", minConsZero),
            ("maxConsZero", maxConsZero),
            ("minConsNonZero", minConsNonZero),
            ("maxConsNonZero", maxConsNonZero),
        )
        row = {
            name: int(value) if value < maxPossible else 0
            for name, value in bounds
        }

        key = ",".join(map(str, m)) + ":" + ",".join(map(str, s))
        constraints[key] = row

    return constraints

예제 #7

0

파일 보기

def grab(channel, timespan):
    """Scrape the ZDF live-TV timeline for one channel.

    One page is downloaded per day, starting today (or yesterday when the
    local Berlin time is before 07:00) and running through 13 days ahead.
    Scraping stops at the first show that starts more than ``timespan``
    hours from now; that show is NOT included in the result.

    Args:
        channel: Suffix of the ``timeline-<channel>`` CSS class that
            identifies the channel's section on the page (a string).
        timespan: Number of hours, counted from now, to collect shows for.

    Returns:
        A list of dicts, each with timezone-aware ``"start"`` and
        ``"stop"`` datetimes and a ``"title"``, plus ``"details-url"``
        when the section provides one.
    """
    tz = pytz.timezone("Europe/Berlin")
    now = datetime.datetime.now(tz)
    shows = []
    # Before 07:00 the previous day's page is fetched as well.
    first_day = -1 if now.hour < 7 else 0

    for i in range(first_day, 14):
        date = now + datetime.timedelta(days=i)
        text = helper.download("http://www.zdf.de/live-tv?airtimeDate=" + date.strftime("%Y-%m-%d"))
        if text is None:
            continue

        text = helper.cut(text, "<section class=\"b-epg-timeline timeline-" + channel, "</section>")
        if not text:
            # The page has no timeline for this channel.
            continue

        sections = helper.split(text, "<li", "</li>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)
        for section in sections:
            time_text = helper.cut(section, "<span class=\"time\">", "</span>")
            if not time_text:
                continue
            times = re.search(r"(\d\d):(\d\d) - (\d\d):(\d\d)", time_text)
            # Group 1: optional category; group 2: the title itself.
            title = re.search(
                r'<span class="overlay-link-category">(.*?)<span class="visuallyhidden">:</span></span>\s*(?:<.*>)*\s*(.*?)\s*?</a>',
                section)
            if times is None or title is None:
                # List item that is not a complete programme entry: skip it
                # rather than failing on ``None.group``.
                continue

            show = {}
            show["start"] = date.replace(hour=int(times.group(1)), minute=int(times.group(2)), second=0, microsecond=0)
            if show["start"] < laststart:
                # The clock time went backwards, i.e. the listing crossed
                # midnight: this and all later items are on the next day.
                date += datetime.timedelta(days=1)
                show["start"] += datetime.timedelta(days=1)

            if (show["start"] - now).total_seconds() / 3600 > timespan:
                return shows

            laststart = show["start"]
            show["stop"] = date.replace(hour=int(times.group(3)), minute=int(times.group(4)), second=0, microsecond=0)
            if show["stop"] < show["start"]:
                # The show runs past midnight.
                show["stop"] += datetime.timedelta(days=1)

            if title.group(1):
                show["title"] = helper.cleanup(title.group(1) + " - " + title.group(2))
            else:
                show["title"] = helper.cleanup(title.group(2))

            link = re.search(r'contentUrl": "(.*)"', section)
            if link is not None:
                show["details-url"] = "http://www.zdf.de" + link.group(1)

            shows.append(show)
    return shows

예제 #8

0

파일 보기

def grab(channel, timespan):
    """Scrape the ARD programme listing for one channel.

    One page is downloaded per day, starting today (or yesterday when the
    local Berlin time is before 07:00) and running through 13 days ahead.
    Scraping stops after the first show that starts more than ``timespan``
    hours from now; that show is still included in the result.

    Args:
        channel: Value of the ``sender`` query parameter (a string).
        timespan: Number of hours, counted from now, to collect shows for.

    Returns:
        A list of dicts, each with a timezone-aware ``"start"`` datetime
        and a ``"title"``, plus ``"sub-title"`` and ``"details-url"`` when
        the page provides them.
    """
    tz = pytz.timezone("Europe/Berlin")
    now = datetime.datetime.now(tz)
    shows = []
    # Before 07:00 the previous day's page is fetched as well.
    first_day = -1 if now.hour < 7 else 0
    for i in range(first_day, 14):
        date = now + datetime.timedelta(days=i)
        text = helper.download("http://programm.ard.de/TV/Programm/Sender?datum=" + date.strftime("%d.%m.%Y") + "&hour=0&sender=" + channel)
        if text is None:
            continue

        sections = helper.split(text, "<li class=\"eid", "</li>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)
        for section in sections:
            start_match = re.search(r'<span class="date[\s\S]*?(\d\d):(\d\d)', section)
            title_match = re.search(r'<span class="title[\s\S]*?>\s*([^<]*?)[\t\n]', section)
            if start_match is None or title_match is None:
                # Item without a start time or a title: skip it rather than
                # failing on ``None.group``.
                continue

            show = {}
            show["start"] = date.replace(hour=int(start_match.group(1)), minute=int(start_match.group(2)), second=0, microsecond=0)
            if show["start"] < laststart:
                # The clock time went backwards, i.e. the listing crossed
                # midnight: this and all later items are on the next day.
                date += datetime.timedelta(days=1)
                show["start"] += datetime.timedelta(days=1)

            lastshow = (show["start"] - now).total_seconds() / 3600 > timespan
            laststart = show["start"]

            show["title"] = helper.cleanup(title_match.group(1))

            subtitle_match = re.search(r'<span class="subtitle[\s\S]*?>\s*([^<]*?)[\t\n]', section)
            if subtitle_match is not None and subtitle_match.group(1):
                show["sub-title"] = helper.cleanup(subtitle_match.group(1))

            link_match = re.search(r'<a class="sendungslink[\s\S]*?href="(.*?)"', section)
            if link_match is not None:
                show["details-url"] = "http://programm.ard.de" + link_match.group(1)

            shows.append(show)
            if lastshow:
                return shows
    return shows

예제 #9

0

파일 보기

파일: dw.py 프로젝트: eminga/simplEPG

def grab(channel, timespan):
    """Scrape the Deutsche Welle EPG feed for one channel.

    The feed is requested in steps of 14400 seconds (4 hours), starting at
    the current time, ``1 + timespan // 4`` times.  Programmes whose start
    is not later than the previously accepted one are dropped, since
    consecutive responses may repeat entries.  Scraping stops at the first
    show that starts more than ``timespan`` hours from now; that show is
    NOT included in the result.

    Args:
        channel: Value of the ``data-channel-id`` attribute to look for
            (a string).
        timespan: Whole number of hours, counted from now, to collect
            shows for.

    Returns:
        A list of dicts, each with UTC ``"start"`` and ``"stop"`` datetimes
        and a ``"title"``, plus ``"url"``, ``"desc"`` and ``"icon"`` (a
        ``(src, {"width": ..., "height": ...})`` tuple) when available.
    """
    tz = pytz.timezone("UTC")
    now = datetime.datetime.now(tz)
    shows = []

    laststart = datetime.datetime.min.replace(tzinfo=tz)
    for i in range(1 + timespan // 4):
        timestamp = int(time.time()) + i * 14400
        # "000" is appended to the second-resolution timestamp
        # (presumably the endpoint expects milliseconds -- TODO confirm).
        text = helper.download("https://www.dw.com/epg/data/4765/1/" +
                               str(timestamp) + "000")
        if text is None:
            continue

        channeldata = helper.cut(text, "data-channel-id=\"" + channel + "\"",
                                 "data-channel-id")
        if not channeldata:
            # No following "data-channel-id" marker was found: take
            # everything after this channel's marker instead.
            try:
                channeldata = text.split("data-channel-id=\"" + channel +
                                         "\"")[1]
            except IndexError:
                continue
        sections = helper.split(channeldata, "<div class=\"epgProgram\"",
                                "<div class=\"broadcastlinks\">")

        for section in sections:
            show = {}
            day = helper.cut(section, "data-day=\"", "\"")
            begintime = helper.cut(section, "data-begin-time=\"", "\"")
            endtime = helper.cut(section, "data-end-time=\"", "\"")

            show["start"] = pytz.utc.localize(
                datetime.datetime.strptime(day + begintime, "%Y-%m-%d%H:%M"))
            if show["start"] <= laststart:
                continue
            if (show["start"] - now).total_seconds() / 3600 > timespan:
                return shows
            laststart = show["start"]

            show["stop"] = pytz.utc.localize(
                datetime.datetime.strptime(day + endtime, "%Y-%m-%d%H:%M"))
            if show["stop"] < show["start"]:
                # The end time is on the day after ``data-day``.
                show["stop"] += datetime.timedelta(days=1)

            show["title"] = helper.cleanup(
                helper.cut(section, "<h2 class=\"title\">", "</h2>"))
            url = helper.cut(section, "<a href=\"", "\">")
            if url:
                show["url"] = "https://www.dw.com" + url
            description = helper.cleanup(
                helper.cut(section, "<ul class=\"topics\">", "</ul>"))
            if description:
                show["desc"] = description

            # The icon is optional: any failure here just means "no icon".
            try:
                icon = re.search(r"<img[\s\S]*?/>", section).group(0)
                width = helper.cut(icon, "width=\"", "\"")
                height = helper.cut(icon, "height=\"", "\"")
                src = "https://www.dw.com" + helper.cut(icon, "src=\"", "\"")
                show["icon"] = (src, {"width": width, "height": height})
            except (AttributeError, IndexError, TypeError):
                # AttributeError: no <img> tag in the section.
                # TypeError: the ``src`` lookup gave None, which cannot be
                # concatenated (helper.cut results are None-checked
                # elsewhere in this function).
                pass

            shows.append(show)
    return shows

예제 #10

0

파일 보기

파일: split_file.py 프로젝트: jsprieto10/UnaCloud-download-splited-file

def split_file(path):
    """Hand the file at *path* to ``split`` with a 256 KiB chunk size.

    The destination passed to ``split`` is ``split/<name>``, where
    ``<name>`` is whatever follows the last ``/`` in *path*.
    """
    name = path.rsplit('/', 1)[-1]
    split(path, 'split/' + name, 256 * 1024)