Example #1
def modifyBitrate(request, fd, bitrate, fd_to_tp):
	logging.info("modifying birate")
	if request.find(b'-Frag') != -1:
		# this is a chunk request
		logging.info("this is a chunk request")
		if len(bitrate) == 0:
			logging.debug("ERROR: bitrate is not ready yet;")
			return request
		br_client = 0
		if fd in fd_to_tp:
			# maximum bitrate for this client
			br_client = fd_to_tp[fd][0] * 2 / 3
		br_chosen = 0
		for br in bitrate:
			if br <= br_client and br > br_chosen:
				br_chosen = br
		if br_chosen < bitrate[0]:
			""" maintain the mininal bitrate """
			br_chosen = bitrate[0]
		logging.info("client tp: {}, chosen tp: {}".format(br_client, br_chosen))
		old_chunk = re.search(b'/[0-9]+Seg', request).group()
		new_chunk = "/{}Seg".format(br_chosen).encode('utf-8')
		logging.info("from {} to {}".format(old_chunk, new_chunk))
		request = request.replace(old_chunk, new_chunk)
	return request
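
A quick way to exercise modifyBitrate is to hand it a fabricated chunk request; the file descriptor, URL path, and throughput value below are invented for illustration, and fd_to_tp is assumed to map a socket fd to a list whose first entry is the estimated throughput in Kbps.

import logging
import re

logging.basicConfig(level=logging.INFO)

bitrates = [500, 1000, 1500]                                # available bitrates, ascending
fd_to_tp = {7: [900.0]}                                     # fd -> [estimated throughput]
chunk_request = b'GET /vod/1000Seg2-Frag3 HTTP/1.1\r\n\r\n'

# cap = 900 * 2/3 = 600, so the highest bitrate not above the cap is 500
print(modifyBitrate(chunk_request, 7, bitrates, fd_to_tp))  # b'GET /vod/500Seg2-Frag3 ...'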
Example #2
def modifyF4M(request):
	""" modify the f4m client request """
	if request.find(b"/big_buck_bunny.f4m") != -1:
		logging.info("this is a f4m")
		request = request.replace(
			b'big_buck_bunny.f4m',
			b'big_buck_bunny_nolist.f4m'
		)
	return request
Example #3
def uri2url_nt(uri):
    """
    Ouvre l'URI et suit la redirection pour avoir l'URL de la page
    afin de pouvoir construire l'URL du fichier .nt
    """
    request = html.parse(urllib.request.urlopen(uri))
    try:
        url_nt = request.find("//a[@id='download-rdf-nt']").get("href")
    except AttributeError:
        url_nt = None
    return url_nt
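
To check the XPath used above without going over the network, the same lookup can be run on a small in-memory page; the HTML fragment and the href inside it are invented for illustration.

import io
from lxml import html

fragment = ('<html><body><a id="download-rdf-nt" '
            'href="https://data.bnf.fr/12138677/rdf.nt">N-Triples</a></body></html>')
tree = html.parse(io.StringIO(fragment))   # same kind of object as html.parse(urlopen(uri))
link = tree.find("//a[@id='download-rdf-nt']")
print(link.get("href") if link is not None else None)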
Example #4
def _get_requests(
    requests: Optional[ElementTree.Element],
    n: int,
    m: List[Tuple[int, int]],
    t: List[float],
):
    d: List[int] = [0] * n
    a: List[int] = [0] * n
    b: List[int] = [0] * n

    if requests is not None:
        request_list = requests.findall("request")

        # useNumer and num are presumably module-level settings that cap how many
        # request elements are read; they are not defined in this excerpt.
        if useNumer:
            request_list = request_list[:num - 1]

        for request in request_list:
            id_attr = request.get("id")
            if id_attr:
                i = int(id_attr)
            else:
                raise KeyError("no 'id' attribute in 'request' element")

            # demand
            quantity = request.find("quantity")
            if quantity is not None and quantity.text:
                d[i] = int(float(quantity.text))
            else:
                raise KeyError("no 'quantity' element")

            # time windows
            tw = request.find("tw")
            _get_tw(tw, i, a, b)

            service_time = request.find("service_time")
            _get_service_time(service_time, t, i, m)

    else:
        raise KeyError("no 'requests' element")

    return d, a, b
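
The element and attribute names below are taken from the find()/get() calls above; the values are invented, and tw and service_time are left empty because their handlers (_get_tw, _get_service_time) are not part of this excerpt.

from xml.etree import ElementTree

sample = ElementTree.fromstring(
    '<requests>'
    '<request id="0"><quantity>3.0</quantity><tw/><service_time/></request>'
    '<request id="1"><quantity>5.0</quantity><tw/><service_time/></request>'
    '</requests>'
)
for request in sample.findall("request"):
    print(request.get("id"), request.find("quantity").text)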
Example #5
def uri2url_nt(uri):
    """
    Ouvre l'URI et suit la redirection pour avoir l'URL de la page
    afin de pouvoir construire l'URL du fichier .nt
    """
    test = True
    url_nt = None
    try:
        request = html.parse(urllib.request.urlopen(uri))
    except (TimeoutError, urllib.error.URLError):
        test = False
    if test:
        try:
            url_nt = request.find("//a[@id='download-rdf-nt']").get("href")
        except AttributeError:
            pass
    return url_nt
Example #6
def auto_label(csv_file, saved_dir):
    img_data = pd.read_csv(csv_file)
    if ('emotion' in img_data.keys()):
        emotion_list = img_data['emotion']
        emotion_list = emotion_list.fillna('')
    else:
        emotion_list = [''] * len(img_data)
    for i, cocoid in enumerate(img_data['cocoid']):
        img_url = 'http://images.cocodataset.org/train2014/COCO_train2014_' + str(
            cocoid).zfill(12) + '.jpg'
        while (len(emotion_list[i]) == 0 or emotion_list[i].find('error') > 0):
            print(cocoid, i, len(img_data))
            request = get_emotion_form_img(img_url)
            emotion_list[i] = request
            if (request.find('error') > 0):
                img_data['emotion'] = emotion_list
                img_data.to_csv(csv_file, index=False)
                time.sleep(3)
            if (len(request) == 0):
                img_data['emotion'] = emotion_list
                img_data.to_csv(csv_file, index=False)
                time.sleep(5)
    img_data['emotion'] = emotion_list
    img_data.to_csv(csv_file, index=False)
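
A minimal sketch of the CSV layout auto_label appears to expect: a cocoid column and, optionally, an emotion column. The file name, ids, and output directory are invented, and get_emotion_form_img (a network call) is not shown in this excerpt.

import pandas as pd

pd.DataFrame({'cocoid': [9, 25], 'emotion': ['', '']}).to_csv('labels.csv', index=False)
# auto_label('labels.csv', 'out/')  # retries get_emotion_form_img until each row holds a non-error emotion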
Example #7
    def __getDominio(self):
        url = self.url
        protocolo = url[:url.find('://') + 3]
        url = url[url.find('://') + 3:]
        dominio = url[:url.find('/')]
        return protocolo + dominio
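
The same scheme-plus-host extraction, sketched outside the class; the URL is illustrative and the helper variable resto is introduced here only to avoid reusing the name url.

url = 'https://example.com/path/page.html'
protocolo = url[:url.find('://') + 3]        # 'https://'
resto = url[url.find('://') + 3:]            # 'example.com/path/page.html'
print(protocolo + resto[:resto.find('/')])   # prints 'https://example.com'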
Example #8
def catFacts(bot, msg):
    request = str(urllib.request.urlopen("http://catfacts-api.appspot.com/api/facts?number=1").read())
    bot.sendMessage(msg["channel"], request[request.find('[') + 2:request.find(']') - 1])
Example #9
# coding: utf-8

import urllib.request
from lxml import html, etree
from rdflib.graph import Graph
import rdflib

# List of URIs already processed
treated_entities = []

url = "http://data.bnf.fr/ark:/12148/cb12138677d#about"
request = html.parse(urllib.request.urlopen(url))
url_ref = request.find("//meta[@property='og:url']").get("content")
url_nt = url_ref + "rdf.nt"
url_nt = url_nt.replace("/fr/", "/")
print(url_nt)


def uri2url_nt(uri):
    """
    Ouvre l'URI et suit la redirection pour avoir l'URL de la page
    afin de pouvoir construire l'URL du fichier .nt
    """
    request = html.parse(urllib.request.urlopen(uri))
    try:
        url_nt = request.find("//a[@id='download-rdf-nt']").get("href")
    except AttributeError:
        url_nt = None
    return url_nt
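
The rdflib imports above are unused in this excerpt, which suggests the .nt URL is loaded into a Graph elsewhere; a minimal sketch of that step, assuming url_nt points at an N-Triples file and network access is available.

from rdflib.graph import Graph

g = Graph()
g.parse(url_nt, format="nt")      # url_nt as computed above
print(len(g), "triples loaded")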

Example #10
    def parse(self, proxy=False):
        result = []

        # avoiding blocks
        headers = {
            'user-agent': feed.UserAgent_random().lstrip(),
            'referer': 'https://www.google.com/search?newwindow=1&q='+self.href
        }
        if proxy != False:
            proxyDict = {
                "http": "http://" + proxy, 
                "https": "https://" + proxy,
            }
        else:
            proxyDict = {}

        # custom ранобэ.рф API import
        if self.href.find('http://xn--80ac9aeh6f.xn--p1ai/') != -1:
            request = f"https://xn--80ac9aeh6f.xn--p1ai/api/v2/books/{ self.href[31:-1] }/chapters"
            request = requests.get(request).json()  # (request, headers=headers, proxies=proxyDict)

            for each in request['items']:
                # ignoring payed chapters
                if each['availabilityStatus'] == 'free':
                    result.append(feedUpdate(
                        name=each["title"],
                        href="http://xn--80ac9aeh6f.xn--p1ai"+each["url"],
                        datetime=datetime.strptime(each["publishTime"], '%Y-%m-%d %H:%M:%S'),
                        title=self.title))

        # custom instagram import
        if self.href.find('https://www.instagram.com/') != -1:
            if not randint(0, 100) == 0:
                return []
            try:
                request = requests.get(self.href, headers=headers, proxies=proxyDict)
                request = BeautifulSoup(request.text, "html.parser")

                for each in request.find_all('script'):
                    data = 'window._sharedData = '
                    if each.text.find(data) != -1:
                        # preparing JSON
                        data = each.text.find(data) + len(data)  # data start position
                        data = each.text[data:-1]  # -1 is for removing ; in the end
                        data = json.loads(data)

                        # selecting data from JSON
                        data = data['entry_data']['ProfilePage'][0]['graphql']
                        data = data['user']['edge_owner_to_timeline_media']['edges']

                        # parsing data from JSON
                        for each in data:
                            # avoiding errors caused by empty titles
                            try:
                                result_name = each['node']['edge_media_to_caption']['edges'][0]['node']['text']
                            except IndexError:
                                result_name = 'no title'

                            result.append(feedUpdate(
                                name=result_name,
                                href="http://instragram.com/p/"+each['node']['shortcode'],
                                datetime=datetime.fromtimestamp(each['node']['taken_at_timestamp']),
                                title=self.title))
            except (KeyError, requests.exceptions.ProxyError, requests.exceptions.SSLError) as err:
                return []

        # custom RSS YouTube converter (link to feed has to be converted manually)
        elif self.href.find('https://www.youtube.com/channel/') != -1:
            self.href_title = self.href[:]
            # 32 = len('https://www.youtube.com/channel/')
            # 7 = len('/videos')
            self.href = "https://www.youtube.com/feeds/videos.xml?channel_id=" + self.href[32:-7]
            result = feed.parse(self)

        # custom RSS readmanga converter (link to feed has to be converted manually to simplify feed object creation)
        elif self.href.find('http://readmanga.me/') != -1 and self.href.find('readmanga.me/rss/manga') == -1 and self.href_title is None:
            # 20 = len('http://readmanga.me/')
            self.href = "feed://readmanga.me/rss/manga?name=" + self.href[20:]
            result = feed.parse(self)

        # custom RSS mintmanga converter (link to feed has to be converted manually to simplify feed object creation)
        elif self.href.find('http://mintmanga.com/') != -1 and self.href.find('mintmanga.com/rss/manga') == -1 and self.href_title is None:
            # 21 = len('http://mintmanga.com/')
            self.href = "feed://mintmanga.com/rss/manga?name=" + self.href[21:]
            result = feed.parse(self)

        # custom RSS deviantart converter (link to feed has to be converted manually to simplify feed object creation)
        elif self.href.find('https://www.deviantart.com/') != -1:
            self.href_title = self.href[:]
            # 27 = len('https://www.deviantart.com/')
            # 9 = len('/gallery/')
            self.href = self.href[27:-9]
            self.href = "http://backend.deviantart.com/rss.xml?q=gallery%3A" + self.href
            result = feed.parse(self)

        # custom fantasy-worlds.org loader
        elif self.href.find('https://fantasy-worlds.org/series/') != -1:
            strainer = SoupStrainer('div', attrs={'class': 'rightBlock'})

            request = requests.get(self.href, headers=headers, proxies=proxyDict)
            request = BeautifulSoup(request.text, "html.parser", parse_only=strainer)

            for each in request.find('ul').find('li').find('ul').find('li').find('ul').find_all('li'):
                result.append(feedUpdate(
                    name=f"{self.title} {each.text[:each.text.find(' // ')]}",
                    href=each.find('a')['href'],
                    datetime=datetime.now(),  # <=== fake date
                    title=self.title))

        # custom pikabu import
        elif self.href.find('pikabu.ru/@') != -1:
            # try:
            strainer = SoupStrainer('div', attrs={'class': 'stories-feed__container'})

            request = requests.get(self.href, headers=headers, proxies=proxyDict)
            request = BeautifulSoup(request.text, "html.parser", parse_only=strainer)

            for each in request.find_all('article'):
                try:
                    result_datetime = each.find('time')['datetime'][:-3]+"00"
                    result_datetime = datetime.strptime(result_datetime, '%Y-%m-%dT%H:%M:%S%z')

                    result.append(feedUpdate(
                        name=each.find('h2', {'class': "story__title"}).find('a').getText(),
                        href=each.find('h2', {'class': "story__title"}).find('a')['href'],
                        datetime=result_datetime,
                        title=self.title))

                except (TypeError, AttributeError) as err:
                    # advertisement, passing as no need to save it
                    pass
            # except (requests.exceptions.ConnectionError, requests.exceptions.SSLError) as err:
            #     # failed connection, hope it works from time to time
            #     return []

        # # custom fanserials parser
        # elif self.href.find('http://fanserial.net/') != -1 and self.filter is not None:
        #     strainer = SoupStrainer('ul', attrs={'id': 'episode_list'})
        #
        #     request = requests.get(self.href, headers=headers, proxies=proxyDict)
        #     request = BeautifulSoup(request.text, "html.parser", parse_only=strainer)
        #     print(request)
        #
        #     for each in request.find_all('li'):
        #         print(each)
        #         result_href = ''
        #         for each_span in each.find('div').find('div', attrs={'class': 'serial-translate'}).find_all('span'):
        #             result_href = 'http://fanserial.tv' + each_span.find('a').get('href')
        #
        #         result.append(feedUpdate(
        #             name=each.find('div', attrs={'class': 'field-description'}).find('a').text,
        #             href=result_href,
        #             datetime=datetime.now(),  # <=== fake date
        #             title=self.title))

        # default RSS import
        else:
            proxyDict = urllib.request.ProxyHandler(proxyDict)

            request = feedparser.parse(self.href, request_headers=headers, handlers=[proxyDict])

            for each in request["items"]:
                # HREF RESULT
                if self.title == "Expresso":
                    result_href = each["summary"]

                    start = result_href.find('https://expres.co/')
                    end = result_href.find('"')

                    result_href = result_href[start:end]
                else:
                    result_href = each["links"][0]["href"]

                # DATE RESULT: parsing dates
                if "published" in each:
                    result_datetime = each["published"]
                elif "updated" in each:
                    result_datetime = each["updated"]
                else:
                    print(f"result_datetime broke for { self.title }")
                    continue  # skip items without a date so result_datetime is never undefined below

                tzinfos = {'PDT': gettz("America/Los_Angeles"), 'PST': gettz("America/Juneau")}
                result_datetime = parser.parse(result_datetime, tzinfos=tzinfos)

                # APPEND RESULT
                result.append(feedUpdate(
                    name=each["title_detail"]["value"],
                    href=result_href,
                    datetime=result_datetime,
                    title=self.title))

        # universal postfixes
        result_filtered = []
        for each in result:
            # FILTERING: passing item cycle if filter does not match
            if self.filter is not None:
                if each.name.find(self.filter) == -1 or each.href.find(self.filter) == -1:
                    continue

            # DATETIME fixes
            # fix timezone unaware
            # if each.datetime.tzinfo is not None and each.datetime.tzinfo.utcoffset(each.datetime) is not None:
            #     each_dt = localtime(each.datetime)
            #     each.datetime = datetime(each_dt.year, each_dt.month, each_dt.day,
            #          each_dt.hour, each_dt.minute, each_dt.second)
                     
            # if each.datetime.tzinfo is not None and each.datetime.tzinfo.utcoffset(each.datetime) is not None:
            #     print("!!!! WARNING !!!!")
            # # add DELAY
            # if type(self.delay) is not type(None):
            #     each.datetime += timedelta(hours=self.delay)

            # NAME fixes
            each.name = ' '.join(each.name.split())
            each.name = each.name[:140]  # SQLite does not support max-length
            # extra symbols
            if each.title == 'Shadman':
                each.name = each.name[:each.name.find('(')-1]
            elif each.title == 'Apple' and each.name[-len('Apple'):] == 'Apple':
                # - symbol can be a variety of different symbols
                # 8 = len(' - Apple')
                each.name = each.name[:-8]
            elif each.title == 'LastWeekTonight':
                end = each.name.find(': Last Week Tonight with John Oliver (HBO)')
                if end != -1:
                    each.name = each.name[:end]

            result_filtered.append(each)

        return result_filtered
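
The default branch above reduces to feedparser plus a published/updated fallback and timezone-aware date parsing; here is that part sketched standalone, using the YouTube feed URL pattern from the converter above with an illustrative channel id.

import feedparser
from dateutil import parser
from dateutil.tz import gettz

feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=UC_x5XG1OV2P6uZZ5FSM9Ttw"  # id is illustrative
parsed = feedparser.parse(feed_url)
tzinfos = {'PDT': gettz("America/Los_Angeles"), 'PST': gettz("America/Juneau")}

for each in parsed["items"]:
    raw = each.get("published") or each.get("updated")
    if raw is None:
        continue  # skip entries without a usable date
    print(parser.parse(raw, tzinfos=tzinfos), "-", each["title_detail"]["value"])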