import logging
import re


def modifyBitrate(request, fd, bitrate, fd_to_tp):
    logging.info("modifying bitrate")
    if request.find(b'-Frag') != -1:
        # this is a chunk request
        logging.info("this is a chunk request")
        if len(bitrate) == 0:
            logging.debug("ERROR: bitrate is not ready yet;")
            return request

        br_client = 0
        if fd in fd_to_tp:
            # maximum bitrate for this client: 2/3 of its measured throughput
            br_client = fd_to_tp[fd][0] * 2 / 3

        # pick the highest advertised bitrate the client can sustain
        # (assumes bitrate is sorted ascending, so bitrate[0] is the minimum)
        br_chosen = 0
        for br in bitrate:
            if br <= br_client and br > br_chosen:
                br_chosen = br
        if br_chosen < bitrate[0]:
            # maintain the minimal bitrate
            br_chosen = bitrate[0]
        logging.info("client tp: {}, chosen tp: {}".format(br_client, br_chosen))

        # rewrite the bitrate prefix of the chunk name, e.g. b'/1000Seg'
        old_chunk = re.search(b'/[0-9]+Seg', request).group()
        new_chunk = "/{}Seg".format(br_chosen).encode('utf-8')
        logging.info("from {} to {}".format(old_chunk, new_chunk))
        request = request.replace(old_chunk, new_chunk)
    return request

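# Usage sketch for modifyBitrate (all values made up for illustration): with a
# measured throughput of 1500 Kbps the cap is 2/3 * 1500 = 1000 Kbps, so the
# 1000 Kbps variant of the chunk is requested.
demo_fd_to_tp = {7: (1500,)}        # hypothetical fd -> (throughput, ...) map
demo_bitrates = [500, 1000, 1500]   # assumed sorted ascending, as the code requires
demo_request = b"GET /vod/1500Seg2-Frag3 HTTP/1.1\r\n\r\n"
print(modifyBitrate(demo_request, 7, demo_bitrates, demo_fd_to_tp))
# -> b'GET /vod/1000Seg2-Frag3 HTTP/1.1\r\n\r\n'
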
import logging


def modifyF4M(request):
    """Modify the f4m client request."""
    if request.find(b"/big_buck_bunny.f4m") != -1:
        logging.info("this is an f4m request")
        # serve the manifest without the bitrate list
        request = request.replace(
            b'big_buck_bunny.f4m', b'big_buck_bunny_nolist.f4m'
        )
    return request

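# Usage sketch for modifyF4M (the request bytes are hypothetical): the
# manifest request is rewritten so the server returns the *_nolist variant.
print(modifyF4M(b"GET /vod/big_buck_bunny.f4m HTTP/1.1\r\n\r\n"))
# -> b'GET /vod/big_buck_bunny_nolist.f4m HTTP/1.1\r\n\r\n'
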
import urllib.request

from lxml import html


def uri2url_nt(uri):
    """Open the URI and follow the redirect to get the page URL,
    so that the URL of the .nt file can be built."""
    request = html.parse(urllib.request.urlopen(uri))
    try:
        url_nt = request.find("//a[@id='download-rdf-nt']").get("href")
    except AttributeError:
        url_nt = None
    return url_nt

from typing import List, Optional, Tuple
from xml.etree import ElementTree


def _get_requests(
    requests: Optional[ElementTree.Element],
    n: int,
    m: List[Tuple[int, int]],
    t: List[float],
):
    d: List[int] = [0] * n
    a: List[int] = [0] * n
    b: List[int] = [0] * n
    # 'is not None' matters here: an Element with no children is falsy
    if requests is not None:
        request_list = requests.findall("request")
        if useNumer:  # useNumer and num are assumed module-level settings
            request_list = request_list[:num - 1]
        for request in request_list:
            id_attr = request.get("id")
            if id_attr:
                i = int(id_attr)
            else:
                raise KeyError("no 'id' attribute in 'request' element")

            # demand
            quantity = request.find("quantity")
            if quantity is not None and quantity.text:
                d[i] = int(float(quantity.text))
            else:
                raise KeyError("no 'quantity' element")

            # time windows
            tw = request.find("tw")
            _get_tw(tw, i, a, b)

            service_time = request.find("service_time")
            _get_service_time(service_time, t, i, m)
    else:
        raise KeyError("no 'requests' element")
    return d, a, b

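# A minimal sketch of the XML shape _get_requests appears to expect (element
# names taken from the code above; the helpers _get_tw and _get_service_time
# are not shown here, so the call is left commented out):
sample_requests = ElementTree.fromstring(
    "<requests>"
    "<request id='0'>"
    "<quantity>3</quantity>"
    "<tw/>"
    "<service_time>2.0</service_time>"
    "</request>"
    "</requests>"
)
# d, a, b = _get_requests(sample_requests, n=1, m=[], t=[0.0])
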
import urllib.error
import urllib.request

from lxml import html


def uri2url_nt(uri):
    """Open the URI and follow the redirect to get the page URL,
    so that the URL of the .nt file can be built."""
    url_nt = None
    try:
        request = html.parse(urllib.request.urlopen(uri))
    except (TimeoutError, urllib.error.URLError):
        return url_nt
    try:
        url_nt = request.find("//a[@id='download-rdf-nt']").get("href")
    except AttributeError:
        pass
    return url_nt

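# Usage sketch (the URI is the same data.bnf.fr example the script below
# uses); returns the .nt download link, or None on timeouts, URL errors,
# or a missing download link:
# link = uri2url_nt("http://data.bnf.fr/ark:/12148/cb12138677d#about")
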
import time

import pandas as pd


def auto_label(csv_file, saved_dir):  # saved_dir is currently unused
    img_data = pd.read_csv(csv_file)
    if 'emotion' in img_data.keys():
        emotion_list = img_data['emotion'].fillna('')
    else:
        emotion_list = [''] * len(img_data)

    for i, cocoid in enumerate(img_data['cocoid']):
        img_url = ('http://images.cocodataset.org/train2014/COCO_train2014_'
                   + str(cocoid).zfill(12) + '.jpg')
        # retry until the answer is non-empty and contains no 'error'
        # (was find('error') > 0, which missed 'error' at position 0)
        while len(emotion_list[i]) == 0 or emotion_list[i].find('error') != -1:
            print(cocoid, i, len(img_data))
            request = get_emotion_form_img(img_url)  # external helper, not shown
            emotion_list[i] = request
            if request.find('error') != -1:
                # checkpoint progress, then back off before retrying
                img_data['emotion'] = emotion_list
                img_data.to_csv(csv_file, index=False)
                time.sleep(3)
            if len(request) == 0:
                img_data['emotion'] = emotion_list
                img_data.to_csv(csv_file, index=False)
                time.sleep(5)

    img_data['emotion'] = emotion_list
    img_data.to_csv(csv_file, index=False)

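# A sketch of the CSV layout auto_label assumes (column names taken from the
# code: 'cocoid' is required, 'emotion' is created or filled in as it runs):
#
#   cocoid,emotion
#   9,happy
#   25,
#
# auto_label('coco_emotions.csv', 'labels/')   # file and dir names hypothetical
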
def __getDominio(self):
    """Return the scheme plus the domain of self.url."""
    url = self.url
    protocolo = url[:url.find('://') + 3]    # e.g. 'https://'
    url = url[url.find('://') + 3:]          # strip the scheme
    fin = url.find('/')
    # keep the whole host when there is no path (find() would return -1
    # and silently drop the last character)
    dominio = url[:fin] if fin != -1 else url
    return protocolo + dominio

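# Usage sketch: with an assumed self.url = 'https://example.com/path/page.html',
# self.__getDominio() returns 'https://example.com'; with no path at all
# ('https://example.com') the whole host is kept thanks to the fin != -1 check.
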
import json
import urllib.request


def catFacts(bot, msg):
    # parse the response as JSON instead of slicing the str() of raw bytes;
    # the 'facts' key is assumed from the API's one-fact response shape
    raw = urllib.request.urlopen(
        "http://catfacts-api.appspot.com/api/facts?number=1").read()
    fact = json.loads(raw.decode('utf-8'))["facts"][0]
    bot.sendMessage(msg["channel"], fact)

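# Usage sketch with a hypothetical bot object exposing
# sendMessage(channel, text) and an IRC-style message dict:
# catFacts(bot, {"channel": "#general"})
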
# coding: utf-8
import urllib.request

from lxml import html, etree
from rdflib.graph import Graph
import rdflib

# List of URIs already processed
treated_entities = []

url = "http://data.bnf.fr/ark:/12148/cb12138677d#about"
request = html.parse(urllib.request.urlopen(url))
url_ref = request.find("//meta[@property='og:url']").get("content")
url_nt = url_ref + "rdf.nt"
url_nt = url_nt.replace("/fr/", "/")
print(url_nt)


def uri2url_nt(uri):
    """Open the URI and follow the redirect to get the page URL,
    so that the URL of the .nt file can be built."""
    request = html.parse(urllib.request.urlopen(uri))
    try:
        url_nt = request.find("//a[@id='download-rdf-nt']").get("href")
    except AttributeError:
        url_nt = None
    return url_nt

# relies on module-level imports assumed from the calls below: requests,
# feedparser, json, urllib.request, BeautifulSoup / SoupStrainer (bs4),
# randint (random), datetime, parser / gettz (dateutil), and the local
# feed / feedUpdate helpers
def parse(self, proxy=False):
    result = []

    # avoiding blocks
    headers = {
        'user-agent': feed.UserAgent_random().lstrip(),
        'referer': 'https://www.google.com/search?newwindow=1&q=' + self.href
    }
    if proxy:
        proxyDict = {
            "http": "http://" + proxy,
            "https": "https://" + proxy,
        }
    else:
        proxyDict = {}

    # custom ранобэ.рф API import
    if self.href.find('http://xn--80ac9aeh6f.xn--p1ai/') != -1:
        request = f"https://xn--80ac9aeh6f.xn--p1ai/api/v2/books/{self.href[31:-1]}/chapters"
        request = requests.get(request).json()
        # (request, headers=headers, proxies=proxyDict)

        for each in request['items']:
            # ignoring paid chapters
            if each['availabilityStatus'] == 'free':
                result.append(feedUpdate(
                    name=each["title"],
                    href="http://xn--80ac9aeh6f.xn--p1ai" + each["url"],
                    datetime=datetime.strptime(each["publishTime"], '%Y-%m-%d %H:%M:%S'),
                    title=self.title))

    # custom instagram import
    if self.href.find('https://www.instagram.com/') != -1:
        # throttle: only ~1 call in 100 actually hits instagram
        if not randint(0, 100) == 0:
            return []
        try:
            request = requests.get(self.href, headers=headers, proxies=proxyDict)
            request = BeautifulSoup(request.text, "html.parser")

            for each in request.find_all('script'):
                data = 'window._sharedData = '
                if each.text.find(data) != -1:
                    # preparing JSON
                    data = each.text.find(data) + len(data)  # data start position
                    data = each.text[data:-1]  # -1 removes the trailing ;
                    data = json.loads(data)

                    # selecting data from JSON
                    data = data['entry_data']['ProfilePage'][0]['graphql']
                    data = data['user']['edge_owner_to_timeline_media']['edges']

                    # parsing data from JSON
                    for each in data:
                        # avoiding errors caused by empty titles
                        try:
                            result_name = each['node']['edge_media_to_caption']['edges'][0]['node']['text']
                        except IndexError:
                            result_name = 'no title'
                        result.append(feedUpdate(
                            name=result_name,
                            href="http://instagram.com/p/" + each['node']['shortcode'],
                            datetime=datetime.fromtimestamp(each['node']['taken_at_timestamp']),
                            title=self.title))
        except (KeyError, requests.exceptions.ProxyError, requests.exceptions.SSLError):
            return []

    # custom RSS YouTube converter (link to feed has to be converted manually)
    elif self.href.find('https://www.youtube.com/channel/') != -1:
        self.href_title = self.href[:]
        # 32 = len('https://www.youtube.com/channel/')
        # 7 = len('/videos')
        self.href = "https://www.youtube.com/feeds/videos.xml?channel_id=" + self.href[32:-7]
        result = feed.parse(self)

    # custom RSS readmanga converter (link to feed has to be converted manually
    # to simplify feed object creation)
    elif self.href.find('http://readmanga.me/') != -1 and self.href.find('readmanga.me/rss/manga') == -1 and self.href_title is None:
        # 20 = len('http://readmanga.me/')
        self.href = "feed://readmanga.me/rss/manga?name=" + self.href[20:]
        result = feed.parse(self)

    # custom RSS mintmanga converter (link to feed has to be converted manually
    # to simplify feed object creation)
    elif self.href.find('http://mintmanga.com/') != -1 and self.href.find('mintmanga.com/rss/manga') == -1 and self.href_title is None:
        # 21 = len('http://mintmanga.com/')
        self.href = "feed://mintmanga.com/rss/manga?name=" + self.href[21:]
        result = feed.parse(self)

    # custom RSS deviantart converter (link to feed has to be converted manually
    # to simplify feed object creation)
    elif self.href.find('https://www.deviantart.com/') != -1:
        self.href_title = self.href[:]
        # 27 = len('https://www.deviantart.com/')
        # 9 = len('/gallery/')
        self.href = self.href[27:-9]
        self.href = "http://backend.deviantart.com/rss.xml?q=gallery%3A" + self.href
        result = feed.parse(self)

    # custom fantasy-worlds.org loader
    elif self.href.find('https://fantasy-worlds.org/series/') != -1:
        strainer = SoupStrainer('div', attrs={'class': 'rightBlock'})
        request = requests.get(self.href, headers=headers, proxies=proxyDict)
        request = BeautifulSoup(request.text, "html.parser", parse_only=strainer)

        for each in request.find('ul').find('li').find('ul').find('li').find('ul').find_all('li'):
            result.append(feedUpdate(
                name=f"{self.title} {each.text[:each.text.find(' // ')]}",
                href=each.find('a')['href'],
                datetime=datetime.now(),  # <=== fake date
                title=self.title))

    # custom pikabu import
    elif self.href.find('pikabu.ru/@') != -1:
        # try:
        strainer = SoupStrainer('div', attrs={'class': 'stories-feed__container'})
        request = requests.get(self.href, headers=headers, proxies=proxyDict)
        request = BeautifulSoup(request.text, "html.parser", parse_only=strainer)

        for each in request.find_all('article'):
            try:
                # '+03:00' -> '+0300' so strptime's %z accepts the offset
                result_datetime = each.find('time')['datetime'][:-3] + "00"
                result_datetime = datetime.strptime(result_datetime, '%Y-%m-%dT%H:%M:%S%z')
                result.append(feedUpdate(
                    name=each.find('h2', {'class': "story__title"}).find('a').getText(),
                    href=each.find('h2', {'class': "story__title"}).find('a')['href'],
                    datetime=result_datetime,
                    title=self.title))
            except (TypeError, AttributeError):
                # advertisement, passing as no need to save it
                pass
        # except (requests.exceptions.ConnectionError, requests.exceptions.SSLError):
        #     # failed connection, hope it works from time to time
        #     return []

    # # custom fanserials parser
    # elif self.href.find('http://fanserial.net/') != -1 and self.filter is not None:
    #     strainer = SoupStrainer('ul', attrs={'id': 'episode_list'})
    #
    #     request = requests.get(self.href, headers=headers, proxies=proxyDict)
    #     request = BeautifulSoup(request.text, "html.parser", parse_only=strainer)
    #     print(request)
    #
    #     for each in request.find_all('li'):
    #         print(each)
    #         result_href = ''
    #         for each_span in each.find('div').find('div', attrs={'class': 'serial-translate'}).find_all('span'):
    #             result_href = 'http://fanserial.tv' + each_span.find('a').get('href')
    #
    #         result.append(feedUpdate(
    #             name=each.find('div', attrs={'class': 'field-description'}).find('a').text,
    #             href=result_href,
    #             datetime=datetime.now(),  # <=== fake date
    #             title=self.title))

    # default RSS import
    else:
        proxyDict = urllib.request.ProxyHandler(proxyDict)
        request = feedparser.parse(self.href, request_headers=headers, handlers=[proxyDict])

        for each in request["items"]:
            # HREF RESULT
            if self.title == "Expresso":
                result_href = each["summary"]
                start = result_href.find('https://expres.co/')
                end = result_href.find('"')
                result_href = result_href[start:end]
            else:
                result_href = each["links"][0]["href"]

            # DATE RESULT: parsing dates
            if "published" in each:
                result_datetime = each["published"]
            elif "updated" in each:
                result_datetime = each["updated"]
            else:
                print(f"result_datetime broke for {self.title}")
                continue  # no usable date; skip the item instead of crashing
            tzinfos = {'PDT': gettz("America/Los_Angeles"),
                       'PST': gettz("America/Juneau")}
            result_datetime = parser.parse(result_datetime, tzinfos=tzinfos)

            # APPEND RESULT
            result.append(feedUpdate(
                name=each["title_detail"]["value"],
                href=result_href,
                datetime=result_datetime,
                title=self.title))

    # universal postfixes
    result_filtered = []
    for each in result:
        # FILTERING: skip the item unless the filter appears in both name and href
        if self.filter is not None:
            if each.name.find(self.filter) == -1 or each.href.find(self.filter) == -1:
                continue

        # DATETIME fixes
        # fix timezone unaware
        # if each.datetime.tzinfo is not None and each.datetime.tzinfo.utcoffset(each.datetime) is not None:
        #     each_dt = localtime(each.datetime)
        #     each.datetime = datetime(each_dt.year, each_dt.month, each_dt.day,
        #                              each_dt.hour, each_dt.minute, each_dt.second)
        # if each.datetime.tzinfo is not None and each.datetime.tzinfo.utcoffset(each.datetime) is not None:
        #     print("!!!! WARNING !!!!")
        #
        # # add DELAY
        # if self.delay is not None:
        #     each.datetime += timedelta(hours=self.delay)

        # NAME fixes
        each.name = ' '.join(each.name.split())
        each.name = each.name[:140]  # SQLite does not enforce max-length

        # extra symbols
        if each.title == 'Shadman':
            each.name = each.name[:each.name.find('(') - 1]
        elif each.title == 'Apple' and each.name[-len('Apple'):] == 'Apple':
            # the - symbol can be a variety of different dash characters
            # 8 = len(' - Apple')
            each.name = each.name[:-8]
        elif each.title == 'LastWeekTonight':
            end = each.name.find(': Last Week Tonight with John Oliver (HBO)')
            if end != -1:
                each.name = each.name[:end]

        result_filtered.append(each)

    return result_filtered

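# Usage sketch (assumes a feed-like object carrying the attributes parse()
# reads: href, title, filter, and optionally href_title):
# updates = my_feed.parse(proxy='127.0.0.1:8080')   # proxy address is made up
# for u in updates:
#     print(u.datetime, u.name, u.href)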