def ctx_dict(request):
    """Context processor that seeds the Region/Provincia/Distrito tables.

    On first use (when a table is empty) it parses the in-memory
    ``Lista_*`` CSV-like rows, HTML-unescapes and normalises each name,
    and saves one model instance per row.  Always returns an empty
    context dict.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    context = {}

    def _clean_name(raw):
        # Strip quotes, decode HTML entities, normalise capitalisation.
        return str(unescape(raw.replace("'", ""))).lower().capitalize()

    if not Region.objects.exists():
        for valor_region in Lista_regiones:
            campos = valor_region.split(',')
            objeto_region = Region()
            objeto_region.id = int(campos[0])
            objeto_region.Nombre = _clean_name(campos[1])
            objeto_region.save()

    if not Provincia.objects.exists():
        for valor_provincia in Lista_provincia:
            campos = valor_provincia.split(',')
            objeto_provincia = Provincia()
            objeto_provincia.id = int(campos[0])
            # Original assigned the raw string here; int() matches the
            # Distrito branch and what the column stores after coercion.
            objeto_provincia.region_id = int(campos[2])
            objeto_provincia.Nombre = _clean_name(campos[1])
            objeto_provincia.region_provincia = Region.objects.get(
                id=int(campos[2]))
            objeto_provincia.save()

    if not Distrito.objects.exists():
        for valor_distrito in Lista_distrito:
            campos = valor_distrito.split(',')
            objeto_distrito = Distrito()
            objeto_distrito.id = int(campos[0])
            objeto_distrito.Nombre = _clean_name(campos[1])
            objeto_distrito.provincia_id = int(campos[2])
            objeto_distrito.provincia_distrito = Provincia.objects.get(
                id=int(campos[2]))
            objeto_distrito.save()

    return context
def _get_event():
    """Return the session event matching the ``event`` query argument.

    The stored description is HTML-escaped, so it is unescaped before
    serialisation.  Returns '' when no matching event exists.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    wanted_id = request.args.get('event')
    event = [e for e in session['events'] if e['id'] == wanted_id]
    if event:
        event[0]['description'] = unescape(event[0]['description'])
        return jsonify(event[0])
    return ''
def _issue_to_dict(self, issue):
    """Build the JIRA create-issue argument dict for *issue*.

    Summary and description are HTML-unescaped; reporter, impact and
    priority fields are added only when present on the issue.  The caller
    field is a single-element list of name/key dicts keyed by email.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    args = {
        'project': self.project_settings['key'],
        'summary': unescape(issue.summary),
        'description': unescape(issue.description),
        'issuetype': {'name': issue.type},
    }
    if issue.reporter:
        args[self.get_field_id_by_name(
            self.issue_settings['reporter_field'])] = issue.reporter.name
    if issue.impact:
        args[self.get_field_id_by_name(
            self.issue_settings['impact_field'])] = issue.impact
    if issue.priority:
        args['priority'] = {'name': issue.priority}
    args[self.get_field_id_by_name(self.issue_settings['caller_field'])] = [{
        "name": issue.caller.email,
        "key": issue.caller.email
    }]
    return args
def _issue_to_dict(self, issue):
    """
    Convert issue to dict that can be accepted by JIRA as input parameters
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    # Prefer the caller's full name, falling back to the username.
    caller = issue.caller.full_name or issue.caller.username
    args = {
        'project': self.project_settings['key'],
        'summary': unescape(issue.summary),
        'description': unescape(issue.description),
        'issuetype': {'name': issue.type},
        self._get_field_id_by_name(self.issue_settings['caller_field']): caller,
    }
    if issue.reporter:
        args[self._get_field_id_by_name(
            self.issue_settings['reporter_field'])] = issue.reporter.name
    if issue.impact:
        args[self._get_field_id_by_name(
            self.issue_settings['impact_field'])] = issue.impact
    if issue.priority:
        args['priority'] = {'name': issue.priority}
    return args
def extended_stats(self, user=None):
    """Log extended profile and last-status stats for a Twitter user.

    :param user: screen name (str, '@' optional); when falsy, the
        authenticated account is used.
    :raises InvalidParameter: when *user* is neither falsy nor a str.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    if not user:
        data = self.api.me()
    elif isinstance(user, str):
        data = self.api.get_user('%s' % str(user.replace('@', '')))
    else:
        raise InvalidParameter

    logging.info("[*] Created: %s" % data.created_at)
    logging.info("[*] Description: %s" % data.description)
    logging.info("[*] Last update: %s" % data.status.created_at)

    hashtags = ' '.join(
        "#%s" % x['text'] for x in data.status.entities['hashtags'])
    mentions = ' '.join(
        "@%s" % x['screen_name']
        for x in data.status.entities['user_mentions'])
    logging.info("[*] \tUser Mentions: %s" % mentions)
    logging.info("[*] \tHashtags: %s" % hashtags)

    # Retweets and plain tweets only differ in the log label; the text is
    # unescaped and re-indented identically in both cases.
    tweet_text = unescape(data.status.text.replace('\n', '\n\t\t '))
    if "RT @" in data.status.text:
        logging.info("[*] \tRetweet Text: %s" % tweet_text)
    else:
        logging.info("[*] \tTweet Text: %s" % tweet_text)
    logging.info('[*] \tRetweet Count: %s' % str(data.status.retweet_count))
def get_link(url):
    """Resolve an ok.ru embed *url* to a (stream_url, resolution) tuple.

    tvhay-family wrapper URLs are first unwrapped via their ``?link=``
    parameter.  When several qualities exist the user picks one in a Kodi
    dialog; returns ``(None, None)`` when the dialog is cancelled.

    :raises Exception: "Link has been removed" when the page no longer
        contains the player's ``data-options`` attribute.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    if 'apitvh.net' in url \
            or 'tvhayz.net' in url \
            or 'tvhays.org' in url \
            or 'tvhai.org' in url:
        url = re.search(r'\?link=(.*)', url).group(1)

    response = Request().get(url)
    m = re.search('data-options="(.+?)"', response)
    if m is None:
        # Player block missing from the markup: the video was taken down.
        # (Original used a bare except around m.group(1) for this.)
        raise Exception("Link has been removed")

    # data-options holds HTML-escaped JSON, whose 'metadata' value is
    # itself a JSON string describing the available streams.
    options = json.loads(unescape(m.group(1)))
    metadata = json.loads(options['flashvars']['metadata'])

    items = [(video['url'], rsl(video['name'])) for video in metadata['videos']]
    items = sorted(items, key=lambda elem: int(elem[1]), reverse=True)
    if len(items) == 1:
        return items[0]

    listitems = ["%s (%s)" % (res, stream) for stream, res in items]
    index = xbmcgui.Dialog().select("Select ok.ru stream", listitems)
    if index == -1:
        return None, None
    return items[index]
def get_programs(self, channel):
    """ Get a list of all programs of the specified channel.
    :type channel: str
    :rtype list[Program]
    NOTE: This function doesn't use an API; it scrapes the channel page.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    if channel not in CHANNELS:
        raise Exception('Unknown channel %s' % channel)

    # Load webpage
    data = self._get_url(CHANNELS[channel]['url'])

    # Parse programs out of the overview markup.
    regex_programs = re.compile(
        r'<a class="program-overview__link" href="(?P<path>[^"]+)">\s+'
        r'<span class="program-overview__title">\s+(?P<title>[^<]+)</span>.*?'
        r'</a>', re.DOTALL)

    return [
        Program(channel=channel,
                path=program.group('path').lstrip('/'),
                title=unescape(program.group('title').strip()))
        for program in regex_programs.finditer(data)
    ]
def cmd_genpot(config, options):
    """Generate the gettext pot file"""
    os.chdir(config.source_dir)

    po_path = os.path.join(config.source_dir, 'po')
    if not os.path.isdir(po_path):
        os.mkdir(po_path)

    # Collect every .py file under the source tree (relative paths).
    python_files = []
    for root, dirs_dummy, files in os.walk(config.source_dir):
        for file_name in files:
            if file_name.endswith('.py'):
                file_path = os.path.relpath(os.path.join(root, file_name),
                                            config.source_dir)
                python_files.append(file_path)
    python_files.sort()

    # First write out a stub .pot file containing just the translated
    # activity name, then have xgettext merge the rest of the
    # translations into that. (We can't just append the activity name
    # to the end of the .pot file afterwards, because that might
    # create a duplicate msgid.)
    pot_file = os.path.join('po', '%s.pot' % config.bundle_name)
    escaped_name = _po_escape(config.activity_name)

    # Context manager guarantees the stub is flushed and closed before
    # xgettext reads it (the original leaked the handle on an exception).
    with open(pot_file, 'w') as f:
        f.write('#: activity/activity.info:2\n')
        f.write('msgid "%s"\n' % escaped_name)
        f.write('msgstr ""\n')
        if config.summary is not None:
            escaped_summary = _po_escape(config.summary)
            f.write('#: activity/activity.info:3\n')
            f.write('msgid "%s"\n' % escaped_summary)
            f.write('msgstr ""\n')

        if config.description is not None:
            # Strip HTML tags from the description by collecting only the
            # text nodes the parser reports.
            parser = HTMLParser()
            strings = []
            parser.handle_data = strings.append
            parser.feed(config.description)
            for s in strings:
                s = s.strip()
                if s:
                    f.write('#: activity/activity.info:4\n')
                    f.write('msgid "%s"\n' % _po_escape(s))
                    f.write('msgstr ""\n')

    args = [
        'xgettext', '--join-existing', '--language=Python', '--keyword=_',
        '--add-comments=TRANS:', '--output=%s' % pot_file
    ]
    args += python_files
    retcode = subprocess.call(args)
    if retcode:
        print('ERROR - xgettext failed with return code %i.' % retcode)
def parse(self, response):
    """Extract 'Google Earth' KMZ links from each RSS item description
    and schedule one follow-up request per cyclone warning report."""
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    for elt in response.xpath("//item/description").extract():
        # Descriptions are HTML-escaped HTML; unescape before re-parsing.
        ref = (Selector(text=unescape(elt)).xpath(
            "//li/a[text()[contains(., 'Google Earth')]]/@href").extract())
        # each cyclone has a different KMZ file describing it
        for url in ref:
            yield Request(url=url, callback=self.parse_warning_report)
def convert(content, input_format, output_format):
    """
    Convert transcript `content` from `input_format` to `output_format`.

    Accepted input formats: sjson, srt.
    Accepted output format: srt, txt, sjson.

    Raises:
        TranscriptsGenerationException: On parsing the invalid srt
        content during conversion from srt to sjson.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    assert input_format in ('srt', 'sjson')
    assert output_format in ('txt', 'srt', 'sjson')

    if input_format == output_format:
        return content

    if input_format == 'srt':
        # Standardize content into bytes for later decoding.
        if isinstance(content, text_type):
            content = content.encode('utf-8')

        if output_format == 'txt':
            text = SubRipFile.from_string(content.decode('utf-8')).text
            return unescape(text)
        elif output_format == 'sjson':
            try:
                srt_subs = SubRipFile.from_string(
                    # Skip byte order mark(BOM) character
                    content.decode('utf-8-sig'),
                    error_handling=SubRipFile.ERROR_RAISE
                )
            except Error as ex:  # Base exception from pysrt
                raise TranscriptsGenerationException(text_type(ex))
            return json.dumps(generate_sjson_from_srt(srt_subs))

    if input_format == 'sjson':
        if output_format == 'txt':
            text = json.loads(content)['text']
            # Line entries may be None; render those as empty lines.
            text_without_none = [line if line else '' for line in text]
            return unescape("\n".join(text_without_none))
        elif output_format == 'srt':
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
def resolve_youtube_dl(url):
    """Resolve *url* through youtube-dl and return playback metadata.

    Returns a dict with keys label / resolved_url / content_type /
    thumbnail / headers; values are None (content_type stays 'video')
    when the URL cannot be resolved.
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    label = None
    stream_url = None
    headers = None
    thumbnail = None
    content_type = 'video'
    try:
        from YDStreamExtractor import _getYoutubeDLVideo
        source = _getYoutubeDLVideo(url, resolve_redirects=True)
    except Exception:  # best effort: any extractor failure means unresolved
        source = None
    if source:
        selected_stream = source.selectedStream()
        stream_url = selected_stream['xbmc_url']
        title = source.title
        thumbnail = source.thumbnail
        # Bare URLs make useless labels; otherwise use the unescaped title.
        label = None if title.lower().startswith('http') else title
        if label is not None:
            label = unescape(label)
        if 'ytdl_format' in selected_stream and 'formats' in selected_stream[
                'ytdl_format']:
            formats = selected_stream['ytdl_format']['formats']
            format_index = None
            if selected_stream['ytdl_format'].get(
                    'extractor_key') == 'Facebook':
                # ytdl xbmc_url is audio only for Facebook, override url:
                # walk the preferred quality ids until one matches.
                q_ids = [
                    'dash_hd_src_no_ratelimit', 'dash_hd_src',
                    'dash_sd_src_no_ratelimit', 'dash_sd_src'
                ]
                while len(q_ids) > 0:
                    format_id = q_ids[0]
                    try:
                        format_index = next(
                            index for (index, f) in enumerate(formats)
                            if f['format_id'] == format_id)
                        stream_url = formats[format_index]['url']
                        break
                    except StopIteration:
                        del q_ids[0]
            else:
                format_id = selected_stream['formatID']
                format_index = next(index for (index, f) in enumerate(formats)
                                    if f['format_id'] == format_id)
            # BUG FIX: the original tested `if format_index:`, which skipped
            # this branch whenever the matching format sat at index 0.
            if format_index is not None:
                ext = formats[format_index]['ext']
                headers = formats[format_index]['http_headers']
                if ext:
                    content_type = __get_potential_type('.' + ext)
    return {
        'label': label,
        'resolved_url': stream_url,
        'content_type': content_type,
        'thumbnail': thumbnail,
        'headers': headers
    }
def __init__(self, bot: "Bot"):
    """Initialise the cog: wire the owning bot and per-channel state."""
    self.bot = bot
    # Google Cloud Translate client.
    self.translate_client = translate.Client()  # _http=self.bot.http)
    # NOTE(review): HTMLParser.unescape() was removed in Python 3.9 — if
    # self.h is used for unescaping elsewhere, migrate to html.unescape.
    self.h = HTMLParser()
    # channel_id -> SpamChecker, created lazily on first access.
    self._spam_check = defaultdict(SpamChecker)
    # channel_id: list of recent chat messages per channel.
    self.chat_history = defaultdict(lambda: [])
def test_colorize_noclasses_should_return_value_if_lexer_class_not_found(self):
    """An unknown lexer name must leave the input markup unchanged."""
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    ctx = Context({'code_string': '<h1>TEST</h1>'})
    content = """{% load syntax_color %}
        {{ code_string|colorize_noclasses:'invalid_lexer' }}
        """
    expected_result = '<h1>TEST</h1>'
    result = Template(content).render(ctx)
    # The template engine auto-escapes the output, so unescape before
    # comparing against the raw markup.
    self.assertHTMLEqual(unescape(result), expected_result)
def convert_to_text(self):
    """Return the extracted article text: one trimmed chunk per top node,
    joined with blank lines."""
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    txts = []
    for node in list(self.get_top_node()):
        txt = self.parser.getText(node)
        if txt:
            txt = unescape(txt)
            # NOTE(review): r'\n' splits on the literal two-character
            # sequence backslash-n, not on newlines; preserved as-is since
            # upstream text may genuinely contain escaped newlines — confirm
            # before changing to '\n'.
            txt_lis = innerTrim(txt).split(r'\n')
            txts.extend(txt_lis)
    return '\n\n'.join(txts)
def get_render_result(request, objects, price_ranges, cur_page, n_page, cur_price, sk):
    """Render the product-listing page.

    Each raw product dict is flattened to name/msrp/thumb/desc using the
    last value of each multi-valued field; the short description is
    HTML-unescaped (empty string when absent).
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    flattened = [{
        'name': str(obj['name'][-1]),
        'msrp': str(obj['msrp'][-1]),
        'thumb': str(obj['thumbnailImage'][-1]),
        'desc': unescape(str(obj['shortDescription'][-1]))
                if 'shortDescription' in obj else ''
    } for obj in objects]

    return render(request, 'frontend/index.html', {
        'objects': flattened,
        'n_page': n_page,
        'price_ranges': price_ranges,
        # Pagination window: current page plus the next two.
        'pages': {'1': cur_page, '2': cur_page + 1, '3': cur_page + 2},
        'cur_price': cur_price,
        'sk': sk,
    })
def get_cp_dict(cls, id):
    """Fetch a member by ProPublica id and flatten it for storage.

    Keeps at most the two most recent roles, HTML-unescapes the name
    fields and attaches an md5 digest of the serialised member JSON for
    cheap change detection (not a security hash).
    """
    from html import unescape  # HTMLParser.unescape() was removed in Python 3.9

    member = cls.pp.get_member_by_id(id)
    if len(member['roles']) > 2:
        member['roles'] = member['roles'][:2]
    latest_role = member['roles'][0]
    cp_dict = {
        'id': member['member_id'],
        'last_name': unescape(member['last_name']),
        'first_name': unescape(member['first_name']),
        'congress': latest_role['congress'],
        'chamber': latest_role['chamber'],
        'state': latest_role['state'],
        'district': latest_role.get('district', None),
        'member_json': json.dumps(member),
    }
    md5 = hashlib.md5()
    # BUG FIX: hashlib requires bytes on Python 3; json.dumps output is
    # ASCII by default, so utf-8 encoding is safe on both 2 and 3.
    md5.update(cp_dict['member_json'].encode('utf-8'))
    cp_dict['member_hash'] = md5.hexdigest()
    return cp_dict