async def on_message(self, message):
    data = json_decode(message)
    # echo the message back to the sender
    send_private_chat_message(message=message, receiver=data['user'])
    receiver_connection = private_chat_ws_connections.get(self.receiver)
    if receiver_connection and receiver_connection.receiver == data['user']:
        send_private_chat_message(message=message, receiver=data['send_to'])
    await self.insert(
        table='chat',
        params=dict(
            sender=data['user'],
            reciever=data['send_to'],  # column name kept as spelled in the schema
            message=data['message'],
            date_created=datetime.datetime.now()))
    unread_private_message = await self.get_unread_messages(xhtml_unescape(data['send_to']))
    if data['send_to'] in group_chat_ws_connections:
        send_group_chat_message(message=unread_private_message[0]['count'],
                                message_type='unreaded',
                                receiver=xhtml_unescape(data['send_to']))
    if data['send_to'] in private_chat_ws_connections:
        send_private_chat_message(message=unread_private_message[0]['count'],
                                  message_type='unreaded',
                                  receiver=xhtml_unescape(data['send_to']))
def parse_cases(filename):
    """Parses the fogbugz data in the file.

    Returns a list of (subject, assigned_to, body) tuples.
    """
    results = []
    tree = ElementTree.parse(filename)
    for case in tree.find('cases').findall('case'):
        subject = 'FB%s: %s' % (case.get('ixBug'), case.findtext('sTitle'))
        body = []
        assigned_to = case.findtext('sPersonAssignedTo')
        body.append('Assigned to: %s' % assigned_to)
        body.append('Project: %s' % case.findtext('sProject'))
        body.append('Area: %s' % case.findtext('sArea'))
        body.append('Priority: %s (%s)' % (case.findtext('ixPriority'),
                                           case.findtext('sPriority')))
        body.append('Category: %s' % case.findtext('sCategory'))
        body.append('')
        for event in case.find('events').findall('event'):
            body.append('%s at %s' % (event.findtext('evtDescription'),
                                      event.findtext('dt')))
            if event.findtext('s'):
                body.append('')
                body.append(event.findtext('s'))
            body.append('')
            if event.find('rgAttachments') is not None:
                for attachment in event.find('rgAttachments').findall('attachment'):
                    body.append('Attachment: %s' %
                                escape.xhtml_unescape(attachment.findtext('sURL')))
        results.append((subject, USER_MAP[assigned_to], '\n'.join(body)))
    return results
def replace_ascii(match):
    ascii = text[match.start():match.end()]
    ascii = xhtml_unescape(ascii).encode('ascii', 'ignore').strip()  # convert escaped HTML entities back to original chars
    if not ascii or ascii not in ascii_replace:
        return ascii
    return cls.convert(ascii_replace[ascii])
def replace_post(post_data):
    d = {
        "id": 5,
        "title": "my blog post title",
        "slug": "my-blog-post-title",
        "markdown": "the *markdown* formatted post body",
        # "html": "the <i>html</i> formatted post body",
        "image": None,
        "featured": 0,
        "page": 0,
        "status": "published",
        "language": "zh_CN",
        "meta_title": None,
        "meta_description": None,
        "author_id": 1,
        "created_at": cur_timestamp(),
        "created_by": 1,
        "updated_at": cur_timestamp(),
        "updated_by": 1,
        "published_at": cur_timestamp(),
        "published_by": 1
    }
    d['id'] = int(post_data['source_url'].rsplit('/', 1)[1].split('.')[0])
    d['title'] = post_data['title'].strip()
    d['slug'] = post_data['title'].strip().replace(' ', '-').lower()
    d['markdown'] = xhtml_unescape(post_data['content'].strip())  # unescape
    return d
def get_permanent_wechat_article_url(self, sougou_url):
    """Resolve a temporary Sogou URL to the permanent WeChat article URL.

    Args:
        sougou_url (str): "http://mp.weixin.qq.com/s?timestamp=1473815432&src=3&ver=1&signature=puOtJfG0mefG5o6Ls-bqDmML9ZjS5S6oDIhdUReNRm6*bIF9yINfCoXvB3btXzPEeUZvV8bdlSRTgKPx5Nsd6ZfzLK4Gv4X6z7te1EEo2azG3llx*rw*fxqXrKnwP2oqTTrNYxaRzM8cARFIbjPHVLpWdZGqNhyxsKoK5ozlXSk="

    Returns:
        msg_link (str): "http://mp.weixin.qq.com/s?__biz=MzI1OTAwNDc1OA==&mid=2652831837&idx=1&sn=3a93c0b6dfeef85e9b85bdac39f47bce&chksm=f1942064c6e3a9728f0bdc4d9bab481b7079c7c1d9ed32397295b45d0b02af839dafcc4b093e#rd"
    """
    time.sleep(random.randint(1, 10))
    curl_str = """
    curl 'http://mp.weixin.qq.com/s?timestamp=1473815432&src=3&ver=1&signature=puOtJfG0mefG5o6Ls-bqDmML9ZjS5S6oDIhdUReNRm6*bIF9yINfCoXvB3btXzPEeUZvV8bdlSRTgKPx5Nsd6ZfzLK4Gv4X6z7te1EEo2azG3llx*rw*fxqXrKnwP2oqTTrNYxaRzM8cARFIbjPHVLpWdZGqNhyxsKoK5ozlXSk=' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' -H 'Connection: keep-alive' -H 'Accept-Encoding: gzip, deflate, sdch' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36' --compressed
    """
    _, headers, _ = parse_curl_str(curl_str)
    headers['User-Agent'] = random_ua()
    r = requests.get(sougou_url, headers=headers)
    html = r.text
    try:
        msg_link = xhtml_unescape(extract('msg_link = "', '";', html))
    except Exception:
        self.logger.exception(html)
        msg_link = sougou_url
    self.logger.info('get permanent url: %s', msg_link)
    return msg_link
def normalize(sentence):
    """Normalize a comment.

    :param str sentence: the comment to normalize
    :return: the normalized comment
    :rtype: str
    """
    dst = escape.xhtml_unescape(sentence)
    if _re_escape.findall(dst):
        return ""
    # か゛(u'\u304b\u309b') -> か ゙(u'\u304b \u3099')
    # -> が(u'\u304b\u3099') -> が(u'\u304c')
    dst = unicodedata.normalize("NFKC", "".join(unicodedata.normalize("NFKC", dst).split()))
    dst = dst.lower()
    dst = "".join(dst.split())
    try:
        dst = _convert_marks(dst)
    except:
        print("convertError")
    dst = _re_remove.sub("", dst)
    dst = _delete_cyclic_word(dst)
    return dst
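# A quick check of the dakuten handling described in the comment above; this is
# standard unicodedata behavior, shown here as an illustrative sketch:
import unicodedata

s = u'\u304b\u309b'                        # か followed by the full-width voiced sound mark
step1 = unicodedata.normalize("NFKC", s)   # u'\u304b \u3099': the mark decomposes to space + combining mark
step2 = u"".join(step1.split())            # u'\u304b\u3099': drop the inserted space
print(unicodedata.normalize("NFKC", step2) == u'\u304c')  # True: composes to が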
def searchIndexersForShowName(self, search_term, lang=None, indexer=None):
    self.set_header('Cache-Control', 'max-age=0,no-cache,no-store')
    self.set_header('Content-Type', 'application/json')
    if not lang or lang == 'null':
        lang = settings.INDEXER_DEFAULT_LANGUAGE

    search_term = xhtml_unescape(search_term)
    searchTerms = [search_term]

    # If search term ends with what looks like a year, enclose it in ()
    matches = re.match(r'^(.+ |)([12][0-9]{3})$', search_term)
    if matches:
        searchTerms.append("{0}({1})".format(matches.group(1), matches.group(2)))

    for searchTerm in searchTerms:
        # If search term begins with an article, let's also search for it without
        matches = re.match(r'^(?:a|an|the) (.+)$', searchTerm, re.I)
        if matches:
            searchTerms.append(matches.group(1))

    results = {}
    final_results = []

    # Query Indexers for each search term and build the list of results
    for i, j in sickchill.indexer if not int(indexer) else [(int(indexer), None)]:
        logger.debug("Searching for Show with searchterm(s): {0} on Indexer: {1}".format(searchTerms, 'theTVDB'))
        for searchTerm in searchTerms:
            # noinspection PyBroadException
            try:
                indexerResults = sickchill.indexer[i].search(searchTerm, language=lang)
            except Exception:
                logger.exception(traceback.format_exc())
                continue

            # add search results
            results.setdefault(i, []).extend(indexerResults)

    for i, shows in results.items():
        # noinspection PyUnresolvedReferences
        final_results.extend({
            (sickchill.indexer.name(i), i, sickchill.indexer[i].show_url,
             show['id'], show['seriesName'], show['firstAired'],
             sickchill.tv.Show.find(settings.showList, show['id']) is not None)
            for show in shows
        })

    lang_id = sickchill.indexer.lang_dict()[lang]
    return json.dumps({'results': final_results, 'langid': lang_id, 'success': len(final_results) > 0})
def index(self, path='', includeFiles=False, fileTypes=''):  # pylint: disable=arguments-differ
    self.set_header(b'Cache-Control', 'max-age=0,no-cache,no-store')
    self.set_header(b'Content-Type', 'application/json')

    return json.dumps(foldersAtPath(xhtml_unescape(path), True, bool(int(includeFiles)), fileTypes.split(',')))
def complete(self, term, includeFiles=False, fileTypes=''):
    self.set_header('Cache-Control', 'max-age=0,no-cache,no-store')
    self.set_header('Content-Type', 'application/json')
    paths = [
        entry['path'] for entry in foldersAtPath(
            os.path.dirname(xhtml_unescape(term)),
            includeFiles=bool(int(includeFiles)),
            fileTypes=fileTypes.split(','))
        if 'path' in entry
    ]
    return json.dumps(paths)
def edit_card(self, message):
    id = message['data']['id']
    text = xhtml_unescape(message['data']['value'].strip())
    clean_data = {'value': text, 'id': id}
    message_out = self.generate_message('editCard', clean_data)
    self.broadcast(message_out)
    room_id = self.rooms.get_room_id(self)
    self.cards.update_text(room_id, card_id=id, text=xhtml_escape(text))
def processEpisode(
    self,
    proc_dir=None,
    nzbName=None,
    quiet=None,
    process_method=None,
    force=None,
    is_priority=None,
    delete_on="0",
    failed="0",
    proc_type="manual",
    force_next=False,
    *args_,
    **kwargs
):
    mode = kwargs.get("type", proc_type)
    process_path = xhtml_unescape(kwargs.get("dir", proc_dir or "") or "")
    if not process_path:
        return self.redirect("/home/postprocess/")

    release_name = xhtml_unescape(nzbName) if nzbName else nzbName

    result = settings.postProcessorTaskScheduler.action.add_item(
        process_path,
        release_name,
        method=process_method,
        force=force,
        is_priority=is_priority,
        delete=delete_on,
        failed=failed,
        mode=mode,
        force_next=force_next,
    )

    if config.checkbox_to_value(quiet):
        return result

    if result:
        result = result.replace("\n", "<br>\n")
    return self._genericMessage("Postprocessing results", result)
async def on_message(self, message):
    send_group_chat_message(message=message)
    data = json_decode(message)
    await self.insert(
        table='chat',
        params=dict(
            sender=xhtml_unescape(data['user']),
            message=data['message'],
            date_created=datetime.datetime.now(),
        )
    )
def complete(self, term, includeFiles=False, fileTypes=""): self.set_header("Cache-Control", "max-age=0,no-cache,no-store") self.set_header("Content-Type", "application/json") paths = [ entry["path"] for entry in foldersAtPath(os.path.dirname(xhtml_unescape(term)), includeFiles=bool(int(includeFiles)), fileTypes=fileTypes.split(",")) if "path" in entry ] return json.dumps(paths)
def ascii_to_unicode(cls, text):
    def replace_ascii(match):
        ascii = text[match.start():match.end()]
        ascii = ascii.encode('ascii', 'ignore').strip()  # convert escaped HTML entities back to original chars
        if not ascii or ascii not in ascii_replace:
            return ascii
        return cls.convert(ascii_replace[ascii])

    text = xhtml_unescape(text)
    return re.sub(cls.ascii_compiled, replace_ascii, text)
def test_xhtml_escape(self): tests = [ ("<foo>", "<foo>"), (u("<foo>"), u("<foo>")), (b"<foo>", b"<foo>"), ("<>&\"", "<>&""), ("&", "&amp;"), ] for unescaped, escaped in tests: self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped)) self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
    tests = [
        ('foo&#32;bar', 'foo bar'),
        ('foo&#x20;bar', 'foo bar'),
        ('foo&#X20;bar', 'foo bar'),
        ('foo&#xabc;bar', u'foo\u0abcbar'),
        ('foo&#xyz;bar', 'foo&#xyz;bar'),  # invalid encoding
        ('foo&#;bar', 'foo&#;bar'),        # invalid encoding
        ('foo&#x;bar', 'foo&#x;bar'),      # invalid encoding
    ]
    for escaped, unescaped in tests:
        self.assertEqual(unescaped, xhtml_unescape(escaped))
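# The expectations above can be reproduced directly; this is tornado.escape
# behavior, shown as a small usage sketch:
from tornado.escape import xhtml_unescape

print(xhtml_unescape('foo&#32;bar'))   # 'foo bar' (decimal entity)
print(xhtml_unescape('foo&#x20;bar'))  # 'foo bar' (hex entity, case-insensitive)
print(xhtml_unescape('foo&#xyz;bar'))  # 'foo&#xyz;bar' (invalid entities pass through unchanged)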
def test_xhtml_escape(self):
    tests = [
        ("<foo>", "&lt;foo&gt;"),
        (u"<foo>", u"&lt;foo&gt;"),
        (b("<foo>"), b("&lt;foo&gt;")),

        ("<>&\"", "&lt;&gt;&amp;&quot;"),
        ("&amp;", "&amp;amp;"),
    ]
    for unescaped, escaped in tests:
        self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
        self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def __init__(self, profiles):
    linkedin_profile, twitter_profile, meetup_profile = None, None, None
    for a_profile in profiles:
        if linkedin_profile is None:
            linkedin_profile = a_profile if LINKEDIN in a_profile['cluster'] else None
        if twitter_profile is None:
            twitter_profile = a_profile if TWITTER in a_profile['cluster'] else None
        if meetup_profile is None:
            meetup_profile = a_profile if MEETUP in a_profile['cluster'] else None

    self.name = linkedin_profile['entity']['firstName'] + ' ' + linkedin_profile['entity']['lastName']
    self.title = linkedin_profile['entity']['title']
    self.jobprofilesummary = linkedin_profile['entity']['profilesummary']
    self.work_interests = linkedin_profile['entity']['interests']
    if self.jobprofilesummary:
        self.jobprofilesummary = xhtml_unescape(self.jobprofilesummary)
    self.currentjob = linkedin_profile['entity']['current']
    self.currentjob = self.currentjob[0] if self.currentjob else None
    self.previous_jobs = linkedin_profile['entity']['previous'] or []
    self.education = linkedin_profile['entity']['education'] or []
    self.region = linkedin_profile['entity']['region']

    if twitter_profile:
        self.interests_and_hobbies = twitter_profile['entity']['profilesummary']
        if self.interests_and_hobbies:
            self.interests_and_hobbies = xhtml_unescape(self.interests_and_hobbies)
        else:
            self.interests_and_hobbies = ""
        self.current_tweets = [xhtml_unescape(x) for x in twitter_profile['entity']['tweets'] if x]
    else:
        self.interests_and_hobbies = ""
        self.current_tweets = []

    if meetup_profile:
        org_groups = [xhtml_unescape(x[0]) for x in meetup_profile['entity']['groups'] if x[1] == 'Organizer']
        memb_groups = [xhtml_unescape(x[0]) for x in meetup_profile['entity']['groups'] if x[1] == 'Member']
        self.currentgroups = org_groups + memb_groups
    else:
        self.currentgroups = []
def test_xhtml_escape(self):
    tests = [
        ("<foo>", "&lt;foo&gt;"),
        (u"<foo>", u"&lt;foo&gt;"),
        (b"<foo>", b"&lt;foo&gt;"),
        ("<>&\"'", "&lt;&gt;&amp;&quot;&#39;"),
        ("&amp;", "&amp;amp;"),
        (u"<\u00e9>", u"&lt;\u00e9&gt;"),
        (b"<\xc3\xa9>", b"&lt;\xc3\xa9&gt;"),
    ]  # type: List[Tuple[Union[str, bytes], Union[str, bytes]]]
    for unescaped, escaped in tests:
        self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
        self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def _get_articel_info(self, article_info, nick_name, ori_create_time):
    for k, v in article_info.items():
        if isinstance(v, str):
            article_info[k] = xhtml_unescape(v)
    article_dict = {
        'cdn_url': article_info['cover'].replace('\\', ''),
        'title': article_info['title'],
        'nick_name': nick_name,
        'link': ('http://mp.weixin.qq.com' +
                 article_info['content_url'].replace('\\', '')),
        'ori_create_time': ori_create_time,
        'desc': article_info['digest'],
    }
    return article_dict
def post(self):
    user = self.current_user
    for field in ('location', 'twitter', 'github', 'css', 'words'):
        user[field] = xhtml_unescape(self.get_argument(field, ''))
        # strip any HTML fragments found by html_killer
        for tag in set(html_killer.findall(user[field])):
            user[field] = user[field].replace(tag, '')
    website = self.get_argument('website', '')
    w = urlparse(website)
    if w[0] and w[1]:
        user['website'] = website
    else:
        user['website'] = ''
    self.db.users.save(user)
    self.redirect('/user/%s' % user['username'])
def on_message(self, message_json):
    message = json.loads(message_json)
    if message['type'] == 'start':
        self.game.start_game()
    elif message['type'] == 'update':
        self.game.request_update(self)
    elif message['type'] == 'chat':
        self.game.add_chat(xhtml_unescape(message['name']), message['message'], "chat")
    elif message['type'] == 'pause':
        self.game.pause(message['pause'])
    elif message['type'] == 'submit':
        self.game.submit_tau(self, message['cards'])
    elif message['type'] == 'training_option':
        self.game.set_training_option(message['option'], message['value'])
def fun_article_new_src(user, article_id='-1', article_type='blog', src_type='code',
                        title='', body='', source='', code_type='python',
                        math_type='inline', father_id='-1', group_id='-1'):
    if article_type not in Article_Type:
        return [1, 'Unsupported article type!']
    if src_type not in Agree_Src:
        return [1, 'Unsupported resource type!']
    if title is None:
        return [1, 'The title cannot be empty!']
    if body is None:
        if src_type != 'reference':
            return [1, 'The content cannot be empty!']
        else:
            if re.search(r'^(http|https|ftp):\/\/.+$', source) is None:
                return [1, 'Please provide a link or quote the actual content!']
            body = ''
    else:
        if src_type == 'math':
            body = math_encode(escape.xhtml_unescape(body))
        elif src_type == 'code':
            if code_type not in Agree_Code:
                return [1, 'Please choose a code language!']
    if article_type == "about":
        AF_Object = user.about
        article_id = str(user.about._id)
        isnew = False
    elif article_type == "book-about":
        isnew = False
        try:
            book = Catalog(_id=group_id)
            AF_Object = book.about
            limit = book.authority_verify(user)
            if test_auth(limit, A_WRITE) is False:
                return [1, 'You do not have permission to edit this abstract!']
        except Exception, err:
            logging.error(traceback.format_exc())
            logging.error('Catalog not exist, id %s' % group_id)
            return [1, 'Knowledge map not found!']
def test_xhtml_escape(self):
    tests = [
        ("<foo>", "&lt;foo&gt;"),
        (u("<foo>"), u("&lt;foo&gt;")),
        (b"<foo>", b"&lt;foo&gt;"),
        ("<>&\"'", "&lt;&gt;&amp;&quot;&#39;"),
        ("&amp;", "&amp;amp;"),
        (u("<\u00e9>"), u("&lt;\u00e9&gt;")),
        (b"<\xc3\xa9>", b"&lt;\xc3\xa9&gt;"),
    ]
    for unescaped, escaped in tests:
        self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
        self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def edit_card(self, message):
    id = message['data']['id']
    text = xhtml_unescape(message['data']['value'].strip())
    clean_data = {'value': text, 'id': id}
    message_out = self.generate_message('editCard', clean_data)
    self.broadcast_to_all_room_user(self, message_out)
    room_id = self.rooms.get_room_id(self)
    self.cards.update_text(room_id, card_id=id, text=xhtml_escape(text))
    sentence_generator = SentenceGenerator()
    res = sentence_generator.generate_sentence(text)
    for sent in res:
        message_out = self.generate_message('advice', {'sent': sent})
        self.send_message(message_out)
        yield gen.sleep(2.5)
def clean_url(url):
    """Return a cleaned URL starting with a scheme and a folder with a
    trailing /, or an empty string."""
    if url and url.strip():
        url = xhtml_unescape(url.strip())
        if '://' not in url:
            url = '//' + url
        scheme, netloc, path, query, fragment = parse.urlsplit(url, 'http')
        if not path:
            path += '/'
        cleaned_url = parse.urlunsplit((scheme, netloc, path, query, fragment))
    else:
        cleaned_url = ''
    return cleaned_url
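# A usage sketch for clean_url, assuming `parse` is urllib.parse and
# xhtml_unescape comes from tornado.escape:
from urllib import parse
from tornado.escape import xhtml_unescape

print(clean_url('example.com'))           # 'http://example.com/'  (default scheme, trailing /)
print(clean_url('example.com&#47;feed'))  # 'http://example.com/feed'  (entity unescaped first)
print(clean_url('   '))                   # ''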
def searchIndexersForShowName(self, search_term, lang=None, indexer=None, exact=False):
    self.set_header("Cache-Control", "max-age=0,no-cache,no-store")
    self.set_header("Content-Type", "application/json")
    if not lang or lang == "null":
        lang = settings.INDEXER_DEFAULT_LANGUAGE

    search_term = xhtml_unescape(search_term)
    search_terms = [search_term]

    # If search term ends with what looks like a year, enclose it in ()
    matches = re.match(r"^(.+ |)([12][0-9]{3})$", search_term)
    if matches:
        search_terms.append("{0}({1})".format(matches.group(1), matches.group(2)))

    for term in search_terms:
        # If search term begins with an article, let's also search for it without
        matches = re.match(r"^(?:a|an|the) (.+)$", term, re.I)
        if matches:
            search_terms.append(matches.group(1))

    results = {}
    final_results = []

    # Query Indexers for each search term and build the list of results
    for i, j in sickchill.indexer if not int(indexer) else [(int(indexer), None)]:
        logger.debug(_(f"Searching for Show with search term(s): {search_terms} on Indexer: {sickchill.indexer[i].name} (exact: {exact})"))
        for term in search_terms:
            # noinspection PyBroadException
            try:
                indexerResults = sickchill.indexer[i].search(term, language=lang, exact=exact)
            except Exception:
                logger.exception(traceback.format_exc())
                continue

            # add search results
            results.setdefault(i, []).extend(indexerResults)

    for i, shows in results.items():
        # noinspection PyUnresolvedReferences
        final_results.extend({
            (
                sickchill.indexer.name(i),
                i,
                sickchill.indexer[i].show_url,
                show["id"],
                show["seriesName"],
                show["firstAired"],
                sickchill.tv.Show.find(settings.showList, show["id"]) is not None,
            )
            for show in shows
        })

    if exact:
        logger.debug(_("Filtering and sorting out excess results because exact match was checked"))
        final_results = [item for item in final_results if search_term.lower() in item[4].lower()]
        final_results.sort(key=itemgetter(4))
        final_results.sort(key=lambda x: x[4].lower().index(search_term.lower()))
        final_results.sort(key=lambda x: x[4].lower() == search_term.lower(), reverse=True)

    lang_id = sickchill.indexer.lang_dict()[lang]
    return json.dumps({"results": final_results, "langid": lang_id, "success": len(final_results) > 0})
def output_message(self, message, message_hash): self.set_header("Content-Type", "text/plain") self.write(xhtml_unescape(message).replace("<br/>", "\n"))
def output_message(self, message, message_hash):
    self.set_header('Content-Type', 'text/plain')
    self.set_header('X-Message-Hash', message_hash)
    self.write(xhtml_unescape(message).replace('<br/>', '\n'))
def index(self, path="", includeFiles=False, fileTypes=""): self.set_header("Cache-Control", "max-age=0,no-cache,no-store") self.set_header("Content-Type", "application/json") return json.dumps(foldersAtPath(xhtml_unescape(path), True, bool(int(includeFiles)), fileTypes.split(",")))
from article.blog import Blog
from article.about import About
from article.comment import Comment
from article.reference import Reference
from article.tableform import Tableform
from article.langcode import Langcode
from tornado.escape import xhtml_unescape

blogs_all = [Blog(each) for each in Blog.datatype.find()]
comments_all = [Comment(each) for each in Comment.datatype.find()]
about_all = About.find()
ref_all = Reference.find()
table_all = Tableform.find()
code_all = Langcode.find()

for each in blogs_all + comments_all + about_all:
    each.abstract = xhtml_unescape(each.abstract)
    each.body = xhtml_unescape(each.body)
for each in ref_all:
    each.body = xhtml_unescape(each.body)
for each in code_all:
    each.code = xhtml_unescape(each.code)
for each in table_all:
    each.tableform = xhtml_unescape(each.tableform)
def convert_text_html(message):
    """Linkify URLs and turn newlines into <br/> for HTML"""
    html = xhtml_unescape(tornado_linkify(message))
    return html.replace('\n', '<br/>')
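# A sketch of the round trip, assuming tornado_linkify is tornado.escape.linkify
# imported under that alias:
from tornado.escape import linkify as tornado_linkify

print(convert_text_html(u'see http://example.com\nbye'))
# -> 'see <a href="http://example.com">http://example.com</a><br/>bye'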
def getBasicInfo(self):
    if self.info['noitem']:
        return self.info
    pq_obj = PyQuery(self.content.decode('GBK'))
    detail = pq_obj.find('div#detail')
    detail = re.search(r'宝贝类型:\s*(\S+)', detail.text().encode('utf-8'), re.S)
    self.info['itemType'] = detail.group(1) if detail else None
    if self.info['siteId'] == '2':
        self.info['charset'] = re.findall(REGX_B['charset'], self.content)
        self.info['keywords'] = re.findall(REGX_B['keywords'], self.content)
        self.info['desc'] = re.findall(REGX_B['desc'], self.content)
        self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
        self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
        self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
        self.info['userId'] = re.findall(REGX_B['userId'], self.content)
        self.info['shopName'] = re.findall(REGX_B['shopName'], self.content, re.S)
        self.info['shopUrl'] = re.findall(REGX_B['shopUrl'], self.content, re.S)
        self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content, re.S)
        self.info['itemTitle'] = re.findall(REGX_B['itemTitle'], self.content, re.S)
        self.info['initPrice'] = re.findall(REGX_B['initPrice'], self.content, re.S)
        self.info['bonus'] = re.findall(REGX_B['bonus'], self.content)
        self.info['totalSoldOut'] = re.findall(REGX_B['totalSoldOut'], self.content)
        self.info['attrList'] = re.findall(REGX_B['attrList'], self.content, re.S)
        self.info['starts'] = re.findall(REGX_B['starts'], self.content, re.S)
        self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
        self.info['userTag'] = re.findall(REGX_B['userTag'], self.content, re.S)
        self.info['cid'] = re.findall(REGX_B['cid'], self.content, re.S)
        self.info['location'] = re.findall(REGX_B['location'], self.content)
        self.info['brand'] = re.findall(REGX_B['brand'], self.content)
        for (k, v) in self.info.items():
            if v:
                if isinstance(v, list) and len(v) > 0:
                    self.info[k] = str(v[0])
                    self.info[k] = self.info[k].decode(self.res.encoding, 'ignore').encode('utf-8')
                    if k == 'attrList':
                        for t in (re.findall(r'\<\!\-\-.+?\-\-\>', self.info[k]) +
                                  re.findall(r'\s+', self.info[k])):
                            self.info[k] = self.info[k].replace(t, ' ')
                elif isinstance(v, list) and len(v) == 0:
                    self.info[k] = None
                # else:
                #     self.info[k] = self.info[k] or None
            else:
                self.info[k] = None
        if not self.info['itemTitle']:
            itemTitle = re.findall(r'title"\s*:\s*"(.+?)"', self.content)
            if itemTitle:
                self.info['itemTitle'] = itemTitle[0].decode(self.res.encoding, 'ignore').encode('utf-8')
        if not self.info['itemTitle']:
            itemTitle = re.search(r'<title>(.+?)</title>', self.content)
            self.info['itemTitle'] = itemTitle.group(1).decode(
                self.res.encoding, 'ignore').encode('utf-8').rstrip('-tmall.com天猫') if itemTitle else ''
        if not self.info['initPrice'] or self.info['initPrice'] == '0':
            price = re.findall(r'"price"\s*:\s*"(\d+)"', self.content)
            if price:
                self.info['initPrice'] = '%s.%s' % (price[0][:-2], price[0][-2:])
        if not self.info['initPrice'] or self.info['initPrice'] == '0':
            price = re.search(r'defaultItemPrice\'\s*\:\s*\'(\d+\.*\d*).+?\'', self.content, re.S)
            if price:
                self.info['initPrice'] = price.group(1)
        if not self.info['itemImg']:
            img = (re.search(r'url\((\S+?item_pic\.jpg\S+?)\)', self.content, re.S) or
                   re.search(r'J_UlThumb.+?url\((.+?)\)', self.content, re.S) or
                   re.search(r'J_ImgBooth"\s+src="(.+?)"', self.content, re.S))
            if img:
                self.info['itemImg'] = img.group(1)
        if not self.info['cid']:
            cid = re.search(r'\'categoryId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
            if cid:
                self.info['cid'] = cid.group(1)
        if not self.info['spuId']:
            spuId = re.search(r'\'spuId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
            self.info['spuId'] = spuId.group(1) if spuId else None
        shopGoodRate = [float(t.text) for t in pq_obj.find('em.count') if t]
        if shopGoodRate:
            self.info['shopGoodRate'] = '%.1f' % (sum(shopGoodRate) / len(shopGoodRate))
    else:
        self.info['charset'] = re.findall(REGX_B['charset'], self.content)
        self.info['keywords'] = re.findall(REGX_B['keywords'], self.content)
        self.info['desc'] = re.findall(REGX_B['desc'], self.content)
        self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
        self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
        self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
        self.info['userId'] = re.findall(REGX_B['userId'], self.content)
        self.info['shopName'] = re.findall(REGX_C['shopName'], self.content, re.S)
        self.info['shopUrl'] = re.findall(REGX_C['shopUrl'], self.content, re.S)
        self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content, re.S)
        self.info['itemTitle'] = re.findall(REGX_C['itemTitle'], self.content, re.S)
        self.info['initPrice'] = re.findall(REGX_C['initPrice'], self.content, re.S)
        self.info['totalSoldOut'] = re.findall(REGX_C['totalSoldOut'], self.content)
        self.info['attrList'] = re.findall(REGX_C['attrList'], self.content, re.S)
        self.info['starts'] = re.findall(REGX_B['starts'], self.content, re.S)
        self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
        self.info['userTag'] = re.findall(REGX_B['userTag'], self.content, re.S)
        self.info['cid'] = re.findall(REGX_C['cid'], self.content, re.S)
        self.info['location'] = re.findall(REGX_C['location'], self.content)
        self.info['gradeAvg'] = [
            float(row) for row in re.findall(
                r'\<em\sclass="count".+?\>(\d+\.*\d*)\<\/em\>', self.content, re.S)
        ]
        self.info['gradeAvg'] = ([sum(self.info['gradeAvg']) / len(self.info['gradeAvg'])]
                                 if self.info['gradeAvg'] else None)
        shopRank = pq_obj.find('a#shop-rank img')
        self.info['shopRank'] = re.sub(r'.+?(s\_\w+\_\d)\.gif', r'\1',
                                       shopRank.attr['src']) if shopRank else None
        self.info['shopGoodRate'] = pq_obj.find('em#J_PositiveRating').text()
        for (k, v) in self.info.items():
            if v:
                if isinstance(v, list) and len(v) > 0:
                    self.info[k] = v[0]
                    self.info[k] = (self.info[k].decode(self.res.encoding, 'ignore').encode('utf-8')
                                    if isinstance(self.info[k], (str, unicode))
                                    else self.info[k])
                    if k == 'attrList':
                        for t in (re.findall(r'\<\!\-\-.+?\-\-\>', self.info[k]) +
                                  re.findall(r'\s+', self.info[k])):
                            self.info[k] = self.info[k].replace(t, ' ')
                elif isinstance(v, list) and len(v) == 0:
                    self.info[k] = None
            else:
                self.info[k] = None
        if self.info['location']:
            self.info['location'] = unquote(self.info['location']).decode(
                self.res.encoding, 'ignore').encode('utf-8')
        if not self.info['itemImg']:
            img = re.search(r'id="J_ImgBooth" data-src="(.+?)"', self.content, re.S)
            if img:
                self.info['itemImg'] = img.group(1)
        # note: eval on page-derived data is kept from the original; it decodes %XX escapes
        self.info['shopName'] = eval("'%s'" % (self.info['shopName'] or '').replace('%', '\\x'))
        created = re.search(r'dbst\s*:\s*(\d+)', self.content, re.S)
        self.info['created'] = created.group(1) if created else None
        self.info['attrList'] = (xhtml_unescape(self.info['attrList']).encode('utf-8')
                                 if self.info['attrList'] else None)
        self.info['attrList'] = re.sub(r'\s+', r' ', self.info['attrList'] or '')
        self.info['attrs'] = re.findall(r'<li.+?>(.+?)[::]\s*(.*?)</li>'.decode('utf-8'),
                                        (self.info['attrList'] or '').decode('utf-8'), re.S)
        self.info['attrs'] = [[t[0].strip().encode('utf-8'),
                               t[1].strip('\t\r ').encode('utf-8')]
                              for t in self.info['attrs']]
        self.info['offSale'] = (
            self.content.decode(self.res.encoding).encode('utf-8').find('已下架') > -1)
        self.info['location'] = self.info['location'] or None
    self.deal_taobao_meal_basic()
    return self.info
def render(self, text, hl, **kwargs):
    for wd in hl:
        text = re.sub(r'\b({})\b'.format(wd),
                      '<span style="color:red">{}</span>'.format(wd), text)
    return escape.xhtml_unescape(escape.linkify(text, **kwargs))
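# How the highlight survives linkify's escaping: the injected <span> is escaped
# by linkify and then restored by xhtml_unescape. An illustrative call, assuming
# hl is a list of plain keywords:
#   render(self, 'try tornado at http://www.tornadoweb.org', ['tornado'])
#   -> 'try <span style="color:red">tornado</span> at '
#      '<a href="http://www.tornadoweb.org">http://www.tornadoweb.org</a>'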
def addExistingShows(self, shows_to_add, promptForSettings, **kwargs):
    """
    Receives a dir list and add them. Adds the ones with given TVDB IDs first, then forwards
    along to the newShow page.
    """
    # grab a list of other shows to add, if provided
    if not shows_to_add:
        shows_to_add = []
    elif not isinstance(shows_to_add, list):
        shows_to_add = [shows_to_add]

    shows_to_add = [unquote_plus(xhtml_unescape(x)) for x in shows_to_add]

    indexer_id_given = []
    dirs_only = []
    # separate all the ones with Indexer IDs
    for cur_dir in shows_to_add:
        if "|" in cur_dir:
            split_vals = cur_dir.split("|")
            if len(split_vals) < 3:
                dirs_only.append(cur_dir)
        if "|" not in cur_dir:
            dirs_only.append(cur_dir)
        else:
            indexer, show_dir, indexer_id, show_name = self.split_extra_show(cur_dir)
            if not show_dir or not indexer_id or not show_name:
                continue
            indexer_id_given.append((int(indexer), show_dir, int(indexer_id), show_name))

    # if they want me to prompt for settings then I will just carry on to the newShow page
    if shows_to_add and config.checkbox_to_value(promptForSettings):
        return self.newShow(shows_to_add[0], shows_to_add[1:])

    # if they don't want me to prompt for settings then I can just add all the nfo shows now
    num_added = 0
    for cur_show in indexer_id_given:
        indexer, show_dir, indexer_id, show_name = cur_show

        if indexer is not None and indexer_id is not None:
            # add the show
            settings.showQueueScheduler.action.add_show(
                indexer,
                indexer_id,
                show_dir,
                default_status=settings.STATUS_DEFAULT,
                quality=settings.QUALITY_DEFAULT,
                season_folders=settings.SEASON_FOLDERS_DEFAULT,
                subtitles=settings.SUBTITLES_DEFAULT,
                anime=settings.ANIME_DEFAULT,
                scene=settings.SCENE_DEFAULT,
                default_status_after=settings.STATUS_DEFAULT_AFTER,
            )
            num_added += 1

    if num_added:
        ui.notifications.message(
            _("Shows Added"),
            _("Automatically added {num_shows} from their existing metadata files").format(num_shows=str(num_added)))

    # if we're done then go home
    if not dirs_only:
        return self.redirect("/home/")

    # for the remaining shows we need to prompt for each one, so forward this on to the newShow page
    return self.newShow(dirs_only[0], dirs_only[1:])
def getBasicInfo(self):
    if self.info['siteId'] == '2':
        self.info['charset'] = re.findall(REGX_B['charset'], self.content)
        self.info['keywords'] = re.findall(REGX_B['keywords'], self.content)
        self.info['desc'] = re.findall(REGX_B['desc'], self.content)
        self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
        self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
        self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
        self.info['userId'] = re.findall(REGX_B['userId'], self.content)
        self.info['shopName'] = re.findall(REGX_B['shopName'], self.content, re.S)
        self.info['shopUrl'] = re.findall(REGX_B['shopUrl'], self.content, re.S)
        self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content, re.S)
        self.info['itemTitle'] = re.findall(REGX_B['itemTitle'], self.content, re.S)
        self.info['initPrice'] = re.findall(REGX_B['initPrice'], self.content, re.S)
        self.info['bonus'] = re.findall(REGX_B['bonus'], self.content)
        self.info['totalSoldOut'] = re.findall(REGX_B['totalSoldOut'], self.content)
        self.info['attrList'] = re.findall(REGX_B['attrList'], self.content, re.S)
        self.info['starts'] = re.findall(REGX_B['starts'], self.content, re.S)
        self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
        self.info['userTag'] = re.findall(REGX_B['userTag'], self.content, re.S)
        self.info['cid'] = re.findall(REGX_B['cid'], self.content, re.S)
        self.info['location'] = re.findall(REGX_B['location'], self.content)
        self.info['brand'] = re.findall(REGX_B['brand'], self.content)
        for (k, v) in self.info.items():
            if v:
                if len(v) > 0:
                    self.info[k] = v[0]
                    self.info[k] = self.info[k].decode(self.res.encoding, 'ignore').encode('utf-8')
                    if k == 'attrList':
                        for t in (re.findall(r'\<\!\-\-.+?\-\-\>', self.info[k]) +
                                  re.findall(r'\s+', self.info[k])):
                            self.info[k] = self.info[k].replace(t, ' ')
                else:
                    self.info[k] = None
            else:
                self.info[k] = None
        if not self.info['itemTitle']:
            itemTitle = re.findall(r'title"\s*:\s*"(.+?)"', self.content)
            if itemTitle:
                self.info['itemTitle'] = itemTitle[0].decode(self.res.encoding, 'ignore').encode('utf-8')
        if not self.info['initPrice'] or self.info['initPrice'] == '0':
            price = re.findall(r'"price"\s*:\s*"(\d+)"', self.content)
            if price:
                self.info['initPrice'] = '%s.%s' % (price[0][:-2], price[0][-2:])
        if not self.info['initPrice'] or self.info['initPrice'] == '0':
            price = re.search(r'defaultItemPrice\'\s*\:\s*\'(\d+\.*\d*).+?\'', self.content, re.S)
            if price:
                self.info['initPrice'] = price.group(1)
        if not self.info['itemImg']:
            img = (re.search(r'url\((\S+?item_pic\.jpg\S+?)\)', self.content, re.S) or
                   re.search(r'J_UlThumb.+?url\((.+?)\)', self.content, re.S))
            if img:
                self.info['itemImg'] = img.group(1)
        if not self.info['cid']:
            cid = re.search(r'\'categoryId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
            if cid:
                self.info['cid'] = cid.group(1)
        if not self.info['spuId']:
            spuId = re.search(r'\'spuId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
            self.info['spuId'] = spuId.group(1) if spuId else None
    else:
        self.info['charset'] = re.findall(REGX_B['charset'], self.content)
        self.info['keywords'] = re.findall(REGX_B['keywords'], self.content)
        self.info['desc'] = re.findall(REGX_B['desc'], self.content)
        self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
        self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
        self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
        self.info['userId'] = re.findall(REGX_B['userId'], self.content)
        self.info['shopName'] = re.findall(REGX_C['shopName'], self.content, re.S)
        self.info['shopUrl'] = re.findall(REGX_C['shopUrl'], self.content, re.S)
        self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content, re.S)
        self.info['itemTitle'] = re.findall(REGX_C['itemTitle'], self.content, re.S)
        self.info['initPrice'] = re.findall(REGX_C['initPrice'], self.content, re.S)
        self.info['totalSoldOut'] = re.findall(REGX_C['totalSoldOut'], self.content)
        self.info['attrList'] = re.findall(REGX_C['attrList'], self.content, re.S)
        self.info['starts'] = re.findall(REGX_B['starts'], self.content, re.S)
        self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
        self.info['userTag'] = re.findall(REGX_B['userTag'], self.content, re.S)
        self.info['cid'] = re.findall(REGX_C['cid'], self.content, re.S)
        self.info['location'] = re.findall(REGX_C['location'], self.content)
        for (k, v) in self.info.items():
            if v:
                if len(v) > 0:
                    self.info[k] = v[0]
                    self.info[k] = self.info[k].decode(self.res.encoding, 'ignore').encode('utf-8')
                    if k == 'attrList':
                        for t in (re.findall(r'\<\!\-\-.+?\-\-\>', self.info[k]) +
                                  re.findall(r'\s+', self.info[k])):
                            self.info[k] = self.info[k].replace(t, ' ')
                else:
                    self.info[k] = None
            else:
                self.info[k] = None
        if self.info['location']:
            self.info['location'] = unquote(self.info['location']).decode(
                self.res.encoding, 'ignore').encode('utf-8')
        self.info['attrList'] = (xhtml_unescape(self.info['attrList']).encode('utf-8')
                                 if self.info['attrList'] else None)
    return self.info
def make_mobi(user, feeds, data_dir, kindle_format='book', mobi_templates=None, **other_services):
    """docstring for make_mobi"""
    is_updated = False
    for feed in feeds:
        if len(feed.items) > 0:
            is_updated = True
    if not is_updated:
        logging.info("no feed update.")
        return None
    if kindle_format not in ['book', 'periodical']:
        kindle_format = 'book'
    logging.info("generate .mobi file start... ")
    if not mobi_templates:
        from kindletemplate import TEMPLATES
        mobi_templates = TEMPLATES
    for tpl in mobi_templates:
        if tpl == 'book.html':
            continue
        t = template.Template(mobi_templates[tpl])
        content = t.generate(
            user=user,
            feeds=feeds,
            uuid=uuid.uuid1(),
            format=kindle_format,
            **other_services
        )
        fp = open(os.path.join(data_dir, tpl), 'wb')
        content = content.decode('utf-8', 'ignore').encode('utf-8')
        fp.write(escape.xhtml_unescape(content))
        # fp.write(content)
        fp.close()
    pre_mobi_file = "TheOldReader_%s" % time.strftime('%m-%dT%Hh%Mm')
    opf_file = os.path.join(data_dir, "content.opf")
    os.environ["PATH"] = os.environ["PATH"] + ":./"
    subprocess.call('%s %s -o "%s" > log.txt' % (Kindle.kindle_gen_prog, opf_file, pre_mobi_file),
                    shell=True)
    pre_mobi_file = os.path.join(data_dir, pre_mobi_file)
    mobi_file = pre_mobi_file + ".mobi"
    status = subprocess.call('kindlestrip.py "%s" "%s" >> log.txt' % (pre_mobi_file, mobi_file),
                             shell=True)
    if status != 0:
        import shutil
        shutil.move(pre_mobi_file, mobi_file)
    if not os.path.isfile(mobi_file):
        logging.error("failed!")
        return None
    else:
        fsize = os.path.getsize(mobi_file)
        logging.info(".mobi save as: %s(%.2fMB)" % (mobi_file, float(fsize) / (1024 * 1024)))
        return mobi_file
def addNewShow(
    self,
    whichSeries=None,
    indexerLang=None,
    rootDir=None,
    defaultStatus=None,
    quality_preset=None,
    anyQualities=None,
    bestQualities=None,
    season_folders=None,
    subtitles=None,
    subtitles_sr_metadata=None,
    fullShowPath=None,
    other_shows=None,
    skipShow=None,
    providedIndexer=None,
    anime=None,
    scene=None,
    blacklist=None,
    whitelist=None,
    defaultStatusAfter=None,
):
    """
    Receive tvdb id, dir, and other options and create a show from them. If extra show dirs are
    provided then it forwards back to newShow, if not it goes to /home.
    """
    if not indexerLang:
        indexerLang = settings.INDEXER_DEFAULT_LANGUAGE

    # grab our list of other dirs if given
    if not other_shows:
        other_shows = []
    elif not isinstance(other_shows, list):
        other_shows = [other_shows]

    def finishAddShow():
        # if there are no extra shows then go home
        if not other_shows:
            return self.redirect("/home/")

        # peel off the next one
        next_show_dir = other_shows[0]
        rest_of_show_dirs = other_shows[1:]

        # go to add the next show
        return self.newShow(next_show_dir, rest_of_show_dirs)

    # if we're skipping then behave accordingly
    if skipShow:
        return finishAddShow()

    # sanity check on our inputs
    if (not rootDir and not fullShowPath) or not whichSeries:
        return _("Missing params, no Indexer ID or folder: {show_to_add} and {root_dir}/{show_path}").format(
            show_to_add=whichSeries, root_dir=rootDir, show_path=fullShowPath)

    # figure out what show we're adding and where
    series_pieces = whichSeries.split("|")
    if (whichSeries and rootDir) or (whichSeries and fullShowPath and len(series_pieces) > 1):
        if len(series_pieces) < 6:
            logger.error("Unable to add show due to show selection. Not enough arguments: {0}".format(repr(series_pieces)))
            ui.notifications.error(_("Unknown error. Unable to add show due to problem with show selection."))
            return self.redirect("/addShows/existingShows/")

        indexer = int(series_pieces[1])
        indexer_id = int(series_pieces[3])
        # Show name was sent in UTF-8 in the form
        show_name = xhtml_unescape(series_pieces[4])
    else:
        # if no indexer was provided use the default indexer set in General settings
        if not providedIndexer:
            providedIndexer = settings.INDEXER_DEFAULT

        indexer = int(providedIndexer)
        indexer_id = int(whichSeries)
        show_name = os.path.basename(os.path.normpath(xhtml_unescape(fullShowPath)))

    # use the whole path if it's given, or else append the show name to the root dir to get the full show path
    if fullShowPath:
        show_dir = os.path.normpath(xhtml_unescape(fullShowPath))
        extra_check_dir = show_dir
    else:
        folder_name = show_name
        s = sickchill.indexer.series_by_id(indexerid=indexer_id, indexer=indexer, language=indexerLang)
        if settings.ADD_SHOWS_WITH_YEAR and s.firstAired:
            try:
                year = "({0})".format(dateutil.parser.parse(s.firstAired).year)
                if year not in folder_name:
                    folder_name = "{0} {1}".format(s.seriesName, year)
            except (TypeError, ValueError):
                logger.info(_("Could not append the show year folder for the show: {0}").format(folder_name))

        show_dir = os.path.join(rootDir, sanitize_filename(xhtml_unescape(folder_name)))
        extra_check_dir = os.path.join(rootDir, sanitize_filename(xhtml_unescape(show_name)))

    # blanket policy - if the dir exists you should have used "add existing show" numbnuts
    if (os.path.isdir(show_dir) or os.path.isdir(extra_check_dir)) and not fullShowPath:
        ui.notifications.error(
            _("Unable to add show"),
            _("Folder {show_dir} exists already").format(show_dir=show_dir))
        return self.redirect("/addShows/existingShows/")

    # don't create show dir if config says not to
    if settings.ADD_SHOWS_WO_DIR:
        logger.info("Skipping initial creation of " + show_dir + " due to config.ini setting")
    else:
        dir_exists = helpers.makeDir(show_dir)
        if not dir_exists:
            logger.exception("Unable to create the folder " + show_dir + ", can't add the show")
            ui.notifications.error(
                _("Unable to add show"),
                _("Unable to create the folder {show_dir}, can't add the show").format(show_dir=show_dir))
            # Don't redirect to default page because user wants to see the new show
            return self.redirect("/home/")
        else:
            helpers.chmodAsParent(show_dir)

    # prepare the inputs for passing along
    scene = config.checkbox_to_value(scene)
    anime = config.checkbox_to_value(anime)
    season_folders = config.checkbox_to_value(season_folders)
    subtitles = config.checkbox_to_value(subtitles)
    subtitles_sr_metadata = config.checkbox_to_value(subtitles_sr_metadata)

    if whitelist:
        whitelist = short_group_names(whitelist)
    if blacklist:
        blacklist = short_group_names(blacklist)

    if not anyQualities:
        anyQualities = []
    if not bestQualities or try_int(quality_preset, None):
        bestQualities = []
    if not isinstance(anyQualities, list):
        anyQualities = [anyQualities]
    if not isinstance(bestQualities, list):
        bestQualities = [bestQualities]
    newQuality = Quality.combineQualities([int(q) for q in anyQualities], [int(q) for q in bestQualities])

    # add the show
    settings.showQueueScheduler.action.add_show(
        indexer,
        indexer_id,
        showDir=show_dir,
        default_status=int(defaultStatus),
        quality=newQuality,
        season_folders=season_folders,
        lang=indexerLang,
        subtitles=subtitles,
        subtitles_sr_metadata=subtitles_sr_metadata,
        anime=anime,
        scene=scene,
        paused=None,
        blacklist=blacklist,
        whitelist=whitelist,
        default_status_after=int(defaultStatusAfter),
        root_dir=rootDir,
    )
    ui.notifications.message(
        _("Show added"),
        _("Adding the specified show into {show_dir}").format(show_dir=show_dir))

    return finishAddShow()
def get_current_user(self):
    user = self.get_secure_cookie('user')
    return escape.xhtml_unescape(user) if user else None
def massAddTable(self, rootDir=None):
    t = PageTemplate(rh=self, filename="home_massAddTable.mako")

    if not rootDir:
        return _("No folders selected.")
    elif not isinstance(rootDir, list):
        root_dirs = [rootDir]
    else:
        root_dirs = rootDir

    root_dirs = [unquote_plus(xhtml_unescape(x)) for x in root_dirs]

    if settings.ROOT_DIRS:
        default_index = int(settings.ROOT_DIRS.split("|")[0])
    else:
        default_index = 0

    if len(root_dirs) > default_index:
        tmp = root_dirs[default_index]
        if tmp in root_dirs:
            root_dirs.remove(tmp)
            root_dirs.insert(0, tmp)

    dir_list = []

    main_db_con = db.DBConnection()
    for root_dir in root_dirs:
        # noinspection PyBroadException
        try:
            file_list = os.listdir(root_dir)
        except Exception:
            continue

        for cur_file in file_list:
            # noinspection PyBroadException
            try:
                cur_path = os.path.normpath(os.path.join(root_dir, cur_file))
                if not os.path.isdir(cur_path):
                    continue
                # ignore Synology folders
                if cur_file.lower() in ["#recycle", "@eadir"]:
                    continue
            except Exception:
                continue

            cur_dir = {
                "dir": cur_path,
                "existing_info": (None, None, None),
                "display_dir": "<b>" + os.path.dirname(cur_path) + os.sep + "</b>" + os.path.basename(cur_path),
            }

            # see if the folder is in KODI already
            dirResults = main_db_con.select("SELECT indexer_id FROM tv_shows WHERE location = ? LIMIT 1", [cur_path])

            if dirResults:
                cur_dir["added_already"] = True
            else:
                cur_dir["added_already"] = False

            dir_list.append(cur_dir)

            indexer_id = show_name = indexer = None
            for cur_provider in settings.metadata_provider_dict.values():
                if not (indexer_id and show_name):
                    (indexer_id, show_name, indexer) = cur_provider.retrieveShowMetadata(cur_path)
                if all((indexer_id, show_name, indexer)):
                    break

            if all((indexer_id, show_name, indexer)):
                cur_dir["existing_info"] = (indexer_id, show_name, indexer)

            if indexer_id and Show.find(settings.showList, indexer_id):
                cur_dir["added_already"] = True

    return t.render(dirList=dir_list)
def escape(raw):
    '''Html escape.'''
    return xhtml_escape(xhtml_unescape(raw))
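# Why unescape-then-escape: it normalizes input that may already be escaped, so
# entities are never double-escaped (tornado.escape behavior; illustrative only):
from tornado.escape import xhtml_escape, xhtml_unescape

print(escape('a & b'))      # 'a &amp; b'
print(escape('a &amp; b'))  # 'a &amp; b' as well, not 'a &amp;amp; b'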