def _descr_html2ubb(string: str) -> str:
    """Convert an HTML description into BBCode markup.

    :param string: HTML source text
    :return: the BBCode rendering as a plain ``str``
    """
    converter = HTML2BBCode()
    return str(converter.feed(string))
def format(cls, fanfic: Story) -> None:
    """Rewrite every chapter body of *fanfic* from HTML to BBCode in place."""
    newline_pattern = re.compile("\n")
    for chapter in fanfic.chapters:
        # Drop newlines introduced by pretty-printing before parsing.
        body = newline_pattern.sub('', chapter.processed_body)
        converter = HTML2BBCode()
        chapter.processed_body = converter.feed(body)
def forwards(self, orm):
    "Write your forwards methods here."
    # Note: Don't use "from appname.models import ModelName".
    # Use orm.ModelName to refer to models in this application,
    # and orm['appname.ModelName'] for models in other applications.
    # Backfill bbcode_content by converting each comment's stored HTML.
    for comment in orm['offers.Comment'].objects.all():
        converter = HTML2BBCode()
        comment.bbcode_content = unicode(converter.feed(comment.content))
        comment.save()
def parse_content(forum_url, sleep_time, autoposting):
    """Scrape every thread listed on a XenForo forum section page.

    For each thread the title, tags, creator and BBCode-converted body are
    collected, then either posted through the API (when ``autoposting`` is
    true and ``API_SETTINGS`` is configured) or written to a text file in a
    folder named after the forum host.

    :param forum_url: URL of the forum section to scrape
    :param sleep_time: delay passed to ``get_html`` for the section request
    :param autoposting: when true, create threads via the API instead of
        saving them to disk
    """
    bbcode_parser = HTML2BBCode()
    base_html = get_html(forum_url, sleep_time)
    base_url = re.search(r'.+\.[a-zA-Z]+/', forum_url)[0]
    base_folder = re.search(r'://(.+)/', base_url)[0][3:-1]
    if not autoposting:
        try:
            os.mkdir(base_folder)
            print(f'Folder with name {base_folder} successfully created')
        except FileExistsError:
            print(f'Folder with name "{base_folder}" already exists')
    bs = BeautifulSoup(base_html, 'html5lib')
    forum_structItemContainer = bs.find(class_='structItemContainer')
    threads = forum_structItemContainer.find_all(
        'a', {'data-xf-init': 'preview-tooltip'})
    threads_links = [base_url + thread.get('href') for thread in threads]
    for thread_link in threads_links:
        thread_tags = []
        # NOTE(review): sleep_time is not forwarded here — confirm get_html
        # applies a suitable default delay between requests.
        thread_html = get_html(thread_link)
        thread = BeautifulSoup(thread_html, 'html5lib')
        thread_title = thread.find('h1', {'class': 'p-title-value'}).text
        print('Parsing:' + thread_title)
        thread_tagList = thread.find('span', {'class': 'js-tagList'})
        if thread_tagList is not None:
            for tag_text in thread_tagList.find_all('a'):
                thread_tags.append(tag_text.text)
        thread_creator = thread.find('h4', {'class': 'message-name'}).text
        thread_content = thread.find('div', {'class': 'bbWrapper'})
        # Slice off the enclosing <div class="bbWrapper"> / </div> markup
        # before converting the inner HTML to BBCode.
        thread_content = str(bbcode_parser.feed(str(thread_content)[23:-6]))
        if API_SETTINGS is not None and autoposting:
            api_create_thread(thread_title, thread_content, thread_tags)
        else:
            try:
                thread_file = thread_link.replace(base_url, "").replace(
                    "/", "").replace("threads", "")
                # BUG FIX: the "Thread text" section previously repeated
                # thread_title, so the actual body was never saved.
                content = f'Thread title:\n{thread_title}\n\n' \
                          f'Thread tags:\n{thread_tags}\n\n' \
                          f'Thread creator:\n{thread_creator}\n\n' \
                          f'Thread text:\n{thread_content}'
                # with-statement guarantees the file is closed on error.
                with codecs.open(base_folder + '/' + thread_file + '.txt',
                                 'w+', 'utf-8') as file:
                    file.write(content)
                print(f'Thread {thread_file} successfully saved\n')
            except Exception as exc:
                print(f'An error occurred while saving the theme: {exc}')
def html2ubb(html: str) -> str:
    """Translate *html* markup into its BBCode equivalent."""
    converter = HTML2BBCode()
    return str(converter.feed(html))
def html2ubb(html: str) -> str:
    """Translate *html* into BBCode, collapsing doubled newlines."""
    bbcode = str(HTML2BBCode().feed(html))
    # Each blank line produced by the parser becomes a single break.
    return re.sub("\n\n", "\n", bbcode)
def export(self):
    """Render this release's notes into BBCode announcement text.

    Reads ``docs/releases/<version>.txt``, prepends a boiler-plate header
    (docs / Docker / PyPI locations), rewrites ``:gitlab-issue:`` roles into
    explicit GitLab links, converts the reST to HTML via docutils, then to
    BBCode via HTML2BBCode, and finally applies forum-specific tag
    substitutions.

    :return: the finished BBCode string
    """
    path_documentation = Path(
        BASE_DIR) / '..' / 'docs' / 'releases' / '{}.txt'.format(
        self.version)
    with path_documentation.open(mode='r') as file_object:
        content = []
        content.append('New version of Mayan EDMS available\n')
        content.append('===================================\n\n\n\n\n\n')
        content.append(
            'Please read the release notes before upgrading: '
            'https://docs.mayan-edms.com/releases/{}.html\n\n'.format(
                self.version))
        content.append('Package locations\n')
        content.append('=================\n\n\n\n\n\n\n\n')
        content.append('Docker image available at: '
                       'https://hub.docker.com/r/mayanedms/mayanedms\n\n')
        content.append('Python packages available at: '
                       'https://pypi.org/project/mayan-edms/{}/ and '
                       'installed via:\n\n'.format(self.version))
        content.append('``pip install mayan-edms=={}``\n\n'.format(
            self.version))
        # Skip the release-notes file's four-line title block.
        file_object.readline()
        file_object.readline()
        file_object.readline()
        file_object.readline()
        for line in file_object:
            if ':gitlab-issue:' in line:
                # Expand ':gitlab-issue:`NNN` text' into a reST link:
                # split on backticks -> parts[1] is the issue number,
                # parts[2] the trailing text.
                line_parts = line.split('`')
                result = (
                    '- `GitLab issue #{} '
                    '<https://gitlab.com/mayan-edms/mayan-edms/issues/{}>`_ {}'
                    .format(line_parts[1], line_parts[1], line_parts[2]))
                content.append(result)
            else:
                content.append(line)
    # reST -> HTML fragment via docutils.
    parts = core.publish_parts(source=''.join(content), writer_name='html')
    html_fragment = '{}{}'.format(parts['body_pre_docinfo'],
                                  parts['fragment'])
    result = ReleaseNoteExporter.filter_elements(
        tree=html.fromstring(html_fragment))
    # Drop the first filtered element (presumably the top-level wrapper;
    # verify against filter_elements).
    result = result[1:]
    # NOTE(review): if filter_elements yields bytes, str(b''.join(...))
    # produces a "b'...'" repr string, not decoded text — confirm intended.
    html_output = str(b''.join(result))
    # Normalise docutils' <tt> literals to <code> so the BBCode parser
    # maps them to [code] tags.
    html_replace_list = (
        ('<tt', '<code'),
        ('</tt>', '</code>'),
    )
    for html_replace_item in html_replace_list:
        html_output = html_output.replace(*html_replace_item)
    parser = HTML2BBCode()
    result = str(parser.feed(html_output))
    # Forum-specific BBCode substitutions: headings to sizes/bold,
    # list items to [*], code spans to bold-italic.
    bbcode_replace_list = (
        ('[h1]', '\n[size=150]'),
        ('[/h1]', '[/size]\n'),
        ('[h2]', '\n[size=150]'),
        ('[/h2]', '[/size]\n'),
        ('[h3]', '\n[b]'),
        ('[/h3]', '[/b]\n'),
        ('[li]', '\n[*]'),
        ('[/li]', ''),
        ('[code]', '[b][i]'),
        ('[/code]', '[/i][/b]'),
    )
    for bbcode_replace_item in bbcode_replace_list:
        result = result.replace(*bbcode_replace_item)
    return result
def to_bbcode(descr):
    """Return the BBCode rendering of the HTML string *descr*."""
    return HTML2BBCode().feed(descr)
import json import logging as log import os import pickle import shutil import subprocess import sys from fuzzywuzzy import fuzz from html2bbcode.parser import HTML2BBCode from settings import (ANNOUNCE, FUZZ_RATIO, PASSWORD, USERNAME, WM2_MEDIA, WM2_ROOT, WORKING_ROOT) from whatapi import WhatAPI, ext_matcher, locate from wmapi import artistInfo, releaseInfo, torrentGroup html_to_bbcode = HTML2BBCode() VERSION = "0.7b" gazelle_url = 'https://passtheheadphones.me/' resumeList = set([]) potential_uploads = 0 headers = { 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3)'\ 'AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.79'\ 'Safari/535.11', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9'\ ',*/*;q=0.8', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8',
def leech_one_entry (entry_url):
    """Scrape a single blog entry (Python 2): extract its category, title
    and BBCode body, split out the per-host download links, and persist
    everything as an ``Ulnew`` model instance.

    :param entry_url: URL of the entry page to scrape
    """
    html = get_html (entry_url)
    soup = BeautifulSoup(html)
    class_entry_name = 'div.'+'entry-content'
    category = soup.select('section.entry-category')[0].find('a').string
    category = u'{0}'.format(category)
    print category
    print 'type(category)',type(category)
    quotes_wrappers = soup.select(class_entry_name)[0]
    # Full entry-content HTML as a unicode string.
    noidung = u'{0}'.format(quotes_wrappers)
    print 'type of entry-title' ,type(quotes_wrappers)
    titlea = quotes_wrappers.find('h1', attrs={'class': 'entry-title'}).string
    print 'type of titlea',type(titlea)
    titlea = u'{0}'.format(titlea)
    #titlea = titlea.replace(u'–', u'-')
    print 'title' , titlea
    print '#########'
    # Convert the entry HTML to BBCode, then work on UTF-8 bytes.
    parser = HTML2BBCode()
    bbcode = parser.feed(noidung)
    bbcode = bbcode.encode('utf-8')
    # Strip the leading "Posted on ..." boiler-plate up to the first [img].
    p = re.compile( '\s*Posted on.*?(?P<caiquaigi>\[img\])',re.DOTALL)
    bbcode = p.sub( '\g<caiquaigi>', bbcode)
    prefix_links = ['http://rapidgator.net/file/','http://uploaded.net/file/','http://www.uploadable.ch/file/']
    stuff = map(lambda w: bbcode.find(w) , prefix_links)
    print stuff
    # Offset of the first file-host link in the BBCode text.
    # NOTE(review): `i > 0` also skips a genuine match at offset 0 and
    # raises ValueError when no host is found — confirm both are intended.
    min_index_bbcode_host = min(i for i in stuff if i > 0)
    print 'min_index_bbcode_host',min_index_bbcode_host
    code_part = bbcode[min_index_bbcode_host:]
    # Everything before the newline that precedes the links block is the
    # human-readable description.
    n_last_index = bbcode.rfind('\n',min_index_bbcode_host-10,min_index_bbcode_host)
    description = bbcode[:n_last_index]
    description = description.replace('[b] [/b]',u'').replace(u'[/img][/url]',u'[/img]')
    print 'description',description
    print '## Find links'
    # Collect the links for each supported host into newline-joined text.
    prefix_links = {'rg':'http://rapidgator.net/file/','ul':'http://uploaded.net/file/','up':'http://www.uploadable.ch/file/'}
    links_dict ={}
    for key,prefix_link in prefix_links.iteritems():
        links = re.findall('('+prefix_link+'.*?)[\[\]\n\r]', code_part, re.DOTALL)
        links = unique_list(links)
        linktxt = ''
        for link in links:
            linktxt = linktxt + urllib.unquote(link).decode('utf-8') +'\n'
        print linktxt
        links_dict[key] = linktxt
    print links_dict
    # Upsert by title and store the scraped fields.
    new_instance = Ulnew.objects.get_or_create ( title= titlea )[0]
    new_instance.category = category
    new_instance.description = description
    new_instance.rg= links_dict['rg']
    new_instance.ul = links_dict['ul']
    new_instance.up = links_dict['up']
    new_instance.save()
    print 'ok'
def leech_one_entry_freedl2u(ahref_title): print 'ahref_title', ahref_title entry_url = ahref_title[0] title = ahref_title[1] if Ulnew.objects.filter(title=title).exists(): print 'entry nay da ton tai %s' % title return '' html = get_html(entry_url) #print 'full entry html',html soup = BeautifulSoup(html) title = u'{0}'.format(soup.find(id="news-title").string) print 'title', title category = soup.find_all('span', attrs={'itemprop': 'title'})[-1].string print 'category', category print 'type of category', type(category) ''' dlecontent = soup.find(id="dle-content") print dlecontent ''' class_entry_name = 'div.' + 'base.fullstory' fullstory = soup.select(class_entry_name)[0] #print fullstory subfull = fullstory.select('div.maincont')[0] print 'subfull', subfull #category = soup.select('section.entry-category')[0].find('a').string #category = u'{0}'.format(category) #print category noidung = u'{0}'.format(subfull).replace('<br/>', '\n').replace( '<br>', '\n').replace('Rapidgator.net:', '') parser = HTML2BBCode() bbcode = parser.feed(noidung).replace( 'Buy Premium To Support Me Get Resumable Support Max Speed', '') #print 'type of enoidung' ,type(noidung) print 'noidung', noidung, bbcode ''' titlea = subfull.find('h1', attrs={'class': 'entry-title'}).string print 'type of titlea',type(titlea) titlea = u'{0}'.format(titlea) print 'type of titlea',type(titlea) #titlea = titlea.string #titlea = titlea.replace(u'–', u'-') print 'title' , titlea ''' print '#########' ''' #bbcode = bbcode.encode('utf-8') p = re.compile( '\s*Posted on.*?(?P<caiquaigi>\[img\])',re.DOTALL) bbcode = p.sub( '\g<caiquaigi>', bbcode) p = re.compile( '\[url=.*?\]',re.DOTALL) bbcode = p.sub( '', bbcode) ''' prefix_links = [ 'http://rapidgator.net/file/', 'http://uploaded.net/file/', 'http://www.uploadable.ch/file/', 'http://www.nitroflare.com/view/', 'http://nitroflare.com/view' ] stuff = map(lambda w: bbcode.find(w), prefix_links) print 'stuff', stuff try: min_index_bbcode_host = min(i for i in 
stuff if i > 0) except: return '' print 'min_index_bbcode_host', min_index_bbcode_host code_part = bbcode[min_index_bbcode_host:] n_last_index = bbcode.rfind('\n', min_index_bbcode_host - 10, min_index_bbcode_host) description = bbcode[:n_last_index] description = description.replace('[b] [/b]', u'').replace( u'[/img][/url]', u'[/img]' ).replace( '[img]http://sharenxs.com/photos/2014/10/24/54498d402d290/tn-f0912e470f81f8acf2c127d9a94b5983.jpg[/img]', '').replace('nitroflare', '') print 'description', description print '## Find links' prefix_links = { 'rg': 'http://rapidgator.net/file/', 'ul': 'http://uploaded.net/file/', 'up': 'http://www.uploadable.ch/file/' } links_dict = {} for key, prefix_link in prefix_links.iteritems(): links = re.findall('(' + prefix_link + '.*?)[\[\]\n\r]', code_part, re.DOTALL) links = unique_list(links) linktxt = '' for link in links: linktxt = linktxt + urllib.unquote(link).decode('utf-8') + '\n' print linktxt links_dict[key] = linktxt print links_dict new_instance = Ulnew.objects.get_or_create(title=title)[0] new_instance.category = category new_instance.description = description new_instance.rg = links_dict['rg'] new_instance.ul = links_dict['ul'] new_instance.up = links_dict['up'] new_instance.save() print 'ok'
    # Tail of a factory defined before this chunk: build the Plugin record.
    return Plugin(f'HandyNotes_{name[3:]}', version, tag, dir)


def get_changelog(plugin, bbcode=False, full=False):
    """Return the plugin's changelog text from CHANGELOG.md.

    :param plugin: plugin whose ``dir`` contains CHANGELOG.md
    :param bbcode: when True, convert the markdown changelog to BBCode
    :param full: when True, return the whole file instead of the newest entry
    """
    with open(path.join(plugin.dir, 'CHANGELOG.md')) as f:
        if full:
            changelog = f.read()
        # Newest entry = text between the first two "# v<number>" headings.
        elif match := re.match(r'^# v\d+\s+(.+?)# v\d+', f.read(), re.M | re.S):
            changelog = match.group(1).strip() + '\n'
        else:
            changelog = 'No changelog entries for this release.\n'
    if bbcode:
        changelog = markdown(changelog)  # to html
        changelog = HTML2BBCode('bbcode.ini').feed(changelog)  # to bbcode
    return changelog


def get_wow_version(plugin):
    """Decode the ``## Interface:`` number in the plugin's .toc file into a
    dotted WoW version string (e.g. ``90105`` -> ``9.1.5``)."""
    with open(path.join(plugin.dir, f'{plugin.name}.toc')) as f:
        inum = re.search(r'## Interface: (\d+)', f.read(), re.M).group(1)
    # Left-pad 5-digit values so the string splits into three 2-digit fields.
    inum = f'0{inum}' if len(inum) == 5 else inum
    return '.'.join(
        map(str, map(int, [inum[i:i + 2] for i in range(0, len(inum), 2)])))


# -----------------------------------------------------------------------------
def convert(self, text):
    """Convert HTML *text* to BBCode: normalise the markup, then feed it
    to the parser using the document's default font size."""
    soup = BeautifulSoup(text, 'lxml')
    self.__formalize_html(soup)
    default_size = self._get_default_fontsize(text)
    bbcode = HTML2BBCode().feed(str(soup), default_fontsize=default_size)
    return self.__postprocess_text(str(bbcode))