Beispiel #1
0
    def test_html_to_tables(self):
        """Check every HTML fixture against its stored .rst and .md output.

        For each ``*.html`` file in ``self.static_path`` the file is converted
        with ``dashtable.html2rst`` and ``dashtable.html2md`` and compared
        with the right-stripped contents of the sibling ``.rst`` / ``.md``
        file that shares its base name.

        Each comparison runs in its own ``subTest`` so that one mismatch does
        not stop the remaining fixtures from being checked, while still
        making the test fail (previously mismatches were only printed and
        the test always passed).
        """
        conversions = (
            ('.rst', dashtable.html2rst),
            ('.md', dashtable.html2md),
        )

        for file in os.listdir(self.static_path):
            if not file.endswith('.html'):
                continue

            html_path = os.path.join(self.static_path, file)
            stem = os.path.splitext(file)[0]
            label = 'MATCH ERROR: ' + ntpath.basename(html_path)

            for extension, convert in conversions:
                expected_path = os.path.join(
                    self.static_path, stem + extension)

                with self.subTest(html=file, target=extension):
                    # The context manager closes the fixture even when
                    # reading or decoding it fails.
                    with open(expected_path, 'r',
                              encoding='utf-8') as expected_file:
                        expected = expected_file.read().rstrip()

                    self.assertEqual(convert(html_path), expected, label)
def build_md(doc=None):
    """Export a document as a Markdown table and open the result.

    The document is stored through the ``HTML (StarCalc)`` filter to
    ``temp.html`` in the user's home directory, converted with ``html2md``,
    written to ``.ascii_table.txt`` in the same directory, and that text
    file is then handed to the platform's opener command (``start`` on
    Windows, ``open`` on macOS, ``xdg-open`` elsewhere).

    Args:
        doc: Document to export. When falsy, the document returned by
            ``XSCRIPTCONTEXT.getDocument()`` is used instead.
    """
    from com.sun.star.beans import PropertyValue

    document = doc if doc else XSCRIPTCONTEXT.getDocument()

    home = os.path.expanduser('~')
    html_url = os.path.join(home, 'temp.html').replace('\\', '/')

    # A path that already starts with "/" only needs the two-slash scheme
    # prefix; anything else (e.g. "C:/...") needs the third slash added.
    prefix = 'file://' if html_url.startswith('/') else 'file:///'
    save_url = prefix + html_url

    props = [PropertyValue(Name='FilterName', Value='HTML (StarCalc)')]
    document.storeToURL(save_url, props)

    # Remove the temporary export even when the conversion fails, so a
    # failed run does not leave temp.html behind in the home directory.
    try:
        md = html2md(html_url)
    finally:
        os.remove(html_url)

    md_url = os.path.join(home, '.ascii_table.txt').replace('\\', '/')
    with open(md_url, 'w') as md_file:
        md_file.write(md)

    if sys.platform == "win32":
        # "start" is a shell builtin; the empty string is its title argument.
        subprocess.call(['start', "", md_url], shell=True)
    elif sys.platform == "darwin":
        # macOS has no xdg-open; "open" is its equivalent.
        subprocess.call(['open', md_url])
    else:
        subprocess.call(['xdg-open', md_url])
Beispiel #3
0
from dashtable import html2rst, html2md
import subprocess
import os

# Convert every HTML file under ./test_files and compare the result with the
# stored .md and .rst files that share its base name.
for file in os.listdir(os.getcwd() + '/test_files'):
    if file.endswith('.html'):
        path = os.path.join(os.getcwd(), 'test_files', file)
        f = open(path, 'r')
        lines = f.readlines()
        f.close()

        # The converters are given the HTML text itself, not the file path.
        string = ''.join(lines)
        converted_rst = html2rst(string)
        converted_md = html2md(string)

        # Expected Markdown: same base name, trailing whitespace stripped.
        md_name = os.path.splitext(path)[0] + '.md'
        md_file = open(md_name, 'r')
        md_lines = md_file.readlines()
        md_file.close()
        md_string = ''.join(md_lines).rstrip()

        if not md_string == converted_md:
            print('MarkDown Error: ' + file)

        # Expected reStructuredText: same base name, trailing whitespace
        # stripped.
        rst_name = os.path.splitext(path)[0] + '.rst'
        rst_file = open(rst_name, 'r')
        rst_lines = rst_file.readlines()
        rst_file.close()
        rst_string = ''.join(rst_lines).rstrip()

        # NOTE(review): the body of this check is cut off in the visible
        # source; presumably it reports the mismatch like the Markdown
        # branch above - confirm against the full file.
        if not rst_string == converted_rst:
Beispiel #4
0
def fetchMe(url):
    """Fetch a documentation page and save it as a Markdown file.

    ``url`` is treated as a site-relative path: it is appended to
    ``https://developer.prod.oculus.com``, the page is downloaded and parsed,
    several elements (images, lists, tables, ``pre``/``samp``, ``br``) are
    rewritten in place, and the ``documentation-content`` element is
    converted with ``tomd``. The result, prefixed with a front-matter block
    holding the page title, is written to ``<cwd><url>.md`` and printed.

    The function is written for Python 2: it relies on ``unicode`` and on
    ``str.decode`` / ``str.encode`` round trips.

    Args:
        url: Site-relative path of the page. Any value containing ``"http"``
            is ignored and the function returns without doing anything.
    """
    if "http" in url:
        return

    print(url)
    response = requests.get('https://developer.prod.oculus.com' + url)
    soup = BeautifulSoup(response.text, 'html5lib')

    # A colon inside the title is wrapped in double quotes before it goes
    # into the "title: ..." front-matter line.
    title = soup.title.string
    if ":" in title:
        title = '"' + title + '"'

    first_h1 = soup.select_one("h1")
    if first_h1 is not None:
        first_h1.decompose()

    image_number = 0
    for img in soup.findAll("img"):
        src = img.get('src')
        # Images without a src attribute cannot be relinked; the Facebook
        # tracking pixel is deliberately left untouched.
        if src is None or 'https://www.facebook.com/tr?i' in src:
            continue
        print(src)
        extension = get_ext(src)
        new_file_stub = ('/images/' + slugify(url.decode('utf-8')) + '-' +
                         str(image_number) + extension)
        new_filename = os.getcwd().replace('\\', '/') + new_file_stub
        print(new_filename)
        # Downloading the image to new_filename is currently disabled:
        #     if os.path.isfile(new_filename):
        #         os.remove(new_filename)
        #     urllib.urlretrieve(src, new_filename)
        image_number += 1
        img['src'] = new_file_stub
        img_md = md(str(img))
        img.name = "p"
        img.string = img_md

    # Lists are pre-converted to Markdown and turned into paragraphs that
    # carry the converted text.
    for ul in soup.findAll("ul"):
        ul.string = md(str(ul))
        ul.name = 'p'
    for ol in soup.findAll("ol"):
        ol.string = md(str(ol))
        ol.name = 'p'

    for br in soup.findAll("br"):
        br.replaceWith('\n')

    # Wrap the text of every <pre> in a nested <code> element.
    for pre in soup.findAll("pre"):
        code = soup.new_tag('code')
        pre_text = pre.string
        pre.string = ''
        code.string = pre_text
        pre.append(code)

    for samp in soup.findAll("samp"):
        samp.name = "code"

    for table in soup.findAll("table"):
        print(table)
        table_md = dashtable.html2md(
            unicode(table).encode('ascii', 'ignore'))
        print(table_md)
        table.string = table_md
        table.name = "p"

    for link in soup.findAll("a"):
        href = link.get('href')
        # Anchors without an href (e.g. named anchors) have nothing to
        # rewrite; indexing them directly would raise KeyError.
        if href is not None:
            link['href'] = href.replace('https://developer.oculus.com', '')

    body_html = str(soup.find(class_='documentation-content'))
    output = '---\n'
    output += 'title: ' + title + '\n'
    output += '---\n'
    body_md = str(tomd.convert(str(body_html)))
    output += body_md.decode('utf-8')

    link_to_use = url[:-1] if url[-1:] == '/' else url
    output_file_name = os.getcwd().replace('\\', '/') + link_to_use + '.md'

    # If the file exists, delete it; otherwise make sure its directory does.
    if os.path.isfile(output_file_name):
        os.remove(output_file_name)
    else:
        dirname = os.path.dirname(output_file_name)
        if not os.path.exists(dirname):
            os.makedirs(dirname)

    # The context manager guarantees the file is flushed and closed.
    with open(output_file_name, 'w') as out_file:
        out_file.write(output.encode('utf-8'))
    print(output.encode('utf-8'))
Beispiel #5
0
from dashtable import html2rst, html2md
import subprocess
import os

# Walk the HTML files in ./test_files, echo their reStructuredText and
# Markdown conversions, then run the html2rst script on each file to write a
# .txt result next to it.
for entry in os.listdir(os.getcwd() + '/test_files'):
    if not entry.endswith('.html'):
        continue

    path = os.getcwd() + '/test_files/' + entry
    with open(path, 'r') as source:
        html = source.read()

    print(entry)
    print(html2rst(html))
    print('\n')
    print(html2md(html))
    print('\n')

    subprocess.call([
        'python',
        os.path.join(os.getcwd(), 'dashtable/html2rst.py'),
        path,
        os.path.join(os.getcwd(), 'test_files',
                     os.path.splitext(entry)[0] + '.txt'),
    ])
Beispiel #6
0
import dashtable
# Convert a small two-column HTML table to Markdown and print the result.
sample_table_html = """
    <table>
        <tr><th>Header 1</th><th>Header 2</th></tr>
        <tr><td>Data 1</td><td>Data 2</td></tr>
    </table>
"""
markdown_table = dashtable.html2md(sample_table_html)
print(markdown_table)