def docx2txt(filename):
    """Convert a DOCX file to a text file with the same base name.

    On Windows (``os.name == 'nt'``) the document is opened through the
    ``wc.Dispatch('Word.Application')`` automation object, saved with
    format code 4, and the resulting file is re-encoded from GBK to UTF-8.
    On every other platform pandoc converts the document to Markdown and
    writes it straight to the target file.

    :param filename: path of the source ``.docx`` document.
    """
    import io

    # Swap only the extension; str.replace('docx', 'txt') would also rewrite
    # any "docx" that appears in a directory or base name.
    newfilename = os.path.splitext(filename)[0] + u'.txt'
    if os.name == 'nt':
        print('nt')
        word = wc.Dispatch('Word.Application')
        try:
            doc = word.Documents.Open(filename)
            try:
                # Save as a txt file; the original author notes that the
                # output is GBK-encoded.
                doc.SaveAs(newfilename, 4)
            finally:
                doc.Close()
        finally:
            # Always shut Word down, even when opening or saving failed.
            word.Quit()
        # Read the GBK text back and store it as UTF-8.
        with io.open(newfilename, 'r', encoding='gbk') as input_file:
            text = input_file.read()
        with io.open(newfilename, 'w', encoding='utf-8') as output_file:
            output_file.write(text)
    else:
        # Use pandoc for the conversion.
        pypandoc.convert_file(filename, 'markdown', 'docx',
                              outputfile=newfilename)
    # NOTE(review): the original indentation is lost; the final print is
    # assumed to run on both platforms -- confirm.
    print(newfilename)
def convert_index_to_html(directory):
    """
    Convert ``index.rst`` inside *directory* to ``index.html`` in the same
    directory, using ``convert_file``.
    """
    rst_path = '{0}/index.rst'.format(directory)
    html_path = '{0}/index.html'.format(directory)
    convert_file(rst_path, 'html', outputfile=html_path)
def main():
    """Convert each remote Markdown summary to a PDF named after its title.

    Prints the pandoc version and path first, then converts every entry of
    the title -> URL table with xelatex and a 1.5cm page margin.
    """
    home_link = "https://raw.githubusercontent.com/mbadry1/DeepLearning.ai-Summary/master/"
    # (title, path relative to home_link); the title doubles as the PDF name.
    pages = (
        ("Deeplearning.ai summary Homepage",
         "Readme.md"),
        ("01- Neural Networks and Deep Learning",
         "1-%20Neural%20Networks%20and%20Deep%20Learning/Readme.md"),
        ("02- Improving Deep Neural Networks Hyperparameter tuning, Regularization and Optimization",
         "2-%20Improving%20Deep%20Neural%20Networks/Readme.md"),
        ("03- Structuring Machine Learning Projects",
         "3-%20Structuring%20Machine%20Learning%20Projects/Readme.md"),
        ("04- Convolutional Neural Networks",
         "4-%20Convolutional%20Neural%20Networks/Readme.md"),
        ("05- Sequence Models",
         "5-%20Sequence%20Models/Readme.md"),
    )
    marks_down_links = {title: home_link + relative for title, relative in pages}

    # Report which pandoc installation is in use.
    print("pandoc_version:", pypandoc.get_pandoc_version())
    print("pandoc_path:", pypandoc.get_pandoc_path())
    print("\n")

    # Download and convert every page.
    pdf_options = ['--latex-engine=xelatex', '-V', 'geometry:margin=1.5cm']
    for title, url in marks_down_links.items():
        print("Converting", title)
        pypandoc.convert_file(
            url,
            'pdf',
            extra_args=pdf_options,
            outputfile=(title + ".pdf")
        )
        print("Converting", title, "completed")
def convert_readme():
    """Convert README.md to reStructuredText and write it to README.rst.

    Progress messages are printed before and after the conversion; the
    second one reports the length of the converted text.
    """
    print("[*] Converting Markdown README to reStructuredText")
    import pypandoc

    converted = pypandoc.convert_file('README.md', 'rst')
    with open('README.rst', 'w+', encoding='utf-8') as target:
        target.write(converted)
    size = len(converted)
    print("[*] Finished converting to README.rst ({} bytes)".format(size))
def test_convert_with_custom_writer(self):
    """Convert a Markdown heading using a Lua file as the target format."""
    lua_source = self.create_sample_lua()
    with closed_tempfile('.md', text='#title\n') as md_path, \
            closed_tempfile('.lua', text=lua_source) as writer_path:
        expected = u'<h1 id="title">title</h1>{0}'.format(os.linesep)
        # The Lua file path is passed where the output format normally goes.
        received = pypandoc.convert_file(md_path, writer_path)
        self.assertEqualExceptForNewlineEnd(expected, received)
def test_basic_conversion_from_file_with_format(self):
    """Check Markdown -> RST file conversion with an explicit input format.

    The same expectation is verified for ``pypandoc.convert`` and then for
    ``pypandoc.convert_file``.
    """
    with closed_tempfile('.md', text='#some title\n') as md_path:
        expected = u'some title{0}=========={0}{0}'.format(os.linesep)
        for converter in (pypandoc.convert, pypandoc.convert_file):
            received = converter(md_path, 'rst', format='md')
            self.assertEqualExceptForNewlineEnd(expected, received)
def get_long_description(self, filename='README.md'):
    """
    I really prefer Markdown to reStructuredText. PyPi does not.

    Return the contents of *filename* converted from Markdown to RST.
    When pypandoc cannot be imported, or the conversion raises IOError,
    the raw file contents are returned instead.

    :param filename: path of the Markdown file (default ``README.md``).
    :returns: the description text.
    """
    try:
        import pypandoc
        # Honour the caller's filename instead of a hard-coded README.md.
        description = pypandoc.convert_file(filename, 'rst', 'md')
    except (IOError, ImportError):
        with open(filename) as readme:
            description = readme.read()
    return description
def convert_readme():
    """Regenerate README.rst from README.md and return the RST text.

    If pypandoc cannot be imported, the result of ``read_rst()`` is
    returned and nothing is written.
    """
    try:
        import pypandoc
    except ImportError:
        return read_rst()

    converted = pypandoc.convert_file('README.md', 'rst')
    with open('README.rst', 'w') as rst_file:
        rst_file.write(converted)
    return converted
def long_description(filename="README.md"):
    """Return the README converted to RST, or its raw text as a fallback.

    Environment variables in *filename* are expanded once and the expanded
    path is used for the existence check, the conversion and the fallback
    read alike.

    :param filename: path of the Markdown file; may contain ``$VAR``.
    :returns: the RST text; the raw file contents when pypandoc is not
        importable or raises OSError (e.g. no pandoc binary); ``""`` when
        the file does not exist.
    """
    path = os.path.expandvars(filename)
    if not os.path.isfile(path):
        return ""
    try:
        import pypandoc
        return pypandoc.convert_file(path, "rst")
    except (ImportError, OSError):
        with open(path) as readme:
            return readme.read()
def finalize(args):
    """Run the final steps of a release.

    Validates the preconditions (PR exists and is mergeable, images are
    present, local branch and GitHub release draft exist), then builds the
    distributions, merges the PR, uploads to PyPI, pushes images and
    publishes the release.

    :returns: 0 on success, 1 when a ``ScriptError`` was raised (its
        message is printed).
    """
    distclean()
    try:
        check_pypirc()
        repo = Repository(REPO_ROOT, args.repo)
        images = ImageManager(args.release)

        release_pr = repo.find_release_pr(args.release)
        if not release_pr:
            raise ScriptError('No PR found for {}'.format(args.release))
        if not check_pr_mergeable(release_pr):
            raise ScriptError('Can not finalize release with an unmergeable PR')
        if not images.check_images():
            raise ScriptError('Missing release image')

        release_branch = branch_name(args.release)
        if not repo.branch_exists(release_branch):
            raise ScriptError('No local branch exists for this release.')

        draft = repo.find_release(args.release)
        if not draft:
            raise ScriptError('No Github release draft for this version')

        repo.checkout_branch(release_branch)

        # Generate README.rst from README.md before building the packages.
        readme_md = os.path.join(REPO_ROOT, 'README.md')
        readme_rst = os.path.join(REPO_ROOT, 'README.rst')
        pypandoc.convert_file(readme_md, 'rst', outputfile=readme_rst)
        run_setup(
            os.path.join(REPO_ROOT, 'setup.py'),
            script_args=['sdist', 'bdist_wheel'],
        )

        merge_status = release_pr.merge()
        # A failed merge is tolerated only when resuming a finalize run.
        if not (merge_status.merged or args.finalize_resume):
            raise ScriptError(
                'Unable to merge PR #{}: {}'.format(release_pr.number, merge_status.message)
            )

        pypi_upload(args)
        images.push_images()
        repo.publish_release(draft)
    except ScriptError as error:
        print(error)
        return 1

    return 0
def read_metadata(self, path, format=None):
    """Extract a document's metadata as a dict with lower-cased keys.

    The file is rendered through ``META_TEMPLATE``, the output is parsed
    as YAML, and each value is stringified and passed through
    ``self.process_metadata``.

    :param path: path of the source document.
    :param format: input format forwarded to ``convert_file``.
    :returns: dict mapping lower-cased names to processed values.
    """
    template_args = ['--template', META_TEMPLATE]
    rendered = convert_file(path, to='markdown', format=format,
                            extra_args=template_args)
    parsed = yaml.safe_load(rendered)
    logger.debug(str(parsed))

    result = {}
    for raw_name, raw_value in parsed.items():
        key = raw_name.lower()
        result[key] = self.process_metadata(key, str(raw_value))
    return result
def read_md(path):
    """Return *path* converted to RST, degrading gracefully.

    Best effort, in this order: the pandoc conversion, the raw file
    contents, and finally an empty string (also returned when *path* does
    not exist).

    :param path: path of the Markdown file.
    :returns: the description text, possibly empty.
    """
    if not os.path.exists(path):
        return ""
    try:
        from pypandoc import convert_file
        return convert_file(path, 'rst')
    except Exception:
        # Any import or conversion failure falls back to the raw text, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    try:
        with open(path, 'r') as handle:
            return handle.read()
    except (IOError, OSError, UnicodeError):
        return ""
def doc_convert(project, logger):
    """Convert the project's readme to RST and fill in missing metadata.

    ``project.description`` receives the converted text and
    ``project.summary`` its first line (stripped), each only when the
    attribute is absent or ``None`` or when the
    ``distutils_description_overwrite`` property is truthy.
    """
    import pypandoc

    readme_file = project.expand_path("$distutils_readme_file")
    logger.debug("Converting %s into RST format for PyPi documentation...", readme_file)
    description = pypandoc.convert_file(readme_file, "rst")

    def _needs_value(attribute):
        # Evaluated lazily, in the same order as the original condition.
        return (not hasattr(project, attribute)
                or getattr(project, attribute) is None
                or project.get_property("distutils_description_overwrite"))

    if _needs_value("description"):
        project.description = description

    if _needs_value("summary"):
        first_line = description.splitlines()[0]
        project.summary = first_line.strip()
def read_input(infilename):
    """
    read text from a file

    supported formats:
    * plain text
    * pdf
    * all formats from pandoc

    :param infilename: path of the input file.
    :returns: the extracted text.
    """
    # Decide on the extension only: a ".pdf" that merely appears somewhere
    # in the path (directory name, "notes.pdf.txt") is not a PDF.
    if infilename.lower().endswith(".pdf"):
        return extract_from_pdf(infilename)
    try:
        return pypandoc.convert_file(infilename, 'md')
    except Exception:
        # Conversion through pypandoc failed, so read the file as text.
        with open(infilename, "r") as infile:
            return infile.read()
''' Copyright (C) 2017-2019 Bryant Moscon - [email protected] Please see the LICENSE file for the terms and conditions associated with this software. ''' import sys from setuptools import setup from setuptools import find_packages from setuptools.command.test import test as TestCommand ld = None try: import pypandoc ld = pypandoc.convert_file('README.md', 'rst', format='markdown_github') except BaseException: pass class Test(TestCommand): def run_tests(self): import pytest errno = pytest.main([]) sys.exit(errno) setup( name="cryptofeed", version="0.22.2", author="Bryant Moscon",
def convert_markdown_to_rst(file):
    """Return the contents of *file* converted to reStructuredText."""
    rst_text = convert_file(file, "rst")
    return rst_text
"--message", help="Your edit summary and commit message, use quotes for strings. If not provided an automatic message is placed with a timestamp.", ) parser.add_argument( "-ng", "--nogit", action="store_true", help="Use this flag if youre not pushing to a git repository." ) parser.add_argument( "-i", "--index", action="store_true", help="Adds a link to the published page on your wiki index page." ) parser.add_argument( "-html", "--make_html", action="store_true", help="Generates a html version of the file in the working directory." ) args = parser.parse_args() # publish to mediawiki input = args.file.name output = pypandoc.convert_file(input, "mediawiki") now = datetime.now site = mwclient.Site("pzwiki.wdka.nl", "/mw-mediadesign/") site.force_login = True site.login(wikiusername, wikipassword) wikipagename = args.wikipage page = site.pages["User:"******"/" + wikipagename] timenow = datetime.now() now = strftime("%Y-%m-%d %H:%M:%S", gmtime()) automatic_edit_summary = "version pushed from crosspublish.py at " + now if args.message: page.save(output, args.message) else: page.save(output, automatic_edit_summary) print( "Converted " + input + " to mediawiki syntax and published on " + "https://pzwiki.wdka.nl/mw-mediadesign/"
def convert_readme():
    """Write README.md as reStructuredText to README.rst.

    :returns: the converted RST text.
    """
    import pypandoc

    converted = pypandoc.convert_file('README.md', 'rst')
    with open('README.rst', 'w') as rst_file:
        rst_file.write(converted)
    return converted
def main(argv):
    """
    Reads in the filepath and starts analysis.

    The document is converted to plain text and three tables are printed:
    every unique word with its count and first position, each attention
    word with its position, and the "interesting" bigrams (a repeated word,
    or a first word listed in the n-gram file) with their position.

    :param argv: command-line arguments; ``argv[1]`` is the document path.
        When absent, the path is requested on the console.
    """
    import io

    if len(argv) > 1:
        # Input via arguments: take the first one. str(argv[1:]) produced
        # the repr of a list, which is never a usable path.
        filepath = argv[1]
    elif sys.version_info[0] < 3:
        # Input via console, Python 2.
        filepath = str(raw_input("Filepath: "))
    else:
        filepath = str(input("Filepath: "))

    # Settings
    language = 'de'

    # Data paths
    attentionfile = 'data/' + language + '/attentionwords.txt'
    ngramfile = 'data/' + language + '/ngramlist.txt'

    # Load the document and convert it to plain text
    text = pypandoc.convert_file(filepath, 'plain')

    # Load the word lists; io.open accepts ``encoding`` on Python 2 as well.
    with io.open(attentionfile, encoding='utf-8') as handle:
        attentionwords = handle.read().splitlines()
    with io.open(ngramfile, encoding='utf-8') as handle:
        ngramlist = handle.read().splitlines()

    # Create a textblob to work with
    blob = TextBlob(text)
    # All words, and the unique ones
    wordlist = blob.words
    wordset = list(set(wordlist))
    # All bigrams
    ngrams = blob.ngrams(n=2)

    # Word table: word, count, position; sorted by count, highest first
    wordtable = [
        [word, str(wordlist.count(word)), str(blob.find(word))]
        for word in wordset
    ]
    wordtable = sorted(wordtable, key=lambda word: int(word[1]), reverse=True)
    print(tabulate(wordtable))
    print("\n")

    # Attention words with the position where they are found in the text
    attentiontable = [[word, str(blob.find(word))] for word in attentionwords]
    attentiontable = sorted(attentiontable, key=lambda word: int(word[1]))
    print(tabulate(attentiontable))
    print("\n")

    # Interesting bigrams, sorted by position
    phrasetable = []
    for ngram in ngrams:
        if (ngram[0].lower() == ngram[1].lower()) or (ngram[0].lower() in ngramlist):
            phrase = ' '.join(str(word) for word in ngram)
            phrasetable.append([phrase, str(blob.find(phrase))])
    phrasetable = sorted(phrasetable, key=lambda word: int(word[1]))
    print(tabulate(phrasetable))
    print("\n")
import os from setuptools import setup try: import pypandoc README = pypandoc.convert_file('README.md', 'rst') except ImportError: with open(os.path.join(os.path.dirname(__file__), 'README.md')) as readme: README = readme.read() # allow setup.py to be run from any path os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) setup( name='drf-schema-adapter', version='0.9.47', packages=['drf_auto_endpoint', 'export_app', 'export_app.management', 'export_app.management.commands'], include_package_data=True, license='MIT License', description='Making using Django with frontend libraries and frameworks DRYer', long_description=README, url='https://github.com/drf-forms/drf-schema-adapter', author='Emmanuelle Delescolle, Adrien Brunet, Mauro Bianchi, Mattia Larentis, Aaron Elliot Ross', author_email='*****@*****.**', classifiers=[ 'Environment :: Web Environment', 'Framework :: Django', 'Framework :: Django :: 1.8',
# -*- coding: utf-8 -*- from setuptools import setup import pypandoc version = {} with open("pygcgen/version.py") as fh: exec(fh.read(), version) changelog = pypandoc.convert_file( r'CHANGELOG.md', to='rst', format='markdown_github', ) long_desc = """ Automaticaly generate a changelog based on GitHub issues and pull requests. For each tag there will be a section with closed issues and merged pull requests. Also there can be user defined sections based on labels. This package started as a conversion from ruby to python of the `GitHub Changelog Generator <https://github.com/skywinder/github-changelog-generator>`__ """ + changelog setup( name=version['__title__'], version=version['__version__'],
How to upload new release 1. change version in setup.py 2. setup twine, see:https://blog.amedama.jp/entry/2017/12/31/175036 3. create zip file: python setup.py sdist 4. upload twine upload --repository pypi dist/hogehoeg """ import os from setuptools import setup, find_packages PROJECT_PATH = os.path.dirname(os.path.abspath(__file__)) # read README try: import pypandoc readme = pypandoc.convert_file(PROJECT_PATH + '/README.md', 'rst') except(IOError, ImportError): readme = open(PROJECT_PATH + '/README.md').read() setup( name="pyroombaadapter", version="0.1.1", url="https://github.com/AtsushiSakai/PyRoombaAdapter", author="Atsushi Sakai", author_email="*****@*****.**", maintainer='Atsushi Sakai', maintainer_email='*****@*****.**', description="A Python library for Roomba Open Interface", long_description=readme, python_requires='>3.6.0', license="MIT",
def read_md(filename):
    """Return *filename* converted to RST, or its raw text as a fallback.

    The raw contents are returned when pypandoc cannot be imported or the
    conversion raises OSError.

    :param filename: path of the Markdown file.
    """
    try:
        from pypandoc import convert_file
        return convert_file(filename, 'rst')
    except (ImportError, OSError):
        # Close the handle deterministically instead of leaking it.
        with open(filename) as handle:
            return handle.read()
import ast import os import re from setuptools import setup import sys assert sys.version_info >= (3, 5, 0), "flake8-mypy requires Python 3.5+" current_dir = os.path.abspath(os.path.dirname(__file__)) readme_md = os.path.join(current_dir, 'README.md') try: import pypandoc long_description = pypandoc.convert_file(readme_md, 'rst') except(IOError, ImportError): print() print( '\x1b[31m\x1b[1mwarning:\x1b[0m\x1b[31m pandoc not found, ' 'long description will be ugly (PyPI does not support .md).' '\x1b[0m' ) print() with open(readme_md, encoding='utf8') as ld_file: long_description = ld_file.read() _version_re = re.compile(r'__version__\s+=\s+(?P<version>.*)')
import pypandoc

# Convert file.html to DOCX on disk; with an output file the script expects
# an empty string back from convert_file.
output = pypandoc.convert_file(
    source_file='file.html',
    to='docx',
    outputfile="file1.docx",
)
assert output == ""
import os import sys from setuptools import setup, find_packages try: this_file = __file__ except NameError: this_file = sys.argv[0] os.chdir(os.path.dirname(os.path.abspath(this_file))) try: import pypandoc try: pypandoc.convert_file("README.md", "rst", outputfile="README.rst") except (IOError, ImportError, RuntimeError): pass long_description = pypandoc.convert_file("README.md", "rst") except (IOError, ImportError, RuntimeError): long_description = "" setup( name="UCCA", version="1.0.65", install_requires=["numpy", "spacy==2.0.11", "requests", "tqdm"], extras_require={ "visualize": ["matplotlib", "networkx"], "distances": ["distances", "zss", "munkres"] }, description="Universal Conceptual Cognitive Annotation",
#!/usr/bin/env python from setuptools import setup from gcalcli import __version__ try: import pypandoc long_description = pypandoc.convert_file('README.md', 'rst', format='markdown_github', extra_args=("--wrap=none", )) except ImportError: import sys print('Warning: No long description generated.', file=sys.stderr) long_description = '' author_emails = [ '*****@*****.**', '*****@*****.**', '*****@*****.**' ] setup(name='gcalcli', version=__version__, author='Eric Davis, Brian Hartvigsen, Joshua Crowgey', author_email=', '.join(author_emails), maintainer='Joshua Crowgey', maintainer_email='*****@*****.**', description='Google Calendar Command Line Interface', long_description=long_description, url='https://github.com/insanum/gcalcli', license='MIT', packages=['gcalcli'], data_files=[('share/man/man1', ['docs/man1/gcalcli.1'])],
def read_md(f):
    """Return the file *f* converted to reStructuredText."""
    rst = convert_file(f, 'rst')
    return rst
def f(filepath):
    """Convert *filepath* to RST; the converted text is discarded."""
    target_format = 'rst'
    pypandoc.convert_file(filepath, target_format)
warnings.warn("Could not import matlab.engine. " + "Matlab features will be disabled.") matlab_installed = False # Set coverage options in .coveragerc create_coveragerc.create_coveragerc(matlab_installed=matlab_installed, lpy_installed=lpy_installed) # Create .rst README from .md and get long description if os.path.isfile('README.rst'): with open('README.rst', 'r') as file: long_description = file.read() elif os.path.isfile('README.md'): try: import pypandoc pypandoc.convert_file('README.md', 'rst', outputfile='README.rst') long_description = pypandoc.convert_file('README.md', 'rst') except (ImportError, IOError): with open('README.md', 'r') as file: long_description = file.read() else: raise IOError("Could not find README.rst or README.md") # Create requirements list based on platform requirements = [ 'numpy>=1.13.0', "scipy", "pyyaml", "pystache", "nose", "pyzmq", "psutil", "matplotlib<3.0; python_version < '3.5'", "matplotlib; python_version >= '3.5'", "cerberus", "jsonschema", 'pandas<0.21; python_version == "3.4"', 'pandas; python_version != "3.4"', "perf", "pint", "unyt", 'sysv_ipc; platform_system != "Windows"' ]
""" # try: # import pypandoc # long_description = pypandoc.convert_file('README.md', 'rst') # long_description = long_description.replace("\r", "") # except (ImportError, OSError): # print("NOTE: pypandoc not available, reading README.md as-is.") # Edit, switched to using reStructuredText for README file: try: long_description = open('README.rst').read() except FileNotFoundError: import pypandoc long_description = pypandoc.convert_file('README.md', to='rst') # update 'version' and 'download_url', as well as pptx_downsizer.__init__.__version__ setup( name='ooxml-git-hooks', description= 'Tools for uncompressing Microsoft Office (Office Open XML) files for better git version control.', long_description=long_description, url='https://github.com/scholer/ooxml-git-hooks', packages=[ 'ooxml_git_hooks' ], # List all packages (directories) to include in the source dist. version='0.1.1-dev', # Update for each new version # download_url='https://github.com/scholer/ooxml-git-hooks/tarball/0.1.2', # Update for each new version download_url= 'https://github.com/scholer/ooxml-git-hooks/archive/master.zip',
""" print("SandboxViolation (disabled): {} {}".format(operation, args)) # pylint: disable=W0212 DirectorySandbox._violation = violation except ImportError: pass disable_sandbox() try: # for PyPI, we want a rst readme. For github, we want md. *shruggie* import pypandoc long_desc = pypandoc.convert_file('README.md', 'rst', format=u'markdown_github') except (IOError, ImportError): long_desc = open('README.md').read() ver = open('chiasm_shell/VERSION').read().strip() setup( name='chiasm-shell', description='CLI for assembly/disassembly powered by Keystone/Capstone.', long_description=long_desc, version=ver, url='https://github.com/0xbc/chiasm-shell', download_url='https://github.com/0xbc/chiasm-shell/tarball/{}'.format(ver), author='Ben Cheney', author_email='*****@*****.**',
#!/usr/bin/env python
import pypandoc
import os

# Convert every .docx under data/raw/baker to Textile, one .txt per document
# in data/raw/cases. Documents with the same base name in different
# sub-directories still map to the same output file.
for path, dir_list, file_list in os.walk('data/raw/baker'):
    for file_name in file_list:
        # Name the output after the document's base name. The last
        # dot-separated part is the extension, which sent every document
        # to the same "docx.txt".
        out_name, extension = os.path.splitext(file_name)
        if extension.lower() != '.docx':
            continue
        pypandoc.convert_file(
            os.path.join(path, file_name),
            'textile',
            'docx',
            outputfile='data/raw/cases/' + out_name + '.txt')
import os from setuptools import setup def filepath(fname): return os.path.join(os.path.dirname(__file__), fname) exec(compile(open('lifelines/version.py').read(), 'lifelines/version.py', 'exec')) readme_md = filepath('README.md') try: import pypandoc readme_rst = pypandoc.convert_file(readme_md, 'rst') except(ImportError): readme_rst = open(readme_md).read() setup( name="lifelines", version=__version__, author="Cameron Davidson-Pilon, Jonas Kalderstam", author_email="*****@*****.**", description="Survival analysis in Python, including Kaplan Meier, Nelson Aalen and regression", license="MIT", keywords="survival analysis statistics data analysis", url="https://github.com/CamDavidsonPilon/lifelines", packages=['lifelines', 'lifelines.datasets',
import os from setuptools import setup, find_packages def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() version = '2.0.0' try: import pypandoc README = pypandoc.convert_file('README.md', 'rst') CHANGES = pypandoc.convert_file('CHANGES.md', 'rst') except: README = read('README.md') CHANGES = read('CHANGES.md') setup( name="pycobertura", version=version, author="Alex Conrad", author_email="*****@*****.**", maintainer="Alex Conrad", maintainer_email="*****@*****.**", description="A Cobertura coverage parser that can diff reports and " "show coverage progress.", license="MIT License", keywords="cobertura coverage diff report parser parse xml", url="https://github.com/aconrad/pycobertura", zip_safe=False, include_package_data=True,
import io

import pypandoc
import panflute


def prepare(doc):
    """Attach empty collectors for images and links to *doc*."""
    doc.images = []
    doc.links = []


def action(elem, doc):
    """Record *elem* on *doc* when it is an Image or a Link element."""
    if isinstance(elem, panflute.Image):
        doc.images.append(elem)
    elif isinstance(elem, panflute.Link):
        doc.links.append(elem)


if __name__ == '__main__':
    # Have pandoc emit its JSON form, then hand it to panflute through an
    # in-memory text stream (this needs the io import added above).
    data = pypandoc.convert_file('example.md', 'json')
    f = io.StringIO(data)
    doc = panflute.load(f)
    doc = panflute.run_filter(action, prepare=prepare, doc=doc)

    print("\nImages:")
    for image in doc.images:
        print(image.url)

    print("\nLinks:")
    for link in doc.links:
        print(link.url)
os.environ["CC"] = gcc os.environ["CXX"] = gcc else: global use_openmp use_openmp = False logging.warning('No GCC available. Install gcc from Homebrew ' 'using brew install gcc.') set_gcc() try: # if we don't have pandoc installed, don't worry about it import pypandoc long_description = pypandoc.convert_file("README.md", "rst") except ImportError: long_description = '' setup( name=NAME, version=VERSION, description='Collaborative Filtering for Implicit Datasets', long_description=long_description, url='http://github.com/benfred/implicit/', author='Ben Frederickson', author_email='*****@*****.**', license='MIT', classifiers=[ 'Development Status :: 4 - Beta',
def convert_md_to_rtf(mdfile,rtf): print('Markdown {} to RTF {}'.format(mdfile,rtf)) try: output = pypandoc.convert_file(mdfile, 'rst', format='md')
elif extension == "java": md_doc = format_multiple_line_comment(script, "java", "/\*\*", "\*\*/", "//'") #' Join list of lines md = '\n'.join(md_doc) #' Write raw markdown file md_file = open(filename + ".md", "w") md_file.write(''.join(md)) md_file.close() #' # Convert markdown to output format if args.c and args.o == "html": pandoc_args=['-c' + args.c, '--toc', '-N', '--self-contained', '--standalone'] output = pypandoc.convert_file(filename + ".md", 'html', outputfile = filename + '.html', extra_args = pandoc_args) assert output == "" else: output = pypandoc.convert_file(filename + ".md", 'html', outputfile = filename + '.html') assert output == "" if args.o != "html": output = pypandoc.convert_file(filename + ".md", args.o, outputfile = filename + "." +args.o) assert output == "" #' Remove markdown output file? if not args.md: os.remove(filename + ".md")
from setuptools import setup
from setuptools import find_packages

# Prefer an RST long description; fall back to the raw Markdown when
# pypandoc is not importable or raises OSError (e.g. no pandoc binary).
try:
    from pypandoc import convert_file
    long_description = convert_file('README.md', 'rst')
except (ImportError, OSError):
    with open('README.md') as readme:
        long_description = readme.read()


setup(
    name='talkzoho',
    version='3.0.3',
    description='Asynchronous wrapper for Zoho\'s numerous APIs',
    long_description=long_description,
    url='https://github.com/A2Z-Cloud/Talk-Zoho',
    packages=find_packages(exclude=('tests', 'tests.*')),
    author='James Stidard',
    author_email='*****@*****.**',
    keywords=['talkzoho', 'Zoho', 'async', 'tornado'],
    install_requires=[
        'fuzzywuzzy',
        'python-Levenshtein',
        'inflect',
        'tornado'])
""" Format the readme.md file into the sphinx index.rst file. """ import re import pypandoc from astropy.time import Time t = Time.now() t.out_subfmt = 'date' out = ('.. pyuvdata documentation master file, created by\n' ' make_index.py on {date}\n\n').format(date=t.iso) readme_text = pypandoc.convert_file('../readme.md', 'rst') end_text = 'parameters description' regex = re.compile(end_text.replace(' ', '\s+')) loc = re.search(regex, readme_text).start() out += readme_text[0:loc] + end_text + '.' out += ('\n\nFurther Documentation\n====================================\n' '.. toctree::\n' ' :maxdepth: 1\n\n' ' tutorial\n' ' parameters\n' ' developer_docs\n') F = open('index.rst', 'w') F.write(out) print("wrote index.rst")
import pypandoc

# Convert the DOCX document to ODT; pandoc writes to "output_converted".
output = pypandoc.convert_file(
    source_file='./documents/TRSABESP.docx',
    to='odt',
    outputfile="output_converted",
)
this_file = __file__ except NameError: this_file = sys.argv[0] os.chdir(os.path.dirname(os.path.abspath(this_file))) extras_require = {} install_requires = [] for requirements_file in glob("requirements.*txt"): suffix = re.match(r"[^.]*\.(.*)\.?txt", requirements_file).group(1).rstrip(".") with open(requirements_file) as f: (extras_require.setdefault(suffix, []) if suffix else install_requires).extend(f.read().splitlines()) try: import pypandoc try: pypandoc.convert_file("README.md", "rst", outputfile="README.rst") except (IOError, ImportError, RuntimeError): pass long_description = pypandoc.convert_file("README.md", "rst") except (IOError, ImportError, RuntimeError): long_description = "" setup(name="UCCA", version=VERSION, install_requires=install_requires, extras_require=extras_require, description="Universal Conceptual Cognitive Annotation", long_description=long_description, author="Daniel Hershcovich", author_email="*****@*****.**",
from os import path from setuptools import setup here = path.abspath(path.dirname(__file__)) # Try to convert markdown readme file to rst format try: import pypandoc md_file = path.join(here, 'README.md') rst_file = path.join(here, 'README.rst') pypandoc.convert_file(source_file=md_file, outputfile=rst_file, to='rst') except (ImportError, OSError, IOError, RuntimeError): pass # Get the long description from the relevant file with open(path.join(here, 'README.rst')) as f: long_description = f.read() setup( name='youtrack-scripts', version='0.1.23', python_requires='>=2.6, <3', packages=[ 'youtrackutils', 'youtrackutils.bugzilla', 'youtrackutils.csvClient', 'youtrackutils.fbugz', 'youtrackutils.mantis', 'youtrackutils.redmine', 'youtrackutils.tracLib', 'youtrackutils.utils', 'youtrackutils.zendesk' ], url='https://github.com/JetBrains/youtrack-python-scripts', license='Apache 2.0', maintainer='Alexander Buturlinov', maintainer_email='*****@*****.**',
text = par.text if len(text) > 0: print(text) file.write(text + "\n") file.close() # =================================================== # PYPANDOC # =================================================== import pypandoc output = pypandoc.convert_file("./data/document-1.docx", "plain", outputfile="output-pypandoc.txt") assert output == "" # =================================================== # GROUPDOCS API # =================================================== import groupdocs_conversion_cloud from shutil import copyfile client_id = "ENTER CLIENT ID HERE" client_key = "ENTER CLIENT SECRET HERE" convert_api = groupdocs_conversion_cloud.ConvertApi.from_keys(client_id, client_key)
def main(**args):
    '''
    Entry point of the |rstfromdocx| shell command.

    Converts a DOCX file to RST with Pandoc, writes the helper files and
    optionally runs the selected post-processors on the result.

    :param args: Keyword arguments. When empty, they are parsed from
        ``sys.argv``. ``listtable``, ``untable``, ``reflow`` and ``reimg``
        default to False, ``join`` to ``'012'``.

    returns: The file name of the generated file.
    '''
    import argparse

    if not args:
        parser = argparse.ArgumentParser(
            description='''Convert DOCX to RST using Pandoc and additionally copy the images and helper files.'''
        )
        parser.add_argument('docx', action='store', help='DOCX file')
        # The four post-processing switches share the same shape.
        for short, name in (('-l', 'listtable'), ('-u', 'untable'),
                            ('-r', 'reflow'), ('-g', 'reimg')):
            parser.add_argument(
                short,
                '--' + name,
                action='store_true',
                default=False,
                help='''postprocess through rst''' + name)
        parser.add_argument(
            '-j',
            '--join',
            action='store',
            default='012',
            help='''e.g.002. Join method per column: 0="".join; 1=" ".join; 2="\\n".join'''
        )
        args = parser.parse_args().__dict__

    adocx = args['docx']
    extract_media(adocx)
    fnrst = _docxrest(adocx)
    converted = pypandoc.convert_file(adocx, 'rst', 'docx')
    with open(fnrst, 'w', encoding='utf-8', newline='\n') as rstfile:
        rstfile.write('.. vim: syntax=rst\n\n')
        rstfile.writelines([line + '\n' for line in converted.splitlines()])
    _write_confpy(adocx)
    _write_index(adocx)
    _write_makefile(adocx)
    _write_dcx(adocx)

    # Fill in defaults for callers that passed only some keywords.
    postprocessors = ('listtable', 'untable', 'reflow', 'reimg')
    for option in postprocessors:
        args.setdefault(option, False)
    args.setdefault('join', '012')

    for a in postprocessors:
        if not args[a]:
            continue
        args['in_place'] = True
        args['sentence'] = True
        if a == 'reflow':
            args['join'] = '0'
        args['rstfile'] = [argparse.FileType('r', encoding='utf-8')(fnrst)]
        # ``a`` is one of the four fixed names above; eval resolves it to
        # the post-processing function of the same name.
        eval(a)(**args)

    return fnrst
import os import sys import re from setuptools import setup, find_packages if sys.argv[-1] == "publish": os.system("python setup.py sdist upload") sys.exit() with open('README.md') as f: readme = f.read() # Convert markdown to rst try: from pypandoc import convert_file long_description = convert_file("README.md", "rst") except: long_description = "" version = '' with io.open('django_react_templatetags/__init__.py', 'r', encoding='utf8') as fd: version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) setup( name="django_react_templatetags", version=version, description= ("This django library allows you to add React components into your django templates." ), # NOQA
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 1 18:39:12 2016

@author: Pranavtadepalli
"""
import pypandoc

# Convert Markdown to reStructuredText.
# NOTE(review): the source path is relative and ends in .rst although it is
# parsed as markdown -- confirm this is the intended input file.
SOURCE_PATH = 'Users/Pranavtadepalli/WordInfo/README.rst'
z = pypandoc.convert_file(SOURCE_PATH, 'rst', format='markdown')

# Write the converted text.
with open('WordInfo/README.rst', 'w') as outfile:
    outfile.write(z)
def read(fname):
    """Return *fname* (located next to this module) converted from
    Markdown to reStructuredText."""
    module_dir = os.path.dirname(__file__)
    source_path = os.path.join(module_dir, fname)
    return pypandoc.convert_file(source_path, 'rst', format='md')
except ImportError: print('Wheel library missing. Please run "pip install wheel"') sys.exit() os.system('python setup.py sdist upload') os.system('python setup.py bdist_wheel upload') sys.exit() if sys.argv[-1] == 'tag': print("Tagging the version on github:") os.system("git tag -a %s -m 'version %s'" % (version, version)) os.system("git push --tags") sys.exit() try: import pypandoc long_description = pypandoc.convert_file('README.md', 'rst') + \ pypandoc.convert_file('docs/HISTORY.md', 'rst') except BaseException: long_description = '' license = open('LICENSE').read() setup( name='callisto-core', version=version, description='Report intake, escrow, matching and secure delivery code for Callisto, an online reporting system for sexual assault.', long_description=long_description, license=license, author='Project Callisto', author_email='*****@*****.**', url='https://github.com/project-callisto/callisto-core',
from setuptools import setup long_description = "" try: import subprocess import pypandoc long_description = pypandoc.convert_file("README.md", to="rst", format="md") except ImportError: pass setup( name="asterisk_dialplan", version="0.2.0", author="Andrew Yager", author_email="*****@*****.**", license="BSD", packages=["asterisk_dialplan"], description= "Helpers to convert numbers to dialplan strings for use in Asterisk", long_description=long_description, classifiers=[ "Development Status :: 4 - Beta", "Topic :: Communications :: Telephony", "Topic :: Software Development :: Libraries", "License :: OSI Approved :: BSD License", ],
#!/usr/bin/env python
import pypandoc
import os
import yara
import fnmatch
from codecs import open

rules_dir = 'apkid/rules/'
compiled_rules_path = "%srules.yarc" % rules_dir

# Step 1: regenerate README.rst from the Markdown README.
print("[*] Converting Markdown README to reStructuredText ...")
rst = pypandoc.convert_file('README.md', 'rst')
with open('README.rst', 'w+', encoding='utf-8') as f:
    f.write(rst)
print("[*] Finished converting to README.rst (%s bytes)" % len(rst))

# Step 2: collect every *.yar file below rules_dir, keyed by its own path.
yara_files = {}
for root, dirnames, filenames in os.walk(rules_dir):
    for rule_path in (os.path.join(root, name)
                      for name in fnmatch.filter(filenames, '*.yar')):
        yara_files[rule_path] = rule_path

#print yara_files
# Step 3: compile the rules and store the compiled set.
rules = yara.compile(filepaths=yara_files)
print("[*] Compiling %d Yara rules ..." % len(yara_files))
rules.save(compiled_rules_path)
print("[*] Saved rules to %s" % compiled_rules_path)

#print "[*] Registering ..."
#os.system("python setup.py register")
import pypandoc

# Write README.md as RST to README.rst; with an output file the script
# expects an empty string back from convert_file.
output = pypandoc.convert_file(
    source_file='README.md',
    to='rst',
    outputfile="README.rst",
)
assert output == ""
#!/usr/bin/env python # -*- coding: utf-8 -*- from setuptools import setup, find_packages # I really prefer Markdown to reStructuredText. PyPi does not. This allows me # to have things how I'd like, but not throw complaints when people are trying # to install the package and they don't have pypandoc or the README in the # right place. try: import pypandoc long_description = pypandoc.convert_file('README.md', 'rst') except (IOError, ImportError): long_description = open('README.md').read() about = {} with open('src/clikraken/__about__.py') as f: exec(f.read(), about) # now we have a about['__version__'] variable setup( name=about['__title__'], version=about['__version__'], packages=find_packages('src'), package_dir={'': 'src'}, author=about['__author__'], author_email=about['__email__'], license=about['__license__'], description=about['__summary__'], long_description=long_description, include_package_data=True,
for i in range(len(chapters)): #getting the chapters using the ID print("Getting Chapter", i+1, "....") story = requests.get("https://www.wattpad.com/apiv2/storytext?id=" + str(chapters[i]['ID']), headers={'User-Agent': 'Mozilla/5.0'}) try: story.raise_for_status() except Exception as exc: print("There was a problem: %s" % (exc)) #Creating soup soup_res = bs4.BeautifulSoup(story.text, 'html.parser') #Adding Content of chapters to the file file.write("<br><br><h2>Chapter "+str(i+1)+" : '"+ chapters[i]['TITLE'] +"'</h2><br><br>") file.write(soup_res.prettify()) file.write("</body></html>") #closing the file file.close() #Output print("saved "+ story_name+".html") print("Generating Epub...") #Using Pypandoc to convert html to epub output = pypandoc.convert_file(story_name+".html", 'epub3', outputfile=story_name+".epub", extra_args=['--epub-chapter-level=2']) assert output == "" print("saved "+ story_name+".epub")
def read_md(f):
    """Return the file *f* converted to RST with all carriage returns removed."""
    # Dropping '\r' is the fix for issue #773.
    return convert_file(f, 'rst').replace('\r', '')
def read_md(f):
    """Return the Markdown file *f* converted to reStructuredText."""
    rst = convert_file(f, 'rst', format='md')
    return rst