def load(path='jobfolder.dict', timeout=None):
    """ Unpickles a job-folder from file.

        :param str path:
            Filename of a pickled job-folder.
        :param int timeout:
            How long to wait when trying to acquire lock on file.
            Defaults to forever.
        :return: Returns a JobFolder object.

        This method first acquire an exclusive lock on the file before
        reading. This way not two processes can read/write to this file
        while using this function.
    """
    from os.path import exists
    from pickle import load as load_pickle
    from ..misc import open_exclusive, RelativePath
    from .. import is_interactive
    # Fall back to the documented default. Previously this fell back to
    # "job.dict", which contradicted the declared default 'jobfolder.dict'.
    path = 'jobfolder.dict' if path is None else RelativePath(path).path
    if not exists(path):
        raise IOError("File " + path + " does not exist.")
    # Exclusive lock so concurrent readers/writers don't see partial data.
    with open_exclusive(path, "rb", timeout=timeout) as file:
        result = load_pickle(file)
    if is_interactive:
        print("Loaded job list from {0}.".format(path))
    return result
def example_info(request, example_type, page):
    """Return the stored example items for ``example_type`` as JSON."""
    pickle_path = f'{EXAMPLES_PATH}/{example_type}_examples_info.pickle'
    with open(pickle_path, "rb") as handle:
        examples = load_pickle(handle)
    # Pair every key with its value, preserving insertion order.
    show_items = list(examples.items())
    return JsonResponse({
        'show_items': show_items,
    })
def _load_from_pickle(self, filename):
    """ This function reads the variable from the given pickle file """
    dataset_path = os.path.join(basedir, "dataset", filename)
    # Fail fast with a domain error when the pickle is absent.
    if not os.path.isfile(dataset_path):
        raise DatasetNotFoundError("Dataset not found {}".format(dataset_path))
    with open(dataset_path, "rb") as fh:
        return load_pickle(fh)
def loading_dissected_datasets(year: int, hour: int) -> pd.DataFrame:
    """ Loads dataset for provided year and hour.

    :param year: dataset year index; must be in ``[0, 29]``.
    :param hour: hour of day; must be in ``[0, 23]``.
    :return: the unpickled DataFrame for that year/hour slice.
    :raises ValueError: if ``year`` or ``hour`` is out of range.
    """
    # Validate with explicit exceptions: ``assert`` statements are stripped
    # when Python runs with -O, silently disabling the checks.
    if not 0 <= year < 30:
        raise ValueError("Year should be between 0 and 29")
    if not 0 <= hour < 24:
        raise ValueError("Hour should be between 0 and 23")
    with open(
            "dataset/complete_dataset/hour-{}-year-{}.pickle".format(
                hour, year), "rb") as f:
        return load_pickle(f)
def jt_stoplist_init():
    'Retrieve and return the content of all JusText stoplists'
    global JT_STOPLIST
    # The combined stoplist ships pre-built as an LZMA-compressed pickle;
    # it was previously assembled from get_stoplists()/get_stoplist().
    with lzma.open(JT_PICKLE, 'rb') as picklefile:
        JT_STOPLIST = load_pickle(picklefile)
    return JT_STOPLIST
def validate_tei(xmldoc):
    '''Check if an XML document is conform to the guidelines of the
       Text Encoding Initiative'''
    global TEI_RELAXNG
    if TEI_RELAXNG is None:
        # First call: decompress the pickled schema and cache the validator.
        with lzma.open(TEI_SCHEMA, 'rb') as schemafile:
            schema_data = load_pickle(schemafile)
        TEI_RELAXNG = RelaxNG(fromstring(schema_data))
    is_valid = TEI_RELAXNG.validate(xmldoc)
    if not is_valid:
        LOGGER.warning('not a valid TEI document: %s',
                       TEI_RELAXNG.error_log.last_error)
    return is_valid
def fam_by_example(request, example_type, query, page):
    """Look up the families stored for an example ``query`` and return
    one page of them as JSON, also dumping them to ``example_fams.json``."""
    fams_path = f'{EXAMPLES_PATH}/{example_type}_examples_fams.pickle'
    with open(fams_path, "rb") as handle:
        example_fams = load_pickle(handle)
    print(example_fams[query])  # debug trace of the matched entry
    examples, total_matches = get_fams(example_fams[query], page)
    if example_type == "fitness":
        # Fitness examples carry extra per-family info in a second pickle.
        faminfo_path = f'{EXAMPLES_PATH}/{example_type}_examples_faminfo.pickle'
        with open(faminfo_path, "rb") as handle:
            faminfo = load_pickle(handle)
        examples = {
            fam: dict(info, fitness=faminfo.get(fam, []))
            for fam, info in examples.items()
        }
    fams = {'show_items': examples, 'total_matches': total_matches}
    with open("example_fams.json", "w") as handle:
        dump(fams['show_items'], handle, indent=4)
    return JsonResponse(fams)
def get_word_vector(request):
    """Return the embedding vector of the ``word`` GET parameter as JSON.

    Responds with ``code`` 1 and an error message when the parameter is
    missing or the word is unknown, otherwise ``code`` 0 and the vector.
    """
    # Use a context manager: the previous one-liner leaked the file handle.
    # WV.pkl is assumed to be trusted local data (pickle is unsafe on
    # untrusted input).
    with open("WV.pkl", "rb") as fh:
        WV = load_pickle(fh)
    word = request.GET.get("word")
    if word is None:
        return JsonResponseZh({
            "code": 1,
            "msg": "请求错误",
            "error": "请指定 word 参数",
        })
    vector = WV.get_word_vector(word)
    if vector is None:
        return JsonResponseZh({
            "code": 1,
            "msg": "请求错误",
            "error": "未找到请求词",
        })
    return JsonResponseZh({
        "code": 0,
        "msg": "查询成功",
        "data": vector.tolist(),
    })
def authenticate() -> Resource:
    """
    A simple method to authenticate a user with Google Drive API.

    For the first run (or if script does not have the required permissions),
    this method will ask the user to login with a browser window, and then
    save a pickle file with the credentials obtained. For subsequent runs,
    this pickle file will be used directly, ensuring that the user does not
    have to login for every run of this script.

    Returns
    --------
    An instance of `Resource` that can be used to interact with the
    Google Drive API.
    """
    # Read-only scope: the script never modifies the user's Drive.
    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
    creds: Optional[Credentials] = None
    if exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = load_pickle(token)
    if not (creds and creds.valid):
        if creds and creds.expired and creds.refresh_token:
            # Expired but refreshable -- no user interaction required.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            dump_pickle(creds, token)
    return build('drive', 'v3', credentials=creds)
def __authenticate(self) -> discovery.Resource:
    """ Authenticates user session using Drive API.

    Remarks
    --------
    Attempts to open a browser window asking user to login and grant
    permissions during the first run. Saves a `.pickle` file to skip
    this step in future runs

    Returns
    --------
    Object of `googleapiclient.discovery.Resource`
    """
    # Selectively asks for read-only permission
    SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
    creds: Optional[Credentials] = None
    if path_exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = load_pickle(token)
    if not (creds and creds.valid):
        if creds and creds.expired and creds.refresh_token:
            # Stale token with a refresh token: renew silently.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES)
            creds = flow.run_local_server(port=0)
        with open("token.pickle", "wb") as token:
            dump_pickle(creds, token)  # save credentials for next run
    return googleapiclient.discovery.build("drive", "v3", credentials=creds)
def main(argv=None):
    """Command-line entry point: build the static site from RST sources.

    Parses options, loads ``_config.yml``, regenerates stale HTML output
    through Genshi layouts, and persists the article store before exiting
    via ``sys.exit()``.  (Python 2 code: print statements, ``unicode``.)
    """
    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]"
        )
    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")
    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")
    op.add_option('-p', dest='package', default='',
                  help="Generate documentation for a Python package (optional)")
    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")
    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return
    # normalise various options and load from the config file
    if args:
        source_directory = args[0]
    else:
        source_directory = getcwd()
    source_directory = abspath(source_directory)
    chdir(source_directory)
    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)
    config_file = join_path(source_directory, '_config.yml')
    if not isfile(config_file):
        raise IOError("Couldn't find: %s" % config_file)
    config_file_obj = open(config_file, 'rb')
    config_data = config_file_obj.read()
    config_file_obj.close()
    config = load_yaml(config_data)
    index_pages = config.pop('index_pages')
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")
    # index_pages is a list of single-entry mappings; flatten to one dict.
    # (Python 2: dict.keys()/values() return lists, so [0] indexing works.)
    index_pages = dict(
        (index_page.keys()[0], index_page.values()[0])
        for index_page in index_pages
        )
    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)
    verbose = not options.quiet
    # see if there's a persistent data file to read from
    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}
    # figure out what the generated files would be
    source_files = [
        file for file in listfiles(source_directory)
        if file.endswith('.txt')
        ]
    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
        ]
    index_files = [join_path(output_directory, index) for index in index_pages]
    # handle --clean
    if options.clean:
        for file in generated_files + index_files + [data_file]:
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        sys.exit()
    # figure out layout dependencies for the source .txt files
    layouts = {}
    sources = {}

    def init_rst_source(source_file, destname=None):
        # Parse one .txt source (YAML frontmatter + RST body) into sources.
        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        # files without a frontmatter block are silently skipped
        if not content.startswith('---'):
            return
        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()
        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')
        if layout not in layouts:
            load_layout(layout, source_directory, layouts)
        content = replace_yaml_frontmatter('', content)
        # everything before the MORE_LINE marker becomes the article "lead"
        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content
        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')
        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': filetype
            }

    for source_file in source_files:
        init_rst_source(source_file)
    # and likewise for the index_pages
    render_last = set()
    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            # pure Genshi index pages have no RST content of their own
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts', index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': index_page,
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index'
                }
        else:
            init_rst_source(index_source, index_page)
            # index pages need the full article DB, so render them last
            render_last.add(index_source)
    # update the envs for all the source files
    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            # outermost layout env first, so closer layouts override it
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))
    # figure out which files to regenerate
    if not options.force:
        no_regen = set()
        for source in sources:
            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                # output file missing -> must regenerate; skip the checks
                continue
            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True
            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]
            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break
            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break
            if not dirty:
                no_regen.add(source)
        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]
        remaining = set(sources.keys())
        if remaining == render_last:
            # only index pages remain and none are dirty -> drop them too
            for source in remaining.intersection(no_regen):
                del sources[source]
    # regenerate!  (RST articles first, non-RST index pages last)
    for source, source_info in sorted(
        sources.items(), key=lambda x: x[1]['__rst__'] == False
        ):
        info = config.copy()
        info.update(source_info)
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'], info['__path__'])
            print LINE
            print
        if info['__rst__']:
            output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]
        # render through the layout chain, innermost first
        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(
                content=output, yatidb=data_dict, **info
                ).render('xhtml', encoding=None)
        if isinstance(output, unicode):
            output = output.encode('utf-8')
        data_dict[info['__name__']] = info
        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()
        if verbose:
            print 'Done!'
    # persist the data file to disk
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    sys.exit()
    # @/@ site config
    # @/@ need to fix up this old segment of the code to the latest approach
    # NOTE(review): everything from here on is dead code -- the unconditional
    # sys.exit() above always fires first.
    if options.package:
        package_root = options.package
        files = []
        add_file = files.append
        package = None
        # the package name is the last non-empty path segment
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError(
                "Couldn't find the package name from %r" % package_root
                )
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                # map the file path to a dotted module name
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    # strip the quoting: r'''...''' or '''...'''
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                # only keep docstrings that start with an '==' RST title
                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                # NOTE(review): root_path is never assigned in this function;
                # presumably a module-level name -- confirm before relying on it.
                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                    )
                info['__outdir__'] = output_directory
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    # @/@ fix up the old index.js/json generator
    try:
        import json
    except ImportError:
        import simplejson as json
    index_js_template = join_path(output_directory, 'index.js.template')
    if isfile(index_js_template):
        # NOTE(review): ``items`` is not defined anywhere in this function;
        # this branch would raise NameError if it were ever reached.
        index_json = json.dumps([
            [_art['__name__'], _art['title'].encode('utf-8')]
            for _art in sorted(
                [item for item in items
                 if item.get('x-created') and
                 item.get('x-type', 'blog') == 'blog'],
                key=lambda i: i['x-created']
                )
            ])
        index_js_template = open(index_js_template, 'rb').read()
        index_js = open(join_path(output_directory, 'index.js'), 'wb')
        index_js.write(index_js_template % index_json)
        index_js.close()
# Script chunk: optionally download the facenet weights, load the face
# recognition models, then prepare the local training-data directory.
print('Could not find weights for facenet model.')
confirm_download = input("Weights file is 88 Mb. Download now? (y/n): ")
if confirm_download.lower() == 'y':
    # Stream the download so the whole file is never held in memory.
    r = requests.get(weights_url, stream=True)
    with open(facenet_weights_dir, "wb") as f:
        for data in tqdm(r.iter_content()):
            f.write(data)
    print(
        '>> Downloaded facenet weights to: {}'.format(facenet_weights_dir))
facenet_model = load_model('recognition/facenet/model.h5')
facenet_model.load_weights('recognition/facenet/weights.h5')
# For some reason the first prediction takes way longer
facenet_model.predict_on_batch(np.zeros(shape=(1, 160, 160, 3)))
with open('recognition/facenet/trainer.pickle', 'rb') as f:
    facenet_embeddings = load_pickle(f)
# Load LBPH model
lbph_model = cv2.face.LBPHFaceRecognizer_create()
lbph_model.read('detection/lbph_trainer.yml')
print('>> Done loading face recognition models.')
# Create folder(s) to store training data
name = input("Enter your name: ").lower()
if not os.path.exists('data/'):
    # BUGFIX: os.makedir does not exist (AttributeError); use os.makedirs.
    os.makedirs('data/')
data_dir = os.path.join(os.getcwd(), 'data/')
names = [
    name for name in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, name))
]
def rw_data(path, obj=None, parameters=None):
    """ Read/write from/to a file.
    See <https://pandas.pydata.org/pandas-docs/stable/io.html>.
    Note that the file must have an extension.

    Parameters
    ----------
    path : str
        Path name of the file. It must start with ``./``.
    obj : generic object
        Object to be read (pass ``None``) or written (anything else).
    parameters : dict
        Dictionary of parameters for the IO operation

    Returns
    -------
    The object read from disk when ``obj`` is None; otherwise None.
    """
    extension = path.split('.')[-1].lower()
    # Read
    if obj is None:
        if extension == 'pkl':
            # Context managers close the handles even on error; the previous
            # ``load_pickle(open(path, 'rb'))`` form leaked file descriptors.
            with open(path, 'rb') as fh:
                obj = load_pickle(fh)
        elif extension == 'json':
            with open(path, 'rb') as fh:
                obj = load_json(fh)
        elif extension in {'hdf5', 'h5', 'hdf'}:
            if parameters is None:
                obj = read_hdf(path)
            else:
                obj = read_hdf(path, **parameters)
        elif extension == 'csv':
            if parameters is None:
                obj = read_csv(path)
            else:
                obj = read_csv(path, **parameters)
        else:
            print('WARNING: No file format extension specified')
        return obj
    # Write
    else:
        # Make sure the directory exists
        os.makedirs(os.path.dirname(path), exist_ok=True)
        if extension == 'pkl':
            with open(path, 'wb') as fh:
                dump_pickle(obj, fh)
        elif extension == 'json':
            with open(path, 'w') as fh:
                dump_json(obj, fp=fh)
        elif extension in {'hdf5', 'h5', 'hdf'}:
            obj.to_hdf(path, 'key', mode='w')
        elif extension == 'csv':
            if parameters is None:
                obj.to_csv(path)
            else:
                obj.to_csv(path, **parameters)
        else:
            print('WARNING: No file format extension specified')
def main(argv=None):
    """Command-line entry point for yatiblog: regenerate the static site.

    Reads ``yatiblog.conf``, builds article, source-code and index pages
    through Genshi layouts, and persists the article store on exit via an
    ``atexit`` hook.  (Python 2 code: print statements, ``unicode``,
    list-returning ``dict.keys()``.)
    """
    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]"
        )
    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")
    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")
    op.add_option('-p', dest='package', default='',
                  help="Generate documentation for a Python package (optional)")
    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")
    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return
    # Normalise various options and load from the config file.
    if args:
        source_directory = args[0]
        source_directory_specified = True
    else:
        source_directory = getcwd()
        source_directory_specified = False
    source_directory = abspath(source_directory)
    chdir(source_directory)
    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)
    config_file = join_path(source_directory, 'yatiblog.conf')
    if isfile(config_file):
        config_file_obj = open(config_file, 'rb')
        config_data = config_file_obj.read()
        config_file_obj.close()
        config = load_yaml(config_data)
    elif not source_directory_specified:
        # no config file: fatal when running against the implicit cwd,
        # otherwise fall back to an empty config
        raise IOError("Couldn't find: %s" % config_file)
    else:
        config = {}
    index_pages = config.pop('index_pages', [])
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")
    # list of single-entry mappings -> {output_page: source_file}
    # (Python 2 dict.keys()/values() return lists, hence the [0] indexing)
    index_pages = dict(
        (index_page.keys()[0], index_page.values()[0])
        for index_page in index_pages
        )
    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)
    # Optional "code_pages": render git-tracked source files as doc pages.
    code_pages = config.pop('code_pages', {})
    if code_pages:
        code_layout = code_pages['layout']
        code_paths = code_pages['paths']
        code_files = {}
        git_root = realpath(SCMConfig().root)
        for output_filename, input_pattern in code_paths.items():
            files = run_command(['git', 'ls-files', input_pattern],
                                cwd=git_root)
            files = filter(None, files.splitlines())
            # a '%' in the output name marks it as a per-file pattern
            if '%' in output_filename:
                output_pattern = True
            else:
                output_pattern = False
            for file in files:
                directory = basename(dirname(file))
                filename, ext = splitext(basename(file))
                if output_pattern:
                    dest = output_filename % {
                        'dir': directory, 'filename': filename, 'ext': ext
                        }
                else:
                    dest = output_filename
                code_files[
                    join_path(output_directory, dest + '.html')
                    ] = join_path(git_root, file)
    else:
        code_files = {}
        code_layout = None
    verbose = not options.quiet
    # See if there's a persistent data file to read from.
    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}

    # Persist the data file to disk.
    def persist_data_file():
        if data_file:
            data_file_obj = open(data_file, 'wb')
            dump_pickle(data_dict, data_file_obj)
            data_file_obj.close()

    atexit.register(persist_data_file)
    # Figure out what the generated files would be.
    source_files = [
        file for file in listfiles(source_directory)
        if file.endswith('.txt')
        ]
    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
        ]
    index_files = [join_path(output_directory, index) for index in index_pages]
    # Handle --clean support.
    if options.clean:
        for file in generated_files + index_files + [data_file] + \
            code_files.keys():
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        data_dict.clear()
        sys.exit()
    # Figure out layout dependencies for the source .txt files.
    layouts = {}
    sources = {}

    def init_rst_source(source_file, destname=None):
        # Parse one .txt source (YAML frontmatter + RST body) into sources.
        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        # files without a frontmatter block are silently skipped
        if not content.startswith('---'):
            return
        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()
        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')
        if layout not in layouts:
            load_layout(layout, source_directory, layouts)
        content = replace_yaml_frontmatter('', content)
        # everything before the MORE_LINE marker becomes the article "lead"
        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content
        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')
        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': basename(destname),  # filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': 'text',
            '__filetype__': filetype
            }

    for source_file in source_files:
        init_rst_source(source_file)

    # And likewise for any source code files.
    def init_rst_source_code(source_path, destname):
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        filebase, filetype = splitext(basename(source_path))
        filebase = filebase.lower()
        sources[source_path] = {
            '__content__': content,
            '__deps__': [],
            '__env__': {'title': filebase},
            '__genfile__': destname,
            '__id__': source_path,
            '__layout__': code_layout,
            '__lead__': '',
            '__mtime__': stat(source_path).st_mtime,
            '__name__': basename(destname),  # filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': 'code',
            '__filetype__': filetype
            }

    if code_layout and code_layout not in layouts:
        load_layout(code_layout, source_directory, layouts)
    for destname, source_path in code_files.items():
        init_rst_source_code(source_path, destname)
    # And likewise for the ``index_pages``.
    render_last = set()
    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            # pure Genshi index pages have no RST content of their own
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts', index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': basename(index_page),
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index',
                '__filetype__': 'genshi'
                }
        else:
            init_rst_source(index_source, index_page)
            # index pages need the full article DB, so render them last
            render_last.add(index_source)
    # Update the envs for all the source files.
    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            # outermost layout env first, so closer layouts override it
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))
    # Figure out which files to regenerate.
    if not options.force:
        no_regen = set()
        for source in sources:
            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                # output file missing -> must regenerate; skip the checks
                continue
            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True
            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]
            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break
            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break
            if not dirty:
                no_regen.add(source)
        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]
        remaining = set(sources.keys())
        if remaining == render_last:
            # only index pages remain and none are dirty -> drop them too
            for source in remaining.intersection(no_regen):
                del sources[source]
    # Regenerate!  (RST pages first, non-RST index pages last)
    items = sorted(sources.items(), key=lambda x: x[1]['__rst__'] == False)
    for source, source_info in items:
        info = config.copy()
        info.update(source_info)
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'], info['__path__'])
            print LINE
            print
        if info['__type__'] == 'code':
            # Literate-programming split: alternate comment-doc and code
            # sections, render the docs as RST and syntax-highlight the code.
            content = info['__content__']
            conf = PROGLANGS[info['__filetype__']]
            if conf[2]:
                content = conf[2](content)
            comment_matcher = conf[3]
            lines = content.split('\n')
            include_section = None
            # drop a shebang line so it isn't treated as a comment doc
            if lines and lines[0].startswith('#!'):
                lines.pop(0)
            sections = []; new_section = sections.append
            docs_text = []; docs_out = docs_text.append
            code_text = []; code_out = code_text.append
            for line in lines:
                if comment_matcher.match(line):
                    line = comment_matcher.sub('', line)
                    if line == '<yatiblog.comment>':
                        include_section = 1
                    else:
                        docs_out(line)
                else:
                    if not line.strip():
                        # blank line: keep the pending docs only if their last
                        # line is a repeated-char underline (RST-style rule)
                        if docs_text and not include_section:
                            last_line = docs_text[-1].strip()
                            if last_line:
                                last_line_char = last_line[0]
                                for char in last_line:
                                    if char != last_line_char:
                                        break
                                else:
                                    include_section = 1
                    else:
                        if docs_text:
                            include_section = 1
                    if docs_text:
                        if include_section:
                            new_section({
                                'docs_text': '\n'.join(docs_text) + '\n',
                                'code_text': '\n'.join(code_text)
                                })
                            docs_text[:] = []
                            code_text[:] = []
                            include_section = None
                        else:
                            docs_text[:] = []
                        code_out(line)
                    else:
                        code_out(line)
            new_section({'docs_text': '',
                         'code_text': '\n'.join(code_text)})
            docs = conf[6].join(part['docs_text'] for part in sections)
            code = conf[4].join(part['code_text'] for part in sections)
            docs_html, props = render_rst(docs, with_props=1)
            if ('title' in props) and props['title']:
                info['title'] = props['title']
            code = code.replace('\t', ' ')
            code_html = highlight(code, get_lexer_by_name(conf[0]),
                                  SYNTAX_FORMATTER)
            docs_split = conf[7].split(docs_html)
            code_split = conf[5].split(code_html)
            output = info['__output__'] = []
            out = output.append
            if docs_split and docs_split[0]:
                diff = 0
                docs_split.insert(0, u'')
            else:
                diff = 1
            last = len(docs_split) - 2
            for i in range(last + 1):
                code = code_split[i+diff].split(u'<br/>')
                # trim leading/trailing empty lines from the code fragment
                while (code and code[0] == ''):
                    code.pop(0)
                while (code and code[-1] == ''):
                    code.pop()
                code = u'<br />'.join(code)
                if code:
                    if i == last:
                        # NOTE(review): the closing </pre></div> is omitted
                        # for the final section -- presumably supplied by the
                        # layout template; confirm.
                        code = u'<div class="syntax"><pre>' + code
                    else:
                        code = u'<div class="syntax"><pre>' + code + \
                               "</pre></div>"
                out((docs_split[i], code))
        elif info['__rst__']:
            with_props = info.get('with_props', False)
            if with_props:
                output, props = render_rst(info['__content__'], with_props=1)
                if ('title' in props) and props['title']:
                    info['title'] = props['title']
                info['__output__'] = output
            else:
                output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]
        # render through the layout chain, innermost first
        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(
                content=output, yatidb=data_dict, **info
                ).render('xhtml', encoding=None)
        if isinstance(output, unicode):
            output = output.encode('utf-8')
        data_dict[info['__name__']] = info
        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()
        if verbose:
            print 'Done!'
    sys.exit()
def main(argv, genfiles=None):
    """Command-line entry point: render text/package sources to HTML or LaTeX.

    Gathers the inputs (an explicit ``genfiles`` list, the ``.py`` modules of
    the package given via ``-c``, or the file paths remaining in ``argv``),
    renders each with ``render_rst`` — optionally wrapped in a Genshi
    template — writes the results under the output directory, and finally
    emits index files for the ``-p`` path pattern.

    NOTE: Python 2 only — uses ``print`` statements, ``dict.iteritems`` /
    ``itervalues``, ``execfile`` and ``unicode``.
    """
    op = OptionParser()
    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")
    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")
    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")
    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")
    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")
    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")
    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")
    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")
    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")
    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        # optparse calls sys.exit() for --help / bad options; swallow it so
        # programmatic callers just get a plain return.
        return
    # Build the author metadata maps (email -> author, author -> homepage).
    authors = options.authors
    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}
    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}
    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                # First URL wins as the author's link.
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author
    authors['unknown'] = ['unknown']
    output_path = options.output_path.rstrip('/')
    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)
    root_path = options.root_path
    # Optional per-site settings: a `.siteinfo` python file exporting INFO.
    # NOTE(review): execfile runs arbitrary code from the output directory —
    # acceptable only because the directory is under the operator's control.
    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '', 'site_nick': '', 'site_description': '',
            'site_title': ''
            }
    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)
    format = options.format
    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)
    # A Genshi template is only used for html output when -t was given.
    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(template_file.read(), loader=template_loader,
                                  encoding='utf-8')
        template_file.close()
    # Load the persistent article store, if one was requested via -d.
    # NOTE(review): data_dict is only bound when data_file is non-empty, but
    # the `options.pattern` branch below reads data_dict unconditionally —
    # running with -p and without -d would raise NameError; confirm intended.
    data_file = options.data_file
    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}
    input_encoding = options.input_encoding
    output_encoding = options.output_encoding
    # Decide what to convert: explicit list, package walk, or argv paths.
    if genfiles:
        files = genfiles
    elif options.package:
        package_root = options.package
        files = []
        add_file = files.append
        # The package name is the last non-empty path segment.
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError(
                "Couldn't find the package name from %r" % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                # Derive the dotted module name from the file path.
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                # Pull out the module docstring; it becomes the doc text when
                # it looks like a reST-titled block (starts with '==').
                # NOTE(review): when the regex does not match, docstring stays
                # None (not '') and is passed to add_file below — confirm
                # downstream tolerates that.
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        # r'''...''' — strip the prefix and triple quotes.
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                        module_source = docstring_regex.sub(
                            '', module_source, 1)
                    else:
                        docstring = ''
                info = {}
                if root_path and isabs(filename) and filename.startswith(
                    root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)
                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    else:
        files = []
        add_file = files.append
        for filename in args:
            if not isfile(filename):
                raise IOError(
                    "%r doesn't seem to be a valid file!" % filename)
            if root_path and isabs(filename) and filename.startswith(
                root_path):
                path = filename[len(root_path) + 1:]
            else:
                path = filename
            info = get_git_info(filename, path)
            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]
            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()
            # Split off the "lead" (teaser) portion above the MORE_LINE
            # marker, then remove the marker from the full text.
            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''
            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))
    # Render every gathered file.
    for source, source_lead, info in files:
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'],
                split_path(output_path)[1])
            print LINE
            print
        if template:
            output, props = render_rst(source, format, input_encoding, True)
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding,
                    True)[0].encode(output_encoding)
            output = template.generate(content=output, info=info,
                                       authors=authors,
                                       email2author=email2author,
                                       author2link=author2link,
                                       **siteinfo).render(
                'xhtml', encoding=output_encoding)
        else:
            # No template: render the document as a standalone whole.
            output, props = render_rst(source, format, input_encoding, True,
                                       as_whole=True)
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True,
                    as_whole=True)[0].encode(output_encoding)
        if data_file:
            data_dict[info['__path__']] = info
        if stdout:
            print output
        else:
            output_filename = join_path(
                output_path, '%s.%s' % (info['__name__'], format))
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
    if verbose:
        print 'Done!'
    # Persist the article store for the next run.
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    # Index-file generation for articles whose __outdir__ matches -p.
    if options.pattern:
        pattern = options.pattern
        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
            ]
        # index.js/json
        import json
        index_js_template = join_path(output_path, 'index.js.template')
        if isfile(index_js_template):
            # Blog posts only (have x-created, x-type defaults to 'blog'),
            # sorted by creation date.
            index_json = json.dumps(
                [[_art['__name__'], _art['title'].encode('utf-8')]
                 for _art in sorted([
                     item for item in items
                     if item.get('x-created') and
                     item.get('x-type', 'blog') == 'blog'
                 ], key=lambda i: i['x-created'])])
            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()
        # NOTE(review): template_root/template_loader (and `template` when
        # mode is set) are only bound when -t was given — running -p without
        # -t would raise NameError here; confirm intended usage.
        for name, mode, format in INDEX_FILES:
            pname = name.split('.', 1)[0]
            template_file = None
            # Prefer a site-nick-specific template when one exists.
            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root,
                    '%s.%s.genshi' % (pname, siteinfo['site_nick']))
                if isfile(template_path):
                    template_file = open(template_path, 'rb')
            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)
                template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(template_file.read(),
                                           loader=template_loader,
                                           encoding='utf-8')
            template_file.close()
            poutput = page_template.generate(items=items[:], authors=authors,
                                             email2author=email2author,
                                             author2link=author2link,
                                             root_path=output_path,
                                             **siteinfo).render(format)
            poutput = unicode(poutput, output_encoding)
            if mode:
                # Wrap the page inside the main site template.
                output = template.generate(alternative_content=poutput,
                                           authors=authors,
                                           **siteinfo).render(format)
            else:
                output = poutput
            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)
            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
# Load run configuration for the tied-weights autoencoder from JSON.
# NOTE(review): load_json / load_pickle / logger / tf are assumed to be
# bound earlier in the file (json.load / pickle.load aliases) — confirm.
with open('tied_weights_settings.json', 'r') as settings_fp:
    settings = load_json(settings_fp)
batch_size = settings['batch_size']
image_height = settings['image_height']
image_width = settings['image_width']
model_checkpoint = settings['model_checkpoint']
n_epochs = settings['n_epochs']
output_folder = settings['output_folder']
shapes_file = settings['shapes_file']
test_size = settings['test_size']
image_shape = [image_height, image_width]
# Training data: a pickled collection of shape images.
with open(shapes_file, 'rb') as shapes_fp:
    shapes_data = load_pickle(shapes_fp)
logger.info('loaded %d items from %s' % (len(shapes_data), shapes_file))
# Network topology: symmetric autoencoder, so the decoder mirrors the
# encoder (hidden3 == hidden1, outputs == inputs).
n_inputs = image_height * image_width
n_hidden1 = 300
n_hidden2 = 150
n_hidden3 = n_hidden1
n_outputs = n_inputs
# Optimisation hyperparameters.
learning_rate = 0.01
l2_reg = 0.0001
# TF1.x contrib API (removed in TF2).
activation = tf.nn.elu
# (sic: 'regularlizer' is misspelled; kept as-is because later code may
# reference this exact name.)
regularlizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.contrib.layers.variance_scaling_initializer()
def main(argv=None):
    """Command-line entry point for the static-site generator.

    Reads ``yatiblog.conf``, discovers the ``.txt`` article sources, any
    configured literate-programming ``code_pages`` (via git ls-files) and
    the ``index_pages``, works out which outputs are stale, and regenerates
    them through their Genshi layout chains. State is persisted to the
    ``-d`` data file via an atexit hook.

    NOTE: Python 2 only — uses ``print`` statements and ``unicode``.
    """
    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]"
        )
    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")
    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")
    op.add_option('-p', dest='package', default='',
                  help="Generate documentation for a Python package (optional)")
    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")
    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        # optparse exits on --help / bad flags; return quietly instead.
        return
    # Normalise various options and load from the config file.
    if args:
        source_directory = args[0]
        source_directory_specified = True
    else:
        source_directory = getcwd()
        source_directory_specified = False
    source_directory = abspath(source_directory)
    chdir(source_directory)
    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)
    config_file = join_path(source_directory, 'yatiblog.conf')
    if isfile(config_file):
        config_file_obj = open(config_file, 'rb')
        config_data = config_file_obj.read()
        config_file_obj.close()
        config = load_yaml(config_data)
    elif not source_directory_specified:
        # Only hard-fail when the user implicitly ran in the cwd.
        raise IOError("Couldn't find: %s" % config_file)
    else:
        config = {}
    # index_pages is a list of single-entry {output: source} mappings;
    # flatten it into one dict.
    index_pages = config.pop('index_pages', [])
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")
    index_pages = dict(
        (index_page.keys()[0], index_page.values()[0])
        for index_page in index_pages
        )
    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)
    # Map configured code_pages patterns to concrete files via git ls-files.
    code_pages = config.pop('code_pages', {})
    if code_pages:
        code_layout = code_pages['layout']
        code_paths = code_pages['paths']
        code_files = {}
        git_root = realpath(SCMConfig().root)
        for output_filename, input_pattern in code_paths.items():
            ignore_pattern = None
            if isinstance(input_pattern, dict):
                # Extended form: {'pattern': ..., 'ignore': ...}
                definition = input_pattern
                input_pattern = definition['pattern']
                if 'ignore' in definition:
                    ignore_pattern = definition['ignore']
            files = run_command(['git', 'ls-files', input_pattern],
                                cwd=git_root)
            files = filter(None, files.splitlines())
            if ignore_pattern is not None:
                ignore_files = run_command(
                    ['git', 'ls-files', ignore_pattern], cwd=git_root
                    )
                for file in ignore_files.splitlines():
                    if file in files:
                        files.remove(file)
            # '%' marks a templated destination filename.
            if '%' in output_filename:
                output_pattern = True
            else:
                output_pattern = False
            for file in files:
                directory = basename(dirname(file))
                filename, ext = splitext(basename(file))
                if output_pattern:
                    dest = output_filename % {
                        'dir': directory, 'filename': filename, 'ext': ext
                        }
                else:
                    dest = output_filename
                code_files[
                    join_path(output_directory, dest + '.html')
                    ] = [file, join_path(git_root, file)]
    else:
        code_files = {}
        code_layout = None
    verbose = not options.quiet
    # See if there's a persistent data file to read from.
    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}
    # Persist the data file to disk.
    def persist_data_file():
        if data_file:
            data_file_obj = open(data_file, 'wb')
            dump_pickle(data_dict, data_file_obj)
            data_file_obj.close()
    # Registered via atexit so the store is saved even on sys.exit().
    atexit.register(persist_data_file)
    # Figure out what the generated files would be.
    source_files = [
        file for file in listfiles(source_directory)
        if file.endswith('.txt')
        ]
    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
        ]
    index_files = [join_path(output_directory, index)
                   for index in index_pages]
    # Handle --clean support.
    if options.clean:
        for file in generated_files + index_files + [data_file] + \
            code_files.keys():
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        data_dict.clear()
        sys.exit()
    # Figure out layout dependencies for the source .txt files.
    layouts = {}
    sources = {}
    def init_rst_source(source_file, destname=None):
        # Register a reST article (must start with '---' YAML frontmatter)
        # in `sources`, loading its layout on demand.
        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        if not content.startswith('---'):
            return
        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()
        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')
        if layout not in layouts:
            load_layout(layout, source_directory, layouts)
        content = replace_yaml_frontmatter('', content)
        # Split the teaser ("lead") off at the MORE_LINE marker.
        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content
        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')
        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': basename(destname), # filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': 'text',
            '__filetype__': filetype
            }
    for source_file in source_files:
        init_rst_source(source_file)
    # And likewise for any source code files.
    def init_rst_source_code(relative_source_path, source_path, destname):
        # Register a literate-programming source file; the language is taken
        # from the extension or sniffed from a shebang line via SHEBANGS.
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        filebase, filetype = splitext(basename(source_path))
        filebase = filebase.lower()
        if not filetype:
            if content.startswith('#!'):
                content = content.split('\n', 1)
                if len(content) == 2:
                    shebang, content = content
                else:
                    shebang = content[0]
                    content = ''
                for interp, ext in SHEBANGS:
                    if interp in shebang:
                        filetype = ext
                        break
        if not filetype:
            raise ValueError("Unknown file type: %s" % source_path)
        sources[source_path] = {
            '__content__': content,
            '__deps__': [],
            '__env__': {'title': filebase},
            '__genfile__': destname,
            '__gitpath__': relative_source_path,
            '__id__': source_path,
            '__layout__': code_layout,
            '__lead__': '',
            '__mtime__': stat(source_path).st_mtime,
            '__name__': basename(destname), # filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': 'code',
            '__filetype__': filetype
            }
    if code_layout and code_layout not in layouts:
        load_layout(code_layout, source_directory, layouts)
    for destname, (relative_source_path, source_path) in code_files.items():
        init_rst_source_code(relative_source_path, source_path, destname)
    # And likewise for the ``index_pages``.
    render_last = set()
    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            # Pure-template index page: no reST content at all.
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts',
                                    index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': basename(index_page),
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index',
                '__filetype__': 'genshi'
                }
        else:
            init_rst_source(index_source, index_page)
            # Index pages must be rendered after the articles they list.
            render_last.add(index_source)
    # Update the envs for all the source files.
    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            # Apply parent-layout envs first so child layouts override them.
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))
    # Figure out which files to regenerate.
    if not options.force:
        no_regen = set()
        for source in sources:
            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                # NOTE(review): bare except — presumably guarding against a
                # missing output file (OSError); it also hides other errors.
                continue
            # A file is dirty if its source, any layout in its chain, or any
            # include dependency is newer than the generated output.
            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True
            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]
            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break
            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break
            if not dirty:
                no_regen.add(source)
        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]
        # If only index pages are left and they were all clean, drop them too.
        remaining = set(sources.keys())
        if remaining == render_last:
            for source in remaining.intersection(no_regen):
                del sources[source]
    BLANK_CODE_LINE = \
        '<div class="syntax"><pre><div class="syntax"><pre></pre></div>'
    # Regenerate!
    # Sort so reST sources (False sorts first) render before index pages.
    items = sorted(sources.items(), key=lambda x: x[1]['__rst__'] == False)
    for source, source_info in items:
        info = config.copy()
        info.update(source_info)
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'],
                                           info['__path__'])
            print LINE
            print
        if info['__type__'] == 'code':
            # Literate-programming mode: split the file into alternating
            # docs/code sections, render docs as reST and code via Pygments.
            content = info['__content__']
            conf = PROGLANGS[info['__filetype__']]
            if conf[2]:
                # Optional per-language preprocessing hook.
                content = conf[2](content)
            comment_matcher = conf[3]
            lines = content.split('\n')
            include_section = None
            if lines and lines[0].startswith('#!'):
                lines.pop(0)
            sections = []; new_section = sections.append
            docs_text = []; docs_out = docs_text.append
            code_text = []; code_out = code_text.append
            for line in lines:
                if comment_matcher.match(line):
                    line = comment_matcher.sub('', line)
                    if line == '<yatiblog.comment>':
                        include_section = 1
                    else:
                        docs_out(line)
                else:
                    if not line.strip():
                        # Blank line: include pending docs only if their last
                        # line is a "ruler" (all one repeated character).
                        if docs_text and not include_section:
                            last_line = docs_text[-1].strip()
                            if last_line:
                                last_line_char = last_line[0]
                                for char in last_line:
                                    if char != last_line_char:
                                        break
                                else:
                                    include_section = 1
                    else:
                        # Real code line after comments: keep those docs.
                        if docs_text:
                            include_section = 1
                    if docs_text:
                        if include_section:
                            new_section({
                                'docs_text': '\n'.join(docs_text) + '\n',
                                'code_text': '\n'.join(code_text)
                                })
                            docs_text[:] = []
                            code_text[:] = []
                            include_section = None
                        else:
                            # Discard comments that never became a section.
                            docs_text[:] = []
                        code_out(line)
                    else:
                        code_out(line)
            # Flush the trailing code-only section.
            new_section({'docs_text': '',
                         'code_text': '\n'.join(code_text)})
            docs = conf[6].join(part['docs_text'] for part in sections)
            code = conf[4].join(part['code_text'] for part in sections)
            docs_html, props = render_rst(docs, with_props=1)
            if ('title' in props) and props['title']:
                info['title'] = props['title']
            code = code.replace('\t', ' ')
            code_html = highlight(code, get_lexer_by_name(conf[0]),
                                  SYNTAX_FORMATTER)
            # Re-pair the rendered docs/code fragments by splitting on the
            # per-language section delimiters.
            docs_split = conf[7].split(docs_html)
            code_split = conf[5].split(code_html)
            output = info['__output__'] = []
            out = output.append
            if docs_split and docs_split[0]:
                diff = 0
                docs_split.insert(0, u'')
            else:
                diff = 1
            last = len(docs_split) - 2
            for i in range(last + 1):
                # Trim leading/trailing blank <br/> lines from each fragment.
                code = code_split[i+diff].split(u'<br/>')
                while (code and code[0] == ''):
                    code.pop(0)
                while (code and code[-1] == ''):
                    code.pop()
                code = u'<br />'.join(code)
                if code:
                    if i == last:
                        code = u'<div class="syntax"><pre>' + code
                    else:
                        code = u'<div class="syntax"><pre>' + code + \
                            "</pre></div>"
                out((docs_split[i], code))
            # NOTE(review): drops leading doc-less blank code entries, but if
            # the first entry has non-empty docs the loop body does nothing
            # and this never terminates — looks like a missing `else: break`;
            # confirm against upstream.
            while output and output[0][1] == BLANK_CODE_LINE:
                if not output[0][0]:
                    output.pop(0)
        elif info['__rst__']:
            with_props = info.get('with_props', False)
            if with_props:
                output, props = render_rst(info['__content__'], with_props=1)
                if ('title' in props) and props['title']:
                    info['title'] = props['title']
                info['__output__'] = output
            else:
                output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''
        # Run the output through the layout chain, innermost first.
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]
        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(
                content=output,
                yatidb=data_dict,
                **info
                ).render('xhtml', encoding=None)
        if isinstance(output, unicode):
            output = output.encode('utf-8')
        data_dict[info['__name__']] = info
        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()
    if verbose:
        print 'Done!'
    # The atexit hook persists data_dict on exit.
    sys.exit()
def load_pickle(self, filepath):
    """Read the file at *filepath* and return its unpickled contents."""
    # Delegates to the module-level ``load_pickle`` helper (the pickle.load
    # alias), which this method name shadows only within the class namespace.
    with open(filepath, 'rb') as handle:
        return load_pickle(handle)
def main(argv=None):
    """Command-line entry point (older variant) for the site generator.

    Reads ``_config.yml``, registers the ``.txt`` article sources and the
    configured ``index_pages``, determines which outputs are stale, renders
    them through their Genshi layout chains, persists the data file, and
    exits.

    NOTE: Python 2 only — uses ``print`` statements and ``unicode``.
    """
    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]")
    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")
    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")
    op.add_option('-p', dest='package', default='',
                  help="Generate documentation for a Python package (optional)")
    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")
    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        # optparse exits on --help / bad flags; return quietly instead.
        return
    # normalise various options and load from the config file
    if args:
        source_directory = args[0]
    else:
        source_directory = getcwd()
    source_directory = abspath(source_directory)
    chdir(source_directory)
    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)
    config_file = join_path(source_directory, '_config.yml')
    if not isfile(config_file):
        raise IOError("Couldn't find: %s" % config_file)
    config_file_obj = open(config_file, 'rb')
    config_data = config_file_obj.read()
    config_file_obj.close()
    config = load_yaml(config_data)
    # index_pages is a list of single-entry {output: source} mappings;
    # flatten it into one dict.
    index_pages = config.pop('index_pages')
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")
    index_pages = dict((index_page.keys()[0], index_page.values()[0])
                       for index_page in index_pages)
    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)
    verbose = not options.quiet
    # see if there's a persistent data file to read from
    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}
    # figure out what the generated files would be
    source_files = [
        file for file in listfiles(source_directory)
        if file.endswith('.txt')
        ]
    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
        ]
    index_files = [join_path(output_directory, index)
                   for index in index_pages]
    # handle --clean
    if options.clean:
        for file in generated_files + index_files + [data_file]:
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        sys.exit()
    # figure out layout dependencies for the source .txt files
    layouts = {}
    sources = {}
    def init_rst_source(source_file, destname=None):
        # Register a reST article (must start with '---' YAML frontmatter)
        # in `sources`, loading its layout on demand.
        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        if not content.startswith('---'):
            return
        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()
        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')
        if layout not in layouts:
            load_layout(layout, source_directory, layouts)
        content = replace_yaml_frontmatter('', content)
        # Split the teaser ("lead") off at the MORE_LINE marker.
        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content
        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')
        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': filetype
            }
    for source_file in source_files:
        init_rst_source(source_file)
    # and likewise for the index_pages
    render_last = set()
    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            # Pure-template index page: no reST content at all.
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts',
                                    index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': index_page,
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index'
                }
        else:
            init_rst_source(index_source, index_page)
            # Index pages must be rendered after the articles they list.
            render_last.add(index_source)
    # update the envs for all the source files
    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            # Apply parent-layout envs first so child layouts override them.
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))
    # figure out which files to regenerate
    if not options.force:
        no_regen = set()
        for source in sources:
            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                # NOTE(review): bare except — presumably guarding a missing
                # output file (OSError); it also hides other errors.
                continue
            # Dirty if source, any layout in the chain, or any include
            # dependency is newer than the generated output.
            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True
            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]
            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break
            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break
            if not dirty:
                no_regen.add(source)
        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]
        # If only index pages remain and they were all clean, drop them too.
        remaining = set(sources.keys())
        if remaining == render_last:
            for source in remaining.intersection(no_regen):
                del sources[source]
    # regenerate!
    # Sort so reST sources (False sorts first) render before index pages.
    for source, source_info in sorted(
        sources.items(), key=lambda x: x[1]['__rst__'] == False):
        info = config.copy()
        info.update(source_info)
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'],
                                           info['__path__'])
            print LINE
            print
        if info['__rst__']:
            output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''
        # Run the output through the layout chain, innermost first.
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]
        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(content=output,
                                       yatidb=data_dict,
                                       **info).render('xhtml', encoding=None)
        if isinstance(output, unicode):
            output = output.encode('utf-8')
        data_dict[info['__name__']] = info
        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()
    if verbose:
        print 'Done!'
    # persist the data file to disk
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    sys.exit()
    # NOTE(review): sys.exit() raises SystemExit, so everything below is
    # unreachable dead code; it also references names never bound in this
    # function (`root_path`, `items`). Presumably kept for a planned rewrite
    # — confirm before deleting.
    # @/@ site config
    # @/@ need to fix up this old segment of the code to the latest approach
    if options.package:
        package_root = options.package
        files = []
        add_file = files.append
        # The package name is the last non-empty path segment.
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError(
                "Couldn't find the package name from %r" % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                # Derive the dotted module name from the file path.
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                # Extract the module docstring when it is a reST-titled block.
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        # r'''...''' — strip the prefix and triple quotes.
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                        module_source = docstring_regex.sub(
                            '', module_source, 1)
                    else:
                        docstring = ''
                info = {}
                if root_path and isabs(filename) and filename.startswith(
                    root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)
                info['__outdir__'] = output_directory
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    # @/@ fix up the old index.js/json generator
    try:
        import json
    except ImportError:
        import simplejson as json
    index_js_template = join_path(output_directory, 'index.js.template')
    if isfile(index_js_template):
        # Blog posts only (have x-created, x-type defaults to 'blog'),
        # sorted by creation date.
        index_json = json.dumps([[
            _art['__name__'], _art['title'].encode('utf-8')
            ] for _art in sorted([
                item for item in items
                if item.get('x-created') and
                item.get('x-type', 'blog') == 'blog'
            ], key=lambda i: i['x-created'])])
        index_js_template = open(index_js_template, 'rb').read()
        index_js = open(join_path(output_directory, 'index.js'), 'wb')
        index_js.write(index_js_template % index_json)
        index_js.close()
def main(argv, genfiles=None):
    """Command-line entry point for the documentation/site generator.

    Parses ``argv`` with optparse, collects input files (either from an
    explicit ``genfiles`` list, a Python package tree via ``-c``, or the
    positional file arguments), renders each through reStructuredText
    (optionally wrapped in a Genshi template), and writes the output files
    plus optional index pages.  Python 2 only (print statements,
    ``iteritems``, ``execfile``, ``unicode``).

    :param argv: argument list to parse (typically ``sys.argv[1:]``).
    :param genfiles: optional pre-built list of ``(source, lead, info)``
        tuples; when given, it bypasses package/file discovery entirely.
    """
    # --- option parsing -------------------------------------------------
    op = OptionParser()
    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")
    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")
    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")
    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")
    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")
    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")
    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")
    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")
    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")
    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        # optparse raises SystemExit on --help / bad options; swallow it so
        # main() can be called programmatically without killing the process.
        return
    # --- authors metadata ----------------------------------------------
    authors = options.authors
    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}
    # Build reverse-lookup maps: e-mail address -> author name, and
    # author name -> first homepage URL seen in that author's info list.
    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}
    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author
    authors['unknown'] = ['unknown']
    # --- output location and site configuration -------------------------
    output_path = options.output_path.rstrip('/')
    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)
    root_path = options.root_path
    # A '.siteinfo' file in the output directory is executed as Python and
    # must define an INFO dict; otherwise fall back to empty site metadata.
    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
        }
    # --stdout implies quiet console logging (verbose only when writing files).
    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)
    format = options.format
    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)
    # --- page template (html only) --------------------------------------
    # NOTE(review): template_root/template_loader are only bound in the
    # final branch here, but are used later in the INDEX_FILES loop --
    # running with -p but without a valid -t template would presumably
    # raise NameError; confirm against expected usage.
    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(
            template_file.read(), loader=template_loader, encoding='utf-8'
        )
        template_file.close()
    # --- persistent data file (pickle cache of per-article info) --------
    # NOTE(review): data_dict is only bound when -d is given, yet the
    # `options.pattern` section below reads it unconditionally -- running
    # with -p but without -d would presumably raise NameError; confirm.
    data_file = options.data_file
    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}
    input_encoding = options.input_encoding
    output_encoding = options.output_encoding
    # --- gather input files ---------------------------------------------
    # Each entry of `files` is a (source_text, lead_text, info_dict) tuple.
    if genfiles:
        files = genfiles
    elif options.package:
        # Walk a Python package tree and turn each .py file into an entry
        # whose body is the module docstring and whose source is
        # syntax-highlighted for display.
        package_root = options.package
        files = []
        add_file = files.append
        # The package name is the last non-empty path component.
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError(
                "Couldn't find the package name from %r" % package_root
            )
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                # Derive the dotted module name from the file path,
                # e.g. pkg/sub/__init__.py -> 'pkg.sub'.
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                # Extract the module docstring; strip the quotes (4 chars
                # for a raw r"""...""" prefix, 3 otherwise).
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                # Only docstrings that open with a '==' reST title are
                # treated as documentation; others are discarded and the
                # docstring stays in the highlighted source.
                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                # Store the path relative to root_path when possible.
                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                )
                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(
                    module_source, PythonLexer(), SYNTAX_FORMATTER
                )
                add_file((docstring, '', info))
    else:
        # Plain text/reST files given as positional arguments.
        files = []
        add_file = files.append
        for filename in args:
            if not isfile(filename):
                raise IOError(
                    "%r doesn't seem to be a valid file!" % filename
                )
            if root_path and isabs(filename) and filename.startswith(root_path):
                path = filename[len(root_path)+1:]
            else:
                path = filename
            # Article metadata comes from git history for the file.
            info = get_git_info(filename, path)
            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]
            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()
            # An embedded MORE_LINE marker splits the article into a
            # "lead" (teaser) and the full body; the marker is removed.
            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''
            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))
    # --- render every collected file ------------------------------------
    for source, source_lead, info in files:
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'],
                split_path(output_path)[1]
            )
            print LINE
            print
        if template:
            # Render the reST body, stash the encoded text in the info
            # dict, then wrap it in the Genshi page template.
            output, props = render_rst(
                source, format, input_encoding, True
            )
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True
                )[0].encode(output_encoding)
            output = template.generate(
                content=output,
                info=info,
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                **siteinfo
            ).render('xhtml', encoding=output_encoding)
        else:
            # No template: render the document as a standalone whole.
            output, props = render_rst(
                source, format, input_encoding, True, as_whole=True
            )
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True, as_whole=True
                )[0].encode(output_encoding)
        # Record the article in the persistent cache keyed by its path.
        if data_file:
            data_dict[info['__path__']] = info
        if stdout:
            print output
        else:
            output_filename = join_path(
                output_path, '%s.%s' % (info['__name__'], format)
            )
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
    if verbose:
        print 'Done!'
    # Persist the updated article cache back to disk.
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    # --- index page generation (-p) --------------------------------------
    if options.pattern:
        pattern = options.pattern
        # Select cached articles that were output to the given directory.
        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
        ]
        # index.js/json
        import json
        # If an index.js.template exists, fill it (via %-formatting) with
        # a JSON array of [name, title] pairs for blog posts sorted by
        # their x-created date.
        index_js_template = join_path(output_path, 'index.js.template')
        if isfile(index_js_template):
            index_json = json.dumps([
                [_art['__name__'], _art['title'].encode('utf-8')]
                for _art in sorted(
                    [item for item in items
                     if item.get('x-created')
                     and item.get('x-type', 'blog') == 'blog'],
                    key=lambda i: i['x-created']
                )
            ])
            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()
        # Render each configured index page.  A site-nick-specific Genshi
        # template ('<page>.<nick>.genshi') is preferred over the generic
        # '<page>.genshi'.  NOTE(review): `format` is rebound by this loop
        # variable, shadowing the -f option value from here on.
        for name, mode, format in INDEX_FILES:
            pname = name.split('.', 1)[0]
            template_file = None
            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root,
                    '%s.%s.genshi' % (pname, siteinfo['site_nick'])
                )
                if isfile(template_path):
                    template_file = open(template_path, 'rb')
            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)
                template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(
                template_file.read(), loader=template_loader, encoding='utf-8'
            )
            template_file.close()
            poutput = page_template.generate(
                items=items[:],
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                root_path=output_path,
                **siteinfo
            ).render(format)
            poutput = unicode(poutput, output_encoding)
            # `mode` selects whether the page is wrapped in the main site
            # template (as alternative_content) or emitted as-is.
            if mode:
                output = template.generate(
                    alternative_content=poutput,
                    authors=authors,
                    **siteinfo
                ).render(format)
            else:
                output = poutput
            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)
            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()