def saving_dissected_datasets(hourly_data: list):
    """Pickle every (hour, year) slice of the hourly datasets.

    Each DataFrame in *hourly_data* is split into per-year chunks via
    ``get_yearly_data_from_df`` and written to
    ``dataset/complete_dataset/hour-<i>-year-<j>.pickle``.
    """
    for hour_idx, frame in enumerate(hourly_data):
        for year_idx, year_chunk in enumerate(get_yearly_data_from_df(frame)):
            target = "dataset/complete_dataset/hour-{}-year-{}.pickle".format(
                hour_idx, year_idx)
            with open(target, "wb") as handle:
                dump_pickle(year_chunk, handle)
def save_features(feat, dir=paths.READY_TO_USE_FEAT, name='1000_htk.csv'):
    """Persist the feature matrix *feat* as ``dir/name``.

    The on-disk format is selected by the extension of *name*:
    CSV, pickle, or NumPy ``.npy``.  Raises for any other extension.
    """
    ext = splitext(name)[1]
    target = join(dir, name)
    if ext == F_CSV:
        with open(target, 'w', newline='', encoding='utf-8') as csv_out:
            csv_writer = writer(csv_out, delimiter=',',
                                quoting=QUOTE_NONNUMERIC)
            csv_writer.writerows(feat)
    elif ext == F_PIC:
        with open(target, 'wb') as pic_out:
            dump_pickle(feat, pic_out)
    elif ext == F_NPY:
        save(target, feat)
    else:
        raise Exception('Данное расширение не поддерживается!')
def authenticate() -> Resource:
    """Authenticate with the Google Drive API and return a service handle.

    On the first run (or whenever no usable cached credentials exist) the
    user is asked to log in through a browser window, and the resulting
    credentials are cached in ``token.pickle``.  Subsequent runs reuse the
    cached pickle so no interactive login is required.

    Returns
    --------
    An instance of `Resource` that can be used to interact with the
    Google Drive API.
    """
    # Read-only Drive access is all this script needs.
    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
    creds: Optional[Credentials] = None
    if exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = load_pickle(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Expired token with a refresh token: renew silently.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Cache credentials so the next run skips the browser flow.
        with open('token.pickle', 'wb') as token:
            dump_pickle(creds, token)
    return build('drive', 'v3', credentials=creds)
def __authenticate(self) -> discovery.Resource:
    """Authenticate the user session against the Drive API.

    Remarks
    --------
    On the first run this opens a browser window asking the user to log
    in and grant (read-only) permission, then caches the credentials in
    ``token.pickle`` so later runs skip the interactive step.

    Returns
    --------
    Object of `googleapiclient.discovery.Resource`
    """
    # Only read-only Drive access is requested.
    SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
    creds: Optional[Credentials] = None
    if path_exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = load_pickle(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Renew a stale token silently when a refresh token exists.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES)
            creds = flow.run_local_server(port=0)
        # save credentials for next run
        with open("token.pickle", "wb") as token:
            dump_pickle(creds, token)
    return googleapiclient.discovery.build("drive", "v3", credentials=creds)
def main(argv=None):
    """Build the static website from a source directory of .txt articles.

    Legacy Python 2 entry point.  Reads ``_config.yml`` from the source
    directory, converts reStructuredText sources through Genshi layout
    templates into HTML in the output directory, and persists article
    metadata to a pickle data file.  Supports --clean, --force, --quiet.
    """
    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]"
    )
    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")
    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")
    op.add_option('-p', dest='package', default='',
                  help="Generate documentation for a Python package (optional)")
    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")
    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return
    # normalise various options and load from the config file
    if args:
        source_directory = args[0]
    else:
        source_directory = getcwd()
    source_directory = abspath(source_directory)
    chdir(source_directory)
    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)
    config_file = join_path(source_directory, '_config.yml')
    if not isfile(config_file):
        raise IOError("Couldn't find: %s" % config_file)
    config_file_obj = open(config_file, 'rb')
    config_data = config_file_obj.read()
    config_file_obj.close()
    config = load_yaml(config_data)
    index_pages = config.pop('index_pages')
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")
    # NOTE(review): dict.keys()[0] is Python 2 only — keys() is a view in
    # Python 3.  Each index_page is assumed to be a single-entry mapping.
    index_pages = dict(
        (index_page.keys()[0], index_page.values()[0])
        for index_page in index_pages
    )
    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)
    verbose = not options.quiet
    # see if there's a persistent data file to read from
    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}
    # figure out what the generated files would be
    source_files = [
        file for file in listfiles(source_directory) if file.endswith('.txt')
    ]
    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
    ]
    index_files = [join_path(output_directory, index) for index in index_pages]
    # handle --clean
    if options.clean:
        for file in generated_files + index_files + [data_file]:
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        sys.exit()
    # figure out layout dependencies for the source .txt files
    layouts = {}
    sources = {}

    def init_rst_source(source_file, destname=None):
        # Parse one .txt source: YAML frontmatter (required, must start
        # with '---'), layout resolution, and optional MORE_LINE split
        # between the article lead and the full content.
        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        if not content.startswith('---'):
            return
        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()
        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')
        if layout not in layouts:
            load_layout(layout, source_directory, layouts)
        content = replace_yaml_frontmatter('', content)
        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content
        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')
        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': filetype
        }

    for source_file in source_files:
        init_rst_source(source_file)
    # and likewise for the index_pages
    render_last = set()
    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            # Pure-template index pages have no rST content of their own.
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts', index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': index_page,
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index'
            }
        else:
            init_rst_source(index_source, index_page)
            # rST-based index pages must render after the articles so the
            # data_dict they read from is fully populated.
            render_last.add(index_source)
    # update the envs for all the source files
    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))
    # figure out which files to regenerate
    if not options.force:
        no_regen = set()
        for source in sources:
            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                # NOTE(review): bare except — presumably intended to catch a
                # missing output file (OSError); anything else is hidden too.
                continue
            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True
            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]
            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break
            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break
            if not dirty:
                no_regen.add(source)
        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]
        remaining = set(sources.keys())
        # If only the always-rendered index pages are left and they are all
        # up to date, drop them too so nothing is regenerated.
        if remaining == render_last:
            for source in remaining.intersection(no_regen):
                del sources[source]
    # regenerate!  (rST articles first, template-only indexes last)
    for source, source_info in sorted(
            sources.items(), key=lambda x: x[1]['__rst__'] == False):
        info = config.copy()
        info.update(source_info)
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'], info['__path__'])
            print LINE
            print
        if info['__rst__']:
            output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]
        # Thread the output through each layout template, innermost first.
        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(
                content=output, yatidb=data_dict, **info
            ).render('xhtml', encoding=None)
        if isinstance(output, unicode):
            output = output.encode('utf-8')
        data_dict[info['__name__']] = info
        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()
        if verbose:
            print 'Done!'
    # persist the data file to disk
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    sys.exit()
    # NOTE(review): everything below is unreachable — sys.exit() above
    # always raises SystemExit.  Kept as-is per the @/@ markers.
    # @/@ site config
    # @/@ need to fix up this old segment of the code to the latest approach
    if options.package:
        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r"
                             % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    # Strip the (r)""" ... """ delimiters from the match.
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                # NOTE(review): root_path is never defined in this function —
                # this dead branch would raise NameError if ever reached.
                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                )
                info['__outdir__'] = output_directory
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    # @/@ fix up the old index.js/json generator
    try:
        import json
    except ImportError:
        import simplejson as json
    index_js_template = join_path(output_directory, 'index.js.template')
    if isfile(index_js_template):
        # NOTE(review): 'items' is never defined in this function either —
        # more evidence this tail is abandoned code.
        index_json = json.dumps([
            [_art['__name__'], _art['title'].encode('utf-8')]
            for _art in sorted(
                [item for item in items
                 if item.get('x-created') and
                 item.get('x-type', 'blog') == 'blog'],
                key=lambda i: i['x-created']
            )
        ])
        index_js_template = open(index_js_template, 'rb').read()
        index_js = open(join_path(output_directory, 'index.js'), 'wb')
        index_js.write(index_js_template % index_json)
        index_js.close()
def save_pickle(self, filepath: str, info):
    """Serialize *info* into the pickle file at *filepath*."""
    with open(filepath, 'wb') as handle:
        dump_pickle(info, handle)
def rw_data(path, obj=None, parameters=None):
    """
    Read/write from/to a file.

    See <https://pandas.pydata.org/pandas-docs/stable/io.html>.
    Note that the file must have an extension.

    Parameters
    ----------
    path : str
        Path name of the file. It must start with ``./``.
    obj : generic object
        Object to be read or written. ``None`` means "read".
    parameters : dict
        Dictionary of parameters for the IO operation.

    Returns
    -------
    The object read from disk (read mode), otherwise ``None``.
    """
    extension = path.split('.')[-1].lower()
    # Read
    if obj is None:
        if extension == 'pkl':
            # Context managers close the handles (the previous version
            # leaked the file objects passed to the loaders).
            with open(path, 'rb') as f:
                obj = load_pickle(f)
        elif extension == 'json':
            with open(path, 'rb') as f:
                obj = load_json(f)
        elif extension in {'hdf5', 'h5', 'hdf'}:
            if parameters is None:
                obj = read_hdf(path)
            else:
                obj = read_hdf(path, **parameters)
        elif extension == 'csv':
            if parameters is None:
                obj = read_csv(path)
            else:
                obj = read_csv(path, **parameters)
        else:
            print('WARNING: No file format extension specified')
        return obj
    # Write
    else:
        # Make sure the directory exists.  Guard against a bare filename
        # (dirname == ''), for which makedirs would raise.
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        if extension == 'pkl':
            with open(path, 'wb') as f:
                dump_pickle(obj, f)
        elif extension == 'json':
            with open(path, 'w') as f:
                dump_json(obj, fp=f)
        elif extension in {'hdf5', 'h5', 'hdf'}:
            obj.to_hdf(path, 'key', mode='w')
        elif extension == 'csv':
            if parameters is None:
                obj.to_csv(path)
            else:
                obj.to_csv(path, **parameters)
        else:
            print('WARNING: No file format extension specified')
def persist_data_file():
    """Write the accumulated article data back to its pickle store.

    No-op when no data file path is configured.
    """
    if not data_file:
        return
    with open(data_file, 'wb') as store:
        dump_pickle(data_dict, store)
def main(argv, genfiles=None):
    """Convert article/package sources to HTML or TeX output files.

    Legacy Python 2 entry point.  Sources come from *genfiles*, from a
    Python package walk (``-c``), or from positional file arguments.
    Output is rendered via docutils (``render_rst``) and optionally a
    Genshi template, then written to the output directory (or stdout).
    With ``-p`` a set of index pages is also generated.
    """
    op = OptionParser()
    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")
    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")
    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")
    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")
    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")
    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")
    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")
    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")
    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")
    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return
    authors = options.authors
    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}
    # Build reverse maps from author metadata: first URL found becomes the
    # author's link, e-mail addresses map back to the author name.
    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}
    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author
    authors['unknown'] = ['unknown']
    output_path = options.output_path.rstrip('/')
    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)
    root_path = options.root_path
    # Optional .siteinfo file in the output directory: executed as Python
    # (Python 2 execfile) and expected to define an INFO dict.
    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
        }
    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)
    format = options.format
    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)
    # TeX output and template-less runs render "as a whole" further below.
    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(template_file.read(),
                                  loader=template_loader,
                                  encoding='utf-8')
        template_file.close()
    data_file = options.data_file
    # NOTE(review): data_dict is only bound when data_file is non-empty, yet
    # options.pattern below reads data_dict unconditionally — running with
    # -p but without -d would raise NameError.  Confirm intended usage.
    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}
    input_encoding = options.input_encoding
    output_encoding = options.output_encoding
    if genfiles:
        files = genfiles
    elif options.package:
        # Walk a Python package and extract (docstring, '', info) triples
        # for every module, highlighting the remaining source.
        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r"
                             % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    # Strip the (r)""" ... """ delimiters from the match.
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                if root_path and isabs(filename) and filename.startswith(
                        root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)
                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    else:
        # Positional arguments: treat each as a .txt article source.
        files = []
        add_file = files.append
        for filename in args:
            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)
            if root_path and isabs(filename) and filename.startswith(
                    root_path):
                path = filename[len(root_path) + 1:]
            else:
                path = filename
            info = get_git_info(filename, path)
            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]
            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()
            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''
            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))
    # Render every collected source.
    for source, source_lead, info in files:
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'],
                split_path(output_path)[1])
            print LINE
            print
        if template:
            output, props = render_rst(source, format, input_encoding, True)
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(source_lead, format,
                                              input_encoding,
                                              True)[0].encode(output_encoding)
            output = template.generate(content=output,
                                       info=info,
                                       authors=authors,
                                       email2author=email2author,
                                       author2link=author2link,
                                       **siteinfo).render(
                                           'xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(source, format, input_encoding, True,
                                       as_whole=True)
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True,
                    as_whole=True)[0].encode(output_encoding)
        if data_file:
            data_dict[info['__path__']] = info
        if stdout:
            print output
        else:
            output_filename = join_path(output_path,
                                        '%s.%s' % (info['__name__'], format))
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
        if verbose:
            print 'Done!'
    # Persist the article store for future runs.
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    if options.pattern:
        pattern = options.pattern
        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
        ]
        # index.js/json
        import json
        index_js_template = join_path(output_path, 'index.js.template')
        if isfile(index_js_template):
            index_json = json.dumps(
                [[_art['__name__'], _art['title'].encode('utf-8')]
                 for _art in sorted([
                     item for item in items
                     if item.get('x-created')
                     and item.get('x-type', 'blog') == 'blog'
                 ], key=lambda i: i['x-created'])])
            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()
        # NOTE(review): 'format' is rebound by this loop, shadowing the
        # -f option value for the remainder of the function.
        for name, mode, format in INDEX_FILES:
            pname = name.split('.', 1)[0]
            template_file = None
            # Prefer a site-nick-specific template when one exists.
            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root,
                    '%s.%s.genshi' % (pname, siteinfo['site_nick']))
                if isfile(template_path):
                    template_file = open(template_path, 'rb')
            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)
                template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(template_file.read(),
                                           loader=template_loader,
                                           encoding='utf-8')
            template_file.close()
            poutput = page_template.generate(items=items[:],
                                             authors=authors,
                                             email2author=email2author,
                                             author2link=author2link,
                                             root_path=output_path,
                                             **siteinfo).render(format)
            poutput = unicode(poutput, output_encoding)
            if mode:
                output = template.generate(alternative_content=poutput,
                                           authors=authors,
                                           **siteinfo).render(format)
            else:
                output = poutput
            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)
            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
def main(argv=None):
    """Build the static website from a source directory of .txt articles.

    Legacy Python 2 entry point (duplicate variant of the generator main).
    Reads ``_config.yml`` from the source directory, converts rST sources
    through Genshi layout templates into HTML in the output directory, and
    persists article metadata to a pickle data file.
    """
    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]")
    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")
    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")
    op.add_option(
        '-p', dest='package', default='',
        help="Generate documentation for a Python package (optional)")
    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")
    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return
    # normalise various options and load from the config file
    if args:
        source_directory = args[0]
    else:
        source_directory = getcwd()
    source_directory = abspath(source_directory)
    chdir(source_directory)
    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)
    config_file = join_path(source_directory, '_config.yml')
    if not isfile(config_file):
        raise IOError("Couldn't find: %s" % config_file)
    config_file_obj = open(config_file, 'rb')
    config_data = config_file_obj.read()
    config_file_obj.close()
    config = load_yaml(config_data)
    index_pages = config.pop('index_pages')
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")
    # NOTE(review): dict.keys()[0] is Python 2 only; each index_page is
    # assumed to be a single-entry mapping.
    index_pages = dict((index_page.keys()[0], index_page.values()[0])
                       for index_page in index_pages)
    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)
    verbose = not options.quiet
    # see if there's a persistent data file to read from
    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}
    # figure out what the generated files would be
    source_files = [
        file for file in listfiles(source_directory) if file.endswith('.txt')
    ]
    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
    ]
    index_files = [join_path(output_directory, index) for index in index_pages]
    # handle --clean
    if options.clean:
        for file in generated_files + index_files + [data_file]:
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        sys.exit()
    # figure out layout dependencies for the source .txt files
    layouts = {}
    sources = {}

    def init_rst_source(source_file, destname=None):
        # Parse one .txt source: YAML frontmatter (must start with '---'),
        # layout resolution, and optional MORE_LINE lead/content split.
        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()
        if not content.startswith('---'):
            return
        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()
        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')
        if layout not in layouts:
            load_layout(layout, source_directory, layouts)
        content = replace_yaml_frontmatter('', content)
        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content
        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')
        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': filetype
        }

    for source_file in source_files:
        init_rst_source(source_file)
    # and likewise for the index_pages
    render_last = set()
    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            # Pure-template index pages have no rST content of their own.
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts', index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': index_page,
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index'
            }
        else:
            init_rst_source(index_source, index_page)
            # rST-based index pages render after the articles so the
            # data they read is fully populated.
            render_last.add(index_source)
    # update the envs for all the source files
    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))
    # figure out which files to regenerate
    if not options.force:
        no_regen = set()
        for source in sources:
            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                # NOTE(review): bare except — presumably meant to catch a
                # missing output file (OSError); other errors are hidden too.
                continue
            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True
            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]
            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break
            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break
            if not dirty:
                no_regen.add(source)
        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]
        remaining = set(sources.keys())
        # When only up-to-date index pages remain, drop them too.
        if remaining == render_last:
            for source in remaining.intersection(no_regen):
                del sources[source]
    # regenerate!  (rST articles first, template-only indexes last)
    for source, source_info in sorted(sources.items(),
                                      key=lambda x: x[1]['__rst__'] == False):
        info = config.copy()
        info.update(source_info)
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'], info['__path__'])
            print LINE
            print
        if info['__rst__']:
            output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]
        # Thread the output through each layout template, innermost first.
        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(content=output,
                                       yatidb=data_dict,
                                       **info).render('xhtml', encoding=None)
        if isinstance(output, unicode):
            output = output.encode('utf-8')
        data_dict[info['__name__']] = info
        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()
        if verbose:
            print 'Done!'
    # persist the data file to disk
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()
    sys.exit()
    # NOTE(review): everything below is unreachable — sys.exit() above
    # always raises SystemExit.  Kept as-is per the @/@ markers.
    # @/@ site config
    # @/@ need to fix up this old segment of the code to the latest approach
    if options.package:
        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r"
                             % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    # Strip the (r)""" ... """ delimiters from the match.
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                # NOTE(review): root_path is never defined in this function —
                # this dead branch would raise NameError if ever reached.
                if root_path and isabs(filename) and filename.startswith(
                        root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)
                info['__outdir__'] = output_directory
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    # @/@ fix up the old index.js/json generator
    try:
        import json
    except ImportError:
        import simplejson as json
    index_js_template = join_path(output_directory, 'index.js.template')
    if isfile(index_js_template):
        # NOTE(review): 'items' is never defined in this function either —
        # more evidence this tail is abandoned code.
        index_json = json.dumps([[
            _art['__name__'], _art['title'].encode('utf-8')
        ] for _art in sorted([
            item for item in items
            if item.get('x-created') and item.get('x-type', 'blog') == 'blog'
        ], key=lambda i: i['x-created'])])
        index_js_template = open(index_js_template, 'rb').read()
        index_js = open(join_path(output_directory, 'index.js'), 'wb')
        index_js.write(index_js_template % index_json)
        index_js.close()
def main(argv, genfiles=None):
    """Command-line entry point: render text/reST sources to HTML or TeX.

    Parses *argv* with optparse, loads author and site metadata, renders
    each input file (or every module of a Python package) via render_rst
    and an optional Genshi template, persists per-file metadata into a
    pickle data file, and regenerates index pages for a path pattern.

    Parameters
    ----------
    argv : list of command-line arguments (passed to OptionParser).
    genfiles : optional pre-built list of (source, source_lead, info)
        triples; when given, file/package discovery is skipped entirely.

    NOTE(review): Python 2 only — relies on print statements, dict
    iteritems/itervalues, execfile and unicode.
    """
    op = OptionParser()
    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")
    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")
    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")
    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")
    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")
    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")
    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")
    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")
    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")
    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")
    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        # optparse raises SystemExit on --help / bad options; swallow it so
        # a library caller of main() is not terminated.
        return

    # ------------------------------------------------------------------
    # Author metadata: map emails -> author names and authors -> homepage
    # links for use inside the templates.
    # ------------------------------------------------------------------
    authors = options.authors
    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}
    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}
    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                # First URL seen for an author wins.
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author
    authors['unknown'] = ['unknown']

    output_path = options.output_path.rstrip('/')
    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)
    root_path = options.root_path

    # ------------------------------------------------------------------
    # Site configuration: a ``.siteinfo`` file in the output directory is
    # executed as Python and must define an INFO dict.
    # NOTE(review): execfile runs arbitrary code from the output directory.
    # ------------------------------------------------------------------
    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
        }

    stdout = sys.stdout if options.stdout else None
    # --stdout implies quiet; otherwise honour --quiet.
    verbose = False if stdout else (not options.quiet)
    format = options.format
    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)

    # ------------------------------------------------------------------
    # Optional Genshi page template (html only).
    # ------------------------------------------------------------------
    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(
            template_file.read(), loader=template_loader, encoding='utf-8'
        )
        template_file.close()

    # Persistent metadata store: pickle of {path: info}.
    # NOTE(review): data_dict is only defined when -d is given, but the
    # options.pattern branch at the bottom reads it unconditionally —
    # running with -p but without -d would raise NameError; confirm intended.
    data_file = options.data_file
    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}
    input_encoding = options.input_encoding
    output_encoding = options.output_encoding

    # ------------------------------------------------------------------
    # Gather the files to convert: pre-supplied triples, a Python package
    # walk, or the plain files listed on the command line.
    # ------------------------------------------------------------------
    if genfiles:
        files = genfiles
    elif options.package:
        package_root = options.package
        files = []
        add_file = files.append
        # Derive the package name from the last non-empty path component.
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                # Convert the file path into a dotted module name.
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    # Strip '/__init__.py' (12 chars).
                    module = module[:-12]
                else:
                    # Strip the '.py' extension.
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                # Extract a leading module docstring, if present, and drop
                # it from the source that gets syntax-highlighted.
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        # raw literal: skip r''' prefix and ''' suffix
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                # Record path relative to root_path when possible.
                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                )
                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    else:
        files = []
        add_file = files.append
        for filename in args:
            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)
            if root_path and isabs(filename) and filename.startswith(root_path):
                path = filename[len(root_path)+1:]
            else:
                path = filename
            info = get_git_info(filename, path)
            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]
            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()
            # A MORE_LINE marker splits the article into a lead (teaser)
            # and the full body.
            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''
            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))

    # ------------------------------------------------------------------
    # Render every gathered source, optionally through the page template,
    # and write the result to stdout or to <output_path>/<name>.<format>.
    # ------------------------------------------------------------------
    for source, source_lead, info in files:
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'], split_path(output_path)[1]
            )
            print LINE
            print
        if template:
            output, props = render_rst(
                source, format, input_encoding, True
            )
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True
                )[0].encode(output_encoding)
            output = template.generate(
                content=output,
                info=info,
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                **siteinfo
            ).render('xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(
                source, format, input_encoding, True, as_whole=True
            )
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True, as_whole=True
                )[0].encode(output_encoding)
        if data_file:
            data_dict[info['__path__']] = info
        if stdout:
            print output
        else:
            output_filename = join_path(
                output_path, '%s.%s' % (info['__name__'], format)
            )
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
    if verbose:
        print 'Done!'

    # Persist the (possibly updated) metadata store.
    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    # ------------------------------------------------------------------
    # Index generation for items whose __outdir__ matches -p PATTERN.
    # ------------------------------------------------------------------
    if options.pattern:
        pattern = options.pattern
        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
        ]
        # index.js/json
        import json
        index_js_template = join_path(output_path, 'index.js.template')
        if isfile(index_js_template):
            # JSON array of [name, title] for dated blog items, oldest first.
            index_json = json.dumps([
                [_art['__name__'], _art['title'].encode('utf-8')]
                for _art in sorted(
                    [item for item in items if item.get('x-created') and
                     item.get('x-type', 'blog') == 'blog'],
                    key=lambda i: i['x-created']
                )
            ])
            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            # The template must contain a single %s placeholder.
            index_js.write(index_js_template % index_json)
            index_js.close()
        # NOTE(review): the loop variable ``format`` below shadows the
        # output format chosen earlier; and when ``mode`` is truthy but no
        # -t template was given, ``template`` is False and .generate would
        # fail — confirm INDEX_FILES entries always pair mode with -t.
        for name, mode, format in INDEX_FILES:
            pname = name.split('.', 1)[0]
            template_file = None
            # Prefer a site-nick-specific template, fall back to the
            # generic one.
            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root,
                    '%s.%s.genshi' % (pname, siteinfo['site_nick'])
                )
                if isfile(template_path):
                    template_file = open(template_path, 'rb')
            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)
                template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(
                template_file.read(), loader=template_loader, encoding='utf-8'
            )
            template_file.close()
            poutput = page_template.generate(
                items=items[:],
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                root_path=output_path,
                **siteinfo
            ).render(format)
            poutput = unicode(poutput, output_encoding)
            if mode:
                # Wrap the page inside the main site template.
                output = template.generate(
                    alternative_content=poutput,
                    authors=authors,
                    **siteinfo
                ).render(format)
            else:
                output = poutput
            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)
            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
def _save_to_pickle(self, filename, df):
    """Pickle *df* into the ``dataset`` directory under ``basedir``.

    ``filename`` is the target file name inside that directory; the file
    is (over)written in binary mode.
    """
    target_path = os.path.join(basedir, "dataset", filename)
    with open(target_path, "wb") as handle:
        dump_pickle(df, handle)