def __update_context(self, target_list=None):
    """Builds context from target_list"""
    entries = []
    context = {}
    for target in target_list:
        with open(os.path.join(self.config['indir'], target + '.txt'), 'r') as infh:
            raw_header, content = infh.read().split('\n---\n')
            header = util.parse_header(raw_header)
            title = header.get('title', 'No Title')
            extensions = ['codehilite', 'html_tidy']
            snip = header.get('snip', markdown(content, extensions)[:50] + ' ...')
            pubdate = header.get('pubdate', None)
            # Has a date it was published, isn't a draft and isn't a static page
            if pubdate and not (header.get('draft', None) or header.get('static', None)):
                entries.append({'title': title,
                                'permalink': header.get('permalink', util.build_slug(title)),
                                'snip': snip,
                                'pubdate': pubdate})
    entries.sort(cmp=(lambda x, y: -1 if x['pubdate'] > y['pubdate'] else 1))
    context['entries'] = entries
    context['permalink'] = 'index'
    context['title'] = 'Journal'
    return context
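# A hedged illustration of the on-disk post format the split above assumes: a
# plain-text "key: value" header (compare test_parse_header further down),
# separated from the markdown body by a line containing only '---'. The field
# values and body text below are hypothetical.
#
#   title: Hello world
#   pubdate: 2010-04-21T12:04:36Z
#   ---
#   First paragraph of the post, rendered by markdown() above.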
def decompress(source, target): logging.debug("Starting decompression of %s to %s", repr(source), repr(target)) with open(source, "rb") as fsource: logging.debug("Parsing header") magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header( fsource) logging.debug(" Magic number: %s", repr(magic)) logging.debug(" Method: %s", repr(method)) logging.debug(" Major version number: %d", majorversion) logging.debug(" Minor version number: %d", minorversion) logging.debug(" Page size: %d", pagesize) logging.debug(" Uncompressed size: %d", uncompressed_size) with open(target, "wb") as ftarget: curr_size = 0.0 pagecnt = 0 with gzip.GzipFile(fileobj=fsource, mode="rb", compresslevel=9) as fsource: while True: if pagecnt % 100 == 0 or curr_size == uncompressed_size: sys.stdout.write("\rProgress: {:.2f}%".format( curr_size / uncompressed_size * 100)) sys.stdout.flush() page = fsource.read(pagesize) if not page: break ftarget.write(page) curr_size += len(page) pagecnt += 1 sys.stdout.write("\n") logging.debug("Done")
def _walk(self):
    # Walk through drafts/ to check for published posts and add their context
    listing = os.listdir('drafts')
    for infile in listing:
        with open(os.path.join('drafts', infile), 'r') as f:
            raw_header, raw_content = f.read().split('---', 1)
            header = util.parse_header(raw_header)
            if util.isdraft(header['publish']):
                self.entries.append(header)
def _load(self):
    config_filename = "config.ini"
    self.config = util.parse_config(config_filename)

    # Set up logging
    self.logger = util.logger(self.config['loglevel'], self.config['logfile'])
    self.logger.info("Loaded config")

    # Read file and store header and content
    with open(self.name, 'r') as f:
        self.raw_header, self.raw_content = f.read().split('---', 1)
    self.logger.info("Read raw header and content")

    self.header = util.parse_header(self.raw_header)
    self.logger.info("Parsed header into a dict")
def decompress(source, target): logging.debug("Starting decompression of %s to %s", repr(source), repr(target)) with open(source, "rb") as fsource: logging.debug("Parsing header") magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(fsource) logging.debug(" Magic number: %s", repr(magic)) logging.debug(" Method: %s", repr(method)) logging.debug(" Major version number: %d", majorversion) logging.debug(" Minor version number: %d", minorversion) logging.debug(" Page size: %d", pagesize) logging.debug(" Uncompressed size: %d", uncompressed_size) fsource.flush() with open(target, "wb") as ftarget: p = subprocess.Popen(["7za", "x", "-an", "-txz", "-si", "-so"], stdin=fsource, stdout=ftarget, stderr=subprocess.PIPE) p.communicate() logging.debug("Done")
def main():
    # cli args checking
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="action")
    parser_c = subparsers.add_parser("c")
    parser_c.add_argument("source", type=str)
    parser_c.add_argument("target", type=str)
    parser_c.add_argument("reference", type=str)
    parser_c.add_argument("--inner", type=str)
    parser_c.add_argument("--delta", type=str)
    parser_c.add_argument("--nointra", action="store_true")
    parser_d = subparsers.add_parser("d")
    parser_d.add_argument("source", type=str)
    parser_d.add_argument("target", type=str)
    args = parser.parse_args()

    # create method name
    if args.action == "c":
        method = create_method_name(args.nointra, args.delta, args.inner)
    elif args.action == "d":
        with open(args.source, "rb") as f:
            method = util.parse_header(f)[1]

    # set up logging
    util.create_dir("logs")
    util.configure_logging(method, "logs/{}.log".format(method))

    # check if files do (not) exist
    if not os.path.isfile(args.source):
        logging.error("Source %s does not exist", repr(args.source))
        return -1
    if os.path.isfile(args.target) and os.path.getsize(args.target) > 0:
        logging.error("Target %s already exists and is non-empty", repr(args.target))
        return -1
    if args.action == "c" and not os.path.isfile(args.reference):
        logging.error("Reference %s does not exist", repr(args.reference))
        return -1

    # compress/decompress
    if args.action == "c":
        return compress(args.source, args.target, args.reference,
                        args.nointra, args.delta, args.inner)
    elif args.action == "d":
        return decompress(args.source, args.target)
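# A minimal sketch of driving this CLI, assuming the module is saved as, say,
# dedup.py and that sys is imported at module level (both the file name and the
# sample paths below are hypothetical); the subcommands and flags mirror the
# argparse setup above:
#
#   python dedup.py c memory.raw memory.packed reference.raw --inner gzip
#   python dedup.py d memory.packed memory.restored
if __name__ == "__main__":
    sys.exit(main())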
def __init__(self, inpath):
    """Read the infile and populate the object"""
    # First parse the config and extract data from infile
    self.inpath = inpath
    self.config = util.parse_config()
    with open(inpath) as infh:
        raw_header, self.content = infh.read().split(
            '\n' + self.config.get('separator', '---') + '\n', 1)

    # Parse the header and populate the context with this information
    self.context = self.__update_context(util.parse_header(raw_header))

    # Get a template ready to write
    tfile = util.view_mapper.get(self.context.get('view', 'single'))
    tlookup = TemplateLookup(directories=['.'], output_encoding='utf-8',
                             encoding_errors='replace')
    self.template = Template(filename=os.path.join(self.config.get('designdir', 'design'), tfile),
                             lookup=tlookup)
def test_parse_header(self):
    raw_header = ("view: single\n"
                  "permalink: the-hacker-s-dream-journal-eng\n"
                  "published: True\n"
                  "pubdate: 2010-04-21T12:04:36Z\n"
                  "title: The hacker's dream journal engine\n"
                  "author: Ramkumar Ramachandra")
    header = util.parse_header(raw_header)
    assert header["pubdate"] == "2010-04-21T12:04:36Z"
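# A possible extension of the test above, assuming util.parse_header returns a
# plain dict of the "key: value" header lines (only the pubdate lookup is
# asserted in the original test; the extra assertions below are a sketch):
#
#   assert header["title"] == "The hacker's dream journal engine"
#   assert header["author"] == "Ramkumar Ramachandra"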
def __update_context(self, target_list=None):
    """Builds context from target_list"""
    entries = []
    context = {}
    for target in target_list:
        with open(target + '.txt', 'r') as infh:
            raw_header, content = infh.read().split(
                '\n' + self.config.get('separator', '---') + '\n', 1)
            header = util.parse_header(raw_header)
            title = header.get('title', 'No Title')
            extensions = ['codehilite', 'html_tidy']
            snip = header.get('snip', '')
            if not len(snip) and ('snips' not in self.config or self.config['snips'] == True):
                snip = util.htransform(content, self.config.get('htransform', None))[:50] + ' ...'
            if not self.rss:
                html_content = util.htransform(content, self.config.get('htransform', None))
            else:
                rss_content = saxutils.escape(util.htransform(content, self.config.get('htransform', None)))
            pubdate = header.get('pubdate', None)
            pubdate_h = util.build_timestamp_h(pubdate, rss=self.rss)
            # Has a date it was published, isn't a draft and isn't a static page
            if pubdate and not (header.get('draft', None) or header.get('static', None)):
                entries.append({'title': title,
                                'permalink': header.get('permalink', util.build_slug(self.config, title, infh.name)),
                                'snip': snip,
                                'pubdate': pubdate,
                                'pubdate_h': pubdate_h})
                if not self.rss:
                    entries[-1]['html_content'] = html_content
                else:
                    entries[-1]['rss_content'] = rss_content
    entries.sort(cmp=(lambda x, y: -1 if x['pubdate'] > y['pubdate'] else 1))
    indexlen = int(self.config.get('indexlen', 10))
    rsslen = int(self.config.get('rsslen', 10))
    taglen = int(self.config.get('taglen', 0))
    if self.name == 'index' and indexlen > 0:
        context['entries'] = entries[:indexlen]
        if os.path.exists('tags'):
            with open('tags') as tagsfh:
                context['tags'] = tagsfh.read()
        else:
            context['tags'] = ''
    elif self.name.endswith('.rss') and rsslen > 0:
        context['entries'] = entries[:rsslen]
    elif (not (self.name.endswith('.rss') or self.name == 'archive')) and taglen > 0:
        context['entries'] = entries[:taglen]
    else:
        context['entries'] = entries
    context['permalink'] = self.name
    if self.name.startswith('index') or self.name.startswith('archive'):
        context['title'] = self.config['title']
        self.tagprefix = ''
    else:
        if self.name.endswith('.rss'):
            context['title'] = saxutils.escape(self.config['title'] + ' - Tags: ' + self.name.replace('.rss', ''))
            self.tagprefix = ''
        else:
            context['title'] = self.config['title'] + ' - Tags: ' + self.name
    context['baseurl'] = self.config['baseurl']
    return context
def decompress(source, target):
    # some info
    logging.debug("Starting decompression of %s to %s", repr(source), repr(target))
    with open(source, "rb") as fsource:
        # some info
        logging.debug("Parsing header")
        magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(fsource)
        logging.debug(" Magic number: %s", repr(magic))
        logging.debug(" Method: %s", repr(method))
        logging.debug(" Major version number: %d", majorversion)
        logging.debug(" Minor version number: %d", minorversion)
        logging.debug(" Page size: %d", pagesize)
        logging.debug(" Uncompressed size: %d", uncompressed_size)
        nointra, delta, inner = parse_method_name(method)
        fsource.flush()

        tmphandle, tmpfile = None, None
        if inner == "gzip":
            fsource = gzip.GzipFile(fileobj=fsource, mode="rb", compresslevel=9)
        elif inner == "bzip2":
            fsource = bz2file.BZ2File(filename=fsource, mode="rb", compresslevel=9)
        elif inner == "7zip":
            util.create_dir(".tmp")
            tmphandle, tmpfile = tempfile.mkstemp(dir=".tmp")
            with open(tmpfile, "wb") as ftmp:
                p = subprocess.Popen(["7za", "x", "-txz", "-si", "-so"],
                                     stdin=fsource, stdout=ftmp, stderr=subprocess.PIPE)
                p.communicate()
            fsource = open(tmpfile, "rb")

        try:
            reference = util.parse_string(fsource)
            logging.debug("Reference dump: %s", reference)

            # parse deduplicated pages
            fills = {}
            reference_list = list(util.parse_pagenr_list(fsource))
            for i in xrange(len(reference_list)):
                for left, right in util.parse_interval_list(fsource):
                    for pagenr in xrange(left, right + 1):
                        fills[pagenr] = reference_list[i]

            # parse diffs
            if delta:
                diffs = {}
                pagenrs = list(util.parse_pagenr_list(fsource))
                for i in xrange(len(pagenrs)):
                    diffs[pagenrs[i]] = util.parse_diff(fsource, pagesize)

            # parse new pages
            newpages = {}
            newdistinct = set()
            if nointra:
                for left, right in list(util.parse_interval_list(fsource)):
                    for j in xrange(left, right + 1):
                        page = fsource.read(pagesize)
                        newdistinct.add(page)
                        newpages[j] = page
            else:
                newcnt = util.parse_int(fsource, 4)
                intervals = []
                for _ in xrange(newcnt):
                    intervals.append(list(util.parse_interval_list(fsource)))
                for i in xrange(newcnt):
                    page = fsource.read(pagesize)
                    for left, right in intervals[i]:
                        for j in xrange(left, right + 1):
                            newdistinct.add(page)
                            newpages[j] = page
        finally:
            if tmphandle is not None:
                os.close(tmphandle)
                os.remove(tmpfile)
            if inner is not None:
                fsource.close()

        # reconstruct file
        pagenr = 0
        final = uncompressed_size / pagesize
        same_distinct, same_total = set(), 0
        different_distinct, different_total = set(), 0
        seen = set()
        diff_seen = set()
        with open(reference, "rb") as freference:
            with open(target, "wb") as ftarget:
                while pagenr < final:
                    if pagenr in fills:
                        freference.seek(pagesize * fills[pagenr])
                        page = freference.read(pagesize)
                        seen.add(page)
                        different_distinct.add(page)
                        different_total += 1
                        ftarget.write(page)
                    elif delta and pagenr in diffs:
                        freference.seek(pagenr * pagesize)
                        page = freference.read(pagesize)
                        newpage = util.apply_diff(page, diffs[pagenr])
                        diff_seen.add(newpage)
                        ftarget.write(newpage)
                    elif pagenr in newpages:
                        seen.add(newpages[pagenr])
                        ftarget.write(newpages[pagenr])
                    else:
                        freference.seek(pagesize * pagenr)
                        page = freference.read(pagesize)
                        seen.add(page)
                        same_distinct.add(page)
                        same_total += 1
                        ftarget.write(page)
                    pagenr += 1

        # some info
        logging.debug("New pages: %d/%d (%d/%d)",
                      len(newpages), final, len(newdistinct), len(seen))
        logging.debug("Deduplicated pages at the same offset: %d/%d (%d/%d)",
                      same_total, final, len(same_distinct), len(seen))
        logging.debug("Deduplicated pages at different offsets: %d/%d (%d/%d)",
                      len(fills), final, len(different_distinct), len(seen))
        logging.debug("Deduplicated pages in total: %d/%d (%d/%d)",
                      same_total + len(fills), final,
                      len(same_distinct | different_distinct), len(seen))
        if delta:
            logging.debug("Diffed pages: %d/%d (%d/%d)",
                          len(diffs), final, len(diff_seen), len(seen))
    logging.debug("Done")
    return 0
def decompress(source, target):
    # some info
    logging.debug("Starting decompression of %s to %s", repr(source), repr(target))
    with open(source, "rb") as fsource:
        # some info
        logging.debug("Parsing header")
        magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(fsource)
        logging.debug(" Magic number: %s", repr(magic))
        logging.debug(" Method: %s", repr(method))
        logging.debug(" Major version number: %d", majorversion)
        logging.debug(" Minor version number: %d", minorversion)
        logging.debug(" Page size: %d", pagesize)
        logging.debug(" Uncompressed size: %d", uncompressed_size)

        inner = method.split("intradedup")[1]
        if not method.startswith("intradedup") or inner not in ("", "gzip", "bzip2", "7zip"):
            logging.error("Invalid method %s", repr(method))
            return -1
        fsource.flush()

        tmphandle, tmpfile = None, None
        if inner == "gzip":
            fsource = gzip.GzipFile(fileobj=fsource, mode="rb", compresslevel=9)
        elif inner == "bzip2":
            fsource = bz2file.BZ2File(filename=fsource, mode="rb", compresslevel=9)
        elif inner == "7zip":
            util.create_dir(".tmp")
            tmphandle, tmpfile = tempfile.mkstemp(dir=".tmp")
            with open(tmpfile, "wb") as ftmp:
                p = subprocess.Popen(["7za", "x", "-txz", "-si", "-so"],
                                     stdin=fsource, stdout=ftmp, stderr=subprocess.PIPE)
                p.communicate()
            fsource = open(tmpfile, "rb")

        try:
            # parse dictionary
            distinct = util.parse_int(fsource, 4)
            fills = {}
            pagelist = []
            for _ in xrange(distinct):
                page = fsource.read(pagesize)
                pagelist.append(page)
            for i in xrange(distinct):
                for left, right in util.parse_interval_list(fsource):
                    for pagenr in xrange(left, right + 1):
                        fills[pagenr] = pagelist[i]

            # reconstruct file
            pagenr = 0
            seen = set()
            with open(target, "wb") as ftarget:
                while True:
                    if pagenr in fills:
                        ftarget.write(fills[pagenr])
                        seen.add(fills[pagenr])
                    else:
                        page = fsource.read(pagesize)
                        seen.add(page)
                        if not page:
                            pagenr += 1
                            break
                        ftarget.write(page)
                    pagenr += 1
                while pagenr in fills:
                    ftarget.write(fills[pagenr])
                    seen.add(fills[pagenr])
                    pagenr += 1
            logging.debug("Deduplicated pages: %d/%d (%d/%d)",
                          len(fills), uncompressed_size / pagesize, distinct, len(seen))
        finally:
            if tmphandle is not None:
                os.close(tmphandle)
                os.remove(tmpfile)
            if inner is not None:
                fsource.close()
    logging.debug("Done")
    return 0
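# A self-contained sketch of the interval-to-fills expansion used by both
# decompress() variants above. The helper name expand_fills and the sample
# data are hypothetical; it stands in for the streams normally produced by
# util.parse_pagenr_list / util.parse_interval_list.

def expand_fills(pagelist, intervals):
    """Map every page number covered by intervals[i] to pagelist[i]."""
    fills = {}
    for i, ranges in enumerate(intervals):
        for left, right in ranges:
            for pagenr in range(left, right + 1):  # intervals are inclusive
                fills[pagenr] = pagelist[i]
    return fills

# Distinct page "A" fills page numbers 3-5 and 9; distinct page "B" fills 7-8.
assert expand_fills(["A", "B"], [[(3, 5), (9, 9)], [(7, 8)]]) == {
    3: "A", 4: "A", 5: "A", 7: "B", 8: "B", 9: "A"}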