def __init__(self, name, path):
    """Set up a version rooted at ``path`` and load its stored metadata.

    Three JSON files are read through the ``read_json``/``local_path``
    helpers; each one is optional and falls back to an empty value when
    ``FileNotFound`` is raised:

    * ``version_notes.json`` -> ``self.version_notes`` (default ``""``)
    * ``lexical_notes.json`` -> ``self.lexical_notes`` (default ``{}``)
    * ``works/catalog.json`` -> ``self.works`` (its ``"index"`` value) and
      ``self.worksorder`` (its ``"order"`` value); defaults ``{}`` / ``[]``

    ``self.section_cache`` starts empty.
    """
    self.name = name
    self.path = path
    self.works_path = os.path.join(path, 'works')

    try:
        version_notes = read_json(local_path(path, "version_notes.json"))
    except FileNotFound:
        version_notes = ""
    self.version_notes = version_notes

    try:
        lexical_notes = read_json(local_path(path, "lexical_notes.json"))
    except FileNotFound:
        lexical_notes = {}
    self.lexical_notes = lexical_notes

    # A missing catalog is treated as an empty one; a catalog that exists
    # but lacks "index" or "order" still raises KeyError.
    try:
        catalog = read_json(local_path(self.works_path, "catalog.json"))
    except FileNotFound:
        catalog = {"index": {}, "order": []}
    self.works = catalog["index"]
    self.worksorder = catalog["order"]

    self.section_cache = {}
def create(cls, path):
    """Lay out the on-disk skeleton of a new Library and return it.

    Creates ``path`` when it does not exist yet, then a ``versions``
    directory beneath it and an ``auth.json`` holding an empty
    ``{"versions": {}}`` mapping. Finally instantiates ``cls`` on ``path``.

    ``os.makedirs`` on the ``versions`` directory is unconditional, so it
    raises if that directory is already present.
    """
    root_missing = not os.path.exists(path)
    if root_missing:
        os.makedirs(path)

    versions_dir = local_path(path, "versions")
    os.makedirs(versions_dir)

    empty_auth = {"versions": {}}
    write_json(local_path(path, "auth.json"), empty_auth)

    return cls(path)
def save_works(self):
    """Write this version's works catalog to ``catalog.json``.

    The catalog is a dict with two keys: ``"order"`` (``self.worksorder``)
    and ``"index"`` (``self.works``). The works directory is created first
    if it is missing.
    """
    catalog_dir = local_path(self.works_path)
    if not os.path.exists(catalog_dir):
        os.makedirs(catalog_dir)

    catalog = {"order": self.worksorder, "index": self.works}
    write_json(local_path(self.works_path, "catalog.json"), catalog)
def __getitem__(self, name):
    """Return the Version called ``name``, building it on first access.

    A newly built Version is rooted at ``local_path(self.path, name)`` and
    stored in ``self.version_cache`` so later lookups return the same
    object.
    """
    cache = self.version_cache
    if name not in cache:
        cache[name] = Version(name, local_path(self.path, name))
    return cache[name]
def save_section(self, section):
    """Store the given section on disk and register it in the catalog.

    The section is placed in ``self.section_cache`` under
    ``(workid, sectionid)`` and written as ``{"order", "entries"}`` to
    ``<works_path>/<workid>/<sectionid>.json`` (the directory is created
    when missing). The works catalog is then updated, and saved via
    ``save_works()`` only when it actually changed:

    * unknown work: a new ``{"name": workid, "sections": [sectionid]}``
      entry is added and the work id is appended to ``self.worksorder``;
    * known work: the section id is appended to its ``"sections"`` list
      if it is not listed yet.
    """
    workid = section.workid
    sectionid = section.sectionid
    self.section_cache[(workid, sectionid)] = section

    path = local_path(self.works_path, workid, sectionid + ".json")
    section_dir = os.path.dirname(path)
    if not os.path.exists(section_dir):
        os.makedirs(section_dir)
    write_json(path, {"order": section.order, "entries": section.entries})

    # Test membership explicitly rather than catching KeyError around the
    # whole update: a KeyError raised by save_works(), or by a work entry
    # without a "sections" key, must not be mistaken for "work not
    # registered" (which would overwrite the entry and duplicate its id
    # in worksorder).
    if workid not in self.works:
        self.works[workid] = {"name": workid, "sections": [sectionid]}
        if workid not in self.worksorder:
            self.worksorder.append(workid)
        self.save_works()
        return

    # Tolerate a catalog entry that has no section list yet.
    sections = self.works[workid].setdefault("sections", [])
    if sectionid not in sections:
        sections.append(sectionid)
        self.save_works()
def load_refs(self):
    """Read ``refs.json`` and return its flat ``{child: [parents]}`` map.

    Returns an empty dict when reading raises ``FileNotFound``.
    """
    refs_file = local_path(self.path, "refs.json")
    try:
        return read_json(refs_file)
    except FileNotFound:
        return {}
def destroy_work(self, workid):
    """Remove a work from this version, in memory and on disk.

    The work is dropped from ``self.works`` and ``self.worksorder``, every
    cached section belonging to it is evicted from ``self.section_cache``,
    and its directory under ``self.works_path`` is deleted if present.
    Unknown work ids are ignored.

    The on-disk catalog is not rewritten here; call ``save_works()`` to
    persist the removal.
    """
    self.works.pop(workid, None)
    if workid in self.worksorder:
        self.worksorder.remove(workid)

    # Cache keys are (workid, sectionid) tuples. Without this purge,
    # load_section() would keep returning sections whose files are deleted
    # below. Collect the keys first so the dict is not mutated while it is
    # being iterated.
    stale_keys = [key for key in self.section_cache if key[0] == workid]
    for key in stale_keys:
        del self.section_cache[key]

    path = local_path(self.works_path, workid)
    if os.path.exists(path):
        shutil.rmtree(path)
def load_lexicon_and_book(self, bookid):
    """Parse one source file into a ``(Lexicon, Book)`` pair for ``bookid``.

    The file named by ``self._filenames[bookid]`` is read as bytes, split
    into lines, and each line is decoded as UTF-8 and split on single
    spaces into exactly seven columns: reference digits, part, parsing,
    text, bare word, normalized word and lemma.

    Every word receives the id ``"<bookid>.<chapter>.<verse>.<n>"``, where
    ``n`` restarts at 1 whenever the verse changes. Each mark from
    ``self.punctuation`` found in the text column gets the next ``n`` of
    the same verse and is added after the word.
    """
    lex = Lexicon(bookid)
    book = Book(bookid)
    fname = self._filenames[bookid]
    source_path = local_path(self.path, fname)

    with open(source_path, 'rb') as f:
        previous_ref = None
        position = 1
        for raw_line in f.read().splitlines():
            fields = raw_line.decode('utf-8').split(" ")
            digits, part, parsing, text, bare, norm, lemma = fields

            # The first column packs book, chapter and verse numbers as
            # three two-digit groups.
            booknumber, chapter, verse = [
                int(digits[start:start + 2]) for start in (0, 2, 4)
            ]

            # Words need ids that are unique within the source so that
            # translations can be matched against them: number the words
            # of each verse consecutively.
            ref = "%s.%s.%s" % (bookid, chapter, verse)
            position = position + 1 if ref == previous_ref else 1
            word_id = "%s.%s" % (ref, position)
            previous_ref = ref

            lex.add({
                "id": word_id,
                "original": bare,
                "part": part,
                "parsing": parsing,
                "lemma": lemma
            })
            book.add(bare, word_id)

            # Punctuation is embedded in the text column; give each mark
            # that occurs there its own entry directly after the word.
            for mark in self.punctuation:
                if mark not in text:
                    continue
                position += 1
                mark_id = "%s.%s" % (ref, position)
                lex.add({
                    "id": mark_id,
                    "original": mark,
                    "part": "S-",
                    "parsing": '',
                    "lemma": mark
                })
                book.add(mark, mark_id)

    return lex, book
def __init__(self, path):
    """Open the library stored at ``path``.

    Loads the auth data, builds the ``Versions`` container over the
    ``versions`` subdirectory, and loads the ``{child: [parents]}``
    reference map (passed through ``normalize``) together with its inverse
    ``{parent: [children]}`` map in ``self.inverse_refs``.
    """
    self.path = path
    self.auth = self.load_auth()

    # load_auth() returns {} when auth.json is missing, so indexing
    # self.auth["versions"] directly raised KeyError and made that fallback
    # unusable. setdefault keeps the mapping attached to self.auth, so
    # whatever Versions stores in it is still written by save_auth().
    versions_auth = self.auth.setdefault("versions", {})
    self.versions = Versions(local_path(path, "versions"), versions_auth)

    # TODO: move into each version (as part of the lexicon, probably)
    self.refs = normalize(self.load_refs())
    self.inverse_refs = {}
    for child, parents in self.refs.iteritems():
        for parent in parents:
            self.inverse_refs.setdefault(parent, []).append(child)
def save_refs(self):
    """Write ``self.refs`` (a flat ``{child: [parents]}`` map) to ``refs.json``."""
    refs_file = local_path(self.path, "refs.json")
    write_json(refs_file, self.refs)
def save_auth(self):
    """Write ``self.auth`` to ``auth.json`` under the library path."""
    auth_file = local_path(self.path, "auth.json")
    write_json(auth_file, self.auth)
def load_auth(self):
    """Read ``auth.json`` from the library path and return its contents.

    Returns an empty dict when reading raises ``FileNotFound``.
    """
    try:
        auth = read_json(local_path(self.path, "auth.json"))
    except FileNotFound:
        auth = {}
    return auth
def save_lexical_notes(self):
    """Write ``self.lexical_notes`` to this version's ``lexical_notes.json``."""
    notes_file = local_path(self.path, "lexical_notes.json")
    write_json(notes_file, self.lexical_notes)
def save_version_notes(self):
    """Write ``self.version_notes`` to this version's ``version_notes.json``."""
    notes_file = local_path(self.path, "version_notes.json")
    write_json(notes_file, self.version_notes)
def load_section(self, workid, sectionid):
    """Return this version's Section for ``(workid, sectionid)``.

    A section already held in ``self.section_cache`` is returned as is.
    Otherwise ``<works_path>/<workid>/<sectionid>.json`` is read and, when
    it is in an older layout, upgraded in up to three steps:

    1. Data with a ``"text"`` key (parallel ``"ids"``/``"text"`` lists plus
       a separate ``.lex`` file) is rebuilt as ``{"order", "entries"}``.
       Ids are shortened to their last two dot-separated components.
    2. An ``"order"`` made of ``[phraseid, wordid]`` pairs is flattened to
       ``"phraseid:wordid"`` strings.
    3. Entries carrying a separate ``"part"`` code have it mapped to one
       character that is prefixed to ``"parsing"``. Only the first entry
       is inspected to decide whether this step applies.

    The resulting Section is cached, and written back with
    ``save_section`` when any upgrade step ran.
    """
    key = (workid, sectionid)
    if key in self.section_cache:
        return self.section_cache[key]

    data = read_json(
        local_path(self.works_path, workid, sectionid + ".json"))
    upgraded = False

    # Step 1: parallel ids/text lists plus a .lex file.
    if "text" in data:
        occurrences = {}
        order = []
        for old_id in data["ids"]:
            short_id = ".".join(old_id.split(".")[-2:])
            # Zero-based count of how often this short id has been seen.
            occurrences[short_id] = occurrences.get(short_id, -1) + 1
            order.append([short_id, occurrences[short_id]])

        lex_file = local_path(self.works_path, workid, sectionid + ".lex")
        old_entries = read_json(lex_file)["entries"]
        entries = {}
        for old_id, entry in old_entries.iteritems():
            entry.pop("id")
            short_id = ".".join(old_id.split(".")[-2:])
            entries[short_id] = entry
            # Collect the text at every position where this id occurred.
            positions = indices(data["ids"], old_id)
            entry["text"] = [data["text"][i] for i in positions]

        data = {"order": order, "entries": entries}
        upgraded = True

    # Step 2: [phraseid, wordid] pairs -> "phraseid:wordid" strings.
    current_order = data["order"]
    if current_order and isinstance(current_order[0], list):
        data["order"] = [
            "%s:%d" % (phraseid, wordid)
            for phraseid, wordid in current_order
        ]
        upgraded = True

    # Step 3: fold the separate "part" code into the parsing string.
    current_entries = data["entries"]
    if current_entries and "part" in current_entries.itervalues().next():
        part_prefix = {
            "A-": "A",
            "C-": "C",
            "D-": "D",
            "I-": "I",
            "N-": "N",
            "P-": "P",
            "RA": "T",
            "RD": "d",
            "RI": "Q",
            "RP": "p",
            "RR": "r",
            "V-": "V",
            "X-": "X",
            "S-": "S",
            "--": "-"
        }
        for entry in current_entries.itervalues():
            old_part = entry.pop("part", "--")
            entry["parsing"] = part_prefix[old_part] + entry["parsing"]
        upgraded = True

    section = Section(self.name, workid, sectionid, **data)
    self.section_cache[key] = section
    if upgraded:
        self.save_section(section)
    return section