def parse_metadata(self, sort_by_field, reverse_sort=False, header="tei"):
    """Parse metadata fields from TEI or Dublin Core headers.

    Args:
        sort_by_field: Metadata field names to sort the records by. ``None``
            or an empty sequence means the records are returned in the order
            of ``self.filenames`` instead.
        reverse_sort: Accepted for interface compatibility; not used here.
        header: ``"tei"`` to use ``self.parse_tei_header()`` or ``"dc"`` to
            use ``self.parse_dc_header()``.

    Returns:
        The result of ``sort_list(load_metadata, sort_by_field)`` when
        ``sort_by_field`` is non-empty. Otherwise a list holding, for each
        entry of ``self.filenames`` that has a record with the same
        ``"filename"`` value, the first such record.

    Raises:
        ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
    """
    # Fail early with a clear message; otherwise load_metadata would never be
    # bound and the method would die later with an UnboundLocalError.
    if header not in ("tei", "dc"):
        raise ValueError(f"Unsupported header type {header!r}: expected 'tei' or 'dc'")

    print("### Parsing metadata ###")
    print(f"{time.ctime()}: Parsing metadata in {len(self.list_files())} files...")
    if header == "tei":
        load_metadata = self.parse_tei_header()
    else:
        load_metadata = self.parse_dc_header()

    # ``or []`` lets callers pass None to mean "no metadata sort".
    sort_fields_label = ", ".join(sort_by_field or [])
    print(
        f"{time.ctime()}: Sorting files by the following metadata fields: {sort_fields_label}...",
        end=" ",
    )

    self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
    if sort_by_field:
        return sort_list(load_metadata, sort_by_field)

    # Index the records by filename once so that restoring the order of
    # self.filenames is linear instead of rescanning load_metadata per file.
    # setdefault keeps the first record per filename, like a first-match scan.
    metadata_by_filename = {}
    for record in load_metadata:
        metadata_by_filename.setdefault(record["filename"], record)
    return [
        metadata_by_filename[filename]
        for filename in self.filenames
        if filename in metadata_by_filename
    ]
def parse_metadata(self, sort_by_field, header="tei"):
    """Parse metadata fields from TEI or Dublin Core headers.

    Args:
        sort_by_field: Metadata field names to sort the records by. ``None``
            or an empty sequence means the records are returned in the order
            of ``self.filenames`` instead.
        header: ``"tei"`` to use ``self.parse_tei_header()`` or ``"dc"`` to
            use ``self.parse_dc_header()``.

    Returns:
        The result of ``sort_list(load_metadata, sort_by_field)`` when
        ``sort_by_field`` is non-empty. Otherwise a list holding, for each
        entry of ``self.filenames`` whose basename matches a record's
        ``"filename"`` value, the first such record.

    Raises:
        ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
    """
    # Fail early with a clear message; otherwise load_metadata would never be
    # bound and the method would die later with an UnboundLocalError.
    if header not in ("tei", "dc"):
        raise ValueError(f"Unsupported header type {header!r}: expected 'tei' or 'dc'")

    # Count the directory entries once rather than re-listing for every message.
    total_files = len(os.listdir(self.textdir))

    print("### Parsing metadata ###", flush=True)
    print(
        f"{time.ctime()}: Parsing document level metadata: 0/{total_files} done...",
        flush=True,
        end="",
    )
    if header == "tei":
        load_metadata = self.parse_tei_header()
    else:
        load_metadata = self.parse_dc_header()
    # The carriage return rewrites the "0/N" progress line in place.
    print(
        f"\r{time.ctime()}: Parsing document level metadata: {total_files}/{total_files} done...",
        flush=True,
    )

    # ``or []`` lets callers pass None to mean "no metadata sort".
    sort_fields_label = ", ".join(sort_by_field or [])
    print(
        f"{time.ctime()}: Sorting files by the following metadata fields: {sort_fields_label}...",
        end=" ",
        flush=True,
    )

    self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
    if sort_by_field:
        return sort_list(load_metadata, sort_by_field)

    # Index the records by filename once so that restoring the order of
    # self.filenames is linear instead of rescanning load_metadata per file.
    # setdefault keeps the first record per filename, like a first-match scan.
    metadata_by_filename = {}
    for record in load_metadata:
        metadata_by_filename.setdefault(record["filename"], record)

    sorted_load_metadata = []
    for filename in self.filenames:
        # Records are keyed by bare file name; self.filenames may hold paths.
        basename = os.path.basename(filename)
        if basename in metadata_by_filename:
            sorted_load_metadata.append(metadata_by_filename[basename])
    return sorted_load_metadata
def parse_metadata(self, sort_by_field, header="tei"):
    """Parse metadata fields from TEI or Dublin Core headers.

    Args:
        sort_by_field: Metadata field names to sort the records by. ``None``
            or an empty sequence means the records are returned in the order
            of ``self.filenames`` instead.
        header: ``"tei"`` to use ``self.parse_tei_header()`` or ``"dc"`` to
            use ``self.parse_dc_header()``.

    Returns:
        The result of ``sort_list(load_metadata, sort_by_field)`` when
        ``sort_by_field`` is non-empty. Otherwise a list holding, for each
        entry of ``self.filenames`` whose basename matches a record's
        ``"filename"`` value, the first such record.

    Raises:
        ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
    """
    # Fail early with a clear message; otherwise load_metadata would never be
    # bound and the method would die later with an UnboundLocalError.
    if header not in ("tei", "dc"):
        raise ValueError(f"Unsupported header type {header!r}: expected 'tei' or 'dc'")

    print("### Parsing metadata ###", flush=True)
    print(
        f"{time.ctime()}: Parsing metadata in {len(os.listdir(self.textdir))} files...",
        flush=True,
    )
    if header == "tei":
        load_metadata = self.parse_tei_header()
    else:
        load_metadata = self.parse_dc_header()

    # ``or []`` lets callers pass None to mean "no metadata sort".
    sort_fields_label = ", ".join(sort_by_field or [])
    print(
        f"{time.ctime()}: Sorting files by the following metadata fields: {sort_fields_label}...",
        end=" ",
        flush=True,
    )

    self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
    if sort_by_field:
        return sort_list(load_metadata, sort_by_field)

    # Index the records by filename once so that restoring the order of
    # self.filenames is linear instead of rescanning load_metadata per file.
    # setdefault keeps the first record per filename, like a first-match scan.
    metadata_by_filename = {}
    for record in load_metadata:
        metadata_by_filename.setdefault(record["filename"], record)

    sorted_load_metadata = []
    for filename in self.filenames:
        # Records are keyed by bare file name; self.filenames may hold paths.
        basename = os.path.basename(filename)
        if basename in metadata_by_filename:
            sorted_load_metadata.append(metadata_by_filename[basename])
    return sorted_load_metadata
def parse_metadata(self, sort_by_field, reverse_sort=False, header="tei"):
    """Parse metadata fields from TEI or Dublin Core headers.

    Args:
        sort_by_field: Metadata field names to sort the records by. ``None``
            or an empty sequence means the parsed records are returned as-is.
        reverse_sort: Accepted for interface compatibility; not used here.
        header: ``"tei"`` to use ``self.parse_tei_header()`` or ``"dc"`` to
            use ``self.parse_dc_header()``.

    Returns:
        The result of ``sort_list(load_metadata, sort_by_field)`` when
        ``sort_by_field`` is non-empty, otherwise the records exactly as the
        header parser returned them.

    Raises:
        ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
    """
    # Fail early with a clear message; otherwise load_metadata would never be
    # bound and the method would die later with an UnboundLocalError.
    if header not in ("tei", "dc"):
        raise ValueError(f"Unsupported header type {header!r}: expected 'tei' or 'dc'")

    print("### Parsing metadata ###", flush=True)
    print(
        f"{time.ctime()}: Parsing metadata in {len(os.listdir(self.textdir))} files...",
        flush=True,
    )
    if header == "tei":
        load_metadata = self.parse_tei_header()
    else:
        load_metadata = self.parse_dc_header()

    # ``or []`` lets callers pass None to mean "no metadata sort".
    sort_fields_label = ", ".join(sort_by_field or [])
    print(
        f"{time.ctime()}: Sorting files by the following metadata fields: {sort_fields_label}...",
        end=" ",
        flush=True,
    )

    self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
    if sort_by_field:
        return sort_list(load_metadata, sort_by_field)
    return load_metadata
def get_sorted_hits(all_results, sort_keys, request, config, db, start, end):
    """Return one page of hit objects, ordered by ``sort_keys``.

    The query described by ``request`` is run through ``db.query``, and
    ``page_interval`` adjusts ``start``/``end`` for the requested page size.
    ``all_results`` is ordered with ``sort_list``; each entry in the
    ``[start:end]`` slice supplies an ``"index"`` used to look up the hit,
    which is converted with ``kwic_hit_object``.

    Returns:
        A dict with the keys ``"description"`` (start, end and
        results_per_page), ``"query"`` (the request's pairs as a dict),
        ``"results"`` (the converted hits for this page),
        ``"results_length"`` (``len(hits)``) and ``"query_done"``
        (``hits.done``).
    """
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    # page_interval also returns a third value that this function does not need.
    start, end, _ = page_interval(request.results_per_page, hits, start, end)

    description = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
    }
    kwic_object = {
        "description": description,
        "query": dict([pair for pair in request]),
    }

    page_entries = sort_list(all_results, sort_keys)[start:end]
    kwic_object["results"] = [
        kwic_hit_object(hits[entry["index"]], config, db) for entry in page_entries
    ]
    kwic_object["results_length"] = len(hits)
    kwic_object["query_done"] = hits.done
    return kwic_object