Exemple #1
0
    def parse_metadata(self, sort_by_field, reverse_sort=False, header="tei"):
        """Parse metadata from TEI or Dublin Core headers and order the records.

        Args:
            sort_by_field: Iterable of metadata field names to sort by. When it
                is empty or ``None``, records are ordered to follow
                ``self.filenames`` instead.
            reverse_sort: Not used by this method; kept so existing callers
                that pass it keep working.
            header: Which header parser to run: ``"tei"`` or ``"dc"``.

        Returns:
            The result of ``sort_list(load_metadata, sort_by_field)`` when sort
            fields are given; otherwise a list holding, for each entry of
            ``self.filenames``, the first parsed record whose ``"filename"``
            value equals that entry (entries without a record are skipped).

        Raises:
            ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
        """
        print("### Parsing metadata ###")
        print("%s: Parsing metadata in %d files..." %
              (time.ctime(), len(self.list_files())))
        if header == "tei":
            load_metadata = self.parse_tei_header()
        elif header == "dc":
            load_metadata = self.parse_dc_header()
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError("Unsupported header type %r: expected 'tei' or 'dc'" % (header,))

        # ``or ()`` keeps the log line working when no sort fields are passed.
        print("%s: Sorting files by the following metadata fields: %s..." %
              (time.ctime(), ", ".join(sort_by_field or ())),
              end=' ')

        self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
        if sort_by_field:
            return sort_list(load_metadata, sort_by_field)

        # No sort fields: follow the order of self.filenames. Index the records
        # once (first record wins, as in a linear scan that stops at the first
        # match) so the lookup is not quadratic.
        metadata_by_filename = {}
        for metadata in load_metadata:
            metadata_by_filename.setdefault(metadata["filename"], metadata)
        return [
            metadata_by_filename[filename]
            for filename in self.filenames
            if filename in metadata_by_filename
        ]
Exemple #2
0
    def parse_metadata(self, sort_by_field, header="tei"):
        """Parse metadata from TEI or Dublin Core headers and order the records.

        Progress messages are printed to stdout; the file count shown is the
        number of entries in ``self.textdir``.

        Args:
            sort_by_field: Iterable of metadata field names to sort by. When it
                is empty or ``None``, records are ordered to follow
                ``self.filenames`` instead.
            header: Which header parser to run: ``"tei"`` or ``"dc"``.

        Returns:
            The result of ``sort_list(load_metadata, sort_by_field)`` when sort
            fields are given; otherwise a list holding, for each entry of
            ``self.filenames``, the first parsed record whose ``"filename"``
            value equals the entry's base name (entries without a record are
            skipped).

        Raises:
            ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
        """
        # List the directory once; the same count is shown before and after parsing.
        file_count = len(os.listdir(self.textdir))
        print("### Parsing metadata ###", flush=True)
        print(
            f"{time.ctime()}: Parsing document level metadata: 0/{file_count} done...",
            flush=True,
            end="",
        )
        if header == "tei":
            load_metadata = self.parse_tei_header()
        elif header == "dc":
            load_metadata = self.parse_dc_header()
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f"Unsupported header type {header!r}: expected 'tei' or 'dc'")

        # The leading "\r" overwrites the "0/N" progress line printed above.
        print(
            f"\r{time.ctime()}: Parsing document level metadata: {file_count}/{file_count} done...",
            flush=True,
        )

        # ``or ()`` keeps the log line working when no sort fields are passed.
        print(
            "%s: Sorting files by the following metadata fields: %s..." %
            (time.ctime(), ", ".join(sort_by_field or ())),
            end=" ",
            flush=True,
        )

        self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
        if sort_by_field:
            return sort_list(load_metadata, sort_by_field)

        # No sort fields: follow the order of self.filenames. Index the records
        # once (first record wins) so the lookup is not quadratic.
        metadata_by_filename = {}
        for metadata in load_metadata:
            metadata_by_filename.setdefault(metadata["filename"], metadata)
        sorted_load_metadata = []
        for filename in self.filenames:
            base_name = os.path.basename(filename)
            if base_name in metadata_by_filename:
                sorted_load_metadata.append(metadata_by_filename[base_name])
        return sorted_load_metadata
Exemple #3
0
    def parse_metadata(self, sort_by_field, header="tei"):
        """Parse metadata from TEI or Dublin Core headers and order the records.

        Args:
            sort_by_field: Iterable of metadata field names to sort by. When it
                is empty or ``None``, records are ordered to follow
                ``self.filenames`` instead.
            header: Which header parser to run: ``"tei"`` or ``"dc"``.

        Returns:
            The result of ``sort_list(load_metadata, sort_by_field)`` when sort
            fields are given; otherwise a list holding, for each entry of
            ``self.filenames``, the first parsed record whose ``"filename"``
            value equals the entry's base name (entries without a record are
            skipped).

        Raises:
            ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
        """
        print("### Parsing metadata ###", flush=True)
        print("%s: Parsing metadata in %d files..." % (time.ctime(), len(os.listdir(self.textdir))), flush=True)
        if header == "tei":
            load_metadata = self.parse_tei_header()
        elif header == "dc":
            load_metadata = self.parse_dc_header()
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError("Unsupported header type %r: expected 'tei' or 'dc'" % (header,))

        # ``or ()`` keeps the log line working when no sort fields are passed.
        print(
            "%s: Sorting files by the following metadata fields: %s..." % (time.ctime(), ", ".join(sort_by_field or ())),
            end=" ",
            flush=True,
        )

        self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
        if sort_by_field:
            return sort_list(load_metadata, sort_by_field)

        # No sort fields: follow the order of self.filenames. Index the records
        # once (first record wins) so the lookup is not quadratic.
        first_record_for = {}
        for record in load_metadata:
            first_record_for.setdefault(record["filename"], record)
        base_names = (os.path.basename(filename) for filename in self.filenames)
        return [first_record_for[name] for name in base_names if name in first_record_for]
Exemple #4
0
    def parse_metadata(self, sort_by_field, reverse_sort=False, header="tei"):
        """Parse metadata from TEI or Dublin Core headers, optionally sorted.

        Args:
            sort_by_field: Iterable of metadata field names to sort by. When it
                is empty or ``None``, the parsed records are returned in the
                order the header parser produced them.
            reverse_sort: Not used by this method; kept so existing callers
                that pass it keep working.
            header: Which header parser to run: ``"tei"`` or ``"dc"``.

        Returns:
            The result of ``sort_list(load_metadata, sort_by_field)`` when sort
            fields are given; otherwise the parser's output unchanged.

        Raises:
            ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
        """
        print("### Parsing metadata ###", flush=True)
        print("%s: Parsing metadata in %d files..." % (time.ctime(), len(os.listdir(self.textdir))), flush=True)
        if header == "tei":
            load_metadata = self.parse_tei_header()
        elif header == "dc":
            load_metadata = self.parse_dc_header()
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError("Unsupported header type %r: expected 'tei' or 'dc'" % (header,))

        # ``or ()`` keeps the log line working when no sort fields are passed.
        print("%s: Sorting files by the following metadata fields: %s..." % (time.ctime(),
                                                                             ", ".join(sort_by_field or ())), end=' ', flush=True)

        self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
        if sort_by_field:
            return sort_list(load_metadata, sort_by_field)
        return load_metadata
def get_sorted_hits(all_results, sort_keys, request, config, db, start, end):
    """Return one page of KWIC results taken from the sorted result list.

    The query described by ``request`` is run against ``db``; the page bounds
    come from ``page_interval``. ``all_results`` is ordered with ``sort_list``
    and each entry's ``"index"`` value selects the hit to render with
    ``kwic_hit_object``.

    Returns:
        A dict with the keys ``"description"``, ``"query"``, ``"results"``,
        ``"results_length"`` and ``"query_done"``.
    """
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    start, end, _ = page_interval(request.results_per_page, hits, start, end)

    page_description = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
    }
    # Iterating the request yields the pairs that make up the echoed query.
    query_params = dict(pair for pair in request)

    page_entries = sort_list(all_results, sort_keys)[start:end]
    page_results = [kwic_hit_object(hits[entry["index"]], config, db) for entry in page_entries]

    return {
        "description": page_description,
        "query": query_params,
        "results": page_results,
        "results_length": len(hits),
        "query_done": hits.done,
    }
def get_sorted_hits(all_results, sort_keys, request, config, db, start, end):
    """Assemble the KWIC response for the requested slice of sorted results.

    Runs the query in ``request`` on ``db``, resolves the page bounds with
    ``page_interval``, sorts ``all_results`` through ``sort_list`` and turns
    the hit referenced by each sorted entry's ``"index"`` value into a KWIC
    result via ``kwic_hit_object``.

    Returns:
        A dict with the keys ``"description"``, ``"query"``, ``"results"``,
        ``"results_length"`` and ``"query_done"``.
    """
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    start, end, _unused_count = page_interval(request.results_per_page, hits, start, end)

    response = {}
    response["description"] = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
    }
    # Iterating the request yields the pairs that make up the echoed query.
    response["query"] = dict(item for item in request)

    ranked = sort_list(all_results, sort_keys)
    rendered = []
    for position in range(*slice(start, end).indices(len(ranked))):
        hit = hits[ranked[position]["index"]]
        rendered.append(kwic_hit_object(hit, config, db))

    response["results"] = rendered
    response["results_length"] = len(hits)
    response["query_done"] = hits.done
    return response
Exemple #7
0
    def parse_metadata(self, sort_by_field, reverse_sort=False, header="tei"):
        """Parse metadata from TEI or Dublin Core headers and order the records.

        Args:
            sort_by_field: Iterable of metadata field names to sort by. When it
                is empty or ``None``, records are ordered to follow
                ``self.filenames`` instead.
            reverse_sort: Not used by this method; kept so existing callers
                that pass it keep working.
            header: Which header parser to run: ``"tei"`` or ``"dc"``.

        Returns:
            The result of ``sort_list(load_metadata, sort_by_field)`` when sort
            fields are given; otherwise a list holding, for each entry of
            ``self.filenames``, the first parsed record whose ``"filename"``
            value equals that entry (entries without a record are skipped).

        Raises:
            ValueError: If ``header`` is neither ``"tei"`` nor ``"dc"``.
        """
        print("### Parsing metadata ###")
        print("%s: Parsing metadata in %d files..." % (time.ctime(), len(self.list_files())))
        if header == "tei":
            load_metadata = self.parse_tei_header()
        elif header == "dc":
            load_metadata = self.parse_dc_header()
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError("Unsupported header type %r: expected 'tei' or 'dc'" % (header,))

        # ``or ()`` keeps the log line working when no sort fields are passed.
        print("%s: Sorting files by the following metadata fields: %s..." % (time.ctime(),
                                                                             ", ".join(sort_by_field or ())), end=' ')

        self.sort_order = sort_by_field  # to be used for the sort by concordance biblio key in web config
        if sort_by_field:
            return sort_list(load_metadata, sort_by_field)

        # No sort fields: follow the order of self.filenames. Index the records
        # once (first record wins) so the lookup is not quadratic.
        record_for = {}
        for record in load_metadata:
            record_for.setdefault(record["filename"], record)
        return [record_for[filename] for filename in self.filenames if filename in record_for]