コード例 #1
0
def landing_page_bibliography(request, config):
    """Build the bibliography shown on the landing page.

    Fetches every object at ``request.object_level`` (or at the database's
    ``default_object_level`` when the request does not name one of
    ``doc``/``div1``/``div2``/``div3``) and, for each hit, collects its
    metadata together with the first and last non-empty ``head`` found in the
    ``toms`` rows between the hit's document row and the next document's row.

    Args:
        request: request object; only ``object_level`` is read.
        config: web configuration; only ``db_path`` is read.

    Returns:
        A list of dicts, one per hit, holding every field listed in
        ``db.locals["metadata_fields"]`` (missing values become ``""``) plus
        ``philo_id``, ``start_head`` and ``end_head``.  A head stored as
        bytes is returned as title-cased UTF-8 bytes; a head stored as text
        is returned as a title-cased string; ``""`` is used when no head is
        found or it cannot be normalized.
    """
    db = DB(config.db_path + "/data/")
    object_level = request.object_level
    if object_level and object_level in ["doc", "div1", "div2", "div3"]:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals["default_object_level"])

    # The sort direction is the only varying part and is never user supplied;
    # the row bounds are passed as bound parameters.
    head_query = (
        "select head from toms where rowid between ? and ? "
        "and head is not null and head != '' order by rowid %s limit 1"
    )

    def title_cased_head(row):
        """Return the title-cased head of ``row``, or "" when unavailable."""
        if row is None:  # no non-empty head in the requested row range
            return ""
        head = row["head"]
        try:
            if isinstance(head, bytes):
                # Byte heads keep the historical bytes-in/bytes-out contract.
                return head.decode("utf-8").lower().title().encode("utf-8")
            return head.lower().title()
        except (AttributeError, UnicodeError) as e:
            # Best effort: a malformed head must not break the whole listing.
            print(repr(e), file=sys.stderr)
            return ""

    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {field: hit[field] or "" for field in db.locals["metadata_fields"]}
        # Note: this compares the *requested* level, not the fallback level.
        if object_level == "doc":
            hit_object["philo_id"] = hit.philo_id[0]
        else:
            hit_object["philo_id"] = "/".join([str(i) for i in hit.philo_id])

        doc_id = str(hit.philo_id[0]) + " 0 0 0 0 0 0"
        next_doc_id = str(hit.philo_id[0] + 1) + " 0 0 0 0 0 0"
        c.execute("select rowid from toms where philo_id=?", (doc_id,))
        doc_row = c.fetchone()["rowid"]
        c.execute("select rowid from toms where philo_id=?", (next_doc_id,))
        next_row = c.fetchone()
        if next_row is None:
            # This is the last doc: use the last rowid in the table instead.
            c.execute("select max(rowid) from toms")
            next_doc_row = c.fetchone()[0]
        else:
            next_doc_row = next_row["rowid"]

        c.execute(head_query % "asc", (doc_row, next_doc_row))
        hit_object["start_head"] = title_cased_head(c.fetchone())
        c.execute(head_query % "desc", (doc_row, next_doc_row))
        hit_object["end_head"] = title_cased_head(c.fetchone())

        results.append(hit_object)
    return results
コード例 #2
0
def bibliography_results(request, config):
    """Assemble one page of bibliography results for ``request``.

    The hitlist comes from ``db.get_all`` when the request carries no
    metadata, otherwise from a metadata query.  When
    ``request.simple_bibliography`` is ``"all"`` (the simple landing page
    report) the hitlist is finished and every hit is returned; otherwise the
    page window is computed by ``page_interval``.

    Returns:
        A ``(bibliography_object, hits)`` tuple, where ``bibliography_object``
        is a dict with the keys ``description``, ``query``,
        ``default_object``, ``results``, ``results_length``, ``query_done``
        and ``result_type``.
    """
    db = DB(config.db_path + "/data/")
    if request.no_metadata:
        hits = db.get_all(db.locals["default_object_level"], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)

    # The simple landing page report wants the whole bibliography in load order.
    if request.simple_bibliography == "all":
        hits.finish()
        start = 1
        end = len(hits)
        page_num = end
    else:
        start, end, page_num = page_interval(request.results_per_page, hits, request.start, request.end)

    bibliography_object = {
        "description": {"start": start, "end": end, "n": page_num, "results_per_page": request.results_per_page},
        "query": dict(pair for pair in request),
        "default_object": db.locals["default_object_level"],
    }

    results = []
    result_type = "doc"  # reported as-is when the page contains no hits
    for hit in hits[start - 1 : end]:
        links = citation_links(db, config, hit)
        fields = {name: hit[name] for name in db.locals["metadata_fields"]}
        result_type = hit.object_type
        if request.simple_bibliography == "all":
            citation = citations(hit, links, config, report="simple_landing")
        else:
            citation = citations(hit, links, config, report="bibliography", result_type=result_type)

        # Text context is only attached for non-doc hits, and only when
        # dictionary_bibliography is not explicitly False.
        with_context = config.dictionary_bibliography is not False and result_type != "doc"
        if with_context:
            context = get_text_obj(hit, config, request, db.locals["token_regex"], images=False)

        entry = {
            "citation": citation,
            "citation_links": links,
            "philo_id": hit.philo_id,
            "metadata_fields": fields,
        }
        if with_context:
            entry["context"] = context
        entry["object_type"] = result_type
        results.append(entry)

    bibliography_object["results"] = results
    bibliography_object["results_length"] = len(hits)
    bibliography_object["query_done"] = hits.done
    bibliography_object["result_type"] = result_type
    return bibliography_object, hits
コード例 #3
0
def get_total_results(environ, start_response):
    """WSGI generator that yields the total hit count of a query as JSON.

    Sends a ``200 OK`` JSON response header, rebuilds the request from
    ``environ``, runs the matching query (all objects, metadata-only, or a
    full-text query), and yields ``len(hits)`` as UTF-8 encoded JSON after
    calling ``hits.finish()``.
    """
    start_response(
        '200 OK',
        [('Content-type', 'application/json; charset=UTF-8'),
         ("Access-Control-Allow-Origin", "*")]
    )
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    if not request.no_q:
        # A query term was supplied: run the full-text search.
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)
    elif request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)

    # finish() is called before len(); presumably it completes the hitlist so
    # the count is final -- confirm against the hitlist implementation.
    hits.finish()
    yield json.dumps(len(hits)).encode('utf8')
コード例 #4
0
def get_total_results(environ, start_response):
    """Yield, as UTF-8 JSON, how many hits the request's query produces.

    This is a WSGI callable written as a generator: the response is started
    with a ``200 OK`` status and JSON/CORS headers, the query described by
    ``environ`` is executed, and a single JSON-encoded integer is yielded.
    """
    response_headers = [('Content-type', 'application/json; charset=UTF-8'),
                        ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', response_headers)

    app_root = os.path.abspath(os.path.dirname(__file__)).replace('scripts', '')
    config = WebConfig(app_root)
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    without_query = request.no_q
    if without_query and request.no_metadata:
        # Neither query terms nor metadata: count every default-level object.
        hits = db.get_all(db.locals['default_object_level'],
                          request["sort_order"])
    elif without_query:
        # Metadata-only search.
        hits = db.query(sort_order=request["sort_order"],
                        **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        **request.metadata)

    hits.finish()
    total_results = len(hits)
    yield json.dumps(total_results).encode('utf8')
コード例 #5
0
def frequency_results(request, config, sorted_results=False):
    """Count how often each value of ``request.frequency_field`` occurs in a hitlist.

    Reads through the hitlist selected by ``request`` starting at
    ``request.start``, looks up the frequency field for each hit, and builds a
    mapping of unique values to their counts.  Unless ``sorted_results`` is
    True, the loop stops after roughly five seconds so the caller can resume
    from ``hits_done``.

    Args:
        request: request object; reads ``q``, ``no_q``, ``no_metadata``,
            ``metadata``, ``method``, ``arg``, ``start``, ``report`` and
            ``frequency_field``.
        config: web configuration; ``db_path`` is read and the object is
            passed on to ``make_absolute_query_link``.
        sorted_results: when True the hitlist is finished up front, the time
            limit is disabled, and ``results`` is returned as a list of
            ``(value, info)`` pairs sorted by descending count.

    Returns:
        A dict with ``results``, ``more_results``, ``results_length`` and
        ``query``; ``hits_done`` is also present unless reading the hitlist
        raised ``IndexError``.

    Raises:
        KeyError: if ``request.frequency_field`` is not a key of
            ``db.locals["metadata_types"]``.
    """
    db = DB(config.db_path + "/data/")

    # Resolve the field against the database's own metadata table *before* it
    # is interpolated into SQL below: an unknown (or malicious) field name
    # raises KeyError here instead of reaching the query string.
    frequency_field = request.frequency_field
    metadata_type = db.locals["metadata_types"][frequency_field]

    biblio_search = bool(request.q == "" and request.no_q)
    if biblio_search:
        if request.no_metadata:
            hits = db.get_all(db.locals["default_object_level"],
                              sort_order=["rowid"],
                              raw_results=True)
        else:
            hits = db.query(sort_order=["rowid"],
                            raw_results=True,
                            **request.metadata)
    else:
        hits = db.query(request["q"],
                        request["method"],
                        request["arg"],
                        raw_results=True,
                        **request.metadata)

    if sorted_results is True:
        hits.finish()

    cursor = db.dbh.cursor()
    # Column names cannot be bound as parameters; frequency_field was
    # validated against db.locals["metadata_types"] above.
    cursor.execute("select philo_id, %s from toms where %s is not null" %
                   (frequency_field, frequency_field))
    metadata_dict = {}
    for raw_philo_id, value in cursor:
        # Keys are the non-zero components of the philo_id.
        object_id = tuple(int(s) for s in raw_philo_id.split() if int(s))
        metadata_dict[object_id] = value

    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    max_seconds = 5  # time budget for one pass when results are not sorted

    obj_dict = {
        "doc": 1,
        "div1": 2,
        "div2": 3,
        "div3": 4,
        "para": 5,
        "sent": 6,
        "word": 7
    }
    # None for "div" (handled separately below) and for any unknown type.
    object_level = obj_dict.get(metadata_type)

    try:
        for philo_id in hits[request.start:]:
            if not biblio_search:
                philo_id = tuple(list(philo_id[:6]) + [philo_id[7]])
            if metadata_type == "div":
                key = ""
                # The deepest div level that carries the field wins.
                for div in ("div1", "div2", "div3"):
                    prefix = philo_id[:obj_dict[div]]
                    if prefix in metadata_dict:
                        key = metadata_dict[prefix]
                if not key:
                    for depth in (4, 5):
                        if philo_id[:depth] in metadata_dict:
                            key = metadata_dict[philo_id[:depth]]
                            break
                if not key:
                    last_hit_done += 1
                    continue
            else:
                if object_level is None:
                    # Unknown metadata type: nothing to look up, skip the hit.
                    last_hit_done += 1
                    continue
                try:
                    key = metadata_dict[philo_id[:object_level]]
                except (KeyError, TypeError):
                    # Hit has no value for this field (or an unhashable id).
                    last_hit_done += 1
                    continue
            if key not in counts:
                counts[key] = {
                    "count": 0,
                    "metadata": {
                        frequency_field: key
                    }
                }
                counts[key]["url"] = make_absolute_query_link(
                    config,
                    request,
                    frequency_field="",
                    start="0",
                    end="0",
                    report=request.report,
                    script="",
                    **{frequency_field: '"%s"' % key})
                if not biblio_search:
                    query_metadata = {
                        k: v for k, v in request.metadata.items() if v
                    }
                    query_metadata[frequency_field] = '"%s"' % key
                    local_hits = db.query(**query_metadata)
                    counts[key]["total_word_count"] = local_hits.get_total_word_count()
            counts[key]["count"] += 1

            # Avoid timeouts by splitting the work: stop once more than
            # max_seconds has been spent in the loop (unless sorting).
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > max_seconds and sorted_results is False:
                break

        frequency_object["results"] = counts
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            # Every hit was processed: add a bucket for hits with no value.
            new_metadata = {k: v for k, v in request.metadata.items() if v}
            new_metadata[frequency_field] = '"NULL"'
            if biblio_search:
                new_hits = db.query(sort_order=["rowid"],
                                    raw_results=True,
                                    **new_metadata)
            else:
                new_hits = db.query(request["q"],
                                    request["method"],
                                    request["arg"],
                                    raw_results=True,
                                    **new_metadata)
            new_hits.finish()
            if len(new_hits):
                null_entry = {
                    "count": len(new_hits),
                    "url": make_absolute_query_link(
                        config,
                        request,
                        frequency_field="",
                        start="0",
                        end="0",
                        report=request.report,
                        script="",
                        **{frequency_field: '"NULL"'}),
                    "metadata": {
                        frequency_field: '"NULL"'
                    },
                }
                if not biblio_search:
                    # Only run the extra metadata query when its word count
                    # is actually reported.
                    local_hits = db.query(**new_metadata)
                    null_entry["total_word_count"] = local_hits.get_total_word_count()
                frequency_object["results"]["NULL"] = null_entry
            frequency_object["more_results"] = False
        else:
            frequency_object["more_results"] = True
    except IndexError:
        frequency_object["results"] = {}
        frequency_object["more_results"] = False
    frequency_object["results_length"] = len(hits)
    frequency_object["query"] = dict(pair for pair in request)

    if sorted_results is True:
        frequency_object["results"] = sorted(
            frequency_object["results"].items(),
            key=lambda x: x[1]["count"],
            reverse=True)

    return frequency_object
コード例 #6
0
def landing_page_bibliography(request, config):
    """Build the bibliography shown on the landing page.

    Fetches every object at ``request.object_level`` (or at the database's
    ``default_object_level`` when the request does not name one of
    ``doc``/``div1``/``div2``/``div3``) and, for each hit, collects its
    metadata together with the first and last non-empty ``head`` found in the
    ``toms`` rows between the hit's document row and the next document's row.

    Args:
        request: request object; only ``object_level`` is read.
        config: web configuration; only ``db_path`` is read.

    Returns:
        A list of dicts, one per hit, holding every field listed in
        ``db.locals["metadata_fields"]`` (missing values become ``""``) plus
        ``philo_id``, ``start_head`` and ``end_head``.  A head stored as
        bytes is returned as title-cased UTF-8 bytes; a head stored as text
        is returned as a title-cased string; ``""`` is used when no head is
        found or it cannot be normalized.
    """
    db = DB(config.db_path + "/data/")
    object_level = request.object_level
    if object_level and object_level in ["doc", "div1", "div2", "div3"]:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals["default_object_level"])

    # The sort direction is the only varying part and is never user supplied;
    # the row bounds are passed as bound parameters.
    head_query = (
        "select head from toms where rowid between ? and ? "
        "and head is not null and head != '' order by rowid %s limit 1"
    )

    def title_cased_head(row):
        """Return the title-cased head of ``row``, or "" when unavailable."""
        if row is None:  # no non-empty head in the requested row range
            return ""
        head = row["head"]
        try:
            if isinstance(head, bytes):
                # Byte heads keep the historical bytes-in/bytes-out contract.
                return head.decode("utf-8").lower().title().encode("utf-8")
            return head.lower().title()
        except (AttributeError, UnicodeError) as e:
            # Best effort: a malformed head must not break the whole listing.
            print(repr(e), file=sys.stderr)
            return ""

    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {field: hit[field] or "" for field in db.locals["metadata_fields"]}
        # Note: this compares the *requested* level, not the fallback level.
        if object_level == "doc":
            hit_object["philo_id"] = hit.philo_id[0]
        else:
            hit_object["philo_id"] = "/".join([str(i) for i in hit.philo_id])

        doc_id = str(hit.philo_id[0]) + " 0 0 0 0 0 0"
        next_doc_id = str(hit.philo_id[0] + 1) + " 0 0 0 0 0 0"
        c.execute("select rowid from toms where philo_id=?", (doc_id,))
        doc_row = c.fetchone()["rowid"]
        c.execute("select rowid from toms where philo_id=?", (next_doc_id,))
        next_row = c.fetchone()
        if next_row is None:
            # This is the last doc: use the last rowid in the table instead.
            c.execute("select max(rowid) from toms")
            next_doc_row = c.fetchone()[0]
        else:
            next_doc_row = next_row["rowid"]

        c.execute(head_query % "asc", (doc_row, next_doc_row))
        hit_object["start_head"] = title_cased_head(c.fetchone())
        c.execute(head_query % "desc", (doc_row, next_doc_row))
        hit_object["end_head"] = title_cased_head(c.fetchone())

        results.append(hit_object)
    return results