def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if not request["q"]:
        dump = json.dumps({"original_query": "", "term_groups": []})
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        sort_order=request["sort_order"], **request.metadata)
        parsed = parse_query(request.q)
        group = group_terms(parsed)
        all_groups = split_terms(group)
        term_groups = []
        for g in all_groups:
            term_group = ''
            not_started = False
            for kind, term in g:
                if kind == 'NOT':
                    if not_started is False:
                        not_started = True
                        term_group += ' NOT '
                elif kind == 'OR':
                    term_group += '|'
                elif kind == "TERM":
                    term_group += ' %s ' % term
                elif kind == "QUOTE":
                    term_group += ' %s ' % term
            term_group = term_group.strip()
            term_groups.append(term_group)
        dump = json.dumps({"term_groups": term_groups, "original_query": request.original_q})
    yield dump.encode('utf8')
def kwic_results(request, config):
    """Fetch KWIC results"""
    db = DB(config.db_path + '/data/')
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    start, end, n = page_interval(request.results_per_page, hits, request.start, request.end)
    kwic_object = {
        "description": {
            "start": start,
            "end": end,
            "results_per_page": request.results_per_page
        },
        "query": dict([i for i in request])
    }
    kwic_object['results'] = []
    for hit in hits[start - 1:end]:
        kwic_result = kwic_hit_object(hit, config, db)
        kwic_object['results'].append(kwic_result)
    kwic_object['results_length'] = len(hits)
    kwic_object["query_done"] = hits.done
    return kwic_object
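kwic_results (and the other paginated reports in this listing) delegate the page math to a page_interval helper that is not included here. As a rough illustration of the contract these callers appear to assume (a 1-based start, an end clamped to the current hitlist length, and the interval returned as a third value), a hypothetical stand-in might look like the following; the real PhiloLogic helper may behave differently.

def page_interval_sketch(results_per_page, hits, start, end):
    # Hypothetical stand-in for page_interval, inferred only from how the
    # functions in this listing call it; not the library's implementation.
    try:
        start = int(start)
    except (TypeError, ValueError):
        start = 0
    try:
        end = int(end)
    except (TypeError, ValueError):
        end = 0
    if start <= 0:
        start = 1
    if end < start:
        end = start + int(results_per_page) - 1
    if end > len(hits):  # len() reflects the hits retrieved so far
        end = len(hits)
    return start, end, end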
def lookup_word_service(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(db, environ)
    cursor = db.dbh.cursor()
    if request.report == "concordance":
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        context_size = config["concordance_length"] * 3
        hit = hits[int(request.position)]
        bytes = hit.bytes
        hit_span = hit.bytes[-1] - hit.bytes[0]
        length = context_size + hit_span + context_size
        bytes, byte_start = adjust_bytes(bytes, length)
        byte_end = byte_start + length
        filename = hit.filename
        token = request.selected
    elif request.report == "navigation":
        token = request.selected
        philo_id = request.philo_id.split(" ")
        text_obj = db[philo_id]
        byte_start, byte_end = int(text_obj.byte_start), int(text_obj.byte_end)
        filename = text_obj.filename
        # print >> sys.stderr, "WORD LOOKUP FROM NAVIGATION", request.philo_id, request.selected, byte_start, byte_end, filename
    else:
        pass
    # print >> sys.stderr, "TOKEN", token, "BYTES: ", byte_start, byte_end, "FILENAME: ", filename, "POSITION", request.position
    token_n = 0
    yield lookup_word(db, cursor, token, token_n, byte_start, byte_end, filename)
def lookup_word_service(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    cursor = db.dbh.cursor()
    if request.report == "concordance":
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        context_size = config['concordance_length'] * 3
        hit = hits[int(request.position)]
        bytes = hit.bytes
        hit_span = hit.bytes[-1] - hit.bytes[0]
        length = context_size + hit_span + context_size
        bytes, start_byte = adjust_bytes(bytes, length)
        end_byte = start_byte + length
        filename = hit.filename
        token = request.selected
    elif request.report == "navigation":
        token = request.selected
        philo_id = request.philo_id.split(" ")
        text_obj = db[philo_id]
        start_byte, end_byte = int(text_obj.start_byte), int(text_obj.end_byte)
        filename = text_obj.filename
        # print >> sys.stderr, "WORD LOOKUP FROM NAVIGATION", request.philo_id, request.selected, start_byte, end_byte, filename
    else:
        pass
    # print >> sys.stderr, "TOKEN", token, "BYTES: ", start_byte, end_byte, "FILENAME: ", filename, "POSITION", request.position
    token_n = 0
    yield lookup_word(db, cursor, token, token_n, start_byte, end_byte, filename)
def term_group(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    parsed = parse_query(request.q)
    group = group_terms(parsed)
    all_groups = split_terms(group)
    term_groups = []
    for g in all_groups:
        term_group = ''
        not_started = False
        for kind, term in g:
            if kind == 'NOT':
                if not_started is False:
                    not_started = True
                    term_group += ' NOT '
            elif kind == 'OR':
                term_group += '|'
            elif kind == "TERM":
                term_group += ' %s ' % term
            elif kind == "QUOTE":
                term_group += ' %s ' % term
        term_group = term_group.strip()
        term_groups.append(term_group)
    yield json.dumps(term_groups)
def get_metadata_token_count(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    frequencies = json.loads(environ['wsgi.input'].read())
    count = 0
    for label, m in frequencies.iteritems():
        query_metadata = {}
        for metadata in m['metadata']:
            query_metadata[metadata] = m['metadata'][metadata].encode('utf-8')
        hits = db.query(**query_metadata)
        total_count = 0
        for hit in hits:
            total_count += int(hit['word_count'])
        try:
            frequencies[label]['count'] = round(float(m['count']) / total_count * 1000000, 3)
        except ZeroDivisionError:
            count += 1
            frequencies[label]['count'] = 0
    yield json.dumps(frequencies)
def landing_page_bibliography(request, config):
    db = DB(config.db_path + '/data/')
    object_level = request.object_level
    if object_level and object_level in ["doc", "div1", "div2", "div3"]:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals['default_object_level'])
    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {}
        for field in db.locals['metadata_fields']:
            hit_object[field] = hit[field] or ''
        if object_level == "doc":
            hit_object['philo_id'] = hit.philo_id[0]
        else:
            hit_object['philo_id'] = '/'.join([str(i) for i in hit.philo_id])
        doc_id = str(hit.philo_id[0]) + ' 0 0 0 0 0 0'
        next_doc_id = str(hit.philo_id[0] + 1) + ' 0 0 0 0 0 0'
        c.execute('select rowid from toms where philo_id="%s"' % doc_id)
        doc_row = c.fetchone()['rowid']
        c.execute('select rowid from toms where philo_id="%s"' % next_doc_id)
        try:
            next_doc_row = c.fetchone()['rowid']
        except TypeError:  # if this is the last doc, just get the last rowid in the table.
            c.execute('select max(rowid) from toms;')
            next_doc_row = c.fetchone()[0]
        try:
            c.execute('select * from toms where rowid between %d and %d and head is not null and head !="" limit 1'
                      % (doc_row, next_doc_row))
        except sqlite3.OperationalError:  # no type field in DB
            c.execute('select * from toms where rowid between ? and ? and head is not null and head !="" limit 1',
                      (doc_row, next_doc_row))
        try:
            start_head = c.fetchone()['head'].decode('utf-8')
            start_head = start_head.lower().title().encode('utf-8')
        except Exception as e:
            print(repr(e), file=sys.stderr)
            start_head = ''
        try:
            c.execute('select head from toms where rowid between %d and %d and head is not null and head !="" order by rowid desc limit 1'
                      % (doc_row, next_doc_row))
        except sqlite3.OperationalError:  # no type field in DB
            c.execute('select head from toms where rowid between %d and %d and head is not null and head !="" order by rowid desc limit 1'
                      % (doc_row, next_doc_row))
        try:
            end_head = c.fetchone()['head']
            end_head = end_head.decode('utf-8').lower().title().encode('utf-8')
        except:
            end_head = ''
        hit_object['start_head'] = start_head
        hit_object['end_head'] = end_head
        results.append(hit_object)
    return results
def bibliography_results(request, config):
    """Fetch bibliography results"""
    db = DB(config.db_path + '/data/')
    if request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)
    if request.simple_bibliography == "all":
        # request from simple landing page report which gets all biblio in load order
        hits.finish()
        start = 1
        end = len(hits)
        page_num = end
    else:
        start, end, page_num = page_interval(request.results_per_page, hits, request.start, request.end)
    bibliography_object = {
        "description": {
            "start": start,
            "end": end,
            "n": page_num,
            "results_per_page": request.results_per_page
        },
        "query": dict([i for i in request]),
        "default_object": db.locals['default_object_level']
    }
    results = []
    result_type = "doc"
    for hit in hits[start - 1:end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {}
        for metadata in db.locals['metadata_fields']:
            metadata_fields[metadata] = hit[metadata]
        result_type = hit.object_type
        if request.simple_bibliography == "all":
            citation = citations(hit, citation_hrefs, config, report="simple_landing")
        else:
            citation = citations(hit, citation_hrefs, config, report="bibliography", result_type=result_type)
        if config.dictionary_bibliography is False or result_type == "doc":
            results.append({
                'citation': citation,
                'citation_links': citation_hrefs,
                'philo_id': hit.philo_id,
                "metadata_fields": metadata_fields,
                "object_type": result_type
            })
        else:
            context = get_text_obj(hit, config, request, db.locals["token_regex"], images=False)
            results.append({
                'citation': citation,
                'citation_links': citation_hrefs,
                'philo_id': hit.philo_id,
                "metadata_fields": metadata_fields,
                "context": context,
                "object_type": result_type
            })
    bibliography_object["results"] = results
    bibliography_object['results_length'] = len(hits)
    bibliography_object['query_done'] = hits.done
    bibliography_object['result_type'] = result_type
    return bibliography_object, hits
def concordance_results(request, config):
    """Fetch concordance results."""
    db = DB(config.db_path + '/data/')
    if request.collocation_type:
        first_hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        second_hits = db.query(request["left"], request["method"], request["arg"], **request.metadata)
        hits = CombinedHitlist(first_hits, second_hits)
    else:
        hits = db.query(request["q"], request["method"], request["arg"],
                        sort_order=request["sort_order"], **request.metadata)
    start, end, page_num = page_interval(request['results_per_page'], hits, request.start, request.end)
    concordance_object = {
        "description": {
            "start": start,
            "end": end,
            "results_per_page": request.results_per_page
        },
        "query": dict([i for i in request]),
        "default_object": db.locals['default_object_level']
    }
    formatting_regexes = []
    if config.concordance_formatting_regex:
        for pattern, replacement in config.concordance_formatting_regex:
            compiled_regex = re.compile(r'%s' % pattern)
            formatting_regexes.append((compiled_regex, replacement))
    results = []
    for hit in hits[start - 1:end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {}
        for metadata in db.locals['metadata_fields']:
            metadata_fields[metadata] = hit[metadata]
        citation = citations(hit, citation_hrefs, config, report="concordance")
        context = get_concordance_text(db, hit, config.db_path, config.concordance_length)
        if formatting_regexes:
            for formatting_regex, replacement in formatting_regexes:
                context = formatting_regex.sub(r'%s' % replacement, context)
        result_obj = {
            "philo_id": hit.philo_id,
            "citation": citation,
            "citation_links": citation_hrefs,
            "context": context,
            "metadata_fields": metadata_fields,
            "bytes": hit.bytes
        }
        results.append(result_obj)
    concordance_object["results"] = results
    concordance_object['results_length'] = len(hits)
    concordance_object["query_done"] = hits.done
    return concordance_object
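The formatting_regexes loop in concordance_results simply precompiles (pattern, replacement) pairs from the configuration and applies them in order to each context string. A standalone illustration of that pipeline, with invented patterns that are not from any real configuration:

import re

# Toy illustration of the concordance_formatting_regex pipeline above;
# the two patterns here (strip tags, collapse whitespace) are made up.
formatting_regexes = []
for pattern, replacement in [(r'<[^>]+>', ''), (r'\s+', ' ')]:
    formatting_regexes.append((re.compile(pattern), replacement))
context = '<p>some   <b>highlighted</b>   passage</p>'
for formatting_regex, replacement in formatting_regexes:
    context = formatting_regex.sub(replacement, context)
print(context.strip())  # -> 'some highlighted passage'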
def word_property_filter(environ, start_response):
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', headers)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    filter_results = filter_words_by_property(hits, config.db_path, request, db, config)
    yield json.dumps(filter_results)
def collocation(environ, start_response):
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', headers)
    hits = db.query(request["q"], "cooc", request["arg"], **request.metadata)
    hits.finish()
    collocation_object = fetch_collocation(hits, request, db, config)
    yield json.dumps(collocation_object)
def get_frequency(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    field, word_frequency_object = generate_word_frequency(hits, request, db, config)
    yield json.dumps(word_frequency_object, indent=2)
def term_list(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + "/data/")
    q = WSGIHandler(db, environ)
    hits = db.query(q["q"], q["method"], q["arg"], **q.metadata)
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0])
def generate_word_frequency(request, config):
    """reads through a hitlist. looks up request["field"] in each hit, and
    builds up a list of unique values and their frequencies."""
    db = DB(config.db_path + "/data/")
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    field = request["field"]
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    try:
        for n in hits[request.start:]:
            key = get_word_attrib(n, field, db)
            if not key:
                # NULL is a magic value for queries, don't change it recklessly.
                key = "NULL"
            if key not in counts:
                counts[key] = 0
            counts[key] += 1
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5:
                break
        table = {}
        for k, v in counts.items():
            url = make_absolute_query_link(
                config,
                request,
                start="0",
                end="0",
                report="word_property_filter",
                word_property=field,
                word_property_value=k,
            )
            table[k] = {"count": v, "url": url}
        frequency_object["results"] = table
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            frequency_object["more_results"] = False
        else:
            frequency_object["more_results"] = True
    except IndexError:
        frequency_object["results"] = {}
        frequency_object["more_results"] = False
    frequency_object["results_length"] = len(hits)
    frequency_object["query"] = dict([i for i in request])
    return frequency_object
def get_more_context(environ, start_response):
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(db, environ)
    hit_num = int(request.hit_num)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    context_size = config["concordance_length"] * 3
    hit_context = r.fetch_concordance(db, hits[hit_num], config.db_path, context_size)
    yield json.dumps(hit_context)
def generate_word_frequency(request, config):
    """reads through a hitlist. looks up request["field"] in each hit, and
    builds up a list of unique values and their frequencies."""
    db = DB(config.db_path + '/data/')
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    field = request["field"]
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    try:
        for n in hits[request.start:]:
            key = get_word_attrib(n, field, db)
            if not key:
                # NULL is a magic value for queries, don't change it recklessly.
                key = "NULL"
            if key not in counts:
                counts[key] = 0
            counts[key] += 1
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5:
                break
        table = {}
        for k, v in counts.iteritems():
            url = make_absolute_query_link(config, request, start="0", end="0",
                                           report="word_property_filter",
                                           word_property=field,
                                           word_property_value=k)
            table[k] = {'count': v, 'url': url}
        frequency_object['results'] = table
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            frequency_object['more_results'] = False
        else:
            frequency_object['more_results'] = True
    except IndexError:
        frequency_object['results'] = {}
        frequency_object['more_results'] = False
    frequency_object['results_length'] = len(hits)
    frequency_object['query'] = dict([i for i in request])
    return frequency_object
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield simplejson.dumps(expanded_terms[0])
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0])
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield json.dumps(expanded_terms[0]).encode('utf8')
def get_bibliography(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    object_level = request.object_level
    if object_level and object_level in object_levels:
        hits = db.get_all(object_level)
    else:
        hits = db.get_all(db.locals['default_object_level'])
    results = []
    c = db.dbh.cursor()
    for hit in hits:
        hit_object = {}
        for field in db.locals['metadata_fields']:
            hit_object[field] = hit[field] or ''
        if object_level == "doc":
            hit_object['philo_id'] = hit.philo_id[0]
        else:
            hit_object['philo_id'] = '/'.join([str(i) for i in hit.philo_id])
        doc_id = str(hit.philo_id[0]) + ' 0 0 0 0 0 0'
        next_doc_id = str(hit.philo_id[0] + 1) + ' 0 0 0 0 0 0'
        c.execute('select rowid from toms where philo_id="%s"' % doc_id)
        doc_row = c.fetchone()['rowid']
        c.execute('select rowid from toms where philo_id="%s"' % next_doc_id)
        try:
            next_doc_row = c.fetchone()['rowid']
        except TypeError:  # if this is the last doc, just get the last rowid in the table.
            c.execute('select max(rowid) from toms;')
            next_doc_row = c.fetchone()[0]
        c.execute('select head from toms where rowid between %d and %d and head is not null limit 1'
                  % (doc_row, next_doc_row))
        try:
            start_head = c.fetchone()['head']
            start_head = start_head.decode('utf-8').lower().title().encode('utf-8')
        except:
            start_head = ''
        c.execute('select head from toms where rowid between %d and %d and head is not null order by rowid desc limit 1'
                  % (doc_row, next_doc_row))
        try:
            end_head = c.fetchone()['head']
            end_head = end_head.decode('utf-8').lower().title().encode('utf-8')
        except:
            end_head = ''
        hit_object['start_head'] = start_head
        hit_object['end_head'] = end_head
        results.append(hit_object)
    yield json.dumps(results)
def get_more_context(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hit_num = int(request.hit_num)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    context_size = config['concordance_length'] * 3
    hit_context = get_concordance_text(db, hits[hit_num], config.db_path, context_size)
    yield json.dumps(hit_context).encode('utf8')
def get_more_context(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    hit_num = int(request.hit_num)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    context_size = config['concordance_length'] * 3
    hit_context = get_concordance_text(db, hits[hit_num], config.db_path, context_size)
    yield simplejson.dumps(hit_context)
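Every environ/start_response function in this listing is an ordinary WSGI application, so each one can be smoke-tested locally without the full web stack. A minimal harness using only the standard library, with get_more_context as an arbitrary example endpoint and the query string purely illustrative (this assumes the module's imports resolve against a built PhiloLogic database):

from wsgiref.simple_server import make_server

if __name__ == '__main__':
    # Sketch only: serve one endpoint from this listing for manual testing.
    httpd = make_server('127.0.0.1', 8080, get_more_context)
    print('Try http://127.0.0.1:8080/?q=test&hit_num=0')
    httpd.serve_forever()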
def get_first_page(philo_id, config):
    """This function will fetch the first page of any given text object,
    in case there's no <pb> starting the object"""
    db = DB(config.db_path + '/data/')
    c = db.dbh.cursor()
    if len(philo_id) < 9:
        c.execute('select start_byte, end_byte from toms where philo_id=?',
                  (' '.join([str(i) for i in philo_id]),))
        result = c.fetchone()
        start_byte = result['start_byte']
        approx_id = str(philo_id[0]) + ' 0 0 0 0 0 0 %'
        try:
            c.execute('select * from pages where philo_id like ? and end_byte >= ? limit 1',
                      (approx_id, start_byte))
        except:
            return {'filename': '', 'start_byte': ''}
    else:
        c.execute('select * from pages where philo_id like ? limit 1',
                  (' '.join([str(i) for i in philo_id]),))
    page_result = c.fetchone()
    try:
        filename = page_result["facs"]
    except (IndexError, TypeError):
        filename = ""
    if not filename:
        try:
            filename = page_result['id'] or ''
        except (IndexError, TypeError):
            pass
    try:
        n = page_result['n'] or ''
        page = {'filename': filename.split(),
                "n": n,
                'start_byte': page_result['start_byte'],
                'end_byte': page_result['end_byte']}
        return page
    except:  # Let's play it safe
        return {'filename': '', 'start_byte': ''}
def get_frequency(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    if request.q == '' and request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'])
        else:
            hits = db.query(**request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    results = r.generate_frequency(hits, request, db, config)
    yield json.dumps(results)
def search_examples(field):
    path = os.path.abspath(os.path.dirname(__file__)).replace('functions', "") + '/data/'
    db = DB(path, encoding='utf-8')
    if field == "word":
        word_path = path + '/frequencies/word_frequencies'
        word = ''
        for n, line in enumerate(open(word_path)):
            word = line.split()[0]
            if n == 100:
                break
        return word.decode('utf-8', 'ignore')
    else:
        c = db.dbh.cursor()
        object_type = db.locals['metadata_types'][field]
        try:
            if object_type != 'div':
                c.execute('select %s from toms where philo_type="%s" and %s!="" limit 1'
                          % (field, object_type, field))
            else:
                c.execute('select %s from toms where philo_type="div1" or philo_type="div2" or philo_type="div3" and %s!="" limit 1'
                          % (field, field))
        except sqlite3.OperationalError:
            example = ''
        try:
            example = c.fetchone()[0].decode('utf-8', 'ignore')
        except (TypeError, AttributeError):
            example = ''
        return example
def main(db_path):
    """Grab words from words table and dump to file"""
    philo_db = DB(db_path)
    words_and_ids_path = os.path.join(db_path, "words_and_philo_ids")
    status = os.system("mkdir -p %s" % words_and_ids_path)
    if status != 0:
        print("Could not create %s. Please check your write permissions to the parent directory" % words_and_ids_path)
        sys.exit(status)
    cursor = philo_db.dbh.cursor()
    cursor.execute('SELECT philo_name, philo_id, start_byte, end_byte from words')
    current_doc_id = "1"
    current_words = []
    for word, philo_id, start_byte, end_byte in cursor:
        doc_id = philo_id.split()[0]
        word_obj = {
            "token": word,
            "position": philo_id,
            "start_byte": start_byte,
            "end_byte": end_byte
        }
        if doc_id != current_doc_id:
            with open(os.path.join(words_and_ids_path, current_doc_id), "w") as output:
                output.write("\n".join(current_words))
            print("Processed document %s" % current_doc_id, flush=True)
            current_words = []
            current_doc_id = doc_id
        current_words.append(json.dumps(word_obj))
    if current_words:
        with open(os.path.join(words_and_ids_path, current_doc_id), "w") as output:
            output.write("\n".join(current_words))
        print("Processed document %s" % current_doc_id, flush=True)
def get_all_page_images(philo_id, config, current_obj_imgs):
    """Get all page images"""
    if current_obj_imgs[0]:
        # We know there are images
        db = DB(config.db_path + '/data/')
        c = db.dbh.cursor()
        approx_id = str(philo_id[0]) + ' 0 0 0 0 0 0 %'
        try:
            c.execute('select * from pages where philo_id like ? and facs is not null and facs != ""',
                      (approx_id,))
            current_obj_imgs = set(current_obj_imgs)
            all_imgs = [tuple(i["facs"].split()) for i in c.fetchall()]
        except sqlite3.OperationalError:
            all_imgs = []
        if not all_imgs:
            try:
                c.execute('select * from pages where philo_id like ? and id is not null and id != ""',
                          (approx_id,))
                current_obj_imgs = set(current_obj_imgs)
                all_imgs = [tuple(i["id"].split()) for i in c.fetchall()]
            except sqlite3.OperationalError:
                return []
        return all_imgs
    else:
        return []
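Several helpers here (get_first_page, get_all_page_images, and get_all_graphics below) scope a query to a whole document by matching on a philo_id prefix such as '12 0 0 0 0 0 0 %'. A self-contained illustration of that LIKE pattern against an in-memory table, with the schema reduced to the two relevant columns purely for demonstration:

import sqlite3

# Demonstrates the doc-level philo_id LIKE pattern used above on toy data.
conn = sqlite3.connect(':memory:')
conn.row_factory = sqlite3.Row
conn.execute('CREATE TABLE pages (philo_id TEXT, facs TEXT)')
conn.executemany('INSERT INTO pages VALUES (?, ?)', [
    ('12 0 0 0 0 0 0 1', 'page_12_1.jpg'),
    ('12 0 0 0 0 0 0 2', 'page_12_2.jpg'),
    ('13 0 0 0 0 0 0 1', 'page_13_1.jpg'),
])
approx_id = '12' + ' 0 0 0 0 0 0 %'  # matches every page of document 12
rows = conn.execute('SELECT facs FROM pages WHERE philo_id LIKE ? AND facs != ""',
                    (approx_id,)).fetchall()
print([row['facs'] for row in rows])  # -> ['page_12_1.jpg', 'page_12_2.jpg']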
def resolve_cite_service(environ, start_response):
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    c = db.dbh.cursor()
    q = request.q
    best_url = config['db_url']
    if " - " in q:
        milestone = q.split(" - ")[0]
    else:
        milestone = q
    milestone_segments = []
    last_segment = 0
    milestone_prefixes = []
    for separator in re.finditer(r' (?!\.)|\.(?! )', milestone):
        milestone_prefixes += [milestone[:separator.start()]]
        milestone_segments += [milestone[last_segment:separator.start()]]
        last_segment = separator.end()
    milestone_segments += [milestone[last_segment:]]
    milestone_prefixes += [milestone]
    print >> sys.stderr, "SEGMENTS", repr(milestone_segments)
    print >> sys.stderr, "PREFIXES", repr(milestone_prefixes)
    abbrev_match = None
    for pos, v in enumerate(milestone_prefixes):
        print >> sys.stderr, "QUERYING for abbrev = ", v
        abbrev_q = c.execute("SELECT * FROM toms WHERE abbrev = ?;", (v,)).fetchone()
        if abbrev_q:
            abbrev_match = abbrev_q
    print >> sys.stderr, "ABBREV", abbrev_match["abbrev"], abbrev_match["philo_id"]
    doc_obj = ObjectWrapper(abbrev_match['philo_id'].split(), db)
    nav = nav_query(doc_obj, db)
    best_match = None
    for n in nav:
        if n["head"] == request.q:
            print >> sys.stderr, "MATCH", n["philo_id"], n["n"], n["head"]
            best_match = n
            break
    if best_match:
        type_offsets = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
        t = best_match['philo_type']
        short_id = best_match["philo_id"].split()[:type_offsets[t]]
        best_url = f.make_absolute_object_link(config, short_id)
    print >> sys.stderr, "BEST_URL", best_url
    status = '302 Found'
    headers = [('Location', best_url)]
    start_response(status, headers)
    return ""
def generate_text_object(request, config, note=False):
    """Return text object given a philo_id"""
    # verify this isn't a page ID or if this is a note
    if len(request.philo_id.split()) == 9 and note is not True:
        width = 9
    else:
        width = 7
    db = DB(config.db_path + '/data/', width=width)
    if note:
        target = request.target.replace('#', '')
        doc_id = request.philo_id.split()[0] + ' %'
        c = db.dbh.cursor()
        c.execute('select philo_id from toms where id=? and philo_id like ? limit 1',
                  (target, doc_id))
        philo_id = c.fetchall()[0]['philo_id'].split()[:7]
        obj = db[philo_id]
    else:
        try:
            obj = db[request.philo_id]
        except ValueError:
            obj = db[' '.join(request.path_components)]
        philo_id = obj.philo_id
        if width != 9:
            while obj['philo_name'] == '__philo_virtual' and obj["philo_type"] != "div1":
                philo_id.pop()
                obj = db[philo_id]
    philo_id = list(obj.philo_id)
    while int(philo_id[-1]) == 0:
        philo_id.pop()
    text_object = {
        "query": dict([i for i in request]),
        "philo_id": ' '.join([str(i) for i in philo_id])
    }
    text_object['prev'] = neighboring_object_id(db, obj.prev, width)
    text_object['next'] = neighboring_object_id(db, obj.__next__, width)
    metadata_fields = {}
    for metadata in db.locals['metadata_fields']:
        metadata_fields[metadata] = obj[metadata]
    text_object['metadata_fields'] = metadata_fields
    if width != 9:
        citation_hrefs = citation_links(db, config, obj)
    else:
        doc_obj = db[obj.philo_id[0]]
        citation_hrefs = citation_links(db, config, doc_obj)
    citation = citations(obj, citation_hrefs, config, report="navigation")
    text_object['citation'] = citation
    text, imgs = get_text_obj(obj, config, request, db.locals["token_regex"], note=note)
    if config.navigation_formatting_regex:
        for pattern, replacement in config.navigation_formatting_regex:
            text = re.sub(r'%s' % pattern, '%s' % replacement, text)
    text_object['text'] = text
    text_object['imgs'] = imgs
    return text_object
def get_all_graphics(philo_id, config):
    db = DB(config.db_path + '/data/')
    c = db.dbh.cursor()
    approx_id = str(philo_id[0]) + ' 0 0 0 0 0 0 %'
    try:
        c.execute('SELECT facs FROM graphics WHERE philo_id LIKE ? AND facs IS NOT NULL AND facs != "" ORDER BY ROWID',
                  (approx_id,))
        graphics = [i["facs"].split() for i in c.fetchall() if i["facs"]]
        return graphics
    except sqlite3.OperationalError:
        return []
def alignment_to_text(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    link = byte_range_to_link(db, config, request)
    yield simplejson.dumps({"link": link})
def get_notes(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    text_object = generate_text_object(request, config, note=True)
    yield json.dumps(text_object).encode('utf8')
def get_total_results(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
        else:
            hits = db.query(sort_order=request["sort_order"], **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()
    total_results = len(hits)
    yield simplejson.dumps(total_results)
def metadata_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    metadata = request.term
    field = request.field
    yield autocomplete_metadata(metadata, field, db).encode('utf8')
def get_total_results(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    if request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'])
        else:
            hits = db.query(**request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()
    total_results = len(hits)
    yield simplejson.dumps(total_results)
def bibliography_results(request, config):
    db = DB(config.db_path + '/data/')
    if request.no_metadata:
        hits = db.get_all(db.locals['default_object_level'], request["sort_order"])
    else:
        hits = db.query(**request.metadata)
    start, end, n = page_interval(request.results_per_page, hits, request.start, request.end)
    bibliography_object = {
        "description": {
            "start": start,
            "end": end,
            "n": n,
            "results_per_page": request.results_per_page
        },
        "query": dict([i for i in request]),
        "default_object": db.locals['default_object_level']
    }
    results = []
    result_type = 'doc'
    for hit in hits[start - 1:end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {}
        for metadata in db.locals['metadata_fields']:
            metadata_fields[metadata] = hit[metadata]
        result_type = hit.type
        if hit.type == "doc":
            citation = citations(hit, citation_hrefs, config, report="bibliography")
        else:
            citation = citations(hit, citation_hrefs, config, report="concordance")
        results.append({
            'citation': citation,
            'citation_links': citation_hrefs,
            'philo_id': hit.philo_id,
            "metadata_fields": metadata_fields
        })
    bibliography_object["results"] = results
    bibliography_object['results_length'] = len(hits)
    bibliography_object['query_done'] = hits.done
    bibliography_object['result_type'] = result_type
    return bibliography_object, hits
def time_series_tester(config):
    db = DB(config.db_path + '/data/')
    c = db.dbh.cursor()
    try:
        c.execute("SELECT COUNT(*) FROM toms WHERE %s IS NOT NULL" % config.time_series_year_field)
        count = c.fetchone()[0]
        return count > 0
    except sqlite3.OperationalError:
        return False
def term_list(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    term = request.term
    if isinstance(term, list):
        term = term[-1]
    all_words = format_query(term, db, config)[:100]
    yield json.dumps(all_words).encode('utf8')
def get_start_end_date(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'text/html; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    start_date, end_date = start_end_date(db, config,
                                          start_date=request.start_date,
                                          end_date=request.end_date)
    yield simplejson.dumps({"start_date": start_date, "end_date": end_date})
def access_control(environ, start_response):
    path = os.path.abspath(os.path.dirname(__file__)).replace('functions', '') + '/data/'
    db = DB(path, encoding='utf-8')
    if "open_access" in db.locals:  # failsafe in case the variable is not in db.locals.py
        if db.locals['open_access']:
            return True
        elif check_previous_session(environ):
            return True
        else:
            access_value = check_access(db, environ)
            return access_value
    else:
        return True
def get_frequency(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    setattr(request, 'frequency_field', json.dumps(eval('"%s"' % request.frequency_field)))
    if request.q == '' and request.no_q:
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'])
        else:
            hits = db.query(**request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    hits.finish()
    results = r.generate_frequency(hits, request, db, config)
    results['results'] = sorted(results['results'].iteritems(),
                                key=lambda x: x[1]['count'], reverse=True)
    yield json.dumps(results)
def get_text_object(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    # Pad the philo_id out to the full 7 components
    zeros = 7 - len(request.philo_id.split())
    if zeros:
        request.philo_id += zeros * " 0"
    obj = ObjectWrapper(request['philo_id'].split(), db)
    text_object = generate_text_object(request, config)
    yield simplejson.dumps(text_object)
def get_sorted_kwic(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    input_object = json.loads(environ['wsgi.input'].read().decode('utf8', 'ignore'))
    all_results = input_object['results']
    query_string = input_object['query_string']
    sort_keys = [i for i in input_object["sort_keys"] if i]
    environ['QUERY_STRING'] = query_string
    request = WSGIHandler(environ, config)
    sorted_hits = get_sorted_hits(all_results, sort_keys, request, config, db,
                                  input_object['start'], input_object['end'])
    yield json.dumps(sorted_hits).encode('utf8')
def login_access(environ, request, config, headers):
    db = DB(config.db_path + '/data/')
    if request.authenticated:
        access = True
    else:
        if request.username and request.password:
            access = check_login_info(config, request)
            if access:
                incoming_address = environ['REMOTE_ADDR']
                token = make_token(incoming_address, db)
                if token:
                    h, ts = token
                    headers.append(("Set-Cookie", "hash=%s" % h))
                    headers.append(("Set-Cookie", "timestamp=%s" % ts))
        else:
            access = False
    return access, headers
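make_token is not defined in this listing; login_access only assumes it returns a (hash, timestamp) pair that gets echoed back as cookies. A hypothetical HMAC-based sketch consistent with that contract (the secret, the clock handling, and the unused db parameter are all assumptions, not PhiloLogic's implementation):

import hashlib
import hmac
import time

SECRET = b'change-me'  # would normally come from configuration, not a literal

def make_token_sketch(incoming_address, db=None):
    # Hypothetical make_token: only a sketch matching how login_access
    # consumes its (hash, timestamp) return value.
    timestamp = str(time.time())
    token_hash = hmac.new(SECRET,
                          (incoming_address + timestamp).encode('utf8'),
                          hashlib.sha256).hexdigest()
    return token_hash, timestamp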
def access_request(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    access, headers = login_access(environ, request, config, headers)
    start_response(status, headers)
    if access:
        yield simplejson.dumps({'access': True})
    else:
        incoming_address, domain_name = access_control.get_client_info(environ)
        yield simplejson.dumps({
            'access': False,
            "incoming_address": incoming_address,
            "domain_name": domain_name
        })
def group_by_metadata(request, config):
    citation_types = json.loads(request.citation)
    db = DB(config.db_path + "/data/")
    cursor = db.dbh.cursor()
    query = """select * from toms where philo_type="doc" and %s=?""" % request.group_by_field
    cursor.execute(query, (request.query,))
    result_group = []
    for doc in cursor:
        obj = db[doc["philo_id"]]
        links = citation_links(db, config, obj)
        citation = citations(obj, links, config, report="landing_page", citation_type=citation_types)
        result_group.append({"metadata": get_all_metadata(db, doc), "citation": citation})
    return json.dumps({
        "display_count": request.display_count,
        "content_type": request.group_by_field,
        "content": [{"prefix": request.query, "results": result_group}],
    })
def main(db_path):
    """Grab words from words table and dump to file"""
    philo_db = DB(db_path)
    words_and_ids_path = os.path.join(db_path, "words_and_philo_ids")
    status = os.system("mkdir -p %s" % words_and_ids_path)
    if status != 0:
        print("Could not create %s. Please check your write permissions to the parent directory" % words_and_ids_path)
        sys.exit(status)
    # Get all doc ids
    cursor = philo_db.dbh.cursor()
    cursor.execute('SELECT philo_id from toms where philo_type="doc" order by rowid')
    docs = [i["philo_id"].split()[0] for i in cursor.fetchall()]
    print("Processing %d documents" % len(docs))
    for doc_id in docs:
        words = []
        current_doc_id = "{} %".format(doc_id)
        cursor.execute("SELECT rowid from words where philo_id like ? order by rowid limit 1",
                       (current_doc_id,))
        first_rowid = cursor.fetchone()[0]
        next_doc_id = "{} %".format(int(doc_id) + 1)
        cursor.execute("SELECT rowid from words where philo_id like ? order by rowid limit 1",
                       (next_doc_id,))
        try:
            last_rowid = cursor.fetchone()[0]
            cursor.execute(
                "SELECT philo_name, philo_id from words where rowid >= ? and rowid < ? and philo_type=? and philo_type!=? order by rowid",
                (first_rowid, last_rowid, "word", "__philo_virtual"))
        except TypeError:  # we've reached the end of the table and cursor returned None
            cursor.execute(
                "SELECT philo_name, philo_id from words where rowid >= ? and philo_type=? and philo_type!=? order by rowid",
                (first_rowid, "word", "__philo_virtual"))
        for word, philo_id in cursor.fetchall():
            words.append({"token": word, "position": philo_id})
        with open(os.path.join(words_and_ids_path, doc_id), "w") as output:
            output.write("\n".join(dumps(w) for w in words))
        print("Processed document %s" % doc_id)
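Both variants of main above are meant to run as a standalone dump script against a built database directory. A typical guard and invocation (the script name is assumed for illustration):

import sys

# Assumed command-line entry point for the dump script above, e.g.:
#     python dump_words_and_philo_ids.py /path/to/database
if __name__ == '__main__':
    main(sys.argv[1])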
def collocation_results(request, config):
    """Fetch collocation results"""
    db = DB(config.db_path + '/data/')
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']), **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], **request.metadata)
    hits.finish()
    collocation_object = {"query": dict([i for i in request])}
    try:
        collocate_distance = int(request['collocate_distance'])
    except ValueError:  # Getting an empty string since the keyword is not specified in the URL
        collocate_distance = None
    if request.colloc_filter_choice == "nofilter":
        filter_list = []
    else:
        filter_list = build_filter_list(request, config)
    collocation_object['filter_list'] = filter_list
    filter_list = set(filter_list)
    # Build list of search terms to filter out
    query_words = []
    for group in get_expanded_query(hits):
        for word in group:
            word = word.replace('"', '')
            query_words.append(word)
    query_words = set(query_words)
    filter_list = filter_list.union(query_words)
    if request["collocate_distance"]:
        hits = db.query(request["q"], "proxy", int(request['collocate_distance']),
                        raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], "cooc", request["arg"], raw_results=True, **request.metadata)
    hits.finish()
    stored_sentence_id = None
    stored_sentence_counts = defaultdict(int)
    sentence_hit_count = 1
    hits_done = request.start or 0
    max_time = request.max_time or 10
    all_collocates = defaultdict(lambda: {'count': 0})
    cursor = db.dbh.cursor()
    start_time = timeit.default_timer()
    try:
        for hit in hits[hits_done:]:
            word_id = ' '.join([str(i) for i in hit[:6]]) + ' ' + str(hit[7])
            query = """select parent, rowid from words where philo_id='%s' limit 1""" % word_id
            cursor.execute(query)
            result = cursor.fetchone()
            parent = result['parent']
            if parent != stored_sentence_id:
                rowid = int(result['rowid'])
                sentence_hit_count = 1
                stored_sentence_id = parent
                stored_sentence_counts = defaultdict(int)
                if collocate_distance:
                    begin_rowid = rowid - collocate_distance
                    if begin_rowid < 0:
                        begin_rowid = 0
                    end_rowid = rowid + collocate_distance
                    row_query = """select philo_name from words where parent='%s' and rowid between %d and %d""" % (parent, begin_rowid, end_rowid)
                else:
                    row_query = """select philo_name from words where parent='%s'""" % (parent,)
                cursor.execute(row_query)
                for i in cursor.fetchall():
                    collocate = i["philo_name"]
                    if collocate not in filter_list:
                        stored_sentence_counts[collocate] += 1
            else:
                sentence_hit_count += 1
            for word in stored_sentence_counts:
                if stored_sentence_counts[word] < sentence_hit_count:
                    continue
                all_collocates[word]['count'] += 1
            hits_done += 1
            elapsed = timeit.default_timer() - start_time
            # avoid timeouts by splitting the query if more than request.max_time (in seconds) has been spent in the loop
            if elapsed > int(max_time):
                break
    except IndexError:
        collocation_object['hits_done'] = len(hits)
    collocation_object['collocates'] = all_collocates
    collocation_object["results_length"] = len(hits)
    if hits_done < collocation_object["results_length"]:
        collocation_object['more_results'] = True
        collocation_object['hits_done'] = hits_done
    else:
        collocation_object['more_results'] = False
        collocation_object['hits_done'] = collocation_object["results_length"]
    return collocation_object
def get_neighboring_words(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    try:
        index = int(request.hits_done)
    except:
        index = 0
    max_time = int(request.max_time)
    kwic_words = []
    start_time = timeit.default_timer()
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    c = db.dbh.cursor()
    for hit in hits[index:]:
        word_id = ' '.join([str(i) for i in hit.philo_id])
        query = 'select rowid, philo_name, parent from words where philo_id="%s" limit 1' % word_id
        c.execute(query)
        results = c.fetchone()
        highlighted_text = kwic_hit_object(hit, config, db)["highlighted_text"]
        highlighted_text = highlighted_text.translate(remove_punctuation_map)
        highlighted_text = highlighted_text.strip()
        result_obj = {
            "left": "",
            "right": "",
            "index": index,
            "q": highlighted_text
        }
        left_rowid = results["rowid"] - 10
        right_rowid = results["rowid"] + 10
        c.execute('select philo_name, philo_id from words where rowid between ? and ?',
                  (left_rowid, results['rowid'] - 1))
        result_obj["left"] = []
        for i in c.fetchall():
            result_obj["left"].append(i['philo_name'].decode('utf-8'))
        result_obj["left"].reverse()
        result_obj["left"] = ' '.join(result_obj["left"])
        c.execute('select philo_name, philo_id from words where rowid between ? and ?',
                  (results['rowid'] + 1, right_rowid))
        result_obj["right"] = []
        for i in c.fetchall():
            result_obj["right"].append(i['philo_name'].decode('utf-8'))
        result_obj["right"] = ' '.join(result_obj["right"])
        for metadata in config.kwic_metadata_sorting_fields:
            result_obj[metadata] = hit[metadata].lower()
        kwic_words.append(result_obj)
        index += 1
        elapsed = timeit.default_timer() - start_time
        if elapsed > max_time:
            # avoid timeouts by splitting the query if more than 10 seconds has been spent in the loop
            break
    yield simplejson.dumps({"results": kwic_words, "hits_done": index})
def filter_words_by_property(request, config):
    db = DB(config.db_path + '/data/')
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    concordance_object = {"query": dict([i for i in request])}
    # Do these need to be captured in wsgi_handler?
    word_property = request["word_property"]
    word_property_value = request["word_property_value"]
    word_property_total = request["word_property_total"]
    new_hitlist = []
    results = []
    position = 0
    more_pages = False
    if request.start == 0:
        start = 1
    else:
        start = request.start
    for hit in hits:
        # get my chunk of text
        hit_val = get_word_attrib(hit, word_property, db)
        if hit_val == word_property_value:
            position += 1
            if position < start:
                continue
            new_hitlist.append(hit)
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = {}
            for metadata in db.locals['metadata_fields']:
                metadata_fields[metadata] = hit[metadata]
            citation = citations(hit, citation_hrefs, config)
            context = fetch_concordance(db, hit, config.db_path, config.concordance_length)
            result_obj = {
                "philo_id": hit.philo_id,
                "citation": citation,
                "citation_links": citation_hrefs,
                "context": context,
                "metadata_fields": metadata_fields,
                "bytes": hit.bytes,
                "collocate_count": 1
            }
            results.append(result_obj)
        if len(new_hitlist) == request.results_per_page:
            more_pages = True
            break
    end = start + len(results) - 1
    if len(results) < request.results_per_page:
        word_property_total = end
    else:
        word_property_total = end + 1
    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
        "more_pages": more_pages
    }
    return concordance_object
def generate_time_series(request, config):
    db = DB(config.db_path + '/data/')
    time_series_object = {'query': dict([i for i in request]), 'query_done': False}
    start_date, end_date = get_start_end_date(db, config, start_date=None, end_date=None)
    # Generate date ranges
    interval = int(request.year_interval)
    date_ranges = []
    for i in xrange(start_date, end_date, interval):
        start = i
        end = i + interval - 1
        if end > end_date:
            end = end_date
        date_range = "%d-%d" % (start, end)
        date_ranges.append((start, date_range))
    absolute_count = defaultdict(int)
    date_counts = {}
    total_hits = 0
    last_date_done = start_date
    start_time = timeit.default_timer()
    max_time = request.max_time or 10
    for start_range, date_range in date_ranges:
        request.metadata[config.time_series_year_field] = date_range
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        hits.finish()
        params = {"report": "concordance", "start": "0", "end": "0"}
        params[config.time_series_year_field] = date_range
        url = make_absolute_query_link(config, request, **params)
        absolute_count[start_range] = {"label": start_range, "count": len(hits), "url": url}
        # Get date total count
        if interval != 1:
            end_range = start_range + (int(request['year_interval']) - 1)
            query = 'select sum(word_count) from toms where %s between "%d" and "%d"' % (config.time_series_year_field, start_range, end_range)
        else:
            query = "select sum(word_count) from toms where %s='%s'" % (config.time_series_year_field, start_range)
        c = db.dbh.cursor()
        c.execute(query)
        date_counts[start_range] = c.fetchone()[0] or 0
        total_hits += len(hits)
        print >> sys.stderr, "TOTAL", total_hits
        last_date_done = start_range
        elapsed = timeit.default_timer() - start_time
        # avoid timeouts by splitting the query if more than request.max_time (in seconds) has been spent in the loop
        if elapsed > int(max_time):
            break
    time_series_object['results_length'] = total_hits
    if (last_date_done + int(request.year_interval)) >= end_date:
        time_series_object['more_results'] = False
    else:
        time_series_object['more_results'] = True
    time_series_object['new_start_date'] = last_date_done + int(request.year_interval)
    time_series_object['results'] = {'absolute_count': absolute_count, 'date_count': date_counts}
    return time_series_object
def frequency_results(request, config, sorted_results=False):
    """Reads through a hitlist, looks up request.frequency_field in each hit,
    and builds up a list of unique values and their frequencies."""
    # Note: the flag was renamed from `sorted` so it no longer shadows the
    # builtin sorted() used at the end of this function.
    db = DB(config.db_path + '/data/')
    biblio_search = False
    if request.q == '' and request.no_q:
        biblio_search = True
        if request.no_metadata:
            hits = db.get_all(db.locals['default_object_level'], sort_order=["rowid"], raw_results=True)
        else:
            hits = db.query(sort_order=["rowid"], raw_results=True, **request.metadata)
    else:
        hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **request.metadata)
    if sorted_results:
        hits.finish()
    c = db.dbh.cursor()
    c.execute('select philo_id, %s from toms where %s is not null' % (request.frequency_field, request.frequency_field))
    metadata_dict = {}
    for philo_id, field in c.fetchall():
        philo_id = tuple(int(s) for s in philo_id.split() if int(s))
        metadata_dict[philo_id] = field
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    obj_dict = {'doc': 1, 'div1': 2, 'div2': 3, 'div3': 4, 'para': 5, 'sent': 6, 'word': 7}
    metadata_type = db.locals["metadata_types"][request.frequency_field]
    try:
        object_level = obj_dict[metadata_type]
    except KeyError:
        # metadata_type == "div": handled separately in the loop below
        pass
    try:
        for philo_id in hits[request.start:]:
            if not biblio_search:
                philo_id = tuple(list(philo_id[:6]) + [philo_id[7]])
            if metadata_type == "div":
                key = ""
                for div in ["div1", "div2", "div3"]:
                    if philo_id[:obj_dict[div]] in metadata_dict:
                        key = metadata_dict[philo_id[:obj_dict[div]]]
                while not key:
                    if philo_id[:4] in metadata_dict:
                        key = metadata_dict[philo_id[:4]]
                        break
                    if philo_id[:5] in metadata_dict:
                        key = metadata_dict[philo_id[:5]]
                        break
                    break
                if not key:
                    last_hit_done += 1
                    continue
            else:
                try:
                    key = metadata_dict[philo_id[:object_level]]
                except KeyError:
                    last_hit_done += 1
                    continue
            if key not in counts:
                counts[key] = {"count": 0, 'metadata': {request.frequency_field: key}}
                counts[key]["url"] = make_absolute_query_link(config, request, frequency_field="", start="0", end="0",
                                                              report=request.report, script='',
                                                              **{request.frequency_field: '"%s"' % key})
                if not biblio_search:
                    query_metadata = dict([(k, v) for k, v in request.metadata.iteritems() if v])
                    query_metadata[request.frequency_field] = '"%s"' % key
                    local_hits = db.query(**query_metadata)
                    counts[key]["total_word_count"] = local_hits.get_total_word_count()
            counts[key]["count"] += 1
            # avoid timeouts by splitting the query if more than
            # request.max_time (in seconds) has been spent in the loop
            elapsed = timeit.default_timer() - start_time
            last_hit_done += 1
            if elapsed > 5:
                break
        frequency_object['results'] = counts
        frequency_object["hits_done"] = last_hit_done
        if last_hit_done == len(hits):
            new_metadata = dict([(k, v) for k, v in request.metadata.iteritems() if v])
            new_metadata[request.frequency_field] = '"NULL"'
            if request.q == '' and request.no_q:
                new_hits = db.query(sort_order=["rowid"], raw_results=True, **new_metadata)
            else:
                new_hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **new_metadata)
            new_hits.finish()
            if len(new_hits):
                null_url = make_absolute_query_link(config, request, frequency_field="", start="0", end="0",
                                                    report=request.report, script='',
                                                    **{request.frequency_field: '"NULL"'})
                local_hits = db.query(**new_metadata)
                if not biblio_search:
                    frequency_object["results"]["NULL"] = {"count": len(new_hits), "url": null_url,
                                                           "metadata": {request.frequency_field: '"NULL"'},
                                                           "total_word_count": local_hits.get_total_word_count()}
                else:
                    frequency_object["results"]["NULL"] = {"count": len(new_hits), "url": null_url,
                                                           "metadata": {request.frequency_field: '"NULL"'}}
            frequency_object['more_results'] = False
        else:
            frequency_object['more_results'] = True
    except IndexError:
        frequency_object['results'] = {}
        frequency_object['more_results'] = False
    frequency_object['results_length'] = len(hits)
    frequency_object['query'] = dict([i for i in request])
    if sorted_results:
        frequency_object["results"] = sorted(frequency_object['results'].iteritems(),
                                             key=lambda x: x[1]['count'], reverse=True)
    return frequency_object
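# A possible consumer of the frequency_object returned above: computing a
# relative frequency (hits per 10,000 words) for each metadata value. This
# helper is a hypothetical illustration, not part of the module; it assumes
# the non-bibliographic case, where each entry carries "total_word_count".
def relative_frequencies(frequency_object, per=10000):
    relative = {}
    for key, value in frequency_object['results'].items():
        # skip entries with no word count (e.g. the biblio_search case)
        if value.get('total_word_count'):
            relative[key] = value['count'] / float(value['total_word_count']) * per
    return relative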
def get_neighboring_words(environ, start_response):
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'),
               ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = f.WebConfig()
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(db, environ)
    try:
        index = int(request.hits_done)
    except (ValueError, TypeError):
        index = 0
    max_time = int(request.max_time)
    kwic_words = []
    start_time = timeit.default_timer()
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    c = db.dbh.cursor()
    for hit in hits[index:]:
        word_id = ' '.join([str(i) for i in hit.philo_id])
        c.execute('select rowid, philo_name, parent from words where philo_id=? limit 1', (word_id,))
        results = c.fetchone()
        parent_sentence = results['parent']
        if request.direction == "left":
            c.execute('select philo_name, philo_id from words where parent=? and rowid < ?',
                      (parent_sentence, results['rowid']))
            string = []
            for i in c.fetchall():
                string.append(i['philo_name'].decode('utf-8'))
            string.reverse()
            string = ' '.join(string)
        elif request.direction == "right":
            c.execute('select philo_name, philo_id from words where parent=? and rowid > ?',
                      (parent_sentence, results['rowid']))
            string = []
            for i in c.fetchall():
                string.append(i['philo_name'].decode('utf-8'))
            string = ' '.join(string)
        else:
            string = ""
        metadata_fields = {}
        for metadata in config.kwic_metadata_sorting_fields:
            metadata_fields[metadata] = hit[metadata].lower()
        kwic_words.append((string, index, metadata_fields))
        index += 1
        elapsed = timeit.default_timer() - start_time
        # avoid timeouts by splitting the query if more than request.max_time
        # (in seconds) has been spent in the loop
        if elapsed > max_time:
            break
    yield json.dumps({"results": kwic_words, "hits_done": index}).encode('utf8')
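# The dictionary-style row access above (results['parent'], i['philo_name'])
# assumes the sqlite3 connection behind db.dbh uses sqlite3.Row as its row
# factory; with the default factory, rows are plain tuples and string-keyed
# lookups raise a TypeError. A minimal sketch of that setup (the database
# path is hypothetical):
def open_words_db(path='/path/to/data/toms.db'):
    import sqlite3
    dbh = sqlite3.connect(path)
    # Row objects support both index- and name-based access, e.g. row['parent']
    dbh.row_factory = sqlite3.Row
    return dbh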
def generate_time_series(request, config):
    db = DB(config.db_path + '/data/')
    time_series_object = {'query': dict([i for i in request]), 'query_done': False}
    # Invalid date range
    if request.start_date == 'invalid' or request.end_date == 'invalid':
        time_series_object['results_length'] = 0
        time_series_object['more_results'] = False
        time_series_object['new_start_date'] = 0
        time_series_object['results'] = {'absolute_count': {}, 'date_count': {}}
        return time_series_object
    start_date, end_date = get_start_end_date(db, config,
                                              start_date=request.start_date or None,
                                              end_date=request.end_date or None)
    # Generate date ranges
    interval = int(request.year_interval)
    date_ranges = []
    # Make sure the last date gets included in the loop below by adding one to the stop value
    for start in range(start_date, end_date + 1, interval):
        end = start + interval - 1
        if end > end_date:
            end = end_date
        date_range = "%d-%d" % (start, end)
        date_ranges.append((start, date_range))
    absolute_count = defaultdict(int)
    date_counts = {}
    total_hits = 0
    last_date_done = start_date
    start_time = timeit.default_timer()
    max_time = request.max_time or 10
    for start_range, date_range in date_ranges:
        request.metadata[config.time_series_year_field] = date_range
        hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **request.metadata)
        hits.finish()
        hit_len = len(hits)
        params = {"report": "concordance", "start": "0", "end": "0"}
        params[config.time_series_year_field] = date_range
        url = make_absolute_query_link(config, request, **params)
        absolute_count[start_range] = {"label": start_range, "count": hit_len, "url": url}
        # Get date total count
        if interval != 1:
            end_range = start_range + (int(request['year_interval']) - 1)
            query = 'select sum(word_count) from toms where %s between "%d" and "%d"' % (config.time_series_year_field, start_range, end_range)
        else:
            query = "select sum(word_count) from toms where %s='%s'" % (config.time_series_year_field, start_range)
        cursor = db.dbh.cursor()
        cursor.execute(query)
        date_counts[start_range] = cursor.fetchone()[0] or 0
        total_hits += hit_len
        elapsed = timeit.default_timer() - start_time
        last_date_done = start_range
        # avoid timeouts by splitting the query if more than request.max_time
        # (in seconds) has been spent in the loop
        if elapsed > int(max_time):
            break
    time_series_object['results_length'] = total_hits
    if (last_date_done + int(request.year_interval)) >= end_date:
        time_series_object['more_results'] = False
    else:
        time_series_object['more_results'] = True
        time_series_object['new_start_date'] = last_date_done + int(request.year_interval)
    time_series_object['results'] = {'absolute_count': absolute_count, 'date_count': date_counts}
    return time_series_object
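# A minimal sketch of how a caller might drain the full time series by
# re-issuing the request with the new_start_date returned whenever the
# function stops early to avoid a timeout. Mutating request.start_date in
# place is an assumption about the request object, for illustration only.
def fetch_full_time_series(request, config):
    absolute_count = {}
    date_count = {}
    while True:
        result = generate_time_series(request, config)
        absolute_count.update(result['results']['absolute_count'])
        date_count.update(result['results']['date_count'])
        if not result['more_results']:
            break
        # resume where the previous call left off
        request.start_date = result['new_start_date']
    return {'absolute_count': absolute_count, 'date_count': date_count}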