Beispiel #1
0
def query_instrument_fields(args):
    """Run a field-based instrument search against the "legislation" index.

    The document type, search type and request body all come from
    ``common(args)``; the ``hits`` section of the Elasticsearch response is
    returned as-is.

    :param args: request arguments, passed straight through to ``common``.
    :returns: dict with ``type``, ``search_type`` and ``search_results`` keys.
    :raises CustomException: if building or running the query fails for any
        reason (the underlying error is printed first).
    """
    try:
        doc_type, search_type, body = common(args)
        es = current_app.extensions['elasticsearch']
        results = es.search(index="legislation", doc_type=doc_type, body=body)
        return {
            'type': 'search',
            'search_type': search_type,
            'search_results': results['hits'],
        }
    except Exception as e:
        # ``except ... as`` and ``print(...)`` with a single argument behave
        # the same on Python 2.6+ and Python 3.
        print(e)
        raise CustomException('There was a problem with your query')
Beispiel #2
0
def get_definition_route(ids, exids=None):
    """Return the JSON response for a definition lookup.

    :param ids: ``;``-separated string; split and passed to
        ``get_definition`` as a list.
    :param exids: optional ``;``-separated string; passed as ``None`` when
        missing or empty, otherwise split the same way.
    :returns: whatever ``jsonify`` produces for the ``get_definition`` result.
    :raises CustomException: if any step fails.
    """
    try:
        id_list = ids.split(';')
        exid_list = exids.split(';') if exids else None
        return jsonify(get_definition(id_list, exid_list))
    except Exception:
        # The exception object was never used, so it is no longer bound.
        raise CustomException('Could not retrieve definition')
Beispiel #3
0
def instrument_location(instrument, location, args):
    """Build the fragment response for a location inside an instrument.

    The location is first resolved as given. If that lookup raises
    ``CustomException``, or resolves to nothing but the instrument's own
    tree, it is retried once using ``link_to_canonical(location)``.

    :param instrument: object exposing ``get_tree()``, ``title``, ``id`` and
        ``attributes``.
    :param location: path string handed to ``nodes_from_path_string``.
    :param args: request arguments (not used by this function).
    :returns: dict describing the located fragment.
    """
    try:
        matched = nodes_from_path_string(instrument.get_tree(), location)
        retry = len(matched) == 1 and matched[0] == instrument.get_tree()
    except CustomException:
        retry = True
    if retry:
        # Second attempt with the canonical form of the location; errors from
        # this attempt propagate to the caller.
        matched = nodes_from_path_string(instrument.get_tree(),
                                         link_to_canonical(location))

    full_location, _, path = generate_path_string(matched[0])
    fragment = cull_tree(matched)
    html_content = etree.tostring(tohtml(fragment),
                                  encoding='UTF-8',
                                  method="html")
    lookup = {
        'doc_type': 'instrument',
        'document_id': instrument.id,
        'find': 'location',
        'location': path,
    }
    return {
        'html_content': html_content,
        'title': instrument.title,
        'full_title': full_location,
        'document_id': instrument.id,
        'doc_type': 'instrument',
        'latest': instrument.attributes['latest'],
        'path': instrument.attributes['path'],
        'date_as_at_str': format_govt_date(
            instrument.attributes['date_as_at']),
        'format': 'fragment',
        'query': lookup,
    }
Beispiel #4
0
def get_act_summary(doc_id, db=None):
    """Fetch the ``instruments`` row for ``doc_id`` as a dict-like row.

    :param doc_id: value matched against ``instruments.id``.
    :param db: optional connection; ``get_db()`` is used when falsy.
    :returns: the first matching row.
    :raises CustomException: if no row matches or the fetch fails.
    """
    with (db or get_db()).cursor(cursor_factory=extras.RealDictCursor) as cur:
        query = """select * from instruments
        where id = %(doc_id)s """
        cur.execute(query, {'doc_id': doc_id})
        try:
            row = cur.fetchone()
        except Exception:
            raise CustomException("Instrument not found")
        # fetchone() signals "no rows" by returning None rather than raising,
        # so the missing-instrument case has to be checked explicitly.
        if row is None:
            raise CustomException("Instrument not found")
        return row
Beispiel #5
0
def find_definition(tree, query):
    """Return the ``def-para`` ancestor of the best-matching ``def-term``.

    Candidate terms are the ``def-term`` elements whose text contains
    ``query``; the one with the smallest ``levenshtein`` distance to
    ``query`` wins (the first such element on ties).

    :param tree: lxml element or tree to search.
    :param query: term text to look for.
    :returns: the enclosing ``def-para`` element of the chosen term.
    :raises CustomException: if nothing matches or any step fails.
    """
    try:
        # The term is bound as an XPath variable instead of being formatted
        # into the expression, so terms containing quotes (e.g. "owner's")
        # no longer produce an invalid expression.
        terms = tree.xpath(
            ".//def-para/para/text/def-term[contains(., $term)]", term=query)
        closest = min(terms, key=lambda term: levenshtein(query, term.text))
        return next(closest.iterancestors(tag='def-para'))
    except Exception:
        raise CustomException("Path for definition not found")
Beispiel #6
0
def find_document_id_by_govt_id(node_id, db=None):
    """Look up the id of the latest instrument containing ``node_id``.

    :param node_id: value matched against ``id_lookup.govt_id``.
    :param db: optional connection; ``get_db()`` is used when falsy.
    :returns: the first column (``a.id``) of the first matching row.
    :raises CustomException: if no row matches or the query fails.
    """
    with (db or get_db()).cursor() as cur:
        try:
            query = """
            select a.id from latest_instruments a
            join id_lookup i on i.parent_id = a.id
            where i.govt_id = %(node_id)s """
            cur.execute(query, {'node_id': node_id})
            row = cur.fetchone()
        except Exception:
            raise CustomException("Result not found")
        # Explicit not-found check instead of relying on the TypeError that
        # subscripting None would raise.
        if row is None:
            raise CustomException("Result not found")
        return row[0]
Beispiel #7
0
def get_act_summary_govt_id(govt_id, db=None):
    """Fetch the joined ``id_lookup``/``instruments`` row for ``govt_id``.

    :param govt_id: value matched against ``id_lookup.govt_id``.
    :param db: optional connection; ``get_db()`` is used when falsy.
    :returns: the first matching row as a dict-like row.
    :raises CustomException: if no row matches or the fetch fails.
    """
    with (db or get_db()).cursor(cursor_factory=extras.RealDictCursor) as cur:
        query = """select *
            from id_lookup d
            join instruments i on parent_id = id
            where d.govt_id = %(govt_id)s"""
        cur.execute(query, {'govt_id': govt_id})
        try:
            row = cur.fetchone()
        except Exception:
            raise CustomException("Instrument not found")
        # fetchone() returns None when nothing matched; it does not raise,
        # so the not-found case must be detected here.
        if row is None:
            raise CustomException("Instrument not found")
        return row
Beispiel #8
0
def get_act_exact(title=None, doc_id=None, db=None):
    """Load and parse the latest instrument matching a title and/or id.

    A ``None`` filter is ignored by the query, so either argument may be
    omitted.

    :param title: exact title to match, or ``None``.
    :param doc_id: instrument id to match, or ``None``.
    :param db: optional connection; ``get_db()`` is used when falsy.
    :returns: ``(parsed_document, id)`` for the first matching row.
    :raises CustomException: if no row matches, or fetching/parsing fails.
    """
    with (db or get_db()).cursor() as cur:
        query = """
            select document, id from latest_instruments
            where (%(title)s is  null or title = %(title)s) and (%(id)s is null or id =  %(id)s)
             """
        cur.execute(query, {'title': title, 'id': doc_id})
        try:
            result = cur.fetchone()
            if result is None:
                # No row matched the supplied filters.
                raise CustomException("Instrument not found")
            return etree.fromstring(result[0], parser=large_parser), result[1]
        except CustomException:
            raise
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            raise CustomException("Instrument not found")
Beispiel #9
0
def query_instrument(args):
    """Dispatch an instrument query according to ``args['find']``.

    Some finders ('contains', 'section_references', 'section_versions',
    'more') are answered without loading the instrument here. Otherwise the
    instrument is resolved from ``id``/``document_id`` or ``title`` and the
    requested view is returned, defaulting to the full instrument.

    :param args: mapping of request arguments.
    :raises CustomException: if no instrument is specified, or a
        govt-location lookup has no location.
    """
    find = args.get('find')

    # Finders that do not need the instrument object loaded here.
    if find == 'contains':
        return query_contains(args)
    if find == 'section_references':
        return section_references(args)
    if find == 'section_versions':
        return section_versions(args)
    if find == 'more':
        part_list = args.get('parts').split(',')
        return instrument_more(args.get('document_id'), part_list, args)

    govt_location = args.get('govt_location')
    doc_id = args.get('id', args.get('document_id'))
    if doc_id:
        if isinstance(doc_id, basestring) and not doc_id.isdigit():
            # A non-numeric string id is treated as a govt id and mapped to
            # a document id first.
            govt_id = doc_id
            instrument = get_instrument_object(
                find_document_id_by_govt_id(govt_id))
            if instrument.attributes['govt_id'] != govt_id:
                # The id differs from the instrument's own govt id, so the
                # request is redirected to a govt-location lookup.
                find, govt_location = 'govt_location', govt_id
        else:
            instrument = get_instrument_object(doc_id)
    elif args.get('title'):
        instrument = get_latest_instrument_object(args.get('title'))
    else:
        raise CustomException('No instrument specified')

    if find == 'preview':
        return instrument_preview(instrument)
    if find == 'location' and args.get('location'):
        return instrument_location(instrument, args.get('location'), args)
    if find == 'govt_location':
        if not govt_location:
            raise CustomException('No location specified')
        return instrument_govt_location(instrument, govt_location,
                                        args.get('link_text'), args)
    # Default is the full instrument.
    return instrument_full(instrument, args)
Beispiel #10
0
def query_contains(args):
    """Search the parts of one instrument for text, with highlighting.

    Without ``parts`` the search is paged (25 hits starting at ``offset``).
    With ``parts`` (a comma-separated list) the search is restricted to
    those part ids and up to 10000 hits are returned.

    :param args: request arguments; reads ``id``/``document_id``, ``parts``
        and ``offset``, and is also passed to ``contains_query``.
    :returns: dict with ``type``, ``search_type``, ``search_results`` and
        ``title`` keys.
    :raises CustomException: if building or running the query fails (the
        underlying error is printed first).
    """
    try:
        es = current_app.extensions['elasticsearch']
        doc_id = args.get('id', args.get('document_id'))
        parts = args.get('parts')

        query_filter = {"term": {"_parent": doc_id}}
        if parts:
            # Part document ids have the form "<doc_id>-<part>".
            part_ids = ['%s-%s' % (doc_id, part) for part in parts.split(',')]
            query_filter = {
                'bool': {
                    "should": query_filter,
                    "must": {"ids": {'values': part_ids}},
                }
            }

        body = {
            "fields": ['title'],
            "sort": ['num'],
            "query": contains_query(args, 'html'),
            "filter": query_filter,
            "highlight": {
                "pre_tags": ["<span class='search_match'>"],
                "post_tags": ["</span>"],
                "fields": {'html': {"number_of_fragments": 0}},
            },
        }
        if parts:
            body['highlight']['require_field_match'] = False
            body["size"] = 10000
        else:
            body["size"] = 25
            body["from"] = args.get('offset')

        results = es.search(index="legislation", doc_type='part', body=body)
        return {
            'type': 'search',
            'search_type': 'contains_result',
            'search_results': results['hits'],
            'title': 'Advanced Search',
        }
    except Exception as e:
        # Syntax valid on both Python 2.6+ and Python 3.
        print(e)
        raise CustomException('There was a problem with your query')
Beispiel #11
0
def get_instrument_object(id=None, db=None, replace=False):
    """Load an instrument by id and prepare it via ``prep_instrument``.

    The processed row is fetched with ``get_processed_instrument``; when
    that yields no row, the unprocessed row is used instead.

    :param id: instrument id (the name shadows the builtin but is kept
        because it is part of the public signature).
    :param db: optional connection; ``get_db()`` is used when falsy.
    :param replace: forwarded to ``prep_instrument``.
    :returns: whatever ``prep_instrument`` returns.
    :raises CustomException: 'Document does not exist' when a ``TypeError``
        occurs during the lookup.
    """
    try:
        db = db or get_db()
        with db.cursor(cursor_factory=extras.RealDictCursor) as cur:
            query = """SELECT * from get_processed_instrument(%(id)s) """
            cur.execute(query, {'id': id})
            result = cur.fetchone()
            if result:
                return prep_instrument(result, replace, db)
            # prep_instrument takes (result, replace, db). The previous
            # one-argument call always raised TypeError, which the handler
            # below then reported as a missing document.
            return prep_instrument(get_unprocessed_instrument(id, db),
                                   replace, db)
    except TypeError as e:
        print(e)
        raise CustomException('Document does not exist')
Beispiel #12
0
def query_contains_skeleton(args):
    """Search one instrument's skeleton and list the parts that match.

    Two searches are run: one against the instrument's ``skeleton`` field
    (to obtain highlighted HTML) and one against its child parts (to
    collect the matching part numbers).

    :param args: request arguments; reads ``id``/``document_id`` and is
        also passed to ``contains_query``.
    :returns: dict with ``part_matches`` (part suffixes sorted numerically)
        and, when the skeleton search has a hit, ``html_content``.
    :raises CustomException: if either search fails (the underlying error
        is printed first).
    """
    try:
        es = current_app.extensions['elasticsearch']
        doc_id = args.get('id', args.get('document_id'))
        results = {}

        skeleton_body = {
            "fields": [],
            "query": contains_query(args, 'skeleton'),
            "filter": {"term": {"_id": doc_id}},
            "highlight": {
                "pre_tags": ["<span class='search_match'>"],
                "post_tags": ["</span>"],
                "fields": {'skeleton': {"number_of_fragments": 0}},
                "require_field_match": False,
            },
        }
        es_results = es.search(index="legislation",
                               doc_type='instrument',
                               body=skeleton_body)
        try:
            first_hit = es_results['hits']['hits'][0]
            results['html_content'] = first_hit['highlight']['skeleton'][0]
        except IndexError:
            # No skeleton hit: the response simply omits 'html_content'.
            pass

        parts_body = {
            "fields": [],
            "size": 10000,
            "query": contains_query(args, 'html'),
            "filter": {"term": {"_parent": doc_id}},
        }
        es_results = es.search(index="legislation",
                               doc_type='part',
                               body=parts_body)
        # Part ids look like "<doc_id>-<n>"; keep the suffix, order by number.
        results['part_matches'] = sorted(
            (hit['_id'].split('-', 1)[1] for hit in es_results['hits']['hits']),
            key=int)
        return results
    except Exception as e:
        # Syntax valid on both Python 2.6+ and Python 3.
        print(e)
        raise CustomException('There was a problem with your query')
Beispiel #13
0
def query_case_fields(args):
    """Run a field-based case search against the "legislation" index.

    Clauses are added for ``full_citation``, ``contains`` and ``year`` when
    present in ``args`` and are combined with a boolean ``must``.

    :param args: request arguments; also reads ``offset`` (default 0).
    :returns: dict with ``type``, ``search_results`` and ``title`` keys.
    :raises CustomException: if building or running the query fails (the
        underlying error is printed first).
    """
    must = []
    highlight_fields = {}
    try:
        full_citation = args.get('full_citation')
        if full_citation:
            must.append({
                "simple_query_string": {
                    "query": full_citation,
                    "fields": ['full_citation'],
                    "default_operator": 'AND',
                }
            })
        if args.get('contains'):
            # Only a text search asks for highlighting of the document body.
            highlight_fields['document'] = {}
            must.append(contains_query(args))
        if args.get('year'):
            must.append(year_query(args))
        # Field names listed in the original note at this point:
        # neutral_citation, courtfile, year, court, bench, parties, matter,
        # charge. Presumably candidates for further clauses - TODO confirm.
        es = current_app.extensions['elasticsearch']
        body = {
            "from": args.get('offset', 0),
            "size": 25,
            "fields": ["id", "full_citation"],
            "sort": ["_score"],
            "query": {"bool": {"must": must}},
            "highlight": {
                "pre_tags": ["<span class='search_match'>"],
                "post_tags": ["</span>"],
                "fields": highlight_fields,
            },
        }
        results = es.search(index="legislation", doc_type="case", body=body)
        return {
            'type': 'search',
            'search_results': results['hits'],
            'title': 'Advanced Search',
        }
    except Exception as e:
        # Syntax valid on both Python 2.6+ and Python 3.
        print(e)
        raise CustomException('There was a problem with your query')
Beispiel #14
0
def query():
    """Route the current request to a handler based on its ``doc_type``.

    :returns: the handler's result passed through ``jsonify``.
    :raises CustomException: 'Badly formed query' for an unknown
        ``doc_type``.
    """
    args = request.args
    doc_type = args.get('doc_type')

    # Pick the handler first; only the selected name is ever looked up.
    if doc_type == 'all':
        handler = query_all
    elif doc_type in ['act', 'regulation', 'sop', 'bill', 'instrument']:
        handler = query_instrument
    elif doc_type in ['instruments']:
        handler = query_instrument_fields
    elif doc_type == 'case':
        handler = query_case
    elif doc_type == 'cases':
        handler = query_case_fields
    else:
        raise CustomException('Badly formed query')

    return jsonify(handler(args))
Beispiel #15
0
def prep_instrument(result, replace, db):
    """Build an ``Instrument`` from a database row, filling in missing parts.

    The document is reprocessed when ``replace`` is truthy or the row has no
    ``processed_document``. Skeleton and heights are recomputed after a
    reprocess, or when the row lacks them.

    :param result: dict-like instrument row.
    :param replace: force reprocessing of the document.
    :param db: database connection handed to the processing helpers.
    :returns: an ``Instrument``.
    :raises CustomException: if ``result`` is falsy.
    """
    if not result:
        raise CustomException('Instrument not found')

    tree = None
    reprocessed = False
    if replace or not result.get('processed_document'):
        raw_row = get_unprocessed_instrument(result.get('id'), db=db)
        tree, _definitions = process_instrument(row=raw_row,
                                                db=db,
                                                latest=result.get('latest'),
                                                refresh=replace)
        document = etree.tostring(tree, encoding='UTF-8', method="html")
        reprocessed = True
    else:
        document = result.get('processed_document')

    def current_tree():
        # Reuse the freshly processed tree if there is one; otherwise parse
        # the stored document (a new parse on every call).
        if tree is not None:
            return tree
        return etree.fromstring(document, parser=large_parser)

    skeleton = result.get('skeleton')
    if reprocessed or not skeleton:
        skeleton = process_skeleton(result.get('id'),
                                    current_tree(),
                                    db=db,
                                    version=result.get('version'))

    heights = result.get('heights')
    if reprocessed or not heights:
        heights = process_heights(result.get('id'),
                                  current_tree(),
                                  db=db,
                                  version=result.get('version'))

    return Instrument(id=result.get('id'),
                      document=document,
                      skeleton=skeleton,
                      heights=heights,
                      title=result.get('title'),
                      attributes=result)
Beispiel #16
0
def prep_case(result, replace, db):
    """Build a ``Case`` from a database row.

    The document is reprocessed when ``replace`` is truthy or the row has no
    ``processed_document``; otherwise the stored HTML is parsed.

    :param result: dict-like case row; must have a truthy ``id``.
    :param replace: force reprocessing of the document.
    :param db: database connection handed to ``process_case``.
    :returns: a ``Case`` with empty ``contents``.
    :raises CustomException: if the row is falsy or has no id.
    """
    if not (result and result.get('id')):
        raise CustomException('Case not found')

    stored = result.get('processed_document')
    if replace or not stored:
        tree = process_case(row=result, db=db)
    else:
        tree = html.fromstring(result.get('processed_document'))

    # Skeleton and contents-page generation are not wired up for cases; the
    # contents field is always an empty string.
    return Case(id=result.get('id'),
                tree=tree,
                contents='',
                attributes=dict(result))
Beispiel #17
0
def query_acts(args):
    """Placeholder for act queries; always raises ``CustomException``."""
    message = 'Not Implemented'
    raise CustomException(message)
Beispiel #18
0
def find_definitions(tree, query):
    """Return every ``def-para`` with a ``def-term`` containing ``query``.

    :param tree: lxml element or tree to search.
    :param query: text to look for inside ``def-term`` descendants.
    :returns: non-empty list of matching ``def-para`` elements.
    :raises CustomException: if nothing matches.
    """
    # Bind the text as an XPath variable instead of formatting it into the
    # expression, so a term containing a quote no longer raises an lxml
    # XPath error from this function.
    nodes = tree.xpath(
        ".//def-para[descendant::def-term[contains(., $term)]]", term=query)
    if not nodes:
        raise CustomException("Path for definition not found")
    return nodes
Beispiel #19
0

def case_skeleton_response(case):
    """Build the response dict for a case.

    Despite the name, the response is emitted with format 'full'.

    :param case: object exposing ``tree``, ``contents``, ``title``, ``id``
        and ``attributes``.
    :returns: dict holding the serialised HTML, metadata and the query that
        reproduces this view.
    """
    response = {}
    response['html_content'] = etree.tostring(case.tree,
                                              encoding='UTF-8',
                                              method="html")
    response['html_contents_page'] = case.contents
    response['title'] = case.title
    response['full_title'] = case.title
    response['document_id'] = case.id
    response['doc_type'] = 'case'
    response['attributes'] = case.attributes
    response['format'] = 'full'
    response['parts'] = {}
    response['query'] = {
        'doc_type': 'case',
        'document_id': case.id,
        'find': 'full',
    }
    return response


def query_case(args):
    """Return the response for a single case identified by ``args['id']``.

    :param args: request arguments; only ``id`` is consulted.
    :returns: the dict built by ``case_skeleton_response``.
    :raises CustomException: 'Invalid search type' when no id is supplied.
    """
    # The previously assigned ``find`` local was never read and is removed.
    case_id = args.get('id')
    if not case_id:
        raise CustomException('Invalid search type')
    return case_skeleton_response(get_case_object(document_id=case_id))
Beispiel #20
0
def get_link_route(doc_type=None, key=None):
    """Return the JSON preview for a link target.

    Only instruments are supported; a missing ``doc_type`` is treated as
    'instrument'.

    :param doc_type: ``None`` or 'instrument'.
    :param key: id passed to ``query_instrument`` as ``id``.
    :raises CustomException: for any other ``doc_type``.
    """
    if doc_type is not None and doc_type != 'instrument':
        raise CustomException("Can't locate link information")
    preview_args = {'find': 'preview', 'id': key}
    return jsonify(query_instrument(preview_args))
Beispiel #21
0
def find_node_by_query(tree, query):
    """Return every element under ``tree`` whose text contains ``query``.

    :param tree: lxml element or tree to search.
    :param query: text to look for.
    :returns: list of matching elements (possibly empty).
    :raises CustomException: if the XPath evaluation fails.
    """
    try:
        # XPath variable binding keeps quotes in ``query`` from breaking the
        # expression, which used to surface as "Path not found".
        return tree.xpath(".//*[contains(., $needle)]", needle=query)
    except Exception:
        raise CustomException("Path not found")
Beispiel #22
0
def find_sub_node(tree, keys, limit_tags=['part', 'subpart'], force_tag=None):
    """Walk down ``tree`` matching the labels in ``keys``, one per level.

    Each key may hold several labels joined with ``+``, and a label of the
    form ``a-b`` selects a range from the match for ``a`` to the match for
    ``b``. ``limit_tags`` exists to prevent ambiguity between part and
    section labels; when parts etc. must be treated like sections (ranges,
    for example) callers pass an empty list together with ``force_tag``.

    :param tree: lxml element to search from.
    :param keys: list of label strings, outermost first.
    :param limit_tags: tag names that are never accepted as a match (the
        default list is only read, never mutated).
    :param force_tag: when given, only elements with this tag are matched.
    :returns: list of matched elements with any that are ancestors of other
        matches removed; ``[tree]`` when ``keys`` is empty.
    :raises CustomException: "Path not found" when a label cannot be
        resolved, "Empty" when nothing was collected.
    """
    node = tree
    xpath_query = ".//%s[%s]"

    def depth(element):
        return len(list(element.iterancestors()))

    def shallowest(candidates):
        # The old lambda tested ``len(node)`` (the enclosing variable)
        # instead of the candidate list, so it could hand back the first
        # match in document order rather than the shallowest one.
        if not candidates:
            # get_closest relies on IndexError to know it must retry from
            # the parent element.
            raise IndexError('no matching nodes')
        return min(candidates, key=depth)

    def get_closest(node, label):
        """Find the shallowest match for ``label`` under ``node``, climbing
        to parent elements until one is found.

        note: this is split between xpath and python for performance reasons
        (xpath too slow on ancestors)
        """
        tag = force_tag if force_tag else '*'
        while True:
            try:
                matches = node.xpath(xpath_query % (tag, labelize(label)))
                matches = [
                    match for match in matches
                    if match.tag not in limit_tags and not set(
                        ancestor.tag for ancestor in match.iterancestors()
                    ).intersection(IGNORE_TRAVERSAL_TAGS)
                ]
                return shallowest(matches)
            except IndexError:
                node = node.getparent()
                if node is None or not len(node):
                    raise StopIteration('no more parents')

    nodes = []

    try:
        for i, key in enumerate(keys):
            if key:
                for add in key.split('+'):
                    add = add.strip()
                    if not add:
                        continue
                    elif '-' in add:
                        # we can't assume any reasonable lexicographical
                        # ordering of labels, so instead find the first
                        # match and continue until the last
                        labels = [x.strip() for x in add.split('-')]
                        start = get_closest(node, labels[0])
                        last = get_closest(node, labels[1])
                        nodes.append(start)
                        # Iteration has to begin at the top of the tree:
                        # skip forward to ``start``, then collect every
                        # element with the same tag up to and including
                        # ``last``. Running off the end raises StopIteration,
                        # handled below.
                        tree_iter = tree.iter(start.tag)
                        while True:
                            if next(tree_iter) == start:
                                break
                        while True:
                            current = next(tree_iter)
                            nodes.append(current)
                            if current == last:
                                break
                    else:
                        nodes.append(get_closest(node, add))
                node = nodes
            if i < len(keys) - 1:
                # Descend: the next key is resolved relative to the most
                # recent match.
                node = nodes[-1]
        if not len(keys):
            nodes = [node]
        if not len(nodes):
            raise CustomException("Empty")
        # remove ancestors
        ancestors = []
        for n in nodes:
            ancestors.extend(list(n.iterancestors()))
        nodes = [n for n in nodes if n not in ancestors]
        return nodes
    except (IndexError, StopIteration, AttributeError):
        raise CustomException("Path not found")
Beispiel #23
0
def find_node_by_govt_id(tree, query):
    """Return every element under ``tree`` whose ``id`` attribute is ``query``.

    :param tree: lxml element or tree to search.
    :param query: id value to match exactly.
    :returns: list of matching elements (possibly empty).
    :raises CustomException: if the XPath evaluation fails.
    """
    try:
        # Bind the id as an XPath variable so a value containing a quote
        # cannot break the expression.
        return tree.xpath(".//*[@id=$govt_id]", govt_id=query)
    except Exception:
        raise CustomException("Path not found")
Beispiel #24
0
def nodes_from_path_string(tree, path):
    path = path.lower().strip()
    pattern = re.compile(
        '(schedule|section|regulation|clause|rule|subpart|part|sch|pt|reg|rr|hcr|ss|s|r|cl)[, ]{,2}(.*)'
    )
    part_pattern = re.compile(
        '([a-z\d]+),? ?(subpart|regulation|clause|rule|section|part|pt|reg|rr|hcr|ss|s|cl|r)?'
    )
    range_pattern = re.compile('^\w+[-+]\w+$')
    parts = pattern.match(path)
    keys = []
    try:
        if parts:
            parts = parts.groups()
            if any([parts[0].startswith(k) for k in tag_names.keys()]):
                # match up 'cl ' or 's ', 'section ' or 'clause ' then until '('
                label = ''
                remainder = parts[1].strip()
                if remainder:
                    # total hack job, but must support subpart/part/schedule ranges
                    if range_pattern.match(remainder):
                        return find_sub_node(tree, [remainder],
                                             limit_tags=[],
                                             force_tag=tag_names[parts[0]])
                    sub = part_pattern.match(remainder).groups()
                    # sub[0] is the sch/part label
                    label = sub[0]
                    remainder = remainder[len(label):].replace(',', '').strip()
                try:
                    tree = tree.xpath(
                        ".//%s[%s]" %
                        (tag_names[parts[0]], labelize(label)))[0]

                except IndexError:
                    try:
                        # try fake Part
                        tree = tree.xpath(".//head1[%s]" %
                                          labelize('part %s' % label))[0]
                    except IndexError:
                        # try empty label, ie unnumbered schedule
                        tree = tree.xpath(".//%s" % (tag_names[parts[0]]))[0]
                        remainder = path[len(parts[0]):]
                        remainder = re.sub('^ ?1?,?', '', remainder)

                return nodes_from_path_string(tree, remainder)
            else:
                if isinstance(tree, etree._ElementTree) or tree.getroottree(
                ).getroot() == tree:
                    tree = tree.findall(".//body")[0]
            if parts[1]:
                _keys = map(
                    lambda x: x.strip(),
                    filter(lambda x: len(x),
                           re.split('[^.a-zA-Z\d\+\- ]+', parts[1])))
                # join on + -
                i = 0
                keys = []
                while i < len(_keys):
                    if i and _keys[i] in ['+', '-']:
                        keys[-1] += _keys[i] + _keys[i + 1]
                        i += 2
                    else:
                        keys.append(_keys[i])
                        i += 1
    except IndexError, e:
        raise CustomException("Path not found")