def get_phr_content(q):
    """Return the first quoted extract of a query string.

    :param q: Query string possibly containing quoted portions.
    :return: The first portion enclosed in quotes, or ``''`` when the
        query contains no quoted extract.
    """
    extracts, non_extracts = helpers.get_extracts(q, 0)
    try:
        # Only an empty extracts list can fail here; catch exactly that
        # case instead of a bare except that would also hide real bugs
        # (e.g. NameError, KeyboardInterrupt).
        return extracts[0]
    except IndexError:
        return ''
def create_law_links(self):
    """Create cross-reference links between the already-loaded laws.

    Walks every paragraph of every law in ``self.laws``, finds mentions
    of legal entities, classifies each mention as modifying
    ('τροποποιητικός'), referential ('αναφορικός') or generic
    ('γενικός'), accumulates them in ``self.links`` and finally
    persists each link to ``self.db.links`` (best effort).
    """
    for identifier, law in self.laws.items():
        articles = law.sentences.keys()
        # Apply pending removals first so links are built on the
        # current text of each law.
        self.detect_and_apply_removals(identifier=identifier, generate_links=True)
        for article in articles:
            for paragraph in law.get_paragraphs(article):
                try:
                    # Split paragraph into quoted extracts and the
                    # surrounding (non-extract) text.
                    extracts, non_extracts = helpers.get_extracts(
                        paragraph, 0)
                    for entity in entities.LegalEntities.entities:
                        # If law found in amendment body then it is
                        # modifying
                        for s in non_extracts:
                            neighbors = re.finditer(entity, s)
                            neighbors = set([neighbor.group().lower()
                                             for neighbor in neighbors])
                            tmp = s.split(' ')
                            for u in map(lambda x: x.lower(), neighbors):
                                if u not in self.links:
                                    self.links[u] = Link(u)
                                # A mention counts as "modifying" when any
                                # amendment action word occurs in the same
                                # non-extract sentence.
                                is_modifying = False
                                for action in entities.actions:
                                    for i, w in enumerate(tmp):
                                        if action == w:
                                            is_modifying = True
                                            break
                                    if is_modifying:
                                        break
                                if is_modifying:
                                    self.links[u].add_link(
                                        law.identifier, paragraph,
                                        link_type='τροποποιητικός')
                                else:
                                    self.links[u].add_link(
                                        law.identifier, paragraph,
                                        link_type='αναφορικός')
                        # If enclosed in brackets the link is only
                        # referential
                        for s in extracts:
                            neighbors = re.finditer(entity, s)
                            neighbors = set([neighbor.group().lower()
                                             for neighbor in neighbors])
                            for u in map(lambda x: x.lower(), neighbors):
                                u = u.lower()
                                if u not in self.links:
                                    self.links[u] = Link(u)
                                self.links[u].add_link(
                                    law.identifier, paragraph,
                                    link_type='αναφορικός')
                # except there are Unmatched brackets
                except Exception as e:
                    # NOTE(review): fallback path — when extract splitting
                    # fails (e.g. unmatched brackets) the whole paragraph
                    # is scanned and the link is tagged generic. 'entity'
                    # may be unbound here if the failure occurs before the
                    # entity loop starts — confirm upstream behavior.
                    neighbors = re.finditer(entity, paragraph)
                    neighbors = set([neighbor.group().lower()
                                     for neighbor in neighbors])
                    for u in map(lambda x: x.lower(), neighbors):
                        if u not in self.links:
                            self.links[u] = Link(u)
                        self.links[u].add_link(
                            law.identifier, paragraph,
                            link_type='γενικός')
    # Persist all accumulated links; failures on individual links are
    # deliberately ignored (best-effort save).
    for link in self.links.values():
        try:
            self.db.links.save(link.serialize())
        except:
            pass
# Make the project package importable when running this file as a script.
sys.path.insert(0, '../3gm')
if __name__ == '__main__':
    # Number of paragraphs whose extract splitting failed.
    counter = 0
    # argv[1]: input for the codifier; argv[2]: output file path.
    cod = codifier.LawCodifier(sys.argv[1])
    f = open(sys.argv[2], 'w+')
    cod.codify_new_laws()
    for identifier, law in cod.laws.items():
        for article in law.sentences:
            print(article)
            for s in law.get_paragraphs(article):
                global actions
                global whats
                trees = []
                try:
                    # Split the paragraph into quoted extracts and the
                    # remaining (non-extract) text.
                    extracts, non_extracts = helpers.get_extracts(s)
                except:
                    counter += 1
                    continue
                non_extracts = ' '.join(non_extracts)
                non_extracts = tokenizer.tokenizer.split(non_extracts, delimiter='. ')
                for non_extract in non_extracts:
                    # Strip punctuation from each word before matching
                    # against the amendment action vocabulary.
                    tmp = list(
                        map(lambda s: s.strip(string.punctuation),
                            non_extract.split(' ')))
                    for action in actions:
                        for i, w in enumerate(tmp):
                            if action == w:
                                # NOTE(review): the body of this branch is
                                # truncated in this chunk of the file — the
                                # handling of a matched action word is not
                                # visible here; do not modify without the
                                # full source.
def generate_action_tree_from_string(s, nested=False, max_what_window=20, max_where_window=30, use_regex=False):
    """Main algorithm for amendment detection.

    The approach followed is hybrid.
    The procedure is outlined here:
    https://github.com/eellak/gsoc2018-3gm/wiki/Algorithms-for-analyzing-Government-Gazette-Documents

    :param s: Paragraph text to analyse for amendment actions.
    :param nested: When True, each resulting subtree is nested into a
        hierarchical dictionary via ``ActionTreeGenerator.nest_tree``.
    :param max_what_window: Window-size parameter; not referenced in
        this body (kept for interface compatibility).
    :param max_where_window: Window-size parameter; not referenced in
        this body.
    :param use_regex: Flag; not referenced in this body.
    :return: List of amendment (sub)trees detected in ``s``.
    """
    # results are stored here
    trees = []
    # fix par abbrev
    s = helpers.fix_par_abbrev(s)
    # get extracts and non-extracts using helper functions
    parts = tokenizer.tokenizer.split(s, False, '. ')
    extracts, non_extracts = helpers.get_extracts(s)
    non_extracts = ' '.join(non_extracts)
    non_extracts = tokenizer.tokenizer.split(non_extracts, True, '. ')
    # Index of the next quoted extract to pair with a detected action.
    extract_cnt = 0
    for part_cnt, non_extract in enumerate(non_extracts):
        doc = nlp(non_extract)
        tmp = list(
            map(lambda s: s.strip(string.punctuation), non_extract.split(' ')))
        # Detect amendment action
        for action in entities.actions:
            for i, w in enumerate(doc):
                if action == w.text:
                    tree = collections.defaultdict(dict)
                    tree['root'] = {
                        '_id': i,
                        'action': action.__str__(),
                        'children': []
                    }
                    max_depth = 0
                    logging.info('Found ' + str(action))
                    extract = None
                    # Deletion/repeal-type actions carry no quoted
                    # content, so they do not consume an extract.
                    if str(action) not in [
                            'διαγράφεται', 'παύεται', 'καταργείται'
                    ]:
                        try:
                            extract = extracts[extract_cnt]
                            extract_cnt += 1
                        except IndexError:
                            extract = None
                    # Detect what is amended
                    found_what, tree, is_plural = ActionTreeGenerator.get_nsubj(
                        doc, i, tree)
                    if found_what:
                        k = tree['what']['index']
                        if tree['what']['context'] not in [
                                'φράση', 'φράσεις', 'λέξη', 'λέξεις'
                        ]:
                            tree['what']['number'] = list(
                                helpers.ssconj_doc_iterator(
                                    doc, k, is_plural))
                        else:
                            # Phrase-level amendment: locate the phrase
                            # components inside the original sentence.
                            tree = phrase_fun.detect_phrase_components(
                                parts[part_cnt], tree)
                            tree['what']['context'] = 'φράση'
                        logging.info(tree['what'])
                    else:
                        # Dependency parse gave no subject — fall back to
                        # token-window heuristics.
                        found_what, tree, is_plural = ActionTreeGenerator.get_nsubj_fallback(
                            tmp, tree, i)
                    # get content
                    if action not in [
                            'διαγράφεται', 'διαγράφονται', 'αναριθμείται',
                            'αναριθμούνται'
                    ]:
                        tree, max_depth = ActionTreeGenerator.get_content(
                            tree, extract, s)
                    if action in ['αναριθμείται', 'αναριθμούνται']:
                        # get renumbering
                        tree = ActionTreeGenerator.get_renumbering(
                            tree, doc)
                        subtrees = ActionTreeGenerator.split_renumbering_tree(
                            tree)
                    # split to subtrees
                    if action not in ['αναριθμείται', 'αναριθμούνται']:
                        subtrees = ActionTreeGenerator.split_tree(tree)
                    # iterate over subtrees
                    for subtree in subtrees:
                        subtree, max_depth = ActionTreeGenerator.get_content(
                            subtree, extract, s, secondary=True)
                        # get latest statute
                        try:
                            law = ActionTreeGenerator.detect_latest_statute(
                                non_extract)
                        except BaseException:
                            law = ''
                        # first level are laws
                        subtree['law'] = {
                            '_id': law,
                            'children': ['article']
                        }
                        splitted = non_extract.split(' ')
                        # build levels bottom up
                        subtree = ActionTreeGenerator.build_levels(
                            splitted, subtree)
                        # nest into dictionary
                        if nested:
                            ActionTreeGenerator.nest_tree('root', subtree)
                        trees.append(subtree)
    return trees