Example #1
    def test_05_output(self):
        """
        test output of pending messages
        """
        Messager.warning(u'Hello warning')
        Messager.info(u'Hello info')
        Messager.debug(u'Hello debug')
        Messager.error(u'Hello error')
        output = NamedTemporaryFile("w", delete=False)
        try:
            Messager.output(output)
            output.close()
            with open(output.name, "r") as output:
                self.assertEqual(
                    output.read(), u'warning : Hello warning\n'
                    u'comment : Hello info\n'
                    u'debug : Hello debug\n'
                    u'error : Hello error\n')
            Messager.clear()

            with open(output.name, "w") as output:
                Messager.output(output)
            with open(output.name, "r") as output:
                self.assertEqual(output.read(), "")
        finally:
            os.unlink(output.name)
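For reference, the buffering behaviour this test exercises can be reproduced outside the test harness. A minimal sketch, assuming the import path follows arat's server layout (arat.server.message is a guess, not confirmed by the snippet):

import sys
from arat.server.message import Messager  # assumed module path

Messager.warning(u'disk almost full')  # queued, not written yet
Messager.error(u'write failed')
Messager.output(sys.stderr)  # flush pending messages as '<level> : <text>' lines
Messager.clear()             # empty the queue so later requests start clean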
Example #2
    def test_03_error(self):
        """
        test error level
        """
        Messager.error(u'Hello 世界!')
        json_dic = {}
        Messager.output_json(json_dic)
        self.assertEqual(
            json_dic,
            {'messages': [(u'Hello \u4e16\u754c\uff01', 'error', 3)]})
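Judging by the expected value above, output_json merges pending messages into the given dict as (text, level, duration) triples under the 'messages' key, with the info level rendered as 'comment' (compare Example #1). A sketch, with the level name and default duration inferred from these tests:

json_dic = {}
Messager.info(u'document saved')
Messager.output_json(json_dic)
# json_dic should now resemble {'messages': [(u'document saved', 'comment', 3)]}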
Example #3
def _listdir(directory, user):
    # return listdir(directory)
    try:
        assert_allowed_to_read(directory, user)
        return [
            f for f in listdir(directory) if not _is_hidden(f)
            and allowed_to_read(path_join(directory, f), user)
        ]
    except OSError as exception:
        Messager.error("Error listing %s: %s" % (directory, exception))
        raise AnnotationCollectionNotFoundError(directory)
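A hedged usage sketch; the directory is hypothetical, and user is presumed to be whatever identity object assert_allowed_to_read expects:

try:
    visible = _listdir('/data/corpora', user)  # hypothetical directory
except AnnotationCollectionNotFoundError:
    visible = []  # collection missing or unreadable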
Example #4
def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
    if raw_text is not None:
        # looks like somebody read this already; nice
        text = raw_text
    else:
        # need to read raw text
        try:
            with open_textfile(txt_file_path, 'r') as txt_file:
                text = txt_file.read()
        except IOError:
            raise UnableToReadTextFile(txt_file_path)
        except UnicodeDecodeError:
            Messager.error(
                'Error reading text file: nonstandard encoding or binary?', -1)
            raise UnableToReadTextFile(txt_file_path)

    j_dic['text'] = text

    tokeniser = options_get_tokenization(dirname(txt_file_path))

    # First, generate tokenisation
    tok_offset_gen = tokeniser_by_name(tokeniser)
    j_dic['token_offsets'] = [o for o in tok_offset_gen(text)]

    ssplitter = options_get_ssplitter(dirname(txt_file_path))
    if ssplitter == 'newline':
        from arat.server.ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    elif ssplitter == 'regex':
        from arat.server.ssplit import regex_sentence_boundary_gen
        ss_offset_gen = regex_sentence_boundary_gen
    else:
        Messager.warning('Unrecognized sentence splitting option, '
                         'reverting to newline sentence splitting.')
        from arat.server.ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    j_dic['sentence_offsets'] = [o for o in ss_offset_gen(text)]

    return True
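After a successful call the dict carries the raw text plus the two offset lists. A sketch with a hypothetical path and text; note the path matters even when raw_text is supplied, because tokenisation and sentence-splitting options are read from its directory:

j_dic = {}
_enrich_json_with_text(j_dic, '/data/corpus/doc.txt', raw_text=u'Hello. World.')
# j_dic['text']             -> u'Hello. World.'
# j_dic['token_offsets']    -> [(start, end), ...], one pair per token
# j_dic['sentence_offsets'] -> [(start, end), ...], one pair per sentence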
Example #5
def retrieve_stored(document, suffix):
    stored_path = _stored_path() + '.' + suffix

    if not isfile(stored_path):
        # @ninjin: not sure what 'version' was supposed to be returned
        # here, but none was defined, so returning that
        #         raise NoSVGError(version)
        raise NoSVGError('None')

    filename = document + '.' + suffix

    # sorry, quick hack to get the content-type right
    # TODO: send this with initial 'stored' response instead of
    # guessing on suffix
    if suffix == SVG_SUFFIX:
        content_type = 'image/svg+xml'
    elif suffix == PNG_SUFFIX:
        content_type = 'image/png'
    elif suffix == PDF_SUFFIX:
        content_type = 'application/pdf'
    elif suffix == EPS_SUFFIX:
        content_type = 'application/postscript'
    else:
        Messager.error('Unknown suffix "%s"; cannot determine Content-Type' %
                       suffix)
        # TODO: reasonable backoff value
        content_type = None

    # Bail out with a hack since we violated the protocol
    hdrs = [('Content-Type', content_type),
            ('Content-Disposition', 'inline; filename=' + filename)]

    with open(stored_path, 'rb') as stored_file:
        data = stored_file.read()

    return (hdrs, data)
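The returned (hdrs, data) pair is presumably handed straight to the HTTP layer. A sketch, assuming SVG_SUFFIX is simply 'svg' and with a hypothetical document name:

hdrs, data = retrieve_stored('doc-42', 'svg')
# hdrs -> [('Content-Type', 'image/svg+xml'),
#          ('Content-Disposition', 'inline; filename=doc-42.svg')]
# data -> raw bytes of the stored file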
Example #6
def __read_term_hierarchy(input_, section=None):
    """

    Return a list of TypeHierarchyNode parsed from the given configuration lines.

    >>> _input = ["# This is a comment to be ignored"]
    >>> _input.append("[spans]")
    >>> _input.append("# POS tags")
    >>> _input.append("adj")
    >>> _input.append("adv")
    >>> _input.append("art")

    >>> isinstance(__read_term_hierarchy(_input)[0], TypeHierarchyNode)
    True

    """

    root_nodes = []
    last_node_at_depth = {}
    last_args_at_depth = {}

    macros = {}
    for line in input_:
        # skip empties and lines starting with '#'
        if line.strip() == '' or re.match(r'^\s*#', line):
            continue

        # interpret lines of only hyphens as separators
        # for display
        if re.match(r'^\s*-+\s*$', line):
            # TODO: proper placeholder and placing
            root_nodes.append(cst.SEPARATOR_STR)
            continue

        # interpret lines of the format <STR1>=STR2 as "macro"
        # definitions, defining <STR1> as a placeholder that should be
        # replaced with STR2 wherever it occurs.
        match_obj = re.match(r'^<([a-zA-Z_-]+)>=\s*(.*?)\s*$', line)
        if match_obj:
            name, value = match_obj.groups()
            if name in cst.RESERVED_CONFIG_NAME:
                Messager.error("Cannot redefine <%s> in configuration, "
                               "it is a reserved name." % name)
                # TODO: proper exception
                raise InvalidProjectConfigException("Reserved name: " + name)
            else:
                macros["<%s>" % name] = value
            continue

        # macro expansion
        for token in macros:
            line = line.replace(token, macros[token])

        # check for undefined macros
        for match_obj in re.finditer(r'(<.*?>)', line):
            token = match_obj.group(1)
            assert token in cst.RESERVED_CONFIG_STRING, (
                "Error: undefined macro %s "
                "in configuration. (Note that macros are section-specific.)"
            ) % token

        # choose strict tab-only separator or looser any-space
        # separator matching depending on section
        if __require_tab_separator(section):
            match_obj = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
        else:
            match_obj = re.match(r'^(\s*)(\S+)(?:\s+(.*))?$', line)
        assert match_obj, "Error parsing line: '%s'" % line
        indent, terms, args = match_obj.groups()
        terms = [i.strip() for i in terms.split("|") if i.strip() != ""]
        if args is None or args.strip() == "":
            args = []
        else:
            args = [i.strip() for i in args.split(",") if i.strip() != ""]

        # older configs allowed space in term strings, splitting those
        # from arguments by space. Trying to parse one of these in the
        # new way will result in a crash from space in arguments.
        # The following is a workaround for the transition.
        if [i for i in args if re.search(r'\s', i)] and '\t' in line:
            # re-parse in the old way (dups from above)
            match_obj = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
            assert match_obj, "Error parsing line: '%s'" % line
            indent, terms, args = match_obj.groups()
            terms = [i.strip() for i in terms.split("|") if i.strip() != ""]
            if args is None or args.strip() == "":
                args = []
            else:
                args = [i.strip() for i in args.split(",") if i.strip() != ""]
            # issue a warning
            Messager.warning(
                "Space in term name(s) (%s) on line \"%s\" "
                "in config. This feature is deprecated and "
                "support will be removed in future versions. "
                "Please revise your configuration." %
                (",".join(['"%s"' % i for i in terms if " " in i]), line), 20)

        # depth in the ontology corresponds to the number of
        # spaces in the initial indent.
        depth = len(indent)

        # expand <INHERIT> into parent arguments
        expanded_args = []
        for a in args:
            if a != '<INHERIT>':
                expanded_args.append(a)
            else:
                assert depth-1 in last_args_at_depth, \
                    "Error no parent for '%s'" % line
                expanded_args.extend(last_args_at_depth[depth - 1])

        args = expanded_args

        n = TypeHierarchyNode(terms, args)
        if depth == 0:
            # root level, no children assignments
            root_nodes.append(n)
        else:
            # assign as child of last node at the depth of the parent
            assert depth-1 in last_node_at_depth, \
                "Error: no parent for '%s'" % line
            last_node_at_depth[depth - 1].children.append(n)
        last_node_at_depth[depth] = n
        last_args_at_depth[depth] = args

    return root_nodes
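A sketch of driving the parser with a small configuration fragment that exercises comments, macro definitions, tab-separated arguments and indentation-based nesting. The section name is illustrative, and <INHERIT> is assumed to appear in cst.RESERVED_CONFIG_STRING, as the expansion code above implies:

lines = [
    '# entity types',          # comment line, skipped
    '<POS>=part-of-speech',    # macro: <POS> expands to part-of-speech
    'Noun\tcategory:<POS>',    # depth 0: root node with one argument
    ' ProperNoun\t<INHERIT>',  # depth 1: child inheriting the parent's args
]
nodes = __read_term_hierarchy(lines, section='entities')
# nodes[0] is the TypeHierarchyNode for 'Noun'; its child node for
# 'ProperNoun' is built with args ['category:part-of-speech']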
Example #7
def _enrich_json_with_data(j_dic, ann_obj):
    # TODO: figure out if there's a reason for all the unicode()
    # invocations here; remove if not.

    # We collect trigger ids to be able to link the textbound later on
    trigger_ids = set()
    for event_ann in ann_obj.get_events():
        trigger_ids.add(event_ann.trigger)
        j_dic['events'].append([
            six.text_type(event_ann.id),
            six.text_type(event_ann.trigger), event_ann.args
        ])

    for rel_ann in ann_obj.get_relations():
        j_dic['relations'].append([
            six.text_type(rel_ann.id),
            six.text_type(rel_ann.type),
            [(rel_ann.arg1l, rel_ann.arg1), (rel_ann.arg2l, rel_ann.arg2)]
        ])

    for tb_ann in ann_obj.get_textbounds():
        #j_tb = [unicode(tb_ann.id), tb_ann.type, tb_ann.start, tb_ann.end]
        j_tb = [six.text_type(tb_ann.id), tb_ann.type, tb_ann.spans]

        # If we spotted it in the previous pass as a trigger for an
        # event or if the type is known to be an event type, we add it
        # as a json trigger.
        # TODO: proper handling of disconnected triggers. Currently
        # these will be erroneously passed as 'entities'
        if six.text_type(tb_ann.id) in trigger_ids:
            j_dic['triggers'].append(j_tb)
            # special case for BioNLP ST 2013 format: send triggers
            # also as entities for those triggers that are referenced
            # from annotations other than events (#926).
            if BIONLP_ST_2013_COMPATIBILITY:
                if tb_ann.id in ann_obj.externally_referenced_triggers:
                    try:
                        j_dic['entities'].append(j_tb)
                    except KeyError:
                        j_dic['entities'] = [
                            j_tb,
                        ]
        else:
            try:
                j_dic['entities'].append(j_tb)
            except KeyError:
                j_dic['entities'] = [
                    j_tb,
                ]

    for eq_ann in ann_obj.get_equivs():
        j_dic['equivs'].append(
            (['*', eq_ann.type] + [e for e in eq_ann.entities]))

    for att_ann in ann_obj.get_attributes():
        j_dic['attributes'].append([
            six.text_type(att_ann.id),
            six.text_type(att_ann.type),
            six.text_type(att_ann.target), att_ann.value
        ])

    for norm_ann in ann_obj.get_normalizations():
        j_dic['normalizations'].append([
            six.text_type(norm_ann.id),
            six.text_type(norm_ann.type),
            six.text_type(norm_ann.target),
            six.text_type(norm_ann.refdb),
            six.text_type(norm_ann.refid),
            six.text_type(norm_ann.reftext)
        ])

    for com_ann in ann_obj.get_oneline_comments():
        comment = [
            six.text_type(com_ann.target),
            six.text_type(com_ann.type),
            com_ann.tail.strip()
        ]
        tmp = j_dic.get('comments', [])
        tmp.append(comment)
        j_dic['comments'] = tmp

    if ann_obj.failed_lines:
        # The line number is off by one
        error_msg = 'Unable to parse the following line(s):\n%s' % ('\n'.join(
            [('%i: %s' %
              (line_num + 1, six.text_type(ann_obj[line_num]))).strip()
             for line_num in ann_obj.failed_lines]))
        Messager.error(error_msg, duration=len(ann_obj.failed_lines) * 3)

    j_dic['mtime'] = ann_obj.ann_mtime
    j_dic['ctime'] = ann_obj.ann_ctime

    try:
        # XXX avoid digging the directory from the ann_obj
        docdir = os.path.dirname(ann_obj._document)
        if options_get_validation(docdir) in (
                'all',
                'full',
        ):
            projectconf = ProjectConfiguration(docdir)
            issues = verify_annotation(ann_obj, projectconf)
        else:
            issues = []
    except Exception as exception:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: verify_annotation() failed: %s' % exception, -1)

    for i in issues:
        issue = (six.text_type(i.ann_id), i.type, i.description)
        tmp = j_dic.get('comments', [])
        tmp.append(issue)
        j_dic['comments'] = tmp

    # Attach the source files for the annotations and text
    ann_files = [splitext(p)[1][1:] for p in ann_obj.input_files]
    ann_files.append(TEXT_FILE_SUFFIX)
    ann_files = [p for p in set(ann_files)]
    ann_files.sort()
    j_dic['source_files'] = ann_files
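Two details worth noting: the function appends to j_dic['events'], j_dic['relations'] and similar keys without creating them (only 'entities' and 'comments' are created on demand), so the caller apparently pre-initialises those lists; and the result mirrors the document JSON a brat-style client consumes. An illustrative sketch of the shape, with invented IDs and types:

# j_dic after _enrich_json_with_data(j_dic, ann_obj), roughly:
#   'events':     [['E1', 'T2', [...]], ...]
#   'relations':  [['R1', 'Part-of', [('Arg1', 'T3'), ('Arg2', 'T4')]], ...]
#   'triggers':   [['T2', 'Binding', [(0, 7)]], ...]  # same shape as 'entities'
#   'equivs':     [['*', 'Equiv', 'T3', 'T4'], ...]
#   'attributes': [['A1', 'Negation', 'E1', True], ...]
#   'comments':   [['T3', 'AnnotatorNotes', 'check this'], ...]
# plus 'mtime', 'ctime' and a sorted 'source_files' list such as ['ann', 'txt']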
Example #8
    def json(self, json_dic):
        json_dic['exception'] = 'accessDenied'
        # TODO: Client should be responsible here
        Messager.error('Access Denied')
        return json_dic
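This reads like the json hook of a ProtocolError-style exception class whose name the snippet does not show; a dispatcher would merge it into the outgoing response. A sketch with a hypothetical class name:

json_dic = {'action': 'getDocument'}           # hypothetical partial response
json_dic = AccessDeniedError().json(json_dic)  # class name is an assumption
# json_dic['exception'] == 'accessDenied'; the queued 'Access Denied' message
# reaches the client once Messager.output_json(json_dic) is called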