Ejemplo n.º 1
0
    def demand(self,
               cls,
               prop,
               isfull=False,
               isemail=False,
               isdate=False,
               maxlen=None,
               type=None):
        """Check attribute ``prop`` of ``cls`` and record every rule it breaks.

        Each violation is appended to this collection (``self += ...``) as a
        ``brokenrule`` built from a message, the property name and the name of
        the rule that was broken.

        :param cls: object whose attribute is validated
        :param prop: name of the attribute to read from ``cls``
        :param isfull: require a value that is neither ``None`` nor a blank
            string ('full' rule)
        :param isemail: require a string shaped like an e-mail address
            ('valid' rule)
        :param isdate: require the value's exact class to be ``datetime``
            ('valid' rule)
        :param maxlen: maximum permitted ``len()`` of the value ('fits' rule)
        :param type: exact class a non-``None`` value must have ('valid' rule)
        """
        v = getattr(cls, prop)

        if type is not None and v is not None:
            if builtins.type(v) is not type:
                self += brokenrule(prop + ' is wrong type', prop, 'valid')

        if isfull:
            if v is None or (builtins.type(v) is str and v.strip() == ''):
                self += brokenrule(prop + ' is empty', prop, 'full')

        if isemail:
            pattern = r'[^@]+@[^@]+\.[^@]+'
            # re.match() raises TypeError for anything that is not a string,
            # so a non-string value is reported as invalid instead of
            # crashing the validation.
            if not isinstance(v, str) or not re.match(pattern, v):
                self += brokenrule(prop + ' is invalid', prop, 'valid')

        if maxlen is not None and v is not None and len(v) > maxlen:
            # property can only break the 'fits' rule if it hasn't broken
            # the 'full' rule. E.g., a property can be a string of
            # whitespaces which may break the 'full' rule. In that case,
            # a broken 'fits' rule wouldn't make sense.
            if not self.contains(prop, 'full'):
                self += brokenrule(prop + ' is too lengthy', prop, 'fits')

        if isdate:
            if builtins.type(v) is not datetime:
                self += brokenrule(prop + " isn't a date", prop, 'valid')
Ejemplo n.º 2
0
def checker_a():
    """Print every element of the module-level ``colors`` with its type.

    A second line, "string in here", is printed after each element that is
    a string.
    """
    for m in colors:
        print(m, type(m))
        # isinstance() is the direct type test; comparing str(type(m)) to a
        # repr string is brittle.
        if isinstance(m, str):
            print("string in here")
Ejemplo n.º 3
0
def _typeOf(x):
    """Return the type recorded for ``x``.

    An object carrying a ``_t`` attribute reports that attribute. Otherwise
    the Python class is taken -- looking through a ``_Proxy`` to the class of
    its ``_target`` -- and translated via ``_BonesTByPythonT``; a class with
    no entry in that mapping is returned unchanged.
    """
    if hasattr(x, '_t'):
        return x._t
    subject = x._target if builtins.type(x) is _Proxy else x
    python_type = builtins.type(subject)
    return _BonesTByPythonT.get(python_type, python_type)
Ejemplo n.º 4
0
def print_dict(dict_):
    """Print the key/value types of ``dict_`` and descend into containers.

    For every item the types of the key and of the value are printed. A
    value that is itself a dict is printed recursively; a value that is a
    list has each of its elements printed on a separate line.
    """
    for key, val in dict_.items():
        print(type(key), type(val))
        # Dict keys must be hashable, so a key can never be a dict or a
        # list; the containers worth descending into are the values.
        if isinstance(val, dict):
            print_dict(val)
        elif isinstance(val, list):
            for item in val:
                print(item)
Ejemplo n.º 5
0
def minus(o1,o2):
    """Subtract ``o2`` from ``o1``, element-wise when a plain list is involved.

    * either operand ``None``  -> ``None``
    * list minus list          -> items of ``o1`` that are not in ``o2``
    * list minus scalar        -> items of ``o1`` different from ``o2``
    * scalar minus list        -> items of ``o2`` different from ``o1``
    * anything else            -> ``o1 - o2``

    Only exact ``list`` instances count as lists; subclasses take the
    ``o1 - o2`` path.
    """
    if o1 is None or o2 is None:
        return None

    left_is_list = builtins.type(o1) is list
    right_is_list = builtins.type(o2) is list

    if left_is_list and right_is_list:
        return [item for item in o1 if item not in o2]
    if left_is_list:
        return [item for item in o1 if item != o2]
    if right_is_list:
        return [item for item in o2 if item != o1]
    return o1 - o2
Ejemplo n.º 6
0
def inherits_from(obj, a_class):
    """
    Tell whether obj is an instance of a class derived from a_class
    :param obj: Object whose class is examined
    :param a_class: Candidate ancestor class
    :return: True if the class of obj is a subclass of a_class other than
        a_class itself, False otherwise
    """
    obj_class = type(obj)
    return issubclass(obj_class, a_class) and obj_class is not a_class
Ejemplo n.º 7
0
def type(o):
    """Return a short name for the kind of value ``o`` is.

    Exact instances of ``list``, ``str``, ``float``/``int`` and ``dict`` map
    to "list", "string", "number" and "dictionary". Failing that, an
    instance of ``uxadt.Value`` is a "pattern", any callable is a
    "function", and everything else is "null".
    """
    exact = builtins.type(o)
    # Identity comparison on the exact class: subclasses (bool included) do
    # not match these entries.
    for python_type, label in (
        (list, "list"),
        (str, "string"),
        (float, "number"),
        (int, "number"),
        (dict, "dictionary"),
    ):
        if exact is python_type:
            return label
    if isinstance(o, uxadt.Value):
        return "pattern"
    return "function" if callable(o) else "null"
Ejemplo n.º 8
0
    def __init__(
        self,
        default: T = None,
        *,
        type: type = None,
        choices: Sequence[T] = None,
        required: bool = False,
        nargs: Union[int, str] = None,
        help: str = None,
        metavar: Union[str, Tuple[str, ...]] = None,
    ) -> None:
        """Describe one argument, inferring its type when none is given.

        When ``type`` is omitted it is taken from ``default`` (from its first
        element when ``default`` is a non-empty list or tuple), or else from
        the first entry of ``choices``.

        :param default: default value, handed to ``set_default``
        :param type: value type; inferred when ``None``
        :param choices: permitted values, each an instance of ``type``
        :param required: stored unchanged
        :param nargs: stored unchanged; defaults to ``'+'`` when ``default``
            is a list or tuple
        :param help: stored unchanged
        :param metavar: stored unchanged
        :raises InferTypeError: if no type is given and none can be inferred
        :raises ValueError: if a choice is not an instance of ``type``, or if
            ``type`` is ``bool`` and ``default`` is not ``False``
        """
        # infer type
        if type is None:
            if default is not None:
                is_filled_sequence = (isinstance(default, (list, tuple))
                                      and len(default) > 0)
                sample = default[0] if is_filled_sequence else default
                type = builtins.type(sample)
            elif choices is not None and len(choices) > 0:
                type = builtins.type(choices[0])

        if type is None:
            raise InferTypeError('failed to infer type ({} {})'.format(
                default, choices))

        # check type
        if choices is not None:
            for v in choices:
                if not isinstance(v, type):
                    # Report the offending choice itself, not ``default``.
                    raise ValueError(
                        'type of value {} in choices is not {}'.format(
                            v, type))

        # set nargs
        if nargs is None and isinstance(default, (list, tuple)):
            nargs = '+'

        # check bool: equality (not identity) is kept on purpose so the set
        # of accepted defaults stays exactly what it was.
        if type == bool and default != False:  # noqa: E712
            raise ValueError('bool type only supports store_true action.')

        self.type = type
        self.choices = choices
        self.required = required
        self.nargs = nargs
        self.help = help
        self.metavar = metavar
        self.set_default(default)
Ejemplo n.º 9
0
    def __init__(self,
                 option_strings,
                 dest,
                 nargs=None,
                 default=None,
                 type=None,
                 help=None,
                 metavar=None,
                 sort=False,
                 min=None,
                 max=None,
                 **kwargs):
        """Set up an action whose single argument is a comma-separated list.

        ``nargs`` is kept as ``array_size``; the parent action is always
        initialised with ``nargs=1`` and a converter that splits the argument
        on commas and applies ``type`` to each piece.

        :param nargs: must be an ``int``; stored as ``self.array_size``
        :param default: list of default values, each converted with ``type``
        :param type: element converter; taken from the class of
            ``default[0]`` when omitted
        :param sort: when truthy, parsed values are returned sorted
        :param min: lower bound checked against the converted defaults
        :param max: upper bound checked against the converted defaults
        :raises ValueError: if ``nargs`` is not an ``int``, if ``type`` has to
            be inferred from an empty ``default``, or if a default lies
            outside ``min``/``max``
        :raises TypeError: if ``default`` is not a list
        """
        if builtins.type(nargs) is not int:
            raise ValueError(
                'Invalid `nargs`: multiple arguments only allowed')
        self.array_size = nargs

        if builtins.type(default) is not list:
            # ``type`` may still be None at this point, so its name cannot be
            # dereferenced unconditionally while building the message.
            element = ('' if type is None
                       else getattr(type, '__name__', repr(type)) + ' ')
            raise TypeError(
                f'Invalid `default`: list of {element}values is required')

        if type is None:
            if not default:
                # Without any element there is nothing to infer from.
                raise ValueError(
                    'Invalid `default`: cannot infer `type` from an empty list')
            type = builtins.type(default[0])

        default = list(map(type, default))

        if min is not None and builtins.min(default) < min:
            raise ValueError(
                'Invalid `default`: attemp to set value below `min`')
        if max is not None and max < builtins.max(default):
            raise ValueError(
                'Invalid `default`: attemp to set value over `max`')

        self.min = min
        self.max = max

        def type_func(x):
            # Split the raw argument and convert every piece.
            values = list(map(type, x.split(',')))
            return sorted(values) if sort else values

        super(ArrayAction, self).__init__(option_strings,
                                          dest,
                                          nargs=1,
                                          default=default,
                                          type=type_func,
                                          help=help,
                                          metavar=metavar,
                                          **kwargs)
Ejemplo n.º 10
0
 def __str__(self):
     """Render the base message followed by the value, its class and the accepted types."""
     prefix = super().__str__()
     template = (": value {!r} is an instance of {} "
                 "but should be an instance of: {}")
     actual = type(self.__value).__qualname__
     expected = ", ".join(type_names(self.__type))
     return prefix + template.format(self.__value, actual, expected)
Ejemplo n.º 11
0
 def __init__(self,
              data: Union[list, dict],
              description: str = None,
              type: str = None):
     """Store ``data`` along with a type label and a description.

     A falsy ``type`` is replaced by the class name of ``data``; a falsy
     ``description`` is replaced by the resulting type label.
     """
     self.data = data
     if type:
         self.type = type
     else:
         self.type = builtins.type(data).__name__
     self.description = description if description else self.type
Ejemplo n.º 12
0
    def get(self, name, type='model'):
        '''
        Look an object up in the pool of the current database

        :param name: the object name
        :param type: the kind of object; '*' searches every registered kind
        :return: the instance

        An unknown name of kind 'report' is registered on the fly as an
        empty subclass of Report and then returned; any other unknown name
        raises KeyError.
        '''
        if type == '*':
            # Settle on the first kind that knows `name`; when none does,
            # the last kind visited is kept.
            for kind in self.classes.keys():
                type = kind
                if name in self._pool[self.database_name][kind]:
                    break
        try:
            return self._pool[self.database_name][type][name]
        except KeyError:
            if type != 'report':
                raise
            from trytond.report import Report
            # The 'type' argument hides the builtin, hence builtins.type
            cls = builtins.type(name, (Report, ), {'__slots__': ()})
            cls.__setup__()
            cls.__post_setup__()
            self.add(cls, type)
            self.setup_mixin(self._modules, type='report', name=name)
            return self.get(name, type=type)
Ejemplo n.º 13
0
def _index(obj: object, /) -> int:
    """Convert an object to an int without losing information.

    An int instance is handed back untouched. Any other object must provide
    __index__() on its type, and that method must return a direct int
    instance; otherwise TypeError is raised.
    """
    # https://github.com/python/cpython/blob/v3.8.3/Objects/abstract.c#L1260-L1302
    if isinstance(obj, int):
        return obj

    cls = builtins.type(obj)
    try:
        index_method = _mro_getattr(cls, "__index__")
    except AttributeError:
        raise TypeError(
            f"{cls!r} cannot be interpreted as an integer "
            "(must be either a subclass of 'int' or have an __index__() method)"
        )

    result = index_method(obj)
    # Returning a subclass of int is deprecated in CPython.
    if result.__class__ is not int:
        raise TypeError(
            f"expected an 'int', not {builtins.type(result).__name__!r}")
    return result
Ejemplo n.º 14
0
def _is_true(obj: Any, /) -> bool:
    """Report whether the argument counts as true.

    The type's __bool__() is consulted first and must return a bool. Without
    __bool__(), a type that defines __len__() is true when its length is
    positive. An object with neither is true.
    """
    if obj is True or obj is False:
        return obj
    if obj is None:
        return False

    cls = builtins.type(obj)
    try:
        bool_method = _mro_getattr(cls, "__bool__")
    except AttributeError:
        # No __bool__(): length decides, but only when the type has one.
        try:
            _mro_getattr(cls, "__len__")
        except AttributeError:
            # Neither hook exists, so the object is true.
            return True
        return len(obj) > 0

    verdict = bool_method(obj)
    if not isinstance(verdict, bool):
        raise TypeError(
            f"expected a 'bool' from {cls.__name__}.__bool__(), "
            f"not {builtins.type(verdict).__name__!r}")
    # Coerce into True or False.
    return _is_true(verdict)
Ejemplo n.º 15
0
    def __call__(
            self,
            name: _TYPE_FP_NAME = None,
            *,
            type: _TYPE_FP_TYPE = empty,
            converter: _TYPE_FP_CONVERTER = None,
            validator: _TYPE_FP_VALIDATOR = None,
            metadata: typing.Optional[_TYPE_FP_METADATA] = None
        ) -> 'VarKeyword':
        """
        Build a fresh instance of this object's own class from the given
        arguments, so that an existing
        :class:`~forge._parameter.VarKeyword` can be used as a factory::

            **kwargs(
                converter=lambda ctx, name, value:
                    {'_' + k: v for k, v in value.items()},
            )

        :param name: see :paramref:`~forge.FParameter.name`
        :param type: see :paramref:`~forge.FParameter.type`
        :param converter: see :paramref:`~forge.FParameter.converter`
        :param validator: see :paramref:`~forge.FParameter.validator`
        :param metadata: see :paramref:`~forge.FParameter.metadata`
        :returns: a new instance of :class:`~forge._parameter.VarKeyword`
        """
        # pylint: disable=W0622, redefined-builtin
        # The ``type`` argument hides the builtin, hence ``builtins.type``.
        factory = builtins.type(self)
        settings = {
            'name': name,
            'type': type,
            'converter': converter,
            'validator': validator,
            'metadata': metadata,
        }
        return factory(**settings)
Ejemplo n.º 16
0
def _check_requirement(name : str, value, req : Union[str, Tuple[str]], type=None):
    """Validate ``value`` against an optional type and one or more requirements.

    Each requirement is a string. If ``VAR_NAMES_AND_EMPTY_PATTERN`` finds no
    placeholder in it, the requirement is appended to the literal ``value``
    (for example ``'> 0'`` becomes ``'value> 0'``). Otherwise empty
    placeholders and placeholders named ``name`` are filled with ``value``
    through ``str.format``. The resulting expression is evaluated and must
    be truthy.

    :param name: argument name accepted as a placeholder
    :param value: value being checked
    :param req: one requirement string, or a tuple/list of them
    :param type: class (or tuple of classes) ``value`` must be an instance of
    :raises TypeError: if ``value`` is not an instance of ``type``; the
        exception carries the actual class of ``value``
    :raises argreq.errors.InvalidArgumentError: if a requirement names a
        placeholder other than ``name``
    :raises argreq.errors.NotMetRequirementError: if a requirement evaluates
        to a falsy result
    """
    if type is not None and not isinstance(value, type):
        # The ``type`` parameter hides the builtin here. Calling it would
        # attempt a conversion (e.g. int('abc')) rather than report the
        # class of ``value``.
        raise TypeError(builtins.type(value))

    def check(_req):
        search_res = VAR_NAMES_AND_EMPTY_PATTERN.findall(_req)
        if not search_res:
            expression = f'value{_req}'
        else:
            format_args = []
            format_kwargs = {}
            for _, arg_name in search_res:
                if arg_name == '':
                    format_args.append(value)
                elif arg_name == name:
                    format_kwargs[name] = value
                else:
                    raise argreq.errors.InvalidArgumentError(name, arg_name, _req)
            expression = _req.format(*format_args, **format_kwargs)
        # NOTE(security): eval() runs the requirement text as Python code, so
        # requirement strings must never come from untrusted input.
        if not eval(expression):
            raise argreq.errors.NotMetRequirementError(_req, expression)

    if isinstance(req, (tuple, list)):
        for _req in req:
            check(_req)
    else:
        check(req)
Ejemplo n.º 17
0
    def _add_entry(self, name=None, type=None, token_object=None, eval=None):
        """Register a new entry called ``name`` in this symbol table.

        If the name is already present, an ``Errors`` object for a
        'RedeclarationError' is created and ``None`` is returned. Otherwise
        an ``Entry`` is stored under ``name`` and returned; for every type
        whose ``class_type`` is not "FunctionType" the entry's offset and
        the table width are updated as well.
        """
        assert name, "name not provided for entry"
        assert type is not None, "type not specified for entry"
        assert isinstance(type, Type), (
            f"type: '{type!s}' is not of Type class, provided: "
            f"{builtins.type(type)!s}")

        if self.table.get(name):
            Errors(errorType='RedeclarationError',
                   errorText='variable already declared',
                   token_object=token_object)
            return None

        self.table[name] = Entry(name=name,
                                 type=type,
                                 symbol_table=self,
                                 token_object=token_object,
                                 eval=eval)
        entry = self.table[name]

        if type.class_type != "FunctionType":
            # update offset, then width
            entry.offset = self._update_offset(entry)
            self._update_width(entry)

        return entry
Ejemplo n.º 18
0
 def __init__(
     self,
     option_strings,
     dest,
     default=None,
     type=None,  # pylint: disable=redefined-builtin
     required=False,
     help=None,  # pylint: disable=redefined-builtin
     metavar=None,
 ):
     """Initialise the action with choices derived from the members of ``type``.

     When ``type`` is omitted, the class of ``default`` is used, and
     ``default`` must then not be ``None``. Every member obtained by
     iterating ``type`` is passed through ``self.__format`` to build the
     ``choices`` handed to the parent initialiser.
     """
     if type is None:
         type = builtins.type(ASSERT.not_none(default))
     formatted_choices = list(map(self.__format, type))
     # ``type`` is deliberately not forwarded to the parent initialiser
     # (the rationale is given elsewhere in this file).
     super().__init__(
         option_strings=option_strings,
         dest=dest,
         default=default,
         choices=formatted_choices,
         required=required,
         help=help,
         metavar=metavar,
     )
     # ``default_string`` holds the formatted default, or None without one.
     self.default_string = (
         self.__format(default) if default is not None else None)
     self.__type = type
Ejemplo n.º 19
0
def littleendian(ptype):
    '''Return a new class copied from `ptype` with its byteorder set to little-endian.

    `ptype` has to be a proper subclass of `type` (as that name is bound in
    this module); anything else raises error.TypeError. The new class keeps
    the name and bases of `ptype`.
    '''
    is_proper_subclass = issubclass(ptype, type) and ptype is not type
    if not is_proper_subclass:
        raise error.TypeError(ptype, 'littleendian')
    namespace = dict(ptype.__dict__)
    namespace['byteorder'] = config.byteorder.littleendian
    return builtins.type(ptype.__name__, ptype.__bases__, namespace)
Ejemplo n.º 20
0
def replace_pydantic_types(type_: Any):
    """Swap pydantic models found in ``type_`` for their strawberry types.

    Types that have ``__args__`` are rebuilt through ``copy_with`` after each
    argument has been processed recursively. A ``BaseModel`` subclass is
    replaced by its ``_strawberry_type``. Anything else, ``Literal`` types
    included, is returned unchanged.

    Raises ``UnregisteredTypeException`` for a ``BaseModel`` subclass that
    has no ``_strawberry_type`` attribute.
    """
    if getattr(type_, "__origin__", None) is Literal:
        # Literal does not have types in its __args__ so we return early
        return type_

    if not hasattr(type_, "__args__"):
        if not issubclass(type_, BaseModel):
            return type_
        if hasattr(type_, "_strawberry_type"):
            return type_._strawberry_type
        raise UnregisteredTypeException(type_)

    copy_with = type_.copy_with
    replaced_type = copy_with(
        tuple(replace_pydantic_types(arg) for arg in type_.__args__)
    )

    if isinstance(replaced_type, TypeDefinition):
        # TODO: Not sure if this is necessary. No coverage in tests
        # TODO: Unnecessary with StrawberryObject
        namespace = {"_type_definition": replaced_type}
        replaced_type = builtins.type(replaced_type.name, (), namespace)

    return replaced_type
Ejemplo n.º 21
0
 def __ror__(self, instance):  # instance | type
     """Reject a left-hand operand whose type is not a ``BTIntersection``.

     The operand's type is its ``_t`` attribute when present, otherwise its
     Python class.
     NOTE(review): only this validation is visible here; as written the
     method returns None when the check passes.
     """
     if hasattr(instance, '_t'):
         t = instance._t
     else:
         t = builtins.type(instance)
     if not isinstance(t, BTIntersection):
         raise TypeError(
             f'Can only subtract a type from an intersection but LHS type is {t}'
         )
Ejemplo n.º 22
0
 def form_valid(self, form):
     """Attach the logged-in user as author, then continue normal processing.

     Requests from users that are not authenticated are redirected to
     '/blog/' instead.
     """
     current_user = self.request.user
     if not current_user.is_authenticated:
         return redirect('/blog/')
     form.instance.author = current_user
     # Zero-argument super() resolves against the class that defines this
     # method. super(type(self), self) resolves against the runtime class
     # and recurses forever as soon as this view is subclassed.
     return super().form_valid(form)
Ejemplo n.º 23
0
def getattr(obj: object, attr: str, default: Any = _NOTHING, /) -> Any:
    """Implement attribute access via  __getattribute__ and __getattr__.

    ``__getattribute__`` of the object's type is tried first. If it raises
    AttributeError and the type defines ``__getattr__``, that hook is tried
    next. When both fail with AttributeError, ``default`` is returned if one
    was supplied; otherwise the most recent AttributeError is raised.

    Raises TypeError when ``attr`` is not a str.
    """
    # Python/bltinmodule.c:builtin_getattr
    if not isinstance(attr, str):
        raise TypeError("attribute name must be a 'str'")

    obj_type = builtins.type(obj)
    getattribute = _mro_getattr(obj_type, "__getattribute__")
    try:
        return getattribute(obj, attr)
    except AttributeError as exc:
        attr_exc = exc
    # Objects/typeobject.c:slot_tp_getattr_hook
    # It is cheating to do this here as CPython actually rebinds the tp_getattro
    # slot with a wrapper that handles __getattr__() when present.
    try:
        getattr_ = _mro_getattr(obj_type, "__getattr__")
    except AttributeError:
        pass
    else:
        try:
            return getattr_(obj, attr)
        except AttributeError as exc:
            # The builtin falls back to the default for an AttributeError
            # raised by __getattr__() as well, so it must not escape while
            # a default is available.
            attr_exc = exc

    if default is not _NOTHING:
        return default
    raise attr_exc
Ejemplo n.º 24
0
def try_to_push_complex_type(val):
    """Push ``val`` with ``push_xarray_dataset`` when it is an xarray Dataset.

    Raises TypeError, carrying the class of ``val``, for any other value.
    """
    import xarray as xr
    if not isinstance(val, xr.core.dataset.Dataset):
        raise TypeError('Cannot push this type of argument to Metview: ',
                        builtins.type(val))
    push_xarray_dataset(val)
Ejemplo n.º 25
0
    def getNeighbors(self, id, depth=1, blankNodes=False, relationshipType=None, direction='BOTH', project='*', callback=None, output='application/json'):
        """ Fetch the neighbors of a node from: /graph/neighbors/{id}

            Arguments:
            id: Either a CURIE or an IRI; '/' and '#' are percent-encoded
            when it starts with 'http:'
            depth: How far to traverse neighbors
            blankNodes: Traverse blank nodes
            relationshipType: Which relationship to traverse
            direction: Which direction to traverse: INCOMING, OUTGOING, BOTH (default). Only used if relationshipType is specified.
            project: Which properties to project. Defaults to '*'.
            callback: Name of the JSONP callback ('fn' by default). Supplying
            it, or requesting a javascript media type, makes the service
            render a JSONP response.
            outputs:
                application/json
                application/graphson
                application/xml
                application/graphml+xml
                application/xgmml
                text/gml
                text/csv
                text/tab-separated-values
                image/jpeg
                image/png

            A falsy response is replaced by an empty nodes/edges mapping.
        """

        if id and id.startswith('http:'):
            id = id.replace('/','%2F').replace('#','%23')
        raw_args = {
            'id': id,
            'depth': depth,
            'blankNodes': blankNodes,
            'relationshipType': relationshipType,
            'direction': direction,
            'project': project,
            'callback': callback,
        }
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest('id', **kwargs)
        endpoint = self._basePath + '/graph/neighbors/{id}'.format(**kwargs)
        # the id travels in the path, everything else as request parameters
        query = {key: value for key, value in kwargs.items() if key != 'id'}
        response = self._get('GET', endpoint, query, output)
        return response or {'nodes':[], 'edges':[]}
Ejemplo n.º 26
0
    def annotatePost(self, content, includeCat=None, excludeCat=None, minLength=4, longestOnly=False, includeAbbrev=False, includeAcronym=False, includeNumbers=False, ignoreTag=None, stylesheet=None, scripts=None, targetId=None, targetClass=None):
        """ Annotate text by POSTing to: /annotations

            Arguments:
            content: The content to annotate
            includeCat: A set of categories to include
            excludeCat: A set of categories to exclude
            minLength: The minimum number of characters in annotated entities
            longestOnly: Should only the longest entity be returned for an overlapping group
            includeAbbrev: Should abbreviations be included
            includeAcronym: Should acronyms be included
            includeNumbers: Should numbers be included
            ignoreTag: HTML tags that should not be annotated
            stylesheet: CSS stylesheets to add to the HEAD
            scripts: JavaScripts to add to the HEAD
            targetId: A set of element IDs to annotate
            targetClass: A set of CSS class names to annotate

            A falsy response is returned as None.
        """

        raw_args = {
            'content': content,
            'includeCat': includeCat,
            'excludeCat': excludeCat,
            'minLength': minLength,
            'longestOnly': longestOnly,
            'includeAbbrev': includeAbbrev,
            'includeAcronym': includeAcronym,
            'includeNumbers': includeNumbers,
            'ignoreTag': ignoreTag,
            'stylesheet': stylesheet,
            'scripts': scripts,
            'targetId': targetId,
            'targetClass': targetClass,
        }
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest(None, **kwargs)
        endpoint = self._basePath + '/annotations'.format(**kwargs)
        response = self._get('POST', endpoint, kwargs)
        return response or None
Ejemplo n.º 27
0
    def getEdges(self, type, entail=True, limit=100, skip=0, callback=None, output='application/json'):
        """ Fetch the nodes joined by an edge type from: /graph/edges/{type}

            Arguments:
            type: The type of the edge; '/' and '#' are percent-encoded when
            it starts with 'http:'
            entail: Should subproperties and equivalent properties be included
            limit: The number of edges to be returned
            skip: The number of edges to skip
            callback: Name of the JSONP callback ('fn' by default). Supplying
            it, or requesting a javascript media type, makes the service
            render a JSONP response.
            outputs:
                application/json
                application/graphson
                application/xml
                application/graphml+xml
                application/xgmml
                text/gml
                text/csv
                text/tab-separated-values
                image/jpeg
                image/png

            A falsy response is replaced by an empty nodes/edges mapping.
        """

        if type and type.startswith('http:'):
            type = type.replace('/','%2F').replace('#','%23')
        raw_args = {
            'type': type,
            'entail': entail,
            'limit': limit,
            'skip': skip,
            'callback': callback,
        }
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest('type', **kwargs)
        endpoint = self._basePath + '/graph/edges/{type}'.format(**kwargs)
        # the edge type travels in the path, the rest as request parameters
        query = {key: value for key, value in kwargs.items() if key != 'type'}
        response = self._get('GET', endpoint, query, output)
        return response or {'nodes':[], 'edges':[]}
Ejemplo n.º 28
0
    def suggestFromTerm(self, query=None, queries=None, callback=None):
        """ Reconcile terms through: /refine/reconcile

            Arguments:
            query: A single reconciliation query, in either of these forms:
                http://foo.com/bar/reconcile?query=...string...
                http://foo.com/bar/reconcile?query={...json object literal...}
            A plain string is shorthand for query={"query":...string...}.
            NOTE: API consumers are encouraged to treat the single query mode
            as DEPRECATED. Refine only uses the multiple query mode, but
            other consumers may use the single query option because it is
            part of the spec.
            queries: Several reconciliation queries at once:
                http://foo.com/bar/reconcile?queries={...json object literal...}
            The json object literal has zero or more key/value pairs with
            arbitrary keys, each value in the same format as a single query,
            e.g.
                http://foo.com/bar/reconcile?queries={ "q0" : { "query" : "foo" },
                "q1" : { "query" : "bar" } }
            where "q0" and "q1" can be arbitrary strings.
            callback: Name of the JSONP callback ('fn' by default). Supplying
            it, or requesting a javascript media type, makes the service
            render a JSONP response.

            A falsy response is returned as None.
        """

        raw_args = {'query': query, 'queries': queries, 'callback': callback}
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest(None, **kwargs)
        endpoint = self._basePath + '/refine/reconcile'.format(**kwargs)
        response = self._get('GET', endpoint, kwargs)
        return response or None
Ejemplo n.º 29
0
def dataset_to_fieldset(val, **kwarg):
    """Convert an xarray Dataset into a fieldset by way of a GRIB file.

    The dataset is written with ``cfgrib.to_grib`` to a temporary ``.grib``
    file, which is then loaded with ``read``.

    :param val: the ``xr.core.dataset.Dataset`` to convert
    :param kwarg: extra keyword arguments forwarded to ``cfgrib.to_grib``
    :return: the result of ``read`` on the temporary file
    :raises TypeError: if ``val`` is not an xarray Dataset
    """
    # we try to import xarray as locally as possible to reduce startup time
    # try to write the xarray as a GRIB file, then read into a fieldset
    import xarray as xr
    import cfgrib

    if not isinstance(val, xr.core.dataset.Dataset):
        raise TypeError(
            'dataset_to_fieldset requires a variable of type xr.core.dataset.Dataset;'
            ' was supplied with ', builtins.type(val))

    f, tmp = tempfile.mkstemp(".grib")
    os.close(f)

    try:
        # could add keys, e.g. grib_keys={'centre': 'ecmf'})
        cfgrib.to_grib(val, tmp, **kwarg)
    except BaseException:
        print(
            "Error trying to write xarray dataset to GRIB for conversion to Metview Fieldset"
        )
        # Nothing will ever read the file after a failed conversion, so it
        # is removed rather than left behind in the temp directory.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise

    # TODO: tell Metview that this is a temporary file that should be deleted when no longer needed
    return read(tmp)
Ejemplo n.º 30
0
    def __call__(
        cls,
        value,
        names=None,
        *,
        module=None,
        qualname=None,
        type=None,
        start=1,
    ):
        """Look a member up by lower-cased value, or create a new Enum class.

        Without ``names``: a ``str`` is lower-cased and looked up through the
        parent ``__call__``, an instance of ``cls`` is returned as is, and
        anything else raises ValueError. With ``names``: everything is
        forwarded to ``cls._create_``.
        """
        if names is not None:
            # functional API: build a new Enum class
            return cls._create_(
                value,
                names,
                module=module,
                qualname=qualname,
                type=type,
                start=start,
            )

        # simple value lookup
        if isinstance(value, str):
            return super().__call__(value.lower())
        if isinstance(value, cls):
            return value
        raise ValueError(
            trans._(
                '{class_name} may only be called with a `str` or an instance of {class_name}. Got {dtype}',
                deferred=True,
                class_name=cls,
                dtype=builtins.type(value),
            ))
Ejemplo n.º 31
0
    def getNode(self, id, project='*', callback=None, output='application/json'):
        """ Fetch all properties of a node from: /graph/{id}

            Arguments:
            id: Either a CURIE or an IRI; '/' and '#' are percent-encoded
            when it starts with 'http:'
            project: Which properties to project. Defaults to '*'.
            callback: Name of the JSONP callback ('fn' by default). Supplying
            it, or requesting a javascript media type, makes the service
            render a JSONP response.
            outputs:
                application/json
                application/graphson
                application/xml
                application/graphml+xml
                application/xgmml
                text/gml
                text/csv
                text/tab-separated-values
                image/jpeg
                image/png

            A falsy response is replaced by an empty nodes/edges mapping.
        """

        if id and id.startswith('http:'):
            id = id.replace('/','%2F').replace('#','%23')
        raw_args = {'id': id, 'project': project, 'callback': callback}
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest('id', **kwargs)
        endpoint = self._basePath + '/graph/{id}'.format(**kwargs)
        # the id travels in the path, everything else as request parameters
        query = {key: value for key, value in kwargs.items() if key != 'id'}
        response = self._get('GET', endpoint, query, output)
        return response or {'nodes':[], 'edges':[]}
Ejemplo n.º 32
0
    def annotateUrl(self, url, includeCat=None, excludeCat=None, minLength=4, longestOnly=False, includeAbbrev=False, includeAcronym=False, includeNumbers=False, ignoreTag=None, stylesheet=None, scripts=None, targetId=None, targetClass=None, output='text/html'):
        """ Annotate the page at a URL through: /annotations/url

            Arguments:
            url: The URL to annotate; '/' and '#' are percent-encoded when
            it starts with 'http:'
            includeCat: A set of categories to include
            excludeCat: A set of categories to exclude
            minLength: The minimum number of characters in annotated entities
            longestOnly: Should only the longest entity be returned for an overlapping group
            includeAbbrev: Should abbreviations be included
            includeAcronym: Should acronyms be included
            includeNumbers: Should numbers be included
            ignoreTag: HTML tags that should not be annotated
            stylesheet: CSS stylesheets to add to the HEAD
            scripts: JavaScripts to add to the HEAD
            targetId: A set of element IDs to annotate
            targetClass: A set of CSS class names to annotate
            outputs:
                text/html

            A falsy response is returned as None.
        """

        if url and url.startswith('http:'):
            url = url.replace('/','%2F').replace('#','%23')
        raw_args = {
            'url': url,
            'includeCat': includeCat,
            'excludeCat': excludeCat,
            'minLength': minLength,
            'longestOnly': longestOnly,
            'includeAbbrev': includeAbbrev,
            'includeAcronym': includeAcronym,
            'includeNumbers': includeNumbers,
            'ignoreTag': ignoreTag,
            'stylesheet': stylesheet,
            'scripts': scripts,
            'targetId': targetId,
            'targetClass': targetClass,
        }
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest(None, **kwargs)
        endpoint = self._basePath + '/annotations/url'.format(**kwargs)
        response = self._get('GET', endpoint, kwargs, output)
        return response or None
Ejemplo n.º 33
0
  def __init__(self, option_strings, dest, nargs,
    default=None, type=None, choices=None, required=False, help=None,
    metavar=None
  ):
    """Accept ``nargs`` as a ``range`` and map it onto an argparse count.

    The range is stored in ``self.nargs_range``. The parent is initialised
    with ``argparse.ZERO_OR_MORE`` when the range starts at 0 and with
    ``argparse.ONE_OR_MORE`` otherwise.
    """
    assert builtins.type(nargs) is range
    assert len(nargs) > 1
    assert nargs[0] >= 0
    assert nargs[1] > nargs[0]
    self.nargs_range = nargs

    if self.nargs_range[0] == 0:
      parent_nargs = argparse.ZERO_OR_MORE
    else:
      parent_nargs = argparse.ONE_OR_MORE
    super().__init__(option_strings, dest, parent_nargs, None, default,
      type, choices, required, help, metavar)
Ejemplo n.º 34
0
    def getEntities(self, text):
        """ Extract entities from text through: /lexical/entities

            Arguments:
            text: The text from which to extract entities

            A falsy response is replaced by an empty list.
        """

        raw_args = {'text': text}
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest(None, **kwargs)
        endpoint = self._basePath + '/lexical/entities'.format(**kwargs)
        response = self._get('GET', endpoint, kwargs)
        return response or []
Ejemplo n.º 35
0
    def getSentences(self, text):
        """ Split text into sentences through: /lexical/sentences

            Arguments:
            text: The text to split

            A falsy response is replaced by an empty list.
        """

        raw_args = {'text': text}
        # dict arguments are serialised with dumps(); the rest pass through
        kwargs = {
            key: dumps(value) if builtins.type(value) is dict else value
            for key, value in raw_args.items()
        }
        self._make_rest(None, **kwargs)
        endpoint = self._basePath + '/lexical/sentences'.format(**kwargs)
        response = self._get('GET', endpoint, kwargs)
        return response or []
Ejemplo n.º 36
0
    def getCuriePrefixes(self):
        """ Fetch all CURIE prefixes from: /vocabulary/prefixes

            Takes no arguments. A falsy response is replaced by an empty
            list.
        """

        # No arguments exist for this endpoint, so the parameter mapping
        # stays empty.
        kwargs = {}
        self._make_rest(None, **kwargs)
        endpoint = self._basePath + '/vocabulary/prefixes'.format(**kwargs)
        response = self._get('GET', endpoint, kwargs)
        return response or []
Ejemplo n.º 37
0
    def getPos(self, text):
        """ Part-of-speech tag a text. from: /lexical/pos

            Arguments:
            text: The text to tag

            Returns the value produced by self._get, or an empty list when
            that value is falsy.
        """

        params = {'text': text}
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest(None, **params)  # result is not used by this method
        url = self._basePath + '/lexical/pos'.format(**params)
        response = self._get('GET', url, params)
        return response or []
Ejemplo n.º 38
0
    def suggestFromTerm(self, term, limit=1):
        """ Ask for term suggestions from: /vocabulary/suggestions/{term}

            Arguments:
            term: Mispelled term (substituted into the URL path)
            limit: Maximum result count

            Returns the value produced by self._get, or an empty list when
            that value is falsy.
        """

        params = {'term': term, 'limit': limit}
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest('term', **params)  # result is not used by this method
        url = self._basePath + '/vocabulary/suggestions/{term}'.format(**params)
        # The path parameter is already in the URL; send only the rest.
        query = dict(params)
        del query['term']
        response = self._get('GET', url, query)
        return response or []
Ejemplo n.º 39
0
    def findById(self, id):
        """ Look up one concept by its ID from: /vocabulary/id/{id}

            Arguments:
            id: ID to find; a truthy value starting with 'http:' has its
                '/' and '#' characters percent-escaped before it is placed
                in the URL path

            Returns the value produced by self._get, or None when that
            value is falsy.
        """

        if id and id.startswith('http:'):
            id = id.replace('/', '%2F')
            id = id.replace('#', '%23')
        params = {'id': id}
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest('id', **params)  # result is not used by this method
        url = self._basePath + '/vocabulary/id/{id}'.format(**params)
        # The path parameter is already in the URL; send only the rest.
        query = dict(params)
        del query['id']
        response = self._get('GET', url, query)
        return response or None
Ejemplo n.º 40
0
    def resolve(self, cypherQuery, output='text/plain'):
        """ Resolve a Cypher query via: /cypher/resolve

            Arguments:
            cypherQuery: The cypher query to resolve
            outputs:
                text/plain

            Returns the value produced by self._get, or None when that
            value is falsy.
        """

        params = {'cypherQuery': cypherQuery}
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest(None, **params)  # result is not used by this method
        url = self._basePath + '/cypher/resolve'.format(**params)
        response = self._get('GET', url, params, output)
        return response or None
Ejemplo n.º 41
0
    def execute(self, cypherQuery, limit, output='text/plain'):
        """ Run an arbitrary Cypher query via: /cypher/execute

            Arguments:
            cypherQuery: The cypher query to execute
            limit: Limit
            outputs:
                text/plain
                application/json

            Returns the value produced by self._get, or None when that
            value is falsy.
        """

        params = {'cypherQuery': cypherQuery, 'limit': limit}
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest(None, **params)  # result is not used by this method
        url = self._basePath + '/cypher/execute'.format(**params)
        response = self._get('GET', url, params, output)
        return response or None
Ejemplo n.º 42
0
    def getRelationships(self, callback=None, output='application/json'):
        """ List all relationship types from: /graph/relationship_types

            Arguments:
            callback: Name of the JSONP callback ('fn' by default). Supplying this parameter or
            requesting a javascript media type will cause a JSONP response to be
            rendered.
            outputs:
                application/json

            Returns the value produced by self._get, or an empty list when
            that value is falsy.
        """

        params = {'callback': callback}
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest(None, **params)  # result is not used by this method
        url = self._basePath + '/graph/relationship_types'.format(**params)
        response = self._get('GET', url, params, output)
        return response or []
Ejemplo n.º 43
0
def plus(o1,o2):
    """Add two operands, coercing towards list or str when either one is such.

    Returns None if either operand is None. If either operand is exactly a
    ``list``, the other is wrapped in a one-element list (unless it already
    is a list) and the lists are concatenated. Otherwise, if either operand
    is exactly a ``str``, the other is converted with ``str()`` and the
    strings are concatenated. In every other case ``o1 + o2`` is returned.
    """
    if o1 is None or o2 is None:
        return None

    left_is_list = builtins.type(o1) is list
    right_is_list = builtins.type(o2) is list
    if left_is_list or right_is_list:
        left = o1 if left_is_list else [o1]
        right = o2 if right_is_list else [o2]
        return left + right

    if builtins.type(o1) is str:
        return o1 + str(o2)
    if builtins.type(o2) is str:
        return str(o1) + o2
    return o1 + o2
Ejemplo n.º 44
0
    def findByTerm(self, term, limit=20, searchSynonyms=True, searchAbbreviations=False, searchAcronyms=False, category=None, prefix=None):
        """ Look up concepts matching a term from: /vocabulary/term/{term}

            Arguments:
            term: Term to find (substituted into the URL path)
            limit: Maximum result count
            searchSynonyms: Should synonyms be matched
            searchAbbreviations: Should abbreviations be matched
            searchAcronyms: Should acronyms be matched
            category: Categories to search (defaults to all)
            prefix: CURIE prefixes to search (defaults to all)

            Returns the value produced by self._get, or an empty list when
            that value is falsy.
        """

        params = {
            'term': term,
            'limit': limit,
            'searchSynonyms': searchSynonyms,
            'searchAbbreviations': searchAbbreviations,
            'searchAcronyms': searchAcronyms,
            'category': category,
            'prefix': prefix,
        }
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest('term', **params)  # result is not used by this method
        url = self._basePath + '/vocabulary/term/{term}'.format(**params)
        # The path parameter is already in the URL; send only the rest.
        query = dict(params)
        del query['term']
        response = self._get('GET', url, query)
        return response or []
Ejemplo n.º 45
0
    def enrich(self, sample, ontologyClass, path, callback=None, output='application/json'):
        """ Call the class enrichment service at: /analyzer/enrichment

            Arguments:
            sample: A list of CURIEs for nodes whose attributes are to be tested for enrichment. For example, a list of genes.
            ontologyClass: CURIE for parent ontology class for the attribute to be tested. For example, GO biological process
            path: A path expression that connects sample nodes to attribute class nodes
            callback: Name of the JSONP callback ('fn' by default). Supplying this parameter or
            requesting a javascript media type will cause a JSONP response to be
            rendered.
            outputs:
                application/json

            Returns the value produced by self._get, or None when that
            value is falsy.
        """

        params = {
            'sample': sample,
            'ontologyClass': ontologyClass,
            'path': path,
            'callback': callback,
        }
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest(None, **params)  # result is not used by this method
        url = self._basePath + '/analyzer/enrichment'.format(**params)
        response = self._get('GET', url, params, output)
        return response or None
Ejemplo n.º 46
0
    def postEntities(self, content, includeCat=None, excludeCat=None, minLength=4, longestOnly=False, includeAbbrev=False, includeAcronym=False, includeNumbers=False):
        """ POST content for entity annotation to: /annotations/entities

            Arguments:
            content: The content to annotate
            includeCat: A set of categories to include
            excludeCat: A set of categories to exclude
            minLength: The minimum number of characters in annotated entities
            longestOnly: Should only the longest entity be returned for an overlapping group
            includeAbbrev: Should abbreviations be included
            includeAcronym: Should acronyms be included
            includeNumbers: Should numbers be included

            Returns the value produced by self._get, or an empty list when
            that value is falsy.
        """

        params = {
            'content': content,
            'includeCat': includeCat,
            'excludeCat': excludeCat,
            'minLength': minLength,
            'longestOnly': longestOnly,
            'includeAbbrev': includeAbbrev,
            'includeAcronym': includeAcronym,
            'includeNumbers': includeNumbers,
        }
        # Dict-typed arguments are serialized with dumps before sending.
        for name, value in list(params.items()):
            if builtins.type(value) is dict:
                params[name] = dumps(value)
        self._make_rest(None, **params)  # result is not used by this method
        url = self._basePath + '/annotations/entities'.format(**params)
        response = self._get('POST', url, params)
        return response or []
Ejemplo n.º 47
0
 def __call__(self,payload,canonicalIndex,extraIndex=None,entry=None):
   """Write ``payload`` into ``self.master`` at the slot for ``canonicalIndex``.

   The payload is first converted with ``self.mtype``. The target index is
   obtained by applying ``performExtraIndex`` to the entry of
   ``self.struct.map`` stored under ``canonicalIndex``. When ``entry`` is
   None or ``entry.type`` is None the converted payload is assigned
   directly; when ``entry.type`` is "symm" the flipped index is written as
   well. Raises ``Exception`` when the canonical index is unknown and
   ``CompatibilityException`` when the assignment raises
   ``NotImplementedError``; any other exception raised during assignment
   is re-raised wrapped in an ``Exception`` chained to the original.
   """
   value = self.mtype(payload)
   kind = entry.type if entry is not None else None
   if canonicalIndex not in self.struct.map:
     raise Exception("Canonical index %s does not exist." % str(canonicalIndex))

   target = performExtraIndex(self.struct.map[canonicalIndex],extraIndex=extraIndex,entry=entry)
   try:
     if kind is None:
       self.master[target] = value
     elif kind=="symm":
       mirrored = performExtraIndex(self.struct.map[canonicalIndex],extraIndex=extraIndex,entry=entry,flip=True)
       if value.is_scalar():
         # A scalar fills both the index and its flipped counterpart.
         self.master[target] = value
         self.master[mirrored] = value
       else:
         reference = performExtraIndex(DM.ones(entry.originalsparsity),extraIndex=extraIndex,entry=entry)
         if reference.sparsity()!=value.sparsity():
           raise Exception("Payload sparsity " + value.dim() +  " does not match lhs sparisty " + reference.dim() + "." )
         # Flipped slot is written first, from the transposed payload.
         self.master[mirrored] = value.T[mirrored.sparsity()]
         self.master[target] = value[target.sparsity()]
     else:
       raise Exception("Cannot handle type '%s'." % entry.type)
   except NotImplementedError as err:
     raise CompatibilityException("Error in canonicalIndex slicing for %s: Incompatible types in a[i]=b with a %s (%s) and b %s (%s) and i %s (%s). Error: %s" % (str(canonicalIndex),str(self.master),str(builtins.type(self.master)),str(payload),str(builtins.type(payload)),str(target),str(builtins.type(target)),str(err)))
   except Exception as err:
     raise Exception("Error in powerIndex slicing for canonicalIndex %s" % (str(canonicalIndex))) from err
Ejemplo n.º 48
0
    def __init__(
        self,
        required_i32_field,
        required_string_field,
        binary_field=None,
        bool_field=None,
        byte_field=None,
        date_time_field=None,
        decimal_field=None,
        email_address_field=None,
        enum_field=None,
        i16_field=None,
        i32_field=None,
        i64_field=None,
        string_field=None,
        string_list_field=None,
        string_set_field=None,
        string_string_map_field=None,
        url_field=None,
    ):
        '''
        Validate every argument and store it on a private attribute.

        The two ``required_*`` arguments must not be None. Every other
        argument may be None, in which case it is stored as None without
        further checks. Fields are validated in declaration order, so the
        first invalid field determines the exception that is raised. The
        map field is stored as a shallow copy of the supplied dict.

        :type required_i32_field: int
        :type required_string_field: str
        :type binary_field: str or None
        :type bool_field: bool or None
        :type byte_field: int or None
        :type date_time_field: datetime.datetime or None
        :type decimal_field: Decimal or None
        :type email_address_field: str or None
        :type enum_field: thryft_test.protocol.test.protocol_test_enum.ProtocolTestEnum or None
        :type i16_field: int or None
        :type i32_field: int or None
        :type i64_field: int or long or None
        :type string_field: str or None
        :type string_list_field: tuple(str) or None
        :type string_set_field: frozenset(str) or None
        :type string_string_map_field: dict(str: str) or None
        :type url_field: str or None
        :raises ValueError: if a required field is None, or if
            required_string_field / string_field is an empty string
        :raises TypeError: if a supplied field has the wrong type
        '''

        def _check_type(name, value, expected, label):
            # Shared isinstance check; the message matches the per-field
            # messages this constructor has always produced.
            if not isinstance(value, expected):
                raise TypeError("expected %s to be a %s but it is a %s" % (name, label, builtins.type(value)))

        # ``long`` does not exist on Python 3 unless the module aliases it.
        # Referencing it unguarded raised NameError as soon as a non-None
        # i64_field was supplied, so fall back to ``int`` alone.
        try:
            i64_types = (int, long)
        except NameError:
            i64_types = (int,)

        if required_i32_field is None:
            raise ValueError('required_i32_field is required')
        _check_type('required_i32_field', required_i32_field, int, 'int')
        self.__required_i32_field = required_i32_field

        if required_string_field is None:
            raise ValueError('required_string_field is required')
        _check_type('required_string_field', required_string_field, str, 'str')
        if len(required_string_field) < 1:
            raise ValueError("expected len(required_string_field) to be >= 1, was %d" % len(required_string_field))
        self.__required_string_field = required_string_field

        if binary_field is not None:
            _check_type('binary_field', binary_field, str, 'str')
        self.__binary_field = binary_field

        if bool_field is not None:
            _check_type('bool_field', bool_field, bool, 'bool')
        self.__bool_field = bool_field

        if byte_field is not None:
            _check_type('byte_field', byte_field, int, 'int')
        self.__byte_field = byte_field

        if date_time_field is not None:
            _check_type('date_time_field', date_time_field, datetime.datetime, 'datetime.datetime')
        self.__date_time_field = date_time_field

        if decimal_field is not None:
            _check_type('decimal_field', decimal_field, decimal.Decimal, 'Decimal')
        self.__decimal_field = decimal_field

        if email_address_field is not None:
            _check_type('email_address_field', email_address_field, str, 'str')
        self.__email_address_field = email_address_field

        if enum_field is not None:
            # The enum class is only looked up when a value is supplied.
            _check_type(
                'enum_field',
                enum_field,
                thryft_test.protocol.test.protocol_test_enum.ProtocolTestEnum,
                'thryft_test.protocol.test.protocol_test_enum.ProtocolTestEnum',
            )
        self.__enum_field = enum_field

        if i16_field is not None:
            _check_type('i16_field', i16_field, int, 'int')
        self.__i16_field = i16_field

        if i32_field is not None:
            _check_type('i32_field', i32_field, int, 'int')
        self.__i32_field = i32_field

        if i64_field is not None:
            _check_type('i64_field', i64_field, i64_types, 'int or long')
        self.__i64_field = i64_field

        if string_field is not None:
            _check_type('string_field', string_field, str, 'str')
            if len(string_field) < 1:
                raise ValueError("expected len(string_field) to be >= 1, was %d" % len(string_field))
        self.__string_field = string_field

        if string_list_field is not None:
            if not (isinstance(string_list_field, tuple) and all(isinstance(item, str) for item in string_list_field)):
                raise TypeError("expected string_list_field to be a tuple(str) but it is a %s" % builtins.type(string_list_field))
        self.__string_list_field = string_list_field

        if string_set_field is not None:
            if not (isinstance(string_set_field, frozenset) and all(isinstance(item, str) for item in string_set_field)):
                raise TypeError("expected string_set_field to be a frozenset(str) but it is a %s" % builtins.type(string_set_field))
        self.__string_set_field = string_set_field

        if string_string_map_field is not None:
            if not (isinstance(string_string_map_field, dict) and all(isinstance(key, str) and isinstance(value, str) for key, value in string_string_map_field.items())):
                raise TypeError("expected string_string_map_field to be a dict(str: str) but it is a %s" % builtins.type(string_string_map_field))
        # Copy so later mutation of the caller's dict does not affect this object.
        self.__string_string_map_field = string_string_map_field.copy() if string_string_map_field is not None else None

        if url_field is not None:
            _check_type('url_field', url_field, str, 'str')
        self.__url_field = url_field
Ejemplo n.º 49
0
def size(o):
    """Return ``len(o)`` when ``o`` is exactly a ``list`` or ``str``, else None.

    None itself, subclasses of list/str and every other type yield None.
    """
    if o is None:
        return None
    kind = builtins.type(o)
    has_length = kind is list or kind is str
    return len(o) if has_length else None
Ejemplo n.º 50
0
def nlp_coptic(input_data, lb=False, parse_only=False, do_tok=True, do_norm=True, do_mwe=True, do_tag=True, do_lemma=True, do_lang=True,
			   do_milestone=True, do_parse=True, sgml_mode="sgml", tok_mode="auto", old_tokenizer=False, sent_tag=None,
			   preloaded=None, pos_spans=False, merge_parse=False, detokenize=0):
	"""Run the Coptic NLP pipeline over input_data and return the annotated text.

	Tabs and carriage returns are stripped from input_data first. The steps
	below run in this order, each gated by its flag:

	* do_milestone: pass the data through binarize().
	* do_tok: tokenize, either with the external Perl tokenizer
	  (old_tokenizer) or with the tokenizer object. When sgml_mode is
	  "pipes" the tokenized text is returned immediately. Without do_tok,
	  SGML-mode input that has no "norm=" is wrapped line by line in
	  norm_group/norm tags.
	* do_norm: normalize the norm values and inject them into the output.
	* parse_only / merge_parse: tag (unless the data is taken as already
	  tagged), convert to CoNLL, run DepEdit and MaltParser, then return
	  either the parse itself or the input with the parse merged in.
	* otherwise: tag, optionally parse (do_parse), then inject xml:id, pos,
	  lemma, multiword-expression tags, xml:lang, func/head and
	  orig/orig_group attributes according to the do_* flags.

	Arguments:
	input_data: text to process.
	lb: passed as ``lines`` to StackedTokenizer; adds '-l' for the old
		tokenizer; in "pipes" mode newlines are removed when it is False.
	parse_only: return the DepEdit output of the parse instead of SGML.
	sgml_mode: "sgml", "pipes" or "conllu" are the values tested here.
	tok_mode: "from_pipes" marks the input as already tokenized.
	sent_tag: passed to conllize() as ``element``; when not None in the
		do_parse path, the tagger input is built with tok_from_norm().
	preloaded: tokenizer object to use instead of building a new
		StackedTokenizer.
	pos_spans: with do_tag off in the parse_only/merge_parse path, take
		the tagged data from harvest_tt(input_data, keep_sgml=True).
	merge_parse: insert the parse into input_data as attributes of norm.
	detokenize: passed as ``detok`` to StackedTokenizer.

	Returns the tokenized text ("pipes" mode), the DepEdit output
	(parse_only, or merge_parse with sgml_mode "conllu"), the merged SGML
	(merge_parse), or otherwise the annotated output stripped and
	terminated with a single newline.

	Side effects visible in this function: may prompt the user via inp(),
	writes diagnostics to sys.stderr, and calls sys.exit(0) when the user
	aborts or when merge_parse is requested but the input has no "norm=".
	Relies on module-level names such as opts, lib_dir, data_dir, tt_path,
	parser_path and PY3.
	"""

	data = input_data.replace("\t","")
	data = data.replace("\r","")

	if preloaded is not None:
		stk = preloaded
	else:
		# NOTE(review): reads the module-level ``opts`` object rather than a parameter.
		stk = StackedTokenizer(pipes=sgml_mode != "sgml", lines=lb, tokenized=tok_mode=="from_pipes",
							   detok=detokenize, segment_merged=opts.segment_merged)

	if do_milestone:
		data = binarize(data)

	if do_tok:
		if old_tokenizer:
			# Build the command line for the external Perl tokenizer.
			tokenize = ['perl', lib_dir + 'tokenize_coptic.pl', '-n']
			if lb:
				tokenize.append('-l')
			if sgml_mode == "pipes":
				tokenize.append('-p')
			if tok_mode == "from_pipes":
				tokenize.append('-t')
			tokenize += ['-d', data_dir + 'copt_lex.tab', '-s', data_dir + 'segmentation_table.tab', '-m', data_dir + 'morph_table.tab', 'tempfilename']
			tokenized = exec_via_temp(data,tokenize)
			tokenized = tokenized.replace('\r','').strip()
			tokenized = re.sub(r'_$','',tokenized)
		else:
			tokenized = stk.analyze(data)

		if not lb and sgml_mode == "pipes":
			tokenized = tokenized.replace("\n","")
		# Pipes mode stops after tokenization.
		if sgml_mode == "pipes":
			return tokenized
	else:
		tokenized = data
		if sgml_mode == "sgml" and "norm=" not in tokenized:
			# Assume raw one token per line, wrap everything in norm tags
			tok_lines = []
			for line in tokenized.split("\n"):
				if not line.startswith("<"):  # Leave XML tags alone
					line = '<norm_group norm_group="' + line + '">\n<norm norm="'+ line +'">\n' + line + '\n</norm>\n</norm_group>'
				tok_lines.append(line)
			tokenized = "\n".join(tok_lines)

	tokenized = tokenized.replace('\r','').strip()
	output = tokenized
	norms = read_attributes(tokenized,"norm")

	if do_norm:
		from lib.auto_norm import normalize
		norms = normalize(norms,table_file=data_dir + "norm_table.tab")
		output = inject("norm", norms, "norm", output)

	if parse_only or merge_parse:
		# The second half of this condition is always true inside the enclosing branch.
		if not do_tag and (parse_only or merge_parse):
			if not "\t" in input_data and not 'pos="' in input_data:
				sys.stderr.write("! You selected parsing without tagging (-t) and your data format appears to contain no POS tag column.\n")
				resp = inp("! Would you like to add POS tagging to the job profile? [Y]es/[N]o/[A]bort ")
				if resp.lower() == "y":
					do_tag = True
				elif resp.lower() == "a":
					sys.exit(0)
		if do_tag:
			tag = [tt_path+'tree-tagger', tt_path+'coptic_fine.par', '-token','-lemma','-no-unknown', '-sgml' ,'tempfilename'] #no -token
			tagged = exec_via_temp(norms,tag)
			tagged = re.sub('\r','',tagged)
		else:  # Assume data is already tagged, in TT SGML format
			if pos_spans:
				tagged = harvest_tt(input_data, keep_sgml=True)
			else:
				tagged = input_data
				if PY3:
					tagged = input_data.encode("utf8")  # Handle non-UTF-8 when calling TT from subprocess in Python 3
		conllized = conllize(tagged,tag="PUNCT",element=sent_tag, no_zero=True)  # NB: if element is present it supersedes the POS tag
		# NOTE(review): the handles returned by io.open are not closed in this function.
		deped = DepEdit(io.open(data_dir + "add_ud_and_flat_morph.ini",encoding="utf8"),options=type('', (), {"quiet":True})())
		depedited = deped.run_depedit(conllized.split("\n"))
		parse_coptic = ['java','-mx512m','-jar',"maltparser-1.8.jar",'-c','coptic','-i','tempfilename','-m','parse']
		parsed = exec_via_temp(depedited,parse_coptic,parser_path)
		deped = DepEdit(io.open(data_dir + "parser_postprocess_nodom.ini",encoding="utf8"),options=type('', (), {"quiet":True})())
		depedited = deped.run_depedit(parsed.split("\n"))
		if parse_only:  # Output parse in conll format
			return depedited
		elif merge_parse:  # Insert parse into input SGML as attributes of <norm>
			if "norm=" not in input_data:
				sys.stderr.write('ERR: --merge_parse was selected but no <norm norm=".."> tags found in input\n')
				sys.exit(0)
			if sgml_mode == "conllu":
				return depedited
			ids, funcs, parents = extract_conll(depedited.strip())
			output = inject("xml:id", ids, "norm", input_data)
			output = inject("func", funcs, "norm", output)
			output = inject("head", parents, "norm", output)
			output = output.replace(' head="#u0"', "")
			output = merge_into_tag("pos", "norm", output)
			output = merge_into_tag("lemma", "norm", output)
			return output

	elif not do_parse:
		# Tag-only path: the tagger is run without the '-token' option.
		tag = [tt_path + 'tree-tagger', tt_path+'coptic_fine.par', '-lemma','-no-unknown', '-sgml' ,'tempfilename'] #no -token
		tagged = exec_via_temp(norms,tag)
		tagged = re.sub('\r','',tagged)
	if do_parse:
		tag = [tt_path + 'tree-tagger', tt_path+'coptic_fine.par', '-token','-lemma','-no-unknown', '-sgml' ,'tempfilename'] #no -token
		if sent_tag is None:
			tagged = exec_via_temp(norms,tag)
		else:
			norm_with_sgml = tok_from_norm(output)
			tagged = exec_via_temp(norm_with_sgml,tag)
		tagged = re.sub('\r','',tagged)
		conllized = conllize(tagged, tag="PUNCT", element=sent_tag, no_zero=True)
		# NOTE(review): the handles returned by io.open are not closed in this function.
		deped = DepEdit(io.open(data_dir + "add_ud_and_flat_morph.ini",encoding="utf8"),options=type('', (), {"quiet":True})())
		depedited = deped.run_depedit(conllized.split("\n"))
		parse_coptic = ['java','-mx1g','-jar',"maltparser-1.8.jar",'-c','coptic','-i','tempfilename','-m','parse']
		parsed = exec_via_temp(depedited,parse_coptic,parser_path)
		deped = DepEdit(io.open(data_dir + "parser_postprocess_nodom.ini",encoding="utf8"),options=type('', (), {"quiet":True})())
		depedited = deped.run_depedit(parsed.split("\n"))

		ids, funcs, parents = extract_conll(depedited)
		# Drop the first tab-separated column of every tagger output line.
		tagged = re.sub(r"(^|\n)[^\t]+\t",r"\1",tagged)
		if sent_tag is not None:
			tagged = re.sub(r"^<[^>]*>","",tagged)

	# Split the remaining columns: everything after the first tab becomes the
	# lemma column; the part before the last tab-separated field stays in tagged.
	lemmas = re.sub('^[^\t]+\t','',tagged)
	lemmas = re.sub('\n[^\t]+\t','\n',lemmas)
	tagged = re.sub('(\t[^\t]+\n)','\n',tagged)
	langed = lookup_lang(norms, lexicon=data_dir + "lang_lexicon.tab")

	if do_parse:
		output = inject("xml:id",ids,"norm",output)
	if do_tag:
		output = inject("pos",tagged,"norm",output)
	if do_lemma:
		output = inject("lemma",lemmas,"norm",output)
	if do_mwe:
		mwe_positions = tag_mwes(norms.split('\n'),lemmas.split('\n'))
		output = inject_tags(output, mwe_positions)
	if do_lang:
		output = inject("xml:lang",langed,"norm",output)
		if "morph" in tokenized:
			morphs = read_attributes(tokenized, "morph")
			if len(morphs) > 0:
				# langed_morphs = exec_via_temp(morphs,lang).replace("\r","")
				langed_morphs = lookup_lang(morphs,lexicon=data_dir+"lang_lexicon.tab")
				output = inject("xml:lang", langed_morphs, "morph", output)
			# Make sure no foreign language norms also contain foreign language morphs (morph has priority over norm)
			output = remove_nesting_attr(output,"norm","morph","xml:lang")
	if do_parse:
		output = inject("func",funcs,"norm",output)
		output = inject("head",parents,"norm",output)
		output = output.replace(' head="#u0"',"")  # Remove head attribute for root tokens in dependency tree

	if do_norm and "norm=" in output:
		groups = groupify(output,"norm")
		output = inject("norm_group",groups,"norm_group",output)

		# Add orig from tokens based on norm spans
		origs = get_origs(output)
		output = inject("orig",origs,"norm",output)
		orig_groups = groupify(output, "orig")
		if "orig_group=" in output:
			# Replace existing orig groups in output with newly harvested orig content
			output = inject("orig_group",orig_groups,"orig_group",output)
		else:
			# Add orig_group attribute since not yet present
			output = inject("orig_group",orig_groups,"norm_group",output)
	else:
		if "orig_group=" in tokenized:  # There are already orig_group attrs and we're not normalizing
			orig_groups = read_attributes(tokenized, "orig_group")
			origs = get_origs(tokenized)
			output = inject("orig", origs, "orig", output)
			output = inject("orig_group", orig_groups, "orig_group", output)
		elif "orig=" in tokenized:  # Need to reconstitute
			origs = get_origs(tokenized)
			orig_groups = groupify(tokenized, "orig")
			output = inject("orig", origs, "orig", output)
			output = inject("orig_group", orig_groups, "orig_group", output)

	# Normalize trailing whitespace to exactly one newline.
	return output.strip() + "\n"
Ejemplo n.º 51
0
        def set_url_field(self, url_field):
            '''
            Store url_field on this object after a type check and return self.

            None is accepted and stored without any check.

            :type url_field: str or None
            :raises TypeError: if url_field is neither None nor a str
            '''

            needs_check = url_field is not None
            if needs_check and not isinstance(url_field, str):
                raise TypeError("expected url_field to be a str but it is a %s" % builtins.type(url_field))
            self.__url_field = url_field
            return self
 def create_type(self,name, dic):
     """Build and return a new class called ``name`` that derives from ``object``.

     ``dic`` supplies the namespace (attributes and methods) of the new class.
     """
     bases = (object,)
     new_class = type(name, bases, dic)
     return new_class
Ejemplo n.º 53
0
 |      Return getattr(self, name).
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iadd__(self, value, /)
 |      Implement self+=value.
 |  
 |  __imul__(self, value, /)
 |      Implement self*=value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __len__(self, /)
 |      Return len(self).
 |  
 |  __lt__(self, value, /)
 |      Return self<value.
 |  
 |  __mul__(self, value, /)
 |      Return self*value.
Ejemplo n.º 54
0
        def set_string_string_map_field(self, string_string_map_field):
            '''
            Store the map on this object after a shape check and return self.

            None is accepted and stored without any check. Otherwise the
            value must be a dict whose keys and values are all str. The dict
            is stored as given, not copied.

            :type string_string_map_field: dict(str: str) or None
            :raises TypeError: if the value is not a dict of str to str
            '''

            if string_string_map_field is not None:
                is_str_map = isinstance(string_string_map_field, dict) and all(
                    isinstance(key, str) and isinstance(value, str)
                    for key, value in string_string_map_field.items()
                )
                if not is_str_map:
                    raise TypeError("expected string_string_map_field to be a dict(str: str) but it is a %s" % builtins.type(string_string_map_field))
            self.__string_string_map_field = string_string_map_field
            return self