Example #1
0
def _get_value(xml, rtype=unicode):
    """Returns the textual value of an xml node.

    :param xml: A string, an xml node, or a list of which only the first
        item is considered.
    :param rtype: Requested return type; ``unicode`` yields unicode, any
        other value yields a byte string.

    :returns: Text with surrounding whitespace stripped and trailing ``|``
        characters removed, or None when there is nothing to read (``xml``
        is None or an empty list).

    """
    # Reduce a list to its first item (or to nothing when empty).
    node = xml
    if isinstance(node, types.ListType):
        node = node[0] if len(node) > 0 else None
    if node is None:
        return None

    # Strings are converted directly; anything else is serialised first.
    is_text = isinstance(node, types.StringTypes)
    if rtype is unicode:
        raw = node if is_text else et.tostring(node)
        text = convert.str_to_unicode(raw)
    elif is_text:
        text = convert.unicode_to_str(node)
    else:
        text = et.tostring(node)

    # Whitespace is stripped first, then any trailing pipe characters.
    return text.strip().rstrip('|')
def _get_value(xml, rtype=unicode):
    """Returns the textual value of an xml node.

    :param xml: A string, an xml node, or a list of which only the first
        item is considered.
    :param rtype: Requested return type; ``unicode`` yields unicode, any
        other value yields a byte string.

    :returns: Text with surrounding whitespace stripped and trailing ``|``
        characters removed, or None when there is nothing to read (``xml``
        is None or an empty list).

    """
    # Reduce a list to its first item (or to nothing when empty).
    target = xml
    if isinstance(target, types.ListType):
        target = target[0] if len(target) > 0 else None
    if target is None:
        return None

    # Strings are converted directly; anything else is serialised first.
    already_string = isinstance(target, types.StringTypes)
    if rtype is unicode:
        source = target if already_string else et.tostring(target)
        value = convert.str_to_unicode(source)
    elif already_string:
        value = convert.unicode_to_str(target)
    else:
        value = et.tostring(target)

    # Whitespace is stripped first, then any trailing pipe characters.
    return value.strip().rstrip('|')
Example #3
0
def read(fpath, encoding=None, decode=True):
    """Reads a document from file system.

    :param str fpath: Path to previously saved file; a leading ``~`` is
        expanded to the user's home directory.
    :param str encoding: Encoding to use during deserialization; when None
        it is taken from the file extension (without the leading dot).
    :param bool decode: Flag indicating whether document will be decoded.

    :returns: The result of ``pyesdoc.decode`` when ``decode`` is true,
        otherwise the raw file content converted via ``str_to_unicode``.
    :rtype: object

    :raises IOError: If ``fpath`` does not point to an existing file.

    """
    # Expand "~" so that home-relative paths are not rejected as missing.
    fpath = os.path.expanduser(fpath)

    # Validate file path.
    if not os.path.isfile(fpath):
        raise IOError("Document file path does not exist")

    # Optionally derive encoding from file extension.
    if encoding is None:
        encoding = os.path.splitext(fpath)[1][1:]

    # Set raw content.
    with open(fpath, "r") as fstream:
        fcontent = str_to_unicode(fstream.read())

    # Decode upon request.
    return pyesdoc.decode(fcontent, encoding) if decode else fcontent
Example #4
0
def read(fpath, encoding=None, decode=True):
    """Loads a document from the file system.

    :param str fpath: Path to previously saved file; a leading ``~`` is
        expanded to the user's home directory.
    :param str encoding: Encoding to use during deserialization; when None
        it is taken from the file extension (without the leading dot).
    :param bool decode: Flag indicating whether document will be decoded.

    :returns: The result of ``pyesdoc.decode`` when ``decode`` is true,
        otherwise the raw file content converted via ``str_to_unicode``.
    :rtype: object

    :raises IOError: If the expanded path is not an existing file.

    """
    fpath = os.path.expanduser(fpath)

    # Reject anything that is not an existing regular file.
    if not os.path.isfile(fpath):
        raise IOError("Document file path does not exist")

    # Fall back to the file extension when no encoding was supplied.
    if encoding is None:
        _, extension = os.path.splitext(fpath)
        encoding = extension[1:]

    # Load raw content as unicode.
    with open(fpath, 'r') as fstream:
        fcontent = str_to_unicode(fstream.read())

    # Hand back raw content unless decoding was requested.
    if not decode:
        return fcontent
    return pyesdoc.decode(fcontent, encoding)
Example #5
0
def execute(ctx):
    """Creates document index.

    :param object ctx: Document processing context information.

    """
    # Instantiate.
    instance = models.Document()
    instance.description = str_to_unicode(ctx.doc.ext.description)
    instance.institute = ctx.doc.meta.institute
    instance.name = unicode(ctx.doc.ext.display_name)
    instance.project = ctx.doc.meta.project.strip().lower()
    if ctx.doc.meta.sub_projects:
        instance.sub_projects = ",".join([u"<{}>".format(i.lower()) for i in sorted(ctx.doc.meta.sub_projects)])
    instance.typeof = unicode(ctx.doc.meta.type)
    instance.uid = unicode(ctx.doc.meta.id)
    instance.version = ctx.doc.meta.version

    # Set alternative name.
    if hasattr(ctx.doc, "alternative_name"):
        if ctx.doc.alternative_name:
            instance.alternative_name = ctx.doc.alternative_name
    elif hasattr(ctx.doc, "alternative_names"):
        if ctx.doc.alternative_names:
            instance.alternative_name = ctx.doc.alternative_names[0]

    # Set short/long names.
    fields = [f for f in ctx.doc.ext.summary_fields if f is not None]
    try:
        instance.canonical_name = fields[0]
    except IndexError:
        pass
    try:
        instance.long_name = fields[1]
    except IndexError:
        pass

    # Set other fields.
    try:
        parser = _PARSERS[type(ctx.doc)]
    except KeyError:
        pass
    else:
        parser(instance, ctx.doc)

    # Persist.
    try:
        session.insert(instance)
    except sqlalchemy.exc.IntegrityError:
        session.rollback()
        print instance.uid, instance.version, instance.typeof
        raise StopIteration("Document already ingested")
    else:
        ctx.primary = instance
Example #6
0
    def _format(s):
        """Formats a single value as stripped unicode text.

        ``convert`` and ``output_formatter`` are resolved from the
        surrounding scope, which is not visible from this block.

        :param s: Value to format.

        :returns: None when ``s`` is None; otherwise the value converted
            with ``str``, passed through ``convert.str_to_unicode`` and the
            optional ``output_formatter``, then stripped.  Datetimes are
            truncated to the first 10 characters of their string form.

        """
        if s is None:
            return None

        # TODO add support for time formatting.
        # The type check is made on the value being formatted (the original
        # tested a different variable, ``v``, from the enclosing scope).
        if isinstance(s, datetime.datetime):
            s = str(s)[:10]
        else:
            s = str(s)

        if s:
            s = convert.str_to_unicode(s)
            if output_formatter:
                s = output_formatter(s)

        # Re-check: the formatter may have returned an empty value.
        if s:
            s = s.strip()

        return s
Example #7
0
def decode(as_json):
    """Decodes a document from a json text blob.

    :param as_json: Document json representation.
    :type as_json: unicode | str

    :returns: Whatever ``dict_decoder.decode`` produces for the parsed
        dictionary.
    :rtype: object

    """
    # Normalise to unicode, then parse the json into a dictionary.
    document_dict = convert.json_to_dict(convert.str_to_unicode(as_json))

    # Delegate to the dictionary decoder.
    return dict_decoder.decode(document_dict)
    def _format(s):
        """Formats a single value as stripped unicode text.

        ``convert`` and ``output_formatter`` are resolved from the
        surrounding scope, which is not visible from this block.

        :param s: Value to format.

        :returns: None when ``s`` is None; otherwise the value converted
            with ``str``, passed through ``convert.str_to_unicode`` and the
            optional ``output_formatter``, then stripped.  Datetimes are
            truncated to the first 10 characters of their string form.

        """
        if s is None:
            return None

        # TODO add support for time formatting.
        # The type check is made on the value being formatted (the original
        # tested a different variable, ``v``, from the enclosing scope).
        if isinstance(s, datetime.datetime):
            s = str(s)[:10]
        else:
            s = str(s)

        if s:
            s = convert.str_to_unicode(s)
            if output_formatter:
                s = output_formatter(s)

        # Re-check: the formatter may have returned an empty value.
        if s:
            s = s.strip()

        return s
def decode(as_json):
    """Decodes a document from a json text blob.

    :param as_json: Document json representation.
    :type as_json: unicode | str

    :returns: Whatever ``dict_decoder.decode`` produces for the parsed
        dictionary.
    :rtype: object

    """
    # Step 1: normalise the input to unicode.
    json_text = convert.str_to_unicode(as_json)

    # Step 2: parse into a dictionary and hand over to the dict decoder.
    return dict_decoder.decode(convert.json_to_dict(json_text))
def _encode_simple(xml, val):
    """Encodes a simple value as the text of an xml node.

    :param xml: Node whose ``text`` attribute receives the encoded value.
    :param val: Value to encode.

    :returns: An empty unicode string, leaving ``xml`` untouched, when
        ``val`` is None or the string 'None'; otherwise None.

    """
    # Null values are not written to the node.
    if val in (None, 'None'):
        return u''

    # Format according to type; datetime is tested first because it is a
    # subclass of date.
    if isinstance(val, datetime.datetime):
        text = val.isoformat().replace('T', ' ')
    elif isinstance(val, (datetime.date, datetime.time)):
        text = val.isoformat()
    else:
        text = convert.str_to_unicode(val)

    # Normalise empty results.
    if text is None or len(text) == 0:
        text = u''

    # Assign to xml.
    xml.text = text.strip()
Example #11
0
def _encode_simple(xml, val):
    """Encodes a simple value as the text of an xml node.

    :param xml: Node whose ``text`` attribute receives the encoded value.
    :param val: Value to encode.

    :returns: An empty unicode string, leaving ``xml`` untouched, when
        ``val`` is None or the string 'None'; otherwise None.

    """
    # Null values are not written to the node.
    if val in (None, 'None'):
        return u''

    # Format according to type; datetime is tested first because it is a
    # subclass of date.
    if isinstance(val, datetime.datetime):
        encoded = val.isoformat().replace('T', ' ')
    elif isinstance(val, (datetime.date, datetime.time)):
        encoded = val.isoformat()
    else:
        encoded = convert.str_to_unicode(val)

    # Normalise empty results.
    if encoded is None or len(encoded) == 0:
        encoded = u''

    # Assign to xml.
    xml.text = encoded.strip()