Example #1
0
def parse_opnsense(input_path, document):
    """Run the XML found at *input_path* through an OpnSenseContentHandler.

    Args:
        input_path: Path of the file to read, or ``'-'`` to read from
            ``sys.stdin`` instead.
        document: Object handed to ``OpnSenseContentHandler``; the handler
            is what receives the parse events.

    Note:
        Regular files are opened in binary mode.  When reading from
        standard input, ``sys.stdin`` itself is used as the context
        manager, so it is closed once parsing finishes.
    """
    content_handler = OpnSenseContentHandler(document)

    if input_path != '-':
        with open(input_path, 'rb') as stream:
            parse(stream, content_handler)
        return

    with sys.stdin as stream:
        parse(stream, content_handler)
Example #2
0
    def __init__(self, err, data, namespace=None):
        """Check that *data* is well-formed XML, then load it as an RDF graph.

        Args:
            err: Stored on the instance and passed to
                ``AddonRDFEntityResolver`` for the patched rdflib parser.
            data: Either a string (it gets wrapped in ``StringIO``) or an
                object ``parse`` can read from and that supports ``seek``.
            namespace: Namespace to record on the instance; any falsy value
                selects the built-in default.

        Raises:
            RDFException: if the SAX pre-check fails, or if rdflib raises
                ``ParserError`` / ``SAXParseException`` while loading.
        """
        self.err = err
        self.manifest = u"urn:mozilla:install-manifest"
        self.namespace = (
            namespace if namespace else "http://www.mozilla.org/2004/em-rdf"
        )

        # A plain string is turned into a pseudo-file so that both parsing
        # passes below can treat the input uniformly.
        if isinstance(data, types.StringTypes):
            data = StringIO(data)

        # First pass: a do-nothing ContentHandler, purely to find out
        # whether the document parses at all.
        try:
            parse(data, ContentHandler())
        except SAXParseException as sax_error:
            # Wrap the SAX error so its line information is preserved.
            raise RDFException(orig_exception=sax_error)

        # The first pass consumed the stream; rewind it for rdflib.
        data.seek(0)

        from rdflib.plugins.parsers import rdfxml

        stock_create_parser = rdfxml.create_parser

        def create_parser(*args, **kwargs):
            # Build the parser as rdflib normally would, then swap in our
            # entity resolver so URL entities are not resolved.
            parser = stock_create_parser(*args, **kwargs)
            parser.setEntityResolver(AddonRDFEntityResolver(err))
            return parser

        try:
            rdfxml.create_parser = create_parser

            # Second pass: let rdflib read the document as RDF/XML.
            graph = Graph()
            graph.parse(data, format="xml")
            self.rdf = graph
        except ParserError as parser_error:
            # Translate rdflib's error into the local exception type.
            raise RDFException(message=parser_error.message)
        except SAXParseException as sax_error:
            # Wrap the SAX error so its line information is preserved.
            raise RDFException(orig_exception=sax_error)
        finally:
            # Success or failure, always put rdflib's own factory back.
            rdfxml.create_parser = stock_create_parser
Example #3
0
    def __init__(self, err, data, namespace=None):
        """Check that *data* is well-formed XML, then load it as an RDF graph.

        Args:
            err: Stored on the instance and passed to
                ``AddonRDFEntityResolver`` for the patched rdflib parser.
            data: The document text, or a file-like object (something with
                ``read`` and ``readline``, or a ``StringIO``) which is
                rewound and read in full.
            namespace: Namespace to record on the instance; any falsy value
                selects the built-in default.

        Raises:
            RDFException: if the SAX pre-check fails, or if rdflib raises
                ``ParserError`` / ``SAXParseException`` while loading.
        """
        self.err = err
        self.manifest = u'urn:mozilla:install-manifest'
        self.namespace = (
            namespace if namespace else 'http://www.mozilla.org/2004/em-rdf'
        )

        looks_like_file = hasattr(data, 'read') and hasattr(data, 'readline')
        if looks_like_file or isinstance(data, StringIO):
            # Pull the whole content into memory; each parsing pass below
            # then gets its own fresh StringIO over it.
            data.seek(0)
            data = data.read()

        # First pass: a do-nothing ContentHandler, purely to find out
        # whether the document parses at all.
        try:
            parse(StringIO(data), ContentHandler())
        except SAXParseException as sax_error:
            # Wrap the SAX error so its line information is preserved.
            raise RDFException(orig_exception=sax_error)

        from rdflib.plugins.parsers import rdfxml
        stock_create_parser = rdfxml.create_parser

        def create_parser(*args, **kwargs):
            # Build the parser as rdflib normally would, then swap in our
            # entity resolver so URL entities are not resolved.
            parser = stock_create_parser(*args, **kwargs)
            parser.setEntityResolver(AddonRDFEntityResolver(err))
            return parser

        try:
            rdfxml.create_parser = create_parser

            # Second pass: let rdflib read the document as RDF/XML.
            graph = Graph()
            graph.parse(StringIO(data), format='xml')
            self.rdf = graph
        except ParserError as parser_error:
            # Translate rdflib's error into the local exception type.
            raise RDFException(message=parser_error.message)
        except SAXParseException as sax_error:
            # Wrap the SAX error so its line information is preserved.
            raise RDFException(orig_exception=sax_error)
        finally:
            # Success or failure, always put rdflib's own factory back.
            rdfxml.create_parser = stock_create_parser
Example #4
0
    def __init__(self, err, data, namespace=None):
        """Validate *data* with SAX and then parse it into ``self.rdf``.

        Args:
            err: Kept as ``self.err`` and given to the
                ``AddonRDFEntityResolver`` installed on rdflib's parser.
            data: The XML text itself, or a file-like object (has ``read``
                and ``readline``, or is a ``StringIO``) that is rewound and
                read completely.
            namespace: Value for ``self.namespace``; when falsy the default
                namespace string is used.

        Raises:
            RDFException: when the SAX validation pass fails, or when the
                rdflib pass raises ``ParserError`` or ``SAXParseException``.
        """
        self.err = err
        self.manifest = u'urn:mozilla:install-manifest'
        if namespace:
            self.namespace = namespace
        else:
            self.namespace = 'http://www.mozilla.org/2004/em-rdf'

        file_like = hasattr(data, 'read') and hasattr(data, 'readline')
        if file_like or isinstance(data, StringIO):
            # Slurp the content so that every pass below can open its own
            # independent StringIO over the same text.
            data.seek(0)
            data = data.read()

        # Validation pass: the handler is empty, only parse errors matter.
        try:
            parse(StringIO(data), ContentHandler())
        except SAXParseException as parse_failure:
            # Keep the original exception so line info stays available.
            raise RDFException(orig_exception=parse_failure)

        from rdflib.plugins.parsers import rdfxml
        pristine_create_parser = rdfxml.create_parser

        def create_parser(*args, **kwargs):
            # Delegate to rdflib's factory, then replace the entity
            # resolver so that URL entities are not resolved.
            parser = pristine_create_parser(*args, **kwargs)
            parser.setEntityResolver(AddonRDFEntityResolver(err))
            return parser

        try:
            rdfxml.create_parser = create_parser

            # Real pass: build the graph from the RDF/XML text.
            graph = Graph()
            graph.parse(StringIO(data), format='xml')
            self.rdf = graph
        except ParserError as rdf_failure:
            # Surface rdflib's error as the local exception type.
            raise RDFException(message=rdf_failure.message)
        except SAXParseException as parse_failure:
            # Keep the original exception so line info stays available.
            raise RDFException(orig_exception=parse_failure)
        finally:
            # Never leave rdflib monkeypatched, whatever happened above.
            rdfxml.create_parser = pristine_create_parser
Example #5
0
    def __init__(self, err, data, namespace=None):
        """Validate *data* with SAX and then parse it into ``self.rdf``.

        Args:
            err: Kept as ``self.err`` and given to the
                ``AddonRDFEntityResolver`` installed on rdflib's parser.
            data: A string (wrapped in ``StringIO``) or an object that
                ``parse`` can read and that supports ``seek``.
            namespace: Value for ``self.namespace``; when falsy the default
                namespace string is used.

        Raises:
            RDFException: when the SAX validation pass fails, or when the
                rdflib pass raises ``ParserError`` or ``SAXParseException``.
        """
        self.err = err
        self.manifest = u'urn:mozilla:install-manifest'
        if namespace:
            self.namespace = namespace
        else:
            self.namespace = 'http://www.mozilla.org/2004/em-rdf'

        # Strings become a pseudo-file so both passes see a stream.
        if isinstance(data, types.StringTypes):
            data = StringIO(data)

        # Validation pass: the handler is empty, only parse errors matter.
        try:
            parse(data, ContentHandler())
        except SAXParseException as parse_failure:
            # Keep the original exception so line info stays available.
            raise RDFException(orig_exception=parse_failure)

        # The validation pass read the stream to its end; start over.
        data.seek(0)

        from rdflib.plugins.parsers import rdfxml
        pristine_create_parser = rdfxml.create_parser

        def create_parser(*args, **kwargs):
            # Delegate to rdflib's factory, then replace the entity
            # resolver so that URL entities are not resolved.
            parser = pristine_create_parser(*args, **kwargs)
            parser.setEntityResolver(AddonRDFEntityResolver(err))
            return parser

        try:
            rdfxml.create_parser = create_parser

            # Real pass: build the graph from the RDF/XML stream.
            graph = Graph()
            graph.parse(data, format='xml')
            self.rdf = graph
        except ParserError as rdf_failure:
            # Surface rdflib's error as the local exception type.
            raise RDFException(message=rdf_failure.message)
        except SAXParseException as parse_failure:
            # Keep the original exception so line info stays available.
            raise RDFException(orig_exception=parse_failure)
        finally:
            # Never leave rdflib monkeypatched, whatever happened above.
            rdfxml.create_parser = pristine_create_parser
def xmlxls2pd(attachedDoc, table=0, header=0):
    """Load one table of an XML spreadsheet document into a DataFrame.

    Args:
        attachedDoc: Path of the document (``str``), or an object that
            ``parse`` accepts directly.
        table: Index into the handler's ``tables`` list.
        header: Index, within the table's ``showRowList``, of the row whose
            cells name the columns.  Every row after it is treated as data.

    Returns:
        A ``pandas.DataFrame`` with one row per item.  Each column is
        converted according to the ``ss:Type`` recorded for its cells.

    Raises:
        AssertionError: if cells of one column report different
            ``ss:Type`` values.
        KeyError: if a column's ``ss:Type`` has no entry in the local
            ``conversions`` table.
    """
    xxh = XmlXlsHandler()
    if isinstance(attachedDoc, str):
        with open(attachedDoc, 'r') as ad:
            parse(ad, xxh)
    else:
        parse(attachedDoc, xxh)

    sheet = xxh.tables[table]
    header_row = sheet.showRowList[header]
    # Data starts right below the header row; feeding the header itself
    # through the dtype bookkeeping would clash with typed data columns.
    rows = sheet.showRowList[header + 1:]

    # Header cells without data cannot name a column, so they are skipped.
    columns_index = {
        c.data.data: c.attr["ss:Index"]
        for c in header_row.cells
        if c.data is not None
    }
    index_columns = {v: k for k, v in columns_index.items()}

    # column name -> ss:Type seen for that column
    coll = {}

    def show_rows(cells):
        # Yield (column, data) for every non-empty cell under a known header.
        for cell in cells:
            index = cell.attr["ss:Index"]
            if index in index_columns:
                column = index_columns[index]
                if cell.data is not None:
                    data = cell.data.data
                    dtype = cell.data.attr.get("ss:Type", None)
                    if column in coll:
                        assert (coll[column] == dtype)
                    else:
                        coll[column] = dtype
                    yield column, data

    def show_items(rows):
        # Yield one dict per logical row, folding merged-down rows into it.
        rows = iter(rows)

        # A plain ``for`` ends the generator cleanly; a bare ``next()`` in a
        # ``while True`` loop turns into RuntimeError under PEP 479.
        for row in rows:
            cells = row.cells

            item = dict(show_rows(cells))

            # ``or [0]`` keeps max() from failing on a row with no cells.
            span = max(
                [int(c.attr.get("ss:MergeDown", 0)) for c in cells] or [0])
            if span > 0:
                merger = defaultdict(list)
                for _ in range(span):
                    # Consume the rows that are merged into this one; stop
                    # early if the table ends before the span does.
                    merged_row = next(rows, None)
                    if merged_row is None:
                        break
                    for k, v in show_rows(merged_row.cells):
                        merger[k].append(v)
                for k, v in merger.items():
                    # The top row may have no value for this column.
                    if k in item:
                        v.insert(0, item[k])
                    item[k] = '\n'.join(v)
            yield item

    df = pd.DataFrame(show_items(rows))

    conversions = {
        None: lambda x: x,
        'DateTime': lambda x: pd.to_datetime(x, utc=True),
        'String': lambda x: x,
        'Number': pd.to_numeric
    }

    for name in df.columns:
        df[name] = conversions[coll[name]](df[name])
    return df
Example #7
0
def parse_pfsense(input_path, document):
    """Run the XML file at *input_path* through a PfSenseContentHandler.

    Args:
        input_path: Path of the file to read; it is opened in binary mode.
        document: Object handed to ``PfSenseContentHandler``; the handler
            is what receives the parse events.
    """
    content_handler = PfSenseContentHandler(document)
    with open(input_path, 'rb') as stream:
        parse(stream, content_handler)
def xmlxls_to_pd(attachment,
                 table=0,
                 header=0,
                 skiprows=None,
                 skip_footer=0,
                 merge_down=True):
    """Load one table of an XML spreadsheet document into a DataFrame.

    Args:
        attachment: Path of the document (``str``), or an object that
            ``parse`` accepts directly.
        table: Index into the handler's ``tables`` list.
        header: Index of the row whose cells name the columns.
        skiprows: Number of leading rows to drop before the data starts;
            defaults to ``header + 1``.
        skip_footer: Number of trailing rows to drop.
        merge_down: When true (the default), rows covered by a cell's
            ``ss:MergeDown`` span are folded into the row that starts the
            span, joining values with newlines.  When false, every row is
            emitted on its own.

    Returns:
        A ``pandas.DataFrame`` with one row per item, each column converted
        according to the ``ss:Type`` recorded for its cells.

    Raises:
        AssertionError: if cells of one column report different
            ``ss:Type`` values.

    Note:
        Relies on a ``conversions`` mapping (``ss:Type`` -> callable) that
        is not defined in this function; it is expected to be available in
        the enclosing module scope.
    """
    if skiprows is None:
        skiprows = header+1

    excelHandler = ExcelHandler()
    if isinstance(attachment, str):
        # Use a separate name so the parameter is not rebound to the file.
        with open(attachment, 'r') as source:
            parse(source, excelHandler)
    else:
        parse(attachment, excelHandler)

    table = excelHandler.tables[table]

    header = table.rows[header]

    # Header cells without data cannot name a column, so they are skipped.
    columns_index = {
        c.data.data: c.attr["ss:Index"]
        for c in header.cells
        if c.data is not None
    }
    index_columns = {v: k for k, v in columns_index.items()}

    rows = table.rows[skiprows:]
    # Clamp at zero: a negative bound would slice from the end and keep
    # rows instead of dropping all of them.
    rows = rows[:max(len(rows) - skip_footer, 0)]

    # column name -> ss:Type seen for that column
    coll_dtype = {}

    def row_gen(cells):
        # Yield (column, data) for every non-empty cell under a known header.
        for cell in cells:
            index = cell.attr["ss:Index"]
            if index in index_columns:
                column = index_columns[index]
                if cell.data is not None:
                    data = cell.data.data
                    dtype = cell.data.attr.get("ss:Type", None)
                    if column in coll_dtype:
                        assert(coll_dtype[column] == dtype)
                    else:
                        coll_dtype[column] = dtype
                    yield column, data

    def item_gen(rows):
        # Yield one dict per logical row, folding merged-down rows into it.
        rows = iter(rows)

        # A plain ``for`` ends the generator cleanly; a bare ``next()`` in a
        # ``while True`` loop turns into RuntimeError under PEP 479.
        for row in rows:
            cells = row.cells

            item = dict(row_gen(cells))

            span = 0
            if merge_down:
                # ``or [0]`` keeps max() from failing on a row with no cells.
                span = max(
                    [int(c.attr.get("ss:MergeDown", 0)) for c in cells]
                    or [0])
            if span > 0:
                merger = defaultdict(list)
                for _ in range(span):
                    # Consume the rows that are merged into this one; stop
                    # early if the table ends before the span does.
                    merged_row = next(rows, None)
                    if merged_row is None:
                        break
                    for k, v in row_gen(merged_row.cells):
                        merger[k].append(v)
                for k, v in merger.items():
                    # The top row may have no value for this column.
                    if k in item:
                        v.insert(0, item[k])
                    item[k] = '\n'.join(v)
            yield item

    # make data frame
    df = pd.DataFrame(item_gen(rows))

    # apply data types
    for col in df.columns:
        df[col] = conversions[coll_dtype[col]](df[col])
    return df