Example #1
0
    def getRTCCTV(self, lng, lat):
        """Fetch CCTV stream URLs around a coordinate from both endpoints.

        :param lng: longitude of the point of interest
        :param lat: latitude of the point of interest
        :return: list of JSON strings, each of the form
            {"content": <cctv stream url>}
        """
        # The API expects a MinX/MaxX/MinY/MaxY bounding box, so build a
        # tiny one (+/- 0.000001 degrees) around the requested point.
        param_cctv = "key=%s&ReqType=2&MinX=%s&MaxX=%s&MinY=%s&MaxY=%s&type=" % (
            self.API_KEY,
            str(lng - 0.000001), str(lng + 0.000001),
            str(lat - 0.000001), str(lat + 0.000001))

        cctv_arr = []
        # Query both the "ex" and "its" variants of the endpoint and merge
        # the results in that order (same order as before).
        for road_type in ("ex", "its"):
            response = urllib.request.urlopen(self.cctv_url + param_cctv + road_type)
            xml_text = response.read().decode('utf-8')
            # fromstring, not fromstringlist: the payload is one decoded
            # string; fromstringlist would feed it to the parser one
            # character at a time.
            tree = eltree.fromstring(xml_text)
            for node in tree.findall('data'):
                data = {"content": node.find('cctvurl').text}
                cctv_arr.append(json.dumps(data, ensure_ascii=False))

        print(cctv_arr)
        return cctv_arr
Example #2
0
    def getAcci(self):
        """Fetch traffic-incident records from both service endpoints.

        :return: list of JSON strings with "title", "lat", "lng" and
            "content" keys (title and content are both the incident message)
        """
        accid_arr = []
        # Query both the "ex" and "its" variants of the endpoint and merge
        # the results in that order (same order as before).
        for road_type in ("ex", "its"):
            response = urllib.request.urlopen(self.accid_url + self.params + road_type)
            xml_text = response.read().decode('utf-8')
            # fromstring, not fromstringlist: the payload is one decoded
            # string; fromstringlist would feed it to the parser one
            # character at a time.
            tree = eltree.fromstring(xml_text)
            for node in tree.findall('data'):
                data = {"title": node.find('incidentmsg').text,
                        "lat": float(node.find('coordy').text),
                        "lng": float(node.find('coordx').text),
                        "content": node.find('incidentmsg').text
                        }
                accid_arr.append(json.dumps(data, ensure_ascii=False))

        return accid_arr
Example #3
0
def main():
    """Import class layouts from a user-selected XML file into IDA.

    Prompts for an XML file, parses it (repairing it with fix_xml() if the
    first parse fails), then builds a structure for every top-level
    element via parse_class().  The IDA auto-analysis state is restored on
    every exit path.
    """
    ida_auto.set_ida_state(ida_auto.st_Work)
    data = None
    with open(ida_kernwin.ask_file(0, "*.xml",
                                   "Select a file to import")) as f:
        data = f.readlines()

    if data is None:
        ida_auto.set_ida_state(ida_auto.st_Ready)
        return

    ida_kernwin.show_wait_box("Importing file")
    make_basic_structs()

    try:
        # SM 1.10 <= has bad XML; assume it's correct first, then try to
        # repair it on a parse failure.
        tree = et.fromstringlist(data)
    except et.ParseError:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed here.
        fix_xml(data)
        tree = et.fromstringlist(data)

    if tree is None:
        ida_kernwin.hide_wait_box()
        ida_kernwin.warning("Something bad happened :(")
        ida_auto.set_ida_state(ida_auto.st_Ready)
        return

    global IMPORT_VTABLE
    IMPORT_VTABLE = ida_kernwin.ask_yn(
        1, "Import virtual tables for classes? (Longer)")

    for i in tree:
        parse_class(i)
    ida_kernwin.hide_wait_box()
    ida_auto.set_ida_state(ida_auto.st_Ready)
Example #4
0
    def getCCTV(self):
        """Fetch CCTV camera metadata from both service endpoints.

        :return: list of JSON strings with "title" (camera name),
            "lat"/"lng" (coordinates) and "content" (stream url) keys
        """
        cctv_arr = []
        # Query both the "ex" and "its" variants of the endpoint and merge
        # the results in that order (same order as before).
        for road_type in ("ex", "its"):
            response = urllib.request.urlopen(self.cctv_url + self.params + road_type)
            xml_text = response.read().decode('utf-8')
            # fromstring, not fromstringlist: the payload is one decoded
            # string; fromstringlist would feed it to the parser one
            # character at a time.
            tree = eltree.fromstring(xml_text)
            for node in tree.findall('data'):
                data = {"title": node.find('cctvname').text,
                        "lat": float(node.find('coordy').text),
                        "lng": float(node.find('coordx').text),
                        "content": node.find('cctvurl').text
                        }
                cctv_arr.append(json.dumps(data, ensure_ascii=False))

        return cctv_arr
Example #5
0
def create_xml_diff_from_files(file1, file2, logger=None):
    """Parse two XML files and return an XmlComparator over their roots.

    :param file1: path to the first XML file
    :param file2: path to the second XML file
    :param logger: optional logger forwarded to XmlComparator
    :return: XmlComparator built from both parsed roots
    :raises ValueError: if either path is missing/empty
    """
    if not file1 or not file2:
        # ValueError is more precise than a bare Exception and remains
        # backward compatible for callers catching Exception.
        raise ValueError('Expected files path as parameters')

    with open(file1, 'r') as _f1:
        _root1 = ET.fromstringlist(_f1.readlines())

    with open(file2, 'r') as _f2:
        _root2 = ET.fromstringlist(_f2.readlines())

    return XmlComparator(_root1, _root2, logger)
Example #6
0
def split_file(filename):
    """Split a concatenation of patent XML documents into separate files.

    Each patent document starts with an "<?xml" declaration line.  The
    n-th document (counting from 0) is written to "{filename}-{n}", as
    required by the exercise description in the original comments.

    :param filename: path of the concatenated input file
    """
    def _flush(lines, number):
        # Parse the accumulated lines and write them out as one document.
        root = ET.fromstringlist(lines)
        ET.ElementTree(root).write("{}-{}".format(filename, number),
                                   encoding='UTF-8')

    output = []
    file_number = 0

    # `with` guarantees the input handle is closed (the original leaked it).
    with open(filename) as f:
        # The very first line is the first document's XML declaration.
        output.append(f.readline())
        for line in f:
            if line.startswith("<?xml"):
                # A new declaration marks the start of the next patent.
                _flush(output, file_number)
                output = []
                file_number += 1
            output.append(line)

    # Write out the final accumulated document.
    _flush(output, file_number)
Example #7
0
def parse_opml_file(filename):
    """Return the feed URLs of all rss-typed outlines in an OPML file.

    :param filename: path of the OPML file
    :return: list of xmlUrl attribute values (outlines without one are
        skipped)
    """
    with open(filename) as fh:
        root = etree.fromstringlist(fh)

    urls = []
    for outline in root.findall("*/outline/[@type='rss']"):
        url = outline.get('xmlUrl')
        if url is not None:
            urls.append(url)
    return urls
    def __parse_linear_base64(self) -> Element:
        """Parse the message body as XML, decoding inline base64 lines.

        The original parser fails on emails of this shape:
        ```
        Content-Type: text/plain\r\n
        Content-Transfer-Encoding: base64\r\n\r\n
        PHByZW...==\r\n
        ...
        ...==\r\n
        ```
        As a workaround, the raw body is split into lines, base64-looking
        lines are decoded, and the result is parsed as a string list.

        :return: Element (the parsed XML root)
        """
        def decode_payload(data: Any) -> Generator:
            """Yield payload lines, base64-decoding those that look encoded.

            :return: generator of (possibly decoded) lines
            """
            # Accept either a raw str/bytes blob or an iterable of lines;
            # filter(None, ...) drops empty lines either way.
            if isinstance(data, (str, bytes)):
                data = data.splitlines()
            for line in filter(None, data):
                # Heuristic: treat lines ending in '==' padding or '+' as
                # base64.  NOTE(review): base64 lines without those
                # suffixes would pass through undecoded -- confirm the
                # expected payload shape.
                if line[-2:] in ['==', b'=='] or line[-1] in ['+', b'+']:
                    yield body_decode(line)
                else:
                    yield line

        return ElementTree.fromstringlist(
            decode_payload(self.__message.get_payload())
        )
Example #9
0
 def add_data_block(self):
     """Insert data blocks built from the template for keys in data_array.

     2nd step: add a data block with Org as suffix, and put value in it.
     Blocks that already exist under <MEM>, or whose first value is
     missing/empty, are skipped.
     """
     Logger.recordLog("****adding data block*****\r\n")
     parent = self.root.find('MEM')
     datablock_list = parent.findall(DATA_BLOCK)
     # Names of the data blocks already present under <MEM>.
     datablock_namelist = self._elementNameList(datablock_list, DATA_BLOCK_NAME)
     for key in data_array:
         block_name = key
         string = block_name + TEXT_VALUE_EXSIT
         # If the current value of the data block has already been added to
         # the cnt, there is nothing to insert for this key.
         if string in datablock_namelist:
             continue
         elif operator.eq(data_array[key][0], None) or operator.eq(data_array[key][0], '') or operator.eq(data_array[key][0], 'EMPTY'):
             # Skip keys whose first value is missing or a placeholder.
             continue
         #else
         else:
             # Fill the XML template for this block and parse the result.
             new_blocks = self._setValueForTemplate(data_array, block_name, data_block_template)
             text = block_name + SEARCH_BY_TEXT
             # Insert right after the existing block located by name.
             idx = datablock_namelist.index(text) + 1
             new_elements = ETree.fromstringlist(new_blocks)
             element_list = new_elements.findall(DATA_BLOCK)
             # NOTE(review): range(0, 1) inserts only the first child of
             # new_elements -- confirm whether multiple blocks were meant.
             for j in range(0, 1):
                 parent.insert(idx + j + 1,new_elements[j])
                 
     #self._saveCNT()
     Logger.recordLog("****End of adding data block*****\r\n")
Example #10
0
def analysis(start, end, path):
    """Aggregate ArcGIS log XML files into an 'analysis.xlsx' workbook.

    Builds three sheets: per-source request counts, the full log table,
    and per-(user, source) request counts, restricted to the [start, end]
    date range.

    :param start: object whose .get() returns a 'YYYY-MM-DD' string
    :param end: object whose .get() returns a 'YYYY-MM-DD' string
    :param path: log directory; defaults to './data/arcgis_log/' if falsy
    """
    # Import once at the top (the original imported Counter twice inside
    # the function body).
    from collections import Counter

    msg_list = []
    start = datetime.strptime(start.get(), '%Y-%m-%d')
    end = datetime.strptime(end.get(), '%Y-%m-%d')

    if not path:
        path = './data/arcgis_log/'

    wb = openpyxl.Workbook(write_only=True)
    sh_counts_times = wb.create_sheet(title="total count")
    sh_all = wb.create_sheet(title="total table")
    sh_user_usage_count_2 = wb.create_sheet(title="user total count")
    sh_counts_times.append(["source", "count"])
    sh_all.append([
        "time", 'type', 'code', "source", "process", "thread", "methodName",
        "machine", "user", "elapsed", "msg"
    ])
    sh_user_usage_count_2.append(["user", "source", "count"])
    print("start to analysis")
    for fn in os.listdir(path):
        with open('%s/%s' % (path, fn), 'r', encoding='utf-8') as f:
            # The logs have no single root element, so wrap one around them.
            it = itertools.chain('<root>', f, '</root>')
            datas = ET.fromstringlist(it)
            for data in datas:
                split_time = datetime.strptime(
                    data.attrib['time'].split('T')[0], '%Y-%m-%d')
                if start <= split_time <= end:
                    if "Request user" in data.text:
                        msg_list.append(data.text)

                    sh_all.append([
                        data.attrib['time'], data.attrib['type'],
                        data.attrib['code'], data.attrib['source'],
                        data.attrib['process'], data.attrib['thread'],
                        data.attrib['methodName'], data.attrib['machine'],
                        data.attrib['user'], data.attrib['elapsed'], data.text
                    ])

    # Parse user and mapserver out of each message once, instead of the
    # original's two separate passes over msg_list joined by a '$$$'
    # sentinel string.  Counter preserves first-seen order, so the sheet
    # row order is unchanged.
    source_counter = Counter()
    user_source_counter = Counter()
    for m in msg_list:
        parts = m.split(',')
        user = parts[0].split(':')[1][1:]
        mapserver = parts[1].split(':')[1][1:]
        source_counter[mapserver] += 1
        user_source_counter[(user, mapserver)] += 1

    for source, count in source_counter.items():
        sh_counts_times.append([source, count])
    for (user, source), count in user_source_counter.items():
        sh_user_usage_count_2.append([user, source, count])

    wb.save('analysis.xlsx')
Example #11
0
def readXml(filename):
    """Parse *filename* as XML and return the root element.

    A synthetic <root> wrapper is chained around the file contents so
    that documents without a single top-level tag still parse; the tree
    can then be walked starting from the returned root node.
    """
    with open(filename) as f:
        wrapped = itertools.chain('<root>', f, '</root>')
        return ET.fromstringlist(wrapped)
Example #12
0
def split_file(filename):
    """Split a concatenation of XML documents into one file per document.

    Document boundaries are "<?xml" declaration lines; the n-th document
    is written to "{filename}-{n}".  Note: the declaration line of every
    document after the first is intentionally not carried into the next
    chunk (matching the original behaviour); the chunks still parse.

    :param filename: path of the concatenated input file
    """
    data = list()
    results = list()
    n = 0
    with open(filename, 'rb') as f:
        lines = f.readlines()
        for i in range(0, len(lines)):
            line = lines[i]
            # The file is opened in binary mode, so the prefix must be
            # bytes: comparing against the str "<?xml" raises TypeError
            # on Python 3.
            if line.startswith(b"<?xml") and len(data) > 0:
                results.append(data)
                data = list()
            else:
                data.append(line)

            if i == (len(lines) - 1):
                results.append(data)

    for result in results:
        t = ET.ElementTree(ET.fromstringlist(result))
        new_filename = "{}-{}".format(filename, n)
        n += 1
        t.write(new_filename,
                xml_declaration=True,
                method="xml",
                encoding="UTF-8")
Example #13
0
    def parse(self, ofx):
        """Parse raw OFX text: header lines first, then the SGML-ish body.

        Header lines of the form "KEY:VALUE" are read until the first
        blank line and stored in self.headers, with defaults filled in for
        any mandatory header that is missing.  The body is then repaired
        into well-formed XML (OFX 1.x allows unclosed element tags) and
        handed to load_from_xml().

        :param ofx: the full OFX document as a string
        :raises InvalidOFXStructureException: if the body cannot be
            repaired into parseable XML
        """
        try:
            for line in ofx.splitlines():
                if line.strip() == "":
                    # A blank line terminates the header section.
                    break
                header, value = line.split(":")
                self.headers[header] = value
        except ValueError:
            # A line without exactly one ':' ends header parsing early.
            pass
        finally:
            # Guarantee the mandatory headers exist with default values.
            if "OFXHEADER" not in self.headers:
                self.headers["OFXHEADER"] = "100"
            if "VERSION" not in self.headers:
                self.headers["VERSION"] = "102"
            if "SECURITY" not in self.headers:
                self.headers["SECURITY"] = "NONE"
            if "OLDFILEUID" not in self.headers:
                self.headers["OLDFILEUID"] = "NONE"
            if "NEWFILEUID" not in self.headers:
                self.headers["NEWFILEUID"] = "NONE"

        try:
            tags = ofx.split("<")
            if len(tags) > 1:
                # Re-attach the '<' that split() removed from each fragment.
                tags = ["<" + t.strip() for t in tags[1:]]

            heirarchy = []  # stack of currently-open tag names
            can_open = True  # False while a text-bearing element is open

            for i, tag in enumerate(tags):
                gt = tag.index(">")
                if tag[1] != "/":
                    # Is an opening tag
                    if not can_open:
                        # Close the previous text-bearing element before
                        # opening a new one.
                        tags[i - 1] = tags[i - 1] + "</" + \
                            heirarchy.pop() + ">"
                        can_open = True
                    tag_name = tag[1:gt].split()[0]
                    heirarchy.append(tag_name)
                    if len(tag) > gt + 1:
                        # Trailing text after '>' means this element must
                        # be closed before the next one can open.
                        can_open = False
                else:
                    # Is a closing tag
                    tag_name = tag[2:gt].split()[0]
                    if tag_name not in heirarchy:
                        # Close tag with no matching open, so delete it
                        tags[i] = tag[gt + 1:]
                    else:
                        # Close tag with matching open, but other open
                        # tags that need to be closed first
                        while(tag_name != heirarchy[-1]):
                            tags[i - 1] = tags[i - 1] + "</" + \
                                heirarchy.pop() + ">"
                        can_open = True
                        heirarchy.pop()

            self.xml = ET.fromstringlist(tags)
            self.load_from_xml(self, self.xml)
        except Exception:
            raise InvalidOFXStructureException
Example #14
0
    def parse_XML(self, output, returncode, isTimeout):
        """Map CBMC XML output to a verification status string.

        :param output: iterable of XML output lines from the tool
        :param returncode: the tool's exit code
        :param isTimeout: whether the run was killed by a timeout
        :return: a status string, e.g. 'OUT OF MEMORY', 'ERROR (...)',
            'TIMEOUT', 'INVALID OUTPUT', or one of the result.RESULT_*
            constants
        """
        # An empty tag cannot be parsed into a tree, so rewrite it first.
        def sanitizeXML(s):
            return s.replace("<>", "<emptyTag>") \
                    .replace("</>", "</emptyTag>")

        try:
            tree = ET.fromstringlist(map(sanitizeXML, output))
            status = tree.findtext('cprover-status')

            if status is None:

                def isErrorMessage(msg):
                    return msg.get('type', None) == 'ERROR'

                # Element.getiterator() was removed in Python 3.9;
                # Element.iter() is the supported equivalent.
                messages = list(filter(isErrorMessage, tree.iter('message')))
                if messages:
                    # For now, use only the first error message if there
                    # are several.
                    msg = messages[0].findtext('text')
                    if msg == 'Out of memory':
                        status = 'OUT OF MEMORY'
                    elif msg:
                        status = 'ERROR ({0})'.format(msg)
                    else:
                        status = 'ERROR'
                else:
                    status = 'INVALID OUTPUT'

            elif status == "FAILURE":
                assert returncode == 10
                reason = tree.find('goto_trace').find('failure').findtext(
                    'reason')
                if not reason:
                    reason = tree.find('goto_trace').find('failure').get(
                        'reason')
                if 'unwinding assertion' in reason:
                    status = result.RESULT_UNKNOWN
                else:
                    status = result.RESULT_FALSE_REACH

            elif status == "SUCCESS":
                assert returncode == 0
                if "--no-unwinding-assertions" in self.options:
                    status = result.RESULT_UNKNOWN
                else:
                    status = result.RESULT_TRUE_PROP

        except Exception:
            if isTimeout:
                # In this case an exception is expected as the XML is
                # invalid.
                status = 'TIMEOUT'
            elif 'Minisat::OutOfMemoryException' in output:
                status = 'OUT OF MEMORY'
            else:
                status = 'INVALID OUTPUT'
                logging.exception(
                    "Error parsing CBMC output for returncode %d", returncode)

        return status
Example #15
0
def log_response_error(response_error):
    """Log an unexpected ownCloud response, unpacking SabreDAV error XML.

    @type response_error: owncloud.ResponseError
    """

    message = response_error.get_resource_body()

    # Only try XML parsing when the body starts with the exact
    # 38-character declaration (index 38, a newline, is skipped below).
    if message[:38] == '<?xml version="1.0" encoding="utf-8"?>':
        import xml.etree.ElementTree as ElementTree

        response_exception = ''
        response_message = ''
        body = message[39:]

        root_element = ElementTree.fromstringlist(body)
        if root_element.tag == '{DAV:}error':
            for child in root_element:
                if child.tag == '{http://sabredav.org/ns}exception':
                    response_exception = child.text
                if child.tag == '{http://sabredav.org/ns}message':
                    response_message = child.text

        if response_exception != '':
            message = 'SabreDAV Exception: %s - Message: %s' % (
                response_exception, response_message)

    logger.error('Unexpected response: Status code: %i - %s' %
                 (response_error.status_code, message))
    logger.info('Full Response: %s' % (response_error.get_resource_body()))
Example #16
0
def main():
	"""Interpret an IPPcode19 program supplied as XML.

	Reads the XML source (from a file or stdin), validates the root
	<program> element and its attributes, then lexically and syntactically
	checks the instruction list before interpreting it.
	"""
	sourceFile, inputFile = parseArguments()

	if sourceFile:
		sourceLines = getLinesFromFile(sourceFile)
	else:
		sourceLines = [line.strip() for line in sys.stdin]

	inputLines = getLinesFromFile(inputFile)

	try:
		root = ET.fromstringlist(sourceLines)
	except ET.ParseError:
		# Narrowed from a bare `except:` so unrelated exceptions (e.g.
		# KeyboardInterrupt) are not reported as malformed XML.
		exitWithError(errorTypes.xmlNotWellFormated)

	# The root element must be <program language="IPPcode19"> with only
	# the allowed attributes.
	customAssert(root.tag == "program", errorTypes.xmlStructureSyntaxLex)
	customAssert(root.get("language") == "IPPcode19", errorTypes.xmlStructureSyntaxLex)
	for atrib in root.attrib:
		customAssert(atrib in ("language", "name" , "description"), errorTypes.xmlStructureSyntaxLex)
	checkXmlHeader(sourceLines)

	instructionsList = getInstructionsFromXml(root)

	checkOperandLexems(instructionsList)
	checkSyntax(instructionsList)

	checkLabelsSematics(instructionsList)

	instructionsList = replaceEscapeSequences(instructionsList)

	interpretCode(instructionsList, inputLines)
Example #17
0
    def fill_dictionary(cls, result_dir):
        """
        Parse files.xml and symbols.xml and collect checker results.

        :param result_dir: directory containing the pkgdiff XML reports
        :return: dict mapping each checker tag to a list of result lines
        """
        XML_FILES = ['files.xml', 'symbols.xml']
        # Start with an empty bucket for every known checker tag.
        results_dict = {tag: [] for tag in settings.CHECKER_TAGS}

        for base_name in XML_FILES:
            file_name = os.path.join(result_dir, base_name)
            logger.info('Processing %s file.', file_name)
            try:
                with open(file_name, "r") as f:
                    # The reports lack a single root element; wrap one on.
                    lines = ['<pkgdiff>'] + f.readlines() + ['</pkgdiff>']
                pkgdiff_tree = ElementTree.fromstringlist(lines)
                for tag in settings.CHECKER_TAGS:
                    for pkgdiff in pkgdiff_tree.findall('.//' + tag):
                        results_dict[tag].extend(
                            line.strip()
                            for line in pkgdiff.text.strip().split('\n'))
            except IOError:
                # Missing/unreadable report files are simply skipped.
                continue

        return results_dict
Example #18
0
def extractDeletionKeys(file):
    """
    [String] full path of XML file => [Iterable] key values of trades

    Reads the file as lines, passes them through addRemoveHeader()
    (presumably adding a missing root/header wrapper -- confirm), parses
    the result, then lazily yields keyValue() for every child element
    satisfying isDeletion().  Returns a lazy map object.
    """
    return map(keyValue
               , filter(isDeletion
                        , ET.fromstringlist(addRemoveHeader(fileToLines(file)))))
def log_response_error(response_error):
    """
    Log an unexpected ownCloud response, unpacking SabreDAV error XML.

    @type response_error: owncloud.ResponseError
    """

    message = response_error.get_resource_body()

    # Only attempt XML parsing when the body starts with the exact
    # 38-character XML declaration (index 38, a newline, is skipped below).
    if message[:38] == '<?xml version="1.0" encoding="utf-8"?>':
        import xml.etree.ElementTree as ElementTree

        response_exception = ''
        response_message = ''
        response = message[39:]

        # NOTE: fromstringlist() iterates its argument, so a plain string
        # is fed to the parser one character at a time; it still parses.
        root_element = ElementTree.fromstringlist(response)
        if root_element.tag == '{DAV:}error':
            for child in root_element:
                if child.tag == '{http://sabredav.org/ns}exception':
                    response_exception = child.text
                if child.tag == '{http://sabredav.org/ns}message':
                    response_message = child.text

        # Replace the raw body with a readable summary when an exception
        # name was found.
        if response_exception != '':
            message = 'SabreDAV Exception: %s - Message: %s' % (response_exception, response_message)

    logger.error('Unexpected response: Status code: %i - %s' % (response_error.status_code, message))
    logger.info('Full Response: %s' % (response_error.get_resource_body()))
Example #20
0
 def getCores(self, xmlData):
     """ Return a list with all core names inside a xmlData list """
     # A list comprehension replaces the original map(...) call: map() is
     # lazy in Python 3, so its side-effecting append lambdas never ran
     # and the function always returned an empty list.
     root = ET.fromstringlist(xmlData)
     return [x.text for x in root.findall(".//*[@name='name']")]
Example #21
0
def main():
    """Relabel NODE elements of a corpus using an XML-RPC classifier.

    Reads XML lines from stdin, asks the classifier service (on
    localhost:8000) to label each sentence, applies the answers via
    make_decision(), and prints the modified corpus to stdout.
    """
    s = xmlrpc.client.ServerProxy('http://localhost:8000')

    # Collect non-blank stdin lines and parse them as one XML document.
    lines = []
    for line in sys.stdin:
        if line.strip():
            lines.append(line.strip())
    corpus = ET.fromstringlist(lines)

    for sentence in corpus:
        sentnum = sentence.attrib['ref']
        tuples = lexsel_util.get_tuples(sentence)
        # tup[1] holds the surface form -- used only for debug output.
        surface = [tup[1] for tup in tuples]
        dprint("[SURFACE]", " ".join(surface))
        answers = s.label_sentence(tuples)
        dprint("[ANSWERS]", answers)
        ## all the NODE elements in the tree that have a SYN underneath
        target_nodes = sentence.findall(".//NODE/SYN/..")
        changed = False
        for node in target_nodes:
            changed_here = make_decision(node, answers)
            if changed_here:
                changed = True
        if changed:
            # Record which sentences the classifier actually modified.
            dprint("[CLASSIFIERSENTENCE]", sentnum)

    print(ET.tostring(corpus,encoding="unicode"))
Example #22
0
 def test_variationFontOrigin(self):
     # Glyphs 2.4.1 introduced a custom parameter “Variation Font Origin”
     # to specify which master should be considered the origin.
     # https://glyphsapp.com/blog/glyphs-2-4-1-released
     masters = [
         makeMaster("Family", "Thin", weight=26),
         makeMaster("Family", "Regular", weight=100),
         makeMaster("Family", "Medium", weight=111),
         makeMaster("Family", "Black", weight=190),
     ]
     instances = {
         "data": [
             makeInstance("Black", weight=("Black", 900, 190)),
             makeInstance("Medium", weight=("Medium", 444.4, 111)),
             makeInstance("Regular", weight=("Regular", 400, 100)),
             makeInstance("Thin", weight=("Thin", 100, 26)),
         ],
         "Variation Font Origin":
         "Medium",
     }
     doc = etree.fromstringlist(self.build_designspace(masters, instances))
     # The designated origin source must be marked with <lib copy="1">.
     medium = doc.find('sources/source[@stylename="Medium"]')
     self.assertEqual(medium.find("lib").attrib["copy"], "1")
     # ...and the weight axis default must match the Medium instance's
     # user-space value (444.4).
     weightAxis = doc.find('axes/axis[@tag="wght"]')
     self.assertEqual(weightAxis.attrib["default"], "444.4")
Example #23
0
def _pdf_get_all_pageinfo(infile, log=None):
    """Build a PageInfo object for every page of a PDF.

    Extracts any existing text layer via ghostscript, wraps it in a
    <document> root and parses it; on parse failure the file is treated
    as having no existing text.

    :param infile: path of the PDF to inspect
    :param log: optional logger; a Mock (no-op) is used when omitted
    :return: tuple (list of PageInfo, the opened pikepdf object)
    """
    if not log:
        log = Mock()

    pdf = pikepdf.open(infile)

    existing_text = ghostscript.extract_text(infile, pageno=None)
    # Strip per-character tags so only page-level markup remains.
    existing_text = regex_remove_char_tags.sub(b' ', existing_text)

    try:
        root = ET.fromstringlist(
            [b'<document>\n', existing_text, b'</document>\n'])
        page_xml = root.findall('page')
    except ET.ParseError as e:
        log.error(
            "An error occurred while attempting to retrieve existing text in "
            "the input file. Will attempt to continue assuming that there is "
            "no existing text in the file. The error was:")
        log.error(e)
        # Fall back to "no text" placeholders, one per PDF page.
        page_xml = [None] * len(pdf.pages)

    page_count_difference = len(pdf.pages) - len(page_xml)
    if page_count_difference != 0:
        log.error("The number of pages in the input file is inconsistent.")
        if page_count_difference > 0:
            # Pad with None so indexing by page number stays in range.
            page_xml.extend([None] * page_count_difference)

    pages = []
    for n in range(len(pdf.pages)):
        page = PageInfo(pdf, n, infile, page_xml[n])
        pages.append(page)

    return pages, pdf
Example #24
0
def preprocess(input_fps, output_fp, zero_proportion):
    """Convert rootless sentence XML inputs into labelled TSV lines.

    For every <sent>/<sent_tag> element, writes one line: a random binary
    label (0 with probability *zero_proportion*, else 1), a tab, then the
    sentence's characters -- per source line, everything up to (but not
    including) the first '/' character.
    """
    for input_fp in input_fps:
        # These XML files don't have a "root" (outermost) node, so chain
        # one around the contents before parsing.
        wrapped = itertools.chain('<root>', input_fp, '</root>')
        document = ElementTree.fromstringlist(wrapped)

        for sentence in document.findall('sent/sent_tag'):
            # Binary label for this sentence.
            label = 0 if random.random() < zero_proportion else 1
            output_fp.write(str(label))

            # Tab separator between label and text.
            output_fp.write('\t')

            for raw_line in sentence.text.splitlines():
                stripped = raw_line.strip()
                if not stripped:
                    # Skip empty lines.
                    continue
                for ch in stripped:
                    # Stop writing at the first '/' character.
                    if ch == '/':
                        break
                    # FIXME: should this check hanzidentifer.has_chinese(ch)?
                    output_fp.write(ch)

            # Newline separator between sentences.
            output_fp.write('\n')
Example #25
0
def load_xml_tree(xml_path):
    """Utility function for loading mujoco xml files that may contain
    nested include tags

    Args:

    xml_path: str
        a path to a mujoco xml file with include tags to be expanded

    Returns:

    tree: ElementTree.Element
        an element that represents the root node of an xml tree
    """

    with open(xml_path, "r") as f:

        # NOTE(review): the first 16 lines are skipped unconditionally --
        # presumably a fixed header block in these files; confirm this
        # holds for every file loaded through here.
        root = ET.fromstringlist(f.readlines()[16:])

        for c in root.findall(".//include"):
            file = c.attrib['file']
            # Included paths are resolved relative to the including file.
            target = os.path.join(os.path.dirname(xml_path), file)

            # The trailing '...' XPath step selects the parent of the
            # matched <include> element.
            p = root.find(f".//include[@file='{file}']...")
            i = list(p).index(c)
            p.remove(c)

            # Recursively load the include target and splice its children
            # in at the include's position (reversed keeps their order).
            for s in reversed(load_xml_tree(target)):
                p.insert(i, s)

        return root
Example #26
0
def from_stringlist(sequence):
    """Parse a sequence of XML fragments and convert the root element.

    :param sequence: iterable of strings forming one XML document
    :return: the result of from_element() applied to the parsed root
    :raises VaspParseError: if the XML cannot be parsed
    """
    try:
        element = ElementTree.fromstringlist(sequence)
    except ElementTree.ParseError as e:
        # Wrap the stdlib error in the package-specific exception type.
        raise VaspParseError(e)

    return from_element(element)
Example #27
0
def retrieve_xml_post_information():
    """Read ./sampledata_1/Posts.xml and index posts by id.

    Questions (PostTypeId == '1') are keyed by their own Id; answers
    (PostTypeId == '2') are keyed by their ParentId, so answers group
    under their question.

    :return: tuple (post_dict, title_dict, answers_dict), each mapping a
        post id to a list of Body/Title strings
    """
    # `with` guarantees the handle is closed (the original leaked it).
    with open("./sampledata_1/Posts.xml", "r") as posts:
        root_node = ET.fromstringlist(posts)

    post_dict = {}
    title_dict = {}
    answers_dict = {}
    # Iterate child elements directly instead of indexing by range().
    for row in root_node:
        post_type_id = row.get('PostTypeId')
        if post_type_id == '1':
            post_id = row.get('Id')
            # setdefault replaces the original if/else append dance.
            post_dict.setdefault(post_id, []).append(row.get('Body'))
            title_dict.setdefault(post_id, []).append(row.get('Title'))
        elif post_type_id == '2':
            post_id = row.get('ParentId')
            answers_dict.setdefault(post_id, []).append(row.get('Body'))

    return post_dict, title_dict, answers_dict
Example #28
0
def get_xml_file(xml_file: str) -> ElementTree.Element:
	"""
	Gets an XML File from the data.tar.gz file
	:param xml_file: The name of the XML File to parse
	:return: The Element Tree of the XML File
	"""
	# get_file() presumably returns the file's contents as an iterable of
	# strings (confirm); fromstringlist() parses such a sequence directly.
	return ElementTree.fromstringlist(get_file(xml_file))
Example #29
0
def get_scheme_letters(inputfile):
    """Parse a rhyme-analysis XML file and extract its rhyme scheme.

    :param inputfile: path of the generated XML file
    :return: tuple (scheme_letters dict, XML root element), or
        (None, None) if the file cannot be parsed
    """
    # Mark '&' correctly because it is sometimes used incorrectly in generated
    # xmls.
    fixed_input = []
    with open(inputfile) as input:
        for line in input:
            fixed_input.append(re.sub('&(?!amp;)', '&amp;', line))
    try:
        root = ET.fromstringlist(fixed_input)
    except Exception as error:
        print('Failed analyzing', inputfile)
        print(error)
        return None, None

    # Parse rhyme scheme.
    # Change Prolog format into JSON to be parsed as a dictionary.
    # NOTE(review): this assumes the scheme attribute lives at root[2][0];
    # confirm the generator always emits that exact structure.
    scheme = root[2][0].attrib['Stanza-based_Rhyme_Schemes']
    scheme = scheme.replace('-', '\":\"'
                            '').replace('[', '{\"').replace(']',
                                                            '\"}').replace(
                                                                ',', '\",\"')
    scheme = '{\"scheme\":' + scheme.replace('\"{', '{').replace('}"',
                                                                 '}') + '}'
    # Get rid of empty values with regex.
    scheme = re.sub('\".?\":\{\"\"\},', '', scheme)
    scheme = json.loads(scheme)
    # Flatten the per-stanza dicts into one mapping of line keys to
    # scheme letters.
    scheme_letters = {}
    for stanza in scheme['scheme'].values():
        scheme_letters.update(stanza)
    return scheme_letters, root
def standardize_file_target(file_target):
    """For file targets that are not source files, return the target that generated them.

    This is needed because rdeps of generated files do not include targets that reference
    their generating rules.
    https://github.com/bazelbuild/bazel/issues/4949
    """

    query_result = bazel_query(['--output=xml', file_target])
    if not query_result:
        sys.exit(
            "Empty query response for {}. It is probably not handled by bazel".
            format(file_target))

    target_xml = ElementTree.fromstringlist(query_result.split('\n'))
    # A <source-file> element means the target is already a source file,
    # so it can be used as-is.
    source_element = target_xml.find('source-file')
    if source_element is not None:
        return file_target

    # A <generated-file> element carries the rule that produces it.
    generated_element = target_xml.find('generated-file')
    if generated_element is not None:
        return generated_element.get('generating-rule')

    # Neither element kind found: the query output is unusable.
    sys.exit("Error parsing query xml for " + file_target + ":\n" +
             query_result)
Example #31
0
def main():
    """Import class layouts from a user-selected XML file into IDA.

    Prompts for an XML file, repairs it with fix_xml(), parses it, and
    builds a structure for every top-level element via parse_class().
    """
    data = None
    with open(ida_kernwin.ask_file(0, "*.xml",
                                   "Select a file to import")) as f:
        data = f.readlines()

    if data is None:
        return

    ida_kernwin.show_wait_box("Importing file")
    # fix_xml() mutates the line list in place before it is parsed below.
    fix_xml(data)
    make_basic_structs()

    tree = et.fromstringlist(data)
    if (tree is None):
        ida_kernwin.hide_wait_box()
        ida_kernwin.warning("Something bad happened :(")
        return

    global IMPORT_VTABLE
    # Ask once up front; presumably consumed by parse_class() -- confirm.
    IMPORT_VTABLE = ida_kernwin.ask_yn(
        1, "Import virtual tables for classes? (Longer)")

    for i in tree:
        parse_class(i)
    ida_kernwin.hide_wait_box()
Example #32
0
def main():
    """Collect varnishstat metrics in a loop and print them in TSD format.

    Python 2 script (uses ``except OSError, e`` and print statements);
    relies on module-level vstats, metric_prefix, interval and
    use_varnishstat_timestamp settings.
    """
    # ignore SIGCHLD, prevent the zombie apocalypse
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    utils.drop_privileges()
    bad_regex = re.compile("[,()]+")  # avoid forbidden by TSD symbols

    while True:
        try:
            if vstats == "all":
                stats = subprocess.Popen(
                    ["varnishstat", "-1", "-x"],
                    stdout=subprocess.PIPE,
                )
            else:
                fields = ",".join(vstats)
                stats = subprocess.Popen(
                    ["varnishstat", "-1", "-f" + fields, "-x"],
                    stdout=subprocess.PIPE,
                )
        except OSError, e:
            # Die and signal to tcollector not to run this script.
            sys.stderr.write("Error: %s\n" % e)
            sys.exit(13)

        # NOTE(review): metrics is a single string here, so fromstringlist
        # feeds it to the parser one character at a time; fromstring would
        # be the clearer call.
        metrics = ""
        for line in stats.stdout.readlines():
            metrics += line
        metrics = ET.fromstringlist(metrics)

        timestamp = ""
        if use_varnishstat_timestamp:
            pattern = "%Y-%m-%dT%H:%M:%S"
            # NOTE(review): Element objects do not support string indexing;
            # metrics['timestamp'] looks like it would raise TypeError --
            # confirm this branch has ever been exercised.
            timestamp = int(
                time.mktime(time.strptime(metrics['timestamp'], pattern)))
        else:
            timestamp = time.time()

        for stat in metrics.findall('stat'):
            tags = ""
            k = stat.findtext('name')
            if None == bad_regex.search(k):
                stattype = stat.findtext('type')
                if stattype == None:
                    metric_name = metric_prefix + "." + k
                elif stattype == "LCK":
                    metric_name = metric_prefix + ".locks." + k
                    ident = stat.findtext('ident')
                    tags = "ident=" + ident
                elif stattype == "SMA":
                    metric_name = metric_prefix + ".storage." + k
                    ident = stat.findtext('ident')
                    tags = "ident=" + ident
                else:
                    # Only plain, LCK and SMA stat types are emitted.
                    continue
                print "%s %d %s %s" % \
                  (metric_name, timestamp, stat.findtext('value'), tags)

        sys.stdout.flush()
        time.sleep(interval)
def get_model(serial_number):
    """A helper function to get the friendly model.
    Args:
        serial_number:  Devices' Serial Number.
    Returns:
        stdout:  friendly model name or "".
    """
    # Apple's lookup service keys off the last 4 characters of 12-char
    # serials and the last 3 characters of 11-char serials.
    if len(serial_number) == 12:
        lookup_code = serial_number[-4:]
    elif len(serial_number) == 11:
        lookup_code = serial_number[-3:]
    else:
        print("Unexpected serial number length:  {}".format(serial_number))
        return ""

    lookup_url = "https://support-sp.apple.com/sp/product?cc={lookup_code}".format(
        lookup_code=lookup_code)

    xml = urllib.urlopen(lookup_url).read()

    try:
        # fromstring parses the whole response at once; the previous
        # fromstringlist call fed the document to the parser one
        # character at a time.
        tree = ElementTree.fromstring(xml)
        model_friendly = tree.find('.//configCode').text
        return model_friendly

    except ElementTree.ParseError as err:
        # ParseError has no .strerror attribute (the old code raised
        # AttributeError here); format the exception itself instead.
        print("Failed to retrieve model name:  {}".format(err))
        return ""
Example #34
0
def read_xml(path):
    """Read an Android-style strings XML resource file via ElementTree.

    Most plain string contents are read fine, but when a <string> element
    nests child tags, the nested content is lost: only the text before the
    first child tag is returned for that entry.

    :param path: path of the XML file to read
    :return: (keys, values) lists, or None when *path* is missing/empty
    """
    if path is None or len(path) == 0:
        Log().error('file path is None')
        return

    # "with" guarantees the file handle is closed even if parsing raises
    # (the old explicit close() was skipped on exceptions).
    with open(path, encoding='utf-8') as file:
        string_list = file.read()
    root = ElementTree.fromstringlist(string_list)
    keys = []
    values = []
    for item in root.findall('string'):
        # Each entry is keyed by its "name" attribute.
        keys.append(item.attrib['name'])
        values.append(item.text)
    return keys, values
Example #35
0
 def parse(self, filename):
     """Parse *filename* wrapped in a synthetic <root> element and process it."""
     self.last_filter = []
     with open(filename) as source:
         wrapped = ['<root>', source.read(), '</root>']
     root = ET.fromstringlist(wrapped)
     self.process(root)
     return root
Example #36
0
    def parse(self, ofx):
        """Parse raw OFX text into self.headers and self.xml.

        The colon-separated header block (everything before the first blank
        line) is read first and missing mandatory headers get defaults. The
        SGML-style body -- where leaf tags have no matching close tags -- is
        then repaired into well-formed XML, parsed, and loaded via
        load_from_xml.

        :param ofx: full OFX document as a single string
        :raises InvalidOFXStructureException: when the body cannot be repaired
        """
        try:
            # Header block: "KEY:VALUE" lines up to the first blank line.
            for line in ofx.splitlines():
                if line.strip() == "":
                    break
                header, value = line.split(":")
                self.headers[header] = value
        except ValueError:
            pass
        finally:
            # Guarantee the mandatory OFX headers with standard defaults.
            if "OFXHEADER" not in self.headers:
                self.headers["OFXHEADER"] = "100"
            if "VERSION" not in self.headers:
                self.headers["VERSION"] = "102"
            if "SECURITY" not in self.headers:
                self.headers["SECURITY"] = "NONE"
            if "OLDFILEUID" not in self.headers:
                self.headers["OLDFILEUID"] = "NONE"
            if "NEWFILEUID" not in self.headers:
                self.headers["NEWFILEUID"] = "NONE"

        try:
            # Split the body into "<..." chunks, one per tag occurrence.
            tags = ofx.split("<")
            if len(tags) > 1:
                tags = ["<" + t.strip() for t in tags[1:]]

            # Stack of currently-open tag names ("heirarchy" spelling kept
            # as-is; renaming it would change code bytes).
            heirarchy = []
            can_open = True

            for i, tag in enumerate(tags):
                gt = tag.index(">")
                if tag[1] != "/":
                    # Is an opening tag
                    if not can_open:
                        # The previous leaf tag was never closed in the
                        # source; synthesize its close tag now.
                        tags[i - 1] = tags[i - 1] + "</" + \
                            heirarchy.pop() + ">"
                        can_open = True
                    tag_name = tag[1:gt].split()[0]
                    heirarchy.append(tag_name)
                    if len(tag) > gt + 1:
                        # Trailing text after '>' marks a leaf tag, so the
                        # next open tag must close it first.
                        can_open = False
                else:
                    # Is a closing tag
                    tag_name = tag[2:gt].split()[0]
                    if tag_name not in heirarchy:
                        # Close tag with no matching open, so delete it
                        tags[i] = tag[gt + 1:]
                    else:
                        # Close tag with matching open, but other open
                        # tags that need to be closed first
                        while (tag_name != heirarchy[-1]):
                            tags[i - 1] = tags[i - 1] + "</" + \
                                heirarchy.pop() + ">"
                        can_open = True
                        heirarchy.pop()

            self.xml = ET.fromstringlist(tags)
            self.load_from_xml(self, self.xml)
        except Exception:
            raise InvalidOFXStructureException
def split_file(filename):
    """
    Split the input file into separate files, each containing a single patent.
    Each patent declaration starts with an XML declaration line.

    The new files are saved with filename in the following format:
    "{}-{}".format(filename, n) where n is a counter, starting from 0.
    """
    data = []
    results = []
    with open(filename, "rb") as f:
        for line in f:
            # The file is opened in binary mode, so the prefix must be a
            # bytes literal (a str prefix raises TypeError on Python 3).
            if line.startswith(b"<?xml") and len(data) > 0:
                # A new XML declaration starts the next patent: flush the
                # chunk collected so far.
                results.append(data)
                data = []
            else:
                data.append(line)
        # Flush the final patent, guarding against an empty trailing chunk.
        if data:
            results.append(data)

    for n, res in enumerate(results):
        tree = ET.ElementTree(ET.fromstringlist(res))
        tree.write("{}-{}".format(filename, n),
                   xml_declaration=True, method="xml", encoding="UTF-8")
Example #38
0
def main():
 """Collect varnishstat metrics in a loop and print them in TSD format.

 Python 2 script (``except OSError, e`` and print statements); relies on
 module-level vstats, metric_prefix, interval and
 use_varnishstat_timestamp settings.
 """
 # ignore SIGCHLD, prevent the zombie apocalypse
 signal.signal(signal.SIGCHLD, signal.SIG_IGN)

 utils.drop_privileges()
 bad_regex = re.compile("[,()]+")  # avoid forbidden by TSD symbols

 while True:
    try:
      if vstats == "all":
        stats = subprocess.Popen(
          ["varnishstat", "-1", "-x"],
          stdout=subprocess.PIPE,
        )
      else:
        fields = ",".join(vstats)
        stats = subprocess.Popen(
          ["varnishstat", "-1", "-f" + fields, "-x"],
          stdout=subprocess.PIPE,
        )
    except OSError, e:
      # Die and signal to tcollector not to run this script.
      sys.stderr.write("Error: %s\n" % e)
      sys.exit(13)

    # NOTE(review): metrics is a single string, so fromstringlist feeds the
    # parser one character at a time; fromstring would be clearer.
    metrics = ""
    for line in stats.stdout.readlines():
      metrics += line
    metrics = ET.fromstringlist(metrics)

    timestamp = ""
    if use_varnishstat_timestamp:
      pattern = "%Y-%m-%dT%H:%M:%S"
      # NOTE(review): Element objects do not support string indexing;
      # metrics['timestamp'] looks like a TypeError waiting to happen --
      # confirm this branch.
      timestamp = int(time.mktime(time.strptime(metrics['timestamp'], pattern)))
    else:
      timestamp = time.time()

    for stat in metrics.findall('stat'):
      tags = ""
      k = stat.findtext('name')
      if None == bad_regex.search(k):
        stattype = stat.findtext('type')
        if stattype == None:
          metric_name = metric_prefix + "." + k
        elif stattype == "LCK":
          metric_name = metric_prefix + ".locks." + k
          ident = stat.findtext('ident')
          tags = "ident=" + ident
        elif stattype == "SMA":
          metric_name = metric_prefix + ".storage." + k
          ident = stat.findtext('ident')
          tags = "ident=" + ident
        else:
          # Only plain, LCK and SMA stat types are emitted.
          continue
        print "%s %d %s %s" % \
          (metric_name, timestamp, stat.findtext('value'), tags)

    sys.stdout.flush()
    time.sleep(interval)
Example #39
0
    def parse_XML(self, output, returncode, isTimeout):
        """Translate CBMC's XML output into a benchexec status string.

        :param output: iterable of raw output lines from the tool
        :param returncode: the tool's exit code (asserted per status)
        :param isTimeout: whether the run was killed by a timeout
        :return: a status string / result constant
        """
        # an empty tag cannot be parsed into a tree
        def sanitizeXML(s):
            return s.replace("<>", "<emptyTag>").replace("</>", "</emptyTag>")

        try:
            tree = ElementTree.fromstringlist(map(sanitizeXML, output))
            status = tree.findtext("cprover-status")

            if status is None:
                # No status tag: fall back to scanning ERROR messages.

                def isErrorMessage(msg):
                    return msg.get("type", None) == "ERROR"

                # NOTE(review): getiterator() was removed in Python 3.9;
                # iter() is the modern spelling -- confirm target version.
                messages = list(filter(isErrorMessage, tree.getiterator("message")))
                if messages:
                    # for now, use only the first error message if there are several
                    msg = messages[0].findtext("text")
                    if msg == "Out of memory":
                        status = "OUT OF MEMORY"
                    elif msg == "SAT checker ran out of memory":
                        status = "OUT OF MEMORY"
                    elif msg:
                        status = "ERROR ({0})".format(msg)
                    else:
                        status = "ERROR"
                else:
                    status = "INVALID OUTPUT"

            elif status == "FAILURE":
                assert returncode == 10
                # Prefer the <reason> child text; fall back to the attribute.
                reason = tree.find("goto_trace").find("failure").findtext("reason")
                if not reason:
                    reason = tree.find("goto_trace").find("failure").get("reason")
                if "unwinding assertion" in reason:
                    status = result.RESULT_UNKNOWN
                else:
                    status = result.RESULT_FALSE_REACH

            elif status == "SUCCESS":
                assert returncode == 0
                # Success only proves the property when unwinding assertions
                # were enabled on the command line.
                if "--unwinding-assertions" in self.options:
                    status = result.RESULT_TRUE_PROP
                else:
                    status = result.RESULT_UNKNOWN

        except Exception:
            if isTimeout:
                # in this case an exception is expected as the XML is invalid
                status = "TIMEOUT"
            elif "Minisat::OutOfMemoryException" in output:
                status = "OUT OF MEMORY"
            else:
                status = "INVALID OUTPUT"
                logging.exception(
                    "Error parsing CBMC output for returncode %d", returncode
                )

        return status
Example #40
0
 def export_as_TEI (self, request, queryset):
     """Serialize the selected entries' TEI fragments as one <listBibl> XML response."""
     fragments = ['<listBibl>']
     for tei_fragment in queryset.values_list('tei_entry', flat=True):
         fragments.append(tei_fragment)
     fragments.append('</listBibl>')
     root = ElementTree.fromstringlist(fragments)
     payload = ElementTree.tostring(root, encoding='utf-8')
     return HttpResponse(payload, mimetype='text/xml')
Example #41
0
def get_totals(boundingbox):
    """Search Flickr for geotagged photos uploaded since 2013 inside
    *boundingbox* and print each result element's tag and attributes.

    Python 2 code (print statement).
    """
    search = flickr.photos_search(min_upload_date='2013-01-01',
                                  bbox=boundingbox,
                                  accuracy=16,
                                  extras='geo,date_taken,tags')
    # NOTE(review): assumes photos_search returns XML text/fragments that
    # fromstringlist can consume -- confirm against the flickr client used.
    root = ET.fromstringlist(search, parser=None)
    for child in root:
        print child.tag, child.attrib
Example #42
0
 def __parseLines(self,lines):
     """Parse XML *lines* and return a {tag: text} dict of the root's direct children."""
     root = ET.fromstringlist(lines)
     return {child.tag: child.text for child in list(root)}
Example #43
0
    def tagTimeContent(self, tagname, time):
        """Return the stripped text of the last element named *tagname*
        found in the XML produced by makeTimeRequest for *time*.
        """
        # NOTE(review): self.makeTimeRequest(self, time) passes self
        # explicitly on top of the bound-method binding -- confirm
        # makeTimeRequest's signature (it may be a staticmethod).
        self.tree = ET.fromstringlist(self.makeTimeRequest(self, time))

        for elt in self.tree.iter():
            if elt.tag == str(tagname):
                self.tagvalue = elt.text.strip()

        # NOTE(review): if no element matched, self.tagvalue is either
        # unset (AttributeError) or stale from a previous call -- verify.
        return self.tagvalue
Example #44
0
 def _run_info_parser(run_info):
     """Extract Read and FlowcellLayout attribute dicts from a RunInfo XML payload."""
     parsed = dict()
     if len(run_info['result']) > 0:
         root = ET.fromstringlist(run_info['result'])
         parsed = dict(
             reads=[node.attrib for node in root.iter('Read')],
             fc_layout=[node.attrib for node in root.iter('FlowcellLayout')],
         )
     return parsed
Example #45
0
    def parse_XML(self, output, returncode, isTimeout):
        """Translate CBMC's XML output into a benchexec status string.

        :param output: iterable of raw output lines from the tool
        :param returncode: the tool's exit code (asserted per status)
        :param isTimeout: whether the run was killed by a timeout
        :return: a status string / result constant
        """
        #an empty tag cannot be parsed into a tree
        def sanitizeXML(s):
            return s.replace("<>", "<emptyTag>") \
                    .replace("</>", "</emptyTag>")

        try:
            # NOTE(review): getiterator() was removed in Python 3.9;
            # iter() is the modern spelling -- confirm target version.
            tree = ET.fromstringlist(map(sanitizeXML, output))
            status = tree.findtext('cprover-status')

            if status is None:
                # No status tag: fall back to scanning ERROR messages.
                def isErrorMessage(msg):
                    return msg.get('type', None) == 'ERROR'

                messages = list(filter(isErrorMessage, tree.getiterator('message')))
                if messages:
                    # for now, use only the first error message if there are several
                    msg = messages[0].findtext('text')
                    if msg == 'Out of memory':
                        status = 'OUT OF MEMORY'
                    elif msg:
                        # Bug fix: '%s' is not a str.format placeholder, so
                        # the message was never interpolated; '{0}' matches
                        # the sibling implementation.
                        status = 'ERROR ({0})'.format(msg)
                    else:
                        status = 'ERROR'
                else:
                    status = 'INVALID OUTPUT'

            elif status == "FAILURE":
                assert returncode == 10
                # Prefer the <reason> child text; fall back to the attribute.
                reason = tree.find('goto_trace').find('failure').findtext('reason')
                if not reason:
                    reason = tree.find('goto_trace').find('failure').get('reason')
                if 'unwinding assertion' in reason:
                    status = result.RESULT_UNKNOWN
                else:
                    status = result.RESULT_FALSE_REACH

            elif status == "SUCCESS":
                assert returncode == 0
                # With unwinding assertions disabled, success is inconclusive.
                if "--no-unwinding-assertions" in self.options:
                    status = result.RESULT_UNKNOWN
                else:
                    status = result.RESULT_TRUE_PROP

        except Exception:
            if isTimeout:
                # in this case an exception is expected as the XML is invalid
                status = 'TIMEOUT'
            elif 'Minisat::OutOfMemoryException' in output:
                status = 'OUT OF MEMORY'
            else:
                status = 'INVALID OUTPUT'
                logging.exception("Error parsing CBMC output for returncode %d" % (returncode))

        return status
Example #46
0
def get_word_count(html):
    """Get word count for given html.

    :param html: html string to count
    :return: word count of the text content; when *html* is not parseable
        as XML it is counted as plain text instead
    """
    try:
        # Wrap in a synthetic root so fragments with several top-level
        # elements parse. fromstring parses the whole string at once --
        # the previous fromstringlist call fed it to the parser one
        # character at a time for the same result.
        root = etree.fromstring('<doc>{0}</doc>'.format(html))
        text = etree.tostring(root, encoding='unicode', method='text')
        return get_text_word_count(text)
    except ParseError:
        return get_text_word_count(html)
Example #47
0
 def __iter__(self):
     """Yield TransactionData rows scraped from an HTML account statement."""
     # Re-encode the character stream so ElementTree receives encoded chunks.
     root = ElementTree.fromstringlist(codecs.iterencode(self.inputreader, 'utf'))
     # NOTE(review): the table index 5 and the td[3]/td[4]/td[5] positions
     # are tied to one specific statement layout -- confirm before reuse.
     table = root.findall('.//table')[5]
     # Skip the two leading header rows and the trailing summary row.
     for row in table[2:-1]:
         date_str = row.find('.//td[3].nobr').text
         tdate = datetime.strptime(date_str, '%d.%m.%Y')
         amount_str = row.find('.//td[5].nobr').text
         # Strip '.' (presumably a thousands separator) before parsing -- verify.
         amount_str = amount_str.replace('.', '')
         tamount = float(normalize_num(amount_str))
         desc = plain_content(row.find('.//td[4]'))
         tmessage = normalize_field(desc)
         yield TransactionData(tdate, tamount, message=tmessage)
Example #48
0
def test1():
    """Smoke test: parse a saved AniDB HTML page and report its <table> elements."""
    import os
    import xml.etree.ElementTree as et
    from xml.etree.ElementTree import XMLParser as HTMLCapableParser

    curdir = os.path.abspath(os.path.curdir)
    file_ = '  AniDB.net   Person - Hanazawa Kana   .html'
    file_path = os.path.join(curdir, 'lists', file_)
    with open(file_path) as f:
        # NOTE(review): XMLParser(html=True) only exists in very old
        # ElementTree releases; modern versions reject the keyword --
        # confirm the target environment.
        #
        # fromstringlist returns the root Element directly, so the old
        # tree.getroot() call raised AttributeError.
        root = et.fromstringlist(f.readlines(), HTMLCapableParser(html=True))
        for table in root.findall('table'):
            print('table')
def convert(idir, odir, subpath):
    """
    read XML file, write tweaked XML file
    """
    # Is there a cleaner way to do this?
    xml_parser = ET.XMLParser(encoding='utf-8')
    stem = fp.splitext(subpath)[0]
    source_path = fp.join(idir, subpath)
    with codecs.open(source_path, 'r', 'utf-8') as fin:
        payload = fin.read().encode('utf-8')
    tree = ET.fromstringlist([payload], parser=xml_parser)
    _remove_boring_parts(tree)
    out_prefix = fp.join(odir, stem)
    if not fp.exists(out_prefix):
        os.makedirs(out_prefix)
    _write_items(tree, fp.join(out_prefix, stem))
    def xml_to_df(self, xml_str):
        '''
        Transforms the xml in a string into a data frame

        : xml_str(str): string with the xml to be transformed in dataframe
        : returns: DataFrame with one record per <row> element's attributes,
                   or None when the input cannot be parsed
        '''
        attr_list = []
        try:
            tree = ElementTree.fromstringlist(xml_str)
        except Exception:
            # Narrowed from a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit; parse failures still yield None.
            return None

        # Each <row> element contributes its attribute mapping as a record.
        for node in tree.iter('row'):
            attr_list.append(dict(node.attrib))

        return DataFrame(attr_list)
Example #51
0
def __parse_product(prod):
    """Extract (name, version, arch) from a zypper product XML blob."""
    root = ET.fromstringlist(prod)
    name = root.find('./name').text
    arch = root.find('./arch').text
    try:
        version = root.find('./baseversion').text
        patchlevel = root.find('./patchlevel').text
        sp = patchlevel if patchlevel != '0' else ""
        if sp:
            version += "-SP{}".format(sp)
    except AttributeError:
        # No <baseversion>/<patchlevel>: fall back to the plain version tag.
        version = root.find('./version').text
        logger.debug("simpleversion")

    # CAASP uses ALL for update repos and there is only one supported
    # version at a time; this can change tomorrow.
    if name == "CAASP":
        version = "ALL"
    return (name, version, arch)
Example #52
0
def parseXML(file_chosen):
    """Validate a user-supplied XML file and parse it into an element tree."""
    xmlInfo = file_chosen.readlines()

    # Reject entity declarations outright: validation is done against an
    # XSD, and entities open the door to XXE-style abuse.
    if any("!ENTITY" in line for line in xmlInfo):
        raise BadXMLException("Bad XML File")

    if not validateXML(xmlInfo):
        raise ValdiationFailedException()

    try:
        return ET.fromstringlist(xmlInfo)
    except Exception:
        raise BadXMLException("Parse Failed")
Example #53
0
def set_cloud_env(env):
    """Rewrite the RPDS config file's server URLs for the given environment.

    Python 2 code (uses the file() builtin). Relies on module-level
    rpds_config_file, config_cloud_env_header/tail, the server_*_url
    templates and replace_string_in_file.

    :param env: 'int', 'int2', or 'prod'
    """
    # get lines need to be replaced
    removed_lines = ""
    with file(rpds_config_file) as f:
        lines = f.readlines()
        got_header = False
        # Collect the contiguous block between the cloud-env header and
        # tail markers, inclusive.
        for i in range(0, len(lines)):
            if config_cloud_env_header in lines[i]:
                got_header = True
            if got_header:
                removed_lines = removed_lines + lines[i]
                if config_cloud_env_tail in lines[i]:
                    got_header = False

    # prepare url according to env
    media_url = ""
    users_url = ""
    status_url = ""
    app_url = ""
    if env == 'int' or env == 'int2':
        media_url = server_media_url.format(env)
        users_url = server_users_url.format(env)
        status_url = server_status_url.format(env)
        app_url = server_app_url.format(env)
    elif env == 'prod':
        # Production drops the environment prefix from each template.
        media_url = server_media_url.replace("{0}.", "")
        users_url = server_users_url.replace("{0}.", "")
        status_url = server_status_url.replace("{0}.", "")
        app_url = server_app_url.replace("{0}.", "")

    # set env
    root = ET.fromstringlist(removed_lines)
    for el in root.findall("Var"):
        if "ServerMediaURL" in el.attrib:
            el.attrib["ServerMediaURL"] = media_url
        if "ServerUsersURL" in el.attrib:
            el.attrib["ServerUsersURL"] = users_url
        if "ServerStatusURL" in el.attrib:
            el.attrib["ServerStatusURL"] = status_url
        if "ServerAppURL" in el.attrib:
            el.attrib["ServerAppURL"] = app_url

    # change config file
    replace_lines = ET.tostring(root) + "\n"
    replace_string_in_file(rpds_config_file, removed_lines, replace_lines)
Example #54
0
def main():
    """Read an XML corpus from stdin, run lexical selection on each
    NODE that dominates a SYN, and print the transformed corpus."""
    stripped = [raw.strip() for raw in sys.stdin if raw.strip()]
    corpus = ET.fromstringlist(stripped)

    dprint("!" * 80)
    lexsel_util.get_tuples(corpus)
    dprint(lexsel_util.prettify(corpus))
    dprint("!" * 80)

    ## find all the NODE elements in the tree that have a SYN underneath them
    for node in corpus.findall(".//NODE/SYN/.."):
        make_decision(node)

    print(ET.tostring(corpus, encoding="unicode"))
def getWeather(city,country):
    """Fetch weather for city/country via the webservicex SOAP service and
    build a formatted report string.

    Python 2 code (``except suds.WebFault, e``).
    """
    import suds
    from xml.etree import ElementTree
    import time,socket
    #time.sleep(n)
    # NOTE(review): host is computed but never used below -- verify intent.
    host = socket.gethostname()
    resultmsg = ''
    result = ''
    client = suds.client.Client("http://www.webservicex.net/globalweather.asmx?WSDL")
    try:
        result = client.service.GetWeather(city.strip(),country.strip())
        # Drop the first line (the XML declaration) before parsing the rest.
        temp = result.split("\n")
        temp.remove(temp[0])
        eparse = ElementTree.fromstringlist(temp)
        for it in eparse:
            resultmsg += it.tag.ljust(20)+it.text.strip()+"\n"
    except suds.WebFault, e:
        resultmsg = "Error\n"
    # NOTE(review): resultmsg is built but never returned or printed, so
    # callers cannot observe it -- confirm whether a return is missing.
def convert(ifile):
    """
    Return a list of date, string tuples for each row in the table
    """
    # The data is iso-8859-1 text containing entities defined in a DTD we
    # do not have, so we translate the entities to unicode ourselves,
    # re-encode everything as utf-8, and hand the bytes to a parser forced
    # to utf-8.
    #
    # Is there a cleaner way to do this?
    xml_parser = ET.XMLParser(encoding='utf-8')
    with codecs.open(ifile, 'r', 'iso-8859-1') as fin:
        payload = unescape(fin.read()).encode('utf-8')
        tree = ET.fromstringlist([payload], parser=xml_parser)
        return concat_l(_convert_section(section)
                        for section in tree.findall('section'))
Example #57
0
def get_metadata(acc):
        """Fetch an ENA entry by accession and return a metadata dict.

        Python 2 code (urllib2, print statements) with the original mixed
        tab/space indentation preserved byte-for-byte.

        :param acc: ENA accession string
        :return: dict of metadata fields; fields default to '' when the
            entry is missing or invalid
        """
        meta = {
	    'Molecule Type': '', 'Topology': '',
	    'Organism': '', 'Taxonomy': '', 'Taxon ID': '', 'Strain': '',
	    'Description': '', 'Keywords': '', 'Reference Location': '',
	     'PubMed ID': '', 'Notes': '', 'URL': '', 'Sequence': '',
	     'First Release': '', 'Last Updated': ''
	}
    
        url = "http://www.ebi.ac.uk/ena/data/view/%s" % acc
        meta['URL'] = url
        url += "&display=xml"
        xml = ''
        try:
	    xml = urllib2.urlopen(url)
	except urllib2.URLError:
	    print "Error accessing %s" % url
	
	# parse
	root = ET.fromstringlist(xml.readlines())
	
	# check if valid
	if "entry is not found" in root.text:
	    return meta
	    
	    
	print acc
	
	# get interesting elements
	entry = root.find('entry')
	if entry is None:
	        return meta
	[meta['Molecule Type'], meta['Topology'], meta['Keywords'], meta['First Release'], meta['Last Updated']] = _entry_metadata(entry)
	
	description = entry.find('description')
	meta['Description'] = description.text if description is not None else ''
	  
	source = entry.find("feature[@name='source']")
	[meta['Taxon ID'], meta['Taxonomy'], meta['Organism'], meta['Strain']] = _source_metadata(source)
	
	reference = entry.find("reference[@type='article']")
	[meta['Reference Location'], meta['PubMed ID']] = _reference_metadata(reference)
		
	return meta
 def loadTraceXml(self, stream):
     """Load a trace file whose '#'-prefixed lines hold XML metadata and
     whose remaining lines hold whitespace-separated float columns."""
     header_lines = []
     rows = []
     for raw in stream:
         if raw[0] == "#":
             header_lines.append(raw.lstrip("# "))
         else:
             rows.append([float(token) for token in raw.split()])
     root = ElementTree.fromstringlist(header_lines)
     columnspec = ColumnSpec.fromXmlElement(root.find("./Variables/ColumnSpec"))
     for colname, column in zip(columnspec, zip(*rows)):
         # A trailing NaN marks an incomplete final sample; drop it.
         if math.isnan(column[-1]):
             self[colname] = numpy.array(column[0:-1])
         else:
             self[colname] = numpy.array(column)
     tpelement = root.find("./Variables/TracePlottingList")
     self.description["tracePlottingList"] = TracePlottingList.fromXmlElement(tpelement) if tpelement is not None else None
     for element in root.findall("./Variables/Element"):
         self.varFromXmlElement(element, self.description)
Example #59
0
    def status(self):
        """
        Query HCP for the status of the request log download.

        :returns:   a *collection.OrderedDict*:
                    ::

                        {
                         readyForStreaming: bool,
                         streamingInProgress: bool,
                         started: bool,
                         error: bool,
                         content: list # one or more of: L_ACCESS, L_SYSTEM,
                                       # L_SERVICE, L_APPLICATION)
                        }
        :raises:    re-raises whatever is raised below
        """
        self.logger.debug('status query issued')

        try:
            self.con.GET('/mapi/logs')
        except Exception as e:
            self.logger.error(e)
            raise
        else:
            self.logger.debug('response headers: {}'.format(self.con.getheaders()))
            xml = self.con.read().decode()
            time.sleep(.5)

            if self.con.response_status != 200:
                return None
            # Fold each child element into the status dict, mapping the
            # boolean strings to bools and comma-lists to Python lists.
            stat = OrderedDict()
            for node in Et.fromstringlist(xml):
                text = node.text
                if text == 'true':
                    stat[node.tag] = True
                elif text == 'false':
                    stat[node.tag] = False
                else:
                    stat[node.tag] = text.split(',')
            return stat
Example #60
0
        def _run_parameters_parser(run_parameters):
            """Parse a runParameters XML payload into a nested result dict.

            run_parameters['result'] holds the raw XML text; an empty
            payload yields an empty dict. Every field falls back to ''
            when its element is absent.
            """
            result = dict()
            if len(run_parameters['result']) > 0:
                root = ET.fromstringlist(run_parameters['result'])
                result = dict(
                    # Basic run identification fields.
                    run_info=dict(
                        run_id=list(root.iter('RunID')).pop(0).text if len(list(root.iter('RunID'))) else '',
                        fc_id=list(root.iter('Barcode')).pop(0).text if len(list(root.iter('Barcode'))) else '',
                        date=list(root.iter('RunStartDate')).pop(0).text if len(
                            list(root.iter('RunStartDate'))) else '',
                        scanner_id=list(root.iter('ScannerID')).pop(0).text if len(
                            list(root.iter('ScannerID'))) else '',
                        scanner_number=list(root.iter('ScannerNumber')).pop(0).text if len(
                            list(root.iter('ScannerNumber'))) else '',
                    ),

                    # One attribute dict per configured read.
                    reads=[r.attrib for r in root.iter('Read')],
                    # Reagent kit names and IDs per reagent category.
                    reagents=dict(
                        sbs=dict(
                            kit=list(root.iter('Sbs')).pop(0).text if len(list(root.iter('Sbs'))) else '',
                            id=list(root.iter('SbsReagentKit')).pop(0).find('ID').text if len(
                                list(root.iter('SbsReagentKit'))) else '',
                        ),
                        index=dict(
                            kit=list(root.iter('Index')).pop(0).text if len(list(root.iter('Index'))) else '',
                            # ID comes from the first Index element carrying
                            # a ReagentKit child, if any.
                            id=list(r.find('ReagentKit').find('ID').text for r in root.iter('Index') if
                                    r.find('ReagentKit') is not None).pop() if len(list(
                                r.find('ReagentKit').find('ID').text for r in root.iter('Index') if
                                r.find('ReagentKit') is not None)) else '',
                        ),
                        pe=dict(
                            kit=list(root.iter('Pe')).pop(0).text if len(list(root.iter('Pe'))) else '',
                            id=list(r.find('ReagentKit').find('ID').text for r in root.iter('Pe') if
                                    r.find('ReagentKit') is not None).pop() if len(list(
                                r.find('ReagentKit').find('ID').text for r in root.iter('Pe') if
                                r.find('ReagentKit') is not None)) else '',
                        ),
                    ),
                )
            return result