Ejemplo n.º 1
0
 def extract_line(movie_selector: ElementTree) -> dict:
     """Extract one movie record from a kinozal.tv result-table row.

     :param movie_selector: lxml element wrapping one result row
     :return: dict with title, links, size, seed count and a prefixed id,
         plus season/episode fields when the title matches the season
         signature; {} for rows without data cells (e.g. headers).
     """
     # Rows without <td class="s"> cells carry no torrent data.
     has_data_cells = movie_selector.xpath('.//td[@class="s"]/text()')
     if has_data_cells:
         movie = {}
         title_cell_xpath = './/td[@class="nam"]/a'
         movie['title'] = movie_selector.xpath(title_cell_xpath + '/text()')[0]
         movie['details_link'] = movie_selector.xpath(title_cell_xpath + '/@href')[0]
         movie['seeds_num'] = movie_selector.xpath('.//td[@class="sl_s"]/text()')[0]
         # The second <td class="s"> holds the human-readable size string.
         movie['size'] = size_processor(movie_selector.xpath('.//td[@class="s"]/text()')[1])
         # The torrent id is carried in the details link's query string.
         details_link_parsed = urllib.parse.urlparse(movie['details_link'])
         movie['id'] = KINOZAL_ID_PREFIX + parse_qs(details_link_parsed.query)['id'][0]
         # Season signature (Russian), e.g. "1-2 сезоны: 5-10 серии из 20":
         # group 2 = first season, 3 = last season, 7 = last episode.
         title_match = re.search(r'((\d+)-)?(\d+) сезон(ы)?: ((\d+)-)?(\d+) +сери(и|я) из (\d+)', movie['title'])
         # assert title_match, f'Cannot parse title {movie["title"]}'
         try:
             if title_match:
                 first_season = title_match.group(2)
                 movie['last_season'] = int(title_match.group(3))
                 movie['last_episode'] = int(title_match.group(7))
                 # Single-season titles omit the leading "N-" range part.
                 if not first_season:
                     first_season = movie['last_season']
                 movie['seasons'] = list(range(int(first_season), movie['last_season'] + 1))
             else:
                 raise ValueError('Season data signature was not found')
         except ValueError as e:  # just suppress the exception and do not return fields that we could not convert
             logging.error(
                 f'Cannot extract season data from title: "{movie["title"]}": \n{e}'
             )
         # The download link reuses the details link's query (same id).
         download_link_parsed = urllib.parse.urlparse('http://dl.kinozal.tv/download.php')._replace(
             query=details_link_parsed.query
         )
         movie['torrent_link'] = urllib.parse.urlunparse(download_link_parsed)
         return movie
     else:
         return {}
Ejemplo n.º 2
0
    def scrape(self, chamber, session):
        """Scrape bills for one chamber of a Montana legislative session.

        :param chamber: 'lower' (House) or 'upper' (Senate)
        :param session: session identifier, resolved via self.metadata
        :raises ValueError: if the session appears in no metadata term
        """
        # Resolve the session to its term's start year.  Fail loudly on an
        # unknown session instead of hitting a NameError on `year` below.
        year = None
        for term in self.metadata["terms"]:
            if session in term["sessions"]:
                year = term["start_year"]
                break
        if year is None:
            raise ValueError("session %r not found in metadata terms" % (session,))

        self.versions_dict = self._versions_dict(year)

        base_bill_url = "http://data.opi.mt.gov/bills/%d/BillHtml/" % year
        index_page = ElementTree(lxml.html.fromstring(self.urlopen(base_bill_url)))

        bill_urls = []
        for bill_anchor in index_page.findall("//a"):
            # Skip the governor's line-item-veto page; see 2009 HB 645.
            if bill_anchor.text.find("govlineveto") == -1:
                # House bills start with H, Senate bills start with S
                if chamber == "lower" and bill_anchor.text.startswith("H"):
                    bill_urls.append("%s%s" % (base_bill_url, bill_anchor.text))
                elif chamber == "upper" and bill_anchor.text.startswith("S"):
                    bill_urls.append("%s%s" % (base_bill_url, bill_anchor.text))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            if bill:
                self.save_bill(bill)
Ejemplo n.º 3
0
def loadProject_0_1(projectFile):
    """Load a Netzob project serialized in the 0.1 XML format.

    :param projectFile: path to the project XML file
    :return: the populated Project instance
    """
    # Parse the XML Document as 0.1 version
    tree = ElementTree()
    tree.parse(projectFile)
    xmlProject = tree.getroot()

    # Register the namespaces used by the 0.1 schema.
    etree.register_namespace('netzob', PROJECT_NAMESPACE)
    etree.register_namespace('netzob-common', COMMON_NAMESPACE)

    projectID = xmlProject.get('id')
    projectName = xmlProject.get('name', 'none')
    projectCreationDate = TypeConvertor.xsdDatetime2PythonDatetime(xmlProject.get('creation_date'))
    projectPath = xmlProject.get('path')
    project = Project(projectID, projectName, projectCreationDate, projectPath)

    # Parse the configuration (look up each child element only once).
    configurationElement = xmlProject.find("{" + PROJECT_NAMESPACE + "}configuration")
    if configurationElement is not None:
        projectConfiguration = ProjectConfiguration.loadProjectConfiguration(configurationElement, PROJECT_NAMESPACE, "0.1")
        project.setConfiguration(projectConfiguration)

    # Parse the vocabulary.  Default to None so the grammar section below
    # cannot raise a NameError when the vocabulary element is missing.
    projectVocabulary = None
    vocabularyElement = xmlProject.find("{" + PROJECT_NAMESPACE + "}vocabulary")
    if vocabularyElement is not None:
        projectVocabulary = Vocabulary.loadVocabulary(vocabularyElement, PROJECT_NAMESPACE, COMMON_NAMESPACE, "0.1", project)
        project.setVocabulary(projectVocabulary)

    # Parse the grammar
    grammarElement = xmlProject.find("{" + PROJECT_NAMESPACE + "}grammar")
    if grammarElement is not None:
        projectGrammar = Grammar.loadGrammar(grammarElement, projectVocabulary, PROJECT_NAMESPACE, "0.1")
        if projectGrammar is not None:
            project.setGrammar(projectGrammar)

    return project
Ejemplo n.º 4
0
def generate_data_type_conf(supported_file_formats, data_types_destination):
    """Generate a Galaxy datatypes_conf.xml for the supported file formats.

    :param supported_file_formats: mapping of format name -> data-type object
        exposing galaxy_type / galaxy_extension / mimetype attributes
    :param data_types_destination: path of the XML file to write
    """
    data_types_node = Element("datatypes")
    registration_node = add_child_node(data_types_node, "registration")
    registration_node.attrib[
        "converters_path"] = "lib/galaxy/datatypes/converters"
    registration_node.attrib["display_path"] = "display_applications"

    for format_name in supported_file_formats:
        data_type = supported_file_formats[format_name]
        # add only if it's a data type that does not exist in Galaxy
        if data_type.galaxy_type is not None:
            data_type_node = add_child_node(registration_node, "datatype")
            # we know galaxy_extension is not None
            data_type_node.attrib["extension"] = data_type.galaxy_extension
            data_type_node.attrib["type"] = data_type.galaxy_type
            if data_type.mimetype is not None:
                data_type_node.attrib["mimetype"] = data_type.mimetype

    data_types_tree = ElementTree(data_types_node)
    # BUG FIX: write() emits encoded bytes when an encoding is given, so the
    # file must be opened in binary mode; the context manager also closes
    # the handle, which the original never did.
    with open(data_types_destination, 'wb') as destination:
        data_types_tree.write(destination,
                              encoding="UTF-8",
                              xml_declaration=True,
                              pretty_print=True)
    logger.info(
        "Generated Galaxy datatypes_conf.xml in %s" % data_types_destination,
        0)
Ejemplo n.º 5
0
	def _append_element(self, field_name, data, node=None):
		"""Append a new element carrying *data* and persist the tree to disk.

		:param field_name: tag of the element to create
		:param data: value stored in the new element's 'text' attribute
		:param node: tag name of the parent(s) to append under; defaults to
			the root element's tag (appends directly under the root)
		"""
		# BUG FIX: the original default `node=self._root.tag` is evaluated at
		# class-definition time, where `self` does not exist; use a None
		# sentinel and resolve it at call time instead.
		if node is None:
			node = self._root.tag
		if node == self._root.tag:
			# BUG FIX: `field` and `text` were undefined names; the intent
			# appears to be the field_name parameter and a literal 'text'
			# attribute key (TODO confirm against callers).
			self._root.append(ElementTree.Element(field_name, attrib={'text': data}))
		else:
			# BUG FIX: `root` was undefined; iterate the instance's root.
			for parent in self._root.iter(node):
				parent.append(ElementTree.Element(field_name, attrib={'text': data}))
		self._tree.write('xml_renderer.xml')
Ejemplo n.º 6
0
    def getNameOfProject(workspace, projectDirectory):
        """Return the name of the project stored in *projectDirectory*.

        The project's configuration file is validated against every known
        project schema; the name is read from the first schema version that
        validates.

        :param workspace: workspace whose path anchors the project directory
        :param projectDirectory: directory of the project inside the workspace
        :return: the project name, or None if the file is missing,
            unreadable, invalid, or declares no usable name
        """
        projectFile = os.path.join(os.path.join(workspace.getPath(), projectDirectory), Project.CONFIGURATION_FILENAME)

        # verify we can open and read the file
        if projectFile is None:
            return None
        # the projectFile must be a regular file
        if not os.path.isfile(projectFile):
            # logging.warning replaces the deprecated logging.warn alias
            logging.warning("The specified project's configuration file (" + str(projectFile) + ") is not valid : its not a file.")
            return None
        # is it readable
        if not os.access(projectFile, os.R_OK):
            logging.warning("The specified project's configuration file (" + str(projectFile) + ") is not readable.")
            return None

        # We validate the file given the schemas
        for xmlSchemaFile in Project.PROJECT_SCHEMAS.keys():
            xmlSchemaPath = os.path.join(ResourcesConfiguration.getStaticResources(), xmlSchemaFile)
            # If we find a version which validates the XML, we parse with the associated function
            if Project.isSchemaValidateXML(xmlSchemaPath, projectFile):
                logging.debug("The file " + str(projectFile) + " validates the project configuration file.")
                tree = ElementTree()
                tree.parse(projectFile)
                xmlProject = tree.getroot()
                # Register the namespace
                etree.register_namespace('netzob', PROJECT_NAMESPACE)
                etree.register_namespace('netzob-common', COMMON_NAMESPACE)

                projectName = xmlProject.get('name', 'none')

                if projectName is not None and projectName != 'none':
                    return projectName
            else:
                logging.warning("The project declared in file (" + projectFile + ") is not valid")
        return None
Ejemplo n.º 7
0
    def parse_bill(self, bill_url, session, chamber):
        """Parse one bill HTML page: follow its status link to build the
        bill and attach document versions; fall back to a metadata-driven
        status search when the page exposes no status link.

        :return: the parsed bill, or the fallback search result
        """
        bill = None
        bill_page = ElementTree(lxml.html.fromstring(self.urlopen(bill_url)))
        for anchor in bill_page.findall("//a"):
            if anchor.text_content().startswith("status of") or anchor.text_content().startswith(
                "Detailed Information (status)"
            ):
                # Strip stray CR/LF characters embedded in the href.
                status_url = anchor.attrib["href"].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)
            elif anchor.text_content().startswith("This bill in WP"):
                index_url = anchor.attrib["href"]
                index_url = index_url[0 : index_url.rindex("/")]
                # this looks weird.  See http://data.opi.mt.gov/bills/BillHtml/SB0002.htm for why
                index_url = index_url[index_url.rindex("http://") :]
                self.add_bill_versions(bill, index_url)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            # BUG FIX: `metadata` was referenced as a bare (undefined) name;
            # it lives on the scraper instance, as in the sibling
            # implementation of this method.
            laws_year = self.metadata["session_details"][session]["years"][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)
        return bill
Ejemplo n.º 8
0
def main():
    """AWIS (Alexa Web Information Service) proof of concept: fetch
    Rank / LinksInCount / Speed for the sites given on the command line
    and print the results.

    NOTE(review): this function uses Python 2 ``print`` statements and
    will not parse under Python 3 without conversion.
    """

    argparser = configargparse.ArgumentParser(
        description="AWIS API Proof of Concept")

    argparser.add_argument('--key-id', required=True)
    argparser.add_argument('--secret-key', required=True)
    argparser.add_argument('--sites', required=True, nargs='+')
    args = argparser.parse_args()

    client = AwisApi(args.key_id, args.secret_key)

    tree = client.url_info(args.sites, "Rank", "LinksInCount", "Speed")
    print etree_tostring(tree)

    print "client ns_prefixes: ", client.NS_PREFIXES
    alexa_prefix = client.NS_PREFIXES['alexa']
    awis_prefix = client.NS_PREFIXES['awis']

    # The overall request status element lives in the 'alexa' namespace.
    elem = tree.find('//{%s}StatusCode' % alexa_prefix)
    assert elem.text == 'Success'

    for elem_result in tree.findall('//{%s}UrlInfoResult' % awis_prefix):
        # print etree_tostring(elem_result)
        print "elem_result tag: %s, text: %s" % (elem_result.tag,
                                                 elem_result.text)

        # Wrap each result element so find() searches only this subtree.
        tree_result = ElementTree(elem_result)
        elem_url = tree_result.find('//{%s}DataUrl' % awis_prefix)
        if elem_url is not None:
            print "elem_url tag: %s, text: %s" % (elem_url.tag, elem_url.text)
        elem_metric = tree_result.find('//{%s}Rank' % awis_prefix)
        if elem_metric is not None:
            print "elem_metric tag: %s, text: %s " % (elem_metric.tag,
                                                      elem_metric.text)
Ejemplo n.º 9
0
    def retrieveMessagesFromFiles(self):
        """Parse every selected XML file, appending its <message> elements
        to self.messages and to the line view; files that fail XSD
        validation are logged and skipped.
        """
        # We read each file and create one message for each file
        self.messages = []
        self.lineView.get_model().clear()

        for file in self.filesToBeImported:

            from netzob.Common.ResourcesConfiguration import ResourcesConfiguration
            xmlSchemaPath = os.path.join(ResourcesConfiguration.getStaticResources(), "xsds/0.1/common.xsd")
            # If we find a version which validates the XML, we parse with the associated function
            if not Workspace.isSchemaValidateXML(xmlSchemaPath, file):
                logging.error(_("The specified XML file {0} is not valid according to the XSD ({1}).").format(str(file), str(xmlSchemaPath)))
            else:
                logging.debug(_("XML file valid according to the XSD schema"))

                # Parse the XML Document as 0.1 version
                tree = ElementTree()
                tree.parse(file)
                xmlFile = tree.getroot()

                # Each namespaced <message> element becomes one message
                # object, shown as (id, type, string data) in the view.
                for xmlMessage in xmlFile.findall("{" + Project.COMMON_NAMESPACE + "}message"):
                    message = AbstractMessageFactory.loadFromXML(xmlMessage, Project.COMMON_NAMESPACE, "0.1")
                    logging.debug(_("XML String data: {0}").format(message.getStringData()))
                    self.messages.append(message)
                    self.lineView.get_model().append(None, [str(message.getID()), message.getType(), message.getStringData()])
Ejemplo n.º 10
0
def write_xml(filepath: str, tree: etree.ElementTree) -> None:
    """Serialize *tree* to *filepath* as a pretty-printed UTF-8 XML
    document with an XML declaration."""
    with open(filepath, 'wb') as stream:
        tree.write(
            stream,
            xml_declaration=True,
            pretty_print=True,
            encoding='UTF-8',
        )
Ejemplo n.º 11
0
def draw_frame_by_xml(src_pic, annot_file):
    """Copy *src_pic* into the target image folder and draw every bounding
    box from the XML annotation *annot_file* onto the copy.

    Relies on the module-level path prefixes raw_image_dir / xml_file_dir /
    tgt_image_dir and on the draw_rectangle() helper.
    """
    if not os.path.exists(raw_image_dir + src_pic):
        print('No source image file')
        return
    if not os.path.exists(xml_file_dir + annot_file):
        print('No annotation file')
        return
    if not os.path.exists(tgt_image_dir):
        print('No target image folder')
        return

    tree = ElementTree()
    shutil.copyfile(raw_image_dir + src_pic, tgt_image_dir + src_pic)
    tree.parse(xml_file_dir + annot_file)
    root = tree.getroot()
    # Consume <object> elements one at a time: draw the box on the copied
    # image, then remove the element so find() advances to the next one.
    while (1):
        obj = root.find('object')
        if obj is None:
            break
        bb = obj.find('bndbox')
        xmin = bb.find('xmin').text
        ymin = bb.find('ymin').text
        xmax = bb.find('xmax').text
        ymax = bb.find('ymax').text
        #print(xmin + ' ' + ymin + ' ' + xmax + ' ' + ymax)
        draw_rectangle(xmin, ymin, xmax, ymax, tgt_image_dir + src_pic)
        root.remove(obj)
Ejemplo n.º 12
0
    def flip(self, dst):
        """Horizontally flip every bounding box of the annotation tree and
        write the resulting XML to *dst*.

        :param dst: destination path of the flipped annotation file
        """
        size = self.root.find('size')
        W = int(size.find('width').text)
        for obj in self.root.iter('object'):
            print(obj.find('name').text)
            box = obj.find('bndbox')
            rect = []
            rect.append(int(box.find('xmin').text))
            rect.append(int(box.find('ymin').text))
            rect.append(int(box.find('xmax').text))
            rect.append(int(box.find('ymax').text))

            # Mirror the box around the vertical axis:
            #   y' = y,  x' = W - x  (xmin and xmax swap roles)
            tmp = rect[2]
            rect[2] = W - rect[0]
            rect[0] = W - tmp
            box.find('xmin').text = str(rect[0])
            box.find('ymin').text = str(rect[1])
            box.find('xmax').text = str(rect[2])
            box.find('ymax').text = str(rect[3])

        # BUG FIX: the tree was rebuilt and rewritten on every iteration of
        # the loop above; serialize once after all boxes are updated (the
        # file is now also written for annotations with no objects).
        newTree = ElementTree(self.root)
        newTree.write(dst, pretty_print=True, xml_declaration=False)
Ejemplo n.º 13
0
 def __init__(self, template: etree.ElementTree, global_decl: Decl):
     """Build a runtime wrapper around one XML <template> element.

     Parses the template's <location>, <transition> and <declaration>
     children, seeds the local declaration scope with the global values,
     and registers the per-device-type modify/revert handlers.
     """
     self.template = template
     self.locations = [
         Location(location, self)
         for location in template.findall("location")
     ]
     self.transitions = [
         Transition(transition, self)
         for transition in template.findall("transition")
     ]
     self.global_decl = global_decl
     # Local scope starts from the global declaration's values, then is
     # extended by the template's own <declaration> text, if any.
     self.declaration = Decl(values=global_decl.values)
     self.executedTransitions = []
     if template.find("declaration") is not None:
         self.declaration.parse(template.find("declaration").text)
     self.systemName = self.name
     self.templateName = self.name
     self.usefull = True
     self.active = True
     # Pristine deep copy of the element -- presumably kept so the template
     # can be restored later; TODO(review): confirm against callers.
     self.StoredTemplate = copy.deepcopy(template)
     # Dispatch tables keyed by device-type name; the "...R" entries are
     # the matching revert handlers.
     self.staticFuncs = dict(TimeShiftable=self.modifyTS,
                             TimeShiftableR=self.revertTS,
                             EV=self.modifyEV,
                             EVR=self.revertEV,
                             Battery=self.modifyBattery,
                             BatteryR=self.revertBattery)
     self.neededVars = dict(TimeShiftable=self.tsVars,
                            EV=self.evVars,
                            Battery=self.batteryVars)
     self.dynamic = True
     self.previousVarValues = None
     self.startTime = None  # used to sort EVs
Ejemplo n.º 14
0
def generate_tool_conf(parsed_ctds, tool_conf_destination, galaxy_tool_path, default_category):
    """Generate a Galaxy tool_conf.xml grouping converted tools by category.

    :param parsed_ctds: parsed CTD models with their output file names
    :param tool_conf_destination: path of the XML file to write
    :param galaxy_tool_path: prefix prepended to every tool file name
    :param default_category: category used when a model declares none
    """
    # for each category, we keep a list of tool files corresponding to it
    categories_to_tools = dict()
    for parsed_ctd in parsed_ctds:
        # str.strip() replaces the Python-2-only string.strip() free function
        category = parsed_ctd.ctd_model.opt_attribs.get("category", "").strip()
        if not category:
            category = default_category
        if category not in categories_to_tools:
            categories_to_tools[category] = []
        categories_to_tools[category].append(utils.get_filename(parsed_ctd.suggested_output_file))

    # at this point, we should have a map for all categories->tools
    toolbox_node = Element("toolbox")

    # Normalize the tool path so it is either empty or ends with a slash.
    if galaxy_tool_path is not None and not galaxy_tool_path.strip().endswith("/"):
        galaxy_tool_path = galaxy_tool_path.strip() + "/"
    if galaxy_tool_path is None:
        galaxy_tool_path = ""

    # items() works on both Python 2 and 3 (iteritems() was Python-2-only)
    for category, file_names in categories_to_tools.items():
        section_node = add_child_node(toolbox_node, "section")
        section_node.attrib["id"] = "section-id-" + "".join(category.split())
        section_node.attrib["name"] = category

        for filename in file_names:
            tool_node = add_child_node(section_node, "tool")
            tool_node.attrib["file"] = galaxy_tool_path + filename

    toolconf_tree = ElementTree(toolbox_node)
    # BUG FIX: write() emits encoded bytes and the handle was never closed;
    # open in binary mode inside a context manager.
    with open(tool_conf_destination, 'wb') as destination:
        toolconf_tree.write(destination, encoding="UTF-8", xml_declaration=True, pretty_print=True)
    logger.info("Generated Galaxy tool_conf.xml in %s" % tool_conf_destination, 0)
Ejemplo n.º 15
0
def _convert_internal(parsed_ctds, **kwargs):
    """Convert each parsed CTD model into a Galaxy tool XML file.

    :param parsed_ctds: parsed models carrying (ctd_model, input_file,
        suggested_output_file)
    :param kwargs: converter options; 'skip_tools' and 'required_tools'
        filter which models are converted
    """
    # parse all input files into models using CTDopts (via utils)
    # the output is a tuple containing the model, output destination, origin file
    for parsed_ctd in parsed_ctds:
        model = parsed_ctd.ctd_model
        origin_file = parsed_ctd.input_file
        output_file = parsed_ctd.suggested_output_file

        if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]:
            logger.info("Skipping tool %s" % model.name, 0)
            continue
        elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]:
            logger.info("Tool %s is not required, skipping it" % model.name, 0)
            continue
        else:
            logger.info("Converting %s (source %s)" % (model.name, utils.get_filename(origin_file)), 0)
            tool = create_tool(model)
            write_header(tool, model)
            create_description(tool, model)
            expand_macros(tool, model, **kwargs)
            create_command(tool, model, **kwargs)
            create_inputs(tool, model, **kwargs)
            create_outputs(tool, model, **kwargs)
            create_help(tool, model)

            # wrap our tool element into a tree to be able to serialize it
            tree = ElementTree(tool)
            logger.info("Writing to %s" % utils.get_filename(output_file), 1)
            # BUG FIX: write() emits encoded bytes and the handle was never
            # closed; open in binary mode inside a context manager.
            with open(output_file, 'wb') as output_stream:
                tree.write(output_stream, encoding="UTF-8", xml_declaration=True, pretty_print=True)
Ejemplo n.º 16
0
Archivo: main.py Proyecto: chergert/ddg
def get_info(path):
    """Return (name, file:// URI) tuples for the entries listed in the
    index file at *path*, in reverse document order."""
    tree = ET(file=path)
    base_dir = os.path.dirname(path)
    entries = []
    # NOTE: index [1] is a hack -- the wanted container just happens to be
    # the second child of the root on this machine.
    for entry in tree.getroot()[1]:
        uri = 'file://' + os.path.join(base_dir, entry.get('link'))
        entries.insert(0, (entry.get('name'), uri))
    return entries
Ejemplo n.º 17
0
    def scrape(self, chamber, year):
        """Scrape Montana bills for *year* and *chamber* ('lower' = House,
        'upper' = Senate) and save every parsed bill.

        :raises NoDataForPeriod: for years before 1999
        """
        year = int(year)
        session = self.getSession(year)
        #2 year terms starting on odd year, so if even number, use the previous odd year
        if year < 1999:
            raise NoDataForPeriod(year)
        if year % 2 == 0:
            year -= 1

        # The 1999 bill index lives at an unversioned URL.
        if year == 1999:
            base_bill_url = 'http://data.opi.mt.gov/bills/BillHtml/'
        else:
            base_bill_url = 'http://data.opi.mt.gov/bills/%d/BillHtml/' % year
        index_page = ElementTree(
            lxml.html.fromstring(self.urlopen(base_bill_url)))

        bill_urls = []
        for bill_anchor in index_page.findall('//a'):
            # Skip the governor's line-item-veto page; see 2009 HB 645
            if bill_anchor.text.find("govlineveto") == -1:
                # House bills start with H, Senate bills start with S
                if chamber == 'lower' and bill_anchor.text.startswith('H'):
                    bill_urls.append("%s%s" %
                                     (base_bill_url, bill_anchor.text))
                elif chamber == 'upper' and bill_anchor.text.startswith('S'):
                    bill_urls.append("%s%s" %
                                     (base_bill_url, bill_anchor.text))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            self.save_bill(bill)
Ejemplo n.º 18
0
def parse_activity(
    activity: etree.ElementTree
) -> Tuple[List[backend.events.AcademicalEvent], str, str, str]:
    """
    Parses an element from a request into a list of events and some activity information.

    :param activity: the activity element
    :type activity: etree.ElementTree
    :return: the events, the name, the id and the code of this activity
    :rtype: Tuple[List[backend.events.AcademicalEvent], str, str, str]
    """
    activity_id = activity.attrib['name']
    activity_type = activity.attrib['type']
    activity_name = activity.attrib['code']

    event_type = backend.events.extract_type(activity_type, activity_id)
    event_codes = activity.xpath(
        './/eventParticipant[@category="category5"]/@name')
    events = activity.xpath('.//event')
    events_list = list()

    if len(event_codes) == 0:
        activity_code = backend.events.extract_code(activity_id)
    else:
        # Use the most frequent participant code as the activity code.
        activity_code = Counter(event_codes).most_common(1)[0][0]
    # BUG FIX: the original used `is ''`, an identity comparison with a
    # string literal whose result is implementation-dependent (and a
    # SyntaxWarning on modern Python); compare by value instead.
    if activity_code == '':
        activity_code = 'Other'

    for event in events:
        events_list.append(
            parse_event(event, event_type, activity_name, activity_id,
                        activity_code))

    return events_list, activity_name, activity_id, activity_code
Ejemplo n.º 19
0
    def scrape(self, chamber, year):
        """Scrape Montana bills for *year* and *chamber* ('lower' = House,
        'upper' = Senate) and save every parsed bill.

        :raises NoDataForPeriod: for years before 1999
        """
        year = int(year)
        session = self.getSession(year)
        # Terms span two years starting on an odd year; map an even year
        # back to the preceding odd one.
        if year < 1999:
            raise NoDataForPeriod(year)
        if year % 2 == 0:
            year -= 1

        # The 1999 bill index lives at an unversioned URL.
        if year == 1999:
            base_bill_url = "http://data.opi.mt.gov/bills/BillHtml/"
        else:
            base_bill_url = "http://data.opi.mt.gov/bills/%d/BillHtml/" % year
        index_page = ElementTree(lxml.html.fromstring(self.urlopen(base_bill_url)))

        bill_urls = []
        for anchor in index_page.findall("//a"):
            label = anchor.text
            # Skip the governor's line-item-veto page (see 2009 HB 645).
            if label.find("govlineveto") != -1:
                continue
            # House bill pages start with 'H', Senate pages with 'S'.
            is_house = chamber == "lower" and label.startswith("H")
            is_senate = chamber == "upper" and label.startswith("S")
            if is_house or is_senate:
                bill_urls.append("%s%s" % (base_bill_url, label))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            self.save_bill(bill)
Ejemplo n.º 20
0
    def parse_bill(self, bill_url, session, chamber):
        """Parse one bill HTML page: follow its status link to build the
        bill and attach document versions; fall back to a metadata-driven
        status search when the page exposes no status link.

        :return: the parsed bill (possibly from the fallback search)
        """
        bill = None
        bill_page = ElementTree(lxml.html.fromstring(self.urlopen(bill_url)))
        for anchor in bill_page.findall('//a'):
            if (anchor.text_content().startswith('status of')
                    or anchor.text_content().startswith(
                        'Detailed Information (status)')):
                # Strip stray CR/LF characters embedded in the href.
                status_url = anchor.attrib['href'].replace("\r", "").replace(
                    "\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url,
                                                   session, chamber)
            elif anchor.text_content().startswith('This bill in WP'):
                index_url = anchor.attrib['href']
                index_url = index_url[0:index_url.rindex('/')]
                # this looks weird.  See http://data.opi.mt.gov/bills/BillHtml/SB0002.htm for why
                index_url = index_url[index_url.rindex("http://"):]
                self.add_bill_versions(bill, index_url)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            # Two-digit LAWS year (year modulo 100).
            laws_year = self.metadata['session_details'][session]['years'][
                0] % 100

            status_url = self.search_url_template % (laws_year, bill_type,
                                                     bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session,
                                               chamber)
        return bill
Ejemplo n.º 21
0
def getdescendants(request, code):
    """Django view: query the DeCS service for the tree descendants of
    *code* in every configured language and return them as a JSON list
    sorted by description in the request's language.

    NOTE(review): Python-2-era code (urllib.urlencode/urlopen, the
    `mimetype` kwarg of HttpResponse) -- it will not run on Python 3 /
    modern Django without porting.
    """
    params = {}
    results = {}

    language = request.LANGUAGE_CODE.lower()
    if language == 'pt-br':
        language = 'pt'

    for lang in DECS_LANGS:
        params[lang] = urllib.urlencode({
            'tree_id': code or '',
            'lang': lang,
            })

        # One POST to the DeCS service per language.
        resource = urllib.urlopen(settings.DECS_SERVICE, params[lang])

        tree = ElementTree()
        tree.parse(resource)

        descendants = tree.findall('decsws_response/tree/descendants/term_list[@lang="%s"]/term' % lang)
        for d in descendants:
            # Accumulate one '"lang":"term"' JSON fragment per language
            # for each tree_id.
            if d.attrib['tree_id'] in results:
                results[ d.attrib['tree_id'] ] += ',"%s":"%s"' % (lang,d.text.capitalize())
            else:
                results[ d.attrib['tree_id'] ] = '"%s":"%s"' % (lang,d.text.capitalize())

    # NOTE(review): the unpacking names look swapped -- `desc` receives the
    # tree id (dict key) and `id` the description fragments (dict value);
    # confirm against JSON_MULTILINGUAL_TERM's placeholder order.
    json = '[%s]' % ','.join((JSON_MULTILINGUAL_TERM % (id,desc) for desc,id in results.items()))
    json_response = json_loads(json)
    json_response.sort(key=lambda x: x['fields']['description'][language])

    return HttpResponse(json_dumps(json_response), mimetype='application/json')
Ejemplo n.º 22
0
Archivo: Menu.py Proyecto: KurSh/netzob
    def exportProjectAction(self, widget, data):
        """Menu callback: ask the user for a destination file and export the
        current project as XML, confirming before overwriting an existing
        file.
        """
        chooser = gtk.FileChooserDialog(title=_("Export as (XML)"), action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        res = chooser.run()
        fileName = None
        if res == gtk.RESPONSE_OK:
            fileName = chooser.get_filename()
        chooser.destroy()

        # BUG FIX: when the dialog was cancelled, `fileName` was never bound
        # and the os.path.isfile() call below raised a NameError.
        if fileName is None:
            return

        doCreateFile = False
        isFile = os.path.isfile(fileName)
        if not isFile:
            doCreateFile = True
        else:
            md = gtk.MessageDialog(None,
                                   gtk.DIALOG_DESTROY_WITH_PARENT, gtk.MESSAGE_QUESTION,
                                   gtk.BUTTONS_OK_CANCEL, _("Are you sure to override the file '{0}'?").format(fileName))
            resp = md.run()
            md.destroy()
            if resp == gtk.RESPONSE_OK:
                doCreateFile = True

        if doCreateFile:
            root = self.netzob.getCurrentProject().generateXMLConfigFile()
            tree = ElementTree(root)
            tree.write(fileName)
            NetzobInfoMessage(_("Project correctly exported to '{0}'").format(fileName))
Ejemplo n.º 23
0
    def save(self, filePath, setAllReferences=True):
        """Serialize this object to *filePath* as pretty-printed UTF-8 XML.

        :param filePath: destination file path
        :param setAllReferences: when True, refresh all references first
        """
        if setAllReferences:
            self.setAllReferences()

        document = XMLElementTree(self.toXML())
        indent(document, space="    ")
        document.write(
            filePath, xml_declaration=True, encoding="utf-8", pretty_print=True
        )
Ejemplo n.º 24
0
def loadProject_0_1(projectFile):
    """Load a Netzob project serialized in the 0.1 XML format.

    :param projectFile: path to the project XML file
    :return: the populated Project instance
    """
    # Parse the XML Document as 0.1 version
    tree = ElementTree()

    tree.parse(projectFile)

    xmlProject = tree.getroot()

    # Register the namespace
    etree.register_namespace("netzob", PROJECT_NAMESPACE)
    etree.register_namespace("netzob-common", COMMON_NAMESPACE)

    projectID = str(xmlProject.get("id"))
    projectName = xmlProject.get("name", "none")
    projectCreationDate = TypeConvertor.xsdDatetime2PythonDatetime(xmlProject.get("creation_date"))
    projectPath = xmlProject.get("path")
    project = Project(projectID, projectName, projectCreationDate, projectPath)

    description = xmlProject.get("description")
    project.setDescription(description)

    # Parse the configuration
    if xmlProject.find("{" + PROJECT_NAMESPACE + "}configuration") is not None:
        projectConfiguration = ProjectConfiguration.loadProjectConfiguration(
            xmlProject.find("{" + PROJECT_NAMESPACE + "}configuration"), PROJECT_NAMESPACE, "0.1"
        )
        project.setConfiguration(projectConfiguration)

    # Parse the vocabulary.  Default to None so the grammar section below
    # cannot raise a NameError when the vocabulary element is missing.
    projectVocabulary = None
    if xmlProject.find("{" + PROJECT_NAMESPACE + "}vocabulary") is not None:
        projectVocabulary = Vocabulary.loadVocabulary(
            xmlProject.find("{" + PROJECT_NAMESPACE + "}vocabulary"),
            PROJECT_NAMESPACE,
            COMMON_NAMESPACE,
            "0.1",
            project,
        )
        project.setVocabulary(projectVocabulary)

    # Parse the grammar
    if xmlProject.find("{" + PROJECT_NAMESPACE + "}grammar") is not None:
        projectGrammar = Grammar.loadGrammar(
            xmlProject.find("{" + PROJECT_NAMESPACE + "}grammar"), projectVocabulary, PROJECT_NAMESPACE, "0.1"
        )
        if projectGrammar is not None:
            project.setGrammar(projectGrammar)

    # Parse the simulator
    # NOTE(review): project.getGrammar() may still be None here (no grammar
    # element, or loadGrammar returned None), in which case getAutomata()
    # would raise -- confirm whether a simulator without a grammar is legal.
    if xmlProject.find("{" + PROJECT_NAMESPACE + "}simulator") is not None:
        projectSimulator = Simulator.loadSimulator(
            xmlProject.find("{" + PROJECT_NAMESPACE + "}simulator"),
            PROJECT_NAMESPACE,
            "0.1",
            project.getGrammar().getAutomata(),
            project.getVocabulary(),
        )
        if projectSimulator is not None:
            project.setSimulator(projectSimulator)

    return project
Ejemplo n.º 25
0
    def scrape(self, chamber, session):
        """Scrape Montana bills for *session* and *chamber* and save those
        that parse successfully.
        """
        # Resolve the session to its term's start year.
        # NOTE(review): `year` stays unbound if the session matches no
        # term -- the line after the loop would then raise a NameError.
        for term in self.metadata['terms']:
            if session in term['sessions']:
                year = term['start_year']
                break

        self.versions_dict = self._versions_dict(year)

        base_bill_url = 'http://data.opi.mt.gov/bills/%d/BillHtml/' % year
        index_page = ElementTree(
            lxml.html.fromstring(self.urlopen(base_bill_url)))

        bill_urls = []
        for bill_anchor in index_page.findall('//a'):
            # Skip the governor's line-item-veto page; see 2009 HB 645
            if bill_anchor.text.find("govlineveto") == -1:
                # House bills start with H, Senate bills start with S
                if chamber == 'lower' and bill_anchor.text.startswith('H'):
                    bill_urls.append("%s%s" %
                                     (base_bill_url, bill_anchor.text))
                elif chamber == 'upper' and bill_anchor.text.startswith('S'):
                    bill_urls.append("%s%s" %
                                     (base_bill_url, bill_anchor.text))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            if bill:
                self.save_bill(bill)
Ejemplo n.º 26
0
    def scrape(self, chamber, session):
        """Scrape Montana bills for *session* and *chamber* from leg.mt.gov
        and save those that parse successfully.
        """
        # Resolve the session to its term's start year.
        # NOTE(review): `year` stays unbound if the session matches no
        # term -- the line after the loop would then raise a NameError.
        for term in self.metadata['terms']:
            if session in term['sessions']:
                year = term['start_year']
                break

        self.versions_dict = self._versions_dict(year)

        base_bill_url = 'http://leg.mt.gov/bills/%d/BillHtml/' % year
        index_page = ElementTree(lxml.html.fromstring(self.get(base_bill_url).text))

        bill_urls = []
        for bill_anchor in index_page.findall('//a'):
            # Skip the governor's line-item-veto page; see 2009 HB 645
            if bill_anchor.text.find("govlineveto") == -1:
                # House bills start with H, Senate bills start with S
                if chamber == 'lower' and bill_anchor.text.startswith('H'):
                    bill_urls.append("%s%s" % (base_bill_url, bill_anchor.text))
                elif chamber == 'upper' and bill_anchor.text.startswith('S'):
                    bill_urls.append("%s%s" % (base_bill_url, bill_anchor.text))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            if bill:
                self.save_bill(bill)
Ejemplo n.º 27
0
 def __str__(self):
     """Serialize the 'HUHU' header elements as a pretty-printed XML string."""
     root = Element('Header')
     tree = ElementTree()
     tree._setroot(root)
     # Every element produced by self.header() becomes a child of <Header>.
     for child in self.header('HUHU'):
         root.append(child)
     return tostring(tree, pretty_print=True, encoding='unicode')
Ejemplo n.º 28
0
    def saveConfigFile(self, overrideTraces=None):
        """This functions allows to save the current (and only)
        instance of the Workspace. You can supply a list of traces
        that should be written on-disk through the `overrideTraces`
        variable. This allows to override specific traces that where
        modified.

        :param overrideTraces: a list of trace identifiers that should
        be written on-disk, even if they already exists.
        """
        # A mutable default argument ([]) is shared across calls; use the
        # None sentinel instead and normalize here.
        if overrideTraces is None:
            overrideTraces = []

        workspaceFile = os.path.join(self.path, Workspace.CONFIGURATION_FILENAME)

        logging.info("Save the config file of the workspace {0} in {1}".format(self.getName(), workspaceFile))

        # Register the namespace
        etree.register_namespace('netzob', WORKSPACE_NAMESPACE)
        etree.register_namespace('netzob-common', COMMON_NAMESPACE)

        # Dump the file
        root = etree.Element("{" + WORKSPACE_NAMESPACE + "}workspace")
        root.set("creation_date", TypeConvertor.pythonDatetime2XSDDatetime(self.getCreationDate()))
        root.set("name", str(self.getName()))

        xmlWorkspaceConfig = etree.SubElement(root, "{" + WORKSPACE_NAMESPACE + "}configuration")

        xmlTraces = etree.SubElement(xmlWorkspaceConfig, "{" + WORKSPACE_NAMESPACE + "}traces")
        xmlTraces.text = str(self.getPathOfTraces())

        xmlLogging = etree.SubElement(xmlWorkspaceConfig, "{" + WORKSPACE_NAMESPACE + "}logging")
        xmlLogging.text = str(self.getPathOfLogging())

        xmlPrototypes = etree.SubElement(xmlWorkspaceConfig, "{" + WORKSPACE_NAMESPACE + "}prototypes")
        xmlPrototypes.text = str(self.getPathOfPrototypes())

        # Renamed from the second 'xmlPrototypes' assignment: this node holds
        # the bug-reporting flag, not the prototypes path.
        xmlBugReporting = etree.SubElement(xmlWorkspaceConfig, "{" + WORKSPACE_NAMESPACE + "}enable_bug_reporting")
        xmlBugReporting.text = str(self.enableBugReporting).lower()

        xmlWorkspaceProjects = etree.SubElement(root, "{" + WORKSPACE_NAMESPACE + "}projects")
        for projectPath in self.getProjectsPath():
            xmlProject = etree.SubElement(xmlWorkspaceProjects, "{" + WORKSPACE_NAMESPACE + "}project")
            xmlProject.set("path", projectPath)

        xmlWorkspaceImported = etree.SubElement(root, "{" + WORKSPACE_NAMESPACE + "}traces")
        for importedTrace in self.getImportedTraces():
            # overrideTraces variable contains the list of
            # ImportedTraces that should be overriden. This is useful
            # in case of message removal for example.
            forceOverride = (importedTrace.id in overrideTraces)

            importedTrace.save(xmlWorkspaceImported, WORKSPACE_NAMESPACE, COMMON_NAMESPACE,
                               os.path.join(self.path, self.getPathOfTraces()), forceOverride)

        xmlWorkspaceFunctions = etree.SubElement(root, "{" + WORKSPACE_NAMESPACE + "}functions")
        for function in self.getCustomFunctions():
            function.save(xmlWorkspaceFunctions, WORKSPACE_NAMESPACE)

        tree = ElementTree(root)
        tree.write(workspaceFile, pretty_print=True)
Ejemplo n.º 29
0
 def save(self, filePath):
     """Write this object's XML representation to *filePath*.

     The output is indented with four spaces, pretty-printed, UTF-8
     encoded, and prefixed with an XML declaration.
     """
     tree = XMLElementTree(self.toXML())
     indent(tree, space="    ")
     tree.write(
         filePath,
         xml_declaration=True,
         encoding="utf-8",
         pretty_print=True,
     )
Ejemplo n.º 30
0
def print_predictions(agency, stops, label=""):
    """Fetch NextBus arrival predictions for *stops* and print them as HTML.

    NOTE: Python 2 code (print statements, urllib.urlopen, list-returning
    filter()).

    :param agency: NextBus agency identifier used to build the request URL.
    :param stops: stop descriptors; also used to build a (routeTag, stopTag)
        -> title lookup table.
    :param label: optional prefix prepended to route titles that have no
        custom title in the index.
    """
    title_index = build_title_index(stops)
    url = build_url(agency, stops)

    debug("NextBus predictions for %s: %s" % (agency, url))
    f = urllib.urlopen(url)

    # Parse the NextBus XML response directly from the open socket.
    e = ElementTree(file=f)

    # Keep only <predictions> elements that actually contain a
    # <prediction>, then order them by the soonest arrival time.
    predictions = e.findall("//predictions")
    predictions = filter(lambda el: el.find(".//prediction") is not None, predictions)
    predictions.sort(key=lambda el: el.find(".//prediction").get("epochTime"))

    for n, p in enumerate(predictions):
        routeTag = p.get("routeTag")
        stopTag = p.get("stopTag")
        # Prefer the locally configured title; fall back to the route title
        # reported by NextBus (with any "Saferide " prefix stripped).
        title = title_index.get((routeTag, stopTag), False)
        if title:
            title = "<em>%s</em>" % (title.replace("\n", "<br>"), )
        else:
            title = p.get("routeTitle")
            title = re.sub(r'^Saferide ', '', title)
            title = label + title

        print "<h2>"+title+"</h2>"
        # Print at most three upcoming arrival times per stop.
        times = p.findall(".//prediction")
        print "<ol class='predictions'>"
        print '<li>%s</li>' % minutes(times.pop(0).get("minutes"))
        for t in times[0:2]:
            print '<li>%s</li>' % minutes(t.get("minutes"))
        print "</ol>"
Ejemplo n.º 31
0
def get_chapters(request):
    """Return the ICD-10 chapter list as a JSON HTTP response.

    Queries the configured ICD-10 web service for chapter records and builds,
    for each chapter, a localized description per language in ICD10_LANGS.

    NOTE: Python 2 / old-Django code (urllib.urlopen, ``mimetype=`` kwarg).
    NOTE(review): ``params`` and ``results`` initializers and ``language``
    are effectively unused below (the language negotiation is commented
    out) -- candidates for cleanup.
    """
    params = {}
    results = {}

    language = "pt"
    #    language = request.LANGUAGE_CODE.lower()
    #    if language == 'pt-br':
    #        language = 'pt'

    params = urllib.urlencode({"LI": "CAPITULO"})

    # POST the query to the ICD-10 service and parse its XML reply.
    resource = urllib.urlopen(settings.ICD10_SERVICE, params)

    tree = ElementTree()
    tree.parse(resource)

    terms = tree.findall("cid10ws_response")

    data = []
    for term in terms:
        description = {}
        # First term of the chapter's tree carries the chapter metadata.
        chapter = term.findall("tree/self/term_list/term")[0]

        # Build "<chapter> - <Translated title>" per configured language.
        for lang in ICD10_LANGS:
            term_trans = term.findall('record_list/record/descriptor_list/descriptor[@lang="%s"]' % lang)[0]
            if term_trans.text:
                description[lang] = "%s - %s" % (chapter.attrib["chapter"], term_trans.text.strip().capitalize())

        data.append({"fields": {"description": description, "label": chapter.attrib["tree_id"]}})

    return HttpResponse(json.dumps(data), mimetype="application/json")
Ejemplo n.º 32
0
    def dumpVOCAnnotations(output_folder, filename, size, names,
                           bounding_boxes):
        """Write a Pascal-VOC-style annotation XML for one image.

        :param output_folder: dataset root; the XML goes to
            ``<output_folder>/annotations/<filename>.xml``.
        :param filename: image base name (without extension).
        :param size: image dimensions as (height, width, depth).
        :param names: object class name per bounding box.
        :param bounding_boxes: boxes as (xmin, ymin, xmax, ymax).
        """
        root = Element('annotation')

        SubElement(root, 'folder').text = 'images'
        SubElement(root, 'filename').text = filename + '.jpg'
        SubElement(root, 'path').text = os.path.join(output_folder, 'images')

        # size comes in (height, width, depth) order; VOC wants
        # width/height/depth child elements.
        size_node = SubElement(root, 'size')
        for tag, value in (('width', size[1]), ('height', size[0]),
                           ('depth', size[2])):
            SubElement(size_node, tag).text = str(value)

        SubElement(root, 'segmented').text = '0'

        # One <object> node per (name, box) pair.
        for label, box in zip(names, bounding_boxes):
            obj_node = SubElement(root, 'object')
            SubElement(obj_node, 'name').text = label
            SubElement(obj_node, 'pose').text = 'Unspecified'
            SubElement(obj_node, 'truncated').text = '0'
            SubElement(obj_node, 'difficult').text = '0'

            box_node = SubElement(obj_node, 'bndbox')
            for tag, value in (('xmin', box[0]), ('ymin', box[1]),
                               ('xmax', box[2]), ('ymax', box[3])):
                SubElement(box_node, tag).text = str(value)

        out_path = os.path.join(output_folder, 'annotations', filename) + '.xml'
        ElementTree(root).write(out_path, pretty_print=True)
Ejemplo n.º 33
0
    def scrape_pre_2003_legislators(self, chamber, year, session, suffix):
        """Scrape the legislator roster for pre-2003 Montana sessions.

        Fetches the ``legname.asp`` roster page, locates the roster table for
        the requested chamber, and saves a ``Legislator`` for each data row.

        :param chamber: 'upper' (Senate) or 'lower' (House).
        :param year: session start year; 2001 and 1999 are recognized.
        :param session: session identifier passed through to ``Legislator``.
        :param suffix: URL path suffix selecting the session directory.
        """
        url = 'http://leg.mt.gov/css/Sessions/%d%s/legname.asp' % (session, suffix)
        legislator_page = ElementTree(lxml.html.fromstring(self.urlopen(url)))

        # Roster table titles differ between sessions.  The unused startRow
        # locals were removed.  NOTE(review): for a year other than
        # 2001/1999, tableName stays unbound and the loop below raises
        # NameError -- confirm callers only pass these two years.
        if year == 2001:
            if chamber == 'upper':
                tableName = '57th Legislatore Roster Senate (2001-2002)'
            else:
                tableName = '57th Legislator Roster (House)(2001-2002)'
        elif year == 1999:
            if chamber == 'upper':
                tableName = 'Members of the Senate'
            else:
                tableName = 'Members of the House'

        for table in legislator_page.xpath("//table"):
            # dict.has_key() was removed in Python 3; attrib.get() is the
            # equivalent membership-plus-compare and works on 2 and 3.
            if table.attrib.get('name') == tableName:
                parse_names = False
                for row in table.getchildren():
                    if row.tag != 'tr':
                        continue
                    celldata = row.getchildren()[0].text_content().strip()
                    if parse_names and len(celldata) != 0:
                        # Rows look like "Last, First (P)".
                        name, party_letter = celldata.rsplit(' (', 1)
                        party_letter = party_letter[0]

                        nameParts = [namePart.strip() for namePart in name.split(',')]
                        assert len(nameParts) < 4
                        if len(nameParts) == 2:
                            last_name, first_name = nameParts
                        elif len(nameParts) == 3:
                            last_name = ' '.join(nameParts[0:2])
                            first_name = nameParts[2]
                        else:
                            # NOTE(review): this branch never assigns
                            # first_name/last_name, so stale values from a
                            # previous row (or a NameError) would be used.
                            name, party_letter = celldata.rsplit(' (', 1)

                        district = row.getchildren()[2].text_content().strip()

                        if party_letter == 'R':
                            party = 'Republican'
                        elif party_letter == 'D':
                            party = 'Democrat'
                        else:
                            party = party_letter

                        legislator = Legislator(session, chamber, district, '%s %s' % (first_name, last_name), \
                                                first_name, last_name, '', party)
                        legislator.add_source(url)
                        self.save_legislator(legislator)

                    if celldata == "Name (Party)":
                        # The table headers seem to vary in size, but the last row
                        # always seems to start with 'Name (Party)' -- once we find
                        # that, start parsing legislator names
                        parse_names = True
Ejemplo n.º 34
0
def serialize_browse_layers(browse_layers, stream=None, pretty_print=False):
    """Serialize *browse_layers* to an XML configuration document.

    :param browse_layers: iterable of browse-layer objects to serialize.
    :param stream: writable stream; a fresh StringIO is created when omitted.
    :param pretty_print: forwarded to the XML writer.
    :returns: the stream the document was written to.
    """
    if not stream:
        stream = StringIO()
    browse_layers_elem = Element(ns_cfg("browseLayers"),
                                 nsmap={"cfg": ns_cfg.uri})

    for browse_layer in browse_layers:
        bl_elem = SubElement(browse_layers_elem,
                             ns_cfg("browseLayer"),
                             attrib={"browseLayerId": browse_layer.id})

        # all(...) replaces len(filter(...)) == N, which breaks on Python 3
        # where filter() returns an iterator without len().
        rgb = browse_layer.r_band, browse_layer.g_band, browse_layer.b_band
        has_rgb = all(v is not None for v in rgb)

        ri = browse_layer.radiometric_interval_min, browse_layer.radiometric_interval_max
        has_ri = all(v is not None for v in ri)

        SubElement(bl_elem,
                   ns_cfg("browseType")).text = browse_layer.browse_type
        SubElement(bl_elem, ns_cfg("title")).text = browse_layer.title
        if browse_layer.description is not None:
            SubElement(bl_elem,
                       ns_cfg("description")).text = browse_layer.description
        SubElement(bl_elem, ns_cfg("grid")).text = browse_layer.grid
        SubElement(bl_elem, ns_cfg(
            "browseAccessPolicy")).text = browse_layer.browse_access_policy
        SubElement(bl_elem, ns_cfg("hostingBrowseServerName")).text = ""
        rel_ds_elem = SubElement(bl_elem, ns_cfg("relatedDatasetIds"))
        for rel_ds_id in browse_layer.related_dataset_ids:
            SubElement(rel_ds_elem, ns_cfg("datasetId")).text = rel_ds_id
        SubElement(
            bl_elem, ns_cfg("containsVerticalCurtains")
        ).text = "true" if browse_layer.contains_vertical_curtains else "false"
        # Only emit rgbBands / radiometricInterval when fully specified.
        if has_rgb:
            SubElement(bl_elem,
                       ns_cfg("rgbBands")).text = ",".join(map(str, rgb))

        if has_ri:
            ri_elem = SubElement(bl_elem, ns_cfg("radiometricInterval"))
            SubElement(ri_elem, ns_cfg("min")).text = str(ri[0])
            SubElement(ri_elem, ns_cfg("max")).text = str(ri[1])

        SubElement(bl_elem, ns_cfg("highestMapLevel")).text = str(
            browse_layer.highest_map_level)
        SubElement(bl_elem, ns_cfg("lowestMapLevel")).text = str(
            browse_layer.lowest_map_level)
        SubElement(bl_elem, ns_cfg("timeDimensionDefault")).text = str(
            browse_layer.timedimension_default)
        SubElement(bl_elem, ns_cfg("tileQueryLimit")).text = str(
            browse_layer.tile_query_limit)

    # TODO: encoding
    et = ElementTree(browse_layers_elem)
    et.write(stream,
             pretty_print=pretty_print,
             encoding="utf-8",
             xml_declaration=True)

    return stream
Ejemplo n.º 35
0
    def scrape_pre_58_legislators(self, chamber, term, suffix):
        """Scrape the legislator roster for pre-58th Montana legislatures.

        Fetches the ``legname.asp`` roster page, locates the roster table for
        the requested chamber, and saves a ``Legislator`` for each data row.

        :param chamber: 'upper' (Senate) or 'lower' (House).
        :param term: legislature number as a string; '57' and '56' are
            recognized.
        :param suffix: URL path suffix selecting the session directory.
        """
        url = 'http://leg.mt.gov/css/Sessions/%s%s/legname.asp' % (term, suffix)
        legislator_page = ElementTree(lxml.html.fromstring(self.urlopen(url)))

        # Roster table titles differ between legislatures.  The unused
        # startRow locals were removed.  NOTE(review): for a term other than
        # '57'/'56', tableName stays unbound and the loop below raises
        # NameError -- confirm callers only pass these two terms.
        if term == '57':
            if chamber == 'upper':
                tableName = '57th Legislatore Roster Senate (2001-2002)'
            else:
                tableName = '57th Legislator Roster (House)(2001-2002)'
        elif term == '56':
            if chamber == 'upper':
                tableName = 'Members of the Senate'
            else:
                tableName = 'Members of the House'

        for table in legislator_page.xpath("//table"):
            # dict.has_key() was removed in Python 3; attrib.get() is the
            # equivalent membership-plus-compare and works on 2 and 3.
            if table.attrib.get('name') == tableName:
                parse_names = False
                for row in table.getchildren():
                    if row.tag != 'tr':
                        continue
                    celldata = row.getchildren()[0].text_content().strip()
                    if parse_names and len(celldata) != 0:
                        # Rows look like "Last, First (P)".
                        name, party_letter = celldata.rsplit(' (', 1)
                        party_letter = party_letter[0]

                        nameParts = [namePart.strip() for namePart in name.split(',')]
                        assert len(nameParts) < 4
                        if len(nameParts) == 2:
                            last_name, first_name = nameParts
                        elif len(nameParts) == 3:
                            last_name = ' '.join(nameParts[0:2])
                            first_name = nameParts[2]
                        else:
                            # NOTE(review): this branch never assigns
                            # first_name/last_name, so stale values from a
                            # previous row (or a NameError) would be used.
                            name, party_letter = celldata.rsplit(' (', 1)

                        district = row.getchildren()[2].text_content().strip()

                        if party_letter == 'R':
                            party = 'Republican'
                        elif party_letter == 'D':
                            party = 'Democrat'
                        else:
                            party = party_letter

                        legislator = Legislator(term, chamber, district, '%s %s' % (first_name, last_name), \
                                                first_name, last_name, '', party)
                        legislator.add_source(url)
                        self.save_legislator(legislator)

                    if celldata == "Name (Party)":
                        # The table headers seem to vary in size, but the last row
                        # always seems to start with 'Name (Party)' -- once we find
                        # that, start parsing legislator names
                        parse_names = True
Ejemplo n.º 36
0
    def parse_bill(self, bill_url, session, chamber):
        """Parse a Montana bill page; return the bill object or None.

        Follows the "status of" link on the bill page (or falls back to a
        metadata search) to build the bill, then attaches the PDF version
        and the status URL as a source.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if 'billhtml/hb0002.htm' in bill_url.lower():
            return

        bill = None
        try:
            doc = lxml.html.fromstring(self.get(bill_url).text)
        except XMLSyntaxError as e:
            self.logger.warning("Got %r while parsing %r" % (e, bill_url))
            return
        bill_page = ElementTree(doc)

        for anchor in bill_page.findall('//a'):
            if (anchor.text_content().startswith('status of')
                    or anchor.text_content().startswith(
                        'Detailed Information (status)')):
                status_url = anchor.attrib['href'].replace("\r", "").replace(
                    "\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url,
                                                   session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata['session_details'][session]['years'][
                0] % 100

            status_url = self.search_url_template % (laws_year, bill_type,
                                                     bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session,
                                               chamber)

        # Get versions on the detail page.
        versions = [a['action'] for a in bill['actions']]
        versions = [a for a in versions if 'Version Available' in a]
        # Default first: previously version_name stayed unbound (NameError at
        # add_version) when the last version action matched neither
        # 'New Version' nor 'Enrolled'.
        version_name = 'Introduced'
        if versions:
            version = versions.pop()
            if 'New Version' in version:
                version_name = 'Amended'
            elif 'Enrolled' in version:
                version_name = 'Enrolled'

        self.add_other_versions(bill)

        # Add pdf.
        url = set(bill_page.xpath('//a/@href[contains(., "BillPdf")]')).pop()
        bill.add_version(version_name, url, mimetype='application/pdf')

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
Ejemplo n.º 37
0
def main(input_file, output_file):
    """Run process() over every screen (and its widgets) and save the tree.

    Reads *input_file*, prints each screen's name, processes the screen and
    each of its child widgets in place, then writes the result to
    *output_file*.
    """
    root = ElementTree().parse(input_file)
    for screen in root.findall('screen'):
        print("====", screen.get('name'))
        # Process the screen element first, then each child widget in order.
        for node in [screen] + list(screen):
            process(node)
    ElementTree(root).write(output_file)
Ejemplo n.º 38
0
    def __init__(self, path):
        """Load and parse the pom.xml at *path*, stripping comments/CDATA.

        :raises PomLoadingException: when parsing yields no document.
        """
        parser = XMLParser(remove_comments=True, strip_cdata=True)
        self.__doc = ElementTree().parse(path, parser=parser)

        if self.__doc is None:
            raise PomLoadingException(
                "Failed to load pom.xml. You have a problem")
Ejemplo n.º 39
0
def lxml_get_files(page_response):
    """Return the file hrefs (non-directory links) from an index page.

    Parses the HTML body of *page_response*, collects the href of every
    anchor under a <pre> tag, and drops entries ending in '/' (directories).
    """
    root = ElementTree(file=io.BytesIO(page_response.content),
                       parser=HTMLParser()).getroot()
    hrefs = (anchor.get("href") for anchor in root.cssselect('pre a'))
    return [href for href in hrefs if not href.endswith("/")]
Ejemplo n.º 40
0
def dump_xml(inf, outf, pretty_print=True, game_version="firered"):
    """Serialize wild-encounter data from *inf* as XML on *outf*.

    :param inf: input passed through to ``to_xml``.
    :param outf: writable stream for the XML document.
    :param pretty_print: forwarded to the XML writer.
    :param game_version: game identifier for ``to_xml``; was the hard-coded
        "firered" flagged XXX by the original author, now parameterized
        (default preserves the old behavior).
    """
    root = Element("wild")
    game = to_xml(inf, game_version)
    root.append(game)
    outf.write("""<?xml version="1.0" encoding="utf-8"?>\n""")
    xml = ElementTree(root)

    xml.write(outf, pretty_print=pretty_print)
Ejemplo n.º 41
0
def parse_catalog_ref(element: etree.ElementTree):
    """Extract href, title and ID attributes from a catalogRef element.

    The href/title live in the xlink namespace; when the element has no
    "ID" attribute, the title is used as the identifier.

    :returns: DotDict with keys ``href``, ``title`` and ``id``.
    """
    xlink_ns = element.nsmap.get("xlink")
    href = element.get("{%s}href" % xlink_ns)
    title = element.get("{%s}title" % xlink_ns)
    # Renamed from 'id' to avoid shadowing the builtin.
    ref_id = element.get("ID")
    if ref_id is None:
        ref_id = title
    return DotDict(href=href, title=title, id=ref_id)
Ejemplo n.º 42
0
    def _get_asserts(self, content: etree.ElementTree) -> List[Dict[str, Any]]:
        """Collect the list of Schematron assertion checks from the schema.

        Walks every xs:appinfo block of *content*, extracts each pattern's
        rules and their assertions, skipping checks whose context element is
        legitimately absent from the XML document (optional per the schema).

        :param content: parsed XSD document (lxml tree with nsmap).
        :returns: list of dicts with keys 'name', 'assert', 'context', 'error'.
        :raises ContextError: when a mandatory check's context element is
            missing from the XML document.
        """
        assertions = content.findall('.//xs:appinfo', namespaces=content.nsmap)
        assert_list = []

        for assertion in assertions:
            for pattern in assertion:
                name = pattern.attrib.get('name', None)
                if not name:
                    continue

                for rule in pattern:
                    context = rule.attrib['context']

                    # Skip checks whose ancestor element may be absent
                    # (minOccurs=0).
                    # NOTE(review): this f-string has no placeholder, so the
                    # same xpath runs for every rule -- confirm intent.
                    occurs_elements = assertion.xpath(
                        f'ancestor::*[@minOccurs=0]')
                    if len(occurs_elements):
                        continue

                    # Check whether the context element is present in the
                    # XML document being validated.
                    if len(self.xml_content.xpath(f'//{context}')) == 0:
                        # Context not found in the XML document.

                        # Skip optional checks nested in an xs:choice.
                        choice_elements = assertion.xpath(f'ancestor::xs:choice',
                                                          namespaces=content.nsmap)
                        if len(choice_elements):
                            # Optional check -- skip it.
                            continue

                        # Skip optional checks declared with minOccurs="0".
                        is_optional = True
                        min_occurs = content.xpath(f'//xs:element[@name="{context}"]/@minOccurs',
                                                   namespaces=content.nsmap)
                        for occur_attrib in min_occurs:
                            # Any non-"0" minOccurs makes the check mandatory.
                            if occur_attrib != '0':
                                is_optional = False
                                break

                        # Mandatory check whose context is missing -- error.
                        if not is_optional:
                            raise ContextError(context, self.filename)

                    # Record every assertion of the rule with its error info.
                    for sch_assert in rule:
                        for error_node in sch_assert:
                            error = self._get_error(error_node)

                            assert_list.append({
                                'name':     name,
                                'assert':   sch_assert.attrib['test'],
                                'context':  context,
                                'error': error
                            })

        return assert_list
Ejemplo n.º 43
0
    def parse_bill(self, bill_url, session, chamber):
        """Parse a Montana bill page; return the bill object or None.

        Follows the "status of" link on the bill page (or falls back to a
        metadata search) to build the bill, then attaches the HTML and PDF
        versions and the status URL as a source.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if "/2011/billhtml/hb0002.htm" in bill_url.lower():
            return

        bill = None
        try:
            doc = lxml.html.fromstring(self.urlopen(bill_url))
        except XMLSyntaxError as e:
            self.logger.warning("Got %r while parsing %r" % (e, bill_url))
            return
        bill_page = ElementTree(doc)

        for anchor in bill_page.findall("//a"):
            if anchor.text_content().startswith("status of") or anchor.text_content().startswith(
                "Detailed Information (status)"
            ):
                status_url = anchor.attrib["href"].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata["session_details"][session]["years"][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        # Get versions on the detail page.
        versions = [a["action"] for a in bill["actions"]]
        versions = [a for a in versions if "Version Available" in a]
        # Default first: previously version_name stayed unbound (NameError at
        # add_version) when the last version action matched neither
        # 'New Version' nor 'Enrolled'.
        version_name = "Introduced"
        if versions:
            version = versions.pop()
            if "New Version" in version:
                version_name = "Amended"
            elif "Enrolled" in version:
                version_name = "Enrolled"

        self.add_other_versions(bill)

        # Add html.
        bill.add_version(version_name, bill_url, mimetype="text/html")

        # Add pdf.
        url = set(bill_page.xpath('//a/@href[contains(., "BillPdf")]')).pop()
        bill.add_version(version_name, url, mimetype="application/pdf")

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
Ejemplo n.º 44
0
def remove_circle(tree: etree.ElementTree):
    """
    Remove the first SVG <circle> element whose id attribute is "circle".
    """
    root = tree.getroot()
    # Only the first matching circle is removed (mirrors the original break).
    circles = tree.findall("{http://www.w3.org/2000/svg}circle")
    target = next((c for c in circles if c.attrib.get('id') == "circle"), None)
    if target is not None:
        root.remove(target)
Ejemplo n.º 45
0
def dict_to_xml(obj):
    """
    Return a xml representation of the given dictionary:
    1.  keys of the dictionary become sublements.
    2.  if a value is a list, then key is a set of sublements.
    3.  keys starting with '@' became an attribute.

    {'duck': {'birth_date': '1934-06-04T00:00:00',
              'created_by': {'@href': 'http://en.wikipedia.org/wiki/Walt_Disney',
                             'cryopreserved': True,
                             'name': 'Walt Disney'},
              'family': {'nephew': [{'name': 'Huey'},
                                    {'name': 'Dewey'},
                                    {'name': 'Louie'}],
                         'children': [],
                         'uncles': {'uncle': [{'name': 'Scrooge McDuck'},
                                              {'name': 'Ludwig Von Drake'}]}},
              'first_film': None,
              'last_film': None,
              'name': 'Donald',
              'species': {'@href': 'http://en.wikipedia.org/wiki/Pekin_duck'}}
    }

    <?xml version="1.0" encoding="UTF-8"?>
    <duck>
        <name>Donald</name>
        <family>
            <children />
            <nephew><name>Huey</name></nephew>
            <nephew><name>Dewey</name></nephew>
            <nephew><name>Louie</name></nephew>
            <uncles>
                <uncle><name>Scrooge McDuck</name></uncle>
                <uncle><name>Ludwig Von Drake</name></uncle>
            </uncles>
        </family>
        <last_film />
        <first_film />
        <created_by href="http://en.wikipedia.org/wiki/Walt_Disney">
            <cryopreserved>True</cryopreserved>
            <name>Walt Disney</name>
        </created_by>
        <birth_date>1934-06-04T00:00:00</birth_date>
        <species href="http://en.wikipedia.org/wiki/Pekin_duck" />
    </duck>
    """
    if not obj:
        return

    # top level dictionary must contain a single entry
    # corresponding to the root element.
    # The original popitem() emptied the *caller's* dictionary as a side
    # effect; read the entry without mutating the input instead.
    key, value = next(iter(obj.items()))

    root = etree.Element(key)
    element_for_value(value, root)
    return (b'<?xml version="1.0" encoding="UTF-8"?>' +
            etree.tostring(root, encoding='utf-8'))
Ejemplo n.º 46
0
def xml(data, xmlFileName):
    """Write transcript *data* to *xmlFileName* as transcript_new.xsd XML.

    Builds a <transcript> document with one <segment> per turn, one
    <element> per word, and one <speaker> entry per distinct speaker name.

    :param data: dict with keys 'id', 'turn' (objects with text, startTime,
        endTime, speaker) and optionally 'speaker' (a set of known names).
    :param xmlFileName: destination file path.
    """
    from lxml.etree import ElementTree, Element, SubElement, Comment, tostring
    from collections import OrderedDict

    xsi = 'http://www.w3.org/2001/XMLSchema-instance'
    noNamespaceSchemaLocation = "{%s}noNamespaceSchemaLocation" % xsi
    doc = Element('transcript',
                  {noNamespaceSchemaLocation: 'transcript_new.xsd'})

    #, { 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    #                              'xsi:noNamespaceSchemaLocation': 'transcript_new.xsd' })
    head = SubElement(doc, 'head')
    SubElement(head, 'recording')
    annotations = SubElement(head, 'annotations')
    annotation_id = 'transcript_manual'
    SubElement(annotations, 'annotation', {'id': annotation_id})
    speakers = SubElement(head, 'speakers')
    speakerSet = data['speaker'] if 'speaker' in data else set()
    body = SubElement(doc, 'body')
    segments = SubElement(body, 'segments', {'annotation_id': annotation_id})
    programId = data['id']
    wordCount = 0
    for i, e in enumerate(data['turn']):
        tokens = e.text.split()
        startTime = e.startTime
        endTime = e.endTime
        # NOTE(review): raises ZeroDivisionError when a turn has no tokens --
        # confirm upstream guarantees non-empty turn text.
        averageWordDuration = (e.endTime - e.startTime) / len(tokens)
        speakerName = e.speaker if e.speaker else "{}_unknown_{}".format(
            programId, i)
        if speakerName not in speakerSet:
            SubElement(
                speakers, 'speaker',
                OrderedDict([('id', speakerName), ('name', speakerName)]))
            # Bug fix: add the *name*, not the Element, so the membership
            # test above actually deduplicates speakers.
            speakerSet.add(speakerName)
        segment = SubElement(
            segments, 'segment',
            OrderedDict([('id', "{}_utt_{}".format(programId, i)),
                         ('starttime', str(startTime)),
                         ('endtime', str(endTime)),
                         ('AWD', "{:2f}".format(averageWordDuration)),
                         ('PMER', "0.0"), ('WMER', "0.0"),
                         ('who', speakerName)]))
        for word in tokens:
            element = SubElement(
                segment, 'element',
                OrderedDict([('id', "{}_w{}".format(programId, wordCount)),
                             ('type', 'word')]))
            element.text = word
            wordCount += 1

    tree = ElementTree(doc)
    tree.write(xmlFileName,
               encoding='utf-8',
               xml_declaration=True,
               pretty_print=True)
Ejemplo n.º 47
0
def load(pom_path):
    """Parse the POM file at *pom_path* and return the parsed document.

    Comments and CDATA sections are stripped while parsing.

    Raises:
        PomLoadingException: if the file cannot be read or parsing
            yields no document.
    """
    pom_parser = XMLParser(remove_comments=True, strip_cdata=True)
    tree = ElementTree()
    try:
        document = tree.parse(pom_path, parser=pom_parser)
    except IOError:
        raise PomLoadingException("Cannot read file {f}".format(f=pom_path))
    if document is None:
        raise PomLoadingException("Failed to load {f}".format(f=pom_path))
    return document
    def produce_settings_file(self, printer):
        """Write an Avahi .service file advertising *printer* for AirPrint.

        Fills XML_TEMPLATE with the printer's name and port, optionally a
        host-name node and TXT records, then writes the result to
        '<prefix>[<SOURCE>-]<name>.service' (under self.directory if set).
        """
        printer_name = printer['name']

        tree = ElementTree()
        # The template carries pretty-printing whitespace; strip it so the
        # serializer can re-indent the output consistently.
        tree.parse(
            StringIO(
                XML_TEMPLATE.replace('\n', '').replace('\r',
                                                       '').replace('\t', '')))

        name_node = tree.find('name')
        name_node.text = 'AirPrint %s @ %%h' % printer_name

        service_node = tree.find('service')

        port_node = service_node.find('port')
        port_node.text = '%d' % printer['port']

        host = printer['host']
        if host:
            if self.dnsdomain:
                # Replace the host's domain part with the configured one.
                pair = host.rsplit('.', 1)
                if len(pair) > 1:
                    host = '.'.join((pair[0], self.dnsdomain))
            service_node.append(self.new_node('host-name', host))

        txt = printer['txt']
        for key in txt:
            # 'adminurl' records are only emitted when explicitly enabled.
            if self.adminurl or key != 'adminurl':
                service_node.append(
                    self.new_txtrecord_node('%s=%s' % (key, txt[key])))

        # "'SOURCE' in printer" replaces the Python-2-only dict.has_key().
        source = printer['SOURCE'] if 'SOURCE' in printer else ''

        fname = '%s%s%s.service' % (self.prefix, '%s-' % source
                                    if len(source) > 0 else '', printer_name)

        if self.directory:
            fname = os.path.join(self.directory, fname)

        # Context manager guarantees the file is closed even if a write fails.
        with open(fname, 'w') as f:
            if etree:
                tree.write(f,
                           pretty_print=True,
                           xml_declaration=True,
                           encoding="UTF-8")
            else:
                # Fallback when lxml is unavailable: serialize via minidom
                # and prepend the avahi-service DOCTYPE by hand.
                xmlstr = tostring(tree.getroot())
                doc = parseString(xmlstr)
                dt = minidom.getDOMImplementation('').createDocumentType(
                    'service-group', None, 'avahi-service.dtd')
                doc.insertBefore(dt, doc.documentElement)
                doc.writexml(f)

        if self.verbose:
            src = source if len(source) > 0 else 'unknown'
            sys.stderr.write('Created from %s: %s%s' %
                             (src, fname, os.linesep))
Ejemplo n.º 49
0
    def from_wordpress_xml_file(cls, filename):
        """Build a WordpressImporter from a WordPress export XML file.

        Walks every element of the document, converting the ones that
        qualify as posts or pages, then sorts the posts by date.
        """
        x = ElementTree(file=filename)

        new_importer = WordpressImporter()
        new_importer.get_original_blog_address(x)
        # iter() replaces the deprecated getiterator() (removed from the
        # stdlib in Python 3.9, deprecated in lxml). Materialize once so
        # the document is walked a single time for both filters.
        all_elements = list(x.iter())
        new_importer.posts = [new_importer.convert_posts(post) for post in all_elements if cls.is_valid_post(post)]
        new_importer.pages = [new_importer.convert_posts(page) for page in all_elements if cls.is_valid_page(page)]

        cls.sort_posts_by_date(new_importer.posts)

        return new_importer
Ejemplo n.º 50
0
    def parse_bill(self, bill_url, session, chamber):
        """Scrape one bill detail page and return the populated bill.

        Finds the bill's status page (via an anchor on the detail page,
        or by searching with the bill id when no anchor matches), then
        attaches HTML and PDF versions plus the status URL as a source.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if '/2011/billhtml/hb0002.htm' in bill_url.lower():
            return

        bill = None
        bill_page = ElementTree(lxml.html.fromstring(self.urlopen(bill_url)))

        for anchor in bill_page.findall('//a'):
            if (anchor.text_content().startswith('status of') or
                anchor.text_content().startswith('Detailed Information (status)')):
                status_url = anchor.attrib['href'].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata['session_details'][session]['years'][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        # Get versions on the detail page.
        versions = [a['action'] for a in bill['actions']]
        versions = [a for a in versions if 'Version Available' in a]
        # Default first so version_name can never be unbound; the original
        # raised UnboundLocalError when the last version action contained
        # neither 'New Version' nor 'Enrolled'.
        version_name = 'Introduced'
        if versions:
            version = versions.pop()
            if 'New Version' in version:
                version_name = 'Amended'
            elif 'Enrolled' in version:
                version_name = 'Enrolled'

        self.add_other_versions(bill)

        # Add html.
        bill.add_version(version_name, bill_url, mimetype='text/html')

        # Add pdf.
        url = set(bill_page.xpath('//a/@href[contains(., "BillPdf")]')).pop()
        bill.add_version(version_name, url, mimetype='application/pdf')

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
Ejemplo n.º 51
0
def fetch_text_from_url(url):
    """Fetch *url* and return the text of its headings and paragraphs,
    joined by blank lines."""
    request = urllib2.Request(url)
    # change the User Agent to avoid being blocked by Wikipedia
    # downloading a couple of articles ones should not be abusive
    request.add_header('User-Agent', 'pignlproc categorizer')
    opener = urllib2.build_opener()
    html_content = opener.open(request).read()
    tree = ElementTree(lxml.html.document_fromstring(html_content))
    chunks = []
    for tag in ('h1', 'h2', 'h3', 'h4', 'p'):
        for node in tree.findall('//' + tag):
            chunks.append(node.text_content())
    return "\n\n".join(chunks)
Ejemplo n.º 52
0
    def exportProjectApplyButton_clicked_cb(self, button):
        """Export the current project to the XML file chosen in the
        export dialog, asking for confirmation before overwriting an
        existing file."""

        logging.debug("Export project")

        try:
            selectedFolder = self.view.exportProjectFileChooserButton.get_current_folder()
            filename = self.view.exportProjectFilenameEntry.get_text()

            if selectedFolder is None:
                raise Exception(_("No directory selected"))
            elif filename is None or len(filename) == 0:
                raise Exception(_("No filename provided"))
            else:
                outputFilename = os.path.join(selectedFolder, filename)
                logging.debug("Output filename: {0}".format(outputFilename))
                # (dropped unused local 'overwrite' from the original)

                if os.path.exists(outputFilename):
                    # Ask the user to confirm replacing the existing file.
                    questionMsg = _("A file named \"{0}\" already exists. Do you want to replace it?").format(filename)

                    dialog = Gtk.MessageDialog(self.view.exportProject,
                                               Gtk.DialogFlags.MODAL | Gtk.DialogFlags.DESTROY_WITH_PARENT,
                                               Gtk.MessageType.WARNING,
                                               Gtk.ButtonsType.NONE,
                                               questionMsg)

                    dialog.format_secondary_text(_("The file already exists in \"{0}\". Replacing it will overwrite its contents.").format(selectedFolder))

                    dialog.add_button(Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL)
                    dialog.add_button(_("Replace"), Gtk.ResponseType.YES)
                    dialog.set_default_response(Gtk.ResponseType.YES)

                    response = dialog.run()
                    dialog.destroy()

                    if response == Gtk.ResponseType.CANCEL:
                        self.view.destroy()
                        self.log.info("Export was cancelled")
                        return

                # Serialize the project definition and write it to disk.
                xmlDefinitionOfProject = self.project.generateXMLConfigFile()
                tree = ElementTree(xmlDefinitionOfProject)
                tree.write(outputFilename, pretty_print=True)
                self.view.destroy()

        # 'except Exception as e' works on Python 2.6+ and Python 3;
        # the original 'except Exception, e' is Python-2-only syntax.
        except Exception as e:
            self.view.showErrorMessage(_("An error occurred while exporting the project."))
            # logging.warning replaces the deprecated logging.warn alias.
            logging.warning("Error when exporting project: {0}".format(e))
Ejemplo n.º 53
0
    def test_etree_from_file(self):
        """ElementTree(file=...) accepts an already-open file object."""
        # A namespaced document: tag is qualified, prefix/nsmap populated.
        with open('sample.xml') as handle:
            tree = ElementTree(file=handle)
        doc_root = tree.getroot()
        self.assertEqual('{http://example.tld}document', doc_root.tag)
        self.assertEqual('x', doc_root.prefix)
        self.assertTrue('x' in doc_root.nsmap)

        # A plain document: no prefix and an empty namespace map.
        with open('hello.xml') as handle:
            tree = ElementTree(file=handle)
        doc_root = tree.getroot()
        self.assertEqual('hello', doc_root.tag)
        self.assertEqual(None, doc_root.prefix)
        self.assertEqual({}, doc_root.nsmap)
Ejemplo n.º 54
0
def serialize_browse_layers(browse_layers, stream=None, pretty_print=False):
    """Serialize *browse_layers* into a cfg:browseLayers XML document.

    Writes the document (with XML declaration, UTF-8) to *stream*,
    creating a StringIO when none is given, and returns the stream.
    """
    if not stream:
        stream = StringIO()
    browse_layers_elem = Element(ns_cfg("browseLayers"),
                                 nsmap={"cfg": ns_cfg.uri})

    for browse_layer in browse_layers:
        bl_elem = SubElement(
            browse_layers_elem, ns_cfg("browseLayer"),
            attrib={"browseLayerId": browse_layer.id}
        )

        # all(...) works on Python 2 and 3 alike; the original
        # len(filter(...)) breaks on Python 3 where filter() returns an
        # iterator that has no len().
        rgb = browse_layer.r_band, browse_layer.g_band, browse_layer.b_band
        has_rgb = all(v is not None for v in rgb)

        ri = browse_layer.radiometric_interval_min, browse_layer.radiometric_interval_max
        has_ri = all(v is not None for v in ri)

        SubElement(bl_elem, ns_cfg("browseType")).text = browse_layer.browse_type
        SubElement(bl_elem, ns_cfg("title")).text = browse_layer.title
        if browse_layer.description is not None:
            SubElement(bl_elem, ns_cfg("description")).text = browse_layer.description
        SubElement(bl_elem, ns_cfg("grid")).text = browse_layer.grid
        SubElement(bl_elem, ns_cfg("browseAccessPolicy")).text = browse_layer.browse_access_policy
        SubElement(bl_elem, ns_cfg("hostingBrowseServerName")).text = ""
        rel_ds_elem = SubElement(bl_elem, ns_cfg("relatedDatasetIds"))
        for rel_ds_id in browse_layer.related_dataset_ids:
            SubElement(rel_ds_elem, ns_cfg("datasetId")).text = rel_ds_id
        SubElement(bl_elem, ns_cfg("containsVerticalCurtains")).text = "true" if browse_layer.contains_vertical_curtains else "false"
        SubElement(bl_elem, ns_cfg("contains_volumes")).text = "true" if browse_layer.contains_volumes else "false"
        if has_rgb:
            SubElement(bl_elem, ns_cfg("rgbBands")).text = ",".join(map(str, rgb))

        if has_ri:
            ri_elem = SubElement(bl_elem, ns_cfg("radiometricInterval"))
            SubElement(ri_elem, ns_cfg("min")).text = str(ri[0])
            SubElement(ri_elem, ns_cfg("max")).text = str(ri[1])

        SubElement(bl_elem, ns_cfg("highestMapLevel")).text = str(browse_layer.highest_map_level)
        SubElement(bl_elem, ns_cfg("lowestMapLevel")).text = str(browse_layer.lowest_map_level)
        SubElement(bl_elem, ns_cfg("timeDimensionDefault")).text = str(browse_layer.timedimension_default)
        SubElement(bl_elem, ns_cfg("tileQueryLimit")).text = str(browse_layer.tile_query_limit)

    # TODO: encoding
    et = ElementTree(browse_layers_elem)
    et.write(stream, pretty_print=pretty_print, encoding="utf-8",
             xml_declaration=True)

    return stream
Ejemplo n.º 55
0
    def saveConfigFile(self, workspace):
        """Serialize this project's configuration into its XML config
        file inside *workspace*, creating the project directory first
        when it does not exist yet."""
        projectPath = os.path.join(workspace.getPath(), self.getPath())
        projectFile = os.path.join(projectPath, Project.CONFIGURATION_FILENAME)

        logging.info("Save the config file of project {0} in {1}".format(self.getName(), projectFile))

        # Make sure the project directory exists before writing into it.
        if not os.path.exists(projectPath):
            logging.info("Creation of the directory: {0}".format(projectPath))
            os.mkdir(projectPath)

        # Generate the XML configuration and write it to disk.
        configRoot = self.generateXMLConfigFile()
        ElementTree(configRoot).write(projectFile, pretty_print=True)
Ejemplo n.º 56
0
 def __init__(self, name):
     """Load validation-status data from the XML file *name*.

     Reads the label metadata and, for every current (non-rsync-transfer)
     validation_status entry, records the status under the entry's URI.
     NOTE(review): this assumes the enclosing class behaves like a mapping
     (self[uri] is used below) — confirm against the class definition.
     """
     self.name = name
     tree = ElementTree(file = name)
     # Pairs of (label tag, human-readable description text) from <labels>.
     labels = tuple((elt.tag.strip(), elt.text.strip()) for elt in tree.find("labels"))
     self.labels = tuple(pair[0] for pair in labels)
     self.descrs = dict(labels)
     self.date = tree.getroot().get("date")
     for elt in tree.findall("validation_status"):
         status = elt.get("status")
         uri = elt.text.strip()
         # Skip rsync transfer bookkeeping and non-current generations.
         if status.startswith("rsync_transfer_") or elt.get("generation") != "current":
             continue
         if uri not in self:
             # Object is defined elsewhere in the file; one holder per URI.
             self[uri] = Object(self, uri)
         self[uri].add(status)
Ejemplo n.º 57
0
def loadWorkspace_0_1(workspacePath, workspaceFile):
    """Load a version 0.1 workspace definition from *workspaceFile*.

    Parses the workspace XML, reads the configured paths, restores the
    previously imported traces and references the known projects.
    Returns the populated Workspace.
    """
    # Qualified-name prefix, built once instead of inline everywhere.
    ns = "{" + WORKSPACE_NAMESPACE + "}"

    # Parse the XML Document as 0.1 version
    tree = ElementTree()
    tree.parse(workspaceFile)
    xmlWorkspace = tree.getroot()
    wsName = xmlWorkspace.get('name', 'none')
    wsCreationDate = TypeConvertor.xsdDatetime2PythonDatetime(xmlWorkspace.get('creation_date'))

    # Parse the configuration to retrieve the main paths
    xmlWorkspaceConfig = xmlWorkspace.find(ns + "configuration")
    pathOfTraces = xmlWorkspaceConfig.find(ns + "traces").text

    # Optional logging path: kept only when present and non-empty.
    # (Each lookup is done once; the original repeated the same .find()
    # call up to three times per condition.)
    pathOfLogging = None
    xmlLogging = xmlWorkspaceConfig.find(ns + "logging")
    if xmlLogging is not None and xmlLogging.text:
        pathOfLogging = xmlLogging.text

    # Optional prototypes path: kept only when present and non-empty.
    pathOfPrototypes = None
    xmlPrototypes = xmlWorkspaceConfig.find(ns + "prototypes")
    if xmlPrototypes is not None and xmlPrototypes.text:
        pathOfPrototypes = xmlPrototypes.text

    # Remember the last opened project, if one is recorded.
    lastProject = None
    xmlProjects = xmlWorkspace.find(ns + "projects")
    if xmlProjects is not None and xmlProjects.get("last", "none") != "none":
        lastProject = xmlProjects.get("last", "none")

    # Instantiation of the workspace
    workspace = Workspace(wsName, wsCreationDate, workspacePath, pathOfTraces, pathOfLogging, pathOfPrototypes)

    # Load the already imported traces
    xmlTraces = xmlWorkspace.find(ns + "traces")
    if xmlTraces is not None:
        for xmlTrace in xmlTraces.findall(ns + "trace"):
            trace = ImportedTrace.loadTrace(xmlTrace, WORKSPACE_NAMESPACE, COMMON_NAMESPACE, "0.1", workspace.getPathOfTraces())
            if trace is not None:
                workspace.addImportedTrace(trace)

    # Reference the projects
    if xmlProjects is not None:
        for xmlProject in xmlWorkspace.findall(ns + "projects/" + ns + "project"):
            project_path = xmlProject.get("path")
            workspace.referenceProject(project_path)
            if project_path == lastProject and lastProject is not None:
                workspace.referenceLastProject(lastProject)

    return workspace
Ejemplo n.º 58
0
    def add_bill_versions(self, bill, index_url):
        # This method won't pick up bill versions where the bill is published
        # exclusively in PDF.  See 2009 HB 645 for a sample
        index_page = ElementTree(lxml.html.fromstring(self.urlopen(index_url)))
        tokens = bill["bill_id"].split(" ")
        bill_regex = re.compile("%s0*%s\_" % (tokens[0], tokens[1]))
        for anchor in index_page.findall("//a"):
            if bill_regex.match(anchor.text_content()) is not None:
                file_name = anchor.text_content()
                version = file_name[file_name.find("_") + 1 : file_name.find(".")]
                version_title = "Final Version"
                if version != "x":
                    version_title = "Version %s" % version

                version_url = index_url[0 : index_url.find("bills") - 1] + anchor.attrib["href"]
                bill.add_version(version_title, version_url)