Exemple #1
0
 def __call__(self, pages):
     """
     Print every match of ``self._re`` found in each page, then the total
     number of matches over all pages.

     :param pages: iterable of page texts to search.
     """
     total = 0
     for page in pages:
         matches = self._re.findall(page)
         # One match per line; the joined text goes through utils.ascii
         # before it is printed.
         print(utils.ascii(u"\n".join(matches)))
         total += len(matches)
     # The parenthesised single-argument form prints the same text under the
     # Python 2 print statement and the Python 3 print function.
     print(u"Total size: %d" % total)
Exemple #2
0
 def __call__(self, pages):
     """
     Print every match of ``self._re`` found in each page, then the total
     number of matches over all pages.

     :param pages: iterable of page texts to search.
     """
     total = 0
     for page in pages:
         matches = self._re.findall(page)
         # One match per line; the joined text goes through utils.ascii
         # before it is printed.
         print(utils.ascii(u"\n".join(matches)))
         total += len(matches)
     # The parenthesised single-argument form prints the same text under the
     # Python 2 print statement and the Python 3 print function.
     print(u"Total size: %d" % total)
Exemple #3
0
 def get_id(self, latex):
     """
     Get unique id.

     The id is the hexadecimal MD5 digest of the UTF-8 encoded formula.  If
     computing it that way fails, the failure is logged and the digest of
     ``utils.ascii(latex)`` is returned instead; as the log message says,
     ids produced by this fallback may conflict.

     :param latex: the formula text to identify.
     :return: hexadecimal MD5 digest string.
     """
     try:
         return hashlib.md5(latex.encode("utf-8")).hexdigest()
     except Exception:
         # Deliberately not a bare ``except:`` so KeyboardInterrupt and
         # SystemExit propagate instead of silently yielding a fallback id.
         _logger.exception(
             u"Could not get id from unicode, possible conflict [%s]",
             latex)
         return hashlib.md5(utils.ascii(latex)).hexdigest()
Exemple #4
0
 def get_id(self, latex):
     """
     Get unique id.

     The id is the hexadecimal MD5 digest of the UTF-8 encoded formula.  If
     computing it that way fails, the failure is logged and the digest of
     ``utils.ascii(latex)`` is returned instead; as the log message says,
     ids produced by this fallback may conflict.

     :param latex: the formula text to identify.
     :return: hexadecimal MD5 digest string.
     """
     try:
         return hashlib.md5(latex.encode("utf-8")).hexdigest()
     except Exception:
         # Deliberately not a bare ``except:`` so KeyboardInterrupt and
         # SystemExit propagate instead of silently yielding a fallback id.
         _logger.exception(
             u"Could not get id from unicode, possible conflict [%s]",
             latex)
         return hashlib.md5(utils.ascii(latex)).hexdigest()
    def add_include(self, include):
        """
        Register an extra include directive needed to compile this python
        module.  A directive that is already registered is not added twice.

        :param include: the name of the header file to include, including
                   surrounding "" or <>.
        """
        directive = utils.ascii(include)
        # startswith/endswith accept a tuple of alternatives.
        assert directive.startswith(('"', '<'))
        assert directive.endswith(('"', '>'))
        if directive in self.includes:
            return
        self.includes.append(directive)
Exemple #6
0
    def add_include(self, include):
        """
        Register an extra include directive needed to compile this python
        module.  A directive that is already registered is not added twice.

        :param include: the name of the header file to include, including
                   surrounding "" or <>.
        """
        directive = utils.ascii(include)
        # startswith/endswith accept a tuple of alternatives.
        assert directive.startswith(('"', '<'))
        assert directive.endswith(('"', '>'))
        if directive in self.includes:
            return
        self.includes.append(directive)
 def _add_function_obj(self, wrapper):
     """
     Attach a Function wrapper to this module, filing it under the overload
     group stored in ``self.functions`` for its name.

     :param wrapper: the Function wrapper to add.
     """
     assert isinstance(wrapper, Function)
     overload_name = utils.ascii(wrapper.custom_name)
     if overload_name is None:
         # No custom name: derive one from the C function name and the
         # template parameters.
         overload_name = utils.get_mangled_name(
             self.c_function_name_transformer(wrapper.function_name),
             wrapper.template_parameters)
     try:
         group = self.functions[overload_name]
     except KeyError:
         # First wrapper under this name: create and remember its group.
         group = self.functions[overload_name] = OverloadedFunction(overload_name)
     wrapper.module = self
     wrapper.section = self.current_section
     group.add(wrapper)
Exemple #8
0
 def _add_function_obj(self, wrapper):
     """
     Attach a Function wrapper to this module, filing it under the overload
     group stored in ``self.functions`` for its name.

     :param wrapper: the Function wrapper to add.
     """
     assert isinstance(wrapper, Function)
     overload_name = utils.ascii(wrapper.custom_name)
     if overload_name is None:
         # No custom name: derive one from the C function name and the
         # template parameters.
         overload_name = utils.get_mangled_name(
             self.c_function_name_transformer(wrapper.function_name),
             wrapper.template_parameters)
     try:
         group = self.functions[overload_name]
     except KeyError:
         # First wrapper under this name: create and remember its group.
         group = self.functions[overload_name] = OverloadedFunction(overload_name)
     wrapper.module = self
     wrapper.section = self.current_section
     group.add(wrapper)
Exemple #9
0
    def handle(self, *args, **options):
        """
        Load materials from the tab-separated files named in
        ``options['filepath']``.

        The first line of every file is skipped.  Each remaining line is
        split on tabs; a Material is fetched or created from its columns and
        linked to the Secao identified on the same line.  When that raises
        IntegrityError an error is written to stdout and the offending line
        is appended to ``loadmat.err``.
        """
        with open('loadmat.err', 'a+') as ferr:
            for filepath in options['filepath']:
                with open(filepath, 'r') as f:
                    # Discard the header line (no-op for an empty file).
                    next(f, None)
                    for raw_line in f:
                        row_text = raw_line.strip()
                        register = [field.strip() for field in row_text.split('\t')]

                        try:
                            material, creado = Material.objects.get_or_create(
                                cod_material=register[COD_MATERIAL],
                                material=register[MATERIAL],
                            )

                            secao = Secao.objects.get(cod_secao=register[COD_SECAO])
                            material.secoes_possiveis.add(secao)

                            if creado:
                                material.refresh_from_db()
                                report = self.style.SUCCESS(ascii(u'Material "%s" criado com sucesso.' % material))
                            else:
                                report = self.style.WARNING(ascii(u'Material "%s" ja existe.' % material))
                            self.stdout.write(report)

                        except IntegrityError:
                            material = Material.objects.get(cod_material=register[COD_MATERIAL])
                            details = (material, material.secoes_possiveis.all(), register)
                            self.stdout.write(self.style.ERROR(ascii(u'ERRO: Material %s já existe na seção %s y difiere do registro fornecido: "%s".' % details)))
                            # Keep the rejected input line for later inspection.
                            print(row_text, file=ferr)
    def add_typedef(self, wrapper, alias):
        """
        Declare the equivalent of a C typedef::
          typedef Foo Bar;

        The alias is recorded in ``self.typedefs``, registered as a type
        name, and registered on the wrapper both as given and qualified
        with this module's namespace path.

        :param wrapper: the wrapper object to alias (Foo in the example)
        :param alias: name of the typedef alias

        @note: only typedefs for CppClass objects have been
        implemented so far; others will be implemented in the future.
        """
        assert isinstance(wrapper, CppClass)
        typedef_name = utils.ascii(alias)
        self.typedefs.append((wrapper, typedef_name))
        self.register_type(typedef_name, typedef_name, wrapper)
        # Bare alias first, then the '::'-qualified one.
        wrapper.register_alias(typedef_name)
        qualified_parts = self.get_namespace_path() + [typedef_name]
        wrapper.register_alias('::'.join(qualified_parts))
Exemple #11
0
    def add_typedef(self, wrapper, alias):
        """
        Declare the equivalent of a C typedef::
          typedef Foo Bar;

        The alias is recorded in ``self.typedefs``, registered as a type
        name, and registered on the wrapper both as given and qualified
        with this module's namespace path.

        :param wrapper: the wrapper object to alias (Foo in the example)
        :param alias: name of the typedef alias

        @note: only typedefs for CppClass objects have been
        implemented so far; others will be implemented in the future.
        """
        assert isinstance(wrapper, CppClass)
        typedef_name = utils.ascii(alias)
        self.typedefs.append((wrapper, typedef_name))
        self.register_type(typedef_name, typedef_name, wrapper)
        # Bare alias first, then the '::'-qualified one.
        wrapper.register_alias(typedef_name)
        qualified_parts = self.get_namespace_path() + [typedef_name]
        wrapper.register_alias('::'.join(qualified_parts))
    def add_cpp_namespace(self, name):
        """
        Return the nested module that corresponds to a C++ namespace,
        creating it when this module has no submodule of that name yet.

        :param name: name of C++ namespace (just the last component,
        not full scoped name); this also becomes the name of the
        submodule.

        :return: a L{SubModule} object that maps to this namespace.
        """
        namespace = utils.ascii(name)
        try:
            existing = self.get_submodule(namespace)
        except ValueError:
            # get_submodule raised ValueError, so build a new submodule.
            created = SubModule(namespace, parent=self, cpp_namespace=namespace)
            created.stack_where_defined = traceback.extract_stack()
            return created
        return existing
Exemple #13
0
    def add_cpp_namespace(self, name):
        """
        Return the nested module that corresponds to a C++ namespace,
        creating it when this module has no submodule of that name yet.

        :param name: name of C++ namespace (just the last component,
        not full scoped name); this also becomes the name of the
        submodule.

        :return: a L{SubModule} object that maps to this namespace.
        """
        namespace = utils.ascii(name)
        try:
            existing = self.get_submodule(namespace)
        except ValueError:
            # get_submodule raised ValueError, so build a new submodule.
            created = SubModule(namespace, parent=self, cpp_namespace=namespace)
            created.stack_where_defined = traceback.extract_stack()
            return created
        return existing
    def declare_one_time_definition(self, definition_name):
        """
        Internal helper for code generation: record that a piece of code
        which may only be defined once per compilation unit is being emitted.

        (note: assuming here one-to-one mapping between 'module' and
        'compilation unit').

        :param definition_name: a string that uniquely identifies the code
        definition that will be added.  If the given definition was
        already declared KeyError is raised.

        >>> module = Module('foo')
        >>> module.declare_one_time_definition("zbr")
        >>> module.declare_one_time_definition("zbr")
        Traceback (most recent call last):
        ...
        KeyError: 'zbr'
        >>> module.declare_one_time_definition("bar")
        """
        key = utils.ascii(definition_name)
        if key not in self.one_time_definitions:
            # Only the key matters; the stored value is a placeholder.
            self.one_time_definitions[key] = None
            return
        raise KeyError(key)
Exemple #15
0
    def declare_one_time_definition(self, definition_name):
        """
        Internal helper for code generation: record that a piece of code
        which may only be defined once per compilation unit is being emitted.

        (note: assuming here one-to-one mapping between 'module' and
        'compilation unit').

        :param definition_name: a string that uniquely identifies the code
        definition that will be added.  If the given definition was
        already declared KeyError is raised.

        >>> module = Module('foo')
        >>> module.declare_one_time_definition("zbr")
        >>> module.declare_one_time_definition("zbr")
        Traceback (most recent call last):
        ...
        KeyError: 'zbr'
        >>> module.declare_one_time_definition("bar")
        """
        key = utils.ascii(definition_name)
        if key not in self.one_time_definitions:
            # Only the key matters; the stored value is a placeholder.
            self.one_time_definitions[key] = None
            return
        raise KeyError(key)
Exemple #16
0
class mathml(object):
    """
        MathML object.

        Holds a MathML string in ``self.str`` and provides ``from_latex``,
        which converts a LaTeX formula through the HTTP service configured
        under ``settings["converters"]["latexml"]``.
    """
    # URL the conversion request is sent to.
    url_form_latex = settings["converters"]["latexml"]["url"]
    # Encoding applied to the result and status strings of the service.
    encoding = settings["converters"]["encoding"]
    # presumably the attribute inserted into the <math> start tag matched by
    # pattern_id_add -- the code doing so is not visible here; confirm.
    id_str = u' egomath="%s" '
    # "\\s" spells the same pattern text as the former "\s" without relying
    # on an invalid string escape.
    pattern_id_add = re.compile(
        u'(<math)\\s(.*?xmlns="http://www.w3.org/1998/Math/MathML")')
    pattern_id_get = re.compile(id_str % u'(.*?)')

    def __init__(self, mathml_str):
        """
            :param mathml_str: the MathML text to wrap.
        """
        self.str = mathml_str

    @staticmethod
    def from_latex(latex_math_orig):
        """
            Convert a LaTeX formula to MathML with the remote service.

            :param latex_math_orig: LaTeX source of a single formula.
            :return: tuple ``(mathml_text, reply)``.  ``mathml_text`` is
              ``None`` when no usable conversion was obtained.  ``reply`` is
              the decoded JSON answer of the service, ``None`` for an empty
              formula, or a synthetic dict (``status_code`` -1) when the
              request failed before an answer was parsed.
        """
        latexml_cfg = settings["converters"]["latexml"]
        js = None
        latex_math = latex_math_orig
        try:
            latex_math = latex(latex_math, full=False).str

            # is empty?
            if not latex_math.strip():
                _logger.warning(u"Warning: empty math - [%s]",
                                repr(latex_math))
                return None, None

            latex_math = u"$ %s $" % latex_math
            req = urllib2.Request(
                mathml.url_form_latex,
                urllib.urlencode({
                    'tex': latex_math.encode("utf-8"),
                    'profile': 'math',
                }))
            response = urllib2.urlopen(req, timeout=latexml_cfg["timeout"])

            # try parsing the answer
            import json

            try:
                js = json.load(response)
            finally:
                # Release the connection whether or not parsing succeeded.
                response.close()
            result = js[latexml_cfg["result_field"]]
            message = js[latexml_cfg["status_field"]]
            if result:
                result = result.encode(mathml.encoding)
            if message:
                message = message.encode(mathml.encoding)

        except Exception as e:
            if js is None:
                # fake js
                js = {
                    "result": None,
                    "status": "Problem at early stage.",
                    "status_code": -1,
                    "log": repr(e),
                }
            _logger.error(u"Error: Connection problem - %s with [%s]", repr(e),
                          latex_math)
            return None, js

        # A missing or empty status never counts as success.  Testing it
        # first also keeps ``msg in message`` from raising TypeError when
        # the service sent no status at all.
        everything_ok = bool(message) and any(
            ok_msg in message for ok_msg in latexml_cfg["status_ok"])
        not_empty_result = bool(result)

        # everything ok - return answer
        if everything_ok and not_empty_result:
            return mathml(result).str, js

        # something fishy - try to correct it
        ascii_latex = utils.ascii(latex_math, mathml.encoding)
        # Reaching this point with everything_ok set means the result was
        # empty.  (The former condition repeated the test above and could
        # therefore never be true.)
        if everything_ok and len(ascii_latex) < 6:
            # in case the service returns empty string and it seems to be just a variable
            _logger.warning(u"Warning: returning original - %s",
                            repr(ascii_latex))
            return mathml(ascii_latex).str, js

        # seems not ok but the latest converter returns valid results
        if not everything_ok and not_empty_result:
            _logger.warning(
                u"Warning: returning conversion but with errors - %s",
                repr(ascii_latex))
            return mathml(result).str, js

        _logger.error(
            u"\n!ERROR - converting [%s] -> result [%s] with message [%s]\n%s",
            ascii_latex, utils.uni(result), utils.uni(message), 40 * "=")
        return None, js
Exemple #17
0
def convert_wikimath_to_realmath(env_dict,
                                 wiki_math_match,
                                 mathml_pickled,
                                 url,
                                 doc,
                                 total_count,
                                 formula_unique=None,
                                 try_one_more_if_invalid=True):
    r"""
     Resolve one wiki math occurrence to MathML, using the store first and
     the converter second.

     The page we got should be wiki tag free; however, it will contain only
     basic math &lt;math&gt; B \gt &lt;/math&gt; which can contain
     non latex characters &gt; instead of \gt
     - we must fix this (e.g., &gt; or even worse &amp;gt;)

     Steps:
     - get latex math from wiki math
     - try to get mathml from the store
        - if not stored, convert it and store the outcome (ok / not ok)

     :param env_dict: configuration; the keys read here are
        ``pager.wiki_math_tags``, ``wiki.dataset``, ``mathml.convert`` and
        ``mathml.convert_latex``.
     :param wiki_math_match: regex match whose group 1 is the wiki latex.
     :param mathml_pickled: store offering ``get_ok``, ``get_not_ok``,
        ``add_ok``, ``add_not_ok``, ``add_dataset`` and ``db.get_id``.
     :param url: page url, recorded next to stored conversions.
     :param doc: page title, recorded next to stored conversions and logged.
     :param total_count: running formula counter, only used for logging.
     :param formula_unique: optional set collecting every latex string seen.
     :param try_one_more_if_invalid: not used by the code visible here.

     NOTE(review): nothing is returned explicitly, although a caller in this
     file adds the result to a list -- the tail of this function may be
     missing from this copy; confirm against the full source.
  """
    from _parser import math as _math_parser
    global math_parser
    if math_parser is None:
        math_parser = _math_parser()

    math_tags = env_dict["pager"]["wiki_math_tags"]

    latex_math = wiki_math_match.group(1)
    # invalid math - not ended e.g., 26358420
    if math_tags[0] in latex_math:
        logger_suspicious.warning(
            u"Math includes another math start elem - truncating [%s][%s]",
            doc, latex_math[:100])
        # Keep only the text in front of the nested start tag.
        latex_math = latex_math.partition(math_tags[0])[0]

    latex_math = math_parser.texify(latex_math)
    latex_math_with_mbox = converters.latex(latex_math, full=False).str
    latex_math = converters.latex(latex_math).str

    # what if math is not finished?
    if formula_unique is not None:
        formula_unique.add(latex_math)

    if len(latex_math) >= 2 * 1024:
        logger_suspicious.warning(u"Long latex [%s]",
                                  latex_math.replace(u"\n", u""))

    wiki_math = u"%s%s%s" % (math_tags[0], latex_math, math_tags[1])
    dataset = env_dict["wiki"]["dataset"]

    add_info = {}
    mathml_text = None
    if latex_math_with_mbox is not None:
        mathml_text = mathml_pickled.get_ok(latex_math_with_mbox,
                                            add_info=add_info,
                                            qf="dataset:wiki-2013")
    latex_math_db_id = mathml_pickled.db.get_id(latex_math_with_mbox)

    if env_dict["mathml"]["convert"] and 0 < len(latex_math):
        try:
            if mathml_text:
                logger.debug(u"Found latex in db [%s].", total_count)
                if dataset not in add_info["dataset"]:
                    mathml_pickled.add_dataset(latex_math_db_id, dataset)

                # add it to the text
                mathml_text = mathml.add_id(mathml_text, latex_math_db_id)
            else:
                can_convert = env_dict["mathml"]["convert_latex"] is not None
                should_add = True
                convert_js = None
                if can_convert:
                    if mathml_pickled.get_not_ok(latex_math_with_mbox) is None:
                        mathml_text, convert_js = mathml.from_latex(
                            latex_math_with_mbox)
                    else:
                        # we know mathml is not valid
                        mathml_text = wiki_math
                        logger.warning(
                            u"Using wiki math because conversion failed [%s]",
                            latex_math)
                        should_add = False

                if should_add:
                    # Without a converter reply the status stays at 10 and
                    # the formula is treated as failed below.
                    status_code = 10
                    if convert_js is not None:
                        status_code = int(convert_js["status_code"])
                    if mathml_text and status_code < 2:
                        logger.info(u"Done math: %s [%s]",
                                    utils.ascii(latex_math, errors="replace"),
                                    latex_math_db_id)
                        if can_convert:
                            assert convert_js is not None
                            mathml_pickled.add_ok(latex_math_with_mbox,
                                                  mathml_text,
                                                  convert_js, [doc], [url],
                                                  dataset,
                                                  create_ego=True)
                    else:
                        msg = u"Failed conversion of [%s] [%s] resp. [%s]" % (
                            latex_math, wiki_math_match.group(1),
                            converters.latex(wiki_math_match.group(1)).str)
                        logger_suspicious.warning(msg)
                        logger.warning(msg)
                        if convert_js is not None:
                            # Fall back to the wiki markup and remember the
                            # failure.
                            mathml_text = wiki_math
                            mathml_pickled.add_not_ok(latex_math_with_mbox,
                                                      None,
                                                      convert_js, [doc], [url],
                                                      dataset,
                                                      create_ego=True)
                        else:
                            logger.error(u"Returned js is None for [%s]",
                                         latex_math)

        except Exception:
            # logger.exception records the traceback, so the exception
            # object does not need a name.
            logger.exception(u"Exception at [%s] [%s]", utils.ascii(doc),
                             mathml_pickled.db.get_id(latex_math_with_mbox))
Exemple #18
0
def _huge_math_page_to_pages(env_dict):
    """
    Grab one huge wiki page and have fun with it while creating all pages.

    Every page produced by the dump pager is looked up by title, its wiki
    math occurrences are passed to ``_math.convert_wikimath_to_realmath``,
    the replacements collected from those calls are spliced into the page
    text, and the page is stored.  Pages without any formula, pages that
    already exist (unless ``pager.overwrite`` is set) and pages whose title
    handling fails are skipped.

    :param env_dict: configuration; sections read here are ``wiki``,
        ``converters.latexml``, ``pager`` and ``mathml``.
    """
    import _math
    from collections import defaultdict

    wiki_xml_math_output = env_dict["wiki"]["xml_math_output_big"]

    from indexer.egomath.interface import egomath_inst

    egomath_inst.reset_logging()

    wiki_pages_output = env_dict["wiki"]["pages_output"]
    pickle_mathml_ok = env_dict["converters"]["latexml"]["pickle_ok"]
    pickle_mathml_fail = env_dict["converters"]["latexml"]["pickle_fail"]

    logger.info(u"Started separating pages from [%s] to [%s]",
                wiki_xml_math_output, wiki_pages_output)

    # load wiki dump
    #
    wiki_page_dumper = dump.pager(wiki_xml_math_output,
                                  env_dict["pager"]["delimiter"],
                                  env_dict["pager"]["buffer"])

    # try to load pickled mathml (ok/fail)
    #
    converted_mathml = None
    if env_dict["mathml"]["convert"] == "pickle":
        buffering = 100 * 1024 * 1024
        converted_mathml = _math.mathpickles(pickle_mathml_ok,
                                             pickle_mathml_fail,
                                             buffering=buffering)
    elif env_dict["mathml"]["convert"] == "db":
        converted_mathml = _math.mathdb(env_dict)

    latex_pattern = env_dict["pager"]["re_math"]
    title_pattern = re.compile(env_dict["pager"]["re_title"], re.DOTALL)
    total_formula_count = 0
    formula_unique = set() if env_dict["wiki"]["collect_stats"] else None

    # Histogram: number of formulae on a page -> number of such pages.
    pages_formula = defaultdict(int)

    # Imported once here rather than on every loop iteration.
    from _parser import parser

    # for all pages and for all wiki maths
    #
    for pages_done, page in enumerate(
            wiki_page_dumper.pages(templates.htmltemplate)):
        logger.info(u'Done %d pages', pages_done)
        # if title already exists do not write
        try:
            title = title_pattern.search(page).group(1).replace(" ", "_")
            url = u"http://en.wikipedia.org/wiki/%s" % title
            assert not u"$title" in title
            page_store = _math.page_to_store(wiki_pages_output,
                                             title + ".html")
            if not env_dict["pager"]["overwrite"] and page_store.exists():
                logger.warning(u"Page exists [%s] [%d]", title, pages_done)
                continue
        except Exception as e:
            logger.error(u"Could not store page because of %s", repr(e))
            continue

        page = parser.preprocess_page_math(env_dict, page)

        # the page we got should be wiki tag free; however, it will contain only
        # basic math &lt;math&gt; B \gt &lt;/math&gt; which can contain
        # non latex characters &gt; instead of \gt
        # - we must fix this
        #
        page_replacements = []
        page_formula_count = 0
        for wiki_math_iter in latex_pattern.finditer(page):
            page_formula_count += 1
            total_formula_count += 1

            page_replacements += _math.convert_wikimath_to_realmath(
                env_dict,
                wiki_math_iter,
                converted_mathml,
                url,
                title,
                total_formula_count,
                formula_unique)
        pages_formula[page_formula_count] += 1

        info_msg = u"# of formulae on page [%s] is [%d], total [%d]" % (
            utils.ascii(title), page_formula_count, total_formula_count)
        if page_formula_count == 0:
            logger_suspicious.warning(info_msg + u" -> skipping 0.")
            logger.warning(info_msg)
            continue
        logger.warning(info_msg)

        # create the page: copy the text between replaced spans and put each
        # replacement in place of its (start, end) span
        #
        pieces = []
        last = 0
        for (start, end, replacement) in page_replacements:
            pieces.append(page[last:start])
            pieces.append(replacement)
            last = end
        pieces.append(page[last:])
        # Joined once instead of growing a string piece by piece.
        page = "".join(pieces)

        # store the page
        try:
            page_store.store(page)
        except IOError as e:
            logger.error(u"Could not store [%s] page because of %s", title,
                         repr(e))
Exemple #19
0
    def __init__(self,
                 function_name,
                 return_value,
                 parameters,
                 docstring=None,
                 unblock_threads=None,
                 template_parameters=(),
                 custom_name=None,
                 deprecated=False,
                 foreign_cpp_namespace=None,
                 throw=()):
        """
        Set up the wrapper for one C function.

        :param function_name: name of the C function
        :param return_value: the function return value; None means 'void'
        :type return_value: L{ReturnValue}
        :param parameters: the function parameters
        :type parameters: list of L{Parameter}
        :param docstring: stored as given on ``self.docstring``
        :param unblock_threads: passed on to the base class; when None,
           ``settings.unblock_threads`` is used instead
        :param template_parameters: used together with the function name to
           build the mangled name

        :param custom_name: an alternative name to give to this
           function at python-side; if omitted, the name of the
           function in the python module will be the same name as the
           function in C++ (minus namespace).

        :param deprecated: deprecation state for this API:
          - False: Not deprecated
          - True: Deprecated
          - "message": Deprecated, and deprecation warning contains the given message

        :param foreign_cpp_namespace: if set, the function is assumed
          to belong to the given C++ namespace, regardless of the C++
          namespace of the python module it will be added to.

        :param throw: list of C++ exceptions that the function may throw
        :type throw: list of L{CppException}
        """
        self.stack_where_defined = traceback.extract_stack()

        if unblock_threads is None:
            unblock_threads = settings.unblock_threads

        ## backward compatibility check: the first two arguments used to be
        ## given in the opposite order
        legacy_order = (isinstance(return_value, str)
                        and isinstance(function_name, ReturnValue))
        if legacy_order:
            warnings.warn(
                "Function has changed API; see the API documentation (but trying to correct...)",
                DeprecationWarning,
                stacklevel=2)
            return_value, function_name = function_name, return_value

        if return_value is None:
            return_value = ReturnValue.new('void')

        return_value = utils.eval_retval(return_value, self)
        parameters = [utils.eval_param(spec, self) for spec in parameters]

        super(Function, self).__init__(return_value,
                                       parameters,
                                       parse_error_return="return NULL;",
                                       error_return="return NULL;",
                                       unblock_threads=unblock_threads)
        self.deprecated = deprecated
        self.foreign_cpp_namespace = foreign_cpp_namespace
        self._module = None
        self.function_name = function_name = utils.ascii(function_name)
        self.wrapper_base_name = None
        self.wrapper_actual_name = None
        self.docstring = docstring
        self.self_parameter_pystruct = None
        self.template_parameters = template_parameters
        self.custom_name = custom_name
        self.mangled_name = utils.get_mangled_name(function_name,
                                                   self.template_parameters)
        for exc in throw:
            assert isinstance(exc, CppException)
        self.throw = list(throw)
        # list of (custodian, ward, postcall)
        self.custodians_and_wards = []
        cppclass_typehandlers.scan_custodians_and_wards(self)
Exemple #20
0
def convert_wikimath_to_realmath(env_dict,
                                 wiki_math_match,
                                 mathml_pickled,
                                 url,
                                 doc,
                                 total_count,
                                 formula_unique=None,
                                 try_one_more_if_invalid=True):
    r"""
     Resolve one wiki math occurrence to MathML, using the store first and
     the converter second.

     The page we got should be wiki tag free; however, it will contain only
     basic math &lt;math&gt; B \gt &lt;/math&gt; which can contain
     non latex characters &gt; instead of \gt
     - we must fix this (e.g., &gt; or even worse &amp;gt;)

     Steps:
     - get latex math from wiki math
     - try to get mathml from the store
        - if not stored, convert it and store the outcome (ok / not ok)

     :param env_dict: configuration; the keys read here are
        ``pager.wiki_math_tags``, ``wiki.dataset``, ``mathml.convert`` and
        ``mathml.convert_latex``.
     :param wiki_math_match: regex match whose group 1 is the wiki latex.
     :param mathml_pickled: store offering ``get_ok``, ``get_not_ok``,
        ``add_ok``, ``add_not_ok``, ``add_dataset`` and ``db.get_id``.
     :param url: page url, recorded next to stored conversions.
     :param doc: page title, recorded next to stored conversions and logged.
     :param total_count: running formula counter, only used for logging.
     :param formula_unique: optional set collecting every latex string seen.
     :param try_one_more_if_invalid: not used by the code visible here.

     NOTE(review): nothing is returned explicitly -- the tail of this
     function may be missing from this copy; confirm against the full source.
  """
    from _parser import math as _math_parser
    global math_parser
    if math_parser is None:
        math_parser = _math_parser()

    math_tags = env_dict["pager"]["wiki_math_tags"]

    latex_math = wiki_math_match.group(1)
    # invalid math - not ended e.g., 26358420
    if math_tags[0] in latex_math:
        logger_suspicious.warning(
            u"Math includes another math start elem - truncating [%s][%s]",
            doc, latex_math[:100])
        # Keep only the text in front of the nested start tag.
        latex_math = latex_math.partition(math_tags[0])[0]

    latex_math = math_parser.texify(latex_math)
    latex_math_with_mbox = converters.latex(latex_math, full=False).str
    latex_math = converters.latex(latex_math).str

    # what if math is not finished?
    if formula_unique is not None:
        formula_unique.add(latex_math)

    if len(latex_math) >= 2 * 1024:
        logger_suspicious.warning(u"Long latex [%s]",
                                  latex_math.replace(u"\n", u""))

    wiki_math = u"%s%s%s" % (math_tags[0], latex_math, math_tags[1])
    dataset = env_dict["wiki"]["dataset"]

    add_info = {}
    mathml_text = None
    if latex_math_with_mbox is not None:
        mathml_text = mathml_pickled.get_ok(latex_math_with_mbox,
                                            add_info=add_info,
                                            qf="dataset:wiki-2013")
    latex_math_db_id = mathml_pickled.db.get_id(latex_math_with_mbox)

    if env_dict["mathml"]["convert"] and 0 < len(latex_math):
        try:
            if mathml_text:
                logger.debug(u"Found latex in db [%s].", total_count)
                if dataset not in add_info["dataset"]:
                    mathml_pickled.add_dataset(latex_math_db_id, dataset)

                # add it to the text
                mathml_text = mathml.add_id(mathml_text, latex_math_db_id)
            else:
                can_convert = env_dict["mathml"]["convert_latex"] is not None
                should_add = True
                convert_js = None
                if can_convert:
                    if mathml_pickled.get_not_ok(latex_math_with_mbox) is None:
                        mathml_text, convert_js = mathml.from_latex(
                            latex_math_with_mbox)
                    else:
                        # we know mathml is not valid
                        mathml_text = wiki_math
                        logger.warning(
                            u"Using wiki math because conversion failed [%s]",
                            latex_math)
                        should_add = False

                if should_add:
                    # Without a converter reply the status stays at 10 and
                    # the formula is treated as failed below.
                    status_code = 10
                    if convert_js is not None:
                        status_code = int(convert_js["status_code"])
                    if mathml_text and status_code < 2:
                        logger.info(u"Done math: %s [%s]",
                                    utils.ascii(latex_math, errors="replace"),
                                    latex_math_db_id)
                        if can_convert:
                            assert convert_js is not None
                            mathml_pickled.add_ok(latex_math_with_mbox,
                                                  mathml_text,
                                                  convert_js, [doc], [url],
                                                  dataset,
                                                  create_ego=True)
                    else:
                        msg = u"Failed conversion of [%s] [%s] resp. [%s]" % (
                            latex_math, wiki_math_match.group(1),
                            converters.latex(wiki_math_match.group(1)).str)
                        logger_suspicious.warning(msg)
                        logger.warning(msg)
                        if convert_js is not None:
                            # Fall back to the wiki markup and remember the
                            # failure.
                            mathml_text = wiki_math
                            mathml_pickled.add_not_ok(latex_math_with_mbox,
                                                      None,
                                                      convert_js, [doc], [url],
                                                      dataset,
                                                      create_ego=True)
                        else:
                            logger.error(u"Returned js is None for [%s]",
                                         latex_math)

        except Exception:
            # logger.exception records the traceback, so the exception
            # object does not need a name.
            logger.exception(u"Exception at [%s] [%s]", utils.ascii(doc),
                             mathml_pickled.db.get_id(latex_math_with_mbox))
Exemple #21
0
def _huge_math_page_to_pages(env_dict):
    """
    Split one huge wiki dump into per-page HTML files with converted math.

    Every page yielded by the dump pager is preprocessed, each match of the
    math pattern is handed to ``_math.convert_wikimath_to_realmath`` and the
    returned ``(start, end, replacement)`` spans are spliced into the page
    text before the page is stored. Pages that already exist are skipped
    unless overwriting is enabled; pages without any formula are skipped too.

    :param env_dict: nested configuration mapping. The keys read here are
        ``["wiki"]`` (``xml_math_output_big``, ``pages_output``,
        ``collect_stats``), ``["converters"]["latexml"]`` (``pickle_ok``,
        ``pickle_fail``), ``["pager"]`` (``delimiter``, ``buffer``,
        ``re_math``, ``re_title``, ``overwrite``) and ``["mathml"]``
        (``convert``). ``re_math`` must already offer ``finditer`` (it is
        used as is), while ``re_title`` is compiled here.
    :return: None; the results are the stored pages and the log output.
    """
    import _math
    from collections import defaultdict
    from _parser import parser
    wiki_xml_math_output = env_dict["wiki"]["xml_math_output_big"]
    #wiki_xml_math_output = env_dict["wiki"]["xml_math_output_test"]

    from indexer.egomath.interface import egomath_inst

    egomath_inst.reset_logging()

    wiki_pages_output = env_dict["wiki"]["pages_output"]
    pickle_mathml_ok = env_dict["converters"]["latexml"]["pickle_ok"]
    pickle_mathml_fail = env_dict["converters"]["latexml"]["pickle_fail"]

    logger.info(u"Started separating pages from [%s] to [%s]",
                wiki_xml_math_output, wiki_pages_output)

    # load wiki dump
    #
    wiki_page_dumper = dump.pager(wiki_xml_math_output,
                                  env_dict["pager"]["delimiter"],
                                  env_dict["pager"]["buffer"])

    # try to load already converted mathml (pickles or db); stays None for
    # any other value of env_dict["mathml"]["convert"]
    #
    converted_mathml = None
    if env_dict["mathml"]["convert"] == "pickle":
        buffering = 100 * 1024 * 1024
        converted_mathml = _math.mathpickles(pickle_mathml_ok, pickle_mathml_fail, buffering=buffering)
    elif env_dict["mathml"]["convert"] == "db":
        converted_mathml = _math.mathdb(env_dict)

    latex_pattern = env_dict["pager"]["re_math"]
    title_pattern = re.compile(env_dict["pager"]["re_title"], re.DOTALL)
    total_formula_count = 0
    formula_unique = set() if env_dict["wiki"]["collect_stats"] else None

    # histogram: number of formulae on a page -> number of such pages
    # (filled below, not read anywhere inside this function)
    pages_formula = defaultdict(int)

    # for all pages and for all wiki maths
    #
    for pages_done, page in enumerate(wiki_page_dumper.pages(templates.htmltemplate)):
        logger.info(u'Done %d pages', pages_done)
        # if title already exists do not write
        try:
            title = title_pattern.search(page).group(1).replace(" ", "_")
            url = u"http://en.wikipedia.org/wiki/%s" % title
            # explicit check instead of an assert so that it also runs
            # under ``python -O``; handled by the except clause below
            if u"$title" in title:
                raise ValueError(u"title contains $title: %r" % title)
            page_store = _math.page_to_store(wiki_pages_output, title + ".html")
            if not env_dict["pager"]["overwrite"] and page_store.exists():
                logger.warning(u"Page exists [%s] [%d]", title, pages_done)
                continue
        except Exception as e:
            # best effort per page: log the problem and go on with the next one
            logger.error(u"Could not store page because of %s", repr(e))
            continue

        page = parser.preprocess_page_math(env_dict, page)

        # the page we got should be wiki tag free; however, it will contain only
        # basic math &lt;math&gt; B \gt &lt;/math&gt; which can contain
        # non latex characters &gt; instead of \gt
        # - we must fix this
        #
        page_replacements = []
        page_formula_count = 0
        for wiki_math_iter in latex_pattern.finditer(page):
            page_formula_count += 1
            total_formula_count += 1

            page_replacements.extend(
                _math.convert_wikimath_to_realmath(
                    env_dict,
                    wiki_math_iter,
                    converted_mathml,
                    url,
                    title,
                    total_formula_count,
                    formula_unique))
        pages_formula[page_formula_count] += 1

        info_msg = u"# of formulae on page [%s] is [%d], total [%d]" % (
            utils.ascii(title), page_formula_count, total_formula_count)
        if page_formula_count == 0:
            logger_suspicious.warning(info_msg + u" -> skipping 0.")
            logger.warning(info_msg)
            continue
        logger.warning(info_msg)

        # create the page: copy the text between the replaced spans and put
        # the replacement in place of every span; joined once at the end
        # instead of growing a string piece by piece
        #
        pieces = []
        last = 0
        for (start, end, replacement) in page_replacements:
            pieces.append(page[last:start])
            pieces.append(replacement)
            last = end
        pieces.append(page[last:])
        page = "".join(pieces)

        # store the page
        try:
            page_store.store(page)
        except IOError as e:
            logger.error(u"Could not store [%s] page because of %s", title, repr(e))
Exemple #22
0
    def __init__(
        self,
        function_name,
        return_value,
        parameters,
        docstring=None,
        unblock_threads=None,
        template_parameters=(),
        custom_name=None,
        deprecated=False,
        foreign_cpp_namespace=None,
        throw=(),
    ):
        """
        Describe one C/C++ function that is to be wrapped.

        :param function_name: name of the C function
        :param return_value: the function return value; ``None`` is
           replaced by a ``void`` return value
        :type return_value: L{ReturnValue}
        :param parameters: the function parameters
        :type parameters: list of L{Parameter}

        :param docstring: kept as is in ``self.docstring``

        :param unblock_threads: handed over to the base class; if
           ``None``, ``settings.unblock_threads`` is used instead

        :param template_parameters: kept in ``self.template_parameters``
           and used for computing the mangled name

        :param custom_name: an alternative name to give to this
           function at python-side; if omitted, the name of the
           function in the python module will be the same name as the
           function in C++ (minus namespace).

        :param deprecated: deprecation state for this API:
          - False: Not deprecated
          - True: Deprecated
          - "message": Deprecated, and deprecation warning contains the given message

        :param foreign_cpp_namespace: if set, the function is assumed
          to belong to the given C++ namespace, regardless of the C++
          namespace of the python module it will be added to.

        :param throw: list of C++ exceptions that the function may throw
        :type throw: list of L{CppException}
        """
        # remember the call stack at the point of definition
        self.stack_where_defined = traceback.extract_stack()

        if unblock_threads is None:
            unblock_threads = settings.unblock_threads

        ## backward compatibility check: the first two arguments used to
        ## come in the opposite order, so swap them back when detected
        swapped_arguments = isinstance(return_value, str) and isinstance(function_name, ReturnValue)
        if swapped_arguments:
            warnings.warn(
                "Function has changed API; see the API documentation (but trying to correct...)",
                DeprecationWarning,
                stacklevel=2,
            )
            function_name, return_value = return_value, function_name

        if return_value is None:
            return_value = ReturnValue.new("void")

        return_value = utils.eval_retval(return_value, self)
        evaluated_parameters = []
        for raw_parameter in parameters:
            evaluated_parameters.append(utils.eval_param(raw_parameter, self))

        super(Function, self).__init__(
            return_value,
            evaluated_parameters,
            parse_error_return="return NULL;",
            error_return="return NULL;",
            unblock_threads=unblock_threads,
        )

        self.deprecated = deprecated
        self.foreign_cpp_namespace = foreign_cpp_namespace
        self._module = None

        ascii_function_name = utils.ascii(function_name)
        self.function_name = ascii_function_name
        self.wrapper_base_name = None
        self.wrapper_actual_name = None
        self.docstring = docstring
        self.self_parameter_pystruct = None
        self.template_parameters = template_parameters
        self.custom_name = custom_name
        self.mangled_name = utils.get_mangled_name(ascii_function_name, self.template_parameters)

        for exception_spec in throw:
            assert isinstance(exception_spec, CppException)
        self.throw = list(throw)

        # entries are (custodian, ward, postcall) tuples
        self.custodians_and_wards = []
        cppclass_typehandlers.scan_custodians_and_wards(self)
Exemple #23
0
    def handle(self, *args, **options):
        """
        Load ``Familia`` rows from the tab-separated files in ``options['filepath']``.

        The first line of every file is skipped as a header. Each remaining
        line is split on tabs, its fields are stripped, and the matching
        ``Secao`` and ``Familia`` objects are fetched or created. The outcome
        of every line is written to ``self.stdout``; lines rejected with one
        of the handled exceptions are also appended to ``loadfam.err``.

        :param args: positional arguments, not used here
        :param options: command options; only ``options['filepath']``, an
            iterable of input file paths, is read here
        """
        with open('loadfam.err', 'a+') as ferr:

            for filepath in options['filepath']:
                with open(filepath, 'r') as f:
                    # the first line is the header, not data
                    next(f, None)
                    for line in f:
                        line = line.strip('\r\n')
                        register = [field.strip() for field in line.split('\t')]

                        try:

                            try:

                                secao = Secao.objects.get(
                                    secao=register[SECAO]
                                    )

                            except Secao.DoesNotExist:

                                secao = Secao.objects.get_or_create(
                                    cod_secao=register[COD_SECAO].zfill(2),
                                    secao=register[SECAO]
                                    )[0]

                                secao.refresh_from_db()

                            # str.zfill returns a new string, so the padded
                            # code must be assigned back; empty codes are
                            # left empty on purpose.
                            cod_grupo = register[COD_GRUPO]
                            if cod_grupo != '':
                                cod_grupo = cod_grupo.zfill(4)
                            cod_subgrupo = register[COD_SUBGRUPO]
                            if cod_subgrupo != '':
                                cod_subgrupo = cod_subgrupo.zfill(6)
                            cod_familia = register[COD_FAMILIA]
                            if cod_familia != '':
                                cod_familia = cod_familia.zfill(9)

                            familia, creado = Familia.objects.get_or_create(
                                secao=secao,
                                cod_grupo=cod_grupo,
                                grupo=register[GRUPO],
                                cod_subgrupo=cod_subgrupo,
                                subgrupo=register[SUBGRUPO],
                                cod_familia=cod_familia,
                                familia=register[FAMILIA],
                                )

                            if creado:
                                familia.refresh_from_db()
                                self.stdout.write(self.style.SUCCESS(u'Familia "%s" criada com sucesso.' % familia))
                            else:
                                self.stdout.write(self.style.WARNING(ascii(u'Familia "%s" ja existe.' % familia)))

                        # NOTE(review): the Codigo*NaoCoincide exceptions are
                        # defined elsewhere; presumably raised while saving
                        # the models -- confirm. Every rejected line is kept
                        # in loadfam.err.
                        except CodigoSecaoNaoCoincide:
                            self.stdout.write(self.style.ERROR(ascii(u'ERRO: Código de seção não coincide: "%s".' % register)))
                            print(line,file=ferr)
                        except CodigoGrupoNaoCoincide:
                            self.stdout.write(self.style.ERROR(ascii(u'ERRO: Código de grupo não coincide: "%s".' % register)))
                            print(line,file=ferr)
                        except CodigoSubGrupoNaoCoincide:
                            self.stdout.write(self.style.ERROR(ascii(u'ERRO: Código de sub grupo não coincide: "%s".' % register)))
                            print(line,file=ferr)
                        except IntegrityError:
                            self.stdout.write(self.style.ERROR(ascii(u'ERRO: Error de integridad: "%s".' % register)))
                            print(line,file=ferr)