Ejemplo n.º 1
0
def read_file(filename, file_id, missing_tags=None, problem_files=None):
    """
    Read a document file and parse the math expressions it contains.

    The parser is chosen from the extension reported by
    ``MathDocument.read_doc_file``: ``.tex`` content goes to the TeX parser,
    ``.xhtml``/``.mathml``/``.mml``/``.html`` content goes to the XML parser.

    :type filename: string
    :param filename: file to be parsed

    :param file_id: identifier passed to the parser together with the content

    :param missing_tags: optional collector forwarded unchanged to the XML
        parser

    :type problem_files: dict or None
    :param problem_files: optional collector forwarded unchanged to the XML
        parser; when given, files with an unrecognised extension are added to
        the set stored under its ``"unknown_filetype"`` key

    :rtype: list(SymbolTree)
    :return: list of Symbol trees found in the file; empty when the file
        type is not recognised
    """
    (ext, content) = MathDocument.read_doc_file(filename)

    if ext == '.tex':
        # Wrap the TeX parser's result so every branch returns a list.
        return [MathExtractor.parse_from_tex(content, file_id)]

    if ext in {'.xhtml', '.mathml', '.mml', '.html'}:
        return MathExtractor.parse_from_xml(content, file_id,
                                            missing_tags=missing_tags,
                                            problem_files=problem_files)

    # Unknown extension. problem_files defaults to None, so only record the
    # file when the caller actually supplied a collector; dereferencing the
    # default would raise AttributeError instead of returning an empty list.
    if problem_files is not None:
        problem_files.setdefault("unknown_filetype", set()).add(filename)
    print('Unknown filetype %s for %s' % (ext, filename))
    return []
Ejemplo n.º 2
0
def read_file(filename, file_id, missing_tags=None, problem_files=None):
    """
    Read a document file and parse the math expressions it contains.

    The parser is chosen from the extension reported by
    ``MathDocument.read_doc_file``: ``.tex`` content goes to the TeX parser,
    ``.xhtml``/``.mathml``/``.mml``/``.html`` content goes to the XML parser.

    :type filename: string
    :param filename: file to be parsed

    :param file_id: identifier passed to the parser together with the content

    :param missing_tags: optional collector forwarded unchanged to the XML
        parser

    :type problem_files: dict or None
    :param problem_files: optional collector forwarded unchanged to the XML
        parser; when given, files with an unrecognised extension are added to
        the set stored under its ``"unknown_filetype"`` key

    :rtype: list(SymbolTree)
    :return: list of Symbol trees found in the file; empty when the file
        type is not recognised
    """
    (ext, content) = MathDocument.read_doc_file(filename)

    if ext == '.tex':
        # Wrap the TeX parser's result so every branch returns a list.
        return [MathExtractor.parse_from_tex(content, file_id)]

    if ext in {'.xhtml', '.mathml', '.mml', '.html'}:
        return MathExtractor.parse_from_xml(content,
                                            file_id,
                                            missing_tags=missing_tags,
                                            problem_files=problem_files)

    # Unknown extension. problem_files defaults to None, so only record the
    # file when the caller actually supplied a collector; dereferencing the
    # default would raise AttributeError instead of returning an empty list.
    if problem_files is not None:
        problem_files.setdefault("unknown_filetype", set()).add(filename)
    print('Unknown filetype %s for %s' % (ext, filename))
    return []