def read_file(filename, file_id, missing_tags=None, problem_files=None):
    """
    Read a file and parse the math expressions it contains

    :type  filename: string
    :param filename: file to be parsed
    :param file_id: identifier handed through to the MathExtractor parser
    :param missing_tags: forwarded unchanged to MathExtractor.parse_from_xml
    :type  problem_files: dict or None
    :param problem_files: optional mapping from problem category to a set of
                          filenames; a file with an unrecognised extension is
                          recorded under "unknown_filetype". When None,
                          nothing is recorded.

    :rtype: list(SymbolTree)
    :return list of Symbol trees found in the file (empty list when the
            file extension is not recognised)
    """
    (ext, content) = MathDocument.read_doc_file(filename)

    if ext == '.tex':
        # parse_from_tex's result is wrapped so every branch returns a list
        return [MathExtractor.parse_from_tex(content, file_id)]

    if ext in {'.xhtml', '.mathml', '.mml', '.html'}:
        # the XML parser's result is returned as-is
        return MathExtractor.parse_from_xml(content, file_id,
                                            missing_tags=missing_tags,
                                            problem_files=problem_files)

    # Unknown extension: problem_files defaults to None, so only record the
    # file when the caller actually supplied a mapping to collect into.
    if problem_files is not None:
        problem_files.setdefault("unknown_filetype", set()).add(filename)
    print('Unknown filetype %s for %s' % (ext, filename))
    return []
def read_file(filename, file_id, missing_tags=None, problem_files=None):
    """
    Read file for parsing

    :type  filename: string
    :param filename: file to be parsed
    :param file_id: identifier passed on to the MathExtractor parse call
    :param missing_tags: passed on to MathExtractor.parse_from_xml untouched
    :type  problem_files: dict or None
    :param problem_files: optional dict of problem category -> set of
                          filenames; gains an "unknown_filetype" entry for a
                          file whose extension is not handled. May be None,
                          in which case no bookkeeping is done.

    :rtype: list(SymbolTree)
    :return list of Symbol trees found in the file; [] for an unhandled
            file extension
    """
    xml_extensions = {'.xhtml', '.mathml', '.mml', '.html'}
    (ext, content) = MathDocument.read_doc_file(filename)

    if ext == '.tex':
        tree = MathExtractor.parse_from_tex(content, file_id)
        # wrapped in a list to match the return shape of the other branches
        trees = [tree]
    elif ext in xml_extensions:
        trees = MathExtractor.parse_from_xml(content, file_id,
                                             missing_tags=missing_tags,
                                             problem_files=problem_files)
    else:
        # problem_files is optional (default None); guard the bookkeeping so
        # an unknown extension is reported instead of raising on None.
        if problem_files is not None:
            unknown = problem_files.setdefault("unknown_filetype", set())
            unknown.add(filename)
        print('Unknown filetype %s for %s' % (ext, filename))
        trees = []

    return trees