def TrainUsingSVM(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Create an ``SVMImpl`` and train it on the documents named in *xmls*.

    For each name, the raw XML at ``xmlloc + name + ".xml"`` is parsed with
    ``ET.parse`` and passed to ``preprocessor.getFontDictionary``; the
    annotation is then loaded with
    ``trainer.readAnnotatedXml(annotatedxmlloc + name + "_annotated")``.
    All ``[annotation, font dictionary]`` pairs are handed to
    ``domaintrain`` in a single call.

    Args:
        xmls: Iterable of document base names (without extension).
        preprocessor: Object providing ``getFontDictionary(tree)``.
        trainer: Object providing ``readAnnotatedXml(path)``.
        xmlloc: Prefix for the raw XML files; joined to the name with plain
            ``+``, so no path separator is inserted here.
        annotatedxmlloc: Prefix for the annotated files; also joined with
            plain ``+``.

    Returns:
        The ``SVMImpl`` instance after ``domaintrain`` has been invoked.
    """
    svm = SVMImpl()
    training_pairs = []

    for doc_name in xmls:
        raw_tree = ET.parse(xmlloc + doc_name + ".xml")
        font_lookup = preprocessor.getFontDictionary(raw_tree)

        # Annotation layout, per the original author's note:
        # list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotation = trainer.readAnnotatedXml(
            annotatedxmlloc + doc_name + "_annotated"
        )

        training_pairs.append([annotation, font_lookup])

    svm.domaintrain(training_pairs)
    return svm
def TrainUsingSVM(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Train a fresh ``SVMImpl`` on annotated XML documents and return it.

    Every entry of *xmls* contributes one ``[annotated_xml, font_dict]``
    pair: ``font_dict`` comes from ``preprocessor.getFontDictionary`` applied
    to the parsed file ``xmlloc + name + ".xml"``, and ``annotated_xml``
    comes from ``trainer.readAnnotatedXml`` applied to
    ``annotatedxmlloc + name + "_annotated"``. The collected pairs are
    passed to ``SVMImpl.domaintrain`` once, after the loop.

    Args:
        xmls: Iterable of document base names (without extension).
        preprocessor: Object providing ``getFontDictionary(tree)``.
        trainer: Object providing ``readAnnotatedXml(path)``.
        xmlloc: String prefix prepended verbatim to each raw XML file name.
        annotatedxmlloc: String prefix prepended verbatim to each annotated
            file name.

    Returns:
        The ``SVMImpl`` instance after ``domaintrain`` has been invoked.
    """
    model = SVMImpl()
    samples = []

    for base_name in xmls:
        source_path = xmlloc + base_name + ".xml"
        fonts = preprocessor.getFontDictionary(ET.parse(source_path))

        # Annotation layout, per the original author's note:
        # list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotated_path = annotatedxmlloc + base_name + "_annotated"
        annotated = trainer.readAnnotatedXml(annotated_path)

        samples.append([annotated, fonts])

    model.domaintrain(samples)
    return model