def TrainUsingSVM(xmls, annotatedxmlloc):
    """Create an SVMImpl and feed it annotated table-decomposition data.

    For every name in ``xmls`` the file at
    ``annotatedxmlloc + name + "_TD_ANNOTATED"`` is read through a fresh
    ``Utils.Trainer.Trainer`` and the results are collected, in input order,
    into one list that is handed to
    ``SVMImpl.domaintrainforTableDecomposition``.

    Args:
        xmls: Iterable of document base names (strings concatenated into the
            annotated-file path).
        annotatedxmlloc: Path prefix prepended verbatim to each name; no
            separator is inserted, so it must already end with one if needed.

    Returns:
        The SVMImpl instance after ``domaintrainforTableDecomposition`` has
        been called on it.
    """
    classifier = SVMImpl()
    reader = Utils.Trainer.Trainer()
    annotated_tables = [
        reader.readAnnotatedxmlforTableDecomposition(
            annotatedxmlloc + name + "_TD_ANNOTATED"
        )
        for name in xmls
    ]
    classifier.domaintrainforTableDecomposition(annotated_tables)
    return classifier
def TrainUsingSVM(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Create an SVMImpl and feed it (annotated XML, font dictionary) pairs.

    For every name in ``xmls``:
      1. ``xmlloc + name + ".xml"`` is parsed with ``ET.parse`` and passed to
         ``preprocessor.getFontDictionary``.
      2. ``annotatedxmlloc + name + "_annotated"`` is read with
         ``trainer.readAnnotatedXml``.
    Each pair is stored as ``[annotated, font_dictionary]`` and the full list
    is handed to ``SVMImpl.domaintrain``.

    Args:
        xmls: Iterable of document base names (strings).
        preprocessor: Object exposing ``getFontDictionary(tree)``.
        trainer: Object exposing ``readAnnotatedXml(path)``.
        xmlloc: Path prefix for the raw ``.xml`` files (used verbatim).
        annotatedxmlloc: Path prefix for the ``_annotated`` files (used
            verbatim).

    Returns:
        The SVMImpl instance after ``domaintrain`` has been called on it.
    """
    classifier = SVMImpl()
    training_pairs = []
    for name in xmls:
        # The font dictionary is obtained before the annotated file is read.
        fonts = preprocessor.getFontDictionary(ET.parse(xmlloc + name + ".xml"))
        # Original author's note (presumably the annotated XML's shape --
        # confirm against trainer.readAnnotatedXml): list of pages, each page
        # a list of columns, each column a list of <Sparse/NonSparse, tag>.
        annotated = trainer.readAnnotatedXml(annotatedxmlloc + name + "_annotated")
        training_pairs.append([annotated, fonts])
    classifier.domaintrain(training_pairs)
    return classifier
def TrainUsingSVM(xmls, annotatedxmlloc):
    """Build an SVMImpl and call its table-decomposition training on annotated files.

    Each entry of ``xmls`` is turned into the path
    ``annotatedxmlloc + entry + "_TD_ANNOTATED"`` and read with
    ``Utils.Trainer.Trainer().readAnnotatedxmlforTableDecomposition``. The
    collected results (same order as ``xmls``) are passed in one call to
    ``SVMImpl.domaintrainforTableDecomposition``.

    Args:
        xmls: Iterable of document base names (strings).
        annotatedxmlloc: Prefix concatenated directly in front of each name.

    Returns:
        The SVMImpl instance after ``domaintrainforTableDecomposition`` has
        been called on it.
    """
    model = SVMImpl()
    table_reader = Utils.Trainer.Trainer()
    # Lazy generator: each path is built right before its file is read.
    annotated_paths = (
        annotatedxmlloc + doc_name + "_TD_ANNOTATED" for doc_name in xmls
    )
    tables = [
        table_reader.readAnnotatedxmlforTableDecomposition(path)
        for path in annotated_paths
    ]
    model.domaintrainforTableDecomposition(tables)
    return model
def TrainUsingSVM(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Build an SVMImpl and call ``domaintrain`` on per-document data pairs.

    One ``[annotated_xml, font_dictionary]`` pair is produced per name in
    ``xmls`` and the list of pairs is passed to ``SVMImpl.domaintrain``.

    Args:
        xmls: Iterable of document base names (strings).
        preprocessor: Object exposing ``getFontDictionary(tree)``; it receives
            the result of ``ET.parse(xmlloc + name + ".xml")``.
        trainer: Object exposing ``readAnnotatedXml(path)``; it receives
            ``annotatedxmlloc + name + "_annotated"``.
        xmlloc: Prefix concatenated directly in front of each raw XML name.
        annotatedxmlloc: Prefix concatenated directly in front of each
            annotated file name.

    Returns:
        The SVMImpl instance after ``domaintrain`` has been called on it.
    """

    def _load_pair(doc_name):
        # Font dictionary first, annotated XML second.
        font_dictionary = preprocessor.getFontDictionary(
            ET.parse(xmlloc + doc_name + ".xml")
        )
        # Original author's note (presumably describing the annotated XML --
        # confirm): list(pages), pages -> list(cols),
        # col -> list(<Sparse/NonSparse, tag>)
        annotated_doc = trainer.readAnnotatedXml(
            annotatedxmlloc + doc_name + "_annotated"
        )
        return [annotated_doc, font_dictionary]

    model = SVMImpl()
    pairs = [_load_pair(doc_name) for doc_name in xmls]
    model.domaintrain(pairs)
    return model