class UnitExtractorTest(unittest.TestCase):
    """Unit tests for ``UnitExtractor.getUnit``."""

    def setUp(self):
        """Give every test method its own ``UnitExtractor`` as ``self.uE``."""
        # Cooperative call: follows the MRO instead of hard-coding the base.
        super().setUp()
        self.uE = UnitExtractor()

    def testPfund(self):
        """Expect ``getUnit("Pfund")`` to hand back the string "Pfund"."""
        self.assertEqual("Pfund", self.uE.getUnit("Pfund"))

    def testNonUnit(self):
        """Expect ``getUnit`` to yield ``None`` for the input "aasdf"."""
        self.assertIsNone(self.uE.getUnit("aasdf"))
def testExtractionIsValidXml(self):
    """Check that the extractor's output for one recipe can be parsed as XML.

    The first plain-text recipe produced from ``getRecipeB49()`` is run
    through ``Extractor.extractRecipe``; the resulting string is then fed
    to ``parseString`` and serialised with ``toprettyxml``. Any exception
    raised while doing so is reported as a test failure.
    """
    unitExtractor = UnitExtractor()
    recipeIter = XmlParser(
        createCueMLDom([getRecipeB49()])).getPlainTextRecipes()
    # Builtin next() rather than calling the __next__ dunder directly.
    plainTextRcp = next(recipeIter)
    ingredientExtractor = IngredientExtractor(parse(pathToListIngredients))
    extractor = Extractor(ingredientExtractor, unitExtractor)
    xmlString = extractor.extractRecipe(plainTextRcp)
    try:
        parseString(xmlString).toprettyxml()
    except Exception as err:
        # Distinct name so the handler does not rebind (and, on exit,
        # delete) the local variable holding the extractor.
        self.fail("Not valid xml extracted: {}".format(str(err)))
class EvalRecipesTest(unittest.TestCase):
    """Tests for ``precisionOf2Recipes`` and ``recallOf2Recipes``.

    Both tests compare the ingredient sets returned by
    ``getIEIngsAndGoldenStandardIngs()`` on the single attribute "ref".
    """

    # Shared fixtures, created once when the class body is executed.
    ingE = IngredientExtractor(parse(pathToListIngredients))
    unitE = UnitExtractor()
    extractor = Extractor(ingE, unitE)

    def testPrecisionOf2Recipes(self):
        """Expect ``precisionOf2Recipes`` to return the pair (1, 3)."""
        # Previously this method exercised recallOf2Recipes; each test now
        # calls the function it is named after.
        iEIngs, goldenStandardIngs = getIEIngsAndGoldenStandardIngs()
        attris = {"ref"}
        retrievedAndRelevant, retrieved = precisionOf2Recipes(
            goldenStandardIngs, iEIngs, attris)
        self.assertEqual(1, retrievedAndRelevant)
        self.assertEqual(3, retrieved)

    def testRecallOf2Recipes(self):
        """Expect ``recallOf2Recipes`` to return the pair (1, 1)."""
        iEIngs, goldenStandardIngs = getIEIngsAndGoldenStandardIngs()
        attris = {"ref"}
        retrievedAndRelevant, relevant = recallOf2Recipes(
            goldenStandardIngs, iEIngs, attris)
        self.assertEqual(1, retrievedAndRelevant)
        self.assertEqual(1, relevant)
def setUp(self):
    """Attach fresh extractor objects to the test instance.

    ``self.ingE`` is an ``IngredientExtractor`` built from the parsed
    ``pathToListIngredients`` file; ``self.unitE`` is a ``UnitExtractor``.
    """
    parsedIngredientList = parse(pathToListIngredients)
    self.ingE = IngredientExtractor(parsedIngredientList)
    self.unitE = UnitExtractor()
def setUp(self):
    """Run ``unittest.TestCase.setUp`` and store a new ``UnitExtractor``.

    The extractor is made available to the test methods as ``self.uE``.
    """
    unittest.TestCase.setUp(self)
    freshExtractor = UnitExtractor()
    self.uE = freshExtractor
from informationExtraction.UnitExtractor import UnitExtractor import time from informationExtraction.Extractor import Extractor from evaluation.evalRecipes import evalFromFiles from model.PlainTextRecipe import PlainTextRecipe from informationExtraction.dictBasedExtractor import dictBasedEnrichment from informationExtraction.ruleBasedExtractor import applyRulesToWordProperties evalAttris = set( ["quantity", "atLeast", "atMost", "unit"] ) #["quantity", "atLeast", "atMost", "unit", "optional", "altGrp"]) #attris which should be relevant in evaluation ingE = IngredientExtractor( parse( "/home/torsten/Desktop/MyMasterThesis/docs/DavidisesKochbuch/listIngredients.xml" )) unitE = UnitExtractor() goldenStandardPath = "/home/torsten/Desktop/MyMasterThesis/docs/DavidisesKochbuch/GoldenStandard.xml" # contains B-1 to B-68 labeled and the rest unlabeled defaultErgFilePath = "erg.xml" def evalRecipes(rcpIds=["B-{}".format(i) for i in range(1, 51)], debug=True, attris=evalAttris, ergFilePath=defaultErgFilePath): startTime = time.time() cookbook = XmlParser(parse(goldenStandardPath)) extractor = Extractor(ingE, unitE) extractor.extractRecipes2TEICueML(cookbook.getPlainTextRecipes(rcpIds), ergFilePath) print( '--- Needed for extracting and writingTo "{}": {} seconds ---'.format(