class UnitExtractorTest(unittest.TestCase):
    """Tests for ``UnitExtractor.getUnit``."""

    def setUp(self):
        """Create a fresh ``UnitExtractor`` before every test."""
        # Zero-argument super() follows the MRO, so a setUp defined on any
        # mixin or intermediate base class is not skipped.
        super().setUp()
        self.uE = UnitExtractor()

    def testPfund(self):
        """``getUnit("Pfund")`` is expected to return ``"Pfund"``."""
        self.assertEqual("Pfund", self.uE.getUnit("Pfund"))

    def testNonUnit(self):
        """``getUnit`` is expected to return ``None`` for ``"aasdf"``."""
        self.assertIsNone(self.uE.getUnit("aasdf"))
Example #2
0
 def testExtractionIsValidXml(self):
     """Check that the string returned by ``extractRecipe`` parses as XML.

     The first plain-text recipe produced from ``getRecipeB49()`` is run
     through an ``Extractor``; the test fails if ``parseString`` (or
     pretty-printing the resulting document) raises.
     """
     unitExtractor = UnitExtractor()
     recipes = XmlParser(createCueMLDom([getRecipeB49()])).getPlainTextRecipes()
     # Use the next() builtin instead of calling the dunder method directly.
     plainTextRcp = next(recipes)
     ingredientExtractor = IngredientExtractor(parse(pathToListIngredients))
     extractor = Extractor(ingredientExtractor, unitExtractor)
     xmlString = extractor.extractRecipe(plainTextRcp)
     try:
         parseString(xmlString).toprettyxml()
     except Exception as err:
         # The exception gets its own name: Python unbinds the ``as`` target
         # when the handler exits, which would otherwise clobber the
         # extractor variable.
         self.fail("Not valid xml extracted: {}".format(str(err)))
Example #3
0
class EvalRecipesTest(unittest.TestCase):
    """Tests for ``precisionOf2Recipes`` and ``recallOf2Recipes``."""

    # Class-level fixtures, created once when the class body is executed.
    ingE = IngredientExtractor(parse(pathToListIngredients))
    unitE = UnitExtractor()
    extractor = Extractor(ingE, unitE)

    def testPrecisionOf2Recipes(self):
        """``precisionOf2Recipes`` on the fixture pair yields ``(1, 3)``.

        NOTE(review): this body previously lived under the recall test's
        name; it was moved here so the test name matches the function under
        test. The call and the expected values are unchanged.
        """
        iEIngs, goldenStandardIngs = getIEIngsAndGoldenStandardIngs()
        attris = {"ref"}
        retrievedAndRelevant, retrieved = precisionOf2Recipes(
            goldenStandardIngs, iEIngs, attris)
        self.assertEqual(1, retrievedAndRelevant)
        self.assertEqual(3, retrieved)

    def testRecallOf2Recipes(self):
        """``recallOf2Recipes`` on the fixture pair yields ``(1, 1)``.

        NOTE(review): this body previously lived under the precision test's
        name; it was moved here so the test name matches the function under
        test. The call and the expected values are unchanged.
        """
        iEIngs, goldenStandardIngs = getIEIngsAndGoldenStandardIngs()
        attris = {"ref"}
        retrievedAndRelevant, relevant = recallOf2Recipes(
            goldenStandardIngs, iEIngs, attris)
        self.assertEqual(1, retrievedAndRelevant)
        self.assertEqual(1, relevant)
 def setUp(self):
     """Attach a fresh ingredient extractor and unit extractor to the test."""
     ingredientDom = parse(pathToListIngredients)
     self.ingE = IngredientExtractor(ingredientDom)
     self.unitE = UnitExtractor()
 def setUp(self):
     """Create a fresh ``UnitExtractor`` before every test."""
     # Zero-argument super() follows the MRO instead of hard-wiring
     # unittest.TestCase, so intermediate base-class setUp methods also run.
     super().setUp()
     self.uE = UnitExtractor()
from informationExtraction.UnitExtractor import UnitExtractor
import time
from informationExtraction.Extractor import Extractor
from evaluation.evalRecipes import evalFromFiles
from model.PlainTextRecipe import PlainTextRecipe
from informationExtraction.dictBasedExtractor import dictBasedEnrichment
from informationExtraction.ruleBasedExtractor import applyRulesToWordProperties

# Attributes which should be relevant in evaluation.
# Wider candidate list kept for reference:
#   ["quantity", "atLeast", "atMost", "unit", "optional", "altGrp"]
evalAttris = {"quantity", "atLeast", "atMost", "unit"}

# Module-level extractors, created once on import.
ingE = IngredientExtractor(parse(
    "/home/torsten/Desktop/MyMasterThesis/docs/DavidisesKochbuch/listIngredients.xml"
))
unitE = UnitExtractor()

# Golden standard: contains B-1 to B-68 labeled and the rest unlabeled.
goldenStandardPath = (
    "/home/torsten/Desktop/MyMasterThesis/docs/DavidisesKochbuch/GoldenStandard.xml"
)
defaultErgFilePath = "erg.xml"


def evalRecipes(rcpIds=["B-{}".format(i) for i in range(1, 51)],
                debug=True,
                attris=evalAttris,
                ergFilePath=defaultErgFilePath):
    startTime = time.time()
    cookbook = XmlParser(parse(goldenStandardPath))
    extractor = Extractor(ingE, unitE)
    extractor.extractRecipes2TEICueML(cookbook.getPlainTextRecipes(rcpIds),
                                      ergFilePath)
    print(
        '--- Needed for extracting and writingTo "{}": {} seconds ---'.format(