def readXML(useSmall):
    """Read the dataset XML files and return the reader's result.

    Args:
        useSmall: When truthy, read from ``../data/semevaltest`` (announced
            as the "experimental" dataset); otherwise read from
            ``../data/SemEval-2010`` (announced as the "complete" dataset).

    Returns:
        Whatever ``XMLReader.readXML()`` returns for the chosen directory.
    """
    # The branch only decides which message and directory to use; the
    # actual read below is shared by both cases.
    if useSmall:
        start_message = 'Start reading in experimental dataset xml files!'
        dataset_dir = '../data/semevaltest'
    else:
        start_message = 'Start reading in complete dataset xml files!'
        dataset_dir = '../data/SemEval-2010'

    print(start_message)
    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by XMLReader, which is not visible here -- confirm.
    documents = XMLReader(dataset_dir, True).readXML()
    print('Finished reading in dataset xml files!')
    return documents
def test_baseline():
    """Run the baseline approach on the SemEval-2010 data and print its output.

    Reads the XML files under ``../data/SemEval-2010``, feeds the result to
    ``BaselineApproach``, passes the produced solution together with
    ``readJSONSolution(False)`` to ``evaluate_dataset``, and finally prints
    the produced solution.
    """
    # NOTE: an earlier experiment that built BaselineApproach from the first
    # 144 documents of fetch_20newsgroups and extracted keyphrases from
    # '..\\data\\sample_texts\\sample_text.txt' was left commented out here.
    reader = XMLReader('../data/SemEval-2010', True)
    documents = reader.readXML()

    baseline = BaselineApproach(documents)
    predicted = baseline.returnSolution()

    # Loaded after the prediction step, matching the original call order.
    reference = readJSONSolution(False)
    # The evaluation result is currently not used any further.
    evaluation = evaluate_dataset(reference, predicted)

    print(predicted)