# NOTE: the call_* entry points below are Python 2 code (dict.itervalues /
# .iteritems) and assume module-level imports of json and the project's
# LabeledExample, NLP, WordProblem, Template, and FeatureExtractor classes.
def call_count_unique(arg_json, arg_unique, arg_nlp):
    # Extract a template from each requested example and report how many
    # distinct templates were found.
    examples = LabeledExample.read(arg_json)
    templates = list()
    for index in arg_unique:
        example = examples[index]
        natural_language = NLP.read(arg_nlp, index)
        wp = WordProblem(example, natural_language)
        templates.append(wp.extract_template())
    print(len(set(templates)))
    print(json.dumps([t.to_json() for t in templates]))
def call_print(arg_json, arg_index, arg_nlp):
    examples = LabeledExample.read(arg_json)
    example = examples[arg_index]
    natural_language = NLP.read(arg_nlp, arg_index)
    wp = WordProblem(example, natural_language)
    wp.extract_template()
    print(wp)
    print('questions: {}'.format(
        [(s.as_text(), s.object_of_sentence())
         for s in wp.nlp.questions().itervalues()]))
    print('commands: {}'.format(
        [(s.as_text(), s.object_of_sentence())
         for s in wp.nlp.commands().itervalues()]))
def call_find_template_set(arg_json, arg_nlp, arg_templates):
    examples = LabeledExample.read(arg_json)
    indices = [e.index for e in examples.itervalues()]
    natural_language = {i: NLP.read(arg_nlp, i) for i in indices}
    word_problems = [WordProblem(examples[i], natural_language[i])
                     for i in indices]
    templates = [wp.extract_template() for wp in word_problems]

    # Deduplicate templates by equality, recording which unique template
    # each word problem maps to.
    unique = list()
    wp_template_map = dict()
    for wp in word_problems:
        template = wp.template
        wp_index = wp.labeled_example.index
        found_template = False
        for unique_i, u in enumerate(unique):
            if template == u:
                wp_template_map[wp_index] = unique_i
                found_template = True
                break
        if not found_template:
            unique.append(template)
            wp_template_map[wp_index] = len(unique) - 1

    print('{} total and {} unique templates'.format(len(templates),
                                                    len(unique)))
    with open(arg_templates, 'wt') as f_handle:
        out_json = {'templates': [t.to_json() for t in unique],
                    'wp_template_map': wp_template_map}
        f_handle.write(json.dumps(out_json))
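# Illustrative sketch (hypothetical helper name, not in the original module):
# reading back the file written by call_find_template_set, mirroring what
# call_extract_features and call_fold below do with it.
def load_template_set(path):
    with open(path, 'rt') as f_handle:
        parsed = json.loads(f_handle.read())
    unique_templates = [Template.from_json(j) for j in parsed['templates']]
    # JSON object keys are strings; restore the integer word-problem indices.
    wp_template_map = {int(k): v for k, v in parsed['wp_template_map'].items()}
    return unique_templates, wp_template_map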
def call_extract_features(arg_json, arg_nlp, arg_templates, arg_parameters):
    examples = LabeledExample.read(arg_json)
    indices = [e.index for e in examples.itervalues()]
    natural_language = {i: NLP.read(arg_nlp, i) for i in indices}
    word_problems = [WordProblem(examples[i], natural_language[i])
                     for i in indices]

    with open(arg_templates, 'rt') as f_handle:
        raw = f_handle.read()
    parsed = json.loads(raw)
    unique_templates = [Template.from_json(j) for j in parsed['templates']]

    # TODO(Eric): using only 2 word problems for testing
    unique_templates = unique_templates[:2]
    word_problems = word_problems[:2]

    feature_extractor = FeatureExtractor(unique_templates, word_problems)
    derivations = initialize_partial_derivations_for_all_templates(
        word_problems[0], unique_templates)
    derivation = derivations[0]
    # Greedily fill slots until the derivation is complete, always taking
    # the first candidate filling.
    while not derivation.is_complete():
        derivation = derivation.all_ways_to_fill_next_slot()[0]
    print(feature_extractor.extract(derivation))
    print(derivation)
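# Illustrative sketch (hypothetical helper, not in the original module):
# instead of greedily taking the first filling at each step as above, one
# could enumerate every complete derivation using the same partial-derivation
# API (is_complete, all_ways_to_fill_next_slot):
def all_complete_derivations(partial):
    if partial.is_complete():
        return [partial]
    complete = list()
    for filled in partial.all_ways_to_fill_next_slot():
        complete.extend(all_complete_derivations(filled))
    return complete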
def prepare_word_problems(directory: str, loader: 'Loader') -> List['WordProblem']:
    """
    Creates a list of WordProblem objects.

    :param directory: Directory from which to load the word problems.
    :param loader: Instance of class Loader.
    :return: List of WordProblem objects.
    """
    # Assumes typing.List and the count_in_dir/load_sentence_dir helpers
    # are imported at module level.
    from word_problem import WordProblem

    count = count_in_dir(directory)
    sentences = [load_sentence_dir(directory, i) for i in range(1, count + 1)]
    return [WordProblem(sentence, loader) for sentence in sentences]
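# Usage sketch (the directory follows the docstring example in
# get_files_for_svm below; assumes a constructed Loader instance):
# problems = prepare_word_problems('./dataset/WP500/traindata', loader)
# print(len(problems))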
def get_files_for_svm(directory: str, loader: 'Loader', count: int = None) -> Tuple:
    """
    Gets files for SVM and expression processing.

    Usage example: get_files_for_svm('./dataset/WP500/traindata', loader)

    :param directory: Path to the data directory.
    :param loader: Instance of class Loader.
    :param count: Optional limit on the number of files to load.
    :return: Tuple of numpy arrays: (word_problems, results, expressions).
    """
    from word_problem import WordProblem

    files = {'sentences', 'results', 'expressions'}
    dictionary = get_specified_files(directory, files, count)
    word_problems = [WordProblem(sentence, loader)
                     for sentence in dictionary['sentences']]
    return (np.array(word_problems),
            np.array(dictionary['results']),
            np.array(dictionary['expressions']))
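# Usage sketch (path follows the docstring example; assumes a constructed
# Loader instance and the WP500 dataset layout):
# word_problems, results, expressions = get_files_for_svm(
#     './dataset/WP500/traindata', loader, count=100)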
def process_single(word_problem: str) -> Tuple:
    """
    Processes a single word problem.

    :param word_problem: Word problem text given by the user.
    :return: Tuple (error_flag, result): error_flag is True if an error
        occurred, result is the computed answer (or -1 on error).
    """
    error = False
    result = -1
    try:
        # `loader` is expected to be available at module scope.
        wp = WordProblem(word_problem, loader)
        result = solve_single(wp)
    except Exception:
        error = True
        print('Something is wrong with the word problem. '
              'Please check the word problem.')
    return error, result
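# Usage sketch: callers branch on the returned error flag (the problem text
# is illustrative):
# error, result = process_single('Adam has 3 apples and buys 2 more. '
#                                'How many apples does he have?')
# if not error:
#     print(result)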
def call_fold(arg_testfold, arg_numfolds, arg_foldoutput, arg_json, arg_nlp,
              arg_templates, arg_parameters):
    examples = LabeledExample.read(arg_json)
    indices = [e.index for e in examples.itervalues()][:5]  # TODO just 5 for testing
    natural_language = {i: NLP.read(arg_nlp, i) for i in indices}
    word_problems = [WordProblem(examples[i], natural_language[i])
                     for i in indices]

    # Split the word problems into folds; hold out one fold for testing
    # and train on the rest.
    fold_indices = make_fold_indices(arg_numfolds, len(word_problems))
    test_indices = fold_indices.pop(arg_testfold)
    train_indices = list()
    for per_fold in fold_indices:
        train_indices.extend(per_fold)

    with open(arg_templates, 'rt') as f_handle:
        raw = f_handle.read()
    parsed = json.loads(raw)
    unique_templates = [Template.from_json(j) for j in parsed['templates']]
    wp_template_map = {int(k): v
                       for k, v in parsed['wp_template_map'].iteritems()}

    # Restrict to the templates seen in the training folds and remap each
    # training word problem to an index into that smaller template list.
    train_wps = [word_problems[i] for i in train_indices]
    train_templates_indices = list({wp_template_map[wp.labeled_example.index]
                                    for wp in train_wps})
    remap_templates = {
        wp.labeled_example.index: train_templates_indices.index(
            wp_template_map[wp.labeled_example.index])
        for wp in train_wps}
    train_templates = [unique_templates[i] for i in train_templates_indices]

    feature_extractor = FeatureExtractor(train_templates, train_wps)
    classifier = optimize_parameters(feature_extractor, train_wps,
                                     train_templates, remap_templates)
    with open(arg_parameters, 'wt') as f_handle:
        f_handle.write(json.dumps(classifier.to_json()))

    correct = 0
    for test_i in test_indices:
        test_wp = word_problems[test_i]
        correct += classifier.solve(test_wp)
    print('{} correct out of {}'.format(correct, len(test_indices)))
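# Illustrative sketch (hypothetical; the real make_fold_indices is defined
# elsewhere in the project): one plausible implementation that partitions
# range(num_items) into num_folds disjoint, roughly equal chunks, matching
# how call_fold pops one fold for testing and trains on the rest.
def make_fold_indices_sketch(num_folds, num_items):
    fold_sizes = [num_items // num_folds] * num_folds
    for i in range(num_items % num_folds):
        fold_sizes[i] += 1  # spread the remainder over the first folds
    folds = list()
    start = 0
    for size in fold_sizes:
        folds.append(list(range(start, start + size)))
        start += size
    return folds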