def cross_validate_inner(i):
    """Run fold ``i`` of the cross-validation: train, disambiguate, measure.

    The run configuration is unpacked from the module-level
    ``_NAIVE_CV_GLOBALS`` tuple. The files in ``splits[i]`` are used for
    testing and the files of every other split are used for training.

    Args:
        i: Index of the split that is held out for testing.

    Returns:
        The 6-tuple ``(total_correct_known, total_correct_unknown,
        total_known, total_unknown, upper_bound_known, upper_bound_unknown)``
        unpacked from the result of ``calculate_dir_precision``.

    Raises:
        Exception: If the configured algorithm name is none of
            ``ALGONAMES.BASELINE``, ``ALGONAMES.HMM`` or ``ALGONAMES.MEMM``.
    """
    # ``all_corpus_files`` is part of the shared tuple but is not needed here.
    (corpus_dir, algo_dir, morph_analysis_dir, N_func, error_dir,
     num_iters, all_corpus_files, splits, algo_name) = _NAIVE_CV_GLOBALS

    # Presumably empties the output directory so that this fold starts clean.
    remove_directory_content(algo_dir)
    print("Starting {0} fold".format(i))

    training_files = flatten(
        splits[fold] for fold in range(num_iters) if i != fold)
    held_out_files = flatten(
        splits[fold] for fold in range(num_iters) if i == fold)

    # Only held-out files that have a same-named counterpart in the
    # morphological analysis directory take part in testing.
    morph_paths = []
    for held_out in held_out_files:
        candidate = os.path.join(morph_analysis_dir,
                                 os.path.basename(held_out))
        if os.path.exists(candidate):
            morph_paths.append(candidate)

    if algo_name == ALGONAMES.BASELINE:
        algo = NaiveAlgorithm(N_func=N_func)
        algo.train_from_filelist(training_files)
    elif algo_name == ALGONAMES.HMM:
        algo = HMMAlgorithm(N_filter_func=N_func)
        algo.train_model_from_filelist(corpus_files=training_files)
    elif algo_name == ALGONAMES.MEMM:
        algo = MMEMAlgorithm(N_filter_func=N_func)
        algo.train_model_file_list(corpus_filelist=training_files,
                                   ambiguity_dir=morph_analysis_dir)
    else:
        raise Exception("Not supported algorithm {0}".format(algo_name))

    print("Finished training. Starting testing phase!")
    remove_ambiguity_file_list(ambig_filelist=morph_paths,
                               output_dir=algo_dir,
                               algo=algo)

    print("Finished working of algo. Starting measuring phase")
    errors_context_path = os.path.join(
        error_dir,
        "{1}_errors_context_{0}_{2}.txt".format(
            i, algo_name, get_tag_set_by_func(N_func)))
    precision = calculate_dir_precision(
        algo_dir=algo_dir,
        ambi_dir=morph_analysis_dir,
        gold_dir=corpus_dir,
        M=M_strict_mathcher,
        N=N_func,
        P=P_no_garbage,
        errors_context_filename=errors_context_path)

    # Unpack explicitly so that a result of the wrong length fails here.
    (correct_known, correct_unknown, known_total, unknown_total,
     known_upper_bound, unknown_upper_bound) = precision
    return (correct_known, correct_unknown, known_total, unknown_total,
            known_upper_bound, unknown_upper_bound)
def bull(action, experiment_name, experiment_path, fold, gold = None, morph_analysis = None):
    """Run one stage (``train``, ``test`` or ``eval``) of an experiment fold.

    The experiment's ``run.py`` module is imported from
    ``<experiment_path>/<experiment_name>`` and its ``runner`` callable is
    invoked with keyword arguments describing the requested stage.

    Args:
        action: ``'train'``, ``'test'`` or ``'eval'``. Any other value only
            imports the experiment module and does nothing else.
        experiment_name: Name of the experiment package directory.
        experiment_path: Directory that contains the experiment package; it
            is added to ``sys.path`` so the package can be imported.
        fold: Name of the fold sub-directory inside the folds directory.
        gold: Directory with gold-standard files (used by ``eval``).
        morph_analysis: Directory with morphological analysis files. Passed
            to the runner as ``ambiguity_dir`` on ``train`` (when truthy) and
            to ``calculate_dir_precision`` as ``ambi_dir`` on ``eval``.

    Returns:
        The result of ``calculate_dir_precision`` for ``eval``; ``None`` for
        every other action.

    Raises:
        ImportError: If the experiment's ``run.py`` module cannot be
            imported. The traceback is written to ``sys.stderr`` first.
    """
    experiment_dir = os.path.join(experiment_path, experiment_name)

    # Avoid growing sys.path by one duplicate entry on every call.
    search_path = os.path.abspath(experiment_path)
    if search_path not in sys.path:
        sys.path.append(search_path)

    mod_run_py = '.'.join([experiment_name, RUNPY_FILENAME])
    try:
        # A non-empty fromlist makes __import__ return the run.py submodule
        # itself rather than the top-level experiment package.
        mod = __import__(mod_run_py, globals(), locals(), ['*'])
    except ImportError:
        sys.stderr.write(
            "Failed to run.py module from experiment {0}. {1}\n".format(
                experiment_name, traceback.format_exc()))
        # Without the module nothing below can work; propagate the real
        # error instead of failing later on an unbound ``mod``.
        raise
    print("Loaded run.py for experiment {0}".format(experiment_name))

    params = {
        'action': action,
        'experiment_name': experiment_name,
        'experiment_dir': experiment_dir,
        'fold': fold,
    }

    if action == 'train':
        fold_dir = os.path.join(experiment_dir, FOLDS_DIRNAME, fold)
        train_files_source_dir = os.path.join(experiment_dir, TRAIN_DIR)
        fold_train_file = os.path.join(fold_dir, TRAIN_FILENAME)
        params['train_file_list'] = _read_fold_file_list(
            fold_train_file, train_files_source_dir)
        if morph_analysis:
            params['ambiguity_dir'] = morph_analysis
        mod.runner(**params)
    elif action == 'test':
        fold_dir = os.path.join(experiment_dir, FOLDS_DIRNAME, fold)
        test_files_source_dir = os.path.join(experiment_dir, TEST_DIR)
        fold_test_file = os.path.join(fold_dir, TEST_FILENAME)
        test_files = _read_fold_file_list(
            fold_test_file, test_files_source_dir)
        result_dir = os.path.join(fold_dir, 'test_result')
        create_dir(result_dir)
        # The runner is asked for an already trained model, not to test.
        params['action'] = 'load_model'
        algo = mod.runner(**params)
        remove_directory_content(result_dir)
        remove_ambiguity_file_list(ambig_filelist=test_files,
                                   output_dir=result_dir,
                                   algo=algo)
        print("Finished removing ambiguity for experiment {0} for fold {1}".format(
            experiment_name, fold))
    elif action == 'eval':
        # Evaluate the algorithm's output for this particular fold.
        eval_funcs = mod.runner(**params)
        # Use the same folds directory constant as 'train' and 'test' so
        # that 'eval' reads the results that 'test' wrote.
        fold_dir = os.path.join(experiment_dir, FOLDS_DIRNAME, fold)
        errors_filename = os.path.join(fold_dir, 'errors.txt')
        algo_dir = os.path.join(fold_dir, 'test_result')
        return calculate_dir_precision(algo_dir=algo_dir,
                                       gold_dir=gold,
                                       ambi_dir=morph_analysis,
                                       M=eval_funcs['M'],
                                       N=eval_funcs['N'],
                                       P=eval_funcs['P'],
                                       errors_context_filename=errors_filename)


def _read_fold_file_list(list_path, source_dir):
    """Return absolute paths for the file names listed in ``list_path``.

    Every non-blank line of ``list_path`` is stripped and joined onto
    ``source_dir``. Blank lines are skipped so they do not turn into the
    source directory itself. The list file is closed before returning.
    """
    with open(list_path, 'r') as list_file:
        names = [line.strip() for line in list_file]
    return [os.path.abspath(os.path.join(source_dir, name))
            for name in names if name]