# Output-dispatch section of a profiling driver.  `profile`, `jobfile`, `cpus`,
# `StatOutput` and every option flag tested below (`categorize`, `graph`,
# `funcdata`, `dodot`, `threshold`, `textout`) are defined outside this excerpt.

# With categorization switched off, clear the categorizer on the profile.
if not categorize:
    profile.categorize = None
profile.inputdir(jobfile.rootdir)

# Graph output: build one StatOutput per CPU and graph it.
if graph:
    for cpu in cpus:
        profile.cpu = cpu
        # The stat name carries the CPU number; its prefix depends on
        # whether `funcdata` is set.
        name = ('funcstacks%d' if funcdata else 'stacks%d') % cpu
        output = StatOutput(jobfile, info=profile)
        output.xlabel = 'System Configuration'
        output.ylabel = '% CPU utilization'
        output.stat = name
        output.graph(name, graph)

# Dot output: one write_dot() call per CPU.
if dodot:
    for cpu in cpus:
        profile.cpu = cpu
        profile.write_dot(jobfile=jobfile, threshold=threshold)

# Text output: one write_txt() call per CPU.
if textout:
    for cpu in cpus:
        profile.cpu = cpu
        profile.write_txt(jobfile=jobfile)

# None of the three output modes was requested.
# NOTE(review): the excerpt appears to end partway through this loop body;
# whatever follows the categorize reset is not visible here.
if not (graph or textout or dodot):
    for cpu in cpus:
        if not categorize:
            profile.categorize = None
def run_evaluation(embeddings, report=False, graphs=False,
                   classifier_args=None, classifier='knn',
                   selected_feature_areas=None, selected_features=None,
                   folder='output'):
    '''
    Run the evaluation on the embeddings and return the results and averages.

    This is the main entry point.

    Parameters
    ----------
    embeddings : dictionary (string: list (int/float))
        A dictionary with the embeddings to evaluate. The keys must be the
        language identifiers (ISO 639-3), and the values are the language
        embeddings.
    report : bool
        Whether to write text reports with the results and averages in the
        output folder.
    graphs : bool
        Whether to create graphs of the results in the output folder. This
        includes bar graphs, maps, and count graphs. This can take several
        minutes to complete.
    classifier_args : list (int), optional
        A list of arguments to the classifier. If using k-nearest neighbors,
        the first element is k (default 10). If using multilayer perceptron,
        the elements are the layer sizes, where the length of the list
        indicates the number of layers (default three layers of 50).
        Ignored for the support vector machine.
    classifier : string
        Which classifier to use. Possible values are knn for k-nearest
        neighbors, mlp for multilayer perceptron, and svm for support vector
        machine. Any value other than knn or mlp selects the support vector
        machine.
    selected_feature_areas : list (int)
        List of feature areas to evaluate for. Leave out for all, or use any
        of the following integers:
        0 - None (add individual features with selected_features)
        1 - Phonology
        2 - Morphology
        3 - Nominal Categories
        4 - Nominal Syntax
        5 - Verbal Categories
        6 - Word Order
        7 - Simple Clauses
        8 - Complex Sentences
        9 - Lexicon
        10 - Sign Languages
        11 - Other
        12 - Word Order
    selected_features : list (string)
        Add individual features not included by selected_feature_areas.
    folder : string
        The output folder where to place the text reports and graphs.

    Returns
    -------
    Tuple (True, (dictionary, dictionary)) or Tuple (False, string)
        On success, a tuple whose first value is True and whose second value
        is a tuple with the results and the averages.
        On failure, the first value is False and the second value is an
        error message.
    '''
    if not verify_embeddings(embeddings):
        return (False, "Wrong embeddings format. Format must be a dictionary where the keys are language IDs (ISO 639-3) and values are the language embeddings")

    # Normalise the folder to end with a separator before checking it exists.
    if folder and not folder.endswith("/"):
        folder += "/"
    if folder and not os.path.isdir(folder):
        return (False, "Could not find the path {}".format(folder))

    print("Starting")

    # newline='' lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are parsed correctly.
    with open('language.csv', 'rt', encoding='utf8', newline='') as handle:
        languages = list(csv.reader(handle))
    if not languages:
        # Without a header row there is nothing to evaluate; report it the
        # same way as the other input errors instead of raising IndexError.
        return (False, "The file language.csv is empty")
    headers = languages.pop(0)

    # Remove languages we do not have embeddings for. Column 1 is matched
    # against the embedding keys; blank or short rows are skipped.
    languages = [lang for lang in languages
                 if len(lang) > 1 and lang[1] in embeddings]

    with open('feature_areas.csv', 'rt', encoding='utf8', newline='') as handle:
        feature_areas = {row[0]: row[1]
                         for row in csv.reader(handle) if len(row) > 1}

    included_features = get_included_features(feature_areas, selected_feature_areas, selected_features)

    # Build the estimator. An empty or missing classifier_args falls back to
    # the built-in defaults.
    if classifier == 'knn':
        k = classifier_args[0] if classifier_args else 10
        model = neighbors.KNeighborsClassifier(k)
    elif classifier == 'mlp':
        # Imported here so it is only loaded when an MLP is requested.
        from sklearn.neural_network import MLPClassifier
        layer_sizes = tuple(classifier_args) if classifier_args else (50, 50, 50)
        model = MLPClassifier(hidden_layer_sizes=layer_sizes)
    else:
        model = svm.SVC()

    print("Evaluating embeddings")
    results = evaluate(languages, headers, embeddings, included_features, model)

    print("Calculating averages")
    averages = calculate_averages(results)

    if report:
        print("Writing text reports")
        write_report(folder, results, averages)

    if graphs:
        print("Creating bar graphs")
        graph(folder, results, averages)
        print("Creating maps")
        maps(folder, averages, languages)
        print("Creating count graphs")
        count_score_graph(folder, averages, languages)

    print("Finished\n")
    return (True, (results, averages))
# NOTE(review): a module named `input` shadows the builtin input() within
# this file once imported.
import input
import process
import output

# main
# NOTE(review): `x`, `f` and `n` are not defined anywhere in this excerpt;
# presumably they are meant to come from the imported modules -- confirm.

# Processing stage: three calls on the same input value.
process.value(x)
process.constant(x)
process.Numpy(x)

# Output stage: always emit the result, then pick one extra output by `n`.
output.result(f)

if n == 0:
    output.graph(f)
elif n == 1:
    output.approximation(f)