def __init__(self):
    """Collect the per-feature answers and produce the two figures.

    Loads the competition lookups from ``utils``, copies the experiment
    settings onto the instance, lets ``self.loop()`` run with an empty
    ``self.data`` in place, and then:

    * hands ``self.data`` to ``plot`` with the title
      'Average precision per <feature>' and the path in
      ``os.environ['output_path_pdf']``;
    * builds a seaborn bar plot of recall per part of speech and saves it
      to ``os.environ['barplot_path_pdf']``.

    NOTE(review): ``self.feature`` is presumably set by
    ``settings_to_class_attributes`` and ``self.data`` presumably filled by
    ``loop`` -- neither is visible here, confirm.
    """
    # lookups: competition -> .bin and competition -> xml file
    self.com_to_bin = utils.mapping_competition_to_bin()
    self.com_to_xml = utils.mapping_competition_to_xml()

    # experiment settings become instance attributes
    self.settings_to_class_attributes()

    # feature value -> (competition, system name) -> list of answers
    self.data = defaultdict(lambda: defaultdict(list))

    self.loop()

    # first figure
    feature_name = self.feature
    plot(self.data,
         self.data.keys(),
         'Average precision per %s' % feature_name,
         '%s' % feature_name,
         'precision',
         os.environ['output_path_pdf'],
         12)

    # second figure: one table row per (POS, competition, system)
    pos_tags = ['a', 'n', 'r', 'v']
    competition_order = ['sval2', 'sval3', 'sval2007', 'sval2010', 'sval2013']
    column_names = ['POS', 'Competition (System)', 'Recall']

    rows = []
    for pos_tag in pos_tags:
        for wanted in competition_order:
            # scanning once per competition keeps the rows in competition order
            for key, answers in self.data[pos_tag].iteritems():
                found, system_name = key
                if found != wanted:
                    continue
                recall = float(sum(answers)) / len(answers)
                official_name = filename2official_name[system_name]
                hue_label = old2new[wanted] + ' (%s)' % official_name
                rows.append([pos_tag, hue_label, recall])

    plt.figure(figsize=(15, 8))
    frame = pandas.DataFrame(rows, columns=column_names)
    ax = seaborn.barplot(x='POS',
                         y='Recall',
                         hue='Competition (System)',
                         data=frame)
    ax.legend(loc=2,
              title='Competition (Top System overall $F_{1}$)',
              fontsize=11)
    ax.set_title('Recall per part of speech for each top ranked system',
                 fontsize=16)
    matplotlib.rcParams.update({'font.size': 16})
    plt.xlabel('POS', fontsize=16)
    plt.ylabel('Recall', fontsize=16)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

    output_path = os.environ['barplot_path_pdf']
    plt.savefig(output_path, bbox_inches='tight')
    print(' '.join(('barplot saved to', output_path)))
def __init__(self):
    """Gather lemma data per context level, plot it and write CSV reports.

    Reads three environment variables:

    * ``competitions`` -- underscore-separated competition names;
    * ``allowed_pos``  -- underscore-separated part-of-speech tags;
    * ``output_folder`` -- directory that receives the report files.

    For every context level two tab-separated files are written:
    ``<level>.csv`` (one row per selected lemma) and
    ``<level>.csvstats.csv`` (per-lemma totals, largest first).

    Both files are opened with ``with`` so that the handles are released
    even when writing fails half-way; previously the stats file was only
    closed on the success path.
    """
    # one {'lemmas': {}} bucket per context level
    self.context_levels = ['all', 'document', 'sentence', 'word']
    self.data = {
        context_level: {'lemmas': {}}
        for context_level in self.context_levels
    }

    # experiment settings; keep only the .bin paths of requested competitions
    self.competitions = os.environ['competitions'].split("_")
    self.allowed_pos = os.environ['allowed_pos'].split('_')
    self.com_to_bin = {
        competition: path
        for competition, path
        in utils.mapping_competition_to_bin().iteritems()
        if competition in self.competitions
    }

    # NOTE(review): presumably fills self.data from the .bin files --
    # loop_bins is not visible here, confirm.
    self.loop_bins()

    # plot + write information to file
    utils.plot_it(self.data,
                  self.context_levels,
                  self.competitions,
                  self.allowed_pos)

    # write stats to file
    for context_level in self.context_levels:
        output_file = os.path.join(os.environ['output_folder'],
                                   context_level + ".csv")
        stats = defaultdict(int)

        with open(output_file + "stats.csv", "w") as stat_file:
            with open(output_file, "w") as outfile:
                headers = "\t".join(
                    ['lemma', 'pos', 'num_instances', 'instances'])
                outfile.write(headers + "\n")

                lemmas = self.data[context_level]['lemmas']
                for info in lemmas.itervalues():
                    for lemma, d in info.iteritems():
                        not_one_sense, how_much = utils.one_sense_used(
                            d['instances'])
                        # only lemmas with at least two instances for which
                        # the helper's first return value is truthy
                        if d['num_instances'] >= 2 and not_one_sense:
                            stats[lemma] += how_much
                            output_line = [
                                lemma, d['pos'], d['num_instances']
                            ] + d['instances']
                            outfile.write(
                                "\t".join(str(field)
                                          for field in output_line) + "\n")

            stat_file.write("lemma\toccurences\n")
            ranked = sorted(stats.iteritems(),
                            key=operator.itemgetter(1),
                            reverse=True)
            for lemma, instances in ranked:
                stat_file.write("%s\t%s\n" % (lemma, instances))
def __init__(self):
    """Set up the lookups and empty tallies, then run the loop and plot.

    ``self.mfs`` and ``self.notmfs`` start as empty dicts and
    ``self.labels`` lists the five competition identifiers.

    NOTE(review): ``loop`` presumably fills the two dicts and ``plot_it``
    presumably reads them -- neither method is visible here, confirm.
    """
    # lookups: competition -> .bin, then competition -> xml file
    self.com_to_bin = utils.mapping_competition_to_bin()
    self.com_to_xml = utils.mapping_competition_to_xml()

    # fresh, independent tallies
    self.mfs, self.notmfs = {}, {}

    # competition identifiers, in the order given here
    self.labels = [
        'sval2',
        'sval3',
        'sval2007',
        'sval2010',
        'sval2013',
    ]

    # do the work, then draw the result
    self.loop()
    self.plot_it()
def __init__(self):
    """Load lookups and settings, run the loop and compute statistics.

    ``self.loop()`` must return a pair; its second element is printed and
    both elements are passed on to ``self.compute_stats``.
    """
    # lookups: competition -> .bin, then competition -> xml file
    self.com_to_bin = utils.mapping_competition_to_bin()
    self.com_to_xml = utils.mapping_competition_to_xml()

    # experiment settings become instance attributes
    self.settings_to_class_attributes()

    # run the loop and split its two-part result
    loop_result = self.loop()
    stats, comp_stats = loop_result

    print(comp_stats)
    self.compute_stats(stats, comp_stats)
def __init__(self):
    """Run the experiment loop and write one error rate per competition.

    For every competition in ``self.competitions`` that has an entry in
    ``self.results``, the percentage of ``0`` values in that entry is
    written as ``<competition>\\t<error rate>`` to the file named by
    ``os.environ['output_path_txt']``.

    The file is opened with ``with`` so the handle is released even when
    an exception interrupts the writing; previously it was only closed on
    the success path.

    Raises:
        ZeroDivisionError: if a competition's result list is empty.

    NOTE(review): ``self.competitions`` and ``self.results`` are presumably
    set by ``settings_to_class_attributes`` / ``loop`` -- neither is
    visible here, confirm.
    """
    # lookups: competition -> .bin, then competition -> xml file
    self.com_to_bin = utils.mapping_competition_to_bin()
    self.com_to_xml = utils.mapping_competition_to_xml()

    # experiment settings become instance attributes
    self.settings_to_class_attributes()

    self.loop()

    # write the error rates (a text report, not a plot)
    with open(os.environ['output_path_txt'], "w") as outfile:
        for competition in self.competitions:
            if competition not in self.results:
                continue
            answers = self.results[competition]
            mistakes = float(answers.count(0))
            error_rate = 100 * (mistakes / len(answers))
            outfile.write("%s\t%s\n" % (competition, error_rate))
#import general modules
import cPickle
import os

#modules in this cwd
import utils

# Purpose: collect descriptive statistics (number of instances, parts of
# speech, number of lemmas, type/token ratio) for the competition named in
# os.environ['competition'].

#obtain exp settings from os.environ and load dict
competition = os.environ['competition']
output_path = os.environ['output_path']
com_to_bin = utils.mapping_competition_to_bin()
path_bin = com_to_bin[competition]

# NOTE(review): unpickling can execute arbitrary code; only load .bin files
# from a trusted location.
# The handle is closed as soon as loading finishes (it used to stay open).
with open(path_bin) as infile:
    com, d = cPickle.load(infile)

# (label, value) pairs, appended in order
output = []

#num instances
num_tokens = len(d)
output.append(("# instances", num_tokens))

#pos: distinct tags, leaving out 'u'
pos = set(value['pos'] for value in d.itervalues() if value['pos'] != 'u')
pos = " ".join(pos)
output.append(("POS", pos))

#types
num_types = len(set(value['lemma'] for value in d.itervalues()))
output.append(("# lemmas", num_types))

#type token ratio, rounded to two decimals
#(raises ZeroDivisionError when the loaded dict is empty)
type_token_ratio = float(num_types) / float(num_tokens)
type_token_ratio = round(type_token_ratio, 2)