class Domain:
    """A named domain holding sensitivities, base classifications and groups.

    All collections are ordered dictionaries, so iteration follows insertion
    order.
    """

    def __init__(self, name):
        self.name = name
        self.sensitivities = OrderedDict()
        self.baseClassifications = OrderedDict()
        self.rawToTrans = OrderedDict()
        self.transToRaw = OrderedDict()
        self.groups = OrderedDict()

    def addSensitivity(self, sensitivity, level):
        """Register ``level`` under ``sensitivity`` unless the key already exists.

        An existing entry is never overwritten.
        """
        if sensitivity not in self.sensitivities:
            self.sensitivities[sensitivity] = level

    def findSensitivityByName(self, sensitivity):
        """Return ``sensitivity`` if it equals one of the stored values, else None.

        The comparison is made against the dictionary *values* (what
        ``addSensitivity`` receives as ``level``), not against the keys.
        """
        for value in self.sensitivities.values():
            if value == sensitivity:
                return sensitivity
        return None

    def addGroup(self, groupName):
        """Create a ``Group`` called ``groupName``, store it and return it.

        An existing group with the same name is replaced.
        """
        group = Group(groupName)
        self.groups[groupName] = group
        return group

    def addBaseClassification(self, bcName):
        """Create a ``BaseClassification`` called ``bcName``, store it and return it.

        An existing base classification with the same name is replaced.
        """
        baseClassification = BaseClassification(bcName)
        self.baseClassifications[bcName] = baseClassification
        return baseClassification

    def str(self):
        """Return a human-readable dump of the domain.

        The accumulator is deliberately not called ``str``: doing so hides the
        builtin and makes the ``str(...)`` conversions below fail with
        ``TypeError`` as soon as a base classification or group exists.
        """
        parts = ["Domain name: %s\n" % (self.name), "Sensitivities:"]
        for key, level in self.sensitivities.items():
            parts.append("%s %s\n" % (key, level))
        parts.append("Base classifications:\n")
        for base_classification in self.baseClassifications.values():
            parts.append(str(base_classification))
        parts.append("Groups:\n")
        for group in self.groups.values():
            parts.append(str(group))
        return "".join(parts)
class Domain:
    """A named domain holding sensitivities, base classifications and groups.

    All collections are ordered dictionaries, so iteration follows insertion
    order.
    """

    def __init__(self, name):
        self.name = name
        self.sensitivities = OrderedDict()
        self.baseClassifications = OrderedDict()
        self.rawToTrans = OrderedDict()
        self.transToRaw = OrderedDict()
        self.groups = OrderedDict()

    def addSensitivity(self, sensitivity, level):
        """Register ``level`` under ``sensitivity`` unless the key already exists.

        An existing entry is never overwritten.
        """
        if sensitivity not in self.sensitivities:
            self.sensitivities[sensitivity] = level

    def findSensitivityByName(self, sensitivity):
        """Return ``sensitivity`` if it equals one of the stored values, else None.

        The comparison is made against the dictionary *values* (what
        ``addSensitivity`` receives as ``level``), not against the keys.
        """
        for value in self.sensitivities.values():
            if value == sensitivity:
                return sensitivity
        return None

    def addGroup(self, groupName):
        """Create a ``Group`` called ``groupName``, store it and return it.

        An existing group with the same name is replaced.
        """
        group = Group(groupName)
        self.groups[groupName] = group
        return group

    def addBaseClassification(self, bcName):
        """Create a ``BaseClassification`` called ``bcName``, store it and return it.

        An existing base classification with the same name is replaced.
        """
        baseClassification = BaseClassification(bcName)
        self.baseClassifications[bcName] = baseClassification
        return baseClassification

    def str(self):
        """Return a human-readable dump of the domain.

        The accumulator is deliberately not called ``str``: doing so hides the
        builtin and makes the ``str(...)`` conversions below fail with
        ``TypeError`` as soon as a base classification or group exists.
        """
        parts = ["Domain name: %s\n" % (self.name), "Sensitivities:"]
        for key, level in self.sensitivities.items():
            parts.append("%s %s\n" % (key, level))
        parts.append("Base classifications:\n")
        for base_classification in self.baseClassifications.values():
            parts.append(str(base_classification))
        parts.append("Groups:\n")
        for group in self.groups.values():
            parts.append(str(group))
        return "".join(parts)
def get_stats(self, dim, prefix=''):
    """Build a one-row DataTable of 1d statistics for dimension ``dim``.

    The columns are, in order: num_cells, min, max, average, std, median and
    gaussian_fit, each column name being prepended with ``prefix``.  The
    returned table records this table under ``properties['original_table']``.
    """
    points = self.get_points(dim)

    summary = OrderedDict()
    summary[prefix + 'num_cells'] = self.num_cells
    summary[prefix + 'min'] = np.min(points)
    summary[prefix + 'max'] = np.max(points)
    mean = np.average(points)
    summary[prefix + 'average'] = mean
    deviation = np.std(points)
    summary[prefix + 'std'] = deviation
    summary[prefix + 'median'] = np.median(points)
    # The fit is evaluated against the mean/std that were just computed.
    summary[prefix + 'gaussian_fit'] = self.gaussian_pdf_compare(
        dim, 100, mean, deviation)

    table_name = self.sub_name('stats for %s' % dim)
    result = DataTable(np.array([summary.values()]), summary.keys(),
                       name=table_name)
    result.properties['original_table'] = self
    return result
def get_stats(self, dim, prefix=''):
    """Build a one-row DataTable of 1d statistics for dimension ``dim``.

    The columns are, in order: num_cells, min, max, average, std, median and
    gaussian_fit, each column name being prepended with ``prefix``.  The
    returned table records this table under ``properties['original_table']``.
    """
    points = self.get_points(dim)

    summary = OrderedDict()
    summary[prefix + 'num_cells'] = self.num_cells
    summary[prefix + 'min'] = np.min(points)
    summary[prefix + 'max'] = np.max(points)
    mean = np.average(points)
    summary[prefix + 'average'] = mean
    deviation = np.std(points)
    summary[prefix + 'std'] = deviation
    summary[prefix + 'median'] = np.median(points)
    # The fit is evaluated against the mean/std that were just computed.
    summary[prefix + 'gaussian_fit'] = self.gaussian_pdf_compare(
        dim, 100, mean, deviation)

    table_name = self.sub_name('stats for %s' % dim)
    result = DataTable(np.array([summary.values()]), summary.keys(),
                       name=table_name)
    result.properties['original_table'] = self
    return result
class Group:
    """A named collection of codewords plus its formatting settings."""

    def __init__(self, name):
        self.name = name
        # Maps a codeword key to its codeword object, in insertion order.
        self.wordDict = OrderedDict()
        self.whitespace = ""
        self.join = ""
        self.default = ""
        self.prefixes = []
        self.suffixes = []

    def __str__(self):
        """Return a multi-line, human-readable description of the group."""
        pieces = [
            "\nName: %s\n" % self.name,
            "Whitespace: %s\n" % self.whitespace,
            "Join: %s\n" % self.join,
            "Default: %s\n" % self.default,
            "Prefixes: %s\n" % self.prefixes,
            "Suffixes: %s\n" % self.suffixes,
            "Words:\n",
        ]
        for word, entry in self.wordDict.items():
            pieces.append("%s %s\n" % (word, entry))
        return "".join(pieces)

    def set_used_codeword(self, codeword):
        """Flag the codeword stored under key ``codeword`` as used."""
        entry = self.wordDict[codeword]
        entry.used = True

    def count_used_codewords(self):
        """Return how many stored codewords have a truthy ``used`` flag."""
        return sum(1 for entry in self.wordDict.values() if entry.used)

    def _codewords_where(self, used):
        # Lazy generator: the dictionary is only read once iteration starts.
        for entry in self.wordDict.values():
            if bool(entry.used) is used:
                yield entry

    def get_used_codeword(self):
        """Return a generator over the codewords whose ``used`` flag is set."""
        return self._codewords_where(True)

    def get_unused_codeword(self):
        """Return a generator over the codewords whose ``used`` flag is not set."""
        return self._codewords_where(False)
class Group:
    """A named collection of codewords plus its formatting settings."""

    def __init__(self, name):
        self.name = name
        # Maps a codeword key to its codeword object, in insertion order.
        self.wordDict = OrderedDict()
        self.whitespace = ""
        self.join = ""
        self.default = ""
        self.prefixes = []
        self.suffixes = []

    def __str__(self):
        """Return a multi-line, human-readable description of the group."""
        pieces = [
            "\nName: %s\n" % self.name,
            "Whitespace: %s\n" % self.whitespace,
            "Join: %s\n" % self.join,
            "Default: %s\n" % self.default,
            "Prefixes: %s\n" % self.prefixes,
            "Suffixes: %s\n" % self.suffixes,
            "Words:\n",
        ]
        for word, entry in self.wordDict.items():
            pieces.append("%s %s\n" % (word, entry))
        return "".join(pieces)

    def set_used_codeword(self, codeword):
        """Flag the codeword stored under key ``codeword`` as used."""
        entry = self.wordDict[codeword]
        entry.used = True

    def count_used_codewords(self):
        """Return how many stored codewords have a truthy ``used`` flag."""
        return sum(1 for entry in self.wordDict.values() if entry.used)

    def _codewords_where(self, used):
        # Lazy generator: the dictionary is only read once iteration starts.
        for entry in self.wordDict.values():
            if bool(entry.used) is used:
                yield entry

    def get_used_codeword(self):
        """Return a generator over the codewords whose ``used`` flag is set."""
        return self._codewords_where(True)

    def get_unused_codeword(self):
        """Return a generator over the codewords whose ``used`` flag is not set."""
        return self._codewords_where(False)
def cheat(url, mappings):
    """Scale the ``uploaded`` amount of a BitTorrent tracker URL.

    Any URL that is not a recognisable announce with a non-zero numeric
    ``uploaded`` field is returned unchanged.

    Args:
        url: The URL about to be requested.
        mappings: Maps a tracker hostname to its upload multiplier.  The
            ``"default"`` entry, when present, applies to hostnames that have
            no entry of their own.

    Returns:
        ``url`` with its ``uploaded`` value multiplied, or ``url`` itself when
        nothing has to change (no query, no/zero/non-numeric ``uploaded``, no
        applicable multiplier, or a multiplier of 1).
    """
    parsed = urlparse.urlparse(url)
    if "=" not in parsed.query:
        return url

    # partition() keeps every field intact: bare flags ("compact") and values
    # that themselves contain "=" would make a plain split("=") unusable as
    # key/value pairs.  A list (not a dict) also preserves repeated fields.
    fields = [list(item.partition("=")) for item in parsed.query.split("&")]

    # With repeated "uploaded" fields the last one is the one rewritten.
    target = None
    for field in fields:
        if field[0] == "uploaded" and field[1]:
            target = field
    if target is None or target[2] == "0":
        return url

    if parsed.hostname in mappings:
        multiple = mappings[parsed.hostname]
    elif "default" in mappings:
        multiple = mappings["default"]
    else:
        return url

    # Don't bother munging the URL if the upload amount isn't going to change.
    if multiple == 1:
        return url

    try:
        uploaded = int(target[2])
    except ValueError:
        # Not a number: not something we know how to fake, pass it through.
        return url

    # int() keeps the value an integer even for fractional multipliers.
    fakeupload = int(uploaded * multiple)
    logging.getLogger("cheatbt").debug(
        "%s: %d -> %d", parsed.hostname, uploaded, fakeupload)
    target[2] = str(fakeupload)

    new_query = "&".join("".join(field) for field in fields)

    # <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    return urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path,
                                parsed.params, new_query, parsed.fragment))
class BaseClassification:
    """A named base classification with its own ordered sensitivity table."""

    def __init__(self, name):
        self.name = name
        self.sensitivities = OrderedDict()

    def addSensitivity(self, sensitivity, level):
        """Register ``level`` under ``sensitivity`` unless the key already exists.

        An existing entry is never overwritten.
        """
        if sensitivity not in self.sensitivities:
            self.sensitivities[sensitivity] = level

    def findSensitivityByName(self, sensitivity):
        """Look ``sensitivity`` up among the stored *values*.

        Returns:
            A ``(value, key)`` tuple for the first entry whose value equals
            ``sensitivity``, or None when there is no such entry.
        """
        for key, value in self.sensitivities.items():
            if value == sensitivity:
                return value, key
        return None

    def __str__(self):
        """Return a multi-line, human-readable description."""
        parts = ["Name: %s\n" % (self.name), "Sensitivities:\n"]
        for key, value in self.sensitivities.items():
            parts.append("%s %s\n" % (key, value))
        return "".join(parts)
class BaseClassification:
    """A named base classification with its own ordered sensitivity table."""

    def __init__(self, name):
        self.name = name
        self.sensitivities = OrderedDict()

    def addSensitivity(self, sensitivity, level):
        """Register ``level`` under ``sensitivity`` unless the key already exists.

        An existing entry is never overwritten.
        """
        if sensitivity not in self.sensitivities:
            self.sensitivities[sensitivity] = level

    def findSensitivityByName(self, sensitivity):
        """Look ``sensitivity`` up among the stored *values*.

        Returns:
            A ``(value, key)`` tuple for the first entry whose value equals
            ``sensitivity``, or None when there is no such entry.
        """
        for key, value in self.sensitivities.items():
            if value == sensitivity:
                return value, key
        return None

    def __str__(self):
        """Return a multi-line, human-readable description."""
        parts = ["Name: %s\n" % (self.name), "Sensitivities:\n"]
        for key, value in self.sensitivities.items():
            parts.append("%s %s\n" % (key, value))
        return "".join(parts)
class Model(object):
    """Symbol tables and equations of a system of nonlinear equations.

    The model collects identifiers, parameters, variables, equations, initial
    guesses and bounds, generates the source of a residual function from them
    and hands that function to the ``dnsqe_nice`` solver.
    """

    # Functions usable inside equations; each maps to its ``math.<name>``.
    _BUILTIN_FUNCTIONS = (
        'exp', 'log', 'log10', 'pow', 'sqrt', 'sin', 'cos', 'tan', 'hypot',
        'asin', 'acos', 'atan', 'radians', 'degrees',
    )

    def __init__(self):
        self.Identifiers = OrderedDict()
        self.Parameters = OrderedDict()
        self.Variables = OrderedDict()
        self.Equations = OrderedDict()
        self.DifferentialEquations = OrderedDict()
        self.Functions = OrderedDict()
        self.InitialValues = OrderedDict()
        self.Bounds = {}
        # ---------Some builtin functions-------
        for func_name in self._BUILTIN_FUNCTIONS:
            self.Functions[func_name] = ['math.' + func_name]

    def _variable_index(self, name):
        # Position of ``name`` in insertion order.  Computed from the key list
        # because the standard OrderedDict offers no ``index`` method.
        return list(self.Variables.keys()).index(name)

    def addInitialValue(self, name, value):
        """Record ``value`` as the initial guess expression for ``name``."""
        self.InitialValues[name] = value

    def addIdentifier(self, name):
        """Return the generated identifier for ``name``, creating it if needed.

        Identifiers have the form ``id_NNN`` where NNN is the zero-padded
        number of identifiers that existed when the name was first seen.
        """
        if name not in self.Identifiers:
            self.Identifiers[name] = 'id_' + str(len(self.Identifiers)).zfill(3)
        return self.Identifiers[name]

    def addVariable(self, id):
        """Register ``id`` as an unknown and return its ``x[i]`` expression.

        Raises:
            EquationSenseError: if ``id`` carries attributes.
        """
        attrs_present, parts = has_attributes(id)
        if attrs_present:
            raise EquationSenseError('Cannot use attributes in equation')
        if id not in self.Variables:
            self.Variables[id] = 0.0
        return 'x[' + str(self._variable_index(id)) + ']'

    def addEquation(self, LHS, RHS, eqn_id=None):
        """Store the equation ``LHS = RHS``.

        Without ``eqn_id`` the equation is keyed ``'#<n>'`` where n is the
        number of stored equations plus one.
        """
        if eqn_id is None:
            eqn_id = '#' + str(len(self.Equations) + 1)
        self.Equations[eqn_id] = (LHS, RHS)

    def addVariableBound(self, id, lower_bound, upper_bound):
        """Set or update the ``(lower, upper)`` bound pair of ``id``.

        A bound given as None leaves the corresponding existing bound
        untouched (or unset for a new entry); other values are converted
        with ``float``.
        """
        lower, upper = self.Bounds.get(id, None) or (None, None)
        if lower_bound is not None:
            lower = float(lower_bound)
        if upper_bound is not None:
            upper = float(upper_bound)
        self.Bounds[id] = (lower, upper)

    def addParameter(self, id, value):
        """Register a parameter, or an attribute (guess/min/max) of a variable.

        ``name.guess`` stores an initial value, ``name.min`` / ``name.max``
        store a lower / upper bound.  A plain name becomes a parameter and is
        removed from the variables if it had been registered as one.

        Raises:
            EquationSenseError: for any other attribute combination.
        """
        attrs_present, parts = has_attributes(id)
        if attrs_present:
            actual_id = id.split('.')[0]
            parts = [p.lower() for p in parts]
            if parts == ['guess']:
                self.addInitialValue(actual_id, value)
            elif parts == ['min']:
                self.addVariableBound(actual_id, value, None)
            elif parts == ['max']:
                self.addVariableBound(actual_id, None, value)
            else:
                raise EquationSenseError(
                    'Only supports guess, min and max attributes')
        else:
            # A parameter defined after its first use in an equation was
            # registered as a variable at that point; move it over.
            if id in self.Variables:
                del self.Variables[id]
            self.Parameters[id] = value

    def addFunction(self, func_id):
        """Return the expression to emit for a call to ``func_id``.

        Raises:
            EquationSenseError: if ``func_id`` is not a known function.
        """
        if func_id not in self.Functions:
            raise EquationSenseError(
                '%s is not a valid function in this context.' % func_id)
        return self.Functions[func_id][0]

    def generate_nle(self):
        """Return the Python source of the residual function ``f_nle``.

        The function assigns every parameter, binds every variable to its
        slot in ``x`` and stores ``(LHS) - (RHS)`` of each equation in ``f``.

        Raises:
            EquationSenseError: if the number of variables differs from the
                number of equations.
        """
        # Make sure number of variables == number of equations
        if len(self.Variables) != len(self.Equations):
            raise EquationSenseError(
                'Number of equations does not equal number of variables (No least squares solutions yet!)')

        line_count = 1
        lines = ['def f_nle(n, x, f, iflag): #%d\n' % (line_count)]  # for dnsqe
        line_count += 1

        # Dump out all the parameters
        for param in self.Parameters.keys():
            lines.append('\t%s = %s #%s %d\n' % (
                self.Identifiers[param], self.Parameters[param], param,
                line_count))
            line_count += 1

        # Dump out all the variables
        for var_count, var in enumerate(self.Variables.keys()):
            lines.append('\t%s = x[%d] #%s %d\n' % (
                self.Identifiers[var], var_count, var, line_count))
            line_count += 1

        # Dump out all the equations
        for eqn_count, eqn in enumerate(self.Equations.keys()):
            LHS, RHS = self.Equations[eqn]
            eqn_string = '(%s) - (%s)' % (LHS, RHS)
            lines.append('\tf[%d] = %s;\n' % (eqn_count, eqn_string))

        lines.append('\treturn True\n')
        return ''.join(lines)

    def generate_nle_initial_guess(self, default=1.0):
        """Return the ``(x, f, bounds)`` starting data for the solver.

        ``x`` holds ``default`` for every variable unless an initial value
        was given, ``f`` is filled with -1.0 and ``bounds`` is None when no
        bound was ever set, else one entry (pair or None) per variable.
        """
        names = list(self.Variables.keys())
        n = len(names)
        x = [default] * n
        f = [-1.0] * n
        for initvalue in self.InitialValues.keys():
            if initvalue in self.Variables:
                # NOTE(review): eval() executes the stored expression; make
                # sure initial values never come from an untrusted source.
                x[names.index(initvalue)] = eval(self.InitialValues[initvalue])

        bounds = None
        # Do we have any bounds?
        if len(self.Bounds) > 0:
            bounds = [self.Bounds.get(name, None) for name in names]
        return x, f, bounds

    def solve(self, x, f, bounds=None):
        """Solve the system from the starting point ``x`` and report the result.

        The solved values are written back to ``self.Variables``; the residual
        norm and solver status are kept in ``self.fnorm`` and ``self.info``.

        Returns:
            A printable report of the solution (or of the solver message when
            the status code is not 1).
        """
        # Execute the generated source in an explicit namespace so that the
        # resulting function can be fetched reliably; module globals stay
        # visible to it (the generated code refers to ``math``).
        namespace = {}
        exec(self.generate_nle(), globals(), namespace)
        f_nle = namespace['f_nle']

        x, f, fnorm, info_code, info_mesg = dnsqe_nice(
            f_nle, None, x, bounds=bounds)

        for name, value in zip(list(self.Variables.keys()), x):
            self.Variables[name] = value

        self.fnorm = fnorm
        self.info = info_code, info_mesg

        if info_code == 1:
            heading = "Most recent solution:\n"
        else:
            heading = info_mesg
        ret = heading
        ret = ret + "---------------------\n"
        ret = ret + "Solution norm: " + str(self.fnorm) + "\n"
        ret = ret + pretty_variables(self.Variables)
        return '\n' + ret + '\n'
class Config(dict):
    """Categorised settings with metadata, exposed as a plain nested dict.

    The complete description of every setting (value, about, type and any
    extra keys) lives in ``self.config``.  The dictionary interface of the
    instance itself mirrors only the values:
    ``self[category][setting] == value``.
    """

    def __init__(self, user_file=None):
        super(Config, self).__init__()
        self.config = OrderedDict()
        self.user_file = user_file
        self.update(self.get_running_config())

    def set_user_file(self, user_file):
        """Set the path read by ``update_from_user_file``."""
        self.user_file = user_file

    def add_category(self, category):
        """Create ``category`` if it does not exist yet."""
        if category not in self.config:
            self.config[category] = OrderedDict()

    def add_setting(self, category, setting, value, about='', type=0,
                    stub=False, **kwargs):
        """Create or overwrite a setting and refresh the dict view.

        Args:
            category: Category name; created on demand.
            setting: Setting name inside the category.
            value: The setting's value.
            about: Description, stored unless ``stub`` is true.
            type: Type marker, stored unless ``stub`` is true.
            stub: When true only the value (and extra keys) are stored.
            **kwargs: Extra metadata stored alongside the value.
        """
        assert category is not None, 'Must specify a category'
        assert setting is not None, 'Must specify a setting'
        assert value is not None, 'Must specify a value'
        self.add_category(category)
        if setting not in self.config[category]:
            self.config[category][setting] = OrderedDict()
        entry = self.config[category][setting]
        entry['value'] = value
        if not stub:
            entry['about'] = about
            entry['type'] = type
        for key in kwargs:
            entry[key] = kwargs[key]
        self.update(self.get_running_config())

    def get_setting(self, category, setting, complete=False):
        """Return a setting's value, or its whole record when ``complete``."""
        assert category is not None, 'Must specify a category'
        assert setting is not None, 'Must specify a setting'
        assert category in self.config, 'Category does not exist'
        assert setting in self.config[category], \
            'Setting in category does not exist'
        assert 'value' in self.config[category][setting], \
            'Setting in category has no value'
        if complete:
            return self.config[category][setting]
        return self.config[category][setting]['value']

    def update_setting_value(self, category, setting, value):
        """Change the value of an existing setting; unknown ones are ignored."""
        assert category is not None, 'Must specify a category'
        assert setting is not None, 'Must specify a setting'
        assert value is not None, 'Must specify a value'
        if category in self.config and setting in self.config[category]:
            self.config[category][setting]['value'] = value
            self.update(self.get_running_config())

    def get_settings(self, category):
        """Return the list of setting names of ``category``."""
        assert category is not None, 'Must specify a category'
        assert category in self.config, 'Category does not exist'
        return list(self.config[category].keys())

    def get_categories(self):
        """Return the list of category names."""
        return list(self.config.keys())

    def update_from_string(self, json_config):
        """Apply the values found in a JSON document to existing settings.

        The document must look like ``{category: {setting: {"value": v}}}``.
        Anything that cannot be parsed or does not have that shape is
        reported on stderr instead of raising; values applied before the
        problem was met are kept.
        """
        try:
            config = json.loads(json_config)
            for category in config.keys():
                for setting in config[category].keys():
                    self.update_setting_value(
                        category, setting, config[category][setting]['value'])
        except (ValueError, KeyError, TypeError, AttributeError):
            sys.stderr.write('Empty or malformed config file found!\n')

    def update_from_user_file(self):
        """Apply the user file, if one is configured and exists."""
        if self.user_file and os.path.isfile(self.user_file):
            with open(self.user_file, 'r') as f:
                return self.update_from_string(f.read())

    def get_running_config(self):
        """Return a fresh ``{category: {setting: value}}`` dictionary."""
        tmpCfg = {}
        for category in self.config.keys():
            tmpCfg[category] = {}
            for setting in self.config[category].keys():
                tmpCfg[category][setting] = \
                    self.config[category][setting]['value']
        return tmpCfg

    def __str__(self):
        """Return the running configuration as compact, key-sorted JSON."""
        return json.dumps(self.get_running_config(), separators=(',', ':'),
                          sort_keys=True)
class Config(dict):
    """Categorised settings with metadata, exposed as a plain nested dict.

    The complete description of every setting (value, about, type and any
    extra keys) lives in ``self.config``.  The dictionary interface of the
    instance itself mirrors only the values:
    ``self[category][setting] == value``.
    """

    def __init__(self, user_file=None):
        super(Config, self).__init__()
        self.config = OrderedDict()
        self.user_file = user_file
        self.update(self.get_running_config())

    def set_user_file(self, user_file):
        """Set the path read by ``update_from_user_file``."""
        self.user_file = user_file

    def add_category(self, category):
        """Create ``category`` if it does not exist yet."""
        if category not in self.config:
            self.config[category] = OrderedDict()

    def add_setting(self, category, setting, value, about='', type=0,
                    stub=False, **kwargs):
        """Create or overwrite a setting and refresh the dict view.

        Args:
            category: Category name; created on demand.
            setting: Setting name inside the category.
            value: The setting's value.
            about: Description, stored unless ``stub`` is true.
            type: Type marker, stored unless ``stub`` is true.
            stub: When true only the value (and extra keys) are stored.
            **kwargs: Extra metadata stored alongside the value.
        """
        assert category is not None, 'Must specify a category'
        assert setting is not None, 'Must specify a setting'
        assert value is not None, 'Must specify a value'
        self.add_category(category)
        if setting not in self.config[category]:
            self.config[category][setting] = OrderedDict()
        entry = self.config[category][setting]
        entry['value'] = value
        if not stub:
            entry['about'] = about
            entry['type'] = type
        for key in kwargs:
            entry[key] = kwargs[key]
        self.update(self.get_running_config())

    def get_setting(self, category, setting, complete=False):
        """Return a setting's value, or its whole record when ``complete``."""
        assert category is not None, 'Must specify a category'
        assert setting is not None, 'Must specify a setting'
        assert category in self.config, 'Category does not exist'
        assert setting in self.config[category], \
            'Setting in category does not exist'
        assert 'value' in self.config[category][setting], \
            'Setting in category has no value'
        if complete:
            return self.config[category][setting]
        return self.config[category][setting]['value']

    def update_setting_value(self, category, setting, value):
        """Change the value of an existing setting; unknown ones are ignored."""
        assert category is not None, 'Must specify a category'
        assert setting is not None, 'Must specify a setting'
        assert value is not None, 'Must specify a value'
        if category in self.config and setting in self.config[category]:
            self.config[category][setting]['value'] = value
            self.update(self.get_running_config())

    def get_settings(self, category):
        """Return the list of setting names of ``category``."""
        assert category is not None, 'Must specify a category'
        assert category in self.config, 'Category does not exist'
        return list(self.config[category].keys())

    def get_categories(self):
        """Return the list of category names."""
        return list(self.config.keys())

    def update_from_string(self, json_config):
        """Apply the values found in a JSON document to existing settings.

        The document must look like ``{category: {setting: {"value": v}}}``.
        Anything that cannot be parsed or does not have that shape is
        reported on stderr instead of raising; values applied before the
        problem was met are kept.
        """
        try:
            config = json.loads(json_config)
            for category in config.keys():
                for setting in config[category].keys():
                    self.update_setting_value(
                        category, setting, config[category][setting]['value'])
        except (ValueError, KeyError, TypeError, AttributeError):
            sys.stderr.write('Empty or malformed config file found!\n')

    def update_from_user_file(self):
        """Apply the user file, if one is configured and exists."""
        if self.user_file and os.path.isfile(self.user_file):
            with open(self.user_file, 'r') as f:
                return self.update_from_string(f.read())

    def get_running_config(self):
        """Return a fresh ``{category: {setting: value}}`` dictionary."""
        tmpCfg = {}
        for category in self.config.keys():
            tmpCfg[category] = {}
            for setting in self.config[category].keys():
                tmpCfg[category][setting] = \
                    self.config[category][setting]['value']
        return tmpCfg

    def __str__(self):
        """Return the running configuration as compact, key-sorted JSON."""
        return json.dumps(self.get_running_config(), separators=(',', ':'),
                          sort_keys=True)
def view(self, tables):
    """Draw the control panel and one KDE plot per selected dimension.

    For every dimension chosen in the ``dims`` widget a figure is created in
    which each input table is drawn as a curve, shifted vertically by a
    multiple of the ``shift`` widget value.  With two or more tables the
    figure is preceded by the two-sample KS statistic of the first two
    tables.  A legend figure is placed in front of the plots.

    Args:
        tables: The input tables; at least one is needed to draw anything.

    Returns:
        The layout widget's view combining the plots and the control panel,
        or a plain message view when ``tables`` is empty.
    """
    if not tables:
        return View(self, 'No tables to show.')

    # NOTE(review): the remember-keys of the color and text widgets look
    # swapped ('histogram text' is used for color and vice versa).  They are
    # kept as they are so previously remembered choices stay attached to the
    # same widget -- confirm before changing.
    self.widgets.color.guess_or_remember(('histogram text', tables), ['name'])
    self.widgets.text.guess_or_remember(('histogram colors', tables), ['name'])
    self.widgets.shift.guess_or_remember(('histogram shift', tables), '0.2')
    self.widgets.sort_inside.guess_or_remember(
        ('histogram sort inside', tables), ['similarity'])
    self.widgets.sort_outside.guess_or_remember(
        ('histogram sort outside', tables), ['sort'])
    self.widgets.trim.guess_or_remember(('histogram trim', tables), ['no'])
    self.widgets.trim_thresh.guess_or_remember(
        ('histogram trim thresh', tables), '0')

    sort_inside_options = [
        ('unsort', 'Keep original order'),
        ('similarity', 'Put similar curves together'),
    ]
    sort_inside_options += [(x, 'Sort by %s' % x)
                            for x in tables[0].tags.keys()]

    # Create the control panel view. This will enable users to choose the
    # dimensions.
    control_panel_view = stack_lines(
        self.widgets.dims.view('Dimension', self.widgets.apply,
                               options_from_table(tables[0])),
        self.widgets.text.view('Text by', self.widgets.apply,
                               tables[0].tags.keys()),
        self.widgets.color.view('Color by', self.widgets.apply,
                                tables[0].tags.keys()),
        self.widgets.shift.view('Shift for multiple curves',
                                self.widgets.apply),
        self.widgets.sort_inside.view('Curve sorting', self.widgets.apply,
                                      sort_inside_options, multiple=False),
        self.widgets.sort_outside.view(
            'Plot sorting', self.widgets.apply,
            [('sort', 'Put plots with many differences first'),
             ('unsort', 'Keep original order')],
            multiple=False),
        self.widgets.trim.view(
            'Trim plots', self.widgets.apply,
            [('yes', 'Convert values lower than threshold to 0'),
             ('no', 'Don\'t trim')],
            multiple=False),
        self.widgets.trim_thresh.view('Trim threshold', self.widgets.apply),
        self.widgets.apply.view())

    main_views = []
    shift = self.widgets.shift.value_as_float()
    plots_for_legend = OrderedDict()
    colorer = axes.Colorer()

    # Check that the user has already chosen dimensions. Otherwise, ask them
    # to do so.
    if self.widgets.dims.values.choices:
        timer = MultiTimer(len(self.widgets.dims.values.choices))
        for dim in self.widgets.dims.values.choices:
            try:
                # Go over every dimension and create the histogram.
                # First create a new figure:
                fig = self.create_and_adjust_figure(tables)
                ax = fig.add_subplot(111)

                # The trim threshold is the same for every curve of the plot.
                trim_thresh = None
                if self.widgets.trim.get_choices()[0] == 'yes':
                    trim_thresh = self.widgets.trim_thresh.value_as_float()

                # Decide in which order the curves are stacked.
                sort_method = self.widgets.sort_inside.values.choices[0]
                if sort_method == 'unsort':
                    sorted_tables = tables
                elif sort_method == 'similarity':
                    distances = datatable.ks_distances(
                        tables, dim, trim_thresh)
                    sorted_tables = greedy_distance_sort(distances, tables)
                else:
                    # Any other choice is the name of a tag to sort by.
                    sorted_tables = sorted(
                        tables, key=lambda table: table.tags[sort_method])

                # Draw the histogram for every input.
                color_tags = self.widgets.color.values.choices
                for curve_index, table in enumerate(sorted_tables):
                    color_key = tuple([table.tags[c] for c in color_tags])
                    plot = axes.kde1d(
                        ax, table, dim,
                        color=colorer.get_color(color_key),
                        min_x=trim_thresh,
                        shift=shift * curve_index)
                    plots_for_legend[color_key] = plot

                # Add ticks with table names.  The positions are built by
                # multiplication: a float step handed to np.arange can yield
                # one tick more than there are labels.
                if shift > 0:
                    ax.set_yticks(np.arange(len(sorted_tables)) * shift)
                    ax.set_yticklabels(
                        [t.get_tags(self.widgets.text.values.choices)
                         for t in sorted_tables],
                        size='xx-small')
                # Set axes y range:
                ax.set_ylim(bottom=-0.1,
                            top=0.8 + shift * (len(sorted_tables) - 1))

                # Make sure we don't create the same widget twice. We create
                # a new widget for every dimension asked.
                widget_key = self._normalize_id(dim)
                if widget_key not in self.widgets:
                    self._add_widget(widget_key, Figure)
                figure_widget = self.widgets[widget_key]

                if len(tables) > 1:
                    from scipy.stats import ks_2samp
                    ks, p_ks = ks_2samp(tables[0].get_cols(dim)[0],
                                        tables[1].get_cols(dim)[0])
                    ks_view = View(self, 'ks: %.3f, p_ks: %.10f' % (ks, p_ks))
                    final_view = stack_lines(ks_view, figure_widget.view(fig))
                else:
                    ks, p_ks = 0, 0
                    final_view = figure_widget.view(fig)
                # Add the new widget's view
                main_views.append((ks, p_ks, final_view))
            except Exception as e:
                # One failing dimension must not prevent the others from
                # being shown; its slot displays the error text instead.
                logging.exception('Exception when drawing histogram')
                main_views.append((0, 0, View(self, str(e))))
            timer.complete_task(dim)

        # Sort by the ks test unless the user asked to keep the original
        # order in the "Plot sorting" widget.
        sort_outside = self.widgets.sort_outside.values.choices
        if not sort_outside or sort_outside[0] != 'unsort':
            main_views = sorted(main_views, key=itemgetter(0), reverse=True)
        main_views = [v[2] for v in main_views]

        # Create legend; the figure size follows the number and length of
        # the titles.
        legend_titles = list(plots_for_legend.keys())
        max_title_len = max([len(str(t)) for t in legend_titles] + [0])
        logging.debug('histogram legend: %d titles, longest has %d chars',
                      len(legend_titles), max_title_len)
        WIDTH_PER_LETTER = 7
        EXTRA_WIDTH = 60
        HEIGHT_PER_LINE = 30
        EXTRA_HEIGHT = 50
        MIN_X = 300
        MIN_Y = 100
        legend_x = max(MIN_X, EXTRA_WIDTH + WIDTH_PER_LETTER * max_title_len)
        legend_y = max(MIN_Y,
                       EXTRA_HEIGHT + HEIGHT_PER_LINE * len(legend_titles))
        fig = axes.new_figure(legend_x, legend_y)
        ax = fig.add_subplot(111)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.legend(list(plots_for_legend.values()), legend_titles,
                  loc='center', mode='expand', frameon=False,
                  prop={'size': 'xx-small'})
        main_views = [self.widgets.legend_figure.view(fig)] + main_views
        main_view = view.stack_left(*main_views)
    else:
        main_view = View(None, 'Please select dimensions')

    # Combine the control panel and the main view together:
    return self.widgets.layout.view(main_view, control_panel_view)
def view(self, tables):
    """Draw the control panel and one KDE plot per selected dimension.

    For every dimension chosen in the ``dims`` widget a figure is created in
    which each input table is drawn as a curve, shifted vertically by a
    multiple of the ``shift`` widget value.  With two or more tables the
    figure is preceded by the two-sample KS statistic of the first two
    tables.  A legend figure is placed in front of the plots.

    Args:
        tables: The input tables; at least one is needed to draw anything.

    Returns:
        The layout widget's view combining the plots and the control panel,
        or a plain message view when ``tables`` is empty.
    """
    if not tables:
        return View(self, 'No tables to show.')

    # NOTE(review): the remember-keys of the color and text widgets look
    # swapped ('histogram text' is used for color and vice versa).  They are
    # kept as they are so previously remembered choices stay attached to the
    # same widget -- confirm before changing.
    self.widgets.color.guess_or_remember(('histogram text', tables), ['name'])
    self.widgets.text.guess_or_remember(('histogram colors', tables), ['name'])
    self.widgets.shift.guess_or_remember(('histogram shift', tables), '0.2')
    self.widgets.sort_inside.guess_or_remember(
        ('histogram sort inside', tables), ['similarity'])
    self.widgets.sort_outside.guess_or_remember(
        ('histogram sort outside', tables), ['sort'])
    self.widgets.trim.guess_or_remember(('histogram trim', tables), ['no'])
    self.widgets.trim_thresh.guess_or_remember(
        ('histogram trim thresh', tables), '0')

    sort_inside_options = [
        ('unsort', 'Keep original order'),
        ('similarity', 'Put similar curves together'),
    ]
    sort_inside_options += [(x, 'Sort by %s' % x)
                            for x in tables[0].tags.keys()]

    # Create the control panel view. This will enable users to choose the
    # dimensions.
    control_panel_view = stack_lines(
        self.widgets.dims.view('Dimension', self.widgets.apply,
                               options_from_table(tables[0])),
        self.widgets.text.view('Text by', self.widgets.apply,
                               tables[0].tags.keys()),
        self.widgets.color.view('Color by', self.widgets.apply,
                                tables[0].tags.keys()),
        self.widgets.shift.view('Shift for multiple curves',
                                self.widgets.apply),
        self.widgets.sort_inside.view('Curve sorting', self.widgets.apply,
                                      sort_inside_options, multiple=False),
        self.widgets.sort_outside.view(
            'Plot sorting', self.widgets.apply,
            [('sort', 'Put plots with many differences first'),
             ('unsort', 'Keep original order')],
            multiple=False),
        self.widgets.trim.view(
            'Trim plots', self.widgets.apply,
            [('yes', 'Convert values lower than threshold to 0'),
             ('no', 'Don\'t trim')],
            multiple=False),
        self.widgets.trim_thresh.view('Trim threshold', self.widgets.apply),
        self.widgets.apply.view())

    main_views = []
    shift = self.widgets.shift.value_as_float()
    plots_for_legend = OrderedDict()
    colorer = axes.Colorer()

    # Check that the user has already chosen dimensions. Otherwise, ask them
    # to do so.
    if self.widgets.dims.values.choices:
        timer = MultiTimer(len(self.widgets.dims.values.choices))
        for dim in self.widgets.dims.values.choices:
            try:
                # Go over every dimension and create the histogram.
                # First create a new figure:
                fig = self.create_and_adjust_figure(tables)
                ax = fig.add_subplot(111)

                # The trim threshold is the same for every curve of the plot.
                trim_thresh = None
                if self.widgets.trim.get_choices()[0] == 'yes':
                    trim_thresh = self.widgets.trim_thresh.value_as_float()

                # Decide in which order the curves are stacked.
                sort_method = self.widgets.sort_inside.values.choices[0]
                if sort_method == 'unsort':
                    sorted_tables = tables
                elif sort_method == 'similarity':
                    distances = datatable.ks_distances(
                        tables, dim, trim_thresh)
                    sorted_tables = greedy_distance_sort(distances, tables)
                else:
                    # Any other choice is the name of a tag to sort by.
                    sorted_tables = sorted(
                        tables, key=lambda table: table.tags[sort_method])

                # Draw the histogram for every input.
                color_tags = self.widgets.color.values.choices
                for curve_index, table in enumerate(sorted_tables):
                    color_key = tuple([table.tags[c] for c in color_tags])
                    plot = axes.kde1d(
                        ax, table, dim,
                        color=colorer.get_color(color_key),
                        min_x=trim_thresh,
                        shift=shift * curve_index)
                    plots_for_legend[color_key] = plot

                # Add ticks with table names.  The positions are built by
                # multiplication: a float step handed to np.arange can yield
                # one tick more than there are labels.
                if shift > 0:
                    ax.set_yticks(np.arange(len(sorted_tables)) * shift)
                    ax.set_yticklabels(
                        [t.get_tags(self.widgets.text.values.choices)
                         for t in sorted_tables],
                        size='xx-small')
                # Set axes y range:
                ax.set_ylim(bottom=-0.1,
                            top=0.8 + shift * (len(sorted_tables) - 1))

                # Make sure we don't create the same widget twice. We create
                # a new widget for every dimension asked.
                widget_key = self._normalize_id(dim)
                if widget_key not in self.widgets:
                    self._add_widget(widget_key, Figure)
                figure_widget = self.widgets[widget_key]

                if len(tables) > 1:
                    from scipy.stats import ks_2samp
                    ks, p_ks = ks_2samp(tables[0].get_cols(dim)[0],
                                        tables[1].get_cols(dim)[0])
                    ks_view = View(self, 'ks: %.3f, p_ks: %.10f' % (ks, p_ks))
                    final_view = stack_lines(ks_view, figure_widget.view(fig))
                else:
                    ks, p_ks = 0, 0
                    final_view = figure_widget.view(fig)
                # Add the new widget's view
                main_views.append((ks, p_ks, final_view))
            except Exception as e:
                # One failing dimension must not prevent the others from
                # being shown; its slot displays the error text instead.
                logging.exception('Exception when drawing histogram')
                main_views.append((0, 0, View(self, str(e))))
            timer.complete_task(dim)

        # Sort by the ks test unless the user asked to keep the original
        # order in the "Plot sorting" widget.
        sort_outside = self.widgets.sort_outside.values.choices
        if not sort_outside or sort_outside[0] != 'unsort':
            main_views = sorted(main_views, key=itemgetter(0), reverse=True)
        main_views = [v[2] for v in main_views]

        # Create legend; the figure size follows the number and length of
        # the titles.
        legend_titles = list(plots_for_legend.keys())
        max_title_len = max([len(str(t)) for t in legend_titles] + [0])
        logging.debug('histogram legend: %d titles, longest has %d chars',
                      len(legend_titles), max_title_len)
        WIDTH_PER_LETTER = 7
        EXTRA_WIDTH = 60
        HEIGHT_PER_LINE = 30
        EXTRA_HEIGHT = 50
        MIN_X = 300
        MIN_Y = 100
        legend_x = max(MIN_X, EXTRA_WIDTH + WIDTH_PER_LETTER * max_title_len)
        legend_y = max(MIN_Y,
                       EXTRA_HEIGHT + HEIGHT_PER_LINE * len(legend_titles))
        fig = axes.new_figure(legend_x, legend_y)
        ax = fig.add_subplot(111)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.legend(list(plots_for_legend.values()), legend_titles,
                  loc='center', mode='expand', frameon=False,
                  prop={'size': 'xx-small'})
        main_views = [self.widgets.legend_figure.view(fig)] + main_views
        main_view = view.stack_left(*main_views)
    else:
        main_view = View(None, 'Please select dimensions')

    # Combine the control panel and the main view together:
    return self.widgets.layout.view(main_view, control_panel_view)