def correspondence(self, templates, template_name, q1=None, artifact=None, var=None):
    '''Test every cluster against a set of templates and store the matches.

    Fulchignoni et al. (2000) extension used to give a correspondence to
    clusters.

    For each entry of ``self.cluster_stats`` the templates are passed one by
    one to ``gmode_module.hyp_test``; every result that is not ``None`` is
    kept.  The outcome is a dict mapping the 1-based cluster number to the
    list of kept results.  It is written with ``writedict`` to
    ``TESTS/<label>/correspondence_q<q1>_<template_name>.dat`` and handed to
    ``file_module.pickle`` under the same base name.

    Parameters:
        templates: path of a pickled mapping ``{key: values}``.
        template_name: tag appended to the output file names.
        q1: threshold forwarded to ``hyp_test``; defaults to ``self.q1``.
        artifact: unused; kept so existing callers keep working.
        var: index selecting the variables used from the cluster statistics
            and from each template.
            NOTE(review): the default ``None`` is used directly as an index;
            callers presumably always pass an explicit selection -- confirm.

    Returns:
        None.  The results are only written to disk.
    '''
    from gmode_module import Invert, free, hyp_test
    from file_module import pickle, writedict
    try:
        import cPickle as pkl
    except ImportError:
        # cPickle only exists on Python 2; the plain module is equivalent.
        import pickle as pkl

    if q1 is None:
        q1 = self.q1

    cluster_members = self.cluster_members

    # NOTE(security): unpickling can execute arbitrary code; ``templates``
    # must point to a trusted file.
    with open(templates, 'rb') as template_file:
        templ = pkl.load(template_file)

    interpretation = dict()
    for n, stat in enumerate(self.cluster_stats):
        # Restrict the cluster statistics to the selected variables.
        # presumably stat[0] is the cluster centre and stat[2] / stat[3] are
        # square matrices (sliced on both axes) -- TODO confirm.
        iS = Invert(stat[2][var, :][:, var])
        f = free(stat[3][var, :][:, var])
        center = stat[0][var]
        size = len(cluster_members[n])

        tested = (hyp_test(size, q1, f, key, values[var], center, iS)
                  for key, values in templ.items())
        # hyp_test signals "no match" with None; keep everything else.
        interpretation[n + 1] = [hit for hit in tested if hit is not None]

    basename = 'correspondence_q' + str(q1) + '_' + template_name
    with open(pathjoin("TESTS", self.label, basename + '.dat'), 'w') as out_file:
        writedict(interpretation, out_file)
    pickle(interpretation, self.label, basename)
def evaluate(self, q2=None):
    '''Report the weight of each variable and flag the redundant ones.

    Does nothing unless more than one cluster exists.  Otherwise the elements
    are divided by the result of ``gmode_module.mad`` and passed, together
    with the cluster members and statistics, to ``eval_variables.distance``,
    which returns ``d2``, ``Gc`` and ``D2``.  For every variable a line with
    its weight (``d2[i].sum() / d2.sum()``) is appended to ``self.report``;
    when all entries of ``Gc[i]`` are below ``q2`` the variable is reported
    (and printed) as statistically redundant.  ``D2`` and ``Gc`` are finally
    stored through ``file_module.pickle``.

    Parameters:
        q2: threshold compared against ``Gc``; when omitted, ``self.q1`` is
            used.

    Returns:
        None.
    '''
    if q2 is None:
        q2 = self.q1

    # A single cluster (or none) leaves nothing to compare.
    if len(self.cluster_members) <= 1:
        return

    from eval_variables import distance
    from gmode_module import mad
    from file_module import pickle

    elems = copy(self.elems)
    # Scale used to normalise the elements before computing the distances.
    dev = mad(elems, median(elems, axis=0))

    self.report.append('\n############################## Part II : Verifying the variable significance ###############################\n')
    self.report.append("Confidence level q2: "+str(normal.cdf(q2) - normal.cdf(-q2)))

    d2, Gc, D2 = distance(self.cluster_members, self.cluster_stats, elems/dev)

    for i in range(len(elems[0])):
        self.report.append('\nMatrix Gc for variable '+str(i+1)+10*" "+' Weight: '+str(d2[i].sum()/d2.sum()))

        # NOTE(review): ``all`` is whichever one the file has in scope
        # (builtin or an array-aware version) -- confirm.
        if all(Gc[i] < q2):
            self.report.append('\n Variable '+str(i+1)+' is statistically redundant.')
            print('Variable '+str(i+1)+' is statistically redundant.')

    pickle(D2, self.label, "D2")
    pickle(Gc, self.label, "Gc")
def writelog(self):
    '''Write the reports to disk and pickle the clustering results.

    Two text files are written with ``file_module.writeit`` inside
    ``TESTS/<label>/``: ``log_<label>.dat`` from ``self.report`` and
    ``cluster_<label>.dat`` from ``self.clusters_report``.  Afterwards
    ``self.cluster_stats``, ``self.cluster_members`` and ``self.excluded``
    are stored through ``file_module.pickle``.

    Returns:
        None.
    '''
    from file_module import pickle, writeit

    mypath = pathjoin("TESTS", self.label)

    # Context managers guarantee the files are flushed and closed even if
    # writeit raises.
    with open(pathjoin(mypath, 'log_'+self.label+'.dat'), 'w') as log_file:
        writeit(self.report, log_file)
    with open(pathjoin(mypath, 'cluster_'+self.label+'.dat'), 'w') as cluster_file:
        writeit(self.clusters_report, cluster_file)

    pickle(self.cluster_stats, self.label, "cluster_stats")
    pickle(self.cluster_members, self.label, "cluster_members")
    pickle(self.excluded, self.label, "excluded")