def big():
    """Run the rule-learning loop on one random sample, printing and plotting progress.

    A single random 4x6 grid over ``'A'``, ``'T'``, ``'C'``, ``'G'`` and ``None``
    is wrapped in a ``Graph``.  For up to 50 rounds the bicluster returned by
    ``DupbestBC`` is turned into a rule with a fresh nonterminal ``NT<i>`` and
    every graph is reduced by that rule.  After each round every bicluster
    found so far is extended with the new nonterminal (once as a column, once
    as a row); the better extension replaces the stored bicluster when its
    log-gain exceeds the new bicluster's log-gain by more than 2.0.

    The function returns nothing: it prints the size ratio, the biclusters and
    the rules as it goes, writes one ``big_<nonterminal>.png`` picture per
    round through ``Graph.vis`` and finally prints every rule in the grammar.
    """
    # Imported here, as in the original, rather than at module level.
    from BiCluster import DupbestBC, BiCluster

    samples = np.random.choice(['A', 'T', 'C', 'G', None], (1, 4, 6))
    # A real list is required: the graphs are indexed and iterated repeatedly.
    Gs = [Graph(sample) for sample in samples]
    bcs = []
    grammar = {}
    totalBits = sum(G._G.order() for G in Gs)
    Gs[0].vis()

    for i in range(50):
        # Current total graph order relative to the initial one.
        print("alpha:%s\n" % (sum(G._G.order() for G in Gs) / float(totalBits)))

        tables, symbols, ecms, cols = prepare(Gs)
        bc = DupbestBC(tables, symbols, ecms, cols)
        if not bc:
            print("no more rules!")
            break
        print(bc)
        bcs.append(bc)

        # Name the new bicluster and rewrite every graph with its rule.
        new = T((-1, 'NT%s' % i))
        bc._nt = new
        r = rule.fromBC(bc)
        print(r)
        grammar[r._lhs] = r
        for G in Gs:
            G.reduction(r)

        # The graphs changed, so the tables must be rebuilt before extending.
        tables, symbols, ecms, cols = prepare(Gs)
        for ind, old_bc in enumerate(bcs):
            candidates = (
                BiCluster().update(old_bc, tables, ecms, col=bc._nt),
                BiCluster().update(old_bc, tables, ecms, row=bc._nt),
            )
            # Column extension wins ties; the row one must be strictly better.
            bc_new, best = None, None
            for candidate in candidates:
                if not candidate:
                    continue
                gain = candidate.logGain()
                if best is None or gain > best:
                    bc_new, best = candidate, gain

            if best is None:
                # Neither extension is usable; ``None - float`` used to raise
                # a TypeError at this point.
                continue

            # NOTE(review): the margin is measured against the newest
            # bicluster ``bc``, not against ``old_bc`` -- kept as in the
            # original, confirm this is intended.
            if best - bc.logGain() > 2.0:
                print("Attach")
                print(bc_new)
                r = rule.fromBC(bc_new)
                print(r)
                grammar[r._lhs] = r
                for G in Gs:
                    G.reduction(r)
                bcs[ind] = bc_new

        # ``r`` is the most recently created rule (new or attached).
        Gs[0].vis(file='big_%s.png' % str(new), rule=r)

    for g in grammar.values():
        print(g)
def learning(samples, alpha=0.05, beta=5, cut=30, gamma=2.0, N=50):
    """Learn grammar rules from *samples* by repeated bicluster extraction.

    Every sample is wrapped in a ``Graph``.  For at most *N* rounds the
    bicluster returned by ``DupbestBC`` is given a fresh nonterminal
    ``NT<i>``, converted to a rule with ``rule.fromBC`` and applied to every
    graph through ``Graph.reduction``.  After each round every bicluster found
    so far is extended with the new nonterminal (once as a column, once as a
    row, via ``BiCluster().update``); the better extension replaces the stored
    bicluster, and its rule is applied, when its log-gain exceeds the new
    bicluster's log-gain by more than *gamma*.  The loop stops early when
    ``DupbestBC`` returns a falsy result.

    Args:
        samples: iterable of objects accepted by the ``Graph`` constructor.
        alpha: forwarded unchanged to ``DupbestBC``.
        beta: forwarded unchanged to ``DupbestBC``.
        cut: forwarded unchanged to ``DupbestBC``.
        gamma: log-gain margin an extended bicluster must exceed to be
            attached.
        N: maximum number of rounds.

    Returns:
        A tuple ``(Gs, grammar, bcs)``: the list of (reduced) graphs, a dict
        mapping each rule's ``_lhs`` to the rule, and the list of biclusters.
    """
    # Imported here, as in the original, rather than at module level.
    from BiCluster import DupbestBC, BiCluster

    # A real list is required: the graphs are iterated repeatedly and returned.
    Gs = [Graph(sample) for sample in samples]
    bcs = []
    grammar = {}
    totalBits = sum(G._G.order() for G in Gs)

    for i in range(N):
        # Current total graph order relative to the initial one.
        print("Compression:%s\n" % (sum(G._G.order() for G in Gs) / float(totalBits)))

        tables, symbols, ecms, cols = prepare(Gs)
        bc = DupbestBC(tables, symbols, ecms, cols, alpha=alpha, beta=beta, cut=cut)
        if not bc:
            print("no more rules!")
            break
        print(bc)
        bcs.append(bc)

        # Name the new bicluster and rewrite every graph with its rule.
        new = T((-1, 'NT%s' % i))
        bc._nt = new
        r = rule.fromBC(bc)
        grammar[r._lhs] = r
        for G in Gs:
            G.reduction(r)

        # The graphs changed, so the tables must be rebuilt before extending.
        tables, symbols, ecms, cols = prepare(Gs)
        for ind, old_bc in enumerate(bcs):
            candidates = (
                BiCluster().update(old_bc, tables, ecms, col=bc._nt),
                BiCluster().update(old_bc, tables, ecms, row=bc._nt),
            )
            # Column extension wins ties; the row one must be strictly better.
            bc_new, best = None, None
            for candidate in candidates:
                if not candidate:
                    continue
                gain = candidate.logGain()
                if best is None or gain > best:
                    bc_new, best = candidate, gain

            if best is None:
                # Neither extension is usable; ``None - float`` used to raise
                # a TypeError at this point.
                continue

            # NOTE(review): the margin is measured against the newest
            # bicluster ``bc``, not against ``old_bc`` -- kept as in the
            # original, confirm this is intended.
            if best - bc.logGain() > gamma:
                r = rule.fromBC(bc_new)
                grammar[r._lhs] = r
                for G in Gs:
                    G.reduction(r)
                bcs[ind] = bc_new

    return Gs, grammar, bcs