Example #1
0
def big():
	"""Run the grammar-induction demo on a single random 4x6 symbol grid.

	A random array of 'A'/'T'/'C'/'G'/None with shape (1, 4, 6) is drawn and
	each sample is wrapped in a Graph.  For up to 50 rounds the best
	bi-cluster is requested from DupbestBC, given a fresh non-terminal
	'NT<i>', converted into a rule and applied to every graph through
	G.reduction.  After each new rule, every known bi-cluster is re-examined
	with the new non-terminal offered as an extra column or an extra row; the
	higher-gain variant is attached when its logGain exceeds bc.logGain() by
	more than 2.0.  Progress, clusters and rules are printed, the first graph
	is rendered after every round, and the collected rules are printed at the
	end.  Nothing is returned.
	"""
	from BiCluster import DupbestBC, BiCluster
	samples = np.random.choice(['A','T','C','G', None], (1, 4, 6))
	# A real list: Gs is indexed below and iterated several times.
	Gs = [Graph(sample) for sample in samples]
	bcs = []
	grammar = {}
	totalBits = sum(G._G.order() for G in Gs)
	Gs[0].vis()
	for i in range(50):
		# Ratio of the current total graph order to the initial one.
		print("alpha:%s\n" % (sum(G._G.order() for G in Gs) / float(totalBits)))
		tables, symbols, ecms, cols = prepare(Gs)
		bc = DupbestBC(tables, symbols, ecms, cols)
		if not bc:
			print("no more rules!")
			break
		print(bc)

		bcs.append(bc)
		new = T((-1, 'NT%s' % i))
		bc._nt = new
		r = rule.fromBC(bc)
		print(r)
		grammar[r._lhs] = r
		for G in Gs:
			G.reduction(r)

		# The graphs changed, so the tables have to be rebuilt before updating.
		tables, symbols, ecms, cols = prepare(Gs)
		for ind, _bc in enumerate(bcs):
			col_candidate = BiCluster().update(_bc, tables, ecms, col=bc._nt)
			row_candidate = BiCluster().update(_bc, tables, ecms, row=bc._nt)

			# Pick the higher-gain variant; the column variant wins ties.
			bc_new = None
			best = None
			for candidate in (col_candidate, row_candidate):
				if not candidate:
					continue
				gain = candidate.logGain()
				if best is None or gain > best:
					bc_new = candidate
					best = gain
			if bc_new is None:
				# Neither update produced a cluster: nothing to compare, and
				# there is no valid bc_new to store for this index.
				continue

			# NOTE(review): the threshold is measured against the newly found
			# bc, not against _bc (the cluster being extended) -- confirm.
			if best - bc.logGain() > 2.0:
				print("Attach")
				print(bc_new)
				r = rule.fromBC(bc_new)
				print(r)
				grammar[r._lhs] = r
				for G in Gs:
					G.reduction(r)
				# Replace only on acceptance so bcs stays in step with grammar.
				bcs[ind] = bc_new
		Gs[0].vis(file='big_%s.png' % str(new), rule=r)
	for g in grammar.values():
		print(g)
Example #2
0
def train():
	"""Run the rule-learning demo on three random 4x20 symbol grids.

	A random array of 'A'/'T'/'C'/'G'/None with shape (3, 4, 20) is drawn and
	each sample is wrapped in a Graph.  For up to 10 rounds the best
	bi-cluster is requested from DupbestBC, given a fresh non-terminal
	'NT<i>', turned into a rule and applied to every graph through
	G.reduction.  Every bi-cluster known at that point is then offered the
	new non-terminal as an extra column and as an extra row; each variant
	whose logGain is positive is turned into a rule, applied to the graphs
	and appended to the list of bi-clusters.  Progress is printed; nothing is
	returned.
	"""
	from BiCluster import DupbestBC, BiCluster
	samples = np.random.choice(['A','T','C','G', None], (3, 4,20))
	# A real list: Gs is iterated several times per round.
	Gs = [Graph(sample) for sample in samples]
	bcs = []
	totalBits = sum(G._G.order() for G in Gs)
	for i in range(10):
		# Ratio of the current total graph order to the initial one.
		print("alpha:%s\n" % (sum(G._G.order() for G in Gs) / float(totalBits)))
		tables, symbols, ecms, cols = prepare(Gs)
		bc = DupbestBC(tables, symbols, ecms, cols)
		if not bc:
			print("no more rules!")
			break
		print(bc)
		bcs.append(bc)
		new = T((-1, 'NT%s' % i))
		bc._nt = new
		r = rule(bc._nt, bc._rows, bc._cols, bc._op)
		print(r)
		for G in Gs:
			G.reduction(r)

		# The graphs changed, so the tables have to be rebuilt before updating.
		tables, symbols, ecms, cols = prepare(Gs)
		# Iterate over a snapshot: accepted variants are appended to bcs, and
		# walking the live list would revisit them in the same pass (possibly
		# without ever terminating).
		for _bc in list(bcs):
			# Column first, then row, matching the original evaluation order.
			for axis in ('col', 'row'):
				bc_new = BiCluster().update(_bc, tables, ecms, **{axis: new})
				if not (bc_new and bc_new.logGain() > 0.0):
					continue
				# Report the axis actually extended.
				print("Adding %s %s to %s" % (axis, new, bc_new._nt))
				print(bc_new)
				r = rule(bc_new._nt, bc_new._rows, bc_new._cols, bc_new._op)
				print(r)
				for G in Gs:
					G.reduction(r)
				bcs.append(bc_new)
Example #3
0
def learning(samples, alpha=0.05, beta=5, cut=30, gamma=2.0, N=50):
	"""Induce grammar rules from samples by repeated bi-cluster reduction.

	Each sample is wrapped in a Graph.  For up to N rounds the best
	bi-cluster is requested from DupbestBC, given a fresh non-terminal
	'NT<i>', converted into a rule with rule.fromBC and applied to every
	graph through G.reduction.  After each new rule, every known bi-cluster
	is re-examined with the new non-terminal offered as an extra column or an
	extra row; the higher-gain variant replaces the stored cluster (and its
	rule is applied) when its logGain exceeds bc.logGain() by more than
	gamma.  A compression ratio is printed at the start of every round.

	Args:
		samples: iterable of inputs, each passed to Graph().
		alpha, beta, cut: forwarded unchanged to DupbestBC.
		gamma: minimum amount by which a variant's logGain must exceed
			bc.logGain() for the variant to be attached.
		N: maximum number of rounds.

	Returns:
		A tuple (Gs, grammar, bcs): the reduced graphs, a dict mapping each
		rule's _lhs to the rule, and the list of accepted bi-clusters.
	"""
	from BiCluster import DupbestBC, BiCluster
	# A real list: Gs is iterated several times per round and returned.
	Gs = [Graph(sample) for sample in samples]
	bcs = []
	grammar = {}
	totalBits = sum(G._G.order() for G in Gs)
	if not totalBits:
		# No samples (or only empty graphs): there is nothing to reduce and
		# the compression ratio below would divide by zero.
		return Gs, grammar, bcs
	for i in range(N):
		# Ratio of the current total graph order to the initial one.
		print("Compression:%s\n" % (sum(G._G.order() for G in Gs) / float(totalBits)))
		tables, symbols, ecms, cols = prepare(Gs)
		bc = DupbestBC(tables, symbols, ecms, cols, alpha=alpha, beta=beta, cut=cut)
		if not bc:
			print("no more rules!")
			break
		print(bc)

		bcs.append(bc)
		new = T((-1, 'NT%s' % i))
		bc._nt = new
		r = rule.fromBC(bc)
		grammar[r._lhs] = r
		for G in Gs:
			G.reduction(r)

		# The graphs changed, so the tables have to be rebuilt before updating.
		tables, symbols, ecms, cols = prepare(Gs)
		for ind, _bc in enumerate(bcs):
			col_candidate = BiCluster().update(_bc, tables, ecms, col=bc._nt)
			row_candidate = BiCluster().update(_bc, tables, ecms, row=bc._nt)

			# Pick the higher-gain variant; the column variant wins ties.
			bc_new = None
			best = None
			for candidate in (col_candidate, row_candidate):
				if not candidate:
					continue
				gain = candidate.logGain()
				if best is None or gain > best:
					bc_new = candidate
					best = gain
			if bc_new is None:
				# Neither update produced a cluster; subtracting from None
				# below would raise, so leave this entry untouched.
				continue

			# NOTE(review): the threshold is measured against the newly found
			# bc, not against _bc (the cluster being extended) -- confirm.
			if best - bc.logGain() > gamma:
				r = rule.fromBC(bc_new)
				grammar[r._lhs] = r
				for G in Gs:
					G.reduction(r)
				bcs[ind] = bc_new
	return Gs, grammar, bcs