コード例 #1
0
def old_main():
	"""Build and pickle the legacy dataset of (uid, motion data, score) tuples.

	Reads rows 1..N-1 of the first sheet of the workbook at the module-level
	``resultFile``: column 0 holds the user id and column ``score_col`` the
	score. For every user, the motion data is loaded with
	``load_user_motion``; users for which it returns ``None`` are skipped.
	The resulting list is written with ``utils.pickle_dump``.

	Side effects: rebinds the module-level ``feature_selector`` (presumably
	read by ``load_user_motion`` -- verify), prints the output filename and
	writes the pickle file.
	"""
	global feature_selector
	# Alternative selections used in earlier experiments:
	#   tuple(range(15)) + tuple(range(16,23))  -> all
	#   tuple(range(8))                         -> only the head
	feature_selector = tuple(range(11)) + tuple(range(16, 19))  # head and hand position
	motion_type = 'learn'
	score_col = 259  # 259-learn_score, 151-learn_srate, 302-prac_score, 433-prac_srate
	score_format = int  # score format function
	out_file = 'learn_score_{}.pkl'.format(len(feature_selector))
	print('Output filename is {}'.format(out_file))

	N = 62  # number of rows (row 0 is skipped below)
	workbook = xlrd.open_workbook(resultFile)
	sheet = workbook.sheet_by_index(0)

	# retrieve the uid and score from excel
	rows = range(1, N)
	uids = [sheet.cell(r, 0).value for r in rows]
	scores = [score_format(sheet.cell(r, score_col).value) for r in rows]

	# The progress total is the number of users actually processed (N-1),
	# so the indicator reaches completion on the last user.
	total = len(uids)
	dataset = []
	for done, (uid, score) in enumerate(zip(uids, scores), start=1):
		utils.progress(done, total)
		movdata = load_user_motion(uid, motion_type)
		if movdata is not None:
			dataset.append((uid, movdata, score))

	utils.pickle_dump(dataset, out_file)
コード例 #2
0
def extract_cvx_feature(X, inds, verbose=False):
    """Compute one feature per row of ``X`` relative to the rows ``X[inds]``.

    For every row of ``X`` this calls ``distance_p2ch(row, X[inds, :],
    EPS=1e-2)`` and keeps the second element of its result.

    Args:
        X: 2-D array whose rows are the samples.
        inds: row indices of ``X`` used as the reference set.
        verbose: when true, report progress after each row.

    Returns:
        ``np.array`` of the collected second elements, one per row of ``X``.
    """
    hull_points = X[inds, :]
    features = []
    for row_no, sample in enumerate(X, start=1):
        if verbose:
            progress(row_no, X.shape[0], "Extracting Feature")
        result = distance_p2ch(sample, hull_points, EPS=1e-2)
        features.append(result[1])
    return np.array(features)
コード例 #3
0
	def chowliu_tree(data):
		'''
		Learn a Chow-Liu tree structure from the given data.

		data: S*N numpy array, where S is #samples, N is #RV (Discrete)

		Steps:
		  1. count the empirical single-variable and pairwise distributions,
		  2. apply add-one smoothing and normalise them,
		  3. compute the mutual information of every variable pair,
		  4. build an undirected graph weighted by mutual information, take
		     its maximum spanning tree and direct it from node int(D/2).

		Returns the directed tree produced by ``Graph.todirect``. Each node
		carries a ``domain`` attribute listing the values observed for that
		variable.
		'''
		_, D = data.shape
		marginals = {}
		# D single marginals + D*(D-1)/2 pairwise marginals (always an integer)
		totalnum = D + (D * (D - 1)) // 2
		nownum = 0

		# compute single r.v. marginals (raw counts for now)
		for i in range(D):
			nownum += 1
			utils.progress(nownum, totalnum, 'Learning chowliu tree')
			values, counts = np.unique(data[:, i], return_counts=True)
			marginals[i] = dict(zip(values, counts))

		# compute joint marginal for each pair (raw counts for now)
		for i, j in utils.halfprod(range(D)):
			nownum += 1
			utils.progress(nownum, totalnum, 'Learning chowliu tree')
			values, counts = np.unique(data[:, (i, j)], axis=0, return_counts=True)
			joint = dict(zip(map(tuple, values), counts))
			# value combinations never observed together get an explicit zero
			# count so that smoothing below covers the whole joint domain
			allcomb = utils.crossprod(list(marginals[i].keys()), list(marginals[j].keys()))
			for v in allcomb:
				joint.setdefault(v, 0)
			marginals[i, j] = joint

		# normalize all marginals with add-one (Laplace) smoothing:
		# p(k) = (count(k) + 1) / (total + K), K = number of outcomes.
		# Dividing by the raw total alone would give "probabilities" that sum
		# to more than one.
		for dist in marginals.values():
			denom = float(sum(dist.values()) + len(dist))
			for k in dist:
				dist[k] = (dist[k] + 1) / denom

		mutual = {}
		# compute mutual information
		for i, j in utils.halfprod(range(D)):
			joint = marginals[i, j]
			info = 0
			for vi, vj in joint:
				p_ij = joint[vi, vj]
				info += np.log(p_ij / (marginals[i][vi] * marginals[j][vj])) * p_ij
			mutual[i, j] = info

		# find the maximum spanning tree
		G = Graph(digraph=False)
		for i in range(D):
			node = Node('N{}'.format(i))
			node.domain = list(marginals[i].keys())
			G.add_vertice(node)

		for i, j in mutual:
			G.add_edge(i, j, weight=mutual[i, j])

		G = G.max_spanning_tree()
		root = int(D / 2)
		G = G.todirect(root)
		return G
コード例 #4
0
def main(args):
	"""Build and pickle a dataset of (uid, motion array, score) tuples.

	Args:
		args: sequence whose first three items are
			``rootdir``  - directory containing ``results.xlsx`` and the
			               ``mov_<mov_type>`` motion folders,
			``mov_type`` - suffix of the motion folder to read,
			``out_file`` - path of the pickle file to write.

	User ids and scores are read from rows 1..N-1 of the first sheet of
	``<rootdir>/results.xlsx`` (column 0 and column ``score_col``). For each
	user the single file matching ``<rootdir>/mov_<mov_type>/<uid>_*.csv`` is
	loaded, the selected columns are converted to float, and the leading rows
	that contain any (near-)zero entry are dropped. Users without a motion
	file, or whose file has no valid row at all, are reported and skipped.

	Raises:
		AssertionError: if more than one motion file matches a user.
	"""
	score_col = 259  # 259-learn_score, 151-learn_srate, 302-prac_score, 433-prac_srate
	score_format = int  # score format function
	# ==============================================
	rootdir = args[0]
	mov_type = args[1]
	out_file = args[2]

	resultFile = f'{rootdir}/results.xlsx'
	N = 62  # number of rows (row 0 is skipped below)
	workbook = xlrd.open_workbook(resultFile)
	sheet = workbook.sheet_by_index(0)
	uids = [sheet.cell(i, 0).value for i in range(1, N)]
	scores = [score_format(sheet.cell(i, score_col).value) for i in range(1, N)]

	# head pos + orientation, lhand pos + orientation, skip (lbutton), rhand pos + orientation
	selector = tuple(range(14)) + tuple(range(15, 22))

	# Loop invariants
	motion_dir = '{}/mov_{}'.format(rootdir, mov_type)
	EPS = 1e-20
	total = len(uids)  # N-1 users, so the progress indicator can complete

	dataset = []
	for i, u in enumerate(uids):
		utils.progress(i + 1, total)
		# find the filename of that user
		candidates = glob('{}/{}_*.csv'.format(motion_dir, u))
		if len(candidates) < 1:
			print('User {} does not have motion data'.format(u))
			continue

		assert(len(candidates) == 1)
		mov = utils.read_text(candidates[0], header=True)
		data = np.array(mov)[:, selector].astype(float)

		# invalid data remove: a row is valid when every entry is non-zero
		row_ok = np.all(np.abs(data) > EPS, axis=1)
		if not row_ok.any():
			# Without this guard the scan for the first valid row would run
			# off the end of the array.
			print('User {} has no valid motion rows in {}'.format(u, candidates[0]))
			continue
		valid = int(np.argmax(row_ok))  # index of the first valid row
		if valid != 0:
			print('Eliminated {} rows in {}'.format(valid, candidates[0]))

		dataset.append((u, data[valid:, :], scores[i]))

	utils.pickle_dump(dataset, out_file)
コード例 #5
0
def find_cvx_hull(X, k, verbose=False):
    """Greedily pick row indices of ``X`` by farthest-point selection.

    Starting from row 0, each of the ``k`` rounds measures every remaining
    candidate row against the rows chosen so far (first element returned by
    ``distance_p2ch``) and appends the candidate with the largest value.
    Candidates whose value is not above a threshold are pruned, but only
    while enough candidates remain for the rounds still to run.

    Args:
        X: 2-D array of shape (#samples, #features).
        k: number of selection rounds.
        verbose: when true, report progress and print timing plus the
            relative drop of the maximum distance.

    Returns:
        List of selected row indices: the initial 0 followed by one index
        per round.
    """
    n_samples, n_features = X.shape
    if n_features >= n_samples:
        print('WARN: #Samples less than #Feature')

    # Pruning hyper-parameters
    ratio = 0.1       # fraction of the current maximum distance
    pct = 0.1         # lower percentile (as a fraction) of the distances
    multiple = 3.0    # keep at least this many candidates per remaining round

    candidates = np.arange(n_samples)
    idxes = [0]
    init_dist = last_dist = -1

    t_start = time.time()
    for step in range(k):
        if verbose:
            progress(step + 1, k, "Calculating CVX Hull")

        chosen = X[idxes, :]
        pool = X[candidates, :]
        D = np.array([distance_p2ch(point, chosen)[0] for point in pool])
        farthest = np.argmax(D)
        idxes.append(candidates[farthest])
        maxdis = D[farthest]

        thresh = max(ratio * maxdis, np.percentile(D, pct * 100))
        keep = D > thresh
        remaining_rounds = k - step
        if np.sum(keep) > multiple * remaining_rounds:
            candidates = candidates[keep]

        # remember the first and the most recent maximum distance
        if step:
            last_dist = maxdis
        else:
            init_dist = maxdis
    t_end = time.time()

    if verbose:
        elapsed = t_end - t_start
        deduction = 1 - last_dist / init_dist
        print('Time:{:.2f} secs DeductionRatio:{:.2f}'.format(elapsed, deduction))

    return idxes
コード例 #6
0
	def chowliu_tree(data):
		"""Learn a tree structure over the columns of continuous data.

		data: 2-D numpy array with one column per variable.

		Every pair of columns produced by ``crossprod(range(D), range(D))``
		is connected by an edge weighted with their sample correlation
		coefficient; the maximum spanning tree of that graph is then
		directed from node 0 and returned.

		NOTE(review): the edge weight is the signed correlation, not the
		Gaussian mutual information ``-log(1 - coef**2)`` (which was present
		but disabled in an earlier version), so strongly negatively
		correlated pairs rank lowest -- confirm this is intended.
		"""
		_, D = data.shape
		g = Graph(digraph=False)
		for i in range(D):
			g.add_vertice(Node('x{}'.format(i)))

		# Materialise the pairs so the progress total equals the number of
		# iterations actually performed (the former D*(D-1)/2 did not match
		# the pairs iterated here and was 0 for D == 1).
		allpair = list(crossprod(range(D), range(D)))
		maxN = len(allpair)
		for curN, (i, j) in enumerate(allpair, start=1):
			progress(curN, maxN, 'Calculate mutual info')
			var = np.cov(data[:, (i, j)], rowvar=False)
			coef = var[0, 1] / np.sqrt(var[0, 0] * var[1, 1])
			g.add_edge(i, j, weight=coef)

		g = g.max_spanning_tree()
		g = g.todirect(0)
		return g
コード例 #7
0
	def fit(self,traindata):
		"""Accumulate counts from the training sequences and normalise the tables.

		traindata - list of 2D numpy arrays, one per sequence; the second
		dimension of each must equal ``self.G.N``.

		For every time step the count tables are incremented at the index
		formed by the values of the variables listed in each table's ``ids``:
		  * t == 0: ``self.ICPT`` of every variable in ``self.SV``;
		  * t > 0:  ``self.CPT`` of every variable in ``self.SV``, indexed in
		    an extended row where position ``v`` holds the previous row's
		    value at ``k`` for each ``k, v`` in ``self.M``;
		  * always: ``self.CPT`` of every variable in ``self.EV``.
		Afterwards every CPT and every ICPT of ``self.SV`` is passed to
		``self.norm_CPT``.
		"""
		num_seqs = len(traindata)
		for seq_no, data in enumerate(traindata, start=1):
			progress(seq_no, num_seqs, 'DBN learning')
			T, N = data.shape
			assert(N == self.G.N)
			# basically learning the empirical distribution
			for t in range(T):
				now = data[t, :]
				if t > 0:
					prev = data[t - 1, :]
					# current row extended by one slot per state variable,
					# filled with the mapped values of the previous row
					exnow = np.append(now, [0 for _ in self.SV])
					for src, dst in self.M.items():
						exnow[dst] = prev[src]

					for sv in self.SV:
						table = self.CPT[sv]
						table.P[tuple(exnow[table.ids])] += 1
				else:
					for sv in self.SV:
						table = self.ICPT[sv]
						table.P[tuple(now[table.ids])] += 1

				for ev in self.EV:
					table = self.CPT[ev]
					table.P[tuple(now[table.ids])] += 1

		# normalize all CPT
		for node_id in range(self.G.N):
			self.norm_CPT(self.CPT[node_id])

		for sv in self.SV:
			self.norm_CPT(self.ICPT[sv])
		return
コード例 #8
0
	def predict(self,testdata,it=0):
		"""Fill in the variables listed in ``self.sv`` by iterative message passing.

		testdata: 2-D array of shape (T, D); D must equal ``self.g.N``.
		it: number of sweeps over all edges; a non-positive value selects
		    the default ``int(1.1*D*T)``.

		A factor graph for T time slices is built with
		``self.construct_factor_graph``. Node index ``i`` of that graph is
		mapped to ``testdata[i//D, i%D]``. Every message is a pair
		``(mu, P)``; messages are combined as a P-weighted average of the
		``mu`` values (P is presumably a precision -- confirm). Messages are
		updated in place, so later edges in a sweep see values written
		earlier in the same sweep.

		Returns a deep copy of ``testdata`` in which the entries of the
		variables in ``self.sv`` are replaced by the weighted mean computed
		from the node's own ``P`` (if not ``'na'``) and all incoming messages.

		Raises AssertionError on a shape mismatch, when a non-evidence
		variable node receives zero total weight, or when an edge does not
		connect an 'RV' node with an 'FN' node.
		"""
		T,D = testdata.shape
		assert(D == self.g.N), "Invalid test data"
		if it<=0: it=int(1.1*D*T)
		fg = self.construct_factor_graph(T)
		edgeset = fg.get_edges()
		# pre-find the neighbors of RV node
		for i in range(fg.N):
			node = fg.V[i]
			if node.type == 'RV': node.nb = fg.find_neighbor(i)
		message = {}
		# initialize all the messages
		# NOTE(review): message[n,i] is read below for neighbours n of i, so
		# edgeset presumably contains both directions of every edge -- confirm
		for i,j in edgeset: message[i,j] = (1.0,1.0)
		# loopy GaBP
		for xx in range(it):
			progress(xx+1,it,'Message Passing')
			for i,j in edgeset:
				ni,nj = fg.V[i],fg.V[j]
				# no need for passing message to evidence node
				if (nj.type == 'RV') and ((j%D) in self.ev): continue
				############
				if ni.type == 'RV' and nj.type == 'FN':
					# variable -> factor message
					if (i%D) in self.ev:
						# evidence variable: send the observed value with a
						# very large weight so it dominates the average
						vi = testdata[i//D,i%D]
						message[i,j] = (vi,1e+8)
					else:
						# combine the node's own (mu, P), if any, with every
						# incoming message except the one from the target j
						X0,P0 = (0,0)
						if ni.P != 'na':
							mu,P = ni.P
							X0 += mu*P
							P0 += P
						for n in ni.nb:
							if n==j:continue
							mu,P = message[n,i]
							X0 += mu*P
							P0 += P
						assert(P0!=0),"Leaf node must be EV"
						message[i,j] = (X0/P0,P0)

				elif ni.type == 'FN' and nj.type == 'RV':
					# factor -> variable message; work on copies because the
					# factor's ids / coefficients are reordered below
					ids = deepcopy(ni.ids)
					b,P = deepcopy(ni.P)
					# move the target variable j to the front; b[k+1] is the
					# coefficient belonging to ids[k], b[0] has no variable
					if j!=ids[0]:
						idx = ids.index(j)
						ids[0],ids[idx] = ids[idx],ids[0]
						b[1],b[idx+1] = b[idx+1],b[1]

					# collect the incoming messages of all other variables;
					# vP[0] is the factor's own P
					vmu = []
					vP = [P]
					for nid in ids[1:]:
						mu,P = message[nid,i]
						vmu.append(mu)
						vP.append(P)

					# calculate mu
					# solves b[0] + b[1]*x_j + sum(b[s+2]*mu_s) = 0 for x_j
					summation = 0.0
					for s,v in enumerate(vmu):
						summation += v*b[s+2]
					mu = -(summation+b[0])/b[1]

					# calculate P
					# equals b[1]^2 / (1/vP[0] + sum(b[s]^2 / vP[s-1])), written
					# with a common product to avoid the reciprocals
					product = np.prod(vP)
					numerator = b[1]*b[1]*product
					denominator = product/vP[0]
					for s in range(2,len(b)):
						denominator += b[s]*b[s]*product/vP[s-1]
					P = numerator/denominator
					message[i,j] = (mu,P)

				else:
					assert(False), "Invalid Factor graph!"

		# calculate marginal distribution
		prediction = deepcopy(testdata)
		for i in range(fg.N):
			node = fg.V[i]
			# only variables listed in self.sv are overwritten
			if node.type == 'RV' and (i%D) in self.sv:
				if node.P == 'na':
					X0,P0=0,0
				else:
					mu,P = node.P
					X0 = mu*P
					P0 = P
				for n in node.nb:
					mu,P = message[n,i]
					X0 += mu*P
					P0 += P
				prediction[i//D,i%D] = X0/P0
		return prediction
コード例 #9
0
	def smooth(self,data,numnodes=4,smooth=True):
		"""Estimate the variables in ``self.SV`` for every time step of ``data``.

		data: 2-D array of shape (T, N); N must equal ``self.G.N``.
		numnodes: maximum size of the clusters into which ``self.SV`` is
		    split (consecutive chunks); must be greater than 1.
		smooth: when true, a backward pass is run and combined with the
		    forward pass; when false only the forward messages are used.
		    (Inside this body the name ``smooth`` refers to this flag, not
		    to the method.)

		Two junction trees are built and stored on the instance:
		``self.J1`` from ``self.G`` and ``self.J2`` from ``self.G2``. Each
		gets ``out`` and ``come`` attributes: one clique id per cluster,
		chosen with ``self.min_clique``. ``self.J1`` is used for t == 0 and
		``self.J2`` for later steps.

		Returns a deep copy of ``data`` in which, for every time step and
		every cluster, the entries of the cluster's variables are replaced
		by the index of the maximum of the normalised cluster potential.

		NOTE(review): ``self.M`` presumably maps a state variable to its
		previous-slice copy in ``self.G2`` and ``self.rM`` is presumably the
		inverse mapping -- confirm against the class definition.
		"""
		assert(numnodes > 1)
		# split self.SV into consecutive clusters of at most numnodes variables
		st = 0
		appro = []
		while st < len(self.SV):
			ed = st + numnodes
			if ed > len(self.SV):
				ed = len(self.SV)
			appro.append(self.SV[st:ed])
			st = ed

		# create junction tree J1
		# every cluster is made fully connected so that it ends up inside
		# a single clique
		T1G = deepcopy(self.G)
		T1G = T1G.moralize()
		for bkc in appro:
			for s,t in crossprod(bkc,bkc):
				T1G.add_edge(s,t)

		self.J1 = T1G.junction_tree(preserve=self.G)

		# find come and out node
		# for J1 both lists refer to the same cliques
		self.J1.out = []
		for bkc in appro:
			self.J1.out.append( self.min_clique(self.J1,bkc) )
		self.J1.come = deepcopy(self.J1.out)

		# create junction tree J2
		# here both the cluster and its image under self.M are connected
		T2G = self.G2.moralize()
		for bkc in appro:
			for s,t in crossprod(bkc,bkc):
				T2G.add_edge(s,t)

			fbkc = list(map(lambda x:self.M[x],bkc))
			for s,t in crossprod(fbkc,fbkc):
				T2G.add_edge(s,t)

		self.J2 = T2G.junction_tree(preserve = self.G2)

		# find come and out node
		# out: clique holding the cluster itself
		self.J2.out = []
		for bkc in appro:
			self.J2.out.append( self.min_clique(self.J2,bkc) )

		# come: clique holding the cluster's image under self.M
		self.J2.come = []
		for bkc in appro:
			fbkc = list(map(lambda x:self.M[x],bkc))
			self.J2.come.append( self.min_clique(self.J2,fbkc) )


		T,N = data.shape
		assert(N == self.G.N)

		# ---- forward pass ----
		# fmsg[t] maps a clique id to its potential; the key (outid,-1)
		# stores the message leaving clique outid towards the next step
		fmsg = {}
		for t in range(T):
			progress(t+1,T, 'Forward')

			fmsg[t] = {}
			evidence = data[t,:]

			if t==0:
				self.init_message(self.J1,fmsg[t])
				self.multiply_CPT(self.J1,evidence,fmsg[t],init=True)
				# collect message to out node for each bk cluster
				# a snapshot is used so every out clique collects from the
				# same, unmodified potentials
				npt = deepcopy(fmsg[t])
				message = self.calculate_msg(self.J1,npt)
				for i in self.J1.out:
					fmsg[t][i] = self.collect_msg(self.J1,i,npt,message)

			else:
				pt = t-1
				self.init_message(self.J2,fmsg[t])
				self.multiply_CPT(self.J2,evidence,fmsg[t])
				# absorb message from the previous time slice
				for i,inid in enumerate(self.J2.come):
					# the previous step used J1 only when it was step 0
					if pt == 0:
						outid = self.J1.out[i]
					else:
						outid = self.J2.out[i]

					msg = self.get_message(fmsg[pt][outid],fmsg[t][inid],timestep = 1)
					fmsg[pt][outid,-1] = msg
					fmsg[t][inid] = self.multiply_potential(msg,fmsg[t][inid])

				npt = deepcopy(fmsg[t])
				message = self.calculate_msg(self.J2,npt)
				for i in self.J2.out:
					fmsg[t][i] = self.collect_msg(self.J2,i,npt,message)

			# the last step has no successor, so its outgoing message is
			# computed here
			# NOTE(review): this uses J2's clique ids even when T == 1, where
			# fmsg[0] was built from J1 -- verify the single-step case
			if t==(T-1):
				for i,outid in enumerate(self.J2.out):
					inid = self.J2.come[i]
					fmsg[t][outid,-1] = self.get_message(fmsg[t][outid],fmsg[t][inid],timestep = 1)

		# with smooth=False the range below is empty and no backward pass runs
		if smooth:
			endtime = -1
		else:
			endtime = T

		# ---- backward pass (t = T-1 down to 0) ----
		# bmsg[t] maps a clique id to its potential; the key (-1,outid)
		# stores the message arriving at clique outid from the next step
		bmsg = {}
		for t in range(T-1,endtime,-1):
			progress(T-t,T, 'Backward')

			bmsg[t] = {}
			evidence = data[t,:]

			if t==(T-1):
				curG = self.J2
				self.init_message(curG,bmsg[t])
				self.multiply_CPT(curG,evidence,bmsg[t])
				npt = deepcopy(bmsg[t])
				message = self.calculate_msg(curG,npt)
				for i,inid in enumerate(curG.come):
					bmsg[t][inid] = self.collect_msg(curG,inid,npt,message)
					outid = curG.out[i]
					# nothing follows the last step: use an initial potential
					# over the cluster as the incoming backward message
					bmsg[t][-1,outid] = self.init_potential(appro[i])

			if t<(T-1):
				nt = t+1
				curG = self.J2
				if t==0:
					curG = self.J1
				# initialize message
				self.init_message(curG,bmsg[t])
				if t==0:
					self.multiply_CPT(curG,evidence,bmsg[t],init=True)
				else:
					self.multiply_CPT(curG,evidence,bmsg[t])
				# absorb message from the next time slice
				for i,outid in enumerate(curG.out):
					inid = self.J2.come[i]
					msg = self.get_message(bmsg[nt][inid],bmsg[t][outid],timestep = -1)
					bmsg[t][-1,outid] = msg
					bmsg[t][outid] = self.multiply_potential(msg,bmsg[t][outid])

				npt = deepcopy(bmsg[t])
				message = self.calculate_msg(curG,npt)
				for i in curG.come:
					bmsg[t][i] = self.collect_msg(curG,i,npt,message)


		# ---- decode: most probable joint value of each cluster ----
		prediction = deepcopy(data)
		for t in range(T):
			if t==0:
				tg = self.J1
			else:
				tg = self.J2

			for bki,outid in enumerate(tg.out):
				fP = fmsg[t][outid,-1]
				# relabel the message's variable ids through self.rM; this
				# modifies the potential stored in fmsg in place
				fP.ids = list(map(lambda x:self.rM[x],fP.ids))
				potential = fP
				if smooth:
					bP = bmsg[t][-1,outid]
					potential =  self.multiply_potential(potential,bP)
				P = potential.P/np.sum(potential.P)
				# multi-dimensional index of the largest entry; one axis per
				# variable, in the order given by fP.ids
				idx = np.unravel_index(P.argmax(), P.shape)
				for v in appro[bki]:
					prediction[t,v] = idx[fP.ids.index(v)]

		return prediction