Ejemplo n.º 1
0
# Read the CSV header line to obtain the column names. The indexes of the
# Ret_2, ..., Ret_180 (or 120?) columns are looked up from these names later.
try:
	# `with` closes the handle even if reading the header line fails.
	with open(defpath+'/'+fname,'r') as fhand:
		header_line=fhand.readline()
except IOError:
	# Catch only I/O failures: a bare `except` would also swallow NameError,
	# KeyboardInterrupt, etc. and misreport them as a file problem.
	print('gowno, can\'t open this file')
	exit()
# Strip the line terminator so the last column name is not stored as 'name\n'.
# An empty file yields [''] here instead of leaving column_names undefined.
column_names=header_line.rstrip('\r\n').split(',')

# Look up column indexes by matching regexes against the header names.
# The patterns are raw strings so that regex escapes such as \d are passed to
# the regex engine untouched instead of being treated as (invalid) Python
# string escapes; the pattern text itself is unchanged.
# NOTE(review): extract_index_column is defined elsewhere - presumably it
# returns the positions of the names in column_names matching the pattern.

# indexes of the numbered return columns (Ret_<digits>)
ret_indexlist=extract_index_column(r'^Ret_\d+',column_names)
# indexes of the feature columns (names starting with "Fea")
fea_indexlist=extract_index_column(r'^Fea.+',column_names)
# indexes of the weight columns (names starting with "We")
weight_indexlist=extract_index_column(r'^We.+',column_names)

# Copy the selected columns out of the whole table. Row 0 is skipped in every
# case: per the original note it holds NaN values (the header strings).

# feature values
fea_values=np.copy(whole[1:, fea_indexlist])

# weight values
wei_values=np.copy(whole[1:,weight_indexlist])

# numbered (time-series) return values only, i.e. the Ret_<digits> columns
ret_values=np.copy(whole[1:,ret_indexlist])

Ejemplo n.º 2
0
# File details.
defpath='/home/jakub/Work/winton/data'
fname='train.csv'
# Build the path once; it is used for both the numeric load and the header read.
csv_path=defpath+'/'+fname

# Load the whole table as floats. genfromtxt already returns a float ndarray,
# so wrapping it in np.array(..., dtype=float) only made a redundant full copy.
# The header line is not skipped here, so it stays in the array as row 0
# (later code slices from row 1 to drop it).
whole=np.genfromtxt(csv_path,delimiter=',',dtype=float)

# Read the header line to get the column names. `with` guarantees the file is
# closed, and the line terminator is stripped so the last name has no '\n'.
with open(csv_path,'r') as fhand:
	column_names=fhand.readline().rstrip('\r\n').split(',')
# Column positions, looked up from the header names:
#   - feature columns (names starting with "Fea")
#   - return columns whose suffix is alphabetic (the D-2, D-1, ... returns),
#     which excludes the numbered time-series Ret columns
fea_indexlist = extract_index_column('^Fea.+', column_names)
ret_D_indexlist = extract_index_column('(^Ret_[a-zA-Z]+)', column_names)

# Row 0 of the table holds NaN values (the header strings), so the data
# starts at row 1.
data_rows = whole[1:]
fea_values = np.copy(data_rows[:, fea_indexlist])
ret_D_values = np.copy(data_rows[:, ret_D_indexlist])

# Dictionary keyed by (+/-1 or 0, +/-1 or 0) whose values are lists holding the
# corresponding D-2, D-1 values.
# NOTE(review): structure taken from the original comment - confirm against
# group_9_clusters, which is defined elsewhere.
dix = group_9_clusters(ret_D_values)


# print ret_D_values[mm_index,:]
# print column_names

# looking for common features