def getNbrFeatures(self, *filenames):
    ''' Get the number of features directly from the data file(s) (in case we do not have an info file).

    The result is cached in self.info['feat_num']: when that key is already
    present it is returned as is and no file is read.

    filenames: one or more data file paths. self.getFormatData is called on
        the first one, then self.info['format'] selects the strategy:
        - 'dense': length of the first row of the first file;
        - 'sparse': largest feature index found in the last (index, value)
          entry of any row, over all files;
        - 'sparse_binary': largest value (converted to int) found in the
          last entry of any row, over all files.
        For the two sparse formats, rows without any entry are skipped and
        0 is stored if no row has one.

    Returns self.info['feat_num']. For any other format the key is never
    set, so the final lookup raises KeyError.
    '''
    if 'feat_num' not in self.info:
        self.getFormatData(filenames[0])
        data_format = self.info['format']
        if data_format == 'dense':
            data = data_converter.file_to_array(filenames[0])
            self.info['feat_num'] = len(data[0])
        elif data_format == 'sparse':
            # Accumulate in a local and publish once at the end, so that a
            # failure while reading cannot leave a bogus cached value behind.
            feat_num = 0
            for filename in filenames:
                sparse_list = data_converter.sparse_file_to_sparse_list(
                    filename)
                for row in sparse_list:
                    # A row without entries has no "last entry" to inspect.
                    if len(row) == 0:
                        continue
                    # Only the last entry is inspected; this assumes entries
                    # are ordered by increasing index -- TODO confirm.
                    feature_index, _ = row[-1]
                    feat_num = max(feat_num, feature_index)
            self.info['feat_num'] = feat_num
        elif data_format == 'sparse_binary':
            feat_num = 0
            for filename in filenames:
                data = data_converter.file_to_array(filename)
                for row in data:
                    if len(row) == 0:
                        continue
                    feat_num = max(feat_num, int(row[-1]))
            self.info['feat_num'] = feat_num
    return self.info['feat_num']
Example #2
0
def data_sparse(filename, feat_type):
    ''' Load a sparse-matrix text file and hand it to the CSR conversion helper.

    In the file, an entry "a:b" in row i means matrix[i][a] = b.

    filename: path passed to data_converter.sparse_file_to_sparse_list.
    feat_type: sized collection; only its length is used, as the number of
        features passed to data_converter.sparse_list_to_csr_sparse.

    Returns whatever sparse_list_to_csr_sparse returns -- presumably a CSR
    sparse matrix rather than a dense numpy array, judging by the helper's
    name; confirm against data_converter.
    '''
    entries = data_converter.sparse_file_to_sparse_list(filename)
    n_features = len(feat_type)
    return data_converter.sparse_list_to_csr_sparse(entries, n_features)
Example #3
0
	def getNbrFeatures(self, *filenames):
		''' Get the number of features directly from the data file(s) (in case we do not have an info file).

		The result is cached in self.info['feat_num']: when that key is already
		present it is returned as is and no file is read.

		filenames: one or more data file paths. self.getFormatData is called on
			the first one, then self.info['format'] selects the strategy:
			- 'dense': length of the first row of the first file;
			- 'sparse': largest feature index found in the last (index, value)
			  entry of any row, over all files;
			- 'sparse_binary': largest value (converted to int) found in the
			  last entry of any row, over all files.
			For the two sparse formats, rows without any entry are skipped and
			0 is stored if no row has one.

		Returns self.info['feat_num']. For any other format the key is never
		set, so the final lookup raises KeyError.
		'''
		if 'feat_num' not in self.info:
			self.getFormatData(filenames[0])
			data_format = self.info['format']
			if data_format == 'dense':
				data = data_converter.file_to_array(filenames[0])
				self.info['feat_num'] = len(data[0])
			elif data_format == 'sparse':
				# Accumulate in a local and publish once at the end, so that a
				# failure while reading cannot leave a bogus cached value behind.
				feat_num = 0
				for filename in filenames:
					sparse_list = data_converter.sparse_file_to_sparse_list(filename)
					for row in sparse_list:
						# A row without entries has no "last entry" to inspect.
						if len(row) == 0:
							continue
						# Only the last entry is inspected; this assumes entries
						# are ordered by increasing index -- TODO confirm.
						feature_index, _ = row[-1]
						feat_num = max(feat_num, feature_index)
				self.info['feat_num'] = feat_num
			elif data_format == 'sparse_binary':
				feat_num = 0
				for filename in filenames:
					data = data_converter.file_to_array(filename)
					for row in data:
						if len(row) == 0:
							continue
						feat_num = max(feat_num, int(row[-1]))
				self.info['feat_num'] = feat_num
		return self.info['feat_num']
Example #4
0
def data_sparse(filename, nbr_features):
	''' Load a sparse-matrix text file and hand it to the CSR conversion helper.

	In the file, an entry "a:b" in row i means matrix[i][a] = b.

	filename: path passed to data_converter.sparse_file_to_sparse_list.
	nbr_features: forwarded unchanged as the second argument of
		data_converter.sparse_list_to_csr_sparse.

	Returns whatever sparse_list_to_csr_sparse returns -- presumably a CSR
	sparse matrix rather than a dense numpy array, judging by the helper's
	name; confirm against data_converter.
	'''
	entries = data_converter.sparse_file_to_sparse_list(filename)
	csr_result = data_converter.sparse_list_to_csr_sparse(entries, nbr_features)
	return csr_result