Ejemplo n.º 1
0
def svm_read_problem(data_file_name, return_scipy=False):
    """
    svm_read_problem(data_file_name, return_scipy=False) -> [y, x], y: list, x: list of dictionary
    svm_read_problem(data_file_name, return_scipy=True)  -> [y, x], y: ndarray, x: csr_matrix

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.
    """
    prob_y = []
    prob_x = []
    row_ptr = [0]
    col_idx = []
    for i, line in enumerate(open(data_file_name)):
        line = line.split(None, 1)
        # In case an instance with all zero features
        if len(line) == 1:
            line += ['']
        label, features = line
        prob_y += [float(label)]
        if scipy is not None and return_scipy:
            nz = 0
            for e in features.split():
                ind, val = e.split(":")
                val = float(val)
                if val != 0:
                    col_idx += [int(ind) - 1]
                    prob_x += [val]
                    nz += 1
            row_ptr += [row_ptr[-1] + nz]
        else:
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                if val != 0:
                    xi[int(ind)] = float(val)
            prob_x += [xi]
    if scipy is not None and return_scipy:
        prob_y = scipy.array(prob_y)
        prob_x = scipy.array(prob_x)
        col_idx = scipy.array(col_idx)
        row_ptr = scipy.array(row_ptr)
        prob_x = sparse.csr_matrix((prob_x, col_idx, row_ptr))
    return (prob_y, prob_x)
def svm_read_problem(data_file_name, return_scipy=False):
	"""
	svm_read_problem(data_file_name, return_scipy=False) -> [y, x], y: list, x: list of dictionary
	svm_read_problem(data_file_name, return_scipy=True)  -> [y, x], y: ndarray, x: csr_matrix

	Read LIBSVM-format data from data_file_name and return labels y
	and data instances x.
	"""
	prob_y = []
	prob_x = []
	row_ptr = [0]
	col_idx = []
	for i, line in enumerate(open(data_file_name)):
		line = line.split(None, 1)
		# In case an instance with all zero features
		if len(line) == 1: line += ['']
		label, features = line
		prob_y += [float(label)]
		if scipy != None and return_scipy:
			nz = 0
			for e in features.split():
				ind, val = e.split(":")
				val = float(val)
				if val != 0:
					col_idx += [int(ind)-1]
					prob_x += [val]
					nz += 1
			row_ptr += [row_ptr[-1]+nz]
		else:
			xi = {}
			for e in features.split():
				ind, val = e.split(":")
				if val != 0:
					xi[int(ind)] = float(val)
			prob_x += [xi]
	if scipy != None and return_scipy:
		prob_y = scipy.array(prob_y)
		prob_x = scipy.array(prob_x)
		col_idx = scipy.array(col_idx)
		row_ptr = scipy.array(row_ptr)
		prob_x = sparse.csr_matrix((prob_x, col_idx, row_ptr))
	return (prob_y, prob_x)