Esempio n. 1
0
def getUCIdata(name, N, dim, getOnline=False):
    """
    Reads a comma-separated UCI ``.data`` file and converts it into a prdataset

    Each non-blank line is expected to hold at least ``dim`` feature fields
    followed by the label in field number ``dim`` (0-based).  Feature fields
    that are missing or cannot be parsed as a float are stored as NaN.  A
    label is stored as a float when it parses as one, otherwise as the
    stripped string.

    If the input holds fewer than ``N`` records, the remaining rows keep
    their initial values (zeros for the features, ``None`` for the labels).

    :param name: base name of the dataset; ``name + ".data"`` is opened
        locally, or ``<archive>/name/name.data`` is downloaded when
        ``getOnline`` is true
    :param N: maximum number of records to read (rows of the feature matrix)
    :param dim: number of feature columns per record
    :param getOnline: download the file from the UCI archive instead of
        reading it from disk
    :return: a prdataset built from the (N, dim) feature matrix and the
        length-N label array
    :raises IndexError: if a non-blank line has no field at index ``dim``
    """
    if getOnline:
        link = "https://archive.ics.uci.edu/ml/machine-learning-databases/"
        f = requests.get(link + name + "/" + name + ".data")
        # Fail loudly on 404/500 instead of parsing an error page as data.
        f.raise_for_status()
        txt = f.text.splitlines()
    else:
        # Context manager guarantees the handle is closed even on error.
        with open(name + ".data", "r") as text_file:
            txt = text_file.readlines()

    x = numpy.zeros((N, dim))
    labx = numpy.empty(N, dtype=object)
    i = 0
    for line in txt:
        # Checked before touching row i so that N == 0 is handled cleanly.
        if i >= N:
            break
        # UCI files commonly end with empty lines; they carry no record.
        if not line.strip():
            continue
        nr = line.split(',')
        for j in range(dim):
            try:
                value = float(nr[j])
            except (ValueError, IndexError):
                # Unparseable (e.g. "?") or missing feature field.
                value = numpy.nan
            x[i, j] = value
        # finally get the label:
        thislab = nr[dim].rstrip()
        try:
            labx[i] = float(thislab)
        except ValueError:
            # Non-numeric label: keep the class name as a string.
            labx[i] = thislab
        i += 1
    a = dataset.prdataset(x, labx)
    return a
Esempio n. 2
0
def read_mat(file):
    """
    Loads a .mat file shipped with prtools and wraps it in a prdataset

    The file is looked up as ``<prtools package dir>/data/<file>.mat`` and
    its variable ``a`` is unpacked into features and labels.

    :param file: base name (without extension) of the .mat file in the
        /data folder of the prtools installation
    :return: a prdataset holding the features/labels stored in the file
    """
    import prtools  # imported here only to locate the package directory
    package_dir = os.path.dirname(prtools.__file__)
    contents = loadmat(package_dir + '/data/' + file + '.mat')
    record = contents['a']
    if file in ('diabetes', 'mfeat_zer', 'mfeat_pix'):
        # These three files are indexed by position instead of field name.
        entry = record[0][0]
        features = entry[0]
        labels = entry[1]
    else:
        features = record['data'][0][0]
        labels = record['nlab'][0][0]
    return dataset.prdataset(features, labels)