Esempio n. 1
0
def get_data(zip_filename, file_name):
    """Read tab-separated observations and group their attributes by class.

    The container is opened with ``smart_open(zip_filename)`` and the whole
    content of ``file_name`` is fetched with a single ``read(file_name)``
    call. Every non-blank line is expected to hold, separated by tabs: an
    integer observation number, a class id, and zero or more numeric
    attributes.

    Args:
        zip_filename: Passed unchanged to ``smart_open``.
        file_name: Passed unchanged to the ``read`` method of the opened
            handle.

    Returns:
        A dict mapping each class id (as read, a string) to a list of
        attribute rows in file order. Each row is a list of floats.

    Raises:
        ValueError: If the first column is not an integer or an attribute
            is not a valid float.
        IndexError: If a non-blank line has fewer than two columns.
    """
    fh = smart_open(zip_filename)
    try:
        # NOTE(review): the result is split on a str newline below, so this
        # assumes read() returns text -- confirm what smart_open yields.
        data = fh.read(file_name)
    finally:
        # Release the handle once the content is in memory. The lookup is
        # guarded because the type returned by smart_open is not visible here.
        close = getattr(fh, 'close', None)
        if close is not None:
            close()

    all_data = {}
    for line in data.split('\n'):
        # Drop trailing whitespace (including trailing tabs) before splitting.
        line = line.rstrip()
        if not line:
            continue
        fields = line.split('\t')
        # The observation number is not stored, but it is still converted so
        # that a malformed first column keeps raising ValueError.
        _ = int(fields[0])
        class_id = fields[1]
        # Build a real list: on Python 3 ``map`` would store a lazy iterator
        # that can be consumed only once.
        attrs = [float(x) for x in fields[2:]]
        all_data.setdefault(class_id, []).append(attrs)

    return all_data
Esempio n. 2
0
def get_data(filename, key_list, separator):
    """Read a delimited table into a dict indexed by its key columns.

    The first line of the file is the header and names the columns. For
    every following non-blank line, the values of the columns listed in
    ``key_list`` are collected (in header order) into a tuple that becomes
    the record's key; all remaining columns are stored in a per-record dict
    mapping column name to the raw string value.

    Args:
        filename: Passed unchanged to ``smart_open``.
        key_list: Container of column names whose values form the key.
        separator: Field delimiter used to split the header and every row.

    Returns:
        A dict mapping ``tuple_of_key_values`` to ``{column_name: value}``.
        If two rows produce the same key tuple, the later row replaces the
        earlier one. Rows shorter than the header simply lack the trailing
        columns; extra fields beyond the header are ignored.
    """
    data_dict = {}
    with smart_open(filename) as fh:
        header = fh.readline()
        # Pick line-ending characters matching the type of the lines read,
        # so both text and byte lines are handled.
        eol = '\r\n' if isinstance(header, str) else b'\r\n'

        # Strip only the line terminator: readline() keeps it, which would
        # otherwise glue a newline onto the last column name (so it could
        # never match an entry of key_list) and onto the last value of
        # every row. Other whitespace is left alone on purpose, since it may
        # be the separator itself (e.g. trailing empty tab-separated cells).
        col_list = header.rstrip(eol).split(separator)

        for line in fh.readlines():
            line = line.rstrip(eol)
            if not line:
                # A blank line carries no record.
                continue

            key_values = ()
            row_data = {}
            for key, value in zip(col_list, line.split(separator)):
                if key in key_list:
                    key_values += (value,)
                else:
                    # Not a key column: keep it as a data field.
                    row_data[key] = value

            # row_data is created fresh for each row and holds only strings,
            # so it can be stored directly without copying.
            data_dict[key_values] = row_data

    return data_dict