def get_data(zip_filename, file_name):
    """Load tab-separated observations from an archive member, grouped by class.

    The member's content is split into lines; each non-blank line is expected
    to hold, separated by tab characters: an integer observation number, a
    class id, and zero or more numeric attributes. Trailing whitespace on a
    line (including empty trailing columns) is dropped before splitting.

    NOTE(review): ``smart_open`` is defined outside this block; it is assumed
    to return a ZipFile-like object exposing ``read(member_name)`` returning
    text and ``close()`` -- confirm against its definition.

    Args:
        zip_filename: Value handed to ``smart_open``.
        file_name: Member name passed to the handle's ``read``.

    Returns:
        dict mapping each class id (the raw string from the second column) to
        a list of attribute rows, each row being a list of floats, in the
        order the rows appear in the data.

    Raises:
        ValueError: if an observation number is not an integer or an
            attribute is not a valid float.
        IndexError: if a non-blank line has fewer than two columns.
    """
    fh = smart_open(zip_filename)
    try:
        data = fh.read(file_name)
    finally:
        # Release the handle as soon as the member is in memory, even when
        # read() fails, so the archive is never left open.
        fh.close()

    all_data = {}
    for line in data.split('\n'):
        line = line.rstrip()
        if not line:
            # Blank (or whitespace-only) line: nothing to parse.
            continue
        fields = line.split('\t')
        # The observation number is not stored, but converting it still
        # rejects malformed rows with a ValueError.
        int(fields[0])
        class_id = fields[1]
        # Build a concrete list: on Python 3 map() would store a lazy,
        # single-use iterator instead of the float values.
        attrs = [float(value) for value in fields[2:]]
        all_data.setdefault(class_id, []).append(attrs)
    return all_data
def get_data(filename, key_list, separator):
    """Read a delimited file with a header row into a dict keyed by chosen columns.

    The first line supplies the column names. For every following non-blank
    line, the values of the columns whose names appear in ``key_list`` are
    collected (in file column order, not ``key_list`` order) into a tuple
    that becomes the key in the result; the remaining columns are stored as
    a ``{column_name: value}`` dict under that key.

    Line terminators are stripped before splitting so that the last column's
    name and values do not carry a trailing newline. Rows with fewer fields
    than the header are truncated to the fields present; extra fields beyond
    the header are ignored. If several rows share the same key tuple, the
    last one wins.

    NOTE(review): ``smart_open`` is defined outside this block; it is assumed
    to return a file-like context manager that supports ``readline()`` and
    line iteration -- confirm against its definition.

    Args:
        filename: Value handed to ``smart_open``.
        key_list: Container of column names that make up the row key.
        separator: Field delimiter passed to ``split``.

    Returns:
        dict mapping ``tuple`` of key-column values to a dict of the
        remaining column values (all values are unparsed strings).
    """
    with smart_open(filename) as fh:
        header = fh.readline()
        # Choose the terminator literal matching the line type so that both
        # text and binary handles can be stripped without a TypeError.
        line_ending = '\r\n' if isinstance(header, str) else b'\r\n'
        col_list = header.rstrip(line_ending).split(separator)

        data_dict = {}
        # Iterate the handle directly instead of readlines() so the whole
        # file is not held in memory at once.
        for line in fh:
            line = line.rstrip(line_ending)
            if not line:
                # A blank line would otherwise create a junk entry.
                continue
            data_points = line.split(separator)

            key_values = ()
            row_data = {}
            for key, value in zip(col_list, data_points):
                if key in key_list:
                    key_values += (value,)
                else:
                    row_data[key] = value
            # row_data is created fresh for every line, so it can be stored
            # as-is without copying.
            data_dict[key_values] = row_data
    return data_dict