def load_dataset(self, dataset_path, x_col_name='x', y_col_name='y'):
    logger.info("Load a dataset from {}.".format(dataset_path))
    dataset_dirpath = os.path.dirname(dataset_path)
    xlist = []
    ylist = []
    indexcsv = pd.read_csv(dataset_path)
    for cell in indexcsv[x_col_name]:
        df = pd.read_csv(os.path.join(dataset_dirpath, cell), header=None)
        # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
        # equivalent replacement.
        xlist.append(np.float32(df.to_numpy().flatten()))
    for cell in indexcsv[y_col_name]:
        ylist.append(np.int32(cell))
    return tuple_dataset.TupleDataset(xlist, ylist)
def train(network, loss, X_tr, Y_tr, X_te, Y_te, n_epochs=30, gamma=1):
    model = Objective(network, loss=loss, gamma=gamma)
    # optimizer = optimizers.SGD()
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    train = tuple_dataset.TupleDataset(X_tr, Y_tr)
    test = tuple_dataset.TupleDataset(X_te, Y_te)
    train_iter = iterators.SerialIterator(train, batch_size=1, shuffle=True)
    # Note: test_iter is built but never attached to an Evaluator extension.
    test_iter = iterators.SerialIterator(test, batch_size=1, repeat=False,
                                         shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (n_epochs, 'epoch'))
    trainer.run()
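# `Objective` is not defined in these snippets. A minimal sketch of what
# train() above appears to assume -- a Chainer link wrapping a predictor and
# a loss function that reports the loss -- might look like this (the role of
# `gamma` as a loss scale is a guess from the call site):
import chainer
import chainer.functions as F

class Objective(chainer.Chain):
    def __init__(self, predictor, loss=F.softmax_cross_entropy, gamma=1):
        super(Objective, self).__init__()
        with self.init_scope():
            self.predictor = predictor
        self.loss = loss
        self.gamma = gamma

    def __call__(self, x, t):
        # Forward pass, scaled loss, and reporting for the Trainer logs.
        y = self.predictor(x)
        loss = self.gamma * self.loss(y, t)
        chainer.report({'loss': loss}, self)
        return loss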
def union(dataset_dict, args, dump_path):
    print('start data load domain-union')
    union_train_x = []
    union_test_x = []
    union_train_ga = []
    union_test_ga = []
    union_train_o = []
    union_test_o = []
    union_train_ni = []
    union_test_ni = []
    for domain in domain_dict:
        # Per domain: first 70% for training, the next 10% for testing.
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        dev_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.8)
        union_train_x += dataset_dict['{0}_x'.format(domain)][:train_size]
        union_test_x += dataset_dict['{0}_x'.format(domain)][train_size:dev_size]
        union_train_ga += dataset_dict['{0}_y_ga'.format(domain)][:train_size]
        union_test_ga += dataset_dict['{0}_y_ga'.format(domain)][train_size:dev_size]
        union_train_o += dataset_dict['{0}_y_o'.format(domain)][:train_size]
        union_test_o += dataset_dict['{0}_y_o'.format(domain)][train_size:dev_size]
        union_train_ni += dataset_dict['{0}_y_ni'.format(domain)][:train_size]
        union_test_ni += dataset_dict['{0}_y_ni'.format(domain)][train_size:dev_size]
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ga)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ga)
    training(train_data, test_data, 'union', 'ga', dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_o)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_o)
    training(train_data, test_data, 'union', 'o', dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ni)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ni)
    training(train_data, test_data, 'union', 'ni', dump_path, args)
def data_manage_animefacedata(data_path, in_size=224):
    # Data path setup
    folders = sorted(os.listdir(data_path))
    cats = []  # Categories list
    all_data = []
    for folder in folders:
        if os.path.isfile(os.path.join(data_path, folder, "ignore")):
            # print("Folder " + folder + " is ignored!")
            continue
        else:
            cats.append(folder)
            label = folder
            img_filelist = glob.glob(os.path.join(data_path, folder, "*.png"))
            for imgfile in img_filelist:
                all_data.append([imgfile, label])
    print("labels=" + str(len(cats)))
    all_data = np.random.permutation(all_data)  # Shuffle the order
    imageData = []
    labelData = []
    for PathAndLabel in all_data:
        img = Image.open(PathAndLabel[0])
        img = img.resize((in_size, in_size))
        label_id = cats.index(PathAndLabel[1])
        # print(PathAndLabel[1])
        img = np.asarray(np.float32(img))
        img = img.transpose(2, 0, 1)
        img = img[:3, ...]  # keep RGB only, dropping any alpha channel
        # img = np.reshape(img, (3, in_size, in_size))
        imageData.append(img)
        labelData.append(np.int32(label_id))
    # 7/8 of the data for training, the rest for testing.
    threshold = np.int32(len(imageData) / 8 * 7)
    train = tuple_dataset.TupleDataset(imageData[0:threshold],
                                       labelData[0:threshold])
    test = tuple_dataset.TupleDataset(imageData[threshold:],
                                      labelData[threshold:])
    return train, test
def __filter_class(dataset, extract_class):
    target_data = []
    target_label = []
    for data, label in dataset:
        if label in extract_class:
            target_data.append(data)
            target_label.append(extract_class.index(label))
    target_data = np.array(target_data)
    target_label = np.array(target_label, dtype=np.int32)
    dataset = tuple_dataset.TupleDataset(target_data, target_label)
    train, val = split_dataset(dataset, int(len(dataset) * 0.9))
    return train, val
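# A short usage sketch for __filter_class, assuming it is module-level as
# shown and that `extract_class` lists the original class ids to keep (here
# a hypothetical two-class subset of MNIST; chainer.datasets.get_mnist is
# the standard loader):
from chainer.datasets import get_mnist

train_full, _ = get_mnist(withlabel=True, ndim=1)
# Keep only digits 3 and 5; they are relabeled 0 and 1 by index().
train, val = __filter_class(train_full, extract_class=[3, 5])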
def load_dataset(self, dataset_path, x_col_name='x', y_col_name='y'):
    logger.info("Load a dataset from {}.".format(dataset_path))
    dataset_dirpath = os.path.dirname(dataset_path)
    xlist = []
    ylist = []
    indexcsv = pd.read_csv(dataset_path)
    for cell in indexcsv[x_col_name]:
        img = np.asarray(Image.open(os.path.join(dataset_dirpath, cell)))
        # Scale grayscale pixels to [0, 1] and convert HWC -> CHW.
        x = np.float32(img.reshape(img.shape[0], img.shape[1], 1) / 255.0)
        xlist.append(x.transpose(2, 0, 1))
    for cell in indexcsv[y_col_name]:
        ylist.append(np.int32(cell))
    return tuple_dataset.TupleDataset(xlist, ylist)
def union_train(dataset_dict, args, dump_path):
    print('start data load domain-union')
    union_train_x = []
    union_test_x = []
    union_train_ga = []
    union_test_ga = []
    union_train_o = []
    union_test_o = []
    union_train_ni = []
    union_test_ni = []
    union_train_z = []
    union_test_z = []
    train_dataset_dict = {}
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        dev_size = math.ceil(
            len(dataset_dict['{0}_x'.format(domain)]) * args.train_test_ratio)
        union_train_x += dataset_dict['{0}_x'.format(domain)][:train_size]
        union_test_x += dataset_dict['{0}_x'.format(domain)][train_size:dev_size]
        union_train_ga += dataset_dict['{0}_y_ga'.format(domain)][:train_size]
        union_test_ga += dataset_dict['{0}_y_ga'.format(domain)][train_size:dev_size]
        union_train_o += dataset_dict['{0}_y_o'.format(domain)][:train_size]
        union_test_o += dataset_dict['{0}_y_o'.format(domain)][train_size:dev_size]
        union_train_ni += dataset_dict['{0}_y_ni'.format(domain)][:train_size]
        union_test_ni += dataset_dict['{0}_y_ni'.format(domain)][train_size:dev_size]
        union_train_z += dataset_dict['{0}_z'.format(domain)][:train_size]
        union_test_z += dataset_dict['{0}_z'.format(domain)][train_size:dev_size]
        train_dataset_dict['{0}_y_ga'.format(domain)] = dataset_dict[
            '{0}_y_ga'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_o'.format(domain)] = dataset_dict[
            '{0}_y_o'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_ni'.format(domain)] = dataset_dict[
            '{0}_y_ni'.format(domain)][:train_size]
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ga,
                                            union_train_z)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ga,
                                           union_test_z)
    type_statistics_dict = calculate_type_statistics(train_dataset_dict, 'ga')
    training(train_data, test_data, type_statistics_dict, 'union', 'ga',
             dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_o,
                                            union_train_z)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_o,
                                           union_test_z)
    type_statistics_dict = calculate_type_statistics(train_dataset_dict, 'o')
    training(train_data, test_data, type_statistics_dict, 'union', 'o',
             dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ni,
                                            union_train_z)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ni,
                                           union_test_z)
    type_statistics_dict = calculate_type_statistics(train_dataset_dict, 'ni')
    training(train_data, test_data, type_statistics_dict, 'union', 'ni',
             dump_path, args)
def load_dataset():
    image_data = np.load("./data/image_data.npy")
    label_data = np.load("./data/label_data.npy")
    # Convert the NumPy arrays into a TupleDataset
    dataset = tuple_dataset.TupleDataset(image_data, label_data)
    # Split into training and test data
    train_data, test_data = split_dataset_random(
        dataset=dataset, first_size=int(len(dataset) * 0.8), seed=0)
    # Debug output
    print("train_data: {0}\ttest_data: {1}".format(len(train_data),
                                                   len(test_data)))
    return train_data, test_data
def convert_to_variable(x_train, x_test, t_train):
    """Convert NumPy arrays into a training TupleDataset and a test Variable.

    :param x_train:
    :param t_train:
    :param x_test:
    :return:
    """
    x_test_v = Variable(x_test)
    train = tuple_dataset.TupleDataset(x_train, t_train)
    return train, x_test_v
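# A short usage sketch with synthetic data (shapes and dtypes are
# illustrative; Chainer expects float32 inputs and int32 labels):
import numpy as np

x_train = np.random.rand(100, 10).astype(np.float32)
t_train = np.random.randint(0, 2, size=100).astype(np.int32)
x_test = np.random.rand(20, 10).astype(np.float32)
train, x_test_v = convert_to_variable(x_train, x_test, t_train)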
def __convert_tests(self, tests):
    data = []
    labels = []
    for r in tests:
        input_tag_tokenizer = tokenizer.InputTagTokenizer()
        tokens = input_tag_tokenizer.get_attrs_value(r.html)
        bow = self.dictionary.doc2bow(tokens)
        vec = matutils.corpus2dense([bow], self.in_units).T[0]
        if r.label not in self.label_types:
            continue  # skip labels undefined in training data
        label_id = self.label_types.index(r.label)
        data.append(np.array(vec).astype(np.float32))
        labels.append(np.int32(label_id))
    return tuple_dataset.TupleDataset(data, labels)
def _preprocess_svhn(raw, withlabel, scale, image_dtype, label_dtype):
    images = raw["x"].transpose(3, 2, 0, 1)
    images = images.astype(image_dtype)
    images *= scale / 255.
    labels = raw["y"].astype(label_dtype).flatten()
    # Labels go from 1-10, with the digit "0" having label 10.
    # Set "0" to be label 0 to restore the expected ordering.
    labels[labels == 10] = 0
    if withlabel:
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
def __init__(self, path, width=60, height=60):
    channels = 3
    # Note: the `path` argument is immediately shadowed by the glob below.
    path = glob.glob('./mouth/*')
    pathsAndLabels = []
    index = 0
    for p in path:
        print(p + "," + str(index))
        pathsAndLabels.append(np.asarray([p, index]))
        index = index + 1
    allData = []
    for pathAndLabel in pathsAndLabels:
        path = pathAndLabel[0]
        label = pathAndLabel[1]
        imagelist = glob.glob(path + "/*")
        for imgName in imagelist:
            allData.append([imgName, label])
    allData = np.random.permutation(allData)
    imageData = []
    labelData = []
    for pathAndLabel in allData:
        # print(pathAndLabel[0])
        img = Image.open(pathAndLabel[0])
        img = img.resize((width, height))
        r, g, b = img.split()
        rImgData = np.asarray(np.float32(r) / 255.0)
        gImgData = np.asarray(np.float32(g) / 255.0)
        bImgData = np.asarray(np.float32(b) / 255.0)
        imgData = np.asarray([rImgData, gImgData, bImgData])
        imageData.append(imgData)
        labelData.append(np.int32(pathAndLabel[1]))
    # 7/8 of the data for training, the rest for testing.
    threshold = np.int32(len(imageData) / 8 * 7)
    self.train = tuple_dataset.TupleDataset(imageData[0:threshold],
                                            labelData[0:threshold])
    self.test = tuple_dataset.TupleDataset(imageData[threshold:],
                                           labelData[threshold:])
def learn(trial, train, test, seq_len):
    train = tuple_dataset.TupleDataset(train)
    train_iter = LSTM_Iterator(train, batch_size=10, seq_len=seq_len)
    test = tuple_dataset.TupleDataset(test)
    test_iter = LSTM_Iterator(test, batch_size=10, seq_len=seq_len,
                              repeat=False)
    model = create_model(trial)
    gpu_device = 0
    cuda.get_device(gpu_device).use()
    model.to_gpu(gpu_device)
    optimizer = create_optimizer(trial, model)
    updater = LSTM_updater(train_iter, optimizer, gpu_device)
    # stop_trigger = training.triggers.EarlyStoppingTrigger(
    #     monitor='validation/main/loss', check_trigger=(5, 'epoch'),
    #     max_trigger=(100, 'epoch'))
    # trainer = training.Trainer(updater, stop_trigger, out="result")
    trainer = training.Trainer(updater, (100, 'epoch'), out='result')
    test_model = model.copy()
    test_rnn = test_model.predictor
    test_rnn.dr = 0.0  # disable dropout in the evaluation copy
    trainer.extend(extensions.Evaluator(test_iter, test_model,
                                        device=gpu_device))
    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, 'validation/main/loss', (5, 'epoch')))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar())
    log_report_extension = extensions.LogReport(log_name=None)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    trainer.extend(log_report_extension)
    trainer.run()
    return log_report_extension.log[-1]
def main():
    dataset_dict = load_dataset('../dataframe')
    train_dataset_dict = {}
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        train_dataset_dict['{0}_y_ga'.format(domain)] = dataset_dict[
            '{0}_y_ga'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_o'.format(domain)] = dataset_dict[
            '{0}_y_o'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_ni'.format(domain)] = dataset_dict[
            '{0}_y_ni'.format(domain)][:train_size]
    for case in ['ga', 'o', 'ni']:
        type_statistics_dict = calculate_type_statistics(
            train_dataset_dict, case)
        frust_model_path = load_frust_model_path(
            '../frustratingly_easy_method_k_params/normal/dropout-0.2_batchsize-32',
            case)
        statistics_model_path = load_statistics_model_path(
            '../statistics_method/normal/dropout-0.2_batchsize-32', case)
        for domain in domain_dict:
            fine_model_path = load_fine_model_path(
                '../fine_tuning_method/fine_tuning/alpha-0.001_beta1-0.9_weightdecay-0.0001',
                case, domain)
            size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.8)
            test_x = dataset_dict['{0}_x'.format(domain)][size:]
            test_z = dataset_dict['{0}_z'.format(domain)][size:]
            if case == 'ga':
                test_y = dataset_dict['{0}_y_ga'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ga_dep_tag'.format(
                    domain)][size:]
            elif case == 'o':
                test_y = dataset_dict['{0}_y_o'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_o_dep_tag'.format(
                    domain)][size:]
            elif case == 'ni':
                test_y = dataset_dict['{0}_y_ni'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ni_dep_tag'.format(
                    domain)][size:]
            test_word = dataset_dict['{0}_word'.format(domain)][size:]
            test_is_verb = dataset_dict['{0}_is_verb'.format(domain)][size:]
            test_data = tuple_dataset.TupleDataset(test_x, test_y,
                                                   test_y_dep_tag, test_z,
                                                   test_word, test_is_verb)
            predict(frust_model_path, statistics_model_path, fine_model_path,
                    test_data, type_statistics_dict, domain, case)
def _preprocess_cifar(images, labels, withlabel, ndim, scale):
    if ndim == 1:
        images = images.reshape(-1, 3072)
    elif ndim == 3:
        images = images.reshape(-1, 3, 32, 32)
    else:
        raise ValueError('invalid ndim for CIFAR dataset')
    images = images.astype(numpy.float32)
    images *= scale / 255.
    if withlabel:
        labels = labels.astype(numpy.int32)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
def prepare_dataset(dataset_dir):
    current_images = []
    next_images = []
    if os.path.isdir(dataset_dir):
        # Pair each frame with its successor: current_images[i] and
        # next_images[i] are consecutive frames.
        images = load_images(dataset_dir)
        current_images.extend(images[0:-2])
        next_images.extend(images[1:-1])
        for file_name in os.listdir(dataset_dir):
            path = os.path.join(dataset_dir, file_name)
            if os.path.isdir(path):
                print('sub dir: ', file_name)
                images = load_images(path)
                current_images.extend(images[0:-2])
                next_images.extend(images[1:-1])
    return tuple_dataset.TupleDataset(current_images, next_images)
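# `load_images` is not shown in these snippets. A plausible sketch under the
# assumption that it returns a list of float32 CHW arrays in file-name order
# (the glob pattern and normalization are guesses):
import glob
import os

import numpy as np
from PIL import Image

def load_images(dir_path):
    images = []
    for file_path in sorted(glob.glob(os.path.join(dir_path, '*.png'))):
        img = np.asarray(Image.open(file_path), dtype=np.float32) / 255.0
        images.append(img.transpose(2, 0, 1))  # HWC -> CHW
    return images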
def run(inputData, outputData):
    xArray = np.array(inputData)
    yArray = np.array(outputData)
    # The modulo-based split below is dead code: it is immediately
    # overwritten by the vsplit-based 3:1 split that follows.
    xTrain = xArray[[i for i in range(len(xArray)) if i % 4 != 0], :]
    yTrain = yArray[[i for i in range(len(yArray)) if i % 4 != 0], :]
    xTest = xArray[[i for i in range(len(xArray)) if i % 4 == 0], :]
    yTest = yArray[[i for i in range(len(yArray)) if i % 4 == 0], :]
    xTrain, xTest = np.vsplit(xArray, [int(len(xArray) * 3.0 / 4.0)])
    yTrain, yTest = np.vsplit(yArray, [int(len(xArray) * 3.0 / 4.0)])
    # model = L.Classifier(LotoNN(), lossfun=sigmoid_cross_entropy.sigmoid_cross_entropy)
    model = L.Classifier(LotoNN())
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    train = tuple_dataset.TupleDataset(xTrain, yTrain)
    test = tuple_dataset.TupleDataset(xTest, yTest)
    trainIter = chainer.iterators.SerialIterator(train, 100)
    testIter = chainer.iterators.SerialIterator(test, 100, repeat=False,
                                                shuffle=False)
    updater = training.StandardUpdater(trainIter, optimizer, device=-1)
    trainer = training.Trainer(updater, (100, 'epoch'), out="result")
    trainer.extend(extensions.Evaluator(testIter, model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def loadData(total):
    f = open('converted_data.txt', 'r')
    index = 0
    text_data = []
    label_data = []
    for line in f:
        if index < total:
            # Each line: <id>,<label>,<feature values...>
            tmp = line.split(",")
            text = np.array([np.float32(x) for x in tmp[2:]])
            label = np.int32(tmp[1])
            text_data.append(text)
            label_data.append(label)
            index += 1
    f.close()
    # 90% of the data for training, the rest for testing.
    threshold = np.int32(total * 0.9)
    train = tuple_dataset.TupleDataset(text_data[0:threshold],
                                       label_data[0:threshold])
    test = tuple_dataset.TupleDataset(text_data[threshold:],
                                      label_data[threshold:])
    return train, test
def load_dataset():
    # Resolve the data directory relative to this file; the original used
    # __name__ here, which resolves relative to the working directory instead.
    name = os.path.dirname(os.path.abspath(__file__))
    joined_path = os.path.join(name, './utils')
    data_path = os.path.normpath(joined_path)
    X_train, y_train = load_fmnist(str(data_path), kind='train')
    X_test, y_test = load_fmnist(str(data_path), kind='t10k')
    X_train = X_train.astype('float32') / 255
    X_test = X_test.astype('float32') / 255
    train_data = np.array(
        [X_train[i].reshape(1, 28, 28) for i in range(len(X_train))])
    test_data = np.array(
        [X_test[i].reshape(1, 28, 28) for i in range(len(X_test))])
    # Chainer's classifiers expect int32 labels (the original cast to int8).
    y_train = y_train.astype('int32')
    y_test = y_test.astype('int32')
    train = tuple_dataset.TupleDataset(train_data, y_train)
    test = tuple_dataset.TupleDataset(test_data, y_test)
    return train, test
def get_titanic():
    train_data = pd.read_csv("datasets/train.csv", header="infer")
    df = pd.DataFrame(train_data)
    cols = ['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age',
            'SibSp', 'Parch', 'Fare', 'Embarked']
    df = df[cols]
    df = pd.get_dummies(df[cols])
    percentiles = [0.05, 0.25, 0.5, 0.75, 0.99]
    num_over_99 = df.describe(percentiles=percentiles)['Fare']['99%']
    # Note: this line has no effect as written -- the filtered result is
    # never assigned back to the DataFrame.
    df["Fare"].where(df["Fare"] > num_over_99, None).dropna()
    imr = Imputer(missing_values='NaN', strategy='median', axis=0)
    imr = imr.fit(df)
    train = imr.transform(df.values)
    # Standardization
    sc = StandardScaler()
    train = sc.fit_transform(train)
    X = train[:, 2:]
    y = train[:, 1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1)
    train = tuple_dataset.TupleDataset(np.float32(X_train), np.int32(y_train))
    test = tuple_dataset.TupleDataset(np.float32(X_test), np.int32(y_test))
    return train, test
def main():
    X, y = generate_data()
    model = L.Classifier(MakeMoonModel())
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    train_dataset = tuple_dataset.TupleDataset(X, y)
    train_iter = iterators.SerialIterator(train_dataset, batch_size=200)
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (10000, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())
    trainer.run()
    visualize(X, y, model)
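# `generate_data` is not shown. Given the model name, it presumably wraps
# scikit-learn's make_moons; a sketch under that assumption (sample count
# and noise level are guesses):
import numpy as np
from sklearn.datasets import make_moons

def generate_data(n_samples=1000):
    X, y = make_moons(n_samples=n_samples, noise=0.1, random_state=0)
    # Chainer expects float32 inputs and int32 labels.
    return X.astype(np.float32), y.astype(np.int32)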
def get_train_and_test_2_dim(dataset='kemerer', train_size=0.5,
                             validation_size=0):
    x_train, x_validation, x_test, y_train, y_validation, y_test, in_size = \
        get_splited_train_and_test(dataset, train_size, validation_size)
    train = tuple_dataset.TupleDataset(
        tuple_dataset.TupleDataset(x_train.astype('float32')),
        y_train.astype('float32'))
    validation = tuple_dataset.TupleDataset(
        tuple_dataset.TupleDataset(x_validation.astype('float32')),
        y_validation.astype('float32'))
    test = tuple_dataset.TupleDataset(
        tuple_dataset.TupleDataset(x_test.astype('float32')),
        y_test.astype('float32'))
    return train, validation, test, in_size, \
        x_train.astype('float32').reshape(
            (len(x_train), 1, len(x_train[0]))), \
        y_train.astype('float32'), \
        x_validation.astype('float32').reshape(
            (len(x_validation), 1, len(x_validation[0]))), \
        y_validation.astype('float32'), \
        x_test.astype('float32').reshape((len(x_test), 1, len(x_test[0]))), \
        y_test.astype('float32')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', '-m', type=str, default='')
    parser.add_argument('--train_test_ratio', type=float, default=0.8)
    parser.add_argument('--case', type=str, default='')
    args = parser.parse_args()
    dataset_dict = load_dataset('../dataframe')
    train_dataset_dict = {}
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        train_dataset_dict['{0}_y_ga'.format(domain)] = dataset_dict[
            '{0}_y_ga'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_o'.format(domain)] = dataset_dict[
            '{0}_y_o'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_ni'.format(domain)] = dataset_dict[
            '{0}_y_ni'.format(domain)][:train_size]
    for case in [args.case]:
        type_statistics_dict = calculate_type_statistics(
            train_dataset_dict, case)
        for domain in domain_dict:
            model_path = load_model_path(args.dir, case, domain)
            size = math.ceil(
                len(dataset_dict['{0}_x'.format(domain)]) *
                args.train_test_ratio)
            test_x = dataset_dict['{0}_x'.format(domain)][size:]
            test_z = dataset_dict['{0}_z'.format(domain)][size:]
            if case == 'ga':
                test_y = dataset_dict['{0}_y_ga'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ga_dep_tag'.format(
                    domain)][size:]
            elif case == 'o':
                test_y = dataset_dict['{0}_y_o'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_o_dep_tag'.format(
                    domain)][size:]
            elif case == 'ni':
                test_y = dataset_dict['{0}_y_ni'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ni_dep_tag'.format(
                    domain)][size:]
            test_word = dataset_dict['{0}_word'.format(domain)][size:]
            test_is_verb = dataset_dict['{0}_is_verb'.format(domain)][size:]
            test_data = tuple_dataset.TupleDataset(test_x, test_y,
                                                   test_y_dep_tag, test_z,
                                                   test_word, test_is_verb)
            predict(model_path, test_data, type_statistics_dict, domain, case)
def get_tuple(data):
    data = np.array(data)
    print('get_tuple data:', data.shape)
    t_data = []
    t_label = []
    for it in data:
        if len(it[0]) == 0:
            # Pad empty sequences with a single all-zero 54-dim feature
            # vector and a zero label.
            it[0].append([0 for i in range(54)])
            it[1].append(0)
        t_data.append(np.array(it[0]).astype(np.float32))
        t_label.append(np.array(it[1]).astype(np.int32))
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    print("t_data:", t_data.shape)
    print("t_data[0].shape:", t_data[0].shape)
    return tuple_dataset.TupleDataset(t_data, t_label)
def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype):
    images = raw['x']
    if ndim == 2:
        images = images.reshape(-1, 28, 28)
    elif ndim == 3:
        images = images.reshape(-1, 1, 28, 28)
    elif ndim != 1:
        raise ValueError('invalid ndim for MNIST dataset')
    images = images.astype(image_dtype)
    images *= scale / 255.
    if withlabel:
        labels = raw['y'].astype(label_dtype)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
def main():
    iris = load_iris()
    model = L.Classifier(IrisModel())
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    train_data = tuple_dataset.TupleDataset(iris.data.astype(np.float32),
                                            iris.target.astype(np.int32))
    train_iter = iterators.SerialIterator(train_data, batch_size=50)
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (10000, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())
    trainer.run()
    X = np.array([[5.4, 3.6, 1.4, 0.3],
                  [5.4, 2.6, 4.0, 1.4],
                  [6.8, 3.2, 5.5, 2.1]])
    y = model.predictor(Variable(X.astype(np.float32)))
    print(y)
def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype,
                      rgb_format):
    images = raw['x']
    if ndim == 2:
        images = images.reshape(-1, 28, 28)
    elif ndim == 3:
        images = images.reshape(-1, 1, 28, 28)
        if rgb_format:
            images = np.broadcast_to(images,
                                     (len(images), 3) + images.shape[2:])
    elif ndim != 1:
        raise ValueError('invalid ndim for MNIST dataset')
    images = images.astype(image_dtype)
    images *= scale / 255.
    if withlabel:
        labels = raw['y'].astype(label_dtype)
        return tuple_dataset.TupleDataset(images, labels)
    return images
def load_image(filepath, rootpath, patchsize, label_num):
    file_name = []
    with open(filepath) as f:
        all_line = f.readlines()
        for line in all_line:
            file_name.append(line.replace("\n", ""))
    tmp = np.zeros((patchsize, patchsize), dtype=np.float32)
    # input images
    x = np.zeros((len(file_name), 1, patchsize, patchsize), dtype=np.float32)
    # supervised data (labels)
    t = np.zeros((len(file_name), patchsize, patchsize), dtype=np.int32)
    with tqdm(total=len(file_name)) as pbar:
        for i in range(len(file_name)):
            img, d_ = IO.read_mhd_and_raw_withoutSitk(
                rootpath + "/image/" + file_name[i] + ".mhd")
            # nda_img = img.reshape((d_['DimSize'][1], d_['DimSize'][0])).astype(np.float32) / 255  # img => [0, 1]
            nda_img = img.reshape(
                (d_['DimSize'][1], d_['DimSize'][0])).astype(np.float32)  # img => mean 0, var 1
            label, d_ = IO.read_mhd_and_raw_withoutSitk(
                rootpath + "/label/" + file_name[i] + ".mhd")
            nda_label = label.reshape(
                (d_['DimSize'][1], d_['DimSize'][0])).astype(np.int32)
            # img = sitk.ReadImage(rootpath + "/image/" + file_name[i] + ".mhd")
            # nda_img = sitk.GetArrayFromImage(img).astype(np.float32)  # img => mean 0, var 1
            # label = sitk.ReadImage(rootpath + "/label/" + file_name[i] + ".mhd")
            # nda_label = sitk.GetArrayFromImage(label).astype(np.int32)
            if label_num == 2:
                # train target => bkg, accumulate
                nda_label[np.where(nda_label == 2)] = -1
                nda_label[np.where(nda_label == 4)] = 1
            elif label_num == 3:
                # train target => bkg, normal, abnormal
                nda_label[np.where(nda_label == 2)] = -1
                nda_label[np.where(nda_label == 4)] = 2
            elif label_num == 4:
                # train target => bkg, excluded, normal, abnormal
                nda_label[np.where(nda_label == 4)] = 3
            # input
            x[i, 0, :, :] = nda_img
            # label
            t[i, :, :] = nda_label
            pbar.update(1)
    temp = tuple_dataset.TupleDataset(x, t)
    return temp
def get_new_tuple(data_list):
    data = []
    label = []
    for it in data_list:
        if len(it[0]) == 0:
            continue
        assert len(it[0]) != 0
        assert len(it[0]) == len(it[1])
        for x in it[0]:
            assert len(x) == 54
        data.append(it[0])
        # Shift every label by +1 to simplify later computation.
        label.append(it[1] + 1)
    data = np.array(data)
    label = np.array(label)
    return tuple_dataset.TupleDataset(data, label)
def __get_dataset(root_path):
    def __get_hidden_layer_value(val, model, batch_size):
        # set model
        model.train = False
        if GPU_ID > -1:
            model.to_gpu()
        # set dataset
        outputs = []
        labels = []
        for i in range(0, len(val), batch_size):
            logging.info('forwarding... [%s / %s]', i + batch_size, len(val))
            data = [_data for _data, _ in val[i:i + batch_size]]
            label = [_label for _, _label in val[i:i + batch_size]]
            x = Variable(np.array(data))
            if GPU_ID > -1:
                x.to_gpu()
            output = model.predictor.get_features(x).data
            if GPU_ID > -1:
                output = cuda.to_cpu(output)
            outputs.extend(output)
            labels.extend(label)
        return outputs, labels

    output_path = os.path.join(root_path,
                               'result/resnet50_pretrain_warp/dataset.npz')
    if output_path != '' and os.path.isfile(output_path):
        np_file = np.load(output_path)
        data = np_file['data']
        label = np_file['label']
    else:
        # get root dataset
        train, _ = get_clf_data(use_memory=False, img_size=224,
                                img_type='warp', split_val=False)
        # model
        model_path = os.path.join(
            root_path, 'result/resnet50_pretrain_warp/model_epoch_100')
        model = get_model('ResNet50-cls', pretrained_path=model_path)
        # get data and label
        data, label = __get_hidden_layer_value(train, model, 10)
        # save data and label
        if output_path != '':
            logging.info('saving...')
            np.savez_compressed(output_path, data=np.array(data),
                                label=np.array(label))
    return tuple_dataset.TupleDataset(data, label)