Example #1
 def load_dataset(self, dataset_path, x_col_name='x', y_col_name='y'):
     logger.info("Load a dataset from {}.".format(dataset_path))
     dataset_dirpath = os.path.dirname(dataset_path)
     xlist = []
     ylist = []
     indexcsv = pd.read_csv(dataset_path)
     for cell in indexcsv[x_col_name]:
         df = pd.read_csv(os.path.join(dataset_dirpath, cell), header=None)
         xlist.append(np.float32(df.to_numpy().flatten()))  # as_matrix() was removed from pandas
     for cell in indexcsv[y_col_name]:
         ylist.append(np.int32(cell))
     return tuple_dataset.TupleDataset(xlist, ylist)
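TupleDataset zips its argument sequences: ds[i] returns one (x, y) tuple per sample. A minimal standalone sketch of that behavior, using only the Chainer API seen above:

import numpy as np
from chainer.datasets import tuple_dataset

xs = [np.float32([0.0, 1.0]), np.float32([2.0, 3.0])]
ys = [np.int32(0), np.int32(1)]
ds = tuple_dataset.TupleDataset(xs, ys)
assert len(ds) == 2
x0, y0 = ds[0]  # each item is an (x, y) pair
assert y0 == 0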
Example #2
    def train(network, loss, X_tr, Y_tr, X_te, Y_te, n_epochs=30, gamma=1):
        model = Objective(network, loss=loss, gamma=gamma)

        # optimizer = optimizers.SGD()
        optimizer = optimizers.Adam()
        optimizer.setup(model)

        train = tuple_dataset.TupleDataset(X_tr, Y_tr)
        test = tuple_dataset.TupleDataset(X_te, Y_te)

        train_iter = iterators.SerialIterator(train,
                                              batch_size=1,
                                              shuffle=True)
        test_iter = iterators.SerialIterator(test,
                                             batch_size=1,
                                             repeat=False,
                                             shuffle=False)
        updater = training.StandardUpdater(train_iter, optimizer)
        trainer = training.Trainer(updater, (n_epochs, 'epoch'))

        trainer.run()
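Note that test_iter above is built but never attached to the trainer, so no validation ever runs. A minimal sketch of wiring it in before trainer.run(), assuming the Objective wrapper reports its loss like a standard Chainer link:

from chainer.training import extensions

# Evaluate on the held-out set once per epoch and print both losses.
trainer.extend(extensions.Evaluator(test_iter, model))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss']))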
Example #3
def union(dataset_dict, args, dump_path):
    print('start data load domain-union')
    union_train_x = []
    union_test_x = []
    union_train_ga = []
    union_test_ga = []
    union_train_o = []
    union_test_o = []
    union_train_ni = []
    union_test_ni = []
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        dev_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.8)
        union_train_x += dataset_dict['{0}_x'.format(domain)][:train_size]
        union_test_x += dataset_dict['{0}_x'.format(
            domain)][train_size:dev_size]
        union_train_ga += dataset_dict['{0}_y_ga'.format(domain)][:train_size]
        union_test_ga += dataset_dict['{0}_y_ga'.format(
            domain)][train_size:dev_size]
        union_train_o += dataset_dict['{0}_y_o'.format(domain)][:train_size]
        union_test_o += dataset_dict['{0}_y_o'.format(
            domain)][train_size:dev_size]
        union_train_ni += dataset_dict['{0}_y_ni'.format(domain)][:train_size]
        union_test_ni += dataset_dict['{0}_y_ni'.format(
            domain)][train_size:dev_size]
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ga)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ga)
    training(train_data, test_data, 'union', 'ga', dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_o)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_o)
    training(train_data, test_data, 'union', 'o', dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ni)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ni)
    training(train_data, test_data, 'union', 'ni', dump_path, args)
Example #4
def data_manage_animefacedata(data_path, in_size=224):
    # Data path setup

    folders = sorted(os.listdir(data_path))
    cats = []  # Categories list
    all_data = []
    for folder in folders:
        if os.path.isfile(data_path + folder + "/" + "ignore"):
            #print("Folder "+ folder + "is ignored!")
            continue
        else:
            cats.append(folder)
            label = folder
            img_filelist = glob.glob(data_path + folder + "/" + "*.png")
            for imgfile in img_filelist:
                all_data.append([imgfile, label])
    print("labels=" + str(len(cats)))

    all_data = np.random.permutation(all_data)  # Randomly shuffle the samples

    imageData = []
    labelData = []
    for PathAndLabel in all_data:
        img = Image.open(PathAndLabel[0])
        img = img.resize((in_size, in_size))
        label_id = cats.index(PathAndLabel[1])
        #print PathAndLabel[1]
        img = np.asarray(np.float32(img))
        img = img.transpose(2, 0, 1)
        img = img[:3, ...]
        #img = np.reshape(img,(3,in_size,in_size))
        imageData.append(img)
        labelData.append(np.int32(label_id))

    threshold = np.int32(len(imageData) / 8 * 7)
    train = tuple_dataset.TupleDataset(imageData[0:threshold],
                                       labelData[0:threshold])
    test = tuple_dataset.TupleDataset(imageData[threshold:],
                                      labelData[threshold:])
    return train, test
Example #5
def __filter_class(dataset, extract_class):
    target_data = []
    target_label = []
    for data, label in dataset:
        if label in extract_class:
            target_data.append(data)
            target_label.append(extract_class.index(label))
    target_data = np.array(target_data)
    target_label = np.array(target_label, dtype=np.int32)

    dataset = tuple_dataset.TupleDataset(target_data, target_label)
    train, val = split_dataset(dataset, int(len(dataset) * 0.9))
    return train, val
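split_dataset is Chainer's order-preserving splitter: the first int(len * 0.9) samples become train, the remainder val. A minimal sketch, assuming split_dataset comes from chainer.datasets:

import numpy as np
from chainer.datasets import split_dataset, tuple_dataset

ds = tuple_dataset.TupleDataset(np.arange(10, dtype=np.float32),
                                np.arange(10, dtype=np.int32))
train, val = split_dataset(ds, int(len(ds) * 0.9))
assert len(train) == 9 and len(val) == 1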
Example #6
 def load_dataset(self, dataset_path, x_col_name='x', y_col_name='y'):
     logger.info("Load a dataset from {}.".format(dataset_path))
     dataset_dirpath = os.path.dirname(dataset_path)
     xlist = []
     ylist = []
     indexcsv = pd.read_csv(dataset_path)
     for cell in indexcsv[x_col_name]:
         img = np.asarray(Image.open(os.path.join(dataset_dirpath, cell)))
         x = np.float32(img.reshape(img.shape[0], img.shape[1], 1) / 255.0)
         xlist.append(x.transpose(2, 0, 1))
     for cell in indexcsv[y_col_name]:
         ylist.append(np.int32(cell))
     return tuple_dataset.TupleDataset(xlist, ylist)
Example #7
def union_train(dataset_dict, args, dump_path):
    print('start data load domain-union')
    union_train_x = []
    union_test_x = []
    union_train_ga = []
    union_test_ga = []
    union_train_o = []
    union_test_o = []
    union_train_ni = []
    union_test_ni = []
    union_train_z = []
    union_test_z = []
    train_dataset_dict = {}
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        dev_size = math.ceil(
            len(dataset_dict['{0}_x'.format(domain)]) * args.train_test_ratio)
        union_train_x += dataset_dict['{0}_x'.format(domain)][:train_size]
        union_test_x += dataset_dict['{0}_x'.format(
            domain)][train_size:dev_size]
        union_train_ga += dataset_dict['{0}_y_ga'.format(domain)][:train_size]
        union_test_ga += dataset_dict['{0}_y_ga'.format(
            domain)][train_size:dev_size]
        union_train_o += dataset_dict['{0}_y_o'.format(domain)][:train_size]
        union_test_o += dataset_dict['{0}_y_o'.format(
            domain)][train_size:dev_size]
        union_train_ni += dataset_dict['{0}_y_ni'.format(domain)][:train_size]
        union_test_ni += dataset_dict['{0}_y_ni'.format(
            domain)][train_size:dev_size]
        union_train_z += dataset_dict['{0}_z'.format(domain)][:train_size]
        union_test_z += dataset_dict['{0}_z'.format(
            domain)][train_size:dev_size]
        train_dataset_dict['{0}_y_ga'.format(domain)] = dataset_dict[
            '{0}_y_ga'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_o'.format(domain)] = dataset_dict[
            '{0}_y_o'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_ni'.format(domain)] = dataset_dict[
            '{0}_y_ni'.format(domain)][:train_size]
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ga,
                                            union_train_z)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ga,
                                           union_test_z)
    type_statistics_dict = calculate_type_statistics(train_dataset_dict, 'ga')
    training(train_data, test_data, type_statistics_dict, 'union', 'ga',
             dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_o,
                                            union_train_z)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_o,
                                           union_test_z)
    type_statistics_dict = calculate_type_statistics(train_dataset_dict, 'o')
    training(train_data, test_data, type_statistics_dict, 'union', 'o',
             dump_path, args)
    train_data = tuple_dataset.TupleDataset(union_train_x, union_train_ni,
                                            union_train_z)
    test_data = tuple_dataset.TupleDataset(union_test_x, union_test_ni,
                                           union_test_z)
    type_statistics_dict = calculate_type_statistics(train_dataset_dict, 'ni')
    training(train_data, test_data, type_statistics_dict, 'union', 'ni',
             dump_path, args)
Example #8
def load_dataset():
    image_data = np.load("./data/image_data.npy")
    label_data = np.load("./data/label_data.npy")
    # Convert the NumPy arrays into a TupleDataset
    dataset = tuple_dataset.TupleDataset(image_data, label_data)
    # Split into training and test data
    train_data, test_data = split_dataset_random(
        dataset=dataset, first_size=int(len(dataset) * 0.8), seed=0)
    # Debug output
    print("train_data: {0}\ttest_data: {1}".format(len(train_data),
                                                   len(test_data)))
    return train_data, test_data
Example #9
    def convert_to_variable(x_train, x_test, t_train):
        """
        numpyの形式からVariableに変換するためのメソッド
        :param x_train:
        :param t_train:
        :param x_test:
        :return:
        """

        x_test_v = Variable(x_test)

        train = tuple_dataset.TupleDataset(x_train, t_train)

        return train, x_test_v
Example #10
 def __convert_tests(self, tests):
     data = []
     labels = []
     for r in tests:
         input_tag_tokenizer = tokenizer.InputTagTokenizer()
         tokens = input_tag_tokenizer.get_attrs_value(r.html)
         bow = self.dictionary.doc2bow(tokens)
         vec = matutils.corpus2dense([bow], self.in_units).T[0]
         if r.label not in self.label_types:
             continue  # skip labels undefined in training data
         label_id = self.label_types.index(r.label)
         data.append(np.array(vec).astype(np.float32))
         labels.append(np.int32(label_id))
     return tuple_dataset.TupleDataset(data, labels)
Example #11
def _preprocess_svhn(raw, withlabel, scale, image_dtype, label_dtype):
    images = raw["x"].transpose(3, 2, 0, 1)
    images = images.astype(image_dtype)
    images *= scale / 255.

    labels = raw["y"].astype(label_dtype).flatten()
    # labels go from 1-10, with the digit "0" having label 10.
    # Set "0" to be label 0 to restore expected ordering
    labels[labels == 10] = 0

    if withlabel:
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
Example #12
    def __init__(self, path, width=60, height=60):
        channels = 3
        path = glob.glob('./mouth/*')
        pathsAndLabels = []
        index = 0
        for p in path:
            print(p + "," + str(index))
            pathsAndLabels.append(np.asarray([p, index]))
            index = index + 1
        allData = []
        for pathAndLabel in pathsAndLabels:
            path = pathAndLabel[0]
            label = pathAndLabel[1]
            imagelist = glob.glob(path + "/*")
            for imgName in imagelist:
                allData.append([imgName, label])
        allData = np.random.permutation(allData)
        imageData = []
        labelData = []

        for pathAndLabel in allData:
            #print(pathAndLabel[0])
            img = Image.open(pathAndLabel[0])
            img = img.resize((width, height))
            r, g, b = img.split()
            rImgData = np.asarray(np.float32(r) / 255.0)
            gImgData = np.asarray(np.float32(g) / 255.0)
            bImgData = np.asarray(np.float32(b) / 255.0)
            imgData = np.asarray([rImgData, gImgData, bImgData])
            imageData.append(imgData)
            labelData.append(np.int32(pathAndLabel[1]))

        # Build the train/test split once, after the image-loading loop.
        threshold = np.int32(len(imageData) / 8 * 7)
        self.train = tuple_dataset.TupleDataset(imageData[0:threshold],
                                                labelData[0:threshold])
        self.test = tuple_dataset.TupleDataset(imageData[threshold:],
                                               labelData[threshold:])
Example #13
def learn(trial, train, test, seq_len):
    train = tuple_dataset.TupleDataset(train)
    train_iter = LSTM_Iterator(train, batch_size=10, seq_len=seq_len)
    test = tuple_dataset.TupleDataset(test)
    test_iter = LSTM_Iterator(test, batch_size=10, seq_len=seq_len, repeat=False)
    
    model = create_model(trial)

    gpu_device = 0
    cuda.get_device(gpu_device).use()
    model.to_gpu(gpu_device)

    optimizer = create_optimizer(trial, model)
    updater = LSTM_updater(train_iter, optimizer, gpu_device)

    #stop_trigger = training.triggers.EarlyStoppingTrigger(
    #    monitor='validation/main/loss', check_trigger=(5, 'epoch'),
    #    max_trigger=(100, 'epoch'))
    #trainer = training.Trainer(updater, stop_trigger, out="result")
    trainer = training.Trainer(updater, (100, 'epoch'), out='result')

    test_model = model.copy()
    test_rnn = test_model.predictor
    test_rnn.dr = 0.0
    trainer.extend(extensions.Evaluator(test_iter, test_model, device=gpu_device))

    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, 'validation/main/loss', (5, 'epoch')))
    trainer.extend(extensions.ProgressBar())
    # Keep a handle on the single LogReport so its final entry can be
    # returned below.
    log_report_extension = extensions.LogReport(log_name=None)
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    trainer.extend(log_report_extension)
    trainer.run()

    return log_report_extension.log[-1]
Example #14
def main():
    dataset_dict = load_dataset('../dataframe')
    train_dataset_dict = {}
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        train_dataset_dict['{0}_y_ga'.format(domain)] = dataset_dict[
            '{0}_y_ga'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_o'.format(domain)] = dataset_dict[
            '{0}_y_o'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_ni'.format(domain)] = dataset_dict[
            '{0}_y_ni'.format(domain)][:train_size]

    for case in ['ga', 'o', 'ni']:
        type_statistics_dict = calculate_type_statistics(
            train_dataset_dict, case)
        frust_model_path = load_frust_model_path(
            '../frustratingly_easy_method_k_params/normal/dropout-0.2_batchsize-32',
            case)
        statistics_model_path = load_statistics_model_path(
            '../statistics_method/normal/dropout-0.2_batchsize-32', case)
        for domain in domain_dict:
            fine_model_path = load_fine_model_path(
                '../fine_tuning_method/fine_tuning/alpha-0.001_beta1-0.9_weightdecay-0.0001',
                case, domain)

            size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.8)
            test_x = dataset_dict['{0}_x'.format(domain)][size:]
            test_z = dataset_dict['{0}_z'.format(domain)][size:]
            if case == 'ga':
                test_y = dataset_dict['{0}_y_ga'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ga_dep_tag'.format(
                    domain)][size:]
            elif case == 'o':
                test_y = dataset_dict['{0}_y_o'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_o_dep_tag'.format(
                    domain)][size:]
            elif case == 'ni':
                test_y = dataset_dict['{0}_y_ni'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ni_dep_tag'.format(
                    domain)][size:]

            test_word = dataset_dict['{0}_word'.format(domain)][size:]
            test_is_verb = dataset_dict['{0}_is_verb'.format(domain)][size:]
            test_data = tuple_dataset.TupleDataset(test_x, test_y,
                                                   test_y_dep_tag, test_z,
                                                   test_word, test_is_verb)

            predict(frust_model_path, statistics_model_path, fine_model_path,
                    test_data, type_statistics_dict, domain, case)
Example #15
def _preprocess_cifar(images, labels, withlabel, ndim, scale):
    if ndim == 1:
        images = images.reshape(-1, 3072)
    elif ndim == 3:
        images = images.reshape(-1, 3, 32, 32)
    else:
        raise ValueError('invalid ndim for CIFAR dataset')
    images = images.astype(numpy.float32)
    images *= scale / 255.

    if withlabel:
        labels = labels.astype(numpy.int32)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
Example #16
def prepare_dataset(dataset_dir):
    current_images = []
    next_images = []
    if os.path.isdir(dataset_dir):
        images = load_images(dataset_dir)
        current_images.extend(images[0:-2])
        next_images.extend(images[1:-1])
    for file_name in os.listdir(dataset_dir):
        path = os.path.join(dataset_dir, file_name)
        if os.path.isdir(path):
            print('sub dir: ', file_name)
            images = load_images(path)
            current_images.extend(images[0:-2])
            next_images.extend(images[1:-1])
    return tuple_dataset.TupleDataset(current_images, next_images)
Example #17
def run(inputData, outputData):
    xArray = np.array(inputData)
    yArray = np.array(outputData)
    # Hold out the last quarter of the rows for testing.
    xTrain, xTest = np.vsplit(xArray, [int(len(xArray) * 3.0 / 4.0)])
    yTrain, yTest = np.vsplit(yArray, [int(len(yArray) * 3.0 / 4.0)])

    # model = L.Classifier(LotoNN(), lossfun=sigmoid_cross_entropy.sigmoid_cross_entropy)

    model = L.Classifier(LotoNN())
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train = tuple_dataset.TupleDataset(xTrain, yTrain)
    test = tuple_dataset.TupleDataset(xTest, yTest)
    trainIter = chainer.iterators.SerialIterator(train, 100)
    testIter = chainer.iterators.SerialIterator(test,
                                                100,
                                                repeat=False,
                                                shuffle=False)

    updater = training.StandardUpdater(trainIter, optimizer, device=-1)
    trainer = training.Trainer(updater, (100, 'epoch'), out="result")
    trainer.extend(extensions.Evaluator(testIter, model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
Example #18
def loadData(total):
  train = []
  test = []
  f = open('converted_data.txt', 'r')
  index = 0

  text_data = []
  label_data = []
  for line in f:
    if index < total:
      tmp = line.split(",")
      text = np.array([np.float32(x) for x in tmp[2:]])
      label = np.int32(tmp[1])
      text_data.append(text)
      label_data.append(label)
      
    index += 1
  f.close()

  threshold = np.int32(total * 0.9)
  train = tuple_dataset.TupleDataset(text_data[0:threshold], label_data[0:threshold])
  test  = tuple_dataset.TupleDataset(text_data[threshold:],  label_data[threshold:])

  return train, test
Example #19
def load_dataset():

    name = os.path.dirname(os.path.abspath(__file__))
    joined_path = os.path.join(name, './utils')
    data_path = os.path.normpath(joined_path)

    X_train, y_train = load_fmnist(str(data_path), kind='train')
    X_test, y_test = load_fmnist(str(data_path), kind='t10k')

    X_train = X_train.astype('float32') / 255
    X_test = X_test.astype('float32') / 255

    train_data = np.array(
        [X_train[i].reshape(1, 28, 28) for i in range(len(X_train))])
    test_data = np.array(
        [X_test[i].reshape(1, 28, 28) for i in range(len(X_test))])

    y_train = y_train.astype('int32')  # Chainer classifiers expect int32 labels
    y_test = y_test.astype('int32')

    train = tuple_dataset.TupleDataset(train_data, y_train)
    test = tuple_dataset.TupleDataset(test_data, y_test)

    return train, test
Example #20
def get_titanic():
    train_data = pd.read_csv("datasets/train.csv", header="infer")
    df = pd.DataFrame(train_data)
    df = df[['PassengerId', 'Survived','Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
    df = pd.get_dummies(df[['PassengerId', 'Survived','Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']])
    
    l = [0.05, 0.25, 0.5, 0.75, 0.99]
    num_over_99 = df.describe(percentiles=l)['Fare']['99%']
    # Mask Fare outliers above the 99th percentile as NaN so the median
    # imputer below fills them in.
    df["Fare"] = df["Fare"].where(df["Fare"] <= num_over_99)

    imr = Imputer(missing_values='NaN', strategy='median', axis=0)
    imr = imr.fit(df)
    train = imr.transform(df.values)

    # Standardize the features only; the Survived column must stay unscaled
    # so it survives the integer cast below.
    sc = StandardScaler()
    X = sc.fit_transform(train[:, 2:])
    y = train[:, 1]
    X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2, random_state=1)
    train = tuple_dataset.TupleDataset(np.float32(X_train), np.int32(y_train))
    test  = tuple_dataset.TupleDataset(np.float32(X_test), np.int32(y_test))
    return train, test
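Imputer was deprecated in scikit-learn 0.20 and removed in 0.22. On newer versions, a drop-in sketch with the same median strategy (np.nan assumed as the missing-value marker), reusing the df built above:

import numpy as np
from sklearn.impute import SimpleImputer

imr = SimpleImputer(missing_values=np.nan, strategy='median')
train = imr.fit_transform(df.values)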
Example #21
def main():
    X, y = generate_data()
    model = L.Classifier(MakeMoonModel())
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    train_dataset = tuple_dataset.TupleDataset(X, y)
    train_iter = iterators.SerialIterator(train_dataset, batch_size=200)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (10000, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    visualize(X, y, model)
Example #22
def get_train_and_test_2_dim(dataset='kemerer', train_size=0.5, validation_size=0):
    x_train, x_validation, x_test, y_train, y_validation, y_test, in_size = \
        get_splited_train_and_test(dataset, train_size, validation_size)
    train = tuple_dataset.TupleDataset(tuple_dataset.TupleDataset(x_train.astype('float32')), y_train.astype('float32'))
    validation = tuple_dataset.TupleDataset(tuple_dataset.TupleDataset(x_validation.astype('float32')), y_validation.astype('float32'))
    test = tuple_dataset.TupleDataset(tuple_dataset.TupleDataset(x_test.astype('float32')), y_test.astype('float32'))
    return train, validation, test, in_size, \
           x_train.astype('float32').reshape((len(x_train), 1, len(x_train[0]))), y_train.astype('float32'), \
           x_validation.astype('float32').reshape((len(x_validation), 1, len(x_validation[0]))), y_validation.astype('float32'), \
           x_test.astype('float32').reshape((len(x_test), 1, len(x_test[0]))), y_test.astype('float32')
Example #23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', '-m', type=str, default='')
    parser.add_argument('--train_test_ratio', type=float, default=0.8)
    parser.add_argument('--case', type=str, default='')
    args = parser.parse_args()
    dataset_dict = load_dataset('../dataframe')
    train_dataset_dict = {}
    for domain in domain_dict:
        train_size = math.ceil(len(dataset_dict['{0}_x'.format(domain)]) * 0.7)
        train_dataset_dict['{0}_y_ga'.format(domain)] = dataset_dict[
            '{0}_y_ga'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_o'.format(domain)] = dataset_dict[
            '{0}_y_o'.format(domain)][:train_size]
        train_dataset_dict['{0}_y_ni'.format(domain)] = dataset_dict[
            '{0}_y_ni'.format(domain)][:train_size]

    for case in [args.case]:
        type_statistics_dict = calculate_type_statistics(
            train_dataset_dict, case)
        for domain in domain_dict:
            model_path = load_model_path(args.dir, case, domain)

            size = math.ceil(
                len(dataset_dict['{0}_x'.format(domain)]) *
                args.train_test_ratio)
            test_x = dataset_dict['{0}_x'.format(domain)][size:]
            test_z = dataset_dict['{0}_z'.format(domain)][size:]
            if case == 'ga':
                test_y = dataset_dict['{0}_y_ga'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ga_dep_tag'.format(
                    domain)][size:]
            elif case == 'o':
                test_y = dataset_dict['{0}_y_o'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_o_dep_tag'.format(
                    domain)][size:]
            elif case == 'ni':
                test_y = dataset_dict['{0}_y_ni'.format(domain)][size:]
                test_y_dep_tag = dataset_dict['{0}_y_ni_dep_tag'.format(
                    domain)][size:]

            test_word = dataset_dict['{0}_word'.format(domain)][size:]
            test_is_verb = dataset_dict['{0}_is_verb'.format(domain)][size:]
            test_data = tuple_dataset.TupleDataset(test_x, test_y,
                                                   test_y_dep_tag, test_z,
                                                   test_word, test_is_verb)

            predict(model_path, test_data, type_statistics_dict, domain, case)
Example #24
def get_tuple(data):
    data = np.array(data)
    print('get_tuple data:', data.shape)
    t_data = []
    t_label = []
    for it in data:
        if len(it[0]) == 0:
            it[0].append([0 for i in range(54)])
            it[1].append(0)
        t_data.append(np.array(it[0]).astype(np.float32))
        t_label.append(np.array(it[1]).astype(np.int32))
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    print("t_data:", t_data.shape)
    print("t_data[0].shape:", t_data[0].shape)
    return tuple_dataset.TupleDataset(t_data, t_label)
Example #25
def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype):
    images = raw['x']
    if ndim == 2:
        images = images.reshape(-1, 28, 28)
    elif ndim == 3:
        images = images.reshape(-1, 1, 28, 28)
    elif ndim != 1:
        raise ValueError('invalid ndim for MNIST dataset')
    images = images.astype(image_dtype)
    images *= scale / 255.

    if withlabel:
        labels = raw['y'].astype(label_dtype)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
Example #26
def main():
    iris = load_iris()
    model = L.Classifier(IrisModel())
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    train_data = tuple_dataset.TupleDataset(iris.data.astype(np.float32),
                                            iris.target.astype(np.int32))
    train_iter = iterators.SerialIterator(train_data, batch_size=50)
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (10000, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    X = np.array([[5.4, 3.6, 1.4, 0.3], [5.4, 2.6, 4.0, 1.4],
                  [6.8, 3.2, 5.5, 2.1]])
    y = model.predictor(Variable(X.astype(np.float32)))
    print(y)
Example #27
def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb_format):
    images = raw['x']
    if ndim == 2:
        images = images.reshape(-1, 28, 28)
    elif ndim == 3:
        images = images.reshape(-1, 1, 28, 28)
        if rgb_format:
            images = np.broadcast_to(images, (len(images), 3) + images.shape[2:])
    elif ndim != 1:
        raise ValueError('invalid ndim for MNIST dataset')
    images = images.astype(image_dtype)
    images *= scale / 255.

    if withlabel:
        labels = raw['y'].astype(label_dtype)
        return tuple_dataset.TupleDataset(images, labels)
    return images
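For illustration, a minimal call of this helper on a synthetic raw dict (the 'x'/'y' field names follow the code above; the toy data itself is an assumption):

import numpy as np

raw = {'x': np.random.randint(0, 256, size=(5, 784)).astype(np.uint8),
       'y': np.arange(5)}
train = _preprocess_mnist(raw, withlabel=True, ndim=3, scale=1.0,
                          image_dtype=np.float32, label_dtype=np.int32,
                          rgb_format=True)
img, label = train[0]
assert img.shape == (3, 28, 28)  # grayscale broadcast to three channels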
Example #28
def load_image(filepath, rootpath, patchsize, label_num):
    file_name = []
    with open(filepath) as f:
        all_line = f.readlines()
        for line in all_line:
            file_name.append(line.replace("\n",""))
    
    tmp = np.zeros((patchsize, patchsize), dtype = np.float32)
    # input images
    x = np.zeros((len(file_name), 1, patchsize, patchsize), dtype = np.float32)
    # supervised data (label)
    t = np.zeros((len(file_name), patchsize, patchsize), dtype = np.int32)
    with tqdm(total=len(file_name)) as pbar:
        for i in range(len(file_name)):
            
            img, d_ = IO.read_mhd_and_raw_withoutSitk(rootpath + "/image/" + file_name[i] + ".mhd")
            #nda_img = img.reshape((d_['DimSize'][1], d_['DimSize'][0])).astype(np.float32) / 255  # img => [0,1]
            nda_img = img.reshape((d_['DimSize'][1], d_['DimSize'][0])).astype(np.float32) # img => mean = 0, var = 1
            label, d_ = IO.read_mhd_and_raw_withoutSitk(rootpath + "/label/" + file_name[i] + ".mhd")
            nda_label = label.reshape((d_['DimSize'][1], d_['DimSize'][0])).astype(np.int32)

            #img = sitk.ReadImage(rootpath + "/image/" + file_name[i] + ".mhd")
            #nda_img = sitk.GetArrayFromImage(img).astype(np.float32)  # img => mean = 0, var = 1
            #label = sitk.ReadImage(rootpath + "/label/" + file_name[i] + ".mhd")
            #nda_label = sitk.GetArrayFromImage(label).astype(np.int32)

            
            if label_num == 2:
                # train target => bkg, accumulate
                nda_label[np.where(nda_label == 2)] = -1
                nda_label[np.where(nda_label == 4)] = 1
            elif label_num == 3:
                # train target => bkg, normal, abnormal
                nda_label[np.where(nda_label == 2)] = -1
                nda_label[np.where(nda_label == 4)] = 2
            elif label_num == 4:
                # train target => bkg, excluded, normal, abnormal
                nda_label[np.where(nda_label == 4)] = 3

            # input
            x[i,0,:,:] = nda_img
            # label
            t[i,:,:] = nda_label
            pbar.update(1)
    temp = tuple_dataset.TupleDataset(x, t)
    return temp
Example #29
def get_new_tuple(data_list):
    data = []
    label = []
    for it in data_list:
        if len(it[0]) == 0:
            continue
        assert len(it[0]) != 0
        assert len(it[0]) == len(it[1])
        for x in it[0]:
            assert len(x) == 54
        data.append(it[0])
        # Add 1 to every label to simplify the downstream computation
        label.append(it[1] + 1)

    data = np.array(data)
    label = np.array(label)
    return tuple_dataset.TupleDataset(data, label)
Example #30
def __get_dataset(root_path):

    def __get_hidden_layer_value(val, model, batch_size):
        # set model
        model.train = False
        if GPU_ID > -1:
            model.to_gpu()
        # set dataset
        outputs = []
        labels = []
        for i in range(0, len(val), batch_size):
            logging.info('forwarding... [%s / %s]', i+batch_size, len(val))
            data = [_data for _data, _ in val[i:i+batch_size]]
            label = [_label for _, _label in val[i:i+batch_size]]
            x = Variable(np.array(data))
            if GPU_ID > -1:
                x.to_gpu()
            output = model.predictor.get_features(x).data
            if GPU_ID > -1:
                output = cuda.to_cpu(output)
            outputs.extend(output)
            labels.extend(label)
        return outputs, labels

    output_path = os.path.join(root_path, 'result/resnet50_pretrain_warp/dataset.npz')
    if output_path != '' and os.path.isfile(output_path):
        np_file = np.load(output_path)
        data = np_file['data']
        label = np_file['label']
    else:
        # get root dataset
        train, _ = get_clf_data(use_memory=False, img_size=224, img_type='warp', split_val=False)
        # model
        model_path = os.path.join(root_path, 'result/resnet50_pretrain_warp/model_epoch_100')
        model = get_model('ResNet50-cls', pretrained_path=model_path)
        # get data and label
        data, label = __get_hidden_layer_value(train, model, 10)
        # save data and label
        if output_path != '':
            logging.info('saving...')
            np.savez_compressed(output_path, data=np.array(data), label=np.array(label))
    return tuple_dataset.TupleDataset(data, label)