def top_level_task():
    """Build and train a two-branch CIFAR-10 CNN: two cifar_cnn_sub
    sub-models are concatenated on the channel axis and topped with a
    shared conv/pool/dense head."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalize pixels to [0, 1]; labels stay integer class ids.
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # Two independent inputs over the same channels-first image shape.
    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32", name="input1")
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32", name="input2")

    ot1 = cifar_cnn_sub(input_tensor1, 1)
    model1 = Model(input_tensor1, ot1)
    print(model1.summary())

    ot2 = cifar_cnn_sub(input_tensor2, 2)
    model2 = Model(input_tensor2, ot2)
    print(model2.summary())

    # Merge both branches along the channel axis, then finish the net.
    x = Concatenate(axis=1)([model1.output, model2.output])
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), activation="relu", name="conv2d_0_4")(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(x)
    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    x = Dense(num_classes)(x)
    x = Activation("softmax")(x)

    model = Model([input_tensor1, input_tensor2], x)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    # The same image batch feeds both inputs.
    model.fit([x_train, x_train],
              y_train,
              epochs=160,
              callbacks=[
                  VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                  EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)
              ])
# --- Example 2 ---  (was stray scrape text "Esempio n. 2" / "0")
def top_level_task():
    """Single-input CIFAR-10 CNN where one input tensor feeds two parallel
    conv branches that are concatenated; the model input is passed as a
    dict keyed by position."""
    backend.set_image_data_format('channels_first')
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    image = Input(shape=(3, 32, 32), dtype="float32")

    # The same input tensor drives both branches.
    branch_a = Conv2D(filters=32, input_shape=(3, 32, 32),
                      kernel_size=(3, 3), strides=(1, 1),
                      padding="valid", activation="relu")(image)
    branch_b = Conv2D(filters=32, input_shape=(3, 32, 32),
                      kernel_size=(3, 3), strides=(1, 1),
                      padding="valid", activation="relu")(image)
    x = Concatenate(axis=1)([branch_a, branch_b])
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding="valid", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding="valid", activation="relu")(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding="valid", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(x)
    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    x = Dense(num_classes)(x)
    x = Activation("softmax")(x)

    # NOTE(review): inputs given as a {position: tensor} dict — presumably
    # exercising that code path in the framework; confirm.
    model = Model({1: image}, x)

    opt = optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    model.fit(x_train, y_train, epochs=1)
# --- Example 3 ---  (was stray scrape text "Esempio n. 3" / "0")
def top_level_task():
    """Sequential CIFAR-10 CNN smoke test: build the standard stack,
    compile with only an optimizer, and run a single training epoch."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    net = Sequential()
    net.add(Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding=(1, 1), activation="relu"))
    net.add(Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                   padding=(1, 1), activation="relu"))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    net.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                   padding=(1, 1), activation="relu"))
    net.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                   padding=(1, 1), activation="relu"))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    net.add(Flatten())
    net.add(Dense(512, activation="relu"))
    net.add(Dense(num_classes))
    net.add(Activation("softmax"))

    print(net.summary())

    # NOTE(review): compile() is called without a loss here, unlike the
    # sibling examples — presumably the framework tolerates that; confirm.
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    net.compile(optimizer=opt)

    net.fit(x_train, y_train, epochs=1)
def top_level_task():
    """Train the standard Sequential CIFAR-10 CNN for 80 epochs and verify
    the reached accuracy through the metric callbacks."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # Conv stack -> classifier head, added layer by layer.
    stack = [
        Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
               strides=(1, 1), padding=(1, 1), activation="relu"),
        Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), activation="relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"),
        Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), activation="relu"),
        Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), activation="relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"),
        Flatten(),
        Dense(512, activation="relu"),
        Dense(num_classes),
        Activation("softmax"),
    ]
    net = Sequential()
    for layer in stack:
        net.add(layer)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.02)
    net.compile(optimizer=opt, loss='sparse_categorical_crossentropy',
                metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(net.summary())

    net.fit(x_train, y_train, epochs=80,
            callbacks=[VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                       EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)])
# --- Example 5 ---  (was stray scrape text "Esempio n. 5" / "0")
def top_level_task():
    """Train a network imported from a serialized PyTorch model file
    ("alexnet.ff") on CIFAR-10 images upscaled to 229x229."""
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    batch_dims = [ffconfig.get_batch_size(), 3, 229, 229]
    input_tensor = ffmodel.create_tensor(batch_dims, DataType.DT_FLOAT)

    torch_model = PyTorchModel("alexnet.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Nearest-neighbour upscale every 32x32 image to 229x229; PIL works
    # in HWC layout, so transpose in and back out of CHW.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for idx in range(num_samples):
        hwc = x_train[idx, :, :, :].transpose(1, 2, 0)
        resized = Image.fromarray(hwc).resize((229, 229), Image.NEAREST)
        resized_np = np.array(resized, dtype=np.float32)
        full_input_np[idx, :, :, :] = resized_np.transpose(2, 0, 1)

    full_input_np /= 255

    full_label_np = y_train.astype('int32')

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_np)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    # Timer values are in microseconds.
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
# --- Example 6 ---  (was stray scrape text "Esempio n. 6" / "0")
def top_level_task():
    """Train a ResNet-18 imported from ONNX on CIFAR-10 images upscaled
    to 229x229, then verify the reached accuracy.

    Fixes: local renamed from ``input`` so the builtin is not shadowed;
    stray C-style semicolons removed.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    onnx_model = ONNXModel("resnet18.onnx")
    # "input.1" is the graph input name inside resnet18.onnx.
    t = onnx_model.apply(ffmodel, {"input.1": input_tensor})
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.label_tensor

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Nearest-neighbour upscale each CIFAR image to 229x229 in CHW layout.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)  # CHW -> HWC for PIL
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)  # back to CHW
        full_input_np[i, :, :, :] = image

    full_input_np /= 255

    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)

    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples

    ffmodel.init_layers()

    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    run_time = 1e-6 * (ts_end - ts_start)  # timer is in microseconds
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'
# --- Example 7 ---  (was stray scrape text "Esempio n. 7" / "0")
def top_level_task():
    """Load a two-input CNN graph from "cnn.ff" via file_to_ff, inspect
    its layers by name, and train it with the high-level fit() API."""
    ffconfig = FFConfig()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    batch_dims = [ffconfig.batch_size, 3, 32, 32]
    input_tensor = ffmodel.create_tensor(batch_dims, DataType.DT_FLOAT)
    # The serialized graph expects two inputs; feed the same tensor twice.
    output_tensors = file_to_ff("cnn.ff", ffmodel,
                                [input_tensor, input_tensor])

    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.label_tensor

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    full_input_array = x_train.astype('float32')
    full_input_array /= 255
    full_label_array = y_train.astype('int32')

    dataloader_input = ffmodel.create_data_loader(input_tensor,
                                                  full_input_array)
    dataloader_label = ffmodel.create_data_loader(label_tensor,
                                                  full_label_array)

    num_samples = dataloader_input.num_samples

    ffmodel.init_layers()

    # Dump every layer name, then look one up by name.
    layers = ffmodel.get_layers()
    for layer_id in layers:
        print(layers[layer_id].name)

    layer = ffmodel.get_layer_by_name("relu_1")
    print(layer)

    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
# --- Example 8 ---  (was stray scrape text "Esempio n. 8" / "0")
def cifar_cnn_concat():
    """Three cifar_cnn_sub branches (two sharing the second input) merged
    by concat, followed by a two-way conv concat and a classifier head;
    trained for one epoch."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # NOTE(review): batch dim 0 in batch_shape presumably means "defer to
    # the runtime's batch size" — confirm against the framework docs.
    input_tensor1 = Input(batch_shape=[0, 3, 32, 32], dtype="float32")
    input_tensor2 = Input(batch_shape=[0, 3, 32, 32], dtype="float32")

    # Branches 2 and 3 both consume input_tensor2.
    branches = [cifar_cnn_sub(input_tensor1, 1),
                cifar_cnn_sub(input_tensor2, 2),
                cifar_cnn_sub(input_tensor2, 3)]
    x = Concatenate(axis=1)(branches)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(x)

    # Two named convs over the same tensor, concatenated again.
    conv_a = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                    padding=(1, 1), activation="relu",
                    name="conv2d_0_4")(x)
    conv_b = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                    padding=(1, 1), activation="relu",
                    name="conv2d_1_4")(x)
    x = Concatenate(axis=1)([conv_a, conv_b])
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(x)
    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    x = Dense(num_classes)(x)
    x = Activation("softmax")(x)

    model = Model([input_tensor1, input_tensor2], x)

    print(model.summary())

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt)

    model.fit([x_train, x_train], y_train, epochs=1)
def top_level_task():
    """Compose two functional models end-to-end (model2(model1(x))) and
    train the composition on CIFAR-10 with accuracy-verifying callbacks."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # Stage 1: conv front-end.
    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32")
    t1 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")(input_tensor1)
    t1 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")(t1)
    t1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                      padding="valid")(t1)
    model1 = Model(input_tensor1, t1)

    # Stage 2: conv back-end plus classifier head.
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32")
    t2 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")(input_tensor2)
    t2 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")(t2)
    t2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                      padding="valid")(t2)
    t2 = Flatten()(t2)
    t2 = Dense(512, activation="relu")(t2)
    t2 = Dense(num_classes)(t2)
    t2 = Activation("softmax")(t2)
    model2 = Model(input_tensor2, t2)

    # Chain the two models as callables on a fresh input tensor.
    input_tensor3 = Input(shape=(3, 32, 32), dtype="float32")
    composed = model1(input_tensor3)
    composed = model2(composed)
    model = Model(input_tensor3, composed)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    model.fit(x_train,
              y_train,
              epochs=40,
              callbacks=[
                  VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                  EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)
              ])
# --- Example 10 ---  (was stray scrape text "Esempio n. 10" / "0")
def top_level_task():
    """Run a two-input CNN loaded from "cnn.ff" (the same input tensor is
    fed to both graph inputs), seeding the data loaders from manually
    attached numpy arrays and training with the low-level train() loop.

    Fixes: stray C-style semicolons removed; loop variable renamed so no
    builtin is shadowed.
    """
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    torch_model = PyTorchModel("cnn.ff")

    # The serialized graph has two inputs; reuse the same tensor for both.
    output_tensors = torch_model.apply(ffmodel, [input_tensor, input_tensor])

    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train

    y_train = y_train.astype('int32')
    full_label_array = y_train

    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)

    # Attach the numpy buffers just long enough to seed the data loaders.
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    # Dump every layer name, then look one up by name.
    layers = ffmodel.get_layers()
    for layer_id in layers:
        print(layers[layer_id].name)

    layer = ffmodel.get_layer_by_name("relu_1")
    print(layer)

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()

    ffmodel.train((dataloader_input, dataloader_label), epochs)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)  # timer is in microseconds
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
# --- Example 11 ---  (was stray scrape text "Esempio n. 11" / "0")
def top_level_task():
    """Exercise concat followed by split in the low-level FlexFlow API on
    a CIFAR-10 CNN, training with a hand-written epoch loop plus Legion
    tracing.

    Fixes: local renamed from ``input`` and loop variable renamed from
    ``iter`` so builtins are not shadowed; redundant int() and parens
    removed.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    # Three parallel convs over the same input, concatenated and then
    # split back into three pieces along the channel axis.
    t1 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t3 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t = ffmodel.concat([t1, t2, t3], 1)
    ts = ffmodel.split(t, 3, 1)
    print("new", ts[0].handle.impl)
    # Only the middle split output feeds the rest of the network.
    t = ffmodel.conv2d(ts[1], 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)
    print("end model", ts[0].handle.impl)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])

    y_train = y_train.astype('int32')
    full_label_array = y_train

    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)

    # Attach numpy buffers just long enough to seed the data loaders.
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    print("end init model", ts[0].handle.impl)

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        dataloader_input.reset()
        dataloader_label.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)

        # 'step' rather than 'iter' so the builtin is not shadowed.
        for step in range(iterations):
            dataloader_input.next_batch(ffmodel)
            dataloader_label.next_batch(ffmodel)
            # Capture a Legion trace only after the warm-up epoch 0.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)  # timer is in microseconds
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    print("end", ts[0].handle.impl)
# --- Example 12 ---  (was stray scrape text "Esempio n. 12" / "0")
def top_level_task():
    """AlexNet-style CIFAR-10 trainer built with the low-level FlexFlow
    API and a hand-written training loop.

    Trains either from a numpy dataset attached in-process
    (use_external=True) or via the DataLoader4D path driven by the net
    config, verifies accuracy, then dumps the first conv layer's input
    and the label tensor for debugging.

    Fixes: local renamed from ``input`` and loop variable renamed from
    ``iter`` so builtins are not shadowed; ``== True`` comparisons
    simplified.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    # Images are resized to 229x229 below, so the input tensor matches.
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    kernel_init = GlorotUniformInitializer(123)
    bias_init = ZeroInitializer()
    t = ffmodel.conv2d(input_tensor, 64, 11, 11, 4, 4, 2, 2,
                       ActiMode.AC_MODE_RELU, True, kernel_init, bias_init)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 192, 5, 5, 1, 1, 2, 2, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 384, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    use_external = True
    if use_external:
        num_samples = 10000

        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

        # Nearest-neighbour upscale each 32x32 image to 229x229 (CHW).
        full_input_np = np.zeros((num_samples, 3, 229, 229),
                                 dtype=np.float32)
        for i in range(0, num_samples):
            image = x_train[i, :, :, :]
            image = image.transpose(1, 2, 0)  # CHW -> HWC for PIL
            pil_image = Image.fromarray(image)
            pil_image = pil_image.resize((229, 229), Image.NEAREST)
            image = np.array(pil_image, dtype=np.float32)
            image = image.transpose(2, 0, 1)  # back to CHW
            full_input_np[i, :, :, :] = image
            if i == 0:
                print(image)

        full_input_np /= 255
        print(full_input_np.shape)
        print(full_input_np.__array_interface__["strides"])
        print(full_input_np[0, :, :, :])

        y_train = y_train.astype('int32')
        full_label_np = y_train

        dims_full_input = [num_samples, 3, 229, 229]
        full_input = ffmodel.create_tensor(dims_full_input, "",
                                           DataType.DT_FLOAT)

        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "",
                                           DataType.DT_INT32)

        # Attach numpy buffers just long enough to seed the data loaders.
        full_input.attach_numpy_array(ffconfig, full_input_np)
        full_label.attach_numpy_array(ffconfig, full_label_np)

        dataloader_input = SingleDataLoader(ffmodel, input_tensor,
                                            full_input, num_samples,
                                            DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)

        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)

        num_samples = dataloader_input.get_num_samples()
        assert dataloader_input.get_num_samples(
        ) == dataloader_label.get_num_samples()

    else:
        # Let FlexFlow read the dataset described by the net config.
        dataloader = DataLoader4D(ffmodel,
                                  input_tensor,
                                  label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        if use_external:
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)

        # 'step' rather than 'iter' so the builtin is not shadowed.
        for step in range(iterations):
            if use_external:
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # Capture a Legion trace only after the warm-up epoch 0.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)  # timer is in microseconds
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'

    # Debug dump: the first conv layer's input tensor and the labels.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
def top_level_task():
    """Train an AlexNet-style CNN on CIFAR-10 via the FlexFlow Keras API.

    CIFAR-10 images (3x32x32, uint8) are upscaled to 3x229x229 with
    nearest-neighbour resampling, scaled into [0, 1], and trained against
    int32 class labels with SGD.
    """
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale every CHW image to 229x229: convert to HWC for PIL, resize,
    # then transpose back to channel-first layout.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for idx in range(num_samples):
        hwc = x_train[idx, :, :, :].transpose(1, 2, 0)
        resized = Image.fromarray(hwc).resize((229, 229), Image.NEAREST)
        chw = np.array(resized, dtype=np.float32).transpose(2, 0, 1)
        full_input_np[idx, :, :, :] = chw
        if idx == 0:
            print(chw)

    # Scale pixels into [0, 1]; the loader expects int32 labels.
    full_input_np /= 255
    y_train = y_train.astype('int32')
    full_label_np = y_train

    input_tensor = Input(shape=(3, 229, 229), dtype="float32")

    # AlexNet feature extractor: five conv layers interleaved with max pooling.
    x = Conv2D(filters=64,
               input_shape=(3, 229, 229),
               kernel_size=(11, 11),
               strides=(4, 4),
               padding=(2, 2),
               activation="relu")(input_tensor)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="valid")(x)
    x = Conv2D(filters=192,
               kernel_size=(5, 5),
               strides=(1, 1),
               padding=(2, 2),
               activation="relu")(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="valid")(x)
    x = Conv2D(filters=384,
               kernel_size=(3, 3),
               strides=(1, 1),
               padding=(1, 1),
               activation="relu")(x)
    x = Conv2D(filters=256,
               kernel_size=(3, 3),
               strides=(1, 1),
               padding=(1, 1),
               activation="relu")(x)
    x = Conv2D(filters=256,
               kernel_size=(3, 3),
               strides=(1, 1),
               padding=(1, 1),
               activation="relu")(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="valid")(x)

    # Classifier head: two fully-connected layers, then softmax over 10 classes.
    x = Flatten()(x)
    x = Dense(4096, activation="relu")(x)
    x = Dense(4096, activation="relu")(x)
    x = Dense(10)(x)
    x = Activation("softmax")(x)

    model = Model(input_tensor, x)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    # Check accuracy against the AlexNet/CIFAR-10 reference threshold both
    # after every epoch and at the end of training.
    model.fit(full_input_np,
              full_label_np,
              epochs=160,
              callbacks=[
                  VerifyMetrics(ModelAccuracy.CIFAR10_ALEXNET),
                  EpochVerifyMetrics(ModelAccuracy.CIFAR10_ALEXNET)
              ])
Esempio n. 14
0
def top_level_task():
    """Train a small CNN on CIFAR-10 through the FlexFlow low-level API.

    Builds a conv/conv/pool x2 network with a dense head, trains it with
    fit() and asserts the final accuracy against the CIFAR10_CNN threshold.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    # NCHW input: batch x 3 x 32 x 32.
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Two conv/conv/pool stages followed by a dense classifier head.
    x = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                       ActiMode.AC_MODE_RELU)
    x = ffmodel.conv2d(x, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    x = ffmodel.pool2d(x, 2, 2, 2, 2, 0, 0)
    x = ffmodel.conv2d(x, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    x = ffmodel.conv2d(x, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    x = ffmodel.pool2d(x, 2, 2, 2, 2, 0, 0)
    x = ffmodel.flat(x)
    x = ffmodel.dense(x, 512, ActiMode.AC_MODE_RELU)
    x = ffmodel.dense(x, 10)
    x = ffmodel.softmax(x)

    optimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(optimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalize pixels into [0, 1]; the data loader expects int32 labels.
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])

    y_train = y_train.astype('int32')
    full_label_array = y_train

    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    dataloader_input = ffmodel.create_data_loader(input_tensor,
                                                  full_input_array)
    dataloader_label = ffmodel.create_data_loader(label_tensor,
                                                  full_label_array)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    # Time the whole run so throughput can be reported in samples/s.
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'
Esempio n. 15
0
def top_level_task():
    """Train a small CNN on CIFAR-10 with manual batching and trace capture.

    Uses the named-layer FlexFlow API: each iteration the next batch of
    images/labels is pushed into the input and label tensors with
    next_batch()/next_batch_label(), and a Legion trace (id 111) is
    captured/replayed from the second epoch onward. After training, the
    first conv layer's input tensor and the label tensor are inline-mapped
    and printed for debugging.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    # NCHW input: batch x 3 x 32 x 32, plus one int32 label per sample.
    # (renamed from `input`, which shadowed the builtin)
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    dims_label = [ffconfig.get_batch_size(), 1]
    label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalize pixels into [0, 1]; labels must be int32.
    x_train = x_train.astype('float32')
    x_train /= 255

    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])

    y_train = y_train.astype('int32')

    full_label_array = y_train

    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    # Pre-load the first batch before the model is initialized.
    next_batch(0, x_train, input_tensor, ffconfig)
    next_batch_label(0, y_train, label, ffconfig)

    # conv/conv/pool x2 feature extractor with a dense head.
    t = ffmodel.conv2d("conv1", input_tensor, 32, 3, 3, 1, 1, 1, 1,
                       ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d("conv2", t, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d("pool1", t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d("conv3", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d("conv4", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d("pool2", t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat("flat", t)
    # Fix: both dense layers previously shared the misspelled name "lienar1",
    # which made any lookup of a dense layer by name ambiguous.
    t = ffmodel.dense("linear1", t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense("linear2", t, 10)
    t = ffmodel.softmax("softmax", t, label)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        ct = 0
        for step in range(0, iterations):
            # Feed the next batch of images and labels into the device tensors.
            next_batch(ct, x_train, input_tensor, ffconfig)
            next_batch_label(ct, y_train, label, ffconfig)
            ct += 1
            # Capture/replay a Legion trace after the warm-up epoch.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Debug: inline-map the first conv layer's input and dump it.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    # Debug: dump the label tensor the same way.
    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
Esempio n. 16
0
def top_level_task():
  """Train a three-branch concat CNN on CIFAR-10 with the named-layer API.

  Either loads the full CIFAR-10 arrays into FlexFlow tensors and drives
  training through two SingleDataLoaders (use_external=True), or falls back
  to a DataLoader4D built from the NetConfig dataset path. Tracing (id 111)
  is enabled from the second epoch onward; after training, the first conv
  layer's input tensor and the label tensor are inline-mapped and printed.
  """
  ffconfig = FFConfig()
  alexnetconfig = NetConfig()
  print(alexnetconfig.dataset_path)
  ffconfig.parse_args()
  print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %(ffconfig.get_batch_size(), ffconfig.get_workers_per_node(), ffconfig.get_num_nodes()))
  ffmodel = FFModel(ffconfig)
  
  # NCHW input: batch x 3 x 32 x 32, plus one int32 label per sample.
  dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
  input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

  dims_label = [ffconfig.get_batch_size(), 1]
  label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)
  
  # Toggle between in-memory numpy loading and the NetConfig dataset loader.
  use_external = True
  if (use_external == True):
    num_samples = 10000
    
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    
    # Normalize pixels into [0, 1].
    x_train = x_train.astype('float32')
    x_train /= 255
    #x_train = x_train.transpose(2, 3, 1, 0)
    #full_input_array = np.zeros((x_train.shape[0], x_train.shape[1], x_train.shape[2], x_train.shape[3]), dtype=np.float32)
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    # ct = 0.0
    # for i in range(0, x_train.shape[0]):
    #   for j in range(0, x_train.shape[1]):
    #     for k in range(0, x_train.shape[2]):
    #       for l in range(0, x_train.shape[3]):
    #         full_input_array[i, j, k, l] = x_train[i, j, k, l]
    #         ct += 1
    
    # Labels must be int32 for the loader.
    y_train = y_train.astype('int32')
    # y_train = y_train.transpose(1, 0)
    # full_label_array = np.zeros((y_train.shape[0], y_train.shape[1]), dtype=np.int32)
    # for i in range(0, y_train.shape[0]):
    #   for j in range(0, y_train.shape[1]):
    #     full_label_array[i, j] = y_train[i, j]
    full_label_array = y_train
   
    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    #print(full_input_array[0,:,:,:])
    #print(full_label_array[0, 0:64])
    print(full_label_array.__array_interface__["strides"])
    
    # Stage the full dataset in FlexFlow tensors, attach the numpy buffers
    # just long enough to construct the data loaders, then detach.
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
    
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)
    
    dataloader_input = SingleDataLoader(ffmodel, input, full_input, num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label, num_samples, DataType.DT_INT32)
    
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    
    num_samples = dataloader_input.get_num_samples()
  else:
    # Data Loader
    dataloader = DataLoader4D(ffmodel, input, label, ffnetconfig=alexnetconfig)
    num_samples = dataloader.get_num_samples()

  # t = ffmodel.conv2d("conv1", input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  # t = ffmodel.conv2d("conv2", t, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  # t = ffmodel.pool2d("pool1", t, 2, 2, 2, 2, 0, 0,)
  # t = ffmodel.conv2d("conv3", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  # t = ffmodel.conv2d("conv4", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  # t = ffmodel.pool2d("pool2", t, 2, 2, 2, 2, 0, 0)
  # t = ffmodel.flat("flat", t);
  # t = ffmodel.dense("lienar1", t, 512, ActiMode.AC_MODE_RELU)
  # t = ffmodel.dense("lienar1", t, 10)
  # t = ffmodel.softmax("softmax", t, label)
  
  # Three parallel conv1->conv2 branches over the same input, concatenated
  # along the channel axis.
  # NOTE(review): the layer names "conv1"/"conv2" (and "concat", "conv3",
  # "lienar1" below) are reused across distinct layers — confirm name
  # collisions are acceptable here.
  t1 = ffmodel.conv2d("conv1", input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t1 = ffmodel.conv2d("conv2", t1, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t2 = ffmodel.conv2d("conv1", input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t2 = ffmodel.conv2d("conv2", t2, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t3 = ffmodel.conv2d("conv1", input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t3 = ffmodel.conv2d("conv2", t3, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t = ffmodel.concat("concat", [t1, t2, t3], 1)
  t = ffmodel.pool2d("pool1", t, 2, 2, 2, 2, 0, 0,)
  # Second stage: two parallel conv3 branches, concatenated again.
  t1 = ffmodel.conv2d("conv3", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t2 = ffmodel.conv2d("conv3", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t = ffmodel.concat("concat", [t1, t2], 1)
  t = ffmodel.conv2d("conv4", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
  t = ffmodel.pool2d("pool2", t, 2, 2, 2, 2, 0, 0)
  t = ffmodel.flat("flat", t);
  t = ffmodel.dense("lienar1", t, 512, ActiMode.AC_MODE_RELU)
  t = ffmodel.dense("lienar1", t, 10)
  t = ffmodel.softmax("softmax", t, label)

  ffoptimizer = SGDOptimizer(ffmodel, 0.01)
  ffmodel.set_sgd_optimizer(ffoptimizer)

  ffmodel.init_layers()

 #  conv_2d1 = ffmodel.get_layer_by_id(11)
 #  cbias_tensor = conv_2d1.get_weight_tensor()
 #  input_tensor = conv_2d1.get_input_tensor_by_id(0)
 #  cbias_tensor.inline_map(ffconfig)
 #  cbias = cbias_tensor.get_array(ffconfig, DataType.DT_FLOAT)
 # # cbias += 0.125
 #  print(cbias.shape)
 #  #print(cbias)
 #  cbias_tensor.inline_unmap(ffconfig)



  epochs = ffconfig.get_epochs()
  #epochs = 10

  # Manual training loop; Legion trace 111 is captured/replayed after the
  # warm-up epoch.
  ts_start = ffconfig.get_current_time()
  for epoch in range(0,epochs):
    if (use_external == True):
      dataloader_input.reset()
      dataloader_label.reset()
    else:
      dataloader.reset()
    ffmodel.reset_metrics()
    iterations = int(num_samples / ffconfig.get_batch_size())
    print(iterations, num_samples)

    for iter in range(0, int(iterations)):
      # if (len(alexnetconfig.dataset_path) == 0):
      #   if (iter == 0 and epoch == 0):
      #     dataloader.next_batch(ffmodel)
      # else:
    #  dataloader.next_batch(ffmodel)
      if (use_external == True):
        dataloader_input.next_batch(ffmodel)
        dataloader_label.next_batch(ffmodel)
      else:
        dataloader.next_batch(ffmodel)
      if (epoch > 0):
        ffconfig.begin_trace(111)
      ffmodel.forward()
      ffmodel.zero_gradients()
      ffmodel.backward()
      ffmodel.update()
      if (epoch > 0):
        ffconfig.end_trace(111)

  ts_end = ffconfig.get_current_time()
  run_time = 1e-6 * (ts_end - ts_start);
  print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %(epochs, run_time, num_samples * epochs / run_time));
  #ffmodel.print_layers(13)

  # Debug: inline-map the first conv layer's input tensor and dump it.
  conv_2d1 = ffmodel.get_layer_by_id(0)
  cbias_tensor = conv_2d1.get_input_tensor()
  #cbias_tensor = conv_2d1.get_output_tensor()
  cbias_tensor.inline_map(ffconfig)
  cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
  print(cbias.shape)
  print(cbias)
  cbias_tensor.inline_unmap(ffconfig)

  # Debug: dump the label tensor the same way.
  label.inline_map(ffconfig)
  label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
  print(label_array.shape)
  # print(cbias)
  print(label_array)
  label.inline_unmap(ffconfig)
Esempio n. 17
0
def inception():
    """Build and train Inception-v3 on CIFAR-10 upscaled to 299x299.

    Assembles the network from the InceptionA-E helper blocks, trains it
    with SGD via fit(), and reports wall-clock throughput.
    """
    ffconfig = FFConfig()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    # NCHW input: batch x 3 x 299 x 299 (Inception-v3 native resolution).
    dims_input = [ffconfig.batch_size, 3, 299, 299]
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Stem convolutions and pooling.
    t = ffmodel.conv2d(input, 32, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 32, 3, 3, 1, 1, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 80, 1, 1, 1, 1, 0, 0)
    t = ffmodel.conv2d(t, 192, 3, 3, 1, 1, 1, 1)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    # Inception blocks A-E in the standard v3 arrangement.
    t = InceptionA(ffmodel, t, 32)
    t = InceptionA(ffmodel, t, 64)
    t = InceptionA(ffmodel, t, 64)
    t = InceptionB(ffmodel, t)
    t = InceptionC(ffmodel, t, 128)
    t = InceptionC(ffmodel, t, 160)
    t = InceptionC(ffmodel, t, 160)
    t = InceptionC(ffmodel, t, 192)
    t = InceptionD(ffmodel, t)
    t = InceptionE(ffmodel, t)
    t = InceptionE(ffmodel, t)
    # Global average pool, then a 10-way classifier.
    t = ffmodel.pool2d(t, 8, 8, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.label_tensor

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale each CHW image to 299x299 via PIL (nearest neighbour).
    full_input_np = np.zeros((num_samples, 3, 299, 299), dtype=np.float32)

    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((299, 299), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image

    # Normalize pixels into [0, 1]; labels must be int32.
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])

    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)

    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples

    ffmodel.init_layers()

    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()

    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    # Fix: throughput previously used a hard-coded 8192 instead of the
    # actual number of samples, misreporting the benchmark result.
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
Esempio n. 18
0
def top_level_task():
    """Train a serialized PyTorch model ("resnet.ff") on upscaled CIFAR-10.

    The network is reconstructed with PyTorchModel and applied to a
    229x229 float input tensor; the full dataset is staged through
    attach/detach numpy arrays into two SingleDataLoaders, then trained
    with fit().
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    # NCHW input: batch x 3 x 229 x 229.
    # NOTE(review): 229 looks like a typo for the usual 224, but it is used
    # consistently throughout this file — kept as-is.
    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Rebuild the network from the serialized FlexFlow representation and
    # append a softmax head.
    torch_model = PyTorchModel("resnet.ff")
    output_tensors = torch_model.apply(ffmodel, [input])
    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.label_tensor

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale each CHW image to 229x229 via PIL (nearest neighbour).
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)

    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image

    # Normalize pixels into [0, 1]; labels must be int32.
    full_input_np /= 255

    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Stage the full dataset in FlexFlow tensors; the numpy buffers are
    # attached only while the data loaders are constructed.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)

    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)

    dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.epochs

    # Time the whole run and report throughput in samples/s.
    ts_start = ffconfig.get_current_time()

    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
Esempio n. 19
0
def top_level_task():
    """Train a CIFAR-10 CNN exercising concat and split operators.

    Three parallel conv branches are concatenated along the channel axis,
    the result is split back into three, and only the middle split feeds
    the rest of the network — the other splits are intentionally unused.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    # NCHW input: batch x 3 x 32 x 32.
    dims_input = [ffconfig.batch_size, 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Three parallel conv branches over the same input.
    t1 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t3 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    # Concatenate along channels, then split back into three; only the
    # middle split (ts[1]) continues through the network.
    t = ffmodel.concat([t1, t2, t3], 1)
    ts = ffmodel.split(t, 3, 1)
    t = ffmodel.conv2d(ts[1], 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(
        t,
        2,
        2,
        2,
        2,
        0,
        0,
    )
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.label_tensor

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalize pixels into [0, 1]; labels must be int32.
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])

    y_train = y_train.astype('int32')
    full_label_array = y_train

    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    #print(full_input_array[0,:,:,:])
    #print(full_label_array[0, 0:64])
    print(full_label_array.__array_interface__["strides"])

    # Stage the full dataset in FlexFlow tensors; numpy buffers are attached
    # only while the data loaders are constructed.
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)

    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label_tensor, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    # Debug: print the raw handle of the first (unused) split output.
    print("end init model", ts[0].handle.impl)

    epochs = ffconfig.epochs
    #epochs = 10

    # Time the whole run and report throughput in samples/s.
    ts_start = ffconfig.get_current_time()

    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Train a RegNetX-3.2GF (imported via ONNX) on upscaled CIFAR-10.

    The PyTorch model (backbone + 1000-way linear head) is traced through
    ONNXModel and applied to a FlexFlow input tensor; CIFAR-10 images are
    resized to 229x229 and trained with fit().
    """
    ffconfig = FFConfig()
    resnetconfig = NetConfig()
    print(resnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    inputi = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    # RegNetX-3.2GF backbone plus a 1000-way linear head, exported via ONNX
    # and mapped onto the FlexFlow graph ("input.1" is the ONNX input name).
    model = rgn.RegNetX32gf()
    model = nn.Sequential(model, nn.Flatten(), nn.Linear(2520 * 7 * 7, 1000))
    onnx_input = torch.randn(64, 3, 229, 229)
    onnx_model = ONNXModel(model, onnx_input)
    t = onnx_model.apply(ffmodel, {"input.1": inputi})
    t = ffmodel.softmax(t)
    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale each CHW image to 229x229 via PIL (nearest neighbour).
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)

    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image

    full_input_np /= 255
    print("$$$$$$$$$$$$$$$$$$$FULL_INPUT_NP$$$$$$$$$$$$$$$$$$$$")
    print(full_input_np)
    # Fix: removed two interactive input("ENTER to continue.") pauses that
    # made this benchmark hang in any non-interactive (batch/CI) run.
    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Stage the full dataset in FlexFlow tensors; numpy buffers are attached
    # only while the data loaders are constructed.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)

    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)
    print(full_input)

    dataloader_input = SingleDataLoader(ffmodel, inputi, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    # Time the whole run and report throughput in samples/s.
    ts_start = ffconfig.get_current_time()

    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    # NOTE(review): this reuses the AlexNet/CIFAR-10 accuracy threshold for
    # a RegNet model — confirm the bar is intentional.
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'
Esempio n. 21
0
def top_level_task():
    """Train a ResNet-style network (BottleneckBlock stacks) on CIFAR-10
    upscaled to 229x229 with the low-level FlexFlow API (named layers,
    explicit epoch/iteration loop, no compile()/fit()).

    Data is fed either through attached numpy arrays + SingleDataLoader
    (use_external=True) or through DataLoader4D driven by the NetConfig
    dataset path.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    # Per-batch input (NCHW) and label tensors.
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    #print(dims)
    input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    dims_label = [ffconfig.get_batch_size(), 1]
    #print(dims)
    label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)

    use_external = True
    if (use_external == True):
        num_samples = 10000

        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

        # Upscale every 32x32 CHW image to 229x229 with nearest-neighbour
        # resampling, going through PIL in HWC order.
        full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)

        for i in range(0, num_samples):
            image = x_train[i, :, :, :]
            image = image.transpose(1, 2, 0)
            pil_image = Image.fromarray(image)
            pil_image = pil_image.resize((229, 229), Image.NEAREST)
            image = np.array(pil_image, dtype=np.float32)
            image = image.transpose(2, 0, 1)
            full_input_np[i, :, :, :] = image
            if (i == 0):
                print(image)

        # Scale pixel values to [0, 1].
        full_input_np /= 255
        print(full_input_np.shape)
        print(full_input_np.__array_interface__["strides"])
        print(full_input_np[0, :, :, :])

        y_train = y_train.astype('int32')
        full_label_np = y_train

        # Whole-dataset tensors; the numpy buffers are attached to them
        # only while the SingleDataLoaders are constructed.
        dims_full_input = [num_samples, 3, 229, 229]
        full_input = ffmodel.create_tensor(dims_full_input, "",
                                           DataType.DT_FLOAT)

        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "",
                                           DataType.DT_INT32)

        full_input.attach_numpy_array(ffconfig, full_input_np)
        full_label.attach_numpy_array(ffconfig, full_label_np)

        dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                            num_samples, DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)
        #dataloader = DataLoader4D(ffmodel, input, label, full_input, full_label, num_samples)

        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)

        num_samples = dataloader_input.get_num_samples()
        assert dataloader_input.get_num_samples(
        ) == dataloader_label.get_num_samples()

    else:
        # Data Loader
        dataloader = DataLoader4D(ffmodel,
                                  input,
                                  label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()

    # NOTE(review): kernel_init/bias_init are created but never passed to
    # any layer below — either dead code or a forgotten argument.
    kernel_init = GlorotUniformInitializer(123)
    bias_init = ZeroInitializer()
    # Stem: 7x7/2 conv + 3x3/2 max-pool, then four bottleneck stages;
    # stages after the first downsample in their first block.
    t = ffmodel.conv2d("conv1", input, 64, 7, 7, 2, 2, 3, 3)
    t = ffmodel.pool2d("pool1", t, 3, 3, 2, 2, 1, 1)
    for i in range(0, 3):
        t = BottleneckBlock(ffmodel, t, 64, 1)
    for i in range(0, 4):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 128, stride)
    for i in range(0, 6):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 256, stride)
    for i in range(0, 3):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 512, stride)
    # Head: 7x7 average pool, flatten, 10-way classifier, softmax.
    t = ffmodel.pool2d("pool2", t, 7, 7, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat("flat", t)
    t = ffmodel.dense("linear1", t, 10)
    t = ffmodel.softmax("softmax", t, label)

    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.set_sgd_optimizer(ffoptimizer)

    # input.inline_map(ffconfig)
    # input_array = input.get_array(ffconfig, DataType.DT_FLOAT)
    # input_array *= 1.0
    # print(input_array.shape)
    # input.inline_unmap(ffconfig)
    # label.inline_map(ffconfig)
    # label.inline_unmap(ffconfig)

    ffmodel.init_layers()

    #  conv_2d1 = ffmodel.get_layer_by_id(11)
    #  cbias_tensor = conv_2d1.get_weight_tensor()
    #  input_tensor = conv_2d1.get_input_tensor_by_id(0)
    #  cbias_tensor.inline_map(ffconfig)
    #  cbias = cbias_tensor.get_array(ffconfig, DataType.DT_FLOAT)
    # # cbias += 0.125
    #  print(cbias.shape)
    #  #print(cbias)
    #  cbias_tensor.inline_unmap(ffconfig)

    #use_external = False

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        if (use_external == True):
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)

        for iter in range(0, int(iterations)):
            if (use_external == True):
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # Trace only from the second epoch on, after warm-up.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    #ffmodel.print_layers(13)

    # Debug dump: first layer's input tensor, then the label tensor.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    #cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    # print(cbias)
    print(label_array)
    label.inline_unmap(ffconfig)
Esempio n. 22
0
def top_level_task():
    """Train AlexNet on CIFAR-10 (upscaled to 229x229) with the FlexFlow
    API and return the collected performance metrics.

    Builds the network with conv2d/pool2d/dense calls, compiles it with
    SGD + sparse-categorical-crossentropy, streams the data through
    SingleDataLoader objects, and trains via ffmodel.fit().

    Returns:
        The PerfMetrics object from ffmodel.get_perf_metrics().
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # AlexNet: 5 conv layers (only the first with explicit initializers)
    # with interleaved 3x3/2 max-pooling, then 3 dense layers.
    kernel_init = GlorotUniformInitializer(123)
    bias_init = ZeroInitializer()
    t = ffmodel.conv2d(input_tensor, 64, 11, 11, 4, 4, 2, 2,
                       ActiMode.AC_MODE_RELU, 1, True, None, kernel_init,
                       bias_init)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 192, 5, 5, 1, 1, 2, 2, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 384, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.label_tensor

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale every 32x32 CHW image to 229x229 via PIL (nearest
    # neighbour), keeping the CHW layout FlexFlow expects.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)

    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
        if (i == 0):
            print(image)

    # Scale pixel values to [0, 1].
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])

    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Whole-dataset tensors; the numpy buffers are attached only while
    # the SingleDataLoaders are constructed.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)

    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label_tensor, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()

    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    perf_metrics = ffmodel.get_perf_metrics()

    # The accuracy assertion that used to follow this return was
    # unreachable dead code and has been removed; callers receive the
    # metrics and can run their own checks.
    return perf_metrics
Esempio n. 23
0
def top_level_task():
    """Train the CNN described by "cifar10_cnn.onnx" on CIFAR-10 using
    the FlexFlow ONNX front-end and a manual epoch/iteration loop.

    Data is fed either through attached numpy arrays + SingleDataLoader
    (use_external=True) or through DataLoader4D driven by the NetConfig
    dataset path.  Fails with an assert when the final training accuracy
    is below the CIFAR10_CNN reference threshold.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    # dims_label = [ffconfig.get_batch_size(), 1]
    # label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)

    # Instantiate the ONNX graph; "input.1" is the graph's input name.
    onnx_model = ONNXModel("cifar10_cnn.onnx")
    t = onnx_model.apply(ffmodel, {"input.1": input})

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    use_external = True
    if (use_external == True):
        num_samples = 10000

        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

        # Normalise images to [0, 1]; labels must be int32.
        x_train = x_train.astype('float32')
        x_train /= 255
        full_input_array = x_train
        print(full_input_array.__array_interface__["strides"])

        y_train = y_train.astype('int32')
        full_label_array = y_train

        print(full_input_array.__array_interface__["strides"])
        print(full_input_array.shape, full_label_array.shape)
        #print(full_input_array[0,:,:,:])
        #print(full_label_array[0, 0:64])
        print(full_label_array.__array_interface__["strides"])

        # Whole-dataset tensors; the numpy buffers are attached only
        # while the SingleDataLoaders are constructed.
        dims_full_input = [num_samples, 3, 32, 32]
        full_input = ffmodel.create_tensor(dims_full_input, "",
                                           DataType.DT_FLOAT)

        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "",
                                           DataType.DT_INT32)

        full_input.attach_numpy_array(ffconfig, full_input_array)
        full_label.attach_numpy_array(ffconfig, full_label_array)

        dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                            num_samples, DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)

        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)

        num_samples = dataloader_input.get_num_samples()
    else:
        # Data Loader
        dataloader = DataLoader4D(ffmodel,
                                  input,
                                  label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    #epochs = 10

    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        if (use_external == True):
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)

        for iter in range(0, int(iterations)):
            # if (len(alexnetconfig.dataset_path) == 0):
            #   if (iter == 0 and epoch == 0):
            #     dataloader.next_batch(ffmodel)
            # else:
            #  dataloader.next_batch(ffmodel)
            if (use_external == True):
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # Trace only from the second epoch on, after warm-up.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Fail loudly if training accuracy is below the reference threshold.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'

    # Debug dump: first layer's input tensor, then the label tensor.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    #cbias_tensor = conv_2d1.get_output_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    # print(cbias)
    print(label_array)
    label.inline_unmap(ffconfig)
Esempio n. 24
0
def top_level_task():
    """Train a ResNet-style network (BottleneckBlock stacks with a
    batch-normalised stem) on CIFAR-10 upscaled to 229x229, using the
    create_data_loader/fit convenience path of the FlexFlow API.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node,
           ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    # Per-batch NCHW input tensor.
    input_tensor = ffmodel.create_tensor(
        [ffconfig.batch_size, 3, 229, 229], DataType.DT_FLOAT)

    # Stem: 7x7/2 conv + batch-norm + 3x3/2 max-pool.
    tensor = ffmodel.conv2d(input_tensor, 64, 7, 7, 2, 2, 3, 3)
    tensor = ffmodel.batch_norm(tensor)
    tensor = ffmodel.pool2d(tensor, 3, 3, 2, 2, 1, 1)

    # Bottleneck stages; every stage after the first downsamples
    # (stride 2) in its first block only.
    for _ in range(3):
        tensor = BottleneckBlock(ffmodel, tensor, 64, 1)
    for num_blocks, width in ((4, 128), (6, 256), (3, 512)):
        for block_idx in range(num_blocks):
            tensor = BottleneckBlock(ffmodel, tensor, width,
                                     2 if block_idx == 0 else 1)

    # Head: 7x7 average pool, flatten, 10-way classifier, softmax.
    tensor = ffmodel.pool2d(tensor, 7, 7, 1, 1, 0, 0, PoolType.POOL_AVG)
    tensor = ffmodel.flat(tensor)
    tensor = ffmodel.dense(tensor, 10)
    tensor = ffmodel.softmax(tensor)

    ffmodel.optimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
        ])
    label = ffmodel.label_tensor

    # load data
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale every 32x32 CHW image to 229x229 via PIL (nearest
    # neighbour), keeping the CHW layout FlexFlow expects.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for idx in range(num_samples):
        hwc = x_train[idx, :, :, :].transpose(1, 2, 0)
        resized = Image.fromarray(hwc).resize((229, 229), Image.NEAREST)
        full_input_np[idx, :, :, :] = np.array(
            resized, dtype=np.float32).transpose(2, 0, 1)

    # Scale pixel values to [0, 1].
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])

    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor,
                                                  full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)

    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples

    ffmodel.init_layers()

    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Knowledge-transfer demo with the FlexFlow Keras front-end.

    Trains a teacher CNN on CIFAR-10, copies its trained weights into a
    wider student network (two copies of the first conv layer run in
    parallel, with the second conv's kernel widened by concatenating
    the teacher kernel with itself), then trains the student with
    accuracy-verification callbacks.
    """
    num_classes = 10

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalise images to [0, 1]; labels must be int32 for
    # sparse-categorical-crossentropy.
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    #teacher
    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32")

    # Layers are bound to names so their weights can be read back after
    # training.
    c1 = Conv2D(filters=32,
                input_shape=(3, 32, 32),
                kernel_size=(3, 3),
                strides=(1, 1),
                padding="same",
                activation="relu")
    c2 = Conv2D(filters=32,
                kernel_size=(3, 3),
                strides=(1, 1),
                padding=(1, 1),
                activation="relu")
    c3 = Conv2D(filters=64,
                kernel_size=(3, 3),
                strides=(1, 1),
                padding=(1, 1),
                activation="relu")
    c4 = Conv2D(filters=64,
                kernel_size=(3, 3),
                strides=(1, 1),
                padding=(1, 1),
                activation="relu")
    d1 = Dense(512, activation="relu")
    d2 = Dense(num_classes)

    output_tensor = c1(input_tensor1)
    output_tensor = c2(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2),
                                 strides=(2, 2),
                                 padding="same")(output_tensor)
    output_tensor = c3(output_tensor)
    output_tensor = c4(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2),
                                 strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = d1(output_tensor)
    output_tensor = d2(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)

    teacher_model = Model(input_tensor1, output_tensor)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    teacher_model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])

    teacher_model.fit(x_train, y_train, epochs=10)

    # Read back the teacher's trained weights.
    c1_kernel, c1_bias = c1.get_weights(teacher_model.ffmodel)
    c2_kernel, c2_bias = c2.get_weights(teacher_model.ffmodel)
    c3_kernel, c3_bias = c3.get_weights(teacher_model.ffmodel)
    c4_kernel, c4_bias = c4.get_weights(teacher_model.ffmodel)
    d1_kernel, d1_bias = d1.get_weights(teacher_model.ffmodel)
    d2_kernel, d2_bias = d2.get_weights(teacher_model.ffmodel)
    #d2_kernel *= 0

    # The student's second conv sees the concatenation of two parallel
    # first-layer outputs, so widen the teacher kernel by duplicating it
    # along axis 1 to match.
    c2_kernel_new = np.concatenate((c2_kernel, c2_kernel), axis=1)
    print(c2_kernel.shape, c2_kernel_new.shape, c2_bias.shape)

    #student model
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32")

    sc1_1 = Conv2D(filters=32,
                   input_shape=(3, 32, 32),
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding="same",
                   activation="relu")
    sc1_2 = Conv2D(filters=32,
                   input_shape=(3, 32, 32),
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding="same",
                   activation="relu")
    sc2 = Conv2D(filters=32,
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 padding=(1, 1),
                 activation="relu")
    sc3 = Conv2D(filters=64,
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 padding=(1, 1),
                 activation="relu")
    sc4 = Conv2D(filters=64,
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 padding=(1, 1),
                 activation="relu")
    sd1 = Dense(512, activation="relu")
    sd2 = Dense(num_classes)

    # Two parallel first conv layers over the same input, concatenated
    # along the channel axis.
    t1 = sc1_1(input_tensor2)
    t2 = sc1_2(input_tensor2)
    output_tensor = Concatenate(axis=1)([t1, t2])
    output_tensor = sc2(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2),
                                 strides=(2, 2),
                                 padding="same")(output_tensor)
    output_tensor = sc3(output_tensor)
    output_tensor = sc4(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2),
                                 strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = sd1(output_tensor)
    output_tensor = sd2(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)

    student_model = Model(input_tensor2, output_tensor)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    student_model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])

    # Seed the student with the teacher's weights before training.
    sc1_1.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc1_2.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc2.set_weights(student_model.ffmodel, c2_kernel_new, c2_bias)
    sc3.set_weights(student_model.ffmodel, c3_kernel, c3_bias)
    sc4.set_weights(student_model.ffmodel, c4_kernel, c4_bias)
    sd1.set_weights(student_model.ffmodel, d1_kernel, d1_bias)
    sd2.set_weights(student_model.ffmodel, d2_kernel, d2_bias)

    student_model.fit(x_train,
                      y_train,
                      epochs=160,
                      callbacks=[
                          VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                          EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)
                      ])
Esempio n. 26
0
def top_level_task():
    """Train a ResNet-style network (BottleneckBlock stacks with a
    batch-normalised stem) on CIFAR-10 upscaled to 229x229, using a
    manual epoch/iteration loop over SingleDataLoader batches.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    #print(dims)
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Stem: 7x7/2 conv + batch-norm + 3x3/2 max-pool, then four
    # bottleneck stages; stages after the first downsample in their
    # first block.
    t = ffmodel.conv2d(input, 64, 7, 7, 2, 2, 3, 3)
    t = ffmodel.batch_norm(t)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 1, 1)
    for i in range(0, 3):
        t = BottleneckBlock(ffmodel, t, 64, 1)
    for i in range(0, 4):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 128, stride)
    for i in range(0, 6):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 256, stride)
    for i in range(0, 3):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 512, stride)
    # Head: 7x7 average pool, flatten, 10-way classifier, softmax.
    t = ffmodel.pool2d(t, 7, 7, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    # load data
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale every 32x32 CHW image to 229x229 via PIL (nearest
    # neighbour), keeping the CHW layout.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)

    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image

    # Scale pixel values to [0, 1].
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])

    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Whole-dataset tensors; the numpy buffers are attached only while
    # the SingleDataLoaders are constructed.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)

    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)

    dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        dataloader_input.reset()
        dataloader_label.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)

        for iter in range(0, int(iterations)):
            dataloader_input.next_batch(ffmodel)
            dataloader_label.next_batch(ffmodel)
            # Trace only from the second epoch on, after warm-up.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
Esempio n. 27
0
def top_level_task():
    """Load a serialised CNN graph from "cnn.ff", attach a softmax head,
    and train it on CIFAR-10 with a manual epoch/iteration loop.

    Fixes over the previous revision: the loop variable no longer
    shadows the builtin ``iter``, stray semicolons are removed, and the
    redundant ``int(iterations)`` conversion is dropped.
    """
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    # The serialised graph takes two inputs; feed the same tensor twice.
    output_tensors = ffmodel.construct_model_from_file(
        [input_tensor, input_tensor], "cnn.ff")

    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label = ffmodel.get_label_tensor()

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalise images to [0, 1]; labels must be int32 for the loader.
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train

    y_train = y_train.astype('int32')
    full_label_array = y_train

    # Whole-dataset tensors; the numpy buffers are attached only while
    # the SingleDataLoaders are constructed.
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)

    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)

    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(epochs):
        dataloader_input.reset()
        dataloader_label.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)

        # 'step' instead of the builtin-shadowing 'iter'.
        for step in range(iterations):
            dataloader_input.next_batch(ffmodel)
            dataloader_label.next_batch(ffmodel)
            # Begin/end a runtime trace only from the second epoch on,
            # after the first epoch has warmed everything up.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)

    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
Esempio n. 28
0
def top_level_task():
    """Train a CIFAR-10 CNN imported from ONNX using FlexFlow's fit() API.

    Loads the graph from ``cifar10_cnn.onnx``, stages the whole training
    set in tensors, trains for the configured number of epochs, and asserts
    that the final accuracy reaches the reference value.
    """
    ffconfig = FFConfig()
    net_config = NetConfig()
    print(net_config.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    # Batch-sized NCHW input placeholder.
    input_tensor = ffmodel.create_tensor(
        [ffconfig.get_batch_size(), 3, 32, 32], DataType.DT_FLOAT)

    # Import the network topology from the serialized ONNX graph and bind
    # its input node to our placeholder.
    onnx_model = ONNXModel("cifar10_cnn.onnx")
    output_tensor = onnx_model.apply(ffmodel, {"input.1": input_tensor})

    sgd = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(sgd)
    ffmodel.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
        ])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Scale pixels to [0, 1]; labels become int32 class ids.
    x_train = x_train.astype('float32')
    x_train /= 255
    inputs_np = x_train
    print(inputs_np.__array_interface__["strides"])

    y_train = y_train.astype('int32')
    labels_np = y_train

    # Whole-dataset staging tensors the single-batch loaders copy from.
    staged_input = ffmodel.create_tensor([num_samples, 3, 32, 32],
                                         DataType.DT_FLOAT)
    staged_label = ffmodel.create_tensor([num_samples, 1], DataType.DT_INT32)

    # Attach numpy data, build the loaders, then release the arrays.
    staged_input.attach_numpy_array(ffconfig, inputs_np)
    staged_label.attach_numpy_array(ffconfig, labels_np)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, staged_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label_tensor, staged_label,
                                        num_samples, DataType.DT_INT32)

    staged_input.detach_numpy_array(ffconfig)
    staged_label.detach_numpy_array(ffconfig)

    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Fail loudly if training fell short of the reference accuracy.
    perf_metrics = ffmodel.get_perf_metrics()
    if perf_metrics.get_accuracy() < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'
# Esempio n. 29 (Example no. 29)
def top_level_task():
    """Train a hand-built CIFAR-10 CNN with FlexFlow's low-level API.

    Builds the conv/pool/dense stack layer by layer, drives a manual
    forward/backward/update loop with explicit batch loads, checks the
    final accuracy, and finally inline-maps a layer input and the label
    tensor for inspection.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    # Batch-sized NCHW input placeholder.
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Normalize pixels to [0, 1]; labels become int32 class ids.
    x_train = x_train.astype('float32')
    x_train /= 255

    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])

    y_train = y_train.astype('int32')

    full_label_array = y_train

    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    # conv(32)x2 -> pool -> conv(64)x2 -> pool -> dense(512) -> dense(10).
    t = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1,
                       ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    label_tensor = ffmodel.get_label_tensor()

    # Prime the first batch before layer initialization.
    next_batch(0, x_train, input_tensor, ffconfig)
    next_batch_label(0, y_train, label_tensor, ffconfig)

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()

    ts_start = ffconfig.get_current_time()
    for epoch in range(epochs):
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        ct = 0
        for _ in range(iterations):
            # BUG FIX: the original passed the `input` builtin and the
            # undefined name `label` here instead of the tensors created
            # above, which fails at runtime.
            next_batch(ct, x_train, input_tensor, ffconfig)
            next_batch_label(ct, y_train, label_tensor, ffconfig)
            ct += 1
            # Capture a trace (id 111) only after the warm-up epoch.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)

    # Timestamps are scaled by 1e-6, i.e. microseconds to seconds.
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < 0.3:
        assert 0, 'Check Accuracy'

    # Inspect the first conv layer's input by mapping it inline.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    #cbias_tensor = conv_2d1.get_output_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    # BUG FIX: the original referenced the undefined name `label` here.
    label_tensor.inline_map(ffconfig)
    label_array = label_tensor.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    # print(cbias)
    print(label_array)
    label_tensor.inline_unmap(ffconfig)
def top_level_task(test_type=1):
    """Train a CIFAR-10 CNN imported from ONNX (PyTorch or Keras export).

    ``test_type == 1`` loads the PyTorch-exported graph, anything else the
    Keras-exported one; trains via fit() and asserts the reference accuracy.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    # Batch-sized NCHW input placeholder.
    input = ffmodel.create_tensor([ffconfig.batch_size, 3, 32, 32],
                                  DataType.DT_FLOAT)

    # The two exporters use different graph input names.
    if test_type == 1:
        loaded = ONNXModel("cifar10_cnn_pt.onnx")
        t = loaded.apply(ffmodel, {"input.1": input})
    else:
        loaded = ONNXModelKeras("cifar10_cnn_keras.onnx", ffconfig, ffmodel)
        t = loaded.apply(ffmodel, {"input_1": input})

    ffmodel.optimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
        ])
    label = ffmodel.label_tensor

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Scale pixels to [0, 1]; labels become int32 class ids.
    x_train = x_train.astype('float32')
    x_train /= 255
    inputs_np = x_train
    print(inputs_np.__array_interface__["strides"])

    y_train = y_train.astype('int32')
    labels_np = y_train

    # The high-level helper builds the loaders directly from numpy arrays.
    dataloader_input = ffmodel.create_data_loader(input, inputs_np)
    dataloader_label = ffmodel.create_data_loader(label, labels_np)

    num_samples = dataloader_input.num_samples

    ffmodel.init_layers()
    epochs = ffconfig.epochs

    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()

    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Fail loudly if training fell short of the reference accuracy.
    if ffmodel.get_perf_metrics().get_accuracy() < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'