Exemple #1
0
def top_level_task():
    """Train the graph stored in ``alexnet.ff`` on CIFAR-10 images.

    The FlexFlow model is populated from the serialized PyTorch graph and
    compiled with SGD (learning rate 0.01) and sparse categorical
    cross-entropy. Every training image is resized to 229x229 with
    nearest-neighbour resampling and scaled by 1/255. The model is then fit
    for the configured number of epochs and the elapsed time and throughput
    are printed.
    """
    config = FFConfig()
    config.parse_args()
    banner = "Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
    print(banner % (config.get_batch_size(),
                    config.get_workers_per_node(),
                    config.get_num_nodes()))
    model = FFModel(config)

    side = 229  # height and width of the declared network input
    image_input = model.create_tensor(
        [config.get_batch_size(), 3, side, side], DataType.DT_FLOAT)

    # Replay the serialized PyTorch graph onto the FlexFlow model.
    PyTorchModel("alexnet.ff").apply(model, [image_input])

    sgd = SGDOptimizer(model, 0.01)
    model.set_sgd_optimizer(sgd)
    model.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )
    label_input = model.get_label_tensor()

    sample_count = 10000
    (x_train, y_train), _ = cifar10.load_data(sample_count)

    images = np.zeros((sample_count, 3, side, side), dtype=np.float32)
    for idx in range(sample_count):
        # The resize goes through PIL, so move the channel axis last for
        # the conversion and back to the front afterwards.
        # NOTE(review): assumes x_train is channel-first -- confirm.
        hwc = np.transpose(x_train[idx], (1, 2, 0))
        resized = Image.fromarray(hwc).resize((side, side), Image.NEAREST)
        images[idx] = np.transpose(
            np.array(resized, dtype=np.float32), (2, 0, 1))
    images /= 255

    labels = y_train.astype(np.int32)

    input_loader = model.create_data_loader(image_input, images)
    label_loader = model.create_data_loader(label_input, labels)

    # Use the count reported by the loader for the throughput figure.
    sample_count = input_loader.get_num_samples()

    model.init_layers()
    epochs = config.get_epochs()

    started = config.get_current_time()
    model.fit(x=input_loader, y=label_loader, epochs=epochs)
    finished = config.get_current_time()

    # NOTE(review): the 1e-6 factor suggests timestamps are in
    # microseconds -- confirm against FFConfig.get_current_time.
    elapsed = 1e-6 * (finished - started)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, elapsed, sample_count * epochs / elapsed))
Exemple #2
0
def init_ff_mt5():
    """
    Build the FlexFlow representation of the HuggingFace mT5 model.

    The pretrained mT5 model is loaded, converted onto a fresh ``FFModel``
    via ``PyTorchModel.torch_to_ff``, compiled with SGD (lr 0.01), and wired
    to dataloaders built from the batch returned by ``load_batch_ff()``.

    Returns:
        (ffmodel, input_dls, label_dl)

        ffmodel (FFModel): Compiled and initialized FlexFlow model
            representing HuggingFace mT5.
        input_dls (List[SingleDataLoader]): Dataloaders for the encoder
            input IDs, encoder attention mask, and decoder input IDs, in
            that order.
        label_dl (SingleDataLoader): Label dataloader.
    """
    config = FFConfig()
    ffmodel = FFModel(config)
    hf_mt5 = MT5ForConditionalGeneration.from_pretrained(
        PRETRAINED_MODEL_NAME,
    )
    input_ids, attention_mask, decoder_input_ids, labels = load_batch_ff()

    # One int64 FlexFlow tensor per model input, in the order of the names
    # passed to PyTorchModel below.
    input_arrays = (input_ids, attention_mask, decoder_input_ids)
    input_tensors = [
        ffmodel.create_tensor(array.shape, DataType.DT_INT64)
        for array in input_arrays
    ]

    # (encoder length, decoder length), read from axis 1 of each ID array.
    seq_length = (input_ids.shape[1], decoder_input_ids.shape[1])
    traced = PyTorchModel(
        hf_mt5,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=config.batch_size,
        seq_length=seq_length,
    )
    traced.torch_to_ff(ffmodel, input_tensors)

    sgd = SGDOptimizer(ffmodel, lr=0.01)
    ffmodel.compile(
        optimizer=sgd,
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )

    input_dls = [
        ffmodel.create_data_loader(tensor, array)
        for tensor, array in zip(input_tensors, input_arrays)
    ]
    # NOTE: The label data is cast down to 32-bit to accommodate the label
    # tensor's bitwidth requirement.
    label_dl = ffmodel.create_data_loader(
        ffmodel.label_tensor, labels.astype("int32"),
    )

    ffmodel.init_layers()
    return (ffmodel, input_dls, label_dl)
Exemple #3
0
def top_level_task():
  """Train the graph stored in ``customParam.ff`` on synthetic data.

  The model input is declared as ``[batch_size, 1]`` floats, so the
  training set is ``num_samples`` random scalars shaped ``(num_samples, 1)``.
  The same random values, cast to int32, are used as labels.

  The previous version ran an image-resize loop copied from the AlexNet
  example. It indexed the 1-D random array with four indices, which raises
  IndexError, and built a ``(num_samples, 3, 229, 229)`` array that did not
  match the declared input tensor.

  NOTE(review): the intended data for this example is not visible here;
  the shapes below were chosen to match the declared input tensor. Confirm
  against the model that produced ``customParam.ff``.
  """
  ffconfig = FFConfig()
  alexnetconfig = NetConfig()
  print(alexnetconfig.dataset_path)
  print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
        (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
  ffmodel = FFModel(ffconfig)

  dims_input = [ffconfig.batch_size, 1]
  # Named input_tensor rather than `input` to avoid shadowing the builtin.
  input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

  torch_model = PyTorchModel("customParam.ff")
  output_tensors = torch_model.apply(ffmodel, [input_tensor])
  # Called for its effect on the model; the returned tensor is not used.
  ffmodel.softmax(output_tensors[0])

  ffoptimizer = SGDOptimizer(ffmodel, 0.01)
  ffmodel.optimizer = ffoptimizer
  ffmodel.compile(
      loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
      metrics=[
          MetricsType.METRICS_ACCURACY,
          MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
      ])
  label_tensor = ffmodel.label_tensor

  num_samples = 10000
  # Synthetic data: inputs and labels come from the same random draw, as in
  # the original code.
  x_train = np.random.rand(num_samples)
  y_train = x_train

  # One scalar feature per sample, matching dims_input's trailing dimension.
  full_input_np = x_train.reshape(num_samples, 1).astype(np.float32)

  # One int32 label per sample. np.random.rand draws from [0, 1), so the
  # integer cast makes every label 0.
  full_label_np = y_train.reshape(num_samples, 1).astype('int32')

  dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
  dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_np)

  num_samples = dataloader_input.num_samples
  assert dataloader_input.num_samples == dataloader_label.num_samples

  ffmodel.init_layers()

  epochs = ffconfig.epochs

  ts_start = ffconfig.get_current_time()

  ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)

  ts_end = ffconfig.get_current_time()
  # NOTE(review): the 1e-6 factor suggests timestamps are in microseconds --
  # confirm against FFConfig.get_current_time.
  run_time = 1e-6 * (ts_end - ts_start)
  print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
        (epochs, run_time, num_samples * epochs / run_time))
Exemple #4
0
def top_level_task():
    """Train the graph stored in ``mlp.ff`` on MNIST using explicit loaders.

    Unlike examples that call ``create_data_loader``, this one creates
    full-dataset tensors itself, attaches the NumPy arrays to them, builds
    ``SingleDataLoader`` objects, and detaches the arrays again before
    training with ``FFModel.train``.
    """
    config = FFConfig()
    config.parse_args()
    banner = "Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
    print(banner % (config.get_batch_size(),
                    config.get_workers_per_node(),
                    config.get_num_nodes()))
    model = FFModel(config)

    feature_count = 784
    batch_input = model.create_tensor(
        [config.get_batch_size(), feature_count], DataType.DT_FLOAT)

    num_samples = 60000

    # Replay the serialized PyTorch graph onto the FlexFlow model.
    PyTorchModel("mlp.ff").apply(model, [batch_input])

    sgd = SGDOptimizer(model, 0.01)
    model.set_sgd_optimizer(sgd)
    model.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )
    batch_label = model.get_label_tensor()

    (x_train, y_train), _ = mnist.load_data()

    print(x_train.shape)
    # Flatten each image to a 784-vector and scale by 1/255.
    features = x_train.reshape(num_samples, feature_count).astype(np.float32)
    features /= 255
    # Labels become an int32 column vector, one row per sample.
    targets = y_train.astype(np.int32).reshape(len(y_train), 1)

    full_input = model.create_tensor(
        [num_samples, feature_count], DataType.DT_FLOAT)
    full_label = model.create_tensor([num_samples, 1], DataType.DT_INT32)

    # The arrays stay attached while the loaders are constructed and are
    # detached right after; this ordering mirrors the original example.
    full_input.attach_numpy_array(config, features)
    full_label.attach_numpy_array(config, targets)

    input_loader = SingleDataLoader(
        model, batch_input, full_input, num_samples, DataType.DT_FLOAT)
    label_loader = SingleDataLoader(
        model, batch_label, full_label, num_samples, DataType.DT_INT32)

    full_input.detach_numpy_array(config)
    full_label.detach_numpy_array(config)

    model.init_layers()
    epochs = config.get_epochs()

    started = config.get_current_time()
    model.train((input_loader, label_loader), epochs)
    finished = config.get_current_time()

    elapsed = 1e-6 * (finished - started)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, elapsed, num_samples * epochs / elapsed))
Exemple #5
0
def top_level_task():
    """Train the two-input graph stored in ``cnn.ff`` on CIFAR-10.

    The same input tensor is passed for both graph inputs and a softmax is
    appended to the first output. After initialization the name of every
    layer is printed, then the layer called ``relu_1``, before the model is
    fit and timed.
    """
    config = FFConfig()
    config.parse_args()
    banner = "Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
    print(banner % (config.get_batch_size(),
                    config.get_workers_per_node(),
                    config.get_num_nodes()))
    model = FFModel(config)

    image_input = model.create_tensor(
        [config.get_batch_size(), 3, 32, 32], DataType.DT_FLOAT)

    # Both inputs of the serialized graph are fed from the same tensor.
    outputs = PyTorchModel("cnn.ff").apply(model, [image_input, image_input])

    # Called for its effect on the model; the returned tensor is not used.
    model.softmax(outputs[0])

    sgd = SGDOptimizer(model, 0.01)
    model.set_sgd_optimizer(sgd)
    model.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )
    label_input = model.get_label_tensor()

    sample_count = 10000
    (x_train, y_train), _ = cifar10.load_data(sample_count)

    images = x_train.astype('float32')
    images /= 255
    labels = y_train.astype('int32')

    input_loader = model.create_data_loader(image_input, images)
    label_loader = model.create_data_loader(label_input, labels)

    # Use the count reported by the loader for the throughput figure.
    sample_count = input_loader.get_num_samples()

    model.init_layers()

    # Print every layer's name, then look one layer up by name.
    layer_table = model.get_layers()
    for key in layer_table:
        print(layer_table[key].name)
    print(model.get_layer_by_name("relu_1"))

    epochs = config.get_epochs()

    started = config.get_current_time()
    model.fit(x=input_loader, y=label_loader, epochs=epochs)
    finished = config.get_current_time()

    elapsed = 1e-6 * (finished - started)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, elapsed, sample_count * epochs / elapsed))
Exemple #6
0
def top_level_task():
    """Convert an in-memory ``MLP`` to FlexFlow and train it on MNIST.

    The PyTorch module is converted directly with
    ``PyTorchModel.torch_to_ff`` (no intermediate file), compiled with SGD
    (learning rate 0.01), fit for the configured number of epochs, and
    timed.
    """
    config = FFConfig()
    banner = "Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
    print(banner %
          (config.batch_size, config.workers_per_node, config.num_nodes))
    ffmodel = FFModel(config)

    feature_count = 784
    batch_input = ffmodel.create_tensor(
        [config.batch_size, feature_count], DataType.DT_FLOAT)

    num_samples = 60000

    # Convert the PyTorch module straight onto the FlexFlow model.
    PyTorchModel(MLP()).torch_to_ff(ffmodel, [batch_input])

    ffmodel.optimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.compile(
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )
    batch_label = ffmodel.label_tensor

    (x_train, y_train), _ = mnist.load_data()

    print(x_train.shape)
    # Flatten each image to a 784-vector and scale by 1/255.
    features = x_train.reshape(num_samples, feature_count).astype(np.float32)
    features /= 255
    # Labels become an int32 column vector, one row per sample.
    targets = y_train.astype(np.int32).reshape(len(y_train), 1)

    input_loader = ffmodel.create_data_loader(batch_input, features)
    label_loader = ffmodel.create_data_loader(batch_label, targets)

    ffmodel.init_layers()
    epochs = config.epochs

    started = config.get_current_time()
    ffmodel.fit(x=input_loader, y=label_loader, epochs=epochs)
    finished = config.get_current_time()

    elapsed = 1e-6 * (finished - started)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, elapsed, num_samples * epochs / elapsed))
Exemple #7
0
def extract_mt5_subgraph(
    initial_op_name: Optional[str] = None,
    final_op_name: Optional[str] = None,
):
    """
    Return the slice of the traced mT5 graph between two named operators.

    Nodes are collected in the order the traced graph yields them, starting
    at the node named ``initial_op_name`` and stopping after the node named
    ``final_op_name`` (both inclusive). Passing ``None`` for either name
    removes that bound, so the slice starts at the first node and/or runs to
    the last node.

    If ``final_op_name`` is encountered before ``initial_op_name``,
    iteration stops there and the result is empty. If ``initial_op_name``
    never matches, the result is empty as well.

    NOTE: HuggingFace's symbolic trace only supports tracing a selection of
    classes. As a result, subgraphs must be extracted from the full mT5
    graph in the Python FlexFlow space.

    Returns:
        subgraph (List[Node]): List of the nodes comprising the subgraph.
    """
    hf_mt5 = MT5ForConditionalGeneration.from_pretrained(
        PRETRAINED_MODEL_NAME,
    )
    input_ids, _, decoder_input_ids, _ = load_batch_ff()

    # Batch size used only for tracing.
    trace_batch_size = 8
    seq_length = (input_ids.shape[1], decoder_input_ids.shape[1])
    traced = PyTorchModel(
        hf_mt5,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=trace_batch_size,
        seq_length=seq_length,
    )

    selected = []
    # Without a lower bound, collection is active from the first node.
    collecting: bool = initial_op_name is None
    for node in traced._trace_model():
        # Once active, collection stays active until the loop ends.
        collecting = collecting or node.name == initial_op_name
        if collecting:
            selected.append(node)
        # The upper bound ends the walk whether or not collection started.
        if final_op_name is not None and node.name == final_op_name:
            break
    return selected
Exemple #8
0
def top_level_task():
    """Convert HuggingFace ``google/mt5-small`` to FlexFlow and train it.

    Training arrays are read from ``.npy`` files under ``NUMPY_DIR``. The
    model is converted with ``PyTorchModel.torch_to_ff``, compiled with SGD
    (lr 0.01) and sparse categorical cross-entropy, and fit for the
    configured number of epochs.
    """
    config = FFConfig()
    ffmodel = FFModel(config)
    hf_mt5 = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")

    def load_array(filename):
        # Every training array is stored as a .npy file under NUMPY_DIR.
        return np.load(os.path.join(NUMPY_DIR, filename))

    # Load train data as numpy arrays
    print("Loading data...")
    ids = load_array("train_source_ids.npy")
    mask = load_array("train_source_mask.npy")
    y_ids = load_array("train_y_ids.npy")
    lm_labels = load_array("train_lm_labels.npy")

    batch_size = config.batch_size
    # Order matches the input names handed to PyTorchModel below:
    # input_ids, attention_mask, decoder_input_ids.
    input_arrays = (ids, mask, y_ids)
    input_tensors = [
        ffmodel.create_tensor((batch_size, array.shape[1]), DataType.DT_INT64)
        for array in input_arrays
    ]
    # (encoder length, decoder length), read from axis 1 of the ID arrays.
    seq_length = (ids.shape[1], y_ids.shape[1])

    print("Tracing the model...")
    traced = PyTorchModel(
        hf_mt5,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=batch_size,
        seq_length=seq_length,
    )
    traced.torch_to_ff(ffmodel, input_tensors, verbose=True)
    sgd = SGDOptimizer(ffmodel, lr=0.01)

    print("Compiling the model...")
    ffmodel.compile(
        optimizer=sgd,
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )

    print("Creating data loaders...")
    input_loaders = [
        ffmodel.create_data_loader(tensor, array)
        for tensor, array in zip(input_tensors, input_arrays)
    ]
    # NOTE: The label data is cast down to 32-bit to accommodate the label
    # tensor's required dtype.
    label_loader = ffmodel.create_data_loader(
        ffmodel.label_tensor, lm_labels.astype("int32"))

    print("Initializing model layers...")
    ffmodel.init_layers()

    print("Training...")
    ffmodel.fit(
        x=input_loaders,
        y=label_loader,
        batch_size=batch_size,
        epochs=config.epochs,
    )
Exemple #9
0
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], pretrained,
                   progress, **kwargs)


def wide_resnet101_2(
    pretrained: bool = False,
    progress: bool = True,
    **kwargs: Any,
) -> ResNet:
    r"""Build the Wide ResNet-101-2 model described in
    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_.

    The architecture matches ResNet except that the bottleneck channel count
    is doubled in every block, while the outer 1x1 convolutions keep the
    same number of channels. For example, the last block of ResNet-50 has
    2048-512-2048 channels, whereas Wide ResNet-50-2 has 2048-1024-2048.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to
            stderr
        **kwargs: Forwarded to ``_resnet``; ``width_per_group`` is always
            overwritten with 128.
    """
    # Any caller-supplied width_per_group is replaced by 64 * 2.
    kwargs.update(width_per_group=2 * 64)
    blocks_per_stage = [3, 4, 23, 3]
    return _resnet(
        'wide_resnet101_2',
        Bottleneck,
        blocks_per_stage,
        pretrained,
        progress,
        **kwargs,
    )


# Export script: build a ResNet-18 and write it out through FlexFlow's
# PyTorchModel wrapper.
# NOTE(review): `input` is not referenced by the statements below and
# shadows the builtin of the same name -- presumably a leftover sample
# input; confirm before removing.
input = torch.randn(64, 3, 224, 224)
model = resnet18()
ff_torch_model = PyTorchModel(model)
# Writes the wrapped model to "resnet.ff" in the current working directory.
ff_torch_model.torch_to_file("resnet.ff")
        self.linear1 = nn.Linear(512, 512)
        self.linear2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()

    def forward(self, input1, input2):
        """Run two inputs through the network and return two outputs.

        Both inputs pass through the same ``conv1`` module followed by ReLU.
        The two results are concatenated along dim 1, split, and
        concatenated again before going through the remaining
        conv / pool / flatten / linear layers.

        Args:
            input1: First input, passed to ``conv1``.
            input2: Second input, passed to the same ``conv1``.

        Returns:
            tuple: ``(yo, y)`` where ``yo`` is the output of ``linear2`` and
            ``y`` is the ReLU-activated output of ``linear1`` that feeds it.
        """
        # The same conv1 module (and therefore the same weights) is applied
        # to both inputs.
        y1 = self.conv1(input1)
        y1 = self.relu(y1)
        y2 = self.conv1(input2)
        y2 = self.relu(y2)
        y = torch.cat((y1, y2), 1)
        # NOTE(review): torch.split(y, 1) uses the default dim=0 with chunk
        # size 1, so unpacking into exactly two tensors only succeeds in
        # eager mode when dim 0 has size 2 -- presumably this method is only
        # ever run under symbolic tracing; confirm before calling it on real
        # batches.
        (y1, y2) = torch.split(y, 1)
        y = torch.cat((y1, y2), 1)
        y = self.conv2(y)
        y = self.relu(y)
        y = self.pool1(y)
        y = self.conv3(y)
        y = self.relu(y)
        y = self.conv4(y)
        y = self.relu(y)
        y = self.pool2(y)
        y = self.flat1(y)
        y = self.linear1(y)
        y = self.relu(y)
        yo = self.linear2(y)
        # Two outputs: the final linear output and the activation before it.
        return (yo, y)


# Export script: instantiate the CNN and write it out through FlexFlow's
# PyTorchModel wrapper.
model = CNN()
ff_torch_model = PyTorchModel(model)
# Writes the wrapped model to "cnn.ff" in the current working directory.
ff_torch_model.torch_to_file("cnn.ff")
Exemple #11
0
import torch.nn as nn
import torchvision.models as models
from flexflow.torch.model import PyTorchModel

# Export script: pick exactly one torchvision architecture and write it out
# through FlexFlow's PyTorchModel wrapper. The alternatives are kept
# commented out; when switching, also change the output file name below.

# model = models.alexnet()

# model = models.vgg16()

# model = models.squeezenet1_0()

# model = models.densenet161()

# model = models.inception_v3()

model = models.googlenet()

# model = models.shufflenet_v2_x1_0()

# model = models.mobilenet_v2()
ff_torch_model = PyTorchModel(model)
# Writes the wrapped model to "googlenet.ff" in the current working directory.
ff_torch_model.torch_to_file("googlenet.ff")
Exemple #12
0
import classy_vision.models.regnet as rgn
from flexflow.torch.model import PyTorchModel
import torch.nn as nn

# Export script: wrap the RegNetX-32gf model with a flatten + linear head
# and write it out through FlexFlow's PyTorchModel wrapper.
model = rgn.RegNetX32gf()
# NOTE(review): the Linear input size 2520 * 7 * 7 presumably corresponds to
# the trunk emitting 2520 channels at 7x7 resolution -- confirm against the
# RegNet configuration and the input size used.
model = nn.Sequential(model, nn.Flatten(), nn.Linear(2520 * 7 * 7, 1000))
ff_torch_model = PyTorchModel(model)
# Writes the wrapped model to "regnetX32gf.ff" in the current working directory.
ff_torch_model.torch_to_file("regnetX32gf.ff")