def top_level_task():
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
          % (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(), ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Build the FlexFlow graph from the serialized AlexNet model file.
    torch_model = PyTorchModel("alexnet.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upsample the 32x32 CIFAR-10 images to the 229x229 input resolution.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    full_input_np /= 255

    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_np)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n"
          % (epochs, run_time, num_samples * epochs / run_time))
def init_ff_mt5():
    """
    Initializes the FlexFlow representation of the HuggingFace mT5 model.

    Returns:
        (ffmodel, input_dls, label_dl)

        ffmodel (FFModel): Compiled and initialized FlexFlow model
            representing HuggingFace mT5.
        input_dls (List[SingleDataLoader]): List consisting of the encoder
            input IDs, encoder attention mask, and decoder input IDs
            dataloaders.
        label_dl (SingleDataLoader): Label dataloader.
    """
    ffconfig = FFConfig()
    ffmodel = FFModel(ffconfig)
    mt5_torch = MT5ForConditionalGeneration.from_pretrained(
        PRETRAINED_MODEL_NAME,
    )
    input_ids, attention_mask, decoder_input_ids, labels = load_batch_ff()
    input_tensors = [
        ffmodel.create_tensor(input_ids.shape, DataType.DT_INT64),
        ffmodel.create_tensor(attention_mask.shape, DataType.DT_INT64),
        ffmodel.create_tensor(decoder_input_ids.shape, DataType.DT_INT64),
    ]
    mt5_model = PyTorchModel(
        mt5_torch,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=ffconfig.batch_size,
        seq_length=(input_ids.shape[1], decoder_input_ids.shape[1]),
    )
    output_tensors = mt5_model.torch_to_ff(ffmodel, input_tensors)
    ffoptimizer = SGDOptimizer(ffmodel, lr=0.01)
    ffmodel.compile(
        optimizer=ffoptimizer,
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )
    input_ids_dl = ffmodel.create_data_loader(input_tensors[0], input_ids)
    attention_mask_dl = ffmodel.create_data_loader(
        input_tensors[1], attention_mask,
    )
    decoder_input_ids_dl = ffmodel.create_data_loader(
        input_tensors[2], decoder_input_ids,
    )
    # NOTE: We cast down the label tensor data to 32-bit to accommodate the
    # label tensor's bitwidth requirement
    label_dl = ffmodel.create_data_loader(
        ffmodel.label_tensor, labels.astype("int32"),
    )
    input_dls = [input_ids_dl, attention_mask_dl, decoder_input_ids_dl]
    ffmodel.init_layers()
    return (ffmodel, input_dls, label_dl)
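# A minimal usage sketch of init_ff_mt5(), following the fit() pattern used
# by the other scripts in this section. The wrapper name and the epoch count
# are hypothetical, not part of the example above.
def train_ff_mt5(epochs=1):
    ffmodel, input_dls, label_dl = init_ff_mt5()
    # x takes the list of input dataloaders, y the label dataloader.
    ffmodel.fit(x=input_dls, y=label_dl, epochs=epochs)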
def top_level_task():
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
          % (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 1]
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    torch_model = PyTorchModel("customParam.ff")
    output_tensors = torch_model.apply(ffmodel, [input])
    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.label_tensor

    num_samples = 10000
    # Synthetic random data shaped to match the [batch_size, 1] input tensor,
    # with int32 labels for the sparse-categorical loss.
    full_input_np = np.random.rand(num_samples, 1).astype('float32')
    full_label_np = np.zeros((num_samples, 1), dtype='int32')

    dataloader_input = ffmodel.create_data_loader(input, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)
    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples

    ffmodel.init_layers()

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n"
          % (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
          % (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(), ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims = [ffconfig.get_batch_size(), 784]
    input_tensor = ffmodel.create_tensor(dims, DataType.DT_FLOAT)
    num_samples = 60000

    torch_model = PyTorchModel("mlp.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.get_label_tensor()

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    print(x_train.shape)
    x_train = x_train.reshape(60000, 784)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    y_train = np.reshape(y_train, (len(y_train), 1))

    # Stage the full dataset in FlexFlow tensors by attaching the NumPy
    # arrays, build the dataloaders from them, then detach the arrays.
    dims_full_input = [num_samples, 784]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, x_train)
    full_label.attach_numpy_array(ffconfig, y_train)
    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.train((dataloader_input, dataloader_label), epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n"
          % (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
          % (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(), ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # The serialized CNN takes two inputs; reuse the same tensor for both.
    torch_model = PyTorchModel("cnn.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor, input_tensor])
    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    y_train = y_train.astype('int32')
    full_label_array = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_array)
    dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_array)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    # Inspect the layers created from the serialized model.
    layers = ffmodel.get_layers()
    for layer in layers:
        print(layers[layer].name)
    layer = ffmodel.get_layer_by_name("relu_1")
    print(layer)

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n"
          % (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    ffconfig = FFConfig()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)"
          % (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)

    dims = [ffconfig.batch_size, 784]
    input_tensor = ffmodel.create_tensor(dims, DataType.DT_FLOAT)
    num_samples = 60000

    # Convert the PyTorch MLP to FlexFlow layers directly, without an
    # intermediate .ff file.
    model = MLP()
    ff_torch_model = PyTorchModel(model)
    output_tensors = ff_torch_model.torch_to_ff(ffmodel, [input_tensor])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.label_tensor

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    print(x_train.shape)
    x_train = x_train.reshape(60000, 784)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    y_train = np.reshape(y_train, (len(y_train), 1))

    dataloader_input = ffmodel.create_data_loader(input_tensor, x_train)
    dataloader_label = ffmodel.create_data_loader(label_tensor, y_train)

    ffmodel.init_layers()

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n"
          % (epochs, run_time, num_samples * epochs / run_time))
def extract_mt5_subgraph(
    initial_op_name: Optional[str] = None,
    final_op_name: Optional[str] = None,
):
    """
    Extracts the mT5 subgraph starting from ``initial_op_name`` and ending
    with ``final_op_name`` (inclusive) in topological order. If
    ``initial_op_name`` is ``None``, the subgraph starts at the first
    operator; if ``final_op_name`` is ``None``, it ends at the last.

    NOTE: HuggingFace's symbolic trace only supports tracing a selection of
    classes. As a result, we must extract subgraphs from the full mT5 graph
    in the Python FlexFlow space.

    Returns:
        subgraph (List[Node]): List of the nodes comprising the subgraph.
    """
    mt5_torch = MT5ForConditionalGeneration.from_pretrained(
        PRETRAINED_MODEL_NAME,
    )
    input_ids, _, decoder_input_ids, _ = load_batch_ff()
    BATCH_SIZE = 8
    mt5_model = PyTorchModel(
        mt5_torch,
        is_hf_model=True,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        batch_size=BATCH_SIZE,
        seq_length=(input_ids.shape[1], decoder_input_ids.shape[1]),
    )
    graph = mt5_model._trace_model()
    subgraph = []
    in_subgraph: bool = initial_op_name is None
    for node in graph:
        if initial_op_name is not None and node.name == initial_op_name:
            in_subgraph = True
        if in_subgraph:
            subgraph.append(node)
            if final_op_name is not None and node.name == final_op_name:
                break
    return subgraph
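# A hypothetical usage sketch of extract_mt5_subgraph(); the operator names
# below are placeholders, not real node names from the traced mT5 graph.
full_graph = extract_mt5_subgraph()  # no limits: every node, in order
prefix = extract_mt5_subgraph(final_op_name="encoder_final_layer_norm")
middle = extract_mt5_subgraph(
    initial_op_name="decoder_embed_tokens",
    final_op_name="decoder_block_0",
)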
def top_level_task():
    ffconfig = FFConfig()
    ffmodel = FFModel(ffconfig)
    model = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")

    # Load train data as numpy arrays
    print("Loading data...")
    ids = np.load(os.path.join(NUMPY_DIR, "train_source_ids.npy"))
    mask = np.load(os.path.join(NUMPY_DIR, "train_source_mask.npy"))
    y_ids = np.load(os.path.join(NUMPY_DIR, "train_y_ids.npy"))
    lm_labels = np.load(os.path.join(NUMPY_DIR, "train_lm_labels.npy"))

    batch_size = ffconfig.batch_size
    input_ids_shape = (batch_size, ids.shape[1])
    attention_mask_shape = (batch_size, mask.shape[1])
    decoder_input_ids_shape = (batch_size, y_ids.shape[1])
    input_tensors = [
        ffmodel.create_tensor(input_ids_shape, DataType.DT_INT64),           # input_ids
        ffmodel.create_tensor(attention_mask_shape, DataType.DT_INT64),      # attention_mask
        ffmodel.create_tensor(decoder_input_ids_shape, DataType.DT_INT64),   # decoder_input_ids
    ]
    encoder_seq_length = ids.shape[1]
    decoder_seq_length = y_ids.shape[1]
    seq_length = (encoder_seq_length, decoder_seq_length)
    input_names = ["input_ids", "attention_mask", "decoder_input_ids"]

    print("Tracing the model...")
    hf_model = PyTorchModel(
        model,
        is_hf_model=True,
        input_names=input_names,
        batch_size=batch_size,
        seq_length=seq_length,
    )
    output_tensors = hf_model.torch_to_ff(ffmodel, input_tensors, verbose=True)
    ffoptimizer = SGDOptimizer(ffmodel, lr=0.01)

    print("Compiling the model...")
    ffmodel.compile(
        optimizer=ffoptimizer,
        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
        metrics=[
            MetricsType.METRICS_ACCURACY,
            MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY,
        ],
    )

    print("Creating data loaders...")
    input_ids_dl = ffmodel.create_data_loader(input_tensors[0], ids)
    attention_mask_dl = ffmodel.create_data_loader(input_tensors[1], mask)
    decoder_input_ids_dl = ffmodel.create_data_loader(input_tensors[2], y_ids)
    # NOTE: We cast down the label tensor data to 32-bit to accommodate the
    # label tensor's required dtype
    labels_dl = ffmodel.create_data_loader(
        ffmodel.label_tensor, lm_labels.astype("int32")
    )

    print("Initializing model layers...")
    ffmodel.init_layers()

    print("Training...")
    epochs = ffconfig.epochs
    ffmodel.fit(
        x=[input_ids_dl, attention_mask_dl, decoder_input_ids_dl],
        y=labels_dl,
        batch_size=batch_size,
        epochs=epochs,
    )
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)


def wide_resnet101_2(pretrained: bool = False, progress: bool = True,
                     **kwargs: Any) -> ResNet:
    r"""Wide ResNet-101-2 model from
    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_.

    The model is the same as ResNet except for the bottleneck number of
    channels, which is twice as large in every block. The number of channels
    in outer 1x1 convolutions is the same, e.g. the last block in ResNet-50
    has 2048-512-2048 channels, while in Wide ResNet-50-2 it has
    2048-1024-2048.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)


input = torch.randn(64, 3, 224, 224)
model = resnet18()
ff_torch_model = PyTorchModel(model)
ff_torch_model.torch_to_file("resnet.ff")
        self.linear1 = nn.Linear(512, 512)
        self.linear2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()

    def forward(self, input1, input2):
        # Two-branch front end: both inputs pass through the same conv layer.
        y1 = self.conv1(input1)
        y1 = self.relu(y1)
        y2 = self.conv1(input2)
        y2 = self.relu(y2)
        # Concatenate the branches, then split and re-concatenate.
        y = torch.cat((y1, y2), 1)
        (y1, y2) = torch.split(y, 1)
        y = torch.cat((y1, y2), 1)
        y = self.conv2(y)
        y = self.relu(y)
        y = self.pool1(y)
        y = self.conv3(y)
        y = self.relu(y)
        y = self.conv4(y)
        y = self.relu(y)
        y = self.pool2(y)
        y = self.flat1(y)
        y = self.linear1(y)
        y = self.relu(y)
        yo = self.linear2(y)
        # Return both the logits and the last hidden activations.
        return (yo, y)


model = CNN()
ff_torch_model = PyTorchModel(model)
ff_torch_model.torch_to_file("cnn.ff")
import torch.nn as nn
import torchvision.models as models

from flexflow.torch.model import PyTorchModel

# model = models.alexnet()
# model = models.vgg16()
# model = models.squeezenet1_0()
# model = models.densenet161()
# model = models.inception_v3()
model = models.googlenet()
# model = models.shufflenet_v2_x1_0()
# model = models.mobilenet_v2()

ff_torch_model = PyTorchModel(model)
ff_torch_model.torch_to_file("googlenet.ff")
import classy_vision.models.regnet as rgn
import torch.nn as nn

from flexflow.torch.model import PyTorchModel

# RegNetX-32gf backbone plus a flatten and a 1000-way classifier head.
model = rgn.RegNetX32gf()
model = nn.Sequential(model, nn.Flatten(), nn.Linear(2520 * 7 * 7, 1000))
ff_torch_model = PyTorchModel(model)
ff_torch_model.torch_to_file("regnetX32gf.ff")
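# A minimal consumption sketch, mirroring the top_level_task examples above:
# load an exported .ff file back into a FlexFlow model via PyTorchModel.apply().
# The function name and the 3x224x224 input dims are assumptions for this
# RegNet, not part of the export script.
def apply_exported_regnet(ffmodel, ffconfig):
    dims_input = [ffconfig.batch_size, 3, 224, 224]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    torch_model = PyTorchModel("regnetX32gf.ff")
    # Returns the FlexFlow output tensors of the reconstructed graph.
    return torch_model.apply(ffmodel, [input_tensor])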