def response_audio_with_audio(name: str = Form(...), file: UploadFile = File(...)):
    """
    Receive a blob (wav) file and store it to disk.

    :param name: Name field in the form data; refers to the file name
    :param file: File field in the form data; refers to the blob file
    :return: responses as a list of dicts
    """
    # File name of the wav file received from the network
    filename = f"./data/{name}.wav"
    # Output directory for the speech-synthesis wav
    wav_output_dir = os.path.join(os.getcwd(), "data")

    # Save the wav file from the network
    with open(filename, "wb") as f:
        f.write(file.file.read())

    # Down-sample to 16 kHz
    down_sample(filename, 16000)

    # Convert wav to text, and get the text responses
    converted_str = wav_file_to_str(name)
    responses = get_rasa_response(converted_str)

    # For every returned response that contains text, synthesize speech
    # and attach it as base64-encoded audio
    for response in responses:
        if "text" in response:
            str_to_wav_file(response['text'], wav_output_dir)
            with open(os.path.join(wav_output_dir, "out.wav"), "rb") as f:
                # Decode to str so the payload stays JSON-serializable
                wav_encoded = base64.b64encode(f.read()).decode("utf-8")
            response["audio"] = wav_encoded
    return responses
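# The endpoint above assumes a down_sample(path, rate) helper that rewrites
# the stored wav at the target sample rate. A minimal sketch, assuming
# librosa and soundfile are available; the original utility may differ.
import librosa
import soundfile as sf

def down_sample(path: str, target_rate: int) -> None:
    # librosa resamples on load when an explicit sr is given
    audio, _ = librosa.load(path, sr=target_rate, mono=True)
    # Overwrite the original file with the resampled audio
    sf.write(path, audio, target_rate)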
def train(self):
    loss_epoch = 0.
    num_batches = 0
    model.train()
    # Train loop
    for i, data in enumerate(tqdm(dataloader_train), 0):
        optimizer.zero_grad()

        # data creation
        tgt = data['data'].to(args.device)
        inp = down_sample(tgt)

        # inference
        pred = model(inp)

        # losses
        loss = loss_fn(pred, tgt)
        loss.backward()
        loss_epoch += float(loss.item())
        iou = kal.metrics.voxel.iou(pred.contiguous(), tgt)

        # logging
        num_batches += 1
        if i % args.print_every == 0:
            tqdm.write(f'[TRAIN] Epoch {self.cur_epoch:03d}, Batch {i:03d}: Loss: {float(loss.item())}')
            tqdm.write(f'Metric iou: {iou}')
        optimizer.step()

    loss_epoch = loss_epoch / num_batches
    self.train_loss.append(loss_epoch)
    self.cur_epoch += 1
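# A minimal sketch of the voxel down_sample used above, halving each spatial
# dimension with max pooling. The pooling choice and the factor of 2 are
# assumptions about the original utility.
import torch
import torch.nn.functional as F

def down_sample(voxels: torch.Tensor) -> torch.Tensor:
    # voxels: (batch, D, H, W) occupancy grids -> (batch, D/2, H/2, W/2)
    out = F.max_pool3d(voxels.unsqueeze(1), kernel_size=2)
    return out.squeeze(1)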
def validate(self):
    model.eval()
    with torch.no_grad():
        iou_epoch = 0.
        iou_NN_epoch = 0.
        num_batches = 0
        loss_epoch = 0.

        # Validation loop
        for i, data in enumerate(tqdm(dataloader_val), 0):
            # data creation
            tgt_odms = data['odms'].to(args.device)
            tgt_voxels = data['voxels'].to(args.device)
            inp_voxels = down_sample(tgt_voxels)
            inp_odms = []
            for voxel in inp_voxels:
                inp_odms.append(kal.rep.voxel.extract_odms(voxel).unsqueeze(0))
            inp_odms = torch.cat(inp_odms)
            tgt_odms_occ = to_occpumancy_map(tgt_odms)

            # inference
            pred_odms = model(inp_odms)

            # losses
            loss = loss_fn(pred_odms, tgt_odms_occ)
            loss_epoch += float(loss.item())

            # Binarize: occupied entries get full depth, the rest zero
            ones = pred_odms > .5
            zeros = pred_odms <= .5
            pred_odms[ones] = pred_odms.shape[-1]
            pred_odms[zeros] = 0

            # Nearest-neighbour upsampling baseline
            NN_pred = up_sample(inp_voxels)
            iou_NN = kal.metrics.voxel.iou(NN_pred.contiguous(), tgt_voxels)
            iou_NN_epoch += iou_NN

            # Project the predicted ODMs back onto the baseline voxels
            pred_voxels = []
            for odms, voxel_NN in zip(pred_odms, NN_pred):
                pred_voxels.append(kal.rep.voxel.project_odms(odms, voxel=voxel_NN, votes=2).unsqueeze(0))
            pred_voxels = torch.cat(pred_voxels)
            iou = kal.metrics.voxel.iou(pred_voxels.contiguous(), tgt_voxels)
            iou_epoch += iou

            # logging
            num_batches += 1
            if i % args.print_every == 0:
                out_iou = iou_epoch.item() / float(num_batches)
                out_iou_NN = iou_NN_epoch.item() / float(num_batches)
                tqdm.write(f'[VAL] Epoch {self.cur_epoch:03d}, Batch {i:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}')

        out_iou = iou_epoch.item() / float(num_batches)
        out_iou_NN = iou_NN_epoch.item() / float(num_batches)
        tqdm.write(f'[VAL Total] Epoch {self.cur_epoch:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}')

        loss_epoch = loss_epoch / num_batches
        self.val_loss.append(out_iou)
def validate(self):
    model.eval()
    with torch.no_grad():
        iou_epoch = 0.
        iou_NN_epoch = 0.
        num_batches = 0
        loss_epoch = 0.

        # Validation loop
        for i, data in enumerate(tqdm(dataloader_val), 0):
            # data creation
            tgt = data['data'].to(args.device)
            inp = down_sample(tgt)

            # inference
            pred = model(inp)

            # losses
            loss = loss_fn(pred, tgt.long())
            loss_epoch += float(loss.item())
            # Channel 1 of the two-class output is treated as the occupancy score
            iou = kal.metrics.voxel.iou(pred[:, 1, :, :].contiguous(), tgt)
            iou_epoch += iou

            # Nearest-neighbour upsampling baseline
            NN_pred = up_sample(inp)
            iou_NN = kal.metrics.voxel.iou(NN_pred.contiguous(), tgt)
            iou_NN_epoch += iou_NN

            # logging
            num_batches += 1
            if i % args.print_every == 0:
                out_iou = iou_epoch.item() / float(num_batches)
                out_iou_NN = iou_NN_epoch.item() / float(num_batches)
                tqdm.write(f'[VAL] Epoch {self.cur_epoch:03d}, Batch {i:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}')

        out_iou = iou_epoch.item() / float(num_batches)
        out_iou_NN = iou_NN_epoch.item() / float(num_batches)
        tqdm.write(f'[VAL Total] Epoch {self.cur_epoch:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}')

        loss_epoch = loss_epoch / num_batches
        self.val_loss.append(out_iou)
def train(self):
    loss_epoch = 0.
    num_batches = 0
    model.train()
    # Train loop
    for i, data in enumerate(tqdm(dataloader_train), 0):
        optimizer.zero_grad()

        # data creation
        tgt_odms = data['odms'].to(args.device)
        tgt_voxels = data['voxels'].to(args.device)
        inp_voxels = down_sample(tgt_voxels)
        inp_odms = []
        for voxel in inp_voxels:
            inp_odms.append(kal.rep.voxel.extract_odms(voxel).unsqueeze(0))
        inp_odms = torch.cat(inp_odms)

        # inference: predict a residual update on top of the upsampled ODMs
        initial_odms = upsample_omd(inp_odms) * 2
        distance = 30 - initial_odms
        pred_odms_update = model(inp_odms)
        pred_odms_update = pred_odms_update * distance
        pred_odms = initial_odms + pred_odms_update

        # losses
        loss = loss_fn(pred_odms, tgt_odms)
        loss.backward()
        loss_epoch += float(loss.item())

        # logging
        num_batches += 1
        if i % args.print_every == 0:
            tqdm.write(f'[TRAIN] Epoch {self.cur_epoch:03d}, Batch {i:03d}: Loss: {float(loss.item())}')
        optimizer.step()

    loss_epoch = loss_epoch / num_batches
    self.train_loss.append(loss_epoch)
    self.cur_epoch += 1
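# Illustration of the residual update above with assumed values: a low-res
# depth d maps to 2*d after upsampling, and the model output (assumed to lie
# in [0, 1]) is scaled by the remaining distance to the far side of the
# 30-voxel volume, so predictions stay inside the high-res grid.
import torch

inp_depth = torch.tensor([7.0])           # one low-res ODM entry
initial = inp_depth * 2                   # upsampled depth: 14
update = torch.tensor([0.25])             # model output, assumed in [0, 1]
pred = initial + update * (30 - initial)  # 14 + 0.25 * 16 = 18
print(pred)                               # tensor([18.])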
# Model
model = EncoderDecoderForNLL()
model = model.to(args.device)

# Load saved weights
model.load_state_dict(torch.load('log/{0}/best.pth'.format(args.expid)))

iou_epoch = 0.
iou_NN_epoch = 0.
num_batches = 0

model.eval()
with torch.no_grad():
    for data in tqdm(dataloader_val):
        tgt = data['data'].to(args.device)
        inp = down_sample(tgt)

        # inference
        pred = model(inp)

        iou = kal.metrics.voxel.iou(pred[:, 1, :, :].contiguous(), tgt)
        iou_epoch += iou

        # Nearest-neighbour upsampling baseline
        NN_pred = up_sample(inp)
        iou_NN = kal.metrics.voxel.iou(NN_pred.contiguous(), tgt)
        iou_NN_epoch += iou_NN

        if args.vis:
            for i in range(pred.shape[0]):
                print('Rendering low resolution input')
                kal.visualize.show_voxel(inp[i, 0], mode='exact', thresh=.5)
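# A minimal sketch of the nearest-neighbour up_sample baseline assumed above;
# the scale factor of 2 and the use of F.interpolate are assumptions about
# the original utility.
import torch.nn.functional as F

def up_sample(voxels):
    # voxels: (batch, D, H, W) -> add a channel dim for interpolate
    out = F.interpolate(voxels.unsqueeze(1), scale_factor=2, mode='nearest')
    return out.squeeze(1)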
def main():
    new_model = DeepestNetwork((25, 3, 120, 214))
    N = 4
    cwd = Path(os.getcwd())
    par = cwd.parent
    data_path = str(par / "data/DAVIS/JPEGImages/480p/")
    mask_path = str(par / "data/DAVIS/Annotations/480p/")
    tvt_split = (0.5, 0.7)

    X_train_t, X_val_t, X_test_t, y_train_t, y_val_t, y_test_t = generate_dataset_temporal(
        data_path, mask_path, tvt_split, N)

    # Move the channel axis from last to first: (..., H, W, C) -> (..., C, H, W)
    X_train_t = np.array(X_train_t).swapaxes(-1, -3).swapaxes(-2, -1)
    X_val_t = np.array(X_val_t).swapaxes(-1, -3).swapaxes(-2, -1)
    X_test_t = np.array(X_test_t).swapaxes(-1, -3).swapaxes(-2, -1)
    print(X_train_t.shape)
    print(X_val_t.shape)
    print(X_test_t.shape)
    y_train_t = np.array(y_train_t)
    y_val_t = np.array(y_val_t)
    y_test_t = np.array(y_test_t)
    print(y_train_t.shape)
    print(y_val_t.shape)
    print(y_test_t.shape)

    batch_size = 25
    train_data_t = data_providers.DataProvider(X_train_t, y_train_t, batch_size, shuffle_order=True)
    val_data_t = data_providers.DataProvider(X_val_t, y_val_t, batch_size, shuffle_order=True)
    test_data_t = data_providers.DataProvider(X_test_t, y_test_t, batch_size, shuffle_order=True)

    eb = ExperimentBuilder(new_model, "get_bear", 1, train_data_t, val_data_t, test_data_t, True)

    model_path = Path(os.getcwd()) / "static_run_deepest" / "saved_models"
    bear_path = Path(os.getcwd()).parent / "data" / "DAVIS" / "JPEGImages" / "480p" / "bear"
    bear = np.asarray(Image.open(str(bear_path / "00001.jpg")).convert(mode="RGB"))

    # Down-sample by 4 and reorder to (1, C, H, W) for the network
    inp = torch.Tensor(
        down_sample(bear, 4).swapaxes(0, 2).swapaxes(1, 2)).unsqueeze(0)
    out = eb.get_bear(model_path, inp)
    out = out.squeeze()

    # Threshold the sigmoid output to get a binary mask
    predicted = torch.sigmoid(out) > 0.5
    mask = predicted.cpu().numpy().astype('uint8')
    mask = 255 * mask
    mask_img = Image.fromarray(mask, mode='L')

    # Overlay the predicted mask on the down-sampled frame
    bear = down_sample(bear, 4)
    bear = Image.fromarray(bear)
    overlay = overlay_segment(bear, mask_img)
    overlay.save("cnnbear.png")
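# A minimal sketch of the image down_sample helper assumed above, keeping
# every n-th pixel along both spatial axes; the original may instead use
# block averaging or PIL resizing.
import numpy as np

def down_sample(img: np.ndarray, n: int) -> np.ndarray:
    # (H, W, C) -> (H//n, W//n, C) by strided subsampling
    return img[::n, ::n]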
# Load saved weights
model_res.load_state_dict(torch.load('log/{0}/resbest.pth'.format(args.expid)))
model_occ.load_state_dict(torch.load('log/{0}/occbest.pth'.format(args.expid)))

iou_epoch = 0.
iou_NN_epoch = 0.
num_batches = 0

model_res.eval()
model_occ.eval()
with torch.no_grad():
    for data in tqdm(dataloader_val):
        tgt_odms = data['odms'].to(args.device)
        tgt_voxels = data['voxels'].to(args.device)
        inp_voxels = down_sample(tgt_voxels)
        inp_odms = []
        for voxel in inp_voxels:
            inp_odms.append(kal.rep.voxel.extract_odms(voxel).unsqueeze(0))
        inp_odms = torch.cat(inp_odms)

        # inference res: residual model predicts an update on the upsampled ODMs
        initial_odms = upsample_omd(inp_odms) * 2
        distance = 30 - initial_odms
        pred_odms_update = model_res(inp_odms)
        pred_odms_update = pred_odms_update * distance
        pred_odms_res = initial_odms + pred_odms_update

        # inference occ: occupancy model scores each ODM entry
        pred_odms_occ = model_occ(inp_odms)
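# The snippet ends after the occupancy forward pass. A continuation sketch
# that mirrors the validation loop earlier in this section: binarize the
# occupancy output so entries map to either full depth or zero.
ones = pred_odms_occ > .5
zeros = pred_odms_occ <= .5
pred_odms_occ[ones] = pred_odms_occ.shape[-1]
pred_odms_occ[zeros] = 0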
import json
import os
import sys

import numpy as np
import pyttsx3

from utils import down_sample

# read the path to the parameters file
if len(sys.argv) < 2:
    print("parameters file not passed, default is 'params.txt'")
    params_path = "params.txt"
else:
    params_path = sys.argv[1]

# read the json file
with open(params_path) as file:
    params = json.load(file)

# down-sample the video before detecting helmets
down_sample(params)

# load the class labels this YOLO model was trained on
labelsPath = os.path.sep.join([params["yolo"], "cocohelmet.names"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([params["yolo"], "yolov3-obj_2400.weights"])
configPath = os.path.sep.join([params["yolo"], "yolov3-obj.cfg"])

# load our YOLO object detector trained on the helmet dataset
# and determine only the *output* layer names that we need from YOLO
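# The snippet ends before the loading step its last comment describes. With
# OpenCV's DNN module, that step typically looks like the following sketch.
import cv2

net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
ln = net.getLayerNames()
# getUnconnectedOutLayers returns 1-based indices into getLayerNames();
# it yields scalars in recent OpenCV releases and 1-element arrays in older ones
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]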
    return parser


if __name__ == '__main__':
    parser = arg_parser()
    args = parser.parse_args()
    args_dict = vars(args)

    # make output directory
    folder_name = utils.make_output_folder(args_dict['content'], args_dict['style'],
                                           args_dict['output_folder'])

    # down-sample images
    content, style, height, width = utils.down_sample(args_dict['content'],
                                                      args_dict['style'],
                                                      args_dict['max_pixel'])
    assert content.mode == 'RGB', 'content image not in RGB format'
    assert style.mode == 'RGB', 'style image not in RGB format'

    # input tensor: input image with shape of [batch, height, width, colors=3]
    f_img_reshape = lambda x: np.reshape(np.asarray(x), newshape=(-1, height, width, 3))
    imgs = {'content': content, 'style': style}
    imgs_reshaped = {key: f_img_reshape(img) for key, img in imgs.items()}
    vgg_input = tf.Variable(initial_value=np.zeros(shape=[1, height, width, 3], dtype='float32'),
                            name='image')

    # build model
    conv1_1, conv2_1, conv3_1, conv4_1, conv5_1, conv4_2 = model.build_part_vgg19(
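# A minimal sketch of utils.down_sample as used above: open both images,
# shrink the content so its longer side is at most max_pixel, and resize the
# style image to match. Whether max_pixel bounds the longer side or the total
# pixel count, and the LANCZOS resampling choice, are assumptions.
from PIL import Image

def down_sample(content_path, style_path, max_pixel):
    content = Image.open(content_path).convert('RGB')
    scale = min(1.0, max_pixel / max(content.size))
    width, height = int(content.width * scale), int(content.height * scale)
    content = content.resize((width, height), Image.LANCZOS)
    style = Image.open(style_path).convert('RGB').resize((width, height), Image.LANCZOS)
    return content, style, height, width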