Example #1
import os
import base64

from fastapi import File, Form, UploadFile

# NOTE: down_sample, wav_file_to_str, get_rasa_response and str_to_wav_file
# are assumed to be project-local helpers.


def response_audio_with_audio(name: str = Form(...), file: UploadFile = File(...)):
    """
    Receive a WAV blob file and store it to disk
    :param name: name field in the form data; used as the file name
    :param file: file field in the form data; the WAV blob itself
    :return: list of response dicts
    """
    # path of the WAV file received over the network
    filename = f"./data/{name}.wav"

    # output directory for the synthesized speech
    wav_output_dir = os.path.join(os.getcwd(), "data")

    # save the uploaded WAV file to disk
    with open(filename, "wb") as f:
        f.write(file.file.read())

    # down-sample the saved audio
    down_sample(filename, 16000)

    # convert WAV to text, and get text responses from Rasa
    converted_str = wav_file_to_str(name)
    responses = get_rasa_response(converted_str)

    # for every response that contains text, synthesize it to speech and
    # attach the result as base64-encoded audio
    for response in responses:
        if "text" in response:
            str_to_wav_file(response['text'], wav_output_dir)
            with open(os.path.join(wav_output_dir, "out.wav"), "rb") as f:
                response["audio"] = base64.b64encode(f.read())

    return responses
Example #2
	def train(self):
		loss_epoch = 0.
		num_batches = 0
		model.train()
		# Train loop
		for i, data in enumerate(tqdm(dataloader_train), 0):
			optimizer.zero_grad()
			
			# data creation
			tgt = data['data'].to(args.device)
			
			inp = down_sample(tgt)

			# inference 
			pred = model(inp)

			# losses 
			loss = loss_fn(pred, tgt)
			loss.backward()
			loss_epoch += float(loss.item())
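			# voxel IoU between prediction and target, used only for logging below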
			iou = kal.metrics.voxel.iou(pred.contiguous(), tgt)

			# logging
			num_batches += 1
			if i % args.print_every == 0:
				tqdm.write(f'[TRAIN] Epoch {self.cur_epoch:03d}, Batch {i:03d}: Loss: {float(loss.item())}')
				tqdm.write(f'Metric IoU: {iou}')
			optimizer.step()
		
		
		loss_epoch = loss_epoch / num_batches
		self.train_loss.append(loss_epoch)
		self.cur_epoch += 1
Example #3
	def validate(self):
		model.eval()
		with torch.no_grad():	
			iou_epoch = 0.
			iou_NN_epoch = 0.
			num_batches = 0
			loss_epoch = 0.

			# Validation loop
			for i, data in enumerate(tqdm(dataloader_val), 0):

				# data creation
				tgt_odms = data['odms'].to(args.device)
				tgt_voxels = data['voxels'].to(args.device)
				inp_voxels = down_sample(tgt_voxels)

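				# extract orthographic depth maps (ODMs) from each low-res voxel grid and batch them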
				inp_odms = []
				for voxel in inp_voxels: 
					inp_odms.append(kal.rep.voxel.extract_odms(voxel).unsqueeze(0)) 
				inp_odms = torch.cat(inp_odms)

				tgt_odms_occ = to_occpumancy_map(tgt_odms)
				
				# inference 
				pred_odms = model(inp_odms)

				# losses 
				loss = loss_fn(pred_odms, tgt_odms_occ)
				loss_epoch += float(loss.item())

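				# binarize the predicted ODMs: values above .5 become the maximum depth (the ODM side length), the rest 0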
				ones = pred_odms > .5
				zeros = pred_odms <= .5
				pred_odms[ones] =  pred_odms.shape[-1]
				pred_odms[zeros] = 0 

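				# baseline prediction: naive up-sampling of the low-res input voxels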
				NN_pred = up_sample(inp_voxels)
				iou_NN = kal.metrics.voxel.iou(NN_pred.contiguous(), tgt_voxels)
				iou_NN_epoch += iou_NN
				
				pred_voxels = []
				for odms, voxel_NN in zip(pred_odms, NN_pred): 
					pred_voxels.append(kal.rep.voxel.project_odms(odms, voxel = voxel_NN, votes = 2).unsqueeze(0))
				pred_voxels = torch.cat(pred_voxels)
				iou = kal.metrics.voxel.iou(pred_voxels.contiguous(), tgt_voxels)
				iou_epoch += iou
				

				# logging
				num_batches += 1
				if i % args.print_every == 0:
					out_iou = iou_epoch.item() / float(num_batches)
					out_iou_NN = iou_NN_epoch.item() / float(num_batches)
					tqdm.write(f'[VAL] Epoch {self.cur_epoch:03d}, Batch {i:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}')
						
			out_iou = iou_epoch.item() / float(num_batches)
			out_iou_NN = iou_NN_epoch.item() / float(num_batches)
			tqdm.write(f'[VAL Total] Epoch {self.cur_epoch:03d}, Batch {i:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}')

			loss_epoch = loss_epoch / num_batches
			self.val_loss.append(out_iou)
Example #4
    def validate(self):
        model.eval()
        with torch.no_grad():
            iou_epoch = 0.
            iou_NN_epoch = 0.
            num_batches = 0
            loss_epoch = 0.

            # Validation loop
            for i, data in enumerate(tqdm(dataloader_val), 0):

                # data creation
                tgt = data['data'].to(args.device)
                inp = down_sample(tgt)

                # inference
                pred = model(inp)

                # losses
                loss = loss_fn(pred, tgt.long())
                loss_epoch += float(loss.item())

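                # channel 1 of the class-probability output is taken as the occupancy prediction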
                iou = kal.metrics.voxel.iou(pred[:, 1, :, :].contiguous(), tgt)
                iou_epoch += iou

                NN_pred = up_sample(inp)
                iou_NN = kal.metrics.voxel.iou(NN_pred.contiguous(), tgt)
                iou_NN_epoch += iou_NN

                # logging
                num_batches += 1
                if i % args.print_every == 0:
                    out_iou = iou_epoch.item() / float(num_batches)
                    out_iou_NN = iou_NN_epoch.item() / float(num_batches)
                    tqdm.write(
                        f'[VAL] Epoch {self.cur_epoch:03d}, Batch {i:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}'
                    )

            out_iou = iou_epoch.item() / float(num_batches)
            out_iou_NN = iou_NN_epoch.item() / float(num_batches)
            tqdm.write(
                f'[VAL Total] Epoch {self.cur_epoch:03d}, Batch {i:03d}: IoU: {out_iou}, IoU Base: {out_iou_NN}'
            )

            loss_epoch = loss_epoch / num_batches
            self.val_loss.append(out_iou)
Example #5
	def train(self):
		loss_epoch = 0.
		num_batches = 0
		diff = 0 
		model.train()
		# Train loop
		for i, data in enumerate(tqdm(dataloader_train), 0):
			optimizer.zero_grad()
			
			# data creation
			tgt_odms = data['odms'].to(args.device)
			tgt_voxels = data['voxels'].to(args.device)
			inp_voxels = down_sample(tgt_voxels)
			inp_odms = []
			for voxel in inp_voxels: 
				inp_odms.append(kal.rep.voxel.extract_odms(voxel).unsqueeze(0)) 
			inp_odms = torch.cat(inp_odms)
			
			# inference 
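			# the up-sampled ODMs are doubled since depth values scale with resolution;
			# 'distance' is the remaining range up to the maximum depth value (30 here),
			# and the model predicts a fractional residual within it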
			initial_odms = upsample_omd(inp_odms)*2
			distance = 30 - initial_odms
			pred_odms_update = model(inp_odms)
			pred_odms_update = pred_odms_update * distance
			pred_odms = initial_odms + pred_odms_update

			# losses 
			loss = loss_fn(pred_odms, tgt_odms)
			loss.backward()
			loss_epoch += float(loss.item())

			# logging
			num_batches += 1
			if i % args.print_every == 0:
				tqdm.write(f'[TRAIN] Epoch {self.cur_epoch:03d}, Batch {i:03d}: Loss: {float(loss.item())}')
				
			optimizer.step()
		
		
		loss_epoch = loss_epoch / num_batches
		self.train_loss.append(loss_epoch)
		self.cur_epoch += 1
Example #6
# Model
model = EncoderDecoderForNLL()
model = model.to(args.device)
# Load saved weights
model.load_state_dict(torch.load('log/{0}/best.pth'.format(args.expid)))


iou_epoch = 0.
iou_NN_epoch = 0.
num_batches = 0

model.eval()
with torch.no_grad():
	for data in tqdm(dataloader_val): 
		tgt = data['data'].to(args.device)
		inp = down_sample(tgt)

		# inference 
		pred = model(inp)

		iou = kal.metrics.voxel.iou(pred[:,1,:,:].contiguous(), tgt)
		iou_epoch += iou
		
		NN_pred = up_sample(inp)
		iou_NN = kal.metrics.voxel.iou(NN_pred.contiguous(), tgt)
		iou_NN_epoch += iou_NN

		if args.vis: 
			for i in range(pred.shape[0]):
				print('Rendering low resolution input')
				kal.visualize.show_voxel(inp[i, 0], mode='exact', thresh=.5)
Example #7
import os
from pathlib import Path

import numpy as np
import torch
from PIL import Image

# NOTE: DeepestNetwork, data_providers, ExperimentBuilder,
# generate_dataset_temporal, down_sample and overlay_segment are assumed to
# be project-local helpers.


def main():

    new_model = DeepestNetwork((25, 3, 120, 214))

    N = 4

    cwd = Path(os.getcwd())
    par = cwd.parent
    data_path = str(par / "data/DAVIS/JPEGImages/480p/")
    mask_path = str(par / "data/DAVIS/Annotations/480p/")

    tvt_split = (0.5, 0.7)

    X_train_t, X_val_t, X_test_t, y_train_t, y_val_t, y_test_t = generate_dataset_temporal(
        data_path, mask_path, tvt_split, N)

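    # reorder image axes from (N, H, W, C) to PyTorch's (N, C, H, W)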
    X_train_t = np.array(X_train_t).swapaxes(-1, -3).swapaxes(-2, -1)
    X_val_t = np.array(X_val_t).swapaxes(-1, -3).swapaxes(-2, -1)
    X_test_t = np.array(X_test_t).swapaxes(-1, -3).swapaxes(-2, -1)
    print(X_train_t.shape)
    print(X_val_t.shape)
    print(X_test_t.shape)
    y_train_t = np.array(y_train_t)
    y_val_t = np.array(y_val_t)
    y_test_t = np.array(y_test_t)
    print(y_train_t.shape)
    print(y_val_t.shape)
    print(y_test_t.shape)

    batch_size = 25
    train_data_t = data_providers.DataProvider(X_train_t,
                                               y_train_t,
                                               batch_size,
                                               shuffle_order=True)
    val_data_t = data_providers.DataProvider(X_val_t,
                                             y_val_t,
                                             batch_size,
                                             shuffle_order=True)
    test_data_t = data_providers.DataProvider(X_test_t,
                                              y_test_t,
                                              batch_size,
                                              shuffle_order=True)

    eb = ExperimentBuilder(new_model, "get_bear", 1, train_data_t, val_data_t,
                           test_data_t, True)

    model_path = Path(os.getcwd())
    model_path = model_path / "static_run_deepest" / "saved_models"

    bear_path = Path(
        os.getcwd()).parent / "data" / "DAVIS" / "JPEGImages" / "480p" / "bear"

    bear = np.asarray(
        Image.open(str(bear_path / "00001.jpg")).convert(mode="RGB"))

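    # down-sample the frame, move channels first (H, W, C) -> (C, H, W), and add a batch dimension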
    inp = torch.Tensor(
        down_sample(np.asarray(bear),
                    4).swapaxes(0, 2).swapaxes(1, 2)).unsqueeze(0)

    out = eb.get_bear(model_path, inp)
    out = out.squeeze()

    predicted = torch.sigmoid(out) > 0.5

    mask = predicted.cpu().numpy().astype('uint8')

    mask = 255 * mask

    mask_img = Image.fromarray(mask, mode='L')

    bear = down_sample(bear, 4)
    bear = Image.fromarray(bear)

    overlay = overlay_segment(bear, mask_img)

    overlay.save("cnnbear.png")
Example #8
# Load saved weights
model_res.load_state_dict(torch.load('log/{0}/resbest.pth'.format(args.expid)))
model_occ.load_state_dict(torch.load('log/{0}/occbest.pth'.format(args.expid)))

iou_epoch = 0.
iou_NN_epoch = 0.
num_batches = 0

model_res.eval()
model_occ.eval()
with torch.no_grad():
    for data in tqdm(dataloader_val):

        tgt_odms = data['odms'].to(args.device)
        tgt_voxels = data['voxels'].to(args.device)
        inp_voxels = down_sample(tgt_voxels)
        inp_odms = []
        for voxel in inp_voxels:
            inp_odms.append(kal.rep.voxel.extract_odms(voxel).unsqueeze(0))
        inp_odms = torch.cat(inp_odms)

        # inference res
        initial_odms = upsample_omd(inp_odms) * 2
        distance = 30 - initial_odms
        pred_odms_update = model_res(inp_odms)
        pred_odms_update = pred_odms_update * distance
        pred_odms_res = initial_odms + pred_odms_update

        # inference occ
        pred_odms_occ = model_occ(inp_odms)
Example #9
import sys
import os
import json

import numpy as np
import pyttsx3

from utils import down_sample

# read the path to the parameters file
if len(sys.argv) < 2:
    print("parameters file not passed, default is 'params.txt'")
    params_path = "params.txt"
else:
    params_path = sys.argv[1]

# read the json file
with open(params_path) as file:
    params = json.load(file)

# down-sample the video before detecting helmets
down_sample(params)

# load the COCO class labels this YOLO model was trained on
labelsPath = os.path.sep.join([params["yolo"], "cocohelmet.names"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([params["yolo"], "yolov3-obj_2400.weights"])
configPath = os.path.sep.join([params["yolo"], "yolov3-obj.cfg"])

# load our YOLO object detector trained on COCO dataset (80 classes)
# and determine only the *output* layer names that we need from YOLO
Example #10
    return parser


if __name__ == '__main__':
    parser = arg_parser()
    args = parser.parse_args()
    args_dict = vars(args)

    # make the output directory
    folder_name = utils.make_output_folder(args_dict['content'],
                                           args_dict['style'],
                                           args_dict['output_folder'])

    # down-sample the content and style images
    content, style, height, width = utils.down_sample(args_dict['content'],
                                                      args_dict['style'],
                                                      args_dict['max_pixel'])
    assert content.mode == 'RGB', 'content image not in RGB format'
    assert style.mode == 'RGB', 'style image not in RGB format'

    # input tensor: input image with shape of [batch, height, width, colors=3]
    f_img_reshape = lambda x: np.reshape(np.asarray(x),
                                         newshape=(-1, height, width, 3))
    imgs = {'content': content, 'style': style}
    imgs_reshaped = {key: f_img_reshape(img) for key, img in imgs.items()}
    vgg_input = tf.Variable(initial_value=np.zeros(shape=[1, height, width, 3],
                                                   dtype='float32'),
                            name='image')

    # build model
    conv1_1, conv2_1, conv3_1, conv4_1, conv5_1, conv4_2 = model.build_part_vgg19(