def hySpeechRecognition(self, filename, content):
    """Recognize speech by racing the iFlytek API and SDK backends.

    Both backends run in daemon threads; each pushes its result onto a
    shared queue and releases the semaphore when done. The first result
    to arrive wins.

    Args:
        filename: name of the audio file being recognized.
        content: audio payload handed to both backends.

    Returns:
        The first available recognition result; if fetching it fails,
        waits for the other backend and returns its result instead.
    """
    resultQueue = Queue.Queue()
    s = Semaphore(0)
    t_api = thread(target=self.request_ifly_api,
                   args=(filename, content, s, resultQueue))
    t_sdk = thread(target=self.request_ifly_sdk,
                   args=(filename, content, s, resultQueue))
    # Use the `daemon` attribute instead of setDaemon(): setDaemon() was
    # deprecated and removed in Python 3.13; the attribute exists since 2.6.
    t_api.daemon = True
    t_sdk.daemon = True
    t_sdk.start()
    t_api.start()
    # Block until at least one backend has produced a result.
    s.acquire()
    try:
        # BUG FIX: the original wrapped only `return result` in the try,
        # which cannot raise, so the fallback below was unreachable dead
        # code. The queue read itself now sits inside the try.
        return resultQueue.get()
    except Exception as e:
        print(e)
        # First result could not be fetched: wait for the second backend.
        s.acquire()
        return resultQueue.get()
Exemple #2
0
class Pool(object):
    """Bounded producer/consumer helper around a thread pool.

    A semaphore sized to QUEUE_SIZE limits how many produced items may be
    in flight at once, capping memory usage while parallelizing.
    """

    def __init__(self):
        # Each semaphore slot represents capacity for one in-flight item.
        self.semaphore = Semaphore(config.settings.QUEUE_SIZE)

    def queue_producer(self, producer):
        """Yield items from *producer* as soon as the semaphore allows."""
        try:
            for item in producer:
                # Block until a consumer frees a slot.
                self.semaphore.acquire()
                yield item
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # still propagate instead of being swallowed.
            logger.exception("Error in producer parallel task")

    def queue_consumer(self, consumer):
        """Return a wrapper around *consumer* that frees one slot per item."""

        def consumer_function(item):
            # Release first so the producer can continue while this item
            # is still being processed.
            self.semaphore.release()
            try:
                consumer(item)
            except Exception:
                # Narrowed from a bare `except:` (see queue_producer).
                logger.exception("Error in consumer parallel task")

        return consumer_function

    def parallelize(self, consumer, producer):
        """Implements a queued production of items to paralelize, limits RAM usage.
        imap() uses correctly the generator, is more memory efficient
        imap_unordered() does not wait on each item to be processed

        Args:
            consumer (function): Ingest and process items
            producer (generator): Yields items to be consumed
        """
        logger.info("Starting paralelization")

        self.pool = ThreadPool(config.settings.NUM_CONCURRENT_WORKERS)

        self.pool.imap_unordered(self.queue_consumer(consumer), self.queue_producer(producer))

        self.pool.close()
        self.pool.join()

        logger.info("Finishing paralelization")
Exemple #3
0
def train(layer, logger, args, grad_queue, grad_queue2, targets_queue, e, data_size, trainloader, start_event, start_event2):
    """Run one stage of a 3-rank pipeline-parallel training loop.

    Rank 0 feeds input batches forward to rank 1; rank 1 forwards
    activations to rank 2; rank 2 computes the loss and sends quantized
    gradients back up the pipeline. Activations/gradients travel as int8
    (quantized) tensors over torch.distributed point-to-point send/recv.
    A zero-size tensor send is used as the end-of-training sentinel: the
    matching recv raises RuntimeError, which each loop catches to exit.

    NOTE(review): backward_rank0/backward_rank1 read `outputs_queue`,
    `args`, `optimizer`, `dist` etc. from the enclosing scope while being
    launched via multiprocessing Process — this relies on fork-style
    memory sharing of the closure; verify on the target platform.

    Args:
        layer: this rank's model partition (nn.Module).
        logger: logger used by rank 2 for loss/accuracy reporting.
        args: namespace providing batch_size and buffer_size.
        grad_queue, grad_queue2: unused here — presumably legacy; confirm.
        targets_queue: inter-rank queue carrying target labels (rank 0 -> 2).
        e: event signaling global shutdown.
        data_size: number of batches, used by the rank-2 progress bar.
        trainloader: rank-0 input data loader.
        start_event, start_event2: events gating the backward workers.
    """

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()

    # Backward worker for rank 0: receives gradients from rank 1 and
    # backpropagates them through this stage's cached outputs.
    def backward_rank0(semaphore, start_event2):
        # Wait until rank 1 has produced its first backward gradient.
        start_event2.wait()
        batch_idx = 0
        while True:
            try:
                # Free one forward-pass slot for the producer loop below.
                semaphore.release()
                print("before grad recv")
                grad_recv = torch.zeros([args.batch_size, 256, 4, 4], dtype=torch.int8)
                dist.recv(tensor=grad_recv, src=1)
                print("after grad recv...")
            except RuntimeError as error:
                # Zero-size sentinel recv raises RuntimeError: training done.
                print("backward runtime error")
                break
            grad_recv = dequantize(grad_recv.cuda(0).float())
            # Pop the matching cached forward output (FIFO order assumed).
            loss = outputs_queue.get(block=False)
            loss.backward(grad_recv)
            # Gradient accumulation: step only every buffer_size batches.
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1


    # Backward worker for rank 1: receives gradients from rank 2,
    # backpropagates through this stage, and forwards input grads to rank 0.
    def backward_rank1(semaphore, start_event, start_event2):

        # Wait until rank 2 has produced its first gradient.
        start_event.wait()

        batch_idx = 0
        while True:
            try:
                #semaphore.release()
                print("before grad recv...")
                grad_recv1 = torch.zeros([args.batch_size, 512, 2, 2], dtype=torch.int8)
                dist.recv(tensor=grad_recv1, src=2)
                print("after grad recv.....")
            except RuntimeError as error:
                # Sentinel from rank 2: propagate shutdown to rank 0, then exit.
                print("backward runtime error")
                send_opt = dist.isend(tensor=torch.zeros(0), dst=0)
                send_opt.wait()
                break
            grad_recv1 = dequantize(grad_recv1.cuda(0).float())
            # Cached (input, output) pair from this rank's forward pass.
            inputs, outputs = outputs_queue.get(block=False)
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Quantize this stage's input gradient and send it back to rank 0.
            inputs_grad = quantize(inputs.grad, char=True).cpu()
            print(inputs_grad.size())
            if batch_idx == 0:
                # First gradient is out: unblock rank 0's backward worker.
                start_event2.set()
            #send_opt = dist.isend(tensor=inputs_grad, dst=0)
            #send_opt.wait()
            dist.send(tensor=inputs_grad, dst=0)
            batch_idx += 1


    if dist.get_rank() == 0:
        # ---- Rank 0: data-loading stage; forward to rank 1. ----
        criterion.cuda(0)
        outputs_queue = ThreadQueue(args.buffer_size)
        # Semaphore bounds how far forward can run ahead of backward.
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0, args=(semaphore, start_event2))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            semaphore.acquire()
            print("batch: " + str(batch_idx))
            inputs, targets = inputs.cuda(0), targets
            outputs = layer(inputs)
            # Targets skip the pipeline and go straight to rank 2's queue.
            targets_queue.put(targets.numpy())
            outputs_queue.put(outputs)
            send_opt = dist.isend(tensor=q_act(outputs, char=True).cpu(), dst=1)
            send_opt.wait()

            print("send....")
        print("start to end..")
        # Zero-size sentinel tells rank 1 the epoch is over.
        send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
        send_opt.wait()
        back_process.join()
        e.set()
    elif dist.get_rank() == 1:
        # ---- Rank 1: middle stage; forward rank-0 activations to rank 2. ----
        batch_idx = 0
        criterion.cuda(0)
        outputs_queue = ThreadQueue(10)
        semaphore = Semaphore(args.buffer_size - 1)
        back_process = Process(target=backward_rank1, args=(semaphore, start_event, start_event2))
        back_process.start()
        while True:
            try:
                print("before semaphore......")
                #semaphore.acquire()
                rec_val = torch.zeros([args.batch_size, 256, 4, 4], dtype=torch.int8)
                dist.recv(tensor=rec_val, src=0)
                print("after recv.....")
            except RuntimeError as error:
                # Sentinel from rank 0: propagate shutdown to rank 2 and exit.
                print("runtime errror")
                send_opt = dist.isend(tensor=torch.zeros(0), dst=2)
                send_opt.wait()
                back_process.join()
                e.wait()
                break
            print("before dq...")
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(0)
            # Track grads on the received activation so backward can
            # produce an input gradient to send upstream.
            rec_val.requires_grad_()
            print("before output......")
            outputs = layer(rec_val)
            # if batch_idx % args.buffer_size == 0:
            #     optimizer.step()
            #     optimizer.zero_grad()
            print("before queue")
            # Cache (input, output) for the backward worker.
            outputs_queue.put([rec_val, outputs])
            print("after queue")
            #send_opt = dist.isend(tensor=q_act(outputs, char=True).cpu(), dst=2)
            #send_opt.wait()
            dist.send(tensor=q_act(outputs, char=True).cpu(), dst=2)
            batch_idx += 1
            print("send end...")

    elif dist.get_rank() == 2:
        # ---- Rank 2: final stage; computes loss and starts backward. ----
        batch_idx = 0
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(0)

        while True:
            try:
                #print("before recv....")
                rec_val = torch.zeros([args.batch_size, 512, 2, 2], dtype=torch.int8)
                dist.recv(tensor=rec_val, src=1)
                #print("after recv.....")
            except RuntimeError as error:
                #traceback.format_exc(error)
                # Sentinel from rank 1: acknowledge upstream and wait for
                # global shutdown before exiting.
                send_opt = dist.isend(tensor=torch.zeros(0), dst=1)
                send_opt.wait()
                e.wait()
                break
            rec_val = dq_act(rec_val)
            rec_val = rec_val.cuda(0)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            # Labels arrive out-of-band via targets_queue (put by rank 0).
            targets = targets_queue.get(block=True, timeout=2)
            targets = torch.from_numpy(targets).cuda(0)
            loss = criterion(outputs, targets)
            loss.backward()

            if batch_idx % args.buffer_size == 0:
                optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                progress_bar(batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
                optimizer.zero_grad()
            else:
                # NOTE(review): loss/accuracy stats only accumulate on step
                # batches above, so this branch reports stale totals — confirm
                # whether that is intended.
                progress_bar(batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
            #if batch_idx % 10 == 0:
            logger.error("train:" + str(train_loss / (batch_idx + 1)))
            acc_str = "tacc: %.3f" % (100. * correct / total,)
            logger.error(acc_str)
            if batch_idx == 0:
                # First backward done: unblock rank 1's backward worker.
                start_event.set()
            # Send this stage's input gradient (quantized) back to rank 1.
            quantize_grad = quantize(rec_val.grad, char=True)
            #send_opt = dist.isend(tensor=quantize_grad.cpu(), dst=1)
            #send_opt.wait()
            dist.send(tensor=quantize_grad.cpu(), dst=1)
            batch_idx += 1