Esempio n. 1
0
def test_set_then_get_sample_rate_hertz():
    """Round-trip a sample rate through the Configuration setter and getter."""
    from model.configuration import Configuration

    sample_rate = 16000
    configuration = Configuration()
    configuration.set_sample_rate_hertz(sample_rate)
    assert configuration.get_sample_rate_hertz() == sample_rate
Esempio n. 2
0
def test_set_then_get_model():
    """Round-trip a model name through the Configuration setter and getter."""
    from model.configuration import Configuration

    model_name = 'command_and_search'
    configuration = Configuration()
    configuration.set_model(model_name)
    assert configuration.get_model() == model_name
Esempio n. 3
0
def test_set_then_get_enable_separate_recognition_per_channel():
    """Round-trip the per-channel recognition flag through its setter and getter."""
    from model.configuration import Configuration

    flag = True
    configuration = Configuration()
    configuration.set_enable_separate_recognition_per_channel(flag)
    stored = configuration.get_enable_separate_recognition_per_channel()
    assert stored == flag
Esempio n. 4
0
def test_set_then_get_language_code():
    """Round-trip a language code through the Configuration setter and getter."""
    from model.configuration import Configuration

    language_code = 'fr-FR'
    configuration = Configuration()
    configuration.set_language_code(language_code)
    assert configuration.get_language_code() == language_code
Esempio n. 5
0
def test_set_get_alternative_language_codes():
    """Round-trip a list of alternative language codes through Configuration."""
    from model.configuration import Configuration

    language_codes = ['gu-IN', 'ru-RU']
    configuration = Configuration()
    configuration.set_alternative_language_codes(language_codes)
    assert configuration.get_alternative_language_codes() == language_codes
Esempio n. 6
0
def test_set_get_encoding():
    """Round-trip an encoding name through the Configuration setter and getter."""
    from model.configuration import Configuration

    encoding = 'FLAC'
    configuration = Configuration()
    configuration.set_encoding(encoding)
    assert configuration.get_encoding() == encoding
Esempio n. 7
0
def test_set_get_speech_context():
    """Check that a stored speech context exposes its phrases and boost."""
    from model.configuration import Configuration

    phrases = ['foo', 'bar', 'baz']
    boost = 3
    configuration = Configuration()
    configuration.set_speech_context(phrases, boost)

    # Only the first entry of the returned sequence is inspected.
    first_context = configuration.get_speech_context()[0]
    assert first_context['boost'] == boost
    assert first_context['phrases'] == phrases
    def test_get_other_configuration_value(self):
        """An extra key in the JSON is readable via get_configuration_value."""
        tests = ConfigurationTests
        # Work on a copy so the shared base map is not modified.
        extended_map = copy.deepcopy(tests.BASE_VALID_CONFIGURATION_MAP)
        extended_map[tests.OTHER_KEY] = tests.OTHER_VALUE
        configuration = Configuration(json.dumps(extended_map))

        actual = configuration.get_configuration_value(tests.OTHER_KEY)
        self.assertEqual(tests.OTHER_VALUE, actual)
def test_get_hypothesis():
    """Transcribe a fixed bucket recording and compare with the expected text."""
    from model.configuration import Configuration
    from utilities.speech_to_text import SpeechToText

    audio_uri = 'gs://brb/test_audio_n_truth/1.wav'
    expected_transcript = ' testing 1 2 3  hello hello testing one two three'

    settings = Configuration()
    settings.set_language_code('en-US')
    settings.set_encoding('LINEAR16')
    settings.set_sample_rate_hertz(44100)
    settings.set_model('default')

    transcript = SpeechToText().get_hypothesis(audio_uri, settings)
    assert transcript == expected_transcript
class SpeechToText(object):
    """Produces transcripts by calling ``speech.SpeechClient``."""

    configuration = Configuration()

    def get_hypothesis(self, uri, configuration):
        """Transcribe the audio at ``uri`` with a long-running recognize call.

        Args:
            uri: Audio location, sent to the service as ``{"uri": uri}``.
            configuration: Object whose getters supply the recognition
                settings (model, encoding, sample rate, language, ...).

        Returns:
            The first alternative of every result, each prefixed with one
            space and concatenated; an empty string when there are no
            results.

        Raises:
            TimeoutError: If the operation is still not done on the
                30000th poll.
        """
        import time

        client = speech.SpeechClient()
        config = {
            "model": configuration.get_model(),
            "use_enhanced": configuration.get_use_enhanced(),
            "encoding": configuration.get_encoding(),
            "sample_rate_hertz": configuration.get_sample_rate_hertz(),
            "language_code": configuration.get_language_code(),
            "alternative_language_codes":
                configuration.get_alternative_language_codes(),
            "audio_channel_count": configuration.get_audio_channel_count(),
            "enable_separate_recognition_per_channel":
                configuration.get_enable_separate_recognition_per_channel(),
            "enable_speaker_diarization":
                configuration.get_enableSpeakerDiarization(),
            "diarization_speaker_count":
                configuration.get_diarizationSpeakerCount(),
            "enable_automatic_punctuation":
                configuration.get_enableAutomaticPunctuation(),
            "speech_contexts": configuration.get_speech_context(),
        }

        audio = {"uri": uri}
        operation = client.long_running_recognize(config, audio)

        max_polls = 30000
        sleep_time = 15
        polls = 0
        while not operation.done():
            print(
                f"{operation.metadata.progress_percent}% complete - updates every {sleep_time} seconds"
            )
            polls += 1
            # Give up before sleeping again once the poll budget is used up.
            if polls == max_polls:
                raise TimeoutError("Time out processing audio")
            time.sleep(sleep_time)
        print(
            f"{operation.metadata.progress_percent}% complete - updates every {sleep_time} seconds"
        )

        response = operation.result(timeout=1200)

        # First alternative is the most probable result.
        transcript = "".join(" " + result.alternatives[0].transcript
                             for result in response.results)
        if not transcript:
            logger.debug('No transcript returned')

        return transcript
Esempio n. 11
0
class ConfigurationTests(unittest.TestCase):
    """Unit tests for ``Configuration`` objects built from a JSON string."""

    CITY = "Seattle"
    COUNTRY = "USA"
    OTHER_KEY = "otherKey"
    OTHER_VALUE = "otherValue"

    BASE_VALID_CONFIGURATION_MAP = {
        Configuration.CONFIGURATION_KEY_CITY: CITY,
        Configuration.CONFIGURATION_KEY_COUNTRY: COUNTRY,
    }

    BASE_VALID_CONFIGURATION_JSON = json.dumps(BASE_VALID_CONFIGURATION_MAP)
    INVALID_CONFIGURATION_JSON = json.dumps({})

    def setUp(self):
        """Build a Configuration from the base valid JSON."""
        base_json = ConfigurationTests.BASE_VALID_CONFIGURATION_JSON
        self.configuration = Configuration(base_json)

    def test_invalid_configuration(self):
        """Constructing from an empty JSON object raises KeyError."""
        with self.assertRaises(KeyError):
            Configuration(self.INVALID_CONFIGURATION_JSON)

    def test_get_city(self):
        """get_city returns the configured city."""
        city = self.configuration.get_city()
        self.assertEqual(self.CITY, city)

    def test_get_country(self):
        """get_country returns the configured country."""
        country = self.configuration.get_country()
        self.assertEqual(self.COUNTRY, country)

    def test_get_other_configuration_value(self):
        """An extra key in the JSON is readable via get_configuration_value."""
        tests = ConfigurationTests
        # Copy first so the shared base map stays untouched.
        extended_map = dict(tests.BASE_VALID_CONFIGURATION_MAP)
        extended_map[tests.OTHER_KEY] = tests.OTHER_VALUE
        configuration = Configuration(json.dumps(extended_map))

        actual = configuration.get_configuration_value(tests.OTHER_KEY)
        self.assertEqual(tests.OTHER_VALUE, actual)
def test_update_csv():
    """Write one CSV row through IOHandler and check what landed in the file.

    The results file is read back in full, deleted, and only then checked
    for the URI, the language code and the text ``'True'``.
    """
    from utilities.io_handler import IOHandler
    from model.configuration import Configuration
    from model.nlp import NLPModel
    import os

    configuration = Configuration()
    nlp_model = NLPModel()
    io_handler = IOHandler()
    result_file_name = io_handler._result_file_name
    io_handler.set_result_path('test_results_path')
    io_handler.write_csv_header()

    expected_uri = 'gs://foo/bar/baz/test.flac'
    expected_lang = 'fr-FR'
    nlp_model.set_apply_stemming(True)
    configuration.set_language_code(expected_lang)
    io_handler.update_csv(expected_uri, configuration, nlp_model)
    full_path = f'{io_handler.get_result_path()}/{result_file_name}'

    with open(full_path, 'r') as file:
        contents = file.read()
    # Delete only after the handle is closed: removing a file that is still
    # open raises an error on Windows.
    os.remove(full_path)

    assert expected_uri in contents
    assert expected_lang in contents
    assert 'True' in contents
Esempio n. 13
0
def test_get_audio_channel_count():
    """Smoke test: the audio channel count getter runs on a fresh Configuration."""
    from model.configuration import Configuration

    Configuration().get_audio_channel_count()
Esempio n. 14
0
def test_get_use_enhanced():
    """A fresh Configuration reports use_enhanced as False."""
    from model.configuration import Configuration

    use_enhanced = Configuration().get_use_enhanced()
    assert use_enhanced == False  # noqa: E712 - equality check, as in the original
Esempio n. 15
0
def test_set_model():
    """Smoke test: the model setter accepts 'default'."""
    from model.configuration import Configuration

    Configuration().set_model('default')
Esempio n. 16
0
def test_set_use_enhanced():
    """Smoke test: the use_enhanced setter accepts True."""
    from model.configuration import Configuration

    Configuration().set_use_enhanced(True)
Esempio n. 17
0
 def setUp(self):
     """Build ``self.configuration`` from the base valid configuration JSON."""
     base_json = ConfigurationTests.BASE_VALID_CONFIGURATION_JSON
     self.configuration = Configuration(base_json)
Esempio n. 18
0
def test_set_language_code():
    """Smoke test: the language code setter accepts 'en-US'."""
    from model.configuration import Configuration

    Configuration().set_language_code('en-US')
Esempio n. 19
0
def test_get_language_code():
    """Smoke test: the language code getter runs on a fresh Configuration."""
    from model.configuration import Configuration

    Configuration().get_language_code()
Esempio n. 20
0
def test_str():
    """The string form of a Configuration mentions every value that was set."""
    from model.configuration import Configuration

    model_name = 'phone_call'
    language_code = 'hi-IN'
    use_enhanced = True
    sample_rate_hertz = 48000
    audio_channel_count = 5
    separate_recognition_per_channel = False
    boost = 6
    phrases = ['testing', '$ADDRESSNUM']

    configuration = Configuration()
    configuration.set_model(model_name)
    configuration.set_language_code(language_code)
    configuration.set_enable_separate_recognition_per_channel(
        separate_recognition_per_channel)
    configuration.set_audio_channel_count(audio_channel_count)
    configuration.set_use_enhanced(use_enhanced)
    configuration.set_sample_rate_hertz(sample_rate_hertz)
    configuration.set_speech_context(phrases, boost)

    rendered = configuration.__str__()
    assert isinstance(rendered, str)

    # The phrases themselves are not checked, only the scalar settings.
    expected_fragments = (
        model_name,
        language_code,
        use_enhanced,
        sample_rate_hertz,
        audio_channel_count,
        separate_recognition_per_channel,
        boost,
    )
    for fragment in expected_fragments:
        assert str(fragment) in rendered
Esempio n. 21
0
def test_get_alternative_language_codes():
    """Smoke test: the alternative language codes getter runs on a fresh Configuration."""
    from model.configuration import Configuration

    Configuration().get_alternative_language_codes()
Esempio n. 22
0
def test_set_encoding():
    """Smoke test: the encoding setter accepts 'MP3'."""
    from model.configuration import Configuration

    encoding = 'MP3'
    Configuration().set_encoding(encoding)
Esempio n. 23
0
def test_get_encoding():
    """Smoke test: the encoding getter runs on a fresh Configuration."""
    from model.configuration import Configuration

    Configuration().get_encoding()
class SpeechToText(object):
    """Produces transcripts by calling ``speech.SpeechClient``."""

    configuration = Configuration()

    @staticmethod
    def _recognition_config(configuration):
        """Build the recognition config dict from ``configuration``'s getters."""
        return {
            "model": configuration.get_model(),
            "use_enhanced": configuration.get_use_enhanced(),
            "encoding": configuration.get_encoding(),
            "sample_rate_hertz": configuration.get_sample_rate_hertz(),
            "language_code": configuration.get_language_code(),
            "alternative_language_codes":
                configuration.get_alternative_language_codes(),
            "audio_channel_count": configuration.get_audio_channel_count(),
            "enable_separate_recognition_per_channel":
                configuration.get_enable_separate_recognition_per_channel(),
            "enable_speaker_diarization":
                configuration.get_enableSpeakerDiarization(),
            "diarization_speaker_count":
                configuration.get_diarizationSpeakerCount(),
            "enable_automatic_punctuation":
                configuration.get_enableAutomaticPunctuation(),
            "speech_contexts": configuration.get_speech_context(),
        }

    def get_hypothesis(self, uri, configuration):
        """Transcribe the audio at ``uri`` with a long-running recognize call.

        Args:
            uri: Audio location, sent to the service as ``{"uri": uri}``.
            configuration: Object whose getters supply the recognition
                settings.

        Returns:
            The first alternative of every result, each prefixed with one
            space and concatenated, then passed through
            ``Utilities().strip_puc`` and lower-cased.

        Raises:
            TimeoutError: If the operation is still not done on the
                30000th poll.

        Errors raised by ``long_running_recognize`` propagate unchanged.
        """
        import time

        client = speech.SpeechClient()
        config = self._recognition_config(configuration)
        audio = {"uri": uri}
        operation = client.long_running_recognize(config=config, audio=audio)

        max_polls = 30000
        sleep_time = 5
        polls = 0
        while not operation.done():
            print(
                f"{operation.metadata.progress_percent}% complete - updates every {sleep_time} seconds"
            )
            polls += 1
            # Give up before sleeping again once the poll budget is used up.
            if polls == max_polls:
                raise TimeoutError("Time out processing audio")
            time.sleep(sleep_time)
        print(
            f"{operation.metadata.progress_percent}% complete - updates every {sleep_time} seconds"
        )

        response = operation.result(timeout=1200)

        # First alternative is the most probable result.
        transcript = "".join(" " + result.alternatives[0].transcript
                             for result in response.results)
        if not transcript:
            logger.debug('No transcript returned')
        utilities = Utilities()
        stripped = utilities.strip_puc(text=transcript)
        return stripped.lower()

    def transcribe_streaming(self, stream_file, configuration):
        """Stream the audio file ``stream_file`` to the recognizer.

        The whole file is read into memory and sent as a single streaming
        request with interim results enabled.

        Args:
            stream_file: Path of the audio file to read in binary mode.
            configuration: Object whose getters supply the recognition
                settings.

        Returns:
            The transcript of the last alternative seen in the response
            stream, or ``''`` when no alternative was received.
        """
        client = speech.SpeechClient()

        with open(stream_file, 'rb') as audio_file:
            audio_content = audio_file.read()

        config = self._recognition_config(configuration)
        streaming_config = speech.types.StreamingRecognitionConfig(
            config=config, interim_results=True)

        # NOTE(review): the original author marked this request construction
        # with "BUG IS HERE"; the request must be an iterable, hence the
        # one-element stream.
        stream = [audio_content]
        requests = (speech.types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in stream)

        responses = client.streaming_recognize(streaming_config, requests)

        output = ''
        for response in responses:
            # Once the transcription has settled, the first result will
            # contain the is_final result. The other results will be for
            # subsequent portions of the audio.
            for result in response.results:
                # The alternatives are ordered from most likely to least;
                # each one overwrites the previous, so the last one wins.
                for alternative in result.alternatives:
                    output = alternative.transcript

        return output
                        help='Path to file containing comma separated phrases')
    parser.add_argument('-b', '--boosts', default=list(), nargs='+', required=False,
                        help=('Space separated list of boost values to evaluate for speech adaptation'))
    parser.add_argument('-ch', '--multi', required=False, type=int,
                        help='Integer indicating the number of channels if more than one')
    parser.add_argument('-q', '--random_queue', required=False, action='store_true', help='Replaces default queue.txt with randomly named queue file')
    parser.add_argument('-fake', '--fake_hyp',  required=False, action='store_true', help='Use a fake hypothesis for testing')
    parser.add_argument('-limit', '--limit', required=False, default=None,type= int,  help = 'Limit to X number of audio files')
    parser.add_argument('-nzb', '--no_zeros_boost', required=False,  action='store_true', help='skip boost of 0' )
    parser.add_argument('-single', '--single_word', required=False, action='store_true', help='process each letter rather than whole words')
    parser.add_argument('-lf','--local_files_path', required=False, type=str, help='process local files',  default=None)

    nlp_model = NLPModel()
    io_handler = IOHandler()
    nlp_options = NLPOptions()
    configuration = Configuration()
    # Turn on punctuation ..  why not.. no bearing on WER
    configuration.set_enableAutomaticPunctuation(True)

    args = parser.parse_args()
    no_zeros_for_boost = args.no_zeros_boost
    process_each_letter = args.single_word
    local_files_path = args.local_files_path
    limit = args.limit
    cloud_store_uri = args.cloud_store_uri
    io_handler.set_result_path(args.local_results_path)
    only_transcribe = args.transcriptions_only
    nlp_model.set_n2w(args.numbers_to_words)
    nlp_model.set_apply_stemming(args.stem)
    nlp_model.set_remove_stop_words(args.remove_stop_words)
    nlp_model.set_expand_contractions(args.expand)
Esempio n. 26
0
def test_get_enable_separate_recognition_per_channel():
    """Smoke test: the per-channel recognition getter runs on a fresh Configuration."""
    from model.configuration import Configuration

    Configuration().get_enable_separate_recognition_per_channel()
Esempio n. 27
0
def run_train():
    """Train ``MaskRcnnNet`` from a checkpoint, logging and saving as it goes.

    Creates the ``checkpoint``, ``train`` and ``backup`` directories under
    ``RESULTS_DIR``, zips the project, restores the model weights and the
    iteration/epoch counters from ``initial_checkpoint``, then runs SGD over
    ``train_loader`` until ``num_iters`` iterations have been reached. The
    validation set is evaluated every ``iter_valid`` iterations and a
    checkpoint is written at every iteration listed in ``iter_save``, plus
    once more at the end.

    ``RESULTS_DIR`` is used with ``/`` and ``.mkdir``, so it is handled as a
    path object throughout (presumably a ``pathlib.Path``). The log and
    checkpoint paths are therefore built with ``/`` and converted with
    ``str`` instead of being concatenated with ``+``.
    """
    out_dir = RESULTS_DIR
    checkpoint_dir = out_dir / 'checkpoint'
    # NOTE(review): the leading space in this file name looks unintended - confirm.
    initial_checkpoint = RESULTS_DIR / ' 00072200_model.pth'

    pretrain_file = None  # imagenet pretrain
    ## setup  -----------------
    checkpoint_dir.mkdir(exist_ok=True)
    (out_dir / 'train').mkdir(exist_ok=True)
    (out_dir / 'backup').mkdir(exist_ok=True)

    backup_project_as_zip(
        PROJECT_PATH,
        str(out_dir / 'backup' / ' code.train.%s.zip') % IDENTIFIER)

    log = Logger()
    log.open(str(out_dir / 'log.train.txt'), mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('** some experiment setting **\n')
    log.write('\tSEED         = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_dir      = %s\n' % out_dir)
    log.write('\n')

    ## net ----------------------
    log.write('** net setting **\n')
    cfg = Configuration()
    net = MaskRcnnNet(cfg).cuda()

    if initial_checkpoint is not None:
        log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)
        net.load_state_dict(
            torch.load(initial_checkpoint,
                       map_location=lambda storage, loc: storage))

    elif pretrain_file is not None:
        log.write('\tpretrained_file = %s\n' % pretrain_file)
        # load_pretrain_file(net, pretrain_file)

    log.write('%s\n\n' % (type(net)))
    log.write('\n')

    ## optimiser ----------------------------------
    iter_accum = 1
    batch_size = 4  ##NUM_CUDA_DEVICES*512 #256//iter_accum #512 #2*288//iter_accum

    num_iters = 1000 * 1000
    iter_smooth = 20
    iter_log = 50
    iter_valid = 100
    iter_save = [0, num_iters - 1] \
                + list(range(0, num_iters, 100))  # 1*1000

    LR = None  # LR = StepLR([ (0, 0.01),  (200, 0.001),  (300, -1)])
    optimizer = SGD(filter(lambda p: p.requires_grad, net.parameters()),
                    lr=0.001 / iter_accum,
                    momentum=0.9,
                    weight_decay=0.0001)

    start_iter = 0
    start_epoch = 0.
    if initial_checkpoint is not None:
        # Convert to str first: on a path object ``replace`` renames a file
        # instead of substituting text.
        optimizer_checkpoint = str(initial_checkpoint).replace(
            '_model.pth', '_optimizer.pth')
        checkpoint = torch.load(optimizer_checkpoint)
        start_iter = checkpoint['iter']
        start_epoch = checkpoint['epoch']
        # optimizer.load_state_dict(checkpoint['optimizer'])

    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')

    train_dataset = ScienceDataset(
        # 'train1_ids_gray_only1_500', mode='train',
        'valid1_ids_gray_only1_43',
        mode='train',
        transform=train_augment)
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        # sampler = ConstantSampler(train_dataset,list(range(16))),
        batch_size=batch_size,
        drop_last=True,
        num_workers=4,
        pin_memory=True,
        collate_fn=train_collate)

    valid_dataset = ScienceDataset(
        'valid1_ids_gray_only1_43',
        mode='train',
        # 'debug1_ids_gray_only1_10', mode='train',
        transform=valid_augment)
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=batch_size,
                              drop_last=False,
                              num_workers=4,
                              pin_memory=True,
                              collate_fn=train_collate)

    log.write('\ttrain_dataset.split = %s\n' % (train_dataset.split))
    log.write('\tvalid_dataset.split = %s\n' % (valid_dataset.split))
    log.write('\tlen(train_dataset)  = %d\n' % (len(train_dataset)))
    log.write('\tlen(valid_dataset)  = %d\n' % (len(valid_dataset)))
    log.write('\tlen(train_loader)   = %d\n' % (len(train_loader)))
    log.write('\tlen(valid_loader)   = %d\n' % (len(valid_loader)))
    log.write('\tbatch_size  = %d\n' % (batch_size))
    log.write('\titer_accum  = %d\n' % (iter_accum))
    log.write('\tbatch_size*iter_accum  = %d\n' % (batch_size * iter_accum))
    log.write('\n')

    # log.write(inspect.getsource(train_augment)+'\n')
    # log.write(inspect.getsource(valid_augment)+'\n')
    # log.write('\n')

    if 0:  # <debug>
        # Disabled visual check: draws the truth boxes and instance contours
        # for every validation image.
        for inputs, truth_boxes, truth_labels, truth_instances, indices in valid_loader:

            batch_size, C, H, W = inputs.size()
            print(batch_size)

            images = inputs.cpu().numpy()
            for b in range(batch_size):
                image = (images[b].transpose((1, 2, 0)) * 255)
                image = np.clip(image.astype(np.float32) * 3, 0, 255)

                image1 = image.copy()

                truth_box = truth_boxes[b]
                truth_label = truth_labels[b]
                truth_instance = truth_instances[b]
                if truth_box is not None:
                    for box, label, instance in zip(truth_box, truth_label,
                                                    truth_instance):
                        x0, y0, x1, y1 = box.astype(np.int32)
                        cv2.rectangle(image, (x0, y0), (x1, y1), (0, 0, 255),
                                      1)
                        print(label)

                        thresh = instance > 0.5
                        contour = thresh_to_inner_contour(thresh)
                        contour = contour.astype(np.float32) * 0.5

                        image1 = contour[:, :, np.newaxis] * np.array(
                            (0, 255,
                             0)) + (1 - contour[:, :, np.newaxis]) * image1

                    print('')

                image_show('image', image)
                image_show('image1', image1)
                cv2.waitKey(0)

    ## start training here! ##############################################
    log.write('** start training here! **\n')
    log.write(' optimizer=%s\n' % str(optimizer))
    log.write(' momentum=%f\n' % optimizer.param_groups[0]['momentum'])
    log.write(' LR=%s\n\n' % str(LR))

    log.write(' images_per_epoch = %d\n\n' % len(train_dataset))
    log.write(
        ' rate    iter   epoch  num   | valid_loss                           | train_loss                           | batch_loss                           |  time    \n'
    )
    log.write(
        '------------------------------------------------------------------------------------------------------------------------------------------------------------------\n'
    )

    train_loss = np.zeros(6, np.float32)
    train_acc = 0.0
    valid_loss = np.zeros(6, np.float32)
    valid_acc = 0.0
    batch_loss = np.zeros(6, np.float32)
    batch_acc = 0.0
    rate = 0

    start = time.time()
    j = 0  # number of batches processed so far
    i = 0  # iteration number, derived from j and start_iter

    while i < num_iters:  # loop over the dataset multiple times
        sum_train_loss = np.zeros(6, np.float32)
        sum_train_acc = 0.0
        sum_count = 0  # batches accumulated since the last smoothing reset

        net.set_mode('train')
        optimizer.zero_grad()
        for inputs, truth_boxes, truth_labels, truth_instances, indices in train_loader:
            batch_size = len(indices)
            i = j / iter_accum + start_iter
            epoch = (i - start_iter) * batch_size * iter_accum / len(
                train_dataset) + start_epoch
            num_products = epoch * len(train_dataset)

            if i % iter_valid == 0:
                net.set_mode('valid')
                valid_loss, valid_acc = evaluate(net, valid_loader)
                net.set_mode('train')

                print('\r', end='', flush=True)
                log.write(
                    '%0.4f %5.1f k %6.2f %4.1f m | %0.3f   %0.2f %0.2f   %0.2f %0.2f   %0.2f | %0.3f   %0.2f %0.2f   %0.2f %0.2f   %0.2f | %0.3f   %0.2f %0.2f   %0.2f %0.2f   %0.2f | %s\n' % (
                        rate, i / 1000, epoch, num_products / 1000000,
                        valid_loss[0], valid_loss[1], valid_loss[2], valid_loss[3], valid_loss[4], valid_loss[5],
                        # valid_acc,
                        train_loss[0], train_loss[1], train_loss[2], train_loss[3], train_loss[4], train_loss[5],
                        # train_acc,
                        batch_loss[0], batch_loss[1], batch_loss[2], batch_loss[3], batch_loss[4], batch_loss[5],
                        # batch_acc,
                        time_to_str((time.time() - start) / 60)))
                time.sleep(0.01)

            # if 1:
            if i in iter_save:
                torch.save(net.state_dict(),
                           str(checkpoint_dir / ('%08d_model.pth' % (i))))
                torch.save(
                    {
                        'optimizer': optimizer.state_dict(),
                        'iter': i,
                        'epoch': epoch,
                    }, str(checkpoint_dir / ('%08d_optimizer.pth' % (i))))

            # learning rate scheduler -------------
            if LR is not None:
                lr = LR.get_rate(i)
                if lr < 0: break
                adjust_learning_rate(optimizer, lr / iter_accum)
            rate = get_learning_rate(optimizer)[0] * iter_accum

            # one iteration update  -------------
            inputs = Variable(inputs).cuda()
            net(inputs, truth_boxes, truth_labels, truth_instances)
            loss = net.loss(inputs, truth_boxes, truth_labels, truth_instances)

            if 1:  # <debug>
                debug_and_draw(net,
                               inputs,
                               truth_boxes,
                               truth_labels,
                               truth_instances,
                               mode='test')

            # masks  = (probs>0.5).float()
            # acc    = dice_loss(masks, labels)

            # accumulated update
            loss.backward()
            if j % iter_accum == 0:
                # torch.nn.utils.clip_grad_norm(net.parameters(), 1)
                optimizer.step()
                optimizer.zero_grad()

            # print statistics  ------------
            batch_acc = 0  # acc[0][0]
            batch_loss = np.array((
                loss.cpu().data.numpy()[0],
                net.rpn_cls_loss.cpu().data.numpy()[0],
                net.rpn_reg_loss.cpu().data.numpy()[0],
                net.rcnn_cls_loss.cpu().data.numpy()[0],
                net.rcnn_reg_loss.cpu().data.numpy()[0],
                net.mask_cls_loss.cpu().data.numpy()[0],
            ))
            sum_train_loss += batch_loss
            sum_train_acc += batch_acc
            sum_count += 1
            if i % iter_smooth == 0:
                # Publish the running averages, then start a new window.
                train_loss = sum_train_loss / sum_count
                train_acc = sum_train_acc / sum_count
                sum_train_loss = np.zeros(6, np.float32)
                sum_train_acc = 0.
                sum_count = 0

            print(
                '\r%0.4f %5.1f k %6.2f %4.1f m | %0.3f   %0.2f %0.2f   %0.2f %0.2f   %0.2f | %0.3f   %0.2f %0.2f   %0.2f %0.2f   %0.2f | %0.3f   %0.2f %0.2f   %0.2f %0.2f   %0.2f | %s  %d,%d,%s' % (
                    rate, i / 1000, epoch, num_products / 1000000,
                    valid_loss[0], valid_loss[1], valid_loss[2], valid_loss[3], valid_loss[4], valid_loss[5],
                    # valid_acc,
                    train_loss[0], train_loss[1], train_loss[2], train_loss[3], train_loss[4], train_loss[5],
                    # train_acc,
                    batch_loss[0], batch_loss[1], batch_loss[2], batch_loss[3], batch_loss[4], batch_loss[5],
                    # batch_acc,
                    time_to_str((time.time() - start) / 60), i, j, str(inputs.size())), end='', flush=True)
            j = j + 1

        pass  # -- end of one data loader --
    pass  # -- end of all iterations --

    if 1:  # save last
        torch.save(net.state_dict(),
                   str(checkpoint_dir / ('%d_model.pth' % (i))))
        torch.save(
            {
                'optimizer': optimizer.state_dict(),
                'iter': i,
                'epoch': epoch,
            }, str(checkpoint_dir / ('%d_optimizer.pth' % (i))))

    log.write('\n')
Esempio n. 28
0
def test_set_sample_rate_hertz():
    """Smoke test: the sample rate setter accepts 16000."""
    from model.configuration import Configuration

    Configuration().set_sample_rate_hertz(16000)
class IOHandler(object):
    """Read and write the files used by the application.

    Handles the results CSV, per-file HTML diagnostics, hypothesis text
    files (all relative to the configured result path) and the queue file.
    """

    _result_path = ''
    _result_file_name = 'results.csv'
    _csv_header = 'AUDIO_FILE, MODEL, ENHANCED, LANGUAGE, ALTERNATIVE_LANGS, PHRASE_HINTS_APPLIED, BOOST, REF_WORD_COUNT, REF_ERROR_COUNT , WER,STEMMING_APPLIED , STOP_WORDS_REMOVED, NUMBER_TO_WORD_CONVERSION, CONTRACTIONS_EXPANDED, INSERTIONS, DELETIONS, SUBSTITUTIONS, DELETED_WORDS, INSERTED_WORDS, SUBSTITUTE_WORDS\n'
    _csv_header_written = False
    # Class-level objects: shared by every instance unless an instance
    # rebinds the attribute.
    configuration = Configuration()
    nlp_model = NLPModel()
    _queue_file_name = 'queue.txt'

    def set_queue_file_name(self, name):
        """Set the path of the queue file used by the queue read/write methods."""
        self._queue_file_name = name

    def get_queue_file_name(self):
        """Return the path of the queue file."""
        return self._queue_file_name

    def set_result_path(self, result_path):
        """Set the directory in which result files are written."""
        self._result_path = result_path

    def get_result_path(self):
        """Return the directory in which result files are written."""
        return self._result_path

    @staticmethod
    def _join_path(directory, file_name):
        """Return ``directory/file_name``, or just *file_name* if *directory* is empty."""
        # With an empty directory a plain '/name' would point at the
        # filesystem root; treat "no directory" as the current directory.
        return f'{directory}/{file_name}' if directory else file_name

    def _path_in_result_dir(self, file_name):
        """Return the path of *file_name* inside the configured result directory."""
        return self._join_path(self.get_result_path(), file_name)

    def _ensure_result_dir(self):
        """Create the result directory (and parents) if a path is configured."""
        import os
        result_dir = self.get_result_path()
        # os.makedirs('') raises, so skip creation when no path is configured.
        if result_dir:
            os.makedirs(result_dir, exist_ok=True)

    @staticmethod
    def _format_word_counts(counts):
        """Render a word->count mapping as ``'word:count, '`` items, ascending by count."""
        ordered = sorted(counts.items(), key=lambda pair: pair[1])
        return ''.join(f'{word}:{count}, ' for word, count in ordered)

    def write_csv_header(self):
        """Write the CSV header to the results file, once per handler.

        The result directory is created if needed and the results file is
        truncated. Failures are reported on stdout; the header is only marked
        as written when the write succeeded, so a later call retries.
        """
        if self._csv_header_written:
            return

        full_path = self._path_in_result_dir(self._result_file_name)
        try:
            self._ensure_result_dir()
            with open(full_path, 'w') as file:
                file.write(self._csv_header)
        # FileNotFoundError is a subclass of OSError (IOError), so it has to
        # be listed first to be reachable.
        except FileNotFoundError as x:
            print(f'Can not find csv file: {x}')
        except IOError as i:
            print(f'Can not write csv header: {i}')
        else:
            self._csv_header_written = True

    def update_csv(self,
                   uri,
                   configuration,
                   nlp_model,
                   word_count_list=None,
                   ref_total_word_count=0,
                   ref_error_count=0,
                   word_error_rate=0,
                   ins=0,
                   deletions=0,
                   subs=0):
        """Append one result row to the results CSV.

        Args:
            uri: Value written in the first (AUDIO_FILE) column.
            configuration: Object providing ``get_model``, ``get_use_enhanced``,
                ``get_language_code``, ``get_alternative_language_codes``,
                ``get_phrases`` and ``get_boost``.
            nlp_model: Object providing ``get_apply_stemming``,
                ``get_remove_stop_words``, ``get_n2w`` and
                ``get_expand_contractions``.
            word_count_list: Optional sequence whose items 0, 1 and 2 are
                word->count mappings for deleted, inserted and substituted
                words respectively.
            ref_total_word_count: Value for the REF_WORD_COUNT column.
            ref_error_count: Value for the REF_ERROR_COUNT column.
            word_error_rate: Value for the WER column.
            ins: Value for the INSERTIONS column.
            deletions: Value for the DELETIONS column.
            subs: Value for the SUBSTITUTIONS column.

        I/O failures are reported on stdout rather than raised.
        """
        import logging
        logging.basicConfig(filename='wer_app.log')
        logger = logging.getLogger(__name__)

        deleted_words = inserted_words = substitute_words = ''
        if word_count_list:
            try:
                formatted = [
                    self._format_word_counts(word_count_list[index])
                    for index in range(3)
                ]
            except TypeError as t:
                # All-or-nothing: if any of the three mappings cannot be
                # sorted, the row is written with all word columns empty.
                string = f'{t}'
                logger.debug(string)
                print(string)
            else:
                deleted_words, inserted_words, substitute_words = formatted

        full_path = self._path_in_result_dir(self._result_file_name)
        # Tolerate a configuration that has no alternative languages (None).
        alt_codes = configuration.get_alternative_language_codes() or ()
        alts = ''.join(code + ' ' for code in alt_codes)
        # NOTE(review): fields are not quoted, so the ', ' separators inside
        # the word columns spill into extra CSV columns. Left as is to keep
        # the file format unchanged for existing consumers.
        row = (
            f'{uri}, {configuration.get_model()}, {configuration.get_use_enhanced()}, {configuration.get_language_code()},'
            f'{alts}, {bool(configuration.get_phrases())},'
            f'{configuration.get_boost()}, {ref_total_word_count}, {ref_error_count}, {word_error_rate}, {nlp_model.get_apply_stemming()},'
            f'{nlp_model.get_remove_stop_words()}, {nlp_model.get_n2w()}, {nlp_model.get_expand_contractions()}, {ins}, {deletions}, {subs}, '
            f'{deleted_words}, {inserted_words}, {substitute_words}\n'
        )
        try:
            with open(full_path, 'a+') as file:
                file.write(row)
        except IOError as i:
            print(f'Can not update csv file: {i}')
        else:
            print(f'UPDATED: {full_path}')

    def write_html_diagnostic(self, wer_obj, unique_root, result_path):
        """Write ``wer_obj.aligned_htmls`` joined by ``<br>`` to ``<unique_root>.html``.

        The file is written inside *result_path*, which is not created here.
        I/O failures are reported on stdout rather than raised.
        """
        aligned_html = '<br>'.join(wer_obj.aligned_htmls)

        write_path = self._join_path(result_path, unique_root + '.html')
        try:
            with open(write_path, 'w') as f:
                f.write(aligned_html)
        except IOError as i:
            print(f'Can not write html diagnostic {write_path}: {i}')
        else:
            print(f'WROTE: diagnostic file: {write_path} ')

    def write_queue_file(self, data):
        """Append *data* to the queue file, each item followed by a comma.

        A string is split on whitespace first; any other value is iterated
        as is and must yield strings. I/O failures are reported on stdout.
        """
        info = data.split() if isinstance(data, str) else data
        try:
            with open(self._queue_file_name, 'a+') as f:
                f.write(''.join(item + ',' for item in info))
        except IOError as e:
            print(f'Can not write queue file: {e}')

    def read_queue_file(self):
        """Return the full contents of the queue file.

        Raises:
            IOError: If the file is missing, unreadable or empty.
        """
        result = None
        try:
            with open(self._queue_file_name, 'r') as f:
                result = f.read()
        # Subclass first; after ``except IOError`` this handler would be dead.
        except FileNotFoundError as x:
            print(f'Queue file not found: {x}')
        except IOError as e:
            print(f'Can not read queue file: {e}')
        if not result:
            raise IOError('No contents found in queue')
        return result

    def write_hyp(self, file_name, text):
        """Write *text* to *file_name* inside the result directory, creating it if needed."""
        self._ensure_result_dir()

        with open(self._path_in_result_dir(file_name), 'w+') as f:
            f.write(text)
Esempio n. 30
0
def test_set_alternative_language_codes():
    """Call ``set_alternative_language_codes`` on a fresh Configuration with two language codes."""
    from model.configuration import Configuration
    config = Configuration()
    codes = ['gu-IN', 'ru-RU']
    config.set_alternative_language_codes(codes)