コード例 #1
0
def get_args():
    """Register the checkpoint/result path options and parse the CLI."""
    # (flag, keyword options) pairs; registration order mirrors the original.
    option_specs = [
        ('--checkpoints-path', dict(type=str, required=True)),   # Models
        ('--results-path', dict(type=str, default='results/')),  # Results
    ]
    for flag, kwargs in option_specs:
        argparser.add_argument(flag, **kwargs)
    return argparser.parse_args()
コード例 #2
0
def get_args():
    """Build the shared parser with all defaults and parse the CLI.

    Also derives ``wait_iterations``, the early-stopping budget expressed
    in evaluation iterations.
    """
    parser = get_argparser()
    add_all_defaults(parser)

    parsed = parse_args(parser)
    parsed.wait_iterations = parsed.eval_batches * parsed.wait_epochs
    return parsed
コード例 #3
0
def get_args():
    """Parse evaluation arguments: model input path, results file, data opts."""
    parser = get_argparser()
    parser.add_argument('--eval-path', type=str, required=True)     # Models
    parser.add_argument('--results-file', type=str, required=True)  # Save
    add_data_args(parser)
    args = parse_args(parser)
    return args
コード例 #4
0
def get_args():
    """Parse adaptor arguments together with data and generator options."""
    parser = get_argparser()
    # Both adaptor paths are mandatory string options.
    for flag in ('--two-stage-state-folder', '--results-file'):
        parser.add_argument(flag, type=str, required=True)
    add_data_args(parser)
    add_generator_args(parser)
    return parse_args(parser)
コード例 #5
0
def get_args():
    """Parse CLI options and attach derived reverse/model-path attributes."""
    argparser.add_argument('--batch-size', type=int, default=512)          # Data
    argparser.add_argument('--checkpoints-path', type=str, required=True)  # Models
    argparser.add_argument('--model-type', type=str, required=True)

    args = argparser.parse_args()
    model = args.model_type
    # Models listed in constants.REVERSE_MODELS consume reversed sequences.
    args.reverse = model in constants.REVERSE_MODELS
    args.model_path = os.path.join(args.checkpoints_path, model)
    return args
コード例 #6
0
def get_args():
    """Parse checkpoint/analysis options; map the 'none' sentinel to None."""
    # Models
    argparser.add_argument('--checkpoints-path', type=str, required=True)
    # Other
    argparser.add_argument('--analyse', type=str, default='none',
                           choices=['none', 'vowels', 'consonants'])
    argparser.add_argument('--n-permutations', type=int, default=100000)

    args = argparser.parse_args()
    # 'none' is only a CLI-level sentinel; downstream code expects None.
    if args.analyse == 'none':
        args.analyse = None
    return args
コード例 #7
0
def get_args():
    """Parse the two-stage experiment options (paths plus alpha/beta)."""
    parser = get_argparser()
    parser.add_argument('--max-train-tokens', type=int, required=True)
    required_str_flags = (
        '--data-language-dir',
        '--checkpoint-language-dir',
        # NOTE(review): alpha/beta are parsed as strings here but as floats
        # in a sibling get_args — confirm which type is intended.
        '--alpha',
        '--beta',
        '--results-folder',
    )
    for flag in required_str_flags:
        parser.add_argument(flag, type=str, required=True)
    return parse_args(parser)
コード例 #8
0
def get_args():
    """Parse fold-splitting options: source file, fold count, vocabulary cap."""
    option_specs = [
        ('--src-file', dict(type=str,
                            help="The file from which to read data")),
        ('--n-folds', dict(type=int, default=10,
                           help="Number of folds to split data")),
        ('--max-words', dict(type=int, default=10000,
                             help="Number of types to use")),
    ]
    for flag, kwargs in option_specs:
        argparser.add_argument(flag, **kwargs)
    return argparser.parse_args()
コード例 #9
0
def get_args():
    """Parse adaptor-training options and derive the early-stop budget."""
    parser = get_argparser()
    # Save
    parser.add_argument('--results-file', type=str, required=True)
    # adaptor
    parser.add_argument('--no-iterations', type=int, default=10)
    parser.add_argument('--beta-limit', type=int)
    parser.add_argument('--adaptor-iterations', type=int, default=6)
    parser.add_argument('--two-stage-state-folder', type=str, required=True)

    add_all_defaults(parser)
    parsed = parse_args(parser)
    # Early-stopping budget in evaluation iterations.
    parsed.wait_iterations = parsed.eval_batches * parsed.wait_epochs
    return parsed
コード例 #10
0
def get_args():
    """Parse fold-training options; derive reverse flag and model path."""
    # Data
    argparser.add_argument('--batch-size', type=int, default=1024)
    argparser.add_argument('--train-folds', type=int, default=8)
    # Model
    argparser.add_argument('--model-type', type=str, required=True)
    # Save
    # NOTE(review): --checkpoints-path has no default and is not required, so
    # os.path.join below raises TypeError when it is omitted — confirm a
    # value is always supplied or injected elsewhere.
    argparser.add_argument('--checkpoints-path', type=str)

    args = argparser.parse_args()

    model = args.model_type
    args.reverse = model in constants.REVERSE_MODELS
    args.model_path = os.path.join(args.checkpoints_path, model)
    return args
コード例 #11
0
def get_args():
    """Parse model-type/permutation options; derive the keep_eos flag."""
    # Models
    argparser.add_argument('--model-type', type=str, required=True)
    # Other
    argparser.add_argument('--n-permutations', type=int, default=100000)

    args = argparser.parse_args()
    # Only the plain ('norm') and reversed ('rev') models keep the
    # end-of-sequence token.
    args.keep_eos = args.model_type in ('norm', 'rev')
    return args
コード例 #12
0
def get_args():
    """Parse adaptor-evaluation options and derive the early-stop budget."""
    parser = get_argparser()
    # Save
    parser.add_argument('--adaptor-results-file', type=str, required=True)
    # adaptor
    parser.add_argument('--alpha', type=float, required=True)
    parser.add_argument('--beta', type=float, required=True)
    parser.add_argument('--adaptor-iterations', type=int, default=6)
    parser.add_argument('--two-stage-state-folder', type=str, required=True)
    parser.add_argument('--load-adaptor-init-state',
                        action='store_true',
                        default=False)

    add_all_defaults(parser)
    parsed = parse_args(parser)
    parsed.wait_iterations = parsed.eval_batches * parsed.wait_epochs
    return parsed
コード例 #13
0
def get_args():
    """Parse wikipedia-tokenization options plus the shared data options."""
    parser = get_argparser()
    option_specs = [
        ('--wikipedia-tokenized-file',
         dict(type=str,
              help="The file in which wikipedia tokenized results should be")),
        ('--language',
         dict(type=str, help="The language the data is in")),
        ('--n-folds',
         dict(type=int, default=10, help="Number of folds to split data")),
        ('--max-sentences',
         dict(type=int, default=1000000,
              help="Maximum number of sentences used")),
    ]
    for flag, kwargs in option_specs:
        parser.add_argument(flag, **kwargs)
    add_data_args(parser)
    return parse_args(parser)
コード例 #14
0
def get_args():
    """Parse LSTM training hyper-parameters.

    Derives ``wait_iterations`` (early-stop budget), ``reverse`` and the
    checkpoint ``model_path`` from the parsed values.
    """
    # Data
    argparser.add_argument('--batch-size', type=int, default=32)
    # Model
    model_option_specs = [
        ('--nlayers', int, 2),
        ('--embedding-size', int, 64),
        ('--hidden-size', int, 256),
        ('--dropout', float, .33),
    ]
    for flag, flag_type, default in model_option_specs:
        argparser.add_argument(flag, type=flag_type, default=default)
    argparser.add_argument('--model-type', type=str, required=True)
    # Optimization
    argparser.add_argument('--eval-batches', type=int, default=20)
    argparser.add_argument('--wait-epochs', type=int, default=5)
    # Save
    # NOTE(review): --checkpoints-path has no default; os.path.join below
    # raises TypeError when it is omitted — confirm intended.
    argparser.add_argument('--checkpoints-path', type=str)

    args = argparser.parse_args()
    args.wait_iterations = args.eval_batches * args.wait_epochs

    args.reverse = args.model_type in constants.REVERSE_MODELS
    args.model_path = os.path.join(args.checkpoints_path, args.model_type)
    return args
コード例 #15
0
                                             args,
                                             artificial=artificial)
            xp, yp = bayesian_optimisation(n_iters,
                                           sample_loss,
                                           bounds,
                                           n_pre_samples=n_pre_samples)

            opt_results += [
                get_optimal_loss(lang, args.is_devoicing, artificial,
                                 token_map, concept_ids, ipa_to_concepts, xp,
                                 yp, args)
            ]

            write_csv(
                results, '%s/artificial__%s__baysian-results.csv' %
                (args.rfolder, args.model))
            write_csv(
                opt_results, '%s/artificial__%s__opt-results.csv' %
                (args.rfolder, args.model))

    write_csv(
        results, '%s/artificial__%s__baysian-results-final.csv' %
        (args.rfolder, args.model))


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='artificial/%s/bayes-opt')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    fill_artificial_args(args)
    optimize_languages(args)
コード例 #16
0
def run_languages(args):
    """Train/evaluate on the shared corpus and write per-language results.

    Writes the same result table to both the running and the -final CSV.
    """
    print('------------------- Start -------------------')
    languages, token_map, data_split, concept_ids = read_info()
    print('Train %d, Val %d, Test %d' %
          (len(data_split[0]), len(data_split[1]), len(data_split[2])))

    # Pick the runner once; both share the same signature and return shape.
    runner = run_opt_language if args.opt else run_language
    test_results, test_loss, test_acc, best_epoch, val_loss, val_acc = \
        runner(languages, token_map, concept_ids, args)

    rows = [
        ['lang', 'test_loss', 'test_acc', 'best_epoch', 'val_loss', 'val_acc'],
        ['full', test_loss, test_acc, best_epoch, val_loss, val_acc],
    ]
    for lang, result in test_results.items():
        rows.append([lang] + list(result))

    write_csv(rows,
              '%s/%s__shared-results.csv' % (args.rfolder, args.model))
    write_csv(rows,
              '%s/%s__shared-results-final.csv' % (args.rfolder, args.model))


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='normal')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    run_languages(args)
コード例 #17
0
def load_info(args):
    """Load the preprocessing summary pickled under ``args.ffolder``.

    Returns the ``(languages, token_map, data_split, concept_ids)`` tuple
    stored by the preprocessing step in ``preprocess/info.pckl``.
    """
    info_path = '%s/preprocess/info.pckl' % args.ffolder
    with open(info_path, 'rb') as f:
        info = pickle.load(f)

    return (info['languages'], info['token_map'], info['data_split'],
            info['concepts_ids'])


def main(args):
    """Preprocess the source corpus: split, tokenize, and persist metadata."""
    df = read_src_data(args.ffolder)

    languages = get_languages(df)
    train_df, val_df, test_df, data_split = separate_train(df)
    token_map = get_tokens(df)
    concepts_ids, IPA_to_concept = get_concept_ids(df)

    per_language = separate_per_language(train_df, val_df, test_df, languages)
    process_languages(per_language, token_map, args)

    # Persist everything later stages need to reload the same split.
    save_info(args.ffolder, languages, token_map, data_split, concepts_ids,
              IPA_to_concept)


if __name__ == '__main__':
    args = parser.parse_args()
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    main(args)
コード例 #18
0
def get_args():
    """Parse the permutation-test options and results directory."""
    # Other
    argparser.add_argument('--n-permutations', type=int, default=100000)
    argparser.add_argument('--results-path', type=str, default='results/')

    args = argparser.parse_args()
    return args
コード例 #19
0
def get_args():
    """Parse the command line with the module-level argparser."""
    args = argparser.parse_args()
    return args
コード例 #20
0
    bounds = np.array([[4, 256], [32, 256], [1, 2.95], [0.0, 0.5]])
    n_pre_samples = 5

    sample_loss = sample_loss_getter(languages, token_map, concept_ids, args)
    xp, yp = bayesian_optimisation(n_iters,
                                   sample_loss,
                                   bounds,
                                   n_pre_samples=n_pre_samples)

    opt_results, test_results = get_optimal_loss(languages, token_map, xp, yp,
                                                 concept_ids, args)

    log_results = [[
        'lang', 'test_loss', 'test_acc', 'best_epoch', 'val_loss', 'val_acc',
        'embedding_size', 'hidden_size', 'nlayers', 'dropout'
    ]]
    log_results += [opt_results]
    log_results += [[]]
    for lang, result in test_results.items():
        log_results += [[lang] + list(result)]

    write_csv(
        log_results,
        '%s/%s__bayesian-shared-results.csv' % (args.rfolder, args.model))


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='bayes-opt')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    optimize_languages(args)
コード例 #21
0
    return full_avg_len, avg_len, avg_test_loss, avg_val_loss


def run_languages(args):
    """Fit per-language n-gram models with CV, checkpointing CSV results."""
    print('------------------- Start -------------------')
    languages, token_map, data_split, concept_ids, _ = read_info()
    print('Train %d, Val %d, Test %d' %
          (len(data_split[0]), len(data_split[1]), len(data_split[2])))

    max_order = 3
    header = ['lang', 'full_avg_len', 'avg_len', 'test_loss', 'val_loss']
    header += ['param_%d' % i for i in range(max_order)]
    results = [header]
    for idx, lang in enumerate(languages):
        print()
        print('%d Language %s' % (idx, lang))
        full_avg_len, avg_len, test_loss, val_loss = run_language_cv(
            lang, token_map, concept_ids, args, max_order=max_order)
        results.append([lang, full_avg_len, avg_len, test_loss,
                        val_loss])  # + opt_params.tolist()]

        # Checkpoint progress after every language.
        write_csv(results, '%s/ngram.csv' % (args.rfolder))

    write_csv(results, '%s/ngram-final.csv' % (args.rfolder))


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='cv')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    run_languages(args)
コード例 #22
0
        'lang', 'avg_len', 'test_shannon', 'test_loss', 'test_acc', 'val_loss',
        'val_acc'
    ]]
    for i, lang in enumerate(languages):
        for artificial in [True, False]:
            print()
            print('%d. %s %s' %
                  (i, lang, 'artificial' if artificial else 'default'))
            avg_len, shannon, test_shannon, test_loss, \
                test_acc, best_epoch, val_loss, val_acc = run_artificial_language(
                    lang, args.is_devoicing, token_map, concept_ids, ipa_to_concepts, args, artificial=artificial)
            results += [[
                '%s %s' % (lang, 'art' if artificial else 'norm'), avg_len,
                shannon, test_shannon, test_loss, test_acc, best_epoch,
                val_loss, val_acc
            ]]

            write_csv(
                results,
                '%s/artificial__%s__results.csv' % (args.rfolder, args.model))
    write_csv(
        results,
        '%s/artificial__%s__results-final.csv' % (args.rfolder, args.model))


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='artificial/%s/normal')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    fill_artificial_args(args)
    run_languages(args)
コード例 #23
0
def get_args():
    """Parse the mandatory checkpoints-path option."""
    # Models
    argparser.add_argument('--checkpoints-path', type=str, required=True)
    args = argparser.parse_args()
    return args
コード例 #24
0
            symbols)

        lang_data += [[
            lang,
            len(symbols), vowel, consonant, tone, unrecognized, avg_len
        ]]

    columns = [
        'lang', 'inventory', 'vowel', 'consonant', 'tone', 'unrecognized',
        'avg_len'
    ]
    df_info = pd.DataFrame(lang_data, columns=columns)
    rfolder = args.rfolder[:-len('orig')]
    df_info.to_csv('%s/lang_inventory.csv' % (rfolder))


def main(args):
    """Compute per-language IPA inventory statistics from the source data."""
    df = read_src_data(args.ffolder)
    languages = get_languages(df)

    train_df, val_df, test_df, _ = separate_train(df)
    per_language = separate_per_language(train_df, val_df, test_df, languages)

    get_lang_ipa_info(df, per_language, args, field='IPA')


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='inventory')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    main(args)
コード例 #25
0
def get_args():
    """Parse the permutation-count option."""
    # Other
    argparser.add_argument('--n-permutations', type=int, default=100000)
    args = argparser.parse_args()
    return args
コード例 #26
0
        'lang', 'artificial', 'full_avg_len', 'avg_len', 'test_loss',
        'val_loss'
    ]]
    for i, lang in enumerate(languages):
        for artificial in [True, False]:
            print()
            print(i, end=' ')
            full_avg_len, avg_len, test_loss, val_loss = run_artificial_language_cv(
                lang,
                token_map,
                args,
                artificial=artificial,
                max_order=max_order)
            results += [[
                lang, artificial, full_avg_len, avg_len, test_loss, val_loss
            ]]

            write_csv(
                results,
                '%s/artificial__%s__results.csv' % (args.rfolder, args.model))
    write_csv(
        results,
        '%s/artificial__%s__results-final.csv' % (args.rfolder, args.model))


if __name__ == '__main__':
    args = argparser.parse_args(csv_folder='artificial/%s/cv')
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if args.data != 'northeuralex':
        raise ValueError('this script should only be run with northeuralex data')
    fill_artificial_args(args)
    run_languages_cv(args)
コード例 #27
0
def get_args():
    """Parse the mandatory data-file option via the shared parser helpers."""
    parser = get_argparser()
    parser.add_argument('--data-file', type=str, required=True)
    return parse_args(parser)