def main():
    """Build and return a UccaEmbedding from the train/dev/test data files.

    Reads all paths from the parsed command-line arguments, makes sure the
    embedding output directory exists, generates the embedding matrix and
    its index file via ``UccaEmbedding.prepare``, then loads and returns
    the resulting embedding.

    Returns:
        UccaEmbedding: the embedding loaded from the freshly written
        index and embedding files.
    """
    args = parse_args()

    # Input corpora whose tokens define the embedding vocabulary.
    train_file = args.data_dir + '/train.json'
    dev_file = args.data_dir + '/dev.json'
    test_file = args.data_dir + '/test.json'

    # Output artifacts: the embedding matrix file and its token index.
    embedding_file = args.ucca_embedding_dir + '/' + args.ucca_embedding_file
    index_file = args.ucca_embedding_dir + '/' + args.ucca_embedding_index_file

    # Create the output directory before prepare() writes into it.
    helper.ensure_dir(args.ucca_embedding_dir)

    UccaEmbedding.prepare(args.ucca_embedding_dim,
                          [train_file, dev_file, test_file], index_file,
                          embedding_file, args.ucca_embedding_source)

    # NOTE(review): a stray over-indented `exit(1)` used to follow this
    # return, making the module an IndentationError; it was unreachable
    # dead code and has been removed.
    return UccaEmbedding(args.ucca_embedding_dim, index_file, embedding_file)

    # NOTE(review): this span is the interior of a function whose header is
    # not visible here; `model_dir`, `opt`, `args` and `model_data` are
    # presumably defined by the enclosing scope — confirm against the full file.

    # Vocab: load the vocabulary that was saved alongside the model and
    # verify it matches the size recorded in the model options.
    vocab_file = model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt[
        'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

    # UCCA Embedding: only loaded when the saved model was trained with one
    # (dimension > 0); otherwise the DataLoader receives None.
    ucca_embedding = None
    if opt['ucca_embedding_dim'] > 0:
        embedding_file = opt['ucca_embedding_dir'] + '/' + opt[
            'ucca_embedding_file']
        index_file = opt['ucca_embedding_dir'] + '/' + opt[
            'ucca_embedding_index_file']
        ucca_embedding = UccaEmbedding(opt['ucca_embedding_dim'], index_file,
                                       embedding_file)

    # Load the evaluation split selected on the command line (e.g. "test").
    data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
    with open(data_file) as infile:
        data_input = json.load(infile)

    # Batch the data for evaluation (no shuffling/filtering in eval mode).
    data = DataLoader(data_input,
                      opt['batch_size'],
                      opt,
                      vocab,
                      evaluation=True,
                      ucca_embedding=ucca_embedding)
    print("{} batches created for test".format(len(data.data)))
    model_data.append(data)

# NOTE(review): top-level script fragment; `model_files`, `opt` and `args`
# are presumably defined earlier in the file — confirm against the full file.
evaluator = GCNEnsembleEvaluator(model_files)
# load vocab and the pre-trained word-embedding matrix; both must agree on
# the vocabulary size, and the matrix width must match the configured emb_dim.
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
opt['vocab_size'] = vocab.size
emb_file = opt['vocab_dir'] + '/embedding.npy'
emb_matrix = np.load(emb_file)
assert emb_matrix.shape[0] == vocab.size
assert emb_matrix.shape[1] == opt['emb_dim']

# UCCA Embedding? Optional: only loaded when a positive dimension was given
# on the command line; its vocab size is recorded into opt for the model.
ucca_embedding = None
if args.ucca_embedding_dim > 0:
    embedding_file = args.ucca_embedding_dir + '/' + args.ucca_embedding_file
    index_file = args.ucca_embedding_dir + '/' + args.ucca_embedding_index_file
    ucca_embedding = UccaEmbedding(args.ucca_embedding_dim, index_file,
                                   embedding_file)
    opt['ucca_embedding_vocab_size'] = ucca_embedding.embedding_matrix.shape[0]
    assert ucca_embedding.embedding_matrix.shape[1] == args.ucca_embedding_dim

# load data: read the training split and batch it with filtering enabled
# (evaluation=False => training mode, e.g. shuffling — confirm in DataLoader).
print("Loading data from {} with batch size {}...".format(
    opt['data_dir'], opt['batch_size']))
with open(opt['data_dir'] + '/train.json') as infile:
    train_input = json.load(infile)
train_batch = DataLoader(train_input,
                         opt['batch_size'],
                         opt,
                         vocab,
                         evaluation=False,
                         apply_filters=True,
                         ucca_embedding=ucca_embedding)