Example No. 1
    def __init__(self, rollout_num, vocab):
        #self.new_net = copy.deepcopy(net)
        self.vocab = vocab
        self.tokenizer = Tokenizer(
            Vocab(strings=list(vocab.labelToIdx.keys())))
        self.rollout_num = rollout_num
        self.parser = StanfordParser(annots='tokenize')
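The tokenizer here appears to be built from spaCy's Vocab and Tokenizer classes; a minimal sketch of that pattern under that assumption (the string list is made up, and the project's own vocabulary object supplies labelToIdx in the snippet above):

from spacy.tokenizer import Tokenizer
from spacy.vocab import Vocab

# A bare tokenizer over a fixed string store, with no affix or exception rules.
tokenizer = Tokenizer(Vocab(strings=['what', 'drives', 'transmission']))
tokens = [t.text for t in tokenizer('what drives transmission')]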
Example No. 2
    def preprocess_dataset(annot_file, text_file, verbose=False):
        parser = StanfordParser(annots="tokenize ssplit parse")

        processed_examples = []
        productions = set()

        for idx, (src_query,
                  tgt_text) in enumerate(zip(open(annot_file),
                                             open(text_file))):
            query_toks = src_query.strip().split()

            if len(query_toks) == 0:
                continue

            tgt_text = tgt_text.strip()

            tree = English.canonicalize_example(tgt_text, parser)

            if tree is not None:
                productions.update(tree.get_productions())
                processed_examples.append((query_toks, tgt_text, tree))

        productions = sorted(productions, key=lambda x: x.__repr__())

        return processed_examples, productions
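A minimal usage sketch of the preprocessing step above, assuming it is exposed as English.preprocess_dataset; the file paths are hypothetical:

# Hypothetical input paths; each line of annot_file is a tokenized query and
# each line of text_file is the corresponding target sentence.
examples, productions = English.preprocess_dataset('data/annotations.txt',
                                                   'data/texts.txt',
                                                   verbose=True)
print('{} examples, {} productions'.format(len(examples), len(productions)))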
Example No. 3
def test_system(cfg_dict, gen_chk):
    args = cfg_dict['ARGS']
    gargs = cfg_dict['GENERATOR']
    gan_args = cfg_dict['GAN']
    use_cuda = args['cuda']
    seed = gan_args['seed']

    if not torch.cuda.is_available():
        use_cuda = False

    gargs['cuda'] = use_cuda
    gargs['verbose'] = gan_args['verbose']

    random.seed(seed)
    np.random.seed(seed)

    parser = StanfordParser(annots="depparse")

    if isinstance(gen_chk, str):
        gen_params = torch.load(gen_chk)
        netG = Parser.load(gen_chk)
        optimizer_cls = getattr(torch.optim, netG.args['optimizer'])
        netG.optimizer = optimizer_cls(netG.parameters(), lr=netG.args['lr'])
        netG.optimizer.load_state_dict(gen_params['optimizer'])
    else:
        netG = gen_chk

    grammar = None  # netG.transition_system.grammar

    glove_vocab, glove_emb = load_word_vectors(
        os.path.join(gan_args['glove_dir'], gan_args['glove_file']),
        lowercase=gan_args['glove_lower'])

    if gan_args['verbose']: print("Generating training dataset and grammar...")

    samples_data, prim_vocab, grammar = English.generate_dataset(
        gargs['annot_file'], gargs['texts_file'], grammar)
    training_library = Dataset(samples_data)

    scores = np.zeros((len(training_library), 5))

    for i, example in tqdm(enumerate(training_library)):
        src = example.src_sent
        tgt = example.tgt_text
        hyps, _ = netG.parse(src)
        gen = asdl_ast_to_english(hyps[0].tree)

        for j in range(1, 6):
            # Uniform n-gram weights up to order j.
            weight = tuple(1. / j for _ in range(j))
            scores[i, j - 1] = sentence_bleu(
                [tgt.split()],
                gen.split(),
                weight,
                smoothing_function=SmoothingFunction().method1)

    # Debugging breakpoint, left in place so the per-example BLEU scores can be
    # inspected interactively before the function exits.
    import pdb
    pdb.set_trace()
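For reference, the weighting scheme used above gives uniform weight to n-grams of order 1 through j; a small self-contained NLTK sketch with made-up sentences:

from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

reference = 'what is the effect of vaccination on transmission'.split()
hypothesis = 'what is the impact of vaccination on transmission'.split()

for j in range(1, 6):
    # Uniform weights over 1-grams through j-grams, as in test_system above.
    weights = tuple(1. / j for _ in range(j))
    score = sentence_bleu([reference], hypothesis, weights,
                          smoothing_function=SmoothingFunction().method1)
    print('BLEU-{}: {:.3f}'.format(j, score))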
Example No. 4
def continue_training(cfg_dict,
                      gen_chk,
                      disc_chk,
                      epoch=0,
                      gen_loss=None,
                      disc_loss=None,
                      use_cuda=False):
    args = cfg_dict['ARGS']
    gargs = cfg_dict['GENERATOR']
    dargs = cfg_dict['DISCRIMINATOR']
    gan_args = cfg_dict['GAN']

    seed = gan_args['seed']
    total_epochs = gan_args['total_epochs']
    generated_num = gan_args['generated_num']

    # rollout params
    rollout_num = gan_args['rollout_num']

    g_steps = gan_args['g_steps']
    d_steps = gan_args['d_steps']
    k_steps = gan_args['k_steps']

    use_cuda = args['cuda']

    if not torch.cuda.is_available():
        use_cuda = False

    gargs['cuda'] = use_cuda
    gargs['verbose'] = gan_args['verbose']
    dargs['cuda'] = use_cuda
    dargs['verbose'] = gan_args['verbose']

    random.seed(seed)
    np.random.seed(seed)

    parser = StanfordParser(annots="depparse")

    # Resume the loss histories if they were passed in; otherwise start fresh.
    discriminator_losses = disc_loss if disc_loss is not None else []
    generator_losses = gen_loss if gen_loss is not None else []

    if isinstance(gen_chk, str):
        gen_params = torch.load(gen_chk)
        netG = Parser.load(gen_chk)
        optimizer_cls = getattr(torch.optim, netG.args['optimizer'])
        netG.optimizer = optimizer_cls(netG.parameters(), lr=netG.args['lr'])
        netG.optimizer.load_state_dict(gen_params['optimizer'])
    else:
        netG = gen_chk

    glove_vocab, glove_emb = load_word_vectors(
        os.path.join(gan_args['glove_dir'], gan_args['glove_file']),
        lowercase=gan_args['glove_lower'])

    if isinstance(disc_chk, str):
        device = torch.device("cuda" if use_cuda else "cpu")
        disc_params = torch.load(disc_chk)
        netD = QueryGAN_Discriminator_CNN(disc_params['args'], glove_vocab,
                                          glove_emb, 2)
        netD.load_state_dict(disc_params['state_dict'])

        if epoch == 0:
            epoch = disc_params['epoch']

        if netD.args['optim'] == 'adam':
            netD.optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                               netD.parameters()),
                                        betas=(netD.args['beta_1'], 0.999),
                                        lr=netD.args['lr'],
                                        weight_decay=netD.args['wd'])
        elif netD.args['optim'] == 'adagrad':
            netD.optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                                  netD.parameters()),
                                           lr=netD.args['lr'],
                                           weight_decay=netD.args['wd'])
        elif netD.args['optim'] == 'sgd':
            netD.optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                              netD.parameters()),
                                       lr=netD.args['lr'],
                                       weight_decay=netD.args['wd'])

        netD.optimizer.load_state_dict(disc_params['optimizer'])
    else:
        netD = disc_chk

    rollout = Rollout(rollout_num=rollout_num, vocab=glove_vocab)

    print('\n#####################################################')
    print('Restarting adversarial training from epoch {}...\n'.format(epoch))

    for ep in range(epoch, total_epochs):
        for step in range(g_steps):
            # train generator
            hyps, states, examples = generate_samples(netG,
                                                      generated_num,
                                                      parser,
                                                      gan_args,
                                                      oracle=True)
            # hyps, examples = list(zip(*samples))
            step_begin = time.time()
            pgloss = netG.pgtrain(hyps, states, examples, rollout, netD)
            print('[Generator {}]  step elapsed {}s'.format(
                step,
                time.time() - step_begin))
            print('Generator adversarial loss={}, epoch={}'.format(
                pgloss, ep))
            generator_losses.append(pgloss)

        for d_step in range(d_steps):
            # train discriminator
            generate_samples(netG,
                             generated_num,
                             parser,
                             gan_args,
                             writeout=True)
            real_set = DiscriminatorDataset(netD.args['data'],
                                            fake=False,
                                            vocab=glove_vocab,
                                            limit=generated_num)
            fake_set = DiscriminatorDataset(netG.args['sample_dir'],
                                            fake=True,
                                            vocab=glove_vocab)

            for k_step in range(k_steps):
                loss_r = netD.train_single_code(real_set)
                print(
                    'D_step {}, K-step {} Discriminator loss on real set: {}'.
                    format(d_step + 1, k_step + 1, loss_r))
                loss_f = netD.train_single_code(fake_set)
                print(
                    'D_step {}, K-step {} Discriminator loss on fake set: {}'.
                    format(d_step + 1, k_step + 1, loss_f))
                discriminator_losses.append((loss_r + loss_f) / 2)

        save_progress(netD, netG, examples, ep, discriminator_losses,
                      generator_losses)
        discriminator_losses = []
        generator_losses = []
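A minimal sketch of resuming a run from saved checkpoints with the function above; cfg_dict is assumed to have been loaded from the project's config file, and the checkpoint paths are hypothetical:

# continue_training also accepts already-constructed netG / netD objects in
# place of the path strings.
continue_training(cfg_dict,
                  gen_chk='checkpoints/generator.bin',
                  disc_chk='checkpoints/discriminator.pt',
                  epoch=0)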
Example No. 5
def run(cfg_dict):
    # Set up model and training parameters based on config file and runtime
    # arguments

    args = cfg_dict['ARGS']

    if args['test']:
        test_system(cfg_dict, args['gen_chk'])
        return

    if args['continue']:
        continue_training(cfg_dict, args['gen_chk'], args['disc_chk'])
        return

    gargs = cfg_dict['GENERATOR']
    dargs = cfg_dict['DISCRIMINATOR']
    gan_args = cfg_dict['GAN']

    seed = gan_args['seed']
    #batch_size = gan_args['batch_size']
    total_epochs = gan_args['total_epochs']
    generated_num = gan_args['generated_num']
    #vocab_size = gan_args['vocab_size']
    #seq_len = gan_args['sequence_len']

    # rollout params
    #rollout_update_rate = gan_args['rollout_update_rate']
    rollout_num = gan_args['rollout_num']

    g_steps = gan_args['g_steps']
    d_steps = gan_args['d_steps']
    k_steps = gan_args['k_steps']

    use_cuda = args['cuda']

    if not torch.cuda.is_available():
        print("No GPU available, running on CPU.")
        use_cuda = False

    gargs['cuda'] = use_cuda
    gargs['verbose'] = gan_args['verbose']
    dargs['cuda'] = use_cuda
    dargs['device'] = "cuda" if use_cuda else "cpu"
    dargs['verbose'] = gan_args['verbose']

    random.seed(seed)
    np.random.seed(seed)

    if gan_args['verbose']:
        print("Initializing Stanford Parser...")

    parser = StanfordParser(annots="tokenize ssplit")

    # Load input files for Generator: grammar and transition system, vocab,
    # word embeddings

    if gan_args['verbose']:
        print("Checking for existing grammar...")

    if gargs['grammar']:
        grammar = deserialize_from_file(gargs['grammar'])
    else:
        grammar = None

    glove_vocab, glove_emb = load_word_vectors(
        os.path.join(gan_args['glove_dir'], gan_args['glove_file']),
        lowercase=gan_args['glove_lower'])

    if gan_args['verbose']:
        print("Generating training dataset and grammar...")

    samples_data, prim_vocab, grammar = English.generate_dataset(
        gargs['annot_file'], gargs['texts_file'], grammar)
    transition_system = EnglishTransitionSystem(grammar)

    if gan_args['verbose']:
        print("Grammar and language transition system initiated.")

    # Build Generator model

    netG = Parser(gargs, glove_vocab, prim_vocab, transition_system)
    optimizer_cls = getattr(torch.optim, gargs['optimizer'])
    netG.optimizer = optimizer_cls(netG.parameters(),
                                   lr=gargs['lr'],
                                   betas=(gargs['beta_1'], 0.999),
                                   weight_decay=gargs['lr_decay'])

    if gargs['uniform_init']:
        if gan_args['verbose']:
            print('uniformly initialize parameters [-{}, +{}]'.format(
                gargs['uniform_init'], gargs['uniform_init']))
        nn_utils.uniform_init(-gargs['uniform_init'], gargs['uniform_init'],
                              netG.parameters())
    elif gargs['glorot_init']:
        if gan_args['verbose']:
            print('use glorot initialization')
        nn_utils.glorot_init(netG.parameters())
    elif gargs['kaiming_init']:
        if gan_args['verbose']: print('use kaiming initialization')
        nn_utils.kaiming_init(netG.parameters())

    if gan_args['verbose']:
        print("Loading GloVe vectors as Generator embeddings...")

    load_to_layer(netG.src_embed, glove_emb, glove_vocab)
    load_to_layer(netG.primitive_embed, glove_emb, glove_vocab, prim_vocab)

    if gargs['cuda']:
        # Optimizer state tensors are created lazily on the parameters'
        # device, so only the model itself needs to be moved.
        netG = netG.cuda()

    # Set up Discriminator component with given parameters

    if gan_args['verbose']:
        print("Loading Discriminator component...")
    dargs['vocab_size'] = glove_vocab.size()

    netD = QueryGAN_Discriminator_CNN(dargs, glove_vocab, glove_emb,
                                      2)  # CNN classifier

    #
    # PRETRAIN GENERATOR & DISCRIMINATOR
    #

    if gan_args['verbose']:
        print('\nPretraining generator...\n')
    # Pre-train epochs are set in config.cfg file
    netG.pretrain(Dataset(samples_data))
    rollout = Rollout(rollout_num=rollout_num, vocab=glove_vocab)

    # pretrain discriminator
    if gan_args['verbose']:
        print('Loading Discriminator pretraining dataset.')
    dis_set = MULTIVACDataset(netD.args['data'], glove_vocab)

    y_onehot = torch.zeros(dis_set.size, 2)
    y_onehot.scatter_(1, dis_set.labels.long().unsqueeze(1), 1)

    dis_set.labels = y_onehot

    maxlen = 150  # to match CNN classifier architecture
    sents = torch.full((dis_set.size, maxlen), dis_set.vocab.pad)

    for i, s in enumerate(dis_set.sentences):
        sents[i, :min(len(s), maxlen)] = s[:maxlen]

    dis_set.sentences = sents.long()

    if gan_args['verbose']:
        print("Pretraining discriminator...")

    # for i in tqdm(range(k_steps), desc='Pretraining discriminator ... '):
    for epoch in range(k_steps):
        loss = netD.train_single_code(dis_set)
        print('Epoch {} pretrain discriminator training loss: {}'.format(
            epoch + 1, loss))

    save_progress(netD, netG, [], -1, [], [])

    #
    # ADVERSARIAL TRAINING
    #

    print('\n#####################################################')
    print('Adversarial training...\n')

    for epoch in range(total_epochs):
        discriminator_losses = []
        generator_losses = []

        for step in range(g_steps):
            # train generator
            hyps, states, examples = generate_samples(netG,
                                                      generated_num,
                                                      parser,
                                                      gan_args,
                                                      oracle=True)
            step_begin = time.time()

            pgloss = netG.pgtrain(hyps, states, examples, rollout, netD)
            print('[Generator {}]  step elapsed {}s'.format(
                step,
                time.time() - step_begin))
            print('Generator adversarial loss={}, epoch={}'.format(
                pgloss, epoch))
            generator_losses.append(pgloss)

        for d_step in range(d_steps):
            # train discriminator
            generate_samples(netG,
                             generated_num,
                             parser,
                             gan_args,
                             writeout=True)
            real_set = DiscriminatorDataset(netD.args['data'],
                                            fake=False,
                                            vocab=glove_vocab,
                                            limit=generated_num)
            fake_set = DiscriminatorDataset(netG.args['sample_dir'],
                                            fake=True,
                                            vocab=glove_vocab)

            for k_step in range(k_steps):
                loss_r = netD.train_single_code(real_set)
                print(
                    'D_step {}, K-step {} Discriminator loss on real set: {}'.
                    format(d_step + 1, k_step + 1, loss_r))
                loss_f = netD.train_single_code(fake_set)
                print(
                    'D_step {}, K-step {} Discriminator loss on fake set: {}'.
                    format(d_step + 1, k_step + 1, loss_f))
                discriminator_losses.append((loss_r + loss_f) / 2)

        save_progress(netD, netG, examples, epoch, discriminator_losses,
                      generator_losses)
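The discriminator pretraining above one-hot encodes labels with scatter_ and pads token-id sequences to a fixed length; a small self-contained sketch of the same pattern with made-up tensors:

import torch

labels = torch.tensor([0, 1, 1, 0])
y_onehot = torch.zeros(len(labels), 2)
y_onehot.scatter_(1, labels.long().unsqueeze(1), 1)  # one column per class

maxlen, pad_idx = 8, 0
sentences = [torch.tensor([5, 3, 9]), torch.tensor([7, 2, 2, 4, 1])]
padded = torch.full((len(sentences), maxlen), pad_idx, dtype=torch.long)

for i, s in enumerate(sentences):
    padded[i, :min(len(s), maxlen)] = s[:maxlen]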
Example No. 6
class Rollout(object):
    def __init__(self, rollout_num, vocab):
        #self.new_net = copy.deepcopy(net)
        self.vocab = vocab
        self.tokenizer = Tokenizer(
            Vocab(strings=list(vocab.labelToIdx.keys())))
        self.rollout_num = rollout_num
        self.parser = StanfordParser(annots='tokenize')

    def hyp_to_parse(self, hyp, vocab):
        if isinstance(hyp, str):
            text = hyp
        else:
            text = asdl_ast_to_english(hyp.tree)

        parse = self.parser.get_parse(text)['sentences']

        if len(parse) > 0:
            tokens = [x['word'] for x in parse[0]['tokens']]
            deps = sorted(parse[0]['basicDependencies'],
                          key=lambda x: x['dependent'])
            parents = [x['governor'] for x in deps]
            tree = MULTIVACDataset.read_tree(parents)
            inp = torch.tensor(vocab.convertToIdx(tokens, '<unk>'),
                               dtype=torch.long,
                               device='cpu')
        else:
            tree = Tree()
            inp = torch.tensor([])

        return tree, inp

    def parse_tokens(self, tree):
        text = asdl_ast_to_english(tree)
        tokens = [x.text for x in self.tokenizer(text)]
        result = torch.tensor(self.vocab.convertToIdx(tokens, '<unk>'),
                              dtype=torch.long,
                              device='cpu')
        return result

    @staticmethod
    def parse_to_trees(parses, vocab):
        results = [''] * len(parses)

        for idx, parse in enumerate(parses):
            tokens = [x['word'] for x in parse['tokens']]
            deps = sorted(parse['basicDependencies'],
                          key=lambda x: x['dependent'])
            parents = [x['governor'] for x in deps]
            tree = MULTIVACDataset.read_tree(parents)
            results[idx] = (tree,
                            torch.tensor(vocab.convertToIdx(tokens, '<unk>'),
                                         dtype=torch.long,
                                         device='cpu'))

        return results

    @staticmethod
    def ffwd_hyp(hyp, j):
        new_hyp = DecodeHypothesis()

        for i in range(j):
            if i < len(hyp.action_infos):
                new_hyp.apply_action_info(hyp.action_infos[i])

        return new_hyp

    def get_tree_reward(self,
                        hyps,
                        states,
                        examples,
                        netG,
                        netD,
                        vocab,
                        verbose=False):
        batch_size = len(hyps)
        src_sents = [e.src_sent for e in examples]
        rewards = []
        max_action_len = max([len(hyp.actions) for hyp in hyps])

        netD.eval()

        for i in range(self.rollout_num):
            if verbose: print("Rollout step {}".format(i))

            # Build the nested lists with comprehensions so every action step
            # gets its own row.
            samples = [[0] * batch_size for _ in range(max_action_len)]
            inputs = [[0] * batch_size for _ in range(max_action_len)]
            # texts = [[0] * batch_size for _ in range(max_action_len)]

            for j in tqdm(range(1, max_action_len)):
                for n in range(batch_size):
                    src = src_sents[n]
                    hyp = Rollout.ffwd_hyp(hyps[n], j)
                    state = states[n][:j]
                    samples[j - 1][n] = netG.sample(src, hyp, state)

            if verbose:
                print("Samples generated of shape "
                      "({},{})".format(max_action_len, batch_size))

            # Only the first max_action_len - 1 steps hold partial rollouts;
            # the completed hypotheses are scored once more below.
            for x in tqdm(range(max_action_len - 1), "Translating trees..."):
                for h, hyp in enumerate(samples[x]):
                    inputs[x][h] = self.parse_tokens(hyp.tree)

            for j in range(max_action_len - 1):
                samps = torch.full((len(inputs[j]), 150), vocab.pad)

                for idx, x in enumerate(inputs[j]):
                    samps[idx, :min(len(x), 150)] = x[:150]

                x = samps.long().to(netD.args['device'])
                out = netD(x).softmax(dim=-1).data[:, 1].cpu().numpy()

                if i == 0:
                    rewards.append(out)
                else:
                    rewards[j] += out

            # Score the completed hypotheses once per rollout; this fills the
            # final action-step slot of the rewards.
            originals = [self.parse_tokens(hyp.tree) for hyp in hyps]
            samps = torch.full((len(originals), 150), vocab.pad)

            for idx, x in enumerate(originals):
                samps[idx, :min(len(x), 150)] = x[:150]

            x = samps.long().to(netD.args['device'])
            out = netD(x).softmax(dim=-1).data[:, 1].cpu().numpy()

            if i == 0:
                rewards.append(out)
            else:
                rewards[-1] += out

        rewards = np.array(rewards) / (1.0 * self.rollout_num)

        return rewards
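A minimal sketch of how this rollout estimator plugs into the policy-gradient step shown in the earlier examples; netG, netD, hyps, states, examples, and glove_vocab are assumed to come from that surrounding training code:

rollout = Rollout(rollout_num=16, vocab=glove_vocab)
rewards = rollout.get_tree_reward(hyps, states, examples, netG, netD,
                                  glove_vocab, verbose=True)
# rewards holds one row per action step, averaged over rollout_num rollouts.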
Example No. 7
def extract_grammar(source_file,
                    output=None,
                    clean=False,
                    verbose=False,
                    asdl=False):
    parse_trees = list()

    if asdl:
        parse_func = english_ast_to_asdl_ast
    else:
        parse_func = get_eng_tree

    parser = StanfordParser(annots="tokenize ssplit parse")

    with open(source_file, 'r') as f:
        queries = f.readlines()

    if clean:
        queries = clean_queries(queries, verbose)

    if verbose:
        print("Performing constituency parsing of queries")

    for i, q in enumerate(queries):
        if len(q) == 0:
            continue

        try:
            query = stanford_parse(parser, q)
        except Exception:
            print('Could not parse query {}: "{}"'.format(i, q))
            continue

        if check_parse(query):
            try:
                parse_trees.append(parse_func(query.parse_string))
            except Exception:
                print("Could not interpret query parse {}: '{}'".format(
                    i, query))
                continue

        if i % 100 == 0:
            print("{} queries processed.".format(i))

    if verbose:
        print(("{} queries successfully parsed.".format(len(parse_trees))))
        print("Extracting grammar production rules.")

    if asdl:
        productions = set()

        for parse_tree in parse_trees:
            productions.update(parse_tree.get_productions())

        grammar = EnglishASDLGrammar(productions=productions)
    else:
        rules = set()

        for parse_tree in parse_trees:
            parse_tree_rules, _ = parse_tree.get_productions()

            for rule in parse_tree_rules:
                rules.add(rule)

        rules = sorted(rules, key=lambda x: x.__repr__())
        grammar = EnglishGrammar(rules)

    if verbose:
        print("Grammar induced successfully.")

    if output is not None:
        with open(output, 'wb') as f:
            pickle.dump(grammar, f)
    else:
        return grammar, parse_trees
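A minimal usage sketch with a hypothetical query file; when output is given, the grammar is pickled to disk instead of being returned:

# Hypothetical source file of natural-language queries, one per line.
grammar, trees = extract_grammar('data/queries.txt',
                                 clean=True,
                                 verbose=True,
                                 asdl=True)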
Example No. 8
    parser = argparse.ArgumentParser(
        description='Preprocessing of MULTIVAC data for QueryGAN '
        'discriminator training.')
    # data arguments
    parser.add_argument('-d',
                        '--data',
                        required=False,
                        help='Path to source dataset.')

    args = vars(parser.parse_args())

    print('=' * 80)
    print('Preprocessing MULTIVAC dataset')
    print('=' * 80)

    base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    data_dir = os.path.join(base_dir, 'data')
    multivac_dir = os.path.join(data_dir, 'multivac')

    prs = StanfordParser(annots='tokenize')

    split(os.path.join(multivac_dir, 'extracted_questions_labels.txt'),
          multivac_dir)
    gen_tokens(os.path.join(multivac_dir, 'text.txt'), prs)

    # get vocabulary
    build_vocab(glob.glob(os.path.join(multivac_dir, '*/*.toks')),
                os.path.join(multivac_dir, 'vocab.txt'))
    build_vocab(glob.glob(os.path.join(multivac_dir, '*/*.toks')),
                os.path.join(multivac_dir, 'vocab-cased.txt'),
                lowercase=False)
Example No. 9
def run(args_dict):
    # setup
    timestamp = datetime.now().strftime('%d%b%Y-%H:%M:%S')
    verbose = args_dict['verbose']

    threshold = float(args_dict['threshold'])
    num_top_rel = args_dict['num_top_rel']

    # check if output directory exists
    if not os.path.isdir(args_dict['out']):
        os.mkdir(args_dict['out'])

    # instantiate connection to OpenKE
    con = config.Config()

    # set global parameters
    if args_dict['dir'].endswith(os.path.sep):
        con.set_in_path(args_dict['dir'])
    else:
        con.set_in_path(args_dict['dir'] + os.path.sep)
    con.set_work_threads(8)
    con.set_dimension(100)

    # fit run-determined parameters
    if 'fit' in args_dict['run']:
        traintimes = int(args_dict['traintimes'])
        alpha = float(args_dict['alpha'])
        nbatches = int(args_dict['nbatches'])

        con.set_train_times(traintimes)
        con.set_nbatches(nbatches)
        con.set_alpha(alpha)
        con.set_margin(1.0)
        con.set_bern(0)
        con.set_ent_neg_rate(1)
        con.set_rel_neg_rate(0)
        con.set_opt_method("SGD")
        con.set_export_files(
            os.path.join(args_dict['out'],
                         'model.vec.{}.tf'.format(timestamp)), 0)

        # save out model parameters
        con.set_out_files(
            os.path.join(args_dict['out'],
                         'embedding.vec.{}.json'.format(timestamp)))
    else:
        con.set_test_link_prediction(True)
        con.set_test_triple_classification(True)

        files = glob.glob(os.path.join(args_dict['out'], '*tf*'))
        if not files:
            raise Exception('No models to predict on; generate one first.')
        else:
            if verbose:
                print("Loading files...")

            times = list(set([file.split('.')[2] for file in files]))
            ifile = max([
                datetime.strptime(x, '%d%b%Y-%H:%M:%S') for x in times
            ]).strftime('%d%b%Y-%H:%M:%S')
            con.set_import_files(
                os.path.join(args_dict['out'],
                             'model.vec.{}.tf'.format(ifile)))
            args_dict.update({'timestamp': ifile})

    # initialize settings
    if verbose:
        print("Initializing OpenKE system...")

    con.init()

    # set knowledge embedding model
    if verbose:
        print("Setting model...")
    kem = set_model_choice(args_dict['model'])
    con.set_model(kem)

    # determine action
    if 'fit' in args_dict['run']:
        # model training
        con.run()
    else:
        if verbose:
            print("Beginning predictions...")

        # predict objects
        if not args_dict['search']:
            raise Exception('You need to provide a search term.')
        else:
            annots = "tokenize ssplit pos depparse natlog openie ner coref",
            props = {
                "openie.triple.strict": "true",
                "openie.openie.resolve_coref": "true"
            }

            parser = StanfordParser(annots=annots, props=props)

            # glove = loadGloveModel(args_dict['glove'], verbose)
            glove_vocab, glove_emb = load_word_vectors(args_dict['glove'])

            # identify files for use
            files = [x for x in os.listdir(con.in_path) if '2id' in x]
            rel_file = get_newest_file(con.in_path, files, 'relation')
            ent_file = get_newest_file(con.in_path, files, 'entity')
            trn_file = get_newest_file(con.in_path, files, 'train')

            entities = pd.read_csv(ent_file,
                                   sep='\t',
                                   names=["Ent", "Id"],
                                   skiprows=1)
            relations = pd.read_csv(rel_file,
                                    sep='\t',
                                    names=["Rel", "Id"],
                                    skiprows=1)
            train = pd.read_csv(trn_file,
                                sep='\t',
                                names=["Head", "Tail", "Relation"],
                                skiprows=1)

            if os.path.exists(args_dict['search']):
                queries = pd.read_csv(args_dict['search'])

                def parse(z):
                    return stanford_parse(parser, z, sub_rdfs=True).get_rdfs(
                        use_tokens=False, how='longest')

                triples = queries.Query.apply(parse)

                results = triples.apply(lambda x: get_answers(
                    con, x, glove_vocab, glove_emb, entities, relations,
                    num_top_rel, threshold))
                queries['results'] = results
                queries.to_csv(os.path.join(args_dict['dir'],
                                            "query_results.csv"),
                               index=False)
            else:
                predicted_object(con,
                                 args_dict['search'],
                                 num_top_rel,
                                 threshold=threshold)
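The checkpoint-selection step above picks the newest model by parsing the timestamp embedded in each file name; a self-contained sketch of that pattern with made-up file names:

from datetime import datetime

files = ['model.vec.01Jan2020-10:00:00.tf', 'model.vec.15Mar2021-09:30:00.tf']
times = list(set(f.split('.')[2] for f in files))
newest = max(datetime.strptime(t, '%d%b%Y-%H:%M:%S')
             for t in times).strftime('%d%b%Y-%H:%M:%S')
print(newest)  # 15Mar2021-09:30:00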