Example #1
import json

# Grammar, get_grammar2index, and get_word2index are assumed to come from the
# project's grammar utilities (not shown in this snippet).


def save_2index():
    grammar = Grammar('sql_simple_transition_2.bnf')
    # Loading the grammar a second time shouldn't be necessary, but
    # get_subgrammar appears to mutate the instance it is called on.
    grammar2 = Grammar('sql_simple_transition_2.bnf')

    sellist_gram = grammar.get_subgrammar('<sellist>')
    assert '<modifyop>' in grammar.gram_keys
    modifyopgram = grammar2.get_subgrammar('<modifyop>')
    dontuse = ['<start>']
    db_dict = []  # no database-specific entries are passed to the index builders
    _, gram2ind = get_grammar2index(sellist_gram, db_dict, dontuse=dontuse)
    with open('grammar2index_grammar_2_sellist.json', 'w') as f:
        json.dump(gram2ind, f)
    _, gram2ind = get_grammar2index(modifyopgram, db_dict, dontuse=dontuse)
    with open('grammar2index_grammar_2_modifyop.json', 'w') as f:
        json.dump(gram2ind, f)
    _, w2ind = get_word2index(grammar, db_dict, dontuse=dontuse)
    with open('terminals2index_grammar_2.json', 'w') as f:
        json.dump(w2ind, f)
    with open('spider_tables_lowercase.json', 'r') as f:
        spider_db = json.loads(f.read())
    tab2ind = {}
    col2ind = {}
    allcols_db2ind = {}
    for db, tables in spider_db.items():
        dbp = db.lower()
        # table name -> index within this database
        tab2ind[dbp] = {tab.lower(): i for i, tab in enumerate(tables)}
        # all distinct column names across the database's tables
        all_cols = list({c.lower() for cols in tables.values() for c in cols})
        allcols_db2ind[dbp] = {c: i for i, c in enumerate(all_cols)}
        col2ind[dbp] = {}
        for tab, cols in tables.items():
            # column name -> index within this table
            col2ind[dbp][tab.lower()] = {
                col.lower(): i
                for i, col in enumerate(cols)
            }
    with open('spider_tab2index.json', 'w') as f:
        json.dump(tab2ind, f)
    with open('spider_col2index.json', 'w') as f:
        json.dump(col2ind, f)

    with open('spider_db_cols2ind.json', 'w') as f:
        json.dump(allcols_db2ind, f)
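
A minimal sketch of reading one of the dumped index files back (the database
key 'concert_singer' is a hypothetical example; any key present in
spider_tab2index.json works):

import json

with open('spider_tab2index.json', 'r') as f:
    tab2ind = json.load(f)

# Map a (lowercased) table name to its integer id within one database.
db = 'concert_singer'  # hypothetical database name
if db in tab2ind:
    print(tab2ind[db])  # e.g. {'stadium': 0, 'singer': 1, ...}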
Example #2
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate

# SimpleGrammar is assumed to be importable from the project's grammar module.


class AugmentedDataLoader(DataLoader):
    def __init__(self,
                 dataset,
                 filen,
                 batch_size=1,
                 shuffle=False,
                 sampler=None,
                 batch_sampler=None,
                 num_workers=0,
                 collate_fn=default_collate,
                 pin_memory=False,
                 drop_last=False,
                 timeout=0,
                 worker_init_fn=None):
        super().__init__(dataset,
                         batch_size=batch_size,
                         shuffle=shuffle,
                         sampler=sampler,
                         batch_sampler=batch_sampler,
                         num_workers=num_workers,
                         collate_fn=collate_fn,
                         pin_memory=pin_memory,
                         drop_last=drop_last,
                         timeout=timeout,
                         worker_init_fn=worker_init_fn)

        self.grammar = SimpleGrammar(filen)
        # The original statement was a bare `self.database_calls` attribute
        # access, which would raise AttributeError; initialize it instead.
        self.database_calls = None

    def get_choices_key(self, key):
        return self.grammar.gr[key]['items']

    def get_values_terminal(self, terminal):
        # Terminals listed in resolve_dict (assumed to be defined elsewhere
        # in the module) are resolved separately; return None for them.
        if terminal in resolve_dict:
            return None
        lst = self.grammar.gr[terminal]
        assert len(lst) == 1
        return lst[0]

    def is_terminal_on_path(self, tok, terminal):
        return self.grammar.from_terminal_to_token(tok, terminal)
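
A hedged usage sketch (train_set stands for any torch Dataset; the .bnf
filename is reused from elsewhere in this file):

loader = AugmentedDataLoader(train_set,
                             'sql_simple_transition.bnf',
                             batch_size=32,
                             shuffle=True)
choices = loader.get_choices_key('<sellist>')  # expansions of a grammar key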
Example #3
import json

# SimpleGrammar is assumed to be importable from the project's grammar module.


class AugmentedDataset:  # (Dataset): base class commented out in the source
    def __init__(self, jsonfile, grammar_file):
        self.grammar = SimpleGrammar(grammar_file)
        self.grammar_terminals = self.grammar.get_terminal_toks()
        with open(jsonfile, 'r') as f:
            self.data = json.load(f)

    def augment_data(self, test_string):
        # `learn_` is truncated in the source; the intended grammar call is unclear.
        to_learn = self.grammar.learn_

    def test(self):
        counter = 0
        not_resolved = 0
        keys = list(self.data.keys())
        for k in keys:
            string = self.data[k]['sql']
            valid, reason = self.grammar.check_string_tokens(string, verbose=True)
            if not valid:
                counter += 1
                if reason == 'res':
                    not_resolved += 1

        print('{} out of {} are errors, {} of those failed to resolve'.format(
            counter, len(keys), not_resolved))
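
A short usage sketch (both file names are reused from the __main__ block in
the last example of this file):

ds = AugmentedDataset('/home/jq/software/triplet-all/spider_train_subset.json',
                      'sql_simple_transition.bnf')
ds.test()  # reports how many SQL strings fail the grammar check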
Example #4
    def __init__(self, filen: str):
        self.grammar = SimpleGrammar(filen)
        self.setup_conditions()
Example #5
import json

# SimpleGrammar and rem_terminals_tokenize_string are assumed project imports.
# The source begins mid-function here; the head is reconstructed from the call site below.
def all_toks_not_matched(toks, tables):
    toks_not_in = []
    for tok in toks:
        matched = False
        for key in tables:
            if tok == key:
                matched = True
            elif tok in tables[key]:
                matched = True
            if matched:
                break
        if not matched:
            toks_not_in.append(tok)
    return set(toks_not_in)
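
A toy check of the reconstructed function (the table dict is hypothetical):

toy_tables = {'singer': ['name', 'country'], 'stadium': ['name', 'capacity']}
print(all_toks_not_matched(['singer', 'name', 'xyz'], toy_tables))
# -> {'xyz'}: 'singer' matches a table name, 'name' matches a column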


def iterate_through_data_tables(grammar, data, tables):
    for k in data:
        toks = rem_terminals_tokenize_string(grammar, data[k]['sql'])
        unmatched = all_toks_not_matched(toks, tables)
        print('{} unmatched tokens in {}'.format(len(unmatched), data[k]['sql']))
        print(unmatched)


if __name__ == '__main__':
    jsonfile = '/home/jq/software/triplet-all/spider_train_subset.json'

    with open(jsonfile, 'r') as f:
        data = json.load(f)

    with open('spider_tables.json', 'r') as f:
        spider_tables = json.load(f)

    g = SimpleGrammar('sql_simple_transition.bnf')
    iterate_through_data_tables(g, data, spider_tables)