Beispiel #1
0
def write_embedding_table_dataset_end(data_type):
    """Encode the table entries at indices 90112..90232 of the dataset.

    Loads the dataset for ``data_type`` from the TabFact data directory via
    ``read_dataset``, collects ``table_data["table"][i]`` for every index in
    a fixed window and passes the resulting list to ``Encoding_table``.

    Args:
        data_type: Dataset selector forwarded unchanged to ``read_dataset``.

    Returns:
        None. Whatever ``Encoding_table`` returns is discarded.
    """
    datapath = '/home/DATA/TabFact'
    # Half-open index window [first_index, stop_index).
    # 90112 == 176 * 512, so this is presumably the tail left over after
    # 512 chunks of 176 entries -- TODO confirm against the chunked writer.
    first_index = 90112
    stop_index = 90233

    # Only the table part of the dataset is needed here.
    _, _, table_data = read_dataset(datapath, data_type)
    tables = table_data["table"]
    table_data_part = [tables[i] for i in range(first_index, stop_index)]

    # NOTE(review): 'train' and '512' are hard-coded, so `data_type` only
    # influences which dataset is read, not what is passed on here --
    # confirm this is intended.
    Encoding_table(table_data_part, 'train', '512')
Beispiel #2
0
def write_labels(data_type):
    """Write the last field of every sentence record to a JSON file.

    Loads the dataset for ``data_type`` via ``read_dataset``, takes the last
    element (``[-1]``) of each entry in the sentence data and dumps the
    resulting list to
    ``/home/DATA/TabFact/processed_datasets/embedding_data/<data_type>_label.json``.

    Args:
        data_type: Dataset selector forwarded to ``read_dataset``; it is also
            interpolated into the output file name.

    Returns:
        None.
    """
    datapath = '/home/DATA/TabFact'
    sent_data, _, _ = read_dataset(datapath, data_type)

    # Index-based access is kept on purpose: the container type returned by
    # read_dataset is not visible here, so direct iteration is not assumed
    # to yield the same records as sent_data[0..n-1].
    labels = [sent_data[i][-1] for i in range(len(sent_data))]

    label_path = (
        datapath
        + '/processed_datasets/embedding_data/{}_label.json'.format(data_type)
    )
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(label_path, 'w') as f:
        json.dump(labels, f)
Beispiel #3
0
def write_embedding_column_dataset(data_type, id, chunk_size=176):
    """Encode one fixed-size chunk of the dataset's column entries.

    Loads the dataset for ``data_type`` via ``read_dataset``, collects
    ``table_data["column"][i]`` for every ``i`` in
    ``range(chunk_size * id, chunk_size * (id + 1))`` and passes that list to
    ``Encoding_column`` together with ``id``.

    Args:
        data_type: Dataset selector forwarded unchanged to ``read_dataset``.
        id: Zero-based chunk index. The name shadows the builtin ``id`` but
            is kept so existing keyword callers continue to work.
        chunk_size: Number of consecutive entries per chunk. Defaults to 176,
            the value that was previously hard-coded.

    Returns:
        None. Whatever ``Encoding_column`` returns is discarded.
    """
    datapath = '/home/DATA/TabFact'
    # Only the table part of the dataset is needed here.
    _, _, table_data = read_dataset(datapath, data_type)

    first_index = chunk_size * id
    stop_index = chunk_size * (id + 1)
    columns = table_data["column"]
    column_data_part = [columns[i] for i in range(first_index, stop_index)]

    # NOTE(review): 'train' is hard-coded, so `data_type` only influences
    # which dataset is read, not what is passed on here -- confirm this is
    # intended.
    Encoding_column(column_data_part, 'train', id)