def write_embedding_table_dataset_end(data_type):
    """Encode the fixed trailing slice of the table data.

    Loads the dataset for ``data_type`` with ``read_dataset`` and hands the
    entries of ``table_data["table"]`` at indices 90112 through 90232
    (inclusive) to ``Encoding_table``.

    Args:
        data_type: Dataset split name, forwarded to ``read_dataset``.

    Raises:
        Whatever ``read_dataset`` / ``Encoding_table`` raise, plus an
        indexing error if ``table_data["table"]`` has fewer than 90233
        entries.

    NOTE(review): ``Encoding_table`` is always called with ``'train'`` and
    the string ``'512'``, independent of ``data_type`` -- confirm that this
    function is only meant to be used for the training split.
    """
    datapath = '/home/DATA/TabFact'

    # 90112 == 176 * 512, so this range presumably covers the leftover rows
    # after 512 full chunks of 176 entries -- TODO confirm against the code
    # that writes the regular chunks.
    tail_start = 90112
    tail_stop = 90233  # exclusive

    _, _, table_data = read_dataset(datapath, data_type)
    tables = table_data["table"]

    # Index-based access is kept on purpose: the container type returned by
    # read_dataset is not visible here, so slicing might behave differently.
    table_data_part = [tables[i] for i in range(tail_start, tail_stop)]

    Encoding_table(table_data_part, 'train', '512')
def write_labels(data_type):
    """Dump the label of every sentence record to a JSON file.

    Loads the dataset for ``data_type`` with ``read_dataset``, takes the last
    element of each entry in ``sent_data`` as its label, and writes the
    resulting list to
    ``/home/DATA/TabFact/processed_datasets/embedding_data/<data_type>_label.json``.

    Args:
        data_type: Dataset split name; forwarded to ``read_dataset`` and used
            as the prefix of the output file name.

    Raises:
        OSError: If the output file cannot be opened for writing (for
            example when the target directory does not exist).
    """
    datapath = '/home/DATA/TabFact'
    sent_data, _, _ = read_dataset(datapath, data_type)

    # Index-based access is kept on purpose: the container type returned by
    # read_dataset is not visible here, so direct iteration might not yield
    # the same records.
    labels = [sent_data[i][-1] for i in range(len(sent_data))]

    label_path = (
        datapath
        + '/processed_datasets/embedding_data/{}_label.json'.format(data_type)
    )
    # The context manager closes the file; no explicit close() is needed.
    with open(label_path, 'w') as f:
        json.dump(labels, f)
def write_embedding_column_dataset(data_type, id):
    """Encode one fixed-size chunk of the column data.

    Loads the dataset for ``data_type`` with ``read_dataset``, selects the
    entries of ``table_data["column"]`` at indices ``176 * id`` up to (but
    not including) ``176 * (id + 1)``, and passes them to
    ``Encoding_column`` together with ``id``.

    Args:
        data_type: Dataset split name, forwarded to ``read_dataset``.
        id: Zero-based chunk number; also forwarded to ``Encoding_column``.
            (The name shadows the ``id`` builtin but is kept so existing
            keyword callers keep working.)

    Raises:
        Whatever ``read_dataset`` / ``Encoding_column`` raise, plus an
        indexing error if the requested chunk extends past the end of
        ``table_data["column"]``.

    NOTE(review): ``Encoding_column`` is always called with ``'train'``,
    independent of ``data_type`` -- confirm that this is intended.
    """
    datapath = '/home/DATA/TabFact'
    chunk_size = 176  # number of column entries encoded per call

    _, _, table_data = read_dataset(datapath, data_type)
    columns = table_data["column"]

    # Index-based access is kept on purpose: the container type returned by
    # read_dataset is not visible here, so slicing might behave differently.
    column_data_part = [
        columns[i] for i in range(chunk_size * id, chunk_size * (id + 1))
    ]

    Encoding_column(column_data_part, 'train', id)