def load_data(opt, model='train'):
    """Load the DeepCS Java dataset plus its AST trees and wrap them in loaders.

    Args:
        opt: options object; only ``opt.batchsz`` is read here.
        model: ``'train'`` returns ``(wrapDataLoader, all_dict)``;
            ``'n_query'`` additionally returns the raw code bodies and
            function names for query evaluation. Any other value returns
            ``None`` implicitly.

    Returns:
        tuple: ``(WrapDataLoader, NamedDict)`` for ``'train'``, or
        ``(WrapDataLoader, NamedDict, code_body, func_name)`` for
        ``'n_query'``.
    """
    data = pd.read_csv('./data/java/deep_with_ast/java_with_ast_0.csv')
    # build_dict(data)  # one-off vocabulary construction; run once, then load below

    # Load the pre-built vocabularies for each input field.
    meths = load_dataframe(data, 'method_name')
    dict_meth = Dict(lddict='./data/java/deep_with_ast/dict/meth_dict.pkl')
    tokens = load_dataframe(data, 'tokens')
    dict_token = Dict(lddict='./data/java/deep_with_ast/dict/token_dict.pkl')
    apis = load_dataframe(data, 'api_seq')
    dict_api = Dict(lddict='./data/java/deep_with_ast/dict/api_dict.pkl')
    comments = load_dataframe(data, 'desc')
    dict_comment = Dict(
        lddict='./data/java/deep_with_ast/dict/comment_dict.pkl')
    dict_ast = Dict(lddict='./data/java/deep_with_ast/dict/ast_dict.pkl')

    # Convert token sequences to fixed-length index sequences
    # (lengths presumably match the model's input sizes -- TODO confirm).
    meth_data = [dict_meth.convertToIdx(meth, length=6) for meth in meths]
    token_data = [
        dict_token.convertToIdx(token, length=50) for token in tokens
    ]
    api_data = [dict_api.convertToIdx(api, length=30) for api in apis]
    comment_data = [
        dict_comment.convertToIdx(comment, length=30) for comment in comments
    ]

    deepcs_dataset = DeepCSDateSet(meth_data, token_data, api_data,
                                   comment_data)
    tree_json = parse_ast(data)
    ast_data = get_tree_dataset(tree_json, dict_ast)

    # BUG FIX: torch has no 'gpu' device type -- torch.device('gpu') raises
    # RuntimeError. Use 'cuda' when a GPU is available, else fall back to CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Two loaders with identical batch size and shuffle=False so their
    # batches stay aligned when zipped by WrapDataLoader.
    tree_loader = DataLoader(dataset=ast_data,
                             batch_size=opt.batchsz,
                             collate_fn=batcher(device),
                             shuffle=False,
                             num_workers=2)
    loader = DataLoader(dataset=deepcs_dataset,
                        batch_size=opt.batchsz,
                        shuffle=False,
                        num_workers=2,
                        collate_fn=my_collate)
    wrapDataLoader = WrapDataLoader(loader, tree_loader)

    NamedDict = namedtuple(
        'NamedDict', ['meth_name', 'tokens', 'api_seq', 'description', 'ast'])
    all_dict = NamedDict(dict_meth, dict_token, dict_api, dict_comment,
                         dict_ast)

    if model == 'train':
        return wrapDataLoader, all_dict
    if model == 'n_query':
        code_body = data['original_string'].tolist()
        func_name = data['func_name'].tolist()
        return wrapDataLoader, all_dict, code_body, func_name
def load_dict(opt):
    """Load the code and comment vocabularies, plus the AST-leaf
    vocabulary when ``opt.dataset_type == "c"``.

    Returns:
        list: ``[dict_code, dict_comment]`` or
        ``[dict_code, dict_comment, dict_leaves]``.
    """
    def _special_tokens():
        # Fresh list on every call so each Dict owns its own copy.
        return [
            data.Constants.PAD_WORD,
            data.Constants.UNK_WORD,
            data.Constants.BOS_WORD,
            data.Constants.EOS_WORD,
        ]

    dict_code = Dict(_special_tokens(), lower=opt.lower)
    dict_code.loadFile(opt.dict_code)

    dict_comment = Dict(_special_tokens(), lower=opt.lower)
    dict_comment.loadFile(opt.dict_comment)

    dicts = [dict_code, dict_comment]

    # The C dataset carries an extra AST-leaves vocabulary.
    if opt.dataset_type == "c":
        dict_leaves = Dict(_special_tokens(), lower=opt.lower)
        dict_leaves.loadFile(opt.ast_tree_leaves_dict)
        dicts.append(dict_leaves)

    return dicts
def __init__(self):
    """Build the shared test fixtures: HTTP runner, spreadsheet-driven
    test data, mail sender, ini reader, and two scratch Dict stores."""
    unittest.TestCase.__init__(self)
    # Base directory for the data-driven test configuration files.
    config_dir = 'E:/xt/xtcontract/xironbackend/dataconfig/'
    self.run_method = RunMethod()
    # Sheet index 8 of the workbook holds the cases -- TODO confirm.
    self.data = GetData(config_dir + 'interfacebar1.xlsx', 8)
    self.send_mai = SendEmail()
    self.read_int = ReadIni()
    self.statistic = Dict()
    self.excel_prop = Dict()
def dict_to_object(dict_obj):
    """Recursively wrap a plain dict (and every nested dict value) in a
    Dict object; any non-dict value is returned untouched."""
    if not isinstance(dict_obj, dict):
        # Leaf value: nothing to convert.
        return dict_obj
    wrapped = Dict()
    for key in dict_obj:
        wrapped[key] = CommonUtil.dict_to_object(dict_obj[key])
    return wrapped
def dict_to_object(dict_obj):
    """Convert every dict inside a list into a Dict object.

    Non-dict list elements are kept as-is; a non-list argument is
    returned unchanged.

    BUG FIX: the original appended converted items back onto ``dict_obj``
    while iterating over it, which loops forever on any non-empty list.
    We now accumulate into a new list instead of mutating the input.

    Args:
        dict_obj: expected to be a list (possibly containing dicts).

    Returns:
        A new list with dicts wrapped as Dict objects, or ``dict_obj``
        itself when it is not a list.
    """
    if type(dict_obj) is list:
        converted = []
        for item in dict_obj:
            if not isinstance(item, dict):
                converted.append(item)
                continue
            inst = Dict()
            for k, v in item.items():
                # Recursion returns non-list values (including nested
                # dicts) unchanged, mirroring the original behavior.
                inst[k] = dict_to_object(v)
            converted.append(inst)
        return converted
    return dict_obj
def build_dict(data):
    """Build and persist the AST/method/token/api/comment vocabularies.

    CONSISTENCY FIX: ``load_data`` reads every vocabulary from
    ``./data/java/deep_with_ast/dict/*.pkl``, but this function wrote them
    one directory up (``.../deep_with_ast/*.pkl``). Write into the
    ``dict/`` subdirectory so the two functions agree, and create it if
    it does not exist yet.

    Args:
        data: DataFrame with 'ast', 'method_name', 'tokens', 'api_seq'
            and 'desc' columns (as consumed by parse_ast/load_dataframe).
    """
    import os
    dict_dir = './data/java/deep_with_ast/dict'
    os.makedirs(dict_dir, exist_ok=True)

    tree_json = parse_ast(data)
    dict_tree = Dict(data=tree_json, istree=True)
    dict_tree.writeFile(os.path.join(dict_dir, 'ast_dict.pkl'))
    print('ast_dict was builded!!!')

    meth_name = load_dataframe(data, 'method_name')
    dict_meth = Dict(data=meth_name)
    dict_meth.writeFile(os.path.join(dict_dir, 'meth_dict.pkl'))
    print('meth_dict was builded!!!')

    tokens = load_dataframe(data, 'tokens')
    dict_token = Dict(data=tokens)
    dict_token.writeFile(os.path.join(dict_dir, 'token_dict.pkl'))
    print('token_dict was builded!!!')

    apis = load_dataframe(data, 'api_seq')
    dict_api = Dict(data=apis)
    dict_api.writeFile(os.path.join(dict_dir, 'api_dict.pkl'))
    print('api_dict was builded!!!')

    comments = load_dataframe(data, 'desc')
    dict_comment = Dict(data=comments)
    dict_comment.writeFile(os.path.join(dict_dir, 'comment_dict.pkl'))
    print('comment_dict was builded!!!')
# NOTE(review): the next two lines are the tail of a function whose `def`
# line is outside this view -- `ret` and `g` are bound there. Copies the
# node attributes 'x', 'y', 'mask' from the networkx graph into `ret`.
ret.from_networkx(g, node_attrs=['x', 'y', 'mask'])
return ret


def batcher(dev):
    """Return a DataLoader ``collate_fn`` that merges a list of DGL tree
    graphs into one batched graph and moves its node data to ``dev``."""
    def batcher_dev(batch):
        # Merge the individual tree graphs into a single batched graph.
        batch_trees = dgl.batch(batch)
        return SSTBatch(graph=batch_trees,
                        mask=batch_trees.ndata['mask'].to(dev),
                        wordid=batch_trees.ndata['x'].to(dev),
                        label=batch_trees.ndata['y'].to(dev))
    return batcher_dev


if __name__ == '__main__':
    # Smoke test: build the tree dataset from a mini CSV and iterate it
    # through a DataLoader using the batcher above (CPU only).
    data = pd.read_csv('../data/java/temp/mini_train.csv')
    tree = data['ast'].tolist()
    tree_json = [json.loads(item) for item in tree]
    dict_tree = Dict(data=tree_json, istree=True)
    ret = get_tree_dataset(tree_json, dict_tree)
    device = torch.device('cpu')
    train_loader = DataLoader(dataset=ret,
                              batch_size=5,
                              collate_fn=batcher(device),
                              shuffle=False,
                              num_workers=0)
    for i in train_loader:
        # Prints one blank line per batch -- presumably a placeholder for
        # real inspection code; verify before relying on it.
        print()