Example #1
def load_data(opt, model='train'):
    data = pd.read_csv('./data/java/deep_with_ast/java_with_ast_0.csv')
    # build_dict(data)  # uncomment to (re)build the vocabulary .pkl files first
    meths = load_dataframe(data, 'method_name')
    dict_meth = Dict(lddict='./data/java/deep_with_ast/dict/meth_dict.pkl')
    tokens = load_dataframe(data, 'tokens')
    dict_token = Dict(lddict='./data/java/deep_with_ast/dict/token_dict.pkl')
    apis = load_dataframe(data, 'api_seq')
    dict_api = Dict(lddict='./data/java/deep_with_ast/dict/api_dict.pkl')
    comments = load_dataframe(data, 'desc')
    dict_comment = Dict(
        lddict='./data/java/deep_with_ast/dict/comment_dict.pkl')

    dict_ast = Dict(lddict='./data/java/deep_with_ast/dict/ast_dict.pkl')

    meth_data = [dict_meth.convertToIdx(meth, length=6) for meth in meths]
    token_data = [
        dict_token.convertToIdx(token, length=50) for token in tokens
    ]
    api_data = [dict_api.convertToIdx(api, length=30) for api in apis]
    comment_data = [
        dict_comment.convertToIdx(comment, length=30) for comment in comments
    ]
    deepcs_dataset = DeepCSDateSet(meth_data, token_data, api_data,
                                   comment_data)

    tree_json = parse_ast(data)
    ast_data = get_tree_dataset(tree_json, dict_ast)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    tree_loader = DataLoader(dataset=ast_data,
                             batch_size=opt.batchsz,
                             collate_fn=batcher(device),
                             shuffle=False,
                             num_workers=2)

    loader = DataLoader(dataset=deepcs_dataset,
                        batch_size=opt.batchsz,
                        shuffle=False,
                        num_workers=2,
                        collate_fn=my_collate)

    wrapDataLoader = WrapDataLoader(loader, tree_loader)

    NamedDict = namedtuple(
        'NamedDict', ['meth_name', 'tokens', 'api_seq', 'description', 'ast'])
    all_dict = NamedDict(dict_meth, dict_token, dict_api, dict_comment,
                         dict_ast)

    if model == 'train':
        return wrapDataLoader, all_dict

    if model == 'n_query':
        code_body = data['original_string'].tolist()
        func_name = data['func_name'].tolist()
        return wrapDataLoader, all_dict, code_body, func_name
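The namedtuple of vocabularies returned by load_data can also be used on its own, for example to index a free-text query with the same convertToIdx call applied to the training data above. A minimal sketch, assuming opt is the caller's usual argparse namespace and that plain whitespace tokenisation is good enough for the query:

loader, all_dict = load_data(opt, model='train')

# index a query with the comment vocabulary, padded/truncated to the same
# length (30) used for the training descriptions
query = 'read a file into a string'.split()
query_ids = all_dict.description.convertToIdx(query, length=30)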
Example #2
def load_dict(opt):
    dict_code = Dict([
        data.Constants.PAD_WORD, data.Constants.UNK_WORD,
        data.Constants.BOS_WORD, data.Constants.EOS_WORD
    ],
                     lower=opt.lower)

    dict_comment = Dict([
        data.Constants.PAD_WORD, data.Constants.UNK_WORD,
        data.Constants.BOS_WORD, data.Constants.EOS_WORD
    ],
                        lower=opt.lower)
    dict_code.loadFile(opt.dict_code)

    dict_comment.loadFile(opt.dict_comment)
    return_dict = [dict_code, dict_comment]
    if opt.dataset_type == "c":
        dict_leaves = Dict([
            data.Constants.PAD_WORD, data.Constants.UNK_WORD,
            data.Constants.BOS_WORD, data.Constants.EOS_WORD
        ],
                           lower=opt.lower)
        dict_leaves.loadFile(opt.ast_tree_leaves_dict)
        return_dict.append(dict_leaves)

    return return_dict
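A minimal way to exercise load_dict is to hand it any object exposing the attributes it reads; the sketch below uses argparse.Namespace, and the vocabulary file paths are placeholders rather than files from this repository.

from argparse import Namespace

opt = Namespace(
    lower=True,
    dict_code='data/dict_code.txt',        # placeholder path
    dict_comment='data/dict_comment.txt',  # placeholder path
    dataset_type='java',                   # anything other than "c" skips the leaf dict
    ast_tree_leaves_dict=None,             # only read when dataset_type == "c"
)

dict_code, dict_comment = load_dict(opt)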
Example #3
def __init__(self):
    unittest.TestCase.__init__(self)
    xiao_iron = 'E:/xt/xtcontract/xironbackend/dataconfig/'
    self.run_method = RunMethod()
    self.data = GetData(xiao_iron + 'interfacebar1.xlsx', 8)
    self.send_mai = SendEmail()
    self.read_int = ReadIni()
    self.statistic = Dict()
    self.excel_prop = Dict()
Example #4
def dict_to_object(dict_obj):
    # recursively convert a plain dict (and any nested dict values) into Dict instances
    if not isinstance(dict_obj, dict):
        return dict_obj
    inst = Dict()
    for k, v in dict_obj.items():
        inst[k] = CommonUtil.dict_to_object(v)
    return inst
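A quick sanity check for the converter above; it only relies on Dict supporting item assignment and lookup, which the method itself already assumes.

config = {'db': {'host': 'localhost', 'port': 5432}, 'debug': True}
obj = CommonUtil.dict_to_object(config)

# nested dicts are converted recursively, scalars are returned unchanged
print(obj['db']['host'])  # localhost
print(obj['debug'])       # True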
Example #5
def dict_to_object(dict_obj):
    if type(dict_obj) is list:
        # build a fresh list: appending to dict_obj while iterating it would
        # never terminate, and each converted item should be added only once
        result = []
        for item in dict_obj:
            if not isinstance(item, dict):
                result.append(item)
                continue
            inst = Dict()
            for k, v in item.items():
                inst[k] = dict_to_object(v)
            result.append(inst)
        return result
    return dict_obj
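The list variant can be exercised the same way: non-dict items are passed through unchanged, while dict items become Dict instances.

rows = [{'id': 1}, {'id': 2}, 'not-a-dict']
objs = dict_to_object(rows)
print(objs[0]['id'], objs[1]['id'])  # 1 2
print(objs[2])                       # not-a-dict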
Example #6
def build_dict(data):

    tree_json = parse_ast(data)
    dict_tree = Dict(data=tree_json, istree=True)
    dict_tree.writeFile('./data/java/deep_with_ast/ast_dict.pkl')
    print('ast_dict was built!!!')
    meth_name = load_dataframe(data, 'method_name')
    dict_meth = Dict(data=meth_name)
    dict_meth.writeFile('./data/java/deep_with_ast/meth_dict.pkl')
    print('meth_dict was built!!!')
    tokens = load_dataframe(data, 'tokens')
    dict_token = Dict(data=tokens)
    dict_token.writeFile('./data/java/deep_with_ast/token_dict.pkl')
    print('token_dict was built!!!')
    apis = load_dataframe(data, 'api_seq')
    dict_api = Dict(data=apis)
    dict_api.writeFile('./data/java/deep_with_ast/api_dict.pkl')
    print('api_dict was built!!!')
    comments = load_dataframe(data, 'desc')
    dict_comment = Dict(data=comments)
    dict_comment.writeFile('./data/java/deep_with_ast/comment_dict.pkl')
    print('comment_dict was built!!!')
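build_dict is a one-off preprocessing step; a typical invocation mirrors the commented-out call at the top of load_data in Example #1.

data = pd.read_csv('./data/java/deep_with_ast/java_with_ast_0.csv')
build_dict(data)  # writes the *_dict.pkl vocabulary files under ./data/java/deep_with_ast/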
Example #7
    # load the networkx graph `g` and its x/y/mask node attributes into the DGL graph `ret`
    ret.from_networkx(g, node_attrs=['x', 'y', 'mask'])
    return ret


def batcher(dev):
    def batcher_dev(batch):
        batch_trees = dgl.batch(batch)
        return SSTBatch(graph=batch_trees,
                        mask=batch_trees.ndata['mask'].to(dev),
                        wordid=batch_trees.ndata['x'].to(dev),
                        label=batch_trees.ndata['y'].to(dev))

    return batcher_dev


if __name__ == '__main__':
    data = pd.read_csv('../data/java/temp/mini_train.csv')
    tree = data['ast'].tolist()
    tree_json = [json.loads(item) for item in tree]
    dict_tree = Dict(data=tree_json, istree=True)
    ret = get_tree_dataset(tree_json, dict_tree)
    device = torch.device('cpu')
    train_loader = DataLoader(dataset=ret,
                              batch_size=5,
                              collate_fn=batcher(device),
                              shuffle=False,
                              num_workers=0)
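    # each batch yielded below is the SSTBatch built by batcher(device): the
    # merged dgl graph plus its mask / wordid / label node tensors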
    for i in train_loader:
        print()