예제 #1
0
def getTestApiSequence(file="Data/SourceData/test.csv"):
    # Groups the rows produced by loaddata(file) per user (line[0]) and orders
    # each user's (tid, order_index, api_name) records.
    # NOTE(review): the visible code looks truncated -- new_data is never
    # filled and nothing is returned; confirm against the full file.
    print(file)
    data_iter = loaddata(file)
    # Consumes the first item of the iterator; the value is discarded because
    # the loop variable below reuses the name `lines`.
    # NOTE(review): presumably meant to skip a header -- but each item is
    # iterated as a collection of rows below, so this drops a whole item;
    # verify against loaddata.
    lines = next(data_iter)
    # user_id -> list of [tid, order_index, api_name] records
    data = {}
    for lines in data_iter:
        # print(lines[0])
        for line in lines:
            user_id = line[0]
            api_name = line[1]
            # return_value = line[-2]
            order_index = int(line[-1])
            tid = line[3]
            d = list((tid, order_index, api_name))
            if user_id in data:
                data[user_id].append(d)
            else:
                data[user_id] = [d]
        # break
    new_data = {}
    for user_id in data:
        user_data = data[user_id]
        # Never true here: every list stored in `data` is created with one record.
        if not user_data:
            continue
        # Two stable sorts: the result is ordered by tid first and by
        # order_index within the same tid.
        # NOTE(review): tid is compared as read from the row (not converted);
        # if it is a string the ordering is lexicographic -- confirm.
        user_data = sorted(user_data, key=lambda s: s[1])
        user_data = sorted(user_data, key=lambda s: s[0])
        # NOTE(review): the next line is not valid Python as shown (stray "5"
        # after the subscript); the excerpt appears garbled/truncated here.
        api_features = [apiMap[line[2]]5 for line in user_data]
예제 #2
0
def devidefile(input=input_file, decide_size=60, output_dir="Data/SourceData/"+file+"/"):
    """Split the rows yielded by ``loaddata(input)`` into bucket files.

    ``decide_size`` text files named ``00000.txt``, ``00001.txt``, ... are
    created (truncated if they exist) directly under ``output_dir``. Every row
    is written as one comma-joined line to the file whose index is
    ``int(row[0]) % decide_size``.

    Args:
        input: Path handed to ``loaddata``.
        decide_size: Number of bucket files to create.
        output_dir: Prefix the file names are appended to; it is concatenated
            as-is, so it has to end with a path separator.

    Returns:
        None.
    """
    data_iter = loaddata(input)
    files_fps = []
    try:
        # Open inside the try block so that handles opened before a failing
        # open() are still closed.
        for i in range(decide_size):
            files_fps.append(open(output_dir + ("%05d" % i) + ".txt", "w"))
        for lines in data_iter:
            for line in lines:
                bucket = int(line[0]) % decide_size
                files_fps[bucket].write(",".join(line) + "\n")
    finally:
        # Closing flushes buffered output and releases the handles even when
        # reading or writing raised.
        for fp in files_fps:
            fp.close()
예제 #3
0
파일: static.py 프로젝트: FlashXT/python-1
def getLabel(input_dir="Data/SourceData", output_dir="Data/static", train=True):
    """Collect the first ``row[1]`` seen for every ``row[0]`` in train.csv.

    Rows come from ``loaddata(input_dir + "/train.csv")``. Each newly seen
    ``row[0]`` is stored together with its ``row[1]`` and also written to
    ``output_dir + "/label.txt"`` as a tab-separated line.

    Args:
        input_dir: Directory containing ``train.csv``.
        output_dir: Directory that receives ``label.txt``.
        train: When false nothing is read or written.

    Returns:
        The ``row[0] -> row[1]`` dict, or ``None`` when ``train`` is false.
    """
    if not train:
        return None

    batches = loaddata(input_dir + "/train.csv")
    labels = {}
    with open(output_dir + "/label.txt", "w") as out:
        for batch in batches:
            for row in batch:
                key = row[0]
                if key in labels:
                    # Only the first occurrence of a key is recorded.
                    continue
                labels[key] = row[1]
                out.write(key + "\t" + row[1] + "\n")
    return labels
예제 #4
0
파일: static.py 프로젝트: FlashXT/python-1
def getStaticfeature(input_dir="Data/SourceData", output_dir=output_path_dir, train=True):
    """Write run-length counts of per-row features and build index maps.

    Rows come from ``loaddata`` on ``train.csv`` (or ``test.csv`` when
    ``train`` is false). For every row two feature keys are formed:
    ``row[0] + "_api_" + row[2]`` and ``row[0] + "_return_" + row[-2]``.
    Consecutive rows with the same key are counted; whenever the key changes,
    the finished run is written to the output file as ``key<TAB>count``. A key
    can therefore appear more than once in the file when its rows are not
    contiguous. API and return runs are interleaved in the same file.

    Args:
        input_dir: Directory containing ``train.csv`` / ``test.csv``.
        output_dir: Directory receiving ``static_feature.txt`` (train) or
            ``test_static_feature.txt`` (test).
        train: Selects the train or the test input/output file names.

    Returns:
        ``(api_map, return_map)``: dicts assigning 1-based indices, in order
        of first appearance, to ``str(row[2])`` and ``str(row[-1])``.
    """
    file = input_dir + "/train.csv"
    filename = output_dir + "/static_feature.txt"
    if not train:
        file = input_dir + "/test.csv"
        filename = output_dir + "/test_static_feature.txt"
    data_iter = loaddata(file)

    api_static = {}
    return_static = {}
    api_index = 0
    return_index = 0
    api_map = {}
    return_map = {}

    # The context manager closes the file even if a malformed row raises.
    with open(filename, "w") as fp:

        def _flush(counts):
            # Emit every accumulated "key<TAB>count" pair.
            for key in counts:
                fp.write(key + "\t" + str(counts[key]) + "\n")

        for lines in data_iter:
            for line in lines:
                api_feature = str(line[0] + "_api_" + line[2])
                if api_feature in api_static:
                    api_static[api_feature] += 1
                else:
                    # Key changed: write the finished run, start a new one.
                    _flush(api_static)
                    api_static = {api_feature: 1}
                apiName = str(line[2])
                if apiName not in api_map:
                    api_index += 1
                    api_map[apiName] = api_index

                return_feature = str(line[0] + "_return_" + line[-2])
                if return_feature in return_static:
                    return_static[return_feature] += 1
                else:
                    _flush(return_static)
                    return_static = {return_feature: 1}
                # NOTE(review): the feature above uses line[-2] but this map is
                # keyed on line[-1]; kept as-is, confirm which column is meant.
                returnValue = str(line[-1])
                if returnValue not in return_map:
                    return_index += 1
                    return_map[returnValue] = return_index

        # Runs are only written when the next key differs, so the last API and
        # return runs must be flushed here or their counts never reach the file.
        _flush(api_static)
        _flush(return_static)
    return api_map, return_map
예제 #5
0
파일: static.py 프로젝트: FlashXT/python-1
def getApipv(file="Data/SourceData/train.csv"):
    """Count per-row feature occurrences and assign ids to APIs/return values.

    Rows come from ``loaddata(file)``, one batch at a time; the first row of
    every batch is printed.

    Args:
        file: Path handed to ``loaddata``.

    Returns:
        A 4-tuple ``(api_static_result, return_result_static, api_id,
        return_id)``:

        * ``api_static_result``: occurrences of ``row[0] + "_" + row[2]``.
        * ``return_result_static``: occurrences of ``row[0] + "_" + row[-2]``.
        * ``api_id``: ``str(row[2])`` -> id in first-seen order, starting at 2.
        * ``return_id``: ``str(row[-2])`` -> id in first-seen order, starting
          at 1.
    """
    api_static_result, return_result_static = {}, {}
    api_id, return_id = {}, {}
    last_api_id = 1      # incremented before use, so the first API gets id 2
    next_return_id = 1   # used before being incremented, so ids start at 1

    for batch in loaddata(file):
        print(batch[0])
        for row in batch:
            api_key = str(row[0] + "_" + row[2])
            api_static_result[api_key] = api_static_result.get(api_key, 0) + 1

            api_name = str(row[2])
            if api_name not in api_id:
                last_api_id += 1
                api_id[api_name] = last_api_id

            return_result = str(row[-2])
            if return_result not in return_id:
                return_id[return_result] = next_return_id
                next_return_id += 1

            return_key = str(row[0] + "_" + row[-2])
            return_result_static[return_key] = (
                return_result_static.get(return_key, 0) + 1
            )

    return api_static_result, return_result_static, api_id, return_id