def getTestApiSequence(file="Data/SourceData/test.csv"):
    """Build, for every user id in *file*, its ordered sequence of API ids.

    Rows are read in batches from ``loaddata(file)``.  For each row the code
    uses column 0 as the user id, column 1 as the API name, column 3 as the
    thread id (``tid``) and the last column (converted to ``int``) as the
    call order index.  Each user's calls are ordered by ``tid`` first and by
    order index second, then every API name is looked up in the module-level
    ``apiMap``.

    NOTE(review): in the reviewed source the final statement was garbled
    (``apiMap[line[2]]5``) and the function ended without filling or
    returning ``new_data``; the tail of the function appears to have been
    lost.  Storing the mapped sequence per user and returning it is the
    presumed intent -- confirm against the callers.

    Args:
        file: Path handed to ``loaddata``.

    Returns:
        Dict mapping user id to the list of ``apiMap`` values of that user's
        calls, in (tid, order index) order.
    """
    print(file)
    data_iter = loaddata(file)
    # The first batch is consumed and discarded, exactly as the original did.
    # NOTE(review): sibling functions do not skip a batch -- confirm this is
    # intended (e.g. a header batch) and not leftover debugging code.
    # The default keeps an empty input from raising StopIteration.
    next(data_iter, None)

    data = {}
    for lines in data_iter:
        for line in lines:
            user_id = line[0]
            api_name = line[1]
            order_index = int(line[-1])
            tid = line[3]
            data.setdefault(user_id, []).append([tid, order_index, api_name])

    new_data = {}
    for user_id, user_data in data.items():
        # One sort on (tid, order_index) is equivalent to the original pair of
        # stable sorts (by order_index, then by tid).
        user_data.sort(key=lambda call: (call[0], call[1]))
        new_data[user_id] = [apiMap[call[2]] for call in user_data]
    return new_data
def devidefile(input=input_file, decide_size=60, output_dir="Data/SourceData/" + file + "/"):
    """Distribute the rows of *input* over ``decide_size`` bucket files.

    Every row yielded (in batches) by ``loaddata(input)`` is appended, as a
    comma-joined line, to the bucket file selected by
    ``int(row[0]) % decide_size``.  Bucket files are named ``00000.txt``,
    ``00001.txt``, ... inside *output_dir* and are truncated on open.

    The defaults for *input* and *output_dir* are computed once, when the
    function is defined, from the module-level names ``input_file`` and
    ``file``.

    Args:
        input: Source passed to ``loaddata``.
        decide_size: Number of bucket files to create.
        output_dir: Directory prefix (with trailing separator) for the
            bucket files; it must already exist.

    Raises:
        ValueError: If the first field of a row is not an integer literal.
    """
    # Function-scope import so the block does not depend on the file header.
    from contextlib import ExitStack

    data_iter = loaddata(input)
    # ExitStack closes every bucket file on exit, including when opening a
    # later file or processing a row fails (the original never closed them).
    with ExitStack() as stack:
        files_fps = [
            stack.enter_context(open(output_dir + ("%05d" % i) + ".txt", "w"))
            for i in range(decide_size)
        ]
        for lines in data_iter:
            for line in lines:
                value = int(line[0]) % decide_size
                files_fps[value].write(",".join(line) + "\n")
def getLabel(input_dir="Data/SourceData", output_dir="Data/static", train=True):
    """Collect the first label seen for each id in ``train.csv``.

    Reads row batches from ``loaddata(input_dir + "/train.csv")``.  The first
    time a value appears in column 0, the value in column 1 is recorded for
    it and a ``<column 0>\\t<column 1>`` line is written to
    ``output_dir + "/label.txt"``.  Later rows with the same column-0 value
    are ignored.

    Args:
        input_dir: Directory that contains ``train.csv``.
        output_dir: Directory in which ``label.txt`` is (re)written.
        train: When false, nothing is read or written.

    Returns:
        Dict mapping column-0 value to its first column-1 value, or ``None``
        when *train* is false.
    """
    source_path = input_dir + "/train.csv"
    label_path = output_dir + "/label.txt"
    if not train:
        return None

    batches = loaddata(source_path)
    labels = {}
    with open(label_path, "w") as out:
        for batch in batches:
            for row in batch:
                if row[0] in labels:
                    # Only the first occurrence of an id is recorded.
                    continue
                labels[row[0]] = row[1]
                out.write(row[0] + "\t" + row[1] + "\n")
    return labels
def getStaticfeature(input_dir="Data/SourceData", output_dir=output_path_dir, train=True):
    """Write run-length counts of API / return features and build id maps.

    Rows are read in batches from ``loaddata`` on ``train.csv`` (or
    ``test.csv`` when *train* is false).  For every row two feature keys are
    formed: ``<col 0>_api_<col 2>`` and ``<col 0>_return_<col -2>``.  Each
    key is counted while it keeps repeating on consecutive rows; as soon as
    a different key shows up, the finished run is written to the output file
    as ``<key>\\t<count>`` and counting restarts.  API and return lines are
    interleaved in the same file.

    Fixes relative to the previous version:
      * the output file is closed even when processing fails;
      * the last pending API run and return run are written at the end
        (previously they were silently dropped);
      * ``return_map`` is keyed on column ``-2``, the same column used for
        the return feature (it previously used column ``-1``).
        NOTE(review): confirm no consumer relied on the old keys.

    The default for *output_dir* is taken from the module-level name
    ``output_path_dir`` when the function is defined.

    Args:
        input_dir: Directory that contains ``train.csv`` / ``test.csv``.
        output_dir: Directory for ``static_feature.txt`` (training) or
            ``test_static_feature.txt`` (otherwise).
        train: Selects the training or the test input/output pair.

    Returns:
        ``(api_map, return_map)``: dicts assigning consecutive ids starting
        at 1, in order of first appearance, to each distinct column-2 value
        and each distinct column ``-2`` value respectively.
    """
    file = input_dir + "/train.csv"
    filename = output_dir + "/static_feature.txt"
    if not train:
        file = input_dir + "/test.csv"
        filename = output_dir + "/test_static_feature.txt"

    data_iter = loaddata(file)
    api_static = {}
    return_static = {}
    api_map = {}
    return_map = {}
    api_index = 0
    return_index = 0

    with open(filename, "w") as fp:

        def write_counts(counts):
            # Emit one "<key>\t<count>" line per entry; no-op when empty.
            for key in counts:
                fp.write(key + "\t" + str(counts[key]) + "\n")

        for lines in data_iter:
            for line in lines:
                api_feature = line[0] + "_api_" + line[2]
                if api_feature in api_static:
                    api_static[api_feature] += 1
                else:
                    # A new key ends the current run: flush it and restart.
                    write_counts(api_static)
                    api_static = {api_feature: 1}

                api_name = line[2]
                if api_name not in api_map:
                    api_index += 1
                    api_map[api_name] = api_index

                return_feature = line[0] + "_return_" + line[-2]
                if return_feature in return_static:
                    return_static[return_feature] += 1
                else:
                    write_counts(return_static)
                    return_static = {return_feature: 1}

                return_value = line[-2]
                if return_value not in return_map:
                    return_index += 1
                    return_map[return_value] = return_index

        # Flush the runs that were still being counted when the input ended.
        write_counts(api_static)
        write_counts(return_static)

    return api_map, return_map
def getApipv(file="Data/SourceData/train.csv"):
    """Count per-id API and return-value occurrences and assign numeric ids.

    Iterates the row batches produced by ``loaddata(file)``, printing the
    first row of every batch.  For each row it counts the keys
    ``<col 0>_<col 2>`` and ``<col 0>_<col -2>``, and gives every distinct
    column-2 value and every distinct column ``-2`` value an integer id in
    order of first appearance.

    NOTE(review): ids for column-2 values start at 2 whereas ids for
    column ``-2`` values start at 1; this asymmetry is kept as-is -- confirm
    whether it is intentional.

    Args:
        file: Path handed to ``loaddata``.

    Returns:
        Tuple ``(api_static_result, return_result_static, api_id,
        return_id)``: the two count dicts followed by the two id dicts.
    """
    api_counts = {}
    return_counts = {}
    api_ids = {}
    return_ids = {}

    for batch in loaddata(file):
        print(batch[0])
        for row in batch:
            api_key = str(row[0] + "_" + row[2])
            api_counts[api_key] = api_counts.get(api_key, 0) + 1

            api = str(row[2])
            if api not in api_ids:
                # First id handed out is 2, then 3, 4, ...
                api_ids[api] = len(api_ids) + 2

            returned = str(row[-2])
            if returned not in return_ids:
                # First id handed out is 1, then 2, 3, ...
                return_ids[returned] = len(return_ids) + 1

            return_key = str(row[0] + "_" + row[-2])
            return_counts[return_key] = return_counts.get(return_key, 0) + 1

    return api_counts, return_counts, api_ids, return_ids