def Feature_generate(file_in, file2_in, file3_in, file4_in):
    '''
    Generate features for every user and save them as CSV.

    The features are built as a chain: each ``*_feature`` helper receives the
    (completed) dictionary produced by the previous stage, and
    ``dict_complemetion`` is applied after each stage with a growing width
    (11 -> 12 -> 37).  The final dictionary is written to
    ``USERNAME + '/feature/data_out.csv'``, overwriting any previous content.

    Args:
        file_in: input passed to ``log_feature``.
        file2_in: input passed to ``device_feature``.
        file3_in: input passed to ``email_feature``.
        file4_in: input passed to ``http_feature``.

    Returns:
        None.  The result is the CSV file written to disk.
    '''
    # Number of columns in one output row; it is used both for the final
    # completion step and for the reshape before saving, so keep it in one place.
    feature_width = 37

    path_feature = USERNAME + '/feature'
    path_check(path_feature)
    file_out = USERNAME + '/feature/data_out.csv'

    # NOTE(review): dict_complemetion presumably pads every entry to the given
    # width -- confirm against its definition.
    log_dicts = log_feature(file_in)
    device_dicts = device_feature(file2_in, log_dicts)
    new_device_dicts = dict_complemetion(device_dicts, 11)

    # The http stage (file4_in) deliberately runs before the email stage
    # (file3_in); this matches the original call order.
    http_dicts = http_feature(file4_in, new_device_dicts)
    new_http_dicts = dict_complemetion(http_dicts, 12)

    email_dicts = email_feature(file3_in, new_http_dicts)
    new_email_dicts = dict_complemetion(email_dicts, feature_width)

    # Opening with 'w' truncates the previous output; the context manager
    # guarantees the handle is closed even if reshape/savetxt raises.
    with open(file_out, 'w') as data_save:
        for value in new_email_dicts.values():
            # Each entry may hold several rows; lay it out feature_width per row.
            rows = np.reshape(value, (-1, feature_width))
            np.savetxt(data_save, rows, fmt='%f', delimiter=',')
def pre_step():
    '''
    Run the two preprocessing passes over the raw log files.

    Pass 1 calls ``new_log`` for every raw file under ``USERNAME``, writing
    to the same file name under ``USERNAME + '/new'``.  Pass 2 calls
    ``combine_time_log`` on each pass-1 output to produce the matching
    ``*2.csv`` file (original comment: "combine data in the same day").
    '''
    new_dir = USERNAME + '/new'
    path_check(new_dir)

    # NOTE(review): '/http.csv' is listed twice, so it is processed twice in
    # both passes.  Kept as-is to preserve behaviour -- confirm whether intended.
    raw_names = ['/http.csv', '/device.csv', '/email.csv',
                 '/file.csv', '/http.csv', '/logon.csv']
    combined_names = ['/http2.csv', '/device2.csv', '/email2.csv',
                      '/file2.csv', '/http2.csv', '/logon2.csv']

    # Pass 1: every raw file is handled before any combining starts.
    for raw_name in raw_names:
        new_log(USERNAME + raw_name, new_dir + raw_name)

    # Pass 2: combine data in the same day.
    for raw_name, combined_name in zip(raw_names, combined_names):
        combine_time_log(new_dir + raw_name, new_dir + combined_name)
def Sequence_generate(file_in, file2_in, file3_in, file4_in):
    '''
    Build the combined action sequence and its encoded form.

    Each input is turned into a time sequence by ``file_sequence``; the four
    sequences are merged pairwise with ``sequence_combine``, handed to
    ``sort_actions_InSequence`` (which receives the actions-sequence path and
    returns ``max_length``), dumped as text to a temp file, and finally
    passed to ``sequence_code``.

    Files under ``USERNAME + '/sequence'``:
        actions_sequence.csv -- path given to ``sort_actions_InSequence``.
        sequence_temp.csv    -- ``str()`` dump of the merged sequence.
        sequence_code.csv    -- path given to ``sequence_code``.

    Args:
        file_in: input for the first sequence (type code 0).
        file2_in: input for the second sequence (type code 1).
        file3_in: input for the third sequence (type code 2).
        file4_in: input for the fourth sequence (type code 4).

    Returns:
        None.  Results are written to the files listed above.
    '''
    path = USERNAME + '/sequence'
    path_check(path)
    ActionSeq_save_path = USERNAME + '/sequence/actions_sequence.csv'
    sequence_temp = USERNAME + '/sequence/sequence_temp.csv'
    sequence_code_save = USERNAME + '/sequence/sequence_code.csv'

    # NOTE(review): the second argument looks like a source-type code; 3 is
    # skipped here -- confirm against file_sequence.
    logon_time_sequence = file_sequence(file_in, 0)
    device_time_sequence = file_sequence(file2_in, 1)
    email_time_sequence = file_sequence(file3_in, 2)
    http_time_sequence = file_sequence(file4_in, 4)

    # Merge order is logon -> device -> email -> http.
    Final_Sequence = sequence_combine(logon_time_sequence, device_time_sequence)
    Final_Sequence = sequence_combine(Final_Sequence, email_time_sequence)
    Final_Sequence = sequence_combine(Final_Sequence, http_time_sequence)

    max_length = sort_actions_InSequence(Final_Sequence, ActionSeq_save_path)

    # write() emits the whole string at once (writelines() on a str iterates
    # it character by character); the context manager closes the file even
    # if the write fails.
    with open(sequence_temp, 'wt') as file_temp:
        file_temp.write(str(Final_Sequence))

    # -------------- sequence code
    sequence_code(ActionSeq_save_path, sequence_code_save, max_length)