Esempio n. 1
0
def Feature_generate(file_in,file2_in,file3_in,file4_in):
    '''
    Generate features for every user and save them as CSV.

    The four inputs are chained through the feature extractors, each stage
    receiving the (padded) dictionary produced by the previous one:

    file_in  -> log_feature
    file2_in -> device_feature
    file4_in -> http_feature
    file3_in -> email_feature

    The result is written to USERNAME/feature/data_out.csv, overwriting any
    previous content. Nothing is returned.
    '''
    # Width passed to the final dict_complemetion call and used as the column
    # count when reshaping each value for output; the two must stay in sync.
    feature_width=37

    path_feature=USERNAME+'/feature'
    path_check(path_feature)
    file_out=path_feature+'/data_out.csv'

    log_dicts=log_feature(file_in)
    device_dicts=device_feature(file2_in,log_dicts)
    # Width 11 is the original setting (20 was an alternative tried earlier).
    new_device_dicts=dict_complemetion(device_dicts,11)
    http_dicts=http_feature(file4_in,new_device_dicts)
    # Width 12 is the original setting (22 was an alternative tried earlier).
    new_http_dicts=dict_complemetion(http_dicts,12)
    email_dicts=email_feature(file3_in,new_http_dicts)
    # 41 was an alternative final width tried earlier.
    new_email_dicts=dict_complemetion(email_dicts,feature_width)

    # Opening in write mode truncates any previous output; the context manager
    # guarantees the handle is closed even if reshape/savetxt raises.
    with open(file_out,'wt') as data_save:
        for value in new_email_dicts.values():
            # Each value becomes rows of feature_width columns.
            values=np.reshape(value,(-1,feature_width))
            np.savetxt(data_save,values,fmt='%f',delimiter=',')
Esempio n. 2
0
def pre_step():
    '''
    Prepare the per-type log files under USERNAME/new.

    First every raw log is passed through new_log into USERNAME/new, then
    every file produced there is passed through combine_time_log to create
    its "...2.csv" counterpart.
    '''
    new_dir=USERNAME+'/new'
    path_check(new_dir)

    # (raw log name, combined log name) pairs, processed in this order.
    # NOTE(review): '/http.csv' is listed twice, so it is processed twice in
    # both steps - confirm whether this is intentional.
    name_pairs=[
        ('/http.csv','/http2.csv'),
        ('/device.csv','/device2.csv'),
        ('/email.csv','/email2.csv'),
        ('/file.csv','/file2.csv'),
        ('/http.csv','/http2.csv'),
        ('/logon.csv','/logon2.csv'),
    ]

    # step 1  ----------- rewrite each raw log into the 'new' directory
    for raw_name,_ in name_pairs:
        new_log(USERNAME+raw_name,new_dir+raw_name)

    # step 2  ----------- combine data in the same day
    for raw_name,combined_name in name_pairs:
        combine_time_log(new_dir+raw_name,new_dir+combined_name)
Esempio n. 3
0
def Sequence_generate(file_in,file2_in,file3_in,file4_in):
    '''
    Build the combined action sequence and its encoded form.

    Each input file is turned into a time sequence by file_sequence, the four
    sequences are merged pairwise with sequence_combine, and the results are
    written under USERNAME/sequence:

    actions_sequence.csv - written by sort_actions_InSequence
    sequence_temp.csv    - str() dump of the merged sequence
    sequence_code.csv    - written by sequence_code

    Nothing is returned.
    '''
    seq_dir=USERNAME+'/sequence'
    path_check(seq_dir)
    ActionSeq_save_path=seq_dir+'/actions_sequence.csv'
    sequence_temp=seq_dir+'/sequence_temp.csv'
    sequence_code_save=seq_dir+'/sequence_code.csv'

    # The second argument is a per-source code understood by file_sequence.
    # NOTE(review): code 3 is skipped here - presumably reserved for another
    # log type; confirm against file_sequence.
    logon_time_sequence=file_sequence(file_in,0)
    device_time_sequence=file_sequence(file2_in,1)
    email_time_sequence=file_sequence(file3_in,2)
    http_time_sequence=file_sequence(file4_in,4)

    # Merge in a fixed order: logon + device, then email, then http.
    Final_Sequence=sequence_combine(logon_time_sequence,device_time_sequence)
    Final_Sequence=sequence_combine(Final_Sequence,email_time_sequence)
    Final_Sequence=sequence_combine(Final_Sequence,http_time_sequence)
    max_length=sort_actions_InSequence(Final_Sequence,ActionSeq_save_path)

    # Dump the merged sequence as text; the context manager closes the file
    # even if the write fails.
    with open(sequence_temp,'wt') as file_temp:
        file_temp.write(str(Final_Sequence))

    # -------------- sequence code
    sequence_code(ActionSeq_save_path,sequence_code_save,max_length)