### read values from text files ######
# Load the pickled artefacts from output_folder: the per-record feature
# lists, the global vocabulary dict, and the record names.
all_features = rw.read_features_frm_file(output_folder, "all_features_pickle.txt")
# Keep a copy of the raw feature lists as they were loaded.
rw.write_value(all_features, output_folder, "list_of_list_before_cleaning", "w")
global_vocab = rw.read_features_frm_file(output_folder, "global_vocab_pickle.txt")
rec_name_array = rw.read_features_frm_file(output_folder, "rec_name_array_pickle.txt")

##################### change key value pairs of global vocab ####################
# Swap keys and values of the vocabulary.
# NOTE(review): presumably global_vocab maps feature name -> column index, so
# the inverse maps index -> name; confirm against the code that builds it.
inv_global_vocab = dict(zip(global_vocab.values(), global_vocab.keys()))
# print type(inv_global_vocab.values())
# Materialise the values as a list: on Python 3 dict.values() is a view that
# NumPy cannot convert to a 1-D array, which makes np.savetxt fail.
all_features_list = list(inv_global_vocab.values())
np.savetxt(
    output_folder + "all_features_list.txt",
    all_features_list,
    fmt="%s",
    delimiter=',',
    newline='\n',
)

# generate class labels
y = np.array(cl.generate_labels(rec_name_array))
print("label array is: " + str(y))

# convert list of lists to matrix
# Called with the number of records and (largest vocabulary value + 1).
# NOTE(review): these look like the row/column counts of the matrix; confirm
# in cl.covert_array_to_matrix.
all_feature_matrix = cl.covert_array_to_matrix(
    all_features, len(all_features), max(global_vocab.values()) + 1
)
# print all_feature_matrix
# print ("type of all feature matrix is: " + str(type(all_feature_matrix)))

#################### SEPARATING EVALUATION DATA #########################
# Hold out 20% of the records for evaluation; random_state=0 keeps the split
# reproducible between runs.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the same function now lives in sklearn.model_selection.
X_cv, X_eval, y_cv, y_eval = cross_validation.train_test_split(
    all_feature_matrix, y, test_size=0.2, random_state=0
)
# Only the cross-validation portion is normalised here.
X_cv_normalized_matrix = cl.normalise_mean_var(X_cv)
# NOTE(review): this chunk opens in the middle of a statement. The orphaned
# tail is preserved below as a comment because the head of the call is not
# visible here and must not be guessed:
#     "global_vocab_pickle.txt")
# `global_vocab` and `all_features` are expected to be loaded before this point.
rec_name_array = rw.read_features_frm_file(output_folder, "rec_name_array_pickle.txt")

##################### change key value pairs of global vocab ####################
# Swap keys and values of the vocabulary.
# NOTE(review): presumably global_vocab maps feature name -> column index, so
# the inverse maps index -> name; confirm against the code that builds it.
inv_global_vocab = dict(zip(global_vocab.values(), global_vocab.keys()))
# print type(inv_global_vocab.values())
# Materialise the values as a list: on Python 3 dict.values() is a view that
# NumPy cannot convert to a 1-D array, which makes np.savetxt fail.
all_features_list = list(inv_global_vocab.values())
np.savetxt(
    output_folder + "all_features_list.txt",
    all_features_list,
    fmt="%s",
    delimiter=',',
    newline='\n',
)

# generate class labels
y = np.array(cl.generate_labels(rec_name_array))
print("label array is: " + str(y))

# convert list of lists to matrix
# Called with the number of records and (largest vocabulary value + 1).
# NOTE(review): these look like the row/column counts of the matrix; confirm
# in cl.covert_array_to_matrix.
all_feature_matrix = cl.covert_array_to_matrix(
    all_features, len(all_features), max(global_vocab.values()) + 1
)
# print all_feature_matrix
# print ("type of all feature matrix is: " + str(type(all_feature_matrix)))

#################### SEPARATING EVALUATION DATA #########################
# Hold out 20% of the records for evaluation; random_state=0 keeps the split
# reproducible between runs.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the same function now lives in sklearn.model_selection.
X_cv, X_eval, y_cv, y_eval = cross_validation.train_test_split(
    all_feature_matrix, y, test_size=0.2, random_state=0
)
# Only the cross-validation portion is normalised here.
X_cv_normalized_matrix = cl.normalise_mean_var(X_cv)
# Record names for the data set under output_folder. The commented-out lines
# are earlier per-database variants, kept for reference.
# rec_name_array_afpdb_normal=rw.read_features_frm_file(output_folder_afpdb_normal,"rec_name_array_pickle.txt")
# rec_name_array_nsrdb=rw.read_features_frm_file(output_folder_nsrdb,"rec_name_array_pickle.txt")
# rec_name_array_afdb=rw.read_features_frm_file(output_folder_afdb,"rec_name_array_pickle.txt")
rec_name_array = rw.read_features_frm_file(output_folder, "rec_name_array_pickle.txt")

# all_rec_name_array=rec_name_array_afpdb_patient+ rec_name_array_afpdb_normal
# With a single source there is nothing to concatenate, so this is an alias.
all_rec_name_array = rec_name_array
print("all rec_name array is: " + str(all_rec_name_array))

# generate class labels
# y_afpdb=cl.generate_labels("afpdb",rec_name_array_afpdb)
# y_nsrdb=cl.generate_labels("nsrdb",rec_name_array_nsrdb)
# y_afdb=cl.generate_labels("afdb",rec_name_array_afdb)
# y_aftdb=cl.generate_labels("aftdb",rec_name_array_aftdb)
# y_afpdb_patient=cl.generate_labels("afdb",rec_name_array_afpdb_patient)
# y_afpdb_normal=cl.generate_labels("nsrdb",rec_name_array_afpdb_normal)
# y_all=np.array(y_afpdb_patient+y_afpdb_normal)
y_all = cl.generate_labels("afpdb", rec_name_array)
print("all label array is: " + str(y_all))

##################### change key value pairs of global vocab ####################
# Swap keys and values of the vocabulary.
# NOTE(review): `global_vocab` is not loaded in this fragment; it must already
# be in scope. Presumably it maps feature name -> column index; confirm.
inv_global_vocab = dict(zip(global_vocab.values(), global_vocab.keys()))
# print type(inv_global_vocab.values())
# Materialise the values as a list: on Python 3 dict.values() is a view that
# NumPy cannot convert to a 1-D array, which makes np.savetxt fail.
all_features_list = list(inv_global_vocab.values())
np.savetxt(
    output_folder + "all_features_list.txt",
    all_features_list,
    fmt="%s",
    delimiter=',',
    newline='\n',
)

# combine lists and convert list of lists to one big matrix
# NOTE(review): `all_features` is not loaded in this fragment either.
all_feature_matrix = cl.covert_array_to_matrix(all_features)
# all_feature_matrix=cl.covert_array_to_matrix(all_features_afpdb_patient+all_features_afpdb_normal);
# normalized_matrix=cl.normalise_mean_var(all_feature_matrix)
# print all_feature_matrix
# Record names for the data set under output_folder. The commented-out lines
# are earlier per-database variants, kept for reference.
# rec_name_array_nsrdb=rw.read_features_frm_file(output_folder_nsrdb,"rec_name_array_pickle.txt")
# rec_name_array_afdb=rw.read_features_frm_file(output_folder_afdb,"rec_name_array_pickle.txt")
rec_name_array = rw.read_features_frm_file(output_folder, "rec_name_array_pickle.txt")

# all_rec_name_array=rec_name_array_afpdb_patient+ rec_name_array_afpdb_normal
# With a single source there is nothing to concatenate, so this is an alias.
all_rec_name_array = rec_name_array
print("all rec_name array is: " + str(all_rec_name_array))

# generate class labels
# y_afpdb=cl.generate_labels("afpdb",rec_name_array_afpdb)
# y_nsrdb=cl.generate_labels("nsrdb",rec_name_array_nsrdb)
# y_afdb=cl.generate_labels("afdb",rec_name_array_afdb)
# y_aftdb=cl.generate_labels("aftdb",rec_name_array_aftdb)
# y_afpdb_patient=cl.generate_labels("afdb",rec_name_array_afpdb_patient)
# y_afpdb_normal=cl.generate_labels("nsrdb",rec_name_array_afpdb_normal)
# y_all=np.array(y_afpdb_patient+y_afpdb_normal)
y_all = cl.generate_labels("afpdb", rec_name_array)
print("all label array is: " + str(y_all))

##################### change key value pairs of global vocab ####################
# Swap keys and values of the vocabulary.
# NOTE(review): `global_vocab` is not loaded in this fragment; it must already
# be in scope. Presumably it maps feature name -> column index; confirm.
inv_global_vocab = dict(zip(global_vocab.values(), global_vocab.keys()))
# print type(inv_global_vocab.values())
# Materialise the values as a list: on Python 3 dict.values() is a view that
# NumPy cannot convert to a 1-D array, which makes np.savetxt fail.
all_features_list = list(inv_global_vocab.values())
np.savetxt(
    output_folder + "all_features_list.txt",
    all_features_list,
    fmt="%s",
    delimiter=',',
    newline='\n',
)

# combine lists and convert list of lists to one big matrix
# NOTE(review): `all_features` is not loaded in this fragment either.
all_feature_matrix = cl.covert_array_to_matrix(all_features)
# all_feature_matrix=cl.covert_array_to_matrix(all_features_afpdb_patient+all_features_afpdb_normal);