# test_data_predictor.py -- forked from jlbouchot/MiDaPP
# Script to test the data_predictor module
import data_predictor
biom_fname = "../../study_550_closed_reference_otu_table.biom"
map_fname = "study_550_mapping_file.txt"
foi_fname = "feature_of_interest_study_550.txt"
data_matrix, site_names, variable_names, environmental_param, hashtable_env = data_predictor.create_fused_data_matrix(biom_fname, map_fname, foi_fname)
print data_matrix.shape
print environmental_param
row_none, col_none, is_none = data_predictor.find_missing_data_locations(data_matrix)
print "There are ", len(list(row_none)), " none's in this study"
# Test the artificial unknown mask generation on a random full matrix
missing_ratio = 0.2
from_rows = xrange(len(variable_names),len(variable_names)+len(environmental_param))
rdm_matrix = numpy.random.uniform(0,1,data_matrix.size).reshape(data_matrix.shape)
row_none, col_none, bin_mask, new_matrix = data_predictor.create_random_unknown_mask(data_matrix, from_rows, missing_ratio)
print "Number of missing data from the row list: ", len(row_none)
print "Number of missing data from the col list: ", len(col_none)
new_row_none, new_col_none, new_is_none = data_predictor.find_missing_data_locations(new_matrix)
if len(new_row_none) == len(row_none):
print "Both have the same size" # We should actually make sure that have the same components!