def test_aggregate(mock_save_results, mock_get_results, mock_fetch_data):
    """Aggregated coefficients from two partial jobs match a single run.

    ``intermediate()`` is run once per input slice (``limit_from``/``limit_to``
    of 0-8 and 8-20) and whatever each run passed to ``mock_save_results`` is
    handed to ``aggregate`` through ``mock_get_results``. The ``coef`` entries
    of the aggregated output are then compared, after ``t.round_dict``, with
    those saved by ``main()`` on inputs fetched with ``limit_to=20``.
    """

    def saved_payload():
        # First positional argument of the most recent save call.
        return mock_save_results.call_args[0][0]

    def coefficients(raw):
        # Keep only the 'coef' entry of every item in the saved JSON.
        return {name: stats['coef'] for name, stats in json.loads(raw).items()}

    # run partial jobs
    partial_outputs = []
    for start, stop in ((0, 8), (8, 20)):
        mock_fetch_data.return_value = t.inputs_regression(
            limit_from=start, limit_to=stop, include_nominal=True)
        intermediate()
        partial_outputs.append(saved_payload())

    mock_get_results.side_effect = [
        mock.MagicMock(data=payload, error='') for payload in partial_outputs
    ]

    # run computations
    aggregate(['1', '2'])
    beta_agg = coefficients(saved_payload())

    # calculate coefficients from single-node regression
    mock_fetch_data.return_value = t.inputs_regression(
        limit_to=20, include_nominal=True)
    main()
    beta_single = coefficients(saved_payload())

    assert t.round_dict(beta_agg) == t.round_dict(beta_single)
def test_main_logistic(mock_save_results, mock_fetch_data):
    """``main()`` on classification inputs saves the expected fixture output.

    The JSON passed to ``mock_save_results`` is compared, after
    ``t.round_dict``, with ``fx.output_classification()``.
    """
    classification_inputs = t.inputs_classification(
        limit_to=50, include_nominal=True)
    mock_fetch_data.return_value = classification_inputs

    main()

    # First positional argument of the most recent save call.
    saved_json = mock_save_results.call_args[0][0]
    actual = json.loads(saved_json)

    expected = t.round_dict(fx.output_classification())
    assert expected == t.round_dict(actual)
def test_main(mock_save_results, mock_fetch_data):
    """``main()`` on regression inputs without nominals saves the expected table.

    The saved JSON is rounded with ``t.round_dict`` and compared with one
    row of statistics per term.
    """
    mock_fetch_data.return_value = t.inputs_regression(include_nominal=False)
    main()
    result = json.loads(mock_save_results.call_args[0][0])

    # Expected statistics per term, in the order given by `columns`.
    columns = ('F', 'PR(>F)', 'df', 'mean_sq', 'sum_sq')
    rows = {
        'Residual': ('NaN', 'NaN', 1.0, 0.019, 0.019),
        'minimentalstate': (12.831, 0.173, 1.0, 0.248, 0.248),
        'subjectage': (0.958, 0.507, 1.0, 0.019, 0.019),
        'subjectage:minimentalstate': (0.096, 0.808, 1.0, 0.002, 0.002),
    }
    expected = {
        term: dict(zip(columns, values)) for term, values in rows.items()
    }

    assert t.round_dict(result) == expected
def test_compute_heatmap_continuous(mock_save_results, mock_fetch_data):
    """``compute('correlation_heatmap')`` saves the expected heatmap figure.

    Inputs come from ``t.inputs_regression(include_nominal=True)``; the saved
    JSON is rounded with ``t.round_dict`` before comparison.
    """
    mock_fetch_data.return_value = t.inputs_regression(include_nominal=True)
    compute('correlation_heatmap')
    results = json.loads(mock_save_results.call_args[0][0])

    x_labels = ['lefthippocampus', 'subjectage', 'minimentalstate']
    y_labels = ['minimentalstate', 'subjectage', 'lefthippocampus']
    z_values = [
        [0.959, -0.343, 1.0],
        [-0.254, 1.0, -0.343],
        [1.0, -0.254, 0.959],
    ]
    heatmap_trace = {
        'type': 'heatmap',
        'x': x_labels,
        'xaxis': 'x1',
        'y': y_labels,
        'yaxis': 'y1',
        'z': z_values,
        'zmax': 1,
        'zmin': -1,
    }
    expected = {
        'data': [heatmap_trace],
        'layout': {'title': 'Correlation heatmap'},
    }

    assert t.round_dict(results) == expected
def test_compute_heatmap_mix(mock_save_results, mock_fetch_data):
    """``compute('correlation_heatmap')`` on classification inputs saves two traces.

    The first two entries of the saved ``data`` list are each rounded with
    ``t.round_dict`` and compared with the expected heatmap dictionaries.
    """
    # two categorical variables
    mock_fetch_data.return_value = t.inputs_classification(include_nominal=True)
    compute('correlation_heatmap')
    results = json.loads(mock_save_results.call_args[0][0])
    traces = results['data']

    expected_first = {
        'type': 'heatmap',
        'z': [[-0.343, 1.0], [1.0, -0.343]],
        'x': ['subjectage', 'minimentalstate'],
        'y': ['minimentalstate', 'subjectage'],
        'zmin': -1,
        'zmax': 1,
        'xaxis': 'x1',
        'yaxis': 'y1',
        'colorbar': {'len': 0.5, 'y': 0.75},
    }
    assert t.round_dict(traces[0]) == expected_first

    expected_second = {
        'type': 'heatmap',
        'z': [
            [0, 0, 0, 0],
            [0, 0.5, 0.5, 0.5],
            [0, 1, 1, 1],
            [0, 0.5, 0.5, 0.5],
            [0, 1, 1, 1],
        ],
        'opacity': 0.75,
        'colorscale': [[0, '#00083e'], [0.5, '#ededee'], [1, '#ffffff']],
        'showscale': 0,
        'hoverinfo': 'none',
        'xaxis': 'x2',
        'yaxis': 'y2',
    }
    assert t.round_dict(traces[1]) == expected_second
def test_aggregate_single(mock_save_results, mock_get_results, mock_fetch_data):
    """Aggregating one node's partial job should equal ordinary linear regression.

    The output saved by a single ``intermediate()`` run is handed to
    ``aggregate(['1'])``; the aggregated result must equal, after
    ``t.round_dict``, what ``main()`` saves for inputs fetched with
    ``limit_to=20``.
    """

    def last_saved():
        # First positional argument of the most recent save call.
        return mock_save_results.call_args[0][0]

    # run partial jobs
    mock_fetch_data.return_value = t.inputs_regression(limit_from=0, limit_to=20)
    intermediate()
    partial_output = last_saved()

    mock_get_results.side_effect = [
        mock.MagicMock(data=partial_output, error=''),
    ]

    # run computations
    aggregate(['1'])
    aggregated = json.loads(last_saved())

    # calculate coefficients from single-node regression
    mock_fetch_data.return_value = t.inputs_regression(limit_to=20)
    main()
    single_node = json.loads(last_saved())

    assert t.round_dict(aggregated) == t.round_dict(single_node)
def test_intermediate_stats_nominal(mock_save_results, mock_fetch_data):
    """``intermediate_stats()`` on classification inputs saves 12 entries.

    The saved ``data`` entries are ordered by ``(index, group)`` and the
    first two are compared, after ``t.round_dict``, with the expected
    statistics.
    """

    def by_index_and_group(entry):
        return entry['index'], tuple(entry['group'])

    mock_fetch_data.return_value = t.inputs_classification(include_nominal=True)
    intermediate_stats()
    results = json.loads(mock_save_results.call_args[0][0])

    assert len(results['data']) == 12
    ordered = sorted(results['data'], key=by_index_and_group)

    # Fields common to both expected entries.
    shared = {
        'index': 'adnicategory',
        'label': 'ADNI category',
        'type': 'polynominal',
        'group_variables': ['Age Group'],
        'top': 'Other',
        'null_count': 0,
    }
    first = dict(shared)
    first.update({
        'group': ['-50y'],
        'count': 3,
        'unique': 3,
        'frequency': {'Other': 1, 'CN': 1, 'AD': 1},
    })
    second = dict(shared)
    second.update({
        'group': ['50-59y'],
        'count': 2,
        'unique': 1,
        'frequency': {'Other': 2, 'CN': 0, 'AD': 0},
    })

    assert t.round_dict(ordered[:2]) == [first, second]
def test_intermediate_stats_real(mock_save_results, mock_fetch_data):
    """``intermediate_stats()`` on regression inputs with nulls saves 12 entries.

    The saved ``data`` entries are ordered by ``(index, group)`` and the
    first two are compared, after ``t.round_dict``, with the expected
    statistics.
    """

    def by_index_and_group(entry):
        return entry['index'], tuple(entry['group'])

    # input data with some null values
    mock_fetch_data.return_value = t.inputs_regression(
        include_nominal=True, add_null=True)
    intermediate_stats()
    results = json.loads(mock_save_results.call_args[0][0])

    assert len(results['data']) == 12
    ordered = sorted(results['data'], key=by_index_and_group)

    # Fields common to both expected entries.
    shared = {
        'index': 'agegroup',
        'label': 'Age Group',
        'type': 'polynominal',
        'group_variables': ['Age Group'],
        'unique': 1,
        'null_count': 0,
    }
    first = dict(shared)
    first.update({
        'group': ['-50y'],
        'count': 3,
        'top': '-50y',
        'frequency': {'-50y': 3, '50-59y': 0},
    })
    second = dict(shared)
    second.update({
        'group': ['50-59y'],
        'count': 2,
        'top': '50-59y',
        'frequency': {'50-59y': 2, '-50y': 0},
    })

    assert t.round_dict(ordered[:2]) == [first, second]
def test_intermediate_stats(mock_save_results, mock_fetch_data):
    """``intermediate_stats()`` on regression inputs saves the expected summary.

    The saved JSON is rounded with ``t.round_dict`` and compared with the
    expected ``X^T * X`` matrix, columns, count, crosstab, means and nominal
    columns.
    """
    mock_fetch_data.return_value = t.inputs_regression(include_nominal=True)
    intermediate_stats()
    results = json.loads(mock_save_results.call_args[0][0])

    gram_matrix = [
        [44.891, 1055.621, 317.732],
        [1055.621, 25462.015, 7279.928],
        [317.732, 7279.928, 2354.0],
    ]
    crosstab = [
        {'agegroup': '-50y', 'count': 3},
        {'agegroup': '50-59y', 'count': 2},
    ]
    expected = {
        'X^T * X': gram_matrix,
        'columns': ['lefthippocampus', 'subjectage', 'minimentalstate'],
        'count': 5,
        'crosstab': crosstab,
        'means': [2.987, 70.86, 20.8],
        'nominal_columns': ['agegroup'],
    }

    assert t.round_dict(results) == expected
def test_aggregate_stats_correlation_heatmap(
        mock_save_results, mock_load_intermediate_json_results, mock_fetch):
    """``aggregate_stats`` with ``graph_type='correlation_heatmap'`` saves a heatmap.

    The two intermediate results returned by the loader mock come from
    ``intermediate_data_1()`` and ``intermediate_data_2()``; the saved JSON is
    rounded with ``t.round_dict`` before comparison.
    """
    partial_results = [intermediate_data_1(), intermediate_data_2()]
    mock_load_intermediate_json_results.return_value = partial_results

    aggregate_stats([1, 2], graph_type='correlation_heatmap')
    results = json.loads(mock_save_results.call_args[0][0])

    heatmap_trace = {
        'type': 'heatmap',
        'x': ['iq', 'stress_before_test1'],
        'xaxis': 'x1',
        'y': ['stress_before_test1', 'iq'],
        'yaxis': 'y1',
        'z': [[-0.429, 1.0], [1.0, -0.429]],
        'zmax': 1,
        'zmin': -1,
    }
    expected = {
        'data': [heatmap_trace],
        'layout': {'title': 'Correlation heatmap'},
    }

    assert t.round_dict(results) == expected
def test_main(mock_parameters, mock_save_results, mock_get_results,
              mock_fetch_data):
    """``main()`` saves the expected ``hinmine-features`` tabular resource.

    Inputs are ``t.inputs_regression(include_nominal=False, limit_to=5)`` and
    ``mock_get_results`` returns ``None``. The saved JSON is rounded with
    ``t.round_dict`` and compared with five records of features ``f_1``..``f_5``
    plus an ``id``, and the matching schema.
    """
    # create mock objects from database
    mock_fetch_data.return_value = t.inputs_regression(
        include_nominal=False, limit_to=5)
    mock_get_results.return_value = None

    main()
    results = json.loads(mock_save_results.call_args[0][0])

    feature_names = ['f_1', 'f_2', 'f_3', 'f_4', 'f_5']
    # One row of expected feature values per record; the row position is the id.
    feature_values = [
        [0.0, 0.51, 0.496, 0.49, 0.504],
        [0.509, 0.0, 0.496, 0.492, 0.503],
        [0.506, 0.506, 0.0, 0.484, 0.504],
        [0.505, 0.508, 0.49, 0.0, 0.497],
        [0.508, 0.508, 0.498, 0.486, 0.0],
    ]
    records = []
    for row_id, values in enumerate(feature_values):
        record = dict(zip(feature_names, values))
        record['id'] = float(row_id)
        records.append(record)

    expected = {
        'profile': 'tabular-data-resource',
        'name': 'hinmine-features',
        'data': records,
        'schema': {
            'fields': [{'name': name, 'type': 'float'} for name in feature_names],
            'primaryKey': 'id',
        },
    }

    assert t.round_dict(results) == expected
def test_main(mock_save_results, mock_fetch_data):
    """``main()`` on regression inputs saves the expected fixture output.

    The JSON passed to ``mock_save_results`` is compared, after
    ``t.round_dict``, with ``fx.output_regression()``.
    """
    regression_inputs = t.inputs_regression(include_nominal=True)
    mock_fetch_data.return_value = regression_inputs

    main()

    # First positional argument of the most recent save call.
    saved_json = mock_save_results.call_args[0][0]
    actual = json.loads(saved_json)

    expected = t.round_dict(fx.output_regression())
    assert expected == t.round_dict(actual)