def test_range_includes_zero(self):
    """A range string starting at zero ('0-4') fills min_range and max_range."""
    parsed = resolve_data_float(u'0-4')
    expected = DataStringToDictTest.compare_dict2.copy()
    for key in ('min_range', 'max_range'):
        self.assertEqual(parsed[key], expected[key])
def test_range_swap(self):
    """A reversed range '(35-21)' is reordered; the value is the range midpoint."""
    parsed = resolve_data_float(u'\n (35-21) ± 12.45(N =2)')
    expected = DataStringToDictTest.compare_dict.copy()
    expected['value'] = (expected['min_range'] + expected['max_range']) / 2
    for key in ('value', 'min_range', 'max_range', 'num_obs', 'error'):
        self.assertEqual(parsed[key], expected[key])
def test_data_range_plus_error_plus_n(self):
    """Range with '+/-' error and parenthesized N; value is the range midpoint."""
    parsed = resolve_data_float(u'21-35 +/- 12.45 (2)')
    expected = DataStringToDictTest.compare_dict.copy()
    expected['value'] = (expected['min_range'] + expected['max_range']) / 2
    for key in ('value', 'min_range', 'max_range', 'num_obs', 'error'):
        self.assertEqual(parsed[key], expected[key])
def test_no_digit_before_decimal(self):
    """Decimals written without a leading zero ('.12', '.0045') still parse."""
    parsed = resolve_data_float(u'.12 ± .0045 (n2)')
    expected = DataStringToDictTest.compare_dict.copy()
    expected['value'] = 0.12
    expected['error'] = 0.0045
    for key in ('value', 'num_obs', 'error'):
        self.assertEqual(parsed[key], expected[key])
def normalize_nedm_val(nedm, range_check = True):
    """Normalize the data within neuroelectro.models NeuronEphysDataMap to standard units and range.

    Args:
        nedm: a neuroelectro.models NeuronEphysDataMap instance; reads its
            .val, .err, and .ephys_concept_map attributes.
        range_check: when True, values falling outside the property's
            accepted range (per check_data_val_range) are nulled out.

    Returns:
        dict with keys 'value' and 'error'; either entry is None if the
        corresponding quantity could not be converted or failed validation.
    """
    data_mean_value = nedm.val
    data_err_value = nedm.err
    # initialize output dictionary
    key_list = ['value', 'error']
    output_dict = dict.fromkeys(key_list)
    # NOTE(review): unit_reg appears unused here — presumably convert_units
    # builds its own registry; confirm before removing.
    unit_reg = UnitRegistry()
    ecm = nedm.ephys_concept_map
    ephys_prop = nedm.ephys_concept_map.ephys_prop
    # the property's canonical ("natural") unit that values are normalized to
    natural_unit = unicode(ephys_prop.units)
    # try to get unit from table header, if can't, assume unit is natural unit.
    # Fallback chain: ecm.identified_unit -> table-header parse -> data-cell
    # text parse -> natural unit.
    found_unit = ecm.identified_unit
    if found_unit is None:
        found_unit = get_units_from_table_header(ecm.ref_text)
    if found_unit is None:
        parsed_nedm = resolve_data_float(nedm.ref_text, initialize_dict = True)
        found_unit = parsed_nedm['units']
        # TODO: need to save new unit to ecm now
    if found_unit is None:
        found_unit = natural_unit
    # normalize mean value
    conv_mean_value = convert_units(found_unit, natural_unit, data_mean_value)
    if conv_mean_value:
        # custom normalization for negative and ratio values
        conv_mean_value = convert_negative_value(conv_mean_value, ephys_prop)
        conv_mean_value = convert_percent_to_ratio(conv_mean_value, ephys_prop, ecm.ref_text)
        # check whether mean value in appropriate range; out-of-range values
        # are discarded (set to None) rather than clamped
        if range_check:
            if check_data_val_range(conv_mean_value, ephys_prop) is False:
                print 'neuron ephys data map %s, with pk %s out of appropriate range' % (data_mean_value, nedm.pk)
                print conv_mean_value, ephys_prop
                conv_mean_value = None
    output_dict['value'] = conv_mean_value
    # normalize error term
    # TODO: address if errors represented as standard deviations
    if data_err_value:
        conv_err_value = convert_units(found_unit, natural_unit, data_err_value)
        if conv_err_value:
            conv_err_value = convert_percent_to_ratio(conv_err_value, ephys_prop, ecm.ref_text)
            #print 'reported err val: %s, norm err val: %s' % (nedm.err, conv_err_value)
            # really basic check for error term validity: a negative error
            # bar is meaningless, so drop it
            if conv_err_value < 0:
                conv_err_value = None
        output_dict['error'] = conv_err_value
    return output_dict
def add_ephys_nedm(ephys_name, ephys_value, pmid, neuron_type, user, overwrite=True):
    """Create (or overwrite) a NeuronEphysDataMap linking an ephys property
    value to a neuron type for a given article.

    Args:
        ephys_name: name of an existing EphysProp.
        ephys_value: raw data string to be parsed by resolve_data_float;
            an empty string is treated as "no data" and skipped.
        pmid: PubMed ID of the source article.
        neuron_type: name of an existing Neuron.
        user: the submitting user.
        overwrite: when True, delete any pre-existing matching
            NeuronEphysDataMap before creating the new one.
    """
    # Fix: original used `ephys_value is ''`, an identity comparison that is
    # implementation-dependent for strings; equality is the correct check.
    if ephys_value == '':
        return
    ephys_value_list = resolve.resolve_data_float(ephys_value)
    # ensure optional keys exist so the create call below never KeyErrors
    if 'error' not in ephys_value_list:
        ephys_value_list['error'] = None
    if 'numCells' not in ephys_value_list:
        ephys_value_list['numCells'] = None
    a = pubmed_functions.add_single_article_full(pmid)
    n = m.Neuron.objects.filter(name = neuron_type)[0]
    us_ob = m.UserSubmission.objects.get_or_create(user = user, article = a)[0]
    ds_ob = m.DataSource.objects.get_or_create(user_submission = us_ob)[0]
    ncm_ob = m.NeuronConceptMap.objects.get_or_create(source = ds_ob,
                                                     added_by = user,
                                                     neuron = n,
                                                     times_validated = 1)[0]
    ephys_prop_ob = m.EphysProp.objects.get(name = ephys_name)
    ecm_ob = m.EphysConceptMap.objects.get_or_create(ephys_prop = ephys_prop_ob,
                                                    source = ds_ob,
                                                    added_by = user,
                                                    times_validated = 1)[0]
    ds_ob.save()
    # if overwrite is false, just make a new nedm, otherwise find the old
    # nedm (if it exists) and then overwrite it
    try:
        nedm = m.NeuronEphysDataMap.objects.get(source = ds_ob,
                                                added_by = user,
                                                neuron_concept_map = ncm_ob,
                                                ephys_concept_map = ecm_ob)
        if overwrite is True:
            nedm.delete()
    except ObjectDoesNotExist:
        pass
    m.NeuronEphysDataMap.objects.get_or_create(source = ds_ob,
                                               added_by = user,
                                               neuron_concept_map = ncm_ob,
                                               ephys_concept_map = ecm_ob,
                                               val = ephys_value_list['value'],
                                               val_norm = ephys_value_list['value'],
                                               err = ephys_value_list['error'],
                                               times_validated = 1,
                                               n = ephys_value_list['numCells'],
                                               )[0]
def find_data_vals_in_table(data_table_object):
    """Parses neuroelectro.models DataTable object for assigned NeuronConceptMaps
    and EphysConceptMaps and returns a dictionary of data values at the row and
    column intersection of these

    Args:
        data_table_object: neuroelectro.models DataTable object with associated
            NeuronConceptMap and EphysConceptMap objects.

    Returns:
        (dict) return_dict: a dictionary with keys corresponding to html tag
            elements from the entered data table and fields:
            'ncm_pk', 'ecm_pk', 'ref_text', 'data_value_dict', and 'efcm_pk_list'

    Example:
        >>> find_data_vals_in_table(data_table_ob)
        {'td-69': {'ncm_pk': 1L,
                   'ecm_pk': 1L,
                   'ref_text': u'463\xa0\xb1\xa089\xa0 (15)',
                   'data_value_dict': {'num_obs': 15, 'max_range': None,
                                       'min_range': None, 'value': 463.0,
                                       'error': 89.0},
                   'efcm_pk_list': []
                   }
        }
    """
    # check that data_table_object has both ephys obs and neuron concept obs
    return_dict = dict()
    try:
        table_soup = BeautifulSoup(data_table_object.table_html, 'lxml')
        ds = m.DataSource.objects.get(data_table=data_table_object)
        ecm_obs = ds.ephysconceptmap_set.all()
        ncm_obs = ds.neuronconceptmap_set.all()
        efcm_obs = ds.expfactconceptmap_set.all()
        # first check if there are ephys and neuron concept maps assigned to table
        if ecm_obs.count() > 0 and ncm_obs.count() > 0:
            # returns a flattened, parsed form of table where data table cells
            # can be easily checked for associated concept maps
            dataTable, numHeaderRows, html_tag_id_table = rep_html_table_struct(data_table_object.table_html)
            # if dataTable or idTable is none, parsing table failed, so return
            if dataTable is None or html_tag_id_table is None:
                return dict()
            # for each neuron concept map
            for ncm in ncm_obs:
                ncm_html_tag_id = ncm.dt_id
                # the same ncm may be linked to multiple data table value cells
                # due to rowspan/colspan issues
                matching_neuron_cells = get_matching_inds(ncm_html_tag_id, html_tag_id_table)
                for ecm in ecm_obs:
                    ecm_html_tag_id = ecm.dt_id
                    # dt_id of '-1' marks an ecm with no table cell assignment
                    if ecm_html_tag_id == '-1' or len(html_tag_id_table) == 0:
                        continue
                    # the same ecm may be linked to multiple data table cells
                    matching_ephys_cells = get_matching_inds(ecm_html_tag_id, html_tag_id_table)
                    # iterate through all matched cells, finding corresponding
                    # data value cells at their intersection
                    for c1 in matching_neuron_cells:
                        ncm_row_ind = c1[0]
                        ncm_col_ind = c1[1]
                        for c2 in matching_ephys_cells:
                            ecm_row_ind = c2[0]
                            ecm_col_ind = c2[1]
                            # the max below is saying data cells are usually to
                            # the right and bottom of header cells
                            table_cell_row_ind = max(ncm_row_ind, ecm_row_ind)
                            table_cell_col_ind = max(ncm_col_ind, ecm_col_ind)
                            table_cell_html_tag_id = html_tag_id_table[table_cell_row_ind][table_cell_col_ind]
                            data_tag = table_soup.find(id=table_cell_html_tag_id)
                            if data_tag is None:
                                continue
                            ref_text = data_tag.get_text()
                            # regex out the floating point values of data value string
                            data_dict = resolve_data_float(ref_text, True)
                            if data_dict['value']:
                                # check for experimental factor concept maps
                                if efcm_obs is not None:
                                    # get efcm and add it to nedm
                                    efcm_pk_list = []
                                    for efcm in efcm_obs:
                                        efcm_html_tag_id = efcm.dt_id
                                        # get table cells for this efcm
                                        matching_efcm_cells = get_matching_inds(efcm_html_tag_id, html_tag_id_table)
                                        # if any of the efcm cells match up with
                                        # the current cell, add it to the list
                                        matching_rows = [e[0] for e in matching_efcm_cells]
                                        matching_cols = [e[1] for e in matching_efcm_cells]
                                        if table_cell_row_ind in matching_rows or table_cell_col_ind in matching_cols:
                                            # if efcm is num obs, store value in
                                            # appropriate place and don't add to
                                            # metadata list
                                            if efcm.metadata.name == 'NumObs' and data_dict['num_obs'] is None:
                                                data_dict['num_obs'] = efcm.metadata.cont_value.mean
                                            else:
                                                efcm_pk_list.append(efcm.pk)
                                # NOTE(review): efcm_pk_list is only initialized
                                # inside the `efcm_obs is not None` branch above;
                                # a Django .all() queryset is never None, so in
                                # practice it is always set — confirm.
                                temp_return_dict = dict()
                                temp_return_dict['ncm_pk'] = ncm.pk
                                temp_return_dict['ecm_pk'] = ecm.pk
                                temp_return_dict['ref_text'] = ref_text
                                temp_return_dict['data_value_dict'] = data_dict
                                temp_return_dict['efcm_pk_list'] = efcm_pk_list
                                return_dict[table_cell_html_tag_id] = temp_return_dict
        return return_dict
    except TypeError:
        return dict()
def test_data_mean_plus_range(self):
    """A mean followed by a parenthesized range ('-23 (21-35)') parses fully."""
    parsed = resolve_data_float(u'-23 (21-35)')
    reference = DataStringToDictTest.compare_dict
    for key in ('value', 'min_range', 'max_range'):
        self.assertEqual(parsed[key], reference[key])
def test_mean_plus_error_plus_n_bad(self):
    """Empty parentheses after the error term should not break value/error parsing."""
    parsed = resolve_data_float(u'-23 ± 12.45 ()')
    reference = DataStringToDictTest.compare_dict
    for key in ('value', 'error'):
        self.assertEqual(parsed[key], reference[key])
def test_mean_only(self):
    """A bare mean string ('-23') yields just the value field."""
    parsed = resolve_data_float(u'-23')
    reference = DataStringToDictTest.compare_dict
    self.assertEqual(parsed['value'], reference['value'])