def test_range_includes_zero(self):
    """A range string starting at zero ('0-4') fills min_range and max_range."""
    parsed = resolve_data_float(u'0-4')
    expected = DataStringToDictTest.compare_dict2.copy()
    for key in ('min_range', 'max_range'):
        self.assertEqual(parsed[key], expected[key])
def test_range_swap(self):
    """A reversed range '(35-21)' is reordered; the value is the range midpoint."""
    parsed = resolve_data_float(u'\n (35-21) ± 12.45(N =2)')
    expected = DataStringToDictTest.compare_dict.copy()
    expected['value'] = (expected['min_range'] + expected['max_range']) / 2
    for key in ('value', 'min_range', 'max_range', 'num_obs', 'error'):
        self.assertEqual(parsed[key], expected[key])
def test_data_range_plus_error_plus_n(self):
    """Range with '+/-' error and parenthesized N; value is the range midpoint."""
    parsed = resolve_data_float(u'21-35 +/- 12.45 (2)')
    expected = DataStringToDictTest.compare_dict.copy()
    expected['value'] = (expected['min_range'] + expected['max_range']) / 2
    for key in ('value', 'min_range', 'max_range', 'num_obs', 'error'):
        self.assertEqual(parsed[key], expected[key])
def test_no_digit_before_decimal(self):
    """Decimals written without a leading zero ('.12', '.0045') still parse."""
    parsed = resolve_data_float(u'.12 ± .0045 (n2)')
    expected = DataStringToDictTest.compare_dict.copy()
    expected['value'] = 0.12
    expected['error'] = 0.0045
    for key in ('value', 'num_obs', 'error'):
        self.assertEqual(parsed[key], expected[key])
def normalize_nedm_val(nedm, range_check = True):
    """Normalize the data within neuroelectro.models NeuronEphysDataMap to standard units and range.

    Args:
        nedm: a neuroelectro.models NeuronEphysDataMap instance; reads its
            .val, .err, and .ephys_concept_map attributes.
        range_check: when True, values falling outside the property's
            accepted range (per check_data_val_range) are nulled out.

    Returns:
        dict with keys 'value' and 'error'; either entry is None if the
        corresponding quantity could not be converted or failed validation.
    """
    data_mean_value = nedm.val
    data_err_value = nedm.err
    # initialize output dictionary
    key_list = ['value', 'error']
    output_dict = dict.fromkeys(key_list)
    # NOTE(review): unit_reg appears unused here — presumably convert_units
    # builds its own registry; confirm before removing.
    unit_reg = UnitRegistry()
    ecm = nedm.ephys_concept_map
    ephys_prop = nedm.ephys_concept_map.ephys_prop
    # the property's canonical ("natural") unit that values are normalized to
    natural_unit = unicode(ephys_prop.units)
    # try to get unit from table header, if can't, assume unit is natural unit.
    # Fallback chain: ecm.identified_unit -> table-header parse -> data-cell
    # text parse -> natural unit.
    found_unit = ecm.identified_unit
    if found_unit is None:
        found_unit = get_units_from_table_header(ecm.ref_text)
    if found_unit is None:
        parsed_nedm = resolve_data_float(nedm.ref_text, initialize_dict = True)
        found_unit = parsed_nedm['units']
        # TODO: need to save new unit to ecm now
    if found_unit is None:
        found_unit = natural_unit
    # normalize mean value
    conv_mean_value = convert_units(found_unit, natural_unit, data_mean_value)
    if conv_mean_value:
        # custom normalization for negative and ratio values
        conv_mean_value = convert_negative_value(conv_mean_value, ephys_prop)
        conv_mean_value = convert_percent_to_ratio(conv_mean_value, ephys_prop, ecm.ref_text)
        # check whether mean value in appropriate range; out-of-range values
        # are discarded (set to None) rather than clamped
        if range_check:
            if check_data_val_range(conv_mean_value, ephys_prop) is False:
                print 'neuron ephys data map %s, with pk %s out of appropriate range' % (data_mean_value, nedm.pk)
                print conv_mean_value, ephys_prop
                conv_mean_value = None
    output_dict['value'] = conv_mean_value
    # normalize error term
    # TODO: address if errors represented as standard deviations
    if data_err_value:
        conv_err_value = convert_units(found_unit, natural_unit, data_err_value)
        if conv_err_value:
            conv_err_value = convert_percent_to_ratio(conv_err_value, ephys_prop, ecm.ref_text)
            #print 'reported err val: %s, norm err val: %s' % (nedm.err, conv_err_value)
            # really basic check for error term validity: a negative error
            # bar is meaningless, so drop it
            if conv_err_value < 0:
                conv_err_value = None
        output_dict['error'] = conv_err_value
    return output_dict
def add_ephys_nedm(ephys_name, ephys_value, pmid, neuron_type, user, overwrite=True):
    """Create (or overwrite) a NeuronEphysDataMap linking an ephys property
    value to a neuron type for a given article.

    Args:
        ephys_name: name of an existing EphysProp.
        ephys_value: raw data string to be parsed by resolve_data_float;
            an empty string is treated as "no data" and skipped.
        pmid: PubMed ID of the source article.
        neuron_type: name of an existing Neuron.
        user: the submitting user.
        overwrite: when True, delete any pre-existing matching
            NeuronEphysDataMap before creating the new one.
    """
    # Fix: original used `ephys_value is ''`, an identity comparison that is
    # implementation-dependent for strings; equality is the correct check.
    if ephys_value == '':
        return
    ephys_value_list = resolve.resolve_data_float(ephys_value)
    # ensure optional keys exist so the create call below never KeyErrors
    if 'error' not in ephys_value_list:
        ephys_value_list['error'] = None
    if 'numCells' not in ephys_value_list:
        ephys_value_list['numCells'] = None
    a = pubmed_functions.add_single_article_full(pmid)
    n = m.Neuron.objects.filter(name = neuron_type)[0]
    us_ob = m.UserSubmission.objects.get_or_create(user = user, article = a)[0]
    ds_ob = m.DataSource.objects.get_or_create(user_submission = us_ob)[0]
    ncm_ob = m.NeuronConceptMap.objects.get_or_create(source = ds_ob,
                                                     added_by = user,
                                                     neuron = n,
                                                     times_validated = 1)[0]
    ephys_prop_ob = m.EphysProp.objects.get(name = ephys_name)
    ecm_ob = m.EphysConceptMap.objects.get_or_create(ephys_prop = ephys_prop_ob,
                                                    source = ds_ob,
                                                    added_by = user,
                                                    times_validated = 1)[0]
    ds_ob.save()
    # if overwrite is false, just make a new nedm, otherwise find the old
    # nedm (if it exists) and then overwrite it
    try:
        nedm = m.NeuronEphysDataMap.objects.get(source = ds_ob,
                                                added_by = user,
                                                neuron_concept_map = ncm_ob,
                                                ephys_concept_map = ecm_ob)
        if overwrite is True:
            nedm.delete()
    except ObjectDoesNotExist:
        pass
    m.NeuronEphysDataMap.objects.get_or_create(source = ds_ob,
                                               added_by = user,
                                               neuron_concept_map = ncm_ob,
                                               ephys_concept_map = ecm_ob,
                                               val = ephys_value_list['value'],
                                               val_norm = ephys_value_list['value'],
                                               err = ephys_value_list['error'],
                                               times_validated = 1,
                                               n = ephys_value_list['numCells'],
                                               )[0]
def find_data_vals_in_table(data_table_object):
    """Parses neuroelectro.models DataTable object for assigned NeuronConceptMaps
    and EphysConceptMaps and returns a dictionary of data values at the row and
    column intersection of these

    Args:
        data_table_object: neuroelectro.models DataTable object with associated
            NeuronConceptMap and EphysConceptMap objects.

    Returns:
        (dict) return_dict: a dictionary with keys corresponding to html tag
            elements from the entered data table and fields:
            'ncm_pk', 'ecm_pk', 'ref_text', 'data_value_dict', and 'efcm_pk_list'

    Example:
        >>> find_data_vals_in_table(data_table_ob)
        {'td-69': {'ncm_pk': 1L,
                   'ecm_pk': 1L,
                   'ref_text': u'463\xa0\xb1\xa089\xa0 (15)',
                   'data_value_dict': {'num_obs': 15, 'max_range': None,
                                       'min_range': None, 'value': 463.0,
                                       'error': 89.0},
                   'efcm_pk_list': []
                   }
        }
    """
    # check that data_table_object has both ephys obs and neuron concept obs
    return_dict = dict()
    try:
        table_soup = BeautifulSoup(data_table_object.table_html, 'lxml')
        ds = m.DataSource.objects.get(data_table=data_table_object)
        ecm_obs = ds.ephysconceptmap_set.all()
        ncm_obs = ds.neuronconceptmap_set.all()
        efcm_obs = ds.expfactconceptmap_set.all()
        # first check if there are ephys and neuron concept maps assigned to table
        if ecm_obs.count() > 0 and ncm_obs.count() > 0:
            # returns a flattened, parsed form of table where data table cells
            # can be easily checked for associated concept maps
            dataTable, numHeaderRows, html_tag_id_table = rep_html_table_struct(data_table_object.table_html)
            # if dataTable or idTable is none, parsing table failed, so return
            if dataTable is None or html_tag_id_table is None:
                return dict()
            # for each neuron concept map
            for ncm in ncm_obs:
                ncm_html_tag_id = ncm.dt_id
                # the same ncm may be linked to multiple data table value cells
                # due to rowspan/colspan issues
                matching_neuron_cells = get_matching_inds(ncm_html_tag_id, html_tag_id_table)
                for ecm in ecm_obs:
                    ecm_html_tag_id = ecm.dt_id
                    # dt_id of '-1' marks an ecm with no table cell assignment
                    if ecm_html_tag_id == '-1' or len(html_tag_id_table) == 0:
                        continue
                    # the same ecm may be linked to multiple data table cells
                    matching_ephys_cells = get_matching_inds(ecm_html_tag_id, html_tag_id_table)
                    # iterate through all matched cells, finding corresponding
                    # data value cells at their intersection
                    for c1 in matching_neuron_cells:
                        ncm_row_ind = c1[0]
                        ncm_col_ind = c1[1]
                        for c2 in matching_ephys_cells:
                            ecm_row_ind = c2[0]
                            ecm_col_ind = c2[1]
                            # the max below is saying data cells are usually to
                            # the right and bottom of header cells
                            table_cell_row_ind = max(ncm_row_ind, ecm_row_ind)
                            table_cell_col_ind = max(ncm_col_ind, ecm_col_ind)
                            table_cell_html_tag_id = html_tag_id_table[table_cell_row_ind][table_cell_col_ind]
                            data_tag = table_soup.find(id=table_cell_html_tag_id)
                            if data_tag is None:
                                continue
                            ref_text = data_tag.get_text()
                            # regex out the floating point values of data value string
                            data_dict = resolve_data_float(ref_text, True)
                            if data_dict['value']:
                                # check for experimental factor concept maps
                                if efcm_obs is not None:
                                    # get efcm and add it to nedm
                                    efcm_pk_list = []
                                    for efcm in efcm_obs:
                                        efcm_html_tag_id = efcm.dt_id
                                        # get table cells for this efcm
                                        matching_efcm_cells = get_matching_inds(efcm_html_tag_id, html_tag_id_table)
                                        # if any of the efcm cells match up with
                                        # the current cell, add it to the list
                                        matching_rows = [e[0] for e in matching_efcm_cells]
                                        matching_cols = [e[1] for e in matching_efcm_cells]
                                        if table_cell_row_ind in matching_rows or table_cell_col_ind in matching_cols:
                                            # if efcm is num obs, store value in
                                            # appropriate place and don't add to
                                            # metadata list
                                            if efcm.metadata.name == 'NumObs' and data_dict['num_obs'] is None:
                                                data_dict['num_obs'] = efcm.metadata.cont_value.mean
                                            else:
                                                efcm_pk_list.append(efcm.pk)
                                # NOTE(review): efcm_pk_list is only initialized
                                # inside the `efcm_obs is not None` branch above;
                                # a Django .all() queryset is never None, so in
                                # practice it is always set — confirm.
                                temp_return_dict = dict()
                                temp_return_dict['ncm_pk'] = ncm.pk
                                temp_return_dict['ecm_pk'] = ecm.pk
                                temp_return_dict['ref_text'] = ref_text
                                temp_return_dict['data_value_dict'] = data_dict
                                temp_return_dict['efcm_pk_list'] = efcm_pk_list
                                return_dict[table_cell_html_tag_id] = temp_return_dict
        return return_dict
    except TypeError:
        return dict()
def test_data_mean_plus_range(self):
    """A mean followed by a parenthesized range ('-23 (21-35)') parses fully."""
    parsed = resolve_data_float(u'-23 (21-35)')
    reference = DataStringToDictTest.compare_dict
    for key in ('value', 'min_range', 'max_range'):
        self.assertEqual(parsed[key], reference[key])
def test_mean_plus_error_plus_n_bad(self):
    """Empty parentheses after the error term should not break value/error parsing."""
    parsed = resolve_data_float(u'-23 ± 12.45 ()')
    reference = DataStringToDictTest.compare_dict
    for key in ('value', 'error'):
        self.assertEqual(parsed[key], reference[key])
def test_mean_only(self):
    """A bare mean string ('-23') yields just the value field."""
    parsed = resolve_data_float(u'-23')
    reference = DataStringToDictTest.compare_dict
    self.assertEqual(parsed['value'], reference['value'])