コード例 #1
0
def normalize_nedm_val(nedm, range_check = True):
    """Normalize a NeuronEphysDataMap's value and error to the property's natural unit.

    The unit the data was reported in is looked up in this order: the concept
    map's ``identified_unit``, the concept map's ``ref_text`` (via
    ``get_units_from_table_header``), the data map's own ``ref_text`` (via
    ``resolve_data_float``), and finally the ephys property's natural unit.

    Args:
        nedm: a neuroelectro.models NeuronEphysDataMap; its ``val``, ``err``,
            ``ref_text``, ``pk`` and ``ephys_concept_map`` attributes are read.
        range_check (bool): when True, a converted mean value for which
            ``check_data_val_range`` returns False is reported and discarded.

    Returns:
        dict with keys 'value' and 'error'. Each entry is the normalized number,
        or None when it is missing, could not be converted, or was rejected.
    """
    data_mean_value = nedm.val
    data_err_value = nedm.err

    # both entries stay None unless a normalized number is produced below
    output_dict = dict.fromkeys(['value', 'error'])

    ecm = nedm.ephys_concept_map
    ephys_prop = ecm.ephys_prop
    natural_unit = unicode(ephys_prop.units)

    # try to get unit from table header, if can't, assume unit is natural unit
    found_unit = ecm.identified_unit
    if found_unit is None:
        found_unit = get_units_from_table_header(ecm.ref_text)
    if found_unit is None:
        parsed_nedm = resolve_data_float(nedm.ref_text, initialize_dict = True)
        found_unit = parsed_nedm['units']
        # TODO: need to save new unit to ecm now
    if found_unit is None:
        found_unit = natural_unit

    # normalize mean value
    conv_mean_value = convert_units(found_unit, natural_unit, data_mean_value)
    # Compare against None rather than relying on truthiness so that a valid
    # converted value of 0 is kept instead of being silently dropped.
    # NOTE(review): assumes convert_units returns None on failure - confirm.
    if conv_mean_value is not None:
        # custom normalization for negative and ratio values
        conv_mean_value = convert_negative_value(conv_mean_value, ephys_prop)
        conv_mean_value = convert_percent_to_ratio(conv_mean_value, ephys_prop, ecm.ref_text)

        # check whether mean value in appropriate range
        if range_check and check_data_val_range(conv_mean_value, ephys_prop) is False:
            print('neuron ephys data map %s, with pk %s out of appropriate range' % (data_mean_value, nedm.pk))
            print('%s %s' % (conv_mean_value, ephys_prop))
            conv_mean_value = None
        output_dict['value'] = conv_mean_value

    # normalize error term
    # TODO: address if errors represented as standard deviations

    # A missing or zero error is treated as "no error reported" and left as None.
    if data_err_value:
        conv_err_value = convert_units(found_unit, natural_unit, data_err_value)
        if conv_err_value is not None:
            conv_err_value = convert_percent_to_ratio(conv_err_value, ephys_prop, ecm.ref_text)

            # really basic check for error term validity
            if conv_err_value < 0:
                conv_err_value = None
            output_dict['error'] = conv_err_value

    return output_dict
コード例 #2
0
def identify_ephys_units():
    """Iterate through ephys concept map objects and assign identified_unit when one is found.

    For every EphysConceptMap, the unit is looked up from the object's
    ``ref_text`` with ``get_units_from_table_header``. When a unit is found it
    is stored on ``identified_unit`` and the object is saved. A failure to
    store one object is reported and the loop moves on to the next object.
    """
    ecms = m.EphysConceptMap.objects.all()
    ecm_count = ecms.count()
    print("adding units to ephys concept maps")
    for i, ecm in enumerate(ecms):
        prog(i, ecm_count)
        identified_unit = get_units_from_table_header(ecm.ref_text)
        if not identified_unit:
            continue
        try:
            ecm.identified_unit = identified_unit
            ecm.save()
        except Exception as e:
            # Deliberately best-effort: one bad row must not abort the whole
            # pass, but the failure is reported instead of being hidden.
            print("could not save identified unit %r for ephys concept map with pk %s: %r"
                  % (identified_unit, ecm.pk, e))
コード例 #3
0
def resolve_data_float(data_str, initialize_dict = False):
    """Given a string containing numerical data, return a dictionary of text-mined assertions of
        mean value, error term, number of observations, min and max range, and units

    Args:
        data_str (str): string from a data table cell, corresponding to form
                    XX +/- YY (ZZ) where XX refers the mean value, YY is the error term, and ZZ reflects count
        initialize_dict (bool) : indicates whether dict keys should all be initialized with None values
    Returns:
        a dictionary of text-mined data attributes and their values; unless
        initialize_dict is True, only the attributes that were found are present

        example:
        {'value' : 46.5,
         'error' : 3.4,
         'num_obs' : 5,
         'min_range' : 20.4,
         'max_range' : 50.4,
         'units' : 'mV',
        }

    """
    # TODO: consider adding an extracted SI unit as well

    key_list = ['value', 'error', 'num_obs', 'min_range', 'max_range', 'units']

    # initialize dict with None values if requested
    data_dict = dict.fromkeys(key_list) if initialize_dict else {}

    # check if input string is mostly characters - then its probably not a data cont string
    digit_fraction = digit_pct(data_str)
    if digit_fraction < .05:
        try:
            print('Too many elems of string %s are not digits: %.2f' % (data_str.encode("iso-8859-15", "replace"), digit_fraction))
        except Exception:
            # the message is purely diagnostic; never fail because it could
            # not be encoded or printed
            pass
        return data_dict

    # first map unicode minus/dash variants to '-', the spellings of
    # plus-minus to the single plus-minus character, and thin space to ' '
    new_str = re.sub(u'\u2212', '-', data_str)
    new_str = re.sub(u'\u2013', '-', new_str)
    new_str = re.sub(r'\+/-', u'\xb1', new_str)
    new_str = re.sub(r'\+\-', u'\xb1', new_str)
    new_str = re.sub(u'\u2009', ' ', new_str)

    # look for string like '(XX)', '(n=XX)' or '(N = XX)'; whitespace inside
    # the parentheses is optional
    num_obs_check = re.findall(r'\(\s*[Nn]?\s*=?\s*\d+\s*\)', new_str)
    if num_obs_check:
        num_obs_str = num_obs_check[0]
        data_dict['num_obs'] = int(re.search(r'\d+', num_obs_str).group(0))

        # remove number of observations instance from the string
        new_str = new_str.replace(num_obs_str, '')

    # try to ID unit here
    found_unit = get_units_from_table_header(new_str)
    if found_unit:
        # strip any numeric part out of the reported unit
        found_unit = re.sub(r"-?\.?\d+\.?\d*", "", found_unit)
        data_dict['units'] = found_unit

    # remove whitespace from the data string as it serves no purpose
    new_str = re.sub(r'\s', '', new_str)

    # try to split string based on unicode +/-; a literal '+/-' can reappear
    # here once whitespace has been removed (e.g. from '+ / -')
    split_str_list = re.split('\xb1', new_str) if re.search('\xb1', new_str) else re.split(r'\+/-', new_str)

    # parse 'error' value as second element after +/- sign
    if len(split_str_list) == 2:
        error_float = str_to_float(split_str_list[1])

        # error_float must be greater than 0
        if error_float and error_float > 0:
            data_dict['error'] = error_float

        # remove the part of the string defined as error
        new_str = split_str_list[0]

    # Check the remaining string for a range of the form 'A-B', where either
    # bound may itself carry a leading minus sign
    range_str_check = re.search(r'-?\.?\d+\.?\d*--?\.?\d+\.?\d*', new_str)
    if range_str_check:
        range_str = range_str_check.group(0)
        minus_count = range_str.count('-')
        range_split_list = range_str.split('-')

        # stay None if the bounds cannot be worked out, so the check below
        # never touches an unbound name
        min_range = None
        max_range = None

        if minus_count == 1:
            # 'A-B'
            min_range = str_to_float(range_split_list[0])
            max_range = str_to_float(range_split_list[1])
        elif minus_count == 2:
            if range_str.startswith('-'):
                # '-A-B'
                min_range = str_to_float("-" + range_split_list[1])
                max_range = str_to_float(range_split_list[2])
            else:
                # 'A--B'
                min_range = str_to_float(range_split_list[0])
                max_range = str_to_float("-" + range_split_list[2])
        elif minus_count == 3:
            # '-A--B'
            min_range = str_to_float("-" + range_split_list[1])
            max_range = str_to_float("-" + range_split_list[3])
        else:
            print("Unparsable data range detected in String: '" + range_str + "'. Too many '-' signs.")

        if min_range is not None and max_range is not None:
            # store the bounds in ascending order
            if min_range < max_range:
                data_dict['min_range'] = min_range
                data_dict['max_range'] = max_range
            else:
                data_dict['min_range'] = max_range
                data_dict['max_range'] = min_range

            # prematurely assign a value as the mean of min and max ranges;
            # it is overwritten below if another number remains in the string
            data_dict['value'] = np.mean([min_range, max_range])
            # remove the range literally (it is data, not a regex pattern)
            new_str = new_str.replace(range_str, "")

    # parse 'mean' data value as the first number left in the string
    value_match = re.search(r'-?\.?\d+\.?\d*', new_str)
    if value_match:
        data_dict['value'] = str_to_float(value_match.group(0))

    return data_dict