def get_units_from_table_header(header_str):
    """Return the piece of a table header that parses as units, or None.

    The header is first split by resolve_table_header into three parts
    (presumably the normalized header text, the text inside parentheses and
    the text after a comma -- confirm against that helper). Exactly one
    source of candidates is then examined, in this priority order:

    1. the parenthesised part, if it is non-empty;
    2. otherwise the comma part, if it is non-empty;
    3. otherwise each whitespace-separated token of the first part,
       scanned from last to first.

    Args:
        header_str: header string from a data table

    Returns:
        The first candidate string for which parse_units_from_str returns a
        truthy value, or None if no candidate does. Note that a non-empty
        parenthesised (or comma) part that does not parse as units yields
        None; the lower-priority sources are not consulted in that case.
    """
    header_text, parens_str, comma_str = resolve_table_header(header_str)

    # Choose the single candidate source; the branches are mutually exclusive.
    if parens_str:
        candidates = [parens_str]
    elif comma_str:
        candidates = [comma_str]
    else:
        # split header using whitespace and check if any are units,
        # starting with the right-most token
        candidates = reversed(header_text.split())

    for candidate in candidates:
        if parse_units_from_str(candidate):
            return candidate
    return None
def match_ephys_header(header_str, ephys_synonym_list):
    """Given a data table header string, returns closest matching ephys prop
    object or None if no ephys synonym has a high match

    Args:
        header_str: header string from a data table
        ephys_synonym_list: the list of strings representing ephys synonyms

    Returns:
        An EphysProp neuroelectro.models object whose Ephys Synonym best
        matches the header_str, or None when any of the following holds:
          * fuzzy_match_term_to_list returns no (falsy) match,
          * the normalized header contains one of the synapse stop words,
          * no EphysProp has a synonym equal to the matched term.
        example: <EphysProp: Input resistance>

    Side effects:
        Prints a notice to stdout when more than one EphysProp row matches
        the synonym; the first row of the queryset is still returned.
    """
    # a list of stop words relating to synapse terms
    synapse_stop_words = get_synapse_stop_words()
    norm_header, _inside_parens, _comma_str = resolve_table_header(header_str)

    best_matching_ephys_syn = fuzzy_match_term_to_list(norm_header, ephys_synonym_list)
    if not best_matching_ephys_syn:
        return None

    # if header contains a synaptic plasticity term, then don't associate it
    # to anything
    if any(stop_word in norm_header for stop_word in synapse_stop_words):
        return None

    # find ephys prop matching synonym term
    matching_props = m.EphysProp.objects.filter(synonyms__term=best_matching_ephys_syn)

    # Evaluate count() once: every call on an unevaluated queryset issues its
    # own database query, and the result is needed for both checks below.
    match_count = matching_props.count()
    if match_count < 1:
        return None

    ephys_prop = matching_props[0]
    if match_count > 1:
        # Parenthesised single-argument form prints the same text on Python 2
        # and is also valid Python 3.
        print('Multiple ephys properties found matching synonym: %s' % best_matching_ephys_syn)
    return ephys_prop