def count_automated_database_statistics():
    """Count database statistics for content sourced from article data tables.

    Returns:
        A dict mapping statistic name to value. The three ``ncms_*`` entries
        are passed through unchanged from ``count_matching_neuron_mentions()``;
        every other entry is the ``count()`` of a distinct queryset.
    """
    # data maps whose neuron and ephys concept maps were each validated at
    # least once, and whose neuron concept map source has a data table
    nedms_valid = m.NeuronEphysDataMap.objects.filter(
        neuron_concept_map__times_validated__gte=1,
        ephys_concept_map__times_validated__gte=1,
        neuron_concept_map__source__data_table__isnull=False,
    ).distinct()
    articles_automated = m.Article.objects.filter(
        datatable__datasource__neuronconceptmap__times_validated__gte=1,
    ).distinct()

    robot_user = m.get_robot_user()
    neurons = m.Neuron.objects.filter(
        neuronconceptmap__neuronephysdatamap__in=nedms_valid,
    ).distinct()

    # NOTE(review): these two filters match times_validated == 1 exactly,
    # whereas the filters above use >= 1 -- confirm that is intended
    ecms_valid_total = m.EphysConceptMap.objects.filter(
        times_validated=1,
        source__data_table__isnull=False,
    ).distinct()
    ecms_valid_robot = m.EphysConceptMap.objects.filter(
        times_validated=1,
        added_by=robot_user,
        source__data_table__isnull=False,
    ).distinct()

    ncms_robot_id, ncms_robot_3_id, ncms_datatable_total = count_matching_neuron_mentions()

    return {
        'num_neurons': neurons.count(),
        'num_nemds_valid': nedms_valid.count(),
        'num_articles': articles_automated.count(),
        'num_ecms_valid_total': ecms_valid_total.count(),
        'num_ecms_valid_robot': ecms_valid_robot.count(),
        'ncms_datatable_total': ncms_datatable_total,
        'ncms_robot_id': ncms_robot_id,
        'ncms_robot_3_id': ncms_robot_3_id,
    }
def count_database_statistics():
    """Count summary statistics over data-table and user-submitted content.

    Returns:
        A dict mapping statistic name to value. The three ``ncms_*`` entries
        are passed through unchanged from ``count_matching_neuron_mentions()``;
        every other entry is the ``count()`` of a queryset built below.
    """
    data_map_objects = m.NeuronEphysDataMap.objects
    article_objects = m.Article.objects
    ecm_objects = m.EphysConceptMap.objects

    # data maps whose neuron and ephys concept maps were validated at least once
    valid_nedms = data_map_objects.filter(
        neuron_concept_map__times_validated__gte=1,
        ephys_concept_map__times_validated__gte=1,
    ).distinct()
    # same, restricted to neuron concept maps that came from a user submission
    valid_user_nedms = data_map_objects.filter(
        neuron_concept_map__times_validated__gte=1,
        ephys_concept_map__times_validated__gte=1,
        neuron_concept_map__source__user_submission__isnull=False,
    ).distinct()

    from_data_table = Q(datatable__datasource__neuronconceptmap__times_validated__gte=1)
    from_user_submission = Q(usersubmission__datasource__neuronconceptmap__times_validated__gte=1)
    all_articles = article_objects.filter(from_data_table | from_user_submission).distinct()
    user_submitted_articles = article_objects.filter(
        usersubmission__datasource__neuronconceptmap__times_validated__gte=1,
    ).distinct()
    all_journals = m.Journal.objects.filter(article__in=all_articles).distinct()

    robot_user = m.get_robot_user()

    all_neurons = m.Neuron.objects.filter(
        neuronconceptmap__neuronephysdatamap__in=valid_nedms,
    ).distinct()
    all_ephys_props = m.EphysProp.objects.filter(
        ephysconceptmap__neuronephysdatamap__in=valid_nedms,
    ).distinct()

    # articles reached through never-validated ephys concept maps, keeping
    # those whose annotated concept-map count is at least 4
    unvalidated_ecms = ecm_objects.filter(times_validated=0).distinct()
    unvalidated_articles = (
        article_objects
        .filter(datatable__datasource__ephysconceptmap__in=unvalidated_ecms)
        .annotate(ecm_count=Count('datatable__datasource__ephysconceptmap'))
        .filter(ecm_count__gte=4)
        .distinct()
    )

    valid_ecms = ecm_objects.filter(times_validated=1).distinct()
    valid_robot_ecms = ecm_objects.filter(times_validated=1, added_by=robot_user).distinct()

    ncms_robot_id, ncms_robot_3_id, ncms_datatable_total = count_matching_neuron_mentions()

    return {
        'num_neurons': all_neurons.count(),
        'num_journals': all_journals.count(),
        'num_ephys_props': all_ephys_props.count(),
        'num_nemds_valid': valid_nedms.count(),
        'num_nemds_valid_user': valid_user_nedms.count(),
        'num_articles': all_articles.count(),
        'num_articles_user_submit': user_submitted_articles.count(),
        'num_articles_unvalid': unvalidated_articles.count(),
        'num_ecms_valid_total': valid_ecms.count(),
        'num_ecms_valid_robot': valid_robot_ecms.count(),
        'ncms_datatable_total': ncms_datatable_total,
        'ncms_robot_id': ncms_robot_id,
        'ncms_robot_3_id': ncms_robot_3_id,
    }
def assocArticleNeuron(artOb):
    """Create NeuronArticleMap rows for neurons found in an article's full text.

    The first full-text object of ``artOb`` is parsed to plain text and passed
    to ``findNeuronsInText``. For each distinct neuron reported with more than
    two mentions, a NeuronArticleMap is fetched or created with the robot user
    as ``added_by``. Finally the full text's ArticleFullTextStat is flagged as
    processed.

    Args:
        artOb: the article object to process.

    Returns:
        None. Nothing is written when the full-text content equals ``'test'``.
    """
    robot_user = m.get_robot_user()
    full_text_ob = artOb.articlefulltext_set.all()[0]
    html = full_text_ob.get_content()
    if html == 'test':
        return

    full_text = bs(''.join(html)).get_text()

    seen_neurons = []
    for found in findNeuronsInText(full_text):
        # index 0 holds the neuron, index 2 the number of mentions
        neuron, mention_count = found[0], found[2]
        if neuron in seen_neurons or mention_count <= 2:
            continue
        m.NeuronArticleMap.objects.get_or_create(
            neuron=neuron,
            num_mentions=mention_count,
            article=artOb,
            added_by=robot_user,
        )
        seen_neurons.append(neuron)

    stat = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text_ob)[0]
    stat.neuron_article_map_processed = True
    stat.save()
def update_ecm_using_text_mining(ecm, ephys_synonym_list=None, verbose_output=True):
    """Re-run header matching on an EphysConceptMap and apply the result.

    The map's ``ref_text`` is matched again with ``match_ephys_header``. If
    nothing matches, the map is deleted. If a different ephys property
    matches, the map is reassigned to it, stamped as changed by the robot
    user, and saved. If the same property matches, nothing is written.

    Args:
        ecm: the EphysConceptMap object to update
        ephys_synonym_list: list of ephys synonym strings; when empty or
            None it is built from the lower-cased terms of all EphysPropSyn
            objects
        verbose_output: whether to print a line for each delete or change
    """
    if not ephys_synonym_list:
        ephys_synonym_list = [syn.term.lower() for syn in m.EphysPropSyn.objects.all()]

    # nothing to match against when the map carries no reference text
    if not ecm.ref_text:
        return

    new_prop = match_ephys_header(ecm.ref_text, ephys_synonym_list)

    if new_prop is None:
        # none of the current ephys props match this header any more
        if verbose_output:
            print('deleting %s, prop: %s' % (ecm.ref_text, ecm.ephys_prop))
        ecm.delete()
        return

    if new_prop != ecm.ephys_prop:
        if verbose_output:
            print('changing %s, to prop: %s, from prop: %s' % (ecm.ref_text, new_prop, ecm.ephys_prop))
        ecm.ephys_prop = new_prop
        ecm.changed_by = m.get_robot_user()
        ecm.save()
Beispiel #5
0
def annotate_misnormalized_nedm(nedm):
    """Leave a note on the ephys concept map when a data value can't be normalized.

    A note is written only when ``normalize_nedm_val`` yields no value, the
    raw value fails ``check_data_val_range`` for the map's ephys property,
    and the concept map has no note yet.

    Args:
        nedm: the neuron-ephys data map object to check
    """
    if normalize_nedm_val(nedm)['value'] is not None:
        return
    if check_data_val_range(nedm.val, nedm.ephys_concept_map.ephys_prop) is not False:
        return

    concept_map = nedm.ephys_concept_map
    failure_note = 'Parsing failed to normalize ephys data'
    # never overwrite a note that is already present
    if concept_map.note:
        return

    concept_map.note = failure_note
    concept_map.changed_by = m.get_robot_user()
    concept_map.save()
    print('adding failed normalizing note to %s with data table id %d' % (concept_map.ephys_prop, concept_map.source.data_table.pk))
Beispiel #6
0
def assign_old_article_metadata_maps():
    """Recreate ArticleMetaDataMap rows listed in a text file.

    Reads ``data/old_article_metadata_maps.txt``; each line must contain
    exactly two integers, an Article pk followed by a MetaData pk. For every
    line an ArticleMetaDataMap is fetched or created with the robot user as
    ``added_by``.

    Raises:
        ValueError: if a line does not contain exactly two runs of digits.
    """
    with open('data/old_article_metadata_maps.txt', 'r') as map_file:
        lines = map_file.readlines()

    total = len(lines)
    print('repopulating %d article metadata maps' % total)

    robot_user = m.get_robot_user()
    for index, line in enumerate(lines):
        prog(index, total)
        article_pk, metadata_pk = re.findall('\d+', line)
        m.ArticleMetaDataMap.objects.get_or_create(
            article=m.Article.objects.get(pk=int(article_pk)),
            metadata=m.MetaData.objects.get(pk=int(metadata_pk)),
            added_by=robot_user,
        )
def update_data_table_stat(data_table_object):
    """Refresh the DataTableStat summary for a data table.

    Updates the curating users, the last-curated timestamp, the counts of
    distinct ephys props, neurons and data maps tied to the table, and the
    times-validated figure, based on the concept maps associated with the
    table.

    Args:
        data_table_object: the data table whose stat object is refreshed

    Returns:
        The saved DataTableStat, or None when the table has no concept maps.
        In the None case curating users may already have been added, but the
        remaining fields are not updated and ``save()`` is not called.
    """
    data_table_stat = m.DataTableStat.objects.get_or_create(data_table=data_table_object)[0]

    # curating users come from the table's concept history; the robot user
    # is not counted as a curator
    robot_user = m.get_robot_user()
    user_list = data_table_object.get_curating_users()
    if robot_user in user_list:
        user_list.remove(robot_user)
    existing_users = data_table_stat.curating_users.all()
    for user in user_list:
        if user not in existing_users:
            data_table_stat.curating_users.add(user)

    concept_maps = data_table_object.get_concept_maps()
    if len(concept_maps) == 0:
        return

    # last curated on = most recent history entry across all concept maps.
    # The original guarded this with an identity test (`is not`) between two
    # datetime values, which is effectively always true; assign directly.
    data_table_stat.last_curated_on = max(
        cm.history.latest().history_date for cm in concept_maps
    )

    # count number of unique ecms, ncms, nedms associated with table
    data_table_stat.num_ecms = m.EphysProp.objects.filter(
        ephysconceptmap__source__data_table=data_table_object).distinct().count()
    data_table_stat.num_ncms = m.Neuron.objects.filter(
        neuronconceptmap__source__data_table=data_table_object).distinct().count()
    data_table_stat.num_nedms = m.NeuronEphysDataMap.objects.filter(
        source__data_table=data_table_object).distinct().count()

    # times validated is the minimum over the table's neuron concept maps
    times_validated_per_neuron = [
        ncm.times_validated for ncm in data_table_object.get_neuron_concept_maps()
    ]
    if times_validated_per_neuron:
        data_table_stat.times_validated = int(min(times_validated_per_neuron))

    data_table_stat.save()

    return data_table_stat
def count_metadata_assign_accuracy():
    """Count metadata assignments overall and by the robot user, per metadata key.

    Only articles that have a neuron concept map validated at least once and
    a full text whose stat has ``methods_tag_found`` set are considered. Each
    key and its counts are printed as they are computed.

    Returns:
        A dict mapping each metadata key to a dict with ``'values_all'`` and
        ``'values_robot'`` counts.
    """
    articles = m.Article.objects.filter(
        datatable__datasource__neuronconceptmap__times_validated__gte=1,
        articlefulltext__articlefulltextstat__methods_tag_found=True,
    )
    robot_user = m.get_robot_user()
    map_objects = m.ArticleMetaDataMap.objects

    stat_dict = {}
    for metadata_key in ['Species', 'Strain', 'ElectrodeType', 'PrepType', 'JxnPotential', 'RecTemp', 'AnimalAge']:
        all_maps = map_objects.filter(
            metadata__name=metadata_key,
            article__in=articles,
        ).distinct()
        robot_maps = map_objects.filter(
            metadata__name=metadata_key,
            article__in=articles,
            added_by=robot_user,
        ).distinct()

        counts = {}
        counts['values_all'] = all_maps.count()
        counts['values_robot'] = robot_maps.count()
        print(metadata_key)
        print(counts)
        stat_dict[metadata_key] = counts
    return stat_dict
Beispiel #9
0
def _reassign_sources(model, field_dicts):
    """Point each ``model`` row named in ``field_dicts`` at its data table's DataSource."""
    total = len(field_dicts)
    for i, field_dict in enumerate(field_dicts):
        prog(i, total)
        obj = model.objects.get(pk=field_dict['pk'])
        obj.source = m.DataSource.objects.get(data_table=field_dict['fields']['data_table'])
        obj.save()


def update_concept_maps():
    """Attach a DataSource to every concept map and data map returned by ``load()``.

    First makes sure each DataTable has a DataSource, then sets ``source`` on
    every NeuronEphysDataMap, NeuronConceptMap and EphysConceptMap listed in
    the loaded field dicts. Each field dict is read for its ``'pk'`` and its
    ``['fields']['data_table']`` entries.
    """
    ncm_fields, ecm_fields, nedm_fields = load()

    datatables = m.DataTable.objects.all()
    print('Getting or creating data sources')
    # count once up front instead of re-querying on every iteration
    num_datatables = datatables.count()
    for i, data_table in enumerate(datatables):
        prog(i, num_datatables)
        m.DataSource.objects.get_or_create(data_table=data_table)

    # The results are unused here; the calls are kept in case these helpers
    # create the users as a side effect -- TODO confirm and drop if not.
    m.get_anon_user()
    m.get_robot_user()

    print('Updating nedm fields')
    _reassign_sources(m.NeuronEphysDataMap, nedm_fields)

    print('Updating ncm fields')
    _reassign_sources(m.NeuronConceptMap, ncm_fields)

    # The original ecm loop reported progress with an index left over from
    # the ncm loop; the shared helper enumerates every list itself.
    print('Updating ecm fields')
    _reassign_sources(m.EphysConceptMap, ecm_fields)
def assocDataTableEphysVal(dataTableOb):
    """Create EphysConceptMap objects for ephys terms found in a data table.

    Every ``th`` and ``td`` cell of the table's HTML is checked. Cells with
    non-empty text that pass ``has_ascii_letters`` are matched against the
    ephys synonym list with ``match_ephys_header``; on a match, an
    EphysConceptMap is fetched or created for that cell with the robot user
    as ``changed_by`` and ``times_validated`` of 0.

    Args:
        dataTableOb: the data table object to mine. Nothing is mined when its
            ``table_text`` is None.
    """
    data_source = m.DataSource.objects.get(data_table=dataTableOb)
    robot_user = m.get_robot_user()
    if dataTableOb.table_text is None:
        return

    synonym_terms = [syn.term.lower() for syn in m.EphysPropSyn.objects.all()]

    soup = BeautifulSoup(''.join(dataTableOb.table_html), 'lxml')
    cells = soup.findAll('th') + soup.findAll('td')

    for cell in cells:
        raw_text = cell.get_text()
        cell_text = raw_text.strip()

        # cells without an id attribute are recorded with -1
        cell_id = str(cell['id']) if 'id' in cell.attrs.keys() else -1

        if len(cell_text) == 0:
            continue
        if has_ascii_letters(cell_text) is not True:
            continue

        # SJT Note - Currently doesn't mine terms in synapse stop words list
        ephys_prop = match_ephys_header(cell_text, synonym_terms)
        unit = get_units_from_table_header(cell_text)
        if not ephys_prop:
            continue

        # only the first 199 characters of the unstripped text are stored
        m.EphysConceptMap.objects.get_or_create(
            ref_text=raw_text[:199],
            ephys_prop=ephys_prop,
            source=data_source,
            dt_id=cell_id,
            changed_by=robot_user,
            times_validated=0,
            identified_unit=unit,
        )
def update_other_defined_ecms():
    """Reassign ecms tagged as 'other' when their header now matches a real prop.

    Every EphysConceptMap whose ephys prop is the one named ``'other'`` is
    re-matched from its ``ref_text``. A map is updated, stamped as changed by
    the robot user, and saved only when the match yields a different prop.
    Maps with no match, or whose match equals the current prop, are left
    untouched (the original code re-saved the latter unchanged).
    """
    other_ephys_prop = m.EphysProp.objects.get(name='other')
    ecm_list = m.EphysConceptMap.objects.filter(ephys_prop=other_ephys_prop)

    ephys_syn_list = [syn.term.lower() for syn in m.EphysPropSyn.objects.all()]
    robot_user = m.get_robot_user()

    for ecm in ecm_list:
        # get the closest matching ephys prop given the table header reference text
        matched_ephys_prop = article_text_mining.mine_ephys_prop_in_table.match_ephys_header(
            ecm.ref_text, ephys_syn_list)
        if matched_ephys_prop is None or matched_ephys_prop == ecm.ephys_prop:
            continue

        print('changing %s, to prop: %s, from prop: %s' % (ecm.ref_text, matched_ephys_prop, ecm.ephys_prop))
        ecm.ephys_prop = matched_ephys_prop
        ecm.changed_by = robot_user
        ecm.save()
Beispiel #12
0
def record_solution_concs():
    """Run ``record_compounds`` on the solution text of qualifying articles.

    An article qualifies when, through either its data tables or its user
    submissions, it has a neuron concept map validated at least once and at
    least one neuron-ephys data map. For each such article, the
    ``ExternalSolution`` and ``InternalSolution`` metadata maps are looked up
    and, when the first one found has reference text, that text is handed to
    ``record_compounds`` together with the robot user.
    """
    via_data_table = Q(datatable__datasource__neuronconceptmap__times_validated__gte=1,
                       datatable__datasource__neuronephysdatamap__isnull=False)
    via_user_submission = Q(usersubmission__datasource__neuronconceptmap__times_validated__gte=1,
                            usersubmission__datasource__neuronephysdatamap__isnull=False)
    articles = m.Article.objects.filter(via_data_table | via_user_submission).distinct()

    robot_user = m.get_robot_user()

    solution_names = {"external": 'ExternalSolution',
                      "internal": 'InternalSolution'}

    total = articles.count()

    for index, article in enumerate(articles):
        prog(index, total)
        for soln, soln_name in solution_names.iteritems():
            solution_maps = m.ArticleMetaDataMap.objects.filter(article=article, metadata__name=soln_name)
            if not solution_maps:
                continue
            if not solution_maps[0].ref_text:
                continue
            record_compounds(article, None, solution_maps[0].ref_text.text,
                             ["", "", "", ""], "%s_0" % soln, robot_user)
Beispiel #13
0
def assign_robot():
    """Set the robot user as ``added_by`` on every NeuronArticleMap, then run
    ``assocArticleNeuron`` over ``artObs`` in blocks of 100.

    NOTE(review): the second half of this body reads like a separate routine
    whose header is missing; ``artObs`` is never assigned in this function.
    """
    nams = m.NeuronArticleMap.objects.all()
    u = m.get_robot_user()
    for nam in nams:
        nam.added_by = u
        nam.save()
    # NOTE(review): `artObs` is not defined in this function; unless a
    # module-level `artObs` exists, the next line raises NameError
    tot_count = artObs.count()
    #numRes = 23411881#res.count()
    print '%d num total articles' % tot_count
    blockSize = 100
    firstInd = 0
    lastInd = blockSize
    blockCnt = 0
    # walk artObs one slice [firstInd:lastInd] at a time
    while firstInd < lastInd:
        print '%d of %d blocks ' % (blockCnt, tot_count/blockSize)
        for artOb in artObs[firstInd:lastInd].iterator():
            assocArticleNeuron(artOb)
        # NOTE(review): the slice above excludes index lastInd, so starting
        # the next block at lastInd + 1 skips one article per block --
        # confirm whether that is intended
        firstInd = lastInd + 1
        lastInd = min(lastInd+blockSize, tot_count)
        blockCnt += 1

# Module-level robot user, looked up once when this statement executes.
robot_user = m.get_robot_user()
def assocArticleNeuron(artOb):
    fullTextOb = artOb.articlefulltext_set.all()[0]
    fullTextHtml = fullTextOb.get_content()
    if fullTextHtml == 'test':
        return
    soup = bs(''.join(fullTextHtml))
    full_text = soup.get_text()
    neuronTuple = findNeuronsInText(full_text)    
    usedNeurons = []
    for t in neuronTuple:
        neuronOb = t[0]
        numMentions = t[2]
        if neuronOb not in usedNeurons and numMentions > 2:
            #neuronSynOb = t[1]
            neuronArticleMapOb = m.NeuronArticleMap.objects.get_or_create(neuron = neuronOb,
def assocDataTableEphysVal(dataTableOb):
    """Create EphysConceptMap objects for table cells that match ephys synonyms.

    Every ``th`` and ``td`` cell of the table's HTML is checked. Cell text
    accepted by ``isHeader`` is normalized with ``resolveHeader`` and matched
    against ``ephysSynList``, exactly first and otherwise through
    ``process.extractOne``. Matches scoring above ``matchThresh`` (and, for
    short headers or synonyms, not below ``matchThreshShort``) are stored via
    ``get_or_create`` with the robot user as ``added_by``.

    ``ephysSynList``, ``matchThresh``, ``shortLim`` and ``matchThreshShort``
    are not defined in this function and are read from an enclosing scope.

    Args:
        dataTableOb: the data table object to mine. Nothing is mined when its
            ``table_text`` is None.
    """
    data_source = m.DataSource.objects.get(data_table=dataTableOb)
    robot_user = m.get_robot_user()
    if dataTableOb.table_text is None:
        return

    soup = BeautifulSoup(''.join(dataTableOb.table_html))
    cells = soup.findAll('th') + soup.findAll('td')

    for cell in cells:
        raw_text = cell.get_text()
        cell_text = raw_text.strip()

        # cells without an id attribute are recorded with -1
        cell_id = str(cell['id']) if 'id' in cell.attrs.keys() else -1

        if len(cell_text) == 0:
            continue
        if isHeader(cell_text) is not True:
            continue

        norm_header = resolveHeader(cell_text)
        if len(norm_header) == 0:
            continue

        if norm_header in ephysSynList:
            # exact synonym hit gets the top score
            best_match, match_score = norm_header, 100
        else:
            # otherwise fall back to fuzzy matching
            try:
                fuzzy_result = process.extractOne(norm_header, ephysSynList)
            except ZeroDivisionError:
                continue
            if fuzzy_result is None:
                continue
            best_match, match_score = fuzzy_result

        if match_score <= matchThresh:
            continue

        synonym = m.EphysPropSyn.objects.get(term=best_match)
        candidate_props = m.EphysProp.objects.filter(synonyms=synonym)
        if not candidate_props.count() > 0:
            continue
        ephys_prop = candidate_props[0]

        # when either the header or the synonym is very short, demand the
        # stricter short-match threshold
        is_short = len(norm_header) <= shortLim or len(synonym.term) <= shortLim
        if is_short and match_score < matchThreshShort:
            continue

        # only the first 199 characters of the unstripped text are stored
        m.EphysConceptMap.objects.get_or_create(
            ref_text=raw_text[:199],
            ephys_prop=ephys_prop,
            source=data_source,
            dt_id=cell_id,
            match_quality=match_score,
            added_by=robot_user,
            times_validated=0,
        )