def collectMeta(a):
    """Return a flat list of bibliographic fields and metadata values for one article.

    The returned list is laid out as::

        [pubmed link, title, journal, pub year, article link, last author]
        + one entry per name in ``nom_vars``
        + one entry per name in ``cont_vars``

    ``nom_vars``, ``cont_vars``, ``pubmed_base_link_str`` and
    ``article_base_link_str`` are not defined in this function; they are read
    from the enclosing scope (presumably module-level globals -- TODO confirm).

    Nominal entries are resolved in this order:

    * 'cell culture' when both 'in vitro' and 'cell culture' are present;
    * for 'Strain': 'C57BL' if the article has a 'Mice' value, 'Guinea Pigs'
      if it has a 'Guinea Pigs' value;
    * for 'Strain' with no strain values and a 'Rats' value: 'Sprague-Dawley'
      or 'Wistar', picked by the sign of a standard-normal draw (so the result
      is not reproducible unless numpy's RNG is seeded);
    * the first value when several are present, otherwise the single value
      (or an empty string when there is none).

    Continuous entries hold ``cont_value.mean`` of the first matching metadata
    object; a missing 'RecTemp' becomes 37.0 when the article has an
    'in vivo' value; anything else missing is left as ''.

    Args:
        a: the article object. Its ``pmid``, ``pk``, ``title``, ``journal``
            and ``pub_year`` attributes are read, and it is passed to the
            ``ArticleMetaDataMap`` filter and to ``get_article_last_author``.

    Returns:
        list: the row described above.
    """
    amdms = m.ArticleMetaDataMap.objects.filter(article=a)
    # The continuous block starts right after the nominal block, so derive the
    # offset from the same list that sizes the row.
    nom_count = len(nom_vars)
    curr_metadata_list = [''] * (nom_count + len(cont_vars))

    # Process metadata for nominal vars
    for i, v in enumerate(nom_vars):
        values = [amdm.metadata.value for amdm in amdms.filter(metadata__name=v)]
        is_strain = (v == 'Strain')
        if 'in vitro' in values and 'cell culture' in values:
            curr_metadata_list[i] = 'cell culture'
        elif is_strain and amdms.filter(metadata__value='Mice').exists():
            curr_metadata_list[i] = 'C57BL'
        elif is_strain and amdms.filter(metadata__value='Guinea Pigs').exists():
            curr_metadata_list[i] = 'Guinea Pigs'
        elif is_strain and len(values) == 0:
            # No strain recorded: impute a rat strain by coin flip; non-rat
            # articles keep the empty placeholder.
            if amdms.filter(metadata__value='Rats').exists():
                if np.random.randn(1)[0] > 0:
                    curr_metadata_list[i] = 'Sprague-Dawley'
                else:
                    curr_metadata_list[i] = 'Wistar'
        elif len(values) > 1:
            # Several values recorded: keep only the first one.
            curr_metadata_list[i] = values[0]
        else:
            curr_metadata_list[i] = u'; '.join(values)

    # Process metadata for continuous vars
    for i, v in enumerate(cont_vars):
        matches = amdms.filter(metadata__name=v)
        if matches.exists():
            curr_metadata_list[nom_count + i] = matches[0].metadata.cont_value.mean
        elif v == 'RecTemp' and amdms.filter(metadata__value='in vivo').exists():
            # In vivo recordings with no stated temperature default to 37.0.
            curr_metadata_list[nom_count + i] = 37.0

    last_author = get_article_last_author(a)
    if last_author is not None:
        last_author_name = '%s %s' % (last_author.last, last_author.initials)
        last_author_name = last_author_name.encode("utf8", "replace")
    else:
        last_author_name = ''

    curr_meta_list = [
        pubmed_base_link_str % a.pmid,
        (a.title).encode("utf8", "replace"),
        a.journal,
        a.pub_year,
        article_base_link_str % a.pk,
        last_author_name,
    ]
    curr_meta_list.extend(curr_metadata_list)

    return curr_meta_list
def getAllArticleNedmMetadataSummary(getAllMetadata=False):
    """Write a tab-delimited ephys + metadata summary for every article.

    One row is written per (article, neuron) pair to
    ``settings.OUTPUT_FILES_DIRECTORY + "article_ephys_metadata_curated.csv"``.
    Columns are: the bibliographic fields, one column per ephys property with
    pk in 1..27, the nominal metadata, then the continuous metadata.

    NOTE(review): this file defines a second function with exactly the same
    name further down (one that takes no arguments). At import time the later
    definition replaces this one, so this version cannot be reached by name
    unless one of the two is renamed.

    Args:
        getAllMetadata: when true, also emit the external/internal solution
            columns (Mg, Ca, Na, Cl, K, pH, text) for indices 0-4.

    Returns:
        The queryset of articles that was iterated.
    """
    # TODO: restore the curated-article filter below and remove unnecessary metadata
    #     articles = m.Article.objects.filter(Q(datatable__datasource__neuronconceptmap__times_validated__gte = 1) |
    #         Q(usersubmission__datasource__neuronconceptmap__times_validated__gte = 1)).distinct()
    #     articles = articles.filter(articlefulltext__articlefulltextstat__metadata_human_assigned = True ).distinct()
    articles = m.Article.objects.all()

    nom_vars = ['Species', 'Strain', 'ElectrodeType', 'PrepType', 'JxnPotential']
    cont_vars = ['JxnOffset', 'RecTemp', 'AnimalAge', 'AnimalWeight', 'FlagSoln']
    cont_var_headers = ['JxnOffset', 'Temp', 'Age', 'Weight', 'FlagSoln']
    if getAllMetadata:
        # Per index: the seven external entries followed by the seven internal
        # ones; each group ends with its 'text' entry.
        solution_fields = ['Mg', 'Ca', 'Na', 'Cl', 'K', 'pH', 'text']
        for i in range(0, 5):
            for side in ('external', 'internal'):
                for field in solution_fields:
                    cont_vars.append('%s_%s_%s' % (side, i, field))
                    cont_var_headers.append('%s_%s_%s' % (side.capitalize(), i, field))
    num_nom_vars = len(nom_vars)

    ephys_list = m.EphysProp.objects.filter(pk__in=range(1, 28))
    # Header = title-cased property name with whitespace and hyphens removed.
    ephys_headers = [re.sub(r"[\s-]", "", e.name.title()) for e in ephys_list]

    other_headers = ['NeuronType', 'Title', 'Journal', 'PubYear', 'PubmedLink', 'DataTableLinks', 'ArticleDataLink', 'LastAuthor']
    all_headers = other_headers + ephys_headers + nom_vars + cont_var_headers

    pubmed_base_link_str = 'http://www.ncbi.nlm.nih.gov/pubmed/%d/'
    table_base_link_str = 'http://neuroelectro.org/data_table/%d/'
    article_base_link_str = 'http://neuroelectro.org/article/%d/'

    out_path = settings.OUTPUT_FILES_DIRECTORY + "article_ephys_metadata_curated.csv"
    # Context manager so the file is flushed and closed even if a row fails.
    with open(out_path, "w+b") as out_file:
        csvout = csv.writer(out_file, delimiter='\t')
        csvout.writerow(all_headers)

        for a in articles:
            print("processing metadata for article: %s" % a.pk)
            amdms = m.ArticleMetaDataMap.objects.filter(article=a)
            curr_metadata_list = [''] * (len(nom_vars) + len(cont_vars))

            # Nominal metadata
            for i, v in enumerate(nom_vars):
                values = [amdm.metadata.value for amdm in amdms.filter(metadata__name=v)]
                is_strain = (v == 'Strain')
                if 'in vitro' in values and 'cell culture' in values:
                    curr_metadata_list[i] = 'cell culture'
                elif is_strain and amdms.filter(metadata__value='Mice').exists():
                    curr_metadata_list[i] = 'C57BL'
                elif is_strain and amdms.filter(metadata__value='Guinea Pigs').exists():
                    curr_metadata_list[i] = 'Guinea Pigs'
                elif is_strain and len(values) == 0:
                    # No strain recorded: impute a rat strain by the sign of a
                    # standard-normal draw (not reproducible unless numpy's RNG
                    # is seeded); non-rat articles keep the empty placeholder.
                    if amdms.filter(metadata__value='Rats').exists():
                        if np.random.randn(1)[0] > 0:
                            curr_metadata_list[i] = 'Sprague-Dawley'
                        else:
                            curr_metadata_list[i] = 'Wistar'
                elif len(values) > 1:
                    # Several values recorded: keep only the first one.
                    curr_metadata_list[i] = values[0]
                else:
                    curr_metadata_list[i] = u'; '.join(values)

            # Continuous metadata
            for i, v in enumerate(cont_vars):
                slot = i + num_nom_vars
                matches = amdms.filter(metadata__name=v)
                if matches.exists():
                    curr_metadata_list[slot] = matches[0].metadata.cont_value.mean
                elif v == 'RecTemp' and amdms.filter(metadata__value='in vivo').exists():
                    # In vivo recordings with no stated temperature default to 37.0.
                    curr_metadata_list[slot] = 37.0
                elif 'text' in v and ('external' in v or 'internal' in v):
                    # Solution text is not stored under its own name: borrow the
                    # reference text of the first stored entry of the same solution.
                    # NOTE(review): range(i - 6, i - 1) covers the Mg, Ca, Na, Cl
                    # and K entries that precede this column and skips the pH
                    # entry at i - 1 -- confirm that is intended.
                    curr_metadata_list[slot] = 'NaN'
                    for j in range(i - 6, i - 1, 1):
                        conc_amdm = amdms.filter(metadata__name=cont_vars[j])
                        if len(conc_amdm) > 0:
                            curr_metadata_list[slot] = conc_amdm[0].metadata.ref_text.text.encode('utf8', "replace")
                            break
                else:
                    curr_metadata_list[slot] = 'NaN'

            # TODO: once curation is complete, restrict this to validated concept
            # maps by AND-ing Q(neuronconceptmap__times_validated__gte = 1) into
            # the filter below.
            neurons = m.Neuron.objects.filter(
                Q(neuronconceptmap__source__data_table__article=a)
                | Q(neuronconceptmap__source__user_submission__article=a)
            ).distinct()

            pubmed_link_str = pubmed_base_link_str % a.pmid
            article_link_str = article_base_link_str % a.pk
            dts = m.DataTable.objects.filter(article=a, datasource__neuronconceptmap__times_validated__gte=1).distinct()
            # Joining an empty list yields an empty string, so no count check is needed.
            dt_link_str = u'; '.join([table_base_link_str % dt.pk for dt in dts])

            last_author = get_article_last_author(a)
            if last_author is not None:
                last_author_name = '%s %s' % (last_author.last, last_author.initials)
                last_author_name = last_author_name.encode("utf8", "replace")
            else:
                last_author_name = ''

            for n in neurons:
                # Field order must match all_headers.
                curr_ephys_prop_list = [
                    n.name,
                    (a.title).encode("utf8", "replace"),
                    a.journal,
                    a.pub_year,
                    pubmed_link_str,
                    dt_link_str,
                    article_link_str,
                    last_author_name,
                ]
                for e in ephys_list:
                    curr_ephys_prop_list.append(computeArticleNedmSummary(a.pmid, n, e))
                curr_ephys_prop_list.extend(curr_metadata_list)

                csvout.writerow(curr_ephys_prop_list)
    return articles
def getAllArticleNedmMetadataSummary():
    """Write a CSV ephys + metadata summary for curated articles.

    Only articles that have at least one validated neuron concept map (through
    a data table or a user submission) and human-assigned metadata are
    included. One row is written per (article, validated neuron) pair to
    ``article_ephys_metadata_summary.csv`` in the current working directory.
    Columns are: one per ephys property with pk in 1..27, the nominal
    metadata, the continuous metadata, then the bibliographic fields.

    NOTE(review): an earlier function with exactly the same name (accepting a
    ``getAllMetadata`` argument) is defined above in this file; this later
    definition replaces it at import time.

    Returns:
        The queryset of articles that was iterated.
    """
    articles = m.Article.objects.filter(
        Q(datatable__datasource__neuronconceptmap__times_validated__gte=1)
        | Q(usersubmission__datasource__neuronconceptmap__times_validated__gte=1)
    ).distinct()
    articles = articles.filter(articlefulltext__articlefulltextstat__metadata_human_assigned=True).distinct()

    nom_vars = ['Species', 'Strain', 'ElectrodeType', 'PrepType', 'JxnPotential']
    cont_vars = ['JxnOffset', 'RecTemp', 'AnimalAge', 'AnimalWeight']
    cont_var_headers = ['JxnOffset', 'Temp', 'Age', 'Weight']
    num_nom_vars = len(nom_vars)

    ephys_list = m.EphysProp.objects.filter(pk__in=range(1, 28))
    # Header = title-cased property name with spaces and hyphens removed.
    ephys_headers = [e.name.title().replace(' ', '').replace('-', '') for e in ephys_list]

    other_headers = ['NeuronType', 'Title', 'PubYear', 'PubmedLink', 'DataTableLinks', 'ArticleDataLink', 'LastAuthor']
    all_headers = ephys_headers + nom_vars + cont_var_headers + other_headers

    pubmed_base_link_str = 'http://www.ncbi.nlm.nih.gov/pubmed/%d/'
    table_base_link_str = 'http://neuroelectro.org/data_table/%d/'
    article_base_link_str = 'http://neuroelectro.org/article/%d/'

    # Context manager so the file is flushed and closed even if a row fails.
    with open("article_ephys_metadata_summary.csv", "wb") as out_file:
        csvout = csv.writer(out_file)
        csvout.writerow(all_headers)

        for a in articles:
            amdms = m.ArticleMetaDataMap.objects.filter(article=a)
            curr_metadata_list = [''] * (len(nom_vars) + len(cont_vars))

            # Nominal metadata
            for i, v in enumerate(nom_vars):
                values = [amdm.metadata.value for amdm in amdms.filter(metadata__name=v)]
                is_strain = (v == 'Strain')
                if 'in vitro' in values and 'cell culture' in values:
                    curr_metadata_list[i] = 'cell culture'
                elif is_strain and amdms.filter(metadata__value='Mice').exists():
                    curr_metadata_list[i] = 'C57BL'
                elif is_strain and amdms.filter(metadata__value='Guinea Pigs').exists():
                    curr_metadata_list[i] = 'Guinea Pigs'
                elif is_strain and len(values) == 0:
                    # No strain recorded: impute a rat strain by the sign of a
                    # standard-normal draw (not reproducible unless numpy's RNG
                    # is seeded); non-rat articles keep the empty placeholder.
                    if amdms.filter(metadata__value='Rats').exists():
                        if np.random.randn(1)[0] > 0:
                            curr_metadata_list[i] = 'Sprague-Dawley'
                        else:
                            curr_metadata_list[i] = 'Wistar'
                elif len(values) > 1:
                    # Several values recorded: keep only the first one.
                    curr_metadata_list[i] = values[0]
                else:
                    curr_metadata_list[i] = u'; '.join(values)

            # Continuous metadata
            for i, v in enumerate(cont_vars):
                slot = i + num_nom_vars
                matches = amdms.filter(metadata__name=v)
                if matches.exists():
                    curr_metadata_list[slot] = matches[0].metadata.cont_value.mean
                elif v == 'RecTemp' and amdms.filter(metadata__value='in vivo').exists():
                    # In vivo recordings with no stated temperature default to 37.0.
                    curr_metadata_list[slot] = 37.0
                else:
                    curr_metadata_list[slot] = 'NaN'

            neurons = m.Neuron.objects.filter(
                Q(neuronconceptmap__times_validated__gte=1)
                & (Q(neuronconceptmap__source__data_table__article=a)
                   | Q(neuronconceptmap__source__user_submission__article=a))
            ).distinct()

            pubmed_link_str = pubmed_base_link_str % a.pmid
            article_link_str = article_base_link_str % a.pk
            dts = m.DataTable.objects.filter(article=a, datasource__neuronconceptmap__times_validated__gte=1).distinct()
            # Joining an empty list yields an empty string, so no count check is needed.
            dt_link_str = u'; '.join([table_base_link_str % dt.pk for dt in dts])

            last_author = get_article_last_author(a)
            if last_author is not None:
                last_author_name = '%s %s' % (last_author.last, last_author.initials)
                last_author_name = last_author_name.encode("iso-8859-15", "replace")
            else:
                last_author_name = ''

            for n in neurons:
                # Field order must match all_headers.
                curr_ephys_prop_list = [computeArticleNedmSummary(a.pmid, n, e) for e in ephys_list]
                curr_ephys_prop_list.extend(curr_metadata_list)
                curr_ephys_prop_list.extend([
                    n.name,
                    (a.title).encode("iso-8859-15", "replace"),
                    a.pub_year,
                    pubmed_link_str,
                    dt_link_str,
                    article_link_str,
                    last_author_name,
                ])
                csvout.writerow(curr_ephys_prop_list)
    return articles