Esempio n. 1
0
def applyDNN(images, net, config):
    """Run one forward pass of `net` on `images` and return upsampled scores.

    The input is written into slot 0 of the network's ``data`` blob, the
    network is run forward, and the activations of the blob selected by
    ``config.output_stage`` / ``config.deconv`` are read back.

    Args:
        images: data assigned to ``net.blobs['data'].data[0]``. An earlier
            comment in this code states it should be sized
            [1, 21, 160, 120] -- TODO confirm against the caller.
        net: network object exposing ``blobs`` and ``forward()``
            (presumably a Caffe net -- confirm).
        config: settings object. Reads ``output_stage``, ``debug_mode``,
            ``deconv``, ``downsample`` and, for stages >= 2,
            ``model.stage_depth``.

    Returns:
        The first item of the selected blob, transposed from axis order
        (0, 1, 2) to (1, 2, 0), then resized by
        ``toolbox.util.resize_portion`` with ``config.downsample``.

    Raises:
        AssertionError: in debug mode, when ``output_stage`` is not an
            integer in [1, 6].
        ValueError: when ``output_stage`` selects no blob (anything that is
            neither ``== 1`` nor ``>= 2``). Previously this case surfaced as
            an UnboundLocalError after the forward pass had already run.
    """
    output_stage = config.output_stage
    if config.debug_mode:
        assert (output_stage > 0 and output_stage <= 6 and util.isInteger(output_stage)), 'output_stage number should be [1, 6]'

    # Resolve the blob to read *before* the forward pass so that an invalid
    # stage fails fast with a meaningful error.
    if output_stage == 1:
        blob_name = 'deconv5_6_CPM' if config.deconv else 'conv5_5_CPM'
    elif output_stage >= 2:
        if config.deconv:
            blob_name = ('Mdeconv%d_stage' % (config.model.stage_depth + 1)) + str(output_stage)
        else:
            blob_name = ('Mconv%d_stage' % config.model.stage_depth) + str(output_stage)
    else:
        raise ValueError('output_stage should be 1 or >= 2, got %r' % (output_stage,))

    # images should be in the size of [1, 21, 160, 120]
    net.blobs['data'].data[0, :, :, :] = images

    # The return value of forward() is not needed; results are read from
    # the named blob below.
    net.forward()

    # Take the first item of the batch and move the leading (channel) axis
    # last: c h w -> h w c (original note: "h w c 160 120 21").
    scores = np.transpose(net.blobs[blob_name].data[0, :, :, :], [1, 2, 0])

    # Upsample by config.downsample (original notes: "by 8 times",
    # "use bicubic upsampling" -- behaviour of resize_portion not visible here).
    return toolbox.util.resize_portion(scores, config.downsample)
def parseValue(input):
    """Convert a textual value into an int, a float, a list, or leave it as is.

    The checks are applied in order:

    1. ``util.isInteger(input)`` is true  -> ``int(input)``
    2. ``util.isFloat(input)`` is true    -> ``float(input)``
    3. ``input`` starts with ``'('``      -> the first and last characters are
       dropped, the remainder is split on every ``','`` and each piece is
       parsed recursively; the results are returned as a list.
    4. otherwise                          -> ``input`` unchanged.

    Note that step 3 splits on *every* comma, so nested parenthesised
    groups are not supported, and pieces are not stripped of whitespace.

    Args:
        input: the value to parse (name kept for caller compatibility even
            though it shadows the builtin).

    Returns:
        int, float, list of parsed values, or the original ``input``.
    """
    if util.isInteger(input):
        return int(input)
    if util.isFloat(input):
        return float(input)
    # Slice instead of indexing so that an empty string falls through to the
    # final return instead of raising IndexError.
    if input[:1] == '(':
        # Build a real list: on Python 3 ``map`` would hand back a one-shot
        # lazy iterator, which cannot be indexed or iterated twice.
        return [parseValue(piece) for piece in input[1:-1].split(',')]
    return input
Esempio n. 3
0
def _testGeneToGenome(gene):
    '''
    gene: gene id from pa and Dpa files
    returns: one of the seven genomes which this gene belongs to, or None if there is no match.
    '''
    # At???????        ath:   Arabidopsis thaliana
    # CE?????             cel:  Caenorhabditis elegans    (worm)
    # Hs?????           hsa:    Human
    # ??????  (numbers)        dme: =Drosophila melanogaster  fly
    # EC????     ecu:     Encephalitozoon cuniculi
    # SP???   spo:      Pombe
    # Y????   sce:      Yeast -
    prefix = gene[:2]
    if prefix == 'At': return 'ath'
    elif prefix == 'CE': return 'cel'
    elif prefix == 'Hs': return 'hsa'
    elif prefix == 'SP': return 'spo'
    elif prefix == 'EC': return 'ecu'
    elif prefix[0] == 'Y': return 'sce'
    elif util.isInteger(prefix): return 'dme'
    else: return None
Esempio n. 4
0
def resultToGeneView(resultId, urlFunc, otherParams=None):
    '''
    Build the HTML "gene view" for one gene cluster (row) of a stored result.

    resultId: id passed to getResult() to load the result.
    urlFunc: passed through to makeResultUrl() and makeSequenceClustersTable().
    otherParams: dict of request parameters; otherParams['gene'] is the index
        of the gene/cluster row in the result to view.  Defaults to an empty dict.
    returns: a string of HTML.
    raises: Exception if otherParams['gene'] is not an integer according to
        util.isInteger().

    for the cluster, get its terms, then get all the clusters associated with those terms and compute the pairwise profile distance of those terms.
    also compute the mean distance of the cluster and every other cluster in each term.
    report each term in the cluster, the mean pairwise hamming distance and the mean distance between cluster and every other cluster of the term.
    the page also lists the FASTA sequences of the selected cluster and its orthologs.
    '''
    # A None default avoids sharing one mutable dict between calls.
    if otherParams is None:
        otherParams = {}

    result = getResult(resultId)
    ds = result['dataset']
    geneIndex = otherParams.get('gene')
    if not util.isInteger(geneIndex):
        raise Exception('resultToGeneView(): geneIndex is not an integer. geneIndex=%s'%geneIndex)
    geneIndex = int(geneIndex)
    rows = result['rows']
    selectedRow = rows[geneIndex]
    seqIdToDataMap = result.get('seq_id_to_data_map', {})
    termMap = result.get('term_map', {})
    clusterIndexToProfileMap = {}
    # NOTE(review): this map is filled below but never read in this function.
    # The call is kept in case getBestClusterGeneName() matters to callers -- confirm and remove.
    clusterIndexToBestGeneNameMap = {}
    termToClusterIndicesMap = {}
    headers = result['headers']
    # the final header belongs to the distance column, not to a genome.
    genomes = headers[:-1]
    genomeIdToGenomeMap = result.get('genome_id_to_genome_map', {})
    orthologs = result['orthologs'][geneIndex] # orthologs is a list of lists of orthologs for each row.

    def externalId(seqId):
        # external sequence id for seqId, or None if unknown.
        return seqIdToDataMap.get(seqId, {}).get(roundup_common.EXTERNAL_SEQUENCE_ID_KEY)

    def genomeOf(seqId):
        # genome for seqId, or None if unknown.
        return genomeIdToGenomeMap.get(seqIdToDataMap.get(seqId, {}).get(roundup_common.GENOME_ID_KEY))

    # get terms to clusters map, etc.
    # row has an element for each genome that contains 0 or more seq ids, and row has the distance of the cluster as its final element.
    for index, row in enumerate(rows):
        clusterIndexToProfileMap[index] = getProfileForCluster(row)
        clusterIndexToBestGeneNameMap[index] = getBestClusterGeneName(row, headers, seqIdToDataMap)
        for term in getTermsForCluster(row, seqIdToDataMap):
            termToClusterIndicesMap.setdefault(term, []).append(index)
    # get terms for geneIndex cluster.
    geneTerms = getTermsForCluster(selectedRow, seqIdToDataMap)

    # generate list of mean distance information for each term.
    termDataList = []
    for termId in geneTerms:
        clusterIndices = termToClusterIndicesMap[termId]
        numClusters = len(clusterIndices)
        meanPairwiseHammingDistance = computeMeanPairwiseHammingDistance(result, clusterIndices, clusterIndexToProfileMap)
        # a None mean means the term is skipped.  presumably this happens for terms associated with
        # many clusters (see TERM_PROMISCUITY_LIMIT in the description text below), on the
        # assumption that they are too general to be interesting -- confirm in the helper.
        if meanPairwiseHammingDistance is None:
            continue
        totalHammingDistanceFromGene = sum(
            hammingDistanceForProfiles(clusterIndexToProfileMap[index], clusterIndexToProfileMap[geneIndex])
            for index in clusterIndices if index != geneIndex)
        # NOTE(review): the divisor counts every cluster of the term, which appears to include
        # the selected cluster itself (skipped in the sum above).  kept as in the original -- confirm intent.
        if numClusters > 1:
            meanHammingDistanceFromGene = totalHammingDistanceFromGene / float(numClusters)
        else:
            meanHammingDistanceFromGene = 0
        termDataList.append((meanHammingDistanceFromGene, meanPairwiseHammingDistance, numClusters, termId))
    # tuples sort by mean distance from the selected gene first.
    termDataList.sort()

    termResultUrl = makeResultUrl(resultId, urlFunc, resultType=TERM_RESULT, templateType=WIDE_TEMPLATE)

    # collect the page as fragments and join once at the end.
    parts = []
    parts.append(makeQueryDescHtml(result))
    parts.append('<h3>Result Description</h3>')
    parts.append('<pre>A table displays the orthologous sequences, '+headers[-1]+', and Phyletic Profile for the selected gene cluster.\n')
    parts.append('Another table lists all the Gene Ontology terms associated with the selected gene.\n')
    parts.append('That sortable table includes the following columns:\n')
    parts.append('\tThe mean hamming distance between the profile of the selected gene cluster and each profile of the other gene clusters in the result annotated with the GO term\n')
    parts.append('\tThe mean hamming distance between each pair of profiles of all gene clusters in the result annotated with the GO term\n')
    parts.append('\tNumber of gene clusters in the result annotated with each GO term.\n')
    parts.append('For GO terms with only one associated gene cluster, the mean pairwise hamming distance is defined as 0.\n')
    parts.append('GO terms associated with more than '+str(TERM_PROMISCUITY_LIMIT)+' gene clusters are excluded as being too general to be interesting.\n')
    parts.append('Click on table column headers to sort by that column.  Please be patient with large data sets.\n')
    parts.append('</pre>')
    parts.append('<h3>Result</h3>')
    parts.append('<h4>Gene Cluster Information</h4>')
    parts.append(makeSequenceClustersTable(result, resultId, urlFunc, clusterIndices=[geneIndex]))

    # GO term table: one row per term of the selected cluster.
    parts.append('<h4>GO Term Information</h4>')
    parts.append('<table class="sortable" id="term_summary_table">')
    parts.append('<tr><th>GO&nbsp;Identifier</th><th>GO Term</th><th>Mean Gene Hamming Distance From Selected Gene</th>')
    parts.append('<th>Mean Pairwise Gene Hamming Distance</th><th>Number of Genes</th></tr>\n')
    for (meanFromGene, meanPairwise, numClusters, termId) in termDataList:
        parts.append('<tr><td><a href="%s">%s</a></td><td>%s</td>'%(termResultUrl+'&term='+urllib.quote_plus(termId), termId, termMap[termId]))
        parts.append('<td>%.2f</td><td>%.2f</td><td>%d</td></tr>\n'%(meanFromGene, meanPairwise, numClusters))
    parts.append('</table>')

    # FASTA text for every sequence of the selected cluster, grouped by genome.
    parts.append('<h4>FASTA Sequence Information</h4>')
    parts.append('<ul>')
    for i, genome in enumerate(genomes):
        parts.append('<li>'+genomeDisplayName(ds, genome)+'<br/><pre>')
        for seqId in selectedRow[i]:
            try:
                fastaPath = roundup.dataset.getGenomeIndexPath(ds, genome)
                logging.debug('fastaPath: {}'.format(fastaPath))
                parts.append(BioUtilities.getFastaForId(externalId(seqId), fastaPath))
            except Exception:
                # best effort: log and show a placeholder instead of failing the whole page.
                # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
                logging.exception('Error. genome=%s, seqId=%s'%(genome, seqId))
                parts.append('Failed to get FASTA for sequence %s\n'%(externalId(seqId)))
        parts.append('</pre>')
    parts.append('</ul>')

    # ortholog table: one row per (sequence 1, sequence 2, distance) triple.
    parts.append('<h4>Gene Cluster Orthologs</h4>')
    parts.append('<table class="sortable" id="ortholog_table">')
    parts.append('<tr><th>Sequence 1</th><th>Genome 1</th><th>Sequence 2</th><th>Genome 2</th><th>Evolutionary Distance</th></tr>\n')
    for (seqId1, seqId2, distance) in orthologs:
        parts.append('<tr><td>%s</td><td>%s</td>'%(externalId(seqId1), genomeDisplayName(ds, genomeOf(seqId1))))
        parts.append('<td>%s</td><td>%s</td>'%(externalId(seqId2), genomeDisplayName(ds, genomeOf(seqId2))))
        parts.append('<td>%.3f</td></tr>'%distance)
    parts.append('</table>')
    return ''.join(parts)