def _CalcRefProb4DiscreteLabel(response, labelName, labelMatrices, modelSpecs):
    """
    Calculate the reference probability of one discrete response.

    Dispatches to OrientationUtils.CalcLabelProb when labelName is listed in
    config.allOrientationNames and to DistanceUtils.CalcLabelProb otherwise.
    labelMatrices is the list of (already int32) label matrices of this response.
    """
    if labelName in config.allOrientationNames:
        calcLabelProb = OrientationUtils.CalcLabelProb
    else:
        calcLabelProb = DistanceUtils.CalcLabelProb

    return calcLabelProb(data=labelMatrices,
                         numLabels=GetResponseProbDims(response),
                         numRanges=RangeNWeight.GetNumRanges(modelSpecs))


def CalcLabelDistribution(data, modelSpecs):
    """
    Calculate the reference label distribution of every response in modelSpecs.

    data is a list of dicts, each of which provides d['atomLabelMatrix'][response]
    for every discrete response. modelSpecs shall have the key 'responses' and,
    when 'UseBoundingBox4RefProbs' is True, the key 'maxbatchSize'.

    For a continuous response (label type starting with LogNormal or Normal) the
    distribution is a float32 column of ones with one row per range.
    For a discrete response the distribution is calculated by CalcLabelProb from
    the label matrices, optionally restricted to a sampled bounding box.

    The result, a dict keyed by response, is saved to modelSpecs['labelDistributions']
    and also returned.
    """
    ## only the literal True enables bounding box sampling, same as the has_key() test used before
    useBoundingBox = modelSpecs.get('UseBoundingBox4RefProbs') is True

    allRefProbs = dict()
    for response in modelSpecs['responses']:
        labelName, labelType, subType = config.ParseResponse(response)

        if labelType.startswith('LogNormal') or labelType.startswith('Normal'):
            ## numRanges was referenced here without being defined, which raised a NameError
            numRanges = RangeNWeight.GetNumRanges(modelSpecs)
            allRefProbs[response] = np.ones((numRanges, 1), dtype=np.float32)
            continue

        ## collect the discrete label matrices of this response
        labelMatrices = [d['atomLabelMatrix'][response] for d in data]

        if useBoundingBox:
            ## here we sample a sub label matrix using BoundingBox to account for the real training scenario
            ## bounds is used as (top, left, bottom, right) by the slicing below
            sampledMatrices = []
            for lMatrix in labelMatrices:
                bounds = SampleBoundingBox(
                    (lMatrix.shape[0], lMatrix.shape[1]),
                    modelSpecs['maxbatchSize'])
                subMatrix = lMatrix[bounds[0]:bounds[2], bounds[1]:bounds[3]]
                sampledMatrices.append(subMatrix.astype(np.int32))
            labelMatrices = sampledMatrices
        else:
            labelMatrices = [m.astype(np.int32) for m in labelMatrices]

        allRefProbs[response] = _CalcRefProb4DiscreteLabel(
            response, labelName, labelMatrices, modelSpecs)

    modelSpecs['labelDistributions'] = allRefProbs
    return allRefProbs
        def EvaluateAccuracy(pred_prob, truth, pad_len):
            """
            Build the top accuracy expression for the response currentResponse,
            which is taken from the enclosing scope.

            The first pad_len rows and columns of both pred_prob and truth are
            dropped before evaluation. The accuracy function is selected by the
            label name, label type and sub type parsed from currentResponse.
            The program exits with status 1 on an unsupported label type.
            """
            ## drop the leading pad_len rows and columns; the prediction is cast to floatX
            croppedPred = T.cast(pred_prob[pad_len:, pad_len:],
                                 dtype=theano.config.floatX)
            croppedTruth = truth[pad_len:, pad_len:]

            labelName, labelType, subType = ParseResponse(currentResponse)
            symmetric = config.IsSymmetricLabel(labelName)

            ## arguments shared by all the accuracy functions
            commonArgs = dict(pred=croppedPred,
                              truth=croppedTruth,
                              symmetric=symmetric)

            ## orientation matrices are handled first and support discrete labels only
            if labelName in config.allOrientationNames:
                if not config.IsDiscreteLabel(labelType):
                    print 'ERROR: unsupported label type for orientation matrix prediction: ', currentResponse
                    exit(1)

                numLabels = GetResponseProbDims(currentResponse)
                ## for the Plus and Minus sub types the last label is not counted as a valid one
                if subType.endswith(('Plus', 'Minus')):
                    largestValidLabel = numLabels - 2
                else:
                    largestValidLabel = numLabels - 1

                return TopAccuracyOrientation(
                    largestValidLabel=largestValidLabel, **commonArgs)

            if labelType.startswith('LogNormal'):
                return TopAccuracyLogNormal(**commonArgs)

            if labelType.startswith('Normal'):
                return TopAccuracyNormal(**commonArgs)

            if labelType.startswith('Discrete'):
                if subType.startswith('2C'):
                    return TopAccuracy2C(**commonArgs)
                return TopAccuracyMultiC(subType=subType, **commonArgs)

            print 'ERROR: unsupported label type in EvaluateAccuracy: ', labelType
            exit(1)
    def errors(self, zList, weightList=None):
        errs = []
        if weightList is not None and len(weightList) > 0:
            for res, predictor, z, w, o in zip(self.responses, self.predictors,
                                               zList, weightList,
                                               self.outputList):
                labelType = Response2LabelType(res)
                numLabels = GetResponseProbDims(res)

                ## if the label type is Discrete25C, Discrete52C, Discrete12C
                if res in config.allAtomPairNames and labelType.startswith(
                        'Discrete') and numLabels > 3:
                    assert (z.ndim == 3 and GetResponseValueDims(res) == 1)
                    o2 = o.flatten(3)
                    ## here we convert 12C, 25C, and 52C to 3C for error calculation, which makes the result easier to interpret
                    errs.append(
                        self.errors4one(
                            z,
                            o2,
                            weight=w,
                            distLabelType=labelType[len('Discrete'):]))
                else:
                    ## call the error function of each predictor
                    if (z.ndim == 3):
                        zflat = z.flatten().dimshuffle(0, 'x')
                    elif (z.ndim == 4):
                        zflat = z.dimshuffle(3, 0, 1,
                                             2).flatten(2).dimshuffle(1, 0)
                    else:
                        print 'unsupported ndim for z in errors():', z.ndim
                        exit(1)

                    assert (w.ndim == 3)
                    wflat = w.flatten().dimshuffle(0, 'x')
                    e = predictor.errors(zflat, sampleWeight=wflat)
                    ## e is a tensor with ndim=1
                    errs.append(e)

        else:
            for res, predictor, z, o in zip(self.responses, self.predictors,
                                            zList, self.outputList):
                labelType = Response2LabelType(res)
                numLabels = GetResponseProbDims(res)
                if res in config.allAtomPairNames and labelType.startswith(
                        'Discrete') and numLabels > 3:
                    assert (z.ndim == 3 and GetResponseValueDims(res) == 1)
                    o2 = o.flatten(3)
                    errs.append(
                        self.errors4one(
                            z, o, distLabelType=labelType[len('Discrete'):]))
                else:
                    ## call the error function of each predictor
                    if (z.ndim == 3):
                        zflat = z.flatten().dimshuffle(0, 'x')
                    elif (z.ndim == 4):
                        zflat = z.dimshuffle(3, 0, 1,
                                             2).flatten(2).dimshuffle(1, 0)
                    else:
                        print 'unsupported ndim for z in errors():', z.ndim
                        exit(1)
                    e = predictor.errors(zflat)
                    ## e is a tensor with ndim=1
                    errs.append(e)

        return T.concatenate(errs)
    def __init__(self,
                 rng,
                 seqInput,
                 matrixInput,
                 mask_seq=None,
                 mask_matrix=None,
                 embedInput=None,
                 boundingbox=None,
                 modelSpecs=None):
        """
        Build the whole network: a 1d convolution module on the sequential input,
        the conversion of its output to pairwise features, a 2d convolution module
        on the pairwise features and one predictor per response.

        seqInput has shape (batchSize, seqLen, n_in_seq)
        matrixInput has shape (batchSize, seqLen, seqLen, n_in_matrix)
        mask_seq has shape (batchSize, #cols_to_be_masked)
        mask_matrix has shape (batchSize, #rows_to_be_masked, seqLen)
        embedInput has shape (batchSize, seqLen, n_in2)
        boundingbox is a vector of 4 integer elements: top, left, bottom and right. boundingbox shall only be applied to the matrix converted from sequential features.
        modelSpecs is a dict of model specifications and must not be None.

        After construction the following attributes are available:
        layers, predictors, outputList, output_probList, output, output_prob,
        params, paramL1, paramL2, params4var, paramL14var and paramL24var.

        The program exits with status 1 on an unimplemented network type, an
        unsupported response, or when embedInput is given but neither Seq+SS nor
        SeqOnly is specified in modelSpecs['seq2matrixMode'].
        """

        assert (modelSpecs is not None)

        self.modelSpecs = modelSpecs
        self.responses = modelSpecs['responses']

        ## set the number of hidden neurons and number of layers
        n_in_seq = modelSpecs['n_in_seq']
        n_in_matrix = modelSpecs['n_in_matrix']
        n_hiddens_seq = modelSpecs['conv1d_hiddens']
        n_hiddens_matrix = modelSpecs['conv2d_hiddens']
        n_hiddens_logreg = modelSpecs['logreg_hiddens']
        seq_repeats = modelSpecs['conv1d_repeats']
        matrix_repeats = modelSpecs['conv2d_repeats']

        ## half win size for convolutional operation
        ## dilation_seq and dilation_matrix are defined for DilatedResNet only; they are read only in the DilatedResNet branches below
        if modelSpecs['network'].startswith('DilatedResNet'):
            hwsz_matrix = modelSpecs['conv2d_hwszs']
            hwsz_seq = [modelSpecs['conv1d_hwsz']] * len(n_hiddens_seq)
            dilation_seq = [1] * len(n_hiddens_seq)
            dilation_matrix = modelSpecs['conv2d_dilations']
        else:
            hwsz_matrix = modelSpecs['halfWinSize_matrix']
            hwsz_seq = modelSpecs['halfWinSize_seq']

        ## masks to reduce impact of padding zeros
        self.mask_1d = mask_seq
        self.mask_2d = mask_matrix

        ## all the layers with trainable parameters are recorded here; their params and norms are collected at the end
        self.layers = []

        ## relu is the default activation; TANH is the only alternative checked here
        act = T.nnet.relu
        if modelSpecs['activation'] == 'TANH':
            act = T.tanh

        # sequence convolution
        if modelSpecs['network'].startswith('DilatedResNet'):
            #seqConv = DilatedResNet(rng, input=seqInput, n_in=n_in_seq, n_hiddens=n_hiddens_seq, n_repeats=seq_repeats, halfWinSize=hwsz_seq, dilation=dilation_seq, mask=mask_seq, activation=act, batchNorm=modelSpecs['batchNorm'], version=modelSpecs['network'])
            seqConv = DilatedResNet(rng,
                                    input=seqInput,
                                    n_in=n_in_seq,
                                    n_hiddens=n_hiddens_seq,
                                    n_repeats=seq_repeats,
                                    halfWinSize=hwsz_seq,
                                    dilation=dilation_seq,
                                    mask=mask_seq,
                                    activation=act,
                                    modelSpecs=modelSpecs)
        else:
            seqConv = ResNet(rng,
                             input=seqInput,
                             n_in=n_in_seq,
                             n_hiddens=n_hiddens_seq,
                             n_repeats=seq_repeats,
                             halfWinSize=hwsz_seq,
                             mask=mask_seq,
                             activation=act,
                             batchNorm=modelSpecs['batchNorm'],
                             version=modelSpecs['network'])
        self.layers.append(seqConv)

        ## transform 1d sequence to 2d matrix
        seq2matrixMode = modelSpecs['seq2matrixMode']
        seq2matrixLayers = []
        embedLayers = []

        ## determine if we shall use the sequential features or not. The sequential features include sequence profile (PSSM), predicted secondary structure and predicted solvent accessibility
        ## useSequentialFeatures is True by default
        ##useSequentialFeatures = ( modelSpecs.has_key('UseSequentialFeatures') and (modelSpecs['UseSequentialFeatures'] is True) )

        ## use OuterConcatenation operation to convert sequence features into pairwise features
        if seq2matrixMode.has_key('OuterCat') and config.UseSequentialFeatures:

            ##midpointfeature has shape (batchSize, seqLen, seqLen, n_midpoint_out)
            midpointfeature, n_midpoint_out = MidpointFeature(seqConv.output,
                                                              seqConv.n_out,
                                                              box=boundingbox)

            ##remove noise in midpointfeature
            ## mask_matrix is used to reduce noise introduced by padding positions
            ## NOTE(review): mask_matrix defaults to None but is dereferenced in this branch, so a mask seems to be required when OuterCat is used -- confirm against callers
            ## the mask is first applied to the leading rows, then (transposed) to the leading columns
            mid_subtensor = midpointfeature[:, :mask_matrix.shape[1], :, :]
            midpointfeature = T.set_subtensor(
                mid_subtensor,
                T.mul(mask_matrix.dimshuffle(0, 1, 2, 'x'), mid_subtensor))
            mid_subtensor2 = midpointfeature[:, :, :mask_matrix.shape[1], :]
            midpointfeature = T.set_subtensor(
                mid_subtensor2,
                T.mul(mask_matrix.dimshuffle(0, 2, 1, 'x'), mid_subtensor2))

            ## here we use convolution with halfWinSize=0 to reduce model complexity
            compressLayer = Conv2D4DistMatrix(
                rng,
                input=midpointfeature,
                n_in=n_midpoint_out,
                n_hiddens=seq2matrixMode['OuterCat'],
                halfWinSize=0,
                mask=mask_matrix)
            #compressLayer = Conv2D4DistMatrix(rng, input=midpointfeature, n_in=n_midpoint_out, n_hiddens=seq2matrixMode['OuterCat'], halfWinSize=0, mask=None )
            seq2matrixLayers.append(compressLayer)

        ## embedding primary sequence and/or predicted secondary structure
        if embedInput is not None:
            from EmbeddingLayer import EmbeddingLayer4AllRange

            ## Seq+SS takes precedence over SeqOnly when both are specified
            if seq2matrixMode.has_key('Seq+SS'):
                n_out_embed = seq2matrixMode['Seq+SS']
            elif seq2matrixMode.has_key('SeqOnly'):
                n_out_embed = seq2matrixMode['SeqOnly']
            else:
                print 'At least one of two embedding modes Seq+SS or SeqOnly shall be specified.'
                exit(1)

            embedLayer = EmbeddingLayer4AllRange(embedInput,
                                                 modelSpecs['n_in_embed'],
                                                 n_out_embed,
                                                 box=boundingbox)
            seq2matrixLayers.append(embedLayer)
            ## embedLayers is filled here but not read again in this method
            embedLayers.append(embedLayer)
        """
	we do not use this profile embedding any more
	## embedding the sequence profile
	if seq2matrixMode.has_key('Profile') and useSequentialFeatures:
	    from EmbeddingLayer import ProfileEmbeddingLayer
	    pEmbedLayer = ProfileEmbeddingLayer(seqConv.output, seqConv.n_out, seq2matrixMode['Profile'])
	    seq2matrixLayers.append(pEmbedLayer)
	    embedLayers.append(pEmbedLayer)
	"""

        self.layers += seq2matrixLayers

        ## only bUseCCMraw, bUseFullMI and bUseFullCov are used below; the other two flags are ignored here
        bUseCCMFnorm, bUseCCMsum, bUseCCMraw, bUseFullMI, bUseFullCov = config.ParseExtraCCMmode(
            modelSpecs)
        if (bUseCCMraw or bUseFullMI
                or bUseFullCov) and config.CompressMatrixInput(modelSpecs):
            ## here we add a compress layer to reduce the #channels of the original matrix input.
            ## NOTE(review): compressLayer4MatrixInput is not appended to self.layers, so its parameters are not collected into self.params at the end of this method -- confirm whether this is intended
            n_hiddens4MatrixCompress = modelSpecs['hiddens4MatrixCompress']
            compressLayer4MatrixInput = Conv2D4DistMatrix(
                rng,
                input=matrixInput,
                n_in=n_in_matrix,
                n_hiddens=n_hiddens4MatrixCompress,
                halfWinSize=0,
                mask=mask_matrix)
            compressedMatrixInput = compressLayer4MatrixInput.output
            n_compressedMatrix = compressLayer4MatrixInput.n_out
            ## concatenate along the last (channel) axis
            input_2d = T.concatenate(
                [compressedMatrixInput] +
                [layer.output for layer in seq2matrixLayers],
                axis=3)
            n_input2d = n_compressedMatrix + sum(
                [layer.n_out for layer in seq2matrixLayers])
        else:

            ##old code for merging original matrix input and sequential input
            input_2d = T.concatenate(
                [matrixInput] + [layer.output for layer in seq2matrixLayers],
                axis=3)
            n_input2d = n_in_matrix + sum(
                [layer.n_out for layer in seq2matrixLayers])

        #print 'n_input2d=', n_input2d

        ## matrix convolution; note that 'DilatedResNet' does not start with 'ResNet', so the two tests below are exclusive
        if modelSpecs['network'].startswith('ResNet'):
            matrixConv = ResNet(rng,
                                input=input_2d,
                                n_in=n_input2d,
                                n_hiddens=n_hiddens_matrix,
                                n_repeats=matrix_repeats,
                                halfWinSize=hwsz_matrix,
                                mask=mask_matrix,
                                activation=act,
                                batchNorm=modelSpecs['batchNorm'],
                                version=modelSpecs['network'])

        elif modelSpecs['network'].startswith('DilatedResNet'):
            #matrixConv=DilatedResNet(rng, input=input_2d, n_in=n_input2d, n_hiddens=n_hiddens_matrix, n_repeats=matrix_repeats, halfWinSize=hwsz_matrix, dilation=dilation_matrix, mask=mask_matrix, activation=act, batchNorm=modelSpecs['batchNorm'], version=modelSpecs['network'])
            matrixConv = DilatedResNet(rng,
                                       input=input_2d,
                                       n_in=n_input2d,
                                       n_hiddens=n_hiddens_matrix,
                                       n_repeats=matrix_repeats,
                                       halfWinSize=hwsz_matrix,
                                       dilation=dilation_matrix,
                                       mask=mask_matrix,
                                       activation=act,
                                       modelSpecs=modelSpecs)
        else:
            print 'ERROR: Unimplemented deep network type: ', modelSpecs[
                'network']
            exit(1)

        self.layers.append(matrixConv)

        conv_out = matrixConv.output

        ## move the last (channel) axis to the front, flatten the other three axes and transpose,
        ## so that each row of selected corresponds to one entry of conv_out along its first three axes
        selected = conv_out.dimshuffle(3, 0, 1, 2).flatten(2).dimshuffle(1, 0)
        n_in4logreg = matrixConv.n_out

        self.outputList = []
        self.output_probList = []
        self.predictors = []

        ## parameters and norms specific for variance prediction, accumulated from the NN4Normal predictors only
        self.params4var = []
        self.paramL14var = 0
        self.paramL24var = 0

        ## build one predictor per response; all the predictors share the same input
        for res in modelSpecs['responses']:

            labelType = Response2LabelType(res)
            predictor = None

            if labelType.startswith('Discrete'):
                assert GetResponseValueDims(res) == 1
                predictor = NN4LogReg(rng=rng,
                                      input=selected,
                                      n_in=n_in4logreg,
                                      n_out=GetResponseProbDims(res),
                                      n_hiddens=n_hiddens_logreg)

            elif labelType.startswith('LogNormal') or labelType.startswith(
                    'Normal'):
                predictor = NN4Normal(rng=rng,
                                      input=selected,
                                      n_in=n_in4logreg,
                                      n_variables=GetResponseValueDims(res),
                                      n_out=GetResponseProbDims(res),
                                      n_hiddens=n_hiddens_logreg)

                ## recording parameters specific for variance prediction
                self.params4var += predictor.params4var
                self.paramL14var += predictor.paramL14var
                self.paramL24var += predictor.paramL24var

            else:
                print 'incorrect response name or label type: ', res
                exit(1)

            self.layers.append(predictor)
            self.predictors.append(predictor)

            ## output in 2d matrix
            ## the flat predictor outputs are reshaped back to the first three dimensions of conv_out
            output_2d = predictor.y_pred.reshape(
                (conv_out.shape[0], conv_out.shape[1], conv_out.shape[2],
                 GetResponseValueDims(res)))
            output_2d_prob = predictor.output.reshape(
                (conv_out.shape[0], conv_out.shape[1], conv_out.shape[2],
                 GetResponseProbDims(res)))

            self.outputList.append(output_2d)
            self.output_probList.append(output_2d_prob)

        ## the outputs of all the responses are concatenated along the last axis, in the order of modelSpecs['responses']
        self.output = T.concatenate(self.outputList, axis=3)
        self.output_prob = T.concatenate(self.output_probList, axis=3)

        ## collect all the model parameters and their norms
        self.params = []
        self.paramL2 = 0
        self.paramL1 = 0

        for layer in self.layers:
            self.params += layer.params
            self.paramL2 += layer.paramL2
            self.paramL1 += layer.paramL1
        """
def CalcLabelWeight(modelSpecs):
    print 'Calculating label weight ...'

    numRanges = RangeNWeight.GetNumRanges(modelSpecs)

    RangeNWeight.SetWeight4Range(modelSpecs)
    #print 'weight for range: ', modelSpecs['weight4range']

    RangeNWeight.SetWeight43C2C(modelSpecs)
    #print 'LRbias= ', modelSpecs['LRbias']
    #print 'weight43C= ', modelSpecs['weight4Discrete3C']

    allRefProbs = modelSpecs['labelDistributions']
    ##for discrete labels, we calculate their weights by inferring from the weight intialized to 3 bins: 0-8, 8-15 and >15 or -1, which makes inference easier
    modelSpecs['weight4labels'] = dict()

    for response in modelSpecs['responses']:
        labelName, labelType, subType = config.ParseResponse(response)
        numLabels = GetResponseProbDims(response)

        if config.IsContinuousLabel(labelType):
            ## just need to assign range weight for continuous response
            modelSpecs['weight4labels'][response] = modelSpecs[
                'weight4continuous']
            continue

        if not config.IsDiscreteLabel(labelType):
            print 'ERROR: unsupported response in CalcLabelWeight: ', response
            exit(1)

        if labelName in config.allOrientationNames or config.NoWeight4Label(
                modelSpecs):
            modelSpecs['weight4labels'][response] = np.multiply(
                np.ones((numRanges, numLabels), dtype=np.float32),
                modelSpecs['weight4range'])

        elif labelName in ['HB', 'Beta']:
            ## if the response is for HB and Beta-Pairing
            if subType.startswith('2C'):
                modelSpecs['weight4labels'][response] = modelSpecs['weight4' +
                                                                   response]
            else:
                print 'ERROR: unsupported label subtype in CalcLabelWeight: ', response
                exit(1)

        elif labelName in config.allAtomPairNames:
            ## calculate label weight for atom pairs Cb-Cb, Ca-Ca, Cg-Cg, CaCg, and NO
            if subType.startswith('2C'):
                print 'ERROR: 2C is not supported for contact/distance prediction any more'
                exit(1)
            elif subType.startswith('3C'):
                ## if 3C is used for the response
                modelSpecs['weight4labels'][response] = modelSpecs[
                    'weight4Discrete3C']
            else:
                modelSpecs['weight4labels'][
                    response] = DistanceUtils.CalcLabelWeight(
                        modelSpecs['weight4Discrete3C'], allRefProbs[response],
                        config.distCutoffs[subType])

        else:
            print 'ERROR: unsupported label name in CalcLabelWeight: ', response
            exit(1)

        ## set the weight of the label for the invalid entry (distance or orientation) to 0
        if subType.endswith('Minus'):
            modelSpecs['weight4labels'][response][:, -1] = 0
    """
	## for log
	for response in modelSpecs['responses']:
		print 'weight4labels for response: ', response
		print modelSpecs['weight4labels'][response]
	"""

    return modelSpecs['weight4labels']
Exemple #6
0
def _FindOriginalName(name, names):
    """
    Return the entry of names that is the longest prefix of name.
    name itself is returned when no entry of names is a prefix of it.
    """
    candidates = [n for n in names if name.startswith(n)]
    if not candidates:
        return name
    return max(candidates, key=len)


def PredictMatrixLabels(models,
                        predictors,
                        names,
                        inputFolders,
                        aliFolders=None,
                        tplFolder=None,
                        aliFile=None,
                        tplFile=None,
                        saveFolder=None):
    """
    Predict the probability matrices of all the proteins by all the models,
    average them per protein and response, and save the results.

    models is a list of model specifications and predictors is the list of the
    corresponding (predict function, input variables) pairs.
    names is either one target name or a list/tuple of protein names.
    When names is one target and both aliFile and tplFile are given, the data is
    loaded from this single alignment; otherwise it is loaded from inputFolders,
    aliFolders and tplFolder.

    For each protein one file <name>.predictedDistMatrix.pkl is written (into
    saveFolder when it is not None). The file holds a tuple (original name,
    sequence, predicted prob matrices, predicted contact matrices, label weights,
    label distributions).

    Returns the tuple (predictedContactMatrices, allsequences).
    The program exits with status 1 when the same protein has inconsistent sequences.
    """

    if not isinstance(names, (list, tuple)):
        targetName = names
    else:
        targetName = None

    ##allresults is a nested dictionary, i.e., allresults[proteinName][response] = sum of predicted_prob_matrices
    ##We predict one prob_matrix by each model for each protein and each response and then average them per protein and response to get the final results
    ##two different models may share common responses

    allsequences = dict()
    allresults = dict()  ## the results predicted from the real input
    numModels = dict()  ## count the number of models that may predict each response

    for model, predictor in zip(models, predictors):
        predict, inputVariables = predictor

        ## load data for each model separately since each model may have a different specification
        if targetName is None:
            rawData = LoadProteinData4OneModel(model, names, inputFolders,
                                               aliFolders, tplFolder)
        elif aliFile is not None and tplFile is not None:
            rawData = LoadOneAlignment4OneModel(model, targetName,
                                                inputFolders, aliFile, tplFile)
        else:
            rawData = LoadOneProteinData4OneModel(model, targetName,
                                                  inputFolders, aliFolders,
                                                  tplFolder)

        predData = DataProcessor.ExtractFeaturesNLabels(
            rawData,
            modelSpecs=model,
            forTrainValidation=False,
            returnMode='list')

        ##make sure the input has the same number of features as the model
        FeatureUtils.CheckModelNDataConsistency(model, predData)

        ## check sequence consistency
        for d in predData:
            name = d['name']
            if name not in allresults:
                allresults[name] = dict()
                numModels[name] = dict()

            if name not in allsequences:
                allsequences[name] = d['sequence']
            elif allsequences[name] != d['sequence']:
                print 'ERROR: inconsistent primary sequence for the same protein in the protein feature files'
                exit(1)

        predSeqData = DataProcessor.SplitData2Batches(data=predData,
                                                      numDataPoints=624,
                                                      modelSpecs=model)
        print '#predData: ', len(predData), '#batches: ', len(predSeqData)

        ## calculate the start and end positions of each response in the last dimension of result
        ## they depend on the model only, so they are calculated once per model
        dims = [
            GetResponseProbDims(response) for response in model['responses']
        ]
        endPositions = np.cumsum(dims)
        startPositions = endPositions - dims

        for minibatch in predSeqData:
            onebatch, names4onebatch = DataProcessor.AssembleOneBatch(
                minibatch, model)
            inputs = onebatch[:len(inputVariables)]
            result = predict(*inputs)

            ##result is a 4-d tensor. The last dimension is the concatenation of the predicted prob parameters for all responses in this model
            assert result.shape[3] == sum(dims)

            ## the first and third inputs are the sequential features and their mask
            x1d, _, x1dmask, _ = inputs[0:4]
            seqLens = x1d.shape[1] - x1dmask.shape[1] + np.sum(x1dmask, axis=1)
            maxSeqLen = x1d.shape[1]

            for response, start, end in zip(model['responses'], startPositions,
                                            endPositions):

                ## batchres is a batch of result, its ndim=4
                ## the 1st dimension of batchres is batchSize, the 2nd and 3rd dimensions are distance/orientation matrix sizes and the 4th is for the predicted probability parameters
                batchres = result[:, :, :, start:end]

                ## remove masked positions, i.e., keep the trailing seqLen rows and columns only
                revised_batchres = [
                    probMatrix[maxSeqLen - seqLen:, maxSeqLen - seqLen:, :]
                    for probMatrix, seqLen in zip(batchres, seqLens)
                ]

                for res4one, name in zip(revised_batchres, names4onebatch):
                    if response not in allresults[name]:
                        allresults[name][response] = res4one
                        numModels[name][response] = np.int32(1)
                    else:
                        ## here we save sum to reduce memory consumption, which could be huge when many deep models are used to predict a large set of proteins
                        allresults[name][response] += res4one
                        numModels[name][response] += np.int32(1)

    ## calculate the final result, which is the average of predictd prob matrices by all models for the same protein and the same response
    finalresults = dict()
    for name, results in allresults.iteritems():
        finalresults[name] = dict()

        for response, summed in results.iteritems():
            average = (summed / numModels[name][response]).astype(np.float32)

            ##make the predicted distance prob matrices symmetric for some reponses. This also slightly improves accuracy.
            labelName = Response2LabelName(response)
            if config.IsSymmetricLabel(labelName):
                average = (average + np.transpose(average, (1, 0, 2))) / 2.

            finalresults[name][response] = average

    ## convert predicted distance probability matrix into contact matrix
    predictedContactMatrices = DeriveContactMatrix(finalresults)

    ## collect the average label distributions and weight matrix
    finalLabelWeights, finalLabelDistributions = CollectLabelWeightNDistribution(
        models)

    ##write all the results here
    ## for each protein, we have a output file saving a tuple (name, sequence, predicted distance matrix, predicted contact matrix, labelWeight, labelDistribution)
    for name, results in finalresults.iteritems():

        savefilename = name + '.predictedDistMatrix.pkl'
        if saveFolder is not None:
            savefilename = os.path.join(saveFolder, savefilename)

        ## originalName used to be left unset (or to keep the value of the previous protein)
        ## when no entry of names was a prefix of name; the first matching entry was also
        ## picked even if a longer entry matched
        if targetName is not None:
            originalName = targetName
        else:
            originalName = _FindOriginalName(name, names)

        with open(savefilename, 'wb') as fh:
            cPickle.dump((originalName, allsequences[name], results,
                          predictedContactMatrices[name], finalLabelWeights,
                          finalLabelDistributions),
                         fh,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    return (predictedContactMatrices, allsequences)
    """