def ValidateAllData(validData, validate, modelSpecs, forRefState=False):
    """Run validate() on every batch of validData and average the results.

    validData: iterable of batches; each batch is converted by
        ToFloatX(ToNonSharedArray(batch)) and then unpacked as the arguments
        of validate
    validate: callable returning a sequence (loss, error[, accuracy]) per batch
    modelSpecs: model specification; modelSpecs['responses'] is read only when
        config.UseSampleWeight(modelSpecs) holds
    forRefState: accepted for interface compatibility, not used in this function

    Returns (avgLoss, avgErr) or, when validate also reports an accuracy,
    (avgLoss, avgErr, avgAcc).
    """
    useWeight = config.UseSampleWeight(modelSpecs)

    batchLosses, batchErrs = [], []
    batchAccs, batchSizes = [], []

    ## without sample weight, np.average receives weights=None, i.e., a plain mean over batches
    lossWeights = [] if useWeight else None
    errWeights = [] if useWeight else None

    for batch in validData:
        arrays = ToFloatX(ToNonSharedArray(batch))
        ## all elements but the last one; the last one is still passed to validate()
        features = arrays[:-1]

        result = validate(*arrays)
        batchLosses.append(result[0])
        batchErrs.append(result[1])

        if len(result) > 2:
            batchAccs.append(result[2])
            ## the first dimension of the first array is the number of proteins in this batch
            batchSizes.append(features[0].shape[0])

        if not useWeight:
            continue

        ## the weight tensors are the last len(responses) elements of features
        responses = modelSpecs['responses']
        perResponse = [(res, np.sum(w))
                       for res, w in zip(responses, features[-len(responses):])]

        lossWeights.append([wSum for _, wSum in perResponse])

        ## one response may contribute several error values, so its weight is repeated
        oneErrWeight = []
        for res, wSum in perResponse:
            oneErrWeight.extend([wSum] * GetResponseValueDims(res))
        errWeights.append(oneErrWeight)

    ## loss and error are normalized by the weight of each minibatch, which is equivalent to loss and error per residue pair
    avgLoss = np.average(batchLosses, axis=0, weights=lossWeights)
    avgErr = np.average(batchErrs, axis=0, weights=errWeights)

    if len(batchAccs) > 0 and len(batchSizes) > 0:
        ## the accuracy is weighted by the number of proteins only, i.e., it is a per-protein accuracy
        avgAcc = np.average(batchAccs, axis=0, weights=batchSizes)
        return avgLoss, avgErr, avgAcc

    return avgLoss, avgErr
def AddLabel2OneBatch(names,
                      batch,
                      modelSpecs,
                      sharedLabelPool,
                      sharedLabelWeightPool,
                      floatType=theano.config.floatX):

    numSeqs = len(names)
    for name in names:
        if (not sharedLabelPool.has_key(name)) or (
                not sharedLabelWeightPool.has_key(name)):
            print 'the label or label weight matrix does not exist for protein ', name
            exit(1)

    seqLens = [sharedLabelWeightPool[name].shape[0] for name in names]

    ## get the boundingbox for this batch
    if not config.TrainByRefLoss(modelSpecs):
        box = batch[-1]
    else:
        box = batch[-2]

    top, left, bottom, right = box
    assert bottom - top == right - left
    boxsize = bottom - top

    if boxsize < max(seqLens) and numSeqs > 1:
        ## make sure that there is only one protein in this batch
        print 'ERROR: when one batch has a large protein, it can only have one protein'
        exit(1)

    ## we crop pairwise labels at this step to save memory and computational time
    maxMatrixSize = min(boxsize, max(seqLens))

    ## Y shall be a list of 2D or 3D matrices, each for one response
    Y = []
    for response in modelSpecs['responses']:
        labelName, labelType, _ = ParseResponse(response)
        dataType = np.int16
        if not config.IsDiscreteLabel(labelType):
            dataType = floatType
        rValDims = GetResponseValueDims(response)
        if rValDims == 1:
            y = np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize),
                         dtype=dataType)
            Y.append(y)

        else:
            y = np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize,
                                rValDims),
                         dtype=dataType)
            Y.append(y)

    ## when Y is empty, weight is useless. So When Y is empty, weight shall also be empty
    weightMatrix = []
    if bool(Y) and config.UseSampleWeight(modelSpecs):
        weightMatrix = [
            np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize),
                     dtype=floatType)
        ] * len(modelSpecs['responses'])

    for j, name, seqLen in zip(range(len(names)), names, seqLens):

        ## we align all matrices in the bottom/right corner
        ## posInX and posInY are the starting position of one protein in the final output tensor
        ## here X and Y refer to x-axis and y-axis
        posInX = -min(boxsize, seqLen)
        posInY = -min(boxsize, seqLen)

        for y, response in zip(Y, modelSpecs['responses']):

            if boxsize < seqLen:
                tmp = sharedLabelPool[name][response][top:bottom, left:right]
            else:
                tmp = sharedLabelPool[name][response]
            if len(y.shape) == 3:
                y[j, posInX:, posInY:] = tmp
            else:
                y[j, posInX:, posInY:, ] = tmp

        labelWeightMatrix = sharedLabelWeightPool[name]
        for w, response in zip(weightMatrix, modelSpecs['responses']):
            if boxsize < seqLen:
                w[j, posInX:,
                  posInY:] = labelWeightMatrix[response][top:bottom,
                                                         left:right]
            else:
                w[j, posInX:, posInY:] = labelWeightMatrix[response]

    ## the input batch contains bounding box
    tail = 1

    ## check to see if the input batch contains one flag for RefState
    if config.TrainByRefLoss(modelSpecs):
        tail += 1

    newbatch = batch[:-tail]
    newbatch.extend(Y)
    newbatch.extend(weightMatrix)
    newbatch.extend(batch[-tail:])

    return newbatch
def AssembleOneBatch(data,
                     modelSpecs,
                     forRefState=False,
                     bounds=None,
                     floatType=theano.config.floatX,
                     bUseSharedMemory=False):
    if not data:
        print 'WARNING: the list of data is empty'
        return None

    numSeqs = len(data)
    seqLens = [d['seqLen'] for d in data]
    names = [d['name'] for d in data]

    ## use maxSeqLen and minSeqLen for sequential features
    ## we do not crop sequential features at this step since the theano deep model will do so after 1D convolution operation
    maxSeqLen = max(seqLens)
    minSeqLen = min(seqLens)
    #print 'maxSeqLen= ', maxSeqLen, 'minSeqLen= ', minSeqLen

    numSeqFeatures = FeatureUtils.DetermineNumSeqFeatures(
        data[0]['seqFeatures'])
    X1d = np.zeros(shape=(numSeqs, maxSeqLen, numSeqFeatures), dtype=floatType)

    numMatrixFeatures = FeatureUtils.DetermineNumMatrixFeatures(
        data[0]['matrixFeatures']) + FeatureUtils.DetermineNumMatrixFeatures(
            data[0]['matrixFeatures_nomean'])
    ## we use maxMatrixSize and minMatrixSize for pairwise features
    ## we crop pairwise features at this step to save memory and computational time
    minMatrixSize, maxMatrixSize = CalcMinMaxMatrixSize(bounds, seqLens)

    if bUseSharedMemory:
        shmX2d = SharedNDArray(
            (numSeqs, maxMatrixSize, maxMatrixSize, numMatrixFeatures),
            dtype=floatType,
            name='/RaptorX-' + str(os.getppid()) + '-X2d-' + randomString(6))
        X2d = shmX2d.array
        X2d[:] = 0
    else:
        X2d = np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize,
                              numMatrixFeatures),
                       dtype=floatType)

    X1dem = None
    if data[0].has_key('embedFeatures'):
        numEmbedFeatures = data[0]['embedFeatures'].shape[1]
        X1dem = np.zeros(shape=(numSeqs, maxSeqLen, numEmbedFeatures),
                         dtype=floatType)

    ## Y shall be a list of 2D or 3D matrices, each for one response
    Y = []
    if data[0].has_key('atomLabelMatrix'):
        for response in modelSpecs['responses']:
            labelName, labelType, _ = ParseResponse(response)
            dataType = np.int16
            if not config.IsDiscreteLabel(labelType):
                dataType = floatType
            rValDims = GetResponseValueDims(response)
            if rValDims == 1:
                y = np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize),
                             dtype=dataType)
                Y.append(y)

            else:
                y = np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize,
                                    rValDims),
                             dtype=dataType)
                Y.append(y)

    ## when Y is empty, weight is useless. So When Y is None, weight shall also be None
    weightMatrix = []
    if bool(Y) and config.UseSampleWeight(modelSpecs):
        weightMatrix = [
            np.zeros(shape=(numSeqs, maxMatrixSize, maxMatrixSize),
                     dtype=floatType)
        ] * len(modelSpecs['responses'])

    ## for mask. we do not used shared ndarray for them since they are small
    M1d = np.zeros(shape=(numSeqs, maxSeqLen - minSeqLen), dtype=np.int8)
    M2d = np.zeros(shape=(numSeqs, maxMatrixSize - minMatrixSize,
                          maxMatrixSize),
                   dtype=np.int8)

    if bounds is not None:
        boxes = bounds
    else:
        boxes = [None] * len(data)

    for j, d, box in zip(range(len(data)), data, boxes):
        seqLen = d['seqLen']

        ## posInSeq, posInX and posInY are the starting position of one protein in the final output tensor
        posInSeq = -seqLen

        ## here X and Y refer to x-axis and y-axis
        if box is not None:
            top, left, bottom, right = box
            posInX = -(bottom - top)
            posInY = -(right - left)
        else:
            posInX = -seqLen
            posInY = -seqLen

        if forRefState:
            ## this code needs reexamination, it may not be correct when d['seqFeatures']/d['matrixFeatures'] is represented as a list of arrays instead of a single array
            X1d[j, posInSeq:, :] = np.array(
                [modelSpecs['seqFeatures_expected']] * seqLen).reshape(
                    (seqLen, -1))

            tmp = [modelSpecs['matrixFeatures_expected']] * (seqLen * seqLen)
            tmp2 = np.array(tmp).reshape((seqLen, seqLen, -1))
            tmp3 = np.concatenate((tmp2, d['matrixFeatures_nomean']), axis=2)
            if box is not None:
                X2d[j, posInX:, posInY:, :] = tmp3[top:bottom, left:right, ]
            else:
                X2d[j, posInX:, posInY:, :] = tmp3
        else:
            if isinstance(d['seqFeatures'], np.ndarray):
                X1d[j, posInSeq:, :] = d['seqFeatures']
            else:
                startPos = 0
                for f in d['seqFeatures']:
                    if len(f.shape) == 1:
                        X1d[j, posInSeq:,
                            startPos:startPos + 1] = f[:, np.newaxis]
                        startPos += 1
                    elif len(f.shape) == 2:
                        X1d[j, posInSeq:, startPos:startPos + f.shape[1]] = f
                        startPos = startPos + f.shape[1]
                    else:
                        print 'wrong shape in sequential feature: ', f.shape
                        exit(1)

            # add 2D features in matrixFeatures to holder staring from the start position
            # holder is a 3D array and start is the starting position in the 3rd dimension
            def Add2DFeatures(matrixFeatures, holder, start):
                if isinstance(matrixFeatures, np.ndarray):
                    features = [matrixFeatures]
                else:
                    features = matrixFeatures

                startPos = start
                #for f in matrixFeatures:
                for f in features:
                    if len(f.shape) == 2:
                        endPos = startPos + 1
                        if box is None:
                            holder[:, :, startPos:endPos] = f[:, :, np.newaxis]
                        else:
                            holder[:, :,
                                   startPos:endPos] = f[top:bottom, left:right,
                                                        np.newaxis]
                    elif len(f.shape) == 3:
                        endPos = startPos + f.shape[2]
                        if box is None:
                            holder[:, :, startPos:endPos] = f
                        else:
                            holder[:, :, startPos:endPos] = f[top:bottom,
                                                              left:right, :]
                    else:
                        print 'wrong shape in matrixFeatures: ', f.shape
                        exit(1)
                    startPos = endPos

                return endPos

            end = Add2DFeatures(d['matrixFeatures'], X2d[j, posInX:,
                                                         posInY:, :], 0)
            Add2DFeatures(d['matrixFeatures_nomean'], X2d[j, posInX:,
                                                          posInY:, :], end)

        M1d[j, posInSeq:].fill(1)
        M2d[j, posInX:, posInY:].fill(1)

        if X1dem is not None:
            ## embed feature is always represented as a single array, so the code shall be correct
            if forRefState:
                X1dem[j, posInSeq:, :] = np.array(
                    [modelSpecs['embedFeatures_expected']] * seqLen).reshape(
                        (seqLen, -1))
            else:
                X1dem[j, posInSeq:, :] = d['embedFeatures']

        for y, response in zip(Y, modelSpecs['responses']):
            if box is not None:
                tmp = d['atomLabelMatrix'][response][top:bottom, left:right]
            else:
                tmp = d['atomLabelMatrix'][response]
            if len(y.shape) == 3:
                y[j, posInX:, posInY:] = tmp
            else:
                y[j, posInX:, posInY:, ] = tmp

        if bool(weightMatrix):
            if d.has_key('labelWeightMatrix'):
                labelWeightMatrix = d['labelWeightMatrix']
            else:
                labelWeightMatrix = LabelUtils.CalcLabelWeightMatrix(
                    d['atomLabelMatrix'], modelSpecs, floatType=floatType)

        for w, response in zip(weightMatrix, modelSpecs['responses']):
            if box is not None:
                w[j, posInX:,
                  posInY:] = labelWeightMatrix[response][top:bottom,
                                                         left:right]
            else:
                w[j, posInX:, posInY:] = labelWeightMatrix[response]

    if bUseSharedMemory:
        onebatch = [X1d, shmX2d, M1d, M2d]
    else:
        onebatch = [X1d, X2d, M1d, M2d]

    if X1dem is not None:
        onebatch.append(X1dem)

    onebatch.extend(Y)
    onebatch.extend(weightMatrix)

    return onebatch, names
def BuildModel(modelSpecs, forTrain=True):
    """Create the symbolic inputs and the ResNet4DistMatrix predictor.

    modelSpecs: model specification passed on to ResNet4DistMatrix;
        modelSpecs['responses'] is read when forTrain is True
    forTrain: when True, the bounding box, label and weight variables needed
        for training are created as well

    Returns, for training,
        (distancePredictor, x, y, xmask, ymask, xem, labelList, weightList, box, trainByRefLoss)
    and for prediction
        (distancePredictor, x, y, xmask, ymask, xem).
    xem is None unless config.EmbeddingUsed(modelSpecs) holds, and
    trainByRefLoss is None unless config.TrainByRefLoss(modelSpecs) holds.
    """
    rng = np.random.RandomState()

    ## x is for sequential features and y for matrix (or pairwise) features
    x = T.tensor3('x')
    y = T.tensor4('y')

    ## mask for x and y, respectively
    xmask = T.bmatrix('xmask')
    ymask = T.btensor3('ymask')

    ## the embedding input exists only when the model uses embedding
    xem = T.tensor3('xem') if config.EmbeddingUsed(modelSpecs) else None

    ## bounding box for crop of a big protein distance matrix. This box allows crop at any position.
    box = T.ivector('boundingbox') if forTrain else None

    ## trainByRefLoss can be either 1 or -1. When this variable exists, we train the model using both reference loss and the loss of real data
    trainByRefLoss = None
    if forTrain and config.TrainByRefLoss(modelSpecs):
        trainByRefLoss = T.iscalar('trainByRefLoss')

    distancePredictor = ResNet4DistMatrix(rng,
                                          seqInput=x,
                                          matrixInput=y,
                                          mask_seq=xmask,
                                          mask_matrix=ymask,
                                          embedInput=xem,
                                          boundingbox=box,
                                          modelSpecs=modelSpecs)

    ## for prediction, neither labels nor weights are needed
    if not forTrain:
        return distancePredictor, x, y, xmask, ymask, xem

    ## labelList is a list of label tensors, each having shape (batchSize, seqLen, seqLen) or (batchSize, seqLen, seqLen, valueDims[response] )
    labelList = []
    for response in modelSpecs['responses']:
        isDiscrete = Response2LabelType(response).startswith('Discrete')
        isVector = GetResponseValueDims(response) > 1

        ## a vector-valued response needs a 4-d tensor; wtensor is for 16bit integer
        if isDiscrete:
            makeTensor = T.wtensor4 if isVector else T.wtensor3
        else:
            makeTensor = T.tensor4 if isVector else T.tensor3

        labelList.append(makeTensor('Tlabel4' + response))

    ## weightList is a list of label weight tensors, each having shape (batchSize, seqLen, seqLen)
    weightList = []
    if labelList and config.UseSampleWeight(modelSpecs):
        for response in modelSpecs['responses']:
            weightList.append(T.tensor3('Tweight4' + response))

    return distancePredictor, x, y, xmask, ymask, xem, labelList, weightList, box, trainByRefLoss
    def errors(self, zList, weightList=None):
        errs = []
        if weightList is not None and len(weightList) > 0:
            for res, predictor, z, w, o in zip(self.responses, self.predictors,
                                               zList, weightList,
                                               self.outputList):
                labelType = Response2LabelType(res)
                numLabels = GetResponseProbDims(res)

                ## if the label type is Discrete25C, Discrete52C, Discrete12C
                if res in config.allAtomPairNames and labelType.startswith(
                        'Discrete') and numLabels > 3:
                    assert (z.ndim == 3 and GetResponseValueDims(res) == 1)
                    o2 = o.flatten(3)
                    ## here we convert 12C, 25C, and 52C to 3C for error calculation, which makes the result easier to interpret
                    errs.append(
                        self.errors4one(
                            z,
                            o2,
                            weight=w,
                            distLabelType=labelType[len('Discrete'):]))
                else:
                    ## call the error function of each predictor
                    if (z.ndim == 3):
                        zflat = z.flatten().dimshuffle(0, 'x')
                    elif (z.ndim == 4):
                        zflat = z.dimshuffle(3, 0, 1,
                                             2).flatten(2).dimshuffle(1, 0)
                    else:
                        print 'unsupported ndim for z in errors():', z.ndim
                        exit(1)

                    assert (w.ndim == 3)
                    wflat = w.flatten().dimshuffle(0, 'x')
                    e = predictor.errors(zflat, sampleWeight=wflat)
                    ## e is a tensor with ndim=1
                    errs.append(e)

        else:
            for res, predictor, z, o in zip(self.responses, self.predictors,
                                            zList, self.outputList):
                labelType = Response2LabelType(res)
                numLabels = GetResponseProbDims(res)
                if res in config.allAtomPairNames and labelType.startswith(
                        'Discrete') and numLabels > 3:
                    assert (z.ndim == 3 and GetResponseValueDims(res) == 1)
                    o2 = o.flatten(3)
                    errs.append(
                        self.errors4one(
                            z, o, distLabelType=labelType[len('Discrete'):]))
                else:
                    ## call the error function of each predictor
                    if (z.ndim == 3):
                        zflat = z.flatten().dimshuffle(0, 'x')
                    elif (z.ndim == 4):
                        zflat = z.dimshuffle(3, 0, 1,
                                             2).flatten(2).dimshuffle(1, 0)
                    else:
                        print 'unsupported ndim for z in errors():', z.ndim
                        exit(1)
                    e = predictor.errors(zflat)
                    ## e is a tensor with ndim=1
                    errs.append(e)

        return T.concatenate(errs)
    def __init__(self,
                 rng,
                 seqInput,
                 matrixInput,
                 mask_seq=None,
                 mask_matrix=None,
                 embedInput=None,
                 boundingbox=None,
                 modelSpecs=None):
        """
	seqInput has shape (batchSize, seqLen, n_in_seq)
	matrixInput has shape (batchSize, seqLen, seqLen, n_in_matrix)
	mask_seq has shape (batchSize, #cols_to_be_masked)
        mask_matrix has shape (batchSize, #rows_to_be_masked, seqLen)
	embedInput has shape (batchSize, seqLen, n_in2)
	boundingbox is a vector of 4 integer elements: top, left, bottom and right. boundingbox shall only be applied to the matrix converted from sequential features.
        """

        assert (modelSpecs is not None)

        self.modelSpecs = modelSpecs
        self.responses = modelSpecs['responses']

        ## set the number of hidden neurons and number of layers
        n_in_seq = modelSpecs['n_in_seq']
        n_in_matrix = modelSpecs['n_in_matrix']
        n_hiddens_seq = modelSpecs['conv1d_hiddens']
        n_hiddens_matrix = modelSpecs['conv2d_hiddens']
        n_hiddens_logreg = modelSpecs['logreg_hiddens']
        seq_repeats = modelSpecs['conv1d_repeats']
        matrix_repeats = modelSpecs['conv2d_repeats']

        ## half win size for convolutional operation
        if modelSpecs['network'].startswith('DilatedResNet'):
            hwsz_matrix = modelSpecs['conv2d_hwszs']
            hwsz_seq = [modelSpecs['conv1d_hwsz']] * len(n_hiddens_seq)
            dilation_seq = [1] * len(n_hiddens_seq)
            dilation_matrix = modelSpecs['conv2d_dilations']
        else:
            hwsz_matrix = modelSpecs['halfWinSize_matrix']
            hwsz_seq = modelSpecs['halfWinSize_seq']

        ## masks to reduce impact of padding zeros
        self.mask_1d = mask_seq
        self.mask_2d = mask_matrix

        self.layers = []

        act = T.nnet.relu
        if modelSpecs['activation'] == 'TANH':
            act = T.tanh

        # sequence convolution
        if modelSpecs['network'].startswith('DilatedResNet'):
            #seqConv = DilatedResNet(rng, input=seqInput, n_in=n_in_seq, n_hiddens=n_hiddens_seq, n_repeats=seq_repeats, halfWinSize=hwsz_seq, dilation=dilation_seq, mask=mask_seq, activation=act, batchNorm=modelSpecs['batchNorm'], version=modelSpecs['network'])
            seqConv = DilatedResNet(rng,
                                    input=seqInput,
                                    n_in=n_in_seq,
                                    n_hiddens=n_hiddens_seq,
                                    n_repeats=seq_repeats,
                                    halfWinSize=hwsz_seq,
                                    dilation=dilation_seq,
                                    mask=mask_seq,
                                    activation=act,
                                    modelSpecs=modelSpecs)
        else:
            seqConv = ResNet(rng,
                             input=seqInput,
                             n_in=n_in_seq,
                             n_hiddens=n_hiddens_seq,
                             n_repeats=seq_repeats,
                             halfWinSize=hwsz_seq,
                             mask=mask_seq,
                             activation=act,
                             batchNorm=modelSpecs['batchNorm'],
                             version=modelSpecs['network'])
        self.layers.append(seqConv)

        ## transform 1d sequence to 2d matrix
        seq2matrixMode = modelSpecs['seq2matrixMode']
        seq2matrixLayers = []
        embedLayers = []

        ## determine if we shall use the sequential features or not. The sequential features include sequence profile (PSSM), predicted secondary structure and predicted solvent accessibility
        ## useSequentialFeatures is True by default
        ##useSequentialFeatures = ( modelSpecs.has_key('UseSequentialFeatures') and (modelSpecs['UseSequentialFeatures'] is True) )

        ## use OuterConcatenation operation to convert sequence features into pairwise features
        if seq2matrixMode.has_key('OuterCat') and config.UseSequentialFeatures:

            ##midpointfeature has shape (batchSize, seqLen, seqLen, n_midpoint_out)
            midpointfeature, n_midpoint_out = MidpointFeature(seqConv.output,
                                                              seqConv.n_out,
                                                              box=boundingbox)

            ##remove noise in midpointfeature
            ## mask_matrix is used to reduce noise introduced by padding positions
            mid_subtensor = midpointfeature[:, :mask_matrix.shape[1], :, :]
            midpointfeature = T.set_subtensor(
                mid_subtensor,
                T.mul(mask_matrix.dimshuffle(0, 1, 2, 'x'), mid_subtensor))
            mid_subtensor2 = midpointfeature[:, :, :mask_matrix.shape[1], :]
            midpointfeature = T.set_subtensor(
                mid_subtensor2,
                T.mul(mask_matrix.dimshuffle(0, 2, 1, 'x'), mid_subtensor2))

            ## here we use convolution with halfWinSize=0 to reduce model complexity
            compressLayer = Conv2D4DistMatrix(
                rng,
                input=midpointfeature,
                n_in=n_midpoint_out,
                n_hiddens=seq2matrixMode['OuterCat'],
                halfWinSize=0,
                mask=mask_matrix)
            #compressLayer = Conv2D4DistMatrix(rng, input=midpointfeature, n_in=n_midpoint_out, n_hiddens=seq2matrixMode['OuterCat'], halfWinSize=0, mask=None )
            seq2matrixLayers.append(compressLayer)

        ## embedding primary sequence and/or predicted secondary structure
        if embedInput is not None:
            from EmbeddingLayer import EmbeddingLayer4AllRange

            if seq2matrixMode.has_key('Seq+SS'):
                n_out_embed = seq2matrixMode['Seq+SS']
            elif seq2matrixMode.has_key('SeqOnly'):
                n_out_embed = seq2matrixMode['SeqOnly']
            else:
                print 'At least one of two embedding modes Seq+SS or SeqOnly shall be specified.'
                exit(1)

            embedLayer = EmbeddingLayer4AllRange(embedInput,
                                                 modelSpecs['n_in_embed'],
                                                 n_out_embed,
                                                 box=boundingbox)
            seq2matrixLayers.append(embedLayer)
            embedLayers.append(embedLayer)
        """
	we do not use this profile embedding any more
	## embedding the sequence profile
	if seq2matrixMode.has_key('Profile') and useSequentialFeatures:
	    from EmbeddingLayer import ProfileEmbeddingLayer
	    pEmbedLayer = ProfileEmbeddingLayer(seqConv.output, seqConv.n_out, seq2matrixMode['Profile'])
	    seq2matrixLayers.append(pEmbedLayer)
	    embedLayers.append(pEmbedLayer)
	"""

        self.layers += seq2matrixLayers

        bUseCCMFnorm, bUseCCMsum, bUseCCMraw, bUseFullMI, bUseFullCov = config.ParseExtraCCMmode(
            modelSpecs)
        if (bUseCCMraw or bUseFullMI
                or bUseFullCov) and config.CompressMatrixInput(modelSpecs):
            ## here we add a compress layer to reduce the #channels of the original matrix input.
            n_hiddens4MatrixCompress = modelSpecs['hiddens4MatrixCompress']
            compressLayer4MatrixInput = Conv2D4DistMatrix(
                rng,
                input=matrixInput,
                n_in=n_in_matrix,
                n_hiddens=n_hiddens4MatrixCompress,
                halfWinSize=0,
                mask=mask_matrix)
            compressedMatrixInput = compressLayer4MatrixInput.output
            n_compressedMatrix = compressLayer4MatrixInput.n_out
            input_2d = T.concatenate(
                [compressedMatrixInput] +
                [layer.output for layer in seq2matrixLayers],
                axis=3)
            n_input2d = n_compressedMatrix + sum(
                [layer.n_out for layer in seq2matrixLayers])
        else:

            ##old code for merging original matrix input and sequential input
            input_2d = T.concatenate(
                [matrixInput] + [layer.output for layer in seq2matrixLayers],
                axis=3)
            n_input2d = n_in_matrix + sum(
                [layer.n_out for layer in seq2matrixLayers])

        #print 'n_input2d=', n_input2d

        if modelSpecs['network'].startswith('ResNet'):
            matrixConv = ResNet(rng,
                                input=input_2d,
                                n_in=n_input2d,
                                n_hiddens=n_hiddens_matrix,
                                n_repeats=matrix_repeats,
                                halfWinSize=hwsz_matrix,
                                mask=mask_matrix,
                                activation=act,
                                batchNorm=modelSpecs['batchNorm'],
                                version=modelSpecs['network'])

        elif modelSpecs['network'].startswith('DilatedResNet'):
            #matrixConv=DilatedResNet(rng, input=input_2d, n_in=n_input2d, n_hiddens=n_hiddens_matrix, n_repeats=matrix_repeats, halfWinSize=hwsz_matrix, dilation=dilation_matrix, mask=mask_matrix, activation=act, batchNorm=modelSpecs['batchNorm'], version=modelSpecs['network'])
            matrixConv = DilatedResNet(rng,
                                       input=input_2d,
                                       n_in=n_input2d,
                                       n_hiddens=n_hiddens_matrix,
                                       n_repeats=matrix_repeats,
                                       halfWinSize=hwsz_matrix,
                                       dilation=dilation_matrix,
                                       mask=mask_matrix,
                                       activation=act,
                                       modelSpecs=modelSpecs)
        else:
            print 'ERROR: Unimplemented deep network type: ', modelSpecs[
                'network']
            exit(1)

        self.layers.append(matrixConv)

        conv_out = matrixConv.output

        selected = conv_out.dimshuffle(3, 0, 1, 2).flatten(2).dimshuffle(1, 0)
        n_in4logreg = matrixConv.n_out

        self.outputList = []
        self.output_probList = []
        self.predictors = []

        self.params4var = []
        self.paramL14var = 0
        self.paramL24var = 0

        for res in modelSpecs['responses']:

            labelType = Response2LabelType(res)
            predictor = None

            if labelType.startswith('Discrete'):
                assert GetResponseValueDims(res) == 1
                predictor = NN4LogReg(rng=rng,
                                      input=selected,
                                      n_in=n_in4logreg,
                                      n_out=GetResponseProbDims(res),
                                      n_hiddens=n_hiddens_logreg)

            elif labelType.startswith('LogNormal') or labelType.startswith(
                    'Normal'):
                predictor = NN4Normal(rng=rng,
                                      input=selected,
                                      n_in=n_in4logreg,
                                      n_variables=GetResponseValueDims(res),
                                      n_out=GetResponseProbDims(res),
                                      n_hiddens=n_hiddens_logreg)

                ## recording parameters specific for variance prediction
                self.params4var += predictor.params4var
                self.paramL14var += predictor.paramL14var
                self.paramL24var += predictor.paramL24var

            else:
                print 'incorrect response name or label type: ', res
                exit(1)

            self.layers.append(predictor)
            self.predictors.append(predictor)

            ## output in 2d matrix
            output_2d = predictor.y_pred.reshape(
                (conv_out.shape[0], conv_out.shape[1], conv_out.shape[2],
                 GetResponseValueDims(res)))
            output_2d_prob = predictor.output.reshape(
                (conv_out.shape[0], conv_out.shape[1], conv_out.shape[2],
                 GetResponseProbDims(res)))

            self.outputList.append(output_2d)
            self.output_probList.append(output_2d_prob)

        self.output = T.concatenate(self.outputList, axis=3)
        self.output_prob = T.concatenate(self.output_probList, axis=3)

        ## collect all the model parameters and their norms
        self.params = []
        self.paramL2 = 0
        self.paramL1 = 0

        for layer in self.layers:
            self.params += layer.params
            self.paramL2 += layer.paramL2
            self.paramL1 += layer.paramL1
        """