Example #1
0
def _post_float(post, key):
    """Return ``post[key]`` as a float, or None when it cannot be used.

    None is returned when the key is missing, when the text is not numeric,
    or when it parses to NaN. NaN has to be rejected explicitly: every
    comparison against NaN is False, so it would pass all the range checks.
    """
    try:
        value = float(post[key])
    except (KeyError, TypeError, ValueError):
        return None
    if value != value:  # NaN is the only float that is unequal to itself
        return None
    return value


def _post_int(post, key):
    """Return ``post[key]`` as an int, or None if it is missing or not an integer."""
    try:
        return int(post[key])
    except (KeyError, TypeError, ValueError):
        return None


def whole_pdb_submit(request):
    """Validate the whole-PDB culling form and start the culling request.

    Every field is checked so that all problems are reported together. When
    anything is wrong, 'whole_pdb_culling.html' is rendered again with the
    submitted values, the list of error messages ('errorMessage') and one
    ``error...`` flag per offending field. Otherwise a PDBCullRequest is
    saved, the cull is started and the user is redirected to the 'sent' view.

    Raises a KeyError-style exception if one of the non-numeric form fields
    (check boxes, radio buttons, email) is absent from the POST data.
    """
    post = request.POST
    errorDict = {}
    incorrect = []

    def fail(flag, message):
        # Record the template flag for the field and the message shown to the user.
        errorDict[flag] = True
        incorrect.append(message)

    skipNonXray = post['skipNonXray']
    skipAlphaCarbon = post['skipAlphaCarbon']
    cullMethod = post['cullMethod']
    intraEntryCull = post['intraEntryCull']
    email = post['email']

    # Sequence identity threshold.
    percentIdentity = _post_float(post, 'pc')
    if percentIdentity is None:
        fail('errorPercent', u'The value for the sequence identity threshold must be greater than or equal to 5, and less than 100')
    elif percentIdentity < 5 or percentIdentity >= 100:
        fail('errorPercent', u"The value entered for the percentage: " + str(percentIdentity) + " is not within the permissible range of values. The percentage must be greater than or equal to 5, and less than 100.")

    # Resolution range. The maximum is validated even when the minimum is
    # unusable, so the user sees both problems at once.
    minResolution = _post_float(post, 'minRes')
    if minResolution is None:
        fail('errorMinRes', u'The value for the minimum resolution must be a number.')
    else:
        if minResolution < 0:
            fail('errorMinRes', u"The value entered for the minimum resolution: " + str(minResolution) + " is less than 0.")
        if minResolution > 100:
            fail('errorMinRes', u"The value entered for the minimum resolution: " + str(minResolution) + " is greater than 100.")

    maxResolution = _post_float(post, 'maxRes')
    if maxResolution is None:
        fail('errorMaxRes', u'The value for the maximum resolution must be a number.')
    else:
        if maxResolution < 0:
            fail('errorMaxRes', u"The value entered for the maximum resolution: " + str(maxResolution) + " is less than 0.")
        if maxResolution > 100:
            fail('errorMaxRes', u"The value entered for the maximum resolution: " + str(maxResolution) + " is greater than 100.")
        if minResolution is not None and minResolution > maxResolution:
            fail('errorMinRes', u"The value entered for the minimum resolution: " + str(minResolution) + " is greater than the value for the maximum resolution: " + str(maxResolution) + ".")

    # Maximum R value.
    maxRVal = _post_float(post, 'maxRVal')
    if maxRVal is None:
        fail('errorRVal', u'The value for the R value must be a number in the range 0 - 1.')
    elif maxRVal < 0 or maxRVal > 1:
        fail('errorRVal', u"The value entered for the maximum R value: " + str(maxRVal) + " is not within the permissible range of values. The range allowed is: 0 to 1.")

    # Sequence length limits; -1 is stored for a limit that is not enforced.
    enforceMinLength = post['enforceMinLength'] == 'yes'
    enforceMaxLength = post['enforceMaxLength'] == 'yes'
    minLength = -1
    maxLength = -1
    if enforceMinLength:
        minLength = _post_int(post, 'minLength')
        if minLength is None:
            fail('errorMinLength', u'The value for the minimum sequence length must be an integer.')
        elif minLength < 0:
            fail('errorMinLength', u'The value for the minimum length must be greater than or equal to 0')
    if enforceMaxLength:
        maxLength = _post_int(post, 'maxLength')
        if maxLength is None:
            fail('errorMaxLength', u'The value for the maximum sequence length must be an integer.')
        else:
            if maxLength < 0:
                fail('errorMaxLength', u'The value for the maximum length must be greater than or equal to 0')
            # The two limits can only be compared when a usable minimum was given.
            if enforceMinLength and minLength is not None and minLength > maxLength:
                fail('errorMinLength', u"The value entered for the minimum length: " + str(minLength) + " is greater than the value for the maximum length: " + str(maxLength) + ".")

    # \Z instead of $: $ also matches just before a trailing newline, which
    # would let an address ending in '\n' through.
    if not re.match(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]+\Z", email):
        fail('errorEmail', u"The email address entered does not appear to be a valid email address.")

    # The within-entry threshold only matters when culling by entry.
    if cullMethod == 'entry' and intraEntryCull == 'yes':
        intraEntryPC = _post_float(post, 'intraEntryPC')
        if intraEntryPC is None:
            fail('errorIntraEntry', u'The value for the within entry sequence identity threshold must be a number greater than or equal to 5, and less than 100.')
        elif intraEntryPC < 5 or intraEntryPC >= 100:
            fail('errorIntraEntry', u"The value entered for the within entry sequence identity threshold: " + str(intraEntryPC) + " is not within the permissible range of values. The value must be a number greater than or equal to 5, and less than 100.")
    elif intraEntryCull == 'yes':
        # Reported as a message only; no field flag is set for this case.
        incorrect.append(u'Intra entry culling is selected, but cull by entry is not.')

    if incorrect:
        # Re-display the form with the user's own input. The choice fields are
        # only passed back when they hold the values tested below.
        valueDict = {'errorMessage': incorrect, 'pc': post['pc'], 'minRes': post['minRes'],
                     'maxRes': post['maxRes'], 'maxRVal': post['maxRVal'],
                     'intraEntryPC': post['intraEntryPC'], 'email': post['email']}
        if skipNonXray == 'no':
            valueDict['skipNonXray'] = skipNonXray
        if skipAlphaCarbon == 'no':
            valueDict['skipAlphaCarbon'] = skipAlphaCarbon
        if cullMethod == 'entry':
            valueDict['cullMethod'] = cullMethod
        if intraEntryCull == 'yes':
            valueDict['intraEntryCull'] = intraEntryCull
        if enforceMinLength:
            valueDict['enforceMinLength'] = 'yes'
            valueDict['minLength'] = post['minLength']
        if enforceMaxLength:
            valueDict['enforceMaxLength'] = 'yes'
            valueDict['maxLength'] = post['maxLength']
        valueDict.update(errorDict)  # every flag in errorDict is True

        return render_to_response('whole_pdb_culling.html',
                                  valueDict,
                                  context_instance=RequestContext(request))

    # The within-entry threshold has only been validated when within-entry
    # culling is switched on. Otherwise the field may hold anything (or be
    # blank), so fall back to 100.0 rather than crash on float('').
    intraEntrySequenceIdentity = _post_float(post, 'intraEntryPC')
    if intraEntrySequenceIdentity is None:
        intraEntrySequenceIdentity = 100.0

    # Setup the database entry
    r = PDBCullRequest(
                       wholePDB=True,
                       sequenceIdentity=percentIdentity,
                       minResolution=minResolution,
                       maxResolution=maxResolution,
                       maxRValue=maxRVal,
                       minLength=minLength,
                       maxLength=maxLength,
                       skipNonXray=(skipNonXray == 'yes'),
                       skipAlphaCarbon=(skipAlphaCarbon == 'yes'),
                       cullByChain=(cullMethod == 'chain'),
                       performIntraEntryCulling=(intraEntryCull == 'yes'),
                       intraEntrySequenceIdentity=intraEntrySequenceIdentity,
                       email=str(email),
                       completed=False,
                       requestDate=datetime.datetime.now()
                       )
    r.save()

    cullinput.controlthread.RunCull(r, 'pdb')

    return HttpResponseRedirect(reverse('Leaf.views.sent', kwargs={'result_id': r.id, 'cullType' : 'pdb'}))
    def handle(self, *args, **options):
        """Regenerate every downloadable file derived from the PDB tables.

        In order, this:
          1. writes gzipped FASTA files of all chains ('pdbaa'), of the chains
             left after removing within-entry non-representatives
             ('pdbaaent') and of the chains left after removing whole-PDB
             non-representatives ('pdbaanr');
          2. dumps five database tables as tab-separated files, copies them
             into MEDIA_ROOT/TarData/PDBData and starts a tar of that
             directory;
          3. runs one culling request for each combination of resolution,
             R value and sequence identity, one at a time, and stores each
             result as a gzipped download.

        The start and finish times are appended to GENERATION.log. ``args``
        and ``options`` are not used.
        """
        logPath = '/srv/www/vhosts.d/www.bioinf/html/doig/cgi-bin/django_projects/LeafWebApp/ErrorLogs/GENERATION.log'

        def store_gzip(fileName, buf):
            """Save the finished gzip buffer as the download ``fileName`` and close it."""
            buf.seek(0, 2)  # move to the end so tell() gives the size
            upload = InMemoryUploadedFile(buf, 'image', fileName + '.gz', None, buf.tell(), None)
            entry = DownloadableFiles(fileName=fileName)
            entry.save()
            entry.downloadFile.save(upload.name, upload)
            buf.close()

        def group_representatives(records):
            """Return (set of non-representative chains, {representative: [chains it stands for]})."""
            ignored = set()
            represented = {}
            for record in records:
                ignored.add(record.nonreprChain)
                represented.setdefault(record.reprChain, []).append(record.nonreprChain)
            return ignored, represented

        def publish_fasta(fileName, chains, representedBy):
            """Write ``chains`` as gzipped FASTA and store it as ``fileName``.

            A chain found in ``representedBy`` gets '||' and the chains it
            represents appended to its header line.
            """
            buf = StringIO()
            gzf = GzipFile(fileobj=buf, mode='wb')
            for chain in chains:
                info = pdbChainsDict[chain]
                sequence = str(info.sequence)
                fields = ['>' + str(chain), str(len(sequence)), str(info.experimentType),
                          str(info.resolution), str(info.rValueObs), str(info.rValueFree),
                          ['no', 'yes'][info.alphaCarbonOnly], str(info.description),
                          '<' + str(info.dbName) + ' ' + str(info.dbCode) + '>',
                          '[' + str(info.organism) + ']']
                if chain in representedBy:
                    fields.append('||')
                    fields.extend(representedBy[chain])
                gzf.write('\t'.join(fields) + '\n')
                gzf.write(sequence + '\n')
            gzf.close()
            store_gzip(fileName, buf)

        def publish_table(fileName, model):
            """Dump every row of ``model`` as tab-separated text and store it as ``fileName``."""
            tableFile = NamedTemporaryFile()
            try:
                for row in model.objects.all().values_list():
                    tableFile.write('\t'.join([str(j) for j in row]) + '\n')
                entry = DownloadableFiles(fileName=fileName, downloadFile=File(tableFile))
                entry.save()
            finally:
                tableFile.close()

        def publish_cull(cull):
            """Gzip the non-redundant sequence file of a finished cull and store it."""
            if cull.skipNonXray:
                xrayCAInfo = ''
            elif cull.skipAlphaCarbon:
                xrayCAInfo = '_INCLNONXRAY'
            else:
                xrayCAInfo = '_INCLNONXRAY_INCLCAONLY'
            baseName = ('SeqIden_' + str(cull.sequenceIdentity) + '_Res_' + str(cull.maxResolution) +
                        '_RVal_' + str(cull.maxRValue) + xrayCAInfo)
            buf = StringIO()
            gzf = GzipFile(fileobj=buf, mode='wb')
            with open(cull.nonredSeq.path, 'r') as outputInfo:
                for line in outputInfo:
                    gzf.write(line)
            gzf.close()
            store_gzip(baseName, buf)

        os.umask(0)
        with open(logPath, 'a') as logger:
            logger.write('=======================================================\n=======================================================\nStarting generation on ' + time.strftime('%Y/%m/%d/ at %H:%M:%S', time.gmtime()) + '.\n')

        # Turn all the protein chain information into a dictionary.
        pdbChainsDict = dict((record.chain, record) for record in ProteinInformation.objects.all())

        # Group the within entry and the whole PDB representative information.
        # The non-representatives are kept in sets because they are tested
        # once per chain when the FASTA files are filtered below.
        entryRepIgnore, entryRepDict = group_representatives(EntryRepresentative.objects.all())
        wholeRepIgnore, wholeRepDict = group_representatives(Representative.objects.all())

        # Generate the whole PDB fasta files.
        publish_fasta('pdbaa', sorted(pdbChainsDict.keys()), {})
        publish_fasta('pdbaaent',
                      sorted(chain for chain in pdbChainsDict if chain not in entryRepIgnore),
                      entryRepDict)
        publish_fasta('pdbaanr',
                      sorted(chain for chain in pdbChainsDict if chain not in wholeRepIgnore),
                      wholeRepDict)

        # Generate the files needed to run the culling locally (i.e. make files of the different database tables).
        publish_table('ProteinInformation', ProteinInformation)
        publish_table('ChainType', ChainType)
        publish_table('AllPDBEntries', AllPDBEntries)
        publish_table('Similarity', Similarity)
        publish_table('Representative', Representative)

        # Copy the table dumps next to each other so they can be archived together.
        destinationDir = settings.MEDIA_ROOT + 'TarData/PDBData'
        for tableName in ('AllPDBEntries', 'ChainType', 'ProteinInformation', 'Representative', 'Similarity'):
            # Takes the first stored download with this name.
            stored = DownloadableFiles.objects.filter(fileName__exact=tableName)
            subprocess.call(['cp', str(stored[0].downloadFile.path), destinationDir + '/' + tableName + '.txt'])
        # The tar is started but not waited for, and its output pipe is never read.
        subprocess.Popen(['tar', '-zcvf', 'PDBData.tar.gz', 'PDBData'], cwd=settings.MEDIA_ROOT + 'TarData', stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        # Collect the first column of every ChainType row whose second column is 'Protein'.
        proteinChains = []
        with open(destinationDir + '/ChainType.txt', 'r') as readChainTypeData:
            for line in readChainTypeData:
                chunks = line.strip().split('\t')
                if chunks[1] == 'Protein':
                    proteinChains.append(chunks[0])
        proteinChains = '\n'.join(proteinChains)

        # Generate the culled PDB lists.
        resolutionsOfInterest = [1.6, 1.8, 2.0, 2.2, 2.5, 3.0]
        rValuesOfInterest = [0.25, 1.0]
        sequenceIdentities = [20, 25, 30, 40, 50, 60, 70, 80, 90]
        tuplesToDo = [(i, j, k) for i in resolutionsOfInterest for j in rValuesOfInterest for k in sequenceIdentities]
        # A resolution of 100.0 marks the runs that keep non-X-ray and alpha-carbon-only entries.
        tuplesToDo.extend([(100.0, 1.0, i) for i in sequenceIdentities])
        maxConcurrent = 1  # Only allow 1 culling operation to occur at once, to prevent overloading.
        updatesPerformed = []
        while tuplesToDo or updatesPerformed:
            # Continue looping until both lists are empty. If both are empty then all updates have been performed.
            updatesDone = []
            for cull in updatesPerformed:
                # Determine if any of the culling operations that are currently running have finished.
                # NOTE(review): the request is not reloaded from the database here, so this
                # presumably relies on RunCull updating this same object - confirm.
                if cull.completed:
                    publish_cull(cull)
                    updatesDone.append(cull)
                    cull.delete()
            for cull in updatesDone:
                updatesPerformed.remove(cull)

            if not tuplesToDo or len(updatesPerformed) >= maxConcurrent:
                # Nothing can be started right now, so wait before polling again.
                time.sleep(30)
            else:
                # Fill the buffer of culling operations up.
                while len(updatesPerformed) < maxConcurrent and tuplesToDo:
                    resolution, rValue, seqIdentity = tuplesToDo.pop()
                    includeEverything = resolution == 100.0
                    r = PDBCullRequest(
                                       wholePDB=False,
                                       sequenceIdentity=seqIdentity,
                                       minResolution=0.0,
                                       maxResolution=resolution,
                                       maxRValue=rValue,
                                       minLength=-1,
                                       maxLength=-1,
                                       skipNonXray=not includeEverything,
                                       skipAlphaCarbon=not includeEverything,
                                       cullByChain=True,
                                       performIntraEntryCulling=False,
                                       intraEntrySequenceIdentity=100.0,
                                       email='No Email',
                                       completed=False,
                                       requestDate=datetime.datetime.now()
                                       )
                    r.save()
                    r.userInput.save('', ContentFile(proteinChains))
                    cullinput.controlthread.RunCull(r, 'pdb')
                    updatesPerformed.append(r)

        with open(logPath, 'a') as logger:
            logger.write('Finished generation on ' + time.strftime('%Y/%m/%d/ at %H:%M:%S', time.gmtime()) + '.\n')
Example #3
0
def _parse_number(post, key, cast):
    """Parse ``post[key]`` with ``cast`` (``float`` or ``int``).

    Returns the parsed number, or ``None`` when the key is absent, the text
    cannot be parsed, or the result is NaN.  NaN is rejected explicitly
    because it compares false against every bound and would otherwise pass
    all of the range checks performed by the caller.
    """
    try:
        value = cast(post[key])
    except (KeyError, TypeError, ValueError):
        return None
    if value != value:  # only NaN is unequal to itself
        return None
    return value


def _check_pdb_identifiers(inputList, cullMethod):
    """Check a list of user-supplied identifiers with cullinput.checkPDBinput.

    When ``cullMethod`` is ``'chain'``, every four-character token that
    matches ``ProteinInformation`` rows by entry is replaced by the chains of
    those rows; all other tokens are passed through unchanged.  For any other
    ``cullMethod`` the tokens are checked as entries.

    Returns whatever ``cullinput.checkPDBinput.main`` returns, which the
    caller unpacks as ``(retCode, retVal)``.
    """
    chainInfo = ChainType.objects.all()
    if cullMethod == 'chain':
        expanded = set()
        for token in inputList:
            if len(token) == 4:
                # Possibly an entry: expand it to its chains when it is known.
                chainsFromEntry = ProteinInformation.objects.filter(entry__exact=token)
                if len(chainsFromEntry) == 0:
                    expanded.add(token)
                else:
                    for record in chainsFromEntry:
                        expanded.add(str(record.chain))
            else:
                expanded.add(token)
        chainDict = {}
        for info in chainInfo:
            chainDict[info.chain] = info.chainType
        return cullinput.checkPDBinput.main(list(expanded), allChains=chainDict, checkType='chain')

    allEntries = AllPDBEntries.objects.all().values_list('entry', flat=True)
    allProtEntries = set([info.chain[:4] for info in chainInfo if info.chainType == 'Protein'])
    return cullinput.checkPDBinput.main(inputList, allEntries=allEntries, allProtEntries=allProtEntries, checkType='entry')


def user_pdb_submit(request):
    """Validate the user PDB culling form and, if it is valid, start a cull.

    Reads the form fields from ``request.POST`` and an optional file from
    ``request.FILES['upload']``.  Each problem found adds a message to the
    error list and a flag to the error dictionary.

    Returns:
        * the rendered ``user_pdb_culling.html`` template, populated with the
          messages, the submitted values and the error flags, when any check
          failed;
        * otherwise an ``HttpResponseRedirect`` to ``Leaf.views.sent`` after a
          ``PDBCullRequest`` has been saved and handed to
          ``cullinput.controlthread.RunCull``.

    Raises:
        KeyError: if one of the mandatory form fields read directly from
            ``request.POST`` is missing.
    """
    errorDict = {}
    incorrect = []
    post = request.POST

    pastedChains = post['pastedInfo'].strip().split('\n')
    speciesDropBox = post['speciesDropBox']
    speciesTextBox = post['speciesTextBox']
    try:
        uploaded = request.FILES['upload']
    except KeyError:
        uploaded = ""
    skipNonXray = post['skipNonXray']
    skipAlphaCarbon = post['skipAlphaCarbon']
    cullMethod = post['cullMethod']
    intraEntryCull = post['intraEntryCull']
    email = post['email']

    # Sequence identity threshold.
    percentIdentity = _parse_number(post, 'pc', float)
    if percentIdentity is None:
        errorDict['errorPercent'] = True
        incorrect.append(u'The value for the sequence identity threshold must be greater than or equal to 5, and less than 100.')
    elif percentIdentity < 5 or percentIdentity >= 100:
        errorDict['errorPercent'] = True
        incorrect.append(u"The value entered for the percentage: " + str(percentIdentity) + " is not within the permissible range of values. The percentage must be greater than or equal to 5, and less than 100.")

    # Resolution bounds.  The minimum is reported before the maximum, and the
    # min/max ordering check only runs when both values parsed.
    minResolution = _parse_number(post, 'minRes', float)
    if minResolution is None:
        errorDict['errorMinRes'] = True
        incorrect.append(u'The value for the minimum resolution must be a number.')
    else:
        if minResolution < 0:
            errorDict['errorMinRes'] = True
            incorrect.append(u"The value entered for the minimum resolution: " + str(minResolution) + " is less than 0.")
        if minResolution > 100:
            errorDict['errorMinRes'] = True
            incorrect.append(u"The value entered for the minimum resolution: " + str(minResolution) + " is greater than 100.")

    maxResolution = _parse_number(post, 'maxRes', float)
    if maxResolution is None:
        errorDict['errorMaxRes'] = True
        incorrect.append(u'The value for the maximum resolution must be a number.')
    else:
        if maxResolution < 0:
            errorDict['errorMaxRes'] = True
            incorrect.append(u"The value entered for the maximum resolution: " + str(maxResolution) + " is less than 0.")
        if maxResolution > 100:
            errorDict['errorMaxRes'] = True
            incorrect.append(u"The value entered for the maximum resolution: " + str(maxResolution) + " is greater than 100.")
        if minResolution is not None and minResolution > maxResolution:
            errorDict['errorMinRes'] = True
            incorrect.append(u"The value entered for the minimum resolution: " + str(minResolution) + " is greater than the value for the maximum resolution: " + str(maxResolution) + ".")

    # Maximum R value.
    maxRVal = _parse_number(post, 'maxRVal', float)
    if maxRVal is None:
        errorDict['errorRVal'] = True
        incorrect.append(u'The maximum R value must be a number in the range 0 - 1.')
    elif maxRVal < 0 or maxRVal > 1:
        errorDict['errorRVal'] = True
        incorrect.append(u"The value entered for the maximum R value: " + str(maxRVal) + " is not within the permissible range of values. The range allowed is: 0 to 1.")

    # Sequence length limits; -1 is stored for a limit that is not enforced.
    enforceMinLength = post['enforceMinLength'] == 'yes'
    enforceMaxLength = post['enforceMaxLength'] == 'yes'
    minLength = -1
    maxLength = -1
    if enforceMinLength:
        minLength = _parse_number(post, 'minLength', int)
        if minLength is None:
            errorDict['errorMinLength'] = True
            incorrect.append(u'The value for the minimum sequence length must be an integer.')
        elif minLength < 0:
            errorDict['errorMinLength'] = True
            incorrect.append(u'The value for the minimum length must be greater than or equal to 0')
    if enforceMaxLength:
        maxLength = _parse_number(post, 'maxLength', int)
        if maxLength is None:
            errorDict['errorMaxLength'] = True
            incorrect.append(u'The value for the maximum sequence length must be an integer.')
        else:
            if maxLength < 0:
                errorDict['errorMaxLength'] = True
                incorrect.append(u'The value for the maximum length must be greater than or equal to 0')
            if enforceMinLength and minLength is not None and minLength > maxLength:
                errorDict['errorMinLength'] = True
                incorrect.append(u"The value entered for the minimum length: " + str(minLength) + " is greater than the value for the maximum length: " + str(maxLength) + ".")

    if not re.match(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]+$", email):
        errorDict['errorEmail'] = True
        incorrect.append(u"The email address entered does not appear to be a valid email address.")

    # Determine whether the user input is correct.  Each element is True when
    # the corresponding source (pasted, uploaded, drop box, text box) is empty;
    # exactly one source must be supplied.
    errorList = [pastedChains == [''], uploaded == "", speciesDropBox == 'Nothing', speciesTextBox == '']
    if errorList.count(True) == 4:
        errorDict['errorPasted'] = True
        errorDict['errorUploaded'] = True
        errorDict['errorSpeciesDrop'] = True
        errorDict['errorSpeciesText'] = True
        incorrect.append(u"You have not provided a source of chains/entries to cull.")
    elif errorList.count(False) != 1:
        if not errorList[0]:
            errorDict['errorPasted'] = True
        if not errorList[1]:
            errorDict['errorUploaded'] = True
        if not errorList[2]:
            errorDict['errorSpeciesDrop'] = True
        if not errorList[3]:
            errorDict['errorSpeciesText'] = True
        incorrect.append(u'You have provided more than one source of chains/entries to cull.')
    elif not errorList[0]:
        # Pasted chains/entries.
        inputList = str(post['pastedInfo']).split()
        retCode, retVal = _check_pdb_identifiers(inputList, cullMethod)
        if retCode != 0:
            errorDict['errorPasted'] = True
            incorrect.append(retVal)
    elif not errorList[1]:
        # Uploaded file: join the chunks once instead of repeated concatenation.
        fileString = b''.join(uploaded.chunks())
        if not isinstance(fileString, str):
            # Chunks arrived as bytes that are not the native str type.
            fileString = fileString.decode('utf-8', 'replace')
        inputList = fileString.split()
        retCode, retVal = _check_pdb_identifiers(inputList, cullMethod)
        if retCode != 0:
            errorDict['errorUploaded'] = True
            incorrect.append(retVal)
    elif not errorList[2]:
        # Species chosen from the drop box.
        allChains = ProteinInformation.objects.filter(organism__iexact=speciesDropBox)
        if cullMethod == 'chain':
            allChains = [str(i.chain) for i in allChains]
            if len(allChains) < 2:
                errorDict['errorSpeciesDrop'] = True
                incorrect.append(u'There are less than 2 chains in the PDB from the organism you selected.')
            retVal = '\n'.join(allChains)
        else:
            allEntries = list(set([str(i.entry) for i in allChains]))
            if len(allEntries) < 2:
                errorDict['errorSpeciesDrop'] = True
                incorrect.append(u'There are less than 2 entries in the PDB from the organism you selected.')
            retVal = '\n'.join(allEntries)
    else:
        # Species typed into the text box.
        allChains = ProteinInformation.objects.filter(organism__iexact=speciesTextBox)
        if cullMethod == 'chain':
            allChains = [i.chain for i in allChains]
            if len(allChains) < 2:
                errorDict['errorSpeciesText'] = True
                incorrect.append(u'There are less than 2 chains in the PDB from the organism you entered.' +
                                 ' This is possibly due to the species being spelled incorrectly.')
            retVal = '\n'.join(allChains)
        else:
            allEntries = list(set([i.entry for i in allChains]))
            if len(allEntries) < 2:
                errorDict['errorSpeciesText'] = True
                incorrect.append(u'There are less than 2 entries in the PDB from the organism you entered.' +
                                 ' This is possibly due to the species being spelled incorrectly.')
            retVal = '\n'.join(allEntries)

    # Within-entry sequence identity threshold.
    intraEntryPC = None
    if cullMethod == 'entry' and intraEntryCull == 'yes':
        intraEntryPC = _parse_number(post, 'intraEntryPC', float)
        if intraEntryPC is None:
            errorDict['errorIntraEntry'] = True
            incorrect.append(u'The value for the within entry sequence identity threshold must be a number greater than or equal to 5, and less than 100.')
        elif intraEntryPC < 5 or intraEntryPC >= 100:
            errorDict['errorIntraEntry'] = True
            incorrect.append(u"The value entered for the within entry sequence identity threshold: " + str(intraEntryPC) + " is not within the permissible range of values. The value must be a number greater than or equal to 5, and less than 100.")
    elif intraEntryCull == 'yes':
        incorrect.append(u'Intra entry culling is selected, but cull by entry is not.')
    else:
        # Intra-entry culling is off, so the field was never validated.  Keep
        # the submitted number when it parses, otherwise store 100.0 rather
        # than letting a blank field raise when the request is created.
        intraEntryPC = _parse_number(post, 'intraEntryPC', float)
        if intraEntryPC is None:
            intraEntryPC = 100.0

    if incorrect != []:
        # Re-render the form with the messages, the submitted values and one
        # flag per failed field.
        valueDict = {'errorMessage': incorrect, 'pastedChains' : post['pastedInfo'],
                     'speciesDropBox' : post['speciesDropBox'], 'speciesTextBox' : post['speciesTextBox'],
                     'pc' : post['pc'], 'minRes' : post['minRes'], 'maxRes' : post['maxRes'],
                     'maxRVal' : post['maxRVal'], 'intraEntryPC' : post['intraEntryPC'],
                     'email' : post['email']
                     }
        if skipNonXray == 'no':
            valueDict['skipNonXray'] = skipNonXray
        if skipAlphaCarbon == 'no':
            valueDict['skipAlphaCarbon'] = skipAlphaCarbon
        if cullMethod == 'entry':
            valueDict['cullMethod'] = cullMethod
        if intraEntryCull == 'yes':
            valueDict['intraEntryCull'] = intraEntryCull
        if enforceMinLength:
            valueDict['enforceMinLength'] = 'yes'
            valueDict['minLength'] = post['minLength']
        if enforceMaxLength:
            valueDict['enforceMaxLength'] = 'yes'
            valueDict['maxLength'] = post['maxLength']
        for key in errorDict:
            valueDict[key] = True

        return render_to_response('user_pdb_culling.html',
                                  valueDict,
                                  context_instance=RequestContext(request))

    # Setup the database entry; the yes/no and chain/entry choices are stored
    # as booleans.
    r = PDBCullRequest(
                       wholePDB=False,
                       sequenceIdentity=percentIdentity,
                       minResolution=minResolution,
                       maxResolution=maxResolution,
                       maxRValue=maxRVal,
                       minLength=minLength,
                       maxLength=maxLength,
                       skipNonXray=(skipNonXray == 'yes'),
                       skipAlphaCarbon=(skipAlphaCarbon == 'yes'),
                       cullByChain=(cullMethod == 'chain'),
                       performIntraEntryCulling=(intraEntryCull == 'yes'),
                       intraEntrySequenceIdentity=intraEntryPC,
                       email=str(email),
                       completed=False,
                       requestDate=datetime.datetime.now()
                       )
    r.save()
    r.userInput.save('', ContentFile(retVal))

    cullinput.controlthread.RunCull(r, 'pdb')

    return HttpResponseRedirect(reverse('Leaf.views.sent', kwargs={'result_id': r.id, 'cullType' : 'pdb'}))