Code Example #1
File: test.py Project: Kortemme-Lab/klab
def test_sifts_module():
    failures = []
    ddG_pdb_ids = ['107L','108L','109L','110L','111L','112L','113L','114L','115L','118L','119L','120L','122L','123L','125L','126L','127L','128L','129L','130L','131L','137L','149L','150L','151L','160L','161L','162L','163L','164L','165L','168L','169L','171L','172L','173L','190L','191L','192L','195L','196L','1A23','1A2I','1A2P','1A3Y','1A43','1A4Y','1A53','1A5E','1A70','1A7A','1A7H','1A7V','1AAL','1AAR','1AAZ','1ABE','1ACB','1ADO','1ADW','1AG2','1AG4','1AG6','1AIE','1AIN','1AJ3','1AJQ','1AKK','1AKM','1AM7','1AMQ','1ANF','1ANK','1ANT','1AO6','1AON','1AOZ','1APC','1APL','1APS','1AQH','1AR1','1ARR','1ATJ','1ATN','1AU1','1AUT','1AV1','1AVR','1AX1','1AXB','1AYE','1AYF','1AZP','1B0O','1B26','1B5M','1B8J','1BAH','1BAN','1BAO','1BCX','1BD8','1BET','1BF4','1BFM','1BGD','1BGL','1BJP','1BKE','1BKS','1BLC','1BMC','1BNI','1BNL','1BNS','1BNZ','1BOY','1BP2','1BPI','1BPL','1BPR','1BPT','1BRF','1BRG','1BRH','1BRI','1BRJ','1BRK','1BSA','1BSB','1BSC','1BSD','1BSE','1BSR','1BTA','1BTI','1BTM','1BUJ','1BVC','1BVU','1BZO','1C0L','1C17','1C2R','1C52','1C53','1C5G','1C6P','1C9O','1CAH','1CBW','1CDC','1CEA','1CEY','1CHK','1CHO','1CHP','1CLW','1CM7','1CMB','1CMS','1COA','1COK','1COL','1CPM','1CSP','1CTS','1CUN','1CUS','1CVW','1CX1','1CX8','1CYC','1CYO','1D0X','1D1G','1DAQ','1DDN','1DE3','1DEC','1DEQ','1DFO','1DFX','1DHN','1DIL','1DIV','1DJU','1DKG','1DKT','1DLC','1DM0','1DO9','1DPM','1DTD','1DTO','1DVC','1DVF','1DVV','1DXX','1DYA','1DYB','1DYC','1DYD','1DYE','1DYF','1DYG','1DYJ','1E21','1E6K','1E6L','1E6M','1E6N','1EDH','1EFC','1EG1','1EHK','1EKG','1EL1','1ELV','1EMV','1EQ1','1ERU','1ESF','1ETE','1EVQ','1EW4','1EXG','1EZA','1F88','1FAJ','1FAN','1FC1','1FEP','1FGA','1FKB','1FKJ','1FLV','1FMK','1FMM','1FNF','1FR2','1FRD','1FTG','1FTT','1FXA','1G6N','1G6V','1G6W','1GA0','1GAD','1GAL','1GAY','1GAZ','1GB0','1GB2','1GB3','1GB7','1GBX','1GD1','1GF8','1GF9','1GFA','1GFE','1GFG','1GFH','1GFJ','1GFK','1GFL','1GFR','1GFT','1GFU','1GFV','1GKG','1GLH','1GLM','1GOB','1GPC','1GQ2','1GRL','1GRX','1GSD','1GTM','1GTX','1GUY','1GXE','1H09','1H0C','1H2I','1H7M','1H8V','1HA4','1HCD','1HEM','1HEN','1HEO','1HEP','1HEQ','1HER','1HEV','1HFY','1HFZ','1HGH','1HGU','1HIB','1HIC','1HIO','1HIX','1HK0','1HME','1HML','1HNG','1HNL','1HOR','1HQK','1HTI','1HUE','1HXN','1HYN','1HYW','1HZ6','1I4N','1I5T','1IAR','1IC2','1IDS','1IFB','1IFC','1IGS','1IGV','1IHB','1IMQ','1INQ','1INU','1IO2','1IOB','1IOF','1IOJ','1IR3','1IRL','1IRO','1ISK','1IX0','1J0X','1J4S','1J7N','1JAE','1JBK','1JHN','1JIW','1JJI','1JKB','1JNK','1JTD','1JTG','1JTK','1K23','1K3B','1K40','1K9Q','1KA6','1KBP','1KDN','1KDU','1KDX','1KEV','1KFD','1KFW','1KJ1','1KKJ','1KTQ','1KUM','1KVA','1KVB','1KVC','1L00','1L02','1L03','1L04','1L05','1L06','1L07','1L08','1L09','1L10','1L11','1L12','1L13','1L14','1L15','1L16','1L17','1L18','1L19','1L20','1L21','1L22','1L23','1L24','1L33','1L34','1L36','1L37','1L38','1L40','1L41','1L42','1L43','1L44','1L45','1L46','1L47','1L48','1L49','1L50','1L51','1L52','1L53','1L54','1L55','1L56','1L57','1L59','1L60','1L61','1L62','1L63','1L65','1L66','1L67','1L68','1L69','1L70','1L71','1L72','1L73','1L74','1L75','1L76','1L77','1L85','1L86','1L87','1L88','1L89','1L90','1L91','1L92','1L93','1L94','1L95','1L96','1L97','1L98','1L99','1LAV','1LAW','1LBI','1LFO','1LHH','1LHI','1LHJ','1LHK','1LHL','1LHM','1LHP','1LLI','1LMB','1LOZ','1LPS','1LRA','1LRE','1LRP','1LS4','1LSN','1LUC','1LVE','1LYE','1LYF','1LYG','1LYH','1LYI','1LYJ','1LZ1','1M7T','1MAX','1MBD','1MBG','1MCP','1MGR','1MJC','1MLD','1MSI','1MUL','1MX2','1MX4','1MX6','1MYK','1MYL','1N02','1N0J','1NAG','1NM1','1NZI','1OA2','
1OA3','1OCC','1OH0','1OIA','1OKI','1OLR','1OMU','1ONC','1OPD','1ORC','1OSA','1OSI','1OTR','1OUA','1OUB','1OUC','1OUD','1OUE','1OUF','1OUG','1OUH','1OUI','1OUJ','1OVA','1P2M','1P2N','1P2O','1P2P','1P2Q','1P3J','1PAH','1PBA','1PCA','1PDO','1PGA','1PHP','1PII','1PIN','1PK2','1PMC','1POH','1PPI','1PPN','1PPP','1PQN','1PRE','1PRR','1Q5Y','1QEZ','1QGV','1QHE','1QJP','1QK1','1QLP','1QLX','1QM4','1QND','1QQR','1QQV','1QT6','1QT7','1QU0','1QU7','1QUW','1R2R','1RBN','1RBP','1RBR','1RBT','1RBU','1RBV','1RCB','1RDA','1RDB','1RDC','1REX','1RGC','1RGG','1RH1','1RHD','1RHG','1RIL','1RIS','1RN1','1ROP','1RRO','1RTB','1RTP','1RX4','1S0W','1SAK','1SAP','1SCE','1SEE','1SFP','1SHF','1SHG','1SHK','1SMD','1SPD','1SPH','1SSO','1STF','1STN','1SUP','1SYC','1SYD','1SYE','1SYG','1T3A','1T7C','1T8L','1T8M','1T8N','1T8O','1TBR','1TCA','1TCY','1TEN','1TFE','1TGN','1THQ','1TI5','1TIN','1TIT','1TLA','1TML','1TMY','1TOF','1TPE','1TPK','1TTG','1TUP','1TUR','1U5P','1UBQ','1UCU','1UOX','1URK','1UW3','1UWO','1UZC','1V6S','1VAR','1VFB','1VIE','1VQA','1VQB','1VQC','1VQD','1VQE','1VQF','1VQG','1VQH','1VQI','1VQJ','1W3D','1W4E','1W4H','1W99','1WIT','1WLG','1WPW','1WQ5','1WQM','1WQN','1WQO','1WQP','1WQQ','1WQR','1WRP','1WSY','1XAS','1XY1','1Y4Y','1Y51','1YAL','1YAM','1YAN','1YAO','1YAP','1YAQ','1YCC','1YEA','1YGV','1YHB','1YMB','1YNR','1YPA','1YPB','1YPC','1YPI','1Z1I','1ZNJ','200L','206L','216L','217L','219L','221L','224L','227L','230L','232L','233L','235L','236L','237L','238L','239L','240L','241L','242L','243L','244L','246L','247L','253L','254L','255L','2A01','2A36','2ABD','2AC0','2ACE','2ACY','2ADA','2AFG','2AIT','2AKY','2ASI','2ATC','2B4Z','2BBM','2BQA','2BQB','2BQC','2BQD','2BQE','2BQF','2BQG','2BQH','2BQI','2BQJ','2BQK','2BQM','2BQN','2BQO','2BRD','2CBR','2CHF','2CI2','2CPP','2CRK','2CRO','2DQJ','2DRI','2EQL','2FAL','2FHA','2FX5','2G3P','2GA5','2GSR','2GZI','2HEA','2HEB','2HEC','2HED','2HEE','2HEF','2HIP','2HMB','2HPR','2IFB','2IMM','2L3Y','2L78','2LZM','2MBP','2MLT','2NUL','2OCJ','2PDD','2PEC','2PEL','2PRD','2Q98','2RBI','2RN2','2RN4','2SNM','2SOD','2TMA','2TRT','2TRX','2TS1','2WSY','2ZAJ','2ZTA','3BCI','3BCK','3BD2','3BLS','3CHY','3D2A','3ECA','3FIS','3HHR','3MBP','3PGK','3PRO','3PSG','3SSI','3TIM','3VUB','451C','487D','4BLM','4CPA','4GCR','4LYZ','4SGB','4TLN','4TMS','5AZU','5CPV','5CRO','5MDH','5PEP','6TAA','7AHL','7PTI','8PTI','8TIM','9INS','9PCY',]
    for no_xml_case in ['1GTX', '1SEE', '1UOX', '1WSY', '1YGV', '2MBP']:
        ddG_pdb_ids.remove(no_xml_case)
    for bad_sifts_mapping_case in ['1N02', '487D']:
        ddG_pdb_ids.remove(bad_sifts_mapping_case)
    for no_pdb_uniprot_mapping_case in ['2IMM']:
        ddG_pdb_ids.remove(no_pdb_uniprot_mapping_case)

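    # Note: the two reassignments below discard the full list built above and leave only the
    # special cases ('1N02', '487D', '2IMM') to be exercised; this reads like a debugging
    # override that was left in place.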
    ddG_pdb_ids = ['1GTX', '1SEE', '1UOX', '1WSY', '1YGV', '2MBP']
    ddG_pdb_ids = ['1N02', '487D'] + ['2IMM']

    count = 1
    num_cases = len(ddG_pdb_ids)
    for pdb_id in ddG_pdb_ids:
        try:
            print('Case %d/%d: %s' % (count, num_cases, pdb_id))
            sifts_map = SIFTS.retrieve(pdb_id, cache_dir = cache_dir, acceptable_sequence_percentage_match = 80.0)
        except MissingSIFTSRecord:
            colortext.warning('No SIFTS XML exists for %s.' % pdb_id)
        except BadSIFTSMapping:
            colortext.warning('The SIFTS mapping for %s was considered a bad mapping at the time of writing.' % pdb_id)
        except NoSIFTSPDBUniParcMapping:
            colortext.warning('The SIFTS file for %s does not map to UniParc sequences at the time of writing.' % pdb_id)
        except Exception, e:
            colortext.warning(str(e))
            colortext.error(traceback.format_exc())
            failures.append(pdb_id)
        count += 1
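
For reference, the retrieval pattern this test exercises can be reduced to a short hedged sketch. The import path klab.bio.sifts is an assumption here (only SIFTS.retrieve and the three exception classes appear in the example above), and cache_dir is whatever local cache directory the caller provides.

from klab.bio.sifts import SIFTS, MissingSIFTSRecord, BadSIFTSMapping, NoSIFTSPDBUniParcMapping

def get_sifts_map(pdb_id, cache_dir):
    # Mirrors the call above, including the 80% acceptable sequence match threshold.
    try:
        return SIFTS.retrieve(pdb_id, cache_dir = cache_dir, acceptable_sequence_percentage_match = 80.0)
    except (MissingSIFTSRecord, BadSIFTSMapping, NoSIFTSPDBUniParcMapping):
        # These are the known, tolerated failure modes; anything else should propagate.
        return None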
Code Example #2
def _get_ResultSetFilter_data():
    s_module = "ddglib.ddgfilters"
    #clsmembers = inspect.getmembers(sys.modules[s_module], lambda member: member.__module__ == s_module and inspect.isclass)
    #clsmembers = inspect.getmembers(sys.modules[s_module], lambda member: member.inspect.isclass(member))

    m_filters = []
    m_resultsets = []
    d_filters = {}

    s_module = "ddglib.ddgfilters"
    for m in inspect.getmembers(sys.modules[s_module]):
        o = m[1]
        if inspect.isclass(o) and o.__module__ == s_module:
            classnm = m[0]
            if classnm.find("Filter") != -1:
                d = {"name": classnm, "class": o}
                m_filters.append(d)
                d_filters[classnm] = d
            elif classnm.find("ResultSet") != -1:
                e_Filter = "%sFilter" % classnm[:classnm.find("ResultSet")]
                e_Filter = o.allowed_filters
                #"%sFilter" % classnm[:classnm.find("ResultSet")]
                m_resultsets.append({
                    "name": classnm,
                    "class": o,
                    "filter": e_Filter
                })
            else:
                colortext.error("Unknown class '%s' found." % classnm)
    return m_filters, m_resultsets, d_filters
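
The discovery logic above is plain inspect-based reflection over a module's classes. A self-contained sketch of the same pattern, run against the current module instead of ddglib.ddgfilters (the Example* classes are stand-ins invented for this illustration):

import inspect
import sys

class ExampleFilter(object):
    pass

class ExampleResultSet(object):
    allowed_filters = [ExampleFilter]

def discover_classes(module_name):
    # Collect the classes defined in the named module, split by the same naming convention as above.
    filters, resultsets = [], []
    for name, obj in inspect.getmembers(sys.modules[module_name]):
        if inspect.isclass(obj) and obj.__module__ == module_name:
            if name.find("Filter") != -1:
                filters.append(name)
            elif name.find("ResultSet") != -1:
                resultsets.append(name)
    return filters, resultsets

print(discover_classes(__name__))  # (['ExampleFilter'], ['ExampleResultSet'])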
Code Example #3
File: fasta.py Project: Kortemme-Lab/klab
 def match(self, other):
     ''' This is a noisy terminal-printing function at present since there is no need to make it a proper API function.'''
     colortext.message("FASTA Match")
     for frompdbID, fromchains in sorted(self.iteritems()):
         matched_pdbs = {}
         matched_chains = {}
         for fromchain, fromsequence in fromchains.iteritems():
             for topdbID, tochains in other.iteritems():
                 for tochain, tosequence in tochains.iteritems():
                     if fromsequence == tosequence:
                         matched_pdbs[topdbID] = matched_pdbs.get(topdbID, set())
                         matched_pdbs[topdbID].add(fromchain)
                         matched_chains[fromchain] = matched_chains.get(fromchain, [])
                         matched_chains[fromchain].append((topdbID, tochain))
         foundmatches = []
         colortext.printf("  %s" % frompdbID, color="silver")
         for mpdbID, mchains in matched_pdbs.iteritems():
             if mchains == set(fromchains.keys()):
                 foundmatches.append(mpdbID)
                 colortext.printf("  PDB %s matched PDB %s on all chains" % (mpdbID, frompdbID), color="white")
         if foundmatches:
             for fromchain, fromsequence in fromchains.iteritems():
                 colortext.printf("    %s" % (fromchain), color = "silver")
                 colortext.printf("      %s" % (fromsequence), color = self.unique_sequences[fromsequence])
                 mstr = []
                 for mchain in matched_chains[fromchain]:
                     if mchain[0] in foundmatches:
                         mstr.append("%s chain %s" % (mchain[0], mchain[1]))
                 colortext.printf("	  Matches: %s" % ", ".join(mstr))
         else:
             colortext.error("    No matches found.")
Code Example #4
File: help.py Project: Kortemme-Lab/kddg
def _get_ResultSetFilter_data():
	s_module = "ddglib.ddgfilters"
	#clsmembers = inspect.getmembers(sys.modules[s_module], lambda member: member.__module__ == s_module and inspect.isclass)
	#clsmembers = inspect.getmembers(sys.modules[s_module], lambda member: member.inspect.isclass(member))
	
	m_filters = []
	m_resultsets = []
	d_filters = {}
	
	s_module = "ddglib.ddgfilters"
	for m in inspect.getmembers(sys.modules[s_module]):
		o = m[1]
		if inspect.isclass(o) and o.__module__ == s_module:
			classnm = m[0]
			if classnm.find("Filter") != -1:
				d = {"name" : classnm, "class" : o}
				m_filters.append(d)
				d_filters[classnm] = d 
			elif classnm.find("ResultSet") != -1:
				e_Filter = "%sFilter" % classnm[:classnm.find("ResultSet")]
				e_Filter = o.allowed_filters
				#"%sFilter" % classnm[:classnm.find("ResultSet")]
				m_resultsets.append({"name" : classnm, "class" : o, "filter" : e_Filter})
			else:
				colortext.error("Unknown class '%s' found." % classnm)
	return m_filters, m_resultsets, d_filters
Code Example #5
 def __init__(self, user, host, db, passwd, port = 3306, socket = '/var/lib/mysql/mysql.sock'):
     try:
         self.db_interface = DatabaseInterface({}, isInnoDB=True, numTries=1, host=host, db=db, user=user, passwd=passwd, port=port,
                  unix_socket=socket, passwdfile=None, use_utf=False, use_locking=True)
     except Exception, e:
         colortext.error('An exception was thrown trying to connect to the database.')
         colortext.warning(str(e))
         print(traceback.format_exc())
         sys.exit(1)
Code Example #6
File: analysis.py Project: Kortemme-Lab/kddg
    def plot(self, table_name, RFunction, output_filename = None, filetype = "pdf"):
        '''Results is expected to be a list of dicts, each of which has the keys ExperimentID and ddG.'''
        if (not self.analysis_tables) or (not table_name):
            raise Exception("There are no analysis tables to plot.")
        if not table_name in self.analysis_tables.keys():
            raise Exception("The analysis table '%s' does not exist." % table_name)

        R_return_values = {}
        gplot = None
        analysis_table = self.analysis_tables[table_name]
        if self.quiet_level >= 3:
            print(table_name)
            print(RFunction)
        if len(analysis_table.points) == 1:
            raise Exception("The analysis table %s set only has one data point. At least two points are required." % table_name)
        else:
            inputfname = self.CreateCSVFile(table_name)
            if self.quiet_level >= 3:
                print(inputfname)
            try:
                if self.quiet_level >= 2:
                    colortext.printf("Running %s." % RFunction)
                    if output_filename:
                        colortext.printf("Saving graph as %s with filename %s." % (filetype, output_filename))

                output_fname = output_filename
                if not output_fname:
                    output_fname = rosettahelper.writeTempFile(".", "")

                R_output = RFunction(inputfname, output_fname, filetype)
                R_return_values = RUtilities.parse_R_output(R_output)

                colortext.message(table_name)
                print("  %s" % str(RFunction))
                for k, v in sorted(R_return_values.iteritems()):
                    print("  %s: %s" % (str(k), str(v)))

                if not output_filename:
                    contents = rosettahelper.readBinaryFile(output_fname)
                    delete_file(output_fname)
                    description = None
                    for file_suffix, details in RFunctions.iteritems():
                        if details[1] == RFunction:
                            description = details[0]
                    assert(description)
                    gplot = AnalysisObject(table_name, description, filetype, contents)
                else:
                    gplot = output_filename

            except Exception, e:
                import traceback
                colortext.error(traceback.format_exc())
                delete_file(inputfname)
                raise Exception(e)
            delete_file(inputfname)
Code Example #7
def fix_1AYE_InputFiles(prediction_set):
    '''This is a one-off function which should be run at most once per prediction set, since each run changes the mutfile and that change should only be applied once.'''
    import pickle
    ddGdb = ddgdbapi.ddGDatabase()

    BadPredictions = sorted(set([(r['PredictionID'], r['Status']) for r in ddGdb.execute_select('''
    SELECT Prediction.ID AS PredictionID, Status FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperiment.ID=Prediction.UserDataSetExperimentID WHERE PredictionSet=%s AND PDBFileID='1AYE'
    ''', parameters=(prediction_set,))]))
    BadPredictionIDs = sorted(set([r[0] for r in BadPredictions]))
    print(BadPredictions)
    num_active = len([r for r in BadPredictions if r[1] == 'active'])
    num_queued = len([r for r in BadPredictions if r[1] == 'queued'])
    statuses = sorted(set([r[1] for r in BadPredictions]))
    if ('active' in statuses) or ('queued' in statuses):
        colortext.error("Cannot proceed - there are %d active jobs and %d queued in the list that need to be fixed up. Stop the DDG scheduler, remove the queued constraint, and rerun this function. " % (num_active, num_queued))
        if num_active:
            print("%d active jobs: %s" % (num_active, ", ".join([str(r[0]) for r in BadPredictions if r[1] == 'active'])))
        if num_queued:
            print("%d queued jobs: %s" % (num_queued, ", ".join([str(r[0]) for r in BadPredictions if r[1] == 'queued'])))
        return

    for PredictionID in BadPredictionIDs:
        r = ddGdb.execute_select("SELECT InputFiles FROM Prediction WHERE ID=%s", parameters=(PredictionID,))
        assert(len(r) == 1)
        r = r[0]

        InputFiles = pickle.loads(r['InputFiles'])
        assert(InputFiles.keys() == ['MUTFILE'])
        mutfile = InputFiles['MUTFILE']

        colortext.message("\n%d" % PredictionID)

        colortext.warning('original')
        print(mutfile)

        lines = mutfile.split("\n")
        assert(lines[0].startswith('total'))
        num_muts = int(lines[0][5:])
        assert(lines[1] == str(num_muts))
        for x in range(2, num_muts + 2):
            mutline = lines[x]
            tokens = mutline.split()
            tokens[1] = str(int(tokens[1]) - 1)
            lines[x] = " ".join(tokens)

        new_mutfile = "\n".join(lines)
        colortext.warning('fixed')
        print(new_mutfile)

        p = pickle.dumps({'MUTFILE' : new_mutfile})
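
The heart of the fix above is the mutfile rewrite: keep the 'total N' header and the count line, then shift the residue number in each mutation line. A self-contained sketch of just that transformation (the -1 shift applied to 1AYE is specific to that structure, not a general rule):

def shift_mutfile_residues(mutfile, offset = -1):
    # A mutfile starts with 'total N', then N on its own line, then N lines of 'WT_AA residue_number MUT_AA'.
    lines = mutfile.split("\n")
    assert(lines[0].startswith('total'))
    num_muts = int(lines[0][5:])
    assert(lines[1] == str(num_muts))
    for x in range(2, num_muts + 2):
        tokens = lines[x].split()
        tokens[1] = str(int(tokens[1]) + offset)
        lines[x] = " ".join(tokens)
    return "\n".join(lines)

print(shift_mutfile_residues("total 1\n1\nA 64 G"))  # -> 'total 1\n1\nA 63 G'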
Code Example #8
File: blast.py Project: Kortemme-Lab/klab
def test_sequences(b, sequences):
    failed_cases = []
    c = 0
    for sequence in sequences:
        try:
            c += 1
            colortext.message('\n{0}/{1}: {2}'.format(c, len(sequences), sequence))
            hits = b.by_sequence(sequence)
            if hits:
                colortext.warning('{0} hits: {1}'.format(len(hits), ','.join(hits)))
            else:
                colortext.warning('No hits')
        except Exception, e:
            colortext.error('FAILED')
            failed_cases.append((sequence, str(e), traceback.format_exc()))
Code Example #9
File: gcalendar.py Project: Kortemme-Lab/klab
    def updateEvents(self, calendar_id, newEvents):
        currentEvents = self.getEventsTable(calendar_id)

        #colortext.message(newEvents)
        #colortext.warning(currentEvents)

        # Events to remove
        toRemove = []
        for startdateTitle, event in sorted(currentEvents.iteritems()):
            if event["title"].find("birthday") != -1:
                # Don't remove birthdays
                continue
            if newEvents.get(startdateTitle):
                newEvent = newEvents[startdateTitle]
                if newEvent["enddate"] == event["enddate"]:
                    if event["location"].startswith(newEvent["location"]):
                        if str(newEvent["title"]) == str(event["title"]):
                            # Don't remove events which are in both newEvents and the calendar
                            continue

            # Remove events which are on the calendar but not in newEvents
            toRemove.append(startdateTitle)

        # Events to add
        toAdd = []
        for startdateTitle, event in sorted(newEvents.iteritems()):
            if currentEvents.get(startdateTitle):
                currentEvent = currentEvents[startdateTitle]
                if currentEvent["enddate"] == event["enddate"]:
                    if currentEvent["location"].startswith(event["location"]):
                        if str(currentEvent["title"]) == str(event["title"]):
                            # Don't add events which are in both newEvents and the calendar
                            continue
            # Add events which are in newEvents but not on the calendar
            toAdd.append(startdateTitle)

        if toRemove:
            colortext.error("Removing these %d events:" % len(toRemove))
            for dtTitle in toRemove:
                colortext.warning(dtTitle)
                self.removeEvent(calendar_id, currentEvents[dtTitle]["event"].id)

        if toAdd:
            colortext.message("Adding these %d events:" % len(toAdd))
            for dtTitle in toAdd:
                newEvent = newEvents[dtTitle]
                #print(dtTitle, newEvent)
                self.addNewEvent(calendar_id, dtTitle[0], newEvent["enddate"], newEvent["location"], newEvent["title"])
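
Stripped of the birthday exception and the per-field comparisons, updateEvents is a symmetric diff of two dictionaries keyed on (start date, title): calendar-only keys are removed, new-only keys are added. A minimal sketch of that underlying idea (the sample keys are invented for illustration):

def diff_events(current_events, new_events):
    # Keys present only on the calendar are removed; keys present only in the new set are added.
    to_remove = [k for k in sorted(current_events) if k not in new_events]
    to_add = [k for k in sorted(new_events) if k not in current_events]
    return to_remove, to_add

current = {('2014-01-06', 'Lab meeting'): {}, ('2014-01-13', 'Journal club'): {}}
new = {('2014-01-13', 'Journal club'): {}, ('2014-01-20', 'Lab meeting'): {}}
print(diff_events(current, new))  # ([('2014-01-06', 'Lab meeting')], [('2014-01-20', 'Lab meeting')])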
Code Example #10
File: blast.py Project: Kortemme-Lab/klab
def test_pdb_files(b, pdb_ids):

    failed_cases = []
    c = 0
    for pdb_id in pdb_ids:
        try:
            c += 1
            colortext.message('\n{0}/{1}: {2}'.format(c, len(pdb_ids), pdb_id))
            hits = b.by_pdb(pdb_id)
            if hits:
                colortext.warning('{0} hits: {1}'.format(len(hits), ','.join(hits)))
            else:
                colortext.warning('No hits')
        except Exception, e:
            colortext.error('FAILED')
            failed_cases.append((pdb_id, str(e), traceback.format_exc()))
Code Example #11
File: bonsai.py Project: Kortemme-Lab/klab
    def prune(self, arbitrary_atom_serial_numbers, sidechain_atom_serial_numbers = set(), atoms_serial_numbers_to_keep_in_cutting = set(), keep_CA_in_cutting = True, generate_pymol_session = True, bonsai_label = 'Bonsai', cutting_label = 'Cutting', pymol_executable = 'pymol'):
        '''Returns the content of two PDB files and (optionally) a PyMOL session and associated script.
           The first returned PDB file ("bonsai") is missing the ATOM (and any related ANISOU) and HETATM records identified by atom_serial_numbers.
           The second returned PDB file ("cutting") only contains ATOM, ANISOU, and HETATM records which are identified by atom_serial_numbers.
           Both PDB objects contain all records from the original PDB which are not ATOM, ANISOU, or HETATM records.

           If keep_CA_in_cutting is set, the cutting will also contain the associated Calpha atoms. This is useful purely
           to visualize the cutting in the PyMOL session. If a PyMOL session is not to be generated, this option should
           be set to False.
           '''
        bonsai = []
        cutting = []

        # Determine the set of sidechain residues in case keep_CA_in_cutting is True and we wish to keep those atoms in the cutting
        sidechain_residues = set()
        if keep_CA_in_cutting and sidechain_atom_serial_numbers:
            for line in self.indexed_lines:
                if line[0] == 'ATOM' and line[1] in sidechain_atom_serial_numbers:
                    residue_id = line[3].residue.id()
                    sidechain_residues.add(residue_id[0] + residue_id[1])

        atom_serial_numbers_to_remove = arbitrary_atom_serial_numbers.union(sidechain_atom_serial_numbers)
        for line in self.indexed_lines:
            if line[0]: # record type
                PDB_line = line[2]
                if line[1] in atom_serial_numbers_to_remove:
                    cutting.append(PDB_line)
                else:
                    if atoms_serial_numbers_to_keep_in_cutting and int(PDB_line[6:11]) in atoms_serial_numbers_to_keep_in_cutting:
                        cutting.append(PDB_line)
                    elif keep_CA_in_cutting and PDB_line[21:27] in sidechain_residues and PDB_line[12:16] == ' CA ':
                        cutting.append(PDB_line)
                    bonsai.append(PDB_line)
            else:
                bonsai.append(line[1])
                cutting.append(line[1])
        bonsai_pdb_content = '\n'.join(bonsai)
        cutting_pdb_content = '\n'.join(cutting)
        PSE_file, PSE_script = None, None
        try:
            PSE_file, PSE_script = self.generate_pymol_session(bonsai_pdb_content, cutting_pdb_content, bonsai_label = bonsai_label, cutting_label = cutting_label, pymol_executable = pymol_executable, settings = {})
        except Exception, e:
            colortext.error('Failed to generate the PyMOL session: "{0}"'.format(e))
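
The split itself only depends on the atom serial number stored in columns 7-11 of each ATOM record, exactly as read by int(PDB_line[6:11]) above. A self-contained sketch of that partition, ignoring the ANISOU/HETATM handling and the Calpha special case (the two ATOM lines are synthetic examples):

def partition_atom_records(pdb_lines, serial_numbers_to_cut):
    # Everything kept goes into the 'bonsai'; the removed atoms form the 'cutting'.
    bonsai, cutting = [], []
    for line in pdb_lines:
        if line.startswith('ATOM') and int(line[6:11]) in serial_numbers_to_cut:
            cutting.append(line)
        else:
            bonsai.append(line)
    return '\n'.join(bonsai), '\n'.join(cutting)

atom_lines = [
    'ATOM      1  N   ALA A   1      11.104   6.134  -6.504  1.00  0.00           N',
    'ATOM      2  CA  ALA A   1      12.560   6.351  -6.510  1.00  0.00           C',
]
print(partition_atom_records(atom_lines, set([2])))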
Code Example #12
File: schema.py Project: Kortemme-Lab/klab
    def _generate_schema_diagram(self, show_fk_only):

        tempfiles = []
        output_handle, sql_schema_filepath = open_temp_file('/tmp', ftype = 'w')
        tempfiles.append(sql_schema_filepath)
        try:
            #output_handle.write('%s\n\n' % self.db_schema)
            output_handle.write('%s\n\n' % self.sanitize_schema())#mysqldump_schema)
            output_handle.close()
        except:
            output_handle.close()

        try:
            png_handle, png_filepath = open_temp_file('/tmp', ftype = 'w')
            png_handle.close()
            tempfiles.append(png_filepath)

            c = [
                "sqlt-diagram",
                "-d=MySQL",
                "-i=png",
                "-t=%s database on %s" % (self.db, self.host),
                "-o=%s" % png_filepath,
                "--color",
                sql_schema_filepath,
                ]
            if show_fk_only:
                # Useful to print a smaller schema of just the primary/foreign keys
                c.append("--show-fk-only")

            p = subprocess.Popen(c, stdout=subprocess.PIPE)
            stdout, stderr = p.communicate()
            if not p.returncode == 0:
                if stderr:
                    raise colortext.Exception("Error - sqlt-diagram exited with %d: '%s'." % (p.returncode, stderr))
                else:
                    raise colortext.Exception("Error - sqlt-diagram exited with %d." % (p.returncode))

        except Exception, e:
            colortext.error('Failed!')
            print(str(e))
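
The diagram step is a plain subprocess call to SQL::Translator's sqlt-diagram command with the flags shown above. A minimal sketch, assuming sqlt-diagram is installed and the schema has already been dumped to a file:

import subprocess

def render_schema_diagram(sql_schema_filepath, png_filepath, show_fk_only = False):
    # Mirrors the invocation above; stderr is captured so a failure can be reported.
    c = ["sqlt-diagram", "-d=MySQL", "-i=png", "-o=%s" % png_filepath, "--color", sql_schema_filepath]
    if show_fk_only:
        c.append("--show-fk-only")
    p = subprocess.Popen(c, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        raise Exception("sqlt-diagram exited with %d: %s" % (p.returncode, stderr))
    return png_filepath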
Code Example #13
File: test.py Project: Kortemme-Lab/klab
def test_pdbml_speed():

    test_cases = [
        '1WSY',
        '1YGV',
        '487D',
        '1HIO',
        '1H38',
        '3ZKB',
    ]
    for test_case in test_cases:
        print("\n")

        colortext.message("Creating PDBML object for %s" % test_case)
        #PDBML.retrieve(test_case, cache_dir = cache_dir)

        print("")
        colortext.printf("Using the old minidom class", color = 'cyan')
        t1 = time.clock()
        p_minidom = PDBML_slow.retrieve(test_case, cache_dir = cache_dir)
        t2 = time.clock()
        colortext.message("Done in %0.2fs!" % (t2 - t1))

        print("")
        colortext.printf("Using the new sax class", color = 'cyan')
        t1 = time.clock()
        p_sax = PDBML.retrieve(test_case, cache_dir = cache_dir)
        t2 = time.clock()
        colortext.message("Done in %0.2fs!" % (t2 - t1))

        colortext.write("\nEquality test: ", color = 'cyan')
        try:
            assert(p_minidom.atom_to_seqres_sequence_maps.keys() == p_sax.atom_to_seqres_sequence_maps.keys())
            for c, s_1 in p_minidom.atom_to_seqres_sequence_maps.iteritems():
                s_2 = p_sax.atom_to_seqres_sequence_maps[c]
                assert(str(s_1) == str(s_2))
            colortext.message("passed\n")
        except:
            colortext.error("failed\n")
Code Example #14
File: gcalendar.py Project: Kortemme-Lab/klab
 def removeEvent(self, calendar_id, event_id):
     for i in range(3):
         try:
             assert(self.service.events().get(calendarId = self.configured_calendar_ids[calendar_id], eventId = event_id).execute())
             self.service.events().delete(calendarId = self.configured_calendar_ids[calendar_id], eventId = event_id).execute()
             break
         except Exception, e:
             colortext.error("An error occurred:")
             colortext.error(e)
             colortext.error("Trying again.")
             time.sleep(2)
Code Example #15
File: gcalendar.py Project: Kortemme-Lab/klab
    def addNewEvent(self, calendar_id, startdate, enddate, location, title):
        colortext.message("\nAdding %s on %s at %s" % (title, startdate, location))

        #start_time = startdate.strftime('%Y-%m-%dT%H:%M:%S').isoformat()
        #end_time =	 enddate.strftime('%Y-%m-%dT%H:%M:%S').isoformat()
        start_time = startdate.isoformat()
        end_time =	 enddate.isoformat()

        loc = location
        if loc.startswith("Tahoe"):
            loc = "%s, 10 minutes outside Truckee, CA @ 39.328455,-120.184078" % loc
        else:
            if location.startswith("BH "):
                loc = "%s, Byers Hall" % loc
            loc = "%s, removeEvent/Mission Bay, San Francisco, CA @ 37.767952,-122.392214" % loc

        for i in range(3):
            try:
                self.service.events().insert(
                    calendarId = self.configured_calendar_ids[calendar_id],
                    body = {
                        "start" : {
                            "timeZone" : self.timezone_string,
                            "dateTime" : start_time,
                        },
                        "end" : {
                            "timeZone" : self.timezone_string,
                            "dateTime" : end_time,
                        },
                        "location" : loc,
                        "summary" : title,
                        "description" : title
                    }).execute()
                break
            except Exception, e:
                colortext.error("An error occurred:")
                colortext.error(traceback.format_exc())
                colortext.error(e)
                colortext.error("Trying again.")
                time.sleep(2)
Code Example #16
File: test.py Project: Kortemme-Lab/klab
def test_ddg_pdb_ids():

    # Test set - 845 PDB IDs. A small number required manual intervention but most are parsed and mapped automatically. 5 needed to use the SIFTS mappings.

    ddG_pdb_ids = ['107L','108L','109L','110L','111L','112L','113L','114L','115L','118L','119L','120L','122L','123L','125L','126L','127L','128L','129L','130L','131L','137L','149L','150L','151L','160L','161L','162L','163L','164L','165L','168L','169L','171L','172L','173L','190L','191L','192L','195L','196L','1A23','1A2I','1A2P','1A3Y','1A43','1A4Y','1A53','1A5E','1A70','1A7A','1A7H','1A7V','1AAL','1AAR','1AAZ','1ABE','1ACB','1ADO','1ADW','1AG2','1AG4','1AG6','1AIE','1AIN','1AJ3','1AJQ','1AKK','1AKM','1AM7','1AMQ','1ANF','1ANK','1ANT','1AO6','1AON','1AOZ','1APC','1APL','1APS','1AQH','1AR1','1ARR','1ATJ','1ATN','1AU1','1AUT','1AV1','1AVR','1AX1','1AXB','1AYE','1AYF','1AZP','1B0O','1B26','1B5M','1B8J','1BAH','1BAN','1BAO','1BCX','1BD8','1BET','1BF4','1BFM','1BGD','1BGL','1BJP','1BKE','1BKS','1BLC','1BMC','1BNI','1BNL','1BNS','1BNZ','1BOY','1BP2','1BPI','1BPL','1BPR','1BPT','1BRF','1BRG','1BRH','1BRI','1BRJ','1BRK','1BSA','1BSB','1BSC','1BSD','1BSE','1BSR','1BTA','1BTI','1BTM','1BUJ','1BVC','1BVU','1BZO','1C0L','1C17','1C2R','1C52','1C53','1C5G','1C6P','1C9O','1CAH','1CBW','1CDC','1CEA','1CEY','1CHK','1CHO','1CHP','1CLW','1CM7','1CMB','1CMS','1COA','1COK','1COL','1CPM','1CSP','1CTS','1CUN','1CUS','1CVW','1CX1','1CX8','1CYC','1CYO','1D0X','1D1G','1DAQ','1DDN','1DE3','1DEC','1DEQ','1DFO','1DFX','1DHN','1DIL','1DIV','1DJU','1DKG','1DKT','1DLC','1DM0','1DO9','1DPM','1DTD','1DTO','1DVC','1DVF','1DVV','1DXX','1DYA','1DYB','1DYC','1DYD','1DYE','1DYF','1DYG','1DYJ','1E21','1E6K','1E6L','1E6M','1E6N','1EDH','1EFC','1EG1','1EHK','1EKG','1EL1','1ELV','1EMV','1EQ1','1ERU','1ESF','1ETE','1EVQ','1EW4','1EXG','1EZA','1F88','1FAJ','1FAN','1FC1','1FEP','1FGA','1FKB','1FKJ','1FLV','1FMK','1FMM','1FNF','1FR2','1FRD','1FTG','1FTT','1FXA','1G6N','1G6V','1G6W','1GA0','1GAD','1GAL','1GAY','1GAZ','1GB0','1GB2','1GB3','1GB7','1GBX','1GD1','1GF8','1GF9','1GFA','1GFE','1GFG','1GFH','1GFJ','1GFK','1GFL','1GFR','1GFT','1GFU','1GFV','1GKG','1GLH','1GLM','1GOB','1GPC','1GQ2','1GRL','1GRX','1GSD','1GTM','1GTX','1GUY','1GXE','1H09','1H0C','1H2I','1H7M','1H8V','1HA4','1HCD','1HEM','1HEN','1HEO','1HEP','1HEQ','1HER','1HEV','1HFY','1HFZ','1HGH','1HGU','1HIB','1HIC','1HIO','1HIX','1HK0','1HME','1HML','1HNG','1HNL','1HOR','1HQK','1HTI','1HUE','1HXN','1HYN','1HYW','1HZ6','1I4N','1I5T','1IAR','1IC2','1IDS','1IFB','1IFC','1IGS','1IGV','1IHB','1IMQ','1INQ','1INU','1IO2','1IOB','1IOF','1IOJ','1IR3','1IRL','1IRO','1ISK','1IX0','1J0X','1J4S','1J7N','1JAE','1JBK','1JHN','1JIW','1JJI','1JKB','1JNK','1JTD','1JTG','1JTK','1K23','1K3B','1K40','1K9Q','1KA6','1KBP','1KDN','1KDU','1KDX','1KEV','1KFD','1KFW','1KJ1','1KKJ','1KTQ','1KUM','1KVA','1KVB','1KVC','1L00','1L02','1L03','1L04','1L05','1L06','1L07','1L08','1L09','1L10','1L11','1L12','1L13','1L14','1L15','1L16','1L17','1L18','1L19','1L20','1L21','1L22','1L23','1L24','1L33','1L34','1L36','1L37','1L38','1L40','1L41','1L42','1L43','1L44','1L45','1L46','1L47','1L48','1L49','1L50','1L51','1L52','1L53','1L54','1L55','1L56','1L57','1L59','1L60','1L61','1L62','1L63','1L65','1L66','1L67','1L68','1L69','1L70','1L71','1L72','1L73','1L74','1L75','1L76','1L77','1L85','1L86','1L87','1L88','1L89','1L90','1L91','1L92','1L93','1L94','1L95','1L96','1L97','1L98','1L99','1LAV','1LAW','1LBI','1LFO','1LHH','1LHI','1LHJ','1LHK','1LHL','1LHM','1LHP','1LLI','1LMB','1LOZ','1LPS','1LRA','1LRE','1LRP','1LS4','1LSN','1LUC','1LVE','1LYE','1LYF','1LYG','1LYH','1LYI','1LYJ','1LZ1','1M7T','1MAX','1MBD','1MBG','1MCP','1MGR','1MJC','1MLD','1MSI','1MUL','1MX2','1MX4','1MX6','1MYK','1MYL','1N02','1N0J','1NAG','1NM1','1NZI','1OA2','
1OA3','1OCC','1OH0','1OIA','1OKI','1OLR','1OMU','1ONC','1OPD','1ORC','1OSA','1OSI','1OTR','1OUA','1OUB','1OUC','1OUD','1OUE','1OUF','1OUG','1OUH','1OUI','1OUJ','1OVA','1P2M','1P2N','1P2O','1P2P','1P2Q','1P3J','1PAH','1PBA','1PCA','1PDO','1PGA','1PHP','1PII','1PIN','1PK2','1PMC','1POH','1PPI','1PPN','1PPP','1PQN','1PRE','1PRR','1Q5Y','1QEZ','1QGV','1QHE','1QJP','1QK1','1QLP','1QLX','1QM4','1QND','1QQR','1QQV','1QT6','1QT7','1QU0','1QU7','1QUW','1R2R','1RBN','1RBP','1RBR','1RBT','1RBU','1RBV','1RCB','1RDA','1RDB','1RDC','1REX','1RGC','1RGG','1RH1','1RHD','1RHG','1RIL','1RIS','1RN1','1ROP','1RRO','1RTB','1RTP','1RX4','1S0W','1SAK','1SAP','1SCE','1SEE','1SFP','1SHF','1SHG','1SHK','1SMD','1SPD','1SPH','1SSO','1STF','1STN','1SUP','1SYC','1SYD','1SYE','1SYG','1T3A','1T7C','1T8L','1T8M','1T8N','1T8O','1TBR','1TCA','1TCY','1TEN','1TFE','1TGN','1THQ','1TI5','1TIN','1TIT','1TLA','1TML','1TMY','1TOF','1TPE','1TPK','1TTG','1TUP','1TUR','1U5P','1UBQ','1UCU','1UOX','1URK','1UW3','1UWO','1UZC','1V6S','1VAR','1VFB','1VIE','1VQA','1VQB','1VQC','1VQD','1VQE','1VQF','1VQG','1VQH','1VQI','1VQJ','1W3D','1W4E','1W4H','1W99','1WIT','1WLG','1WPW','1WQ5','1WQM','1WQN','1WQO','1WQP','1WQQ','1WQR','1WRP','1WSY','1XAS','1XY1','1Y4Y','1Y51','1YAL','1YAM','1YAN','1YAO','1YAP','1YAQ','1YCC','1YEA','1YGV','1YHB','1YMB','1YNR','1YPA','1YPB','1YPC','1YPI','1Z1I','1ZNJ','200L','206L','216L','217L','219L','221L','224L','227L','230L','232L','233L','235L','236L','237L','238L','239L','240L','241L','242L','243L','244L','246L','247L','253L','254L','255L','2A01','2A36','2ABD','2AC0','2ACE','2ACY','2ADA','2AFG','2AIT','2AKY','2ASI','2ATC','2B4Z','2BBM','2BQA','2BQB','2BQC','2BQD','2BQE','2BQF','2BQG','2BQH','2BQI','2BQJ','2BQK','2BQM','2BQN','2BQO','2BRD','2CBR','2CHF','2CI2','2CPP','2CRK','2CRO','2DQJ','2DRI','2EQL','2FAL','2FHA','2FX5','2G3P','2GA5','2GSR','2GZI','2HEA','2HEB','2HEC','2HED','2HEE','2HEF','2HIP','2HMB','2HPR','2IFB','2IMM','2L3Y','2L78','2LZM','2MBP','2MLT','2NUL','2OCJ','2PDD','2PEC','2PEL','2PRD','2Q98','2RBI','2RN2','2RN4','2SNM','2SOD','2TMA','2TRT','2TRX','2TS1','2WSY','2ZAJ','2ZTA','3BCI','3BCK','3BD2','3BLS','3CHY','3D2A','3ECA','3FIS','3HHR','3K0NA_lin','3K0NB_lin','3K0On_lin','3MBP','3PGK','3PRO','3PSG','3SSI','3TIM','3VUB','451C','487D','4BLM','4CPA','4GCR','4LYZ','4SGB','4TLN','4TMS','5AZU','5CPV','5CRO','5MDH','5PEP','6TAA','7AHL','7PTI','8PTI','8TIM','9INS','9PCY',]
    print(len(ddG_pdb_ids))
    fix_later = set([
        # SELECT * FROM `Experiment` WHERE `PDBFileID` IN ('1OLR')
        # SELECT * FROM `DataSetDDG` WHERE `PDBFileID` IN ('1OLR')
        # SELECT * FROM `UserDataSetExperiment` WHERE `PDBFileID` IN ('1OLR')
        # SELECT * FROM `UserAnalysisSet` WHERE `PDB_ID` IN ('1OLR')
        ])

    failed_cases = []

    specific_cut_offs = {
        '1AR1' : (78, 76, 73.00), # Chain C has a Clustal Omega match at 77%
        '1BF4' : (80, 77, 87.00), # Chain A has a Clustal Omega match at 79%
        '1MCP' : (100, 98, 50.00), # Chain H has a Clustal Omega match at 100% but only half the chain
        '2ZAJ' : (75, 72, 70.00), #
        '1CPM' : (73, 71, 70.00), #
    }

    to_be_hardcoded = {
        # Special case: 1N02. This needs to be handled manually.
        # DBREF  1N02 A    1     3  UNP    P81180   CVN_NOSEL        1      3
        # DBREF  1N02 A    4    49  UNP    P81180   CVN_NOSEL       54     99
        # DBREF  1N02 A   50    54  UNP    P81180   CVN_NOSEL       49     53
        # DBREF  1N02 A   55    99  UNP    P81180   CVN_NOSEL        4     48
        # DBREF  1N02 A  100   101  UNP    P81180   CVN_NOSEL      100    101
        '1N02',
        ('2IMM'), # No PDB <-> UniProt mapping
    }
    test_these = [
        '1KJ1'
    ]

    colortext.message('Testing %d PDB files for the DDG database.' % len(ddG_pdb_ids))
    #start_x = 0
    start_x = 0

    for x in range(start_x, len(ddG_pdb_ids)):
        ddG_pdb_id = ddG_pdb_ids[x]
        if test_these and ddG_pdb_id not in test_these:
            continue
        if ddG_pdb_id not in fix_later:
            colortext.warning('Testing PDB file number %d: %s' % (x, ddG_pdb_id))
            starting_clustal_cut_off = 100
            min_clustal_cut_off = 71
            acceptable_sequence_percentage_match = 80.0
            if specific_cut_offs.get(ddG_pdb_id):
                starting_clustal_cut_off, min_clustal_cut_off, acceptable_sequence_percentage_match = specific_cut_offs[ddG_pdb_id]
            try:
                rr = ResidueRelatrix(ddG_pdb_id, rosetta_scripts_path, rosetta_database_path, starting_clustal_cut_off = starting_clustal_cut_off, min_clustal_cut_off = min_clustal_cut_off, acceptable_sequence_percentage_match = acceptable_sequence_percentage_match, cache_dir = '/home/oconchus/temp')

            except SpecificException, e:
                failed_cases.append((x, ddG_pdb_id, str(e)))
        else:
            colortext.warning('SKIPPING PDB file number %d: %s' % (x, ddG_pdb_id))

        if failed_cases:
            colortext.error('Failed cases:')
            fcc = 0
            for f in failed_cases:
                if fcc == 0:
                    colortext.warning(str(f))
                else:
                    colortext.printf(str(f), color = 'cyan')
                fcc = (fcc + 1) % 2


    print("failed_cases", failed_cases)
Code Example #17
def generate_JSON_dataset(dataset_ID, pdb_data, pub_data):

    record_data = {}

    #1LRP
    #1LMB

    # 1 JSON object per dataset record
    failure_count = 0
    records = ddGdb.execute_select('SELECT * FROM DataSetDDG WHERE DataSetID=%s', parameters=(dataset_ID,))
    colortext.warning('Starting with %d records.' % (len(records)))
    mutation_count = {1:0, 2:0, 3:0, 4:0, 5:0}
    for r in records:

        mutation_is_reversed = r['MutationIsReversed'] == 1
        d = dict(
            _DataSetDDGID = r['ID'],
            RecordID = r['RecordNumber'],
            AggregateType = r['AggregateType'],
            DDG = r['PublishedValue'],
            PDBFileID = r['PDBFileID'],
            DerivedMutation = mutation_is_reversed,
        )

        # Parse PDB
        if not(cached_pdbs.get(r['PDBFileID'])):
            cached_pdbs[r['PDBFileID']] = PDB(ddGdb.execute_select('SELECT Content FROM PDBFile WHERE ID=%s', parameters=(r['PDBFileID'],))[0]['Content'])

        # Store PDB data
        PDBResolution = None
        PDBMethodOfDetermination = None
        try:
            PDBResolution = cached_pdbs[r['PDBFileID']].get_resolution()
        except: pass
        try:
            PDBMethodOfDetermination = cached_pdbs[r['PDBFileID']].get_techniques()
        except: pass
        pdb_data[r['PDBFileID']] = dict(
            Resolution = PDBResolution,
            MethodOfDetermination = PDBMethodOfDetermination,
        )

        assay_DDGs = ddGdb.execute_select('''
            SELECT *
            FROM DataSetDDGSource
            INNER JOIN ExperimentAssayDDG ON DataSetDDGSource.ExperimentAssayID = ExperimentAssayDDG.ExperimentAssayID AND DataSetDDGSource.Type = ExperimentAssayDDG.Type
            INNER JOIN ExperimentAssay ON ExperimentAssayDDG.ExperimentAssayID = ExperimentAssay.ID
            WHERE DataSetDDGID=%s''', parameters=(r['ID'],))

        ExperimentID = set([a['ExperimentID'] for a in assay_DDGs])
        if len(ExperimentID) != 1:
            colortext.message('%d records passed' % len(record_data))
            # Cases where 1FLV and 1FTG need to be elided
            if sorted(ExperimentID) in ([113699, 113830], [113704, 113832], [113705, 113836]):
                ExperimentID = [sorted(ExperimentID)[0]]
            elif sorted(ExperimentID) in ([112149, 112591],):
                # ExperimentID is used below for mutation details but these agree in this case. 1LZ1, 2BQA
                ExperimentID = [sorted(ExperimentID)[0]]
            elif sorted(ExperimentID) in (
                    [112141, 112583L], [112136, 112578], [112137, 112579], [112142, 112584], [112139, 112581],
                    [112140, 112582], [112146, 112588], [112147, 112589], [112148, 112590]
                ):
                # ExperimentID is used below for mutation details but these agree in this case. 1REX, 2BQA
                ExperimentID = [sorted(ExperimentID)[0]]
            elif sorted(ExperimentID) in ([112227, 112323], [112288, 113039], [111587, 112379]):
                # ExperimentID is used below for mutation details but these agree in this case. 2LZM, 1L63
                ExperimentID = [sorted(ExperimentID)[0]]
            else:
                colortext.warning(
                    '\n'.join(['%(PDBFileID)s %(Chain)s %(WildTypeAA)s %(ResidueID)s %(MutantAA)s' % rii for rii in ddGdb.execute_select('''
                    SELECT * FROM `ExperimentMutation` INNER JOIN Experiment ON Experiment.ID=ExperimentID WHERE `ExperimentID` IN (%s)''' % ','.join(map(str, ExperimentID)))]))
                pprint.pprint(r)
                colortext.error(map(int, ExperimentID))
                #pprint.pprint(assay_DDGs)
                print(sorted(ExperimentID))
        assert(len(ExperimentID) == 1)
        ExperimentID = ExperimentID.pop()
        d['_ExperimentID'] = ExperimentID

        experimental_DDGs = []
        for a in assay_DDGs:
            experimental_DDGs.append(dict(
                DDG = a['Value'],
                DDGType = a['Type'],
                Publication = a['Publication'],
                LocationOfValueInPublication = a['LocationOfValueInPublication'],
                Temperature = a['Temperature'],
                pH= a['pH'],
            ))
            # Store Publication data
            pub_data[a['Publication']] = cached_publications[a['Publication']]
        d['ExperimentalDDGs'] = experimental_DDGs

        # Retrieve mutations
        mutation_records = ddGdb.execute_select('SELECT * FROM ExperimentMutation WHERE ExperimentID=%s ORDER BY ResidueID', parameters=(ExperimentID,))
        if dataset_ID == "AlaScan-GPK_2014/09/25":
            assert(len(mutation_records) == 1)

        mutations = []
        failed_check = False
        mutation_count[len(mutation_records)] += 1
        for mutation in mutation_records:
            mutation_d = {}
            #if ExperimentID == 109911:
            #    d['PDBFileID'] = '1WQ5' # Hack for one 1BKS case

            mutation_d['Chain'] = mutation['Chain']
            mutation_d['ResidueID'] = mutation['ResidueID']
            if mutation_is_reversed:
                mutation_d['MutantAA'] = mutation['WildTypeAA']
                mutation_d['WildTypeAA'] = mutation['MutantAA']
            else:
                mutation_d['WildTypeAA'] = mutation['WildTypeAA']
                mutation_d['MutantAA'] = mutation['MutantAA']

            if dataset_ID == "AlaScan-GPK_2014/09/25":
                if d['PDBFileID'] == '1LMB':
                    mutation_d['Chain'] = '3' # Hack for the PDB replacement 1LRP (3.2A) -> 1LMB (1.8A)
                if d['PDBFileID'] == '1U5P' and int(mutation_d['ResidueID']) < 1600:
                    mutation_d['ResidueID'] = str(int(mutation_d['ResidueID']) + 1762) # Hack for the PDB replacement 1AJ3, NMR -> 1U5P (2A)
            if dataset_ID == "Kellogg_10.1002/prot.22921_2010/12/03":
                if d['PDBFileID'] == '1U5P' and int(mutation_d['ResidueID']) < 1600:
                    mutation_d['ResidueID'] = str(int(mutation_d['ResidueID']) + 1762) # Hack for the PDB replacement 1AJ3, NMR -> 1U5P (2A)

            mutated_residue = ddGdb.execute_select('SELECT * FROM PDBResidue WHERE PDBFileID=%s AND Chain=%s AND ResidueID=%s', parameters=(d['PDBFileID'], mutation_d['Chain'], ResidueID2String(mutation_d['ResidueID'])))
            if len(mutated_residue) == 0:
                colortext.warning('Skipping Experiment #%d (%s) in %s due to missing residue %s.' % (ExperimentID, d['PDBFileID'], dataset_ID, mutation_d['ResidueID']))
                #print('SELECT * FROM PDBResidue WHERE PDBFileID=%s AND Chain=%s AND ResidueID=%s' % (d['PDBFileID'], mutation_d['Chain'], ResidueID2String(mutation_d['ResidueID'])))
                #pprint.pprint(d)
                #pprint.pprint(mutations)
                #pprint.pprint(mutation_d)
                #print(ExperimentID)
                #print(mutated_residue)
                #print(10*'*')
                #print('\n')
                failure_count += 1
                failed_check = True
                break
            assert(len(mutated_residue) == 1)

            mutated_residue = mutated_residue[0]
            mutation_d['DSSPExposure'] = mutated_residue['MonomericExposure']
            mutation_d['DSSPType'] = mutated_residue['MonomericDSSP']
            mutation_d['DSSPSimpleSSType'] = dssp_elision.get(mutation_d['DSSPType'])
            assert(mutation_d['DSSPType'] != None)
            assert(mutation_d['DSSPSimpleSSType'] != None)
            mutations.append(mutation_d)

        if failed_check:
            print('FAILED CHECK')
            continue
        d['Mutations'] = mutations

        if dataset_ID == "Potapov_10.1093/protein/gzp030_2009/09/01":
            key = '%s_%s_%s' % (d['PDBFileID'], '+'.join(['%s:%s:%s' % (mutation_d['Chain'], mutation_d['ResidueID'].strip(), mutation_d['MutantAA']) for mutation_d in mutations]), d['RecordID'])
        else:
            key = '%s_%s' % (d['PDBFileID'], '+'.join(['%s:%s:%s' % (mutation_d['Chain'], mutation_d['ResidueID'].strip(), mutation_d['MutantAA']) for mutation_d in mutations]))

        if record_data.get(key):
            colortext.warning('KEY EXISTS: %s' % key)
            print('Existing record: %s' % pprint.pformat(record_data[key]))
            print('New record: %s' % pprint.pformat(d))
            failure_count += 1
        record_data[key] = d

    colortext.message('Mutation count')
    colortext.warning(pprint.pformat(mutation_count))

    if failure_count > 0:
        colortext.error('Total length of dataset: %d. Failed on %d records.' % (len(record_data), failure_count))
    else:
        colortext.message('Total length of dataset: %d. ' % (len(record_data)))

    record_list = []
    for k, v in sorted(record_data.iteritems()):
        record_list.append(v)

    colortext.message('Adding dataset %s with %d records, %d PDB files, and %d references.' % (dataset_ID, len(record_list), len(pdb_data), len(pub_data)))
    JSON_datasets[dataset_ID]['data'] = record_list
Code Example #18
File: pooja.py Project: jaaamessszzz/DDGBenchmarking
    #sys.path.insert(0, "../..")
    sys.path.insert(0, "../updatedb")
    #sys.path.insert(0, '/home/oconchus/dev/')
    #sys.path.insert(0, "/home/oconchus/dev/klab")
else:
    import klab

import klab.colortext as colortext
from ddglib.ppi_api import get_interface_with_config_file as get_ppi_interface_with_config_file


# Set up database connection
try:
    ppi_api = get_ppi_interface_with_config_file(host_config_name = 'kortemmelab')
except:
    colortext.error('Database connection failed.')
    raise
colortext.message('Connected to database.')


# Pick a scoring method
score_method_id = ppi_api.get_score_method_id('Rescore-Talaris2014', method_authors = 'kyle', method_type = 'ddg_monomer rescore')

# Get the best structures for prediction 23849
wild_type_complexes = ppi_api.get_top_x_scores(23849, score_method_id, 'WildTypeComplex', 3, component = 'total', order_by = 'ASC')
wild_type_filenames = []
for wtc in wild_type_complexes:
    wild_type_filenames.append([f for f in glob.glob('repacked_wt*_round_{0}.*'.format(wtc['StructureID']))][0])
print(wild_type_filenames)

Code Example #19
    def _create_intermediate_schema(self, tbl):
        code = (self.db_interface.execute("SHOW CREATE TABLE %s" % tbl))
        assert(len(code) == 1)
        schema = code[0]['Create Table']
        #colortext.message(tbl)

        #print(schema)

        #print(schema)
        fields = [f for f in map(string.strip, schema[schema.find('(') + 1:schema.find('PRIMARY KEY')].strip().split('\n')) if f.strip()]

        pk_fields = re.match('.*PRIMARY\s+KEY\s*[(](.*?)[)]\s*[,)].*', schema, re.DOTALL)
        assert(pk_fields)
        pk_fields = [s.strip() for s in pk_fields.group(1).replace('`', '').split(',') if s.strip()]


        #colortext.warning(fields)
        for f in fields:
            #print('')
            #colortext.message(f)
            if f.endswith(','):
                f = f[:-1]

            field_name = f.split()[0].replace('`', '')
            if f.split()[1].startswith('enum('):
                mtchs = re.match(".* (enum[(].*?[)])(.*)", f)
                assert(mtchs)
                #print('ENUM', mtchs.group(1))
                field_type = mtchs.group(1)
                remaining_description = mtchs.group(2)
            else:
                field_type = f.split()[1]
                remaining_description = (' '.join(f.split()[2:])).strip()

            unicode_collation_or_character_set = False
            if remaining_description.find('utf') != -1:
                unicode_collation_or_character_set = True

            not_null = False
            if remaining_description.find('NOT NULL') != -1:
                not_null = True
                remaining_description = remaining_description.replace('NOT NULL', '').strip()

            default = False
            default_type = None
            default_value = None
            if remaining_description.find('default CURRENT_TIMESTAMP') != -1:
                default_type = 'TIMESTAMP'
                default_value = None
                remaining_description = remaining_description.replace('default CURRENT_TIMESTAMP', '')
            elif remaining_description.find('default NULL') != -1:
                default_type = 'null'
                default_value = None
                remaining_description = remaining_description.replace('default NULL', '')
            elif remaining_description.find('default') != -1:
                mtchs = re.match(".*default '(.*?)'.*", remaining_description)
                if mtchs:
                    #print('mtchs', mtchs.group(1))
                    default_type = 'string'
                    default_value = mtchs.group(1)
                    remaining_description = remaining_description.replace("default '%s'" % default_value, "")
                else:
                    colortext.error('Unexpected default value string: "{0}".'.format(remaining_description))
                    pass
                    #mtchs = re.match(".*default (.*?)(\s.*)*$", remaining_description)
                    #if mtchs:
                    #    print('mtchs non-string', mtchs.group(1))
                    #    if mtchs.group(1) == 'NULL':
                    #        default_type = 'null'
                    #        default_value = None
                    #        remaining_description = remaining_description.replace('')

            comment = None
            mtchs = re.match(".*(COMMENT '.*?').*", remaining_description)
            if mtchs:
                comment = mtchs.group(1)
                remaining_description = remaining_description.replace(mtchs.group(1), "")

            remaining_description = remaining_description.strip()

            self.intermediate_schema[tbl] = self.intermediate_schema.get(tbl, [])
            self.intermediate_schema[tbl].append(IntermediateField(field_name, field_type, not_null = not_null, default_type = default_type, default_value = default_value, comment = comment, is_primary_key = field_name in pk_fields, unicode_collation_or_character_set = unicode_collation_or_character_set))

            #print('field_name : %s' % field_name)
            #print('field_type : %s' % field_type)
            #print('not_null : %s' % not_null)

            if default_type != None:
                pass
                #print('default: %s, %s' % (default_type, default_value))
            #print('comment : %s' % comment)
            if remaining_description:
                #colortext.error('remaining_description : %s' % remaining_description)
                pass
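
The primary-key extraction above is a single regular expression over the output of SHOW CREATE TABLE. A self-contained sketch of that step on a literal (invented) schema string, using the same pattern:

import re

schema = """CREATE TABLE `Prediction` (
  `ID` int(11) NOT NULL AUTO_INCREMENT,
  `PredictionSet` varchar(48) NOT NULL,
  PRIMARY KEY (`ID`),
  KEY `PredictionSet` (`PredictionSet`)
) ENGINE=InnoDB"""

pk_fields = re.match('.*PRIMARY\s+KEY\s*[(](.*?)[)]\s*[,)].*', schema, re.DOTALL)
pk_fields = [s.strip() for s in pk_fields.group(1).replace('`', '').split(',') if s.strip()]
print(pk_fields)  # ['ID']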
Code Example #20
File: blast.py Project: Kortemme-Lab/klab
            colortext.message('\n{0}/{1}: {2}'.format(c, len(pdb_ids), pdb_id))
            hits = b.by_pdb(pdb_id)
            if hits:
                colortext.warning('{0} hits: {1}'.format(len(hits), ','.join(hits)))
            else:
                colortext.warning('No hits')
        except Exception, e:
            colortext.error('FAILED')
            failed_cases.append((pdb_id, str(e), traceback.format_exc()))

    if failed_cases:
        colortext.warning('*** These cases failed ***')
        for p in failed_cases:
            print('')
            colortext.pcyan(p[0])
            colortext.error(p[1])
            print(p[2])
        print('')


def test_sequences(b, sequences):
    failed_cases = []
    c = 0
    for sequence in sequences:
        try:
            c += 1
            colortext.message('\n{0}/{1}: {2}'.format(c, len(sequences), sequence))
            hits = b.by_sequence(sequence)
            if hits:
                colortext.warning('{0} hits: {1}'.format(len(hits), ','.join(hits)))
            else:
Code Example #21
File: uniprot.py Project: Kortemme-Lab/klab
    def _parse_PDB_mapping(self):
        entry_tag = self.entry_tag
        mapping = {}
        dbReference_tags = [child for child in entry_tag.childNodes if child.nodeType == child.ELEMENT_NODE and child.tagName == 'dbReference']

        for t in dbReference_tags:
            db_type = t.getAttribute('type')
            assert(db_type)
            if db_type == 'PDB':
                pdb_id = t.getAttribute('id')
                assert(len(pdb_id) == 4)
                #print(pdb_id)
                method = None
                resolution = None
                chains = []
                for p in t.getElementsByTagName('property'):
                    if p.getAttribute('type') == 'method':
                        method = p.getAttribute('value')
                    elif p.getAttribute('type') == 'resolution':
                        resolution = float(p.getAttribute('value'))
                    elif p.getAttribute('type') == 'chains':
                        chains_groups = [x.strip() for x in p.getAttribute('value').split(",") if x.strip()]
                        for cg in chains_groups:
                            cg_tokens = cg.split("=")
                            assert(len(cg_tokens) == 2)

                            chain_ids = cg_tokens[0].strip().split("/")
                            for chain_id in chain_ids:
                                assert(len(chain_id) == 1)
                            #print(chain_id)

                            range = cg_tokens[1].strip().split("-")
                            assert(len(range) == 2)
                            starting_index = None
                            ending_index = None
                            try:
                                starting_index = int(range[0])
                                ending_index = int(range[1])
                            except:
                                mmkey = "/".join(sorted(chain_ids))
                                if missing_mapping_for_AC_PDB_chains.get(self.UniProtAC, {}).get(pdb_id, {}).get(mmkey):
                                    starting_index, ending_index = missing_mapping_for_AC_PDB_chains.get(self.UniProtAC, {}).get(pdb_id, {}).get(mmkey)
                                    if not self.silent:
                                        colortext.error("Fixing starting_index, ending_index to %d, %d for PDB chains %s." % (starting_index, ending_index, str(chain_ids)))
                                else:
                                    if not set(chain_ids) in broken_mapping_for_AC_PDB_chains.get(self.UniProtAC, {}).get(pdb_id, []):
                                        raise colortext.Exception("The starting index and ending index for %s, chains %s in UniProtKB AC entry %s is broken or missing. Fix the mapping or mark it as missing in uniprot_patches.py" % (pdb_id, ",".join(chain_ids), self.UniProtAC))
                                    continue

                            for chain_id in chain_ids:
                                assert(len(chain_id) == 1)
                                if fixed_mapping_for_AC_PDB_chains.get(self.UniProtAC, {}).get(pdb_id, {}).get(chain_id):
                                    fixed_chain_id = fixed_mapping_for_AC_PDB_chains.get(self.UniProtAC, {}).get(pdb_id, {}).get(chain_id)
                                    if not self.silent:
                                        colortext.error("Fixing PDB chain from %s to %s." % (chain_id, fixed_chain_id))
                                    chain_id = fixed_chain_id
                                chains.append((chain_id, starting_index, ending_index))

                    else:
                        raise Exception("Unhandled dbReference property tag type.")

                if not method:
                    temp_method = missing_AC_PDB_methods.get(self.UniProtAC, {}).get(pdb_id, [])
                    if temp_method:
                        method = temp_method[0]
                        if not self.silent:
                            colortext.error("Fixing method to %s for PDB %s." % (method, pdb_id))

                if not chains:
                    assert(pdb_id in broken_mapping_for_AC_PDB_chains.get(self.UniProtAC, {}))
                    continue

                if not method and chains:
                    raise colortext.Exception("Missing method and chains for %s in UniProtKB AC entry %s. Fix the mapping or mark it as missing in uniprot_patches.py" % (pdb_id, self.UniProtAC))

                if not method in UniProtACEntry.sampling_methods.keys():
                    raise colortext.Exception("Unknown method '%s' found in UniProtKB AC entry %s." % (method, self.UniProtAC))
                if method in ['X-ray'] and resolution: # resolution can be null e.g. in P00698 with 2A6U (POWDER DIFFRACTION)
                    assert(pdb_id not in mapping)
                    if pdb_id not in PDBs_marked_as_XRay_with_no_resolution:
                        assert(resolution)
                        mapping[pdb_id] = {'method' : method, 'resolution' : resolution, 'chains' : {}}
                        import pprint
                        for chain in chains:
                            #assert(chain[0] not in mapping[pdb_id]['chains']) # todo: I disabled this when calling get_common_PDB_IDs as it hit the assertion while looking up 1REW with 4N1D. Is this assertion necessary?
                            mapping[pdb_id]['chains'][chain[0]] = (chain[1], chain[2])

        if False:
            for pdb_id, details in sorted(mapping.iteritems()):
                if not self.silent:
                    colortext.message("%s, %s, %sA" % (str(pdb_id), str(details['method']), str(details['resolution'])))
                for chain, indices in sorted(details['chains'].iteritems()):
                    if not self.silent:
                        colortext.warning(" Chain %s: %s-%s" % (chain, str(indices[0]).rjust(5), str(indices[1]).ljust(5)))
Code example #22
            average_time_taken = float(total_time_in_secs)/float(cases_computed or 1)
            estimate_remaining_time = number_of_cases_left * average_time_taken

            t.stop()
            colortext.printf("**Profile**", 'orange')
            print(t)
            colortext.message("Time taken for this case: %0.2fs." % t.sum())
            colortext.message("Average time taken per case: %0.2fs." % average_time_taken)
            colortext.message("Estimated time remaining: %dh%dm%ds." % (int(estimate_remaining_time/3600), int((estimate_remaining_time/60) % 60), estimate_remaining_time % 60))
            print("\n")

    #exF.close()
    colortext.printf("\nDone.", 'lightgreen')

    if failed_cases:
        colortext.error("Failed cases:\n[%s]" % ",".join(map(str, failed_cases)))

#main(FixedIDs = [38766, 39738, 40379, 40381] + range(40610, 40611))
#main(FixedIDs = [39044])
#main(FixedIDs = [48898,49870,50948,51058,51059,52247,53633,53711])

convert_scores_to_json()
print('here')
FixedIDs = [76633]
FixedIDs = []
main(FixedIDs = FixedIDs, radii = [8.0])
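
The estimated-time message in the rescoring loop above packs the remaining seconds into hours, minutes and seconds with integer arithmetic. A minimal standalone sketch of the same conversion (format_hms is a hypothetical helper name, not part of the code above):

def format_hms(total_seconds):
    # Same arithmetic as the progress message: hours = s // 3600, minutes = (s // 60) % 60, seconds = s % 60.
    total_seconds = int(total_seconds)
    return '%dh%dm%ds' % (total_seconds // 3600, (total_seconds // 60) % 60, total_seconds % 60)

# e.g. format_hms(3725) == '1h2m5s'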




Code example #23
def check_failures(prediction_set):
    ddGdb = ddgdbapi.ddGDatabase()
    results_root = '/kortemmelab/shared/DDG/jobs'

    UserDataSetExperimentIDs = {}
    results = ddGdb.execute_select('''SELECT ID, ExperimentID FROM Prediction WHERE PredictionSet = %s AND STATUS = 'failed' ''', parameters=(prediction_set,))
    reported_failures = [r['ID'] for r in results]
    for r in results:
        UserDataSetExperimentIDs[r['ID']] = r['ExperimentID']
    print(UserDataSetExperimentIDs)
    print(len(UserDataSetExperimentIDs))

    affected_subsets = {}
    actually_failed = []
    did_not_fail = []
    did_not_fail_but_have_many_residues = []
    did_not_fail_but_has_a_troublesome_structure = []
    large_proteins = set(['1FEP', '1W99'])
    troublesome_structures = set(['2IFB', '1FMK'])

    for PredictionID in reported_failures:
        print(PredictionID)
        zipfile_path = os.path.join(results_root, '%d.zip' % PredictionID)
        #try:
        z = zipfile.ZipFile(zipfile_path, 'r')
        #except:
        #    colortext.error('MISSING FILE FOR %d' % PredictionID)
        #    did_not_fail.append(PredictionID)
        #    continue
        file_list = z.namelist()

        found_stdout = 0
        found_stderr = 0
        for f in file_list:
            if f.find('.cmd.o') != -1:
                found_stdout = 1
            elif f.find('.cmd.e') != -1:
                found_stderr = 1
                colortext.error(f)
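        # A stderr file (.cmd.e) should only be present when the matching stdout file (.cmd.o) is also present.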
        assert(found_stdout >= found_stderr)

        if found_stderr:
            assert(found_stderr == 1)
            colortext.error("Job #%d actually failed" % PredictionID)
            actually_failed.append(PredictionID)

            ExperimentID = UserDataSetExperimentIDs[PredictionID]
            sub_results = ddGdb.execute_select('''SELECT Subset FROM UserAnalysisSet WHERE ExperimentID = %s''', parameters=(ExperimentID,))
            for sr in sub_results:
                print(sr['Subset'])

                affected_subsets[sr['Subset']] = affected_subsets.get(sr['Subset'], [])
                affected_subsets[sr['Subset']].append(PredictionID)



        else:
            PDBFileID = ddGdb.execute_select("SELECT UserDataSetExperiment.PDBFileID AS PDBFileID FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperimentID=UserDataSetExperiment.ID WHERE Prediction.ID=%s", parameters=(PredictionID,))[0]['PDBFileID']
            if PDBFileID in troublesome_structures:
                colortext.warning("Job #%d had not failed by the time it was terminated however it has a troublesome structure (%s)." % (PredictionID, PDBFileID))
                did_not_fail_but_has_a_troublesome_structure.append(PredictionID)
            elif PDBFileID in large_proteins:
                colortext.warning("Job #%d had not failed by the time it was terminated however it has many residues (%s)." % (PredictionID, PDBFileID))
                did_not_fail_but_have_many_residues.append(PredictionID)
            else:
                colortext.warning("Job #%d had not failed by the time it was terminated." % PredictionID)
                did_not_fail.append(PredictionID)


    colortext.message("*** Report ***")
    print('%d jobs were marked as failed.' % len(reported_failures))
    colortext.warning('%d jobs were marked as failed but had not failed.' % len(did_not_fail))
    colortext.warning('%d jobs were marked as failed and had not failed but had large chains.' % len(did_not_fail_but_have_many_residues))
    colortext.warning('%d jobs were marked as failed and had not failed but have a troublesome structure.' % len(did_not_fail_but_has_a_troublesome_structure))
    colortext.error('%d jobs were marked as failed and did fail.\n' % len(actually_failed))

    if affected_subsets:
        print("The following subsets were affected")
        for k, v in affected_subsets.iteritems():
            print("%s: %d records" % (k, len(v)))

    if did_not_fail:
        restart_jobs = ask_yes_no("Do you want to restart the jobs that did not actually fail?", default_value=False)
        if restart_jobs:
            for j in did_not_fail:
                r = ddGdb.execute("SELECT Status, AdminCommand FROM Prediction WHERE ID=%s", parameters=(j,))
                assert(len(r) == 1)
                if r[0]['Status'] != 'failed' or r[0]['AdminCommand'] != 'restart':
                    ddGdb.execute("UPDATE Prediction SET AdminCommand='restart' WHERE ID=%s", parameters=(j,))

    if did_not_fail_but_have_many_residues:
        restart_jobs = ask_yes_no("Do you want to restart the jobs that did not actually fail but had large proteins?", default_value=False)
        if restart_jobs:
            for j in did_not_fail_but_have_many_residues:
                r = ddGdb.execute("SELECT Status, AdminCommand FROM Prediction WHERE ID=%s", parameters=(j,))
                assert(len(r) == 1)
                if r[0]['Status'] != 'failed' or r[0]['AdminCommand'] != 'restart':
                    ddGdb.execute("UPDATE Prediction SET AdminCommand='restart' WHERE ID=%s", parameters=(j,))

    if did_not_fail_but_has_a_troublesome_structure:
        restart_jobs = ask_yes_no("Do you want to restart the jobs that did not actually fail but had troublesome structures?", default_value=False)
        if restart_jobs:
            for j in did_not_fail_but_has_a_troublesome_structure:
                r = ddGdb.execute("SELECT Status, AdminCommand FROM Prediction WHERE ID=%s", parameters=(j,))
                assert(len(r) == 1)
                if r[0]['Status'] != 'failed' or r[0]['AdminCommand'] != 'restart':
                    ddGdb.execute("UPDATE Prediction SET AdminCommand='restart' WHERE ID=%s", parameters=(j,))

    for k, v in affected_subsets.iteritems():
        see_errors = ask_yes_no("Do you want to see the stderr files for the jobs that did fail and affected %s?" % k, default_value=False)
        if see_errors:
            count = 1
            for j in v:
                zipfile_path = os.path.join(results_root, '%d.zip' % j)
                z = zipfile.ZipFile(zipfile_path, 'r')
                file_list = z.namelist()

                colortext.error("\n[%d/%d] Prediction ID: %d" % (count, len(v), j))
                PDBFileID = ddGdb.execute_select("SELECT UserDataSetExperiment.PDBFileID AS PDBFileID FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperimentID=UserDataSetExperiment.ID WHERE Prediction.ID=%s", parameters=(j,))
                assert(len(PDBFileID) == 1)
                PDBFileID = PDBFileID[0]['PDBFileID']
                colortext.error("\nPDB ID: %s" % PDBFileID)
                for f in file_list:
                    if f.find('.cmd.e') != -1:
                        colortext.warning(f)
                        print(z.open(f, 'r').read()[:300])
                        print("")
                count += 1
Code example #24
def classify_failures(prediction_set):
    ddGdb = ddgdbapi.ddGDatabase()
    results_root = '/kortemmelab/shared/DDG/jobs'

    UserDataSetExperimentIDs = {}
    results = ddGdb.execute_select('''SELECT ID, ExperimentID FROM Prediction WHERE PredictionSet = %s AND STATUS = 'failed' ''', parameters=(prediction_set,))
    reported_failures = [r['ID'] for r in results]
    for r in results:
        UserDataSetExperimentIDs[r['ID']] = r['ExperimentID']

    actually_failed = []
    did_not_fail = []
    for PredictionID in reported_failures:
        zipfile_path = os.path.join(results_root, '%d.zip' % PredictionID)
        #try:
        z = zipfile.ZipFile(zipfile_path, 'r')
        #except:
        #    colortext.error('MISSING FILE FOR %d' % PredictionID)
        #    continue
        file_list = z.namelist()

        found_stdout = 0
        found_stderr = 0
        for f in file_list:
            if f.find('.cmd.o') != -1:
                found_stdout = 1
            elif f.find('.cmd.e') != -1:
                found_stderr = 1
        assert(found_stdout >= found_stderr)

        if found_stderr:
            assert(found_stderr == 1)
            colortext.error("Job #%d actually failed" % PredictionID)
            actually_failed.append(PredictionID)
        else:
            colortext.warning("Job #%d had not failed by the time it was terminated." % PredictionID)
            did_not_fail.append(PredictionID)

    colortext.message("*** Report ***")
    print('%d jobs were marked as failed.' % len(reported_failures))
    colortext.warning('%d jobs were marked as failed but had not failed.' % len(did_not_fail))
    colortext.error('%d jobs were marked as failed and did fail.\n' % len(actually_failed))

    pdb_details = {}
    failed_job_pdb_files = {}
    for failed_job in actually_failed:
        PDBFileID = ddGdb.execute_select("SELECT UserDataSetExperiment.PDBFileID AS PDBFileID FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperimentID=UserDataSetExperiment.ID WHERE Prediction.ID=%s", parameters=(failed_job,))[0]['PDBFileID']
        pdb_details[PDBFileID] = True
        failed_job_pdb_files[failed_job] = PDBFileID

    for pdb_id in pdb_details.keys():
        pdb_details[pdb_id] = ddGdb.execute_select("SELECT Resolution, Techniques FROM PDBFile WHERE ID=%s", parameters=(pdb_id,))[0]
        pdb_details[pdb_id]['Chains'] = [r['Chain'] for r in ddGdb.execute_select("SELECT Chain FROM PDBChain WHERE PDBFileID=%s ORDER BY Chain", parameters=(pdb_id,))]
        pdb_details[pdb_id]['TotalJobs'] = ddGdb.execute_select("SELECT Count(ID) AS TotalJobs FROM UserDataSetExperiment WHERE PDBFileID=%s", parameters=(pdb_id,))[0]['TotalJobs']

    hosts = {}
    failed_by_hessin = {}
    failed_by_residue_mismatch = {}
    failed_for_another_reason = {}
    missing_output = {}
    mutfiles = {}
    count = 1
    for failed_job in actually_failed:
        mutfile = None
        colortext.message('Failed job %d of %d (Prediction #%d)' % (count, len(actually_failed), failed_job))
        zipfile_path = os.path.join(results_root, '%d.zip' % failed_job)
        found_output = False
        pdb_id = failed_job_pdb_files[failed_job]
        if os.path.exists(zipfile_path):
            z = zipfile.ZipFile(zipfile_path, 'r')
            file_list = z.namelist()
            for f in file_list:
                if f.find('.cmd.e') != -1:
                    found_output = True
                    stderr_contents = z.open(f, 'r').read()
                    stdout_contents = z.open(f.replace('.cmd.e', '.cmd.o'), 'r').read()

                    hosts[failed_job] = stdout_contents[stdout_contents.find('<host>') + 6:stdout_contents.find('</host>')].strip()
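                    # (The slice above extracts the execution host name from between the <host> and </host> markers in the job's stdout; 6 == len('<host>').)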

                    if stderr_contents.find('HESSIN for (i,i):') != -1:
                        assert(stderr_contents.find('G for (i):') != -1)
                        print(stderr_contents[:120])
                        failed_by_hessin[pdb_id] = failed_by_hessin.get(pdb_id, [])
                        failed_by_hessin[pdb_id].append(failed_job)
                        colortext.error('HESSIN: %s' % pdb_id)
                    elif stderr_contents.find('ERROR: pose.residue(resnum).name1() == wt') != -1:
                        failed_by_residue_mismatch[pdb_id] = failed_by_residue_mismatch.get(pdb_id, [])
                        failed_by_residue_mismatch[pdb_id].append(failed_job)
                        colortext.error('MISMATCH')
                    else:
                        failed_for_another_reason[pdb_id] = failed_for_another_reason.get(pdb_id, [])
                        failed_for_another_reason[pdb_id].append(failed_job)
                        colortext.error('UNKNOWN')
                        see_errors = ask_yes_no("Do you want to see the stderr files for prediction %d?" % failed_job, default_value=False)
                        if see_errors:
                            colortext.warning(f)
                            print(stderr_contents[:300])
                            print("")
                if f.find('.mutfile') != -1:
                    assert(mutfile == None)
                    mutfile = z.open(f, 'r').read()
                    mutfiles[failed_job] = mutfile

        if not found_output:
            missing_output[pdb_id] = missing_output.get(pdb_id, [])
            missing_output[pdb_id].append(failed_job)
        count += 1


    colortext.message("*** Report ***")
    if missing_output:
        colortext.warning("Missing output: %d jobs" % sum([len(v) for k, v in missing_output.iteritems()]))
        for k, v in sorted(missing_output.iteritems()):
            print('%s: %d jobs - %s' % (k, len(v), ', '.join(map(str, sorted(v)))))
    if failed_by_hessin:
        colortext.warning("Failed Hessin: %d jobs" % sum([len(v) for k, v in failed_by_hessin.iteritems()]))
        for k, v in sorted(failed_by_hessin.iteritems()):
            if pdb_details[k]['Resolution'] != None:
                print('%s, %0.2fA, %s.' % (k, pdb_details[k]['Resolution'], pdb_details[k]['Techniques'].title()))
            else:
                print('%s, %s.' % (k, pdb_details[k]['Techniques'].title()))
            print('%d/%d jobs failed - %s\n' % (len(v), pdb_details[k]['TotalJobs'], ', '.join(map(str, sorted(v)))))
            for failed_id in sorted(v):
                mutations = ddGdb.execute_select("SELECT Prediction.ExperimentID, ExperimentMutation.* FROM Prediction INNER JOIN ExperimentMutation ON Prediction.ExperimentID=ExperimentMutation.ExperimentID WHERE Prediction.ID=%s", parameters=(failed_id,))
                mut_str = ', '.join([('%s %s%s%s' % (m['Chain'], m['WildTypeAA'], m['ResidueID'], m['MutantAA'])) for m in mutations])
                colortext.printf('%d: %s, experiment #%d. Host = %s' % (failed_id, mut_str, mutations[0]['ExperimentID'], hosts[failed_id]), 'orange')
                print('')
            print('')

    if failed_by_residue_mismatch:
        colortext.warning("Failed due to residue mismatch: %d jobs" % sum([len(v) for k, v in failed_by_residue_mismatch.iteritems()]))
        for k, v in sorted(failed_by_residue_mismatch.iteritems()):
            if pdb_details[k]['Resolution'] != None:
                colortext.printf('%s, %0.2fA, %s.' % (k, pdb_details[k]['Resolution'], pdb_details[k]['Techniques'].title()), 'cyan')
            else:
                colortext.printf('%s, %s.' % (k, pdb_details[k]['Techniques'].title()), 'cyan')
            print('%d/%d jobs failed - %s\n' % (len(v), pdb_details[k]['TotalJobs'], ', '.join(map(str, sorted(v)))))
            for failed_id in sorted(v):
                mutations = ddGdb.execute_select("SELECT ExperimentMutation.* FROM Prediction INNER JOIN ExperimentMutation ON Prediction.ExperimentID=ExperimentMutation.ExperimentID WHERE Prediction.ID=%s", parameters=(failed_id,))
                mut_str = ', '.join([('%s %s%s%s' % (m['Chain'], m['WildTypeAA'], m['ResidueID'], m['MutantAA'])) for m in mutations])
                colortext.printf('%d: %s' % (failed_id, mut_str), 'orange')
                print(mutfiles[failed_id])
                print('')
            print('')

    if failed_for_another_reason:
        colortext.warning("Failed for an unknown reason: %d jobs" % sum([len(v) for k, v in failed_for_another_reason.iteritems()]))
        for k, v in sorted(failed_for_another_reason.iteritems()):
            if pdb_details[k]['Resolution'] != None:
                print('%s, %0.2fA, %s.' % (k, pdb_details[k]['Resolution'], pdb_details[k]['Techniques'].title()))
            else:
                print('%s, %s.' % (k, pdb_details[k]['Techniques'].title()))
            print('%d/%d jobs failed - %s\n' % (len(v), pdb_details[k]['TotalJobs'], ', '.join(map(str, sorted(v)))))

    print('%d jobs were marked as failed.' % len(reported_failures))
    colortext.warning('%d jobs were marked as failed but had not failed.' % len(did_not_fail))
    colortext.error('%d jobs were marked as failed and did fail.\n' % len(actually_failed))
Code example #25
File: analysis.py Project: Kortemme-Lab/kddg
    def plot(self,
             table_name,
             RFunction,
             output_filename=None,
             filetype="pdf"):
        '''Results is expected to be a list of dicts, each of which has the keys ExperimentID and ddG.'''
        if (not self.analysis_tables) or (not table_name):
            raise Exception("There are no analysis tables to plot.")
        if not table_name in self.analysis_tables.keys():
            raise Exception("The analysis table '%s' does not exist." %
                            table_name)

        R_return_values = {}
        gplot = None
        analysis_table = self.analysis_tables[table_name]
        if self.quiet_level >= 3:
            print(table_name)
            print(RFunction)
        if len(analysis_table.points) == 1:
            raise Exception(
                "The analysis table %s set only has one data point. At least two points are required."
                % table_name)
        else:
            inputfname = self.CreateCSVFile(table_name)
            if self.quiet_level >= 3:
                print(inputfname)
            try:
                if self.quiet_level >= 2:
                    colortext.printf("Running %s." % RFunction)
                    if output_filename:
                        colortext.printf(
                            "Saving graph as %s with filename %s." %
                            (filetype, output_filename))

                output_fname = output_filename
                if not output_fname:
                    output_fname = rosettahelper.writeTempFile(".", "")

                R_output = RFunction(inputfname, output_fname, filetype)
                R_return_values = RUtilities.parse_R_output(R_output)

                colortext.message(table_name)
                print("  %s" % str(RFunction))
                for k, v in sorted(R_return_values.iteritems()):
                    print("  %s: %s" % (str(k), str(v)))

                if not output_filename:
                    contents = rosettahelper.readBinaryFile(output_fname)
                    delete_file(output_fname)
                    description = None
                    for file_suffix, details in RFunctions.iteritems():
                        if details[1] == RFunction:
                            description = details[0]
                    assert (description)
                    gplot = AnalysisObject(table_name, description, filetype,
                                           contents)
                else:
                    gplot = output_filename

            except Exception, e:
                import traceback
                colortext.error(traceback.format_exc())
                delete_file(inputfname)
                raise Exception(e)
            delete_file(inputfname)
Code example #26
            assert (len(loop_set) == 1)
            start_res = '{chainID}{resSeq:>4d}{iCode}'.format(
                **loop_set[0]['start'])
            end_res = '{chainID}{resSeq:>4d}{iCode}'.format(
                **loop_set[0]['stop'])

            success, result = get_pdb_contents_to_pose_residue_map(
                new_pdb_content,
                rosetta_scripts_binary,
                None,
                pdb_id=None,
                extra_flags=
                '-ignore_zero_occupancy false -ignore_unrecognized_res')

            if not success:
                colortext.error('Failed on {0}.'.format(pdb_prefix))
                raise colortext.Exception('\n'.join(result))
            else:
                if not start_res in result:
                    raise colortext.Exception(
                        'Could not find the starting residue in the PDB -> Rosetta residue mapping.'
                    )
                elif not end_res in result:
                    raise colortext.Exception(
                        'Could not find the ending residue in the PDB -> Rosetta residue mapping.'
                    )
                start_rosetta_res = result[start_res]['pose_residue_id']
                end_rosetta_res = result[end_res]['pose_residue_id']
                if not end_rosetta_res > start_rosetta_res:
                    raise colortext.Exception(
                        'The end residue must have a higher index number than the starting residue.'
Code example #27
File: sifts.py Project: Kortemme-Lab/klab
    def end_document(self):
        assert(self.counters['entry'] == 1)

        residue_count = 0
        residues_matched = {}
        residues_encountered = set()
        atom_to_uniparc_residue_map = {}
        atom_to_seqres_residue_map = {}
        seqres_to_uniparc_residue_map = {}

        UniProtACs = set()
        for r in self.residues:
            if r.UniProtAC:
                UniProtACs.add(r.UniProtAC)

        ACC_to_UPARC_mapping = uniprot_map('ACC', 'UPARC', list(UniProtACs), cache_dir = self.cache_dir)
        assert(sorted(ACC_to_UPARC_mapping.keys()) == sorted(list(UniProtACs)))
        for k, v in ACC_to_UPARC_mapping.iteritems():
            assert(len(v) == 1)
            ACC_to_UPARC_mapping[k] = v[0]

        map_chains = set()
        for r in self.residues:
            if not(r.PDBResidueID.isalnum() and int(r.PDBResidueID.isalnum()) < 0):
                # These are not valid PDB residue IDs - the SIFTS XML convention sometimes assigns negative residue IDs to unobserved residues before the first ATOM record
                # (only if the first residue ID is 1?)
                pass

            # Store the PDB->UniProt mapping
            if r.has_pdb_to_uniprot_mapping():
                UniProtAC = r.UniProtAC
                UniParcID = ACC_to_UPARC_mapping[UniProtAC]
                self.uniparc_ids.add(UniParcID)

            full_pdb_residue_ID = r.get_pdb_residue_id()
            PDBChainID = r.PDBChainID
            map_chains.add(PDBChainID)
            residues_matched[PDBChainID] = residues_matched.get(PDBChainID, 0)

            if not r.WasNotObserved:
                # Do not add ATOM mappings when the ATOM data does not exist
                if r.has_pdb_to_uniprot_mapping():
                    atom_to_uniparc_residue_map[PDBChainID] = atom_to_uniparc_residue_map.get(PDBChainID, {})
                    atom_to_uniparc_residue_map[PDBChainID][full_pdb_residue_ID] = (UniParcID, r.UniProtResidueIndex)

                atom_to_seqres_residue_map[PDBChainID] = atom_to_seqres_residue_map.get(PDBChainID, {})
                atom_to_seqres_residue_map[PDBChainID][full_pdb_residue_ID] = r.PDBeResidueID

            if r.has_pdb_to_uniprot_mapping():
                seqres_to_uniparc_residue_map[PDBChainID] = seqres_to_uniparc_residue_map.get(PDBChainID, {})
                seqres_to_uniparc_residue_map[PDBChainID][r.PDBeResidueID] = (UniParcID, r.UniProtResidueIndex)

            # Make sure we only have at most one match per PDB residue
            assert(full_pdb_residue_ID not in residues_encountered)
            residues_encountered.add(full_pdb_residue_ID)

            # Count the number of exact sequence matches
            PDBResidue3AA = r.PDBResidue3AA
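            # Resolve the three-letter residue code to a one-letter code, falling back in turn to the modified, protonated and non-canonical residue tables.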
            pdb_residue_type = residue_type_3to1_map.get(PDBResidue3AA) or self.modified_residues.get(PDBResidue3AA) or protonated_residue_type_3to1_map.get(PDBResidue3AA) or non_canonical_amino_acids.get(PDBResidue3AA)
            if r.has_pdb_to_uniprot_mapping():
                if pdb_residue_type == r.UniProtResidue1AA:

                    residues_matched[PDBChainID] += 1
            residue_count += 1

        # Create the SequenceMaps
        for c in map_chains:
            if residues_matched[c] > 0:
                # 1IR3 has chains A and B:
                # Chain A has mappings from atom and seqres (PDBe) residues to UniParc as usual
                # Chain B (18 residues long) has mappings from atom to seqres residues but not to UniParc residues
                self.atom_to_uniparc_sequence_maps[c] = PDBUniParcSequenceMap.from_dict(atom_to_uniparc_residue_map[c])
                self.seqres_to_uniparc_sequence_maps[c] = PDBUniParcSequenceMap.from_dict(seqres_to_uniparc_residue_map[c])
            self.atom_to_seqres_sequence_maps[c] = SequenceMap.from_dict(atom_to_seqres_residue_map[c])

        # Check the match percentage
        total_residues_matched = sum([residues_matched[c] for c in residues_matched.keys()])
        if total_residues_matched == 0:
            if self.pdb_id and self.pdb_id in NoSIFTSPDBUniParcMappingCases:
                if self.require_uniprot_residue_mapping:
                    raise NoSIFTSPDBUniParcMapping('The PDB file %s has a bad or missing SIFTS mapping at the time of writing.' % self.pdb_id)
                else:
                    colortext.error('Warning: The PDB file %s has a bad or missing SIFTS mapping at the time of writing so there is no PDB -> UniProt residue mapping.' % self.pdb_id)
            else:
                if self.require_uniprot_residue_mapping:
                    raise Exception('No residue information matching PDB residues to UniProt residues was found.')
                else:
                    colortext.error('Warning: No residue information matching PDB residues to UniProt residues was found.')
        else:
            percentage_matched = float(total_residues_matched)*100.0/float(residue_count)
            if percentage_matched < self.acceptable_sequence_percentage_match:
                if self.pdb_id and self.pdb_id in BadSIFTSMappingCases:
                    raise BadSIFTSMapping('The PDB file %s has a known bad SIFTS mapping at the time of writing.' % self.pdb_id)
                else:
                    raise Exception('Expected %.2f%% sequence match on matched residues but the SIFTS results only gave us %.2f%%.' % (self.acceptable_sequence_percentage_match, percentage_matched))

        # Merge the ranges for the region mappings i.e. so [1-3],[3-86] becomes [1-86]
        region_mapping = self.region_mapping
        for chain_id, chain_details in region_mapping.iteritems():
            for dbSource, source_details in chain_details.iteritems():
                for dbAccessionId, range_list in source_details.iteritems():
                    source_details[dbAccessionId] = merge_range_pairs(range_list)

        # Check to see if the expected numbering schemes hold
        for k, v in expected_residue_numbering_schemes.iteritems():
            if self.region_map_coordinate_systems.get(k):
                assert(self.region_map_coordinate_systems[k] == set([v]))

        pfam_scop_mapping = {}
        scop_pfam_mapping = {}
        for chain_id, chain_details in region_mapping.iteritems():
            if chain_details.get('Pfam') and chain_details.get('SCOP'):
                for pfamAccessionId, pfam_range_lists in chain_details['Pfam'].iteritems():
                    pfam_residues = parse_range(','.join(['%d-%d' % (r[0], r[1]) for r in pfam_range_lists]))
                    for scopAccessionId, scop_range_lists in chain_details['SCOP'].iteritems():
                        scop_residues = parse_range(','.join(['%d-%d' % (r[0], r[1]) for r in scop_range_lists]))
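                        # If the Pfam and SCOP domain ranges share more than 10 residues, record a bidirectional Pfam <-> SCOP match whose quality is the overlap fraction on each side (subject to domain_overlap_cutoff).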
                        num_same_residues = len(set(pfam_residues).intersection(set(scop_residues)))
                        if num_same_residues > 10:
                            Pfam_match_quality = float(num_same_residues) / float(len(pfam_residues))
                            SCOP_match_quality = float(num_same_residues) / float(len(scop_residues))
                            if (Pfam_match_quality >= self.domain_overlap_cutoff) or (SCOP_match_quality >= self.domain_overlap_cutoff):
                                pfam_scop_mapping[pfamAccessionId] = pfam_scop_mapping.get(pfamAccessionId, DomainMatch(pfamAccessionId, 'Pfam'))
                                pfam_scop_mapping[pfamAccessionId].add(scopAccessionId, 'SCOP', SCOP_match_quality)
                                scop_pfam_mapping[scopAccessionId] = scop_pfam_mapping.get(scopAccessionId, DomainMatch(scopAccessionId, 'SCOP'))
                                scop_pfam_mapping[scopAccessionId].add(pfamAccessionId, 'Pfam', Pfam_match_quality)

        self.pfam_scop_mapping = pfam_scop_mapping
        self.scop_pfam_mapping = scop_pfam_mapping

        self._validate()
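
The range-merging step above relies on a merge_range_pairs helper that is not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming it takes a list of (start, end) pairs and returns them sorted with overlapping pairs combined (so [(1, 3), (3, 86)] becomes [(1, 86)]); the real klab implementation may differ:

def merge_range_pairs(pairs):
    # Sketch only: sort the (start, end) pairs, then fold each pair into the
    # previous one whenever the two ranges overlap.
    merged = []
    for start, end in sorted((min(p), max(p)) for p in pairs):
        if merged and start <= merged[-1][1]:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
        else:
            merged.append((start, end))
    return merged

# e.g. merge_range_pairs([(1, 3), (3, 86)]) == [(1, 86)]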
Code example #28
File: ris.py Project: Kortemme-Lab/klab
    def format(self, abbreviate_journal = True, abbreviate_author_names = True, show_year = True, html = True, allow_errors = False):
        raise Exception('This function is deprecated in favor of PublicationInterface.to_string. Some functionality needs to be added to that function e.g. ReferralURL_link.')
        if self.errors and not allow_errors:
            if not self.quiet:
                colortext.error("There were parsing errors: %s" % self.errors)
            return None

        # Abbreviate the journal name
        journal = self.journal
        if abbreviate_journal and self.publication_type != "CHAP":
            journal = publication_abbreviations.get(self.journal, self.journal)

        # Abbreviate the authors' names
        authors_str = None
        if abbreviate_author_names:
            authors_str = ", ".join(self.get_author_names_in_short_format())
        else:
            raise Exception("This code needs to be written with whatever is needed.")

        # Create string for the publication year
        year_str = ""
        if show_year:
            year_str = ", %s" % self.year

        ReferralURL_link = ""
        if self.ReferralURL:
            ReferralURL_link = " <a class='publist' href='%s'>[free download]</a>" % self.ReferralURL

        titlesuffix = '.'
        if self.publication_type == "CHAP":
            titlesuffix = " in"

        # The entry format is fixed. This could be passed as a variable for different styles.
        entry = ""
        if self.volume:
            entry = self.volume
            if self.subtitle:
                entry += " (%s)" % self.subtitle
            if self.issue:
                entry += "(%s)" % self.issue

            pagerange = PublicationInterface.get_page_range_in_abbreviated_format(self.startpage, self.endpage)
            if pagerange:
                entry += ":%s" % pagerange
        else:
            if self.startpage and self.endpage and self.startpage.isdigit() and self.endpage.isdigit():
                if self.subtitle:
                    entry = " (%s)" % self.subtitle
                pagerange = PublicationInterface.get_page_range_in_abbreviated_format(self.startpage, self.endpage)
                if pagerange:
                    entry += ":%s" % pagerange

        s = ['%s. ' % authors_str]
        if html:
            if self.doi:
                s.append('%s%s %s %s%s.' % (self.title, titlesuffix, self.journal, entry, year_str))
                s.append('doi: <a class="publication_doi" href="http://dx.doi.org/%s">%s</a>''' % (self.doi, self.doi))
                s.append(ReferralURL_link)
            elif self.url:
                s.append('<a class="publication_link" href="%s">%s</a>%s' % (self.url, self.title, titlesuffix))
                s.append('%s %s%s.' % (self.journal, entry, year_str))
                s.append(ReferralURL_link)
            else:
                s.append('%s%s %s %s%s.' % (self.title, titlesuffix, self.journal, entry, year_str))
                s.append(ReferralURL_link)
        else:
            s.append('%s%s %s %s%s.' % (self.title, titlesuffix, self.journal, entry, year_str))
            if self.doi:
                s.append('doi: %s' % self.doi)
            elif self.url:
                s.append('url: %s' % self.url)
        return " ".join(s)
Code example #29
            #print('comment : %s' % comment)
            if remaining_description:
                #colortext.error('remaining_description : %s' % remaining_description)
                pass
        #print('\n')


if __name__ == '__main__':
    script_name = sys.argv[0]
    args = sys.argv[1:]
    if 4 > len(args) or len(args) > 6:
        print('Usage             : %s [user] [host] [db] [passwd]' % script_name)
        print('Optional arguments: %s [user] [host] [db] [passwd] [port] [socket]' % script_name)
    else:
        user = args[0]
        host = args[1]
        db = args[2]
        passwd = args[3]
        port = 3306
        socket = '/var/lib/mysql/mysql.sock'
        if len(args) == 6:
            socket = args[5]
        if len(args) >= 5:
            try:
                port = int(args[4])
            except:
                colortext.error('Error: Port must be a numeric string.')
                sys.exit(1)
        sc = MySQLSchemaConverter(user, host, db, passwd, port, socket)
        sc.get_sqlalchemy_schema()
Code example #30
def determine_structure_scores(DDG_api, skip_if_we_have_pairs = 50):
    pp = pprint.PrettyPrinter(indent=4)

    ddGdb = DDG_api.ddGDB
    ddGdb_utf = ddgdbapi.ddGDatabase(use_utf = True)
    # Get the list of completed prediction set
    completed_prediction_sets = get_completed_prediction_sets(DDG_api)
    print(completed_prediction_sets)

    # Create the mapping from the old score types to the ScoreMethod record IDs
    ScoreMethodMap = {}
    results = ddGdb_utf.execute('SELECT * FROM ScoreMethod')
    for r in results:
        if r['MethodName'] == 'Global' and r['MethodType'] == 'Protocol 16':
            ScoreMethodMap[("kellogg", "total")] = r['ID']
        if r['Authors'] == 'Noah Ollikainen':
            if r['MethodName'] == 'Local' and r['MethodType'] == 'Position' and r['Parameters'] == u'8Å radius':
                ScoreMethodMap[("noah_8,0A", "positional")] = r['ID']
            if r['MethodName'] == 'Local' and r['MethodType'] == 'Position (2-body)' and r['Parameters'] == u'8Å radius':
                ScoreMethodMap[("noah_8,0A", "positional_twoscore")] = r['ID']
            if r['MethodName'] == 'Global' and r['MethodType'] == 'By residue' and r['Parameters'] == u'8Å radius':
                ScoreMethodMap[("noah_8,0A", "total")] = r['ID']

    # For each completed prediction set, determine the structure scores
    for prediction_set in completed_prediction_sets:
        #if prediction_set not in ['Ubiquitin scan: UQ_con_yeast p16']:
        #    continue

        predictions = ddGdb.execute('SELECT ID, ddG, Scores, status, ScoreVersion FROM Prediction WHERE PredictionSet=%s ORDER BY ID', parameters=(prediction_set,))
        num_predictions = len(predictions)

        # Pass #1: Iterate over all Predictions and make sure that they have completed and contain all the scores we expect
        colortext.message('Prediction set: %s' % prediction_set)
        colortext.warning('Checking that all data exists...')
        for prediction in predictions:
            #assert(prediction['status'] == 'done')
            PredictionID = prediction['ID']
            if PredictionID != 72856:
                continue
            global_scores = pickle.loads(prediction['ddG'])
            assert(global_scores)
            assert(prediction['ScoreVersion'] == 0.23)
            if not prediction['Scores']:
                raise Exception("This prediction needs to be scored with Noah's method.")

            gs2 = json.loads(prediction['Scores'])
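            # The Scores JSON must already contain at least one 'noah*' score type; otherwise this prediction still needs to be rescored with Noah's method.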
            if True not in set([k.find('noah') != -1 for k in gs2['data'].keys()]):
                raise Exception("This prediction needs to be scored with Noah's method.")
            assert (gs2['data']['kellogg'] == global_scores['data']['kellogg'])

        # Pass #2: Iterate over all completed Predictions with null StructureScores.
        # For each Prediction, determine and store the structure scores
        count = 0
        for prediction in predictions:

            count += 1
            PredictionID = prediction['ID']
            colortext.message('%s: %d of %d (Prediction #%d)' % (prediction_set, count, num_predictions, PredictionID))

            #if PredictionID != 72856:
            #if PredictionID < 73045: continue
            if prediction['status'] == 'failed':
                colortext.error('Skipping failed prediction %d.' % PredictionID)
                continue
            if prediction['status'] == 'queued':
                colortext.warning('Skipping queued prediction %d.' % PredictionID)
                continue
            if prediction['status'] == 'postponed':
                colortext.printf('Skipping postponed prediction %d.' % PredictionID, 'cyan')
                continue

            # Store the ensemble scores
            try:
                global_scores = json.loads(prediction['Scores'])['data']
            except:
                raise colortext.Exception("Failed reading the Scores field's JSON object. The Prediction Status is %(status)s. The Scores field is: '%(Scores)s'." % prediction)
            for score_type, inner_data in global_scores.iteritems():
                for inner_score_type, data in inner_data.iteritems():
                    components = {}
                    if score_type == 'kellogg' and inner_score_type == 'total':
                        components = data['components']
                        ddG = data['ddG']

                    elif score_type == 'noah_8,0A' and inner_score_type == 'positional':
                        ddG = data['ddG']
                    elif score_type == 'noah_8,0A' and inner_score_type == 'positional_twoscore':
                        ddG = data['ddG']
                    elif score_type == 'noah_8,0A' and inner_score_type == 'total':
                        ddG = data['ddG']
                    else:
                        continue
                        raise Exception('Unhandled score types: "%s", "%s".' % (score_type, inner_score_type))

                    ScoreMethodID = ScoreMethodMap[(score_type, inner_score_type)]
                    new_record = dict(
                        PredictionID = PredictionID,
                        ScoreMethodID = ScoreMethodID,
                        ScoreType = 'DDG',
                        StructureID = -1, # This score is for the Prediction rather than a structure
                        DDG = ddG,
                    )
                    assert(not(set(components.keys()).intersection(set(new_record.keys()))))
                    new_record.update(components)
                    ddGdb.insertDictIfNew('PredictionStructureScore', new_record, ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID'])

            if skip_if_we_have_pairs != None:
                # Skip this case if we have a certain number of existing records (much quicker since we do not have to extract the binary)
                num_wt = ddGdb.execute_select("SELECT COUNT(ID) AS NumRecords FROM PredictionStructureScore WHERE PredictionID=%s AND ScoreType='WildType'", parameters=(PredictionID,))[0]['NumRecords']
                num_mut = ddGdb.execute_select("SELECT COUNT(ID) AS NumRecords FROM PredictionStructureScore WHERE PredictionID=%s AND ScoreType='Mutant'", parameters=(PredictionID,))[0]['NumRecords']
                print(num_wt, num_mut)
                if num_wt == num_mut and num_mut == skip_if_we_have_pairs:
                    continue

            # Store the ddg_monomer scores for each structure
            grouped_scores = DDG_api.get_ddg_monomer_scores_per_structure(PredictionID)
            for structure_id, wt_scores in sorted(grouped_scores['WildType'].iteritems()):
                new_record = dict(
                    PredictionID = PredictionID,
                    ScoreMethodID = ScoreMethodMap[("kellogg", "total")],
                    ScoreType = 'WildType',
                    StructureID = structure_id,
                    DDG = None,
                )
                new_record.update(wt_scores)
                ddGdb.insertDictIfNew('PredictionStructureScore', new_record, ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID'])
            for structure_id, wt_scores in sorted(grouped_scores['Mutant'].iteritems()):
                new_record = dict(
                    PredictionID = PredictionID,
                    ScoreMethodID = ScoreMethodMap[("kellogg", "total")],
                    ScoreType = 'Mutant',
                    StructureID = structure_id,
                    DDG = None,
                )
                new_record.update(wt_scores)
                ddGdb.insertDictIfNew('PredictionStructureScore', new_record, ['PredictionID', 'ScoreMethodID', 'ScoreType', 'StructureID'])

            # Test to make sure that we can pick a best pair of structures (for generating a PyMOL session)
            assert(DDG_api.determine_best_pair(PredictionID) != None)
Code example #31
def import_mutageneses():
    setup_mutations_dataframe()
    ppi_api = get_ppi_api()
    
    complex_definitions = json.loads(read_file('tinas_complexes.json'))

    # Determine the mapping from PDB ID to complex ID
    pdb_id_to_database_id = {}
    for index, r in mutations_dataframe.iterrows():
        pdb_id = r['pdb']
        db_id = complex_definitions[pdb_id]['Structure']['db_id']
        if pdb_id_to_database_id.get(pdb_id):
            assert(pdb_id_to_database_id[pdb_id] == db_id)
        pdb_id_to_database_id[pdb_id] = db_id

    pdb_id_to_complex_id = {}
    for pdb_id, db_id in sorted(pdb_id_to_database_id.iteritems()):
        results = ppi_api.DDG_db.execute_select('SELECT DISTINCT PPComplexID, SetNumber FROM PPIPDBPartnerChain WHERE PDBFileID=%s', parameters=(db_id,))
        assert(len(results) == 1)
        pdb_id_to_complex_id[pdb_id] = dict(PPComplexID = results[0]['PPComplexID'], SetNumber = results[0]['SetNumber'])

    pdb_residues = {}
    for db_id in pdb_id_to_database_id.values():
        pdb_residues[db_id] = {}
        for r in ppi_api.DDG_db.execute_select('SELECT Chain, ResidueID, ResidueAA FROM PDBResidue WHERE PDBFileID=%s', parameters=(db_id,)):
            pdb_residues[db_id][r['Chain']] = pdb_residues[db_id].get(r['Chain'], {})
            pdb_residues[db_id][r['Chain']][r['ResidueID']] = r['ResidueAA']

    assert(len(pdb_id_to_complex_id) == 15)

    user_data_set_text_id = 'RAN-GSP'
    ppi_api.add_user_dataset('oconchus', user_data_set_text_id, "Tina's dataset for RAN/GSP1 complexes.")

    user_dataset_cases = []
    for index, r in mutations_dataframe.iterrows():
        pdb_id = r['pdb']
        database_pdb_id = pdb_id_to_database_id[pdb_id]
        dataset_id = index
        pdb_id = r['pdb']
        complex_definition = complex_definitions[pdb_id]

        # all the mutations are on chain1 (which is always chain A)
        chain_id = 'A'
        residue_id = str(r['pdb_res_num'])
        wildtype_aa = pdb_residues[database_pdb_id][chain_id][PDB.ResidueID2String(residue_id)]
        mutant_aa = r['mutation']
        assert(wildtype_aa != mutant_aa)

        case_details = dict(

            # These records are used to create a PPMutagenesis record and the associated mutagenesis details

            Mutagenesis = dict(
                RecognizableString = 'TinaGSP_{0}'.format(dataset_id),
                PPComplexID = pdb_id_to_complex_id[pdb_id]['PPComplexID'],
            ),

            Mutations = [
                # There is one dict per mutation
                dict(
                    MutagenesisMutation = dict(
                        # PPMutagenesisID will be filled in when the PPMutagenesis record is created.
                        RecordKey = '{0} {1}{2}{3}'.format(chain_id, wildtype_aa, residue_id.strip(), mutant_aa),
                        ProteinID = None, # todo
                        ResidueIndex = None, # todo
                        WildTypeAA = wildtype_aa,
                        MutantAA = mutant_aa,
                    ),
                    MutagenesisPDBMutation = dict(
                        # PPMutagenesisID and PPMutagenesisMutationID will be filled in when the PPMutagenesisMutation record is created.
                        # PPComplexID is taken from the PPMutagenesis section. WildTypeAA and MutantAA are taken from the PPMutagenesisMutation section.
                        SetNumber = pdb_id_to_complex_id[pdb_id]['SetNumber'],
                        PDBFileID = database_pdb_id,
                        Chain = chain_id,
                        ResidueID = residue_id,
                    ),
                ),
            ],

            # This field is used to create the UserPPDataSetExperiment record. All other fields can be derived from the above.
            # Note: We use the human-readable label here. The database ID is retrieved using e.g. ppi_api.get_defined_user_datasets()[<UserDataSetTextID>]['ID']
            UserDataSetTextID = user_data_set_text_id,
        )
        user_dataset_cases.append(case_details)

    colortext.porange('Creating the UserDataSet cases')
    user_dataset_name_to_id_map = {}
    tsession = ppi_api.get_session(new_session = True)
    try:
        for user_dataset_case in user_dataset_cases:
            ppi_api.add_user_dataset_case(tsession, user_dataset_case, user_dataset_name_to_id_map = user_dataset_name_to_id_map)

        print('\n\nSuccess')
        tsession.commit()
        #tsession.rollback()
        tsession.close()
    except Exception, e:
        colortext.error('\n\nFailure: An error occurred.')
        colortext.warning(str(e))
        colortext.warning(traceback.format_exc())
        tsession.rollback()
        tsession.close()
Code example #32
def main(FixedIDs = [], radii = [6.0, 7.0, 8.0, 9.0]):
    max_processors = get_number_of_processors()

    rescore_process_file = "/tmp/klab_rescore.txt"
    parser = OptionParser()
    parser.add_option("-n", "--numprocesses", default=1, type='int', dest="num_processes", help="The number of processes used for the rescoring. The cases are split according to this number.", metavar="NUM_PROCESSES")
    parser.add_option("-p", "--process", default=1, type='int', dest="process", help="The ID of this process. This should be an integer between 1 and the number of processes used for the rescoring.", metavar="PROCESS_ID")
    parser.add_option("-d", "--delete",  action="store_true", dest="delete", help="Delete the process tracking file %s." % rescore_process_file)
    parser.add_option("-s", "--set",  type='string', dest="prediction_set", help="The prediction set to rescore.")
    (options, args) = parser.parse_args()

    if options.delete and os.path.exists(rescore_process_file):
        print("Removing %s." % rescore_process_file)
        os.remove(rescore_process_file)

    num_processes = options.num_processes
    prediction_set = options.prediction_set
    process_id = options.process

    for i in FixedIDs:
        assert(type(i) == type(1))

    # SELECT * FROM `Prediction` WHERE `PredictionSet`= 'RosCon2013_P16_score12prime'  AND Status='done' LIMIT 1
    # Check prediction set
    if not prediction_set:
        raise colortext.Exception("A prediction set must be specified.")
    else:
        if FixedIDs:
            results = ddGdb.execute("SELECT DISTINCT PredictionSet FROM Prediction WHERE ID IN (%s)" % ",".join(map(str, FixedIDs)))
            if len(results) != 1:
                raise colortext.Exception("Error: The fixed IDs cover %d different prediction sets." % len(results))
        else:
            results = ddGdb.execute("SELECT ID FROM PredictionSet WHERE ID=%s", parameters=(prediction_set,))
        if not results:
            raise colortext.Exception("The prediction set '%s' does not exist in the database." % prediction_set)

    if num_processes < 1:
        raise colortext.Exception("At least 1 processor must be used.")
    if num_processes > max_processors:
        raise colortext.Exception("Only %d processors/cores were detected. Cannot run with %d processes." % (max_processors, num_processes))
    if num_processes > (max_processors * 0.75):
        colortext.warning("Warning: Using %d processors/cores out of %d which is %0.2f%% of the total available." % (num_processes, max_processors, (100.0*float(num_processes)/float(max_processors))))
    if not(1 <= process_id <= min(max_processors, num_processes)):
        raise colortext.Exception("The process ID %d must be between 1 and the number of processes, %d." % (process_id, num_processes))

    if os.path.exists(rescore_process_file):
        lines = readFileLines(rescore_process_file)
        idx = lines[0].find("numprocesses")
        if idx == -1:
            raise Exception("Badly formatted %s." % rescore_process_file)
        existing_num_processes = int(lines[0][idx+len("numprocesses"):])
        if existing_num_processes != num_processes:
            raise colortext.Exception("You specified the number of processes to be %d but %s already specifies it as %d." % (num_processes, rescore_process_file, existing_num_processes))
        for line in [line for line in lines[1:] if line.strip()]:
            idx = line.find("process")
            if idx == -1:
                raise colortext.Exception("Badly formatted %s. Line is '%s'." % (rescore_process_file, line))
            existing_process = int(line[idx+len('process'):])
            if process_id == existing_process:
                raise colortext.Exception("Process %d is already logged as running. Check if this is so and edit %s." % (process_id, rescore_process_file))
        F = open(rescore_process_file, 'a')
        F.write("process %d\n" % process_id)
        F.close()
    else:
        F = open(rescore_process_file, 'w')
        F.write("numprocesses %d\n" % num_processes)
        F.write("process %d\n" % process_id)
        F.close()

    output_dir = os.path.join('rescoring', str(process_id))
    if not(os.path.exists(output_dir)):
        os.makedirs(output_dir)
    abs_output_dir = os.path.abspath(os.path.join(os.getcwd(), output_dir))
    print("Running process in %s.\n" % abs_output_dir)

    ReallyFixedIDs = False

    results = ddGdb.execute("SELECT ID, ExperimentID, Scores FROM Prediction WHERE PredictionSet=%s AND Status='done' AND ScoreVersion <> %s", parameters=(prediction_set, float(current_score_revision),))
    if not(FixedIDs) and results:
        raise WrongScoreRevisionException("Score versions found which are not %s. Need to update table structure." % current_score_revision)
    else:
        # Hacky way to run multiple processes
        if ReallyFixedIDs:
            num_to_score = len(remaining_unscored)
            num_for_this_to_score = num_to_score / num_processes
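            # Give each process a contiguous slice of the remaining IDs: process 1 takes the first num_for_this_to_score IDs, process 2 the next slice, and so on.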
            IDs_to_score = remaining_unscored[(process_id-1) * num_for_this_to_score : (process_id) * num_for_this_to_score]
            results = ddGdb.execute("SELECT ID, ExperimentID, Scores, UserDataSetExperimentID FROM Prediction WHERE ID IN (%s)" % (",".join(map(str, IDs_to_score))))
        elif FixedIDs:
            results = ddGdb.execute("SELECT ID, ExperimentID, Scores, UserDataSetExperimentID FROM Prediction WHERE ID IN (%s) AND MOD(ID,%s)=%s" % (",".join(map(str, FixedIDs)), num_processes,process_id-1))
        else:
            results = ddGdb.execute("SELECT ID, ExperimentID, Scores, UserDataSetExperimentID FROM Prediction WHERE PredictionSet=%s AND Status='done' AND ScoreVersion=%s AND MOD(ID,%s)=%s", parameters=(prediction_set, float(current_score_revision),num_processes,process_id-1))

    count = 0
    cases_computed = 0
    total_time_in_secs = 0

    number_of_cases_left = len(results) * len(radii)

    failed_cases = []
    colortext.printf("Rescoring %d predictions over %d radii...\n" % (len(results), len(radii)), 'lightgreen')
    for r in results:
        t = Timer()
        t.add('Preamble')
        inner_count = 0

        mutations = ddGdb.execute('SELECT * FROM ExperimentMutation WHERE ExperimentID=%s', parameters=(r['ExperimentID'],))
        mutation_str = ', '.join(['%s %s%s%s' % (m['Chain'], m['WildTypeAA'], m['ResidueID'], m['MutantAA']) for m in mutations])
        extracted_data = False

        details = ddGdb.execute_select('SELECT Prediction.ID, PDBFileID, Chain FROM Prediction INNER JOIN Experiment ON Prediction.ExperimentID=Experiment.ID INNER JOIN ExperimentChain ON Prediction.ExperimentID=ExperimentChain.ExperimentID WHERE Prediction.ID=%s', parameters=(r['ID'],))
        colortext.message("Prediction: %d, %s chain %s. Mutations: %s. Experiment ID #%d. UserDataSetExperimentID #%d." % (details[0]['ID'], details[0]['PDBFileID'], details[0]['Chain'], mutation_str, r['ExperimentID'], r['UserDataSetExperimentID']))

        experiment_pdbID = ddGdb.execute('SELECT PDBFileID FROM Experiment WHERE ID=%s', parameters=(r['ExperimentID'],))[0]['PDBFileID']
        print('Experiment PDB file ID = %s' % experiment_pdbID)
        pdbID = ddGdb.execute('SELECT UserDataSetExperiment.PDBFileID FROM Prediction INNER JOIN UserDataSetExperiment ON UserDataSetExperimentID=UserDataSetExperiment.ID WHERE Prediction.ID=%s', parameters=(r['ID'],))[0]['PDBFileID']
        print('UserDataSetExperiment PDB file ID = %s' % pdbID)

        count += 1
        if True:#len(mutations) == 1:
            timestart = time.time()

            #mutation = mutations[0]
            dbchains = sorted(set([mutation['Chain'] for mutation in mutations]))
            # todo: note: assuming monomeric structures here
            assert(len(dbchains) == 1)
            dbchain = dbchains[0]
            #mutantaa = mutation['MutantAA']

            ddG_dict = json.loads(r['Scores'])
            kellogg_ddG = ddG_dict['data']['kellogg']['total']['ddG']

            #assert(ddG_dict['version'] == current_score_revision)
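            # Skip this prediction early if a Noah score has already been stored for every requested radius.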

            all_done = True
            for radius in radii:
                score_name = ('noah_%0.1fA' % radius).replace(".", ",")
                if not(ddG_dict['data'].get(score_name)):
                    all_done = False
                else:
                    cases_computed += 1
                    number_of_cases_left -= 1
            if all_done:
                print('Prediction %d: done.' % r["ID"])
                continue

            # Extract data
            t.add('Grab data')
            #archivefile = None
            #prediction_data_path = ddGdb.execute('SELECT Value FROM _DBCONSTANTS WHERE VariableName="PredictionDataPath"')[0]['Value']
            #job_data_path = os.path.join(prediction_data_path, '%d.zip' % r['ID'])
            #print(job_data_path)
            #assert(os.path.exists(job_data_path))
            #archivefile = readBinaryFile(job_data_path)
            archivefile = DDG_interface.getData(r['ID'])
            zipfilename = os.path.join(output_dir, "%d.zip" % r['ID'])
            F = open(zipfilename, "wb")
            F.write(archivefile)
            F.close()

            t.add('Extract data')
            zipped_content = zipfile.ZipFile(zipfilename, 'r', zipfile.ZIP_DEFLATED)
            tmpdir = None
            repacked_files = []
            mutant_files = []

            rosetta_resids = []
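            # Walk the archived job output below: collect the repacked (wildtype) and mutant PDB
            # structures and recover the mutated residue positions from the stored resfile/mutfile.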
            try:
                tmpdir = makeTemp755Directory(output_dir)
                highestIndex = -1
                foundResfile = False
                foundMutfile = False

                presumed_mutation = None
                for fname in sorted(zipped_content.namelist()):
                    if fname.endswith(".pdb"):
                        if fname.startswith("%s/mut_" % r['ID']) or fname.startswith("%s/repacked_" % r['ID']):
                            structnum = int(fname[fname.rindex('_')+1:-4])
                            if fname.startswith("%s/mut_" % r['ID']):
                                if presumed_mutation:
                                    assert(presumed_mutation == os.path.split(fname)[1].split('_')[1])
                                else:
                                    presumed_mutation = os.path.split(fname)[1].split('_')[1]
                                newfname = 'mutant_%02d' % structnum
                            if fname.startswith("%s/repacked_" % r['ID']):
                                newfname = 'repacked_%02d' % structnum
                            highestIndex = max(highestIndex, structnum)

                            newfilepath = os.path.join(tmpdir, newfname)
                            writeFile(newfilepath, zipped_content.read(fname))

                            if fname.startswith("%s/mut_" % r['ID']):
                                mutant_files.append(newfilepath)
                            if fname.startswith("%s/repacked_" % r['ID']):
                                repacked_files.append(newfilepath)
                        #elif fname.startswith("%s/%s-%s" % (r['ID'],r['ExperimentID'],pdbID)) or fname.startswith("%s/repacked_" % r['ID']):
                        #    writeFile(os.path.join(tmpdir, '%s.pdb' % pdbID), zipped_content.read(fname))
                    if fname.startswith("%s/%s-%s.resfile" % (r['ID'],r['ExperimentID'],experiment_pdbID)):
                        raise Exception('This case needs to be updated (see the mutfile section below). We mainly use mutfiles now so I did not update this section.')
                        foundResfile = True
                        lines = zipped_content.read(fname).split("\n")
                        assert(len(lines) == 3)
                        assert(lines[0] == "NATAA")
                        assert(lines[1] == "start")
                        resfile_mutation = lines[2].split(" ")
                        assert(len(resfile_mutation) == 4)
                        rosetta_resid = resfile_mutation[0]
                        rosetta_chain = resfile_mutation[1]
                        rosetta_mutaa = resfile_mutation[3]
                        assert(mutantaa == rosetta_mutaa)
                        assert(dbchain == rosetta_chain)
                        assert(resfile_mutation[2] == 'PIKAA')
                        assert(len(rosetta_mutaa) == 1)
                    if fname.startswith("%s/%s-%s.mutfile" % (r['ID'],r['ExperimentID'],experiment_pdbID)):
                        foundMutfile = True
                        lines = zipped_content.read(fname).split("\n")
                        assert(lines[0].startswith('total '))
                        num_mutations = int(lines[0][6:])
                        assert(lines[1] == str(num_mutations))
                        # todo: note: assuming monomeric structures here
                        rosetta_chain = ddGdb.execute("SELECT Chain FROM ExperimentChain WHERE ExperimentID=%s", parameters=(r['ExperimentID'],))
                        assert(len(rosetta_chain) == 1)
                        rosetta_chain = rosetta_chain[0]['Chain']

                        resfile_mutations = lines[2:]
                        for resfile_mutation in resfile_mutations:
                            resfile_mutation = resfile_mutation.split(" ")
                            assert(len(resfile_mutation) == 3)
                            rosetta_resids.append(resfile_mutation[1])
                            rosetta_mutaa = resfile_mutation[2]
                            assert(dbchain == rosetta_chain)
                            assert(len(rosetta_mutaa) == 1)

                # Make sure the wtaa->mutantaa types match the structures
                assert(not(foundResfile))
                if not foundMutfile:
                    raise Exception('This case needs to be updated (see the mutfile section below). This was added as a hack for cases where I did not store the mutfile so I did not update this section.')
                    input_files = ddGdb.execute_select('SELECT InputFiles FROM Prediction WHERE ID=%s', parameters=(r['ID'],))
                    assert(len(input_files) == 1)
                    lines = pickle.loads(input_files[0]['InputFiles'])['MUTFILE'].split("\n")

                    #lines = regenerate_mutfile(r['ID']).split("\n")
                    assert(len(lines) == 3)
                    assert(lines[0] == "total 1")
                    assert(lines[1] == "1")
                    resfile_mutation = lines[2].split(" ")
                    assert(len(resfile_mutation) == 3)
                    rosetta_resid = resfile_mutation[1]
                    rosetta_chain = ddGdb.execute("SELECT Chain FROM ExperimentChain WHERE ExperimentID=%s", parameters=(r['ExperimentID'],))
                    assert(len(rosetta_chain) == 1)
                    rosetta_chain = rosetta_chain[0]['Chain']
                    rosetta_mutaa = resfile_mutation[2]
                    assert(dbchain == rosetta_chain)
                    assert(len(rosetta_mutaa) == 1)
                    assert("%s%s%s" % (resfile_mutation[0], resfile_mutation[1], resfile_mutation[2]) == presumed_mutation)

                fullresids = []
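                # Build padded residue identifiers (chain + residue number right-aligned in four
                # columns + a trailing space for the blank insertion code) so they match the keys
                # returned by get_residue_id_to_type_map() in the (disabled) validation check below.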

                for rosetta_resid in rosetta_resids:
                    fullresid = None
                    if rosetta_resid.isdigit():
                        fullresid = '%s%s%s ' % (rosetta_chain, (4-len(rosetta_resid)) * ' ', rosetta_resid)
                    else:
                        assert(False)
                        fullresid = '%s%s%s' % (rosetta_chain, (5-len(rosetta_resid)) * ' ', rosetta_resid)
                    fullresids.append(fullresid)


                resultst1 = ddGdb.execute_select("SELECT ExperimentID, UserDataSetExperimentID FROM Prediction WHERE ID=%s", parameters = (r['ID'],))
                assert(len(resultst1) == 1)
                ExperimentIDt1 = resultst1[0]['ExperimentID']
                UserDataSetExperimentIDt1 = resultst1[0]['UserDataSetExperimentID']

                if UserDataSetExperimentIDt1:
                    resultst2 = ddGdb.execute_select("SELECT PDBFileID FROM UserDataSetExperiment WHERE ID=%s", parameters = (UserDataSetExperimentIDt1,))
                else:
                    resultst2 = ddGdb.execute_select("SELECT PDBFileID FROM Experiment WHERE ID=%s", parameters = (ExperimentIDt1,))
                assert(len(resultst2) == 1)

                prediction_PDB_ID = resultst2[0]['PDBFileID']
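                # Disabled sanity check: for each mutated position, confirm that the wildtype residue
                # recorded in the database matches the residue type found in the repacked structure.
                # The per-structure offsets below compensate for renumbering in the ubiquitin-related
                # structures (ub_RPN13, ub_OTU, ub_UQcon and their yeast variants).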

                if False and prediction_PDB_ID not in ['1TEN', '1AYE', '1H7M'] + ['1A2P', '1BNI', '1STN']:
                    for fullresid in fullresids:
                        wtaa = None
                        for m in mutations:
                            # Hack for ub_RPN13
                            if prediction_PDB_ID == 'ub_RPN13' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 109):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_RPN13_yeast
                            elif prediction_PDB_ID == 'uby_RPN13' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 109):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_OTU
                            elif prediction_PDB_ID == 'ub_OTU' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 172):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_OTU_yeast
                            elif prediction_PDB_ID == 'uby_OTU' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 172):
                                wtaa = m['WildTypeAA']
                            # Hack for ub_UQcon
                            elif prediction_PDB_ID == 'ub_UQcon' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) + 213): # starts at 501
                                wtaa = m['WildTypeAA']
                            # Hack for uby_UQcon
                            elif prediction_PDB_ID == 'uby_UQcon' and m['Chain'] == fullresid[0] and m['ResidueID'] == str(int(fullresid[1:].strip()) - 287):
                                wtaa = m['WildTypeAA']
                            elif m['Chain'] == fullresid[0] and m['ResidueID'] == fullresid[1:].strip():
                                wtaa = m['WildTypeAA']
                        if (wtaa == None):
                            colortext.error(prediction_PDB_ID)
                            colortext.error('wtaa == None')
                            colortext.error('fullresid = %s' % str(fullresid))
                            colortext.error(str(mutations))
                            colortext.warning([rosetta_resid.strip() for rosetta_resid in rosetta_resids])
                            #sys.exit(0)
                        assert(wtaa != None)
                        assert(PDB.from_filepath(repacked_files[0]).get_residue_id_to_type_map()[fullresid] == wtaa)
                    #assert(PDB(mutant_files[0]).get_residue_id_to_type_map()[fullresid] == mutantaa)
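                # For each radius that is still missing, compute NoahScore over the repacked (wildtype)
                # and mutant ensembles, compute the ddG between them, and merge the result into the
                # prediction's Scores JSON.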

                for radius in radii:
                    score_name = ('noah_%0.1fA' % radius).replace(".", ",")

                    if ddG_dict['data'].get(score_name):
                        print('Radius %0.1f: done.' % radius)
                        continue
                    cases_computed += 1
                    number_of_cases_left -= 1

                    t.add('Radius %0.3f: repacked' % radius)
                    colortext.printf("Prediction ID: %d. Calculating radius %0.1f. Calculation #%d of %d." % (r['ID'], radius, cases_computed, len(results) * len(radii)), 'orange')

                    repacked_score = NoahScore()
                    repacked_score.calculate(repacked_files, rosetta_chain, sorted([rosetta_resid.strip() for rosetta_resid in rosetta_resids]), radius = radius)
                    colortext.message("Repacked")
                    print(repacked_score)

                    t.add('Radius %0.3f: mutant' % radius)
                    mutant_score = NoahScore()
                    mutant_score.calculate(mutant_files, rosetta_chain, sorted([rosetta_resid.strip() for rosetta_resid in rosetta_resids]), radius = radius)
                    colortext.printf("Mutant", color = 'cyan')
                    print(mutant_score)

                    t.add('Radius %0.3f: postamble' % radius)
                    colortext.printf("ddG", color = 'lightpurple')
                    ddg_score = repacked_score.ddg(mutant_score)
                    print(ddg_score)

                    colortext.printf("Liz's ddG", color = 'yellow')
                    print("Total score: %0.3f" % kellogg_ddG)
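                    # Migrate older Scores layouts to the current format before storing the new values.
                    # Note that the unconditional assignment on the next line forces the '0.23' branch,
                    # so the older migration paths below are effectively dead code here.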

                    ddG_dict['version'] = '0.23'
                    if ddG_dict['version'] == '0.1':
                        ddG_dict['version'] = '0.21'
                        ddG_dict['data'] = {
                            'kellogg' : {
                                'total' : ddG_dict['data'],
                            },
                            'noah': {
                                'total' : {'ddG' : ddg_score.total},
                                'positional' : {'ddG' : ddg_score.positional},
                                'positional_twoscore' : {'ddG' : ddg_score.positional_twoscore},
                            },
                        }
                    elif ddG_dict['version'] == '0.2':
                        ddG_dict['version'] = '0.21'
                        ddG_dict['data']['noah']['total']['ddG'] = ddg_score.total
                        ddG_dict['data']['noah']['positional']['ddG'] = ddg_score.positional
                        ddG_dict['data']['noah']['positional_twoscore']['ddG'] = ddg_score.positional_twoscore
                    elif ddG_dict['version'] == '0.22':
                        ddG_dict['data'][score_name] = {'total' : {}, 'positional' : {}, 'positional_twoscore' : {}}
                        ddG_dict['data'][score_name]['total']['ddG'] = ddg_score.total
                        ddG_dict['data'][score_name]['positional']['ddG'] = ddg_score.positional
                        ddG_dict['data'][score_name]['positional_twoscore']['ddG'] = ddg_score.positional_twoscore
                    elif ddG_dict['version'] == '0.23':
                        ddG_dict['data'][score_name] = {'total' : {}, 'positional' : {}, 'positional_twoscore' : {}}
                        ddG_dict['data'][score_name]['total']['ddG'] = ddg_score.total
                        ddG_dict['data'][score_name]['positional']['ddG'] = ddg_score.positional
                        ddG_dict['data'][score_name]['positional_twoscore']['ddG'] = ddg_score.positional_twoscore

                    jsonified_ddG = json.dumps(ddG_dict)
                    ddGdb.execute('UPDATE Prediction SET Scores=%s WHERE ID=%s', parameters=(jsonified_ddG, r['ID'],))
                t.add('Cleanup')
                shutil.rmtree(tmpdir)
                os.remove(zipfilename)

            except Exception, e:
                print("Exception! In prediction %d: %s" % (r['ID'], str(e)))
                failed_cases.append(r['ID'])
                import traceback
                print(traceback.format_exc())
                if tmpdir:
                    shutil.rmtree(tmpdir)

            total_time_in_secs += t.sum()
            average_time_taken = float(total_time_in_secs)/float(cases_computed or 1)
            estimate_remaining_time = number_of_cases_left * average_time_taken

            t.stop()
            colortext.printf("**Profile**", 'orange')
            print(t)
            colortext.message("Time taken for this case: %0.2fs." % t.sum())
            colortext.message("Average time taken per case: %0.2fs." % average_time_taken)
            colortext.message("Estimated time remaining: %dh%dm%ds." % (int(estimate_remaining_time/3600), int((estimate_remaining_time/60) % 60), estimate_remaining_time % 60))
            print("\n")
Code example #33
0
File: test.py Project: Kortemme-Lab/klab
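        # (This excerpt starts inside test_sifts_module's loop over the ddG PDB IDs; count, num_cases,
        # pdb_id and cache_dir are presumably defined earlier in the function.)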
        try:
            print('Case %d/%d: %s' % (count, num_cases, pdb_id))
            sifts_map = SIFTS.retrieve(pdb_id, cache_dir = cache_dir, acceptable_sequence_percentage_match = 80.0)
        except MissingSIFTSRecord:
            colortext.warning('No SIFTS XML exists for %s.' % pdb_id)
        except BadSIFTSMapping:
            colortext.warning('The SIFTS mapping for %s was considered a bad mapping at the time of writing.' % pdb_id)
        except NoSIFTSPDBUniParcMapping:
            colortext.warning('The SIFTS file for %s does not map to UniParc sequences at the time of writing.' % pdb_id)
        except Exception, e:
            colortext.warning(str(e))
            colortext.error(traceback.format_exc())
            failures.append(pdb_id)
        count += 1
    if failures:
        colortext.error('Failures: %d/%d' % (len(failures), num_cases))
        for f in failures:
            colortext.warning(f)

def test_pdbml_speed():

    test_cases = [
        '1WSY',
        '1YGV',
        '487D',
        '1HIO',
        '1H38',
        '3ZKB',
    ]
    for test_case in test_cases:
        print("\n")