# Load the non-redundant PDB chains from the NRPDB file; only these chains
# are used for training.
logger.debug('Getting non-redundant set of PDB chains...')
nrpdbs = nrpdbs_from_file(NRPDB_FILENAME, representative_field)
num_nonredundant = len(nrpdbs)
logger.debug('There are ' + str(num_nonredundant) + ' non-redundant chains.')

# Candidate test chains: every PDB ID listed in the SCOP classification file
# that is also a key of nrpdbs, replaced by the value nrpdbs stores for it
# (presumably its representative chain -- confirm against nrpdbs_from_file).
logger.debug('Getting records to test from SCOP Classification file...')
original_query_pdbids = all_pdbids_from_file(SCOP_CLASSIFICATION_FILE)
query_pdbids = set(
    nrpdbs[pdbid] for pdbid in original_query_pdbids if pdbid in nrpdbs
)
logger.debug(' total number of chains: ' + str(len(query_pdbids)))

# Chains belonging to the target (target_level / target_sunid), mapped
# through nrpdbs in the same way as the query chains above.
original_trained_pdbids = all_pdbids_from_file_in(
    SCOP_CLASSIFICATION_FILE, target_level, target_sunid
)
trained_pdbids = set(
    nrpdbs[pdbid] for pdbid in original_trained_pdbids if pdbid in nrpdbs
)

# The part of each trained chain ID before the first ':'.
used_base_pdbids = set(
    chain_id.split(':')[0] for chain_id in trained_pdbids
)
logger.debug(' number of trained chains: ' + str(len(trained_pdbids)))

# Do not test on chains that were already used for training.
query_pdbids.difference_update(trained_pdbids)
logger.debug(' number of query chains: ' + str(len(query_pdbids)))

# create the whitelist of PDB chains on which to train explicitly
# (currently disabled)
#logger.debug('Getting the whitelist of chains to test...')
#whitelist = whitelist_from_file(WHITELIST_FILENAME)
# Gather the run settings into one dict so they can be logged together.
config = {
    'output_dir': output_dir,
    'pdb_dir': pdb_dir,
    'representative_field': representative_field,
    'target_level': target_level,
    'target_sunid': target_sunid,
}
logger.debug('Program configuration: ' + str(config))

# Load the non-redundant PDB chains from the NRPDB file; only these chains
# are used for training.
nrpdbs = nrpdbs_from_file(NRPDB_FILENAME, representative_field)

# Fetch the PDB IDs that the SCOP classification file lists for the
# requested target_level / target_sunid.
logger.debug('Getting PDB IDs from SCOP Classification file...')
all_pdbs = all_pdbids_from_file_in(
    SCOP_CLASSIFICATION_FILE, target_level, target_sunid
)
logger.debug('all_pdbs: ' + str(all_pdbs))

# Nothing to work with for this target: report it and stop with exit code 1.
num_pdbs = len(all_pdbs)
if num_pdbs == 0:
    empty_message = ('Nothing in all_pdbs for target level: '
                     + str(target_level)
                     + ' and target_sunid: '
                     + str(target_sunid))
    logger.critical(empty_message)
    sys.exit(1)

# create the whitelist of PDB chains on which to train explicitly
# (currently disabled)
#logger.debug('Getting the whitelist of chains to test...')
#whitelist = whitelist_from_file(WHITELIST_FILENAME)

# Make sure the output directory is present before anything is written to it.
logger.debug('Checking whether output directory exists...')
output_dir_exists = os.path.isdir(output_dir)
if not output_dir_exists:
    logger.debug("...it doesn't so we create it")
    os.mkdir(output_dir)