Example #1
0
def create_logger(config_file='logging_config.yaml'):
    """Configure the logging module from a YAML configuration file.

    The configuration is read with read_yaml() from the path produced by
    _build_config_path(config_file). Before the configuration is handed
    to logging.config.dictConfig(), the 'filename' entry of the
    'pdb_app_handler' and 'uni_err_handler' handlers is replaced by the
    value _create_logfile_path() returns for it.

    Examples:
        write_log = logging.getLogger('pdb_app_logger')
        write_log.warning('A warning message in the application log.')

        msg = logging.getLogger('pdb_console_handler_logger')
        msg.info("Writing an information msg to the screen.")

        uni_log = logging.getLogger('uni_error_logger')
        uni_log.error("A UniProt download failed.")

        general_log = logging.getLogger('root')
        general_log.debug("Writing a message to the app log and the screen.")

    Args:
        config_file (str): The logging configuration file.

    Returns:
        None

    Raises:
        KeyError: If the configuration lacks a 'handlers' section, one of
            the two handlers above, or that handler's 'filename' entry.

    """
    settings = read_yaml(_build_config_path(config_file))
    handlers = settings['handlers']

    # Only these two handlers have their 'filename' entry rewritten.
    for name in ('pdb_app_handler', 'uni_err_handler'):
        handler_cfg = handlers[name]
        handler_cfg['filename'] = _create_logfile_path(handler_cfg['filename'])

    logging.config.dictConfig(settings)
Example #2
0
 def test_force_fetch_obsolete_pass(self):
     """fetch_obsolete() with force_download=True on an existing file.

     A file containing only a newline is written first. After the call,
     the IDs read back from that file must form a strict superset of a
     few expected IDs.
     """
     obs_fp = os.path.join(self.temp_dir, 'obs.yaml')

     # Make sure a file already exists at the target path before fetching.
     with open(obs_fp, mode='w', encoding='utf-8') as placeholder:
         placeholder.write("\n")

     fetch_obsolete(obs_fp, force_download=True)

     known_ids = {'1E6T', '1E7T', '1E7X'}
     fetched_ids = set(read_yaml(obs_fp))
     # On sets, "less than" means "is a proper subset of".
     self.assertLess(known_ids, fetched_ids)
Example #3
0
 def test_fetch_obsolete_from_servers(self):
     """fetch_obsolete() writes a file containing the expected IDs.

     The IDs read back from obs.yaml must be a strict superset of the
     expected sample.
     """
     expected = {
         "2TNA", "2TNC", "2UCE", "2UV9", "2UVA", "2UVB", "2UVC",
         "2UWG", "2UWY", "2UWZ", "2V0Q", "2V2Y", "2V44", "2V46"
     }
     obs_fp = os.path.join(self.temp_dir, 'obs.yaml')
     fetch_obsolete(obs_fp)
     result = set(read_yaml(obs_fp))
     # assertLess performs the same proper-subset check as
     # assertTrue(expected < result), but shows both operands on failure
     # instead of "False is not true".
     self.assertLess(expected, result)
Example #4
0
 def test_fetch_xray_pass(self):
     """fetch_xray() creates xray.yaml containing the expected IDs.

     The file must exist after the call, and the IDs read back from it
     must be a strict superset of the expected sample.
     """
     xray_fp = os.path.join(self.temp_dir, 'xray.yaml')
     fetch_xray(xray_fp)
     self.assertTrue(os.path.isfile(xray_fp))
     expected = {
         '101M', '102L', '102M', '103L', '103M', '104L', '104M',
         '105M', '106M', '107L', '107M', '108L', '108M',
         '109L', '109M', '10GS', '10MH'
     }
     result = set(read_yaml(xray_fp))
     # assertLess performs the same proper-subset check as
     # assertTrue(expected < result), but shows both operands on failure
     # instead of "False is not true".
     self.assertLess(expected, result)
Example #5
0
def initial_filtering(dirs):
    """Build pdb_seq.tsv from pdb_chain_uniprot.tsv, or reuse a local copy.

    If pdb_seq.tsv already exists in dirs.working, nothing is rebuilt:
    its presence is logged and the function returns. Otherwise
    pdb_chain_uniprot.tsv is read from dirs.tsv_data, passed through
    filter_pdb_chain_uniprot(), add_pdbseq_to_df() and filter_single(),
    and the result is written to pdb_seq.tsv.

    Processing steps as described by the original author (they are
    implemented in the helper functions called here, not in this body):
        1. Removes the PDB_BEG, PDB_END columns.
        2. Converts all PDB IDs to upper case.
        3. Removes any rows where the PDB ID isn't in the xray list.
        4. Removes any rows where the PDB ID is in the obs list.
        5. Removes any rows where the RES_BEG or SP_BEG are < 1.
        6. Removes any rows where the length of the intervals doesn't match.
        7. Removes any rows where the length of the interval is <= 3.
        8. Removes any rows for pdb_chains not in ss_dis.
        9. Removes uniIDs with < 2 pdb chains.
        10. Adds a column called 'PDB_SEQ' that has the section of the PDB
            chain corresponding to the interval in RES_BEG:RES_END.

    Args:
        dirs (ProjectFolders): A named tuple of directory paths; the
            'working' and 'tsv_data' attributes are read here.

    Returns:
        bool: True if an existing pdb_seq.tsv was found and reused,
            False if the file was generated by this call. The flag is
            returned for the unit tests because of problems capturing
            stdout with the logging instance.

    """
    pdb_seq_fp = os.path.join(dirs.working, 'pdb_seq.tsv')
    msg = getLogger('root')

    # Guard clause: a previously generated file short-circuits all the work.
    if os.path.exists(pdb_seq_fp):
        local_notice = "Found and using local {filename}: \n\t{filepath}"
        msg.info(
            local_notice.format(
                filename=basename(pdb_seq_fp),
                filepath=pdb_seq_fp
            )
        )
        msg.info('COMPLETE: Initial filtering.')
        return True

    def log_row_count(frame):
        # Report the size of the frame after each filtering stage.
        msg.debug("DataFrame now has {} rows.".format(len(frame.index)))

    obs_fp = os.path.join(dirs.working, 'obs.yaml')
    xray_fp = os.path.join(dirs.working, 'xray.yaml')
    chain_fp = os.path.join(dirs.tsv_data, 'pdb_chain_uniprot.tsv')

    msg.info('START: Initial filtering.')

    msg.debug("START: Fetch ss_dis.tsv.")
    ss_dis = fetch_ss_dis(dirs.working)
    msg.debug("COMPLETE: Fetch ss_dis.tsv.")

    msg.debug("START: Read obs.yaml.")
    obs = read_yaml(obs_fp)
    msg.debug("COMPLETE: Read obs.yaml.")

    msg.debug("START: Read xray.yaml.")
    xray = read_yaml(xray_fp)
    msg.debug("COMPLETE: Read xray.yaml.")

    msg.debug("START: Create initial DataFrame.")
    # header=1: column names are taken from the second line of the file.
    chains = pd.read_csv(
        chain_fp,
        sep='\t',
        header=1,
        encoding='utf-8',
        keep_default_na=False,
        na_values=['NULL', 'N/A'],
    )
    msg.debug("COMPLETE: Create initial DataFrame.")
    msg.debug("Initial DataFrame has {} rows.".format(len(chains.index)))

    msg.debug("START: Remove rows where "
              "the PDB ID is not in the xray list.")
    chains = filter_pdb_chain_uniprot(chains, obs, xray)
    msg.debug("COMPLETE: Remove rows where "
              "the PDB ID is not in the xray list.")
    log_row_count(chains)

    msg.debug("START: Remove entries not in ss_dis "
              "and add the PDB peptide.")
    chains = add_pdbseq_to_df(chains, ss_dis)
    msg.debug("COMPLETE: Remove entries not in ss_dis "
              "and add the PDB peptide.")
    log_row_count(chains)

    msg.debug("START: Remove UniProt IDs with < 2 pdb chains.")
    chains = filter_single(chains)
    msg.debug("COMPLETE: Remove UniProt IDs with < 2 pdb chains.")
    log_row_count(chains)

    msg.debug("START: Writing DataFrame to TSV file.")
    chains.to_csv(pdb_seq_fp, sep=create_delimiter('\t'), encoding='utf-8')
    msg.debug("COMPLETE: Writing DataFrame to TSV file.")
    msg.info(
        "Wrote {} to:\n\t{}".format(basename(pdb_seq_fp), pdb_seq_fp)
    )
    msg.info('COMPLETE: Initial filtering.')

    return False