Code Example #1
File: fetch_uniprot.py  Project: shellydeforte/PDB
def _write_new_dataframe(self):
     delimiter = create_delimiter('\t')
     self.df.to_csv(self.pdb_seq_fp, sep=delimiter, encoding='utf-8')
     return None
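
The create_delimiter helper used above (and in every example below) is not shown on this page. A minimal sketch of what it might look like, assuming it does nothing more than validate a single-character separator before it is passed to DataFrame.to_csv, is:

def create_delimiter(delimiter):
    """Hypothetical sketch: validate and return a single-character
    separator for pandas' to_csv/read_csv sep argument."""
    if not isinstance(delimiter, str) or len(delimiter) != 1:
        raise ValueError("Delimiter must be a single character.")
    return delimiter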
Code Example #2
File: uni_composite.py  Project: shellydeforte/PDB
def uniprot_composite(dirs):
    """Creates final UniProt DataFrame.

    Create final UniProt DataFrame where the
    UniProt ID provides a unique key.

    Args:
        dirs (ProjectFolders): A named tuple of directory paths.

    """
    pdb_initial_composite_fp = os.path.join(
        dirs.tsv_data,
        "pdb_initial_composite_df.tsv"
    )
    assert os.path.isfile(pdb_initial_composite_fp)

    uni_folder_path = dirs.uni_data
    file_names = _create_composite_file_names()
    paths = _create_composite_file_paths(uni_folder_path, file_names)

    uni_composite_tsv = paths["tsv_file"]
    uni_composite_yaml = paths["yaml_file"]
    uni_composite_json = paths["json_file"]

    if _uni_composite_file_exists(uni_folder_path):
        print(
            "A final uni_composite file already exists. Composite "
            "function complete. (Note: remove existing uni_composite "
            'files in the "{}" directory to have them '
            "regenerated.".format(uni_folder_path)
        )
        return None

    pdb_df = pd.read_csv(
        pdb_initial_composite_fp,
        sep="\t",
        header=0,
        encoding="utf-8",
        keep_default_na=False,
        na_values=["NULL", "N/A"]
    )

    print("Creating the UniProt composite structure.")
    uni_df = create_uni_struct(pdb_df)
    print("Done creating UniProt composite structure.")

    print("Validating UniProt composite structure.")
    uni_pdb_validation(uni_df, pdb_df)
    print("Validation complete.")

    print("Assigning missing region designations.")
    uni_df = create_intervals(pdb_df, uni_df)
    print("Done assigning missing regions.")

    assert isinstance(uni_df, pd.DataFrame)
    delimiter = create_delimiter("\t")
    uni_df.to_csv(uni_composite_tsv, sep=delimiter, encoding="utf-8")
    uni_df.to_json(uni_composite_json, force_ascii=False)

    json_data = read_json(uni_composite_json)
    write_yaml(json_data, uni_composite_yaml)

    print("Done writing UniProt composite files:")
    print("\t{}".format(uni_composite_tsv))
    print("\t{}".format(uni_composite_yaml))
    print("\t{}".format(uni_composite_json))
    print("This is the final UniProt ID DataFrame.")

    return None
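
A hedged usage sketch for the function above: only the directory attributes that these examples actually read (working, tsv_data, uni_data) are modeled, and the ProjectFolders definition below is a stand-in assumption rather than the project's own.

from collections import namedtuple

# Hypothetical stand-in for the project's ProjectFolders named tuple;
# only the attributes referenced in these examples are included.
ProjectFolders = namedtuple('ProjectFolders', ['working', 'tsv_data', 'uni_data'])

dirs = ProjectFolders(
    working='data/working',
    tsv_data='data/tsv',
    uni_data='data/uniprot'
)
uniprot_composite(dirs)  # writes the uni_composite .tsv/.json/.yaml files into uni_data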
Code Example #3
def final_filtering(dirs):
    """Create PDB composite.

    Args:
        dirs (ProjectFolders): A named tuple of directory paths.

    Returns:
        None
    """
    pdb_initial_composite_fp = os.path.join(
        dirs.tsv_data,
        'pdb_initial_composite_df.tsv'
    )

    uni_filtered_path = os.path.join(
        dirs.working,
        'pdb_seq_uni_filtered.tsv'
    )
    if not os.path.exists(pdb_initial_composite_fp):
        df = pd.read_csv(
            uni_filtered_path,
            sep='\t',
            index_col=0,
            keep_default_na=False,
            na_values=['NULL', 'N/A']
        )
        ss_dis = fetch_ss_dis(dirs.working)
        print("Creating PDB composite.")
        df = create_pdb_composite(df, ss_dis, dirs.uni_data)
        print("\nPDB composite finished.")

        print(
            "Removing UniProt entries with < 2 PDB "
            "chains. Starting with {0} rows".format(len(df.index))
        )
        df = filter_single(df)
        print(
            "Entries removed. There are now {0} rows".format(len(df.index))
        )

        print("Writing final PDB chain DataFrame.")
        delimiter = create_delimiter('\t')
        df.to_csv(pdb_initial_composite_fp, sep=delimiter, encoding='utf-8')
        print(
            "Finished writing {}:\n"
            "\t{}\n"
            "This is the final PDB_CHAIN DataFrame.\n"
            "Note that only pdb_chain_uniprot.tsv provides a "
            "unique key".format(
                basename(pdb_initial_composite_fp),
                pdb_initial_composite_fp,
            )
        )
    else:
        print(
            "Found {}. Using local file:\n"
            "\t{}".format(
                basename(pdb_initial_composite_fp),
                pdb_initial_composite_fp
            )
        )
    print("")
    return None
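
filter_single is also not shown on this page. Judging from the messages around it ("Removing UniProt entries with < 2 PDB chains"), one plausible implementation is a pandas group-filter keyed on the UniProt ID column; the column name SP_PRIMARY below is assumed from the SIFTS pdb_chain_uniprot.tsv layout and may not match the project's actual code.

import pandas as pd

def filter_single(df, uni_col='SP_PRIMARY'):
    """Hypothetical sketch: keep only UniProt IDs that are covered
    by at least two rows (PDB chains) in the DataFrame."""
    # groupby().filter() drops every group whose predicate returns False.
    return df.groupby(uni_col).filter(lambda group: len(group) >= 2)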
Code Example #4
def initial_filtering(dirs):
    """Creates a dataframe from pdb_chain_uniprot.tsv.

    Perform initial filtering with pdb_chain_uniprot.tsv
    and supplementary files.

    Supplementary file processing steps:
        1. Removes the PDB_BEG, PDB_END columns.
        2. Converts all PDB IDs to upper case.
        3. Removes any rows where the PDB ID isn't in the xray list.
        4. Removes any rows where the PDB ID is in the obs list.
        5. Removes any rows where the RES_BEG or SP_BEG are < 1.
        6. Removes any rows where the length of the intervals doesn't match.
        7. Removes any rows where the length of the interval is <= 3.
        8. Removes any rows for PDB chains not in ss_dis.
        9. Removes UniProt IDs with < 2 PDB chains.
        10. Adds a column called 'PDB_SEQ' that has the section of the PDB
            chain corresponding to the interval in RES_BEG:RES_END.

    Args:
        dirs (ProjectFolders): A named tuple of directory paths.

    Returns:
        None

    """
    # Return used_local for unit tests because of problems capturing stdout
    # with the logging instance.
    used_local = False
    pdb_seq_fp = os.path.join(dirs.working, 'pdb_seq.tsv')
    msg = getLogger('root')

    if not os.path.exists(pdb_seq_fp):
        obs_fp = os.path.join(dirs.working, 'obs.yaml')
        xray_fp = os.path.join(dirs.working, 'xray.yaml')
        chain_fp = os.path.join(dirs.tsv_data, 'pdb_chain_uniprot.tsv')

        msg.info('START: Initial filtering.')

        msg.debug("START: Fetch ss_dis.tsv.")
        ss_dis = fetch_ss_dis(dirs.working)
        msg.debug("COMPLETE: Fetch ss_dis.tsv.")

        msg.debug("START: Read obs.yaml.")
        obs = read_yaml(obs_fp)
        msg.debug("COMPLETE: Read obs.yaml.")

        msg.debug("START: Read xray.yaml.")
        xray = read_yaml(xray_fp)
        msg.debug("COMPLETE: Read xray.yaml.")

        msg.debug("START: Create initial DataFrame.")
        df = pd.read_csv(
            chain_fp,
            sep='\t',
            header=1,
            encoding='utf-8',
            keep_default_na=False,
            na_values=['NULL', 'N/A'])
        msg.debug("COMPLETE: Create initial DataFrame.")
        msg.debug("Initial DataFrame has {} rows.".format(len(df.index)))

        msg.debug("START: Remove rows where "
                  "the PDB ID is not in the xray list.")
        df = filter_pdb_chain_uniprot(df, obs, xray)
        msg.debug("COMPLETE: Remove rows where "
                  "the PDB ID is not in the xray list.")
        msg.debug("DataFrame now has {} rows.".format(len(df.index)))

        msg.debug("START: Remove entries not in ss_dis "
                  "and add the PDB peptide.")
        df = add_pdbseq_to_df(df, ss_dis)
        msg.debug("COMPLETE: Remove entries not in ss_dis "
                  "and add the PDB peptide.")
        msg.debug("DataFrame now has {} rows.".format(len(df.index)))

        msg.debug("START: Remove UniProt IDs with < 2 pdb chains.")
        df = filter_single(df)
        msg.debug("COMPLETE: Remove UniProt IDs with < 2 pdb chains.")
        msg.debug("DataFrame now has {} rows.".format(len(df.index)))

        msg.debug("START: Writing DataFrame to TSV file.")
        delimiter = create_delimiter('\t')
        df.to_csv(pdb_seq_fp, sep=delimiter, encoding='utf-8')
        msg.debug("COMPLETE: Writing DataFrame to TSV file.")
        msg.info(
            "Wrote {} to:\n\t{}".format(basename(pdb_seq_fp), pdb_seq_fp)
        )
        msg.info('COMPLETE: Initial filtering.')

    else:
        used_local = True
        msg.info(
            "Found and using local {filename}: \n"
            "\t{filepath}".format(
                filename=basename(pdb_seq_fp),
                filepath=pdb_seq_fp
            )
        )
        msg.info('COMPLETE: Initial filtering.')

    return used_local
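
initial_filtering logs through getLogger('root'), so a logging handler has to be configured before calling it. A minimal driver sketch, assuming the three functions on this page run roughly in the order initial filtering, final filtering, composite (the pdb_seq_uni_filtered.tsv read by final_filtering suggests an additional UniProt filtering step between the first two that is not shown here):

import logging

# Configure the root logger so the getLogger('root') messages are emitted.
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s')

# Hypothetical driver; dirs is a ProjectFolders named tuple as sketched earlier.
initial_filtering(dirs)    # writes pdb_seq.tsv into dirs.working
final_filtering(dirs)      # writes pdb_initial_composite_df.tsv into dirs.tsv_data
uniprot_composite(dirs)    # writes the final uni_composite files into dirs.uni_data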