Python read_gtf_as_dict Examples

Programming Language: Python

Namespace/Package Name: gtfparse

Method/Function: read_gtf_as_dict

Examples at hotexamples.com: 6

Python read_gtf_as_dict - 6 examples found. These are the top-rated real-world Python examples of gtfparse.read_gtf_as_dict, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: gtf.py Project: xflicsu/topiary

def load_transcript_fpkm_dict_from_gtf(
        gtf_path,
        transcript_id_column_name="reference_id",
        fpkm_column_name="FPKM",
        feature_column_name="feature"):
    """
    Build a transcript-ID -> FPKM dictionary from a StringTie GTF file
    holding transcript-level abundance estimates.

    Parameters
    ----------
    gtf_path
        Location of the GTF file; handed to gtfparse.read_gtf_as_dict.

    transcript_id_column_name
        Column whose values become the dictionary keys (Ensembl
        transcript IDs).

    fpkm_column_name
        Column holding the abundance values; parsed with a float converter.

    feature_column_name
        Column holding the feature type of each row.

    Returns a dict of transcript ID -> float FPKM, restricted to rows whose
    feature is "transcript" and whose transcript ID is neither None nor
    empty. If an ID occurs more than once, the last row wins.
    """
    converters = {fpkm_column_name: float}
    columns = gtfparse.read_gtf_as_dict(gtf_path, column_converters=converters)

    # NOTE(review): _get_gtf_column is defined outside this snippet;
    # presumably it extracts the named column from `columns` and uses
    # gtf_path only for error reporting -- confirm against the helper.
    transcript_ids = _get_gtf_column(
        transcript_id_column_name, gtf_path, columns)
    fpkm_values = _get_gtf_column(fpkm_column_name, gtf_path, columns)
    features = _get_gtf_column(feature_column_name, gtf_path, columns)

    n_rows = len(transcript_ids)
    logging.info("Loaded %d rows from %s" % (n_rows, gtf_path))

    n_transcripts = sum(kind == "transcript" for kind in features)
    logging.info("Found %s transcript entries" % n_transcripts)

    fpkm_by_transcript = {}
    for tid, fpkm, kind in zip(transcript_ids, fpkm_values, features):
        # Rows lacking a usable reference ID cannot be keyed, so skip them
        # before looking at the feature type.
        if tid is None or len(tid) == 0:
            continue
        if kind == "transcript":
            fpkm_by_transcript[tid] = float(fpkm)

    logging.info("Keeping %d transcript rows with reference IDs" %
                 (len(fpkm_by_transcript), ))
    return fpkm_by_transcript

Example #2

Show file

File: gtf.py Project: Al3n70rn/topiary

def load_transcript_fpkm_dict_from_gtf(
        gtf_path,
        transcript_id_column_name="reference_id",
        fpkm_column_name="FPKM",
        feature_column_name="feature"):
    """
    Read transcript-level abundance estimates out of a StringTie GTF file.

    The file at `gtf_path` is parsed with gtfparse.read_gtf_as_dict, with
    the `fpkm_column_name` column converted to float. Three columns are
    then pulled out: transcript IDs (`transcript_id_column_name`), FPKM
    values (`fpkm_column_name`) and feature types (`feature_column_name`).

    Returns a dictionary mapping Ensembl transcript IDs to float FPKM
    values. Only rows whose feature equals "transcript" and whose
    transcript ID is non-None and non-empty contribute; a repeated ID
    keeps the value from its last row.
    """
    columns = gtfparse.read_gtf_as_dict(
        gtf_path,
        column_converters={fpkm_column_name: float},
    )

    # NOTE(review): _get_gtf_column lives elsewhere in the module; it is
    # assumed to return one sequence per column -- verify against helper.
    transcript_ids = _get_gtf_column(
        transcript_id_column_name, gtf_path, columns)
    fpkm_values = _get_gtf_column(
        fpkm_column_name, gtf_path, columns)
    features = _get_gtf_column(
        feature_column_name, gtf_path, columns)

    logging.info("Loaded %d rows from %s" % (len(transcript_ids), gtf_path))

    transcript_row_count = sum(
        feature == "transcript" for feature in features)
    logging.info("Found %s transcript entries" % transcript_row_count)

    result = {}
    for row in zip(transcript_ids, fpkm_values, features):
        transcript_id, fpkm, feature = row
        # The None check must come first so len() is never called on None.
        has_reference_id = (
            transcript_id is not None and len(transcript_id) > 0)
        if has_reference_id and feature == "transcript":
            result[transcript_id] = float(fpkm)

    kept = len(result)
    logging.info("Keeping %d transcript rows with reference IDs" % (kept,))
    return result

Example #3

Show file

File: test_refseq_gtf.py Project: hammerlab/gtfparse

def test_read_refseq_gtf_as_dict():
    """
    Read the GTF at REFSEQ_GTF_PATH with read_gtf_as_dict and hand the
    result to _check_required_columns for validation.
    """
    parsed_columns = read_gtf_as_dict(REFSEQ_GTF_PATH)
    _check_required_columns(parsed_columns)

Example #4

Show file

File: test_read_stringtie_gtf.py Project: hammerlab/gtfparse

def test_read_string_gtf_as_dict_float_values():
    """
    Read the GTF at B16_GTF_PATH with float converters for the "cov" and
    "FPKM" columns, then run the required-column check and the float
    cov/FPKM check on the result.
    """
    float_converters = {"cov": float, "FPKM": float}
    parsed_columns = read_gtf_as_dict(
        B16_GTF_PATH, column_converters=float_converters)
    _check_required_columns(parsed_columns)
    _check_float_cov_and_FPKM(parsed_columns)

Example #5

Show file

File: test_read_stringtie_gtf.py Project: hammerlab/gtfparse

def test_read_string_gtf_as_dict():
    """
    Read the GTF at B16_GTF_PATH with no column converters, then run the
    required-column check and the string cov/FPKM check on the result.
    """
    parsed_columns = read_gtf_as_dict(B16_GTF_PATH)
    _check_required_columns(parsed_columns)
    _check_string_cov_and_FPKM(parsed_columns)

Example #6

Show file

File: test_refseq_gtf.py Project: noahpieta/gtfparse

def test_read_refseq_gtf_as_dict():
    """
    Parse the file at REFSEQ_GTF_PATH via read_gtf_as_dict, then pass the
    returned object through _check_required_columns.
    """
    refseq_columns = read_gtf_as_dict(REFSEQ_GTF_PATH)
    _check_required_columns(refseq_columns)