コード例 #1
0
ファイル: rna_grep.py プロジェクト: hammerlab/immuno_research
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import pandas as pd
import numpy as np
import entrez
import rnaseq_atlas
from load_rna_data import load_rsem


if __name__ == "__main__":
    # Command-line entry point: load an RSEM expression table and print the
    # rows whose Hugo gene name matches the pattern given on the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        "-f",
        required=True,
        help="""File with two tab separated columns: Entrez gene IDs and RSEM expression values""")
    parser.add_argument(
        "pattern",
        help="Gene name, part of a gene name, or regular expression ")

    args = parser.parse_args()
    df = load_rsem(args.filename, translate_entrez_ids=True)

    # str.contains interprets the pattern as a regular expression by default.
    # na=False makes rows with a missing Hugo name count as non-matches;
    # without it the mask would hold NaN for those rows and the boolean
    # indexing below would raise instead of filtering.
    mask = df.Hugo.str.contains(args.pattern, na=False)
    subset = df[mask]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if subset.empty:
        print("Pattern %s not found" % args.pattern)
    else:
        print(subset)
コード例 #2
0
ファイル: generank.py プロジェクト: hammerlab/immuno_research
        help="Which column to select from the reference")
    parser.add_argument(
        "--percentile",
        help="Which rank percentile do we consider 'low'?",
        type = float,
        default = 0.25)
    parser.add_argument(
        "--compare-rsem",
        default = False,
        action = "store_true",
        help = "Compare RSEM values directly instead of rank (default: False)")

    args = parser.parse_args()

    # Load the sample table. load_rsem is asked to translate Entrez IDs
    # unless args.sample_uses_hugo_ids is set.
    if not args.sample_uses_hugo_ids:
        hugo_df = load_rsem(args.sample, translate_entrez_ids = True)
    else:
        hugo_df = load_rsem(args.sample, translate_entrez_ids = False)
    # Passed to rnaseq_atlas.hugo_to_rank below when ranks are compared.
    group_rank_method = 'average'

    if args.normal is None:
        # No reference file supplied: use the rnaseq_atlas module's data.
        # With --compare-rsem the expression values themselves are used
        # (presumably RPKM, going by the helper's name -- confirm); otherwise
        # the ranked form is used.
        if args.compare_rsem:
            normal_df = rnaseq_atlas.hugo_to_rpkm()
        else:
            normal_df = rnaseq_atlas.hugo_to_rank(group_rank_method)
        tissue_cols = rnaseq_atlas.TISSUE_COLUMNS
    else:
        # A reference CSV was supplied: join it to hugo_mapping on the
        # "Entrez" column, then drop that column from the result.
        # NOTE(review): hugo_mapping is not defined in the visible part of
        # this file -- confirm it is created earlier in the script.
        normal_df = pd.read_csv(args.normal)
        normal_df = hugo_mapping.merge(normal_df, on = "Entrez")
        normal_df = normal_df.drop("Entrez", axis=1)
        # Every column after the first is treated as a tissue column
        # (assumes the first remaining column is the gene name -- TODO confirm).
        tissue_cols = normal_df.columns[1:]