# Pull the remaining command-line options into local names.
# `options`, `prefix` and `gtffile` are bound before this point.
odir_opt = options.odir
filter_noncoding = options.filter_noncoding
transdecoder = options.transdecoder
hmmer = options.hmmer
blastp = options.blastp
report = options.report
gene_list = options.gene_list
ref_organism = options.ref_organism

# set prefixes
if filter_noncoding:
    prefix += '_filtered'
# NOTE(review): dir_prefix is only assigned here when gene_list is set;
# it presumably already has a value from earlier code - confirm.
if gene_list:
    dir_prefix = prefix + '_gene_list'

# get parent output directory that everything is going to go into:
# an explicitly requested directory wins, otherwise a dated folder is
# made next to the input gtf.
if odir_opt:
    odir = odir_opt
else:
    odir = pp_utils().make_dated_folder(os.path.dirname(gtffile), dir_prefix)

# set reference files based on what organism we're using
# reference files that we'll keep hard coded for now
ref_files_by_organism = {
    'human': (
        '/data/users/freese/mortazavi_lab/ref/hg38/hg38.fa',
        '/data/users/freese/mortazavi_lab/ref/gencode.v24/gencode.v24.pc_translations.fasta',
    ),
    'mouse': (
        '/data/users/freese/mortazavi_lab/ref/mm10/mm10.fa',
        '/data/users/freese/mortazavi_lab/ref/gencode.vM21/gencode.vM21.pc_translations.fasta',
    ),
}
# NOTE(review): any other organism leaves fastafile and p_ref unassigned.
if ref_organism in ref_files_by_organism:
    fastafile, p_ref = ref_files_by_organism[ref_organism]

# filter out known non-coding transcripts from talon gtf
if filter_noncoding:
    gtffile = pp_utils().filter_coding_novel_gtf(gtffile, odir, prefix)
from pp_utils import pp_utils

# Scratch check of pp_utils.reformat_transdecoder: call it on the HepG2
# TransDecoder .pep file with the sample prefix and print what comes back.
transdecoder_pep = '/Users/fairliereese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/transdecoder/HepG2_filtered.fasta.transdecoder.pep'
sample_prefix = 'HepG2_filtered'

utils = pp_utils()
t_tsv = utils.reformat_transdecoder(transdecoder_pep, sample_prefix)
print(t_tsv)
from pp_utils import pp_utils

# Scratch check of pp_utils.reformat_hmmer: call it on the HepG2 hmmer
# .tab file with the sample prefix and print what comes back.
hmmer_tab = '/Users/fairliereese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/hmmer/HepG2_filtered.tab'
sample_prefix = 'HepG2_filtered'

utils = pp_utils()
t_tsv = utils.reformat_hmmer(hmmer_tab, sample_prefix)
print(t_tsv)
from pp_utils import pp_utils

# Scratch check of pp_utils.get_gene_names: pass the transcript-id /
# gene-id map, the output directory and the sample prefix, then print
# the returned value.
prefix = 'HepG2_filtered'
odir = '/Users/fairliereese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/'
tid_gid_map = '/Users/fairliereese/mortazavi_lab/data/190313_HepG2/HepG2_filtered_talon_coding_novel_tid_gid_map.tsv'

utils = pp_utils()
gene_names = utils.get_gene_names(tid_gid_map, odir, prefix)
print(gene_names)
from pp_utils import pp_utils

# Scratch check of pp_utils.run_blastp on the HepG2 sample; prints the
# value the call returns.
prefix = 'HepG2_filtered'
odir = '/data/users/freese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/'

# Inputs: gene ID list, protein reference fasta, and TransDecoder peptides.
gene_names = '/data/users/freese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/blastp/HepG2_filtered_gene_IDS_mini.txt'
p_ref = '/data/users/freese/mortazavi_lab/ref/gencode.v24/gencode.v24.pc_translations.fasta'
pepfile = '/data/users/freese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/transdecoder/HepG2_filtered.fasta.transdecoder.pep'

utils = pp_utils()
b_tbl = utils.run_blastp(gene_names, p_ref, odir, prefix, pepfile)
print(b_tbl)
from pp_utils import pp_utils

# Scratch check of pp_utils.reformat_blastp: call it on the HepG2 blast
# results table with the sample prefix and print what comes back.
prefix = 'HepG2_filtered'
blastfile = '/Users/fairliereese/mortazavi_lab/data/190313_HepG2/190502_HepG2_filtered_protein_pred/blastp/HepG2_filtered_blast_results.tab'

utils = pp_utils()
b_tsv = utils.reformat_blastp(blastfile, prefix)
print(b_tsv)