def main():
    """Run every classifier and write the collected results to two databases.

    Each entry in the classifier list is called with the keyword arguments
    ``transcript_dict`` and ``seq_dict`` and must return a two-item result.
    The first item is stored in the "classify" mapping and the second in the
    "details" mapping, both keyed by the classifier's ``__name__``.  The two
    mappings are then handed to ``sql_lib.write_dict`` together with
    ``args.refGenome``, targeting ``classify.db`` and ``details.db``.
    """
    args = parse_args()
    classifiers = [
        AbutsUnknownBases, StartOutOfFrame, BadFrame, BeginStart, EndStop,
        CdsGap, UtrGap, UnknownGap, CdsNonCanonSplice, CdsUnknownSplice,
        UtrNonCanonSplice, UtrUnknownSplice, InFrameStop, ShortCds,
        UnknownBases,
    ]
    classify_dicts = {}
    details_dicts = {}

    # Load the shared inputs once; every classifier receives the same objects.
    transcripts = get_transcript_dict(args.refGp)
    sequences = seq_lib.get_sequence_dict(args.refFasta)
    fn_args = {"transcript_dict": transcripts, "seq_dict": sequences}

    for classifier in classifiers:
        classify_result, details_result = classifier(**fn_args)
        label = classifier.__name__
        classify_dicts[label] = classify_result
        details_dicts[label] = details_result

    # Classification results first, then details, each to its own database.
    outputs = (
        (classify_dicts, "classify.db"),
        (details_dicts, "details.db"),
    )
    for data_dict, database_path in outputs:
        sql_lib.write_dict(data_dict, database_path, args.refGenome)
import itertools

# Hard-coded input locations for this analysis run.
gp = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/transMap/gorilla/transMapGencodeBasicV23.gp"
ref_gp = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/data/wgEncodeGencodeBasicV23.gp"
aug_gp = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/augustus/tmr/gorilla.output.gp"
aln_psl = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/transMap/gorilla/transMapGencodeBasicV23.psl"
ref_psl = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/data/wgEncodeGencodeBasicV23.psl"
ref_fasta = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/assemblies/susie_3_2/human.fa"
target_fasta = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/assemblies/susie_3_2/gorilla.fa"

# Load each input through the project's seq_lib / psl_lib loaders.
tx_dict = seq_lib.get_transcript_dict(gp)
ref_dict = seq_lib.get_transcript_dict(ref_gp)
aug_dict = seq_lib.get_transcript_dict(aug_gp)
aln_dict = psl_lib.get_alignment_dict(aln_psl)
ref_aln_dict = psl_lib.get_alignment_dict(ref_psl)
seq_dict = seq_lib.get_sequence_dict(target_fasta)
ref_seq_dict = seq_lib.get_sequence_dict(ref_fasta)

con, cur = sql_lib.attach_databases("/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/comparativeAnnotation/2015-10-12/GencodeBasicV23", mode="augustus")

# Run parameters.  The queries below use these names instead of repeating the
# literals, so changing a value here changes every query consistently.
genome = 'gorilla'
ref_genome = 'human'
biotype = 'protein_coding'
filter_chroms = ["Y", "chrY"]

stats = merge_stats(cur, genome)
highest_cov_dict = sql_lib.highest_cov_aln(cur, genome)
# Collect the first field of every value.  Unlike zip(*values)[0], this yields
# an empty set (rather than raising IndexError) when the dict is empty.
# NOTE(review): assumes each value is an indexable record whose first element
# is the alignment id -- confirm against sql_lib.highest_cov_aln.
highest_cov_ids = {record[0] for record in highest_cov_dict.itervalues()}
biotype_ids = sql_lib.get_biotype_aln_ids(cur, genome, biotype)
# Keep only the ids that also belong to the selected biotype.
highest_cov_ids &= biotype_ids
# Retain the stats entries whose key, after
# psl_lib.remove_augustus_alignment_number, is one of the selected ids.
best_stats = {
    aln_id: aln_stats
    for aln_id, aln_stats in stats.iteritems()
    if psl_lib.remove_augustus_alignment_number(aln_id) in highest_cov_ids
}
def get_fasta(self):
    """Build a sequence dict from ``self.ref_fasta`` and keep it on the instance.

    The result of ``seq_lib.get_sequence_dict`` is stored as
    ``self.ref_seq_dict``; nothing is returned.
    """
    fasta_path = self.ref_fasta
    self.ref_seq_dict = seq_lib.get_sequence_dict(fasta_path)
def getSeqDict(self):
    """Build a sequence dict from ``self.fasta`` and keep it on the instance.

    The result of ``seq_lib.get_sequence_dict`` is stored as ``self.seqDict``;
    nothing is returned.
    """
    fasta_path = self.fasta
    self.seqDict = seq_lib.get_sequence_dict(fasta_path)
def getRefDict(self):
    """Build a sequence dict from ``self.refFasta`` and keep it on the instance.

    The result of ``seq_lib.get_sequence_dict`` is stored as ``self.refDict``;
    nothing is returned.
    """
    fasta_path = self.refFasta
    self.refDict = seq_lib.get_sequence_dict(fasta_path)