def setTranscriptsAnnotByOverlap(queries, transcripts):
    """
    Store in each query the annotations built from the transcripts overlapping it.

    Both collections are first grouped by reference with splittedByRef(). Then,
    for every query yielded by iterOverlappedByRegion(), the value returned by
    getTranscriptsAnnot() for this query and its overlapping transcripts is
    written in the "ANN" key of the query annotations. The queries are
    modified in place and nothing is returned.

    :param queries: Regions to annotate.
    :type queries: collection of anacore.region.Region accepted by splittedByRef (presumably anacore.region.RegionList, to be confirmed)
    :param transcripts: The list of transcripts where overlapped transcripts will be searched.
    :type transcripts: anacore.region.RegionList
    """
    tr_by_ref = splittedByRef(transcripts)
    queries_by_ref = splittedByRef(queries)
    overlaps = iterOverlappedByRegion(queries_by_ref, tr_by_ref)
    # The reference name yielded with each query is not needed here.
    for _, query, overlapped_tr in overlaps:
        query.annot["ANN"] = getTranscriptsAnnot(query, overlapped_tr)
def setVariantsByOverlap(queries, variants):
    """
    Store in each query the list of the variants overlapping it.

    Both collections are first grouped by reference with splittedByRef(). Then,
    for every query yielded by iterOverlappedByRegion(), a new list containing
    its overlapping variants is written in the "VAR" key of the query
    annotations. The queries are modified in place and nothing is returned.

    :param queries: Regions to annotate.
    :type queries: collection of anacore.region.Region accepted by splittedByRef (presumably anacore.region.RegionList, to be confirmed)
    :param variants: The list of variants where overlapped variants will be searched.
    :type variants: anacore.region.RegionList
    """
    var_by_ref = splittedByRef(variants)
    queries_by_ref = splittedByRef(queries)
    overlaps = iterOverlappedByRegion(queries_by_ref, var_by_ref)
    # The reference name yielded with each query is not needed here.
    for _, query, overlapped_var in overlaps:
        # Copy into a fresh list so that each query owns its own container.
        query.annot["VAR"] = list(overlapped_var)
def testSplittedByRef(self):
    """Check that splittedByRef() groups the regions under the name of their reference."""
    regions = RegionList([
        Region(10, 30, "-", "chr1", "region1"),
        Region(40, 70, "-", "chr1", "region2"),
        Region(80, 100, "-", "chr2", "region3")
    ])
    by_chrom = splittedByRef(regions)
    # Flatten the groups as "reference:name", walking the references in sorted order.
    observed = []
    for chrom, chrom_regions in sorted(by_chrom.items()):
        observed.extend(
            "{}:{}".format(chrom, region.name) for region in chrom_regions
        )
    self.assertEqual(
        ["chr1:region1", "chr1:region2", "chr2:region3"],
        observed
    )
) group_output = parser.add_argument_group('Outputs') # Outputs group_output.add_argument( '-o', '--output-regions', default="renamed.bed", help= 'Path to the file containing the renamed regions (format: BED). [Default: %(default)s]' ) args = parser.parse_args() # Get transcripts gene_by_tr = getGeneByRefTr(args.input_reference_tr) selected_transcripts = getTranscriptAnnot(args.input_annotation, gene_by_tr) tr_by_chr = splittedByRef(selected_transcripts) # Write renamed regions out_nb_col = BEDIO.getMaxNbCol(args.input_regions) if out_nb_col == 3: out_nb_col = 4 with BEDIO(args.input_regions) as FH_regions: with BEDIO(args.output_regions, "w", out_nb_col) as FH_out: for record_idx, record in enumerate(FH_regions): target = Region(record.start, record.end, record.strand, record.chrom) if args.is_thick_based and record.thickStart is not None and record.thickEnd is not None: target.start = record.thickStart target.end = record.thickEnd overlapped_tr = list() if record.chrom in tr_by_chr: overlapped_tr = tr_by_chr[record.chrom].getOverlapped(
help='Path to the annotated file. (format: VCF).') args = parser.parse_args() # Logger logging.basicConfig( format= '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s' ) log = logging.getLogger(os.path.basename(__file__)) log.setLevel(logging.INFO) log.info("Command: " + " ".join(sys.argv)) # Load annotations log.info("Load model from {}.".format(args.input_annotations)) genes = loadModel(args.input_annotations, "genes") genes_by_chr = splittedByRef(genes) # Annot variants log.info("Annot variants in {}.".format(args.input_variants)) with BreakendVCFIO(args.output_variants, "w", args.annotation_field) as writer: with BreakendVCFIO(args.input_variants) as reader: # Header writer.copyHeader(reader) writer.ANN_titles = [ "SYMBOL", "Gene", "Feature", "Feature_type", "Protein", "STRAND", "RNA_ELT_TYPE", "RNA_ELT_POS", "CDS_position", "Protein_position", "GENE_SHARD", "IN_FRAME" ] writer.info[args.annotation_field] = HeaderInfoAttr( id=args.annotation_field,