Beispiel #1
0
def setTranscriptsAnnotByOverlap(queries, transcripts):
    """
    Store on each query the annotation built from the transcripts overlapping it.

    Queries and transcripts are both grouped with splittedByRef(), then for each
    (query, overlapped transcripts) pair produced by iterOverlappedByRegion() the
    result of getTranscriptsAnnot() is written in the query's annot under "ANN".

    :param queries: Regions to annotate.
    :type queries: anacore.region.RegionList (assumed: same container as transcripts, to confirm)
    :param transcripts: The list of transcripts where overlapped transcripts will be searched.
    :type transcripts: anacore.region.RegionList
    """
    subjects_by_ref = splittedByRef(transcripts)
    queries_by_ref = splittedByRef(queries)
    overlaps = iterOverlappedByRegion(queries_by_ref, subjects_by_ref)
    # The first yielded item is not needed here: only the query and its overlaps are used.
    for _, query, overlapping_tr in overlaps:
        query.annot["ANN"] = getTranscriptsAnnot(query, overlapping_tr)
Beispiel #2
0
def setVariantsByOverlap(queries, variants):
    """
    Store on each query the list of variants overlapping it.

    Queries and variants are both grouped with splittedByRef(), then for each
    (query, overlapped variants) pair produced by iterOverlappedByRegion() a new
    list holding these variants is written in the query's annot under "VAR".

    :param queries: Regions to annotate.
    :type queries: anacore.region.RegionList (assumed: same container as variants, to confirm)
    :param variants: The list of variants where overlapped variants will be searched.
    :type variants: anacore.region.RegionList
    """
    subjects_by_ref = splittedByRef(variants)
    queries_by_ref = splittedByRef(queries)
    overlaps = iterOverlappedByRegion(queries_by_ref, subjects_by_ref)
    # The first yielded item is not needed here: only the query and its overlaps are used.
    for _, query, overlapping_var in overlaps:
        # A fresh list is stored so the annotation never aliases the iterated container.
        query.annot["VAR"] = list(overlapping_var)
Beispiel #3
0
 def testSplittedByRef(self):
     """Check that splittedByRef() groups the regions of a RegionList under their reference name."""
     # Two regions on chr1 and one on chr2.
     reg_list = RegionList([
         Region(10, 30, "-", "chr1", "region1"),
         Region(40, 70, "-", "chr1", "region2"),
         Region(80, 100, "-", "chr2", "region3")
     ])
     reg_by_chr = splittedByRef(reg_list)
     # Flatten to "<chrom>:<region name>", walking the references in sorted order
     # so the comparison does not depend on the mapping's own ordering.
     observed = [
         "{}:{}".format(chrom, curr_region.name)
         for chrom, regions in sorted(reg_by_chr.items())
         for curr_region in regions
     ]
     expected = ["chr1:region1", "chr1:region2", "chr2:region3"]
     self.assertEqual(expected, observed)
Beispiel #4
0
    )
    group_output = parser.add_argument_group('Outputs')  # Outputs
    group_output.add_argument(
        '-o',
        '--output-regions',
        default="renamed.bed",
        help=
        'Path to the file containing the renamed regions (format: BED). [Default: %(default)s]'
    )
    args = parser.parse_args()

    # Get transcripts
    gene_by_tr = getGeneByRefTr(args.input_reference_tr)
    selected_transcripts = getTranscriptAnnot(args.input_annotation,
                                              gene_by_tr)
    tr_by_chr = splittedByRef(selected_transcripts)
    # Write renamed regions
    out_nb_col = BEDIO.getMaxNbCol(args.input_regions)
    if out_nb_col == 3:
        out_nb_col = 4
    with BEDIO(args.input_regions) as FH_regions:
        with BEDIO(args.output_regions, "w", out_nb_col) as FH_out:
            for record_idx, record in enumerate(FH_regions):
                target = Region(record.start, record.end, record.strand,
                                record.chrom)
                if args.is_thick_based and record.thickStart is not None and record.thickEnd is not None:
                    target.start = record.thickStart
                    target.end = record.thickEnd
                overlapped_tr = list()
                if record.chrom in tr_by_chr:
                    overlapped_tr = tr_by_chr[record.chrom].getOverlapped(
Beispiel #5
0
        help='Path to the annotated file. (format: VCF).')
    args = parser.parse_args()

    # Logger
    logging.basicConfig(
        format=
        '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
    )
    log = logging.getLogger(os.path.basename(__file__))
    log.setLevel(logging.INFO)
    log.info("Command: " + " ".join(sys.argv))

    # Load annotations
    log.info("Load model from {}.".format(args.input_annotations))
    genes = loadModel(args.input_annotations, "genes")
    genes_by_chr = splittedByRef(genes)

    # Annot variants
    log.info("Annot variants in {}.".format(args.input_variants))
    with BreakendVCFIO(args.output_variants, "w",
                       args.annotation_field) as writer:
        with BreakendVCFIO(args.input_variants) as reader:
            # Header
            writer.copyHeader(reader)
            writer.ANN_titles = [
                "SYMBOL", "Gene", "Feature", "Feature_type", "Protein",
                "STRAND", "RNA_ELT_TYPE", "RNA_ELT_POS", "CDS_position",
                "Protein_position", "GENE_SHARD", "IN_FRAME"
            ]
            writer.info[args.annotation_field] = HeaderInfoAttr(
                id=args.annotation_field,