Beispiel #1
0
    def setArgs(self, args):
        """Store the parsed arguments and load the inputs they refer to.

        Reads ``args.ref`` into ``self.genome``, creates one ``Sample`` per
        path in ``args.bam`` (stored in ``self.samples`` under a unique
        name), and loads every path in ``args.annotations`` into
        ``self.annotationSets``. Afterwards each bam file is opened and
        ``fetch()`` is called on it as an index check.

        Args:
            args: parsed command-line arguments; must provide ``ref``,
                ``bam``, ``annotations`` and ``_parser``.

        Raises:
            Any exception raised while loading the genome, samples or
            annotations is re-raised after the parser help has been printed.
            SystemExit: with a non-zero status when opening/fetching from a
                bam file raises ``ValueError`` (reported as a missing index).
        """
        self.args = args

        try:
            self.genome = genomesource.FastaGenomeSource(args.ref)

            for bamPath in self.args.bam:
                baseName = nameFromBamPath(bamPath)

                # Several bam files can map to the same name; append _1, _2,
                # ... until the name is not already taken.
                name = baseName
                suffix = 0
                while name in self.samples:
                    suffix += 1
                    name = "{}_{}".format(baseName, suffix)

                self.samples[name] = Sample(name, bamPath)

            if self.args.annotations:
                for annoPath in self.args.annotations:
                    name = nameFromBedPath(annoPath)
                    if annoPath.endswith((".bed", ".bed.gz")):
                        self.annotationSets[name] = annotations.AnnotationSet(
                            annoPath)
                    else:
                        # Anything that is not BED is parsed as GTF/GFF; warn
                        # when the extension does not confirm that guess.
                        if not annoPath.endswith(
                                (".gff", ".gff.gz", ".gtf", ".gtf.gz")):
                            logging.warning(
                                "Unknown annotation file extension; trying to parse as if GTF/GFF format: '{}'"
                                .format(annoPath))
                        self.annotationSets[name] = gff.GeneAnnotationSet(
                            annoPath)

        except BaseException:
            # Show usage for any failure while loading inputs, then let the
            # original exception propagate unchanged.
            self.args._parser.print_help()
            print("")
            raise

        for bamPath in self.args.bam:
            bam = None
            try:
                bam = pysam.AlignmentFile(bamPath)
                bam.fetch()
            except ValueError:
                logging.error(
                    "\nERROR: Need to create index for input bam file: {}".
                    format(bamPath))
                # Exit with a failure status so calling scripts can detect
                # the error (previously exited with 0, i.e. "success").
                sys.exit(1)
            finally:
                # The file was only opened to probe for an index; release it.
                if bam is not None:
                    bam.close()
Beispiel #2
0
def getVariant(dataHub):
    """Build the variant object described by ``dataHub.args``.

    ``dataHub.args.type`` selects the event type (case-insensitive) and
    ``dataHub.args.breakpoints`` supplies its coordinates as a list of
    command-line strings:

    * ``del...``: chrom, start, end
    * ``ldel`` / ``largedeletion``: chrom, start, end
    * ``ins...``: chrom, pos, seq  or  chrom, pos, end, seq
    * ``inv...``: chrom, start, end
    * ``mei...``: mobile-element fasta, chrom, pos, element name,
      [strand, [start, [end]]]
    * ``tra...``: chrom1, pos1, chrom2, pos2, orientation
    * ``bkend`` / ``breakend``: chrom1, pos1, strand1, chrom2, pos2, strand2

    For inversions, translocations and breakends ``dataHub.args.min_mapq``
    is set to -1 when it is None.

    Args:
        dataHub: object providing ``args``, ``alignDistance`` and ``genome``.

    Returns:
        The constructed variant object.

    Raises:
        AssertionError: if the number of breakpoints does not match the event
            type, or a deletion's start is not less than its end.
        ValueError: if a coordinate cannot be converted to ``int``.
        Exception: if the event type is not recognized.
    """
    eventType = dataHub.args.type.lower()
    breakpoints = dataHub.args.breakpoints

    if eventType.startswith("del"):
        assert len(breakpoints) == 3, getBreakpointFormatsStr("del")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        assert start < end
        # Both coordinates are shifted down by one before being handed over.
        variant = Deletion.from_breakpoints(chrom, start-1, end-1, dataHub.alignDistance, dataHub.genome)

    elif eventType in ["ldel", "largedeletion"]:
        assert len(breakpoints) == 3, getBreakpointFormatsStr("ldel")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        assert start < end
        variant = LargeDeletion.from_breakpoints(chrom, start-1, end-1, dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("ins"):
        assert len(breakpoints) in [3,4], getBreakpointFormatsStr("ins")
        chrom = breakpoints[0]
        pos = int(breakpoints[1])
        if len(breakpoints) == 3:
            # Three values: the insertion starts and ends at the same position.
            seq = breakpoints[2]
            end = pos
        else:
            end = int(breakpoints[2])
            seq = breakpoints[3]
        variant = Insertion(Locus(chrom, pos, end, "+"), seq, dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("inv"):
        assert len(breakpoints) == 3, getBreakpointFormatsStr("inv")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        if dataHub.args.min_mapq is None:
            dataHub.args.min_mapq = -1
        variant = Inversion(Locus(chrom, start, end, "+"), dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("mei"):
        assert len(breakpoints) >= 4, getBreakpointFormatsStr("mei")

        # Breakpoints arrive as strings; convert positions to int like every
        # other event type does instead of passing raw strings along.
        insertionPos = int(breakpoints[2])
        insertionBreakpoint = Locus(breakpoints[1], insertionPos, insertionPos, "+")

        meName = breakpoints[3]
        meStrand = getListDefault(breakpoints, 4, "+")
        meStart = getListDefault(breakpoints, 5, 0)
        meEnd = getListDefault(breakpoints, 6, 1e100)
        # Only user-supplied values are converted; the defaults are passed
        # through untouched.
        if len(breakpoints) > 5:
            meStart = int(meStart)
        if len(breakpoints) > 6:
            meEnd = int(meEnd)

        meCoords = Locus(meName, meStart, meEnd, meStrand)
        meFasta = genomesource.FastaGenomeSource(breakpoints[0])

        variant = MobileElementInsertion(insertionBreakpoint, meCoords, meFasta, dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("tra"):
        assert len(breakpoints) == 5, getBreakpointFormatsStr("tra")
        chrom1 = breakpoints[0]
        start1 = int(breakpoints[1])

        chrom2 = breakpoints[2]
        start2 = int(breakpoints[3])

        orientation = breakpoints[4]

        if dataHub.args.min_mapq is None:
            dataHub.args.min_mapq = -1

        variant = Translocation(Locus(chrom1, start1, start1, "+"),
                                Locus(chrom2, start2, start2, orientation),
                                dataHub.alignDistance, dataHub.genome)

    elif eventType in ["bkend", "breakend"]:
        assert len(breakpoints) == 6, getBreakpointFormatsStr("bkend")
        chrom1 = breakpoints[0]
        start1 = int(breakpoints[1])
        strand1 = breakpoints[2]

        chrom2 = breakpoints[3]
        start2 = int(breakpoints[4])
        strand2 = breakpoints[5]

        if dataHub.args.min_mapq is None:
            dataHub.args.min_mapq = -1

        variant = Breakend(Locus(chrom1, start1, start1, strand1),
                           Locus(chrom2, start2, start2, strand2),
                           dataHub.alignDistance, dataHub.genome)
    else:
        # List every type handled above so the message matches the code.
        raise Exception("only accept event types of deletion, largedeletion, insertion, inversion, mei, translocation or breakend")
    logging.info(" Variant: {}".format(variant))

    return variant