Beispiel #1
0
 def addFeatureSet(self):
     """
     Create a Gff3DbFeatureSet for the requested dataset and insert it
     into this repo.

     The set is named from the ``filePath`` argument, attached to the
     reference set and ontology named on the command line, populated
     from the resolved file path and finally written via _updateRepo.

     Raises RepoManagerException when either the reference set name or
     the ontology name is missing.
     """
     self._openRepo()
     args = self._args
     dataset = self._repo.getDatasetByName(args.datasetName)
     resolvedPath = self._getFilePath(args.filePath, args.relativePath)
     featureSet = sequence_annotations.Gff3DbFeatureSet(
         dataset, getNameFromPath(args.filePath))

     # The reference set is mandatory for a feature set.
     if args.referenceSetName is None:
         raise exceptions.RepoManagerException(
             "A reference set name must be provided")
     featureSet.setReferenceSet(
         self._repo.getReferenceSetByName(args.referenceSetName))

     # So is the ontology, which must pass _checkSequenceOntology.
     if args.ontologyName is None:
         raise exceptions.RepoManagerException(
             "A sequence ontology name must be provided")
     sequenceOntology = self._repo.getOntologyByName(args.ontologyName)
     self._checkSequenceOntology(sequenceOntology)
     featureSet.setOntology(sequenceOntology)

     featureSet.populateFromFile(resolvedPath)
     self._updateRepo(self._repo.insertFeatureSet, featureSet)
Beispiel #2
0
 def _checkFolder(self, folderPath, fileExt):
     """
     Check that folderPath is an existing directory containing at least
     one entry whose name ends with fileExt.

     Raises RepoManagerException if the directory is empty or if no
     entry has the requested extension. Existence and directory-ness
     are delegated to _assertFileExists and _assertDirectory.
     """
     self._assertFileExists(folderPath, 'Directory')
     self._assertDirectory(folderPath)
     entries = os.listdir(folderPath)
     if not entries:
         raise exceptions.RepoManagerException(
             "Directory '{}' has no contents; "
             "needs files with extension '{}'".format(folderPath, fileExt))
     for entry in entries:
         if entry.endswith(fileExt):
             # One matching file is enough.
             return
     raise exceptions.RepoManagerException(
         "Directory '{}' does not contain a file "
         "with a '{}' extension".format(folderPath, fileExt))
Beispiel #3
0
 def _assertPathEmpty(self, path, text='Path', inRepo=False, emitName=None):
     if os.path.exists(path):
         if emitName is None:
             emitName = path
         message = "{} '{}' already exists".format(text, emitName)
         if inRepo:
             self._raiseException(message)
         else:
             raise exceptions.RepoManagerException(message)
Beispiel #4
0
 def init(self):
     """
     Initialise the repository, deleting an existing one first only if
     the ``force`` argument is set.

     Raises RepoManagerException if the repository already exists and
     ``force`` is not set.
     """
     if self._repo.exists():
         if not self._args.force:
             raise exceptions.RepoManagerException(
                 "Repository '{}' already exists. "
                 "Use --force to overwrite".format(self._registryPath))
         # Forced re-initialisation: remove the old repo before recreating.
         self._repo.delete()
     self._updateRepo(self._repo.initialise)
Beispiel #5
0
 def _assertDirectory(self,
                      dirPath,
                      text='File',
                      inRepo=False,
                      emitName=None):
     if not os.path.isdir(dirPath):
         if emitName is None:
             emitName = dirPath
         message = "{} '{}' is not a directory".format(text, emitName)
         if inRepo:
             self._raiseException(message)
         else:
             raise exceptions.RepoManagerException(message)
Beispiel #6
0
 def _assertFileExists(self,
                       filePath,
                       text='File',
                       inRepo=False,
                       emitName=None):
     if not os.path.exists(filePath):
         if emitName is None:
             emitName = filePath
         message = "{} '{}' does not exist".format(text, emitName)
         if inRepo:
             self._raiseException(message)
         else:
             raise exceptions.RepoManagerException(message)
Beispiel #7
0
 def init(self):
     """
     Create the repository directory and its top-level subdirectories.

     Raises RepoManagerException if anything already exists at the
     repository path (via _assertDirectory when it is not a directory,
     otherwise with an "already exists" message).
     """
     repoPath = self._repoPath
     if os.path.exists(repoPath):
         # Reports a non-directory first; a directory falls through to
         # the "already exists" error below.
         self._assertDirectory(repoPath)
         raise exceptions.RepoManagerException(
             "Directory already exists at '{}'".format(repoPath))
     os.mkdir(repoPath)
     for subDir in self._topStructure:
         os.mkdir(os.path.join(repoPath, subDir))
     self._repoEmit("Created")
Beispiel #8
0
 def addRnaQuantificationSet(self):
     """
     Create a SqliteRnaQuantificationSet for the requested dataset and
     insert it into this repo.

     The set takes the ``name`` argument if one was given and is
     otherwise named from the ``filePath`` argument.

     Raises RepoManagerException when no reference set name is given.
     """
     self._openRepo()
     args = self._args
     dataset = self._repo.getDatasetByName(args.datasetName)
     setName = args.name
     if setName is None:
         setName = getNameFromPath(args.filePath)
     quantSet = rna_quantification.SqliteRnaQuantificationSet(
         dataset, setName)
     if args.referenceSetName is None:
         raise exceptions.RepoManagerException(
             "A reference set name must be provided")
     quantSet.setReferenceSet(
         self._repo.getReferenceSetByName(args.referenceSetName))
     quantSet.populateFromFile(args.filePath)
     self._updateRepo(self._repo.insertRnaQuantificationSet, quantSet)
Beispiel #9
0
 def _checkFile(self, filePath, fileExt):
     """
     Check that filePath exists (via _assertFileExists) and that its
     name ends with fileExt.

     Raises RepoManagerException when the extension does not match.
     """
     self._assertFileExists(filePath)
     if filePath.endswith(fileExt):
         return
     message = "File '{}' does not have a '{}' extension".format(
         filePath, fileExt)
     raise exceptions.RepoManagerException(message)
Beispiel #10
0
 def _raiseException(self, message):
     """
     Raise a RepoManagerException whose text is message prefixed with a
     line identifying this repo's path.
     """
     raise exceptions.RepoManagerException(
         "Exception for repo at '{}'\n{}".format(self._repoPath, message))
Beispiel #11
0
 def _checkSequenceOntology(self, ontology):
     """
     Check that ontology reports the sequence ontology prefix.

     Raises RepoManagerException, naming the ontology and the expected
     prefix, when the prefixes differ.
     """
     expectedPrefix = ontologies.SEQUENCE_ONTOLOGY_PREFIX
     if ontology.getOntologyPrefix() != expectedPrefix:
         template = "Ontology '{}' does not have ontology prefix '{}'"
         raise exceptions.RepoManagerException(
             template.format(ontology.getName(), expectedPrefix))
Beispiel #12
0
 def _openRepo(self):
     """
     Open the repo in read mode.

     Raises RepoManagerException, pointing the user at the 'init'
     command, if the repo does not exist.
     """
     if self._repo.exists():
         self._repo.open(datarepo.MODE_READ)
         return
     raise exceptions.RepoManagerException(
         "Repo '{}' does not exist. Please create a new repo "
         "using the 'init' command.".format(self._registryPath))
Beispiel #13
0
    def addVariantSet(self):
        """
        Adds a new VariantSet into this repo.

        Data files come from the ``dataFiles`` argument. When a single
        entry is given it may be a directory, in which case the
        ``*.vcf.gz`` files directly inside it are used; the set name
        defaults to one derived from that single entry. With several
        entries a ``name`` argument is required.

        When no ``indexFiles`` argument is given, every data file must
        exist locally and its index is assumed to be the data file name
        plus ``.tbi``.

        The reference set is taken from the ``referenceSetName``
        argument or, failing that, from the VCF header. If the variant
        set is annotated and ``addAnnotationSets`` is set, its annotation
        sets are given the requested ontology and inserted together with
        the variant set in a single _updateRepo call.

        Raises RepoManagerException if a directory holds no VCF files, if
        the name cannot be inferred, if no reference set can be
        determined, or if annotations are requested without an ontology
        name. Raises MissingIndexException if default index paths are
        needed but a data file is not present locally.
        """
        self._openRepo()
        dataset = self._repo.getDatasetByName(self._args.datasetName)
        dataUrls = self._args.dataFiles
        name = self._args.name
        if len(dataUrls) == 1:
            if self._args.name is None:
                name = getNameFromPath(dataUrls[0])
            if os.path.isdir(dataUrls[0]):
                # Read in the VCF files from the directory.
                # TODO support uncompressed VCF and BCF files
                vcfDir = dataUrls[0]
                pattern = os.path.join(vcfDir, "*.vcf.gz")
                dataUrls = glob.glob(pattern)
                if len(dataUrls) == 0:
                    raise exceptions.RepoManagerException(
                        "Cannot find any VCF files in the directory "
                        "'{}'.".format(vcfDir))
                dataUrls[0] = self._getFilePath(dataUrls[0],
                                                self._args.relativePath)
        elif self._args.name is None:
            raise exceptions.RepoManagerException(
                "Cannot infer the intended name of the VariantSet when "
                "more than one VCF file is provided. Please provide a "
                "name argument using --name.")
        # Only the first entry decides whether the inputs are treated as
        # remote URLs or as local paths.
        # NOTE(review): 'https' is not in this list -- confirm whether
        # that is intentional.
        parsed = urlparse.urlparse(dataUrls[0])
        if parsed.scheme not in ['http', 'ftp']:
            # Build a real list: dataUrls is iterated several times below,
            # which a lazy map() object would not survive.
            dataUrls = [
                self._getFilePath(url, self._args.relativePath)
                for url in dataUrls]
        # Now, get the index files for the data files that we've now obtained.
        indexFiles = self._args.indexFiles
        if indexFiles is None:
            # First check if all the paths exist locally, as they must
            # if we are making a default index path.
            for dataUrl in dataUrls:
                if not os.path.exists(dataUrl):
                    raise exceptions.MissingIndexException(
                        "Cannot find file '{}'. All variant files must be "
                        "stored locally if the default index location is "
                        "used. If you are trying to create a VariantSet "
                        "based on remote URLs, please download the index "
                        "files to the local file system and provide them "
                        "with the --indexFiles argument".format(dataUrl))
            # We assume that the indexes are made by adding .tbi
            indexSuffix = ".tbi"
            # TODO support BCF input properly here by adding .csi
            indexFiles = [filename + indexSuffix for filename in dataUrls]
        indexFiles = [
            self._getFilePath(url, self._args.relativePath)
            for url in indexFiles]
        variantSet = variants.HtslibVariantSet(dataset, name)
        variantSet.populateFromFile(dataUrls, indexFiles)
        # Get the reference set that is associated with the variant set.
        referenceSetName = self._args.referenceSetName
        if referenceSetName is None:
            # Try to find a reference set name from the VCF header.
            referenceSetName = variantSet.getVcfHeaderReferenceSetName()
        if referenceSetName is None:
            raise exceptions.RepoManagerException(
                "Cannot infer the ReferenceSet from the VCF header. Please "
                "specify the ReferenceSet to associate with this "
                "VariantSet using the --referenceSetName option")
        referenceSet = self._repo.getReferenceSetByName(referenceSetName)
        variantSet.setReferenceSet(referenceSet)

        # Now check for annotations
        annotationSets = []
        if variantSet.isAnnotated() and self._args.addAnnotationSets:
            ontologyName = self._args.ontologyName
            if ontologyName is None:
                raise exceptions.RepoManagerException(
                    "A sequence ontology name must be provided")
            ontology = self._repo.getOntologyByName(ontologyName)
            self._checkSequenceOntology(ontology)
            for annotationSet in variantSet.getVariantAnnotationSets():
                annotationSet.setOntology(ontology)
                annotationSets.append(annotationSet)

        # Add the annotation sets and the variant set as an atomic update
        def updateRepo():
            self._repo.insertVariantSet(variantSet)
            for annotationSet in annotationSets:
                self._repo.insertVariantAnnotationSet(annotationSet)
        self._updateRepo(updateRepo)