def convert(self):
    """Convert an alignment file between SAM and BAM formats.

    Copies ``self.args.numLines`` aligned segments from
    ``self.args.inputFile`` into ``self.args.outputFile`` (reusing the
    input header), then optionally builds a BAM index.

    :raises ValueError: if the input or output format is not SAM or BAM
    """
    # Map formats to pysam open-mode flags. Fail fast on anything else:
    # the original left the flag variables unbound, producing a confusing
    # NameError at the open() call instead of a clear error here.
    if self.inputFileFormat == AlignmentFileConstants.SAM:
        inputFlags = "r"
    elif self.inputFileFormat == AlignmentFileConstants.BAM:
        inputFlags = "rb"
    else:
        raise ValueError(
            "Unsupported input format: {}".format(self.inputFileFormat))
    if self.outputFileFormat == AlignmentFileConstants.SAM:
        outputFlags = "wh"  # "h" includes the header in the SAM text
    elif self.outputFileFormat == AlignmentFileConstants.BAM:
        outputFlags = "wb"
    else:
        raise ValueError(
            "Unsupported output format: {}".format(self.outputFileFormat))
    # Open files; try/finally guarantees both handles are closed even if
    # reading or writing raises (the original leaked them on error).
    inputFile = pysam.AlignmentFile(self.args.inputFile, inputFlags)
    try:
        outputFile = pysam.AlignmentFile(
            self.args.outputFile, outputFlags, header=inputFile.header)
        try:
            outputFilePath = outputFile.filename
            utils.log(
                "Creating alignment file '{}'".format(outputFilePath))
            # Copy the requested number of records.
            for _ in xrange(self.args.numLines):
                alignedSegment = next(inputFile)
                outputFile.write(alignedSegment)
        finally:
            outputFile.close()
    finally:
        inputFile.close()
    # Create the index file (BAM output only).
    if (not self.args.skipIndexing and
            self.outputFileFormat == AlignmentFileConstants.BAM):
        indexFilePath = "{}.{}".format(
            outputFilePath, AlignmentFileConstants.BAI.lower())
        utils.log("Creating index file '{}'".format(indexFilePath))
        pysam.index(outputFilePath)
def convert(self):
    """Convert an alignment file between SAM and BAM formats.

    Copies ``self.args.numLines`` aligned segments from
    ``self.args.inputFile`` into ``self.args.outputFile`` (reusing the
    input header), then optionally builds a BAM index.

    :raises ValueError: if the input or output format is not SAM or BAM
    """
    # Map formats to pysam open-mode flags. Fail fast on anything else:
    # the original left the flag variables unbound, producing a confusing
    # NameError at the open() call instead of a clear error here.
    if self.inputFileFormat == AlignmentFileConstants.SAM:
        inputFlags = "r"
    elif self.inputFileFormat == AlignmentFileConstants.BAM:
        inputFlags = "rb"
    else:
        raise ValueError(
            "Unsupported input format: {}".format(self.inputFileFormat))
    if self.outputFileFormat == AlignmentFileConstants.SAM:
        outputFlags = "wh"  # "h" includes the header in the SAM text
    elif self.outputFileFormat == AlignmentFileConstants.BAM:
        outputFlags = "wb"
    else:
        raise ValueError(
            "Unsupported output format: {}".format(self.outputFileFormat))
    # Open files; try/finally guarantees both handles are closed even if
    # reading or writing raises (the original leaked them on error).
    inputFile = pysam.AlignmentFile(
        self.args.inputFile, inputFlags)
    try:
        outputFile = pysam.AlignmentFile(
            self.args.outputFile, outputFlags, header=inputFile.header)
        try:
            outputFilePath = outputFile.filename
            utils.log(
                "Creating alignment file '{}'".format(outputFilePath))
            # Copy the requested number of records.
            for _ in xrange(self.args.numLines):
                alignedSegment = next(inputFile)
                outputFile.write(alignedSegment)
        finally:
            outputFile.close()
    finally:
        inputFile.close()
    # Create the index file (BAM output only).
    if (not self.args.skipIndexing and
            self.outputFileFormat == AlignmentFileConstants.BAM):
        indexFilePath = "{}.{}".format(
            outputFilePath, AlignmentFileConstants.BAI.lower())
        utils.log("Creating index file '{}'".format(indexFilePath))
        pysam.index(outputFilePath)
def testLog(self):
    """utils.log should emit exactly one call to the patched print."""
    utils.log("message")
    # assertEquals is a deprecated alias; assertEqual is the
    # canonical unittest method.
    self.assertEqual(self.printMock.call_count, 1)
def __init__(self, inputDirectory, outputDirectory, force):
    """
    Converts human readable dataset from compliance repository,
    and translates it into a reference-server readable filesystem
    with binary files.
    :param inputDirectory: location of
        the human readable compliance dataset; if None, the dataset
        is downloaded from GitHub into a temporary directory
    :param outputDirectory: location of
        the file hierarchy suitable for deploying on the reference
        server
    :param force: if True, remove an existing output directory
        instead of aborting
    """
    self.inputDirectory = inputDirectory
    self.outputDirectory = outputDirectory
    self.repoPath = os.path.abspath(
        os.path.join(outputDirectory, "registry.db"))
    self.tempdir = None
    if os.path.exists(self.outputDirectory):
        if force:
            utils.log("Removing existing output directory at '{}'".format(
                self.outputDirectory))
            shutil.rmtree(self.outputDirectory)
        else:
            utils.log("Output directory '{}' already exists".format(
                self.outputDirectory))
            utils.log("Please specify an output path that does not exist")
            utils.log("Exiting...")
            # raise SystemExit rather than calling the site-injected
            # exit() builtin, which is absent under `python -S`
            raise SystemExit(1)
    # If no input directory is specified download from GitHub
    if inputDirectory is None:
        utils.log("Downloading test data...")
        self.tempdir = tempfile.mkdtemp()
        # Explicit check instead of assert: asserts are stripped
        # when running under python -O.
        if not os.path.exists(self.tempdir):
            raise RuntimeError(
                "Failed to create temporary directory")
        url = "https://github.com/ga4gh/compliance/archive/master.zip"
        filePath = os.path.join(self.tempdir, 'compliance-master.zip')
        downloader = file_downloader.HttpFileDownloader(url, filePath)
        downloader.download()
        utils.log("Extracting test data...")
        with zipfile.ZipFile(filePath, "r") as z:
            z.extractall(self.tempdir)
        self.inputDirectory = os.path.join(self.tempdir,
                                           'compliance-master',
                                           'test-data')
    repo = datarepo.SqlDataRepository(self.repoPath)
    self.repo = repo
def cleanup(self):
    """Remove the temporary download directory, if one was created,
    and report where the converted data ended up."""
    tempDir = self.tempdir
    if tempDir is not None:
        shutil.rmtree(tempDir)
    for message in (
            "Done converting compliance data.",
            "Result in '{}'".format(self.outputDirectory)):
        utils.log(message)
def runCommandCheckWarnings(self, cmd):
    """Run *cmd*, capture its stdout line by line, and delegate to
    ensureNoWarnings to verify that no warnings were emitted."""
    utils.log("Running '{}'".format(cmd))
    argv = shlex.split(cmd)
    rawOutput = subprocess.check_output(argv)
    outputLines = rawOutput.split('\n')
    self.ensureNoWarnings(outputLines, cmd)
def __init__(self, inputDirectory, outputDirectory, force):
    """
    Converts human readable dataset from compliance repository,
    and translates it into a reference-server readable filesystem
    with binary files.
    :param inputDirectory: location of
        the human readable compliance dataset; if None, the dataset
        is downloaded from GitHub into a temporary directory
    :param outputDirectory: location of
        the file hierarchy suitable for deploying on the reference
        server
    :param force: if True, remove an existing output directory
        instead of aborting
    """
    self.inputDirectory = inputDirectory
    self.outputDirectory = outputDirectory
    self.repoPath = os.path.abspath(
        os.path.join(outputDirectory, "registry.db"))
    self.tempdir = None
    if os.path.exists(self.outputDirectory):
        if force:
            utils.log(
                "Removing existing output directory at '{}'".format(
                    self.outputDirectory))
            shutil.rmtree(self.outputDirectory)
        else:
            utils.log(
                "Output directory '{}' already exists".format(
                    self.outputDirectory))
            utils.log(
                "Please specify an output path that does not exist")
            utils.log("Exiting...")
            # raise SystemExit rather than calling the site-injected
            # exit() builtin, which is absent under `python -S`
            raise SystemExit(1)
    # If no input directory is specified download from GitHub
    if inputDirectory is None:
        utils.log("Downloading test data...")
        self.tempdir = tempfile.mkdtemp()
        # Explicit check instead of assert: asserts are stripped
        # when running under python -O.
        if not os.path.exists(self.tempdir):
            raise RuntimeError(
                "Failed to create temporary directory")
        url = "https://github.com/ga4gh/compliance/archive/master.zip"
        filePath = os.path.join(self.tempdir, 'compliance-master.zip')
        downloader = file_downloader.HttpFileDownloader(url, filePath)
        downloader.download()
        utils.log("Extracting test data...")
        with zipfile.ZipFile(filePath, "r") as z:
            z.extractall(self.tempdir)
        self.inputDirectory = os.path.join(
            self.tempdir, 'compliance-master', 'test-data')
    repo = datarepo.SqlDataRepository(self.repoPath)
    self.repo = repo
def log(self, logStr):
    """Emit *logStr* via utils.log, prefixed with this object's
    configured log-string prefix."""
    prefixed = "{0} {1}".format(self.logStrPrefix, logStr)
    utils.log(prefixed)