Esempio n. 1
0
    def impInitIO(self, ):
        """
        Register the paired FASTQ inputs and the outputs derived from them.

        Reads the IO parameters 'fastqInput1', 'fastqInput2',
        'fastqOutputDir1' and 'fastqOutputDir2' (set in __init__()),
        stores Configure.getTmpDir() for any output directory parameter
        that is None, and registers the outputs 'fastqOutput1',
        'fastqOutput2', 'adapterOutput' and 'settingsOutput'.
        """
        # Fetch the IO parameters once, in a fixed order.
        fastqInput1 = self.getParamIO('fastqInput1')
        fastqInput2 = self.getParamIO('fastqInput2')
        fastqOutputDir1 = self.getParamIO('fastqOutputDir1')
        fastqOutputDir2 = self.getParamIO('fastqOutputDir2')

        # Register both mates as inputs.
        for inputName, inputValue in (('fastqInput1', fastqInput1),
                                      ('fastqInput2', fastqInput2)):
            self.setInputDirOrFile(inputName, inputValue)

        # Only the stored parameter receives the default directory; the
        # locals below keep their original (possibly None) value.
        # NOTE(review): presumably setOutputDir1To1 treats a None directory
        # as "use the default", as it is also passed None on purpose for
        # 'adapterOutput' and 'settingsOutput' -- confirm.
        for dirParam, dirValue in (('fastqOutputDir1', fastqOutputDir1),
                                   ('fastqOutputDir2', fastqOutputDir2)):
            if dirValue is None:
                self.setParamIO(dirParam, Configure.getTmpDir())

        # (output name, output dir, suffix, input the output is derived from)
        outputSpecs = (
            ('fastqOutput1', fastqOutputDir1, 'fastq', 'fastqInput1'),
            ('fastqOutput2', fastqOutputDir2, 'fastq', 'fastqInput2'),
            ('adapterOutput', None, 'adapter.txt', 'fastqInput1'),
            ('settingsOutput', None, 'settings', 'fastqInput1'),
        )
        for outputName, outputDir, suffix, sourceInput in outputSpecs:
            self.setOutputDir1To1(outputName, outputDir, None, suffix,
                                  sourceInput)

        # Without a first-mate input there is nothing to count.
        if fastqInput1 is None:
            return
        self._setInputSize(len(self.getInputList('fastqInput1')))
Esempio n. 2
0
    def impInitIO(self, ):
        """
        Register FASTQ inputs, the HISAT2 index files and the SAM output.

        Reads the IO parameters 'fastqInput1', 'fastqInput2',
        'samOutputDir' and 'ht2Idx'. When 'ht2Idx' is None the index
        information is taken from Configure; otherwise the eight
        '<ht2Idx>.N.ht2' file names are registered as input.
        """
        fastqInput1 = self.getParamIO('fastqInput1')
        fastqInput2 = self.getParamIO('fastqInput2')
        samOutputDir = self.getParamIO('samOutputDir')
        ht2Idx = self.getParamIO('ht2Idx')

        # Only the stored parameter is defaulted; the local keeps its value.
        if samOutputDir is None:
            self.setParamIO('samOutputDir', Configure.getTmpDir())

        for inputName, inputValue in (('fastqInput1', fastqInput1),
                                      ('fastqInput2', fastqInput2)):
            self.setInputDirOrFile(inputName, inputValue)

        if ht2Idx is not None:
            # Index files are named <prefix>.1.ht2 ... <prefix>.8.ht2
            ht2IdxFiles = [ht2Idx + '.%d.ht2' % part for part in range(1, 9)]
            self.setInput('ht2IdxFiles', ht2IdxFiles)
        else:
            # NOTE(review): this branch registers the input as 'ht2IdxFile'
            # (singular) and reads the config key 'ht2Indx', while the other
            # branch uses 'ht2IdxFiles' -- verify these keys against
            # Configure and the consumers of this input.
            self.setInput('ht2IdxFile', Configure.getConfig('ht2IdxFile'))
            self.setParamIO('ht2Idx', Configure.getConfig('ht2Indx'))

        # One SAM output per first-mate input, prefixed with 'hisat'.
        self.setOutputDir1To1('samOutput', samOutputDir, 'hisat', 'sam',
                              'fastqInput1')

        if fastqInput1 is None:
            return
        self._setInputSize(len(self.getInputList('fastqInput1')))
Esempio n. 3
0
    def impInitIO(self, ):
        """
        Register the SRA input and the two FASTQ outputs derived from it.

        Reads the IO parameters 'sraInput1' and 'fastqOutputDir'; a None
        output directory parameter is replaced by Configure.getTmpDir().
        """
        sraInput1 = self.getParamIO('sraInput1')
        fastqOutputDir = self.getParamIO('fastqOutputDir')

        # Only the stored parameter is defaulted; the local keeps its value.
        if fastqOutputDir is None:
            self.setParamIO('fastqOutputDir', Configure.getTmpDir())

        self.setInputDirOrFile('sraInput1', sraInput1)

        # Two outputs per input, joined to the suffix with '_'.
        for outputName, suffix in (('fastqOutput1', '1.fastq'),
                                   ('fastqOutput2', '2.fastq')):
            self.setOutputDir1To1(outputName,
                                  fastqOutputDir,
                                  None,
                                  suffix,
                                  'sraInput1',
                                  sep='_')

        if sraInput1 is None:
            return
        self._setInputSize(len(self.getInputList('sraInput1')))
Esempio n. 4
0
    def __init__(self,
                 fastqInput=None,
                 fileFormat=None,
                 fastqcOutputDir=None,
                 threads=None,
                 cmdParam=None,
                 **kwargs):
        """
        Store the IO parameters, initialise IO and record the run parameters.

        Args:
            fastqInput: value stored as the 'fastqInput' IO parameter.
            fileFormat: value stored as the 'fileFormat' parameter.
            fastqcOutputDir: value stored as the 'fastqcOutputDir' IO
                parameter; Configure.getTmpDir() is used when it is None.
            threads: value stored as the 'threads' parameter;
                Configure.getThreads() is used when it is None.
            cmdParam: forwarded to the parent initialiser.
            **kwargs: forwarded to the parent initialiser.
        """
        super(Step, self).__init__(cmdParam, **kwargs)

        # IO parameters must be in place before initIO() runs.
        self.setParamIO('fastqInput', fastqInput)
        # Identity check: `== None` would go through a custom __eq__.
        if fastqcOutputDir is None:
            fastqcOutputDir = Configure.getTmpDir()
        self.setParamIO('fastqcOutputDir', fastqcOutputDir)

        self.initIO()

        # Remaining (non-IO) parameters.
        self.setParam('fileFormat', fileFormat)
        if threads is None:
            threads = Configure.getThreads()
        self.setParam('threads', threads)

        # Diagnostic output kept from the original implementation.
        print(self.params)
Esempio n. 5
0
    def impInitIO(self, ):
        """
        Register the inputs and the per-sample 'merged_gtf' output paths.

        Reads the IO parameters 'faInput1', 'gtfInput1', 'assembliesInput'
        and 'gtfOutputDir'. When 'gtfOutputDir' is None it is replaced by
        Configure.getTmpDir(), and that same directory is used to build the
        output paths '<gtfOutputDir>/cuffmerge_<i>/merged.gtf'. When
        'assembliesInput' is None the 'merged_gtf' output is set to None.
        """
        faInput1 = self.getParamIO('faInput1')
        gtfInput1 = self.getParamIO('gtfInput1')
        assembliesInput = self.getParamIO('assembliesInput')
        gtfOutputDir = self.getParamIO('gtfOutputDir')
        if gtfOutputDir is None:
            # Keep the local in sync with the stored parameter: it is passed
            # to os.path.join() below, which raises TypeError on None.
            gtfOutputDir = Configure.getTmpDir()
            self.setParamIO('gtfOutputDir', gtfOutputDir)

        # set all input files
        self.setInputDirOrFile('assembliesInput', assembliesInput)

        if faInput1 is None:
            # NOTE(review): the empty config key looks like a placeholder --
            # confirm which Configure entry holds the default FASTA file.
            faInput1 = Configure.getConfig('')
            self.setInput('faInput1', faInput1)
            self.setParamIO('faInput1', faInput1)
        else:
            self.setInput('faInput1', faInput1)

        if gtfInput1 is None:
            # NOTE(review): same placeholder key as above -- confirm.
            gtfInput1 = Configure.getConfig('')
            self.setInput('gtfInput1', gtfInput1)
            self.setParamIO('gtfInput1', gtfInput1)
        else:
            self.setInput('gtfInput1', gtfInput1)

        if assembliesInput is None:
            self.setOutput('merged_gtf', None)
            return

        sampleCount = len(self.getInputList('assembliesInput'))
        self._setInputSize(sampleCount)
        # One output directory per registered assemblies input.
        merged_gtf = [
            os.path.join(gtfOutputDir, 'cuffmerge_' + str(i), 'merged.gtf')
            for i in range(sampleCount)
        ]
        self.setOutput('merged_gtf', merged_gtf)
Esempio n. 6
0
    def impInitIO(self, ):
        """
        Register FASTQ inputs, the Bowtie2 index files and both outputs.

        Reads the IO parameters 'fastqInput1', 'fastqInput2', 'bt2Idx',
        'samOutputDir' and 'mapRsOutputDir' (set in __init__()). When
        'bt2Idx' is None the index information is taken from Configure;
        otherwise the six '<bt2Idx>.*.bt2' file names are registered.
        """
        fastqInput1 = self.getParamIO('fastqInput1')
        fastqInput2 = self.getParamIO('fastqInput2')
        bt2Idx = self.getParamIO('bt2Idx')
        samOutputDir = self.getParamIO('samOutputDir')
        mapRsOutputDir = self.getParamIO('mapRsOutputDir')

        for inputName, inputValue in (('fastqInput1', fastqInput1),
                                      ('fastqInput2', fastqInput2)):
            self.setInputDirOrFile(inputName, inputValue)

        # Index comes either from the caller or from the configuration.
        if bt2Idx is not None:
            indexSuffixes = ('.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2',
                             '.rev.1.bt2', '.rev.2.bt2')
            bt2IdxFiles = [bt2Idx + ending for ending in indexSuffixes]
            self.setInput('bt2IdxFiles', bt2IdxFiles)
        else:
            self.setInput('bt2IdxFiles', Configure.getConfig('bt2IdxFiles'))
            self.setParamIO('bt2Idx', Configure.getConfig('bt2Idx'))

        # Only the stored parameters are defaulted; the locals passed to
        # setOutputDir1To1 below keep their original (possibly None) value.
        for dirParam, dirValue in (('samOutputDir', samOutputDir),
                                   ('mapRsOutputDir', mapRsOutputDir)):
            if dirValue is None:
                self.setParamIO(dirParam, Configure.getTmpDir())

        for outputName, outputDir, suffix in (
                ('samOutput', samOutputDir, 'sam'),
                ('mapRsOutput', mapRsOutputDir, 'result.txt')):
            self.setOutputDir1To1(outputName, outputDir, None, suffix,
                                  'fastqInput1')

        if fastqInput1 is None:
            return
        self._setInputSize(len(self.getInputList('fastqInput1')))
Esempio n. 7
0
	def impInitIO(self,):
		"""
		Register the BAM/GTF inputs and the per-sample cufflinks outputs.

		Reads the IO parameters 'bamInput', 'gtfInput' and 'outputDir'.
		When 'outputDir' is None it is replaced by Configure.getTmpDir(),
		and that same directory is used to build the output paths
		'<outputDir>/cufflinks_<i>/<file>'. When 'bamInput' is None the four
		per-sample outputs are set to None.
		"""
		bamInput = self.getParamIO('bamInput')
		gtfInput = self.getParamIO('gtfInput')
		outputDir = self.getParamIO('outputDir')
		if outputDir is None:
			# Keep the local in sync with the stored parameter: it is passed
			# to os.path.join() below, which raises TypeError on None.
			outputDir = Configure.getTmpDir()
			self.setParamIO('outputDir', outputDir)

		self.setInputDirOrFile('bamInput', bamInput)

		if gtfInput is None:
			# NOTE(review): the empty config key looks like a placeholder --
			# confirm which Configure entry holds the default GTF file.
			gtfInput = Configure.getConfig('')
			self.setInput('gtfInput', gtfInput)
			self.setParamIO('gtfInput', gtfInput)
		else:
			self.setInput('gtfInput', gtfInput)

		self.setOutput('assembliesOutput', os.path.join(Configure.getTmpDir(), 'assemblies.txt'))

		# (output name, file name inside each cufflinks_<i> directory)
		perSampleOutputs = (
			('genes_fpkm_tracking', 'genes.fpkm_tracking'),
			('isoforms_fpkm_tracking', 'isoforms.fpkm_tracking'),
			('skipped_gtf', 'skipped.gtf'),
			('transcripts_gtf', 'transcripts.gtf'),
		)

		if bamInput is None:
			for outputName, _ in perSampleOutputs:
				self.setOutput(outputName, None)
			return

		sampleCount = len(self.getInputList('bamInput'))
		self._setInputSize(sampleCount)
		sampleDirs = [
			os.path.join(outputDir, 'cufflinks_' + str(i))
			for i in range(sampleCount)
		]
		for outputName, fileName in perSampleOutputs:
			self.setOutput(
				outputName,
				[os.path.join(sampleDir, fileName) for sampleDir in sampleDirs])
Esempio n. 8
0
    def impInitIO(self, ):
        """
        Register the FASTQ input, the reference files and the matrix outputs.

        Reads the IO parameters 'fastqInput', 'refile' and 'outputdir'.
        Every reference file is registered as a path below 'refile'. When
        'outputdir' is None it is replaced by Configure.getTmpDir() and
        self.resultdir becomes 'Cellranger'; otherwise self.resultdir is ''.
        The 'finaldir' IO parameter and the 'genes', 'matrix' and 'barcodes'
        outputs all point below
        '<outputdir>/<resultdir>/outs/filtered_gene_bc_matrices/hg19'.
        """
        fastqInput = self.getParamIO('fastqInput')
        refile = self.getParamIO('refile')
        outputdir = self.getParamIO('outputdir')

        self.setInputDirOrFile('fastqInput', fastqInput)

        # Files located directly in the reference directory.
        for inputName, fileName in (('version', 'version'),
                                    ('Reference', 'reference.json'),
                                    ('README', 'README.BEFORE.MODIFYING')):
            self.setInputDirOrFile(inputName, os.path.join(refile, fileName))

        # Files under <refile>/star, registered under their own names.
        starFiles = (
            'chrLength.txt', 'chrName.txt', 'exonGeTrInfo.tab',
            'geneInfo.tab', 'genomeParameters.txt', 'SAindex',
            'sjdbList.fromGTF.out.tab', 'transcriptInfo.tab',
            'chrNameLength.txt', 'chrStart.txt', 'exonInfo.tab', 'Genome',
            'SA', 'sjdbInfo.txt', 'sjdbList.out.tab',
        )
        for starFile in starFiles:
            self.setInputDirOrFile(starFile,
                                   os.path.join(refile, 'star', starFile))

        # Remaining files: the input name equals the file name.
        for subDir, fileName in (('pickle', 'genes.pickle'),
                                 ('genes', 'genes.gtf'),
                                 ('fasta', 'genome.fa')):
            self.setInputDirOrFile(fileName,
                                   os.path.join(refile, subDir, fileName))

        if outputdir is not None:
            self.resultdir = ''
        else:
            self.setParamIO('outputdir', Configure.getTmpDir())
            outputdir = self.getParamIO('outputdir')
            self.resultdir = 'Cellranger'

        # Directory that holds the filtered matrix files.
        matrixDir = os.path.join(outputdir, self.resultdir, 'outs',
                                 'filtered_gene_bc_matrices', 'hg19')
        self.setParamIO('finaldir', matrixDir)
        for outputName, fileName in (('genes', 'genes.tsv'),
                                     ('matrix', 'matrix.mtx'),
                                     ('barcodes', 'barcodes.tsv')):
            self.setOutputDirNTo1(outputName,
                                  os.path.join(matrixDir, fileName), '',
                                  'fastqInput')
Esempio n. 9
0
    def impInitIO(self):
        """
        Register the SAM input and the BAM output derived from it.

        Reads the IO parameters 'samInput' and 'bamOutputDir'; a None
        output directory parameter is replaced by Configure.getTmpDir().
        """
        samInput = self.getParamIO('samInput')
        bamOutputDir = self.getParamIO('bamOutputDir')

        # Only the stored parameter is defaulted; the local keeps its value.
        if bamOutputDir is None:
            self.setParamIO('bamOutputDir', Configure.getTmpDir())

        # Input first, then the output that is named after it.
        self.setInputDirOrFile('samInput', samInput)
        self.setOutputDir1To1('bamOutput', bamOutputDir, None, 'bam',
                              'samInput')

        if samInput is None:
            return
        sampleCount = len(self.getInputList('samInput'))
        self._setInputSize(sampleCount)
Esempio n. 10
0
    def impInitIO(self):
        """
        Register the SRA input and the two FASTQ outputs derived from it.

        Reads the IO parameters 'sraInput' and 'fastqOutputDir'. The outputs
        are registered with the suffixes '_1.fastq' and '_2.fastq' and an
        empty separator; afterwards a None output directory parameter is
        replaced by Configure.getTmpDir().
        """
        sraInput = self.getParamIO('sraInput')
        fastqOutputDir = self.getParamIO('fastqOutputDir')

        self.setInputDirOrFile('sraInput', sraInput)

        # The outputs are registered before the directory parameter is
        # defaulted, so they receive the raw (possibly None) directory.
        for outputName, suffix in (('fastqOutput1', '_1.fastq'),
                                   ('fastqOutput2', '_2.fastq')):
            self.setOutputDir1To1(outputName, fastqOutputDir, None, suffix,
                                  'sraInput', '')

        if fastqOutputDir is None:
            self.setParamIO('fastqOutputDir', Configure.getTmpDir())

        if sraInput is None:
            return
        self._setInputSize(len(self.getInputList('sraInput')))
Esempio n. 11
0
    def impInitIO(self, ):
        """
        Register the SAM and GTF inputs and the count output.

        Reads the IO parameters 'samInput1', 'gtfInput1' and
        'countOutputDir'; a None output directory parameter is replaced by
        Configure.getTmpDir().
        """
        samInput1 = self.getParamIO('samInput1')
        gtfInput1 = self.getParamIO('gtfInput1')
        countOutputDir = self.getParamIO('countOutputDir')

        # Only the stored parameter is defaulted; the local keeps its value.
        if countOutputDir is None:
            self.setParamIO('countOutputDir', Configure.getTmpDir())

        for inputName, inputValue in (('samInput1', samInput1),
                                      ('gtfInput1', gtfInput1)):
            self.setInputDirOrFile(inputName, inputValue)

        # The count output is derived from 'samInput1'.
        self.setOutputDir1To1('countOutput', countOutputDir, None, 'count',
                              'samInput1')

        if samInput1 is None:
            return
        self._setInputSize(len(self.getInputList('samInput1')))
Esempio n. 12
0
    def impInitIO(self, ):
        """
        Register the R script, the matrix inputs and the plot outputs.

        Reads the IO parameters 'outputdir', 'inputdir' and 'rscript'. A
        None 'outputdir' is replaced by Configure.getTmpDir(). The matrix
        inputs and the five '<name>.jpeg' plot outputs are registered only
        when 'inputdir' is not None.
        """
        outputdir = self.getParamIO('outputdir')
        inputdir = self.getParamIO('inputdir')
        rscript = self.getParamIO('rscript')

        self.setInputDirOrFile('rscript', rscript)

        # os.path.join() cannot take None, so resolve the directory first.
        if outputdir is None:
            self.setParamIO('outputdir', Configure.getTmpDir())
            outputdir = self.getParamIO('outputdir')

        # Same restriction for inputdir: nothing to register without it.
        if inputdir is None:
            return

        for inputName, fileName in (('barcodes', 'barcodes.tsv'),
                                    ('genes', 'genes.tsv'),
                                    ('matrix', 'matrix.mtx')):
            self.setInputDirOrFile(inputName,
                                   os.path.join(inputdir, fileName))

        # Each plot is written to <outputdir>/<name>.jpeg.
        for plotName in ('violinplot', 'geneplot', 'variableGenes',
                         'Elbowplot', 'TSNEplot'):
            self.setOutputDirNTo1(plotName,
                                  os.path.join(outputdir,
                                               plotName + '.jpeg'), '',
                                  'barcodes')
Esempio n. 13
0
	def _singleRun(self,i):
		"""
		Build and submit the cufflinks command line for the i-th BAM input.

		The command writes into '<outputDir>/cufflinks_<i>' and is followed
		by an 'echo ... >>' fragment that names that directory's
		'transcripts.gtf' and the 'assembliesOutput' path.
		"""
		bamInput = self.getInputList('bamInput')
		gtfInput = self.getParamIO('gtfInput')
		outputDir = self.getParamIO('outputDir')
		# Diagnostic output kept from the original implementation.
		print(os.path.join(Configure.getTmpDir(), 'assemblies.txt'))

		# The argument list is assembled left to right so that every
		# parameter lookup happens in the order it appears on the command.
		cmdline = ['cufflinks', '-p', str(self.getParam('threads'))]
		for flag, paramName in (('-u', 'ismultiReadCorrect'),
								('-N', 'isupperQuartileForm'),
								('--total-hits-norm', 'istotalHitsNorm')):
			cmdline.append(self.getBoolParamCmd(flag, paramName))
		cmdline.extend(['-m', str(self.getParam('fragLenMean'))])
		cmdline.extend(['-s', str(self.getParam('fragLenStdDev'))])

		# Per-sample result directory, shared by '-o' and the echo fragment.
		sampleDir = os.path.join(outputDir, 'cufflinks_' + str(i))
		cmdline.extend(['-G', gtfInput, '-o', sampleDir, bamInput[i]])

		# Second command, separated by ';'.
		cmdline.extend([
			';',
			'echo', '"' + os.path.join(sampleDir, 'transcripts.gtf') + '" >>',
			self.getOutput("assembliesOutput"),
		])
		self.callCmdline('V1', cmdline)