def assignIODirectories(self):
     """Store the selected input/output directories in the configuration.

     Reads the current values of ``self.inputDirectoryText`` and
     ``self.outputDirectoryText``, stores them through
     ``assignConfigurationValue`` together with the log-file and report-file
     paths derived from the output directory, and then records three
     analysis steps describing what was set.
     """
     assignConfigurationValue('input_directory', self.inputDirectoryText.get())

     chosenOutputDirectory = self.outputDirectoryText.get()
     assignConfigurationValue('output_directory', chosenOutputDirectory)

     # Both bookkeeping files live directly inside the output directory.
     for configKey, fileName in (
             ('log_file_location', 'nanopore-prospector-log.txt'),
             ('report_file_location', 'AnalysisReport.txt')):
         assignConfigurationValue(configKey, join(chosenOutputDirectory, fileName))

     self.recordAnalysisStep('Initialized Nanopore Prospector Log File')
     # Echo the directories back from the configuration, not from the widgets.
     for stepLabel, configKey in (
             ('Read Input Directory:', 'input_directory'),
             ('Analysis Output Directory:', 'output_directory')):
         self.recordAnalysisStep(stepLabel + getConfigurationValue(configKey))
 def reportResults(self, currentMessage):
     """Append a message to the analysis report file.

     This report is intended to summarize the prospector / allele calling
     results.

     currentMessage -- text to report; a newline is appended before it is
                       written to the file.

     If the 'report_file_location' configuration value is None, nothing is
     written and the message is printed to the console instead.
     """
     fullReportMessage = currentMessage + '\n'

     reportFileLocation = getConfigurationValue('report_file_location')
     if reportFileLocation is None:
         print ('The report file name does not exist yet.\nTrying to report this message:\n' + currentMessage)
         return

     # Append to the report. The context manager closes the file handle
     # even when the write itself raises.
     with open(reportFileLocation, 'a') as resultsOutput:
         resultsOutput.write(fullReportMessage)
def logBlastProgress(blastLogText):
    """Route a BLAST progress message to the log file or to the console.

    blastLogText -- the message to log.

    When the 'log_file_location' configuration value is set, the message is
    handed to ``logMessageToFile``; otherwise its string form is printed.
    """
    logFileIsConfigured = getConfigurationValue('log_file_location') is not None

    if not logFileIsConfigured:
        # No log file available yet: fall back to the console.
        print(str(blastLogText))
        return

    logMessageToFile(blastLogText)
    def sortByLocus(self):
        """Sort the prepared reads by HLA locus and report the read counts.

        Collects the BLAST reference file of every locus whose 'analyze_hla_*'
        configuration value is '1', combines them into one sort reference,
        and sorts the reads found in '<output>/1_prepared_reads' into
        '<output>/2_sorted_reads'. The number of reads per locus is then
        appended to the results report.

        The GUI is disabled while this runs and is re-enabled afterwards,
        including when one of the steps raises; the exception still propagates.
        """
        self.recordAnalysisStep('Step 2.) Sort reads by HLA Locus')
        self.disableGUI()

        try:
            # TODO: What if it's demultiplexed?
            # Each barcode should be sorted: loop over the barcodes and sort
            # each one into a subdirectory.
            # TODO: Look for fastq files dynamically in the output directory.
            # TODO: Rethink how to handle sampleID when demultiplexing.
            # One sample per folder is what is handled today.

            # TODO: threadCount should be a parameter somewhere,
            # probably specified in the options.
            threadCount = 4

            outputDirectory = self.outputDirectoryText.get()
            preparedReadsInput = join(outputDirectory, '1_prepared_reads')
            sortedReadsOutputDirectory = join(outputDirectory, '2_sorted_reads')
            if not exists(sortedReadsOutputDirectory):
                makedirs(sortedReadsOutputDirectory)

            print('the configuration value for analyze_hla_a is:' + str(getConfigurationValue('analyze_hla_a')))

            # (configuration flag, BLAST reference file) for every supported locus.
            locusReferences = (
                ('analyze_hla_a', 'HLA_A_BlastReference.fasta'),
                ('analyze_hla_b', 'HLA_B_BlastReference.fasta'),
                ('analyze_hla_c', 'HLA_C_BlastReference.fasta'),
                ('analyze_hla_e', 'HLA_E_BlastReference.fasta'),
                ('analyze_hla_dra', 'HLA_DRA_BlastReference.fasta'),
                ('analyze_hla_dqa1', 'HLA_DQA1_BlastReference.fasta'),
                ('analyze_hla_dqb1', 'HLA_DQB1_BlastReference.fasta'),
                ('analyze_hla_drb1', 'HLA_DRB1_BlastReference.fasta'),
            )

            # Keep only the references of the loci that are switched on.
            # str() makes the comparison work for both 1 and '1'.
            referenceFileList = [
                getBlastSortResourceLocation(referenceFileName)
                for configKey, referenceFileName in locusReferences
                if str(getConfigurationValue(configKey)) == '1'
            ]

            print('I found this many blast references:' + str(len(referenceFileList)))

            sortReferencePath = combineBlastReferences(referenceFileList, join(outputDirectory, 'blast_sort_reference'))

            # The key is the name of the file analyzed.
            # The value is a list of read collections, one per gene the reads sorted to.
            sortResults = sortDirectory(preparedReadsInput, sortedReadsOutputDirectory, sortReferencePath, threadCount)

            # Report the BLAST sorting results in the results summary.
            self.reportResults('Read Sorting:\n')
            # Loop through the analyzed read files (probably just one here).
            for analyzedReadResult, blastGeneList in sortResults.items():
                self.reportResults(analyzedReadResult + ':')

                for genewiseSortedReadGroup in blastGeneList:
                    readCountText = str(len(genewiseSortedReadGroup.readCollection))
                    if genewiseSortedReadGroup.gene is None:
                        # A group without a gene holds the reads that did not sort.
                        self.reportResults('Unsorted: ' + readCountText + ' reads.')
                    else:
                        self.reportResults('HLA-' + str(genewiseSortedReadGroup.gene) + ': ' + readCountText + ' reads.')

            self.reportResults('')
            self.recordAnalysisStep('Done sorting reads by HLA Locus')
        finally:
            # Never leave the GUI disabled, even if sorting failed.
            self.enableGUI()
    def constructInitialReadStats(self):
        """Prepare the input reads and report their initial statistics.

        Stores the input/output directories in the configuration, writes the
        configuration file, runs ``prepareReads`` on the input directory with
        '<output>/1_prepared_reads' as its output directory, and passes the
        result to ``self.reportReadStats``.

        A barcode file is passed to ``prepareReads`` only when the
        'demultiplex_reads' configuration value is '1' (or the integer 1).

        The GUI is disabled while this runs and is re-enabled afterwards,
        including when one of the steps raises; the exception still propagates.
        """
        self.recordAnalysisStep('Step 1.) Calculating initial read stats')
        self.disableGUI()

        try:
            # Set the input and output directories.
            self.assignIODirectories()
            writeConfigurationFile()

            # TODO: fix these parameters, especially sample ID.
            # TODO: Run this in a thread, so the GUI can update.
            preparedReadsOutputDirectory = join(getConfigurationValue('output_directory'), '1_prepared_reads')

            # TODO: The leaf directory name is always used as the sample ID.
            # Does this work for multiplexed and demultiplexed samples?
            sampleID = basename(normpath(self.inputDirectoryText.get()))

            if not exists(preparedReadsOutputDirectory):
                makedirs(preparedReadsOutputDirectory)

            # If the demultiplex option is set to 1, demultiplexing is "on".
            # str() keeps this consistent with the other option checks, so
            # an integer 1 is accepted as well as the string '1'.
            if str(getConfigurationValue('demultiplex_reads')) == '1':
                barcodeFilePath = getBarcodeFilePath()
            else:
                barcodeFilePath = None  # No barcoding file.

            # NOTE (from the original author): the result is a dictionary
            # holding 2 arrays; readstats is a 2d array with lengths and
            # qualities.
            preparedReadResults = prepareReads(getConfigurationValue('input_directory')
                , preparedReadsOutputDirectory
                , sampleID
                , barcodeFilePath
                , None # Reference file, we don't have one.
                , int(getConfigurationValue('min_length'))
                , int(getConfigurationValue('max_length'))
                , int(getConfigurationValue('min_quality'))
                , int(getConfigurationValue('max_quality'))
                , False)

            self.reportReadStats(preparedReadResults)
            self.recordAnalysisStep('Done calculating initial read stats')
        finally:
            # Never leave the GUI disabled, even if read preparation failed.
            self.enableGUI()
    def loadOptions(self):
        """Load the configuration file and push its values into the option variables.

        For every known option, the matching variable on ``self`` is updated
        through its ``set`` method, but only when the configuration holds a
        value for it (i.e. the value is not None). The 'demultiplex_reads'
        value is converted with ``int`` first; all other values are passed on
        unchanged.
        """
        # Read the configuration file.
        loadConfigurationFile()

        #TODO: Number of threads. Configure this.

        # (configuration key, attribute on self holding the variable, converter)
        optionBindings = (
            ('demultiplex_reads', 'chooseDemultiplexIntVar', int),
            ('min_length', 'inputMinLength', None),
            ('max_length', 'inputMaxLength', None),
            ('min_quality', 'inputMinQuality', None),
            ('max_quality', 'inputMaxQuality', None),
            ('analyze_hla_a', 'geneAIntVar', None),
            ('analyze_hla_b', 'geneBIntVar', None),
            ('analyze_hla_c', 'geneCIntVar', None),
            ('analyze_hla_e', 'geneEIntVar', None),
            ('analyze_hla_dra', 'geneDRAIntVar', None),
            ('analyze_hla_dqa1', 'geneDQA1IntVar', None),
            ('analyze_hla_dqb1', 'geneDQB1IntVar', None),
            ('analyze_hla_drb1', 'geneDRB1IntVar', None),
        )

        for configKey, variableName, converter in optionBindings:
            storedValue = getConfigurationValue(configKey)
            if storedValue is None:
                # Nothing configured for this option: leave the variable alone.
                continue

            # Resolve the variable lazily, only for options that have a value.
            optionVariable = getattr(self, variableName)
            optionVariable.set(storedValue if converter is None else converter(storedValue))