Esempio n. 1
0
    def _check_database(self, database):
        #Database file does not exist
        if not os.path.isfile(database):
            raise BE(code = -5, value = database)

        #Database is not formated (if dbformatexe is added in the configuration path it will be autoformated)
        formatdb_files = []
        if self._search_type == 'nucl':     formatdb_sufix = ['.nhr','.nin','.nsq']
        elif self._search_type == 'prot':   formatdb_sufix = ['.phr','.pin','.psq']
        for sufix in formatdb_sufix:
            if not os.path.isfile(database + sufix):
                formatdb_files.append(database + sufix)

        if len(formatdb_files) > 0:
            try:
                dbexe  = Executable(executable    = self._configurator.get('blast','dbformatexe'),
                                    path          = self._configurator.get('blast','path'),
                                    variable_path = self._configurator.get('blast','variable_path'))

                SBIglobals.alert('debug', self, 'Trying to format de DB {0} to perform a blast search.\n'.format(database))

                dbexe.add_attribute(database, '-in')
                dbexe.add_attribute(self._search_type, '-dbtype')

                SBIglobals.alert('debug', self, 'Executing command {0}\n'.format(dbexe))

                dbexe.execute()

            except ConfigParser.NoOptionError, e:
                raise BE(code = -6, value = formatdb_files)
            except SystemError, e:
                raise BE(code = -11, value = e)
Esempio n. 2
0
class CDhitExe(object):
    """Configure and (optionally) run cd-hit over a FASTA file.

    The executable name and path are read from the 'cd-hit' section of the
    configuration file named by the SBI_CONFIG_FILE environment variable,
    falling back to ``default_configuration_file``.
    """

    def __init__(self, fasta, threshold, output_dir=None, execute=True):
        """Prepare the cd-hit command and run it unless ``execute`` is false.

        :param fasta: path to the input FASTA file; stored as an absolute
            path and passed as ``-i``.
        :param threshold: numeric threshold passed to cd-hit as ``-c``; it
            also selects the word length passed as ``-n``.
        :param output_dir: directory for the output file; defaults to the
            directory that holds ``fasta``.
        :param execute: when true, cd-hit is executed from the constructor.
        """
        fasta = os.path.abspath(fasta)
        if output_dir is None:
            output_dir = os.path.dirname(fasta)

        word = self._word_size(threshold)

        # CDhit executable configuration
        self._configurator = ConfigParser.RawConfigParser(allow_no_value=True)
        self._configurator.read(
            os.getenv('SBI_CONFIG_FILE', default_configuration_file))
        self._exe = Executable(
            executable=self._configurator.get('cd-hit', 'executable'),
            path=self._configurator.get('cd-hit', 'path'))

        self._input = fasta
        # Output name: <fasta basename>.<threshold with '.' turned into '_'>
        output_name = '{0}.{1}'.format(os.path.basename(fasta),
                                       str(threshold).replace('.', '_'))
        self._output = os.path.join(output_dir, output_name)
        self._threshold = str(threshold)
        self._word = word

        if execute:
            self._execute()

    @staticmethod
    def _word_size(threshold):
        """Return the ``-n`` word length that goes with ``threshold``."""
        for lower_bound, word in ((0.7, 5), (0.6, 4), (0.5, 3), (0.4, 2)):
            if threshold >= lower_bound:
                return word
        return 1

    @property
    def output_file(self):
        """Path of the '.clstr' file that sits next to the ``-o`` output."""
        return self._output + '.clstr'

    def _execute(self):
        """Add the cd-hit attributes and run the executable.

        A SystemError raised by the execution is reported on stderr and not
        propagated.
        """
        self._exe.add_attribute(self._input, '-i')
        self._exe.add_attribute(self._output, '-o')
        self._exe.add_attribute(self._threshold, '-c')
        self._exe.add_attribute('1', '-g')
        # Passed as text like every other attribute value.
        self._exe.add_attribute(str(self._word), '-n')

        try:
            self._exe.execute(silent=True)
        except SystemError as e:
            sys.stderr.write(
                'Some error occurred while executing cd-hit\n{0}\n'.format(e))
Esempio n. 3
0
class BlastExe(object):
    """Configure and run a blast executable against one fixed database.

    The executable is built from the 'blast' section of the configuration
    file named by the SBI_CONFIG_FILE environment variable (falling back to
    ``default_configuration_file``). ``-db``, ``-outfmt 5`` and
    ``-lcase_masking`` are set by the constructor; ``-db`` and ``-outfmt``
    cannot be changed afterwards through :meth:`add_attribute`.
    """

    def __init__(self, database, search_type='prot'):
        """Build the executable and attach the database to it.

        :param database: path to the database; made absolute and handed to
            ``self._check_database``.
        :param search_type: either 'prot' or 'nucl'.
        :raises BE: code -10 when ``search_type`` is not one of the above.
        """
        # Search type check
        if search_type not in ('prot', 'nucl'):
            raise BE(-10)
        self._search_type = search_type

        # Blast executable configuration
        self._configurator = ConfigParser.RawConfigParser(allow_no_value=True)
        self._configurator.read(os.getenv('SBI_CONFIG_FILE', default_configuration_file))
        self._exe = Executable(
            executable=self._configurator.get('blast', 'executable'),
            path=self._configurator.get('blast', 'path'),
            variable_path=self._configurator.get('blast', 'variable_path'))

        # Database configuration; an optional '<database>.idx' file is
        # opened for reading when it exists.
        self._database = self._check_database(os.path.abspath(database))
        index_name = self._database.file.full + ".idx"
        if os.path.isfile(index_name):
            self._idx = File(file_name=index_name, action='r')
        else:
            self._idx = None

        # Adding fixed blast parameters
        self._exe.add_attribute(self._database.file.full, '-db')
        self._exe.add_attribute('5', '-outfmt')
        self._exe.add_parameter('-lcase_masking')

        SBIglobals.alert('debug', self, 'New Blast Executable created.\nBlast executable at {0}\n'.format(self._exe.full_executable))

        self._selfHit = False
        self._hitIDformat = 'single'
        self._overwritte = False
        self._clean_files = True

    # ATTRIBUTES
    @property
    def database(self):
        """Object returned by ``_check_database`` for the database."""
        return self._database

    @property
    def selfHit(self):
        """Stored flag (default False); not read by the methods shown here."""
        return self._selfHit

    @selfHit.setter
    def selfHit(self, value):
        # The parameter used to be misspelled, so any assignment raised
        # NameError.
        self._selfHit = value

    @property
    def hitIDformat(self):
        """One of 'single', 'double' or 'all' (default 'single')."""
        return self._hitIDformat

    @hitIDformat.setter
    def hitIDformat(self, value):
        # Unknown values silently fall back to 'single'.
        if value in ('single', 'double', 'all'):
            self._hitIDformat = value
        else:
            self._hitIDformat = 'single'

    @property
    def overwritte(self):
        """When true, blast is run even if the output file already exists."""
        return self._overwritte

    @overwritte.setter
    def overwritte(self, value):
        self._overwritte = value

    @property
    def clean_files(self):
        """When true, the execute methods call ``_clean`` on their files."""
        return self._clean_files

    @clean_files.setter
    def clean_files(self, value):
        self._clean_files = value

    # FUNCTIONS
    def add_attribute(self, attribute_value, attribute_id):
        """Add an extra ``attribute_id attribute_value`` pair to the command.

        :param attribute_value: value; converted to ``str`` before use.
        :param attribute_id: flag name, e.g. '-evalue'.
        :raises AttributeError: when ``attribute_id`` is '-db' or '-outfmt',
            which are fixed by the constructor.
        """
        protected = set(['-db', '-outfmt'])
        if attribute_id in protected:
            raise AttributeError('The parameters in {0} cannot be altered'.format(protected))

        self._exe.add_attribute(str(attribute_value), attribute_id)

    def execute_query_seq(self, sequenceID=None, sequence=None, blast_input_file=None, blast_output_file=None):
        """Blast a single sequence given by ID and/or by its residues.

        With only ``sequenceID`` the sequence is retrieved from the database
        object; with only ``sequence`` the ID 'QuerySequence' is used.

        :param sequenceID: identifier of the query (a single value).
        :param sequence: query sequence.
        :param blast_input_file: FASTA file to write the query to; a name
            based on the process id and clock is used in the current
            directory when omitted.
        :param blast_output_file: blast output file; a similarly generated
            name is used when omitted.
        :returns: the result of ``self._parse_blast``, or an empty
            ``BR.BlastResult`` when the sequence holds nothing but X/x.
        :raises AttributeError: when neither ID nor sequence is given, or
            when ``sequenceID`` is a list, set or tuple.
        """
        if sequenceID is None and sequence is None:
            raise AttributeError('Either a sequence or ID is needed to perform the blast')

        if isinstance(sequenceID, (list, set, tuple)):
            raise AttributeError('Blasts can only be executed one at a time due to XML output restrictions')

        if sequenceID is None:
            sequenceID = 'QuerySequence'
        # Given only a code implies that the sequence of interest is in the
        # database itself.
        if sequence is None:
            grabbed = self._database.retrieve(sequenceID)
            sequenceID = grabbed.id
            sequence = grabbed.sequence

        # A sequence that is entirely unknown would crash blast.
        if self._only_unknown_residues(sequence):
            return BR.BlastResult(queryname=sequenceID, querylength=len(sequence))

        file_prefix = ".".join([str(os.getpid()), str(int(time.clock() * 100000))])
        if blast_input_file is None:
            temp_input_name = os.path.join(os.getcwd(), file_prefix + ".tmp.fa")
        else:
            temp_input_name = blast_input_file
        if blast_output_file is None:
            temp_output_name = os.path.join(os.getcwd(), file_prefix + ".blast.xml.out")
        else:
            temp_output_name = blast_output_file

        query_fasta = Fasta.build(file_name=temp_input_name, sequenceID=sequenceID,
                                  sequence=sequence, force=True)

        self._execute(input_file=query_fasta, output_file=temp_output_name)

        blast_result = self._parse_blast(sequence, temp_output_name)

        if self.clean_files:
            self._clean([temp_input_name, temp_output_name])

        return blast_result

    def execute_query(self, query_file=None, blast_output_file=None):
        """Blast the single sequence held in a FASTA file.

        :param query_file: a file name, a ``File`` or a ``Fasta`` object.
        :param blast_output_file: blast output file; when omitted a name
            built from the FASTA prefix and the process id is used in the
            current directory.
        :returns: the result of ``self._parse_blast``, or an empty
            ``BR.BlastResult`` when the sequence holds nothing but X/x.
        :raises AttributeError: when ``query_file`` is of an unsupported
            type (this used to surface as an UnboundLocalError).
        :raises BE: code -4 when the FASTA reports ``is_multifasta``.
        """
        if isinstance(query_file, basestring) or isinstance(query_file, File):
            newFasta = Fasta(fasta_file=query_file)
        elif isinstance(query_file, Fasta):
            newFasta = query_file
        else:
            raise AttributeError('query_file must be a file name, a File or a Fasta object')

        if newFasta.is_multifasta:
            raise BE(code=-4, value=newFasta.file.full)

        newFasta.load()
        query = newFasta.sequences[0]
        # A sequence that is entirely unknown would crash blast.
        if self._only_unknown_residues(query.sequence):
            return BR.BlastResult(queryname=query.id, querylength=len(query.sequence))

        if blast_output_file is None:
            temp_output_name = os.path.join(os.getcwd(), newFasta.file.prefix + "." + str(os.getpid()) + ".blast.xml.out")
        else:
            temp_output_name = blast_output_file

        self._execute(input_file=newFasta, output_file=temp_output_name)

        blast_result = self._parse_blast(newFasta.sequences[0].sequence, temp_output_name)

        if self.clean_files:
            self._clean([temp_output_name])

        return blast_result

    # PRIVATE FUNCTIONS
    def _execute(self, input_file, output_file):
        """Run blast for ``input_file`` unless the output can be reused.

        Nothing is executed when ``output_file`` already exists and
        ``overwritte`` is false. Execution happens on a deep copy of the
        configured executable, so ``-query``/``-out`` do not accumulate.

        :param input_file: object exposing ``.file.full`` (the query path).
        :param output_file: path of the blast output file.
        :raises BE: code -1 when execution raises a SystemError whose
            message is not one of the known ignorable warnings.
        """
        if not os.path.isfile(output_file) or self.overwritte:
            final_executable = copy.deepcopy(self._exe)
            final_executable.add_attribute(input_file.file.full, '-query')
            final_executable.add_attribute(output_file, '-out')

            try:
                final_executable.execute()
            except SystemError as e:
                if not self._is_ignorable_warning(str(e)):
                    raise BE(code=-1, value=str(e))

    @staticmethod
    def _only_unknown_residues(sequence):
        """Return True when ``sequence`` is empty once X/x are removed."""
        return len(re.sub(r'[Xx]', '', sequence)) == 0

    @staticmethod
    def _is_ignorable_warning(message):
        """Return True when ``message`` matches a known harmless warning."""
        ignorable_patterns = (
            'Warning: Composition-based score adjustment conditioned on sequence properties and unconditional composition-based score adjustment is not supported with PSSMs, resetting to default value of standard composition-based statistics',
            r'Selenocysteine \(U\) at position',
        )
        return any(re.search(pattern, message) for pattern in ignorable_patterns)