def test_blastdb_seq_kind():
        "It checks that the blastdb kind is guessed as 'nucl' or 'prot'"
        # (database name inside TEST_DATA_DIR/blast, kind that should be
        # guessed for it)
        expected_kinds = (('tomato_genome2', 'nucl'),
                          ('tair7_pep', 'prot'))
        for db_name, expected_kind in expected_kinds:
            blastdb = join(TEST_DATA_DIR, 'blast', db_name)
            assert guess_blastdb_kind(blastdb) == expected_kind
    def _get_b2g_blast(self, input_fpath, goblast_settings):
        """It runs the blast for blast2go and returns it as an open file.

        The database kind is read from the 'kind' entry of goblast_settings
        when there is one, otherwise it is guessed from the 'path' entry.
        The blast is run with backbone_blast_runner and its result is copied,
        piece by piece as given by xml_itemize(blast, 'Iteration', 2), into
        a temporary .xml file. That temporary file is returned flushed and
        still open.
        """
        db_kind = (goblast_settings['kind'] if 'kind' in goblast_settings
                   else guess_blastdb_kind(goblast_settings['path']))

        query_kind = scrape_info_from_fname(input_fpath)['st']
        program = guess_blast_program(query_kind, db_kind,
                                      prefer_tblastx=True)
        db_path = goblast_settings['path']

        general_settings = self._project_settings['General_settings']
        blast_fpath = backbone_blast_runner(
                                query_fpath=input_fpath,
                                project_dir=general_settings['project_path'],
                                blast_program=program,
                                blast_db=db_path,
                                dbtype=db_kind,
                                threads=self.threads)

        # The chopping is hard-wired on, returning the plain blast file is
        # only kept as the switched off alternative.
        chop_big_xml = True
        items_per_piece = 2
        if not chop_big_xml:
            return open(blast_fpath)

        chopped = NamedTemporaryFile(suffix='.xml')
        for piece in xml_itemize(blast_fpath, 'Iteration', items_per_piece):
            chopped.write(piece)
        # the caller gets the file open, so everything should be in the file
        # before giving it back
        chopped.flush()
        return chopped
    def run(self):
        """It runs the description annotation analysis.

        Every input is blasted against every database listed in the
        'description_databases' setting. After that a pipeline with one
        annotate_with_descriptions step per database is built, every input
        gets a configuration entry for every database, and everything is
        handed to self._run_annotation. The result of self._run_annotation
        is returned.
        """
        # Imported here so this method does not depend on a module level
        # import being available.
        import copy

        inputs, output_dirs = self._get_inputs_and_prepare_outputs()
        db_dir = output_dirs['db_dir']
        blast_settings = self._project_settings['blast']

        settings = self._project_settings['Annotation']
        annot_settings = settings['description_annotation']
        description_databases = annot_settings['description_databases']

        general_settings = self._project_settings['General_settings']
        project_dir = general_settings['project_path']

        # first we need some blasts, they are grouped by the input file path
        blasts = {}
        for input_ in inputs['input']:
            input_fpath = input_.last_version
            for database in description_databases:
                db_settings = blast_settings[database]
                if 'kind' in db_settings:
                    db_kind = db_settings['kind']
                else:
                    db_kind = guess_blastdb_kind(db_settings['path'])

                seq_type = scrape_info_from_fname(input_fpath)['st']
                blast_program = guess_blast_program(seq_type, db_kind,
                                                    prefer_tblastx=True)

                blast = backbone_blast_runner(query_fpath=input_fpath,
                                              project_dir=project_dir,
                                              blast_program=blast_program,
                                              blast_db=db_settings['path'],
                                              dbtype=db_kind,
                                              threads=self.threads)
                # The key that is checked has to be the key that is stored,
                # otherwise the list would be restarted for every database
                # and only the last blast would be kept.
                blasts.setdefault(input_fpath, []).append({'blast': blast,
                                                           'modifier': None})

        pipeline = []
        configuration = {}
        for database in description_databases:
            # Every step needs its own copy. Without it all the pipeline
            # entries would be the very same module level dict and all of
            # them would end up with the name of the last database.
            step = copy.deepcopy(annotate_with_descriptions)
            step['name_in_config'] = database
            pipeline.append(step)
            for input_ in inputs['input']:
                step_config = {'blasts': blasts[input_.last_version]}
                # The entry of the input is created only once, so the
                # configuration of the previous databases is not lost.
                input_config = configuration.setdefault(input_.basename, {})
                input_config[database] = step_config

        return self._run_annotation(pipeline=pipeline,
                                    configuration=configuration,
                                    inputs=inputs,
                                    output_dir=db_dir)
    def run(self):
        """It runs the ortholog annotation analysis.

        For every input and every database listed in the
        'ortholog_databases' setting two blasts are run: the input against
        the database and the database sequence file against the input.
        After that a pipeline with one copy of annotate_orthologs per
        database is built, every input gets a configuration entry for every
        database, and everything is handed to self._run_annotation. The
        result of self._run_annotation is returned.
        """
        inputs, output_dirs = self._get_inputs_and_prepare_outputs()
        output_dir = output_dirs['result']
        blast_settings = self._project_settings['blast']
        annot_settings = self._project_settings['Annotation']
        ortholog_databases = \
                   annot_settings['ortholog_annotation']['ortholog_databases']
        project_dir = self._project_settings['General_settings']['project_path']

        # first we need some blasts
        # blasts[input_][database] = (blast, subj_def_as_acc, reverse blast)
        blasts = {}
        for input_ in inputs['input']:
            for database in ortholog_databases:
                db_settings = blast_settings[database]
                db_kind = (db_settings['kind'] if 'kind' in db_settings
                           else guess_blastdb_kind(db_settings['path']))

                seq_type = scrape_info_from_fname(input_.last_version)['st']
                forward_program = guess_blast_program(seq_type, db_kind,
                                                      prefer_tblastx=True)

                blastdb = db_settings['path']
                subj_def_as_acc = (db_settings['subj_def_as_acc']
                                   if 'subj_def_as_acc' in db_settings
                                   else None)

                forward = backbone_blast_runner(
                                            query_fpath=input_.last_version,
                                            project_dir=project_dir,
                                            blast_program=forward_program,
                                            blast_db=blastdb,
                                            dbtype=db_kind,
                                            threads=self.threads)

                # For the reverse blast the roles are swapped: the database
                # path is used as the query and the input as the sequences
                # of the database. The query path could be different adding
                # something to the settings.
                # NOTE(review): dbtype is always 'nucl' here, whatever the
                # seq_type of the input is, confirm that this is intended.
                reverse_program = guess_blast_program(db_kind, seq_type,
                                                      prefer_tblastx=True)
                reverse = backbone_blast_runner(
                                            query_fpath=blastdb,
                                            project_dir=project_dir,
                                            blast_program=reverse_program,
                                            blast_db_seq=input_.last_version,
                                            dbtype='nucl',
                                            threads=self.threads)

                blasts.setdefault(input_, {})[database] = (forward,
                                                           subj_def_as_acc,
                                                           reverse)

        pipeline = []
        configuration = {}
        for database in ortholog_databases:
            # an annotation step for every ortholog database
            step = copy.deepcopy(annotate_orthologs)
            step['name_in_config'] = database
            pipeline.append(step)

            for input_ in inputs['input']:
                forward, subj_def_as_acc, reverse = blasts[input_][database]
                step_config = {
                    'blast': {'blast': forward,
                              'subj_def_as_acc': subj_def_as_acc},
                    'reverse_blast': {'blast': reverse,
                                      'subj_def_as_acc': None},
                    'species': database}
                configuration.setdefault(input_.basename,
                                         {})[database] = step_config

        return self._run_annotation(pipeline=pipeline,
                                    configuration=configuration,
                                    inputs=inputs,
                                    output_dir=output_dir)