コード例 #1
0
def main():
    """Remove the requested annotations from a sequence file.

    The parameters come from set_parameters(); the sequences are written
    to the output handle in the same format that was guessed for the input.
    """
    in_fhand, out_fhand, annots_to_remove = set_parameters()

    # the guessed input format is reused for both reading and writing
    seq_format = guess_seq_file_format(in_fhand)

    cleaned_seqs = remove_annotation(in_fhand, seq_format, annots_to_remove)
    write_seqs_in_file(cleaned_seqs, seq_fhand=out_fhand, format=seq_format)
コード例 #2
0
    def test_json_reader():
        """It tests the json sequence reader.

        Two sequences (the second one with an explicit DNA alphabet) are
        written in json format to a temporary file. The file is then
        reopened by name, read with seqs_in_file, and the sequences and the
        alphabet are compared with the originals.
        """
        seq0 = SeqWithQuality(seq=Seq('ATGATAGATAGATGF'), name='seq1')
        seq1 = SeqWithQuality(seq=Seq('GATACCA', DNAAlphabet()), name='seq2')

        # both handles are managed by ``with`` so they are closed even when
        # an assertion fails
        with tempfile.NamedTemporaryFile(suffix='.json') as fhand:
            # first we write the sequences
            write_seqs_in_file([seq0, seq1], fhand, format='json')
            # flush before the file is reopened by name below
            fhand.flush()

            # now we read them
            with open(fhand.name) as in_fhand:
                seqs = list(seqs_in_file(in_fhand))
                assert seqs[0].seq == seq0.seq
                assert seqs[1].seq == seq1.seq
                assert str(seqs[1].seq.alphabet) == str(seq1.seq.alphabet)
コード例 #3
0
    def test_description_annotation_analysis():
        "We can annotate with description"
        test_dir = NamedTemporaryDir()
        project_name = "backbone"
        arab_blastdb = join(TEST_DATA_DIR, "blast", "arabidopsis_genes+")
        config = {
            "blast": {"arabidopsis": {"path": arab_blastdb, "species": "arabidopsis"}},
            "Annotation": {"description_annotation": {"description_databases": ["arabidopsis"]}},
            "General_settings": {"threads": THREADS},
        }

        settings_path = create_project(directory=test_dir.name, name=project_name, configuration=config)
        project_dir = join(test_dir.name, project_name)

        # some melon file to annotate
        input_dir = join(project_dir, BACKBONE_DIRECTORIES["annotation_input"])
        os.makedirs(input_dir)
        seq_ = "AGGTGTCACCGTTCACGAGGGCGACTGGGACTCCCACGGGGCCATCAAGTCCTGGAACTACA"
        seq_ += "CATGCGGTCCTCTATCTCATTCTCTATTTGTATGAATATGTGTTTATTACTAGCTAGGGTTT"
        seq_ += "CTATTAATGAAAGGTTCATGTAAATATATGAAGATGGGAAGCAAGAGGTGTTCAAGGAGAAG"
        seq_ += "AGGGAGTTAGACGACCAGAAGAT"
        seq1 = SeqWithQuality(Seq(seq_), id="CUTC021854")
        seq2 = SeqWithQuality(Seq("Atagtagcatcagatgagcatcgacttctagctagctagct"), id="CUTC021853")
        write_seqs_in_file([seq1, seq2], open(join(input_dir, "melon.st_nucl.pl_454.fasta"), "a"))

        do_analysis(project_settings=settings_path, kind="annotate_descriptions", silent=True)

        repr_fpath = join(project_dir, BACKBONE_DIRECTORIES["annotation_dbs"], "melon.st_nucl.pl_454.0.pickle")
        result = open(repr_fpath).read()
        # print result
        assert "yet another one" in result

        do_analysis(project_settings=settings_path, kind="annotation_stats", silent=True)
        stats_fpath = join(project_dir, "annotations", "features", "stats", "melon.st_nucl.pl_454.txt")
        result = open(stats_fpath).read()
        expected = """Annotation statistics
---------------------
Number of sequences: 2
Sequences with description: 1"""
        assert expected in result

        test_dir.close()
コード例 #4
0
    def test_pickle_writer():
        """It tests that sequence features survive a pickle round trip.

        Two sequences are written in pickle format to an in-memory handle
        and read back; the qualifiers of the snv feature of the second
        sequence must be equal to the ones that were written.
        """
        expected_alleles = {('G', 3): {}}
        expected_filters = {'a_filter': {('param',): False}}

        snv_location = FeatureLocation(ExactPosition(3), ExactPosition(3))
        snv_qualifiers = {'alleles': expected_alleles,
                          'filters': expected_filters}
        snv = SeqFeature(snv_location, type='snv', qualifiers=snv_qualifiers)

        plain_seq = SeqWithQuality(seq=Seq('ATGATAGATAGATGF'), name='seq1')
        snv_seq = SeqWithQuality(seq=Seq('GATACCA'), name='seq2',
                                 features=[snv])

        buf = StringIO()
        write_seqs_in_file([plain_seq, snv_seq], buf, format='pickle')

        # rewind so the reader starts at the first written byte
        buf.seek(0)
        read_back = list(seqs_in_file(buf))

        read_qualifiers = read_back[1].features[0].qualifiers
        assert read_qualifiers['alleles'] == expected_alleles
        assert read_qualifiers['filters'] == expected_filters
コード例 #5
0
def seqio(in_seq_fhand, out_seq_fhand, out_format, double_encoding=False,
          in_qual_fhand=None, out_qual_fhand=None, in_format=None):
    """Convert a sequence file from one format to another.

    When in_format is not given it is guessed from in_seq_fhand.

    The conversion goes through seqs_in_file/write_seqs_in_file when a
    quality handle is involved (input or output) or when either format is
    'repr', 'json' or 'pickle'. Otherwise it is delegated to SeqIO.convert.

    The output handles are flushed before returning.
    """
    if not in_format:
        in_format = guess_seq_file_format(in_seq_fhand)

    own_formats = ('repr', 'json', 'pickle')
    uses_qual_files = in_qual_fhand is not None or out_qual_fhand is not None
    needs_own_io = (uses_qual_files or
                    in_format in own_formats or
                    out_format in own_formats)

    if not needs_own_io:
        SeqIO.convert(in_seq_fhand, in_format, out_seq_fhand, out_format)
    else:
        seqs = seqs_in_file(seq_fhand=in_seq_fhand,
                            qual_fhand=in_qual_fhand,
                            format=in_format,
                            double_encoding=double_encoding)
        write_seqs_in_file(seqs,
                           seq_fhand=out_seq_fhand,
                           qual_fhand=out_qual_fhand,
                           format=out_format)

    out_seq_fhand.flush()
    if out_qual_fhand:
        out_qual_fhand.flush()
コード例 #6
0
    def test_transitive_clustering(self):
        """We do a transitive clustering.

        The blast result is clustered twice with the same filters: first on
        its own with do_transitive_clustering_on_blast, then together with a
        sequence file with do_transitive_clustering_all, which also returns
        the names that were not matched.
        """
        blast_fpath = os.path.join(TEST_DATA_DIR,
                                   'transitive_cluster.blastout.xml')
        filter1 = {'kind': 'score_threshold',
                   'score_key': 'similarity',
                   'min_score': 98,
                  }
        filter2 = {'kind': 'min_length',
                   'min_num_residues': 50,
                   'length_in_query': True
                  }
        filters = [filter1, filter2]

        # the asserts stay inside each ``with`` block so the handles are
        # still open while the results are inspected, as in the original
        with open(blast_fpath, 'rt') as blast_fhand:
            clusters = do_transitive_clustering_on_blast(blast_fhand, filters)
            assert set([u'seq3', u'seq2', u'seq1']) in clusters
            assert set([u'seq4']) in clusters

        # with the sequences
        seqs = [SeqWithQuality(name='seq1', seq=Seq('aa')),
                SeqWithQuality(name='seq2', seq=Seq('aa')),
                SeqWithQuality(name='seq3', seq=Seq('aa')),
                SeqWithQuality(name='seq4', seq=Seq('aa')),
                SeqWithQuality(name='seq5', seq=Seq('aa')),
                SeqWithQuality(name='seq6', seq=Seq('aa'))]
        with open(blast_fpath, 'rt') as blast_fhand:
            with NamedTemporaryFile() as seqs_fhand:
                write_seqs_in_file(seqs, seqs_fhand)
                clusters, no_matched = do_transitive_clustering_all(
                                                                blast_fhand,
                                                                seqs_fhand,
                                                                filters)
                assert set([u'seq3', u'seq2', u'seq1']) in clusters
                assert set([u'seq4']) in clusters
                assert 'seq5' in no_matched
                assert 'seq6' in no_matched
コード例 #7
0
    def test_json_writer():
        """It tests the json sequence writer.

        Two sequences are written in json format to an in-memory handle.
        The third written line is parsed with json.loads and checked to hold
        the second sequence, with the tuple allele key stored as its string
        representation. The handle is then read back with seqs_in_file and
        the feature qualifiers are compared with the original ones.
        """
        seq0 = SeqWithQuality(seq=Seq('ATGATAGATAGATGF'), name='seq1')
        alleles = {('G', 3): {}}
        filters = {'a_filter': {('param',): False}}
        snv_feature = SeqFeature(FeatureLocation(ExactPosition(3),
                                                 ExactPosition(3)),
                                 type='snv',
                                 qualifiers={'alleles': alleles,
                                             'filters': filters})
        seq1 = SeqWithQuality(seq=Seq('GATACCA'), name='seq2',
                              features=[snv_feature])
        fhand = StringIO()
        write_seqs_in_file([seq0, seq1], fhand, format='json')

        lines = fhand.getvalue().splitlines()
        struct1 = json.loads(lines[2])
        assert struct1['seq']['seq'] == 'GATACCA'
        # list() instead of .keys()[0]: indexing the result of keys() only
        # works where keys() returns a list
        json_alleles = struct1['features'][0]['qualifiers']['alleles']
        assert list(json_alleles)[0] == "('G', 3)"

        # rewind so the reader starts at the beginning of the written data
        fhand.seek(0)
        seqs = list(seqs_in_file(fhand))
        assert seqs[1].features[0].qualifiers['alleles'] == alleles
        assert seqs[1].features[0].qualifiers['filters'] == filters
コード例 #8
0
ファイル: gff.py プロジェクト: BioinformaticsArchive/franklin
    def write(self, item):
        '''It writes one item to the output file.

        The item should be a (kind, information) tuple; None is ignored.
        When the file position is 0 a "##gff-version" header is written
        first, using the default write version if no version is set.
        Items whose kind is not METADATA, COMMENT, FEATURE or FASTA write
        nothing. The file is flushed after every item that is not None.
        '''
        if item is None:
            return
        kind, payload = item
        out = self._fhand

        # the version header goes first in a file that is still at offset 0
        if out.tell() == 0:
            if not self.version:
                self.version = _DEFAULT_WRITE_VERSION
            out.write('##gff-version %s\n' % self.version)

        if kind == METADATA:
            out.write('##' + payload + '\n')
        elif kind == COMMENT:
            out.write('#' + payload + '\n')
        elif kind == FEATURE:
            out.write(self._feature_to_str(payload) + '\n')
        elif kind == FASTA:
            out.write('##FASTA\n')
            write_seqs_in_file(payload, out, format='fasta')

        out.flush()
コード例 #9
0
    def test_ortholog_annotation_analysis():
        "We can annotate orthologs"
        test_dir = NamedTemporaryDir()
        project_name = "backbone"

        config = {
            "blast": {
                "arabidopsis": {"path": "/path/to/tair", "species": "arabidopsis", "kind": "nucl"},
                "arabidopsis2": {"path": "/path/to/tair2", "species": "arabidopsis2", "kind": "nucl"},
            },
            "Annotation": {"ortholog_annotation": {"ortholog_databases": ["arabidopsis", "arabidopsis2"]}},
            "General_settings": {"threads": THREADS},
        }

        settings_path = create_project(directory=test_dir.name, name=project_name, configuration=config)
        project_dir = join(test_dir.name, project_name)

        # create blast results
        melon_tair_blastdir = join(project_dir, "annotations", "blast", "melon.st_nucl.pl_454", "tair")
        melon_tair2_blastdir = join(project_dir, "annotations", "blast", "melon.st_nucl.pl_454", "tair2")
        os.makedirs(melon_tair_blastdir)
        os.makedirs(melon_tair2_blastdir)
        tair_melon_blastdir = join(project_dir, "annotations", "blast", "tair", "melon.st_nucl.pl_454")
        tair2_melon_blastdir = join(project_dir, "annotations", "blast", "tair2", "melon.st_nucl.pl_454")
        os.makedirs(tair_melon_blastdir)
        os.makedirs(tair2_melon_blastdir)
        blast_fname = BACKBONE_BASENAMES["blast_basename"] + ".tblastx.xml"
        shutil.copy(join(TEST_DATA_DIR, "melon_tair.xml"), join(melon_tair_blastdir, blast_fname))
        shutil.copy(join(TEST_DATA_DIR, "melon_tair.xml"), join(melon_tair2_blastdir, blast_fname))
        shutil.copy(join(TEST_DATA_DIR, "tair_melon.xml"), join(tair_melon_blastdir, blast_fname))
        shutil.copy(join(TEST_DATA_DIR, "tair_melon.xml"), join(tair2_melon_blastdir, blast_fname))

        # some melon file to annotate
        input_dir = join(project_dir, BACKBONE_DIRECTORIES["annotation_input"])
        os.makedirs(input_dir)
        seq1 = SeqWithQuality(Seq("A"), id="melon1")
        seq2 = SeqWithQuality(Seq("A"), id="melon2")
        write_seqs_in_file([seq1, seq2], open(join(input_dir, "melon.st_nucl.pl_454.fasta"), "a"))

        do_analysis(project_settings=settings_path, kind="annotate_orthologs", silent=True)
        pickle_fpath = join(project_dir, BACKBONE_DIRECTORIES["annotation_dbs"], "melon.st_nucl.pl_454.0.pickle")
        pickle = open(pickle_fpath).read()
        assert "arabidopsis-orthologs" in pickle
        assert "arabidopsis2-orthologs" in pickle

        do_analysis(project_settings=settings_path, kind="write_annotations", silent=True)

        orf_fpath = join(project_dir, "annotations", "features", "melon.st_nucl.pl_454.orthologs")
        assert os.path.exists(orf_fpath)
        assert "tair1" in open(orf_fpath).read()

        orf_fpath = join(project_dir, "annotations", "features", "melon.st_nucl.pl_454.orf")
        assert not os.path.exists(orf_fpath)

        do_analysis(project_settings=settings_path, kind="annotation_stats", silent=True)
        stats_fpath = join(project_dir, "annotations", "features", "stats", "melon.st_nucl.pl_454.txt")
        result = open(stats_fpath).read()
        expected = """Orthologs
_________
Sequences with arabidopsis orthologs: 2
Number of arabidopsis orthologs: 2
Sequences with arabidopsis2 orthologs: 2
Number of arabidopsis2 orthologs: 2"""

        assert expected in result

        test_dir.close()