Example #1
0
def load(datadir, create_session=None):
    '''
    nr_loaded = load(datadir, create_session={backend.create_session})

    Load ENSEMBL FASTA file into database

    The first file in `datadir` matching ``Mus_musculus.*.pep.all.fa.gz`` is
    read. For every FASTA entry, a gene-to-peptide ``Translation`` and an
    ``EnsemblSequence`` are added to the session, which is committed after
    each entry (entries added before a failure therefore stay committed).

    Parameters
    ----------
    datadir : str
        Directory containing the FASTA file
    create_session : callable, optional
        a callable object that returns an sqlalchemy session

    Returns
    -------
    nr_loaded : integer
        Nr. of entries loaded

    Raises
    ------
    IOError
        If no file matching ``Mus_musculus.*.pep.all.fa.gz`` is found in
        `datadir`
    ValueError
        If the fourth whitespace-separated field of a FASTA header is
        missing or does not start with ``gene:``
    '''
    from waldo.backend import call_create_session
    session = call_create_session(create_session)

    pattern = path.join(datadir, 'Mus_musculus.*.pep.all.fa.gz')
    candidates = glob.glob(pattern)
    if not candidates:
        # Fail with the pattern in the message instead of a bare IndexError.
        raise IOError('waldo.sequences.load: no file matching %r' % pattern)
    filename = candidates[0]

    gene_prefix = 'gene:'
    nr_loaded = 0
    for record in fasta.read(filename):
        htokens = record.header.split()
        # Explicit check rather than `assert`: assertions are removed under
        # `python -O`, which would let a malformed header slip through and
        # store a truncated token as the gene id.
        if len(htokens) < 4 or not htokens[3].startswith(gene_prefix):
            raise ValueError(
                'waldo.sequences.load: cannot find gene id in header %r'
                % record.header)
        peptide = htokens[0]
        gene = htokens[3][len(gene_prefix):]
        session.add(
            Translation(
                'ensembl:gene_id',
                gene,
                'ensembl:peptide_id',
                peptide))
        session.add(models.EnsemblSequence(peptide, record.sequence))
        session.commit()
        nr_loaded += 1
    return nr_loaded
Example #2
0
def test_read():
    '''Check that fasta.read parses the two entries of test.fasta.

    Verifies the number of entries, that the second header mentions the
    expected gene id, and that headers carry neither the leading '>' marker
    nor a trailing newline.
    '''
    seqs = list(fasta.read(path_to_testfile('test.fasta')))
    assert len(seqs) == 2
    second_header = seqs[1].header
    # Membership test, not str.find(): find() returns -1 (truthy) when the
    # substring is absent and 0 (falsy) when it sits at the very start, so
    # asserting on its result checks the wrong thing.
    assert 'gene:ENSMUSG00000064345' in second_header
    assert second_header[-1] != '\n'
    assert second_header[0] != '>'