Beispiel #1
0
def load_molecules(filename: str, dbname: str = '#') -> dict:
    '''### load molecules from fasta file
        Reads the whole file, splits it into records on '>', and builds one
        `Molecule` per accepted record.

        #### params:
        - filename: path of the FASTA file to read
        - dbname: database name passed through to every `Molecule`

        #### record handling:
        - The first line of a record is its header; all remaining lines are
          concatenated (newlines removed) into the sequence. A header with
          no sequence lines yields an empty sequence.
        - The header is split on single spaces. With three or more fields
          the name is the second field, otherwise the first.
        - Records whose name ends with N, L, Q, S, A or C are skipped and
          reported on stdout.
        - A name whose first ':' is at index 3 or later is truncated to its
          first two ':'-separated fields.
        - If a (possibly truncated) non-empty name is already present, the
          record is skipped, so the first record for a name is kept.
        - An empty name is stored under the key 'None'.

        *returns* -> dict mapping name to `Molecule`
    '''
    # Tuple so that str.endswith can test every suffix in a single call.
    # NOTE(review): this is a plain suffix test on the name, so any name
    # ending in one of these letters is skipped -- confirm that is intended.
    ignorable_alleles = ('N', 'L', 'Q', 'S', 'A', 'C')

    molecules = {}
    with open(filename) as file:
        # Text before the first '>' is not a record and is dropped.
        records = file.read().split('>')[1:]

    for record in records:
        # partition() never fails on a header-only record (e.g. a truncated
        # last entry); the sequence part is simply the empty string.
        header, _, body = record.partition('\n')
        seq = body.replace('\n', '')

        fields = header.split(' ')
        name = fields[1] if len(fields) > 2 else fields[0]

        if name.endswith(ignorable_alleles):
            print('ignorated ', name)
            continue

        if name:
            if name.find(':') > 2:
                name = ':'.join(name.split(':', 2)[:2])

            # Compare the whole name against the stored keys. Iterating the
            # name here would test its individual characters instead.
            if name in molecules:
                continue
        else:
            name = 'None'

        molecules[name] = Molecule(dbname=dbname, name=name, seq=seq)

    return molecules