def test_compute_aa_composition_result_complex_heteropolymer():
    """Check that each residue of 'AWGY' maps to 0.25 in the composition."""
    sequence = 'AWGY'
    composition = compute_aa_composition(sequence)
    # Same checks, same order as the residues appear in the sequence.
    for residue in ('A', 'W', 'G', 'Y'):
        assert composition[residue] == 0.25
Example #2
0
def predict_protein_location(protein_sequence:str) -> str:
    """
    Predict the location of a protein from its sequence.

    Parameters
    ----------
    protein_sequence:str
        protein sequence handed to ``compute_aa_composition``.

    Returns
    -------
    protein_location:str
        'Membrane' when the model prediction equals 1, 'Cytoplasm'
        otherwise.
    """
    # NOTE(review): ``model`` is not defined inside this function; it is
    # presumably a module-level trained classifier -- confirm it is loaded
    # before this function is called.
    features = pd.DataFrame([compute_aa_composition(protein_sequence)])
    predicted_label = model.predict(features)
    return 'Membrane' if predicted_label == 1 else 'Cytoplasm'
Example #3
0
def run_model(file_path: str, model_path: str) -> pd.DataFrame:
    """
    Run a membrane protein prediction on a FASTA file.

    Parameters
    ----------
    file_path:str
        path to proteins in FASTA format.

    model_path:str
        path to trained model in pickle format

    Returns
    -------
    df_prediction:pd.DataFrame
        Pandas DataFrame containing the membrane protein predictions,
        with one row per FASTA record and the columns 'id' and
        'membrane'. Empty (but with both columns) when the FASTA file
        contains no records.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # model files that come from a trusted source.
    with open(model_path, 'rb') as handle:
        model = pickle.load(handle)

    ids = []
    compositions = []
    # The context manager guarantees the FASTA handle is closed, even if
    # parsing or feature computation raises.
    with open(file_path) as handle:
        for record in SeqIO.parse(handle, 'fasta'):
            ids.append(record.id)
            compositions.append(compute_aa_composition(str(record.seq)))

    # Nothing to predict on: return an empty frame with the expected
    # columns instead of failing on a feature table that has no columns.
    if not ids:
        return pd.DataFrame(columns=['id', 'membrane'])

    # Build the feature table in one go; DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every iteration.
    X = pd.DataFrame(compositions)
    y_pred = model.predict(X)

    return pd.DataFrame({'id': ids, 'membrane': y_pred})
def test_compute_aa_composition_result_simple_homopolymer():
    """Check that a sequence made only of 'A' maps 'A' to 1."""
    composition = compute_aa_composition('AAAAAA')
    assert composition['A'] == 1
def test_compute_aa_composition_return_type():
    """Check that compute_aa_composition returns a dict for 'AWGY'."""
    result = compute_aa_composition('AWGY')
    assert isinstance(result, dict)