#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import oas_file_handling as oas
import sys
from scipy.stats import chi2_contingency, chi2, spearmanr

#human_file = oas.oas_file("Vander_Heiden_2017_Heavy_HD09_IGHG_HD09_Unsorted_Bcells_age31_healthy_iglblastn_igblastn_IGHG.json.gz")
#mouse_file = oas.oas_file("Collins_2015_IGHG_Mouse_sample_2_iglblastn_igblastn_IGHG.json.gz")
#mouse_file = oas.oas_file("Corcoran_2016_heavy_mouse_IGHG_mouse_heavy_M2_igblastn_igblastn_IGHG.json.gz")

# Command-line arguments:
#   argv[1]  first input, opened with oas.oas_file (bound to human_file)
#   argv[2]  second input, opened with oas.oas_file (bound to mouse_file)
#   argv[3]  optional integer threshold (default 1)
#   argv[4]  optional significance level (default 0.05)
human_file = oas.oas_file(sys.argv[1])
mouse_file = oas.oas_file(sys.argv[2])

# Optional third argument: integer threshold, later handed to all_positions().
threshold = int(sys.argv[3]) if len(sys.argv) > 3 else 1

# Optional fourth argument: significance level. It is parsed with float()
# because int() raises ValueError on fractional input such as "0.05", and
# the default itself (0.05) is not an integer.
significance = float(sys.argv[4]) if len(sys.argv) > 4 else 0.05

# Positions to compare, as returned by the first file for the chosen
# threshold.
# NOTE(review): the exact filtering done by all_positions() is defined in
# oas_file_handling and is not visible here - confirm what `threshold` means.
position_list = human_file.all_positions(threshold)

# Report header: names of both input files and the significance level in use.
# The message deliberately ends with a newline followed by four spaces.
print("""Using data from {} and {} and a significance level of {}:
    """.format(human_file.file_name, mouse_file.file_name, significance))
# For every selected position fetch the matching data row from each file.
# NOTE(review): the loop body appears to be cut off after these two lookups.
for position in position_list:
    human_data = human_file.data_row(position)
    mouse_data = mouse_file.data_row(position)
#!/usr/bin/env python3
# coding: utf-8

#%%

import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import oas_file_handling as oas

# The first command-line argument is handed to oas.oas_file to open the input.
f = oas.oas_file(sys.argv[1])
# Optional second argument: threshold. It is converted to int so that a
# user-supplied value has the same type as the default (1) instead of
# being left as the raw argv string.
threshold = int(sys.argv[2]) if len(sys.argv) > 2 else 1
#f = oas_file("Vander_Heiden_2017_Heavy_HD09_IGHG_HD09_Unsorted_Bcells_age31_healthy_iglblastn_igblastn_IGHG.json.gz")
#f = oas_file("Corcoran_2016_heavy_mouse_IGHG_mouse_heavy_M2_igblastn_igblastn_IGHG.json.gz")

# Amino-acid labels exposed by the input file; one table column is built
# per label below.
amino_acid_list = f.amino_acids

# NOTE(review): `page` is not used in the visible part of the script;
# presumably a counter for output that follows - confirm.
page = 1
for region_name in f.regions:
    # Maps each amino acid to a list holding one frequency per position of
    # the current region.
    raw_data = {}
    for amino_acid in amino_acid_list:
        raw_data[amino_acid] = []
        # NOTE(review): `freq` is not assigned anywhere in the visible part
        # of this script; unless it is defined elsewhere this call raises
        # NameError.
        for position in f.region_positions(region_name, freq, threshold):
            amino_acid_frequency = f.amino_acid_frequency(position)
            # Amino acids missing from the position's frequency mapping are
            # recorded as 0 so that every column has the same length.
            if amino_acid in amino_acid_frequency:
                raw_data[amino_acid].append(amino_acid_frequency[amino_acid])
            else:
                raw_data[amino_acid].append(0)

    # One DataFrame per region: columns are amino acids, rows follow the
    # order in which region_positions() yielded the positions.
    df = pd.DataFrame(raw_data)
# Example no. 3
# 0
import sys, math
import oas_file_handling as oas

# Input data set. The file name is fixed in the source; the alternatives
# below (another data set, or taking the name from argv[1]) are disabled.
f = oas.oas_file("Vander_Heiden_2017_Heavy_HD09_IGHG_HD09_Unsorted_Bcells_age31_healthy_iglblastn_igblastn_IGHG.json.gz")
#f = oas.oas_file("Corcoran_2016_heavy_mouse_IGHG_mouse_heavy_M2_igblastn_igblastn_IGHG.json.gz")
#f = oas.oas_file(str(sys.argv[1]))

# Threshold is read from the second command-line argument when one is
# supplied, otherwise it falls back to 1.
threshold = int(sys.argv[2]) if len(sys.argv) > 2 else 1


def probability(position, amino_acid):
    """Return the frequency of ``amino_acid`` at ``position`` divided by 100.

    The per-position frequencies are looked up on the module-level ``f``
    via ``f.amino_acid_frequency(position)``. When ``amino_acid`` is not
    present in that lookup result the function returns the integer 0.

    NOTE(review): the division by 100 suggests the stored frequencies are
    percentages - confirm against oas_file_handling.
    """
    frequencies = f.amino_acid_frequency(position)
    # Guard clause: an amino acid never observed at this position.
    if amino_acid not in frequencies:
        return 0
    return frequencies[amino_acid] / 100


def joint_probability(position1, amino_acid1, position2, amino_acid2):
    both = 0
    for data in f.sequence_data:
        full_sequence = f.combined_sequence(data)
        if amino_acid2 == 'Unused' and position2 not in full_sequence:
            if amino_acid1 == 'Unused' and position1 not in full_sequence:
                both += 1
            elif position1 in full_sequence and full_sequence[