#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Load a human and a mouse OAS file and walk the human file's positions.

Command line:
    <script> HUMAN_FILE MOUSE_FILE [THRESHOLD] [SIGNIFICANCE]

THRESHOLD (int, default 1) is passed to ``all_positions`` of the human
file.  SIGNIFICANCE (float, default 0.05) is the significance level that
is reported in the header line.
"""
import sys

from scipy.stats import chi2_contingency, chi2, spearmanr

import oas_file_handling as oas

# Example inputs:
#human_file = oas.oas_file("Vander_Heiden_2017_Heavy_HD09_IGHG_HD09_Unsorted_Bcells_age31_healthy_iglblastn_igblastn_IGHG.json.gz")
#mouse_file = oas.oas_file("Collins_2015_IGHG_Mouse_sample_2_iglblastn_igblastn_IGHG.json.gz")
#mouse_file = oas.oas_file("Corcoran_2016_heavy_mouse_IGHG_mouse_heavy_M2_igblastn_igblastn_IGHG.json.gz")

# Both input files are mandatory; fail with a readable message instead of
# an IndexError when they are missing.
if len(sys.argv) < 3:
    sys.exit(
        "usage: {} HUMAN_FILE MOUSE_FILE [THRESHOLD] [SIGNIFICANCE]".format(
            sys.argv[0]
        )
    )

human_file = oas.oas_file(str(sys.argv[1]))
mouse_file = oas.oas_file(str(sys.argv[2]))

threshold = int(sys.argv[3]) if len(sys.argv) > 3 else 1

# A significance level is a fraction such as 0.05, so it must be parsed as
# a float: int("0.05") raises ValueError.
significance = float(sys.argv[4]) if len(sys.argv) > 4 else 0.05

# Only positions reported by the human file are examined.
position_list = human_file.all_positions(threshold)

print("""Using data from {} and {} and a significance level of {}: """.format(human_file.file_name, mouse_file.file_name, significance))

for position in position_list:
    # Fetch the row for the same position from each file.
    human_data = human_file.data_row(position)
    mouse_data = mouse_file.data_row(position)
#!/usr/bin/env python3
# coding: utf-8
"""Build a per-region table of amino-acid frequencies from an OAS file.

Command line:
    <script> OAS_FILE [THRESHOLD]

For every region of the file, one column per amino acid is collected
(one entry per position of the region) and turned into a DataFrame.
"""

#%%
import sys

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import oas_file_handling as oas

f = oas.oas_file(str(sys.argv[1]))

# Command-line arguments are strings; convert so the threshold has the same
# type (int) whether it is supplied or defaulted.
threshold = int(sys.argv[2]) if len(sys.argv) > 2 else 1

# Example inputs:
#f = oas_file("Vander_Heiden_2017_Heavy_HD09_IGHG_HD09_Unsorted_Bcells_age31_healthy_iglblastn_igblastn_IGHG.json.gz")
#f = oas_file("Corcoran_2016_heavy_mouse_IGHG_mouse_heavy_M2_igblastn_igblastn_IGHG.json.gz")

amino_acid_list = f.amino_acids
page = 1

for region_name in f.regions:
    # Column-oriented data: amino acid -> list of frequencies, one entry
    # per position of the current region.
    raw_data = {}
    for amino_acid in amino_acid_list:
        raw_data[amino_acid] = []
        # NOTE(review): `freq` is not defined anywhere in the visible code;
        # confirm where it is supposed to come from.
        for position in f.region_positions(region_name, freq, threshold):
            amino_acid_frequency = f.amino_acid_frequency(position)
            # Amino acids that do not occur at this position count as 0.
            if amino_acid in amino_acid_frequency:
                raw_data[amino_acid].append(amino_acid_frequency[amino_acid])
            else:
                raw_data[amino_acid].append(0)
    df = pd.DataFrame(raw_data)
import sys, math import oas_file_handling as oas f = oas.oas_file( "Vander_Heiden_2017_Heavy_HD09_IGHG_HD09_Unsorted_Bcells_age31_healthy_iglblastn_igblastn_IGHG.json.gz" ) #f = oas.oas_file("Corcoran_2016_heavy_mouse_IGHG_mouse_heavy_M2_igblastn_igblastn_IGHG.json.gz") #f = oas.oas_file(str(sys.argv[1])) if len(sys.argv) > 2: threshold = int(sys.argv[2]) else: threshold = 1 def probability(position, amino_acid): amino_acid_frequency = f.amino_acid_frequency(position) if amino_acid in amino_acid_frequency: prob = amino_acid_frequency[amino_acid] / 100 else: prob = 0 return prob def joint_probability(position1, amino_acid1, position2, amino_acid2): both = 0 for data in f.sequence_data: full_sequence = f.combined_sequence(data) if amino_acid2 == 'Unused' and position2 not in full_sequence: if amino_acid1 == 'Unused' and position1 not in full_sequence: both += 1 elif position1 in full_sequence and full_sequence[