import dataParser as parser
import helperFiles.buildPlot as plotBuilder

# Load the merged CSV into a DataFrame. `df` is not referenced again in the
# visible part of this script.
# NOTE(review): `pd` is not imported by the import lines visible above --
# confirm that pandas is imported as `pd` somewhere before this runs.
df = pd.read_csv('../merged_1.6.1.csv')

# Disabled code that collected gene labels from parser.samples; kept for
# reference only.
# data = []
# labels = set
# # inputs = ['RAD51B']
# inputs = set()
# samples = parser.samples
# for category in samples:
#     for gene in samples[category]:
#         labels.add(gene)
# print(inputs)

# Load the objects stored under the names 'pvalues' and 'data' through the
# dataParser helper (load_obj is defined in dataParser, not shown here).
pValues = parser.load_obj('pvalues')
data = parser.load_obj('data')

# Debug check: print True for every empty entry of `data`, False otherwise.
for i in data:
    print(len(i) == 0)
# NOTE(review): quit() ends the script here, so every statement after this
# line is currently unreachable -- presumably a debugging leftover.
quit()
# Create one wide figure with a single axes that will hold the box plot.
# NOTE(review): the quit() call earlier in this script stops execution before
# these lines run.
fig, ax1 = plt.subplots(figsize=(15, 5))
# FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
# removed in 3.6; the figure manager provides the same setter and is also
# available on older releases.
fig.canvas.manager.set_window_title('A Boxplot Example')
# Axes edges are given as fractions of the figure size; bottom=0.25 leaves a
# quarter of the figure height free below the axes.
fig.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

# Draw one un-notched, vertical box per entry of `data`, with whiskers at
# 1.5 * IQR, '+' outlier symbols and the mean marked on each box.
bp = ax1.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showmeans=True)
# Black boxes and whiskers, red '+' markers for the outliers.
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')
# Add a horizontal grid to the plot, but make it very light in color
Esempio n. 2
0
import json
import os
import re
import urllib

import pandas as pd

import dataParser

# Read the merged table and collect the distinct donor identifiers.
df = pd.read_csv('../merged_1.6.1.csv')
patients = df['donor_unique_id'].unique()

rangeDict = dataParser.buildRangeDict()

# Resume from the stored set of completed donors when the file exists;
# otherwise start with an empty set.
completed = (
    dataParser.load_obj("completedDonors3")
    if os.path.isfile("obj/completedDonors3.pkl")
    else set()
)

# A quarter of the donor count, rounded down.
size = len(patients) // 4

# Walk the tail of `patients`: every entry from index 3 * size + 1 to the end.
for patient in patients[size + size + size + 1:int(len(patients))]:
    # Reload the stored results on every iteration when the file exists;
    # otherwise start from an empty dict.
    if (os.path.isfile("obj/GenewithDonorsWithSVsInGene3.pkl")):
        results = dataParser.load_obj("GenewithDonorsWithSVsInGene3")
    else:
        results = {}
    # The greedy '.*' makes group 1 the text after the last '::' in the id.
    # NOTE(review): re.match returns None when the id contains no '::', in
    # which case match[1] raises TypeError -- confirm ids always contain it.
    match = re.match('.*::(.+)', patient)
    donorid = dataParser.getKeyword(match[1])
    # Skip donors that are already in the completed set.
    if donorid in completed:
        continue
    print(donorid)
    mutsinDonor = dataParser.mutationsInDonorCount(donorid)
    # For an integer count, mutsinDonor % 100 lies in 0..99, so this value is
    # always between 2 and 101.
    to_nearest_hunderd = 101 - (mutsinDonor % 100)
Esempio n. 3
0
#         donorDF = df[(df['donor_unique_id'] == donor)]
#         notaffected.append(len(donorDF.index))
#     if(len(affected) == 0):
#         affected.append(0)
#     if(len(notaffected) == 0):
#         notaffected.append(0)
#     data.append(notaffected)
#     data.append(affected)
#     value = stats.ttest_ind(affected, notaffected)[1]
#     pValues.append(value)
#     print(value)

# parser.save_obj(pValues, 'WTvsmUTsubsetpValues')
# parser.save_obj(data, 'WTvsMUTsubsetData')
# parser.save_obj(labels, 'WTvsMUTsubsetLabels')
# Load the stored p-values, per-group datasets and labels for the subset.
pValues = parser.load_obj('WTvsmUTsubsetpValues')
data = parser.load_obj('WTvsMUTsubsetData')
labels = parser.load_obj('WTvsMUTsubsetLabels')

# x positions of the groups: one tick every 3 units up to 1.5 * len(data).
ind = np.arange(start=0, stop=len(data) * 1.5, step=3)
# Width of each bar.
width = 1.25
fig2, ax2 = plt.subplots(figsize=(20, 15))

# Filled by the loop that follows this block.
stdevs = []
print(ind)
# One mean per dataset, in the same order as `data`.
means = [mean(dataset) for dataset in data]
for dataset in data:
    try:
        stdevs.append(stdev(dataset))
    except:
Esempio n. 4
0
import json
import os
import re
import urllib

import pandas as pd

import dataParser

rangeDict = dataParser.buildRangeDict()

# Resume from the stored set of completed donors when the file exists;
# otherwise start with an empty set.
completed = (
    dataParser.load_obj("completedDonors0")
    if os.path.isfile("obj/completedDonors0.pkl")
    else set()
)

patients = dataParser.load_obj("patients")
# A quarter of the donor count, rounded down.
size = len(patients) // 4

# Walk the head of `patients`: the first size + 1 entries.
for patient in patients[0:size+1]:
    # Reload the stored results on every iteration when the file exists;
    # otherwise start from an empty dict.
    if(os.path.isfile("obj/GenewithDonorsWithSVsInGene0.pkl")):
        results = dataParser.load_obj("GenewithDonorsWithSVsInGene0")
    else:
        results = {}
    # The greedy '.*' makes group 1 the text after the last '::' in the id.
    # NOTE(review): re.match returns None when the id contains no '::', in
    # which case match[1] raises TypeError -- confirm ids always contain it.
    match = re.match('.*::(.+)', patient)
    donorid = dataParser.getKeyword(match[1])
    # Skip donors that are already in the completed set.
    if donorid in completed:
        continue
    print(donorid)
    mutsinDonor = dataParser.mutationsInDonorCount(donorid)
    # For an integer count, mutsinDonor % 100 lies in 0..99, so this value is
    # always between 2 and 101.
    to_nearest_hunderd = 101 - (mutsinDonor % 100)
    try:
        for i in range(0, mutsinDonor+to_nearest_hunderd, 100):
Esempio n. 5
0
# Per-gene counts of intrachromosomal rows, appended in the order of `labels`.
intrachromosomal = []
samples = parser.samples
# `labels` is overwritten by the load_obj call below; `s` is only referenced
# by the disabled block, and `counter` is not used in the visible lines.
labels = []
s = set()
counter = 0
# Disabled code that built `labels` from parser.samples, skipping duplicate
# genes and genes rejected by parser.checkValid; kept for reference only.
# for category in samples:
#     for gene in samples[category]:
#         if(gene in s):
#             continue
#         print(gene)
#         if(not parser.checkValid(gene)):
#             continue
#         else:
#             s.add(gene)
#             labels.append(gene)
labels = parser.load_obj('WTvsMUTsubsetLabels')
# For every gene label, count rows of the gene's table by whether the
# 'seqnames' and 'altchr' columns agree.
for gene in labels:
    print(gene)
    try:
        # getGeneDF returns the gene's table and a chromosome value that is
        # compared against integers below.
        geneDF, chromsome = parser.getGeneDF(gene)
        geneDataIntra = 0
        geneDataInter = 0
        for index, row in geneDF.iterrows():
            # Intra: both columns equal the gene's own chromosome. A row whose
            # two columns match each other but not `chromsome` is counted in
            # neither bucket.
            if (int(row['seqnames']) == chromsome) and (int(row['altchr']) == chromsome):
                geneDataIntra += 1
            # Inter: the two columns name different chromosomes.
            if (int(row['seqnames'])) != (int(row['altchr'])):
                geneDataInter += 1
        intrachromosomal.append(geneDataIntra)
        # NOTE(review): `interchromosomal` is not defined in the visible
        # lines -- confirm it is created before this loop; otherwise the
        # NameError raised here is swallowed by the handler below.
        interchromosomal.append(geneDataInter)
    except:
        # Bare except: any failure above (including int() raising ValueError
        # on a non-numeric value, presumably e.g. 'X'/'Y' chromosomes --
        # verify) discards the partial counts and records 0 for this gene.
        # NOTE(review): if the failure happens after the intrachromosomal
        # append above, this adds a second entry for the same gene.
        intrachromosomal.append(0)