/
checkSwitching.py
118 lines (100 loc) · 4.69 KB
/
checkSwitching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/python
import sys
import argparse
import subprocess
import shutil
import csv
from collections import Counter, namedtuple
import io
import os
import uuid
import zipfile
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
from matplotlib_venn import _math
from matplotlib_venn import venn2_circles
from matplotlib.patches import ConnectionPatch, Circle
from matplotlib.text import Text
import numpy as np
def _non_empty(value):
    """
    argparse type callable: return value unchanged, rejecting the empty string
    """
    if not value:
        raise argparse.ArgumentTypeError('must not be empty')
    return value


# Every option is mandatory. argparse enforces that itself (usage message and exit
# status 2) instead of relying on assert statements, which are removed under
# "python -O" and only produce a bare traceback.
parser = argparse.ArgumentParser()
parser.add_argument('--unfiltered_archive', type=_non_empty, required=True,
                    help='zip archive holding the PSMs of the unfiltered search')
parser.add_argument('--filtered_archive', type=_non_empty, required=True,
                    help='zip archive holding the PSMs of the filtered search')
# Only the presence of --fdr is checked, so an explicit cutoff of 0 is accepted.
parser.add_argument('--fdr', type=float, required=True,
                    help='FDR cutoff, given as a percentage (it is divided by 100 before use)')
parser.add_argument('--plot', type=_non_empty, required=True,
                    help='path the Venn diagram image is written to')
args = parser.parse_args()
def _annotate_overlap(left_circle, right_circle, concordant, discordant):
    """
    Split the overlap of the two circles with a line and write one count in each half.

    left_circle and right_circle are the circle patches ordered by the x coordinate of
    their centers. concordant is written in the upper half, discordant in the lower half.

    Returns True when the annotation was drawn, and False (drawing nothing) when the
    geometry helpers report that there is no intersection to annotate.
    """
    left_center, left_radius = left_circle.center, left_circle.radius
    right_center, right_radius = right_circle.center, right_circle.radius
    # Outermost points of the pair of circles; they define the line through both circles.
    far_left = np.array([left_center[0] - left_radius, left_center[1]])
    far_right = np.array([right_center[0] + right_radius, right_center[1]])
    left_hits = _math.circle_line_intersection(left_center, left_radius, far_left, far_right)
    right_hits = _math.circle_line_intersection(right_center, right_radius, far_left, far_right)
    crossings = _math.circle_circle_intersection(left_center, left_radius, right_center, right_radius)
    # NOTE(review): the matplotlib_venn helpers are expected to return None when there is
    # no intersection; max()/min() over None would raise, so bail out instead.
    if left_hits is None or right_hits is None or crossings is None:
        return False
    # The overlap runs from the right circle's left edge to the left circle's right edge.
    left_intersection = max(left_hits, key=lambda point: point[0])
    right_intersection = min(right_hits, key=lambda point: point[0])
    plt.gca().add_patch(ConnectionPatch(left_intersection, right_intersection, 'data', 'data'))
    upper_corner = max(crossings, key=lambda point: point[1])
    lower_corner = min(crossings, key=lambda point: point[1])
    halves = (
        (upper_corner, concordant, r'$p_i = p_j$'),
        (lower_corner, discordant, r'$p_i \neq p_j$'),
    )
    for corner, count, relation in halves:
        # Each label sits at the centroid of the triangle made by the dividing line and
        # the corner of the overlap on that side.
        anchor = (left_intersection + right_intersection + corner)/3.0
        plt.text(anchor[0], anchor[1], str(count) + '\n' + relation)
    return True


def plot_venn(search_one, search_two, output_location):
    """
    Draw a two-set Venn diagram of the scans found by two searches and save it as a PNG.

    search_one and search_two should both be dictionaries, mapping each scan to the peptide.
    The set for search_one is labelled 'Unfiltered' and the set for search_two 'Filtered'.

    When the searches share scans, the overlap is divided by a line: the upper half shows
    how many shared scans have the same peptide in both searches and the lower half how
    many have different peptides. In that case the default count in the overlap is blanked.
    When the searches share no scans, or the overlap cannot be located, the diagram is
    saved without that annotation instead of raising.

    output_location is handed to savefig as the destination of the image.
    """
    spectra_one = set(search_one.keys())
    print('spectra one:')
    print(spectra_one)
    spectra_two = set(search_two.keys())
    common_spectra = spectra_one.intersection(spectra_two)
    print('common spectra')
    print(common_spectra)
    # the number of spectra shared between the two searches that match against the same peptide
    concordant_spectra = sum(1 for scan in common_spectra if search_one[scan] == search_two[scan])
    # the number of spectra shared between the two searches that match against different peptides
    discordant_spectra = len(common_spectra) - concordant_spectra
    circles = venn2_circles([spectra_one, spectra_two])
    left_circle, right_circle = sorted(circles, key=lambda circle: circle.center[0])
    print(left_circle.center)
    print(right_circle.center)
    annotated = False
    if common_spectra:
        annotated = _annotate_overlap(left_circle, right_circle, concordant_spectra, discordant_spectra)
    venn_diagram = venn2([spectra_one, spectra_two], ['Unfiltered', 'Filtered'])
    overlap_label = venn_diagram.get_label_by_id('11')
    # NOTE(review): get_label_by_id appears to return None for a region venn2 treats as
    # empty, so guard before touching it. The default count is only removed when the
    # split annotation has replaced it.
    if annotated and overlap_label is not None:
        overlap_label.set_text('')
    plt.savefig(output_location, format='png')
def get_psms(zip_path, fdr_cutoff):
    """
    Read the target PSM table from a zip archive and return the best peptide for each scan.

    zip_path is the archive to read. It must contain exactly one member whose name ends
    with 'percolator.target.psms.txt'; that member is parsed as a tab-delimited table with
    the columns 'scan', 'percolator q-value', 'percolator score' and 'sequence'.

    fdr_cutoff is the largest q-value (inclusive) a row may have and still be used.

    Returns a dictionary mapping each scan (the string found in the file) to the sequence
    of its highest-scoring row that passes the cutoff. When scores tie, the row that
    appears first in the file is kept.

    Raises AssertionError when the archive does not contain exactly one matching member.
    """
    suffix = 'percolator.target.psms.txt'
    with zipfile.ZipFile(zip_path, 'r') as archive:
        locations = [name for name in archive.namelist() if name.endswith(suffix)]
        if len(locations) != 1:
            # Raised explicitly rather than through an assert statement so the check
            # survives "python -O"; the exception type is unchanged for existing callers.
            raise AssertionError(
                'expected exactly one member ending with {!r} in {!r}, found {}: {}'.format(
                    suffix, zip_path, len(locations), locations))
        best_by_scan = {}
        with archive.open(locations[0], 'r') as psms_binary_file:
            # newline='' leaves line-ending handling to the csv module, and the encoding
            # is pinned so parsing does not depend on the locale of the machine.
            psms_text_file = io.TextIOWrapper(psms_binary_file, encoding='utf-8', newline='')
            for row in csv.DictReader(psms_text_file, delimiter='\t'):
                scan = row['scan']
                q_val = float(row['percolator q-value'])
                score = float(row['percolator score'])
                peptide = row['sequence']
                # Strict '<' means an equal score never replaces the row stored first.
                if q_val <= fdr_cutoff and (scan not in best_by_scan or best_by_scan[scan][0] < score):
                    best_by_scan[scan] = (score, peptide)
        return {scan: entry[1] for scan, entry in best_by_scan.items()}
# Copy the parsed command-line values into module-level names.
unfiltered_archive_path, filtered_archive_path = args.unfiltered_archive, args.filtered_archive
plot = args.plot
# --fdr is divided by 100 before it is used as the cutoff passed to get_psms.
fdr = args.fdr/100.0
# Load the scan -> peptide mapping of each archive, unfiltered first, using the same cutoff.
unfiltered_psms = get_psms(zip_path=unfiltered_archive_path, fdr_cutoff=fdr)
filtered_psms = get_psms(zip_path=filtered_archive_path, fdr_cutoff=fdr)
# Draw the comparison of the two searches and write it to the requested location.
plot_venn(search_one=unfiltered_psms, search_two=filtered_psms, output_location=plot)