def process_overlaps(in_f, min_overlap=6):
    """Group the peptides in ``in_f`` by overlap and write two CSV reports.

    The peptides are read with the reader matching the file extension,
    handed to ``make_overlap_groups`` together with ``min_overlap``, and the
    resulting groups are loaded back from ``groups.yaml``.  Two files are
    then written next to the input, using the input path minus its
    extension as the base name:

    * ``<base>.cluster.csv`` via ``write_groups``
    * ``<base>.kernel.csv`` via ``write_overlap_kernels``

    Note that ``groups.yaml`` is a relative path, so the intermediate file
    lands in the current working directory and is overwritten on each run.

    Args:
        in_f: Path of the peptide file; must end in ``.csv`` or ``.txt``.
        min_overlap: Forwarded unchanged to ``make_overlap_groups``
            (presumably the minimum overlap length required to group two
            peptides -- confirm against that function).

    Raises:
        ValueError: If ``in_f`` has neither a ``.csv`` nor a ``.txt``
            extension.
    """
    print("Reading peptides from %s.." % in_f)
    if in_f.endswith('.csv'):
        peptides = read_peptides_from_csv(in_f)
    elif in_f.endswith('.txt'):
        peptides = read_peptides_from_txt(in_f)
    else:
        # Without this branch ``peptides`` would stay unbound and the call
        # below would die with an UnboundLocalError that hides the cause.
        raise ValueError(
            "Unsupported peptide file %r: expected a .csv or .txt extension"
            % (in_f,))

    print("Generating groups...")
    # The grouping step is expected to write groups.yaml, which is read
    # straight back in on the next line.
    make_overlap_groups(peptides, 'groups.yaml', min_overlap)
    groups = datafile.load_yaml('groups.yaml')

    base = os.path.splitext(in_f)[0]

    cluster_csv = base + '.cluster.csv'
    print("Writing groups %s" % cluster_csv)
    write_groups(groups, cluster_csv)

    kernel_csv = base + '.kernel.csv'
    print("Writing groups %s" % kernel_csv)
    write_overlap_kernels(groups, kernel_csv)
def process_subsets(in_f):
    """Group the peptides in ``in_f`` by subset and write two CSV reports.

    The peptides are read with the reader matching the file extension and
    handed to ``make_subset_groups``; the resulting groups are loaded back
    from ``groups.yaml``.  Two files are then written next to the input,
    using the input path minus its extension as the base name:

    * ``<base>.cluster.csv`` via ``write_groups``
    * ``<base>.kernel.csv`` via ``write_subset_kernels``

    Note that ``groups.yaml`` is a relative path, so the intermediate file
    lands in the current working directory and is overwritten on each run.

    TODO (carried over from the original note): need to look for
    redundancies.

    Args:
        in_f: Path of the peptide file; must end in ``.csv`` or ``.txt``.

    Raises:
        ValueError: If ``in_f`` has neither a ``.csv`` nor a ``.txt``
            extension.
    """
    print("Reading peptides from %s.." % in_f)
    if in_f.endswith('.csv'):
        peptides = read_peptides_from_csv(in_f)
    elif in_f.endswith('.txt'):
        peptides = read_peptides_from_txt(in_f)
    else:
        # Without this branch ``peptides`` would stay unbound and the call
        # below would die with an UnboundLocalError that hides the cause.
        raise ValueError(
            "Unsupported peptide file %r: expected a .csv or .txt extension"
            % (in_f,))

    print("Generating groups...")
    # The grouping step is expected to write groups.yaml, which is read
    # straight back in on the next line.
    make_subset_groups(peptides, 'groups.yaml')
    groups = datafile.load_yaml('groups.yaml')

    base = os.path.splitext(in_f)[0]

    cluster_csv = base + '.cluster.csv'
    print("Writing groups %s" % cluster_csv)
    write_groups(groups, cluster_csv)

    kernel_csv = base + '.kernel.csv'
    print("Writing groups %s" % kernel_csv)
    write_subset_kernels(groups, kernel_csv)
if seq in ref_seq: return ref_seq.find(seq) elif ref_seq in seq: return -seq.find(ref_seq) elif ref_left_overlap: return len(ref_seq) - ref_left_overlap elif ref_right_overlap: return -(len(seq) - ref_right_overlap) raise "No overlap" print("Generating groups...") make_groups('b57_clean.csv', 'groups.yaml') print("Loading groups...") groups = datafile.load_yaml('groups.yaml') rows = [('i_group', 'sequence', 'modifications', 'protein')] for group in groups: sequences = [] for peptide in group['peptides']: sequences.append(peptide['sequence']) ref_seq = sequences[0] indices = [get_i_relative_to_ref(ref_seq, s) for s in sequences] # find max length left = -min(indices) max_len = 0 for index, seq in zip(indices, sequences):