예제 #1
0
def process_overlaps(in_f, min_overlap=6):
    """Group the peptides in ``in_f`` by overlap and write two CSV reports.

    The peptides are read from ``in_f``, grouped by ``make_overlap_groups``
    via the intermediate file ``groups.yaml``, and the reloaded groups are
    written next to the input as ``<base>.cluster.csv`` and
    ``<base>.kernel.csv``, where ``<base>`` is ``in_f`` without its extension.

    Args:
        in_f: Path of the peptide file. The extension selects the reader:
            ``.csv`` or ``.txt``.
        min_overlap: Forwarded unchanged to ``make_overlap_groups``.

    Raises:
        ValueError: If ``in_f`` ends with neither ``.csv`` nor ``.txt``.
    """

    print("Reading peptides from %s.." % in_f)
    if in_f.endswith('.csv'):
        peptides = read_peptides_from_csv(in_f)
    elif in_f.endswith('.txt'):
        peptides = read_peptides_from_txt(in_f)
    else:
        # Without this branch `peptides` stayed unbound and the function
        # crashed further down with an unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unsupported peptide file %r: expected a .csv or .txt file"
            % (in_f,))

    print("Generating groups...")
    # NOTE: 'groups.yaml' is a fixed relative path, so it is overwritten on
    # every call.
    make_overlap_groups(peptides, 'groups.yaml', min_overlap)

    groups = datafile.load_yaml('groups.yaml')
    base = os.path.splitext(in_f)[0]

    out_csv = base + '.cluster.csv'
    print("Writing groups %s" % out_csv)
    write_groups(groups, out_csv)

    out_csv = base + '.kernel.csv'
    print("Writing groups %s" % out_csv)
    write_overlap_kernels(groups, out_csv)
예제 #2
0
def process_subsets(in_f):
    """Group the peptides in ``in_f`` by subset and write two CSV reports.

    The peptides are read from ``in_f``, grouped by ``make_subset_groups``
    via the intermediate file ``groups.yaml``, and the reloaded groups are
    written next to the input as ``<base>.cluster.csv`` and
    ``<base>.kernel.csv``, where ``<base>`` is ``in_f`` without its extension.

    TODO (carried over from the original note): need to look for
    redundancies.

    Args:
        in_f: Path of the peptide file. The extension selects the reader:
            ``.csv`` or ``.txt``.

    Raises:
        ValueError: If ``in_f`` ends with neither ``.csv`` nor ``.txt``.
    """

    print("Reading peptides from %s.." % in_f)
    if in_f.endswith('.csv'):
        peptides = read_peptides_from_csv(in_f)
    elif in_f.endswith('.txt'):
        peptides = read_peptides_from_txt(in_f)
    else:
        # Without this branch `peptides` stayed unbound and the function
        # crashed further down with an unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unsupported peptide file %r: expected a .csv or .txt file"
            % (in_f,))

    print("Generating groups...")
    # NOTE: 'groups.yaml' is a fixed relative path, so it is overwritten on
    # every call.
    make_subset_groups(peptides, 'groups.yaml')

    groups = datafile.load_yaml('groups.yaml')
    base = os.path.splitext(in_f)[0]

    out_csv = base + '.cluster.csv'
    print("Writing groups %s" % out_csv)
    write_groups(groups, out_csv)

    out_csv = base + '.kernel.csv'
    print("Writing groups %s" % out_csv)
    write_subset_kernels(groups, out_csv)
예제 #3
0
    if seq in ref_seq:
        return ref_seq.find(seq)
    elif ref_seq in seq:
        return -seq.find(ref_seq)
    elif ref_left_overlap:
        return len(ref_seq) - ref_left_overlap
    elif ref_right_overlap:
        return -(len(seq) - ref_right_overlap)
    raise "No overlap"


# Run the grouping step on the fixed input 'b57_clean.csv'. 'groups.yaml' is
# presumably the output path, since the same file is loaded right below
# (confirm against make_groups).
print("Generating groups...")
make_groups('b57_clean.csv', 'groups.yaml')

# Read the groups back from the YAML file.
print("Loading groups...")
groups = datafile.load_yaml('groups.yaml')

# Table rows, seeded with the column-name header tuple.
rows = [('i_group', 'sequence', 'modifications', 'protein')]

for group in groups:
    sequences = []
    for peptide in group['peptides']:
        sequences.append(peptide['sequence'])

    ref_seq = sequences[0]
    indices = [get_i_relative_to_ref(ref_seq, s) for s in sequences]

    # find max length
    left = -min(indices)
    max_len = 0
    for index, seq in zip(indices, sequences):
예제 #4
0
    if seq in ref_seq:
        return ref_seq.find(seq)
    elif ref_seq in seq:
        return -seq.find(ref_seq)
    elif ref_left_overlap:
        return len(ref_seq) - ref_left_overlap
    elif ref_right_overlap:
        return -(len(seq) - ref_right_overlap)
    raise "No overlap"


# Run the grouping step on the fixed input 'b57_clean.csv'. 'groups.yaml' is
# presumably the output path, since the same file is loaded right below
# (confirm against make_groups).
print("Generating groups...")
make_groups('b57_clean.csv', 'groups.yaml')

# Read the groups back from the YAML file.
print("Loading groups...")
groups = datafile.load_yaml('groups.yaml')

# Table rows, seeded with the column-name header tuple.
rows = [('i_group', 'sequence', 'modifications', 'protein')]

for group in groups:
    sequences = []
    for peptide in group['peptides']:
        sequences.append(peptide['sequence'])

    ref_seq = sequences[0]
    indices = [get_i_relative_to_ref(ref_seq, s) for s in sequences]

    # find max length
    left = -min(indices)
    max_len = 0
    for index, seq in zip(indices, sequences):