import numpy as np

from sundries import CIDict


def load_centers(iterable):
    dict_ = CIDict()
    for row in iterable:
        if row[0] != '':
            dict_.update({row[0]: row[1]})
    for key, value in dict_.items():
        # Turns strings like '(1.0, 2.0, 0.00)', that is, textual
        # representations of vectors, into numpy arrays. Will cut off the
        # last digit of the third component, but I don't care because the
        # third component will always be 0.0
        dict_[key] = np.array([float(y[:-1]) for y in value[1:].split()])
    return dict_
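# A minimal usage sketch (an assumption, not from the original source):
# rows are expected to be label/vector-string pairs, e.g. straight out of
# csv.reader, with blank labels skipped by load_centers.
example_center_rows = [
    ['A', '(1.0, 2.0, 0.00)'],
    ['', ''],                        # blank label, ignored by load_centers
    ['B', '(-3.5, 0.5, 0.00)'],
]
example_centers = load_centers(example_center_rows)
print(example_centers['a'])          # CIDict lookups are case-insensitive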
def __init__(self, iterable, normalize=False):
    # Reads a parameter table: a header row of residue letters, a row of
    # curve types, then three parameter rows (e0/emin, zmid/zmin, n/sigma,
    # depending on the curve type of each column).
    self.normalize = normalize
    self.ref = CIDict()
    colmap = CIDict()
    for column, letter in enumerate(iterable.next()):
        if letter != '':
            self.ref.update({letter: dict()})
            colmap.update({letter: column})
    curvetypes = iterable.next()
    for letter, column in colmap.items():
        self.ref[letter].update({'curve': curvetypes[column]})
    for parameter in [{'sigmoidal': 'e0', 'gaussian': 'emin'},
                      {'sigmoidal': 'zmid', 'gaussian': 'zmin'},
                      {'sigmoidal': 'n', 'gaussian': 'sigma'}]:
        paramrow = iterable.next()
        for letter in self.ref.keys():
            curvetype = self.ref[letter]['curve']
            self.ref[letter].update(
                {parameter[curvetype]: float(paramrow[colmap[letter]])})
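# A minimal sketch (assumptions, not from the original source) of the row
# layout this constructor expects; the values are made up for illustration,
# and the owning class is not shown here, so the call below uses a
# hypothetical name.
example_param_rows = iter([
    ['A', 'L'],                   # residue letters; blank cells are skipped
    ['sigmoidal', 'gaussian'],    # curve type for each residue column
    ['10.0', '-0.5'],             # e0 (sigmoidal) or emin (gaussian)
    ['17.0', '12.5'],             # zmid (sigmoidal) or zmin (gaussian)
    ['4.0', '10.0'],              # n (sigmoidal) or sigma (gaussian)
])
# params = SomeEzParamClass(example_param_rows)   # hypothetical class name
# params.ref['A'] == {'curve': 'sigmoidal', 'e0': 10.0, 'zmid': 17.0, 'n': 4.0}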
from sundries import CIDict
from sundries import one_letter
from Bio import AlignIO
from Bio.PDB import PDBParser
import warnings
import csv

# Retrieve the sequences from the BBTMOUT alignment, including -'s for gaps
bbtm_align = list(AlignIO.read('Swiss-PDB structural alignment.aln',
                               'clustal'))

# Assuming the first is 1A0S, the second is 1AF6:
sequences = CIDict((('1A0S', str(bbtm_align[0].seq)),
                    ('1AF6', str(bbtm_align[1].seq))))

# Check that I'm right about the first being 1A0S, the second being 1AF6
firstfive_of = CIDict()
for pdbid, sequence in sequences.items():
    firstfive_of.update({pdbid: ''})
    for letter in sequence:
        if letter != '-':
            firstfive_of[pdbid] += letter
            if len(firstfive_of[pdbid]) == 5:
                break

assertion_error_message = 'wrong aligned sequences in sequences dictionary'
assertion_error_message += (": 1a0s's first five are {},"
                            .format(firstfive_of['1a0s'])
                            + " and 1af6's first five are {}"
                            .format(firstfive_of['1af6']))
assert firstfive_of['1a0s'] == 'SGFEF' \
    and firstfive_of['1af6'] == 'VDFHG', \
    assertion_error_message
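# A quick illustration (not in the original source) of what the check above
# extracts: the first five non-gap characters of an aligned sequence. The
# helper name and the gapped string are hypothetical.
def _first_five_nongap(aligned_seq):
    collected = ''
    for letter in aligned_seq:
        if letter != '-':
            collected += letter
            if len(collected) == 5:
                break
    return collected

assert _first_five_nongap('--SG-FEF-A') == 'SGFEF'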
                for filename in weight_file_paths]

# weights maps pdbids to spreadsheets
weights = CIDict()
for spreadsheet in spreadsheets:
    pdbid_list = spreadsheet.get_column('pdbid')
    pdbid_filtered = filter(lambda x: x != '', pdbid_list)
    pdbid_set = set(pdbid_filtered)
    # I expect there to only be one pdbid
    assert len(pdbid_set) == 1, 'more than 1 pdbid in one spreadsheet'
    pdbid = list(pdbid_set)[0]
    weights.update({pdbid: spreadsheet})

# selections maps pdbids to sets of resis
selections = CIDict()
for pdbid, spreadsheet in weights.items():
    def not_blank(string):
        return string != ''
    resis = filter(not_blank, spreadsheet.get_column('resi'))
    selections.update({pdbid: set(resis)})

# A new global variable for looping over these proteins
asymmetric_dataset = CIDict([(pdbid, groupdict[pdbid])
                             for pdbid in weights.keys()])

# Make the spreadsheets available through groupdict
for pdbid, group in asymmetric_dataset.items():
    group.non_ppi = weights[pdbid]

# Make selection
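# A minimal sketch (assumptions, not from the original source) of the
# spreadsheet interface the block above relies on: only get_column() is
# used, so a stub like this is enough to show the resulting mappings.
class _SpreadsheetStub(object):
    def __init__(self, columns):
        self._columns = columns

    def get_column(self, name):
        return self._columns[name]

_stub = _SpreadsheetStub({'pdbid': ['1A0S', '', ''],
                          'resi': ['45', '46', '']})
# Feeding [_stub] through the loops above would give:
#   weights    -> {'1A0S': _stub}
#   selections -> {'1A0S': set(['45', '46'])}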