def intersection_mon_low_molar_ratio(h5file, ratio):
    """Count polar/nonpolar contact overlap for the 2:1 monomer systems.

    For each isomer ('scyllo', 'chiro') and run set 0-5, the polar table
    under ``/polar`` and the nonpolar table under ``/nonpolar_residue`` are
    read and every cell outside column 0 is classified as polar-only,
    nonpolar-only or both (a cell is a contact when it is nonzero).  One
    file per system, ``<iso>_sys<n>_mon_2to1_intersection.csv``, is written
    to the working directory with a header, the raw counts and the counts
    as fractions of their total.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        ratio: Unused; kept so existing callers keep working.

    Raises:
        AssertionError: If the two tables of a system differ in shape.
    """
    for iso in ['scyllo', 'chiro']:
        for run_set in range(0, 6):
            names = {'iso': iso, 'run_set': run_set}
            polar_matrix = myh5.getTableAsMatrix(
                h5file,
                os.path.join('/polar', '%(iso)s_sys%(run_set)d_mon_2to1_inos_total.dat' % names),
                dtype=numpy.float64)
            print(polar_matrix)
            print(polar_matrix.shape)
            nonpolar_matrix = myh5.getTableAsMatrix(
                h5file,
                os.path.join('/nonpolar_residue', '%(iso)s_sys%(run_set)d_mon_2to1_per_inositol_contacts.dat' % names),
                dtype=numpy.float64)
            print(nonpolar_matrix)
            print(nonpolar_matrix.shape)
            assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

            # Column 0 is excluded from the comparison (presumably the time
            # stamp -- TODO confirm against the table layout).
            polar_hit = polar_matrix[:, 1:] != 0
            nonpolar_hit = nonpolar_matrix[:, 1:] != 0

            counts = {'polar_only': 0, 'nonpolar_only': 0, 'polar_nonpolar': 0}
            counts['polar_nonpolar'] = int(numpy.sum(polar_hit & nonpolar_hit))
            counts['polar_only'] = int(numpy.sum(polar_hit & ~nonpolar_hit))
            counts['nonpolar_only'] = int(numpy.sum(~polar_hit & nonpolar_hit))

            total = sum(counts.values())
            # With no contacts at all the fractions are reported as 0.0
            # instead of dividing by zero, matching the guard used by the
            # other intersection routines in this file.
            fraction = dict(
                (key, float(value) / total if total else 0.0)
                for key, value in counts.items())
            print(counts)

            out_name = '%(iso)s_sys%(run_set)d_mon_2to1_intersection.csv' % names
            # 'wb': the Python 2 csv module expects a binary-mode file.
            with open(out_name, 'wb') as out_file:
                writer = csv.DictWriter(out_file, list(counts))
                writer.writeheader()
                writer.writerow(counts)
                writer.writerow(fraction)
def monomer_15to1_binding_events(h5file, inositol_concentration):
    """Count binding events for the 15:1 monomer systems, one row per run set.

    Systems are numbered from 1 and grouped into five run sets of 100
    consecutive systems.  For each isomer and run set, the polar tables
    (``/polar/<isomer>_sys<i>_inos_total.dat``) and nonpolar tables
    (``/nonpolar/<isomer>_sys<i>_per_inositol_contacts.dat``) of every
    system that has both are stacked row-wise, added element-wise without
    column 0, and passed to ``_num_binding_events_state_machine``.  Results
    go to the space-delimited file ``monomer_15to1_binding_events.csv``.

    A run set for which no system has both tables is reported on stdout and
    skipped instead of failing on a ``None`` matrix.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        inositol_concentration: Value copied verbatim into the last column.
    """
    csv_header = ["isomer", "inositol_ratio", "binding_constant", "inos_conc"]
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open('monomer_15to1_binding_events.csv', 'wb') as out_file:
        writer = csv.writer(out_file, delimiter=' ')
        writer.writerow(csv_header)
        for isomer in ["scyllo", "chiro"]:
            for k in range(1, 6):
                from_idx = (k - 1) * 100 + 1
                to_idx = k * 100 + 1
                print(" ".join(map(str, ("Computing run_set", k, "with systems from", from_idx, "to", to_idx - 1))))

                # Collect the per-system tables and concatenate once, rather
                # than re-copying the growing matrix for every system.
                polar_chunks = []
                nonpolar_chunks = []
                for i in range(from_idx, to_idx):
                    names = {'isomer': isomer, 'i': i}
                    polar_matrix = myh5.getTableAsMatrix(
                        h5file, '/polar/%(isomer)s_sys%(i)d_inos_total.dat' % names,
                        dtype=numpy.float64)
                    nonpolar_matrix = myh5.getTableAsMatrix(
                        h5file, '/nonpolar/%(isomer)s_sys%(i)d_per_inositol_contacts.dat' % names,
                        dtype=numpy.float64)
                    if polar_matrix is not None and nonpolar_matrix is not None:
                        polar_chunks.append(polar_matrix)
                        nonpolar_chunks.append(nonpolar_matrix)
                    else:
                        print(" ".join(map(str, ("data files (polar and nonpolar) for system", i, "was not found"))))

                if not polar_chunks:
                    print(" ".join(map(str, ("no data found for", isomer, "run_set", k, "- skipping"))))
                    continue

                polar_big_matrix = numpy.concatenate(polar_chunks)
                nonpolar_big_matrix = numpy.concatenate(nonpolar_chunks)
                num_binding_events = _num_binding_events_state_machine(
                    nonpolar_big_matrix[:, 1:] + polar_big_matrix[:, 1:])
                writer.writerow([isomer, "15to1", num_binding_events, inositol_concentration])
def compute_beta_low_molar_binding_events(h5file, inositol_ratio, inositol_concentration):
    """Count binding events for the low-molar beta systems.

    For each isomer, system 0-2 and trajectory index 1-5, the polar table
    (``/polar/<iso>_sys<s>_t<i>_inos_total.dat``) and nonpolar table
    (``/nonpolar_revision/<iso>_sys<s>_t<i>_per_inositol_contacts.dat``)
    are added element-wise without column 0 and passed to
    ``_num_binding_events_state_machine``.  One row per table pair is
    written to ``beta_<inositol_ratio>_binding_events.csv`` (space
    delimited); the trajectory index itself is not part of the row.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        inositol_ratio: String used in the output file name.
        inositol_concentration: Value copied verbatim into the last column.
    """
    polar_path = "/polar"
    nonpolar_path = "/nonpolar_revision"
    csv_header = ["isomer", "sys_idx", "binding_events", "inos_conc"]
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open('beta_' + inositol_ratio + '_binding_events.csv', 'wb') as out_file:
        writer = csv.writer(out_file, delimiter=' ')
        writer.writerow(csv_header)
        for iso in ["scyllo", "chiro"]:
            # Named sys_idx so the loop variable does not hide the sys module.
            for sys_idx in range(0, 3):
                for i in range(1, 6):
                    names = {'iso': iso, 'sys': sys_idx, 'i': i}
                    nonpolar_file = os.path.join(
                        nonpolar_path, "%(iso)s_sys%(sys)d_t%(i)d_per_inositol_contacts.dat" % names)
                    polar_file = os.path.join(
                        polar_path, "%(iso)s_sys%(sys)d_t%(i)d_inos_total.dat" % names)
                    print(" ".join((polar_file, nonpolar_file)))
                    nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
                    polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)
                    print(" ".join(map(str, (polar_matrix.shape, nonpolar_matrix.shape))))
                    num_binding_events = _num_binding_events_state_machine(
                        polar_matrix[:, 1:] + nonpolar_matrix[:, 1:])
                    writer.writerow([iso, sys_idx, num_binding_events, inositol_concentration])
def monomer_stacking(h5file, ratio, system_indices, tag="15", file_path='/stacking'):
    """Tally stacked versus bound phenylalanine frames for monomer systems.

    For every system index, the per-residue contact table and the per-PHE
    stacking table are read.  Columns 5 and 6 of the residue table and
    columns 1 and 2 of the stacking table are compared frame by frame
    (presumably the two PHE residues of the monomer -- TODO confirm); a
    value greater than zero counts as bound or stacked respectively.  One
    row ``stacked, bound, stacked+bound, stacked/bound`` per system is
    written to ``<ratio>_<tag>_monomer_stacking.csv``.

    Systems with a missing table are reported and skipped.  When nothing
    is bound the ratio column is ``nan`` instead of raising
    ZeroDivisionError.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        ratio: "2to1" or "15to1"; anything else prints a message and calls
            ``sys.exit()``.
        system_indices: Iterable of integer system numbers.
        tag: Free-form label used in the output file name.
        file_path: HDF5 group holding the stacking tables.

    Raises:
        AssertionError: If the two tables differ in number of rows.
    """
    residue_templates = {
        "2to1": '/nonpolar_residue/scyllo_sys%(i)d_mon_2to1_per_residue_contacts.dat',
        "15to1": '/nonpolar/scyllo_sys%(i)d_per_residue_contacts.dat',
    }
    header = ["stacked", "bound", "stacked+bound", "stacked/bound"]
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(ratio + "_" + tag + "_monomer_stacking.csv", 'wb') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)
        for i in system_indices:
            if ratio not in residue_templates:
                # TODO: Throw a custom exception here
                print(" ".join(map(str, ("ratio ", ratio, "is not recognized"))))
                sys.exit()
            names = {'i': i}
            residue_file = residue_templates[ratio] % names
            # Format only the file name, so a '%' in file_path cannot break
            # the substitution.
            phe_stacking_file = os.path.join(
                file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat' % names)
            print(" ".join((residue_file, phe_stacking_file)))

            residue_matrix = myh5.getTableAsMatrix(h5file, residue_file, dtype=numpy.float64)
            phe_stacking = myh5.getTableAsMatrix(h5file, phe_stacking_file, dtype=numpy.float64)
            if residue_matrix is None or phe_stacking is None:
                print(" ".join((residue_file, "or", phe_stacking_file, "does not exist")))
                continue

            print(" ".join(map(str, (residue_matrix.shape, phe_stacking.shape))))
            assert residue_matrix.shape[0] == phe_stacking.shape[0], "Residue and phe stacking matrices must have the same number of lines"

            # Explicit column lists (not slices) so a table that is too
            # narrow still raises IndexError.
            bound_mask = residue_matrix[:, [5, 6]] > 0
            stacked_mask = phe_stacking[:, [1, 2]] > 0
            bound = float(numpy.sum(bound_mask))
            stacked = float(numpy.sum(stacked_mask))
            # this is for a sanity check
            stacked_bound = float(numpy.sum(bound_mask & stacked_mask))

            stacked_per_bound = stacked / bound if bound else float('nan')
            writer.writerow([stacked, bound, stacked_bound, stacked_per_bound])
def intersection_beta(h5file, tag):
    """Write polar/nonpolar contact-overlap fractions for the beta systems.

    For each isomer and system 0-5, the polar table
    (``/polar/<iso>_t<n>_inos_total.dat``) and the nonpolar table
    (``/nonpolar_residue/<iso>_t<n>_per_inositol_contacts.dat``) are
    compared cell by cell, skipping row 0 and column 0.  Each nonzero cell
    is counted as polar-only, nonpolar-only or both, and the counts are
    normalised by their total when that total is nonzero.  The result is
    written, with a header, to ``<iso>_<tag>_intersection<n>.csv``.

    If either table is missing, the file is still written with all-zero
    counts.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        tag: Label used in the output file names.

    Raises:
        AssertionError: If the two tables of a system differ in shape.
    """
    polar_path = "/polar"
    nonpolar_path = "/nonpolar_residue"
    for iso in ["scyllo", "chiro"]:
        # Named sys_idx so the loop variable does not hide the sys module.
        for sys_idx in range(0, 6):
            names = {'iso': iso, 'sys': sys_idx, 'tag': tag}
            nonpolar_file = os.path.join(
                nonpolar_path, "%(iso)s_t%(sys)d_per_inositol_contacts.dat" % names)
            polar_file = os.path.join(polar_path, "%(iso)s_t%(sys)d_inos_total.dat" % names)
            nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
            polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)

            counts = {'polar_only': 0, 'nonpolar_only': 0, 'polar_nonpolar': 0}
            if polar_matrix is not None and nonpolar_matrix is not None:
                print(polar_matrix.shape)
                print(nonpolar_matrix.shape)
                assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

                # Row 0 and column 0 are both left out of the comparison.
                polar_hit = polar_matrix[1:, 1:] != 0
                nonpolar_hit = nonpolar_matrix[1:, 1:] != 0
                counts['polar_nonpolar'] = int(numpy.sum(polar_hit & nonpolar_hit))
                counts['polar_only'] = int(numpy.sum(polar_hit & ~nonpolar_hit))
                counts['nonpolar_only'] = int(numpy.sum(~polar_hit & nonpolar_hit))

                # normalize
                total = counts['polar_nonpolar'] + counts['polar_only'] + counts['nonpolar_only']
                if total != 0:
                    counts['polar_nonpolar'] = counts['polar_nonpolar'] / float(total)
                    counts['polar_only'] = counts['polar_only'] / float(total)
                    counts['nonpolar_only'] = counts['nonpolar_only'] / float(total)

            print(" ".join(map(str, (sys_idx, counts))))
            out_name = '%(iso)s_%(tag)s_intersection%(sys)d.csv' % names
            # 'wb': the Python 2 csv module expects a binary-mode file.
            with open(out_name, 'wb') as out_file:
                writer = csv.DictWriter(out_file, list(counts))
                # Every system gets its own file, so each one needs a header.
                writer.writeheader()
                writer.writerow(counts)
def monomer_2to1_binding_events_estimate(h5file, inositol_concentration):
    """Count binding events for the 2:1 monomer systems.

    For each isomer and system 0-5, the nonpolar table
    (``/nonpolar_residue/<iso>_sys<i>_mon_2to1_per_inositol_contacts.dat``)
    and the polar table (``/polar/<iso>_sys<i>_mon_2to1_inos_total.dat``)
    are added element-wise without column 0 and passed to
    ``_num_binding_events_state_machine``.  One row per system is written
    to the space-delimited file ``monomer_2to1_binding_events.csv``.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        inositol_concentration: Value copied verbatim into the last column.
    """
    csv_header = ["isomer", "sys_idx", "binding_constant", "inos_conc"]
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open('monomer_2to1_binding_events.csv', 'wb') as out_file:
        writer = csv.writer(out_file, delimiter=' ')
        writer.writerow(csv_header)
        for iso in ["scyllo", "chiro"]:
            for i in range(0, 6):
                names = {'iso': iso, 'i': i}
                # Variables are named after the table they actually hold.
                nonpolar_matrix = myh5.getTableAsMatrix(
                    h5file,
                    os.path.join('/nonpolar_residue', '%(iso)s_sys%(i)d_mon_2to1_per_inositol_contacts.dat' % names),
                    dtype=numpy.float64)
                polar_matrix = myh5.getTableAsMatrix(
                    h5file,
                    os.path.join('/polar', '%(iso)s_sys%(i)d_mon_2to1_inos_total.dat' % names),
                    dtype=numpy.float64)
                print(nonpolar_matrix.shape)
                print(polar_matrix.shape)
                num_binding_events = _num_binding_events_state_machine(
                    polar_matrix[:, 1:] + nonpolar_matrix[:, 1:])
                writer.writerow([iso, i, num_binding_events, inositol_concentration])
def oligomer_stacking(h5file, type, system_indices=(), tag="15", file_path='/stacking'):
    """Tally stacked versus bound phenylalanines for disordered oligomers.

    For every system index, the per-residue contact table and the per-PHE
    stacking table are read.  Each frame's eight PHE values from both
    tables are labelled with their residue names and handed to
    ``match_phe_binding``; its three return values are summed over all
    frames.  One row ``stacked, bound, stacked+bound, stacked/bound`` per
    system is written to ``<type>_<tag>_oligomer_stacking.csv``.

    Systems with a missing table are reported and skipped.  When the bound
    total is zero the ratio column is ``nan`` instead of raising
    ZeroDivisionError.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        type: "15to4" or "45to4"; anything else prints a message and calls
            ``sys.exit()``.
        system_indices: Iterable of integer system numbers.
        tag: Free-form label used in the output file name.
        file_path: HDF5 group holding the stacking tables.
    """
    # The two tables list the PHE columns in different orders, hence one
    # label list per table.
    phe_header_residue = "PHE13 PHE14 PHE22 PHE23 PHE31 PHE32 PHE4 PHE5".split()
    phe_header_stacking = "PHE4 PHE5 PHE13 PHE14 PHE22 PHE23 PHE31 PHE32".split()
    residue_templates = {
        "15to4": '/nonpolar_residue/scyllo_sys%(i)d_%(type)s_whole_nosol_0-200ns_per_residue_contact.dat',
        "45to4": '/nonpolar_residue/scyllo_sys%(i)d_%(type)s_whole_nosol_0-200_per_residue_contact.dat',
    }
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(type + "_" + tag + "_oligomer_stacking.csv", 'wb') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["stacked", "bound", "stacked+bound", "stacked/bound"])
        for i in system_indices:
            if type not in residue_templates:
                print(" ".join(map(str, ("unrecognized system type", type))))
                sys.exit()
            names = {'i': i, 'type': type}
            residue_file = residue_templates[type] % names
            # /stacking/scyllo_sys9_per_phe_stacking.dat
            phe_stacking_file = os.path.join(
                file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat' % names)

            residue_matrix = myh5.getTableAsMatrix(h5file, residue_file, dtype=numpy.float64)
            phe_stacking = myh5.getTableAsMatrix(h5file, phe_stacking_file, dtype=numpy.float64)
            if residue_matrix is None or phe_stacking is None:
                print(" ".join((residue_file, phe_stacking_file, "does not exist")))
                continue

            stacked_system_total = 0
            bound_system_total = 0
            stacked_bound_system_total = 0
            nrows, ncols = residue_matrix.shape
            for row in range(0, nrows):
                # Turn one frame of each table into a plain list.  The upper
                # slice bound is exclusive, and the column indices already
                # account for column 0 being the time.
                residue_data_list = numpy.array(residue_matrix[row, 17:25].T).flatten().tolist()
                stacking_data_list = numpy.array(phe_stacking[row, 1:].T).flatten().tolist()
                residue_data_dict = dict(zip(phe_header_residue, residue_data_list))
                stacking_data_dict = dict(zip(phe_header_stacking, stacking_data_list))
                stacked, bound, stacked_bound = match_phe_binding(residue_data_dict, stacking_data_dict)
                stacked_system_total += stacked
                bound_system_total += bound
                stacked_bound_system_total += stacked_bound

            if bound_system_total:
                stacked_per_bound = stacked_system_total / float(bound_system_total)
            else:
                stacked_per_bound = float('nan')
            writer.writerow([stacked_system_total, bound_system_total,
                             stacked_bound_system_total, stacked_per_bound])
def beta_stacking(h5file, type, system_indices=(), tag="15", file_path='/stacking'):
    """Tally stacked versus bound phenylalanines for the beta systems.

    For every system index, the per-residue contact table and the per-PHE
    stacking table are read.  Each frame's 32 PHE values from both tables
    are labelled with their residue names and handed to
    ``match_phe_binding``; its three return values are summed over all
    frames.  One row ``stacked, bound, stacked+bound, stacked/bound`` per
    system is written to ``<type>_<tag>_beta_stacking.csv``.

    Systems with a missing table are reported and skipped, as in the other
    stacking routines.  When the bound total is zero the ratio column is
    ``nan`` instead of raising ZeroDivisionError.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        type: 'low' or 'high'; anything else prints a message and calls
            ``sys.exit()``.
        system_indices: Iterable of integer system numbers.
        tag: Free-form label used in the output file name.
        file_path: HDF5 group holding the stacking tables.

    Raises:
        AssertionError: If the two tables differ in number of rows.
    """
    # The two tables list the PHE columns in different orders, hence one
    # label list per table.
    phe_header_residue = "PHE103 PHE104 PHE112 PHE113 PHE121 PHE122 PHE13 PHE130 PHE131 PHE139 PHE14 PHE140 PHE22 PHE23 PHE31 PHE32 PHE4 PHE40 PHE41 PHE49 PHE5 PHE50 PHE58 PHE59 PHE67 PHE68 PHE76 PHE77 PHE85 PHE86 PHE94 PHE95".split()
    phe_header_stacking = "PHE4 PHE5 PHE13 PHE14 PHE22 PHE23 PHE31 PHE32 PHE40 PHE41 PHE49 PHE50 PHE58 PHE59 PHE67 PHE68 PHE76 PHE77 PHE85 PHE86 PHE94 PHE95 PHE103 PHE104 PHE112 PHE113 PHE121 PHE122 PHE130 PHE131 PHE139 PHE140".split()
    residue_templates = {
        'low': '/nonpolar_residue_dt1/scyllo_t%(i)d_per_residue_contacts.dat',
        'high': '/nonpolar_residue/scyllo_t%(i)d_per_residue_contacts.dat',
    }
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(type + "_" + tag + "_beta_stacking.csv", 'wb') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["stacked", "bound", "stacked+bound", "stacked/bound"])
        for i in system_indices:
            if type not in residue_templates:
                print(" ".join(map(str, ("system type", type, "is not recognize"))))
                sys.exit()
            names = {'i': i}
            residue_file = residue_templates[type] % names
            phe_stacking_file = os.path.join(
                file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat' % names)
            print(" ".join((residue_file, phe_stacking_file)))

            residue_matrix = myh5.getTableAsMatrix(h5file, residue_file, dtype=numpy.float64)
            phe_stacking = myh5.getTableAsMatrix(h5file, phe_stacking_file, dtype=numpy.float64)
            if residue_matrix is None or phe_stacking is None:
                print(" ".join((residue_file, phe_stacking_file, "does not exist")))
                continue

            print(" ".join(map(str, (residue_matrix.shape, phe_stacking.shape))))
            assert residue_matrix.shape[0] == phe_stacking.shape[0], "residue matrix and phe_stacking matrix must have the same number of rows"

            stacked_system_total = 0
            bound_system_total = 0
            stacked_bound_system_total = 0
            nrows, ncols = residue_matrix.shape
            for row in range(0, nrows):
                # Columns 65-96 of the residue table line up with the 32
                # labels above; the stacking table uses everything after
                # column 0.
                residue_data_list = numpy.array(residue_matrix[row, 65:97].T).flatten().tolist()
                stacking_data_list = numpy.array(phe_stacking[row, 1:].T).flatten().tolist()
                residue_data_dict = dict(zip(phe_header_residue, residue_data_list))
                stacking_data_dict = dict(zip(phe_header_stacking, stacking_data_list))
                stacked, bound, stacked_bound = match_phe_binding(residue_data_dict, stacking_data_dict)
                stacked_system_total += stacked
                bound_system_total += bound
                stacked_bound_system_total += stacked_bound

            if bound_system_total:
                stacked_per_bound = stacked_system_total / float(bound_system_total)
            else:
                stacked_per_bound = float('nan')
            writer.writerow([stacked_system_total, bound_system_total,
                             stacked_bound_system_total, stacked_per_bound])
def beta_binding_event_estimate(h5file, inositol_ratio, inositol_concentration, system_indices=()):
    """Count binding events for the beta systems listed per isomer.

    For each isomer and each system in ``system_indices[isomer]``, the
    nonpolar table (``/nonpolar_residue/<iso>_t<n>_per_inositol_contacts.dat``)
    and the polar table (``/polar/<iso>_t<n>_inos_total.dat``) are added
    element-wise without column 0 and passed to
    ``_num_binding_events_state_machine``.  One row per system is written
    to the space-delimited file ``<inositol_ratio>_binding_events.csv``.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        inositol_ratio: String used in the output file name.
        inositol_concentration: Value copied verbatim into the last column.
        system_indices: Mapping with keys 'scyllo' and 'chiro', each giving
            an iterable of integer system numbers.  Must be non-empty.

    Raises:
        AssertionError: If ``system_indices`` is empty.
    """
    assert len(system_indices) > 0, "List of system indices should be non-empty."
    polar_path = "/polar"
    nonpolar_path = "/nonpolar_residue"
    csv_header = ["isomer", "sys_idx", "binding_constant", "inos_conc"]
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(inositol_ratio + '_binding_events.csv', 'wb') as out_file:
        writer = csv.writer(out_file, delimiter=' ')
        writer.writerow(csv_header)
        for iso in ["scyllo", "chiro"]:
            # Named sys_idx so the loop variable does not hide the sys module.
            for sys_idx in system_indices[iso]:
                names = {'iso': iso, 'sys': sys_idx}
                nonpolar_file = os.path.join(
                    nonpolar_path, "%(iso)s_t%(sys)d_per_inositol_contacts.dat" % names)
                polar_file = os.path.join(polar_path, "%(iso)s_t%(sys)d_inos_total.dat" % names)
                print(" ".join((polar_file, nonpolar_file)))
                nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
                polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)
                num_binding_events = _num_binding_events_state_machine(
                    nonpolar_matrix[:, 1:] + polar_matrix[:, 1:])
                writer.writerow([iso, sys_idx, num_binding_events, inositol_concentration])
def compute_disordered_binding_constant(h5file, inositol_ratio, inositol_concentration, system_indices=()):
    """Compute a binding constant for each disordered-oligomer system.

    For each isomer and each system in ``system_indices[isomer]``, the
    polar and nonpolar contact tables are read and passed, together with
    the concentration, to ``_binding_constant``.  One row per system is
    written to the space-delimited file
    ``<inositol_ratio>_binding_constants.csv``.

    The "4to2" systems use a different table-naming scheme and their
    nonpolar table is used in full; for the other ratios only every second
    row of the nonpolar table is kept.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        inositol_ratio: One of '4to2', '15to4', '45to4'.
        inositol_concentration: Forwarded to ``_binding_constant`` and
            copied into the last column.
        system_indices: Mapping with keys 'scyllo' and 'chiro', each giving
            an iterable of system identifiers.  Must be non-empty.

    Raises:
        AssertionError: If ``system_indices`` is empty.
        KeyError: If ``inositol_ratio`` is not one of the known ratios.
    """
    assert len(system_indices) > 0, "List of system indices should be non-empty."
    polarName = {'4to2': 'inos_total.dat',
                 '15to4': 'whole_nosol_0-200ns_inos_total.dat',
                 '45to4': 'whole_nosol_0-200_inos_total.dat'}
    nonpolarName = {'4to2': 'per_inositol_contacts.dat',
                    '15to4': 'whole_nosol_0-200ns_per_inositol_contacts.dat',
                    '45to4': 'whole_nosol_0-200_per_inositol_contacts.dat'}
    polarPath = "/polar"
    nonpolarPath = "/nonpolar_residue"
    is_4to2 = inositol_ratio == "4to2"
    csv_header = ["isomer", "sys_idx", "binding_constant", "inos_conc"]
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(inositol_ratio + '_binding_constants.csv', 'wb') as out_file:
        writer = csv.writer(out_file, delimiter=' ')
        writer.writerow(csv_header)
        for iso in ["scyllo", "chiro"]:
            # Named sys_idx so the loop variable does not hide the sys module.
            for sys_idx in system_indices[iso]:
                names = {'iso': iso, 'sys': sys_idx, 'inositol_ratio': inositol_ratio}

                if is_4to2:
                    polarFile = os.path.join(
                        polarPath,
                        "klvffae_aggr%(sys)s_%(iso)s_nosol.xtc_" % names + polarName[inositol_ratio])
                else:
                    polarFile = os.path.join(
                        polarPath,
                        "%(iso)s_sys%(sys)s_%(inositol_ratio)s_" % names + polarName[inositol_ratio])
                print(" ".join(("analyzing", polarFile)))
                polarMatrix = myh5.getTableAsMatrix(h5file, polarFile, dtype=numpy.float64)
                print(polarMatrix.shape)

                if is_4to2:
                    nonpolarFile = os.path.join(
                        nonpolarPath, "%(iso)s_sys%(sys)s_per_inositol_contacts.dat" % names)
                else:
                    nonpolarFile = os.path.join(
                        nonpolarPath,
                        "%(iso)s_sys%(sys)s_%(inositol_ratio)s_" % names + nonpolarName[inositol_ratio])
                print(" ".join(("analyzing", nonpolarFile)))

                # This is really bad, but for other systems except 4to2 the
                # nonpolar table is subsampled to every second row.
                row_step = 1 if is_4to2 else 2
                nonpolarMatrix = myh5.getTableAsMatrix(
                    h5file, nonpolarFile, dtype=numpy.float64)[::row_step, :]
                print(nonpolarMatrix.shape)

                binding_constant = _binding_constant(polarMatrix, nonpolarMatrix, inositol_concentration)
                writer.writerow([iso, sys_idx, binding_constant, inositol_concentration])
def intersection_mon(h5file, csv_file, isomer, ratio):
    """Count polar/nonpolar contact overlap in the early part of a run.

    Reads ``/inositol/inos_total`` (polar) and ``/residue/per_inos_contacts``
    (nonpolar).  Only rows whose column-0 value in the polar table is below
    7500 are analysed.  Every cell outside column 0 is classified as
    polar-only, nonpolar-only or both (a cell is a contact when it is
    nonzero).  A header, the raw counts and the counts as fractions of
    their total are written to ``csv_file``.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        csv_file: Path of the CSV file to write.
        isomer: Unused; kept so existing callers keep working.
        ratio: Unused; kept so existing callers keep working.

    Raises:
        AssertionError: If the two tables differ in shape.
    """
    polar_matrix = myh5.getTableAsMatrix(h5file, '/inositol/inos_total')
    nonpolar_matrix = myh5.getTableAsMatrix(h5file, '/residue/per_inos_contacts')
    print(polar_matrix.shape)
    print(nonpolar_matrix.shape)
    assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

    # Rows at or beyond 7500 never reach the output, so they are dropped
    # up front rather than counted into an unused bucket.
    early_rows = polar_matrix[:, 0] < 7500
    polar_hit = polar_matrix[early_rows][:, 1:] != 0
    nonpolar_hit = nonpolar_matrix[early_rows][:, 1:] != 0

    counts = {'polar_only': 0, 'nonpolar_only': 0, 'polar_nonpolar': 0}
    counts['polar_nonpolar'] = int(numpy.sum(polar_hit & nonpolar_hit))
    counts['polar_only'] = int(numpy.sum(polar_hit & ~nonpolar_hit))
    counts['nonpolar_only'] = int(numpy.sum(~polar_hit & nonpolar_hit))

    total = sum(counts.values())
    # With no contacts at all the fractions are reported as 0.0 instead of
    # dividing by zero.
    fraction = dict(
        (key, float(value) / total if total else 0.0)
        for key, value in counts.items())
    print(counts)

    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(csv_file, 'wb') as out_file:
        writer = csv.DictWriter(out_file, list(counts))
        writer.writeheader()
        writer.writerow(counts)
        writer.writerow(fraction)
def process(h5file, ratio):
    """Aggregate the ``/pp_nonpolar`` time series for each isomer.

    For 'scyllo', 'chiro' and 'water', every table under ``/pp_nonpolar``
    whose name matches the isomer (and, except for water, ``ratio``) and
    ends in the ``pp_nonpolar_vs_t.xvg`` pattern contributes its column 1,
    truncated to ``config.LASTFRAME`` rows.  Per isomer the function
    computes summary statistics across the matched tables, the mean and
    standard deviation of the per-table time averages, and smoothed
    average/std curves scaled by ``config.NMOLECULES``.

    Writes ``<ratio>_pp_nonpolar_smoothed.txt.gz`` and
    ``<ratio>_avg_pp_nonpolar_contact.txt`` to the working directory.

    Args:
        h5file: Open HDF5 handle providing ``listNodes``.
        ratio: Ratio label used both for matching and in file names.

    Returns:
        The horizontally stacked smoothed curves for all isomers.
    """
    group = '/pp_nonpolar'
    table_suffix = "pp_nonpolar_vs_t.xvg"
    smoothed_curves = []
    contact_means = []
    contact_stds = []

    #read in files for each system and aggregate
    for iso in ["scyllo", "chiro", "water"]:
        print(" ".join(("processing", iso)))
        names = {'iso': iso, 'ratio': ratio, 'format': table_suffix}
        # Water tables carry no ratio in their name.
        if iso == "water":
            pattern = re.compile(r"%(iso)s.*%(format)s" % names)
        else:
            pattern = re.compile(r"%(iso)s.*%(ratio)s.*%(format)s" % names)

        series = []
        for node in h5file.listNodes(where=group):
            if not pattern.search(node.name):
                continue
            table = myh5.getTableAsMatrix(h5file, os.path.join(group, node.name))
            if table is None:
                print("no data was read in")
                continue
            series.append(table.astype('float')[0:config.LASTFRAME, 1])

        print(" ".join(map(str, ("datalist", series))))
        # One column per matched table, one row per frame.
        data_matrix = numpy.transpose(numpy.vstack(series))
        print(" ".join(map(str, ("data_matrix", data_matrix, data_matrix.shape))))

        avg, std = utils.summary_statistics(data_matrix, sum_across="columns")

        per_table_mean = numpy.average(
            data_matrix[config.STARTFRAME:config.LASTFRAME], axis=0)
        mean_contact = numpy.average(per_table_mean)
        std_contact = numpy.std(per_table_mean)
        print(mean_contact)
        print(std_contact)
        contact_means.append(mean_contact)
        contact_stds.append(std_contact)

        smoothed_curves.append(
            utils.smooth(avg / config.NMOLECULES, 500, time_present=False, timestep=2))
        smoothed_curves.append(
            utils.smooth(std / config.NMOLECULES, 500, time_present=True, timestep=2))

    timeseries_matrix = numpy.hstack(smoothed_curves)
    print(" ".join(map(str, ("timeseries_matrix", timeseries_matrix, timeseries_matrix.shape))))
    print(" ".join(map(str, ("time", timeseries_matrix[:, 0]))))
    numpy.savetxt(ratio + "_pp_nonpolar_smoothed.txt.gz", timeseries_matrix, fmt='%0.3f')
    utils.savetxt(ratio + "_avg_pp_nonpolar_contact.txt", "#scyllo chiro water",
                  numpy.vstack([contact_means, contact_stds]), fmt='%0.3f')
    return timeseries_matrix
def nonpolar_residue_disordered(h5file, tag, first_frame=20000, last_frame=190000,
                                n_peptides=4, n_systems=8):
    """Average per-residue nonpolar contacts over disordered-oligomer systems.

    Every table under ``/nonpolar_residue`` whose name contains
    ``residue_contact`` is time-averaged over rows
    ``first_frame:last_frame`` (column 0 excluded).  The averaged columns
    are regrouped into rows of ``n_peptides`` values and each row is
    averaged, and the result is filed under 'scyllo' or 'chiro' according
    to the table name.  Per isomer, two files are written:

    * ``<tag>_nonpolar_residue_inositol_contact_<isomer>_counts.txt`` --
      one row per system;
    * ``<tag>_nonpolar_residue_inositol_contact_<isomer>_avg_std.txt`` --
      the across-system mean and ``std / sqrt(n_systems)``.

    Args:
        h5file: Open HDF5 handle providing ``listNodes``.
        tag: Prefix for the output file names.
        first_frame: First row included in the time average.
        last_frame: Row at which the time average stops (exclusive).
        n_peptides: Number of values per regrouped row.
        n_systems: Divisor (under a square root) applied to the standard
            deviation.  NOTE(review): presumably the number of systems per
            isomer -- confirm it matches the tables actually present.

    The defaults reproduce the previously hard-coded values.
    """
    scyllo_pattern = re.compile(r'scyllo')
    chiro_pattern = re.compile(r'chiro')
    atype_pattern = re.compile(r'residue_contact')
    data_list = {'scyllo': [], 'chiro': []}

    for table in h5file.listNodes("/nonpolar_residue", 'Table'):
        if not atype_pattern.search(table.name):
            continue
        table_path = os.path.join("/nonpolar_residue", table.name)
        print(table_path)
        data = myh5.getTableAsMatrix(h5file, table_path, dtype=numpy.float64)

        sum_over_time = numpy.average(data[first_frame:last_frame, 1:], axis=0)
        # Regroup the flat per-column averages into rows of n_peptides
        # values.  Integer division keeps the shape an int on every Python
        # version; reshape raises ValueError if the size does not divide.
        sum_over_time = sum_over_time.reshape(
            (sum_over_time.size // n_peptides, n_peptides))
        # Average over all peptides in the system (over columns, hence
        # axis = 1).  The resulting numbers have units of per peptide.
        avg_over_peptides = numpy.average(sum_over_time, axis=1)

        if scyllo_pattern.search(table.name):
            data_list['scyllo'].append(avg_over_peptides)
        elif chiro_pattern.search(table.name):
            data_list['chiro'].append(avg_over_peptides)
        else:
            print(" ".join(("No pattern matches", table.name)))

    # save results to flat files
    for isomer in data_list:
        names = {'tag': tag, 'isomer': isomer}
        nparray = numpy.array(data_list[isomer])
        # dump the list of counts for each system
        numpy.savetxt('%(tag)s_nonpolar_residue_inositol_contact_%(isomer)s_counts.txt' % names,
                      nparray, fmt='%0.8f')
        print(" ".join(map(str, ("saved", isomer, "analysis with shape", nparray.shape))))
        # average over all the systems; each system is a row in nparray
        average = numpy.average(nparray, axis=0)
        std = numpy.std(nparray, axis=0) / math.sqrt(n_systems)
        numpy.savetxt('%(tag)s_nonpolar_residue_inositol_contact_%(isomer)s_avg_std.txt' % names,
                      [average, std], fmt='%0.8f')
def _intersection(h5file, polar_file, nonpolar_file, tag):
    """Write polar/nonpolar contact-overlap fractions for one table pair.

    The two tables are compared cell by cell, skipping row 0 and column 0.
    Each nonzero cell is counted as polar-only, nonpolar-only or both, and
    the counts are normalised by their total when that total is nonzero.
    The result is written, with a header, to ``<tag>_intersection.csv``.

    If either table is missing, the file is still written with all-zero
    counts.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        polar_file: HDF5 path of the polar table.
        nonpolar_file: HDF5 path of the nonpolar table.
        tag: Prefix for the output file name.

    Raises:
        AssertionError: If the two tables differ in shape.
    """
    nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
    polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)

    counts = {'polar_only': 0, 'nonpolar_only': 0, 'polar_nonpolar': 0}
    if polar_matrix is not None and nonpolar_matrix is not None:
        print(polar_matrix.shape)
        print(nonpolar_matrix.shape)
        assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

        # Row 0 and column 0 are both left out of the comparison.
        polar_hit = polar_matrix[1:, 1:] != 0
        nonpolar_hit = nonpolar_matrix[1:, 1:] != 0
        counts['polar_nonpolar'] = int(numpy.sum(polar_hit & nonpolar_hit))
        counts['polar_only'] = int(numpy.sum(polar_hit & ~nonpolar_hit))
        counts['nonpolar_only'] = int(numpy.sum(~polar_hit & nonpolar_hit))

        # normalize
        total = counts['polar_nonpolar'] + counts['polar_only'] + counts['nonpolar_only']
        if total != 0:
            counts['polar_nonpolar'] = counts['polar_nonpolar'] / float(total)
            counts['polar_only'] = counts['polar_only'] / float(total)
            counts['nonpolar_only'] = counts['nonpolar_only'] / float(total)

    # Report the tag with the counts; this function has no system index,
    # and the bare name `sys` here would refer to the sys module.
    print(" ".join(map(str, (tag, counts))))
    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open('%(tag)s_intersection.csv' % {'tag': tag}, 'wb') as out_file:
        writer = csv.DictWriter(out_file, list(counts))
        writer.writeheader()
        writer.writerow(counts)
def nonpolar_residue_beta_low_molar(h5file, system_indices=()):
    """Average per-residue nonpolar contacts for the low-molar beta systems.

    For each isomer and each system in ``system_indices``, the five tables
    ``/nonpolar_revision/<isomer>_sys<s>_t<i>_per_residue_contacts.dat``
    (i = 1..5) are stacked row-wise and time-averaged (column 0 excluded).
    The averaged columns are regrouped into rows of 16 values and each row
    is summed.  Per isomer, two files are written:

    * ``<isomer>_low_molar_nonpolar_residue_contact.txt`` -- one row per
      system;
    * ``<isomer>_low_molar_nonpolar_residue_contact_avg_std.txt`` -- the
      across-system mean divided by 16, and the standard deviation divided
      by 16 and by ``sqrt(len(system_indices))``.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        system_indices: Non-empty sequence of integer system numbers.

    Raises:
        AssertionError: If ``system_indices`` is empty.
    """
    assert len(system_indices) > 0, "The list of system_indices should not be empty"
    for isomer in ["scyllo", "chiro"]:
        data_list = []
        for s in system_indices:
            nonpolar_residue_large = None
            for i in range(1, 6):
                nonpolar_residue_path = (
                    "/nonpolar_revision/%(isomer)s_sys%(s)d_t%(i)d_per_residue_contacts.dat"
                    % {'isomer': isomer, 's': s, 'i': i})
                print(" ".join(("analyzing ", nonpolar_residue_path)))
                # read in the file
                data = myh5.getTableAsMatrix(h5file, nonpolar_residue_path, dtype=numpy.float64)
                print(data)
                if nonpolar_residue_large is None:
                    print("in here")
                    nonpolar_residue_large = data
                else:
                    nonpolar_residue_large = numpy.concatenate((nonpolar_residue_large, data))

            nrows, ncols = nonpolar_residue_large.shape
            print(" ".join(map(str, (nrows, ncols))))
            # average over rows (time)
            time_avg = numpy.average(nonpolar_residue_large[:, 1:], axis=0)
            print(time_avg.shape)
            # Integer division keeps the shape an int on every Python
            # version; reshape raises ValueError if the size is not a
            # multiple of 16.
            time_avg = time_avg.reshape((time_avg.size // 16, 16))
            print(time_avg.shape)
            sum_over_peptides = numpy.sum(time_avg, axis=1)
            data_list.append(sum_over_peptides)

        # save results to flat files
        nparray = numpy.array(data_list)
        # dump the list of results for each system
        numpy.savetxt('%(isomer)s_low_molar_nonpolar_residue_contact.txt' % {'isomer': isomer},
                      nparray, fmt='%0.8f')
        # average over all the systems; each system is a row in nparray
        average = numpy.average(nparray, axis=0) / 16
        std = numpy.std(nparray, axis=0) / 16 / math.sqrt(len(system_indices))
        #save the normalized average and std
        numpy.savetxt('%(isomer)s_low_molar_nonpolar_residue_contact_avg_std.txt' % {'isomer': isomer},
                      [average, std], fmt='%0.8f')
def process(h5file, ratio):
    """Average the ``nclust`` time series under ``/cluster`` per isomer.

    For 'scyllo', 'chiro' and 'water', every node under ``/cluster`` whose
    name matches the isomer (and, except for water, ``ratio``) and the
    ``nclust.xvg`` pattern is read and truncated to ``config.LASTFRAME``
    rows.  The matched tables are stacked side by side, every second
    column starting at column 1 is averaged per row, and the result is
    paired with column 0 and saved as ``<iso>_<ratio>_nclust.txt.gz``.

    Args:
        h5file: Open HDF5 handle providing ``listNodes``.
        ratio: Ratio label used for matching, file names and the plot label.

    Returns:
        Tuple ``(plot_list, labellist)``: one two-column array and one
        label string per isomer.
    """
    root = '/cluster'
    curves = []
    labels = []
    for iso in ["scyllo", "chiro", "water"]:
        names = {'iso': iso, 'ratio': ratio}
        # Water tables carry no ratio in their name.
        if iso != "water":
            pattern = re.compile(r'%(iso)s.*%(ratio)s.*nclust.xvg' % names)
        else:
            pattern = re.compile(r'%(iso)s.*nclust.xvg' % names)

        tables = []
        for node in h5file.listNodes(where=root):
            node_name = node.name
            if not pattern.search(node_name):
                continue
            print(node_name)
            table = myh5.getTableAsMatrix(
                h5file, os.path.join(root, node_name))[0:config.LASTFRAME]
            print(table.shape)
            tables.append(table)

        print(len(tables))
        all_data = numpy.hstack(tables)
        print(all_data.shape)
        print(all_data)

        # Odd-numbered columns hold the values; column 0 is reused as the
        # x axis of the averaged curve.
        value_columns = all_data[:, 1::2]
        row_average = numpy.average(value_columns, axis=1)
        plot_data = numpy.transpose([all_data[:, 0], row_average])
        numpy.savetxt('%(iso)s_%(ratio)s_nclust.txt.gz' % names, plot_data, fmt='%0.2f')
        curves.append(plot_data)

        plot_label = config.LABEL[iso] + " (" + config.RATIO[ratio] + ")"
        labels.append("%(plot_label)s" % {'plot_label': plot_label})
    return (curves, labels)
def process(h5file, ratio, format="p2p_vs_t.dat"):
    """Summarise the peptide-peptide contact time series under ``/polar``.

    Given an h5file, builds a list of data to be plotted as line plots and
    a corresponding list of labels.  For 'scyllo', 'chiro' and 'water',
    every table under ``/polar`` whose name matches the isomer (and,
    except for water, ``ratio``) and ``format`` contributes column 1
    (inter) and column 2 (intra), truncated to ``config.LASTFRAME`` rows.
    Summary statistics, a smoothed copy, and the mean and standard
    deviation of the per-table time averages are computed per isomer.

    Writes ``<ratio>_p2p_vs_t.txt``, ``<ratio>_p2p_vs_t_smoothed.txt``
    (both from the last isomer processed) and
    ``<ratio>_avg_contacts_w_err.txt`` to the working directory.

    Args:
        h5file: Open HDF5 handle providing ``listNodes``.
        ratio: Ratio label used for matching, file names and labels.
        format: Table-name suffix pattern to match.

    Returns:
        Tuple ``(datalist, labellist)``: one smoothed matrix and one label
        per isomer.
    """
    header = "# time average_inter std_inter average_intra std_intra"
    group = '/polar'
    smoothed_series = []
    labels = []
    contact_means = []
    contact_stds = []

    for iso in ["scyllo", "chiro", "water"]:
        print(" ".join(("processing", iso)))
        names = {'iso': iso, 'ratio': ratio, 'format': format}
        # Water tables carry no ratio in their name.
        if iso == "water":
            pattern = re.compile(r"%(iso)s.*%(format)s" % names)
        else:
            pattern = re.compile(r"%(iso)s.*%(ratio)s.*%(format)s" % names)

        inter_columns = []
        intra_columns = []
        for node in h5file.listNodes(where=group):
            if not pattern.search(node.name):
                continue
            print(" ".join(("processing", node.name)))
            table_data = myh5.getTableAsMatrix(
                h5file, os.path.join(group, node.name), dtype=numpy.int32)
            table_data = table_data.astype('float')
            print(" ".join(map(str, ("converted to float32", table_data))))
            nrows, ncols = table_data.shape
            assert nrows > ncols
            print(" ".join(map(str, ("Test data read in dimensions", table_data.shape, table_data.dtype))))
            inter_columns.append(table_data[0:config.LASTFRAME, 1])
            intra_columns.append(table_data[0:config.LASTFRAME, 2])

        # compute summary statistics
        print("summarizing statistics ... ")
        inter_matrix = utils.array_list_to_matrix(inter_columns)
        intra_matrix = utils.array_list_to_matrix(intra_columns)
        average_inter, std_inter = utils.summary_statistics(inter_matrix)
        average_intra, std_intra = utils.summary_statistics(intra_matrix)

        # compute the time average number of contacts and its std error
        per_table_mean = numpy.average(inter_matrix, axis=0)
        mean_contact = numpy.average(per_table_mean)
        std_contact = numpy.std(per_table_mean)
        contact_means.append(mean_contact)
        contact_stds.append(std_contact)
        print(" ".join(map(str, (mean_contact, std_contact))))

        # The time axis comes from the most recently read table.
        time = table_data[0:config.LASTFRAME, 0]
        plotdata = utils.array_list_to_matrix(
            [time, average_inter, std_inter, average_intra, std_intra])
        print(" ".join(map(str, ("plotdata", plotdata))))
        print(" ".join(map(str, ("Test: dimensions of plotdata for", iso, ratio, plotdata.shape))))

        plotdata_smoothed = utils.smooth(plotdata, 500, time_present=True, timestep=2)
        print(plotdata_smoothed)
        smoothed_series.append(plotdata_smoothed)
        print(" ".join(map(str, ("smoothed data", plotdata_smoothed, plotdata_smoothed.shape))))

        ratiolabel = config.RATIO[ratio]
        if iso == "water":
            labels.append("water")
        else:
            labels.append("%(iso)s (%(ratiolabel)s)" % {'iso': iso, 'ratiolabel': ratiolabel})

    ratio_names = {'ratio': ratio}
    utils.savetxt('%(ratio)s_p2p_vs_t.txt' % ratio_names, header, plotdata, fmt='%0.2f')
    utils.savetxt('%(ratio)s_p2p_vs_t_smoothed.txt' % ratio_names, header,
                  plotdata_smoothed, fmt='%0.2f')
    utils.savetxt('%(ratio)s_avg_contacts_w_err.txt' % ratio_names, "#scyllo chiro water",
                  numpy.vstack([contact_means, contact_stds]), fmt='%0.2f')
    return (smoothed_series, labels)
def get_angle_matrix(h5file, sys):
    """Return the table stored at ``/angle/<sys>`` as a float64 matrix.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        sys: Name of the table inside the ``/angle`` group.

    Returns:
        Whatever ``myh5.getTableAsMatrix`` returns for that path.
    """
    table_path = os.path.join('/angle', sys)
    print(" ".join(("getting ", table_path)))
    return myh5.getTableAsMatrix(h5file, table_path, dtype=numpy.float64)
def intersection_disordered(h5file, ratio, system_indices):
    """Intersection analysis for disordered oligomers.

    For each isomer and each system in ``system_indices[isomer]``, the
    polar table and every second row of the nonpolar table are compared
    cell by cell over rows 20001 up to (but excluding) row 100000 or the
    nonpolar row count, whichever is smaller, skipping column 0.  Each
    nonzero cell is counted as polar-only, nonpolar-only or both.  The
    per-system fractions and total, followed by an average row and a
    spread row per isomer, are written space-delimited to
    ``<ratio>_intersection.txt``.

    Systems for which either table is missing are skipped.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        ratio: One of '4to2', '15to4', '45to4'.
        system_indices: Mapping with keys 'scyllo' and 'chiro', each giving
            an iterable of system identifiers.

    Raises:
        KeyError: If ``ratio`` is not one of the known ratios.
        ZeroDivisionError: If a system has no contacts in the analysed rows.
    """
    #nasty fix for different table names
    polarName = {'4to2': 'inos_total.dat',
                 '15to4': 'whole_nosol_0-200ns_inos_total.dat',
                 '45to4': 'whole_nosol_0-200_inos_total.dat'}
    nonpolarName = {'4to2': 'per_inositol_contacts.dat',
                    '15to4': 'whole_nosol_0-200ns_per_inositol_contacts.dat',
                    '45to4': 'whole_nosol_0-200_per_inositol_contacts.dat'}
    # e.g. klvffae_aggr0_chiro_nosol.xtc_inos_total.dat
    polarPath = "/polar"
    nonpolarPath = "/nonpolar_residue"
    dataList = [['isomer', 'system#', 'polar_only', 'polar_and_nonpolar', 'nonpolar_only', 'total']]

    print(system_indices)
    for iso in ["scyllo", "chiro"]:
        data = []
        # Named sys_idx so the loop variable does not hide the sys module.
        for sys_idx in system_indices[iso]:
            names = {'iso': iso, 'sys': sys_idx, 'ratio': ratio}
            if ratio == "4to2":
                polarFile = os.path.join(
                    polarPath, "klvffae_aggr%(sys)s_%(iso)s_nosol.xtc_" % names + polarName[ratio])
            else:
                polarFile = os.path.join(
                    polarPath, "%(iso)s_sys%(sys)s_%(ratio)s_" % names + polarName[ratio])
            print(" ".join(("analyzing", polarFile)))
            polarMatrix = myh5.getTableAsMatrix(h5file, polarFile, dtype=numpy.float64)
            print(polarMatrix)

            if ratio == "4to2":
                nonpolarFile = os.path.join(
                    nonpolarPath, "%(iso)s_sys%(sys)s_" % names + nonpolarName[ratio])
            else:
                nonpolarFile = os.path.join(
                    nonpolarPath, "%(iso)s_sys%(sys)s_%(ratio)s_" % names + nonpolarName[ratio])
            print(" ".join(("analyzing", nonpolarFile)))
            nonpolarMatrix = myh5.getTableAsMatrix(h5file, nonpolarFile, dtype=numpy.float64)

            # Identity test: comparing an array to None with != is
            # element-wise in numpy and has no single truth value.
            if polarMatrix is None or nonpolarMatrix is None:
                continue

            # Only every second row of the nonpolar table is used.
            nonpolarMatrix = nonpolarMatrix[::2, :]
            rows, cols = nonpolarMatrix.shape
            print(" ".join(map(str, (rows, cols))))
            print(polarMatrix.shape)
            rows = min(rows, 100000)
            print(" ".join(map(str, (rows, cols))))

            polar_hit = polarMatrix[20001:rows, 1:cols] != 0
            nonpolar_hit = nonpolarMatrix[20001:rows, 1:cols] != 0
            polar_and_nonpolar = float(numpy.sum(polar_hit & nonpolar_hit))
            polar_only = float(numpy.sum(polar_hit & ~nonpolar_hit))
            nonpolar_only = float(numpy.sum(~polar_hit & nonpolar_hit))

            total = polar_only + polar_and_nonpolar + nonpolar_only
            fractions = [polar_only / total, polar_and_nonpolar / total,
                         nonpolar_only / total, total]
            dataList.append([iso, sys_idx] + fractions)
            data.append(fractions)

        print(data)
        per_system = numpy.array(data)
        print(per_system)
        average = numpy.average(per_system, axis=0)
        # NOTE(review): the divisor is the number of keys in system_indices,
        # not the number of systems analysed for this isomer -- confirm that
        # this is the intended normalisation.
        std = numpy.std(per_system, axis=0) / len(system_indices)
        print(average.tolist())
        print(std.tolist())

        addToListAvg = [iso + ' avg', 'all']
        addToListAvg.extend(average.tolist())
        addToListStd = [iso + ' std', 'all']
        addToListStd.extend(std.tolist())
        dataList.append(addToListAvg)
        dataList.append(addToListStd)

    # 'wb': the Python 2 csv module expects a binary-mode file.
    with open(ratio + '_intersection.txt', 'wb') as out_file:
        csv.writer(out_file, delimiter=' ').writerows(dataList)
def get_mindist_matrix(h5file, sys):
    """Return the table stored at ``/mindist/<sys>`` as a float64 matrix.

    Args:
        h5file: Handle forwarded unchanged to ``myh5.getTableAsMatrix``.
        sys: Name of the table inside the ``/mindist`` group.

    Returns:
        Whatever ``myh5.getTableAsMatrix`` returns for that path.
    """
    table_path = os.path.join('/mindist', sys)
    print(" ".join(("getting ", table_path)))
    matrix = myh5.getTableAsMatrix(h5file, table_path, dtype=numpy.float64)
    return matrix
def _cluster_size_histogram(sizes):
    """Return how often each size from 0 to max(sizes) occurs in ``sizes``."""
    if not sizes:
        # numpy builds a float array from an empty list, which bincount
        # rejects; an empty histogram is the meaningful answer here.
        return numpy.zeros(0, dtype=numpy.intp)
    return numpy.bincount(numpy.array(sizes))


def _write_normalized_histogram(path, hist):
    """Write ``<size> <frequency>`` lines, frequencies summing to one."""
    total = float(hist.sum())
    with open(path, 'w') as results_file:
        for size in range(0, hist.size):
            results_file.write("%d %0.2f\n" % (size, hist[size] / total))


def compute_inositol_ub_b_cluster_size_histo(nonpolar_h5file, polar_h5file, clust_info_path, iso, sys, tag=""):
    """Histogram inositol cluster sizes, split into bound and unbound.

    The polar (hydrogen-bond) and nonpolar contact tables of one system
    are added element-wise without column 0.  Each row of
    ``<clust_info_path>/<sys>_final_clust_info.dat`` (comma separated:
    time, "yes"/other clustered flag, space-separated inositol ids) is
    matched to contact row ``time // 2``.  A clustered group contributes
    its size to the bound or unbound list depending on
    ``_cluster_bound_to_protein``; otherwise every member contributes a
    size of 1, bound if its contact value is positive.  Reading stops at
    the first row that raises IndexError.

    The normalised histograms are written to
    ``<iso>_sys<sys><tag>bound_hist.txt`` and
    ``<iso>_sys<sys><tag>unbound_hist.txt``.  A list with no entries
    produces an empty file.

    Args:
        nonpolar_h5file: Handle holding the nonpolar contact table.
        polar_h5file: Handle holding the hydrogen-bond table.
        clust_info_path: Directory containing the cluster-info file.
        iso: Isomer name used in table and file names.
        sys: Integer system number used in table and file names.
        tag: Optional label inserted into the output file names.
    """
    names = {'iso': iso, 'sys': sys}
    # Path names to the tables saved in the h5 files
    nonpolar_contacts_file = "/%(iso)s_64_inositol_nonpolar_contacts_%(sys)s" % names
    polar_contacts_file = "/%(iso)s_64_inositol_hbonds_%(sys)s" % names
    nonpolar_contacts = myh5.getTableAsMatrix(nonpolar_h5file, nonpolar_contacts_file, dtype=numpy.float64)
    polar_contacts = myh5.getTableAsMatrix(polar_h5file, polar_contacts_file, dtype=numpy.float64)
    clust_info_csv = os.path.join(clust_info_path, '%(sys)d_final_clust_info.dat' % names)

    contacts_matrix = polar_contacts[:, 1:] + nonpolar_contacts[:, 1:]
    print(contacts_matrix.shape)

    # Parse the csv inositol clusters data
    bound_sizes_list = []
    unbound_sizes_list = []
    with open(clust_info_csv, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            time = row[0]
            clustered = row[1]
            inositol_ids = row[2].split(' ')
            inositol_indices = _residue_ids_to_indices(inositol_ids)
            try:
                # Floor division: the result is used as a row index and
                # must stay an int on every Python version.
                inos_in_cluster_contacts = _contacts_for_indices_in_cluster(
                    contacts_matrix[int(time) // 2], inositol_indices)
            except IndexError:
                print(" ".join(("index error encountered at time=", time)))
                break

            if clustered == "yes":
                if _cluster_bound_to_protein(inos_in_cluster_contacts):
                    bound_sizes_list.append(len(inositol_indices))
                else:
                    unbound_sizes_list.append(len(inositol_indices))
            else:
                for val in inos_in_cluster_contacts:
                    if val > 0:
                        bound_sizes_list.append(1)
                    else:
                        unbound_sizes_list.append(1)

    # compute histograms
    bound_hist = _cluster_size_histogram(bound_sizes_list)
    unbound_hist = _cluster_size_histogram(unbound_sizes_list)
    print(bound_hist)
    print(unbound_hist)

    file_prefix = iso + '_sys' + str(sys) + tag
    _write_normalized_histogram(file_prefix + 'bound_hist.txt', bound_hist)
    _write_normalized_histogram(file_prefix + 'unbound_hist.txt', unbound_hist)