Beispiel #1
0
def intersection_mon_low_molar_ratio(h5file, ratio):
    for iso in ['scyllo', 'chiro']:
        for run_set in range(0, 6):
            polar_matrix = myh5.getTableAsMatrix(h5file, os.path.join('/polar', '%(iso)s_sys%(run_set)d_mon_2to1_inos_total.dat' % vars()), dtype=numpy.float64)
            print polar_matrix
            print polar_matrix.shape
            nonpolar_matrix = myh5.getTableAsMatrix(h5file, os.path.join('/nonpolar_residue', '%(iso)s_sys%(run_set)d_mon_2to1_per_inositol_contacts.dat' % vars()), dtype=numpy.float64)
            print nonpolar_matrix
            print nonpolar_matrix.shape

            assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

            nrows, ncols = polar_matrix.shape
            counts = [{'polar_only':0, 'nonpolar_only':0, 'polar_nonpolar':0}, {'polar_only':0, 'nonpolar_only':0, 'polar_nonpolar':0}]
            for i in range(0, nrows): 
                for j in range(1, ncols):
                    if polar_matrix[i][j] and nonpolar_matrix[i][j]:
                        counts[0]['polar_nonpolar'] += 1
                    elif polar_matrix[i][j]:
                        counts[0]['polar_only'] += 1
                    elif nonpolar_matrix[i][j]:
                        counts[0]['nonpolar_only'] += 1

            # class csv.DictWriter(csvfile, fieldnames[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]])
            total = counts[0]['polar_only'] + counts[0]['nonpolar_only'] + counts[0]['polar_nonpolar']
            fraction = {'polar_only': float(counts[0]['polar_only']) / total, 'nonpolar_only' : float(counts[0]['nonpolar_only']) / total, 'polar_nonpolar' : float(counts[0]['polar_nonpolar']) / total}

            print counts[0]

            writer = csv.DictWriter(open('%(iso)s_sys%(run_set)d_mon_2to1_intersection.csv' % vars(), 'wb'), counts[0].keys())
            writer.writeheader()
            writer.writerow(counts[0])
            writer.writerow(fraction)
Beispiel #2
0
def monomer_15to1_binding_events(h5file, inositol_concentration):
    writer = csv.writer(open('monomer_15to1_binding_events.csv', 'wb'), delimiter=' ')
    csv_header = ["isomer", "inositol_ratio", "binding_constant", "inos_conc"]
    writer.writerow(csv_header)
    for isomer in ["scyllo", "chiro"]:
        for k in range(1, 6):
            polar_big_matrix = None
            nonpolar_big_matrix = None
            from_idx = (k-1)*100 + 1
            to_idx = k*100 + 1
            
            print "Computing run_set", k, "with systems from", from_idx, "to", to_idx-1
            
            for i in range(from_idx, to_idx):
                polar_matrix = myh5.getTableAsMatrix(h5file, '/polar/%(isomer)s_sys%(i)d_inos_total.dat' % vars(), dtype=numpy.float64)
                nonpolar_matrix = myh5.getTableAsMatrix(h5file, '/nonpolar/%(isomer)s_sys%(i)d_per_inositol_contacts.dat' % vars(), dtype=numpy.float64)
               
                if polar_matrix is not None and nonpolar_matrix is not None:
                    if polar_big_matrix is None and nonpolar_big_matrix is None: 
                        polar_big_matrix = polar_matrix
                        nonpolar_big_matrix = nonpolar_matrix
                    else:
                        polar_big_matrix = numpy.concatenate((polar_big_matrix, polar_matrix))
                        nonpolar_big_matrix = numpy.concatenate((nonpolar_big_matrix, nonpolar_matrix))
                else:
                    print "data files (polar and nonpolar) for system", i, "was not found"
            
            num_binding_events = _num_binding_events_state_machine(nonpolar_big_matrix[:, 1:] + polar_big_matrix[:, 1:])
            writer.writerow([isomer, "15to1", num_binding_events, inositol_concentration])     
Beispiel #3
0
def compute_beta_low_molar_binding_events(h5file, inositol_ratio, inositol_concentration):
    # assert len(system_indices) > 0, "List of system indices should be non-empty."
    
    isomerList = ["scyllo", "chiro"]
    polar_path = "/polar"
    nonpolar_path = "/nonpolar_revision"
    csv_header = ["isomer", "sys_idx", "binding_events", "inos_conc"]

    writer = csv.writer(open('beta_' + inositol_ratio + '_binding_events.csv', 'wb'), delimiter=' ')
    writer.writerow(csv_header)

    for iso in isomerList:
        for sys in range(0, 3):
            for i in range(1, 6):
                nonpolar_file = os.path.join(nonpolar_path, "%(iso)s_sys%(sys)d_t%(i)d_per_inositol_contacts.dat" % vars())
                polar_file = os.path.join(polar_path, "%(iso)s_sys%(sys)d_t%(i)d_inos_total.dat" % vars())

                print polar_file, nonpolar_file

                nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
                polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)

                print polar_matrix.shape, nonpolar_matrix.shape
            
                num_binding_events = _num_binding_events_state_machine(polar_matrix[:, 1:] + nonpolar_matrix[:, 1:])
                writer.writerow([iso, sys, num_binding_events, inositol_concentration])
Beispiel #4
0
def monomer_stacking(h5file, ratio, system_indices, tag="15", file_path='/stacking'):
    writer = csv.writer(open(ratio + "_" + tag + "_monomer_stacking.csv", 'wb'))
    header = ["stacked","bound", "stacked+bound", "stacked/bound"]

    writer.writerow(header)
    for i in system_indices:
        residue_file = ""
        phe_stacking_file = ""
        if ratio == "2to1":
            residue_file = '/nonpolar_residue/scyllo_sys%(i)d_mon_2to1_per_residue_contacts.dat' % vars()
            phe_stacking_file = os.path.join(file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat') % vars()
            print residue_file, phe_stacking_file
        elif ratio == "15to1":
            residue_file = '/nonpolar/scyllo_sys%(i)d_per_residue_contacts.dat' % vars()
            phe_stacking_file = os.path.join(file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat') % vars()
            print residue_file, phe_stacking_file
        else:
            # TODO: Throw a custom exception here
            print "ratio ", ratio, "is not recognized"
            sys.exit()
            
        residue_matrix = myh5.getTableAsMatrix(h5file, residue_file, dtype=numpy.float64)
        phe_stacking  = myh5.getTableAsMatrix(h5file, phe_stacking_file, dtype=numpy.float64)

        if residue_matrix is None or phe_stacking is None:
            print residue_file, "or", phe_stacking_file, "does not exist"
            continue

        print residue_matrix.shape, phe_stacking.shape
        assert residue_matrix.shape[0] == phe_stacking.shape[0], "Residue and phe stacking matrices must have the same number of lines"

        nrows, ncols = residue_matrix.shape

        bound = 0.0
        stacked = 0.0
        stacked_bound = 0.0
        for k in range(0, nrows):
            if residue_matrix[k][5] > 0: 
                bound = bound + 1

            if residue_matrix[k][6] > 0: 
                bound = bound + 1

            if phe_stacking[k][1] > 0:
                stacked = stacked + 1

            if phe_stacking[k][2] > 0:
                stacked = stacked + 1

            # this is for a sanity check
            if residue_matrix[k][5] > 0 and phe_stacking[k][1] > 0:
                stacked_bound += 1
            
            if residue_matrix[k][6] > 0 and phe_stacking[k][2] > 0:
                stacked_bound += 1

        writer.writerow([stacked, bound, stacked_bound, stacked / float(bound)])
Beispiel #5
0
def intersection_beta(h5file, tag):
    isomerList = ["scyllo", "chiro"]
    polar_path = "/polar"
    nonpolar_path = "/nonpolar_residue"
   
    write_header = False
    for iso in isomerList:
        for sys in range(0, 6):
            nonpolar_file = os.path.join(nonpolar_path, "%(iso)s_t%(sys)d_per_inositol_contacts.dat" % vars())
            polar_file = os.path.join(polar_path, "%(iso)s_t%(sys)d_inos_total.dat" % vars())

            nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
            polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)

            counts = {'polar_only' : 0, 'nonpolar_only' : 0, 'polar_nonpolar' : 0}

            if polar_matrix is not None and nonpolar_matrix is not None:
                print polar_matrix.shape
                print nonpolar_matrix.shape

                assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

                nrows, ncols = polar_matrix.shape
                for i in range(1, nrows):
                    for j in range(1, ncols):
                        if polar_matrix[i][j] and nonpolar_matrix[i][j]:
                            counts['polar_nonpolar'] += 1
                        elif polar_matrix[i][j]:
                            counts['polar_only'] += 1
                        elif nonpolar_matrix[i][j]:
                            counts['nonpolar_only'] += 1

            # normalize
            total = counts['polar_nonpolar'] + counts['polar_only'] + counts['nonpolar_only']
            if total != 0:
                counts['polar_nonpolar'] = counts['polar_nonpolar'] / float(total)
                counts['polar_only'] = counts['polar_only'] / float(total)
                counts['nonpolar_only'] = counts['nonpolar_only'] / float(total)


            writer = csv.DictWriter(open('%(iso)s_%(tag)s_intersection%(sys)d.csv' % vars(), 'wb'), counts.keys())
            if write_header is False:
                writer.writeheader()
            else:
                write_header = True

            # class csv.DictWriter(csvfile, fieldnames[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]])
            print sys, counts
            writer.writerow(counts)
Beispiel #6
0
def monomer_2to1_binding_events_estimate(h5file, inositol_concentration):
    isomer =  ["scyllo", "chiro"]
   
    csv_header = ["isomer", "sys_idx", "binding_constant", "inos_conc"]
    writer = csv.writer(open('monomer_2to1_binding_events.csv', 'wb'), delimiter=' ')
    writer.writerow(csv_header)

    for iso in isomer:
        for i in range(0, 6):
            polar_matrix = myh5.getTableAsMatrix(h5file, os.path.join('/nonpolar_residue', '%(iso)s_sys%(i)d_mon_2to1_per_inositol_contacts.dat' % vars()), dtype=numpy.float64)
            nonpolar_matrix = myh5.getTableAsMatrix(h5file, os.path.join('/polar', '%(iso)s_sys%(i)d_mon_2to1_inos_total.dat' % vars()), dtype=numpy.float64)

            print polar_matrix.shape
            print nonpolar_matrix.shape
            num_binding_events = _num_binding_events_state_machine(nonpolar_matrix[:, 1:] + polar_matrix[:, 1:])
            writer.writerow([iso, i, num_binding_events, inositol_concentration])
Beispiel #7
0
def oligomer_stacking(h5file, type, system_indices = [], tag="15", file_path='/stacking'):
    phe_header_residue = "PHE13 PHE14 PHE22 PHE23 PHE31 PHE32 PHE4 PHE5".split()
    phe_header_stacking = "PHE4 PHE5 PHE13 PHE14 PHE22 PHE23 PHE31 PHE32".split()

    writer = csv.writer(open(type + "_" + tag + "_oligomer_stacking.csv", 'wb'))
    writer.writerow(["stacked", "bound", "stacked+bound", "stacked/bound"])
    for i in system_indices:
        stacked_system_total = 0
        bound_system_total = 0
        stacked_bound_system_total = 0
        residue_file = ""
        if type == "15to4":
            residue_file = '/nonpolar_residue/scyllo_sys%(i)d_%(type)s_whole_nosol_0-200ns_per_residue_contact.dat' % vars()
        elif type == "45to4":            
           residue_file = '/nonpolar_residue/scyllo_sys%(i)d_%(type)s_whole_nosol_0-200_per_residue_contact.dat' % vars()
        else:
            print "unrecognized system type", type
            sys.exit()

        # /stacking/scyllo_sys9_per_phe_stacking.dat
        phe_stacking_file = os.path.join(file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat' % vars())

        residue_matrix = myh5.getTableAsMatrix(h5file, residue_file, dtype=numpy.float64)
        phe_stacking  = myh5.getTableAsMatrix(h5file, phe_stacking_file, dtype=numpy.float64)

        if residue_matrix is None or phe_stacking is None:
            print residue_file, phe_stacking_file, "does not exist"
            continue

        nrows,ncols = residue_matrix.shape
        for row in range(0, nrows):
            # Grab a row from a numpy matrix and converts it to a list
            # Note that numpy matrix slicing range is not inclusive at the higher index
            # Note that the column indices accounts for the first column as being time
            residue_data_list = numpy.array(residue_matrix[row, 17:25].T).flatten().tolist()
            stacking_data_list = numpy.array(phe_stacking[row, 1:].T).flatten().tolist()
            
            residue_data_dict = dict(zip(phe_header_residue, residue_data_list))
            stacking_data_dict = dict(zip(phe_header_stacking, stacking_data_list))
            
            stacked,bound,stacked_bound = match_phe_binding(residue_data_dict, stacking_data_dict)

            stacked_system_total += stacked
            bound_system_total += bound
            stacked_bound_system_total += stacked_bound

        writer.writerow([stacked_system_total, bound_system_total, stacked_bound_system_total, stacked_system_total / float(bound_system_total)])
Beispiel #8
0
def beta_stacking(h5file, type, system_indices = [], tag="15", file_path='/stacking'):
    writer = csv.writer(open(type + "_" + tag + "_beta_stacking.csv", 'wb'))
    writer.writerow(["stacked", "bound", "stacked+bound", "stacked/bound"])

    phe_header_residue = "PHE103 PHE104 PHE112 PHE113 PHE121 PHE122 PHE13 PHE130 PHE131 PHE139 PHE14 PHE140 PHE22 PHE23 PHE31 PHE32 PHE4 PHE40 PHE41 PHE49 PHE5 PHE50 PHE58 PHE59 PHE67 PHE68 PHE76 PHE77 PHE85 PHE86 PHE94 PHE95".split()
    phe_header_stacking = "PHE4 PHE5 PHE13 PHE14 PHE22 PHE23 PHE31 PHE32 PHE40 PHE41 PHE49 PHE50 PHE58 PHE59 PHE67 PHE68 PHE76 PHE77 PHE85 PHE86 PHE94 PHE95 PHE103 PHE104 PHE112 PHE113 PHE121 PHE122 PHE130 PHE131 PHE139 PHE140".split()
    for i in system_indices:
        stacked_system_total = 0
        bound_system_total = 0
        stacked_bound_system_total = 0
        residue_file = ""
        if type == 'low':
            residue_file = '/nonpolar_residue_dt1/scyllo_t%(i)d_per_residue_contacts.dat' % vars()
        elif type == 'high':
            residue_file = '/nonpolar_residue/scyllo_t%(i)d_per_residue_contacts.dat' % vars()
        else:
            print "system type", type, "is not recognize"
            sys.exit()

        phe_stacking_file = os.path.join(file_path, 'scyllo_sys%(i)d_per_phe_stacking.dat' % vars())

        print residue_file, phe_stacking_file
 
        residue_matrix = myh5.getTableAsMatrix(h5file, residue_file, dtype=numpy.float64)
        phe_stacking  = myh5.getTableAsMatrix(h5file, phe_stacking_file, dtype=numpy.float64)
        
        print residue_matrix.shape, phe_stacking.shape
        assert residue_matrix.shape[0] == phe_stacking.shape[0], "residue matrix and phe_stacking matrix must have the same number of rows" 
  
        nrows, ncols = residue_matrix.shape
        for row in range(0, nrows):
            residue_data_list = numpy.array(residue_matrix[row, 65:97].T).flatten().tolist()
            stacking_data_list = numpy.array(phe_stacking[row, 1:].T).flatten().tolist()
            
            residue_data_dict = dict(zip(phe_header_residue, residue_data_list))
            stacking_data_dict = dict(zip(phe_header_stacking, stacking_data_list))
            
            stacked,bound,stacked_bound = match_phe_binding(residue_data_dict, stacking_data_dict)
            
            stacked_system_total += stacked
            bound_system_total += bound
            stacked_bound_system_total += stacked_bound

        writer.writerow([stacked_system_total, bound_system_total, stacked_bound_system_total, stacked_system_total / float(bound_system_total)])
Beispiel #9
0
def beta_binding_event_estimate(h5file, inositol_ratio, inositol_concentration, system_indices=[]):
    assert len(system_indices) > 0, "List of system indices should be non-empty."
    
    isomerList = ["scyllo", "chiro"]
    polar_path = "/polar"
    nonpolar_path = "/nonpolar_residue"
    csv_header = ["isomer", "sys_idx", "binding_constant", "inos_conc"]

    writer = csv.writer(open(inositol_ratio + '_binding_events.csv', 'wb'), delimiter=' ')
    writer.writerow(csv_header)

    for iso in isomerList:
        for sys in system_indices[iso]:
            nonpolar_file = os.path.join(nonpolar_path, "%(iso)s_t%(sys)d_per_inositol_contacts.dat" % vars())
            polar_file = os.path.join(polar_path, "%(iso)s_t%(sys)d_inos_total.dat" % vars())
            print polar_file, nonpolar_file

            nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
            polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)
            writer.writerow([iso, sys, _num_binding_events_state_machine(nonpolar_matrix[:,1:] + polar_matrix[:,1:]), inositol_concentration])
Beispiel #10
0
def compute_disordered_binding_constant(h5file, inositol_ratio, inositol_concentration, system_indices=[]):
    assert len(system_indices) > 0, "List of system indices should be non-empty."
    
    polarName = {'4to2' : 'inos_total.dat', '15to4' : 'whole_nosol_0-200ns_inos_total.dat', '45to4' : 'whole_nosol_0-200_inos_total.dat'}
    nonpolarName = {'4to2' : 'per_inositol_contacts.dat', '15to4' : 'whole_nosol_0-200ns_per_inositol_contacts.dat', '45to4' : 'whole_nosol_0-200_per_inositol_contacts.dat'}

    isomerList = ["scyllo", "chiro"]
    polarPath = "/polar"
    nonpolarPath = "/nonpolar_residue"
    csv_header = ["isomer", "sys_idx", "binding_constant", "inos_conc"]
    writer = csv.writer(open(inositol_ratio + '_binding_constants.csv', 'wb'), delimiter=' ')
    writer.writerow(csv_header)

    for iso in isomerList:
        data = []
        for sys in system_indices[iso]:
            polarFile = os.path.join(polarPath, "%(iso)s_sys%(sys)s_%(inositol_ratio)s_" % vars() + polarName[inositol_ratio])
            if inositol_ratio == "4to2":
                polarFile = os.path.join(polarPath, "klvffae_aggr%(sys)s_%(iso)s_nosol.xtc_" % vars() + polarName[inositol_ratio])

            print "analyzing", polarFile

            polarMatrix = myh5.getTableAsMatrix(h5file, polarFile, dtype=numpy.float64)
            print polarMatrix.shape 

            nonpolarFile = os.path.join(nonpolarPath, "%(iso)s_sys%(sys)s_%(inositol_ratio)s_" % vars() + nonpolarName[inositol_ratio])
            if inositol_ratio == "4to2":
                nonpolarFile = os.path.join(nonpolarPath, "%(iso)s_sys%(sys)s_per_inositol_contacts.dat" % vars())

            print "analyzing", nonpolarFile
            # This is really bad, but for other systems except 4to2 this line was used.  I've changed it 
            if inositol_ratio == "4to2":
                nonpolarMatrix = myh5.getTableAsMatrix(h5file, nonpolarFile, dtype=numpy.float64)[:, :]
            else:
                nonpolarMatrix = myh5.getTableAsMatrix(h5file, nonpolarFile, dtype=numpy.float64)[::2, :]

            print nonpolarMatrix.shape

            binding_constant = _binding_constant(polarMatrix, nonpolarMatrix, inositol_concentration)             
            writer.writerow([iso, sys, binding_constant, inositol_concentration]) 
Beispiel #11
0
def intersection_mon(h5file, csv_file, isomer, ratio):
    polar_matrix = myh5.getTableAsMatrix(h5file, '/inositol/inos_total')
    nonpolar_matrix = myh5.getTableAsMatrix(h5file, '/residue/per_inos_contacts')
    
    print polar_matrix.shape
    print nonpolar_matrix.shape
    
    assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"
    
    nrows, ncols = polar_matrix.shape
    counts = [{'polar_only':0, 'nonpolar_only':0, 'polar_nonpolar':0}, {'polar_only':0, 'nonpolar_only':0, 'polar_nonpolar':0}]
    for i in range(0, nrows): 
        for j in range(1, ncols):
            if polar_matrix[i][0] < 7500:
                if polar_matrix[i][j] and nonpolar_matrix[i][j]:
                    counts[0]['polar_nonpolar'] += 1
                elif polar_matrix[i][j]:
                    counts[0]['polar_only'] += 1
                elif nonpolar_matrix[i][j]:
                    counts[0]['nonpolar_only'] += 1
            else:
                if polar_matrix[i][j] and nonpolar_matrix[i][j]:
                    counts[1]['polar_nonpolar'] += 1
                elif polar_matrix[i][j]:
                    counts[1]['polar_only'] += 1
                elif nonpolar_matrix[i][j]:
                    counts[1]['nonpolar_only'] += 1

    # class csv.DictWriter(csvfile, fieldnames[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]])
    total = counts[0]['polar_only'] + counts[0]['nonpolar_only'] + counts[0]['polar_nonpolar']
    fraction = {'polar_only': float(counts[0]['polar_only']) / total, 'nonpolar_only' : float(counts[0]['nonpolar_only']) / total, 'polar_nonpolar' : float(counts[0]['polar_nonpolar']) / total}

    print counts[0]

    writer = csv.DictWriter(open(csv_file, 'wb'), counts[0].keys())
    writer.writeheader()
    writer.writerow(counts[0])
    writer.writerow(fraction)
Beispiel #12
0
def process(h5file, ratio):
	isomerlist = ["scyllo", "chiro", "water"]
	plot_data = []
	mean_contact_list = []
	std_contact_list = []
	#read in files for each system and aggregate
	format="pp_nonpolar_vs_t.xvg"
	for iso in isomerlist:
		print "processing", iso
		pattern = re.compile(r"%(iso)s.*%(ratio)s.*%(format)s" % vars())
		if iso == "water":
			pattern = re.compile(r"%(iso)s.*%(format)s" % vars())

		datalist=[]
		for table in h5file.listNodes(where='/pp_nonpolar'):
			table_path = os.path.join('/pp_nonpolar', table.name)
			if pattern.search(table.name):			
				data = myh5.getTableAsMatrix(h5file, table_path)
				if data is not None:
					data = data.astype('float')
					datalist.append(data[0:config.LASTFRAME, 1])
				else:
					print "no data was read in"
			
		print "datalist", datalist
		data_matrix = numpy.transpose(numpy.vstack(datalist))	
		print "data_matrix", data_matrix, data_matrix.shape

		avg, std = utils.summary_statistics(data_matrix, sum_across="columns")
		
		avg_contacts = numpy.average(data_matrix[config.STARTFRAME:config.LASTFRAME], axis=0)
		mean_contact = numpy.average(avg_contacts)
		std_contact = numpy.std(avg_contacts)
		print mean_contact
		print std_contact
		mean_contact_list.append(mean_contact)
		std_contact_list.append(std_contact)
		
		avg_smoothed = utils.smooth(avg/config.NMOLECULES, 500, time_present=False, timestep=2)
		std_smoothed = utils.smooth(std/config.NMOLECULES, 500, time_present=True, timestep=2)
		plot_data.append(avg_smoothed)
		plot_data.append(std_smoothed)
	
	timeseries_matrix = numpy.hstack(plot_data)
	print "timeseries_matrix", timeseries_matrix, timeseries_matrix.shape
	print "time", timeseries_matrix[:,0]
	numpy.savetxt(ratio + "_pp_nonpolar_smoothed.txt.gz", timeseries_matrix, fmt='%0.3f')
	utils.savetxt(ratio + "_avg_pp_nonpolar_contact.txt", "#scyllo chiro water", numpy.vstack([mean_contact_list, std_contact_list]), fmt='%0.3f')

	return timeseries_matrix
Beispiel #13
0
def nonpolar_residue_disordered(h5file, tag):
    scyllo_pattern = re.compile(r'scyllo')
    chiro_pattern = re.compile(r'chiro')
    atype_pattern = re.compile(r'residue_contact')
    data_list = {'scyllo':[], 'chiro':[]}

    #fix this number for now
    N_datapoints = 190000
    for table in h5file.listNodes("/nonpolar_residue", 'Table'):
        if atype_pattern.search(table.name):
            table_path = os.path.join("/nonpolar_residue", table.name)

            print table_path

            data = myh5.getTableAsMatrix(h5file, table_path, dtype=numpy.float64)
            
            # print data
            
            sum_over_time = numpy.average(data[20000:N_datapoints, 1:], axis = 0)
            
            # print sum_over_time
            
            # This matrix is Nres by 4, where 4 is the number of peptides in the system (disordered oligomer)
            # Each row of this matrix represents a single amino acid
            # Each column is a peptide sequence A, E, L, K, F, F, V
            sum_over_time.shape = (sum_over_time.size / 4, 4)
            
            # Average over all peptides in the system (over columns, hence axis = 1). 
            # The resulting array of numbers has units of per peptide.
            avg_over_peptides = numpy.average(sum_over_time, axis = 1)
            
            if scyllo_pattern.search(table.name):
                data_list['scyllo'].append(avg_over_peptides)
            elif chiro_pattern.search(table.name):
                data_list['chiro'].append(avg_over_peptides)
            else:
                print "No pattern matches", table.name

    # save results to flat files
    for isomer in data_list.keys():
        nparray = numpy.array(data_list[isomer])
            
        # dump the list of counts for each system
        numpy.savetxt('%(tag)s_nonpolar_residue_inositol_contact_%(isomer)s_counts.txt' % vars(), nparray, fmt='%0.8f')
        print "saved", isomer, "analysis with shape", nparray.shape

        # average over all the systems; each system is a row in nparray
        average = numpy.average(nparray, axis=0)
        std = numpy.std(nparray, axis=0)/math.sqrt(8)
        numpy.savetxt('%(tag)s_nonpolar_residue_inositol_contact_%(isomer)s_avg_std.txt' % vars(), [average, std], fmt='%0.8f')
Beispiel #14
0
def _intersection(h5file, polar_file, nonpolar_file, tag):
    nonpolar_matrix = myh5.getTableAsMatrix(h5file, nonpolar_file, dtype=numpy.float64)
    polar_matrix = myh5.getTableAsMatrix(h5file, polar_file, dtype=numpy.float64)

    counts = {'polar_only' : 0, 'nonpolar_only' : 0, 'polar_nonpolar' : 0}

    if polar_matrix is not None and nonpolar_matrix is not None:
        print polar_matrix.shape
        print nonpolar_matrix.shape

        assert polar_matrix.shape == nonpolar_matrix.shape, "the two matrices are expected to have the same dimensions"

        nrows, ncols = polar_matrix.shape
        for i in range(1, nrows):
            for j in range(1, ncols):
                if polar_matrix[i][j] and nonpolar_matrix[i][j]:
                    counts['polar_nonpolar'] += 1
                elif polar_matrix[i][j]:
                    counts['polar_only'] += 1
                elif nonpolar_matrix[i][j]:
                    counts['nonpolar_only'] += 1

    # normalize
    total = counts['polar_nonpolar'] + counts['polar_only'] + counts['nonpolar_only']
    if total != 0:
        counts['polar_nonpolar'] = counts['polar_nonpolar'] / float(total)
        counts['polar_only'] = counts['polar_only'] / float(total)
        counts['nonpolar_only'] = counts['nonpolar_only'] / float(total)


    writer = csv.DictWriter(open('%(tag)s_intersection.csv' % vars(), 'wb'), counts.keys())
    writer.writeheader()
    write_header = True

    # class csv.DictWriter(csvfile, fieldnames[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]])
    print sys, counts
    writer.writerow(counts)
Beispiel #15
0
def nonpolar_residue_beta_low_molar(h5file, system_indices=[]):
    assert len(system_indices) > 0, "The list of system_indices should not be empty"

    for isomer in ["scyllo", "chiro"]:
        data_list = []
        for s in system_indices:
            nonpolar_residue_large = None
            for i in range(1,6):
                nonpolar_residue_path = "/nonpolar_revision/%(isomer)s_sys%(s)d_t%(i)d_per_residue_contacts.dat" % vars()
                print "analyzing ", nonpolar_residue_path

                # read in the file
                data = myh5.getTableAsMatrix(h5file, nonpolar_residue_path, dtype=numpy.float64)
                print data
 
                if nonpolar_residue_large is None:
                    print "in here"
                    nonpolar_residue_large = data
                else:
                    nonpolar_residue_large = numpy.concatenate((nonpolar_residue_large, data))
                
                # data = numpy.genfromtxt(file, comments="#", dtype='float')
                nrows,ncols = nonpolar_residue_large.shape
                print nrows, ncols

            # sum over rows
            time_avg = numpy.average(nonpolar_residue_large[:,1:], axis=0)
            print time_avg.shape
            time_avg.shape = (time_avg.size / 16, 16) 
            print time_avg.shape
            sum_over_peptides = numpy.sum(time_avg, axis = 1)

            data_list.append(sum_over_peptides)

        # save results to flat files
        nparray = numpy.array(data_list)

        # dump the list of results for each system
        numpy.savetxt('%(isomer)s_low_molar_nonpolar_residue_contact.txt' % vars(), nparray, fmt='%0.8f')

        # average over all the systems; each system is a row in nparray
        average = numpy.average(nparray, axis=0) / 16 
        std = numpy.std(nparray, axis=0) / 16 / math.sqrt(len(system_indices))

        #save the normalized average and std
        numpy.savetxt('%(isomer)s_low_molar_nonpolar_residue_contact_avg_std.txt' % vars(), [average, std], fmt='%0.8f')
Beispiel #16
0
def process(h5file, ratio):
	labellist = []
	plot_list = []
	isomerlist = [ "scyllo", "chiro", "water" ]
	root='/cluster'
	pattern=""	
	for iso in isomerlist:
		datalist=[]
		for t in h5file.listNodes(where=root):
			filename = t.name
			# fileslist = glob.glob("*%(iso)s*%(conc)s*.xvg" % vars())
			if iso != "water":
				pattern = re.compile(r'%(iso)s.*%(ratio)s.*nclust.xvg' % vars())
			else:
				pattern = re.compile(r'%(iso)s.*nclust.xvg' % vars())
		
			if pattern.search(filename):
				print filename
				data = myh5.getTableAsMatrix(h5file, 
									 os.path.join(root, filename))[0:config.LASTFRAME]
				print data.shape
				datalist.append(data)
		
		print len(datalist)
		all_data = numpy.hstack(datalist)
		print all_data.shape
		print all_data
			
		sdata = all_data[:,1::2]			
		average = numpy.average(sdata, axis=1)
		plot_data = numpy.transpose([all_data[:,0],average])
		
		numpy.savetxt('%(iso)s_%(ratio)s_nclust.txt.gz' % vars(), plot_data, fmt='%0.2f')
		
		plot_list.append(plot_data)
		plot_label = config.LABEL[iso] + " (" + config.RATIO[ratio] + ")"
		
		labellist.append("%(plot_label)s" % vars())
		 
	return (plot_list, labellist)
Beispiel #17
0
def process(h5file, ratio, format="p2p_vs_t.dat"):
	# given a h5file return a list of data to be plotted as line plots 
	# and a corresponding list of labels

	header = "# time average_inter std_inter average_intra std_intra"
	datalist = []
	labellist = []
	isomerlist = ["scyllo", "chiro", "water"]
	mean_contact_list = []
	std_contact_list = []
	
	for iso in isomerlist:
		print "processing", iso
		pattern = re.compile(r"%(iso)s.*%(ratio)s.*%(format)s" % vars())
		if iso == "water":
			pattern = re.compile(r"%(iso)s.*%(format)s" % vars())

		data_inter = []
		data_intra = []
		for table in h5file.listNodes(where='/polar'):
			table_path = os.path.join('/polar', table.name)
			if pattern.search(table.name):
				print "processing", table.name
				data = myh5.getTableAsMatrix(h5file, table_path, dtype=numpy.int32)
				data = data.astype('float')
				print "converted to float32", data

				nrows, ncols = data.shape
				assert nrows > ncols
				print "Test data read in dimensions", data.shape, data.dtype
				data_inter.append(data[0:config.LASTFRAME,1])
				data_intra.append(data[0:config.LASTFRAME,2])

		# compute summary statistics
		print "summarizing statistics ... "
		inter_matrix = utils.array_list_to_matrix(data_inter)
		intra_matrix = utils.array_list_to_matrix(data_intra)
		average_inter, std_inter = utils.summary_statistics(inter_matrix)
		average_intra, std_intra = utils.summary_statistics(intra_matrix)
		
		# compute the time average number of contacts and its std error
		avg_contacts = numpy.average(inter_matrix, axis=0)
		mean_contact = numpy.average(avg_contacts)
		std_contact = numpy.std(avg_contacts)

		mean_contact_list.append(mean_contact)
		std_contact_list.append(std_contact)
		print mean_contact, std_contact

		time = data[0:config.LASTFRAME,0]
		# print "Test: dimensions of average_inter", average_inter.shape
		plotdata = utils.array_list_to_matrix([ time, average_inter, std_inter, average_intra, std_intra ])
		print "plotdata", plotdata
		print "Test: dimensions of plotdata for", iso, ratio, plotdata.shape
		plotdata_smoothed = utils.smooth(plotdata, 500, time_present=True, timestep=2)
		print plotdata_smoothed

		datalist.append(plotdata_smoothed)
		print "smoothed data", plotdata_smoothed, plotdata_smoothed.shape

		ratiolabel = config.RATIO[ratio]
		if iso == "water":
			labellist.append("water" % vars())
		else:
			labellist.append("%(iso)s (%(ratiolabel)s)" % vars())

	utils.savetxt('%(ratio)s_p2p_vs_t.txt' % vars(), header, plotdata, fmt='%0.2f')
	utils.savetxt('%(ratio)s_p2p_vs_t_smoothed.txt' % vars(), header, plotdata_smoothed, fmt='%0.2f')
	utils.savetxt('%(ratio)s_avg_contacts_w_err.txt' % vars(), "#scyllo chiro water", numpy.vstack([mean_contact_list, std_contact_list]), fmt='%0.2f')

	return (datalist, labellist)
Beispiel #18
0
def get_angle_matrix(h5file, sys):
	"""docstring for get_angle"""
	path = os.path.join('/angle', sys)
	print "getting ", path
	matrix = myh5.getTableAsMatrix(h5file, path, dtype=numpy.float64)
	return matrix
Beispiel #19
0
def intersection_disordered(h5file, ratio, system_indices):
    """intersection analysis for disordered oligomers"""

    #nasty fix for different table names
    polarName = {'4to2' : 'inos_total.dat', '15to4' : 'whole_nosol_0-200ns_inos_total.dat', '45to4' : 'whole_nosol_0-200_inos_total.dat'}
    nonpolarName = {'4to2' : 'per_inositol_contacts.dat', '15to4' : 'whole_nosol_0-200ns_per_inositol_contacts.dat', '45to4' : 'whole_nosol_0-200_per_inositol_contacts.dat'}
   # klvffae_aggr0_chiro_nosol.xtc_inos_total.dat 
    isomerList = ["scyllo", "chiro"]
    polarPath = "/polar"
    nonpolarPath = "/nonpolar_residue"
    dataList = [['isomer', 'system#', 'polar_only', 'polar_and_nonpolar', 'nonpolar_only', 'total']]
    
    resultsWriter = csv.writer(open(ratio + '_intersection.txt', 'wb'), delimiter=' ')
    
    print system_indices
    
    for iso in isomerList:
        data = []
        for sys in system_indices[iso]:
            if ratio == "4to2":
                polarFile = os.path.join(polarPath, "klvffae_aggr%(sys)s_%(iso)s_nosol.xtc_" % vars() + polarName[ratio])
            else:
                polarFile = os.path.join(polarPath, "%(iso)s_sys%(sys)s_%(ratio)s_" % vars() + polarName[ratio])

            print "analyzing", polarFile
            polarMatrix = myh5.getTableAsMatrix(h5file, polarFile, dtype=numpy.float64)
            print polarMatrix
            
            if ratio == "4to2":
                nonpolarFile = os.path.join(nonpolarPath, "%(iso)s_sys%(sys)s_" % vars() + nonpolarName[ratio])
            else:
                nonpolarFile = os.path.join(nonpolarPath, "%(iso)s_sys%(sys)s_%(ratio)s_" % vars() + nonpolarName[ratio])
            print "analyzing", nonpolarFile

            nonpolarMatrix = myh5.getTableAsMatrix(h5file, nonpolarFile, dtype=numpy.float64)[::2, :]
            
            if polarMatrix != None and nonpolarMatrix != None:
                rows, cols = nonpolarMatrix.shape
                print rows, cols
                print polarMatrix.shape
                polar_and_nonpolar = 0.0
                polar_only = 0.0
                nonpolar_only = 0.0
                rows = min(rows, 100000)
                print rows, cols
                for i in range(20001, rows):
                    for j in range(1, cols):
                        if polarMatrix[i][j] and nonpolarMatrix[i][j]:
                            polar_and_nonpolar += 1
                        elif polarMatrix[i][j]:
                            polar_only += 1
                        elif nonpolarMatrix[i][j]:
                            nonpolar_only += 1

                total = polar_only + polar_and_nonpolar + nonpolar_only 
                dataList.append([iso, sys, polar_only / total, polar_and_nonpolar / total, nonpolar_only / total, total])
                data.append([polar_only / total, polar_and_nonpolar / total, nonpolar_only / total, total])
        
        print data
        print numpy.array(data)
        average = numpy.average(numpy.array(data), axis=0)
        std = numpy.std(numpy.array(data), axis=0) / len(system_indices)
        print average.tolist()
        print std.tolist()
        # 
        addToListAvg = [iso+' avg', 'all']
        addToListAvg.extend(average.tolist())
        addToListStd = [iso+' std', 'all']
        addToListStd.extend(std.tolist())
        
        dataList.append(addToListAvg)
        dataList.append(addToListStd)

    # numpy.savetxt("15to4_intersection.gz", dataList, fmt='%s %d %0.3f %0.3f %0.3f %d')
    resultsWriter.writerows(dataList)
Beispiel #20
0
def get_mindist_matrix(h5file, sys):
	"""docstring for get_mindist_matrix"""
	path = os.path.join('/mindist', sys)
	print "getting ", path
	
	return myh5.getTableAsMatrix(h5file, path, dtype=numpy.float64)
def compute_inositol_ub_b_cluster_size_histo(nonpolar_h5file, polar_h5file, clust_info_path, iso, sys, tag=""):
    # Path names to the text files saved in the h5 file
    nonpolar_contacts_file = "/%(iso)s_64_inositol_nonpolar_contacts_%(sys)s" % vars()
    polar_contacts_file = "/%(iso)s_64_inositol_hbonds_%(sys)s" % vars()

    nonpolar_contacts = myh5.getTableAsMatrix(nonpolar_h5file, nonpolar_contacts_file, dtype=numpy.float64)
    polar_contacts = myh5.getTableAsMatrix(polar_h5file, polar_contacts_file, dtype=numpy.float64)
    
    clust_info_csv = os.path.join(clust_info_path, '%(sys)d_final_clust_info.dat' % vars())
    contacts_matrix = polar_contacts[:,1:] + nonpolar_contacts[:,1:]

    print contacts_matrix.shape

    # Parse the csv inositol clusters data
    bound_sizes_list = []
    unbound_sizes_list = []
    
    with open(clust_info_csv, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            time = row[0]
            clustered = row[1]
            inositol_ids = row[2].split(' ')
           
            # print time, clustered, inositol_ids
 
            inositol_indices = _residue_ids_to_indices(inositol_ids)
            try:
                # print int(time), polar_contacts[int(time) / 2][0]
                inos_in_cluster_contacts = _contacts_for_indices_in_cluster(contacts_matrix[int(time) / 2], inositol_indices)
            except IndexError:
                print "index error encountered at time=", time
                break
            
            if clustered == "yes":
                bound = _cluster_bound_to_protein(inos_in_cluster_contacts)
                if bound:
                    # print time, inositol_ids 
                    bound_sizes_list.append(len(inositol_indices))
                else:
                    unbound_sizes_list.append(len(inositol_indices))
            else:
                for val in inos_in_cluster_contacts:
                    if val > 0:
                        bound_sizes_list.append(1)
                    else:
                        unbound_sizes_list.append(1)

    # compute histograms
    bound_hist = numpy.bincount(numpy.array(bound_sizes_list))
    unbound_hist = numpy.bincount(numpy.array(unbound_sizes_list))

    print bound_hist
    print unbound_hist
 
    with open(iso + '_sys' + str(sys) + tag + 'bound_hist.txt', 'w') as results_file:
        for size in range(0, bound_hist.size):
            frequency = bound_hist[size] / float(bound_hist.sum()) 
            results_file.write("%d  %0.2f\n" % (size, frequency))

    with open(iso + '_sys' + str(sys) + tag + 'unbound_hist.txt', 'w') as results_file:
        for size in range(0, unbound_hist.size):
            frequency = unbound_hist[size] / float(unbound_hist.sum())
            results_file.write("%d  %0.2f\n" % (size, frequency))