Example #1
0
def load_into_tables():
    """Load the rmsd flat files into the 'analysis.h5' pytables file.

    For every combination of ratio (15, 64), isomer (chiro, scyllo,
    glycerol), analysis (rmsd) and system index (0-9), the flat file named
    by ``generate_file_name`` is read with ``numpy.genfromtxt``, passed
    through ``preprocess`` together with the function registered for that
    analysis, and saved under the ``/analysis`` group.  The node name is the
    file path with every '/' replaced by '_' and the extension removed.

    Files that do not exist are reported and skipped.
    """
    # Maps an analysis name to the function handed to preprocess().
    function_list = {'rmsd': rmsd}

    print("initializing pytables data file")

    group_name = 'analysis'
    h5file = myh5.initialize('analysis.h5', group_name)
    root = '/' + group_name

    print("reading in flat files")
    for ratio in [15, 64]:
        for isomer in ["chiro", "scyllo", "glycerol"]:
            for analysis in ["rmsd"]:
                for sys_idx in range(0, 10):
                    flat_file_path = generate_file_name(ratio, isomer, sys_idx, analysis)
                    print("loading in file at %s" % flat_file_path)

                    flat_file_name = flat_file_path.replace('/', '_')
                    print("loading in %s" % flat_file_name)

                    if not os.path.exists(flat_file_path):
                        # Without this skip, a missing file either raised
                        # NameError (first iteration) or silently re-saved the
                        # previous file's data under this file's node name.
                        print("%s was not found!" % flat_file_path)
                        continue

                    data_file = numpy.genfromtxt(flat_file_path)
                    data_cleaned = preprocess(function_list[analysis], data=data_file, keep_time=True)
                    node_name = os.path.splitext(flat_file_name)[0]
                    myh5.save(h5file, data_cleaned, os.path.join(root, node_name))
Example #2
0
def parse(datfile, h5file_name):
	"""Read the analysis file *datfile* and save its rows into an h5 file.

	The h5 file *h5file_name* is initialized first, then *datfile* is read
	with ``read_analysis_file``.  The rows are converted to a numpy array and
	saved at the node '/test', using a table description built from the
	column names below.
	"""
	column_names = ['replica', 'sequence', 'w', 'w_nominal', 'rg', 'sas1', 'sas2']
	# Derive the count from the list so the two cannot drift apart.
	descr = create_description(column_names, len(column_names))
	h5file = myh5.initialize(h5file_name)

	# The context manager closes the input file even if parsing raises.
	with open(datfile) as f:
		data = read_analysis_file(f)

	myh5.save(h5file, numpy.array(data), '/test', table_struct=descr)
Example #3
0
def load():
	"""Create 'GA4_beta_analysis.h5' and fill it via read_polar and read_nonpolar.

	The h5 file stores all the analysis relating to GA4-beta protofibrils.
	It is closed on exit, including when one of the readers raises.
	"""
	h5file = myh5.initialize('GA4_beta_analysis.h5')
	try:
		read_polar(h5file)
		read_nonpolar(h5file)
	finally:
		# Close the handle even when a reader fails part-way through.
		h5file.close()
Example #4
0
def process_dssp(filename, totalResidue, correction_factor, h5file='analysis_results.h5'):
	"""Average the per-frame structure counts in a dssp output file and save them.

	The file is read line by line:

	* lines starting with '#' are ignored;
	* lines starting with '@' are header lines.  When the second token starts
	  with 's' (and is not 'subtitle'), the fourth token, stripped of its first
	  and last character, becomes the name of the next data column;
	* blank lines are skipped;
	* every other line is a data row whose columns 1..N hold the values for
	  the N named structure types.

	Each value is divided by *totalResidue* and accumulated; for the column
	named "Coil", *correction_factor* is subtracted from the value first.  The
	accumulated sums are then divided by the number of data rows.

	Two nodes are saved into *h5file*, both named after *filename* without
	its extension:

	* '/dssp/<basename>'      -- one row: filename, per-structure averages and
	  the number of frames;
	* '/dssp_data/<basename>' -- the raw data rows.

	A file with named columns but no data rows raises ZeroDivisionError.
	"""
	legend = {}          # 1-based data column index -> structure type name
	averageStruct = {}   # 1-based data column index -> running sum
	columnTotal = 0
	totalFramesProcessed = 0
	raw_data = []

	# The context manager makes sure the input file is closed on every path.
	with open(filename) as fp:
		for line in fp:
			if line[0] == "#":
				continue
			elif line[0] == "@":
				columns = line.split()
				if columns[1][0] == "s" and columns[1] != "subtitle":
					# Drop the surrounding quote characters from the legend name.
					structureType = columns[3][1:-1]
					columnTotal += 1
					legend[columnTotal] = structureType

				# (re)initialize the accumulators for every column known so far
				for i in range(1, columnTotal + 1):
					averageStruct[i] = 0
			else:
				# should all be data now
				cols = line.split()
				if not cols:
					# A blank line has no columns to index; skip it.
					continue
				raw_data.append(cols)
				for i in range(1, columnTotal + 1):
					value = float(cols[i])
					# correct for the 3 extra residues that are counted in the GA4 system by dssp
					if legend[i] == "Coil":
						value -= correction_factor
					averageStruct[i] += value / totalResidue
				totalFramesProcessed += 1

	# Build the single summary row together with its table description.
	table = [filename]
	table_descr = {'filename': tables.StringCol(256, pos=0)}

	for i in range(1, columnTotal + 1):
		table.append(averageStruct[i] / totalFramesProcessed)
		table_descr[legend[i]] = tables.Float32Col(pos=i)

	table.append(totalFramesProcessed)
	table_descr['num_frames'] = tables.Float32Col(pos=columnTotal + 1)

	h5 = myh5.initialize(h5file)

	basename = os.path.splitext(filename)[0]
	myh5.save(h5, [tuple(table)], '/dssp/%s' % basename, table_descr)

	# numpy.array, not numpy.Array: the latter does not exist and raised
	# AttributeError before the raw data could be saved.
	# NOTE(review): the rows are still the string tokens read from the file
	# while the description uses Int32Col -- confirm myh5.save converts them.
	raw_data_array = numpy.array(raw_data)
	(_nrows, ncols) = raw_data_array.shape
	myh5.save(h5, raw_data_array, '/dssp_data/%s' % basename, myh5.create_description('col', ncols, format=tables.Int32Col(dflt=0)))