# The original files' own import lines are not included in this section; the
# block below collects the modules the scripts use.
import math
import struct
import subprocess
import sys

import numpy
import pylab
import pylab as pl  # some scripts use the short alias
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

import shared


# Variant of the all-cells plot: samples every 200th cell, plots it in red,
# and uses a fixed axis window.
def main():
    # check the given arguments
    if len(sys.argv) < 5:
        usage()
    else:
        f = shared.openFile(sys.argv[1], "r")
        directory = sys.argv[2]
        image_name = sys.argv[3]
        step_size = shared.toFlo(sys.argv[4])

    print 'Plotting all the cells from ' + sys.argv[1] + '...'

    # split the lines to get data
    data = [line.split() for line in f]
    max_time = len(data) - 1

    # calculate the tissue size
    cells_width = shared.toInt(data[0][0])
    cells_height = shared.toInt(data[0][1])
    total_cells = cells_width * cells_height + 1

    # create a matrix to store the concentration values we obtain from the file
    cons = numpy.zeros(shape = (max_time, total_cells))

    # put the concentration values from the file into the matrix
    for i in range(1, max_time + 1):
        cons[i - 1][0] = shared.toFlo(data[i][0]) * step_size
        for j in range(1, total_cells):
            cons[i - 1][j] = shared.toFlo(data[i][j])

    # close the file
    f.close()

    # plot colors
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    color = 0

    for i in range(1, total_cells, 200):  # start at 1 to skip the time column (the original started at 0)
        start = 0
        # Adjust the plotting interval for each cell to account for different columns being staggered
        # as they enter the PSM at intervals of 6 minutes apart from each other
        while cons[start][i] == -1:  # -1 stands for no data in the output file
            start += 1
        end = max_time - 1
        while cons[end][i] == -1:
            end -= 1
        # the original cycled colors by i % 4 but had all branches except red commented out
        pl.plot(cons[start:end, 0], cons[start:end, i], 'r')

    pl.axis([400, 600, 0, 300])
    pl.savefig(directory + "/" + image_name + ".png", format = "png")
    pl.close()
    print 'Done. Your plot is stored in ' + directory + "/" + image_name + ".png"
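# Every script in this section calls into a small 'shared' helper module that
# is not shown here. The sketch below is a guess at a minimal implementation,
# based only on how the scripts use it (shared.openFile, shared.toInt,
# shared.toFlo, shared.ensureDir, shared.terminalRed, shared.terminalReset);
# the real module may differ.

# shared.py (hypothetical sketch)
import os
import sys

terminalRed = '\033[31m'
terminalReset = '\033[0m'

def openFile(filename, mode):
    # Open a file, exiting with a readable error instead of a traceback.
    try:
        return open(filename, mode)
    except IOError:
        sys.exit(terminalRed + 'Could not open ' + filename + terminalReset)

def toInt(value):
    # Convert a value to an int, exiting on bad input.
    try:
        return int(value)
    except ValueError:
        sys.exit(terminalRed + str(value) + ' is not an integer' + terminalReset)

def toFlo(value):
    # Convert a value to a float, exiting on bad input.
    try:
        return float(value)
    except ValueError:
        sys.exit(terminalRed + str(value) + ' is not a number' + terminalReset)

def ensureDir(directory):
    # Create the directory if it does not exist, returning its name.
    if not os.path.exists(directory):
        os.makedirs(directory)
    return directory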
# Full version of the all-cells plot: plots every cell, cycling through four
# colors by cell index.
def main():
    # check the given arguments
    if len(sys.argv) < 5:
        usage()
    else:
        f = shared.openFile(sys.argv[1], "r")
        directory = sys.argv[2]
        image_name = sys.argv[3]
        step_size = shared.toFlo(sys.argv[4])

    print 'Plotting all the cells from ' + sys.argv[1] + '...'

    # split the lines to get data
    data = [line.split() for line in f]
    max_time = len(data) - 1

    # calculate the tissue size
    cells_width = shared.toInt(data[0][0])
    cells_height = shared.toInt(data[0][1])
    total_cells = cells_width * cells_height + 1

    # create a matrix to store the concentration values we obtain from the file
    cons = numpy.zeros(shape = (max_time, total_cells))

    # put the concentration values from the file into the matrix
    for i in range(1, max_time + 1):
        cons[i - 1][0] = shared.toFlo(data[i][0]) * step_size
        for j in range(1, total_cells):
            cons[i - 1][j] = shared.toFlo(data[i][j])

    # close the file
    f.close()

    # plot colors
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    color = 0

    for i in range(1, total_cells):
        start = 0
        # Adjust the plotting interval for each cell to account for different columns being staggered
        # as they enter the PSM at intervals of 6 minutes apart from each other
        while cons[start][i] == -1:  # -1 stands for no data in the output file
            start += 1
        end = max_time - 1
        while cons[end][i] == -1:
            end -= 1
        if i % 4 == 0:
            pl.plot(cons[start:end, 0], cons[start:end, i], 'r')
        elif i % 4 == 1:
            pl.plot(cons[start:end, 0], cons[start:end, i], 'g')
        elif i % 4 == 2:
            pl.plot(cons[start:end, 0], cons[start:end, i], 'b')
        else:
            pl.plot(cons[start:end, 0], cons[start:end, i], 'c')

    pl.savefig(directory + "/" + image_name + ".png", format = "png")
    pl.close()
    print 'Done. Your plot is stored in ' + directory + "/" + image_name + ".png"
# Replaces runs of leading spaces with tabs in the given file, in place.
def main():
    # check the given arguments
    if len(sys.argv) < 3:
        usage()
    elif len(sys.argv) == 4:
        if sys.argv[1] == "-c" or sys.argv[1] == "--no-color":
            shared.terminalRed = ""
            shared.terminalReset = ""
            filename = sys.argv[2]
            spaces = sys.argv[3]
        else:
            usage()
    else:
        filename = sys.argv[1]
        spaces = sys.argv[2]

    # open the input file and check to ensure 'spaces' is an integer
    f = shared.openFile(filename, "r")
    spaces = shared.toInt(spaces)

    # replace each run of 'spaces' leading spaces with one tab
    ofile = ""
    for line in f:
        count = 0
        while line[:spaces] == " " * spaces:
            line = line[spaces:]
            count += 1
        ofile = ofile + "\t" * count + line
    f.close()

    # write the converted text back to the same file
    newfile = shared.openFile(filename, "w")
    newfile.write(ofile)
    newfile.close()
# Converts parameter sets between formats by way of a master 88-parameter
# layout described by the set_formats table.
def main():
    print 'Reading command-line arguments...'
    args = sys.argv[1:]  # Remove the name of the program from the arguments
    num_args = len(args)
    if num_args == 3:  # There are three arguments, each of which is required
        input_file = shared.openFile(args[0], 'r')  # The input parameter sets
        output_fname = args[1]
        output_file = shared.openFile(output_fname, 'w')  # The output parameter sets
        num_output_params = shared.toInt(args[2])  # How many parameters each set should have in the output file
    else:
        usage()

    print 'Converting each parameter set...'
    num_input_params = -1
    for line in input_file:  # For every parameter set
        if len(line) > 1 and line[0] != '#':  # Skip blank lines and comments
            input_set = line.split(',')
            if num_input_params == -1:  # Find the input format based on the first parameter set found
                num_input_params = len(input_set)
            else:
                output_file.write('\n')  # Print a newline before each non-first set
            # Get rid of the newline in the last parameter (str.replace returns a new
            # string, so the result must be assigned back; the original discarded it)
            input_set[num_input_params - 1] = input_set[num_input_params - 1].replace('\n', '')

            # Convert the set to the master (88) format
            base_set = ['0'] * 88
            for par in range(num_input_params):
                base_index = set_formats[num_input_params][par]
                base_set[base_index] = input_set[par]

            # Convert the master format to the specified one
            output_set = ['0'] * num_output_params
            for par in range(num_output_params):
                output_index = set_formats[num_output_params][par]
                output_set[par] = base_set[output_index]

            # Write the results to the output file
            output_file.write(output_set[0])
            for par in range(1, num_output_params):
                output_file.write(',' + output_set[par])

    print 'Closing files...'
    input_file.close()
    output_file.close()
    print 'Done. Your newly formatted parameter sets are stored in ' + output_fname
# Splits a parameter file into per-job chunks and submits one PBS job per
# (seed, chunk) pair.
def main():
    # check the given arguments
    print "Reading command-line arguments..."
    args = sys.argv[1:]
    num_args = len(args)
    req_args = [False] * 6
    num_seeds = 0
    sim_arguments = ""

    if num_args >= 6:
        for arg in range(0, num_args - 1, 2):
            option = args[arg]
            value = args[arg + 1]
            if option == '-i' or option == '--input-file':
                ifile = value
                req_args[0] = True
            elif option == '-n' or option == '--num-params':
                num_params = shared.toInt(value)
                req_args[1] = True
            elif option == '-p' or option == '--pars-per-job':
                pars_per_job = shared.toInt(value)
                req_args[2] = True
            elif option == '-d' or option == '--directory':
                folder = value
                req_args[3] = True
            elif option == '-s' or option == '--simulation':
                simulation = value
                req_args[4] = True
            elif option == '-S' or option == '--seeds':
                num_seeds = int(value)
                req_args[5] = True
            elif option == '-a' or option == '--arguments':
                # everything after -a is passed through to the simulation
                for a in range(arg + 1, num_args):
                    sim_arguments += ' ' + args[a]
                break
            elif option == '-h' or option == '--help':
                usage()
            else:
                usage()
        for arg in req_args:  # quit if a required argument is missing
            if not arg:
                usage()
    else:
        usage()

    # split the input file into chunks of pars_per_job parameter sets
    index = 0
    input_file = shared.openFile(ifile, "r")
    shared.ensureDir(folder)
    for parset in range(0, num_params, pars_per_job):
        params = shared.openFile(folder + "/input" + str(index) + ".params", "w")
        for line in range(pars_per_job):
            params.write(input_file.readline())
        params.close()
        index += 1

    # write and submit one PBS job per seed and chunk (see the example job
    # script after this function)
    for seeds in range(num_seeds):
        seed = (seeds + 1) * 1000
        for parset in range(index):
            job = shared.openFile(folder + "/pbs-job-" + str(seed) + "-" + str(parset), 'w')
            job.write('''
#PBS -N robust-test
#PBS -l nodes=1:ppn=1
#PBS -l mem=500mb
#PBS -l file=300mb
#PBS -q biomath
#PBS -j oe
#PBS -o ''' + folder + '''/output''' + str(seed) + "-" + str(parset) + '''.txt
#PBS -l walltime=06:00:00

cd $PBS_O_WORKDIR
''' + simulation + ' ' + sim_arguments + ' -p ' + str(pars_per_job) + ' -i ' + ifile + ' -s ' + str(seed) + " -M 6 -E " + folder + "/scores-" + str(seed) + "-" + str(parset) + ".csv")
            job.close()
            subprocess.call(["qsub", folder + "/pbs-job-" + str(seed) + "-" + str(parset)])
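# For illustration, the job script generated above for --directory out, seed
# 1000 and chunk 0 (written to out/pbs-job-1000-0) would look like this; the
# simulation path and its arguments here come from a made-up invocation, not
# from fixed defaults:
#
#   #PBS -N robust-test
#   #PBS -l nodes=1:ppn=1
#   #PBS -l mem=500mb
#   #PBS -l file=300mb
#   #PBS -q biomath
#   #PBS -j oe
#   #PBS -o out/output1000-0.txt
#   #PBS -l walltime=06:00:00
#
#   cd $PBS_O_WORKDIR
#   ./simulation -p 100 -i sets.params -s 1000 -M 6 -E out/scores-1000-0.csv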
# Histograms a per-cell feature across runs and renders it as a heat map.
def main():
    filename = ""
    if len(sys.argv) == 8:
        directory = sys.argv[1]  # directory where the mutation is
        first_run = shared.toInt(sys.argv[2])
        last_run = shared.toInt(sys.argv[3])
        feature = sys.argv[4]
        picture = sys.argv[5]
        start = shared.toInt(sys.argv[6])
        end = shared.toInt(sys.argv[7])
    else:
        print "ofeatures.py requires 7 parameters --", len(sys.argv) - 1, "given."
        exit(1)

    # collect the feature values of every run, tracking the overall min and max
    bins = 10
    miny = 999999
    maxy = 0
    x = []
    for r in range(first_run, last_run):
        filename = directory + "/run" + str(r) + "/" + feature
        f = open(filename, "r")
        index = 0
        for line in f:
            mylist = line.split()
            for i in range(len(mylist)):
                mylist[i] = float(mylist[i])
            num = min(mylist)
            if num < miny:
                miny = num
            num = max(mylist)
            if num > maxy:
                maxy = num
            if r == first_run:
                x.append(mylist)
            else:
                x[index] = x[index] + mylist  # concatenate this run's values onto the row
            index += 1
        f.close()

    # bin each row's values into a histogram over [start, end]
    z = []
    minz = float('inf')
    maxz = 0
    for line in x:
        newline = [0] * (bins + 1)
        for el in line:
            percent = ((el - start) * 100) / (end - start)  # position of el in [start, end] as a percentage
            newline[int(percent / float(100 / bins))] += 1
        if max(newline) > maxz:
            maxz = max(newline)
        if min(newline) < minz:
            minz = min(newline)
        z.append(newline)

    # draw the heat map
    cbar = plt.colorbar(plt.imshow(z, aspect = 'auto', extent = [start, end, 0, 16], cmap = "hot"))
    plt.xlabel(feature[0:1].upper() + feature[1:-4])
    plt.ylabel('Cells')
    cbar.ax.set_yticks([0, 1])
    cbar.ax.set_yticklabels(['Low', 'High'])
    plt.savefig(picture)
# Renders a space-time figure of column-averaged concentrations, accounting
# for the PSM growing by one cell column every steps_to_split time steps.
def main():
    print 'Reading command-line arguments...'
    args = sys.argv[1:]  # Remove the name of the program from the arguments
    num_args = len(args)

    cons_file = None          # Concentrations file
    figure_fname = 'st'       # Filename to give the figure minus its extension
    image_format = 'png'      # Format in which to save the figure, also serving as its extension
    image_width = 1000        # Width of the image in pixels
    image_height = 250        # Height of the image in pixels
    steps_til_growth = 60000  # Steps before growth starts
    steps_to_split = 600      # Steps each split takes
    initial_width = 10        # Initial width of the PSM in cells
    granularity = 1           # Use each <granularity> time steps of data
    start_step = 0            # Start time step relative to steps_til_growth
    end_step = 60000          # End time step relative to steps_til_growth

    for arg in range(0, num_args - 1, 2):
        option = args[arg]
        value = args[arg + 1]
        if option == '-c' or option == '--cons-file':
            cons_file = shared.openFile(value, 'r')
        elif option == '-f' or option == '--figure-name':
            figure_fname = value
        elif option == '-i' or option == '--image-format':
            image_format = value
        elif option == '-w' or option == '--image-width':
            image_width = shared.toInt(value)
        elif option == '-h' or option == '--image-height':
            image_height = shared.toInt(value)
        elif option == '-G' or option == '--steps-til-growth':
            steps_til_growth = shared.toInt(value)
        elif option == '-S' or option == '--steps-to-split':
            steps_to_split = shared.toInt(value)
        elif option == '-n' or option == '--initial-width':
            initial_width = shared.toInt(value)
        elif option == '-g' or option == '--granularity':
            granularity = shared.toInt(value)
        elif option == '-s' or option == '--start-step':
            start_step = shared.toInt(value)
        elif option == '-e' or option == '--end-step':
            end_step = shared.toInt(value)
        elif option == '-h' or option == '--help':  # note: -h is consumed by --image-height above, so use --help
            usage()
        else:
            usage()

    if cons_file is None:  # The concentrations file is required
        usage()

    print 'Parsing concentrations file...'
    raw_data = [line.split() for line in cons_file]  # Split the data into lines and split each line by spaces into an array
    cons_file.close()

    print 'Converting data to the appropriate sizes...'
    # Take the width and height from the first line of the file
    psm_width = shared.toInt(raw_data[0][0])
    psm_height = shared.toInt(raw_data[0][1])
    raw_data = raw_data[1 + steps_til_growth:]  # Remove all data before growth starts

    # Adjust step sizes for the given granularity
    steps_til_growth /= granularity
    steps_to_split /= granularity

    data = []  # Like raw_data, but takes only each <granularity> time steps of data and removes the time steps column
    for line in range(len(raw_data)):
        if line % granularity == 0:
            data.append(raw_data[line])
    total_steps = len(data)
    for row in range(total_steps):
        data[row] = data[row][1:]  # Remove the time steps column

    steps_when_full = (psm_width - initial_width) * steps_to_split  # When the PSM is done growing
    total_width = psm_width + (total_steps - steps_when_full) / steps_to_split  # The width of every cell that exists at any point
    table = [[0 for i in range(total_steps)] for j in range(total_width)]  # A table containing the data formatted more closely to what the figure requires

    print 'Accounting for cell growth and averaging cell columns...'
    min_con = float('inf')
    max_con = 0

    # Fill in the table with all data from when the PSM is growing
    current_width = initial_width
    row_start = current_width - 1
    steps_elapsed = 0
    for column in range(steps_when_full):
        for row in range(current_width):
            avg_con = 0
            cell_x = row_start - row  # Posterior cells should be printed on the right
            for cell_y in range(psm_height):  # Average each column of cells
                cell_index = cell_y * psm_width + cell_x
                avg_con += shared.toFlo(data[column][cell_index])
            avg_con /= psm_height
            table[row][column] = avg_con
            # Update the minimum and maximum concentrations
            min_con = min(min_con, avg_con)
            max_con = max(max_con, avg_con)
        for row in range(current_width, total_width):  # Nonexistent cells get the sentinel value -10
            table[row][column] = -10
        steps_elapsed += 1
        if steps_elapsed == steps_to_split:  # Split the PSM every steps_to_split time steps
            current_width += 1
            row_start += 1  # Adjust because the first cell in data is the new, posterior-most cell
            steps_elapsed = 0

    # Initialize the rest of the table with the sentinel value
    for column in range(steps_when_full, total_steps):
        for row in range(current_width, total_width):
            table[row][column] = -10

    # Fill in the table with all data from when the PSM is done growing
    arrested_cells = []
    row_start = psm_width - 1
    row_offset = 0
    for column in range(steps_when_full, total_steps):
        for row in range(psm_width):
            avg_con = 0
            cell_x = (row_start - row) % psm_width  # Posterior cells should be printed on the right
            for cell_y in range(psm_height):  # Average each column of cells
                cell_index = cell_y * psm_width + cell_x
                avg_con += shared.toFlo(data[column][cell_index])
            avg_con /= psm_height
            table[row + row_offset][column] = avg_con
            # Update the minimum and maximum concentrations
            min_con = min(min_con, avg_con)
            max_con = max(max_con, avg_con)
        for cell in arrested_cells:  # Mark each arrested cell with the sentinel for the rest of time (its last value is kept in the tuple but not used)
            table[cell[0]][column] = -10
        steps_elapsed += 1
        if steps_elapsed == steps_to_split:  # Split the PSM every steps_to_split time steps
            arrested_cells.append((row_offset, table[row_offset][column]))
            row_offset += 1
            steps_elapsed = 0
    max_con += 1

    print 'Cropping to the specified time range...'
    start_step /= granularity
    end_step /= granularity
    total_steps = end_step - start_step
    for row in range(total_width):
        table[row] = table[row][start_step:end_step]

    print 'Creating a blank image...'
    im = Image.new('RGB', (image_width, image_height), rgb('FFFFFF'))  # Make an image with a blank, white canvas
    draw = ImageDraw.Draw(im)  # Get the drawing object

    print 'Filling the image with the concentrations...'
    # Find the factors to scale the table data into an image_width by image_height sized figure
    x_factor = shared.toFlo(total_steps) / image_width
    y_factor = shared.toFlo(total_width) / image_height

    # Darker shades indicate higher concentrations
    shades = [rgb('FEB4EF'), rgb('FEB4EF'), rgb('FE5A77'), rgb('FE2D3B'), rgb('FF0000'), rgb('BF0000'), rgb('7F0000'), rgb('3F0000'), rgb('000000'), rgb('FFFFFF')]
    num_shades = len(shades)
    for i in range(image_width):
        x = shared.toInt(i * x_factor)
        for j in range(image_height):
            reverse_j = image_height - j - 1  # In the figure, cell 0 is at the bottom, not top
            y = shared.toInt(reverse_j * y_factor)
            con = table[y][x]
            if con == -10:  # sentinel for cells that do not exist at this time
                color = rgb('EEE5DE')
            else:
                color = shades[int((con - min_con) / (max_con - min_con) * (num_shades - 1))]  # Find the color matching the concentration
            draw.point((i, j), fill = color)

    print 'Saving the image...'
    figure_fname_full = figure_fname + '.' + image_format.lower()
    im.save(figure_fname_full, image_format.upper())
    print 'Done. Your figure is stored in ' + figure_fname_full
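# The figure code above calls an rgb() helper that is not defined in this
# section. A plausible minimal version, assuming it converts a six-character
# hex color string into the (r, g, b) tuple that PIL expects:

def rgb(hex_string):
    # 'FF0000' -> (255, 0, 0)
    return (int(hex_string[0:2], 16),
            int(hex_string[2:4], 16),
            int(hex_string[4:6], 16))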
# Overlays the cells of two concentrations files on one plot (the second
# file's values are scaled by 2) and also plots the average over all cells.
def main():
    # check the given arguments
    if len(sys.argv) < 6:
        usage()
    elif len(sys.argv) == 7:
        if sys.argv[1] == "-c" or sys.argv[1] == "--no-color":
            shared.terminalRed = ""
            shared.terminalReset = ""
            filename = sys.argv[2]
            filename2 = sys.argv[3]
            directory = sys.argv[4]
            measuring = sys.argv[5]
            mutation = sys.argv[6]
        else:
            usage()
    else:
        filename = sys.argv[1]
        filename2 = sys.argv[2]  # the second input file was missing from the original argument handling
        directory = sys.argv[3]
        measuring = sys.argv[4]
        mutation = sys.argv[5]

    # open the input files and ensure the directory exists
    f = shared.openFile(filename, "r")
    f2 = shared.openFile(filename2, "r")
    directory = shared.ensureDir(directory)

    # split the lines to get data
    data = [line.split() for line in f]
    file_len = len(data) - 1
    max_x = file_len
    f.close()

    data2 = [line.split() for line in f2]
    file_len2 = len(data2) - 1
    max_x2 = file_len2
    f2.close()

    # sanity check: both files should cover the same number of time steps
    if max_x != max_x2:
        print "Warning: the input files have different numbers of time steps."

    # number of columns we have in the files
    cn = shared.toInt(data[0][0]) * shared.toInt(data[0][1]) + 1
    cn2 = shared.toInt(data2[0][0]) * shared.toInt(data2[0][1]) + 1

    # create a matrix to store the data we obtained from the files
    m2p = numpy.zeros(shape = (max_x, cn + cn2))

    # put the data coming from the files into the matrix
    for i in range(2, file_len):
        for j in range(0, cn + cn2):
            if j < cn:
                m2p[i][j] = shared.toFlo(data[i][j])
            elif j == cn:
                pass  # data2's time column duplicates column 0, so skip it
            else:
                m2p[i][j] = 2 * shared.toFlo(data2[i][j - cn])  # the second file's values are scaled by 2

    # plot all cells, cycling through four colors
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    color = 0
    for i in range(1, cn + cn2):
        if i % 4 == 0:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'r')
        elif i % 4 == 1:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'g')
        elif i % 4 == 2:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'b')
        else:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'c')
    pl.title(measuring + " " + mutation + " All Cells")
    pl.savefig(directory + "/" + mutation + "_all.png", format = "png")
    pl.close()

    # plot the average over all cells
    average = []
    for i in range(0, max_x):
        average.append(float(sum(m2p[i][1:])) / float(len(m2p[i][1:])))
    pl.plot(m2p[0:max_x, 0], average, colors[color])
    if color == len(colors) - 1:
        color = 0
    else:
        color += 1
    pl.title(measuring + " " + mutation + " Average")
    pl.savefig(directory + "/" + mutation + "_avg.png", format = "png")
    pl.close()
# Plots average synchronization scores per mutant with standard error bars,
# and writes the underlying numbers to a CSV file.
def main():
    # check the given arguments
    if len(sys.argv) < 6:
        usage()
    else:
        folder = sys.argv[1]
        parsets = shared.toInt(sys.argv[2])
        ofolder = sys.argv[3]
        image_name = sys.argv[4]
        excel_name = sys.argv[5]

    mutants = ["wildtype", "delta", "her1", "her7", "her7her13", "her13"]
    markers = ['o', '^', 's', '*', 'h', 'D']
    colors = ['k', 'b', 'g', 'r', 'c', 'm']
    num_mutants = 6

    # Create the excel file in which the data used to create the plots will be stored
    excel_file = shared.openFile(ofolder + "/" + excel_name + "-sync.csv", "w")

    for index in range(num_mutants):
        mutant = mutants[index]
        marker = markers[index]
        color = colors[index]

        # open the first file to get the height, width and interval
        f = shared.openFile(folder + "/" + mutant + "/set_0_sync_mh1.feats", "r")

        # split the lines to get data
        data = [line.split(",") for line in f]
        f.close()

        # calculate the tissue size
        height = shared.toInt(data[0][0])
        interval = shared.toFlo(data[0][1])
        #split_time = shared.toFlo(data[0][2])
        width = len(data[1]) - 1

        indexes = [0 for i in range(width)]
        averages = [0 for i in range(width)]
        stderr = [0 for i in range(width)]

        # accumulate the values of every parameter set
        for parset in range(parsets):
            f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_sync_mh1.feats", "r")
            data = [line.split(",") for line in f]
            for col in range(width):
                for line in range(1, height + 1):
                    averages[col] += shared.toFlo(data[line][col])
            f.close()

        for col in range(width):
            indexes[col] = (interval * (col + 1) / 2) / 6  # midpoint of this column's interval (simplified from the original expression)
            averages[col] /= height * parsets

        if mutant == "wildtype":
            excel_file.write("mutant,")
            for index_value in indexes:
                excel_file.write(str(index_value) + ",")
            excel_file.write("\n")

        # std error = std deviation / sqrt(num data points)
        for parset in range(parsets):
            f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_sync_mh1.feats", "r")
            data = [line.split(",") for line in f]
            for col in range(width):
                for line in range(1, height + 1):
                    stderr[col] += (shared.toFlo(data[line][col]) - averages[col]) ** 2
            f.close()
        for col in range(width):
            stderr[col] = math.sqrt(stderr[col] / (height * parsets))
            stderr[col] /= math.sqrt(height * parsets)

        # Print the means and standard deviations to the excel_file
        excel_file.write(mutant + ",")
        for average in averages:
            excel_file.write(str(average) + ",")
        excel_file.write("\n,")
        for stder in stderr:
            excel_file.write(str(stder) + ",")
        excel_file.write("\n")

        plt.errorbar(indexes, averages, stderr, fmt = 'ro', linestyle = '-', marker = marker, color = color, label = mutant)

    plt.legend(prop = {'size': 8}, loc = 3)
    pylab.xlim([0, (width + 1) * (interval / 2) / 6])
    excel_file.close()
    plt.savefig(ofolder + "/" + image_name + ".png", format = "png")
    plt.close()
# Driver for the sensitivity-analysis plots: runs the sensitivity program
# (locally or through PBS) and then builds bar graphs or elasticity plots.
def main():
    # Default argument values.
    global Y_BOUNDS
    global JOB_NAME
    global PARAM_NAMES
    nodes = 1
    feature = -1
    points = 4
    percent = 20
    file_out = "sensitivity_graphs"
    nominal_file = "../sensitivity-analysis/nominal.params"
    data_dir = "../sensitivity-analysis/sense-for-plot"
    image_dir = "plots"
    perturb_file = "../simulation/input.perturb"
    nominal_count = 1
    ex_path = "../sensitivity-analysis/sensitivity"
    sim_path = "../simulation/simulation"
    ppn = "2"
    graph = False
    elasticity = False
    sim_args = " "
    additional_args = " "
    cname = None

    # Check the command line input:
    if len(sys.argv) < 2 or ("-h" in sys.argv) or ("--help" in sys.argv):
        usage()
    ishift = 0
    for i in range(1, len(sys.argv), 2):
        i += ishift
        if i + 1 >= len(sys.argv):
            usage()
        elif sys.argv[i] == "-n" or sys.argv[i] == "--nominal-file":
            nominal_file = sys.argv[i + 1]
        elif sys.argv[i] == "-c" or sys.argv[i] == "--nominal-count":
            nominal_count = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-p" or sys.argv[i] == "--percent":
            percent = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-P" or sys.argv[i] == "--Points":
            points = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-l" or sys.argv[i] == "--ppn":
            ppn = sys.argv[i + 1]
        elif sys.argv[i] == "-N" or sys.argv[i] == "--nodes":
            nodes = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-f" or sys.argv[i] == "--feature":
            feature = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-e" or sys.argv[i] == "--exec":
            ex_path = sys.argv[i + 1]
        elif sys.argv[i] == "-s" or sys.argv[i] == "--sim":
            sim_path = sys.argv[i + 1]
        elif sys.argv[i] == "-o" or sys.argv[i] == "--output":
            file_out = sys.argv[i + 1]
        elif sys.argv[i] == "-d" or sys.argv[i] == "--dir":
            image_dir = sys.argv[i + 1]
        elif sys.argv[i] == "-D" or sys.argv[i] == "--data-dir":
            data_dir = sys.argv[i + 1]
        elif sys.argv[i] == "-j" or sys.argv[i] == "--job-name":
            JOB_NAME = sys.argv[i + 1]
        elif sys.argv[i] == "-C" or sys.argv[i] == "--cluster-name":
            cname = sys.argv[i + 1]
        elif sys.argv[i] == "--ymin":
            val = float(sys.argv[i + 1])
            # Y_BOUNDS is kept as a list so its elements can be assigned individually
            # (the original created a tuple, which would fail on item assignment)
            if Y_BOUNDS == None:
                Y_BOUNDS = [val, max(1.5, 2 * val)]
            else:
                Y_BOUNDS[0] = val
        elif sys.argv[i] == "--ymax":
            val = float(sys.argv[i + 1])
            if Y_BOUNDS == None:
                Y_BOUNDS = [min(0, 2 * val), val]
            else:
                Y_BOUNDS[1] = val
        elif sys.argv[i] == "-E" or sys.argv[i] == "--elasticity":
            elasticity = True
            ishift -= 1  # single-value flag: shift subsequent reads back by one (decrement so multiple such flags stack)
        elif sys.argv[i] == "-g" or sys.argv[i] == "--graph":
            graph = True
            ishift -= 1  # single-value flag: shift subsequent reads back by one
        elif sys.argv[i] == "-a" or sys.argv[i] == "--args":
            # everything after -a is passed through to the sensitivity program
            for a in sys.argv[i + 1:]:
                additional_args += " " + a + " "
            break

    # Ensure that the necessary directories exist -- if not, make them.
    shared.ensureDir(data_dir)
    shared.ensureDir(image_dir)

    # additional_args is a string that is attached to the final arguments sent to the sensitivity analysis program.
    additional_args = " -p " + str(percent) + " -P " + str(points) + " " + additional_args

    # Depending on whether elasticity is chosen, either create sensitivity bar graphs or scatter-line plots.
    if not elasticity:
        # Check whether simulations actually need to be run. If all the data has already
        # been created, '-g' makes the script skip straight to the plotting.
        if not graph:
            # dispatch_jobs takes care of running the program locally or making the pbs jobs.
            dispatch_jobs(nodes, file_out, nominal_file, data_dir, image_dir, perturb_file, nominal_count, 0, ex_path, sim_path, ppn, sim_args, None, additional_args, cname)
            print "\t~ Done with runs ~"

        # Once the data has been collected, load it in and make the graphs.
        print "\t ~ Generating graphs ~ "
        # Load all of the data from the sensitivity results.
        # This uses "/normalized_[number]" as the file name because that is how it's set in sensitivity-analysis/init.hpp.
        # The struct input_params has two strings, norm_file and sense_file, that determine the names of specific files to load.
        # These could be specified more generally by adding a command-line argument to the sensitivity executable, but that has
        # not seemed necessary because there is already so much customization of the directories these files end up in.
        data = []
        names = []
        for i in range(nominal_count):
            temp_data, names = parse_files(data_dir + "/normalized_" + str(i))
            data.append(temp_data)

        # If just one feature is specified, this makes just one graph. Otherwise it loops through all features and makes a graph for each.
        bar_data = []   # The data that was actually plotted, i.e. average sensitivity values for each parameter.
        bar_error = []  # The standard error for each parameter.
        if feature > 0:
            temp_sense, temp_error = sense_bar(data, image_dir, feature, feat_name = names[feature])
            bar_data.append(temp_sense)
            bar_error.append(temp_error)
        else:
            sys.stdout.write("Done with normalized graphs: ")
            sys.stdout.flush()
            for i in range(len(data[0][0])):
                temp_sense, temp_error = sense_bar(data, image_dir, i, feat_name = names[i])
                bar_data.append(temp_sense)
                bar_error.append(temp_error)
                sys.stdout.write(str(i) + "... ")
                sys.stdout.flush()

        # Write out the bar graph data to file
        write_bar_data(bar_data, bar_error, data_dir + "/bar_graph_data_normalized.csv", ynames = names, xnames = PARAM_NAMES)

        # Absolute sensitivity graphs
        # Similarly, this uses "/LSA_[number]" as the file name because that is how it's set in sensitivity-analysis/init.hpp.
        data = []
        names = []
        for i in range(nominal_count):
            temp_data, names = parse_files(data_dir + "/LSA_" + str(i))
            data.append(temp_data)

        # Again, make one graph if a single feature was specified, or one graph per feature otherwise.
        bar_data = []
        bar_error = []
        if feature > 0:
            temp_sense, temp_error = sense_bar(data, image_dir, feature, feat_name = names[feature], normal = False)
            bar_data.append(temp_sense)
            bar_error.append(temp_error)
        else:
            sys.stdout.write("Done with absolute graphs: ")
            sys.stdout.flush()
            for i in range(len(data[0][0])):
                temp_sense, temp_error = sense_bar(data, image_dir, i, feat_name = names[i], normal = False)
                bar_data.append(temp_sense)
                bar_error.append(temp_error)
                sys.stdout.write(str(i) + "... ")
                sys.stdout.flush()

        # Write out the bar graph data to file
        write_bar_data(bar_data, bar_error, data_dir + "/bar_graph_data_absolute.csv", ynames = names, xnames = PARAM_NAMES)

    # If the elasticity option was included, make scatter plots of the oscillation
    # features data at different perturbations of each nominal parameter.
    else:
        # This command-line argument tells the sensitivity analysis program to gather the data without calculating the sensitivity.
        additional_args = " --generate-only " + additional_args

        # Note that for the elasticity/scatter-line plots each instance of sensitivity used to gather the data is given only one
        # parameter set, to ensure data files will be unique (and not get overwritten). This makes it slower than the sensitivity graphs.
        print "\n\t ~ Elasticity data collection ~ "
        data = []     # a four-dimensional list indexed by: data[which nominal set][which parameter][which perturbation amount][which oscillation feature value]
        names = []
        nominal = []  # a three-dimensional list indexed by: nominal[which nominal set][0][which oscillation feature]; the middle index is zero because there is only one parameter set in the nominal features file.

        # This loop runs if the data needs to be collected. The extra bookkeeping tracks how many
        # jobs should be sent out and the index of which nominal parameter set to use.
        if not graph:
            disp = 0  # a counter used to keep track of how many jobs to dispatch.
            raw_data_dirs = []  # filled with file name strings which get passed as arguments to the sensitivity program.
            for c in range(0, nominal_count):
                raw_data_dirs.append(data_dir + "/elastic_data_" + str(c))
                disp += 1
                if disp == nodes or c == nominal_count - 1:
                    dispatch_jobs(disp, file_out, nominal_file, data_dir, image_dir, perturb_file, disp, c - disp + 1, ex_path, sim_path, ppn, sim_args, raw_data_dirs, additional_args, cname)
                    raw_data_dirs = []
                    disp = 0

        # Now that the data files exist, load them and parse them into the appropriate arrays.
        # The "/dim_[number]" and "/nominal_0" strings are the file names that the sensitivity analysis program uses to distinguish output features files.
        # Modifying these file names would require changing nom_file and dim_file in the constructor of input_params in sensitivity-analysis/init.hpp.
        # For every nominal parameter set this opens its /elastic_data_[number] directory, parses the
        # files in it, and stores the results in data[number] and nominal[number].
        for c in range(0, nominal_count):
            data.append([])
            for d in range(44):
                temp_data, names = parse_files(data_dir + "/elastic_data_" + str(c) + "/dim_" + str(d))
                data[c].append(temp_data)
            temp_data, names = parse_files(data_dir + "/elastic_data_" + str(c) + "/nominal_0")
            nominal.append(temp_data)

        # data[] and nominal[] now hold everything needed for the graphs, so plot them.
        print "\n\t ~ Elasticity graphing ~ "
        sys.stdout.write("Done with parameter: ")
        sys.stdout.flush()
        # Loop through each parameter
        for p in range(len(data[0])):
            # Loop through each feature
            for f in range(len(data[0][0][0])):
                # Plot!
                line_plot(data, nominal, p, f, names[f], PARAM_NAMES[p], image_dir, percent, points)
            sys.stdout.write(str(p) + "...")
            sys.stdout.flush()

    print "\n\t ~ Graphs complete ~ "
    return
# Reads two concentrations files (ASCII .cons or binary .bcons), combines
# them, and renders hexagon-grid snapshots every 10 time steps.
def main():
    print 'Reading command-line arguments...'
    args = sys.argv[1:]
    if len(args) == 3:
        cons_fname1 = args[0]
        cons_fname2 = args[1]
        directory = args[2]
    else:
        usage()

    print 'Reading concentrations file 1...'
    min_con1 = float('inf')
    max_con1 = 0
    cons_data1 = []
    if cons_fname1.endswith('.cons'):  # Read ASCII file
        cons_file1 = shared.openFile(cons_fname1, 'r')
        width, height = map(lambda num: shared.toInt(num), cons_file1.readline().split(' '))  # The first line contains the width and height
        checkSize(width, height)
        for line in cons_file1:
            cons = map(lambda num: shared.toFlo(num), line.split(' ')[1:-1])  # Remove the time step column and newline when taking the concentrations
            for con in cons:
                min_con1 = min(min_con1, con)
                max_con1 = max(max_con1, con)
            cons_data1.append(cons)
    elif cons_fname1.endswith('.bcons'):  # Read binary file
        cons_file1 = shared.openFile(cons_fname1, 'rb')  # Read the file as a binary
        # The first two ints are the width and height
        width, = struct.unpack('i', cons_file1.read(4))
        height, = struct.unpack('i', cons_file1.read(4))
        checkSize(width, height)
        size = width * height
        cons1 = []
        cons_length1 = 0
        while True:
            con_str1 = cons_file1.read(4)
            if con_str1 == '':  # EOF
                break
            else:  # There are width * height concentration floats per time step
                con, = struct.unpack('f', con_str1)
                min_con1 = min(min_con1, con)
                max_con1 = max(max_con1, con)
                cons1.append(con)
                cons_length1 += 1
                if cons_length1 == size:  # the original compared against height, which would end each time step after one row
                    cons_data1.append(cons1)
                    cons1 = []
                    cons_length1 = 0  # reset the counter for the next time step (missing in the original)
    else:
        usage()

    print 'Reading concentrations file 2...'
    min_con2 = float('inf')
    max_con2 = 0
    cons_data2 = []
    if cons_fname2.endswith('.cons'):  # Read ASCII file
        cons_file2 = shared.openFile(cons_fname2, 'r')
        width, height = map(lambda num: shared.toInt(num), cons_file2.readline().split(' '))  # The first line contains the width and height
        checkSize(width, height)
        for line in cons_file2:
            cons = map(lambda num: shared.toFlo(num), line.split(' ')[1:-1])  # Remove the time step column and newline when taking the concentrations
            for con in cons:
                min_con2 = min(min_con2, con)
                max_con2 = max(max_con2, con)
            cons_data2.append(cons)
    elif cons_fname2.endswith('.bcons'):  # Read binary file
        cons_file2 = shared.openFile(cons_fname2, 'rb')  # Read the file as a binary
        # The first two ints are the width and height
        width, = struct.unpack('i', cons_file2.read(4))
        height, = struct.unpack('i', cons_file2.read(4))
        checkSize(width, height)
        size = width * height
        cons2 = []
        cons_length2 = 0
        while True:
            con_str2 = cons_file2.read(4)
            if con_str2 == '':  # EOF
                break
            else:  # There are width * height concentration floats per time step
                con, = struct.unpack('f', con_str2)
                min_con2 = min(min_con2, con)
                max_con2 = max(max_con2, con)
                cons2.append(con)
                cons_length2 += 1
                if cons_length2 == size:  # same fix as for file 1
                    cons_data2.append(cons2)
                    cons2 = []
                    cons_length2 = 0
    else:
        usage()

    print 'Creating the directory if necessary...'
    directory = shared.ensureDir(directory)
    if directory[-1] != '/':
        directory = directory + '/'

    cons_data = combine_cons(cons_data1, cons_data2, max_con1, min_con1, max_con2, min_con2)

    print 'Creating snapshots...'
    edge, size = findSizes(width, height)  # Configure the hexagon edge and window size based on the grid size
    index = 0
    for line in cons_data:
        if index % 10 == 0 and index >= 21000:
            plotHexagons(directory, size, index, line, edge, width, height)
        index += 1
    print 'Done. Your snapshots are stored in ' + directory
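# For testing the binary branch above, a .bcons file can be generated with the
# same layout the reader assumes: two 4-byte ints (width, height) followed by
# width * height 4-byte floats per time step. A small illustrative writer; the
# function name and the dummy values are made up:

import struct

def write_test_bcons(fname, width, height, num_steps):
    f = open(fname, 'wb')
    f.write(struct.pack('i', width))
    f.write(struct.pack('i', height))
    for step in range(num_steps):
        for cell in range(width * height):
            f.write(struct.pack('f', float(step + cell)))  # dummy concentrations
    f.close()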
# Narrows parameter ranges based on the mean and standard deviation of the
# values found in a file of parameter sets.
def main():
    print 'Reading command-line arguments...'
    args = sys.argv[1:]  # Remove the name of the program from the arguments
    num_args = len(args)
    req_args = [False] * 3  # If every required argument was given then req_args will be all true
    if num_args >= 3:
        # Arguments with default values
        stdevs_away = 2
        round_to = 5
        for arg in range(0, num_args - 1, 2):
            option = args[arg]
            value = args[arg + 1]
            if option == '-s' or option == '--sets':
                sets_file = shared.openFile(value, 'r')
                req_args[0] = True
            elif option == '-c' or option == '--current-ranges':
                cur_ranges_file = shared.openFile(value, 'r')
                req_args[1] = True
            elif option == '-n' or option == '--new-ranges':
                new_ranges_fname = value
                new_ranges_file = shared.openFile(new_ranges_fname, 'w')
                req_args[2] = True
            elif option == '-d' or option == '--standard-dev':
                stdevs_away = shared.toInt(value)
            elif option == '-r' or option == '--round-to':
                round_to = shared.toInt(value)
            elif option == '-h' or option == '--help':
                usage()
            else:
                usage()
        for arg in req_args:  # Check to ensure every required argument was entered
            if not arg:
                usage()
    else:
        usage()

    print 'Reading the parameter sets file...'
    # Parse the sets file to get the list of parameter sets
    sets = []
    for line in sets_file:
        if not (line.strip() == '' or line[0] == '#'):  # Ignore blank lines and comments
            sets.append(line)
    if len(sets) < 1:  # Ensure at least one set was given
        usage()

    print 'Reading the current ranges file...'
    # Parse the current ranges file to find the existing ranges
    par_names = []
    cur_ranges = []
    for line in cur_ranges_file:
        line = line.replace('\t', ' ')
        if not (line.strip() == '' or line[0] == '#'):  # Ignore blank lines and comments
            # Get the human-readable description
            space = line.find(' ')
            if space <= 0:
                parsing_error()
            par_names.append(line[: space])
            line = line[space + 1:]  # Skip past the description

            # Find the range bounds (the original used C-style '\0' checks, which do not apply to Python strings)
            start = 0
            while start < len(line) and line[start] == ' ':
                start += 1
            if start == len(line) or line[start] != '[':
                parsing_error()
            end = start + 1
            while end < len(line) and line[end] != ']':
                end += 1
            if end == len(line):
                parsing_error()
            line = line[start + 1 : end]
            bounds = map(shared.toFlo, line.split(','))  # Convert the bounds to floats
            if len(bounds) != 2:
                parsing_error()
            cur_ranges.append(bounds)

    print 'Calculating new ranges...'
    # Calculate each parameter's new range
    flo_sets = map(lambda ls: map(shared.toFlo, ls), map(lambda s: s.split(','), sets))  # Convert each parameter set string into an array of floats
    num_sets = len(flo_sets)
    new_ranges = []
    for p in range(len(cur_ranges)):  # For every range
        # Get the mean of this parameter based on every set
        vals = []
        for s in flo_sets:
            vals.append(s[p])
        mean = sum(vals) / num_sets

        # Calculate the standard deviation from the mean
        stdev_sum = 0
        for f in vals:
            stdev_sum += (f - mean) ** 2
        stdev = math.sqrt(stdev_sum / num_sets)

        # Define new ranges based on the mean and standard deviation that are at least as narrow as the current ranges
        lower_bound = max(cur_ranges[p][0], round(mean - stdev * stdevs_away, round_to))
        upper_bound = min(cur_ranges[p][1], round(mean + stdev * stdevs_away, round_to))
        new_ranges.append([lower_bound, upper_bound])

    print 'Writing the new ranges to the specified output file...'
    # Write the parameter ranges to the new ranges file
    for r in range(len(new_ranges)):
        new_ranges_file.write(par_names[r] + ' [' + str(new_ranges[r][0]) + ',' + str(new_ranges[r][1]) + ']\n')
    new_ranges_file.close()
    print 'Done. The new ranges are in ' + new_ranges_fname
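# A worked example of the range arithmetic above, with made-up numbers: if the
# sets give values [0.2, 0.4, 0.6] for a parameter, then mean = 0.4 and
# stdev = sqrt(((0.2-0.4)**2 + (0.4-0.4)**2 + (0.6-0.4)**2) / 3) ~= 0.16330.
# With stdevs_away = 2, round_to = 5 and a current range of [0, 0.5]:
#   lower_bound = max(0,   round(0.4 - 2 * 0.16330, 5)) = 0.0734
#   upper_bound = min(0.5, round(0.4 + 2 * 0.16330, 5)) = 0.5
# so the new range is [0.0734, 0.5]; the bounds can only narrow, never widen
# past the current range.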
# Plots normalized period or amplitude against position along the PSM for each
# mutant, bucketing positions and adding standard error bars.
def main():
    print 'Reading command line arguments...'
    # check the given arguments
    if len(sys.argv) < 8:
        usage()
    else:
        folder = sys.argv[1]
        parsets = shared.toInt(sys.argv[2])
        image_name = sys.argv[3]
        feature = sys.argv[4]
        ofolder = sys.argv[5]
        post_width = shared.toInt(sys.argv[6])
        excel_name = sys.argv[7]

    num_mutants = 6
    mutants = ["wildtype", "delta", "her1", "her7", "her7her13", "her13"]
    markers = ['o', '^', 's', '*', 'h', 'D']
    colors = ['k', 'b', 'g', 'r', 'c', 'm']

    features = []
    if feature == "period" or feature == "amplitude":
        features.append(feature)
    else:
        features.append("period")
        features.append("amplitude")

    for feat in features:
        # Create the excel file in which the data used to create the plots will be stored
        excel_file = shared.openFile(ofolder + "/" + excel_name + "-" + feat + ".csv", "w")
        print "Plotting ", feat, "..."

        first_avg = 0
        num_first = 0
        for index in range(num_mutants):
            mutant = mutants[index]
            print '    Running ' + mutant + '...'
            marker = markers[index]
            color = colors[index]

            # open the input file
            f = shared.openFile(folder + "/" + mutant + "/set_0_" + feat + "_mh1.feats", "r")

            # split the lines to get data
            data = [line.split(",") for line in f]

            # calculate the tissue size
            height = shared.toInt(data[0][0])
            width = shared.toInt(data[0][1])
            xmin = 0
            xmax = 0.9 * width

            buckets = 9  # split the interval into 9 chunks
            chunk = (width - post_width) / (buckets - 1)  # the width of the intervals after the posterior

            indexes = [0 for i in range(buckets)]
            for bucket in range(buckets):
                if bucket == 0:
                    indexes[bucket] = post_width / 2
                else:
                    indexes[bucket] = (post_width + (bucket - 1) * chunk) + (chunk / 2.0)
            averages = [0 for i in range(buckets)]
            num_points = [0 for i in range(buckets)]
            stderr = [0 for i in range(buckets)]

            if mutant == "wildtype":
                excel_file.write("mutant,")
                for idx in indexes:
                    excel_file.write(str(idx) + ",")
                excel_file.write("\n")

                # all other data points will be normalized to this value
                print '    Averaging the first bucket for the wildtype...'
                for parset in range(parsets):
                    # open the input file and ensure the directory exists
                    f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_" + feat + "_mh1.feats", "r")

                    # split the lines to get data
                    data = [line.split(",") for line in f]
                    lines = len(data)
                    for line in range(1, lines, 2):
                        for col in range(len(data[line]) - 1):
                            pos = shared.toInt(data[line][col])
                            val = shared.toFlo(data[line + 1][col])
                            if pos < post_width:
                                first_avg += val
                                num_first += 1
                first_avg /= num_first

            for parset in range(parsets):
                print '    Normalizing and analyzing data from set ' + str(parset) + '...'
                # open the input file and ensure the directory exists
                f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_" + feat + "_mh1.feats", "r")

                # split the lines to get data
                data = [line.split(",") for line in f]
                lines = len(data)
                for line in range(1, lines, 2):
                    for col in range(len(data[line]) - 1):
                        pos = shared.toInt(data[line][col])
                        val = shared.toFlo(data[line + 1][col]) / first_avg
                        if pos < post_width:
                            averages[0] += val
                            num_points[0] += 1
                        else:
                            averages[(pos - post_width) / chunk + 1] += val
                            num_points[(pos - post_width) / chunk + 1] += 1

            # ignore the buckets which don't have data
            buckets_with_data = buckets
            for bucket in range(buckets):
                if post_width + ((bucket - 1) * chunk) + chunk - 1 > (0.9 * width):
                    buckets_with_data -= 1
                else:
                    if num_points[bucket] > 0:
                        averages[bucket] /= num_points[bucket]
                    elif feat == "amplitude":
                        averages[bucket] = 0
                    else:
                        buckets_with_data -= 1
            buckets = buckets_with_data

            print '    Calculating standard error...'
            for parset in range(parsets):
                f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_" + feat + "_mh1.feats", "r")
                data = [line.split(",") for line in f]
                lines = len(data)
                for line in range(1, lines, 2):
                    for col in range(len(data[line]) - 1):
                        pos = shared.toInt(data[line][col])
                        val = shared.toFlo(data[line + 1][col]) / first_avg
                        if pos < post_width:
                            stderr[0] += (val - averages[0]) ** 2
                        else:
                            stderr[(pos - post_width) / chunk + 1] += (val - averages[(pos - post_width) / chunk + 1]) ** 2
            for bucket in range(buckets):
                if num_points[bucket] > 0:
                    stderr[bucket] = math.sqrt(stderr[bucket] / num_points[bucket])
                    stderr[bucket] /= math.sqrt(num_points[bucket])
                else:
                    stderr[bucket] = 0

            indexes = indexes[:buckets]
            averages = averages[:buckets]
            stderr = stderr[:buckets]

            # Print the means and standard deviations to the excel_file
            excel_file.write(mutant + ",")
            for average in averages:
                excel_file.write(str(average) + ",")
            excel_file.write("\n,")
            for stder in stderr:
                excel_file.write(str(stder) + ",")
            excel_file.write("\n")

            plt.errorbar(indexes, averages, stderr, fmt = 'ro', linestyle = '-', marker = marker, color = color, label = mutant)

        plt.legend(prop = {'size': 8}, loc = 2)
        pylab.xlim([xmin, xmax])
        excel_file.close()
        plt.savefig(ofolder + "/" + image_name + "_" + feat + ".png", format = "png")
        plt.close()
        print "Done. Your " + feat + " plot is stored in " + ofolder + "/" + image_name + "_" + feat + ".png"
        print "The data behind the plot can be found in " + ofolder + "/" + excel_name + "-" + feat + ".csv"
def main(): print "Reading command-line arguments..." args = sys.argv[1:] if len(args) == 3: cons_fname1 = args[0] cons_fname2 = args[1] directory = args[2] else: usage() print "Reading concentrations file 1..." min_con1 = float("inf") max_con1 = 0 cons_data1 = [] if cons_fname1.endswith(".cons"): # Read ASCII file cons_file1 = shared.openFile(cons_fname1, "r") width, height = map( lambda num: shared.toInt(num), cons_file1.readline().split(" ") ) # The first line contains the width and height checkSize(width, height) for line in cons_file1: cons = map( lambda num: shared.toFlo(num), line.split(" ")[1:-1] ) # Remove the time step column and newline when taking the concentrations for con in cons: min_con1 = min(min_con1, con) max_con1 = max(max_con1, con) cons_data1.append(cons) elif cons_fname1.endswith(".bcons"): # Read binary file cons_file1 = shared.openFile(cons_fname1, "rb") # Read the file as a binary # The first two ints are the width and height width, = struct.unpack("i", cons_file1.read(4)) height, = struct.unpack("i", cons_file1.read(4)) checkSize(width, height) size = width * height cons1 = [] cons_length1 = 0 while True: con_str1 = cons_file1.read(4) if con_str1 == "": # While not EOF break else: # There are width * height concentration floats per time step con, = struct.unpack("f", con_str1) min_con1 = min(min_con1, con) max_con1 = max(max_con1, con) cons1.append(con) cons_length1 += 1 if cons_length1 == height: cons_data1.append(cons) cons1 = [] else: usage() print "Reading concentrations file 2..." min_con2 = float("inf") max_con2 = 0 cons_data2 = [] if cons_fname2.endswith(".cons"): # Read ASCII file cons_file2 = shared.openFile(cons_fname2, "r") width, height = map( lambda num: shared.toInt(num), cons_file2.readline().split(" ") ) # The first line contains the width and height checkSize(width, height) for line in cons_file2: cons = map( lambda num: shared.toFlo(num), line.split(" ")[1:-1] ) # Remove the time step column and newline when taking the concentrations for con in cons: min_con2 = min(min_con2, con) max_con2 = max(max_con2, con) cons_data2.append(cons) elif cons_fname2.endswith(".bcons"): # Read binary file cons_file2 = shared.openFile(cons_fname2, "rb") # Read the file as a binary # The first two ints are the width and height width, = struct.unpack("i", cons_file2.read(4)) height, = struct.unpack("i", cons_file2.read(4)) checkSize(width, height) size = width * height cons2 = [] cons_length2 = 0 while True: con_str2 = cons_file2.read(4) if con_str2 == "": # While not EOF break else: # There are width * height concentration floats per time step con, = struct.unpack("f", con_str2) min_con2 = min(min_con2, con) max_con2 = max(max_con2, con) cons2.append(con) cons_length2 += 1 if cons_length2 == height: cons_data2.append(cons) cons2 = [] else: usage() print "Creating the directory if necessary..." directory = shared.ensureDir(directory) if directory[-1] != "/": directory = directory + "/" cons_data = combine_cons(cons_data1, cons_data2, max_con1, min_con1, max_con2, min_con2) print "Creating snapshots..." edge, size = findSizes(width, height) # Configure the hexagon edge and window size based on the grid size index = 0 for line in cons_data: if index % 10 == 0 and index >= 21000: plotHexagons(directory, size, index, line, edge, width, height) index += 1 print "Done. Your snapshots are stored in " + directory
# Counts, for each parameter set, how many seeds achieved the maximum score,
# and writes the sets that passed in at least 'threshold' seeds to a file.
def main():
    # check the given arguments
    print "Reading command-line arguments..."
    args = sys.argv[1:]
    num_args = len(args)
    req_args = [False] * 8
    if num_args == 16:
        for arg in range(0, num_args - 1, 2):
            option = args[arg]
            value = args[arg + 1]
            if option == '-S' or option == '--seeds':
                num_seeds = shared.toInt(value)
                req_args[0] = True
            elif option == '-n' or option == '--num-params':
                num_sets = shared.toInt(value)
                req_args[1] = True
            elif option == '-o' or option == '--output-file':
                output_file = value
                req_args[2] = True
            elif option == '-f' or option == '--num-files':
                num_files = shared.toInt(value)
                req_args[3] = True
            elif option == '-d' or option == '--directory':
                folder = value
                req_args[4] = True
            elif option == '-m' or option == '--max-score':
                max_score = shared.toInt(value)
                req_args[5] = True
            elif option == '-i' or option == '--input-file':
                input_file = value
                req_args[6] = True
            elif option == '-t' or option == '--threshold':
                threshold = shared.toInt(value)
                req_args[7] = True
            elif option == '-h' or option == '--help':
                usage()
            else:
                usage()
        for arg in req_args:  # quit if a required argument is missing
            if not arg:
                usage()
    else:
        usage()

    # count how many times each parameter set reached the maximum score, per seed
    parsets = [[0 for i in range(num_seeds)] for i in range(num_sets)]
    robust = open(output_file, "w")
    for seeds in range(num_seeds):
        seed = (seeds + 1) * 1000
        parindex = 0
        for i in range(0, num_files):
            scores = open(folder + "/scores-" + str(seed) + "-" + str(i) + ".csv")
            scores.readline()  # skip the header line in the scores file
            for result in scores:
                line = result.split(",")
                # the total score is the second-to-last field because each row ends with a trailing comma before the newline
                if int(line[-2]) == max_score:
                    parsets[parindex][seeds] += 1  # index by the 0-based seed counter (the original's seed / 1000 overran the list by one)
                parindex += 1
            scores.close()

    # write out the sets that passed in at least 'threshold' seeds
    num_robust = 0
    pars = open(input_file, "r")
    for i in range(parindex):
        parset = pars.readline()
        if sum(parsets[i]) >= threshold:
            num_robust += 1
            robust.write(parset)
    pars.close()
    print num_robust, "sets passed in at least", threshold, "seeds"
    robust.close()
def main():
    # check the given arguments (the plot style and two plot helpers are read below, so eight arguments are required)
    if len(sys.argv) < 8:
        usage()
    else:
        f = shared.openFile(sys.argv[1], "r")
        directory = sys.argv[2]
        image_name = sys.argv[3]
        step_size = shared.toFlo(sys.argv[4])
        plot_style = sys.argv[5]
        plot_helper1 = int(shared.toFlo(sys.argv[6]))
        plot_helper2 = int(shared.toFlo(sys.argv[7]))
    print 'Plotting all the cells from ' + sys.argv[1] + '...'

    # split the lines to get data
    data = [line.split() for line in f]
    max_time = len(data) - 1

    # calculate the tissue size
    cells_width = shared.toInt(data[0][0])
    cells_height = shared.toInt(data[0][1])
    total_cells = cells_width * cells_height + 1

    # create a matrix to store the concentration values we obtain from the file
    cons = numpy.zeros(shape = (max_time, total_cells))
    cons_t = [0] * max_time  # the concentrations over time of a single tracked column

    # create arrays of cell positions (for row plotting) and of simulation times
    pos = [0] * 50
    for i in range(0, 49):
        pos[i] = i
    time = [0] * max_time
    for i in range(1, max_time + 1):
        time[i - 1] = i * step_size

    # put the concentration values from the file into the matrix
    for i in range(1, max_time + 1):
        cons[i - 1][0] = shared.toFlo(data[i][0]) * step_size
        for j in range(1, total_cells):
            cons[i - 1][j] = shared.toFlo(data[i][j])

    # close the file
    f.close()

    # plot colors
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    color = 0

    # decide which rows/columns/cells to plot
    if plot_style == "col":
        startpoint = plot_helper1
        interval = cells_width
    elif plot_style == "all":
        startpoint = 1
        interval = 1
    elif plot_style == "cell":
        startpoint = plot_helper1
        interval = total_cells
    elif plot_style == "col_t":
        startpoint = plot_helper1
        interval = cells_width
        start_time = plot_helper2

    if plot_style != "row" and plot_style != "col_t":
        for j in range(0, plot_helper2):
            for i in range(startpoint + j, total_cells, interval):
                # Adjust the plotting interval for each cell to account for different columns being staggered
                # as they enter the PSM at intervals of 6 minutes apart from each other
                start = 0
                while cons[start][i] == -1:  # -1 stands for no data in the output file
                    start += 1
                end = max_time - 1
                while cons[end][i] == -1:
                    end -= 1
                if i % 4 == 0:
                    pl.plot(cons[start:end, 0], cons[start:end, i], 'r')
                elif i % 4 == 1:
                    pl.plot(cons[start:end, 0], cons[start:end, i], 'g')
                elif i % 4 == 2:
                    pl.plot(cons[start:end, 0], cons[start:end, i], 'b')
                else:
                    pl.plot(cons[start:end, 0], cons[start:end, i], 'c')
    elif plot_style == "row":
        # plot a single time point for four rows of 50 cells each
        pl.plot(pos[0:49], cons[plot_helper1, 1:50], 'r')
        pl.plot(pos[0:49], cons[plot_helper1, 51:100], 'g')
        pl.plot(pos[0:49], cons[plot_helper1, 101:150], 'b')
        pl.plot(pos[0:49], cons[plot_helper1, 151:200], 'c')
    elif plot_style == "col_t":
        # track a single column over time as it moves through the PSM
        print "tracking column"
        difference = 0
        if start_time > 29999:
            difference = start_time - 29999
        if startpoint <= 9:
            if start_time < 29999:
                timetodie = 24000
                timetilappear = 0
                i = 1
                while i <= 29999:
                    cons_t[timetilappear + i] = cons[timetilappear + i, startpoint]
                    i += 1
                timetilappear = 29999
            else:
                for i in range(start_time):
                    cons_t[i] = 0
                timetilappear = start_time
                timetodie = (cells_width - 1 - startpoint) * 600 - 1
        elif startpoint > 9 and startpoint <= 49:
            timetodie = (cells_width - 1 - startpoint) * 600
            timetilappear = (startpoint + 1 - 10) * 600 + 29999 + difference
            for i in range(timetilappear):
                cons_t[i] = 0
            growth = 1
            i = 1
            while i <= timetodie:
                if timetilappear + i >= 90000:
                    break
                if startpoint >= 93000:
                    break
                cons_t[timetilappear + i] = cons[timetilappear + i, startpoint]
                growth += 1
                i += 1
                if growth % 600 == 0:
                    # the tracked column shifts by one cell every 600 time steps as the tissue grows
                    startpoint += 1
                    growth = 1
        pl.plot(time[0:max_time], cons_t, 'r')

    pl.savefig(directory + "/" + image_name + ".png", format = "png")
    pl.close()
    print 'Done. Your plot is stored in ' + directory + "/" + image_name + ".png"
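The "-1 trimming" performed in the cell loop is the one piece of logic worth lifting out. A standalone sketch of it follows; data_window is an illustrative name, not a helper this script defines.

def data_window(values, missing = -1):
    # find the first and last indices at which a cell actually has data;
    # -1 marks 'no data' in the simulation output, as noted above
    start = 0
    while start < len(values) and values[start] == missing:
        start += 1
    end = len(values) - 1
    while end >= 0 and values[end] == missing:
        end -= 1
    return start, end  # plot values[start:end + 1]

print data_window([-1, -1, 3.0, 4.5, 2.2, -1])  # -> (2, 4)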
def main():
    # Default argument values.
    global Y_BOUNDS
    global JOB_NAME
    global PARAM_NAMES
    nodes = 1
    feature = -1
    points = 4
    percent = 20
    file_out = "sensitivity_graphs"
    nominal_file = "../sensitivity-analysis/nominal.params"
    data_dir = "../sensitivity-analysis/sense-for-plot"
    image_dir = "plots"
    perturb_file = "../simulation/input.perturb"
    nominal_count = 1
    ex_path = "../sensitivity-analysis/sensitivity"
    sim_path = "../simulation/simulation"
    ppn = "2"
    graph = False
    elasticity = False
    sim_args = " "
    additional_args = " "
    cname = None

    # Check the command-line input:
    if len(sys.argv) < 2 or ("-h" in sys.argv) or ("--help" in sys.argv):
        usage()

    ishift = 0  # offset to account for options that take no value
    for i in range(1, len(sys.argv), 2):
        i += ishift
        if i + 1 >= len(sys.argv):
            usage()
        elif sys.argv[i] == "-n" or sys.argv[i] == "--nominal-file":
            nominal_file = sys.argv[i + 1]
        elif sys.argv[i] == "-c" or sys.argv[i] == "--nominal-count":
            nominal_count = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-p" or sys.argv[i] == "--percent":
            percent = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-P" or sys.argv[i] == "--Points":
            points = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-l" or sys.argv[i] == "--ppn":
            ppn = sys.argv[i + 1]
        elif sys.argv[i] == "-N" or sys.argv[i] == "--nodes":
            nodes = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-f" or sys.argv[i] == "--feature":
            feature = shared.toInt(sys.argv[i + 1])
        elif sys.argv[i] == "-e" or sys.argv[i] == "--exec":
            ex_path = sys.argv[i + 1]
        elif sys.argv[i] == "-s" or sys.argv[i] == "--sim":
            sim_path = sys.argv[i + 1]
        elif sys.argv[i] == "-o" or sys.argv[i] == "--output":
            file_out = sys.argv[i + 1]
        elif sys.argv[i] == "-d" or sys.argv[i] == "--dir":
            image_dir = sys.argv[i + 1]
        elif sys.argv[i] == "-D" or sys.argv[i] == "--data-dir":
            data_dir = sys.argv[i + 1]
        elif sys.argv[i] == "-j" or sys.argv[i] == "--job-name":
            JOB_NAME = sys.argv[i + 1]
        elif sys.argv[i] == "-C" or sys.argv[i] == "--cluster-name":
            cname = sys.argv[i + 1]
        elif sys.argv[i] == "--ymin":
            val = float(sys.argv[i + 1])
            if Y_BOUNDS == None:
                Y_BOUNDS = [val, max(1.5, 2 * val)]  # a list, so the other bound can be assigned below
            else:
                Y_BOUNDS[0] = val
        elif sys.argv[i] == "--ymax":
            val = float(sys.argv[i + 1])
            if Y_BOUNDS == None:
                Y_BOUNDS = [min(0, 2 * val), val]
            else:
                Y_BOUNDS[1] = val
        elif sys.argv[i] == "-E" or sys.argv[i] == "--elasticity":
            elasticity = True
            ishift -= 1  # this option takes no value
        elif sys.argv[i] == "-g" or sys.argv[i] == "--graph":
            graph = True
            ishift -= 1  # this option takes no value
        elif sys.argv[i] == "-a" or sys.argv[i] == "--args":
            # everything after -a/--args is passed through to the sensitivity program
            for a in sys.argv[i + 1:]:
                additional_args += " " + a + " "
            break

    # Ensure that the necessary directories exist -- if not, make them.
    shared.ensureDir(data_dir)
    shared.ensureDir(image_dir)

    # additional_args is a string that is attached to the final arguments sent to the sensitivity analysis program.
    additional_args = " -p " + str(percent) + " -P " + str(points) + " " + additional_args

    # Depending on whether elasticity is chosen, either create sensitivity bar graphs or scatter-line plots.
    if not elasticity:
        # Check whether simulations actually need to be run. This is usually the case, but if all the data
        # has been created already then '-g' makes the script skip straight to the plotting.
        if not graph:
            # dispatch_jobs takes care of running the program locally or making the PBS jobs.
            dispatch_jobs(nodes, file_out, nominal_file, data_dir, image_dir, perturb_file, nominal_count, 0, ex_path, sim_path, ppn, sim_args, None, additional_args, cname)
            print "\t~ Done with runs ~"

        # Once the data has been collected, load it in and make the graphs.
        print "\t ~ Generating graphs ~ "

        # Load all of the data from the sensitivity results.
        # This uses "/normalized_[number]" as the file name because that is how it's set in sensitivity-analysis/init.hpp.
        # The struct input_params has two strings, norm_file and sense_file, that determine the names of specific files to load.
        # These could be specified more generally through a command-line argument to the sensitivity executable, but that has
        # not seemed necessary because there is already so much customization of the directories these files end up in.
        data = []
        names = []
        for i in range(nominal_count):
            temp_data, names = parse_files(data_dir + "/normalized_" + str(i))
            data.append(temp_data)

        # If just one feature is specified, make just one graph. Otherwise, loop through all features and make a graph for each.
        bar_data = []   # the data that is actually plotted, i.e. average sensitivity values for each parameter
        bar_error = []  # the standard error for each parameter
        if feature > 0:
            temp_sense, temp_error = sense_bar(data, image_dir, feature, feat_name = names[feature])
            bar_data.append(temp_sense)
            bar_error.append(temp_error)
        else:
            sys.stdout.write("Done with normalized graphs: ")
            sys.stdout.flush()
            for i in range(len(data[0][0])):
                temp_sense, temp_error = sense_bar(data, image_dir, i, feat_name = names[i])
                bar_data.append(temp_sense)
                bar_error.append(temp_error)
                sys.stdout.write(str(i) + "... ")
                sys.stdout.flush()

        # Write out the bar graph data to file.
        write_bar_data(bar_data, bar_error, data_dir + "/bar_graph_data_normalized.csv", ynames = names, xnames = PARAM_NAMES)

        # Absolute sensitivity graphs.
        # Similarly, this uses "/LSA_[number]" as the file name because that is how it's set in sensitivity-analysis/init.hpp.
        data = []
        names = []
        for i in range(nominal_count):
            temp_data, names = parse_files(data_dir + "/LSA_" + str(i))
            data.append(temp_data)

        # If just one feature is specified, make just one graph. Otherwise, loop through all features and make a graph for each.
        bar_data = []
        bar_error = []
        if feature > 0:
            temp_sense, temp_error = sense_bar(data, image_dir, feature, feat_name = names[feature], normal = False)
            bar_data.append(temp_sense)
            bar_error.append(temp_error)
        else:
            sys.stdout.write("Done with absolute graphs: ")
            sys.stdout.flush()
            for i in range(len(data[0][0])):
                temp_sense, temp_error = sense_bar(data, image_dir, i, feat_name = names[i], normal = False)
                bar_data.append(temp_sense)
                bar_error.append(temp_error)
                sys.stdout.write(str(i) + "... ")
                sys.stdout.flush()

        # Write out the bar graph data to file.
        write_bar_data(bar_data, bar_error, data_dir + "/bar_graph_data_absolute.csv", ynames = names, xnames = PARAM_NAMES)

    # If the elasticity option was included, make scatter plots of the oscillation feature data at different perturbations of each nominal parameter.
    else:
        # Add a command-line argument that tells the sensitivity analysis program to gather the data without calculating the sensitivity.
        additional_args = " --generate-only " + additional_args
        # Note that for the elasticity/scatter-line plots, each instance of sensitivity used to gather the data is given only one
        # parameter set to ensure the data files will be unique (and not get overwritten). This makes it slower than the sensitivity graphs.
        print "\n\t ~ Elasticity data collection ~ "
        data = []     # a four-dimensional list indexed by: data[nominal set][parameter][perturbation amount][oscillation feature]
        names = []
        nominal = []  # a three-dimensional list indexed by: nominal[nominal set][0][oscillation feature]; the middle index is
                      # zero because there is only one parameter set in the nominal features file

        # This loop runs if the data needs to be collected. The extra bookkeeping keeps track of how many jobs should be
        # sent out and of the index of the nominal parameter set to use.
        if not graph:
            disp = 0            # a counter used to keep track of how many jobs to dispatch
            raw_data_dirs = []  # a list of directory-name strings that get passed as arguments to the sensitivity program
            for c in range(0, nominal_count):
                raw_data_dirs.append(data_dir + "/elastic_data_" + str(c))
                disp += 1
                if disp == nodes or c == nominal_count - 1:
                    dispatch_jobs(disp, file_out, nominal_file, data_dir, image_dir, perturb_file, disp, c - disp + 1, ex_path, sim_path, ppn, sim_args, raw_data_dirs, additional_args, cname)
                    raw_data_dirs = []
                    disp = 0

        # Now that the data files exist, load them and parse them into the appropriate arrays.
        # The "/dim_[number]" and "/nominal_0" strings are the file names the sensitivity analysis program uses to distinguish output feature files.
        # Modifying these file names would require changing nom_file and dim_file in the constructor of input_params in sensitivity-analysis/init.hpp.
        # For every nominal parameter set, open its /elastic_data_[number] directory, parse the files in it, and store the results
        # in data[number] and nominal[number].
        for c in range(0, nominal_count):
            data.append([])
            for d in range(44):
                temp_data, names = parse_files(data_dir + "/elastic_data_" + str(c) + "/dim_" + str(d))
                data[c].append(temp_data)
            temp_data, names = parse_files(data_dir + "/elastic_data_" + str(c) + "/nominal_0")
            nominal.append(temp_data)

        # data[] and nominal[] now hold everything we need for the graphs, so plot them.
        print "\n\t ~ Elasticity graphing ~ "
        sys.stdout.write("Done with parameter: ")
        sys.stdout.flush()
        for p in range(len(data[0])):            # loop through each parameter
            for f in range(len(data[0][0][0])):  # loop through each feature
                line_plot(data, nominal, p, f, names[f], PARAM_NAMES[p], image_dir, percent, points)
            sys.stdout.write(str(p) + "...")
            sys.stdout.flush()

    print "\n\t ~ Graphs complete ~ "
    return
def main():
    print 'Reading command-line arguments...'
    args = sys.argv[1:]
    if len(args) == 2:
        cons_fname = args[0]
        directory = args[1]
    else:
        usage()

    print 'Reading the concentrations file...'
    min_con = float('inf')
    max_con = 0
    cons_data = []
    if cons_fname.endswith('.cons'):  # read an ASCII file
        cons_file = shared.openFile(cons_fname, 'r')
        width, height = map(shared.toInt, cons_file.readline().split(' '))  # the first line contains the width and height
        checkSize(width, height)
        for line in cons_file:
            cons = map(shared.toFlo, line.split(' ')[1:-1])  # remove the time step column and the newline when taking the concentrations
            for con in cons:
                min_con = min(min_con, con)
                max_con = max(max_con, con)
            cons_data.append(cons)
    elif cons_fname.endswith('.bcons'):  # read a binary file
        cons_file = shared.openFile(cons_fname, 'rb')  # read the file as a binary
        # the first two ints are the width and height
        width, = struct.unpack('i', cons_file.read(4))
        height, = struct.unpack('i', cons_file.read(4))
        checkSize(width, height)
        size = width * height
        cons = []
        cons_length = 0
        while True:
            con_str = cons_file.read(4)
            if con_str == '':  # stop at EOF
                break
            # there are width * height concentration floats per time step
            con, = struct.unpack('f', con_str)
            min_con = min(min_con, con)
            max_con = max(max_con, con)
            cons.append(con)
            cons_length += 1
            if cons_length == size:  # a full time step has been read; start collecting the next one
                cons_data.append(cons)
                cons = []
                cons_length = 0
    else:
        usage()

    print 'Creating the directory if necessary...'
    directory = shared.ensureDir(directory)
    if directory[-1] != '/':
        directory = directory + '/'

    print 'Creating snapshots...'
    edge, size = findSizes(width, height)  # configure the hexagon edge and window size based on the grid size
    index = 0
    for line in cons_data:
        if index % 10 == 0 and index >= 50000:  # only plot every tenth time step, starting at step 50000
            plotHexagons(directory, size, index, line, min_con, max_con, edge, width, height)
        index += 1
    print 'Done. Your snapshots are stored in ' + directory
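A hedged round-trip sketch of the .bcons layout the reader above assumes: two int32s (width, height) followed by raw float32 concentrations, width * height per time step. write_bcons and toy.bcons are hypothetical, useful only for checking the reader against a file you generate yourself.

import struct

def write_bcons(fname, width, height, rows):
    # two int32s (width, height), then width * height float32s per time step
    f = open(fname, "wb")
    f.write(struct.pack('i', width))
    f.write(struct.pack('i', height))
    for row in rows:
        for con in row:
            f.write(struct.pack('f', con))
    f.close()

write_bcons("toy.bcons", 2, 2, [[0.0, 1.0, 2.0, 3.0]])
f = open("toy.bcons", "rb")
print struct.unpack('i', f.read(4))[0], struct.unpack('i', f.read(4))[0]  # -> 2 2
f.close()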
def main():
    print 'Reading command line arguments...'
    # check the given arguments
    if len(sys.argv) < 8:
        usage()
    else:
        folder = sys.argv[1]
        parsets = shared.toInt(sys.argv[2])
        image_name = sys.argv[3]
        feature = sys.argv[4]
        ofolder = sys.argv[5]
        post_width = shared.toInt(sys.argv[6])
        excel_name = sys.argv[7]

    num_mutants = 6
    index = 0
    mutants = ["wildtype", "delta", "her1", "her7", "her7her13", "her13"]
    markers = ['o', '^', 's', '*', 'h', 'D']
    colors = ['k', 'b', 'g', 'r', 'c', 'm']

    features = []
    if feature == "period" or feature == "amplitude":
        features.append(feature)
    else:
        features.append("period")
        features.append("amplitude")

    for feat in features:
        # create the excel file in which the data used to create the plots will be stored
        excel_file = shared.openFile(ofolder + "/" + excel_name + "-" + feat + ".csv", "w")
        print "Plotting", feat, "..."

        first_avg = 0
        num_first = 0
        for index in range(num_mutants):
            mutant = mutants[index]
            print '    Running ' + mutant + '...'
            marker = markers[index]
            color = colors[index]

            # open the input file
            f = shared.openFile(folder + "/" + mutant + "/set_0_" + feat + "_mh1.feats", "r")

            # split the lines to get data
            data = [line.split(",") for line in f]

            # calculate the tissue size
            height = shared.toInt(data[0][0])
            width = shared.toInt(data[0][1])
            xmin = 0
            xmax = 0.9 * width

            buckets = 9  # split the interval into 9 chunks
            chunk = (width - post_width) / (buckets - 1)  # the width of the intervals after the posterior

            # the x-coordinate (cell position) at the center of each bucket
            indexes = [0 for i in range(buckets)]
            for bucket in range(buckets):
                if bucket == 0:
                    indexes[bucket] = post_width / 2
                else:
                    indexes[bucket] = (post_width + (bucket - 1) * chunk) + (chunk / 2.0)

            averages = [0 for i in range(buckets)]
            num_points = [0 for i in range(buckets)]
            stderr = [0 for i in range(buckets)]

            if mutant == "wildtype":
                excel_file.write("mutant,")
                for index in indexes:
                    excel_file.write(str(index) + ",")
                excel_file.write("\n")
                print '    Averaging the first bucket for the wildtype...'
                # all other data points will be normalized to this value
                for parset in range(parsets):
                    # open the input file and ensure the directory exists
                    f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_" + feat + "_mh1.feats", "r")
                    # split the lines to get data
                    data = [line.split(",") for line in f]
                    lines = len(data)
                    for line in range(1, lines, 2):
                        for col in range(len(data[line]) - 1):
                            pos = shared.toInt(data[line][col])
                            val = shared.toFlo(data[line + 1][col])
                            if pos < post_width:
                                first_avg += val
                                num_first += 1
                first_avg /= num_first

            for parset in range(parsets):
                print '    Normalizing and analyzing data from set ' + str(parset) + '...'
                # open the input file and ensure the directory exists
                f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_" + feat + "_mh1.feats", "r")
                # split the lines to get data
                data = [line.split(",") for line in f]
                lines = len(data)
                for line in range(1, lines, 2):
                    for col in range(len(data[line]) - 1):
                        pos = shared.toInt(data[line][col])
                        val = shared.toFlo(data[line + 1][col]) / first_avg
                        if pos < post_width:
                            averages[0] += val
                            num_points[0] += 1
                        else:
                            averages[(pos - post_width) / chunk + 1] += val
                            num_points[(pos - post_width) / chunk + 1] += 1

            # ignore the buckets which don't have data
            buckets_with_data = buckets
            for bucket in range(buckets):
                if post_width + ((bucket - 1) * chunk) + chunk - 1 > (0.9 * width):
                    buckets_with_data -= 1
                else:
                    if num_points[bucket] > 0:
                        averages[bucket] /= num_points[bucket]
                    elif feat == "amplitude":
                        averages[bucket] = 0
                    else:
                        buckets_with_data -= 1
            buckets = buckets_with_data

            print '    Calculating standard error...'
            for parset in range(parsets):
                f = shared.openFile(folder + "/" + mutant + "/set_" + str(parset) + "_" + feat + "_mh1.feats", "r")
                data = [line.split(",") for line in f]
                lines = len(data)
                for line in range(1, lines, 2):
                    for col in range(len(data[line]) - 1):
                        pos = shared.toInt(data[line][col])
                        val = shared.toFlo(data[line + 1][col]) / first_avg
                        if pos < post_width:
                            stderr[0] += (val - averages[0]) ** 2
                        else:
                            stderr[(pos - post_width) / chunk + 1] += (val - averages[(pos - post_width) / chunk + 1]) ** 2
            for bucket in range(buckets):
                if num_points[bucket] > 0:
                    # standard error = standard deviation / sqrt(sample size)
                    stderr[bucket] = math.sqrt(stderr[bucket] / num_points[bucket])
                    stderr[bucket] /= math.sqrt(num_points[bucket])
                else:
                    stderr[bucket] = 0

            indexes = indexes[:buckets]
            averages = averages[:buckets]
            stderr = stderr[:buckets]

            # write the means and standard errors to the excel file
            excel_file.write(mutant + ",")
            for average in averages:
                excel_file.write(str(average) + ",")
            excel_file.write("\n,")
            for stder in stderr:
                excel_file.write(str(stder) + ",")
            excel_file.write("\n")

            plt.errorbar(indexes, averages, stderr, fmt='ro', linestyle='-', marker=marker, color=color, label=mutant)
            plt.legend(prop={'size': 8}, loc=2)
            pylab.xlim([xmin, xmax])

        excel_file.close()
        plt.savefig(ofolder + "/" + image_name + "_" + feat + ".png", format = "png")
        plt.close()
        print "Done. Your " + feat + " plot is stored in " + ofolder + "/" + image_name + "_" + feat + ".png"
        print "The data behind the plot can be found in " + ofolder + "/" + excel_name + "-" + feat + ".csv"
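The bucket arithmetic above (posterior cells in bucket 0, the remaining width split into equal chunks) is compact enough to verify with toy numbers. bucket_of is an illustrative helper, and the post_width/width values below are made up.

def bucket_of(pos, post_width, chunk):
    # bucket 0 covers the posterior; the rest of the tissue is split into equal chunks
    if pos < post_width:
        return 0
    return (pos - post_width) / chunk + 1  # integer division, matching the script

post_width, width, buckets = 10, 50, 9
chunk = (width - post_width) / (buckets - 1)  # = 5 cells per bucket
print [bucket_of(p, post_width, chunk) for p in (0, 9, 10, 14, 15, 49)]  # -> [0, 0, 1, 1, 2, 8]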
def main():
    print 'Reading command-line arguments...'
    args = sys.argv[1:]  # remove the name of the program from the arguments
    num_args = len(args)
    req_args = [False] * 3  # if every required argument was given then req_args will be all true

    if num_args >= 3:
        # arguments with default values
        stdevs_away = 2
        round_to = 5

        for arg in range(0, num_args - 1, 2):
            option = args[arg]
            value = args[arg + 1]
            if option == '-s' or option == '--sets':
                sets_file = shared.openFile(value, 'r')
                req_args[0] = True
            elif option == '-c' or option == '--current-ranges':
                cur_ranges_file = shared.openFile(value, 'r')
                req_args[1] = True
            elif option == '-n' or option == '--new-ranges':
                new_ranges_fname = value
                new_ranges_file = shared.openFile(new_ranges_fname, 'w')
                req_args[2] = True
            elif option == '-d' or option == '--standard-dev':
                stdevs_away = shared.toInt(value)
            elif option == '-r' or option == '--round-to':
                round_to = shared.toInt(value)
            elif option == '-h' or option == '--help':
                usage()
            else:
                usage()
        for arg in req_args:  # check to ensure every required argument was entered
            if not arg:
                usage()
    else:
        usage()

    print 'Reading the parameter sets file...'
    # parse the sets file to get the list of parameter sets
    sets = []
    for line in sets_file:
        if not (line.strip() == '' or line[0] == '#'):  # ignore blank lines and comments
            sets.append(line)
    if len(sets) < 1:  # ensure at least one set was given
        usage()

    print 'Reading the current ranges file...'
    # parse the current ranges file to find the existing ranges
    par_names = []
    cur_ranges = []
    for line in cur_ranges_file:
        line = line.replace('\t', ' ')
        if not (line.strip() == '' or line[0] == '#'):  # ignore blank lines and comments
            # get the human-readable description
            space = line.find(' ')
            if space <= 0:
                parsing_error()
            par_names.append(line[:space])
            line = line[space + 1:]  # skip past the description

            # find the range bounds, checking the string length (Python strings have no '\0' terminator)
            start = 0
            while start < len(line) and line[start] == ' ':
                start += 1
            if start == len(line) or line[start] != '[':
                parsing_error()
            end = start + 1
            while end < len(line) and line[end] != ']':
                end += 1
            if end == len(line):
                parsing_error()
            line = line[start + 1:end]
            bounds = map(shared.toFlo, line.split(','))  # convert the bounds to floats
            if len(bounds) != 2:
                parsing_error()
            cur_ranges.append(bounds)

    print 'Calculating new ranges...'
    # calculate each parameter's new range
    flo_sets = map(lambda s: map(shared.toFlo, s.split(',')), sets)  # convert each parameter set string into an array of floats
    num_sets = len(flo_sets)
    new_ranges = []
    for p in range(len(cur_ranges)):  # for every range
        # get the mean of this parameter over every set
        vals = []
        for s in flo_sets:
            vals.append(s[p])
        mean = sum(vals) / num_sets

        # calculate the standard deviation from the mean
        stdev_sum = 0
        for f in vals:
            stdev_sum += (f - mean) ** 2
        stdev = math.sqrt(stdev_sum / num_sets)

        # define new ranges based on the mean and standard deviation that are at least as narrow as the current ranges
        lower_bound = max(cur_ranges[p][0], round(mean - stdev * stdevs_away, round_to))
        upper_bound = min(cur_ranges[p][1], round(mean + stdev * stdevs_away, round_to))
        new_ranges.append([lower_bound, upper_bound])

    print 'Writing the new ranges to the specified output file...'
    # write the parameter ranges to the new ranges file
    for r in range(len(new_ranges)):
        new_ranges_file.write(par_names[r] + ' [' + str(new_ranges[r][0]) + ',' + str(new_ranges[r][1]) + ']\n')
    new_ranges_file.close()
    print 'Done. The new ranges are in ' + new_ranges_fname
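The core of the range narrowing is mean ± stdevs_away * stdev, clamped to the current range. A minimal sketch with toy values; narrow is an illustrative stand-in for the loop above, and the defaults match the script's.

import math

def narrow(values, cur_lo, cur_hi, stdevs_away = 2, round_to = 5):
    mean = sum(values) / float(len(values))
    sd = math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))
    # clamp the new range so it is at least as narrow as the current one
    lo = max(cur_lo, round(mean - sd * stdevs_away, round_to))
    hi = min(cur_hi, round(mean + sd * stdevs_away, round_to))
    return [lo, hi]

print narrow([40.0, 45.0, 50.0, 55.0, 60.0], 0.0, 100.0)  # -> [35.85786, 64.14214]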
def main():
    # check the given arguments
    if len(sys.argv) < 6:
        usage()
    elif len(sys.argv) == 7:
        if sys.argv[1] == "-c" or sys.argv[1] == "--no-color":
            shared.terminalRed = ""
            shared.terminalReset = ""
            filename = sys.argv[2]
            filename2 = sys.argv[3]
            directory = sys.argv[4]
            measuring = sys.argv[5]
            mutation = sys.argv[6]
        else:
            usage()
    else:
        filename = sys.argv[1]
        filename2 = sys.argv[2]
        directory = sys.argv[3]
        measuring = sys.argv[4]
        mutation = sys.argv[5]

    # open the input files and ensure the directory exists
    f = shared.openFile(filename, "r")
    f2 = shared.openFile(filename2, "r")
    directory = shared.ensureDir(directory)

    # split the lines to get data
    data = [line.split() for line in f]
    file_len = len(data) - 1
    max_x = file_len
    f.close()

    data2 = [line.split() for line in f2]
    file_len2 = len(data2) - 1
    max_x2 = file_len2
    f2.close()

    if max_x != max_x2:
        print "Warning: the two files contain different numbers of time steps."

    # the number of columns we have in each file (one time column plus one column per cell)
    cn = shared.toInt(data[0][0]) * shared.toInt(data[0][1]) + 1
    cn2 = shared.toInt(data2[0][0]) * shared.toInt(data2[0][1]) + 1

    # create a matrix to store the data we obtained from the files
    m2p = numpy.zeros(shape = (max_x, cn + cn2))

    # put the data coming from the files into the matrix
    for i in range(2, file_len):
        for j in range(0, cn + cn2):
            if j < cn:
                m2p[i][j] = shared.toFlo(data[i][j])
            elif j == cn:
                pass  # skip the time column of the second file
            else:
                m2p[i][j] = 2 * shared.toFlo(data2[i][j - cn])  # the second file's values are scaled by a factor of 2

    # plot colors
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    color = 0

    # plot every cell from both files against the time column
    for i in range(1, cn + cn2):
        if i % 4 == 0:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'r')
        elif i % 4 == 1:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'g')
        elif i % 4 == 2:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'b')
        else:
            pl.plot(m2p[0:max_x, 0], m2p[0:max_x, i], 'c')
    pl.title(measuring + " " + mutation + " All Cells")
    pl.savefig(directory + "/" + mutation + "_all.png", format = "png")
    pl.close()

    # plot the average of all cells at each time step
    average = []
    for i in range(0, max_x):
        average.append(float(sum(m2p[i][1:])) / float(len(m2p[i][1:])))
    pl.plot(m2p[0:max_x, 0], average, colors[color])
    if color == len(colors) - 1:
        color = 0
    else:
        color += 1
    pl.title(measuring + " " + mutation + " Average")
    pl.savefig(directory + "/" + mutation + "_avg.png", format = "png")
    pl.close()
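The per-time-step averaging at the end skips column 0 because it holds the time stamps. A toy demonstration with a hypothetical 2x3 matrix, showing only the averaging step in isolation.

import numpy

m2p = numpy.array([[0.0, 2.0, 4.0],
                   [1.0, 6.0, 8.0]])  # column 0 is time; columns 1+ are cells
average = [float(sum(row[1:])) / len(row[1:]) for row in m2p]
print average  # -> [3.0, 7.0]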