def generate_data(model, sim_length, inputdir, outputdir):
    """
    The first pipeline step: data generation.

    Runs CopasiSE on the model (which must be configured to produce a
    TIME COURSE report), cleans the report, and splits it into one file
    per integer time point in [0, sim_length].

    :param model: the model to process (a Copasi file name ending in ".cps")
    :param sim_length: the length of the simulation
    :param inputdir: the directory containing the model
    :param outputdir: the directory to store the results
    :return: no output
    """
    model_filepath = os.path.join(inputdir, model)
    if not os.path.isfile(model_filepath):
        logger.error(model_filepath + " does not exist.")
        return

    # model[:-4] strips the ".cps" extension
    model_noext = model[:-4]
    refresh_directory(outputdir, model_noext)
    logger.info("Simulating Model: " + model)

    copasi = get_copasi()
    if copasi is None:
        logger.error("CopasiSE not found! Please check that CopasiSE is "
                     "installed and in the PATH environmental variable.")
        return

    # run CopasiSE. Copasi must generate a (TIME COURSE) report
    process = subprocess.Popen([copasi, '--nologo', model_filepath])
    process.wait()

    # Copasi may emit the report as either .csv or .txt
    report_csv = os.path.join(inputdir, model_noext + ".csv")
    report_txt = os.path.join(inputdir, model_noext + ".txt")
    if not os.path.isfile(report_csv) and not os.path.isfile(report_txt):
        # logger.warning replaces the deprecated logger.warn alias
        logger.warning(report_csv + " (or .txt) does not exist!")
        return
    if os.path.isfile(report_txt):
        # normalise the report extension to .csv
        os.rename(report_txt, report_csv)

    # Replace some string in the report file
    replace_str_copasi_sim_report(report_csv)

    # copy file removing empty lines
    output_csv = os.path.join(outputdir, model_noext + ".csv")
    with open(report_csv, 'r') as filein, \
            open(output_csv, 'w') as fileout:
        for line in filein:
            if not line.isspace():
                fileout.write(line)
    os.remove(report_csv)

    # Extract a selected time point from all perturbed time courses
    # contained in the report file
    with open(output_csv, 'r') as filein:
        lines = filein.readlines()
    header = lines[0]
    lines = lines[1:]

    filesout = []
    try:
        # one output file per integer time point 0..sim_length
        filesout = [open(os.path.join(outputdir,
                                      model_noext + "__tp_%d.csv" % i), "w")
                    for i in range(0, sim_length + 1)]
        # copy the header
        for fileout in filesout:
            fileout.write(header)
        # extract the i-th time point and copy it to the corresponding
        # i-th file
        for line in lines:
            tp = line.rstrip().split('\t')[0]
            # only integer time points are extracted (a '.' marks a
            # fractional time point, which is skipped); the bounds check
            # replaces an O(n) membership test against a range list
            if '.' not in tp:
                tp_int = int(tp)
                if 0 <= tp_int <= sim_length:
                    filesout[tp_int].write(line)
    finally:
        for fileout in filesout:
            fileout.close()
def generate_data(model, inputdir, outputdir, cluster_type="pp", pp_cpus=2, runs=1):
    """
    The first pipeline step: data generation.

    Replicates the Copasi model `runs` times, simulates the replicas in
    parallel, then collects the report files into outputdir.

    NOTE(review): this redefines generate_data(...); if both definitions
    live in the same module this one shadows the earlier one — confirm
    they belong to separate modules.

    :param model: the model to process (a Copasi file name ending in ".cps")
    :param inputdir: the directory containing the model
    :param outputdir: the directory containing the output files
    :param cluster_type: pp for local Parallel Python, lsf for Load Sharing
           Facility, sge for Sun Grid Engine.
    :param pp_cpus: the number of CPU used by Parallel Python.
    :param runs: the number of model simulation
    :return: no output
    """
    if runs < 1:
        logger.error("variable " + str(runs) + " must be greater than 0. Please, check your configuration file.")
        return
    if not os.path.isfile(os.path.join(inputdir, model)):
        logger.error(os.path.join(inputdir, model) + " does not exist.")
        return
    copasi = get_copasi()
    if copasi is None:
        logger.error(
            "CopasiSE not found! Please check that CopasiSE is installed and in the PATH environmental variable.")
        return

    # folder preparation
    refresh_directory(outputdir, model[:-4])

    # execute runs simulations.
    logger.info("Simulating model " + model + " for " + str(runs) + " time(s)")

    # Replicate the copasi file and rename its report file.
    # groupid is a random tag that makes the replica names unique.
    groupid = "_" + get_rand_alphanum_str(20) + "_"
    group_model = model[:-4] + groupid
    for i in xrange(1, runs + 1):
        replica = os.path.join(inputdir, group_model + str(i) + ".cps")
        shutil.copyfile(os.path.join(inputdir, model), replica)
        replace_string_in_file(replica,
                               model[:-4] + ".csv",
                               group_model + str(i) + ".csv")

    # run copasi in parallel.
    # groupid[10::-1] takes the first 11 characters of groupid, reversed.
    # This string will be likely different from groupid itself and serves
    # as the placeholder that parallel_computation substitutes with the
    # iteration number.
    str_to_replace = groupid[10::-1]
    command = copasi + " " + os.path.join(inputdir, group_model + str_to_replace + ".cps")
    parallel_computation(command, str_to_replace, cluster_type, runs, outputdir, pp_cpus)

    # move the report files.
    # re.escape guards against regex metacharacters in the model name.
    report_re = re.compile(re.escape(group_model) + r'[0-9]+.*\.(csv|txt)')
    report_files = [f for f in os.listdir(inputdir) if report_re.match(f)]
    for report_file in report_files:
        # Replace some string in the report file
        replace_str_copasi_sim_report(os.path.join(inputdir, report_file))
        # rename and move the output file, collapsing the random groupid
        # back to a plain "_" and forcing a ".csv" extension
        shutil.move(os.path.join(inputdir, report_file),
                    os.path.join(outputdir,
                                 report_file.replace(groupid, "_")[:-4] + ".csv"))

    # remove repeated copasi files
    copasi_re = re.compile(re.escape(group_model) + r'[0-9]+.*\.cps')
    for copasi_file in os.listdir(inputdir):
        if copasi_re.match(copasi_file):
            os.remove(os.path.join(inputdir, copasi_file))