import os

import yaml

# ConnectionManager, YamlFileMaker, workspace and log_dir are assumed to be
# provided by the surrounding package/module (the exact import paths depend on
# the project layout).


def execute(ssh_client, project_name, analysis_steps, s3_input_files_address,
            sample_list, group_name, s3_output_files_address, email):
    yaml_file = project_name + ".yaml"
    global log_dir
    log_dir = log_dir.format(project_name)

    print("making the yaml file ...")
    YamlFileMaker.make_yaml_file(yaml_file, project_name, analysis_steps,
                                 s3_input_files_address, sample_list, group_name,
                                 s3_output_files_address, "hg19", "NA")

    print("copying yaml files to remote master node...")
    ConnectionManager.copy_file(ssh_client, yaml_file, workspace + "yaml_examples")
    os.remove(yaml_file)

    #if not email == "":
    print("executing pipeline...")
    ConnectionManager.execute_command(
        ssh_client,
        "qsub -o /dev/null -e /dev/null " + workspace + "scripts/run.sh " +
        workspace + "yaml_examples/" + yaml_file + " " + log_dir + " " +
        "WGSPipeline.py")
def edit_step_tools_config(ssh_client, new_step_tools_conf, step_name):
    # Read the current tools config from the cluster and replace the entry for
    # this step. (Note: newer PyYAML versions require an explicit Loader, e.g.
    # yaml.safe_load.)
    tools_conf = yaml.load(
        ConnectionManager.execute_command(
            ssh_client, "cat /shared/workspace/Pipelines/config/tools.yaml"))
    tools_conf[step_name] = new_step_tools_conf

    # Write the updated config locally, back up the remote copy (mv -n never
    # overwrites an existing backup), then upload the new config.
    with open("tools.yaml", "w+") as f:
        f.write(yaml.dump(tools_conf, default_flow_style=False))

    ConnectionManager.execute_command(
        ssh_client,
        "mv -n /shared/workspace/Pipelines/config/tools.yaml "
        "/shared/workspace/Pipelines/config/tools.yaml.BACKUP")

    ConnectionManager.copy_file(
        ssh_client, "{}/tools.yaml".format(os.getcwd()),
        "/shared/workspace/Pipelines/config/tools.yaml")
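# --- Usage sketch -----------------------------------------------------------
# A minimal, hypothetical illustration of edit_step_tools_config(); it is not
# called anywhere. The step name ("bwa") and the keys/values of the new config
# entry are placeholders, not taken from the real tools.yaml, and ssh_client is
# assumed to be an already connected paramiko SSHClient.
def _example_edit_tools_config(ssh_client):
    new_step_conf = {"script_path": "bwa/bwa-0.7.17", "download_path": "NA"}
    edit_step_tools_config(ssh_client, new_step_conf, "bwa")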
def upload_script(ssh_client, pipeline, workflow, script_name):
    script_path_cluster = "/shared/workspace/Pipelines/scripts/"

    if pipeline == "all":
        script_path_cluster += script_name
    elif workflow == "all":
        script_path_cluster += "{}/{}".format(pipeline, script_name)
    else:
        script_path_cluster += "{}/{}/{}".format(pipeline, workflow, script_name)

    ConnectionManager.execute_command(
        ssh_client, "mv -n {0} {0}.BACKUP".format(script_path_cluster))

    ConnectionManager.copy_file(ssh_client,
                                "{}/{}".format(os.getcwd(), script_name),
                                script_path_cluster)
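# --- Usage sketch -----------------------------------------------------------
# Hypothetical illustration of upload_script(); not called anywhere. It assumes
# an edited copy of "trim.sh" sits in the current working directory and that a
# "RNASeq"/"star_gatk" pipeline/workflow pair exists on the cluster; both names
# are placeholders. Passing pipeline="all" would instead overwrite the shared
# top-level copy of the script.
def _example_upload_script(ssh_client):
    upload_script(ssh_client, pipeline="RNASeq", workflow="star_gatk",
                  script_name="trim.sh")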
def execute(ssh_client, project_name, analysis_steps, s3_input_files_address,
            sample_list, group_list, s3_output_files_address):
    yaml_file = project_name + ".yaml"

    print("making the yaml file...")
    YamlFileMaker.make_yaml_file(yaml_file, project_name, analysis_steps,
                                 s3_input_files_address, sample_list, group_list,
                                 s3_output_files_address, "hg19", "NA")

    print("copying yaml file to remote master node...")
    ConnectionManager.copy_file(ssh_client, yaml_file, workspace + "yaml_examples")

    ## Remove the local yaml file
    os.remove(yaml_file)

    print("executing pipeline...")
    ConnectionManager.execute_command(
        ssh_client,
        "sh " + workspace + "run.sh " + workspace + "yaml_examples/" + yaml_file)
def execute(ssh_client, project_name, analysis_steps, s3_input_files_address,
            sample_list, group_name, s3_output_files_address):
    yaml_file = project_name + ".yaml"

    print("making the yaml file...")
    YamlFileMaker.make_yaml_file(yaml_file, project_name, analysis_steps,
                                 s3_input_files_address, sample_list, group_name,
                                 s3_output_files_address, "hg19", "NA")

    print("copying yaml file to remote master node...")
    ConnectionManager.copy_file(ssh_client, yaml_file, workspace + "yaml_examples")

    ## Remove the local yaml file
    os.remove(yaml_file)

    print("executing pipeline...")
    ConnectionManager.execute_command(
        ssh_client,
        "sh " + workspace + "run.sh " + workspace + "yaml_examples/" + yaml_file)
def edit_step_specific_config(ssh_client, pipeline, workflow,
                              new_extra_bash_args, step_name):
    conf_file_name = "{}_{}.yaml".format(pipeline, workflow)

    # Read the workflow-specific config from the cluster and replace the extra
    # bash arguments recorded for this step.
    spec_conf = yaml.load(
        ConnectionManager.execute_command(
            ssh_client, "cat /shared/workspace/Pipelines/config/{}/{}".format(
                pipeline, conf_file_name)))
    spec_conf[step_name] = new_extra_bash_args

    # Write the updated config locally, back up the remote copy (mv -n never
    # overwrites an existing backup), then upload the new config.
    with open(conf_file_name, "w+") as f:
        f.write(yaml.dump(spec_conf, default_flow_style=False))

    ConnectionManager.execute_command(
        ssh_client,
        "mv -n /shared/workspace/Pipelines/config/{0}/{1} "
        "/shared/workspace/Pipelines/config/{0}/{1}.BACKUP".format(
            pipeline, conf_file_name))

    ConnectionManager.copy_file(
        ssh_client, "{}/{}".format(os.getcwd(), conf_file_name),
        "/shared/workspace/Pipelines/config/{}/{}".format(
            pipeline, conf_file_name))
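# --- Usage sketch -----------------------------------------------------------
# Hypothetical illustration of edit_step_specific_config(); not called
# anywhere. The pipeline/workflow/step names and the list of extra bash
# arguments are placeholders; the real <pipeline>_<workflow>.yaml on the
# cluster defines which values each step actually accepts.
def _example_edit_step_specific_config(ssh_client):
    edit_step_specific_config(ssh_client, "RNASeq", "star_gatk",
                              ["--num_threads 4"], "fastqc")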
def execute(pipeline, ssh_client, project_name, workflow, analysis_steps,
            s3_input_files_address, sample_list, group_list,
            s3_output_files_address, genome, style, pairs_list):
    """Executes a pipeline.

    The main local-side function for executing a pipeline with all user inputs
    to the jupyter notebook. Calls the run.sh shell script on the cluster head
    node with nohup after creating a yaml file summarizing the user input and
    uploading that file to the cluster.

    Args:
        pipeline: name of the pipeline to be run, supported pipelines can be
            found in the CirrusAddons notebook
        ssh_client: a paramiko SSHClient object that connects to the cluster
            where the analysis is run
        project_name: name of the current project, <project_name>.yaml contains
            all user input to the notebook
        workflow: name of the workflow to be run, supported workflows can be
            found in the CirrusAddons notebook
        analysis_steps: set of analysis steps to be run, supported steps can be
            found in the pipeline's notebook
        s3_input_files_address: s3 bucket containing all fastq files for the
            project
        sample_list: list of dictionaries with sample info for each sample
        group_list: list of all groups, shares indices with sample_list
            (sample_list[0] is in group_list[0], etc.)
        s3_output_files_address: root s3 bucket where analysis results should
            be uploaded
        genome: reference genome to be used, supported genomes can be found in
            the pipeline's notebook
        style: only for the ChIPSeq homer workflow, can be "factor" or "histone"
        pairs_list: dictionary with keys=normal samples and values=experimental
            samples; for ChIPSeq, keys=ChIP samples and values=corresponding
            input regularization samples

    Returns:
        None
    """
    yaml_file = project_name + ".yaml"

    if s3_output_files_address.endswith("/"):
        s3_output_files_address = s3_output_files_address[:-1]
    if s3_input_files_address.endswith("/"):
        s3_input_files_address = s3_input_files_address[:-1]

    logs_dir = "/shared/workspace/logs/{}/{}/{}".format(
        pipeline, workflow, project_name)

    print("making the yaml file...")
    YamlFileMaker.make_yaml_file(yaml_file, pipeline, project_name, workflow,
                                 analysis_steps, s3_input_files_address,
                                 sample_list, group_list,
                                 s3_output_files_address, genome, style,
                                 pairs_list)

    print("copying yaml file to remote master node...")
    # Make sure the remote directory exists before copying into it
    remote_dir = workspace + "yaml_files/" + pipeline + "/" + workflow
    ssh_client.exec_command("mkdir -p " + remote_dir)
    ConnectionManager.copy_file(
        ssh_client, yaml_file,
        "{}yaml_files/{}/{}".format(workspace, pipeline, workflow))

    # Remove the local yaml file
    os.remove(yaml_file)

    print("executing pipeline...")
    ConnectionManager.execute_command(
        ssh_client,
        "nohup bash " + workspace + "scripts/run.sh " + workspace +
        "yaml_files/{}/{}/{} ".format(pipeline, workflow, yaml_file) +
        logs_dir + " " + pipeline + "_" + workflow)
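# --- Usage sketch -----------------------------------------------------------
# Hypothetical end-to-end call to execute() for a ChIPSeq homer run; it is not
# called anywhere. Every value below (bucket addresses, sample entries, step
# names, genome, pairs) is a placeholder chosen for illustration, and the keys
# inside the sample_list dictionaries are an assumption about what
# YamlFileMaker expects; only the parameter names match the signature above.
def _example_execute_chipseq(ssh_client):
    execute(
        pipeline="ChIPSeq",
        ssh_client=ssh_client,
        project_name="demo_project",
        workflow="homer",
        analysis_steps={"fastqc", "trim", "align", "make_tag_directory"},
        s3_input_files_address="s3://example-bucket/fastq",
        sample_list=[{"filename": "chip_rep1.fastq.gz"},
                     {"filename": "input_rep1.fastq.gz"}],
        group_list=["group1", "group1"],
        s3_output_files_address="s3://example-bucket/results",
        genome="hg19",
        style="factor",
        pairs_list={"chip_rep1": "input_rep1"},
    )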