예제 #1
0
def elprep_sfm_gnupar ():
  """Split the input, filter every split with GNU parallel, and merge the results.

  Arguments are taken from ``sys.argv``: ``sys.argv[1]`` is the input file,
  ``sys.argv[2]`` the output file, and ``sys.argv[3:]`` are forwarded to the
  per-file ``elprep`` invocations once the wrapper-only options
  ``--nr-of-jobs`` and ``--intermediate-files-output-type`` are stripped.
  ``--nr-of-jobs`` sets GNU parallel's ``-j`` value and defaults to 1 when it
  is not given. ``--intermediate-files-output-type`` defaults to ``sam``.

  Two timestamped working directories are created in the current working
  directory and removed again after the merge.

  Raises:
    subprocess.CalledProcessError: if the GNU parallel command exits non-zero.
  """
  # set up directories for intermediate results
  file_in = sys.argv[1]
  file_out = sys.argv[2]
  output_prefix = os.path.splitext(os.path.basename(file_in))[0]
  stamp = str(time.time())
  split_dir = os.path.join(os.getcwd(), "temp-" + stamp + os.sep)
  result_dir = os.path.join(os.getcwd(), "temp-processed-" + stamp + os.sep)
  os.mkdir(split_dir)
  os.mkdir(result_dir)
  # split command
  nr_of_threads_opt = elprep_io_wrapper.cmd_option('--nr-of-threads', sys.argv)
  intermediate_files_opt = elprep_io_wrapper.cmd_option('--intermediate-files-output-type', sys.argv)
  if intermediate_files_opt:
    intermediate_files_output_type = intermediate_files_opt[1]
  else:
    intermediate_files_output_type = "sam"
  elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], file_in, split_dir, ["--output-prefix", output_prefix, "--output-type", intermediate_files_output_type] + nr_of_threads_opt)
  spread_file = os.path.join(split_dir, output_prefix + "-spread." + intermediate_files_output_type)
  splits_path = os.path.join(split_dir, "splits" + os.sep)
  # gnu parallel command
  # Fall back to a single job when --nr-of-jobs is absent; indexing the empty
  # lookup result would otherwise raise IndexError after the split already ran.
  nr_of_jobs_opt = elprep_io_wrapper.cmd_option('--nr-of-jobs', sys.argv) or ['--nr-of-jobs', 1]
  read_group_string = elprep_io_wrapper.cmd_option('--replace-read-group', sys.argv)
  given_cmd_opts = elprep_io_wrapper.remove_cmd_option(sys.argv[3:], '--nr-of-jobs')
  given_cmd_opts = elprep_io_wrapper.remove_cmd_option(given_cmd_opts, '--intermediate-files-output-type')
  cmd_opts = given_cmd_opts
  if read_group_string:
    # Re-add the read group wrapped in double quotes; presumably so it reaches
    # elprep as a single argument through the shell -- confirm with append_cmd.
    cmd_opts = elprep_io_wrapper.remove_cmd_option(cmd_opts, '--replace-read-group')
    cmd_opts = cmd_opts + ['--replace-read-group', '\"' + read_group_string[1] + '\"']
  cmd_opts = cmd_opts + ['--split-file']
  cmd_list = ["elprep"]
  # '{}' and '{/.}' are GNU parallel replacement strings: the input path and
  # its basename without extension.
  elprep_cmd = '\'' + reduce(append_cmd, cmd_list + ['{}', result_dir + '{/.}.' + intermediate_files_output_type ] + cmd_opts) + '\''
  gnu_cmd = 'parallel --gnu -j ' + str(nr_of_jobs_opt[1]) + ' ' + elprep_cmd + ' ::: ' + splits_path + '*.' + intermediate_files_output_type
  subprocess.check_call(gnu_cmd, shell=True)
  # command for spread file
  spread_out_file = os.path.join(result_dir, output_prefix + "-spread." + intermediate_files_output_type)
  elprep_io_wrapper.cmd_wrap_io(["elprep"], spread_file, spread_out_file , given_cmd_opts)
  # merge command
  elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir, file_out, nr_of_threads_opt)
  # remove directories for intermediate results
  for root, dirs, files in os.walk(splits_path):
    for name in files:
      os.remove(os.path.join(root, name))
  os.rmdir(splits_path)
  os.remove(spread_file)
  os.rmdir(split_dir)
  for root, dirs, files in os.walk(result_dir):
    for name in files:
      os.remove(os.path.join(root, name))
  os.rmdir(result_dir)
예제 #2
0
def elprep_sfm():
    """Run elprep as split, sequential per-split filter, then merge.

    Reads the input path from ``sys.argv[1]``, the output path from
    ``sys.argv[2]`` and forwards ``sys.argv[3:]`` (minus
    ``--intermediate-files-output-type``) to the filter invocations.
    Intermediate files are kept in two timestamped directories under the
    current working directory and are deleted as the pipeline advances.
    """
    source_path = sys.argv[1]
    target_path = sys.argv[2]
    prefix = os.path.splitext(os.path.basename(source_path))[0]

    # working directories for intermediate results
    stamp = str(time.time())
    split_dir = os.path.join(os.getcwd(), "temp-" + stamp + os.sep)
    result_dir = os.path.join(os.getcwd(), "temp-processed-" + stamp + os.sep)
    os.mkdir(split_dir)
    os.mkdir(result_dir)

    # option handling
    threads_opt = elprep_io_wrapper.cmd_option("--nr-of-threads", sys.argv)
    type_opt = elprep_io_wrapper.cmd_option("--intermediate-files-output-type", sys.argv)
    file_type = type_opt[1] if type_opt else "sam"
    filter_opts = elprep_io_wrapper.remove_cmd_option(sys.argv[3:], "--intermediate-files-output-type")

    # split command
    split_opts = ["--output-prefix", prefix, "--output-type", file_type] + threads_opt
    elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], source_path, split_dir, split_opts)

    spread_name = prefix + "-spread." + file_type
    spread_in = os.path.join(split_dir, spread_name)
    splits_path = os.path.join(split_dir, "splits" + os.sep)

    # filter every split file, deleting each input once it has been processed
    for root, dirs, files in os.walk(splits_path):
        for name in files:
            if os.path.splitext(name)[1] not in (".sam", ".bam", ".cram"):
                continue
            split_file = os.path.join(root, name)
            filtered_file = os.path.join(result_dir, os.path.basename(name))
            elprep_io_wrapper.cmd_wrap_io(["elprep"], split_file, filtered_file, filter_opts + ["--split-file"])
            os.remove(split_file)
        # runs once per visited directory, so it is skipped entirely when
        # the splits directory does not exist
        os.rmdir(splits_path)

    # filter the spread file
    spread_out = os.path.join(result_dir, spread_name)
    elprep_io_wrapper.cmd_wrap_io(["elprep"], spread_in, spread_out, filter_opts)
    os.remove(spread_in)
    os.rmdir(split_dir)

    # merge command
    elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir, target_path, threads_opt)

    # drop the processed intermediates
    for root, dirs, files in os.walk(result_dir):
        for name in files:
            os.remove(os.path.join(root, name))
    os.rmdir(result_dir)
예제 #3
0
def elprep_sfm (argv):
  """Run elprep as split, sequential per-split filter, then merge.

  ``argv[1]`` is the input path, ``argv[2]`` the output path, and
  ``argv[3:]`` (minus ``--intermediate-files-output-type``) is forwarded to
  the filter invocations. Intermediate files are kept in two timestamped
  directories under the current working directory and are deleted as the
  pipeline advances.
  """
  source_path = argv[1]
  target_path = argv[2]
  prefix = os.path.splitext(os.path.basename(source_path))[0]

  # working directories for intermediate results
  stamp = str(time.time())
  split_dir = os.path.join(os.getcwd(), "temp-" + stamp + os.sep)
  result_dir = os.path.join(os.getcwd(), "temp-processed-" + stamp + os.sep)
  os.mkdir(split_dir)
  os.mkdir(result_dir)

  # option handling
  threads_opt = elprep_io_wrapper.cmd_option("--nr-of-threads", argv)
  type_opt = elprep_io_wrapper.cmd_option("--intermediate-files-output-type", argv)
  file_type = type_opt[1] if type_opt else "sam"
  filter_opts = elprep_io_wrapper.remove_cmd_option(argv[3:], "--intermediate-files-output-type")

  # split command
  split_opts = ["--output-prefix", prefix, "--output-type", file_type] + threads_opt
  elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], source_path, split_dir, split_opts)

  spread_name = prefix + "-spread." + file_type
  spread_in = os.path.join(split_dir, spread_name)
  splits_path = os.path.join(split_dir, "splits" + os.sep)

  # filter every split file, deleting each input once it has been processed
  for root, dirs, files in os.walk(splits_path):
    for name in files:
      if os.path.splitext(name)[1] not in (".sam", ".bam", ".cram"):
        continue
      split_file = os.path.join(root, name)
      filtered_file = os.path.join(result_dir, os.path.basename(name))
      elprep_io_wrapper.cmd_wrap_io(["elprep"], split_file, filtered_file, filter_opts + ["--split-file"])
      os.remove(split_file)
    # runs once per visited directory, so it is skipped entirely when the
    # splits directory does not exist
    os.rmdir(splits_path)

  # filter the spread file
  spread_out = os.path.join(result_dir, spread_name)
  elprep_io_wrapper.cmd_wrap_io(["elprep"], spread_in, spread_out, filter_opts)
  os.remove(spread_in)
  os.rmdir(split_dir)

  # merge command
  elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir, target_path, threads_opt)

  # drop the processed intermediates
  for root, dirs, files in os.walk(result_dir):
    for name in files:
      os.remove(os.path.join(root, name))
  os.rmdir(result_dir)
예제 #4
0
def elprep_sfm_gnupar(argv):
    """Split the input, filter the splits with GNU parallel, and merge the results.

    Args:
        argv: Command-line style list. ``argv[1]`` is the input file or
            directory, ``argv[2]`` the output file, and ``argv[3:]`` are
            options. Options consumed here:

            * ``--nr-of-jobs``: GNU parallel ``-j`` value (default 1).
            * ``--nr-of-threads``: threads per filter job (default: CPU count
              floor-divided by the number of jobs, at least 1).
            * ``--intermediate-files-output-type``: type of the split files
              (default: the input's extension, ``sam`` if it has none).
            * ``--intermediate-files-output-prefix``: overrides the prefix
              derived from the input name.
            * ``--single-end``: flag; split files are then read directly from
              the split directory and no spread file is processed.
            * ``--replace-read-group``: re-added wrapped in double quotes for
              the shell command.

    Two timestamped working directories are created in the current working
    directory and removed again after the merge.

    Raises:
        subprocess.CalledProcessError: if the GNU parallel command exits
            non-zero.
    """
    # ``reduce`` is a builtin on Python 2 only; functools provides it on both
    # Python 2.6+ and Python 3.
    from functools import reduce

    # set up directories for intermediate results
    file_in = argv[1]
    file_out = argv[2]
    stamp = str(time.time())
    split_dir = os.path.join(os.getcwd(), "temp-" + stamp + os.sep)
    result_dir = os.path.join(os.getcwd(), "temp-processed-" + stamp + os.sep)
    os.mkdir(split_dir)
    os.mkdir(result_dir)
    nr_of_jobs_opt = elprep_io_wrapper.cmd_option('--nr-of-jobs',
                                                  argv) or ['--nr-of-jobs', 1]
    # Floor division: true division would yield a float on Python 3 (e.g.
    # "4.0"), which the int() conversion below cannot parse.
    nr_of_threads_opt = elprep_io_wrapper.cmd_option(
        '--nr-of-threads', argv) or [
            '--nr-of-threads',
            str((multiprocessing.cpu_count() // int(nr_of_jobs_opt[1])) or 1)
        ]
    # split command
    # split and merge run as a single process, so they get jobs * threads
    nr_of_split_merge_threads_opt = [
        '--nr-of-threads',
        str(int(nr_of_jobs_opt[1]) * int(nr_of_threads_opt[1]))
    ]

    if os.path.isdir(file_in):
        # directory input: name and type are taken from its first listed entry
        output_prefix, output_extension = os.path.splitext(
            os.path.basename(os.listdir(file_in)[0]))
    else:  # file
        output_prefix, output_extension = os.path.splitext(
            os.path.basename(file_in))
    if output_extension == '':  # e.g. /dev/stdin
        output_extension = '.sam'

    intermediate_files_opt = elprep_io_wrapper.cmd_option(
        '--intermediate-files-output-type', argv)
    if intermediate_files_opt:
        intermediate_files_output_type = intermediate_files_opt[1]
    else:
        intermediate_files_output_type = output_extension[1:]

    intermediate_files_op_opt = elprep_io_wrapper.cmd_option(
        '--intermediate-files-output-prefix', argv)
    if intermediate_files_op_opt:
        output_prefix = intermediate_files_op_opt[1]

    single_end_opt = elprep_io_wrapper.flg_option("--single-end", argv)

    elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], file_in, split_dir, [
        "--output-prefix", output_prefix, "--output-type",
        intermediate_files_output_type
    ] + nr_of_split_merge_threads_opt + single_end_opt)

    # gnu parallel command
    read_group_string = elprep_io_wrapper.cmd_option('--replace-read-group',
                                                     argv)
    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        argv[3:], '--nr-of-jobs')
    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        given_cmd_opts, '--intermediate-files-output-type')
    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        given_cmd_opts, '--intermediate-files-output-prefix')
    if single_end_opt:
        splits_path = split_dir
        given_cmd_opts = elprep_io_wrapper.remove_flg_option(
            given_cmd_opts, "--single-end")
    else:
        splits_path = os.path.join(split_dir, "splits" + os.sep)

    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        given_cmd_opts, '--nr-of-threads')
    cmd_opts = given_cmd_opts + nr_of_threads_opt
    if read_group_string:
        cmd_opts = elprep_io_wrapper.remove_cmd_option(cmd_opts,
                                                       '--replace-read-group')
        cmd_opts = cmd_opts + [
            '--replace-read-group', '\"' + read_group_string[1] + '\"'
        ]
    cmd_list = ["elprep filter"]
    # '{}' and '{/.}' are GNU parallel replacement strings: the input path
    # and its basename without extension.
    elprep_cmd = '\'' + reduce(
        append_cmd, cmd_list +
        ['{}', result_dir + '{/.}.' + intermediate_files_output_type] +
        cmd_opts) + '\''
    gnu_cmd = 'parallel --gnu -j ' + str(
        nr_of_jobs_opt[1]
    ) + ' ' + elprep_cmd + ' ::: ' + splits_path + '*.' + intermediate_files_output_type
    # check_call raises CalledProcessError on a non-zero exit status, so no
    # separate return-code check is needed.
    subprocess.check_call(gnu_cmd, shell=True)

    # command for spread file

    if not single_end_opt:
        spread_file = os.path.join(
            split_dir,
            output_prefix + "-spread." + intermediate_files_output_type)
        spread_out_file = os.path.join(
            result_dir,
            output_prefix + "-spread." + intermediate_files_output_type)
        elprep_io_wrapper.cmd_wrap_io(["elprep", "filter"], spread_file,
                                      spread_out_file, given_cmd_opts +
                                      nr_of_split_merge_threads_opt)
        os.remove(spread_file)

    # merge command
    elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir,
                                      file_out, nr_of_split_merge_threads_opt +
                                      single_end_opt)
    # remove directories for intermediate results
    for root, dirs, files in os.walk(splits_path):
        for name in files:
            os.remove(os.path.join(root, name))
    if not single_end_opt:
        # in single-end mode splits_path is split_dir itself, removed below
        os.rmdir(splits_path)
    os.rmdir(split_dir)
    for root, dirs, files in os.walk(result_dir):
        for name in files:
            os.remove(os.path.join(root, name))
    os.rmdir(result_dir)
def elprep_sfm_gnupar(argv):
    """Split the input, filter every split with GNU parallel, and merge the results.

    Args:
        argv: Command-line style list. ``argv[1]`` is the input file,
            ``argv[2]`` the output file, and ``argv[3:]`` are forwarded to
            the per-file ``elprep`` invocations once the wrapper-only options
            ``--nr-of-jobs`` and ``--intermediate-files-output-type`` are
            stripped. ``--nr-of-jobs`` sets GNU parallel's ``-j`` value and
            defaults to 1 when absent;
            ``--intermediate-files-output-type`` defaults to ``sam``.

    Two timestamped working directories are created in the current working
    directory and removed again after the merge.

    Raises:
        subprocess.CalledProcessError: if the GNU parallel command exits
            non-zero.
    """
    # ``reduce`` is a builtin on Python 2 only; functools provides it on both
    # Python 2.6+ and Python 3.
    from functools import reduce

    # set up directories for intermediate results
    file_in = argv[1]
    file_out = argv[2]
    output_prefix = os.path.splitext(os.path.basename(file_in))[0]
    stamp = str(time.time())
    split_dir = os.path.join(os.getcwd(), "temp-" + stamp + os.sep)
    result_dir = os.path.join(os.getcwd(), "temp-processed-" + stamp + os.sep)
    os.mkdir(split_dir)
    os.mkdir(result_dir)
    # split command
    nr_of_threads_opt = elprep_io_wrapper.cmd_option('--nr-of-threads', argv)
    intermediate_files_opt = elprep_io_wrapper.cmd_option(
        '--intermediate-files-output-type', argv)
    if intermediate_files_opt:
        intermediate_files_output_type = intermediate_files_opt[1]
    else:
        intermediate_files_output_type = "sam"
    elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], file_in, split_dir, [
        "--output-prefix", output_prefix, "--output-type",
        intermediate_files_output_type
    ] + nr_of_threads_opt)
    spread_file = os.path.join(
        split_dir, output_prefix + "-spread." + intermediate_files_output_type)
    splits_path = os.path.join(split_dir, "splits" + os.sep)
    # gnu parallel command
    # Fall back to a single job when --nr-of-jobs is absent; indexing the
    # empty lookup result would otherwise raise IndexError after the split
    # already ran.
    nr_of_jobs_opt = elprep_io_wrapper.cmd_option(
        '--nr-of-jobs', argv) or ['--nr-of-jobs', 1]
    read_group_string = elprep_io_wrapper.cmd_option('--replace-read-group',
                                                     argv)
    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        argv[3:], '--nr-of-jobs')
    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        given_cmd_opts, '--intermediate-files-output-type')
    cmd_opts = given_cmd_opts
    if read_group_string:
        cmd_opts = elprep_io_wrapper.remove_cmd_option(cmd_opts,
                                                       '--replace-read-group')
        cmd_opts = cmd_opts + [
            '--replace-read-group', '\"' + read_group_string[1] + '\"'
        ]
    cmd_opts = cmd_opts + ['--split-file']
    cmd_list = ["elprep"]
    # '{}' and '{/.}' are GNU parallel replacement strings: the input path
    # and its basename without extension.
    elprep_cmd = '\'' + reduce(
        append_cmd, cmd_list +
        ['{}', result_dir + '{/.}.' + intermediate_files_output_type] +
        cmd_opts) + '\''
    gnu_cmd = 'parallel --gnu -j ' + str(
        nr_of_jobs_opt[1]
    ) + ' ' + elprep_cmd + ' ::: ' + splits_path + '*.' + intermediate_files_output_type
    # check_call raises CalledProcessError on a non-zero exit status, so no
    # separate return-code check is needed.
    subprocess.check_call(gnu_cmd, shell=True)
    # command for spread file
    spread_out_file = os.path.join(
        result_dir,
        output_prefix + "-spread." + intermediate_files_output_type)
    elprep_io_wrapper.cmd_wrap_io(["elprep"], spread_file, spread_out_file,
                                  given_cmd_opts)
    # merge command
    elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir,
                                      file_out, nr_of_threads_opt)
    # remove directories for intermediate results
    for root, dirs, files in os.walk(splits_path):
        for name in files:
            os.remove(os.path.join(root, name))
    os.rmdir(splits_path)
    os.remove(spread_file)
    os.rmdir(split_dir)
    for root, dirs, files in os.walk(result_dir):
        for name in files:
            os.remove(os.path.join(root, name))
    os.rmdir(result_dir)
예제 #6
0
def elprep_sfm(argv):
    """Run elprep as split, sequential per-split filter, then merge.

    Args:
        argv: Command-line style list. ``argv[1]`` is the input file or
            directory, ``argv[2]`` the output file, and ``argv[3:]`` are
            options. Options consumed here:

            * ``--nr-of-threads``: defaults to the CPU count and is always
              passed on explicitly.
            * ``--intermediate-files-output-type``: type of the split files
              (default: the input's extension, ``sam`` if it has none).
            * ``--intermediate-files-output-prefix``: overrides the prefix
              derived from the input name.
            * ``--reference-t`` / ``--reference-T``: one of them is required
              when the intermediate or final output type is CRAM.
            * ``--single-end``: flag; split files are then read directly from
              the split directory and no spread file is processed.

    Returns:
        None. If CRAM output is requested without a reference option, a
        message is printed and the function returns before any working
        directory is created or any elprep command is run.
    """
    file_in = argv[1]
    file_out = argv[2]
    # Only the names of the working directories are computed here. They are
    # created after option validation so that the early return below does
    # not leave empty directories behind.
    stamp = str(time.time())
    split_dir = os.path.join(os.getcwd(), "temp-" + stamp + os.sep)
    result_dir = os.path.join(os.getcwd(), "temp-processed-" + stamp + os.sep)
    # split command
    nr_of_threads_opt = elprep_io_wrapper.cmd_option(
        "--nr-of-threads",
        argv) or ["--nr-of-threads",
                  str(multiprocessing.cpu_count())]

    if os.path.isdir(file_in):
        # directory input: name and type are taken from its first listed entry
        output_prefix, output_extension = os.path.splitext(
            os.path.basename(os.listdir(file_in)[0]))
    else:  # file
        output_prefix, output_extension = os.path.splitext(
            os.path.basename(file_in))
    if output_extension == '':  # e.g. /dev/stdin
        output_extension = '.sam'

    intermediate_files_opt = elprep_io_wrapper.cmd_option(
        "--intermediate-files-output-type", argv)
    if intermediate_files_opt:
        intermediate_files_output_type = intermediate_files_opt[1]
    else:
        intermediate_files_output_type = output_extension[1:]

    file_out_output_extension = os.path.splitext(
        os.path.basename(file_out))[1]

    fasta_opt = []
    if intermediate_files_output_type == "cram" or file_out_output_extension == ".cram":
        fasta_t_opt = elprep_io_wrapper.cmd_option("--reference-t", argv)
        fasta_T_opt = elprep_io_wrapper.cmd_option("--reference-T", argv)
        if fasta_t_opt:
            fasta_opt = fasta_t_opt
        elif fasta_T_opt:
            fasta_opt = fasta_T_opt
        else:
            if intermediate_files_output_type == "cram":
                print(
                    "Intermediate files output type is .cram, so need to pass either --reference-t or --reference-T"
                )
            else:
                print(
                    "Output file output type is .cram, so need to pass either --reference-t or --reference-T"
                )
            return

    intermediate_files_op_opt = elprep_io_wrapper.cmd_option(
        "--intermediate-files-output-prefix", argv)
    if intermediate_files_op_opt:
        output_prefix = intermediate_files_op_opt[1]

    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        argv[3:], "--intermediate-files-output-type")

    given_cmd_opts = elprep_io_wrapper.remove_cmd_option(
        given_cmd_opts, "--intermediate-files-output-prefix")

    cmd_opts = given_cmd_opts
    nr_of_threads_opt_given = elprep_io_wrapper.cmd_option(
        "--nr-of-threads", cmd_opts)
    if not nr_of_threads_opt_given:
        cmd_opts = cmd_opts + nr_of_threads_opt  # so we pass --nr-of-threads to the elprep filter command explicitly

    single_end_opt = elprep_io_wrapper.flg_option("--single-end", argv)

    # set up directories for intermediate results
    os.mkdir(split_dir)
    os.mkdir(result_dir)

    elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], file_in, split_dir, [
        "--output-prefix", output_prefix, "--output-type",
        intermediate_files_output_type
    ] + fasta_opt + nr_of_threads_opt + single_end_opt)

    if single_end_opt:
        splits_path = split_dir
        cmd_opts = elprep_io_wrapper.remove_flg_option(cmd_opts,
                                                       "--single-end")
    else:
        splits_path = os.path.join(split_dir, "splits" + os.sep)

    # run filter command for split files
    for root, dirs, files in os.walk(splits_path):
        for name in files:
            ext = os.path.splitext(name)[1]
            if (ext == ".sam" or ext == ".bam" or ext == ".cram"):
                ffile = os.path.join(root, name)
                processed_file = os.path.join(result_dir,
                                              os.path.basename(name))
                elprep_io_wrapper.cmd_wrap_io(["elprep", "filter"], ffile,
                                              processed_file, cmd_opts)
                os.remove(ffile)

    # command for spread file
    # commands for split files and spread file are the same, but the files are stored in different folders
    # we keep them in separate folders for backwards compatibility with elPrep v2.61
    # in a future release this will be simplified, so all split files will be in the same folder
    if not single_end_opt:
        os.rmdir(splits_path)
        spread_file = os.path.join(
            split_dir,
            output_prefix + "-spread." + intermediate_files_output_type)
        spread_out_file = os.path.join(
            result_dir,
            output_prefix + "-spread." + intermediate_files_output_type)
        elprep_io_wrapper.cmd_wrap_io(["elprep", "filter"], spread_file,
                                      spread_out_file, cmd_opts)
        os.remove(spread_file)
    os.rmdir(split_dir)

    # merge command
    elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir,
                                      file_out, fasta_opt + nr_of_threads_opt +
                                      single_end_opt)
    # remove directories for intermediate results
    for root, dirs, files in os.walk(result_dir):
        for name in files:
            os.remove(os.path.join(root, name))
    os.rmdir(result_dir)