Beispiel #1
0
def getQuantCombinations(job_requests, fastq_ref, input_parameters, debug):
    """Build the aligner / fastq-combination lookup for quantification.

    Every ';'-separated entry of input_parameters['quant_comb_list'] is a
    ','-separated group of fastq keys. The keys of a group are sorted and
    chained into a combination key of the form '_fq1_fq2_fq3'; the group is
    stored as the list of fastq_ref[key][0] values in that same order. Each
    job name in job_requests that ends in '_align' contributes its name
    (suffix stripped) as a prefix for every combination key.

    Args:
        job_requests: mapping whose keys are job names.
        fastq_ref: mapping fastq key -> sequence whose first item is the
            fastq reference name.
        input_parameters: mapping that provides 'quant_comb_list'.
        debug: when truthy, collect a debug table, print it via printDebug
            and call userCall afterwards.

    Returns:
        dict mapping '<aligner><combination key>' to the list of fastq
        reference names of that combination.
    """
    # debug table header
    debug_rows = []
    debug_format = '{:<15}'
    if debug:
        debug_rows.extend([
            ['def: getQuantCombinations():'],
            ['-'],
            ['combo', 'list'],
            ['-'],
        ])

    # aligner names are the '_align' job names without their suffix
    align_suffix = re.compile(r'_align$')
    aligners = [
        align_suffix.sub('', job_name) for job_name in job_requests.keys()
        if align_suffix.search(job_name)
    ]

    # fastq combinations:
    # fastq_combos['_fq1_fq2_fq3'] = ['fq_name_1', 'fq_name_2', 'fq_name_3']
    print("%s Status: Make fastq combinations ..." % (getTime()))
    fastq_combos = {}
    groups = [
        group for group in input_parameters['quant_comb_list'].split(';')
        if group
    ]
    for group in groups:
        members = sorted(member for member in group.split(',') if member)
        combo_key = ''.join('_%s' % member for member in members)
        # fastq_ref[key] = [ref_name, num_reads] -> keep the names in key order
        ref_names = [fastq_ref[member][0] for member in members]
        if debug:
            debug_rows.append([combo_key, ref_names])
        fastq_combos[combo_key] = ref_names
    print("%s Combinations: %s" % (getTime(), fastq_combos.keys()))

    # aligner + fastq combinations:
    # align_fq_comb['ALIGNTOOL1_fq1_fq2_fq3'] = ['fq_name_1', 'fq_name_2', ...]
    if debug:
        debug_rows.append(['-'])
    print("%s Status: Make aligner-fastq combinations ..." % (getTime()))
    align_fq_comb = {}
    for aligner in aligners:
        for combo_key, ref_names in fastq_combos.items():
            combined_key = aligner + combo_key
            if debug:
                debug_rows.append([combined_key, ref_names])
            align_fq_comb[combined_key] = ref_names
    print("%s Combinations: %s" % (getTime(), align_fq_comb.keys()))

    # show the debug table and wait for the user
    if debug:
        printDebug(debug_rows, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    return align_fq_comb
Beispiel #2
0
def makeBashConfig(job_list, input_parameters, job_config_data, debug):
    """Assemble the bash script parts for every job that is still to be run.

    Only jobs whose status is 'request', 'ask' or 'new' are handled. For such
    a job the 'bash_pre_script' and 'bash_seq_script' entries are copied from
    job_config_data[job], and the main script is concatenated from the items
    named in job_config['script_order'] (each followed by a space). The
    special order item 'options' expands to every entry of
    job_config['script_options'] whose value in input_parameters is truthy.

    Args:
        job_list: mapping job name -> status string.
        input_parameters: mapping consulted for the option switches.
        job_config_data: mapping job name -> job configuration mapping.
        debug: when truthy, collect a debug table, print it via printDebug
            and call userCall afterwards.

    Returns:
        dict mapping job name -> dict with the keys 'bash_pre_script',
        'bash_seq_script' and 'bash_main_script'.
    """
    # debug table header
    debug_rows = []
    debug_format = '{:<17}'
    if debug:
        debug_rows.extend([
            ['def: makeBashConfig():'],
            ['-'],
            ['job', 'script'],
            ['-'],
        ])

    runnable_states = ('request', 'ask', 'new')
    bash_config = {}

    for job, state in job_list.items():
        if state not in runnable_states:
            continue

        job_config = job_config_data[job]
        entry = {}

        # pre and seq scripts are taken over unchanged
        print("%s Status: Create \'bash_pre_script\' for \'%s\' ..." %
              (getTime(), job))
        entry['bash_pre_script'] = job_config['bash_pre_script']
        print("%s Status: Create \'bash_seq_script\' for \'%s\' ..." %
              (getTime(), job))
        entry['bash_seq_script'] = job_config['bash_seq_script']

        # main script: walk 'script_order' and collect the command pieces
        print("%s Status: Create \'bash_main_script\' for \'%s\' ..." %
              (getTime(), job))
        pieces = []
        for order_item in job_config['script_order']:
            if order_item == 'options':
                for option in job_config['script_options']:
                    if input_parameters[option]:
                        pieces.append(job_config[option] + ' ')
            else:
                pieces.append(job_config[order_item] + ' ')
        main_script = ''.join(pieces)

        print('%s Script: %s' % (getTime(), main_script))
        if debug:
            debug_rows.append([job, main_script])

        entry['bash_main_script'] = main_script
        bash_config[job] = entry

    # show the debug table and wait for the user
    if debug:
        printDebug(debug_rows, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    return bash_config
Beispiel #3
0
def checkArgsProject(project, debug):
    """Split the project argument into a directory prefix and a project name.

    Args:
        project: project name, optionally preceded by a directory path.
        debug: when truthy, print the collected debug table via printDebug
            and call userCall afterwards.

    Returns:
        Tuple (proj_dir_prefix, proj_base). proj_base is the last path
        component of project. proj_dir_prefix is the directory part of
        project when it names an existing directory, otherwise ''.

    Raises:
        SystemExit: with exit status 1 when project has no base name, or
            when its directory part is not an existing directory.
    """
    # initialize parameters
    proj_dir_prefix = ''
    debug_list = []
    debug_format = '{:<17}'
    if debug:
        debug_list.append(['def: checkArgsProject():'])
        debug_list.append(['-'])

    # check if project has a file path
    proj_path = os.path.dirname(project)
    proj_base = os.path.basename(project)
    if debug:
        debug_list.append(['proj_path', proj_path])
        debug_list.append(['proj_base', proj_base])

    if not proj_base:
        print("Error: Project name not defined properly!")
        # exit with a non-zero status so callers can detect the failure
        sys.exit(1)

    if proj_path:
        if not os.path.isdir(proj_path):
            print("Error: \'%s\' has a path, but \'%s\' is not a directory!" %
                  (proj_base, proj_path))
            sys.exit(1)
        # ask user if this file path should be used as prefix
        print("%s Warning: \'%s\' has the path \'%s\'!" %
              (getTime(), proj_base, proj_path))
        print(
            "%s Warning: This will overwrite all other \'PROJ_DIR_PREFIX\' variables."
            % (getTime()))
        # the answer is not used here; userCall presumably aborts on 'no'
        # -- TODO confirm against its definition
        userCall("Continue? y[es] / n[o]", True)
        proj_dir_prefix = proj_path

    # print debug
    if debug:
        printDebug(debug_list, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    # return arguments
    return proj_dir_prefix, proj_base
Beispiel #4
0
def getQuantDict(possible_jobs, input_parameters):
    """Parse the user-defined quantification names into a dictionary.

    input_parameters['quant_name_list'] is read as ';'-separated entries of
    the form 'job:name'; empty entries and empty parts are dropped. Entries
    whose job is not a key of possible_jobs are reported with a warning and
    left out.

    Args:
        possible_jobs: mapping whose keys are the valid job names.
        input_parameters: mapping that provides 'quant_name_list'.

    Returns:
        dict mapping job name -> user-defined quantification name.
    """
    names_by_job = {}

    entries = [
        entry for entry in input_parameters['quant_name_list'].split(';')
        if entry
    ]
    for entry in entries:
        # exactly two non-empty parts are expected: job and name
        job, name = [part for part in entry.split(':') if part]
        if job not in possible_jobs:
            print(
                "%s Warning: \'%s\' is not a valid job, it will be ignored!" %
                (getTime(), job))
            continue
        names_by_job[job] = name

    return names_by_job
Beispiel #5
0
def main(project, config, prefix, dataset, fasta, gtf, fastq1, fastq2, debug,
         test, arq_path):
    """Run the complete workflow: index, alignment and quantification jobs.

    The steps are executed strictly in this order:
      1. collect the arguments into dictionaries and load the parameter and
         job dictionaries via getMainDictionaries,
      2. read the settings (getArguments), create the project directories
         (makeMainDirectory) and resolve the job requests,
      3. load the index / alignment / quantification job configs,
      4. for each of the job types 'index', 'align' and 'quant': create the
         tool directories, build the bash config and submit the jobs with
         runQsubJobs,
      5. optionally remove the combined alignment files.

    Args:
        project: stored as input_options['project'].
        config: stored as input_files['config'].
        prefix: stored as input_options['prefix'].
        dataset: not referenced anywhere in this function body.
        fasta: stored as input_files['fasta_gen_file'].
        gtf: stored as input_files['gtf_index_file'].
        fastq1: stored as input_files['fastq1_links'].
        fastq2: stored as input_files['fastq2_links'].
        debug: passed through to the helper functions.
        test: stored as input_parameters['test_call'].
        arq_path: stored as input_parameters['arq_path'].

    Returns:
        None.

    Side effects:
        Sets os.environ['TZ'] to 'EST'; everything else happens inside the
        called helpers.
    """

    ###############################################################################
    ## initialize data structures

    # parse option arguments to dictionary for improved error handling
    input_options = {'project': project, 'prefix': prefix}
    # parse file arguments to dictionary for improved error handling
    input_files = {
        'config': config,
        'fasta_gen_file': fasta,
        'gtf_index_file': gtf,
        'fastq1_links': fastq1,
        'fastq2_links': fastq2
    }
    # initialize dictionary for parameter handling
    ''' TODO: save ALL possible variables and update empty checks in code '''
    print("%s Status: Get input dictionary ..." % (getTime()))
    input_parameters = getMainDictionaries('input_parameters', debug)
    input_parameters['test_call'] = test
    input_parameters['arq_path'] = arq_path
    # tool requests with tool links
    ''' TODO: make tool requests as input parameter and save them automatically '''
    print("%s Status: Get dependency dictionary ..." % (getTime()))
    possible_jobs = getMainDictionaries('possible_jobs', debug)
    # possible alignment - quantification combinations
    input_parameters['quant_align_combi_list'] = getMainDictionaries(
        'quant_align', debug)
    # job requests and status
    ''' TODO: options: 'request', 'enumerate', 'overwrite', 'skip', 'skipped', 'finished' '''
    # job config data
    ''' TODO: TODO: add special config data dynamically '''
    job_config_data = {}
    # set timezone
    os.environ['TZ'] = 'EST'
    ''' TODO: make sam merge folder, same as fq temp folder '''
    ''' TODO: make remove option '''
    ''' TODO: remove fastq file, etc. necessity -> so it can be used only for quantification finished alignments '''

    ###############################################################################
    ## save input data and options to data structures

    # get and check all provided arguments
    # TODO: get and check fastq files
    print("%s Status: Get settings ..." % (getTime()))
    input_parameters, fastq1_files, fastq2_files, fastq_ref = getArguments(
        input_files, input_options, input_parameters, debug)

    # generate project directories
    print("%s Status: Make main directories ..." % (getTime()))
    input_parameters, progress_data = makeMainDirectory(
        input_parameters, debug)
    # add alignment free placeholder
    progress_data.append('AFREE_align')

    # save requested jobs
    print("%s Status: Get job requests ..." % (getTime()))
    job_requests = getJobRequests(possible_jobs, input_parameters, debug)

    # check for finished jobs
    ''' check and mark dependencies '''
    ''' TODO: do better distinguishing for fastq vs quant jobs '''
    print("%s Status: Check job progress ..." % (getTime()))
    job_requests = checkJobRequests(progress_data, job_requests,
                                    input_parameters, debug)

    # get tool parameters
    ''' TODO: make it user dependent '''
    ''' TODO: check tool paths '''
    ''' TODO: optional global command '''

    # get job config data
    print("%s Status: Get index config ..." % (getTime()))
    job_config_data = getIndexConfig(job_config_data, debug)
    print("%s Status: Get alignment config ..." % (getTime()))
    job_config_data = getAlignConfig(job_config_data, debug)
    print("%s Status: Get quantification config ..." % (getTime()))
    job_config_data = getQuantConfig(job_config_data, debug)
    ''' TODO: remove individual alignment file folder '''

    # check all parameters in debug mode
    checkParametersDebug(input_parameters, job_requests, fastq1_files,
                         fastq2_files, debug)

    ###############################################################################
    ## start tools - index
    ''' TODO: track time and memory consumption '''

    # set qsub parameters
    ''' TODO: make as input parameter -> dependent on number of threads used '''
    #input_parameters['qs_mem_free'] = '40'
    #input_parameters['qs_run_time'] = '28800'

    # make index tool directories
    print("%s Status: Make index directories ..." % (getTime()))
    index_jobs, input_parameters = makeToolDirectories(job_requests,
                                                       input_parameters,
                                                       'index', debug)

    # make index bash config
    print("%s Status: Make index bash config ..." % (getTime()))
    bash_index_config = makeBashConfig(index_jobs, input_parameters,
                                       job_config_data, debug)

    # run index jobs
    print("%s Status: Run index jobs ..." % (getTime()))
    index_jobs = runQsubJobs(index_jobs, input_parameters, bash_index_config,
                             {}, {}, {}, 'index', debug)

    ###############################################################################
    ## start tools - alignments
    ''' TODO: track time and memory consumption -> qacct -j <number> -> get job number and save it '''
    ''' TODO: get statistics out of qacct '''

    # set qsub parameters
    ''' TODO: make as input parameter -> dependent on number of threads used '''
    #input_parameters['qs_mem_free'] = '40'
    #input_parameters['qs_run_time'] = '28800'

    # make alignment tool directories
    print("%s Status: Make alignment directories ..." % (getTime()))
    align_jobs, input_parameters = makeToolDirectories(job_requests,
                                                       input_parameters,
                                                       'align', debug)

    # make alignment bash config
    print("%s Status: Make alignment bash config ..." % (getTime()))
    bash_align_config = makeBashConfig(align_jobs, input_parameters,
                                       job_config_data, debug)

    # unzip fastq files
    ''' TODO: make optional to remove fastq at the end '''
    print("%s Status: Unzip fastq1 files ..." % (getTime()))
    fastq1_files_uz = unzipFastq(fastq1_files, input_parameters)
    print("%s Status: Unzip fastq2 files ..." % (getTime()))
    fastq2_files_uz = unzipFastq(fastq2_files, input_parameters)

    # check alignment progress
    # INPUT  -> align_jobs      = {'TOOL1_align':'overwrite', 'TOOL2_align':'ask', ...}
    #           fastq1_files_uz = {'fq1':'path', 'fq2':'path', ...}
    # OUTPUT -> fastq_jobs      = {'TOOL1_align':{'fq1':'overwrite', 'fq2':'request', ...},
    #                              'TOOL2_align':{'fq1':'overwrite', 'fq2':'request', ...}, ...}
    print("%s Status: Check alignment progress ..." % (getTime()))
    fastq_jobs = checkProgress(align_jobs, fastq1_files_uz, input_parameters,
                               'align', debug)

    # get tophat merge options
    input_parameters['tophat_merge_opt'] = tophatMergeOpt(
        input_parameters['tool_dir_toprec'])

    # run alignment jobs
    ''' TODO: check fastq progress, when a job breaks '''
    ''' TODO: gzip sam / bam ? '''
    print("%s Status: Run alignment jobs ..." % (getTime()))
    align_jobs = runQsubJobs(fastq_jobs, input_parameters, bash_align_config,
                             {}, fastq1_files_uz, fastq2_files_uz, 'align',
                             debug)

    # remove unzipped fastq files (currently disabled)
    ''' TODO: make definition for removeFastq and test '''
    ''' TODO: dont remove, neede for kallisto '''
    #if input_parameters['remove_fastq'].lower() in ['remove', 'true']:
    #print("%s Status: Remove unzipped fastq1 files ..." % (getTime()))
    #removeFastq(fastq1_files_uz)
    #if input_parameters['remove_fastq'].lower() in ['remove', 'true']:
    #print("%s Status: Remove unzipped fastq2 files ..." % (getTime()))
    #removeFastq(fastq2_files_uz)

    ###############################################################################
    ## start tools - quantification
    ''' TODO: quant<->align tool combination '''
    ''' TODO: get specific number of fastq files (10M, 20M, 50M reads) -> modify fastq1_files_uz and fastq2_files_uz '''
    ''' TODO: track used fastq file names? '''

    # set qsub parameters
    ''' TODO: make as input parameter -> dependent on number of threads used '''
    #input_parameters['num_processors'] = '4'
    #input_parameters['qs_mem_free'] = '10'
    #input_parameters['qs_run_time'] = '28800'

    # make quantification tool directories
    ''' TODO: make better options -> new option for only make new jobs (skip will always skip complete atm '''
    ''' TODO:     and overwrite will always overwrite all or you have to decide for each '''
    print("%s Status: Make quantification directories ..." % (getTime()))
    quant_jobs, input_parameters = makeToolDirectories(job_requests,
                                                       input_parameters,
                                                       'quant', debug)

    # make quantification bash config
    print("%s Status: Make quantification bash config ..." % (getTime()))
    bash_quant_config = makeBashConfig(quant_jobs, input_parameters,
                                       job_config_data, debug)

    # make quantification job combinations
    ''' TODO: get specific number of fastq files (10M, 20M, 50M reads) -> modify fastq1_files_uz and fastq2_files_uz '''
    print("%s Status: Count fastq reads ..." % (getTime()))
    fastq_ref = countFastqReads(fastq_ref, fastq1_files_uz,
                                input_parameters['seq_style_fq'])
    # INPUT  -> fastq_ref       = {'fq1':['fq_name_1','num_reads'], 'fq2':['fq_name_2','num_reads'], ...}
    #           job_requests    = {'TOOL1_quant':'skip', 'TOOL2_quant':'request', 'TOOL3_align':'skip', ...}
    print("%s Status: Make alignment combinations ..." % (getTime()))
    quant_files = getQuantCombinations(job_requests, fastq_ref,
                                       input_parameters, debug)

    # get user defined quantification names
    # (the 'quant_name_list' entry is replaced by the dict built from it)
    print("%s Status: Get quantification names ..." % (getTime()))
    input_parameters['quant_name_list'] = getQuantDict(possible_jobs,
                                                       input_parameters)

    # check quantification progress
    # INPUT  -> quant_jobs      = {'TOOL1_quant':'overwrite', 'TOOL2_quant':'ask', ...}
    #           quant_files     = {'ALIGNTOOL1_fq1_fq2_fq3':['fq_name_1','fq_name_2','fq_name_3'],
    #                              'ALIGNTOOL2_fq1_fq2_fq3':['fq_name_1','fq_name_2','fq_name_3'],
    #                              'ALIGNTOOL1_fq4_fq5':['fq_name_4','fq_name_5'],  ...}
    # OUTPUT -> combi_jobs      = {'tool1_quant':{'ALIGNTOOL1_fq1_fq2_fq3':'overwrite', 'ALIGNTOOL2_fq1_fq2_fq3':'request', ...},
    #                              'tool2_quant':{'ALIGNTOOL1_fq1_fq2_fq3':'overwrite', 'ALIGNTOOL2_fq1_fq2_fq3':'request', ...}, ...}
    print("%s Status: Check quantification progress ..." % (getTime()))
    combi_jobs = checkProgress(quant_jobs, quant_files, input_parameters,
                               'quant', debug)

    # write fastq reference file for alignment compilations
    print("%s Status: Write fastq reference file ..." % (getTime()))
    input_parameters['fastq_ref_file'] = '%s/%s_fastq.ref' % (
        input_parameters['quant_dir_path'], input_parameters['data_set_name'])
    writeFastqRef(fastq_ref, input_parameters)

    # run quantification jobs
    print("%s Status: Run quantification jobs ..." % (getTime()))
    quant_jobs = runQsubJobs(combi_jobs, input_parameters, bash_quant_config,
                             quant_files, fastq1_files_uz, fastq2_files_uz,
                             'quant', debug)

    # remove combined alignment files
    '''TODO: manage path with input parameters '''
    if input_parameters['rem_comb_align'].lower() in ['remove', 'true']:
        print("%s Status: Remove alignment combinations ..." % (getTime()))
        removeCombAlign(quant_files, input_parameters)

    ###############################################################################
    ## clean up

    # remove alignment tmp folder
    # (the clean-up code below is disabled: it sits inside a string literal)
    '''
    print("%s Status: Removing temp alignment folders ..." % (getTime()))
    for job in fastq_jobs.keys():
        search_dir = '%s/%s' % (input_parameters['align_dir_path'], job)
        for file in os.listdir(search_dir):
            is_dir = '%s/%s' % (search_dir, file)
            if os.path.isdir(is_dir):
                removeJob(is_dir, [''], debug)
    
    # remove quantification tmp folder
    print("%s Status: Removing temp quantification folders ..." % (getTime()))
    for job in combi_jobs.keys():
        search_dir = '%s/%s' % (input_parameters['quant_dir_path'], job)
        for file in os.listdir(search_dir):
            is_dir = '%s/%s' % (search_dir, file)
            if os.path.isdir(is_dir):
                removeJob(is_dir, [''], debug)
    '''
    # done
    print("%s Status: ARQ finished ..." % (getTime()))
Beispiel #6
0
        dest='fastq2',
        help="input file with fastq file paths (second end; paired end reads)")
    parser.add_argument('-t',
                        '--test',
                        dest='test',
                        action='store_true',
                        help="use predefined test data")
    ''' TODO: make full parameter list '''
    parser._optionals.title = "arguments"
    options = parser.parse_args()

    # handle test
    arq_path = ''
    if options.test:
        arq_path = os.path.dirname(os.path.realpath(__file__))
        print("%s Warning: Test requested ... " % getTime())
        print("%s Warning: Setting parameter -p test_temp ... " % getTime())
        options.project = 'test_temp'
        print(
            "%s Warning: Setting parameter -c %s/test_files/arq_proj_test.conf ... "
            % (getTime(), arq_path))
        options.config = '%s/test_files/arq_proj_test.conf' % arq_path

    # check if either a config file or all required parameters are given
    config_file_given = None not in [options.project, options.config]
    required_arguments_given = None not in [
        options.project, options.prefix, options.dataset, options.fasta,
        options.gtf, options.fastq1
    ]
    if not (config_file_given or required_arguments_given):
        print(
Beispiel #7
0
def makeMainDirectory(input_parameters, debug):
    """Create the project folder layout and collect already existing jobs.

    The six data directory paths ('index', 'align', 'quant', 'qsub', 'fastq'
    and 'comb') are derived from input_parameters['proj_dir_path'] and
    stored back into input_parameters under '<type>_dir_path'. Missing
    directories are created, an existing 'qsub.error' file in the qsub
    directory is removed, and - when the project folder already existed -
    every sub-directory found in the index, alignment and quantification
    directories is recorded as a job name.

    Args:
        input_parameters: mapping providing 'proj_dir_path', 'data_set_name'
            and 'p_name_suffix'; extended in place with the directory paths.
        debug: when truthy, collect a debug table, print it via printDebug
            and call userCall afterwards.

    Returns:
        Tuple (input_parameters, progress_data) where progress_data is the
        list of sub-directory names that were found.
    """
    project_dir = input_parameters['proj_dir_path']
    progress_data = []
    debug_rows = []
    debug_format = '{:<17}'
    if debug:
        debug_rows.append(['def: makeMainDirectory():'])
        debug_rows.append(['-'])

    # data directory types handled below
    data_types = ['index', 'align', 'quant', 'qsub', 'fastq', 'comb']

    # derive and store the project directories
    input_parameters['index_dir_path'] = '%s/indexes' % (project_dir)
    input_parameters['align_dir_path'] = '%s/alignments/%s' % (
        project_dir, input_parameters['data_set_name'])
    input_parameters['quant_dir_path'] = '%s/quantifications/%s' % (
        project_dir, input_parameters['data_set_name'])
    input_parameters['qsub_dir_path'] = '%s/qsub_data' % (project_dir)
    input_parameters['fastq_dir_path'] = '%s/fastq_data' % (project_dir)
    input_parameters['comb_dir_path'] = '%s/comb_data/%s' % (
        project_dir, input_parameters['data_set_name'])

    # main project folder: create it, or announce the progress check
    new_project = not os.path.isdir(project_dir)
    if new_project:
        print("%s Status: Create \'%s\' main folder ..." %
              (getTime(), input_parameters['p_name_suffix']))
        os.mkdir(project_dir)
    else:
        print("%s Warning: \'%s\' already exists! Checking progress ..." %
              (getTime(), project_dir))

    # data folders
    if debug:
        debug_rows.append(['dir key', 'dir path'])
        debug_rows.append(['-'])
    for data_type in data_types:
        path_key = '%s_dir_path' % data_type
        if os.path.isdir(input_parameters[path_key]):
            print("%s Warning: \'%s\' folder already exists ..." %
                  (getTime(), data_type))
            continue
        print("%s Status: Create \'%s\' folder ..." % (getTime(), data_type))
        if debug:
            debug_rows.append([path_key, input_parameters[path_key]])
        os.makedirs(input_parameters[path_key])

    # a left-over qsub error file is removed
    qsub_error_file = '%s/qsub.error' % (input_parameters['qsub_dir_path'])
    if os.path.isfile(qsub_error_file):
        print("%s Status: Remove \'qsub.error\' file ..." % (getTime()))
        os.remove(qsub_error_file)

    # existing project: every sub-directory counts as a job that is done
    if not new_project:
        print("%s Status: Check tool progress ..." % (getTime()))
        checked_dirs = (
            input_parameters['index_dir_path'],
            input_parameters['align_dir_path'],
            input_parameters['quant_dir_path'],
        )
        for main_dir in checked_dirs:
            if debug:
                debug_rows.append(['mainDirs', main_dir])
            for job_name in os.listdir(main_dir):
                sub_dir = '%s/%s' % (main_dir, job_name)
                if debug:
                    debug_rows.append(['subDirs', sub_dir])
                if not os.path.isdir(sub_dir):
                    continue
                quoted_name = '{:<15}'.format("\'%s\'" % (job_name))
                print("%s Warning: Job " % getTime() + quoted_name +
                      " ... is already done!")
                progress_data.append(job_name)
                if debug:
                    debug_rows.append([job_name, job_name in progress_data])

    # show the debug table and wait for the user
    if debug:
        printDebug(debug_rows, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    return input_parameters, progress_data
Beispiel #8
0
def makeToolDirectories(job_requests, input_parameters, job_type, debug):
    """Select the jobs of one type and prepare their output folders.

    All entries of job_requests whose name ends in '_<job_type>' are copied
    into a new job list. For each of them the folder
    '<input_parameters[job_type + "_dir_path"]>/<job>' is stored in
    input_parameters under '<job>_dir' and then handled by status:

      * 'request', 'overwrite', 'new': the folder is created when missing;
        'overwrite' removes an existing folder first (via removeJob) and is
        turned into 'request' once the folder has been created. For the job
        types 'align' and 'quant' the user is asked whether everything
        should be overwritten; otherwise the status becomes 'ask'.
      * 'required': the folder has to exist already.
      * 'skip': the status is changed to 'skipped'.
      * any other status is left untouched.

    Args:
        job_requests: mapping job name -> status string.
        input_parameters: mapping providing '<job_type>_dir_path'; extended
            in place with one '<job>_dir' entry per selected job.
        job_type: job name suffix to select, e.g. 'index', 'align', 'quant'.
        debug: when truthy, collect a debug table, print it via printDebug
            and call userCall afterwards.

    Returns:
        Tuple (job_list, input_parameters) with the selected jobs and their
        possibly updated status.

    Raises:
        SystemExit: with exit status 1 when a required folder is missing or
            when a folder that would have to be created already exists.
    """
    # initialize debug
    debug_list = []
    debug_format = '{:<17}'
    if debug:
        debug_list.append(['def: makeToolDirectories():'])
        debug_list.append(['-'])
        debug_list.append(['job', 'setting'])
        debug_list.append(['-'])

    # save all jobs of the requested type to job_list
    job_list = {}
    for job in job_requests.keys():
        if re.search(r'_%s$' % job_type, job):
            job_list[job] = job_requests[job]
            if debug:
                debug_list.append([job, job_list[job]])

    # declare tool job folders
    # job options: 'request', 'overwrite', 'skip', 'skipped', 'finished', 'required', 'ask', 'new'
    if debug:
        debug_list.append(['-'])
    # iterate over a snapshot of the keys; only the values are updated below
    for job in list(job_list):
        job_dir_name = '%s_dir' % job
        job_folder = '%s/%s' % (input_parameters['%s_dir_path' % job_type],
                                job)
        input_parameters[job_dir_name] = job_folder
        if job_list[job] in ['request', 'overwrite', 'required', 'new']:
            # ask whether all alignments / quantifications should be redone
            if job_list[job] == 'overwrite' and job_type in ['align', 'quant']:
                print(
                    "%s Warning: You requested \'overwrite\' for job \'%s\'!" %
                    (getTime(), job))
                overwrite_all = userCall(
                    "Type: y[es] to remove all / n[o] to decide for each job part",
                    False)
                job_list[job] = 'overwrite' if overwrite_all else 'ask'
            # check for overwrite option and remove directory
            if job_list[job] == 'overwrite':
                removeJob(job_folder, [''], debug)
            # make new folder
            if not os.path.isdir(job_folder) and job_list[job] in [
                    'request', 'overwrite', 'new'
            ]:
                print("%s Status: Create \'%s\' folder ..." % (getTime(), job))
                os.mkdir(job_folder)
                if job_list[job] == 'overwrite':
                    job_list[job] = 'request'
                if debug:
                    debug_list.append([job, job_list[job]])
            elif not os.path.isdir(job_folder) and job_list[job] == 'required':
                if debug:
                    printDebug(debug_list, debug_format)
                print("Error: \'%s\' is required but doesn't exist!" %
                      (job_folder))
                # exit with a non-zero status so callers can detect the failure
                sys.exit(1)
            elif os.path.isdir(job_folder) and job_list[job] in [
                    'required', 'ask', 'new'
            ]:
                # folder is present and may be reused
                pass
            else:
                # NOTE(review): this branch is also reached when the status
                # was switched to 'ask' above and the folder does not exist;
                # the message is misleading in that case - confirm the
                # intended handling.
                if debug:
                    printDebug(debug_list, debug_format)
                print("Error: \'%s\' already exists!" % (job_folder))
                sys.exit(1)
        elif job_list[job] == 'skip':
            job_list[job] = 'skipped'
            if debug:
                debug_list.append([job, job_list[job]])

    # print debug
    if debug:
        printDebug(debug_list, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    # return the selected jobs
    return job_list, input_parameters
Beispiel #9
0
def runQsubJobs(job_list, input_parameters, bash_config, align_files,
                fq1_files, fq2_files, job_type, debug):
    """Submit the requested jobs through qsub and poll qstat until it is idle.

    Parameters:
        job_list: for job_type 'index' a dict job -> status string; for
            'align' / 'quant' a dict job -> {file_name: status string}.
            Entries are updated in place ('running', later 'finished').
        input_parameters: parameter dict. Read here: 'single_tool_job',
            'sleep_time_qs', 'qsub_dir_path', 'ignore_qs_error',
            '<job_type>_dir_path' and, for quantification, 'align_dir_path',
            'quant_dir_path', 'quant_name_list', 'comb_dir_path'. Several
            per-job keys (e.g. 'fastq1_file', '<job_type>_out_dir') are
            written before every submission.
        bash_config: indexed by job name; the entry is handed to
            runQsubCommand() and writeQuantRef().
        align_files: indexed by file name in 'quant' mode; each entry is a
            sequence of alignment names (the first one is the prefix).
        fq1_files, fq2_files: indexed by file name to get the fastq links.
        job_type: 'index', 'align' or 'quant'.
        debug: when truthy, collect and print a debug table at the end.

    Returns:
        The updated job_list.
    """

    # initialize debug
    debug_list = []
    debug_format = '{:<17}'
    if debug:
        debug_list.append(['def: runQsubJobs():'])
        debug_list.append(['-'])
        if job_type == 'align':
            debug_list.append(['job', 'fastq', 'mode'])
        if job_type == 'index':
            debug_list.append(['job', 'mode'])
        if job_type == 'quant':
            debug_list.append(['job', 'combination', 'mode'])
        debug_list.append(['-'])

    # initialize parameters
    file_status = {}
    job_queue = []
    q_running = False
    bowtie_running = 'BOWTIE_index' in job_list
    TOOLS_running = []

    def single_tool_mode():
        # evaluated lazily, exactly where the option is needed
        return input_parameters['single_tool_job'].lower() in [
            'single', 'true'
        ]

    # make job queues
    print("%s Status: Create queue ..." % (getTime()))
    for job in job_list.keys():
        # save jobs directly when index is requested
        if job_type == 'index':
            if job_list[job] == 'request':
                job_queue.append(job)
        # save fastq jobs when align is requested or combi align jobs when quant is requested
        elif job_type in ['align', 'quant']:
            file_status[job] = [
                file_name for file_name in job_list[job].keys()
                if job_list[job][file_name] in ['overwrite', 'request', 'new']
            ]
            job_queue.append(job)

    # run jobs
    print("%s Status: Run jobs ..." % (getTime()))
    while job_queue or q_running:
        # set user naming
        user_naming = ''
        # assume something is running until qstat reports an empty list
        q_running = True
        # print tool queue
        print('%s Queue: %s' % (getTime(), job_queue))
        # take first job from queue
        job = job_queue.pop(0) if job_queue else ''
        # start qsub for index jobs
        if job_type == 'index' and job:
            # append TOPHAT_index to queue if bowtie isn't done yet
            if job == 'TOPHAT_index' and bowtie_running:
                job_queue.append(job)
            # start index job
            else:
                if debug: debug_list.append([job, job_list[job]])
                runQsubCommand(job, input_parameters, bash_config[job], debug)
                job_list[job] = 'running'
                if debug: debug_list.append([job, job_list[job]])
                if single_tool_mode():
                    TOOLS_running.append(job)
        elif job_type in ['align', 'quant'] and job:
            queue = file_status.pop(job, [])
            # print queue
            print('%s %s: %s' %
                  (getTime(), re.match(r'[A-Z]+', job).group(0), queue))
            # submit quantification or alignment jobs
            while queue and job not in TOOLS_running:
                file_name = queue.pop(0)
                if debug:
                    debug_list.append(
                        [job, file_name, job_list[job][file_name]])
                if job_type == 'align':
                    input_parameters['fastq1_file'] = fq1_files[file_name]
                    input_parameters['fastq2_file'] = fq2_files[file_name]
                if job_type == 'quant':
                    # TODO: get to work with combi alignments (kallisto)
                    input_parameters['fastq1_file'] = fq1_files[
                        align_files[file_name][0]]
                    input_parameters['fastq2_file'] = fq2_files[
                        align_files[file_name][0]]
                    # the text before the first '_' of file_name selects the
                    # '<ALIGNER>_align' directory
                    input_parameters['ALIGNER_align_dir'] = '%s/%s_align' % (
                        input_parameters['align_dir_path'],
                        file_name.partition('_')[0])
                    input_parameters['alignment_prefix'] = align_files[
                        file_name][0]
                    # space separated list of all bam files of this combination
                    input_parameters['combi_align_list'] = ' '.join(
                        '%s/%s.bam' % (input_parameters['ALIGNER_align_dir'],
                                       align_name)
                        for align_name in align_files[file_name])
                    input_parameters['comb_out_prefix'] = file_name
                    if job in input_parameters['quant_name_list'].keys():
                        user_naming = '_%s' % input_parameters[
                            'quant_name_list'][job]
                    else:
                        user_naming = '_default'
                    input_parameters['quant_ref_file'] = '%s/%s/%s%s.ref' % (
                        input_parameters['quant_dir_path'], job, file_name,
                        user_naming)
                input_parameters['%s_out_prefix' %
                                 job_type] = '%s%s' % (file_name, user_naming)
                input_parameters['%s_out_dir' % job_type] = '%s/%s/%s%s' % (
                    input_parameters['%s_dir_path' % job_type], job, file_name,
                    user_naming)
                # start qsub for alignment and quantification jobs
                if job_list[job][file_name] in ['overwrite', 'request', 'new']:
                    if job_type == 'quant':
                        if len(align_files[file_name]) > 1:
                            makeCombAlign(input_parameters)
                            input_parameters['comb_in_file'] = '%s/%s.bam' % (
                                input_parameters['comb_dir_path'],
                                input_parameters['comb_out_prefix'])
                        else:
                            input_parameters['comb_in_file'] = '%s/%s.bam' % (
                                input_parameters['ALIGNER_align_dir'],
                                input_parameters['alignment_prefix'])
                        # create quantification parameter ref file
                        writeQuantRef(input_parameters, bash_config[job])
                    qsub_name = '%s-%s' % (
                        job, input_parameters['%s_out_prefix' % job_type])
                    runQsubCommand(qsub_name, input_parameters,
                                   bash_config[job], debug)
                    job_list[job][file_name] = 'running'
                    if debug:
                        debug_list.append(
                            [job, file_name, job_list[job][file_name]])
                # force only one job per tool; marking the tool as running
                # also ends this inner loop
                if single_tool_mode():
                    TOOLS_running.append(job)
            # save queue if it isn't empty (for when single_tool_job option is active)
            if queue:
                file_status[job] = queue
                job_queue.append(job)
        # print tool status
        if single_tool_mode():
            print('%s Running: %s' % (getTime(), TOOLS_running))
        print('%s Qstat: ... working ...' % getTime())
        # wait before polling qstat, unless jobs are still queued and nothing
        # (neither bowtie nor a single-job tool) is blocking them
        if not job_queue or bowtie_running or TOOLS_running:
            time.sleep(int(input_parameters['sleep_time_qs']))
        # get qstat job info
        out = subprocess.Popen('qstat',
                               stdout=subprocess.PIPE).communicate()[0]
        # set bowtie running to false if BOWTIE job is not in the queue and not running anymore
        if ('BOWTIE_index' not in job_queue) and not re.search('BOWTIE', out):
            bowtie_running = False
        # keep only tools that still show up in qstat; the list is rebuilt
        # because removing entries while iterating over it skips the
        # element that follows each removed one
        TOOLS_running = [
            active for active in TOOLS_running
            if re.search(active[:8], out)
        ]
        # check if any job is running
        if not out:
            q_running = False

    # there was an error with qsub ... exiting
    # TODO: make specific error detection (make new file inside bash script) and only dismiss dependent jobs
    if os.path.isfile('%s/qsub.error' % (input_parameters['qsub_dir_path'])):
        print(
            "%s Warning: There was an error with qsub! Check qsub_data/qsub.error for more information."
            % getTime())
        if input_parameters['ignore_qs_error'].lower() not in [
                'ignore', 'true'
        ]:
            userCall("Continue? y[es] / n[o]", True)

    # update job info
    # NOTE(review): 'quant' entries are left as 'running' here; confirm
    # whether that is intended before changing it.
    if debug: debug_list.append(['-'])
    for job in job_list.keys():
        if job_type == 'index':
            if job_list[job] == 'running':
                job_list[job] = 'finished'
            if debug: debug_list.append([job, job_list[job]])
        elif job_type == 'align':
            for fq_name in job_list[job].keys():
                if job_list[job][fq_name] == 'running':
                    job_list[job][fq_name] = 'finished'
                if debug: debug_list.append([job, job_list[job]])
                if debug:
                    debug_list.append([job, fq_name, job_list[job][fq_name]])

    # print debug
    if debug: printDebug(debug_list, debug_format)
    if debug: userCall("Continue? y[es] / n[o]", True)

    return job_list
Beispiel #10
0
def checkProgress(job_list, job_parts, input_parameters, job_type, debug):
    """Collect, for every job that is not 'skipped', the state of its job parts.

    Parameters:
        job_list: dict job -> status string ('ask', 'new', 'overwrite',
            'request', 'skipped', ...).
        job_parts: dict whose keys are the job part names.
        input_parameters: parameter dict; reads 'quant_align_combi_list',
            '<job_type>_dir_path' and, for quantification, 'quant_name_list'.
        job_type: 'index', 'align' or 'quant'.
        debug: when truthy, collect and print a debug table at the end.

    Returns:
        dict job -> {job part name: state} for all jobs whose status is not
        'skipped'.
    """

    # debug bookkeeping
    debug_list = []
    debug_format = '{:<15}'
    if debug:
        debug_list.extend([['def: checkProgress():'], ['-'],
                           ['job', 'fastq', 'status', 'loop'], ['-']])

    todo_jobs = {}
    quant_align_combi_list = input_parameters['quant_align_combi_list']

    def part_is_selected(tool_job, part_name):
        # index/align take every part; quant only takes listed tool pairs
        if job_type in ['index', 'align']:
            return True
        if job_type != 'quant':
            return False
        pair = '%s_%s' % (tool_job.partition('_')[0],
                          part_name.partition('_')[0])
        return pair in quant_align_combi_list

    def create_part_folder(tool_job, part_name, suffix, folder):
        # announce and create the output folder of one job part
        print("%s Status: Create \'%s%s\' folder for \'%s\' ..." %
              (getTime(), part_name, suffix, tool_job))
        os.mkdir(folder)

    for job in job_list.keys():
        status = job_list[job]
        # names that count as "already existing" results for this job
        known_parts = job_parts.keys()
        user_naming = ''
        if job_type == 'quant':
            # quantification results carry a name tag, so build tagged names
            known_parts = []
            for part in job_parts.keys():
                if job in input_parameters['quant_name_list'].keys():
                    # user defined name tag
                    user_naming = '_%s' % input_parameters['quant_name_list'][
                        job]
                else:
                    # fall back to the 'default' tag
                    user_naming = '_default'
                tagged = '%s%s' % (part, user_naming)
                pair = '%s_%s' % (job.partition('_')[0],
                                  tagged.partition('_')[0])
                if pair in quant_align_combi_list:
                    known_parts.append(tagged)

        job_requests = {}
        if status in ['ask', 'new']:
            # look at what is already on disk for this tool
            tool_dir = '%s/%s' % (input_parameters['%s_dir_path' % job_type],
                                  job)
            for entry in os.listdir(tool_dir):
                base = os.path.basename(os.path.splitext(entry)[0])
                if debug:
                    debug_list.append([job, entry, base, 'basename'])
                if base not in known_parts:
                    continue
                if job_type == 'quant':
                    # drop the trailing name tag again
                    base = re.sub(r'\_[a-zA-Z0-9]+$', '', base)
                job_requests['%s' % base] = ''
                if debug:
                    debug_list.append([job, entry, base, 'bname in keys'])
            if debug: debug_list.append(['-'])

            # decide for every requested job part
            for part in job_parts.keys():
                if not part_is_selected(job, part):
                    continue
                part_dir = '%s/%s/%s%s' % (
                    input_parameters['%s_dir_path' % job_type], job, part,
                    user_naming)
                already_there = part in job_requests.keys()
                if already_there and status == 'ask':
                    print(
                        "%s Warning: Job part \'%s%s\' for \'%s\' already exists!"
                        % (getTime(), part, user_naming, job))
                    if userCall(
                            "Type: y[es] to overwrite / n[o] to skip this job part",
                            False):
                        removeJob(part_dir, ['', '.sam', '.bam', '.ref'],
                                  debug)
                        job_requests[part] = 'overwrite'
                        create_part_folder(job, part, user_naming, part_dir)
                    else:
                        job_requests[part] = 'skip'
                    reason = 'job exists'
                elif not already_there:
                    job_requests[part] = 'request'
                    create_part_folder(job, part, user_naming, part_dir)
                    reason = 'new job'
                else:
                    job_requests[part] = 'skipped'
                    reason = 'else'
                if debug:
                    debug_list.append([job, part, job_requests[part], reason])
            if debug: debug_list.append(['-'])
        elif status in ['overwrite', 'request']:
            # every selected part is (re)done with the job's own status
            for part in job_parts.keys():
                if not part_is_selected(job, part):
                    continue
                part_dir = '%s/%s/%s%s' % (
                    input_parameters['%s_dir_path' % job_type], job, part,
                    user_naming)
                create_part_folder(job, part, user_naming, part_dir)
                job_requests[part] = status
                if debug:
                    debug_list.append([
                        job, part, job_requests[part], 'overwrite or request'
                    ])

        # keep everything except skipped jobs
        if status != 'skipped':
            todo_jobs[job] = job_requests
        if debug: debug_list.append(['-'])

    # add todo_jobs debug
    if debug:
        debug_list.append(['-'])
        for job in todo_jobs.keys():
            for part in todo_jobs[job]:
                debug_list.append(
                    [job, part, todo_jobs[job][part], 'todo_jobs'])

    # print debug
    if debug:
        printDebug(debug_list, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    return todo_jobs
Beispiel #11
0
def checkJobRequests(progress_data, job_requests, input_parameters, debug):
    """Decide for every requested job whether to request, skip, overwrite or renew it.

    Parameters:
        progress_data: container of job names that were already done
            (only tested with ``in``).
        job_requests: dict keyed by job name; its values are replaced with
            'request', 'skip', 'overwrite' or 'new'.
        input_parameters: parameter dict; reads the comma separated lists
            'user_set_over', 'user_set_skip' and 'user_set_new'.
        debug: when truthy, collect and print a debug table at the end.

    Returns:
        The updated job_requests dict.
    """

    # debug bookkeeping
    debug_list = []
    debug_format = '{:<15}'
    if debug:
        debug_list.append(['def: checkJobRequests():'])
        debug_list.append(['-'])
        debug_list.append(
            ['job', 'requests[job]', 'progress[job]', 'skip all', 'over all'])
        debug_list.append(['-'])

    # remembered "... all" answers, one slot per job kind
    do_for_all = {
        'for_all_index': '',
        'for_all_align': '',
        'for_all_quant': '',
        'for_all_new': ''
    }
    # skip      -> skip all <TOOL> jobs
    # overwrite -> overwrite all <TOOL> jobs (alignment/quantification parts are asked for individually)
    # new       -> make only jobs which aren't done yet
    user_options = {
        's': 'skip',
        'skip': 'skip',
        's all': 'skip',
        'skip all': 'skip',
        'o': 'overwrite',
        'overwrite': 'overwrite',
        'o all': 'overwrite',
        'overwrite all': 'overwrite',
        'n': 'new',
        'new': 'new',
        'n all': 'new',
        'new all': 'new'
    }

    # answers preset in the config: user_set_over / user_set_skip / user_set_new
    conf_settings = {}
    for opt in ['over', 'skip', 'new']:
        preset_jobs = input_parameters['user_set_%s' % opt].split(',')
        for job in filter(None, preset_jobs):
            conf_settings[job] = opt[0]

    for job in job_requests.keys():
        job_done = job in progress_data

        # job not yet done -> plain request
        if not job_done:
            job_requests[job] = 'request'
            print("%s Status: Setting for job " % getTime() +
                  '{:<15}'.format("\'%s\'" %
                                  (job)) + " -> \'%s\'!" % (job_requests[job]))
            if debug:
                debug_list.append([job, job_requests[job], job_done, '', ''])
            continue

        # an existing AFREE_align job is always skipped without asking
        if job == 'AFREE_align':
            job_requests[job] = 'skip'
            continue

        # existing job: config preset, remembered "all" answer, or ask
        all_job = 'for_all_%s' % job.rpartition('_')[2]
        if job in conf_settings.keys():
            user_input = conf_settings[job]
        elif do_for_all[all_job]:
            user_input = do_for_all[all_job]
        else:
            user_input = raw_input(
                "%s Warning: What to do with existing \'%s\' job \'%s\'? a[bort] / s[kip] / o[verwrite] / n[ew] [all]: "
                % (getTime(), job[-5:], job))

        # repeat until the (case insensitive) answer is understood
        while True:
            answer = user_input.lower()
            # remember "... all" answers for the following jobs of this kind
            if answer in user_options and answer.endswith(' all'):
                do_for_all[all_job] = user_options[answer]
            if answer in user_options.keys():
                job_requests[job] = user_options[answer]
                print("%s Status: Setting for job " % getTime() +
                      '{:<15}'.format("\'%s\'" % (job)) + " -> \'%s\'!" %
                      (job_requests[job]))
                if debug:
                    debug_list.append([
                        job, job_requests[job], job_done, user_input,
                        do_for_all[all_job]
                    ])
                break
            elif answer in ['a', 'abort']:
                if debug: printDebug(debug_list, debug_format)
                print("%s Warning: Script interrupted by user ... " %
                      getTime())
                sys.exit()
            else:
                user_input = raw_input(
                    "%s Warning: Wrong input! Use a[bort] / s[kip] / o[verwrite] / n[ew] [all]: "
                    % getTime())

    # print debug
    if debug:
        printDebug(debug_list, debug_format)
        userCall("Continue? y[es] / n[o]", True)

    return job_requests
Beispiel #12
0
def getArguments(input_files, input_options, input_parameters, debug):
    """Merge command line input, config file and fastq link files into the parameters.

    Parameters:
        input_files: dict file type -> path given on the command line; the
            'config' entry is passed to getArgsConfig().
        input_options: dict of command line options; reads 'project' and
            'prefix'.
        input_parameters: parameter dict that is filled in and returned.
        debug: when truthy, collect and print a debug table at the end.

    Returns:
        Tuple (input_parameters, fastq1_files, fastq2_files, fastq_ref).
        fastq2_files is an empty dict when 'fastq2_links' is not set.

    Exits the program via sys.exit() when a given input file does not exist
    or the prefix option is not a directory.
    """

    # initialize parameters
    debug_list = []
    debug_format = '{:<17}'
    if debug:
        debug_list.append(['def: getArguments():'])
        debug_list.append(['-'])
        debug_list.append(['key', 'value'])
        debug_list.append(['-'])

    # check project argument and save new project name if project had a path
    input_parameters['proj_dir_prefix'], input_parameters[
        'p_name_suffix'] = checkArgsProject(input_options['project'], debug)
    if debug:
        debug_list.append(['p_name_suffix', input_parameters['p_name_suffix']])

    # check for command line input files and save if valid
    # TODO: do for input_options
    for file_type, file_path in input_files.items():
        if not file_path:
            continue
        if not os.path.isfile("%s" % (file_path)):
            print("Error: No \'%s\' file named \'%s\'!" %
                  (file_type, file_path))
            sys.exit()
        # the config file is handled separately by getArgsConfig() below
        if file_type != 'config':
            input_parameters[file_type] = file_path
            if debug: debug_list.append([file_type, file_path])

    # check command line prefix option and save if not already declared
    prefix = input_options['prefix']
    if prefix and not os.path.isdir(prefix):
        print("Error: \'%s\' is not a directory!" % (prefix))
        sys.exit()
    elif prefix and not input_parameters['proj_dir_prefix']:
        input_parameters['proj_dir_prefix'] = prefix

    # get config arguments
    print("%s Status: Save config parameters ..." % (getTime()))
    input_parameters = getArgsConfig(input_files['config'], input_parameters,
                                     debug)
    if debug:
        debug_list.append(
            ['proj_dir_prefix', input_parameters['proj_dir_prefix']])

    # configure test data if test was requested
    if input_parameters['test_call']:
        test_dir = '%s/test_files' % input_parameters['arq_path']
        input_parameters['proj_dir_prefix'] = test_dir
        input_parameters['fasta_gen_file'] = '%s/EF204940.fa' % test_dir
        input_parameters['gtf_index_file'] = '%s/ef204940.gtf' % test_dir
        input_parameters[
            'fastq1_links'] = '%s/arq_proj_test.fq1.info' % test_dir
        input_parameters[
            'fastq2_links'] = '%s/arq_proj_test.fq2.info' % test_dir

    # define default parameters
    # TODO: make definition

    # validate arguments
    checkArguments(input_parameters, debug)

    # define index base name without path
    if 'index_base_name' not in input_parameters:
        input_parameters['index_base_name'] = os.path.basename(
            os.path.splitext(input_parameters['fasta_gen_file'])[0])
    if debug:
        debug_list.append(
            ['index_base_name', input_parameters['index_base_name']])

    # define tool directories
    # TODO: enable global tool command

    # save and validate fastq files
    print("%s Status: Save fastq files ..." % (getTime()))
    # TODO: make option for automatic fastq2 file read and enable 'paired end' option as input parameter
    fastq1_files, fastq_ref = getArgsFastq(input_parameters['fastq1_links'],
                                           input_parameters, debug)
    # stays empty when no second link file is configured, so the return
    # statement below never hits an unbound name
    fastq2_files = {}
    if input_parameters['fastq2_links']:
        fastq2_files, _ = getArgsFastq(input_parameters['fastq2_links'],
                                       input_parameters, debug)
        for fq_name in fastq1_files.keys():
            link1 = fastq1_files[fq_name]
            link2 = fastq2_files[fq_name]
            # both links must be identical once the read number is stripped
            same_fq = (re.sub(r'_1\.fq\.gz$', '', link1)
                       == re.sub(r'_2\.fq\.gz$', '', link2))
            same_fastq = (re.sub(r'-1\.fastq\.gz$', '', link1)
                          == re.sub(r'-2\.fastq\.gz$', '', link2))
            if not (same_fq or same_fastq):
                print(
                    "%s Warning: The links of \'%s\' are not equal after substituting the enumeration!"
                    % (getTime(), fq_name))
                # TODO: DO TEST
                userCall("Continue? y[es] / n[o]", True)

    # declare project folder
    input_parameters['proj_dir_path'] = '%s/%s' % (
        input_parameters['proj_dir_prefix'], input_parameters['p_name_suffix'])
    if debug:
        debug_list.append(['proj_dir_path', input_parameters['proj_dir_path']])

    # print debug
    if debug: printDebug(debug_list, debug_format)
    if debug: userCall("Continue? y[es] / n[o]", True)

    # return arguments
    return input_parameters, fastq1_files, fastq2_files, fastq_ref