Exemple #1
0
def main(argv):
    """Drive a MetaPathways run for the inputs named on the command line.

    The function parses the command line with the module-level ``parser``,
    resolves the configuration and parameter files, pairs every input sample
    with its output directory, builds one ``SampleData`` object per sample and
    passes them to ``run_metapathways``.  When the ``BLAST_REFDB`` step of the
    parameters is set to ``grid`` the samples are additionally handed to
    ``blast_in_grid``.

    Args:
        argv: command-line arguments; in this function they are only used to
            echo the command that was run.

    Returns:
        None.  Failures are reported through ``sys.exit``, ``exit_process``
        or ``halt_process``; the last statement is ``halt_process(opts.delay)``.
    """
    global parser
    # NOTE(review): parse_args() is called without arguments, so `argv` is
    # not what gets parsed here -- confirm that this is intended.
    (opts, args) = parser.parse_args()
    # A true result from valid_arguments() leads to the usage text and exit.
    if valid_arguments(opts, args):
        print(usage)
        sys.exit(0)

    signal.signal(signal.SIGINT, sigint_handler)
    signal.signal(signal.SIGTERM, sigint_handler)

    eprintf("COMMAND : %s\n", sys.argv[0] + ' ' + ' '.join(argv))

    # initialize the input directory or file
    input_fp = opts.input_fp
    output_dir = path.abspath(opts.output_dir)
    sample_subset = removeSuffix(opts.sample_subset)
    run_type = opts.run_type.strip()

    # The whole output directory is never removed here; with run type
    # 'overwrite' only the per-sample directories are recreated further down.

    # fall back to the files next to the script when none is given
    config_file = opts.config_file or (cmd_folder + PATHDELIM + metapaths_config)
    parameter_fp = opts.parameter_fp or (cmd_folder + PATHDELIM + metapaths_param)

    # The NCBI files are only used when both options are supplied.
    ncbi_sequin_params = None
    ncbi_sequin_sbt = None
    if opts.ncbi_header and opts.ncbi_sbt:
        if not path.exists(opts.ncbi_header):
            print("Could not open or missing NCBI header file " + opts.ncbi_header)
            print("Either disable option to CREATE_SEQUIN_FILE or provide a valid header file")
            # a missing input is a failure, so exit with a non-zero status
            sys.exit(1)

        if not path.exists(opts.ncbi_sbt):
            print("You must must have a sbt file obtained from the NCBI "
                  "\"Create Submission Template\" form \n \n"
                  "                 http://www.ncbi.nlm.nih.gov/WebSub/template.cgi "
                  + opts.ncbi_sbt)
            sys.exit(1)

        ncbi_sequin_params = path.abspath(opts.ncbi_header)
        ncbi_sequin_sbt = path.abspath(opts.ncbi_sbt)

    try:
        if run_type in ['overlay', 'safe'] and not path.exists(output_dir):
            makedirs(output_dir)
    except OSError:
        print("")
        print("ERROR: Cannot create output directory \"" + output_dir + "\"\n" +
              "       Perhaps directory \"" + output_dir + "\" already exists.\n" +
              "       Please choose a different directory, or \n" +
              "       run with the option \"-r  overwrite\" to force overwrite it.")
        sys.exit(1)

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    command_line_params = {'verbose': opts.verbose}

    params = parse_metapaths_parameters(parameter_fp)

    globalerrorlogger = WorkflowLogger(
        generate_log_fp(output_dir, basefile_name='global_errors_warnings'),
        open_mode='w')

    # Load the sample inputs: either a single file or a directory of samples.
    input_output_list = {}
    if path.isfile(input_fp):
        input_output_list = create_an_input_output_pair(
            input_fp, output_dir, globalerrorlogger=globalerrorlogger)
    elif path.exists(input_fp):
        input_output_list = create_input_output_pairs(
            input_fp, output_dir, globalerrorlogger=globalerrorlogger)
    else:
        eprintf(
            "ERROR\tNo valid input sample file or directory containing samples exists .!"
        )
        eprintf("ERROR\tAs provided as arguments in the -in option.!\n")
        exit_process(
            "ERROR\tAs provided as arguments in the -in option.!\n")

    # remove all samples that are not specified unless sample_subset is empty
    remove_unspecified_samples(input_output_list,
                               sample_subset,
                               globalerrorlogger=globalerrorlogger)

    sorted_input_output_list = sorted(input_output_list.keys())
    filetypes = check_file_types(sorted_input_output_list)

    # stop on invalid samples
    if not halt_on_invalid_input(input_output_list, filetypes, sample_subset):
        globalerrorlogger.printf(
            "ERROR\tInvalid inputs found. Check for file with bad format or characters!\n"
        )
        halt_process(opts.delay)

    # make sure the sample files are found
    report_missing_filenames(input_output_list,
                             sample_subset,
                             logger=globalerrorlogger)

    # check the pipeline configuration
    config_settings = read_pipeline_configuration(config_file,
                                                  globalerrorlogger)

    # NOTE(review): the instance is never used below; the construction is
    # kept in case Parameters() has side effects -- confirm and remove.
    parameter = Parameters()
    if not staticDiagnose(config_settings, params, logger=globalerrorlogger):
        eprintf(
            "ERROR\tFailed to pass the test for required scripts and inputs before run\n"
        )
        globalerrorlogger.printf(
            "ERROR\tFailed to pass the test for required scripts and inputs before run\n"
        )
        halt_process(opts.delay)

    samplesData = {}
    block_mode = opts.block_mode
    runid = opts.runid

    try:
        print("RUNNING MetaPathways version 2.5.2")
        if len(input_output_list):
            # These two values do not depend on the sample, compute them once.
            algorithm = get_parameter(params,
                                      'annotation',
                                      'algorithm',
                                      default='LAST').upper()
            annotated = params["INPUT"]['format'] in [
                "gbk-annotated", "gff-annotated"
            ]

            for input_file in sorted_input_output_list:
                sample_output_dir = input_output_list[input_file]

                s = SampleData()
                s.setInputOutput(inputFile=input_file,
                                 sample_output_dir=sample_output_dir)
                s.setParameter('algorithm', algorithm)
                s.setParameter('ncbi_params_file', ncbi_sequin_params)
                s.setParameter('ncbi_sequin_sbt', ncbi_sequin_sbt)
                s.setParameter('FILE_TYPE', filetypes[input_file][0])
                s.setParameter('ANNOTATED', annotated)
                s.setParameter('SEQ_TYPE', filetypes[input_file][1])
                s.clearJobs()

                # 'overwrite' starts every sample from an empty directory
                if run_type == 'overwrite' and path.exists(sample_output_dir):
                    shutil.rmtree(sample_output_dir)
                if not path.exists(sample_output_dir):
                    makedirs(sample_output_dir)

                s.prepareToRun()
                samplesData[input_file] = s

            # sample_output_dir is the directory of the last sample prepared
            run_metapathways(samplesData,
                             sample_output_dir,
                             output_dir,
                             globallogger=globalerrorlogger,
                             command_line_params=command_line_params,
                             params=params,
                             metapaths_config=metapaths_config,
                             status_update_callback=status_update_callback,
                             config_file=config_file,
                             run_type=run_type,
                             config_settings=config_settings,
                             block_mode=block_mode,
                             runid=runid)
        else:
            eprintf(
                "ERROR\tNo valid input files/Or no files specified  to process in folder %s!\n",
                sQuote(input_fp))
            globalerrorlogger.printf(
                "ERROR\tNo valid input files to process in folder %s!\n",
                sQuote(input_fp))

        # blast the files
        blasting_system = get_parameter(params,
                                        'metapaths_steps',
                                        'BLAST_REFDB',
                                        default='yes')
        # `input_file` is only bound when at least one sample was prepared,
        # so the grid submission is skipped for an empty sample set.
        if blasting_system == 'grid' and samplesData:
            blast_in_grid(
                samplesData[input_file],
                sorted_input_output_list,
                path.abspath(opts.output_dir),  # important to use opts.
                params=params,
                metapaths_config=metapaths_config,
                config_file=config_file,
                run_type=run_type,
                runid=runid)

    except:
        # Catch-all kept from the original: whatever is raised above is
        # reported with its traceback through exit_process.
        exit_process(str(traceback.format_exc(10)), logger=globalerrorlogger)

    eprintf("            ***********                \n")
    eprintf("INFO : FINISHED PROCESSING THE SAMPLES \n")
    eprintf("             THE END                   \n")
    eprintf("            ***********                \n")
    halt_process(opts.delay)
def main(argv):
    """Drive a MetaPathways run for the inputs named on the command line.

    The function parses the command line with the module-level ``parser``,
    resolves the parameter and configuration files (creating either one when
    it does not exist yet), pairs every input sample with its output
    directory, builds one ``SampleData`` object per sample and passes them to
    ``run_metapathways``.

    Args:
        argv: command-line arguments; in this function they are only used to
            echo the command that was run.

    Returns:
        None.  The function returns early when ``staticDiagnose`` fails;
        other failures are reported through ``sys.exit``, ``exit_process``
        or ``halt_process``.
    """
    global parser

    # NOTE(review): parse_args() is called without arguments, so `argv` is
    # not what gets parsed here -- confirm that this is intended.
    (opts, args) = parser.parse_args()
    # A true result from valid_arguments() leads to the usage text and exit.
    if valid_arguments(opts, args):
        print(usage)
        sys.exit(0)

    signal.signal(signal.SIGINT, sigint_handler)
    signal.signal(signal.SIGTERM, sigint_handler)

    # Values are passed as eprintf arguments instead of being formatted into
    # the format string, so a '%' inside them is not interpreted again.
    eprintf("%-10s:%s\n", 'COMMAND', sys.argv[0] + ' ' + ' '.join(argv))

    # initialize the input directory or file
    input_fp = opts.input_fp
    output_dir = path.abspath(opts.output_dir)
    sample_subset = removeSuffix(opts.sample_subset)
    run_type = opts.run_type.strip()

    # The whole output directory is never removed here; with run type
    # 'overwrite' only the per-sample directories are recreated further down.

    # fall back to the files next to the script when none is given
    config_file = opts.config_file or (cmd_folder + PATHDELIM + metapaths_config)
    parameter_fp = opts.parameter_fp or (cmd_folder + PATHDELIM + metapaths_param)

    try:
        if run_type in ['overlay', 'safe'] and not path.exists(output_dir):
            makedirs(output_dir)
    except OSError:
        print("")
        print("ERROR: Cannot create output directory \"" + output_dir + "\"\n" +
              "       Perhaps directory \"" + output_dir + "\" already exists.\n" +
              "       Please choose a different directory, or \n" +
              "       run with the option \"-r  overwrite\" to force overwrite it.")
        sys.exit(2)

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    command_line_params = {'verbose': opts.verbose}

    # create the parameters file on first use
    if not path.exists(parameter_fp):
        eprintf("%-10s: No parameters file %s found!\n", 'WARNING', parameter_fp)
        eprintf("%-10s: Creating a parameters file %s!\n", 'INFO', parameter_fp)
        create_metapaths_parameters(parameter_fp, cmd_folder)

    params = parse_metapaths_parameters(parameter_fp)

    globalerrorlogger = WorkflowLogger(
        generate_log_fp(output_dir, basefile_name='global_errors_warnings'),
        open_mode='w')

    # Load the sample inputs: either a single file or a directory of samples.
    input_output_list = {}
    if path.isfile(input_fp):
        input_output_list = create_an_input_output_pair(
            input_fp, output_dir, globalerrorlogger=globalerrorlogger)
    elif path.exists(input_fp):
        input_output_list = create_input_output_pairs(
            input_fp, output_dir, globalerrorlogger=globalerrorlogger)
    else:
        eprintf("ERROR\tNo valid input sample file or directory containing samples exists .!")
        eprintf("ERROR\tAs provided as arguments in the -in option.!\n")
        exit_process("ERROR\tAs provided as arguments in the -in option.!\n")

    # remove all samples that are not specified unless sample_subset is empty
    remove_unspecified_samples(input_output_list, sample_subset,
                               globalerrorlogger=globalerrorlogger)

    sorted_input_output_list = sorted(input_output_list.keys())
    filetypes = check_file_types(sorted_input_output_list)

    # stop on invalid samples
    if not halt_on_invalid_input(input_output_list, filetypes, sample_subset):
        globalerrorlogger.printf(
            "ERROR\tInvalid inputs found. Check for file with bad format or characters!\n")
        halt_process(opts.delay)

    # make sure the sample files are found
    report_missing_filenames(input_output_list, sample_subset,
                             logger=globalerrorlogger)

    # create the pipeline configuration on first use, then read it
    if not path.exists(config_file):
        eprintf("%-10s: No config file %s found!\n", 'WARNING', config_file)
        eprintf("%-10s: Creating a config file %s!\n", 'INFO', config_file)
        if not environment_variables_defined():
            # the configuration cannot be created, so report a failure status
            sys.exit(1)
        create_metapaths_configuration(config_file, cmd_folder)

    config_settings = read_pipeline_configuration(config_file, globalerrorlogger)

    # NOTE(review): the instance is never used below; the construction is
    # kept in case Parameters() has side effects -- confirm and remove.
    parameter = Parameters()
    if not staticDiagnose(config_settings, params, logger=globalerrorlogger):
        eprintf("ERROR\tFailed to pass the test for required scripts and inputs before run\n")
        globalerrorlogger.printf(
            "ERROR\tFailed to pass the test for required scripts and inputs before run\n")
        return

    samplesData = {}
    block_mode = opts.block_mode
    runid = opts.runid

    try:
        print("RUNNING MetaPathways version FogDog 3.0")
        if len(input_output_list):
            # The algorithm does not depend on the sample, look it up once.
            algorithm = get_parameter(params, 'annotation', 'algorithm',
                                      default='LAST').upper()

            for input_file in sorted_input_output_list:
                sample_output_dir = input_output_list[input_file]

                s = SampleData()
                s.setInputOutput(inputFile=input_file,
                                 sample_output_dir=sample_output_dir)
                s.setParameter('algorithm', algorithm)
                s.setParameter('FILE_TYPE', filetypes[input_file][0])
                s.setParameter('SEQ_TYPE', filetypes[input_file][1])
                s.clearJobs()

                # 'overwrite' starts every sample from an empty directory
                if run_type == 'overwrite' and path.exists(sample_output_dir):
                    shutil.rmtree(sample_output_dir)
                if not path.exists(sample_output_dir):
                    makedirs(sample_output_dir)

                s.prepareToRun()
                samplesData[input_file] = s

            # sample_output_dir is the directory of the last sample prepared
            run_metapathways(
                samplesData,
                sample_output_dir,
                output_dir,
                globallogger=globalerrorlogger,
                command_line_params=command_line_params,
                params=params,
                metapaths_config=metapaths_config,
                status_update_callback=status_update_callback,
                config_file=config_file,
                run_type=run_type,
                config_settings=config_settings,
                block_mode=block_mode,
                runid=runid
            )
        else:
            eprintf("ERROR\tNo valid input files/Or no files specified  to process in folder %s!\n",
                    sQuote(input_fp))
            globalerrorlogger.printf("ERROR\tNo valid input files to process in folder %s!\n",
                                     sQuote(input_fp))

    except:
        # Catch-all kept from the original: whatever is raised above is
        # reported with its traceback through exit_process.
        exit_process(str(traceback.format_exc(10)), logger=globalerrorlogger)

    eprintf("            ***********                \n")
    eprintf("INFO : FINISHED PROCESSING THE SAMPLES \n")
    eprintf("             THE END                   \n")
    eprintf("            ***********                \n")