Ejemplo n.º 1
0
def main(args):
    """Build and run the workflow described by the command-line arguments.

    Parses ``args``, sets up logging in ``p.out``, reads the ``key=value``
    configuration file, changes into the output directory and runs the
    workflow steps (proteome preparation and filtering, BLAST, database
    loading, pair finding, MCL, saving orthogroups).

    Args:
        args: list of command-line argument strings; passed to
            ``parse_args`` and echoed into the log.

    Returns:
        The result of ``workflow.run`` (0 is reported as success), 1 if the
        run was interrupted (KeyboardInterrupt, SystemExit, GeneratorExit),
        or 2 after any other exception has been logged.
    """
    register_ctrl_c()

    p = parse_args(args)
    log_path = set_up_logging(p.debug, p.out)
    log.info('python ' + __file__ + ' ' + ' '.join(args))
    log.info('logging to ' + log_path)
    log.info('')

    def close_log_section():
        # Append a blank line to the log file (looked up relative to the
        # current directory) so that the records of this run end with a
        # separator.
        if isfile(log_fname):
            with open(log_fname, 'a') as log_f:
                log_f.write('\n')

    try:
        working_dir = p.out

        # The config file holds ``key=value`` lines; blank lines and lines
        # starting with '#' are skipped. Keys and values are lowercased.
        with open(config.config_file) as f:
            conf = dict(line.strip().lower().split('=', 1)
                        for line in f
                        if line.strip() and line.strip()[0] != '#')

        is_sqlite = conf.get('db_vendor', 'sqlite') == 'sqlite'

        check_and_install_tools(p.debug, is_sqlite, log_path)

        start_from, start_after = get_starting_step(p.start_from, join(p.out, log_fname))

        log.debug('Changing to %s' % working_dir)
        chdir(working_dir)

        if not p.overwrite:
            check_results_existence()

        if not exists(config.intermediate_dir):
            mkdir(config.intermediate_dir)

        set_up_config(working_dir)

        # Building the workflow
        workflow = Workflow(working_dir, id=make_workflow_id(working_dir),
                            cmdline_args=['python', __file__] + args)
        log.debug('Workflow id is "' + workflow.id + '"')
        log.debug('')

        # Only non-sqlite databases get a per-workflow suffix.
        suffix = '' if is_sqlite else '_' + workflow.id

        # p.threads / p.jobs may be unset (None, 0 or ''). Convert only the
        # values that are set, so an unset option falls through to the next
        # choice instead of making int() raise.
        threads = int(p.threads) if p.threads else 0
        jobs = int(p.jobs) if p.jobs else 0
        njobs = threads or jobs or 30

        workflow.extend([
            step_prepare_proteomes_and_annotations(p),
            steps.filter_proteomes(
                min_length=int(p.min_length),
                max_percent_stop=int(p.max_percent_stop)),
            steps.make_blast_db(),
            steps.blast(
                workflow.id,
                njobs,
                # The job count always falls back to 30, so the former
                # ``njobs and not p.threads`` reduces to ``not p.threads``.
                on_cluster=not p.threads,
                evalue=float(p.evalue)),
            steps.parse_blast_results(),
            steps.clean_database(suffix),
            steps.install_schema(suffix),
            steps.load_blast_results(suffix),
            steps.find_pairs(suffix),
            steps.dump_pairs_to_files(suffix),
            steps.mcl(p.debug),
            steps.step_save_orthogroups()])

        result = workflow.run(
            start_after, start_from,
            overwrite=True,
            ask_before=p.ask_each_step)

        if result == 0:
            log.info('Done.')
            log.info('Log is in ' + join(working_dir, log_fname))
            if isfile(join(working_dir, config.orthogroups_file)):
                log.info('Groups are in ' + join(working_dir, config.orthogroups_file))
                if isfile(config.nice_orthogroups_file):
                    log.info('Groups with aligned columns are in ' +
                             join(working_dir, config.nice_orthogroups_file))
            else:
                log.info('Groups in short format are in ' +
                         join(working_dir, config.short_orthogroups_file))

            close_log_section()

        return result

    except (KeyboardInterrupt, SystemExit, GeneratorExit):
        close_log_section()
        return 1

    except Exception:
        log.error('')
        log.exception('Unexpected error!')
        close_log_section()
        return 2
Ejemplo n.º 2
0
def main(args):
    """Re-run the workflow on a directory that was already processed once.

    Requires ``p.directory`` to contain an ``intermediate`` sub-directory.
    When an output directory different from ``p.directory`` is given and the
    run starts from the beginning (no start step, or step 1), the output
    directory is recreated as a copy of ``p.directory``; the text of a log
    file already present in the old output directory is carried over. Then
    logging is set up in append mode, the workflow is built and run.

    Args:
        args: list of command-line argument strings; passed to
            ``parse_args`` and echoed into the log.

    Returns:
        The result of ``workflow.run`` (0 is reported as success), or 1 if
        the run was interrupted (KeyboardInterrupt, SystemExit,
        GeneratorExit).

    Raises:
        Exception: any other exception is logged and re-raised.
    """
    register_ctrl_c()

    p = parse_args(args)

    try:
        if not exists(join(p.directory, 'intermediate')):
            arg_parse_error('You need to run Scenario 1 on this directory first.')

        if not p.out_dir:
            p.out_dir = p.directory

        working_dir = p.out_dir

        # The config file holds ``key=value`` lines; blank lines and lines
        # starting with '#' are skipped. Case is kept as written.
        with open(config_file) as cf:
            conf = dict(
                line.strip().split('=', 1) for line
                in cf if line.strip() and line.strip()[0] != '#')

        is_sqlite = conf.get('db_vendor', 'sqlite') == 'sqlite'

        start_from, start_after = get_starting_step(p.start_from, join(working_dir, log_fname))

        if (not start_from or start_from == 1) and p.out_dir != p.directory:
            log_text = ''

            if isdir(p.out_dir):
                if not p.overwrite:
                    # Hidden entries (names starting with '.') do not count
                    # as existing content.
                    files = [f for f in listdir(p.out_dir) if f and f[0] != '.']
                    if files:
                        print('The output directory exists. Do you want to overwrite it? ' +
                              '(You can run with the --overwrite option to avoid this warning.)')
                        try:
                            raw_input('Press any key to overwrite and continue, or Ctrl+C to interrupt.\n> ')
                        except (EOFError, KeyboardInterrupt, SystemExit, GeneratorExit):
                            exit(1)
                # Remember the old log before the directory is wiped.
                if exists(join(p.out_dir, log_fname)):
                    with open(join(p.out_dir, log_fname)) as log_f:
                        log_text = log_f.read()
                rmtree(p.out_dir)

            # copytree() needs a destination that does not exist yet:
            # create the whole path (including parents), then remove the
            # leaf so that only the parent directories remain.
            makedirs(p.out_dir)
            rmdir(p.out_dir)
            copytree(p.directory, p.out_dir)
            # Drop the log copied from the source directory; the previous
            # log of the output directory (if any) is restored instead.
            if isfile(join(p.out_dir, log_fname)):
                remove(join(p.out_dir, log_fname))
            # NOTE(review): the paths below are built from p.out_dir after
            # changing into it, which presumably relies on p.out_dir being
            # absolute - confirm that parse_args normalizes it.
            chdir(p.out_dir)
            if log_text:
                with open(join(p.out_dir, log_fname), 'w') as log_f:
                    log_f.write(log_text)

        log_fpath = set_up_logging(p.debug, p.out_dir, 'a')
        log.info('python ' + basename(__file__) + ' ' + ' '.join(args))
        log.info('')
        # Pass whether the vendor is sqlite as a boolean; the raw vendor
        # string is always truthy and cannot carry that distinction.
        check_and_install_tools(p.debug, is_sqlite, log_fpath)

        log.info('Changing to %s' % working_dir)
        if not isdir(working_dir):
            makedirs(working_dir)
        chdir(working_dir)

        set_up_config(working_dir)

        # Building the workflow
        workflow = Workflow(working_dir, id=make_workflow_id(working_dir),
                            cmdline_args=['python', __file__] + args)
        log.info('Workflow id is "' + workflow.id + '"')
        log.info('')

        # Only non-sqlite databases get a per-workflow suffix.
        if is_sqlite:
            suffix = ''
        else:
            suffix = '_' + workflow.id

        workflow.extend([
            step_prepare_input(p),
            steps.filter_proteomes(
                min_length=int(p.min_length),
                max_percent_stop=int(p.max_percent_stop)),
            filter_new_proteomes(
                new_proteomes_dir,
                min_length=int(p.min_length),
                max_percent_stop=int(p.max_percent_stop)),
            steps.make_blast_db(),
            steps.blast(
                workflow.id,
                p.threads or p.jobs or 30,
                # NOTE(review): on_cluster is enabled when threads are
                # given; one would expect the opposite (cluster run when
                # threads are not given) - confirm the intent.
                on_cluster=p.threads > 0,
                new_good_proteomes=new_good_proteomes,
                evalue=float(p.evalue)),
            steps.parse_blast_results(),
            steps.clean_database(suffix),
            steps.install_schema(suffix),
            steps.load_blast_results(suffix),
            steps.find_pairs(suffix),
            steps.dump_pairs_to_files(suffix),
            steps.mcl(p.debug),
            steps.step_save_orthogroups(new_proteomes_dir if not p.ids_list and p.blast_singletones else None)
        ])

        # The command-line value takes precedence over the config file.
        blastdb = p.blastdb or conf.get('blastdb', None)

        if not p.ids_list:
            workflow.extend([step_blast_singletones(p.threads, p.blast_singletones, blastdb, p.debug, p.overwrite)])

        result = workflow.run(
            start_after, start_from,
            overwrite=True,
            ask_before=p.ask_each_step)

        if result == 0:
            log.info('Done.')
            log.info('Log is in ' + join(working_dir, log_fname))
            if isfile(join(working_dir, config.orthogroups_file)):
                log.info('Groups are in ' + join(working_dir, config.orthogroups_file))
                if isfile(config.nice_orthogroups_file):
                    log.info('Groups with aligned columns are in ' +
                             join(working_dir, config.nice_orthogroups_file))
            else:
                log.info('Groups in short format are in ' + join(working_dir, config.short_orthogroups_file))

        return result

    except (KeyboardInterrupt, SystemExit, GeneratorExit):
        return 1

    except Exception:
        log.error('')
        log.exception('Unexpected error!')
        raise
Ejemplo n.º 3
0
def main(args):
    """Build and run the workflow described by the command-line arguments.

    Parses ``args``, sets up logging in ``p.out``, reads the ``key=value``
    configuration file, changes into the output directory and runs the
    workflow steps (proteome preparation and filtering, BLAST, database
    loading, pair finding, MCL, saving orthogroups).

    Args:
        args: list of command-line argument strings; passed to
            ``parse_args`` and echoed into the log.

    Returns:
        The result of ``workflow.run`` (0 is reported as success), 1 if the
        run was interrupted (KeyboardInterrupt, SystemExit, GeneratorExit),
        or 2 after any other exception has been logged.
    """
    register_ctrl_c()

    p = parse_args(args)
    log_path = set_up_logging(p.debug, p.out)
    log.info('python ' + __file__ + ' ' + ' '.join(args))
    log.info('logging to ' + log_path)
    log.info('')

    def close_log_section():
        # Append a blank line to the log file (looked up relative to the
        # current directory) so that the records of this run end with a
        # separator.
        if isfile(log_fname):
            with open(log_fname, 'a') as log_f:
                log_f.write('\n')

    try:
        working_dir = p.out

        # The config file holds ``key=value`` lines; blank lines and lines
        # starting with '#' are skipped. Keys and values are lowercased.
        with open(config.config_file) as f:
            conf = dict(line.strip().lower().split('=', 1) for line in f
                        if line.strip() and line.strip()[0] != '#')

        is_sqlite = conf.get('db_vendor', 'sqlite') == 'sqlite'

        check_and_install_tools(p.debug, is_sqlite, log_path)

        start_from, start_after = get_starting_step(p.start_from,
                                                    join(p.out, log_fname))

        log.debug('Changing to %s' % working_dir)
        chdir(working_dir)

        if not p.overwrite:
            check_results_existence()

        if not exists(config.intermediate_dir):
            mkdir(config.intermediate_dir)

        set_up_config(working_dir)

        # Building the workflow
        workflow = Workflow(working_dir,
                            id=make_workflow_id(working_dir),
                            cmdline_args=['python', __file__] + args)
        log.debug('Workflow id is "' + workflow.id + '"')
        log.debug('')

        # Only non-sqlite databases get a per-workflow suffix.
        suffix = '' if is_sqlite else '_' + workflow.id

        # p.threads / p.jobs may be unset (None, 0 or ''). Convert only the
        # values that are set, so an unset option falls through to the next
        # choice instead of making int() raise.
        threads = int(p.threads) if p.threads else 0
        jobs = int(p.jobs) if p.jobs else 0
        njobs = threads or jobs or 30

        workflow.extend([
            step_prepare_proteomes_and_annotations(p),
            steps.filter_proteomes(min_length=int(p.min_length),
                                   max_percent_stop=int(p.max_percent_stop)),
            steps.make_blast_db(),
            steps.blast(workflow.id,
                        njobs,
                        # The job count always falls back to 30, so the
                        # former ``njobs and not p.threads`` reduces to
                        # ``not p.threads``.
                        on_cluster=not p.threads,
                        evalue=float(p.evalue)),
            steps.parse_blast_results(),
            steps.clean_database(suffix),
            steps.install_schema(suffix),
            steps.load_blast_results(suffix),
            steps.find_pairs(suffix),
            steps.dump_pairs_to_files(suffix),
            steps.mcl(p.debug),
            steps.step_save_orthogroups()
        ])

        result = workflow.run(start_after,
                              start_from,
                              overwrite=True,
                              ask_before=p.ask_each_step)

        if result == 0:
            log.info('Done.')
            log.info('Log is in ' + join(working_dir, log_fname))
            if isfile(join(working_dir, config.orthogroups_file)):
                log.info('Groups are in ' +
                         join(working_dir, config.orthogroups_file))
                if isfile(config.nice_orthogroups_file):
                    log.info('Groups with aligned columns are in ' +
                             join(working_dir, config.nice_orthogroups_file))
            else:
                log.info('Groups in short format are in ' +
                         join(working_dir, config.short_orthogroups_file))

            close_log_section()

        return result

    except (KeyboardInterrupt, SystemExit, GeneratorExit):
        close_log_section()
        return 1

    except Exception:
        log.error('')
        log.exception('Unexpected error!')
        close_log_section()
        return 2
Ejemplo n.º 4
0
def main(args):
    """Re-run the workflow on a directory that was already processed once.

    Requires ``p.directory`` to contain an ``intermediate`` sub-directory.
    When an output directory different from ``p.directory`` is given and the
    run starts from the beginning (no start step, or step 1), the output
    directory is recreated as a copy of ``p.directory``; the text of a log
    file already present in the old output directory is carried over. Then
    logging is set up in append mode, the workflow is built and run.

    Args:
        args: list of command-line argument strings; passed to
            ``parse_args`` and echoed into the log.

    Returns:
        The result of ``workflow.run`` (0 is reported as success), or 1 if
        the run was interrupted (KeyboardInterrupt, SystemExit,
        GeneratorExit).

    Raises:
        Exception: any other exception is logged and re-raised.
    """
    register_ctrl_c()

    p = parse_args(args)

    try:
        if not exists(join(p.directory, 'intermediate')):
            arg_parse_error(
                'You need to run Scenario 1 on this directory first.')

        if not p.out_dir:
            p.out_dir = p.directory

        working_dir = p.out_dir

        # The config file holds ``key=value`` lines; blank lines and lines
        # starting with '#' are skipped. Case is kept as written.
        with open(config_file) as cf:
            conf = dict(line.strip().split('=', 1) for line in cf
                        if line.strip() and line.strip()[0] != '#')

        is_sqlite = conf.get('db_vendor', 'sqlite') == 'sqlite'

        start_from, start_after = get_starting_step(
            p.start_from, join(working_dir, log_fname))

        if (not start_from or start_from == 1) and p.out_dir != p.directory:
            log_text = ''

            if isdir(p.out_dir):
                if not p.overwrite:
                    # Hidden entries (names starting with '.') do not count
                    # as existing content.
                    files = [
                        f for f in listdir(p.out_dir) if f and f[0] != '.'
                    ]
                    if files:
                        print(
                            'The output directory exists. Do you want to overwrite it? '
                            +
                            '(You can run with the --overwrite option to avoid this warning.)'
                        )
                        try:
                            raw_input(
                                'Press any key to overwrite and continue, or Ctrl+C to interrupt.\n> '
                            )
                        except (EOFError, KeyboardInterrupt, SystemExit,
                                GeneratorExit):
                            exit(1)
                # Remember the old log before the directory is wiped.
                if exists(join(p.out_dir, log_fname)):
                    with open(join(p.out_dir, log_fname)) as log_f:
                        log_text = log_f.read()
                rmtree(p.out_dir)

            # copytree() needs a destination that does not exist yet:
            # create the whole path (including parents), then remove the
            # leaf so that only the parent directories remain.
            makedirs(p.out_dir)
            rmdir(p.out_dir)
            copytree(p.directory, p.out_dir)
            # Drop the log copied from the source directory; the previous
            # log of the output directory (if any) is restored instead.
            if isfile(join(p.out_dir, log_fname)):
                remove(join(p.out_dir, log_fname))
            # NOTE(review): the paths below are built from p.out_dir after
            # changing into it, which presumably relies on p.out_dir being
            # absolute - confirm that parse_args normalizes it.
            chdir(p.out_dir)
            if log_text:
                with open(join(p.out_dir, log_fname), 'w') as log_f:
                    log_f.write(log_text)

        log_fpath = set_up_logging(p.debug, p.out_dir, 'a')
        log.info('python ' + basename(__file__) + ' ' + ' '.join(args))
        log.info('')
        # Pass whether the vendor is sqlite as a boolean; the raw vendor
        # string is always truthy and cannot carry that distinction.
        check_and_install_tools(p.debug, is_sqlite, log_fpath)

        log.info('Changing to %s' % working_dir)
        if not isdir(working_dir):
            makedirs(working_dir)
        chdir(working_dir)

        set_up_config(working_dir)

        # Building the workflow
        workflow = Workflow(working_dir,
                            id=make_workflow_id(working_dir),
                            cmdline_args=['python', __file__] + args)
        log.info('Workflow id is "' + workflow.id + '"')
        log.info('')

        # Only non-sqlite databases get a per-workflow suffix.
        if is_sqlite:
            suffix = ''
        else:
            suffix = '_' + workflow.id

        workflow.extend([
            step_prepare_input(p),
            steps.filter_proteomes(min_length=int(p.min_length),
                                   max_percent_stop=int(p.max_percent_stop)),
            filter_new_proteomes(new_proteomes_dir,
                                 min_length=int(p.min_length),
                                 max_percent_stop=int(p.max_percent_stop)),
            steps.make_blast_db(),
            steps.blast(workflow.id,
                        p.threads or p.jobs or 30,
                        # NOTE(review): on_cluster is enabled when threads
                        # are given; one would expect the opposite (cluster
                        # run when threads are not given) - confirm the
                        # intent.
                        on_cluster=p.threads > 0,
                        new_good_proteomes=new_good_proteomes,
                        evalue=float(p.evalue)),
            steps.parse_blast_results(),
            steps.clean_database(suffix),
            steps.install_schema(suffix),
            steps.load_blast_results(suffix),
            steps.find_pairs(suffix),
            steps.dump_pairs_to_files(suffix),
            steps.mcl(p.debug),
            steps.step_save_orthogroups(new_proteomes_dir if not p.ids_list
                                        and p.blast_singletones else None)
        ])

        # The command-line value takes precedence over the config file.
        blastdb = p.blastdb or conf.get('blastdb', None)

        if not p.ids_list:
            workflow.extend([
                step_blast_singletones(p.threads, p.blast_singletones, blastdb,
                                       p.debug, p.overwrite)
            ])

        result = workflow.run(start_after,
                              start_from,
                              overwrite=True,
                              ask_before=p.ask_each_step)

        if result == 0:
            log.info('Done.')
            log.info('Log is in ' + join(working_dir, log_fname))
            if isfile(join(working_dir, config.orthogroups_file)):
                log.info('Groups are in ' +
                         join(working_dir, config.orthogroups_file))
                if isfile(config.nice_orthogroups_file):
                    log.info('Groups with aligned columns are in ' +
                             join(working_dir, config.nice_orthogroups_file))
            else:
                log.info('Groups in short format are in ' +
                         join(working_dir, config.short_orthogroups_file))

        return result

    except (KeyboardInterrupt, SystemExit, GeneratorExit):
        return 1

    except Exception:
        log.error('')
        log.exception('Unexpected error!')
        raise