コード例 #1
0
def check_options(opts):
    """Validate and normalise modelling options in-place.

    Creates the working directory (with a TADbit version log) when it
    is missing, converts genomic coordinates to bin numbers, expands
    "beg:end:step" range strings into tuples of values, and prepares a
    temporary database path when ``opts.tmpdb`` is requested.
    """
    # check resume
    if not path.exists(opts.workdir):
        warn('ERROR: workdir not found, creating it')
        mkdir(opts.workdir)
        # write version log
        vlog_path = path.join(opts.workdir,
                              'TADbit_and_dependencies_versions.log')
        dependencies = get_dependencies_version()
        # Compare file *content* to the version string: the previous
        # readlines() call returned a list and therefore never matched,
        # rewriting the log on every run and leaking the file handle.
        write_log = True
        if path.exists(vlog_path):
            with open(vlog_path) as vlog:
                write_log = vlog.read() != dependencies
        if write_log:
            print('Writing versions of TADbit and dependencies')
            with open(vlog_path, 'w') as vlog:
                vlog.write(dependencies)
    # do the division to bins
    if opts.job_list is not None:
        if opts.job_list == []:
            # empty list means "optimize everything"
            opts.job_list = [
                'maxdist', 'upfreq', 'lowfreq', 'scale', 'dcutoff'
            ]
    try:
        # keep original genomic coordinates, then convert to bin numbers
        opts.ori_beg = opts.beg
        opts.ori_end = opts.end
        opts.beg = int(float(opts.beg) / opts.reso)
        opts.end = int(float(opts.end) / opts.reso)
        if opts.end - opts.beg <= 2:
            # a span of <= 2 bins almost certainly means the user passed
            # bin numbers instead of genomic coordinates
            raise Exception('"beg" and "end" parameter should be given in ' +
                            'genomic coordinates, not bin')
    except TypeError:
        # beg/end not given (None): leave untouched
        pass

    # turn options into lists
    def _load_range(range_str, num=float):
        """Expand a ["beg:end:step"] string into a tuple of numbers.

        Falls back to converting each element individually when the
        input is not in range syntax.
        """
        try:
            beg, end, step = map(num, range_str[0].split(':'))
            # end + step so that the upper bound is included
            return tuple(arange(beg, end + step, step))
        except (AttributeError, ValueError):
            return tuple([num(v) for v in range_str])

    opts.scale = _load_range(opts.scale)
    opts.maxdist = _load_range(opts.maxdist, num=int)
    opts.upfreq = _load_range(opts.upfreq)
    opts.lowfreq = _load_range(opts.lowfreq)
    opts.dcutoff = _load_range(opts.dcutoff)

    # default: run as many models as we keep
    opts.nmodels_run = opts.nmodels_run or opts.nmodels

    if opts.matrix:
        opts.matrix = path.abspath(opts.matrix)
    opts.workdir = path.abspath(opts.workdir)

    mkdir(opts.workdir)
    if 'tmpdb' in opts and opts.tmpdb:
        # for tmp/LUSTRE-like file systems: use a randomly named DB copy
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
コード例 #2
0
ファイル: tadbit_parse.py プロジェクト: lelou6666/TADbit
def check_options(opts):
    """Validate parsing options, set up logging and the version log.

    Raises on missing mandatory parameters, (re)initialises the process
    log inside ``opts.workdir`` and aborts when the exact same job was
    already computed (checked via md5 digest of the parameters).
    """

    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    if opts.type != 'map':
        raise NotImplementedError('ERROR: not yet there')

    if not opts.genome:
        raise Exception('ERROR: genome parameter required.')
    # (a second identical opts.workdir check was removed: it was
    #  unreachable after the first one above)

    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    if opts.workdir.endswith('/'):
        opts.workdir = opts.workdir[:-1]

    # write log
    log_format = '[PARSING]   %(message)s'

    # reset logging handlers so basicConfig below takes effect
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        # filemode 'a': append; the previous 'aw' is not a valid mode
        # and raises ValueError under Python 3
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a')
    except IOError:
        # fall back to a secondary log file if the first is not writable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # compare file *content* to the version string (readlines() returned
    # a list and never matched, rewriting the log on every run)
    write_log = True
    if path.exists(vlog_path):
        with open(vlog_path) as vlog:
            write_log = vlog.read() != dependencies
    if write_log:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        exit('WARNING: exact same job already computed, see JOBs table above')
コード例 #3
0
def check_options(opts):
    """Prepare the working directory, trace DB and CPU count in-place.

    Creates ``opts.workdir`` (with a TADbit version log) when missing,
    the filtered-reads sub-directory and an empty trace DB, optionally
    relocating the DB to a temporary path for LUSTRE-like file systems,
    and clamps ``opts.cpus`` to the available cores.
    """
    if not path.exists(opts.workdir):
        mkdir(opts.workdir)
        # write version log
        vlog_path = path.join(opts.workdir,
                              'TADbit_and_dependencies_versions.log')
        dependencies = get_dependencies_version()
        # compare file *content* to the version string: readlines()
        # returned a list and never matched, rewriting the log on every
        # run and leaking the file handle
        write_log = True
        if path.exists(vlog_path):
            with open(vlog_path) as vlog:
                write_log = vlog.read() != dependencies
        if write_log:
            logging.info('Writing versions of TADbit and dependencies')
            with open(vlog_path, 'w') as vlog:
                vlog.write(dependencies)

    mkdir(path.join(opts.workdir, '03_filtered_reads'))

    # create empty DB if don't exists
    dbpath = path.join(opts.workdir, 'trace.db')
    open(dbpath, 'a').close()

    # for LUSTRE file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file with a random 10-letter suffix
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            # best effort: keep going if the DB cannot be copied yet
            pass

    # number of cpus: 0 means "use all", otherwise clamp to available
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())
コード例 #4
0
ファイル: tadbit_parse.py プロジェクト: aescrdni/TADbit
def check_options(opts):
    """Validate parsing options, set up logging, version log and tmp DB.

    Raises on missing mandatory parameters, initialises the process log
    inside ``opts.workdir`` (creating it when absent), optionally
    relocates the trace DB for LUSTRE-like file systems, and aborts when
    the exact same job was already computed.
    """

    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    if opts.type != 'map':
        raise NotImplementedError('ERROR: not yet there')

    if not opts.genome:
        raise Exception('ERROR: genome parameter required.')
    # (a second identical opts.workdir check was removed: it was
    #  unreachable after the first one above)

    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    if opts.workdir.endswith('/'):
        opts.workdir = opts.workdir[:-1]

    # write log
    # NOTE(review): 'newbie' is set but never read in this function —
    # possibly consumed by code outside this view; kept for safety.
    newbie = False
    if not path.exists(opts.workdir):
        newbie = True
        mkdir(opts.workdir)
    log_format = '[PARSING]   %(message)s'

    # reset logging handlers so basicConfig below takes effect
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        # fall back to a secondary log file if the first is not writable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # compare file *content* to the version string (readlines() returned
    # a list and never matched, rewriting the log on every run)
    write_log = True
    if path.exists(vlog_path):
        with open(vlog_path) as vlog:
            write_log = vlog.read() != dependencies
    if write_log:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file with a random 10-letter suffix
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            # best effort: keep going if the DB cannot be copied yet
            pass

    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                # clean up the temporary DB copy before aborting
                remove(path.join(dbdir, dbfile))
            exit(
                'WARNING: exact same job already computed, see JOBs table above'
            )
    except OSError:
        pass
コード例 #5
0
def check_options(opts):
    """Validate mapping options in-place and set up logging.

    Resolves the mapper binary, detects the GEM version, validates
    restriction-enzyme names, paths and FASTQ format, clamps the CPU
    count, normalises mapper parameters, prepares the trace DB
    (optionally relocated for LUSTRE-like file systems) and aborts when
    this exact job was already computed.
    """

    # resolve the mapper binary name, then its full path
    if not opts.mapper_binary:
        if opts.mapper == 'gem':
            opts.mapper_binary = 'gem-mapper'
        else:
            opts.mapper_binary = opts.mapper
    opts.mapper_binary = which(opts.mapper_binary)
    if not opts.mapper_binary:
        raise Exception(
            '\n\nERROR: Mapper binary not found, for GEM install it from:'
            '\nhttps://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%202/'
            '\n - Download the GEM-binaries-Linux-x86_64-core_i3 if'
            'have a recent computer, the '
            'GEM-binaries-Linux-x86_64-core_2 otherwise\n - '
            'Uncompress with "tar xjvf GEM-binaries-xxx.tbz2"\n - '
            'Copy the binary gem-mapper to /usr/local/bin/ for '
            'example (somewhere in your PATH).\n\nNOTE: GEM does '
            'not provide any binary for MAC-OS.')

    opts.gem_version = 0
    if opts.mapper == 'gem':
        opts.gem_version = None
        try:
            out, _ = Popen([opts.mapper_binary, '--version'],
                           stdout=PIPE,
                           stderr=STDOUT,
                           universal_newlines=True).communicate()
            # NOTE(review): reads the second character of the output as
            # the major version — fragile; confirm the output format.
            opts.gem_version = int(out[1])
        except ValueError:
            # unparsable version output: assume legacy GEM v2
            opts.gem_version = 2
            print('Falling to gem v2')

    if opts.fast_fragment:
        if opts.gem_version < 3:
            raise Exception('ERROR: Fast fragment mapping needs GEM v3')
        if not opts.fastq2 or not path.exists(opts.fastq2):
            raise Exception(
                'ERROR: Fast fragment mapping needs both fastq files. '
                'Please specify --fastq2')
        if opts.read != 0:
            raise Exception(
                'ERROR: Fast fragment mapping needs to be specified with --read 0'
            )
        if not opts.genome:
            raise Exception('ERROR: Fast fragment mapping needs '
                            'the genome parameter.')
    # check RE name
    if opts.renz == ['CHECK']:
        print('\nSearching for most probable restriction enzyme in file: %s' %
              (opts.fastq))
        try:
            pat, enz, pv = identify_re(opts.fastq, nreads=100000)
            print(' -> Most probable digested site: %s (pv: %f)' % (pat, pv))
            print(' -> Enzymes matching: %s' % (', '.join(enz)))
        except ValueError:
            print(' -> Nothing found...')
        exit()
    for n, renz in enumerate(opts.renz):
        if renz == 'NONE':
            opts.renz[n] = None
            continue
        try:
            _ = RESTRICTION_ENZYMES[renz]
        except KeyError:
            # a space was missing between the two message parts
            # (previously printed "...not found.Use one of...")
            print('\n\nERROR: restriction enzyme %s not found. ' % (renz) +
                  'Use one of:\n\n' + ' '.join(sorted(RESTRICTION_ENZYMES)) +
                  '\n\n')
            raise KeyError()
        except AttributeError:
            pass

    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    # number of cpus: 0 means "use all", otherwise clamp to available
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # check paths
    if opts.mapper == 'gem' and not path.exists(opts.index):
        raise IOError('ERROR: index file not found at ' + opts.index)

    if not path.exists(opts.fastq):
        raise IOError('ERROR: FASTQ file not found at ' + opts.fastq)

    if not is_fastq(opts.fastq):
        raise IOError(
            ('ERROR: FASTQ file %s wrong format, check') % (opts.fastq))

    try:
        # windows given as "beg:end" strings -> [[beg, end], ...]
        opts.windows = [[int(i) for i in win.split(':')]
                        for win in opts.windows]
    except TypeError:
        # no windows given (None): leave untouched
        pass

    mkdir(opts.workdir)
    # write log
    # if opts.mapping_only:
    log_format = '[MAPPING {} READ{}]   %(message)s'.format(
        opts.fastq, opts.read)
    # else:
    #     log_format = '[DEFAULT]   %(message)s'

    # reset logging handlers so basicConfig below takes effect
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        # fall back to a secondary log file if the first is not writable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # compare file *content* to the version string (readlines() returned
    # a list and never matched, rewriting the log on every run)
    write_log = True
    if path.exists(vlog_path):
        with open(vlog_path) as vlog:
            write_log = vlog.read() != dependencies
    if write_log:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # check mapper extra options
    if opts.mapper_param:
        if (len(opts.mapper_param) == 1 and
            ('-' in opts.mapper_param[0] or '--' in opts.mapper_param[0])):
            # Single string surrounded by quotes
            opts.mapper_param = opts.mapper_param[0].split()
        else:
            # "key:value" pairs -> dict
            opts.mapper_param = dict([o.split(':') for o in opts.mapper_param])
    else:
        opts.mapper_param = {}
    if opts.mapper == 'gem' and opts.gem_version < 3:
        gem_valid_option = set([
            "granularity", "q", "quality-format", "gem-quality-threshold",
            "mismatch-alphabet", "m", "e", "min-matched-bases",
            "max-big-indel-length", "s", "strata-after-best", "fast-mapping",
            "unique-mapping", "d", "D", "allow-incomplete-strata",
            "max-decoded-matches", "min-decoded-strata", "p",
            "paired-end-alignment", "b", "map-both-ends", "min-insert-size",
            "max-insert-size", "E", "max-extendable-matches",
            "max-extensions-per-match", "unique-pairing"
        ])
        for k in opts.mapper_param:
            if k not in gem_valid_option:
                # a space was missing between the two message parts and
                # "supported" was misspelled
                raise NotImplementedError(
                    ('ERROR: option "%s" not a valid GEM option '
                     'or not supported by this tool.') % k)

    # create empty DB if don't exists
    dbpath = path.join(opts.workdir, 'trace.db')
    open(dbpath, 'a').close()

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file with a random 10-letter suffix
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            # best effort: keep going if the DB cannot be copied yet
            pass

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            # clean up the temporary DB copy before aborting
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
コード例 #6
0
def check_options(opts):
    """Validate GEM-mapping options in-place and set up logging.

    Loads a configuration file if requested, resolves the GEM binary,
    validates the restriction enzyme, paths and GEM parameters, clamps
    the CPU count, initialises the process log and version log, and
    aborts when this exact job was already computed.
    """
    if opts.cfg:
        get_options_from_cfg(opts.cfg, opts)

    opts.gem_binary = which(opts.gem_binary)
    if not opts.gem_binary:
        raise Exception('\n\nERROR: GEM binary not found, install it from:'
                        '\nhttps://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%202/'
                        '\n - Download the GEM-binaries-Linux-x86_64-core_i3 if'
                        'have a recent computer, the '
                        'GEM-binaries-Linux-x86_64-core_2 otherwise\n - '
                        'Uncompress with "tar xjvf GEM-binaries-xxx.tbz2"\n - '
                        'Copy the binary gem-mapper to /usr/local/bin/ for '
                        'example (somewhere in your PATH).\n\nNOTE: GEM does '
                        'not provide any binary for MAC-OS.')

    # check RE name
    try:
        _ = RESTRICTION_ENZYMES[opts.renz]
    except KeyError:
        print ('\n\nERROR: restriction enzyme not found. Use one of:\n\n'
               + ' '.join(sorted(RESTRICTION_ENZYMES)) + '\n\n')
        raise KeyError()
    except AttributeError:
        pass

    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print ('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    # number of cpus: 0 means "use all", otherwise clamp to available
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # check paths
    if not path.exists(opts.index):
        raise IOError('ERROR: index file not found at ' + opts.index)

    if not path.exists(opts.fastq):
        raise IOError('ERROR: FASTQ file not found at ' + opts.fastq)

    # create tmp directory
    if not opts.tmp:
        opts.tmp = opts.workdir + '_tmp_r%d' % opts.read

    try:
        # windows given as "beg:end" strings -> [[beg, end], ...]
        opts.windows = [[int(i) for i in win.split(':')]
                        for win in opts.windows]
    except TypeError:
        # no windows given (None): leave untouched
        pass

    mkdir(opts.workdir)
    # write log
    # if opts.mapping_only:
    log_format = '[MAPPING {} READ{}]   %(message)s'.format(opts.fastq, opts.read)
    # else:
    #     log_format = '[DEFAULT]   %(message)s'

    # reset logging handlers so basicConfig below takes effect
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        # filemode 'a': append; the previous 'aw' is not a valid mode
        # and raises ValueError under Python 3
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a')
    except IOError:
        # fall back to a secondary log file if the first is not writable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # compare file *content* to the version string (readlines() returned
    # a list and never matched, rewriting the log on every run)
    write_log = True
    if path.exists(vlog_path):
        with open(vlog_path) as vlog:
            write_log = vlog.read() != dependencies
    if write_log:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # check GEM mapper extra options: "key:value" pairs -> dict
    if opts.gem_param:
        opts.gem_param = dict([o.split(':') for o in opts.gem_param])
    else:
        opts.gem_param = {}
    gem_valid_option = set(["granularity", "q", "quality-format",
                            "gem-quality-threshold", "mismatch-alphabet",
                            "m", "e", "min-matched-bases",
                            "max-big-indel-length", "s", "strata-after-best",
                            "fast-mapping", "unique-mapping", "d", "D",
                            "allow-incomplete-strata", "max-decoded-matches",
                            "min-decoded-strata", "p", "paired-end-alignment",
                            "b", "map-both-ends", "min-insert-size",
                            "max-insert-size", "E", "max-extendable-matches",
                            "max-extensions-per-match", "unique-pairing"])
    for k in opts.gem_param:
        if k not in gem_valid_option:
            # a space was missing between the two message parts and
            # "supported" was misspelled
            raise NotImplementedError(('ERROR: option "%s" not a valid GEM option '
                                       'or not supported by this tool.') % k)
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        exit('WARNING: exact same job already computed, see JOBs table above')
コード例 #7
0
ファイル: model_and_analyze.py プロジェクト: jaquol/tadbit
def get_options():
    """
    parse option from call

    """
    parser = ArgumentParser(
        usage="%(prog)s [options] [--cfg CONFIG_PATH]",
        formatter_class=lambda prog: HelpFormatter(prog, width=95,
                                                   max_help_position=27))
    glopts = parser.add_argument_group('General arguments')
    taddet = parser.add_argument_group('TAD detection arguments')
    optimo = parser.add_argument_group('Optimization of IMP arguments')
    modelo = parser.add_argument_group('Modeling with optimal IMP arguments')
    descro = parser.add_argument_group('Descriptive, optional arguments')
    analyz = parser.add_argument_group('Output arguments')

    ## Define analysis actions:
    actions = {0  : "do nothing",
               1  : "column filtering",
               2  : "TAD borders",
               3  : "TAD alignment",
               4  : "optimization plot",
               5  : "correlation real/models",
               6  : "z-score plot",
               7  : "constraints",
               8  : "objective function",
               9  : "centroid",
               10 : "consistency",
               11 : "density",
               12 : "contact map",
               13 : "walking angle",
               14 : "persistence length",
               15 : "accessibility",
               16 : "interaction"}

    parser.add_argument('--usage', dest='usage', action="store_true",
                        default=False,
                        help='''show detailed usage documentation, with examples
                        and exit''')
    parser.add_argument('--cfg', dest='cfg', metavar="PATH", action='store',
                      default=None, type=str,
                      help='path to a configuration file with predefined ' +
                      'parameters')
    parser.add_argument('--analyze_only', dest='analyze_only',
                        action='store_true', default=False,
                        help=('load precomputed models in outdir, ' +
                              'skip optimization, modeling'))
    parser.add_argument('--optimize_only', dest='optimize_only', default=False,
                        action='store_true',
                        help='do the optimization of the region and exit')
    parser.add_argument('--tad_only', dest='tad_only', action="store_true",
                        default=False,
                        help='[%(default)s] exit after searching for TADs')
    parser.add_argument('--ncpus', dest='ncpus', metavar="INT", default=1,
                        type=int, help='[%(default)s] Number of CPUs to use')

    #########################################
    # GENERAL
    glopts.add_argument(
        '--root_path', dest='root_path', metavar="PATH",
        default='', type=str,
        help=('path to search for data files (just pass file name' +
              'in "data")'))
    glopts.add_argument('--data', dest='data', metavar="PATH", nargs='+',
                        type=str,
                        help='''path to file(s) with Hi-C data matrix. If many,
                        experiments will be summed up. I.e.: --data
                        replicate_1.txt replicate_2.txt''')
    glopts.add_argument('--xname', dest='xname', metavar="STR", nargs='+',
                        default=[], type=str,
                        help='''[file name] experiment name(s). Use same order
                        as data.''')
    glopts.add_argument('--norm', dest='norm', metavar="PATH", nargs='+',
                        type=str,
                        help='path to file(s) with normalizedHi-C data matrix.')
    glopts.add_argument('--nodiag', dest='nodiag', action='store_true',
                        help='''If the matrix does not contain self interacting
                        bins (only zeroes in the diagonal)''')
    glopts.add_argument('--filt', dest='filt', metavar='INT', default=90,
                        help='''Filter out column with more than a given
                        percentage of zeroes''')
    glopts.add_argument('--crm', dest='crm', metavar="NAME",
                        help='chromosome name')
    glopts.add_argument('--beg', dest='beg', metavar="INT", type=float,
                        default=None,
                        help='genomic coordinate from which to start modeling')
    glopts.add_argument('--end', dest='end', metavar="INT", type=float,
                        help='genomic coordinate where to end modeling')
    glopts.add_argument('--res', dest='res', metavar="INT", type=int,
                        help='resolution of the Hi-C experiment')
    glopts.add_argument('--outdir', dest='outdir', metavar="PATH",
                        default=None,
                        help='out directory for results')

    #########################################
    # TADs
    taddet.add_argument('--tad', dest='tad', action="store_true", default=False,
                        help='[%(default)s] search for TADs in experiments')
    taddet.add_argument('--centromere', dest='centromere', action="store_true",
                        default=False,
                        help='[%(default)s] search for centromeric region')
    taddet.add_argument('--group', dest='group', nargs='+', type=int,
                        default=0, metavar='INT',
                        help='''[all together] How to group Hi-C experiments for
                        the detection of TAD borders. I.e.: "--exp_group 2 2 1"
                        first 2 experiments used together, next 2 also, and last
                        alone (batch_mode option used)''')

    #########################################
    # MODELING
    modelo.add_argument('--nmodels_mod', dest='nmodels_mod', metavar="INT",
                        default='5000', type=int,
                        help=('[%(default)s] number of models to generate for' +
                              ' modeling'))
    modelo.add_argument('--nkeep_mod', dest='nkeep_mod', metavar="INT",
                        default='1000', type=int,
                        help=('[%(default)s] number of models to keep for ' +
                        'modeling'))

    #########################################
    # OPTIMIZATION
    optimo.add_argument('--maxdist', action='store', metavar="LIST",
                        default='400', dest='maxdist',
                        help='range of numbers for maxdist' +
                        ', i.e. 400:1000:100 -- or just a number')
    optimo.add_argument('--upfreq', dest='upfreq', metavar="LIST",
                        default='0',
                        help='range of numbers for upfreq' +
                        ', i.e. 0:1.2:0.3 --  or just a number')
    optimo.add_argument('--lowfreq', dest='lowfreq', metavar="LIST",
                        default='0',
                        help='range of numbers for lowfreq' +
                        ', i.e. -1.2:0:0.3 -- or just a number')
    optimo.add_argument('--scale', dest='scale', metavar="LIST",
                        default="0.01",
                        help='[%(default)s] range of numbers to be test as ' +
                        'optimal scale value, i.e. 0.005:0.01:0.001 -- Can ' +
                        'also pass only one number')
    optimo.add_argument('--dcutoff', dest='dcutoff', metavar="LIST",
                        default="2",
                        help='[%(default)s] range of numbers to be test as ' +
                        'optimal distance cutoff parameter (distance, in ' +
                        'number of beads, from which to consider 2 beads as ' +
                        'being close), i.e. 1:5:0.5 -- Can also pass only one' +
                        ' number')
    optimo.add_argument('--nmodels_opt', dest='nmodels_opt', metavar="INT",
                        default='500', type=int,
                        help='[%(default)s] number of models to generate for ' +
                        'optimization')
    optimo.add_argument('--nkeep_opt', dest='nkeep_opt', metavar="INT",
                        default='100', type=int,
                        help='[%(default)s] number of models to keep for ' +
                        'optimization')
    optimo.add_argument('--force_opt', dest='optimize_from_scratch',
                        action="store_true", default=False,
                        help='''[%(default)s] do not take into account previous
                        optimizations. Usefull for running in parallel in a
                        cluster for example.''')

    #########################################
    # DESCRIPTION
    descro.add_argument('--species', dest='species', metavar="STRING",
                        default='UNKNOWN',
                        help='species name, with no spaces, i.e.: homo_sapiens')
    descro.add_argument('--cell', dest='cell', metavar="STRING",
                        help='cell type name')
    descro.add_argument('--exp_type', dest='exp_type', metavar="STRING",
                        help='experiment type name (i.e.: Hi-C)')
    descro.add_argument('--assembly', dest='assembly', metavar="STRING",
                        default=None,
                        help='''NCBI ID of the original assembly
                        (i.e.: NCBI36 for human)''')
    descro.add_argument('--enzyme', dest='enzyme', metavar="STRING",
                        default=None,
                        help='''name of the enzyme used to digest
                        chromatin (i.e. HindIII)''')
    descro.add_argument('--identifier', dest='identifier', metavar="STRING",
                        default=None,
                        help='''NCBI identifier of the experiment''')
    descro.add_argument('--project', dest='project', metavar="STRING",
                        default=None,
                        help='''project name''')


    #########################################
    # OUTPUT
    analyz.add_argument('--analyze', dest='analyze', nargs='+',
                        choices=range(len(actions)), type=int,
                        default=range(2, len(actions)), metavar='INT',
                        help=('''[%s] list of numbers representing the
                        analysis to be done. Choose between:
                        %s''' % (' '.join([str(i) for i in range(
                                  2, len(actions))]),
                                 '\n'.join(['%s) %s' % (k, actions[k])
                                            for k in actions]))))
    analyz.add_argument('--not_write_cmm', dest='not_write_cmm',
                        default=False, action='store_true',
                        help='''[%(default)s] do not generate cmm files for each
                        model (Chimera input)''')
    analyz.add_argument('--not_write_xyz', dest='not_write_xyz',
                        default=False, action='store_true',
                        help='''[%(default)s] do not generate xyz files for each
                        model (3D coordinates)''')

    parser.add_argument_group(optimo)
    parser.add_argument_group(modelo)
    parser.add_argument_group(descro)
    parser.add_argument_group(analyz)
    opts = parser.parse_args()


    if opts.usage:
        print __doc__
        exit()

    log = '\tSummary of arguments:\n'
    # merger opts with CFG file and write summary
    args = reduce(lambda x, y: x + y, [i.strip('-').split('=')
                                       for i in sys.argv])
    new_opts = {}
    if opts.cfg:
        for line in open(opts.cfg):
            if not '=' in line:
                continue
            if line.startswith('#'):
                continue
            key, value = line.split('#')[0].strip().split('=')
            key = key.strip()
            value = value.strip()
            if value == 'True':
                value = True
            elif value == 'False':
                value = False
            elif key in ['data', 'norm', 'xname', 'group', 'analyze']:
                new_opts.setdefault(key, []).extend(value.split())
                continue
            new_opts[key] = value
    # bad key in configuration file
    for bad_k in set(new_opts.keys()) - set(opts.__dict__.keys()):
        sys.stderr.write('WARNING: parameter "%s" not recognized' % (bad_k))
    for key in sorted(opts.__dict__.keys()):
        if key in args:
            log += '  * Command setting   %13s to %s\n' % (
                key, opts.__dict__[key])
        elif key in new_opts:
            opts.__dict__[key] = new_opts[key]
            log += '  - Config. setting   %13s to %s\n' % (
                key, new_opts[key])
        else:
            log += '  o Default setting   %13s to %s\n' % (
                key, opts.__dict__[key])

    # rename analysis actions
    for i, j in enumerate(opts.analyze):
        opts.analyze[i] = actions[int(j)]

    if not opts.data and not opts.norm:
        sys.stderr.write('MISSING data')
        exit(parser.print_help())
    if not opts.outdir:
        sys.stderr.write('MISSING outdir')
        exit(parser.print_help())
    if not opts.crm:
        sys.stderr.write('MISSING crm NAME')
        exit(parser.print_help())
    if not opts.res:
        sys.stderr.write('MISSING resolution')
        exit(parser.print_help())
    if not opts.analyze_only:
        if not opts.maxdist:
            sys.stderr.write('MISSING maxdist')
            exit(parser.print_help())
        if not opts.lowfreq:
            sys.stderr.write('MISSING lowfreq')
            exit(parser.print_help())
        if not opts.upfreq:
            sys.stderr.write('MISSING upfreq')
            exit(parser.print_help())

    if not opts.beg and not opts.tad_only:
        sys.stderr.write('WARNING: no begin coordinate given all')
    if not opts.end and not opts.tad_only:
        sys.stderr.write('WARNING: no begin coordinate given all')

    # groups for TAD detection
    if not opts.data:
        opts.data = [None] * len(opts.norm)
    else:
        opts.norm = [None] * len(opts.data)
    if not opts.group:
        opts.group = [len(opts.data)]
    else:
        opts.group = [int(i) for i in opts.group]

    if sum(opts.group) > len(opts.data):
        logging.info('ERROR: Number of experiments in groups larger than ' +
                     'the number of Hi-C data files given.')
        exit()

    # this options should stay as this now
    # opts.scale = '0.01'

    # switch to number
    opts.nmodels_mod = int(opts.nmodels_mod)
    opts.nkeep_mod   = int(opts.nkeep_mod  )
    opts.nmodels_opt = int(opts.nmodels_opt)
    opts.nkeep_opt   = int(opts.nkeep_opt  )
    opts.ncpus       = int(opts.ncpus      )
    opts.res         = int(opts.res        )

    # TODO: UNDER TEST
    opts.container   = None #['cylinder', 1000, 5000, 100]

    # do the division to bins
    if not opts.tad_only:
        try:
            opts.beg = int(float(opts.beg) / opts.res)
            opts.end = int(float(opts.end) / opts.res)
            if opts.end - opts.beg <= 2:
                raise Exception('"beg" and "end" parameter should be given in ' +
                                'genomic coordinates, not bin')
        except TypeError:
            pass

    # Create out-directory
    name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end)
    if not os.path.exists(os.path.join(opts.outdir, name)):
        os.makedirs(os.path.join(opts.outdir, name))

    # write version log
    if not os.path.exists(os.path.join(opts.outdir,
                                       'TADbit_and_dependencies_versions.log')):
        vlog = os.path.join(opts.outdir, 'TADbit_and_dependencies_versions.log')
        vlog = open(vlog, 'w')
        vlog.write(get_dependencies_version())
        vlog.close()

    # write log
    if opts.optimize_only:
        log_format = '[OPTIMIZATION {}_{}_{}_{}_{}]   %(message)s'.format(
            opts.maxdist, opts.upfreq, opts.lowfreq, opts.scale, opts.dcutoff)
    elif opts.analyze_only:
        log_format = '[ANALYZE]   %(message)s'
    elif opts.tad_only:
        log_format = '[TAD]   %(message)s'
    else:
        log_format = '[DEFAULT]   %(message)s'
    try:
        logging.basicConfig(filename=os.path.join(opts.outdir, name, name + '.log'),
                            level=logging.INFO, format=log_format)
    except IOError:
        logging.basicConfig(filename=os.path.join(opts.outdir, name, name + '.log2'),
                            level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(('\n' + log_format.replace('   %(message)s', '')
                  ).join(log.split('\n')))

    # update path to Hi-C data adding root directory
    if opts.root_path and opts.data[0]:
        for i in xrange(len(opts.data)):
            logging.info(os.path.join(opts.root_path, opts.data[i]))
            opts.data[i] = os.path.join(opts.root_path, opts.data[i])

    # update path to Hi-C norm adding root directory
    if opts.root_path and opts.norm[0]:
        for i in xrange(len(opts.norm)):
            logging.info(os.path.join(opts.root_path, opts.norm[i]))
            opts.norm[i] = os.path.join(opts.root_path, opts.norm[i])

    return opts
# Code example #8
def get_options():
    """
    Parse command-line options of the read-mapping pipeline.

    Validates compulsory arguments, prepares the output/temporary
    directories, configures logging, and writes a version log of TADbit
    and its dependencies.

    :returns: the argparse Namespace of validated options (side effects:
       creates the output directory and installs logging handlers)
    """
    parser = ArgumentParser(usage="%(prog)s [options] [--cfg CONFIG_PATH]",
                            formatter_class=lambda prog: HelpFormatter(
                                prog, width=95, max_help_position=27))

    glopts = parser.add_argument_group('General options')
    mapper = parser.add_argument_group('Mapping options')
    descro = parser.add_argument_group('Descriptive, optional arguments')

    glopts.add_argument('--cfg',
                        dest='cfg',
                        metavar="PATH",
                        action='store',
                        default=None,
                        type=str,
                        help='path to a configuration file with predefined ' +
                        'parameters')

    glopts.add_argument('--qc_plot',
                        dest='quality_plot',
                        action='store_true',
                        default=False,
                        help='generate a quality plot of FASTQ and exits')

    glopts.add_argument('-o',
                        '--output',
                        dest='output',
                        metavar="PATH",
                        action='store',
                        default=None,
                        type=str,
                        help='path to output folder')

    glopts.add_argument('--fastq',
                        dest='fastq',
                        metavar="PATH",
                        action='store',
                        default=None,
                        type=str,
                        help='path to a FASTQ files (can be compressed files)')

    glopts.add_argument('--genome',
                        dest='genome',
                        metavar="PATH",
                        nargs='+',
                        type=str,
                        help='''paths to file(s) with FASTA files of the
                        reference genome. If many, files will be concatenated.
                        I.e.: --fasta chr_1.fa chr_2.fa
                        In this last case, order is important or the rest of the
                        analysis.''')

    glopts.add_argument(
        '--index',
        dest='index',
        metavar="PATH",
        type=str,
        help='''paths to file(s) with indexed FASTA files of the
                        reference genome.''')

    glopts.add_argument('--read',
                        dest='read',
                        metavar="INT",
                        type=str,
                        help='read number')

    glopts.add_argument('--renz',
                        dest='renz',
                        metavar="STR",
                        type=str,
                        help='restriction enzyme name')

    glopts.add_argument('--chr_name',
                        dest='chr_name',
                        metavar="STR",
                        nargs='+',
                        default=[],
                        type=str,
                        help='''[fasta header] chromosome name(s). Used in the
                        same order as data.''')

    glopts.add_argument('--tmp',
                        dest='tmp',
                        metavar="PATH",
                        action='store',
                        default=None,
                        type=str,
                        help='''path to a temporary directory (default next to
                      output directory)''')

    mapper.add_argument('--strategy',
                        dest='strategy',
                        default='frag',
                        choices=['frag', 'iter'],
                        help='''mapping strategy, can be "frag" for fragment
                        based mapping or "iter" for iterative mapping''')

    mapper.add_argument('--windows',
                        dest='windows',
                        default='auto',
                        nargs='+',
                        help='''for iterative mapping, defines windows. e.g.
                        --windows 20 25 30 35 40 45 50''')

    mapper.add_argument(
        '--read_length',
        dest='read_length',
        type=int,
        help='''read length, compulsory in iterative mapping with
                        --windows auto''')

    mapper.add_argument('--mapping_only',
                        dest='mapping_only',
                        action='store_true',
                        help='only do the mapping does not parse results')

    descro.add_argument('--species',
                        dest='species',
                        metavar="STR",
                        type=str,
                        help='species name')

    descro.add_argument(
        '--descr',
        dest='description',
        metavar="LIST",
        nargs='+',
        type=str,
        help='''extra descriptive fields each filed separated by
                        coma, and inside each, name and value separated by column: 
                        --descr=cell:lymphoblast,flowcell:C68AEACXX,index:24nf'''
    )

    parser.add_argument_group(glopts)
    parser.add_argument_group(descro)
    parser.add_argument_group(mapper)
    opts = parser.parse_args()

    if opts.cfg:
        get_options_from_cfg(opts.cfg, opts)

    # iterative mapping with automatic windows requires the read length
    # BUG FIX: the option is stored under dest='windows' (see --windows
    # above); 'opts.window' raised AttributeError
    if (opts.strategy == 'iter' and opts.windows == 'auto'
            and not opts.read_length):
        raise Exception('ERROR: need to input read_length')
    # check RE name
    try:
        _ = RESTRICTION_ENZYMES[opts.renz]
    except KeyError:
        print('\n\nERROR: restriction enzyme not found. Use one of:\n\n' +
              ' '.join(sorted(RESTRICTION_ENZYMES)) + '\n\n')
        raise KeyError()

    # check compulsory options
    if not opts.quality_plot:
        if not opts.genome: raise Exception('ERROR: genome option required.')
        if not opts.index: raise Exception('ERROR: index  option required.')
    if not opts.output: raise Exception('ERROR: output option required.')
    if not opts.fastq: raise Exception('ERROR: fastq  option required.')
    if not opts.renz: raise Exception('ERROR: renz   option required.')
    if not opts.tmp:
        # default temporary directory: next to the output directory
        opts.tmp = opts.output + '_tmp_r' + opts.read

    if opts.strategy == 'frag':
        opts.windows = None

    if opts.strategy == 'iter':
        raise NotImplementedError()

    # NOTE(review): relies on a POSIX shell 'mkdir -p'; kept as-is to
    # preserve behavior, but os.makedirs would be portable
    system('mkdir -p ' + opts.output)
    # write log
    if opts.mapping_only:
        log_format = '[MAPPING {} READ{}]   %(message)s'.format(
            opts.fastq, opts.read)
    else:
        log_format = '[DEFAULT]   %(message)s'

    # reset logging
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.output, 'process.log'))
        # BUG FIX: 'aw' is not a valid file mode; append mode is 'a'
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.output, 'process.log'),
                            filemode='a')
    except IOError:
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.output, 'process.log2'),
                            filemode='a')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log, only when missing or out of date
    vlog_path = path.join(opts.output, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # BUG FIX: compare file content as one string -- readlines() returns a
    # list that can never equal the 'dependencies' string, so the log was
    # unconditionally rewritten on every run
    if not path.exists(vlog_path) or open(vlog_path).read() != dependencies:
        logging.info('Writing versions of TADbit and dependencies')
        vlog = open(vlog_path, 'w')
        vlog.write(dependencies)
        vlog.close()

    return opts
# Code example #9
# File: model_and_analyze.py -- Project: MarcoDiS/TADbit
def get_options():
    """
    parse option from call

    """
    parser = ArgumentParser(
        usage="%(prog)s [options] [--cfg CONFIG_PATH]",
        formatter_class=lambda prog: HelpFormatter(prog, width=95, max_help_position=27),
    )
    glopts = parser.add_argument_group("General arguments")
    taddet = parser.add_argument_group("TAD detection arguments")
    optimo = parser.add_argument_group("Optimization of IMP arguments")
    modelo = parser.add_argument_group("Modeling with optimal IMP arguments")
    descro = parser.add_argument_group("Descriptive, optional arguments")
    analyz = parser.add_argument_group("Output arguments")

    ## Define analysis actions:
    actions = {
        0: "do nothing",
        1: "column filtering",
        2: "TAD borders",
        3: "TAD alignment",
        4: "optimization plot",
        5: "correlation real/models",
        6: "z-score plot",
        7: "constraints",
        8: "objective function",
        9: "centroid",
        10: "consistency",
        11: "density",
        12: "contact map",
        13: "walking angle",
        14: "persistence length",
        15: "accessibility",
        16: "interaction",
    }

    parser.add_argument(
        "--usage",
        dest="usage",
        action="store_true",
        default=False,
        help="""show detailed usage documentation, with examples
                        and exit""",
    )
    parser.add_argument(
        "--cfg",
        dest="cfg",
        metavar="PATH",
        action="store",
        default=None,
        type=str,
        help="path to a configuration file with predefined " + "parameters",
    )
    parser.add_argument(
        "--analyze_only",
        dest="analyze_only",
        action="store_true",
        default=False,
        help=("load precomputed models in outdir, " + "skip optimization, modeling"),
    )
    parser.add_argument(
        "--optimize_only",
        dest="optimize_only",
        default=False,
        action="store_true",
        help="do the optimization of the region and exit",
    )
    parser.add_argument(
        "--tad_only",
        dest="tad_only",
        action="store_true",
        default=False,
        help="[%(default)s] exit after searching for TADs",
    )
    parser.add_argument(
        "--ncpus", dest="ncpus", metavar="INT", default=1, type=int, help="[%(default)s] Number of CPUs to use"
    )

    #########################################
    # GENERAL
    glopts.add_argument(
        "--root_path",
        dest="root_path",
        metavar="PATH",
        default="",
        type=str,
        help=("path to search for data files (just pass file name" + 'in "data")'),
    )
    glopts.add_argument(
        "--data",
        dest="data",
        metavar="PATH",
        nargs="+",
        type=str,
        help="""path to file(s) with Hi-C data matrix. If many,
                        experiments will be summed up. I.e.: --data
                        replicate_1.txt replicate_2.txt""",
    )
    glopts.add_argument(
        "--xname",
        dest="xname",
        metavar="STR",
        nargs="+",
        default=[],
        type=str,
        help="""[file name] experiment name(s). Use same order
                        as data.""",
    )
    glopts.add_argument(
        "--norm",
        dest="norm",
        metavar="PATH",
        nargs="+",
        type=str,
        help="path to file(s) with normalizedHi-C data matrix.",
    )
    glopts.add_argument(
        "--nodiag",
        dest="nodiag",
        action="store_true",
        help="""If the matrix does not contain self interacting
                        bins (only zeroes in the diagonal)""",
    )
    glopts.add_argument(
        "--filt",
        dest="filt",
        metavar="INT",
        default=90,
        help="""Filter out column with more than a given
                        percentage of zeroes""",
    )
    glopts.add_argument("--crm", dest="crm", metavar="NAME", help="chromosome name")
    glopts.add_argument(
        "--beg",
        dest="beg",
        metavar="INT",
        type=float,
        default=None,
        help="genomic coordinate from which to start modeling",
    )
    glopts.add_argument("--end", dest="end", metavar="INT", type=float, help="genomic coordinate where to end modeling")
    glopts.add_argument("--res", dest="res", metavar="INT", type=int, help="resolution of the Hi-C experiment")
    glopts.add_argument("--outdir", dest="outdir", metavar="PATH", default=None, help="out directory for results")

    #########################################
    # TADs
    taddet.add_argument(
        "--tad", dest="tad", action="store_true", default=False, help="[%(default)s] search for TADs in experiments"
    )
    taddet.add_argument(
        "--centromere",
        dest="centromere",
        action="store_true",
        default=False,
        help="[%(default)s] search for centromeric region",
    )
    taddet.add_argument(
        "--group",
        dest="group",
        nargs="+",
        type=int,
        default=0,
        metavar="INT",
        help="""[all together] How to group Hi-C experiments for
                        the detection of TAD borders. I.e.: "--exp_group 2 2 1"
                        first 2 experiments used together, next 2 also, and last
                        alone (batch_mode option used)""",
    )

    #########################################
    # MODELING
    modelo.add_argument(
        "--nmodels_mod",
        dest="nmodels_mod",
        metavar="INT",
        default="5000",
        type=int,
        help=("[%(default)s] number of models to generate for" + " modeling"),
    )
    modelo.add_argument(
        "--nkeep_mod",
        dest="nkeep_mod",
        metavar="INT",
        default="1000",
        type=int,
        help=("[%(default)s] number of models to keep for " + "modeling"),
    )

    #########################################
    # OPTIMIZATION
    optimo.add_argument(
        "--maxdist",
        action="store",
        metavar="LIST",
        default="400",
        dest="maxdist",
        help="range of numbers for maxdist" + ", i.e. 400:1000:100 -- or just a number",
    )
    optimo.add_argument(
        "--upfreq",
        dest="upfreq",
        metavar="LIST",
        default="0",
        help="range of numbers for upfreq" + ", i.e. 0:1.2:0.3 --  or just a number",
    )
    optimo.add_argument(
        "--lowfreq",
        dest="lowfreq",
        metavar="LIST",
        default="0",
        help="range of numbers for lowfreq" + ", i.e. -1.2:0:0.3 -- or just a number",
    )
    optimo.add_argument(
        "--scale",
        dest="scale",
        metavar="LIST",
        default="0.01",
        help="[%(default)s] range of numbers to be test as "
        + "optimal scale value, i.e. 0.005:0.01:0.001 -- Can "
        + "also pass only one number",
    )
    optimo.add_argument(
        "--dcutoff",
        dest="dcutoff",
        metavar="LIST",
        default="2",
        help="[%(default)s] range of numbers to be test as "
        + "optimal distance cutoff parameter (distance, in "
        + "number of beads, from which to consider 2 beads as "
        + "being close), i.e. 1:5:0.5 -- Can also pass only one"
        + " number",
    )
    optimo.add_argument(
        "--nmodels_opt",
        dest="nmodels_opt",
        metavar="INT",
        default="500",
        type=int,
        help="[%(default)s] number of models to generate for " + "optimization",
    )
    optimo.add_argument(
        "--nkeep_opt",
        dest="nkeep_opt",
        metavar="INT",
        default="100",
        type=int,
        help="[%(default)s] number of models to keep for " + "optimization",
    )
    optimo.add_argument(
        "--force_opt",
        dest="optimize_from_scratch",
        action="store_true",
        default=False,
        help="""[%(default)s] do not take into account previous
                        optimizations. Usefull for running in parallel in a
                        cluster for example.""",
    )

    #########################################
    # DESCRIPTION
    descro.add_argument(
        "--species",
        dest="species",
        metavar="STRING",
        default="UNKNOWN",
        help="species name, with no spaces, i.e.: homo_sapiens",
    )
    descro.add_argument("--cell", dest="cell", metavar="STRING", help="cell type name")
    descro.add_argument("--exp_type", dest="exp_type", metavar="STRING", help="experiment type name (i.e.: Hi-C)")
    descro.add_argument(
        "--assembly",
        dest="assembly",
        metavar="STRING",
        default=None,
        help="""NCBI ID of the original assembly
                        (i.e.: NCBI36 for human)""",
    )
    descro.add_argument(
        "--enzyme",
        dest="enzyme",
        metavar="STRING",
        default=None,
        help="""name of the enzyme used to digest
                        chromatin (i.e. HindIII)""",
    )
    descro.add_argument(
        "--identifier", dest="identifier", metavar="STRING", default=None, help="""NCBI identifier of the experiment"""
    )
    descro.add_argument("--project", dest="project", metavar="STRING", default=None, help="""project name""")

    #########################################
    # OUTPUT
    analyz.add_argument(
        "--analyze",
        dest="analyze",
        nargs="+",
        choices=range(len(actions)),
        type=int,
        default=range(2, len(actions)),
        metavar="INT",
        help=(
            """[%s] list of numbers representing the
                        analysis to be done. Choose between:
                        %s"""
            % (
                " ".join([str(i) for i in range(2, len(actions))]),
                "\n".join(["%s) %s" % (k, actions[k]) for k in actions]),
            )
        ),
    )
    analyz.add_argument(
        "--not_write_cmm",
        dest="not_write_cmm",
        default=False,
        action="store_true",
        help="""[%(default)s] do not generate cmm files for each
                        model (Chimera input)""",
    )
    analyz.add_argument(
        "--not_write_xyz",
        dest="not_write_xyz",
        default=False,
        action="store_true",
        help="""[%(default)s] do not generate xyz files for each
                        model (3D coordinates)""",
    )
    analyz.add_argument(
        "--not_write_json",
        dest="not_write_json",
        default=False,
        action="store_true",
        help="""[%(default)s] do not generate json file.""",
    )

    parser.add_argument_group(optimo)
    parser.add_argument_group(modelo)
    parser.add_argument_group(descro)
    parser.add_argument_group(analyz)
    opts = parser.parse_args()

    if opts.usage:
        print __doc__
        exit()

    log = "\tSummary of arguments:\n"
    # merger opts with CFG file and write summary
    args = reduce(lambda x, y: x + y, [i.strip("-").split("=") for i in sys.argv])
    new_opts = {}
    if opts.cfg:
        for line in open(opts.cfg):
            if not "=" in line:
                continue
            if line.startswith("#"):
                continue
            key, value = line.split("#")[0].strip().split("=")
            key = key.strip()
            value = value.strip()
            if value == "True":
                value = True
            elif value == "False":
                value = False
            elif key in ["data", "norm", "xname", "group", "analyze"]:
                new_opts.setdefault(key, []).extend(value.split())
                continue
            new_opts[key] = value
    # bad key in configuration file
    opts.__dict__["description"] = {}
    for bad_k in set(new_opts.keys()) - set(opts.__dict__.keys()):
        sys.stderr.write('WARNING: parameter "%s" not recognized (used as description)\n' % (bad_k))
        try:
            opts.__dict__["description"][bad_k] = int(new_opts[bad_k])
        except ValueError:
            opts.__dict__["description"][bad_k] = new_opts[bad_k]
    for key in sorted(opts.__dict__.keys()):
        if key in args:
            log += "  * Command setting   %13s to %s\n" % (key, opts.__dict__[key])
        elif key in new_opts:
            opts.__dict__[key] = new_opts[key]
            log += "  - Config. setting   %13s to %s\n" % (key, new_opts[key])
        else:
            log += "  o Default setting   %13s to %s\n" % (key, opts.__dict__[key])

    # rename analysis actions
    for i, j in enumerate(opts.analyze):
        opts.analyze[i] = actions[int(j)]

    if not opts.data and not opts.norm:
        sys.stderr.write("MISSING data")
        exit(parser.print_help())
    if not opts.outdir:
        sys.stderr.write("MISSING outdir")
        exit(parser.print_help())
    if not opts.crm:
        sys.stderr.write("MISSING crm NAME")
        exit(parser.print_help())
    if not opts.res:
        sys.stderr.write("MISSING resolution")
        exit(parser.print_help())
    if not opts.analyze_only:
        if not opts.maxdist:
            sys.stderr.write("MISSING maxdist")
            exit(parser.print_help())
        if not opts.lowfreq:
            sys.stderr.write("MISSING lowfreq")
            exit(parser.print_help())
        if not opts.upfreq:
            sys.stderr.write("MISSING upfreq")
            exit(parser.print_help())

    if not opts.beg and not opts.tad_only:
        sys.stderr.write("WARNING: no begin coordinate given all")
    if not opts.end and not opts.tad_only:
        sys.stderr.write("WARNING: no begin coordinate given all")

    # groups for TAD detection
    if not opts.data:
        opts.data = [None] * len(opts.norm)
    else:
        opts.norm = [None] * len(opts.data)
    if not opts.group:
        opts.group = [len(opts.data)]
    else:
        opts.group = [int(i) for i in opts.group]

    if sum(opts.group) > len(opts.data):
        logging.info("ERROR: Number of experiments in groups larger than " + "the number of Hi-C data files given.")
        exit()

    # this options should stay as this now
    # opts.scale = '0.01'

    # switch to number
    opts.nmodels_mod = int(opts.nmodels_mod)
    opts.nkeep_mod = int(opts.nkeep_mod)
    opts.nmodels_opt = int(opts.nmodels_opt)
    opts.nkeep_opt = int(opts.nkeep_opt)
    opts.ncpus = int(opts.ncpus)
    opts.res = int(opts.res)

    # TODO: UNDER TEST
    opts.container = None  # ['cylinder', 1000, 5000, 100]

    # do the division to bins
    if not opts.tad_only:
        try:
            opts.beg = int(float(opts.beg) / opts.res)
            opts.end = int(float(opts.end) / opts.res)
            if opts.end - opts.beg <= 2:
                raise Exception('"beg" and "end" parameter should be given in ' + "genomic coordinates, not bin")
        except TypeError:
            pass

    # Create out-directory
    name = "{0}_{1}_{2}".format(opts.crm, opts.beg, opts.end)
    if not os.path.exists(os.path.join(opts.outdir, name)):
        os.makedirs(os.path.join(opts.outdir, name))

    # write version log
    if not os.path.exists(os.path.join(opts.outdir, "TADbit_and_dependencies_versions.log")):
        vlog = os.path.join(opts.outdir, "TADbit_and_dependencies_versions.log")
        vlog = open(vlog, "w")
        vlog.write(get_dependencies_version())
        vlog.close()

    # write log
    if opts.optimize_only:
        log_format = "[OPTIMIZATION {}_{}_{}_{}_{}]   %(message)s".format(
            opts.maxdist, opts.upfreq, opts.lowfreq, opts.scale, opts.dcutoff
        )
    elif opts.analyze_only:
        log_format = "[ANALYZE]   %(message)s"
    elif opts.tad_only:
        log_format = "[TAD]   %(message)s"
    else:
        log_format = "[DEFAULT]   %(message)s"
    try:
        logging.basicConfig(
            filename=os.path.join(opts.outdir, name, name + ".log"), level=logging.INFO, format=log_format
        )
    except IOError:
        logging.basicConfig(
            filename=os.path.join(opts.outdir, name, name + ".log2"), level=logging.INFO, format=log_format
        )
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(("\n" + log_format.replace("   %(message)s", "")).join(log.split("\n")))

    # update path to Hi-C data adding root directory
    if opts.root_path and opts.data[0]:
        for i in xrange(len(opts.data)):
            logging.info(os.path.join(opts.root_path, opts.data[i]))
            opts.data[i] = os.path.join(opts.root_path, opts.data[i])

    # update path to Hi-C norm adding root directory
    if opts.root_path and opts.norm[0]:
        for i in xrange(len(opts.norm)):
            logging.info(os.path.join(opts.root_path, opts.norm[i]))
            opts.norm[i] = os.path.join(opts.root_path, opts.norm[i])

    return opts
#!/usr/bin/python

# =================================================================================================
# Created on: 2016-04-05
# Usage: ./print_tadbit_and_dependencies_version.py
# Author: Javier Quilez (GitHub: jaquol)
# Goal: print the version of TADbit and its dependencies
# =================================================================================================

import re

import pytadbit

# Collapse the multi-line version report onto a single line: runs of
# newlines become ';' separators and every space is stripped out.
versions_report = pytadbit.get_dependencies_version()
one_line = re.sub(r"\n+", ";", versions_report).replace(" ", "")
print(one_line)
# Code example #11
def get_options():
    """
    parse option from call

    """
    parser = ArgumentParser(
        usage="%(prog)s [options] [--cfg CONFIG_PATH]",
        formatter_class=lambda prog: HelpFormatter(prog, width=95,
                                                   max_help_position=27))
    glopts = parser.add_argument_group('General arguments')
    taddet = parser.add_argument_group('TAD detection arguments')
    optimo = parser.add_argument_group('Optimization of IMP arguments')
    modelo = parser.add_argument_group('Modeling with optimal IMP arguments')
    descro = parser.add_argument_group('Descriptive, optional arguments')
    analyz = parser.add_argument_group('Output arguments')

    ## Define analysis actions:
    actions = {0  : "do nothing",
               1  : "column filtering",
               2  : "TAD borders",
               3  : "TAD alignment",
               4  : "optimization plot",
               5  : "correlation real/models",
               6  : "z-score plot",
               7  : "constraints",
               8  : "objective function",
               9  : "centroid",
               10 : "consistency",
               11 : "density",
               12 : "contact map",
               13 : "walking angle",
               14 : "persistence length",
               15 : "accessibility",
               16 : "interaction"}

    parser.add_argument('--usage', dest='usage', action="store_true",
                        default=False,
                        help='''show detailed usage documentation, with examples
                        and exit''')
    parser.add_argument('--cfg', dest='cfg', metavar="PATH", action='store',
                      default=None, type=str,
                      help='path to a configuration file with predefined ' +
                      'parameters')
    parser.add_argument('--analyze_only', dest='analyze_only',
                        action='store_true', default=False,
                        help=('load precomputed models in outdir, ' +
                              'skip optimization, modeling'))
    parser.add_argument('--optimize_only', dest='optimize_only', default=False,
                        action='store_true',
                        help='do the optimization of the region and exit')
    parser.add_argument('--tad_only', dest='tad_only', action="store_true",
                        default=False,
                        help='[%(default)s] exit after searching for TADs')
    parser.add_argument('--ncpus', dest='ncpus', metavar="INT", default=1,
                        type=int, help='[%(default)s] Number of CPUs to use')

    #########################################
    # GENERAL
    glopts.add_argument(
        '--root_path', dest='root_path', metavar="PATH",
        default='', type=str,
        help=('path to search for data files (just pass file name' +
              'in "data")'))
    glopts.add_argument('--data', dest='data', metavar="PATH", nargs='+',
                        type=str,
                        help='''path to file(s) with Hi-C data matrix. If many,
                        experiments will be summed up. I.e.: --data
                        replicate_1.txt replicate_2.txt''')
    glopts.add_argument('--xname', dest='xname', metavar="STR", nargs='+',
                        default=[], type=str,
                        help='''[file name] experiment name(s). Use same order
                        as data.''')
    glopts.add_argument('--norm', dest='norm', metavar="PATH", nargs='+',
                        type=str,
                        help='path to file(s) with normalizedHi-C data matrix.')
    glopts.add_argument('--nodiag', dest='nodiag', action='store_true',
                        help='''If the matrix does not contain self interacting
                        bins (only zeroes in the diagonal)''')
    glopts.add_argument('--filt', dest='filt', metavar='INT', default=90,
                        help='''Filter out column with more than a given
                        percentage of zeroes''')
    glopts.add_argument('--crm', dest='crm', metavar="NAME",
                        help='chromosome name')
    glopts.add_argument('--beg', dest='beg', metavar="INT", type=float,
                        default=None,
                        help='genomic coordinate from which to start modeling')
    glopts.add_argument('--end', dest='end', metavar="INT", type=float,
                        help='genomic coordinate where to end modeling')
    glopts.add_argument('--res', dest='res', metavar="INT", type=int,
                        help='resolution of the Hi-C experiment')
    glopts.add_argument('--outdir', dest='outdir', metavar="PATH",
                        default=None,
                        help='out directory for results')

    #########################################
    # TADs
    taddet.add_argument('--tad', dest='tad', action="store_true", default=False,
                        help='[%(default)s] search for TADs in experiments')
    taddet.add_argument('--centromere', dest='centromere', action="store_true",
                        default=False,
                        help='[%(default)s] search for centromeric region')
    taddet.add_argument('--group', dest='group', nargs='+', type=int,
                        default=0, metavar='INT',
                        help='''[all together] How to group Hi-C experiments for
                        the detection of TAD borders. I.e.: "--exp_group 2 2 1"
                        first 2 experiments used together, next 2 also, and last
                        alone (batch_mode option used)''')

    #########################################
    # MODELING
    modelo.add_argument('--nmodels_mod', dest='nmodels_mod', metavar="INT",
                        default='5000', type=int,
                        help=('[%(default)s] number of models to generate for' +
                              ' modeling'))
    modelo.add_argument('--nkeep_mod', dest='nkeep_mod', metavar="INT",
                        default='1000', type=int,
                        help=('[%(default)s] number of models to keep for ' +
                        'modeling'))

    #########################################
    # OPTIMIZATION
    optimo.add_argument('--maxdist', action='store', metavar="LIST",
                        default='400', dest='maxdist',
                        help='range of numbers for maxdist' +
                        ', i.e. 400:1000:100 -- or just a number')
    optimo.add_argument('--upfreq', dest='upfreq', metavar="LIST",
                        default='0',
                        help='range of numbers for upfreq' +
                        ', i.e. 0:1.2:0.3 --  or just a number')
    optimo.add_argument('--lowfreq', dest='lowfreq', metavar="LIST",
                        default='0',
                        help='range of numbers for lowfreq' +
                        ', i.e. -1.2:0:0.3 -- or just a number')
    optimo.add_argument('--scale', dest='scale', metavar="LIST",
                        default="0.01",
                        help='[%(default)s] range of numbers to be test as ' +
                        'optimal scale value, i.e. 0.005:0.01:0.001 -- Can ' +
                        'also pass only one number')
    optimo.add_argument('--dcutoff', dest='dcutoff', metavar="LIST",
                        default="2",
                        help='[%(default)s] range of numbers to be test as ' +
                        'optimal distance cutoff parameter (distance, in ' +
                        'number of beads, from which to consider 2 beads as ' +
                        'being close), i.e. 1:5:0.5 -- Can also pass only one' +
                        ' number')
    optimo.add_argument('--nmodels_opt', dest='nmodels_opt', metavar="INT",
                        default='500', type=int,
                        help='[%(default)s] number of models to generate for ' +
                        'optimization')
    optimo.add_argument('--nkeep_opt', dest='nkeep_opt', metavar="INT",
                        default='100', type=int,
                        help='[%(default)s] number of models to keep for ' +
                        'optimization')
    optimo.add_argument('--force_opt', dest='optimize_from_scratch',
                        action="store_true", default=False,
                        help='''[%(default)s] do not take into account previous
                        optimizations. Usefull for running in parallel in a
                        cluster for example.''')

    #########################################
    # DESCRIPTION
    descro.add_argument('--species', dest='species', metavar="STRING",
                        default='UNKNOWN',
                        help='species name, with no spaces, i.e.: homo_sapiens')
    descro.add_argument('--cell', dest='cell', metavar="STRING",
                        help='cell type name')
    descro.add_argument('--exp_type', dest='exp_type', metavar="STRING",
                        help='experiment type name (i.e.: Hi-C)')
    descro.add_argument('--assembly', dest='assembly', metavar="STRING",
                        default=None,
                        help='''NCBI ID of the original assembly
                        (i.e.: NCBI36 for human)''')
    descro.add_argument('--enzyme', dest='enzyme', metavar="STRING",
                        default=None,
                        help='''name of the enzyme used to digest
                        chromatin (i.e. HindIII)''')
    descro.add_argument('--identifier', dest='identifier', metavar="STRING",
                        default=None,
                        help='''NCBI identifier of the experiment''')
    descro.add_argument('--project', dest='project', metavar="STRING",
                        default=None,
                        help='''project name''')


    #########################################
    # OUTPUT
    analyz.add_argument('--analyze', dest='analyze', nargs='+',
                        choices=range(len(actions)), type=int,
                        default=range(2, len(actions)), metavar='INT',
                        help=('''[%s] list of numbers representing the
                        analysis to be done. Choose between:
                        %s''' % (' '.join([str(i) for i in range(
                                  2, len(actions))]),
                                 '\n'.join(['%s) %s' % (k, actions[k])
                                            for k in actions]))))
    analyz.add_argument('--not_write_cmm', dest='not_write_cmm',
                        default=False, action='store_true',
                        help='''[%(default)s] do not generate cmm files for each
                        model (Chimera input)''')
    analyz.add_argument('--not_write_xyz', dest='not_write_xyz',
                        default=False, action='store_true',
                        help='''[%(default)s] do not generate xyz files for each
                        model (3D coordinates)''')
    analyz.add_argument('--not_write_json', dest='not_write_json',
                        default=False, action='store_true',
                        help='''[%(default)s] do not generate json file.''')

    parser.add_argument_group(optimo)
    parser.add_argument_group(modelo)
    parser.add_argument_group(descro)
    parser.add_argument_group(analyz)
    opts = parser.parse_args()


    if opts.usage:
        print __doc__
        exit()

    log = '\tSummary of arguments:\n'
    # merger opts with CFG file and write summary
    args = reduce(lambda x, y: x + y, [i.strip('-').split('=')
                                       for i in sys.argv])
    new_opts = {}
    if opts.cfg:
        for line in open(opts.cfg):
            if not '=' in line:
                continue
            if line.startswith('#'):
                continue
            key, value = line.split('#')[0].strip().split('=')
            key = key.strip()
            value = value.strip()
            if value == 'True':
                value = True
            elif value == 'False':
                value = False
            elif key in ['data', 'norm', 'xname', 'group', 'analyze']:
                new_opts.setdefault(key, []).extend(value.split())
                continue
            new_opts[key] = value
    # bad key in configuration file
    opts.__dict__['description'] = {}
    for bad_k in set(new_opts.keys()) - set(opts.__dict__.keys()):
        sys.stderr.write('WARNING: parameter "%s" not recognized (used as description)\n' % (bad_k))
        try:
            opts.__dict__['description'][bad_k] = int(new_opts[bad_k])
        except ValueError:
            opts.__dict__['description'][bad_k] = new_opts[bad_k]
    for key in sorted(opts.__dict__.keys()):
        if key in args:
            log += '  * Command setting   %13s to %s\n' % (
                key, opts.__dict__[key])
        elif key in new_opts:
            opts.__dict__[key] = new_opts[key]
            log += '  - Config. setting   %13s to %s\n' % (
                key, new_opts[key])
        else:
            log += '  o Default setting   %13s to %s\n' % (
                key, opts.__dict__[key])

    # rename analysis actions
    for i, j in enumerate(opts.analyze):
        opts.analyze[i] = actions[int(j)]

    if not opts.data and not opts.norm:
        sys.stderr.write('MISSING data')
        exit(parser.print_help())
    if not opts.outdir:
        sys.stderr.write('MISSING outdir')
        exit(parser.print_help())
    if not opts.crm:
        sys.stderr.write('MISSING crm NAME')
        exit(parser.print_help())
    if not opts.res:
        sys.stderr.write('MISSING resolution')
        exit(parser.print_help())
    if not opts.analyze_only:
        if not opts.maxdist:
            sys.stderr.write('MISSING maxdist')
            exit(parser.print_help())
        if not opts.lowfreq:
            sys.stderr.write('MISSING lowfreq')
            exit(parser.print_help())
        if not opts.upfreq:
            sys.stderr.write('MISSING upfreq')
            exit(parser.print_help())

    if not opts.beg and not opts.tad_only:
        sys.stderr.write('WARNING: no begin coordinate given all')
    if not opts.end and not opts.tad_only:
        sys.stderr.write('WARNING: no begin coordinate given all')

    # groups for TAD detection
    if not opts.data:
        opts.data = [None] * len(opts.norm)
    else:
        opts.norm = [None] * len(opts.data)
    if not opts.group:
        opts.group = [len(opts.data)]
    else:
        opts.group = [int(i) for i in opts.group]

    if sum(opts.group) > len(opts.data):
        logging.info('ERROR: Number of experiments in groups larger than ' +
                     'the number of Hi-C data files given.')
        exit()

    # this options should stay as this now
    # opts.scale = '0.01'

    # switch to number
    opts.nmodels_mod = int(opts.nmodels_mod)
    opts.nkeep_mod   = int(opts.nkeep_mod  )
    opts.nmodels_opt = int(opts.nmodels_opt)
    opts.nkeep_opt   = int(opts.nkeep_opt  )
    opts.ncpus       = int(opts.ncpus      )
    opts.res         = int(opts.res        )

    # TODO: UNDER TEST
    opts.container   = None #['cylinder', 1000, 5000, 100]

    # do the division to bins
    if not opts.tad_only:
        try:
            opts.beg = int(float(opts.beg) / opts.res)
            opts.end = int(float(opts.end) / opts.res)
            if opts.end - opts.beg <= 2:
                raise Exception('"beg" and "end" parameter should be given in ' +
                                'genomic coordinates, not bin')
        except TypeError:
            pass

    # Create out-directory
    name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end)
    if not os.path.exists(os.path.join(opts.outdir, name)):
        os.makedirs(os.path.join(opts.outdir, name))

    # write version log
    if not os.path.exists(os.path.join(opts.outdir,
                                       'TADbit_and_dependencies_versions.log')):
        vlog = os.path.join(opts.outdir, 'TADbit_and_dependencies_versions.log')
        vlog = open(vlog, 'w')
        vlog.write(get_dependencies_version())
        vlog.close()

    # write log
    if opts.optimize_only:
        log_format = '[OPTIMIZATION {}_{}_{}_{}_{}]   %(message)s'.format(
            opts.maxdist, opts.upfreq, opts.lowfreq, opts.scale, opts.dcutoff)
    elif opts.analyze_only:
        log_format = '[ANALYZE]   %(message)s'
    elif opts.tad_only:
        log_format = '[TAD]   %(message)s'
    else:
        log_format = '[DEFAULT]   %(message)s'
    try:
        logging.basicConfig(filename=os.path.join(opts.outdir, name, name + '.log'),
                            level=logging.INFO, format=log_format)
    except IOError:
        logging.basicConfig(filename=os.path.join(opts.outdir, name, name + '.log2'),
                            level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(('\n' + log_format.replace('   %(message)s', '')
                  ).join(log.split('\n')))

    # update path to Hi-C data adding root directory
    if opts.root_path and opts.data[0]:
        for i in xrange(len(opts.data)):
            logging.info(os.path.join(opts.root_path, opts.data[i]))
            opts.data[i] = os.path.join(opts.root_path, opts.data[i])

    # update path to Hi-C norm adding root directory
    if opts.root_path and opts.norm[0]:
        for i in xrange(len(opts.norm)):
            logging.info(os.path.join(opts.root_path, opts.norm[i]))
            opts.norm[i] = os.path.join(opts.root_path, opts.norm[i])

    return opts
コード例 #12
0
ファイル: map_hic_reads.py プロジェクト: 3DGenomes/TADbit
def get_options():
    """
    parse option from call
    """
    parser = ArgumentParser(
        usage="%(prog)s [options] [--cfg CONFIG_PATH]",
        formatter_class=lambda prog: HelpFormatter(prog, width=95,
                                                   max_help_position=27))

    glopts = parser.add_argument_group('General options')
    mapper = parser.add_argument_group('Mapping options')
    descro = parser.add_argument_group('Descriptive, optional arguments')

    glopts.add_argument('--cfg', dest='cfg', metavar="PATH", action='store',
                      default=None, type=str,
                      help='path to a configuration file with predefined ' +
                      'parameters')

    glopts.add_argument('--qc_plot', dest='quality_plot', action='store_true',
                      default=False,
                      help='generate a quality plot of FASTQ and exits')

    glopts.add_argument('-o', '--output', dest='output', metavar="PATH",
                        action='store', default=None, type=str,
                        help='path to output folder')

    glopts.add_argument('--fastq', dest='fastq', metavar="PATH", action='store',
                      default=None, type=str,
                      help='path to a FASTQ files (can be compressed files)')

    glopts.add_argument('--genome', dest='genome', metavar="PATH", nargs='+',
                        type=str,
                        help='''paths to file(s) with FASTA files of the
                        reference genome. If many, files will be concatenated.
                        I.e.: --fasta chr_1.fa chr_2.fa
                        In this last case, order is important or the rest of the
                        analysis.''')

    glopts.add_argument('--index', dest='index', metavar="PATH",
                        type=str,
                        help='''paths to file(s) with indexed FASTA files of the
                        reference genome.''')

    glopts.add_argument('--read', dest='read', metavar="INT", 
                        type=str,
                        help='read number')

    glopts.add_argument('--renz', dest='renz', metavar="STR", 
                        type=str,
                        help='restriction enzyme name')

    glopts.add_argument('--chr_name', dest='chr_name', metavar="STR", nargs='+',
                        default=[], type=str,
                        help='''[fasta header] chromosome name(s). Used in the
                        same order as data.''')

    glopts.add_argument('--tmp', dest='tmp', metavar="PATH", action='store',
                      default=None, type=str,
                      help='''path to a temporary directory (default next to
                      output directory)''')

    mapper.add_argument('--strategy', dest='strategy', default='frag',
                        choices=['frag', 'iter'],
                        help='''mapping strategy, can be "frag" for fragment
                        based mapping or "iter" for iterative mapping''')

    mapper.add_argument('--windows', dest='windows', default='auto',
                        nargs='+',
                        help='''for iterative mapping, defines windows. e.g.
                        --windows 20 25 30 35 40 45 50''')

    mapper.add_argument('--read_length', dest='read_length',
                        type=int,
                        help='''read length, compulsory in iterative mapping with
                        --windows auto''')

    mapper.add_argument('--mapping_only', dest='mapping_only', action='store_true',
                        help='only do the mapping does not parse results')

    descro.add_argument('--species', dest='species', metavar="STR", 
                        type=str,
                        help='species name')

    descro.add_argument('--descr', dest='description', metavar="LIST", nargs='+',
                        type=str,
                        help='''extra descriptive fields each filed separated by
                        coma, and inside each, name and value separated by column: 
                        --descr=cell:lymphoblast,flowcell:C68AEACXX,index:24nf''')

    parser.add_argument_group(glopts)
    parser.add_argument_group(descro)
    parser.add_argument_group(mapper)
    opts = parser.parse_args()

    if opts.cfg:
        get_options_from_cfg(opts.cfg, opts)

    if (opts.strategy == 'iter' and opts.window == 'auto'
        and not opts.read_length):
        raise Exception('ERROR: need to input read_length')
    # check RE name
    try:
        _ = RESTRICTION_ENZYMES[opts.renz]
    except KeyError:
        print ('\n\nERROR: restriction enzyme not found. Use one of:\n\n'
               + ' '.join(sorted(RESTRICTION_ENZYMES)) + '\n\n')
        raise KeyError()

    # check compulsory options
    if not opts.quality_plot:
        if not opts.genome: raise Exception('ERROR: genome option required.')
        if not opts.index : raise Exception('ERROR: index  option required.')
    if not opts.output: raise Exception('ERROR: output option required.')
    if not opts.fastq : raise Exception('ERROR: fastq  option required.')
    if not opts.renz  : raise Exception('ERROR: renz   option required.')
    if not opts.tmp:
        opts.tmp = opts.output + '_tmp_r' + opts.read

    if opts.strategy == 'frag':
        opts.windows = None
        
    if opts.strategy == 'iter':
        raise NotImplementedError()

    system('mkdir -p ' + opts.output)
    # write log
    if opts.mapping_only:
        log_format = '[MAPPING {} READ{}]   %(message)s'.format(opts.fastq, opts.read)
    else:
        log_format = '[DEFAULT]   %(message)s'

    # reset logging
    logging.getLogger().handlers = []

    try:
        print 'Writting log to ' + path.join(opts.output, 'process.log')
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.output, 'process.log'),
                            filemode='aw')
    except IOError:
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.output, 'process.log2'),
                            filemode='aw')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.output, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    if not path.exists(vlog_path) or open(vlog_path).readlines() != dependencies:
        logging.info('Writting versions of TADbit and dependencies')
        vlog = open(vlog_path, 'w')
        vlog.write(dependencies)
        vlog.close()

    return opts