Example #1
0
def check_options(opts):
    """Validate parsed options, relocate the trace DB if requested and
    abort when the exact same job was already computed.

    :param opts: parsed command-line options (argparse-like namespace)
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')

    # resuming only makes sense when the working directory already exists
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False

    # keep the list of filters in a canonical (sorted) order
    if opts.apply:
        opts.apply.sort()

    # Lustre workaround: work on a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        rnd = ''.join(ascii_letters[int(random() * 52)] for _ in range(10))
        dbfile = 'trace_%s' % rnd
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet (presumably a fresh working directory)

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #2
0
def check_options(opts):
    """Sanity-check command-line options before running the tool.

    Requires an output directory, disables ``resume`` when the working
    directory is missing, sorts the filters and, on Lustre-like file
    systems, copies the trace DB to a temporary location.
    """
    def tmp_name():
        # random 10-letter suffix for the temporary DB copy
        return ''.join(ascii_letters[int(random() * 52)] for _ in range(10))

    if not opts.workdir:
        raise Exception('ERROR: output option required.')

    # nothing to resume from if the working directory is absent
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False

    # canonical order for the filters to apply
    if opts.apply:
        opts.apply.sort()

    # Lustre workaround: operate on a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        dbfile = 'trace_%s' % tmp_name()
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # fresh working directory: no DB to copy yet

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #3
0
def check_options(opts):
    """Validate options for the plotting/analysis step.

    Creates the working directory entry, normalizes the filter option,
    reconciles the plotting flags, relocates the trace DB on Lustre-like
    file systems, clamps the CPU count, and aborts (or warns with
    ``--force``) when the exact same job was already computed.

    :param opts: parsed command-line options (modified in place)
    """
    mkdir(opts.workdir)

    # transform filtering reads option
    opts.filter = filters_to_bin(opts.filter)

    # enlighten plotting parameter writing
    if opts.only_plot:
        opts.plot = True
    if opts.interactive:
        if opts.nox:
            raise Exception('ERROR: no screen no fun.\n'
                            'Interactive plot incompatible with noX option.')
        opts.plot = True
        opts.only_plot = True

    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')

    # triangular plots only defined for symmetric matrices
    if opts.triangular and opts.coord2:
        raise NotImplementedError('ERROR: triangular is only available for '
                                  'symmetric matrices.')

    # for LUSTRE file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if not opts.force:
                # BUGFIX: exit() was previously nested under the tmpdb
                # check (and after remove()), so a duplicated job was
                # only aborted when a temporary DB was in use.  Abort
                # unconditionally instead.
                if 'tmpdb' in opts and opts.tmpdb:
                    remove(path.join(dbdir, dbfile))
                exit(
                    'WARNING: exact same job already computed, see JOBs table above'
                )
            else:
                warn(
                    'WARNING: exact same job already computed, overwriting...')
    except IOError:
        warn((""
              "\nWARNING:\n  new working directory created. It's ok... "
              "but next time use TADbit since the beginning!! :)"))
Example #4
0
def check_options(opts):
    """Minimal option check: the working directory must exist and the
    exact same job must not have been computed before (unless forced).

    :param opts: parsed command-line options
    :raises IOError: if the working directory does not exist
    """
    # check resume
    if not path.exists(opts.workdir):
        # BUGFIX: error message previously read 'wordir'
        raise IOError('ERROR: workdir not found.')

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #5
0
def check_options(opts):
    """Validate options for the plotting/analysis step.

    Normalizes the filter and plotting options, relocates the trace DB
    on Lustre-like file systems, clamps the CPU count, and aborts (or
    warns with ``--force``) when the identical job was already computed.

    :param opts: parsed command-line options (modified in place)
    """
    mkdir(opts.workdir)

    # transform filtering reads option
    opts.filter = filters_to_bin(opts.filter)

    # enlighten plotting parameter writing
    if opts.only_plot:
        opts.plot = True
    if opts.interactive:
        if opts.nox:
            raise Exception('ERROR: no screen no fun.\n'
                            'Interactive plot incompatible with noX option.')
        opts.plot = True
        opts.only_plot = True

    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')

    # triangular plots only defined for symmetric matrices
    if opts.triangular and opts.coord2:
        raise NotImplementedError('ERROR: triangular is only available for '
                                  'symmetric matrices.')

    # for LUSTRE file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)]
                                        for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if not opts.force:
                # BUGFIX: exit() was nested under the tmpdb check, so a
                # duplicated job only aborted when a temporary DB was in
                # use; abort unconditionally instead.
                if 'tmpdb' in opts and opts.tmpdb:
                    remove(path.join(dbdir, dbfile))
                exit('WARNING: exact same job already computed, see JOBs table above')
            else:
                warn('WARNING: exact same job already computed, overwriting...')
    except IOError:
        warn((""
              "\nWARNING:\n  new working directory created. It's ok... "
              "but next time use TADbit from the beginning!! :)"))
Example #6
0
def check_options(opts):
    """Check options for the comparison step and set up trace DBs.

    Makes sure a trace DB file exists in the working directory, stages
    per-workdir DB copies in a temporary directory on Lustre-like file
    systems, requires a resolution unless the comparison is skipped,
    and aborts when the identical job was already computed.
    """
    mkdir(opts.workdir)

    # make sure a trace DB file exists (create an empty one otherwise)
    open(path.join(opts.workdir, 'trace.db'), 'a').close()

    def rand_suffix():
        # random 10-letter suffix for the temporary DB copies
        return ''.join(ascii_letters[int(random() * 52)] for _ in range(10))

    # Lustre workaround: stage private copies of the trace DBs
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        dbfile = 'trace_%s' % rand_suffix()
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet
        if opts.workdir1:
            dbfile1 = 'trace1_%s' % rand_suffix()
            opts.tmpdb1 = path.join(dbdir, dbfile1)
            try:
                copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1)
            except IOError:
                pass
        if opts.workdir2:
            dbfile2 = 'trace2_%s' % rand_suffix()
            opts.tmpdb2 = path.join(dbdir, dbfile2)
            try:
                copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2)
            except IOError:
                pass
    else:
        if opts.workdir1:
            opts.tmpdb1 = path.join(opts.workdir1, 'trace.db')
        if opts.workdir2:
            opts.tmpdb2 = path.join(opts.workdir2, 'trace.db')

    # a resolution is required to compare matrices
    if not opts.skip_comparison and not opts.reso:
        raise Exception('ERROR: need to define resolution at which to compare')

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
            if opts.workdir1:
                remove(path.join(dbdir, dbfile1))
            if opts.workdir2:
                remove(path.join(dbdir, dbfile2))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #7
0
def check_options(opts):
    """Validate options for the comparison step.

    Guarantees a trace DB exists, stages per-workdir DB copies in a
    temporary directory on Lustre-like file systems, requires a
    resolution unless the comparison is skipped, and stops on a
    duplicated job.
    """
    mkdir(opts.workdir)

    # create an empty trace DB when none exists yet
    dbpath = path.join(opts.workdir, 'trace.db')
    open(dbpath, 'a').close()

    # Lustre workaround: stage private copies of the trace DBs
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        dbfile = 'trace_%s' % ''.join(ascii_letters[int(random() * 52)]
                                      for _ in range(10))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(dbpath, opts.tmpdb)
        except IOError:
            pass  # nothing to copy yet
        if opts.workdir1:
            dbfile1 = 'trace1_%s' % ''.join(ascii_letters[int(random() * 52)]
                                            for _ in range(10))
            opts.tmpdb1 = path.join(dbdir, dbfile1)
            try:
                copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1)
            except IOError:
                pass
        if opts.workdir2:
            dbfile2 = 'trace2_%s' % ''.join(ascii_letters[int(random() * 52)]
                                            for _ in range(10))
            opts.tmpdb2 = path.join(dbdir, dbfile2)
            try:
                copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2)
            except IOError:
                pass
    else:
        if opts.workdir1:
            opts.tmpdb1 = path.join(opts.workdir1, 'trace.db')
        if opts.workdir2:
            opts.tmpdb2 = path.join(opts.workdir2, 'trace.db')

    # a resolution is mandatory unless the comparison itself is skipped
    if not opts.skip_comparison and not opts.reso:
        raise Exception('ERROR: need to define resolution at which to compare')

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
            if opts.workdir1:
                remove(path.join(dbdir, dbfile1))
            if opts.workdir2:
                remove(path.join(dbdir, dbfile2))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #8
0
def check_options(opts):
    """Validate mapping/parsing options and configure logging.

    :param opts: parsed command-line options (modified in place)
    :raises Exception: when required options are missing
    :raises NotImplementedError: for unsupported input types
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    if opts.type != 'map':
        raise NotImplementedError('ERROR: not yet there')

    if not opts.genome:
        raise Exception('ERROR: genome parameter required.')
    if not opts.workdir:
        raise Exception('ERROR: workdir parameter required.')

    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    # normalize a trailing slash away
    if opts.workdir.endswith('/'):
        opts.workdir = opts.workdir[:-1]

    # write log
    log_format = '[PARSING]   %(message)s'

    # reset logging
    logging.getLogger().handlers = []

    try:
        # BUGFIX: Python 2 print statement replaced by print();
        # 'aw' is not a valid file mode, use append ('a') instead.
        print('Writting log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a')
    except IOError:
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # BUGFIX: compare the file content (a string) against the string
    # returned by get_dependencies_version(); the previous readlines()
    # comparison (list vs. string) was always unequal, so the file was
    # rewritten on every run.
    if not path.exists(vlog_path) or open(vlog_path).read() != dependencies:
        logging.info('Writting versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #9
0
def check_options(opts):
    """Basic option checks before running.

    Requires an output directory, disables ``resume`` when the working
    directory does not exist, sorts the filters to apply, and refuses
    to recompute an identical job unless ``--force`` was given.
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')

    # resuming needs an existing working directory
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False

    # canonical (sorted) order for the filters
    if opts.apply:
        opts.apply.sort()

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #10
0
def check_options(opts):
    """Check options shared by the simple pipeline steps.

    An output directory is mandatory; resume mode is switched off when
    that directory is absent; filters are sorted; an already-computed
    identical job aborts the run unless forced.
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')

    # check resume: nothing to pick up from a missing working directory
    workdir_missing = not path.exists(opts.workdir)
    if workdir_missing and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False

    # sort filters into a canonical order
    if opts.apply:
        opts.apply.sort()

    # check if job already run using md5 digestion of parameters
    duplicated = already_run(opts) and not opts.force
    if duplicated:
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #11
0
def check_options(opts):
    """Validate options for the normalization step.

    Checks the custom-biases input, requires an existing working
    directory, relocates the trace DB on Lustre-like file systems,
    clamps the CPU count and stops when the identical job was already
    computed.
    """
    mkdir(opts.workdir)

    # transform filtering reads option into its binary representation
    opts.filter = filters_to_bin(opts.filter)

    # 'custom' normalization needs an existing biases file
    if opts.normalization == 'custom':
        if not opts.biases_path:
            raise IOError(
                'ERROR: biases file required for "custom" normalization.')
        elif not path.exists(opts.biases_path):
            raise IOError('ERROR: biases not found at path: %s' %
                          opts.biases_path)

    # the working directory must already exist
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')

    # Lustre workaround: work on a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        suffix = ''.join(ascii_letters[int(random() * 52)]
                         for _ in range(10))
        dbfile = 'trace_%s' % suffix
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # clamp the number of cpus to what the machine offers
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # md5 digest of the parameters identifies previously run jobs
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(dbdir, dbfile))
            exit(
                'WARNING: exact same job already computed, see JOBs table above'
            )
    except IOError:  # new working directory
        pass
Example #12
0
def check_options(opts):
    """Validate options for the compartment-calling step.

    Clamps the CPU count, enforces that FASTA and rich-in-A inputs are
    mutually exclusive, falls back to the FASTA file as the rich-in-A
    metric, grows the number of eigenvectors to cover the requested
    indices, relocates the trace DB on Lustre-like file systems and
    aborts on an already-computed identical job.

    :param opts: parsed command-line options (modified in place)
    """
    # check resume
    if not path.exists(opts.workdir):
        raise IOError('ERROR: %s does not exists' % opts.workdir)

    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # FASTA and rich_in_A are alternative ways to provide the metric
    if opts.rich_in_A and opts.fasta:
        raise Exception('ERROR: should choose one of FASTA or rich_in_A')

    # rich_in_A: the FASTA file is used as the metric source.
    # NOTE: the previous nested `if opts.rich_in_A: raise ...` here was
    # unreachable (the combination is rejected above) and was removed.
    if opts.fasta:
        opts.rich_in_A = opts.fasta

    # N EVs
    if opts.ev_index:
        if max(opts.ev_index) > opts.n_evs:
            warn(
                'WARNING: increasing number of calculated eigenvectors to %d, '
                'to match the u=input eigenvectors indices' %
                max(opts.ev_index))
            opts.n_evs = max(opts.ev_index)

    # tmp folder
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #13
0
def check_options(opts):
    """Validate options for the filtering/plot step.

    Normalizes the filter option, reconciles the plotting flags,
    requires an existing working directory, relocates the trace DB on
    Lustre-like file systems, clamps the CPU count and aborts (or warns
    with ``--force``) when the identical job was already computed.
    """
    mkdir(opts.workdir)

    # binary encoding of the read filters
    opts.filter = filters_to_bin(opts.filter)

    # plotting flags imply each other
    if opts.only_plot:
        opts.plot = True
    if opts.interactive:
        opts.plot = True
        opts.only_plot = True

    # the working directory must already exist
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')

    # Lustre workaround: private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        dbfile = 'trace_%s' % ''.join(ascii_letters[int(random() * 52)]
                                      for _ in range(10))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # clamp the number of cpus to the machine's capacity
    opts.cpus = cpu_count() if opts.cpus == 0 else min(opts.cpus, cpu_count())

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts):
        if not opts.force:
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(dbdir, dbfile))
            exit(
                'WARNING: exact same job already computed, see JOBs table above'
            )
        else:
            warn('WARNING: exact same job already computed, overwritting...')
Example #14
0
def check_options(opts):
    """Validate options for the merging step and set up trace DBs.

    Copies the trace DBs of the main and the two input working
    directories to a temporary directory on Lustre-like file systems,
    otherwise points directly at the per-workdir DBs, and aborts when
    the identical job was already computed.

    :param opts: parsed command-line options (modified in place)
    """
    mkdir(opts.workdir)

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet
        if opts.workdir1:
            # tmp file
            dbfile1 = 'trace1_%s' % (''.join(
                [ascii_letters[int(random() * 52)] for _ in range(10)]))
            opts.tmpdb1 = path.join(dbdir, dbfile1)
            try:
                copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1)
            except IOError:
                pass
        if opts.workdir2:
            # tmp file
            dbfile2 = 'trace2_%s' % (''.join(
                [ascii_letters[int(random() * 52)] for _ in range(10)]))
            opts.tmpdb2 = path.join(dbdir, dbfile2)
            try:
                copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2)
            except IOError:
                pass
    else:
        # BUGFIX: guard against unset workdir1/workdir2 (path.join on
        # None raises TypeError), mirroring the guarded branch above.
        if opts.workdir1:
            opts.tmpdb1 = path.join(opts.workdir1, 'trace.db')
        if opts.workdir2:
            opts.tmpdb2 = path.join(opts.workdir2, 'trace.db')

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
            if opts.workdir1:
                remove(path.join(dbdir, dbfile1))
            if opts.workdir2:
                remove(path.join(dbdir, dbfile2))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #15
0
def check_options(opts):
    """Require an existing working directory, optionally relocate the
    trace DB to a temporary directory, and abort on an already-computed
    identical job unless forced."""
    if not path.exists(opts.workdir):
        raise IOError('ERROR: %s does not exists' % opts.workdir)

    # Lustre workaround: operate on a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        suffix = ''.join(ascii_letters[int(random() * 52)]
                         for _ in range(10))
        dbfile = 'trace_%s' % suffix
        opts.tmpdb = path.join(dbdir, dbfile)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #16
0
def check_options(opts):
    """Check the working directory, stage the trace DB in a temporary
    location when requested, and stop on a duplicated job.

    Missing workdir raises IOError; an already-computed identical job
    exits unless ``--force`` is set.
    """
    workdir = opts.workdir
    if not path.exists(workdir):
        raise IOError('ERROR: %s does not exists' % workdir)

    # stage the trace DB in the requested temporary directory
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        letters = [ascii_letters[int(random() * 52)] for _ in range(10)]
        dbfile = 'trace_' + ''.join(letters)
        opts.tmpdb = path.join(dbdir, dbfile)
        copyfile(path.join(workdir, 'trace.db'), opts.tmpdb)

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #17
0
def check_options(opts):
    """Disable resume when the working directory is missing, copy the
    trace DB into a temporary file when ``tmp`` is set, and abort on an
    already-computed identical job unless forced."""
    # resuming needs an existing working directory
    if not path.exists(opts.workdir) and opts.resume:
        print('WARNING: can use output files, found, not resuming...')
        opts.resume = False

    # temporary copy of the trace DB
    if 'tmp' in opts and opts.tmp:
        dbdir = opts.tmp
        suffix = ''.join(ascii_letters[int(random() * 52)]
                         for _ in range(10))
        dbfile = 'trace_%s' % suffix
        opts.tmp = path.join(dbdir, dbfile)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmp)

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts) and not opts.force:
        if 'tmp' in opts and opts.tmp:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #18
0
def check_options(opts):
    """Validate options for compartment calling.

    Clamps the CPU count, enforces mutually exclusive FASTA /
    rich-in-A inputs, grows the number of eigenvectors to cover the
    requested indices, relocates the trace DB when a tmp folder is
    given, and aborts on an already-computed identical job.

    :param opts: parsed command-line options (modified in place)
    """
    # number of cpus
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # FASTA and rich_in_A are alternative ways to provide the metric
    if opts.rich_in_A and opts.fasta:
        raise Exception('ERROR: should choose one of FASTA or rich_in_A')

    # rich_in_A: the FASTA file is used as the metric source.
    # NOTE: the previous nested `if opts.rich_in_A: raise ...` here was
    # dead code -- that combination is already rejected above.
    if opts.fasta:
        opts.rich_in_A = opts.fasta

    # N EVs
    if opts.ev_index:
        if max(opts.ev_index) > opts.n_evs:
            warn('WARNING: increasing number of calculated eigenvectors to %d, '
                 'to match the u=input eigenvectors indices' % max(opts.ev_index))
            opts.n_evs = max(opts.ev_index)

    # tmp folder
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)]
                                        for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)

    # a missing DB (fresh working directory) counts as "not run yet"
    try:
        _already_run = already_run(opts)
    except IOError:
        _already_run = False
    if _already_run and not opts.force:
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #19
0
def check_options(opts):
    """Validate options for the normalization step.

    Normalizes the filter option, checks the custom biases file,
    requires an existing working directory, stages the trace DB on
    Lustre-like file systems, clamps the CPU count and stops when the
    identical job was already computed.
    """
    mkdir(opts.workdir)

    # binary encoding of the read filters
    opts.filter = filters_to_bin(opts.filter)

    # 'custom' normalization requires a readable biases file
    if opts.normalization == 'custom':
        if not opts.biases_path:
            raise IOError('ERROR: biases file required for "custom" normalization.')
        elif not path.exists(opts.biases_path):
            raise IOError('ERROR: biases not found at path: %s' % opts.biases_path)

    # the working directory must already exist
    if not path.exists(opts.workdir):
        raise IOError('ERROR: workdir not found.')

    # Lustre workaround: stage a private copy of the trace DB
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        dbfile = 'trace_' + ''.join(ascii_letters[int(random() * 52)]
                                    for _ in range(10))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # nothing to copy yet

    # clamp the number of cpus to the machine's capacity
    opts.cpus = cpu_count() if opts.cpus == 0 else min(opts.cpus, cpu_count())

    # md5 digest of the parameters identifies previously run jobs; a
    # missing DB (fresh working directory) is simply ignored
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(dbdir, dbfile))
            exit('WARNING: exact same job already computed, see JOBs table above')
    except IOError:  # new working directory
        pass
Example #20
0
def check_options(opts):
    """Prepare temporary trace DBs for the three working directories on
    Lustre-like file systems and abort when the identical job was
    already computed."""
    mkdir(opts.workdir)

    def new_tmp(prefix):
        # random 10-letter temporary DB name with the given prefix
        return '%s_%s' % (prefix, ''.join(
            ascii_letters[int(random() * 52)] for _ in range(10)))

    # Lustre workaround: stage private copies of the trace DBs
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        dbfile = new_tmp('trace')
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet
        if opts.workdir1:
            dbfile1 = new_tmp('trace1')
            opts.tmpdb1 = path.join(dbdir, dbfile1)
            try:
                copyfile(path.join(opts.workdir1, 'trace.db'), opts.tmpdb1)
            except IOError:
                pass
        if opts.workdir2:
            dbfile2 = new_tmp('trace2')
            opts.tmpdb2 = path.join(dbdir, dbfile2)
            try:
                copyfile(path.join(opts.workdir2, 'trace.db'), opts.tmpdb2)
            except IOError:
                pass

    # md5 digest of the parameters identifies previously run jobs
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
            if opts.workdir1:
                remove(path.join(dbdir, dbfile1))
            if opts.workdir2:
                remove(path.join(dbdir, dbfile2))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #21
0
def check_options(opts):
    """Require an existing working directory, relocate the trace DB on
    Lustre-like file systems, and abort when the identical job was
    already computed.

    :param opts: parsed command-line options
    :raises IOError: if the working directory does not exist
    """
    # check resume
    if not path.exists(opts.workdir):
        # BUGFIX: the message previously read 'wordir'
        raise IOError('ERROR: workdir not found.')

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #22
0
def check_options(opts):
    """Require an existing working directory, relocate the trace DB on
    Lustre-like file systems, and abort when the identical job was
    already computed.

    :param opts: parsed command-line options
    :raises IOError: if the working directory does not exist
    """
    # check resume
    if not path.exists(opts.workdir):
        # BUGFIX: the message previously read 'wordir'
        raise IOError('ERROR: workdir not found.')

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join([ascii_letters[int(random() * 52)]
                                        for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #23
0
def check_options(opts):
    """Validate mapping/parsing options, configure logging and abort on
    an already-computed identical job.

    :param opts: parsed command-line options (modified in place)
    :raises Exception: when required options are missing
    :raises NotImplementedError: for unsupported input types
    """
    if not opts.workdir:
        raise Exception('ERROR: output option required.')
    if opts.type != 'map':
        raise NotImplementedError('ERROR: not yet there')

    if not opts.genome:
        raise Exception('ERROR: genome parameter required.')
    if not opts.workdir:
        raise Exception('ERROR: workdir parameter required.')

    # check skip
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    # normalize a trailing slash away
    if opts.workdir.endswith('/'):
        opts.workdir = opts.workdir[:-1]

    # create the working directory on first use (the unused 'newbie'
    # flag was removed)
    if not path.exists(opts.workdir):
        mkdir(opts.workdir)
    log_format = '[PARSING]   %(message)s'

    # reset logging
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    # BUGFIX: compare the file's content (a string) to the string from
    # get_dependencies_version(); the previous readlines() comparison
    # (list vs. string) was always unequal, so the file was rewritten on
    # every run.
    if not path.exists(vlog_path) or open(vlog_path).read() != dependencies:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass  # no DB to copy yet

    # check if job already run using md5 digestion of parameters
    try:
        if already_run(opts):
            if 'tmpdb' in opts and opts.tmpdb:
                remove(path.join(dbdir, dbfile))
            exit(
                'WARNING: exact same job already computed, see JOBs table above'
            )
    except OSError:
        pass
Example #24
0
def check_options(opts):
    """Validate and normalise mapping options in-place.

    Resolves the mapper binary path, probes the GEM version, validates
    restriction-enzyme names, fast-fragment settings, CPU count and
    input paths, configures logging inside the working directory,
    parses mapper extra parameters and finally aborts if the exact
    same job (identified by md5 digestion of parameters) was already
    computed.

    :param opts: argparse-style namespace of command-line options;
        it is mutated in place.
    :raises Exception: missing mapper binary or invalid fast-fragment
        configuration.
    :raises IOError: missing/invalid index or FASTQ files.
    :raises KeyError: unknown restriction enzyme name.
    :raises NotImplementedError: unsupported GEM v2 extra option.
    """
    # resolve the mapper binary name ('gem-mapper' for GEM, otherwise the
    # mapper name itself) and locate it on the PATH
    if not opts.mapper_binary:
        if opts.mapper == 'gem':
            opts.mapper_binary = 'gem-mapper'
        else:
            opts.mapper_binary = opts.mapper
    opts.mapper_binary = which(opts.mapper_binary)
    if not opts.mapper_binary:
        raise Exception(
            '\n\nERROR: Mapper binary not found, for GEM install it from:'
            '\nhttps://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%202/'
            '\n - Download the GEM-binaries-Linux-x86_64-core_i3 if'
            'have a recent computer, the '
            'GEM-binaries-Linux-x86_64-core_2 otherwise\n - '
            'Uncompress with "tar xjvf GEM-binaries-xxx.tbz2"\n - '
            'Copy the binary gem-mapper to /usr/local/bin/ for '
            'example (somewhere in your PATH).\n\nNOTE: GEM does '
            'not provide any binary for MAC-OS.')

    # probe the GEM major version from the second character of the
    # '--version' output; fall back to v2 when it cannot be parsed
    opts.gem_version = 0
    if opts.mapper == 'gem':
        opts.gem_version = None
        try:
            out, _ = Popen([opts.mapper_binary, '--version'],
                           stdout=PIPE,
                           stderr=STDOUT,
                           universal_newlines=True).communicate()
            # NOTE(review): out[1] is the 2nd char of the output
            # (e.g. '3' in 'v3...') -- fragile, confirm against the
            # actual gem-mapper banner
            opts.gem_version = int(out[1])
        except ValueError:
            opts.gem_version = 2
            print('Falling to gem v2')

    # fast fragment mapping requires GEM v3 and both paired FASTQs
    if opts.fast_fragment:
        if opts.gem_version < 3:
            raise Exception('ERROR: Fast fragment mapping needs GEM v3')
        if not opts.fastq2 or not path.exists(opts.fastq2):
            raise Exception(
                'ERROR: Fast fragment mapping needs both fastq files. '
                'Please specify --fastq2')
        if opts.read != 0:
            raise Exception(
                'ERROR: Fast fragment mapping needs to be specified with --read 0'
            )
        if not opts.genome:
            raise Exception('ERROR: Fast fragment mapping needs '
                            'the genome parameter.')
    # check RE name; the sentinel 'CHECK' triggers automatic detection
    # of the most probable enzyme from the FASTQ, then exits
    if opts.renz == ['CHECK']:
        print('\nSearching for most probable restriction enzyme in file: %s' %
              (opts.fastq))
        try:
            pat, enz, pv = identify_re(opts.fastq, nreads=100000)
            print(' -> Most probable digested site: %s (pv: %f)' % (pat, pv))
            print(' -> Enzymes matching: %s' % (', '.join(enz)))
        except ValueError:
            print(' -> Nothing found...')
        exit()
    for n, renz in enumerate(opts.renz):
        if renz == 'NONE':
            opts.renz[n] = None
            continue
        try:
            _ = RESTRICTION_ENZYMES[renz]
        except KeyError:
            print('\n\nERROR: restriction enzyme %s not found.' % (renz) +
                  'Use one of:\n\n' + ' '.join(sorted(RESTRICTION_ENZYMES)) +
                  '\n\n')
            raise KeyError()
        except AttributeError:
            pass

    # check skip: cannot skip when there is no previous output to reuse
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    # number of cpus (0 means "use all"); never exceed available cores
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # check paths
    if opts.mapper == 'gem' and not path.exists(opts.index):
        raise IOError('ERROR: index file not found at ' + opts.index)

    if not path.exists(opts.fastq):
        raise IOError('ERROR: FASTQ file not found at ' + opts.fastq)

    if not is_fastq(opts.fastq):
        raise IOError(
            ('ERROR: FASTQ file %s wrong format, check') % (opts.fastq))

    # windows come in as 'start:end' strings; TypeError means they were
    # already parsed (or are None)
    try:
        opts.windows = [[int(i) for i in win.split(':')]
                        for win in opts.windows]
    except TypeError:
        pass

    mkdir(opts.workdir)
    # write log
    log_format = '[MAPPING {} READ{}]   %(message)s'.format(
        opts.fastq, opts.read)

    # reset logging
    logging.getLogger().handlers = []

    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a+')
    except IOError:
        # fall back to a secondary log file if the primary is unwritable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a+')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log (close the read handle explicitly; the original
    # code leaked it)
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    if path.exists(vlog_path):
        with open(vlog_path) as vlog:
            # NOTE(review): readlines() returns a list of lines; if
            # get_dependencies_version() returns a single string this
            # comparison is always unequal -- confirm upstream intent
            uptodate = vlog.readlines() == dependencies
    else:
        uptodate = False
    if not uptodate:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # check mapper extra options
    if opts.mapper_param:
        if (len(opts.mapper_param) == 1 and
            ('-' in opts.mapper_param[0] or '--' in opts.mapper_param[0])):
            # Single string surrounded by quotes
            opts.mapper_param = opts.mapper_param[0].split()
        else:
            opts.mapper_param = dict([o.split(':') for o in opts.mapper_param])
    else:
        opts.mapper_param = {}
    if opts.mapper == 'gem' and opts.gem_version < 3:
        # whitelist of options understood by GEM v2
        gem_valid_option = set([
            "granularity", "q", "quality-format", "gem-quality-threshold",
            "mismatch-alphabet", "m", "e", "min-matched-bases",
            "max-big-indel-length", "s", "strata-after-best", "fast-mapping",
            "unique-mapping", "d", "D", "allow-incomplete-strata",
            "max-decoded-matches", "min-decoded-strata", "p",
            "paired-end-alignment", "b", "map-both-ends", "min-insert-size",
            "max-insert-size", "E", "max-extendable-matches",
            "max-extensions-per-match", "unique-pairing"
        ])
        for k in opts.mapper_param:
            if k not in gem_valid_option:
                raise NotImplementedError(
                    ('ERROR: option "%s" not a valid GEM option'
                     'or not suported by this tool.') % k)

    # create empty DB if don't exists
    dbpath = path.join(opts.workdir, 'trace.db')
    open(dbpath, 'a').close()

    # for lustre file system....
    if 'tmpdb' in opts and opts.tmpdb:
        dbdir = opts.tmpdb
        # tmp file
        dbfile = 'trace_%s' % (''.join(
            [ascii_letters[int(random() * 52)] for _ in range(10)]))
        opts.tmpdb = path.join(dbdir, dbfile)
        try:
            copyfile(path.join(opts.workdir, 'trace.db'), opts.tmpdb)
        except IOError:
            pass

    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        if 'tmpdb' in opts and opts.tmpdb:
            remove(path.join(dbdir, dbfile))
        exit('WARNING: exact same job already computed, see JOBs table above')
Example #25
0
def check_options(opts):
    """Validate and normalise GEM mapping options in-place.

    Loads options from a config file when given, resolves the GEM
    binary, validates the restriction enzyme, CPU count and input
    paths, configures logging inside the working directory, parses GEM
    extra parameters and aborts if the exact same job (by md5 digestion
    of parameters) was already computed.

    :param opts: argparse-style namespace of command-line options;
        it is mutated in place.
    :raises Exception: missing GEM binary.
    :raises IOError: missing index or FASTQ files.
    :raises KeyError: unknown restriction enzyme name.
    :raises NotImplementedError: unsupported GEM extra option.
    """
    if opts.cfg:
        get_options_from_cfg(opts.cfg, opts)

    # locate the GEM binary on the PATH
    opts.gem_binary = which(opts.gem_binary)
    if not opts.gem_binary:
        raise Exception('\n\nERROR: GEM binary not found, install it from:'
                        '\nhttps://sourceforge.net/projects/gemlibrary/files/gem-library/Binary%20pre-release%202/'
                        '\n - Download the GEM-binaries-Linux-x86_64-core_i3 if'
                        'have a recent computer, the '
                        'GEM-binaries-Linux-x86_64-core_2 otherwise\n - '
                        'Uncompress with "tar xjvf GEM-binaries-xxx.tbz2"\n - '
                        'Copy the binary gem-mapper to /usr/local/bin/ for '
                        'example (somewhere in your PATH).\n\nNOTE: GEM does '
                        'not provide any binary for MAC-OS.')

    # check RE name
    try:
        _ = RESTRICTION_ENZYMES[opts.renz]
    except KeyError:
        print('\n\nERROR: restriction enzyme not found. Use one of:\n\n'
              + ' '.join(sorted(RESTRICTION_ENZYMES)) + '\n\n')
        raise KeyError()
    except AttributeError:
        pass

    # check skip: cannot skip when there is no previous output to reuse
    if not path.exists(opts.workdir) and opts.skip:
        print('WARNING: can use output files, found, not skipping...')
        opts.skip = False

    # number of cpus (0 means "use all"); never exceed available cores
    if opts.cpus == 0:
        opts.cpus = cpu_count()
    else:
        opts.cpus = min(opts.cpus, cpu_count())

    # check paths
    if not path.exists(opts.index):
        raise IOError('ERROR: index file not found at ' + opts.index)

    if not path.exists(opts.fastq):
        raise IOError('ERROR: FASTQ file not found at ' + opts.fastq)

    # create tmp directory
    if not opts.tmp:
        opts.tmp = opts.workdir + '_tmp_r%d' % opts.read

    # windows come in as 'start:end' strings; TypeError means they were
    # already parsed (or are None)
    try:
        opts.windows = [[int(i) for i in win.split(':')]
                        for win in opts.windows]
    except TypeError:
        pass

    mkdir(opts.workdir)
    # write log
    log_format = '[MAPPING {} READ{}]   %(message)s'.format(opts.fastq, opts.read)

    # reset logging
    logging.getLogger().handlers = []

    # filemode 'a' replaces the original 'aw', which is not a valid
    # open() mode under Python 3 (and was a Python 2 print statement)
    try:
        print('Writing log to ' + path.join(opts.workdir, 'process.log'))
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log'),
                            filemode='a')
    except IOError:
        # fall back to a secondary log file if the primary is unwritable
        logging.basicConfig(level=logging.DEBUG,
                            format=log_format,
                            filename=path.join(opts.workdir, 'process.log2'),
                            filemode='a')

    # to display log on stdout also
    logging.getLogger().addHandler(logging.StreamHandler())

    # write version log (close the read handle explicitly; the original
    # code leaked it)
    vlog_path = path.join(opts.workdir, 'TADbit_and_dependencies_versions.log')
    dependencies = get_dependencies_version()
    if path.exists(vlog_path):
        with open(vlog_path) as vlog:
            # NOTE(review): readlines() returns a list of lines; if
            # get_dependencies_version() returns a single string this
            # comparison is always unequal -- confirm upstream intent
            uptodate = vlog.readlines() == dependencies
    else:
        uptodate = False
    if not uptodate:
        logging.info('Writing versions of TADbit and dependencies')
        with open(vlog_path, 'w') as vlog:
            vlog.write(dependencies)

    # check GEM mapper extra options ('opt:value' pairs)
    if opts.gem_param:
        opts.gem_param = dict([o.split(':') for o in opts.gem_param])
    else:
        opts.gem_param = {}
    # whitelist of options understood by GEM
    gem_valid_option = set(["granularity", "q", "quality-format",
                            "gem-quality-threshold", "mismatch-alphabet",
                            "m", "e", "min-matched-bases",
                            "max-big-indel-length", "s", "strata-after-best",
                            "fast-mapping", "unique-mapping", "d", "D",
                            "allow-incomplete-strata", "max-decoded-matches",
                            "min-decoded-strata", "p", "paired-end-alignment",
                            "b", "map-both-ends", "min-insert-size",
                            "max-insert-size", "E", "max-extendable-matches",
                            "max-extensions-per-match", "unique-pairing"])
    for k in opts.gem_param:
        if k not in gem_valid_option:
            raise NotImplementedError(('ERROR: option "%s" not a valid GEM option'
                                       'or not suported by this tool.') % k)
    # check if job already run using md5 digestion of parameters
    if already_run(opts):
        exit('WARNING: exact same job already computed, see JOBs table above')