Python get_file_partsの例、chipsequtil.get_file_parts Pythonの例

コード例 #1

0

ファイルを表示

ファイル: nib.py プロジェクト: hjanime/OmicsIntegrator

    def __init__(self, nib_fns=None, nib_dirs=None):
        '''Populate the NibDB from explicit .nib files and/or directories.

        *nib_fns* is a path, or an iterable of paths, to specific .nib files
        desired for the NibDB.  *nib_dirs* is a path, or an iterable of paths,
        to directories; every file matching ``*.nib`` directly inside them is
        added to the NibDB.  Either argument may be omitted (or passed as
        None), which means "nothing supplied".

        Entries are keyed by the base name that get_file_parts reports for
        each path.  Directory hits are registered first and explicitly passed
        files last, so explicitly passed files take precedence, without
        warning, when names collide.
        '''
        SeqDB.__init__(self)

        # None (instead of a [] default object shared between calls) stands
        # for "nothing given"; a bare string is treated as a single path
        if nib_dirs is None:
            nib_dirs = []
        elif isinstance(nib_dirs, str):  # user just provided single directory
            nib_dirs = [nib_dirs]

        if nib_fns is None:
            nib_fns = []
        elif isinstance(nib_fns, str):  # user just provided single file
            nib_fns = [nib_fns]

        # find all *.nib files in the directories passed
        dir_nibs = []
        for d in nib_dirs:
            dir_nibs.extend(glob.glob(os.path.join(d, '*.nib')))

        # for each .nib found, add to db
        # directory hits come first so that, on a name collision, the
        # explicitly listed file overwrites them; list() lets callers pass a
        # tuple or any other iterable of paths, not only a list
        for fn in dir_nibs + list(nib_fns):

            # open the nib file; only the base name is needed as the db key
            _path, _fname, nib_base, _ext = get_file_parts(fn)
            fn, nib_f = _nib_fd(fn)
            self._db_map[nib_base] = nib_f

            # store some info
            self.db_info[nib_base]['path'] = fn
            nbases = validate_nib_file(self._db_map[nib_base])
            self.db_info[nib_base]['nbases'] = nbases

コード例 #2

0

ファイルを表示

ファイル: nib.py プロジェクト: aabaker99/OmicsIntegrator

    def __init__(self, nib_fns=None, nib_dirs=None):
        '''Build the NibDB from individual .nib files and whole directories.

        *nib_fns* is a path, or an iterable of paths, to specific .nib files
        desired for the NibDB.  *nib_dirs* is a path, or an iterable of paths,
        to directories containing .nib files such that every ``*.nib`` file
        directly inside them is added to the NibDB.  Leaving an argument out
        (or passing None) is the same as passing an empty list.

        Explicitly passed files take precedence over those found in
        directories when names collide: they are processed last and silently
        overwrite the earlier entry.
        '''
        SeqDB.__init__(self)

        # normalise both arguments: None -> nothing, single string -> one path.
        # None defaults avoid sharing one list object across every call.
        nib_dirs = [] if nib_dirs is None else nib_dirs
        if isinstance(nib_dirs, str):
            nib_dirs = [nib_dirs]
        nib_fns = [] if nib_fns is None else nib_fns
        if isinstance(nib_fns, str):
            nib_fns = [nib_fns]

        # every *.nib file in the directories passed, in directory order ...
        candidates = [hit
                      for d in nib_dirs
                      for hit in glob.glob(os.path.join(d, '*.nib'))]
        # ... followed by the explicit files, which therefore win collisions;
        # extend() accepts tuples/generators where list + tuple would raise
        candidates.extend(nib_fns)

        for fn in candidates:

            # open the nib file and register it under its base name
            _path, _fname, nib_base, _ext = get_file_parts(fn)
            fn, nib_f = _nib_fd(fn)
            self._db_map[nib_base] = nib_f

            # store some info
            self.db_info[nib_base]['path'] = fn
            nbases = validate_nib_file(self._db_map[nib_base])
            self.db_info[nib_base]['nbases'] = nbases

コード例 #3

0

ファイルを表示

ファイル: nib.py プロジェクト: hjanime/OmicsIntegrator

def get_nib_header_batch(nib, queries):
    '''Create nibFrag-style header lines for a batch of queries.

    *queries* is an iterable of records holding at most six fields,
    (start,end,strand,name,dbHeader,tbaHeader), as specified by the original
    nibFrag utility.  Only start, end, and strand are required; missing
    trailing fields are treated as None.  An end of -1 is replaced by the
    base count reported by validate_nib_file.

    Returns a list with one newline-terminated header string per query.'''

    opened_path, _handle = _nib_fd(nib)
    _dir, _fname, seq_name, _ext = get_file_parts(opened_path)
    total_bases = validate_nib_file(nib)

    template = '>%(name)s%(db)s\n'
    result = []

    for query in queries:

        # pad the record with None so that all six fields can be unpacked
        padded = list(query)
        padded += [None] * (6 - len(padded))
        start, end, strand, name, dbHeader, tbaHeader = padded

        if end == -1:
            end = total_bases

        # a caller-supplied name wins; otherwise use path:start-end
        if name:
            label = name
        else:
            label = opened_path + ':%d-%d' % (start, end)
        db_part = ''

        if tbaHeader:
            # the name is dropped when tbaHeader is supplied and dbHeader is
            # not (the original comment notes nibFrag ignores it in that case)
            if not dbHeader:
                label = ''
            db_part = '%s.%s:%d-%d of %d' % (tbaHeader, seq_name, start,
                                             end, total_bases)
        if dbHeader:
            # dbHeader formatting replaces whatever tbaHeader produced
            db_part = ':%s.%s:%d-%d:%s:%d' % (dbHeader, seq_name, start,
                                              end, strand, total_bases)

        result.append(template % {'name': label, 'db': db_part})

    return result

コード例 #4

0

ファイルを表示

ファイル: nib.py プロジェクト: aabaker99/OmicsIntegrator

def get_nib_header_batch(nib, queries):
    '''Batch construction of nibFrag headers.

    Each item of *queries* is a sequence of up to six fields in the order
    (start,end,strand,name,dbHeader,tbaHeader), following the original
    nibFrag utility; start, end, and strand are mandatory and the remaining
    fields default to None.  When end is -1 the number of bases returned by
    validate_nib_file is used instead.

    Returns the headers as a list of strings, each ending in a newline.'''

    nib_path, _nib_f = _nib_fd(nib)
    _nib_dir, _nib_fn, nib_base, _nib_ext = get_file_parts(nib_path)
    nbases = validate_nib_file(nib)

    def _render(rec):
        # build the header line for one query record
        values = list(rec)
        values.extend([None] * (6 - len(values)))  # defaults for absent fields
        start, end, strand, name, dbHeader, tbaHeader = values

        if end == -1:
            end = nbases

        parts = {
            'name': name if name else nib_path + ':%d-%d' % (start, end),
            'db': '',
        }

        if tbaHeader:
            # with tbaHeader but no dbHeader the name is blanked (the original
            # comment attributes this to nibFrag's own behaviour)
            if not dbHeader:
                parts['name'] = ''
            parts['db'] = '%s.%s:%d-%d of %d' % (
                tbaHeader, nib_base, start, end, nbases)
        if dbHeader:
            # overrides the tbaHeader form when both are supplied
            parts['db'] = ':%s.%s:%d-%d:%s:%d' % (
                dbHeader, nib_base, start, end, strand, nbases)

        return '>%(name)s%(db)s\n' % parts

    return [_render(rec) for rec in queries]

コード例 #5

0

ファイルを表示

ファイル: chipseq_pipeline_wo_ctrl.py プロジェクト: dvanderk/chipsequtil

    # parse command line arguments
    opts, args = parser.parse_args(sys.argv[1:])

    if len(args) < 3 :
        parser.error('Must provide two non-option arguments')

    # filenames and paths
    organism, experiment_fn, control_fn = args[0:3]
    control_fn = None
    if len(args) > 3 :
        control_fn = args[2]

    org_settings = get_org_settings(organism)
    refseq_fn = org_settings['annotation_path']

    exp_fpath,exp_fname,exp_fbase,exp_fext = get_file_parts(experiment_fn)
    exp_wrk_dir = os.path.abspath('.exp_%s_%s'%(exp_fbase,opts.exp_name))

    if control_fn :
        cnt_fpath,cnt_fname,cnt_fbase,cnt_fext = get_file_parts(control_fn)
        cnt_wrk_dir = os.path.abspath('.cnt_%s_%s'%(cnt_fbase,opts.exp_name))

    # the pipeline
    pipeline = Pypeline()

    steps = []

    # split up files
    calls = ["mkdir %s"%exp_wrk_dir,
             "split_file.py %s --outdir=%s %s"%(opts.split_args,exp_wrk_dir,experiment_fn),]
    if control_fn :

コード例 #6

0

ファイルを表示

ファイル: gerald_to_bed.py プロジェクト: dvanderk/chipsequtil



if __name__ == '__main__' :

    opts,args = parser.parse_args(sys.argv[1:])

    if len(args) == 0 :
        parser.print_usage()
        sys.exit(1)

    gerald_fns = args

    # step through the files
    for gerald_fn in gerald_fns :
        path,fn,fnbase,fnext = get_file_parts(gerald_fn)
        bed_lines = []


        # where to write output to
        if opts.stdout :
            f_out = sys.stdout
        else :
            f_out = open(os.path.join(path,fnbase+'.bed'),'w')

        # process input
        gerald_d = DictReader(open(gerald_fn),fieldnames=GERALDOutput.FIELD_NAMES,delimiter='\t')
        for line_d in gerald_d :
            if (opts.pass_only and line_d['filtering'] == 'Y' and line_d['match_pos'] != '') or (not opts.pass_only and line_d['match_pos'] != '') :

                if opts.chromo_strip is not None :

コード例 #7

0

ファイルを表示

ファイル: split_qsub.py プロジェクト: dvanderk/chipsequtil

if __name__ == '__main__' :

    opts, args = parser.parse_args(sys.argv[1:])

    utility, filenames = args[0], args[1:]

    # try to find the utility
    abs_utility = os.path.abspath(utility)
    if not os.path.exists(abs_utility) :
        # look on the path
        abs_utility = Popen('which %s'%utility,shell=True,stdout=PIPE,stderr=PIPE).communicate()[0].strip()
        if not os.path.exists(abs_utility) :
            raise Exception("Utility %s could not be found in the local directory or on the user's path, exiting"%utility)
            sys.exit(1)

    upath,uname,ubase,uext = get_file_parts(abs_utility)

    runscript_tmpl = """
#!/bin/bash

#$ -N %(jobname)s
#$ -S /bin/sh
#$ -o %(stdout)s
#$ -e %(stderr)s
#$ -cwd
export PYTHONPATH=%(pythonpath)s:${PYTHONPATH}

%(utility)s %(utilargs)s %(filename)s"""

    suffix = ubase if opts.suffix is None else opts.suffix
    for fn in filenames :

コード例 #8

0

ファイルを表示

ファイル: nibFrag.py プロジェクト: dvanderk/chipsequtil

# Register the two header-format options on the nibFrag option group; both
# default to None when not given on the command line.
nibFrag_grp.add_option('--dbHeader',dest='dbHeader',default=None,help='Add full database info to the header, with or without -name option')
nibFrag_grp.add_option('--tbaHeader',dest='tbaHeader',default=None,help='Format header for compatibility with tba, takes database name as argument')
# attach the populated group to the main parser so its options are parsed
parser.add_option_group(nibFrag_grp)


if __name__ == '__main__' :

    opts, args = parser.parse_args(sys.argv[1:])

    if len(args) < 1 :
        parser.print_usage()
        parser.exit(1)

    # setup
    nib_path = args[0]
    nib_dir,nib_fn,nib_base,nib_ext = get_file_parts(nib_path)

    queries = []
    if opts.batch :

        if len(args) < 2 :
            parser.error('Two arguments must be supplied in batch mode')

        batch_fns = args[1:]

        for fn in batch_fns :
            if opts.batch_format == 'BED' :
                for bed in BEDFile(fn) :
                    if bed['chrom'] != nib_base :
                        warnings.warn('Chromosome in BED line %s does not match file %s, skipping'%(bed['chrom'],nib_base))
                    else :

コード例 #9

0

ファイルを表示

ファイル: filter_macs_peaks.py プロジェクト: dvanderk/chipsequtil

            filter_str = filter_str.replace('>=','_GTE_')
            filter_str = filter_str.replace('<=','_LTE_')
            filter_str = filter_str.replace('>','_GT_')
            filter_str = filter_str.replace('<','_LT_')
            fn_str += '_%s'%filter_str

        if opts.top is not None :
            fn_str += '_top%d'%opts.top

        if len(opts.sort_by) != 0 :
            fn_str += '_sortby_%s'%opts.sort_by

        if opts.shuffle :
            fn_str += '_shuffled'

        macs_path,macs_fn,macs_basefn,macs_ext = get_file_parts(args[0])
        encoded_fn = os.path.join(macs_path,macs_basefn+fn_str+macs_ext)
        if opts.print_encoded_fn :
            sys.stdout.write(encoded_fn)
            sys.exit(0)
        else :
            out_f = open(encoded_fn,'w')
    elif opts.output :
        out_f = open(opts.output,'w')
    else :
        out_f = sys.stdout

    # parse the filters
    field_filters = defaultdict(list)
    for filter in opts.filters :
        field, filter_cond = parse_filter(filter)

コード例 #10

0

ファイルを表示

ファイル: kg_to_gff.py プロジェクト: dvanderk/chipsequtil

from optparse import OptionParser

from chipsequtil import KnownGeneFile, get_file_parts

# Module-level configuration for the knownGene -> GFF conversion script.
# NOTE(review): the input paths are hard-coded here, and the __main__ block
# passes this list to parser.parse_args() rather than sys.argv[1:], so
# command-line arguments appear to be ignored -- confirm this is intended.
#args = ['/nfs/genomes/mouse_gp_jul_07/anno/knownGene-2010-07-08.txt','/nfs/genomes/mouse_gp_jul_07/anno/kgXref-2010-07-08.txt']
args = ['/nfs/genomes/mouse_gp_jul_07/anno/knownGene-2010-08-03.gtf','/nfs/genomes/mouse_gp_jul_07/anno/kgXref-2010-07-08.txt']
# NOTE(review): usage advertises a single positional argument, but the
# __main__ block also reads args[1] as the kgXref file.
usage = '%prog <knownGene annotation>'
description = 'convert a UCSC knownGene annotation to GFF'
# no options are added to the parser here; it supplies usage/description text
parser = OptionParser(usage=usage,description=description)


if __name__ == '__main__' :

    opts, args = parser.parse_args(args)

    kg_path,kg_fn,kg_base,kg_ext = get_file_parts(args[0])
    #kg_f = KnownGeneFile(args[0])

    # xref for finding gene symbols
    kgXref_fn = args[1]
    kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','proAcc','description']
    xref_map = dict([(x['kgID'],x) for x in DictReader(open(kgXref_fn),delimiter='\t',fieldnames=kgXref_fieldnames)])

    gff_headers = ['seqname','source','feature','start','end','score','strand','frame','attributes']
    gff_reader = DictReader(open(args[0]),delimiter='\t',fieldnames=gff_headers)
    gff_writer = DictWriter(sys.stdout,delimiter='\t',fieldnames=gff_headers,quotechar='',quoting=QUOTE_NONE,lineterminator='\n')
    #gff_writer.writerow(dict([(x,x) for x in gff_headers]))

    for i,rec in enumerate(gff_reader) :
        #d = {}
        #d['seqname'] = rec['chrom']

コード例 #11

0

ファイルを表示

ファイル: wqsub.py プロジェクト: dvanderk/chipsequtil

        else :
            other_args.append(arg)

    opts, args = parser.parse_args(wqsub_args)

    if len(other_args) == 0 :
        parser.error('Must provide a command')

    command = ' '.join(other_args)
    runscript_tmpl = templates[opts.drm]
    # set up job parameters
    cmd_exe = os.path.basename(other_args[0])
    jobname = opts.wqsub_name+'_'+cmd_exe
    stdout_fn = jobname+opts.wqsub_ext
    stdout = os.path.abspath(stdout_fn)
    fpath,fname,fbase,fext = get_file_parts(stdout)
    stderr = os.path.abspath(os.path.join(jobname+'.err'))

    # get the user's current environment and put it into the execute script
    if opts.wqsub_no_env :
        env_str = '# local environment variables omitted'
    else :
        env_str = '#%s -V'%drm_symb[opts.drm]

    # construct the script
    addnl_params = []
    for addnl in opts.drm_args :
        addnl_params.append('#%s %s'%(drm_symb[opts.drm],addnl))
    addnl_params = '\n'.join(addnl_params)

    job_dict = {'jobname':fname,