Esempio n. 1
0
class List2TrackForm(BaseForm):
    """Form collecting an assembly, a feature type, an ID list and a format.

    The assembly and feature type are required; the uploaded ID list and the
    output format carry no validator here.
    """
    child = twd.HidingTableLayout()
    # Reference genome, chosen among the assemblies GenRep reports.
    assembly = twf.SingleSelectField(
        label='Assembly: ',
        options=genrep.GenRep().assemblies_available(),
        help_text='Reference genome',
        validator=twc.Validator(required=True),
    )
    # Kind of genomic feature the uploaded IDs refer to.
    feature_type = twf.SingleSelectField(
        label='Feature type: ',
        options=['genes', 'exons', 'transcripts'],
        prompt_text=None,
        # Fixed typo in the user-facing text ("yo want" -> "you want").
        help_text='Choose the kind of genomic features you want to annotate',
        validator=twc.Validator(required=True),
    )
    # Uploaded file holding the list of IDs.
    ids_list = twf.FileField(
        label='IDs list: ',
        help_text='Select the file with the list of IDs',
    )
    format = twf.SingleSelectField(
        label='Output format: ',
        options=["sql", "bed"],
        prompt_text=None,
        help_text='Format of the output file',
    )
    submit = twf.SubmitButton(id="submit", value="Submit")
Esempio n. 2
0
class SmoothingForm(BaseForm):
    """Form made of a signal file, an assembly and sliding-window settings.

    The signal file and both window parameters are required; the window
    parameters are validated as integers.
    """
    # Required signal track upload.
    track = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select signal file (e.g. bedgraph)',
        label='Signal: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    # Window geometry; defaults come from module-level constants.
    window_size = twf.TextField(
        help_text='Size of the sliding window',
        value=size_def,
        validator=twc.IntValidator(required=True),
        label='Window size: ',
    )
    window_step = twf.TextField(
        help_text='Size of steps between windows',
        value=step_def,
        validator=twc.IntValidator(required=True),
        label='Window step: ',
    )
    # When ticked, size and step are counted in features, not basepairs.
    by_feature = twf.CheckBox(
        help_text='Will count size and step parameters in number of features, not in basepairs',
        value=False,
        label='Window size in features (not basepairs): ',
    )
    format = twf.SingleSelectField(
        help_text='Format of the output file',
        prompt_text=None,
        options=['sql', 'bedGraph', 'wig', 'bigWig', 'sga'],
        label='Output format: ',
    )
    submit = twf.SubmitButton(value="Submit", id="submit")
Esempio n. 3
0
class GenomeGraphForm(BaseForm):
    """Form with three optional multi-file groups and an assembly selector.

    The groups hold signals plotted above the axis, signals plotted below
    the axis, and features plotted as segments; none of them is required.
    """

    # Signals drawn above the axis.
    class SigMultiP(twb.BsMultiple):
        label = 'Positive Signals: '
        signals_plus = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=False),
            help_text='Signal files (e.g. bedgraph) to plot above the axis',
            label=' ',
        )

    # Signals drawn below the axis.
    class SigMultiM(twb.BsMultiple):
        label = 'Negative Signals: '
        signals_minus = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=False),
            help_text='Signal files (e.g. bedgraph) to plot below the axis',
            label=' ',
        )

    # Features drawn as segments on the axis.
    class FeatMulti(twb.BsMultiple):
        label = 'Features: '
        features = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=False),
            help_text='Features files (e.g. bed) to plot as segments on the axis',
            label=' ',
        )

    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    submit = twf.SubmitButton(value="Plot", id="submit")
Esempio n. 4
0
class DESeqForm(BaseForm):
    """Form accepting either a scores table or two groups of signal tracks.

    ``input_type`` maps 'Table' to the ``table`` field and 'Signals' to the
    two signal groups plus ``feature_type`` and ``assembly``.
    ``feature_type`` in turn maps to the custom feature file or to the
    promoter distance fields.
    """
    child = twd.HidingTableLayout()
    input_type = twd.HidingRadioButtonList(
        help_text='Select input type (Formatted table, or signal tracks)',
        value='Table',
        mapping={
            'Table': ['table'],
            'Signals': ['Group1', 'Group2', 'feature_type', 'assembly'],
        },
        options=['Table', 'Signals'],
        label='Input type: ',
    )
    table = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select scores table',
        label='Table: ',
    )

    # First group of signal tracks.
    class Group1(twb.BsMultiple):
        label = 'Signals group 1: '
        signals1 = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=True),
            help_text='Select signal files (position and score, e.g. bedgraph)',
            label=' ',
        )

    # Second group of signal tracks.
    class Group2(twb.BsMultiple):
        label = 'Signals group 2: '
        signals2 = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=True),
            help_text='Select signal files (position and score, e.g. bedgraph)',
            label=' ',
        )

    feature_type = twd.HidingSingleSelectField(
        validator=twc.Validator(required=True),
        help_text='Choose a feature set or upload your own',
        mapping={
            ftypes[-1][0]: ['features'],
            1: ['upstream', 'downstream'],
        },
        prompt_text=None,
        options=ftypes,
        label='Feature type: ',
    )
    features = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select a feature file (e.g. bed)',
        label='Custom feature set: ',
    )
    # Promoter extent around the TSS; defaults are module-level constants.
    upstream = twf.TextField(
        help_text='Size of promoter upstream of TSS',
        value=prom_up_def,
        validator=twc.IntValidator(required=True),
        label='Promoter upstream distance: ',
    )
    downstream = twf.TextField(
        help_text='Size of promoter downstream of TSS',
        value=prom_down_def,
        validator=twc.IntValidator(required=True),
        label='Promoter downstream distance: ',
    )

    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        validator=twc.Validator(required=True),
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    submit = twf.SubmitButton(value="Submit", id="submit")
Esempio n. 5
0
class PairsPlotForm(BaseForm):
    """Form made of signal files, a feature selection and plot settings.

    Signal files are required. ``feature_type`` maps to the custom feature
    file or to the promoter distance fields, and ``mode`` maps plot type 0
    to the ``cormax`` field.
    """

    # Required signal tracks.
    class SigMulti(twb.BsMultiple):
        label = 'Signal: '
        signals = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=True),
            help_text='Select signal file (e.g. bedgraph)',
            label=' ',
        )

    child = twd.HidingTableLayout()
    feature_type = twd.HidingSingleSelectField(
        validator=twc.Validator(required=True),
        help_text='Choose a feature set or upload your own',
        mapping={
            ftypes[-1][0]: ['features'],
            1: ['upstream', 'downstream'],
        },
        prompt_text=None,
        options=ftypes,
        label='Feature type: ',
    )
    features = twb.BsFileField(
        validator=twb.BsFileFieldValidator(),
        help_text='Select a feature file (e.g. bed)',
        label='Custom feature set: ',
    )
    # Promoter extent around the TSS; defaults are module-level constants.
    upstream = twf.TextField(
        help_text='Size of promoter upstream of TSS',
        value=prom_up_def,
        validator=twc.IntValidator(),
        label='Promoter upstream distance: ',
    )
    downstream = twf.TextField(
        help_text='Size of promoter downstream of TSS',
        value=prom_down_def,
        validator=twc.IntValidator(),
        label='Promoter downstream distance: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )

    # Feature files to highlight (no required flag on the validator).
    class HiMulti(twb.BsMultiple):
        label = 'Features to highlight: '
        highlights = twb.BsFileField(
            validator=twb.BsFileFieldValidator(),
            help_text='Select a feature file (e.g. bed)',
            label=' ',
        )

    mode = twd.HidingSingleSelectField(
        prompt_text=None,
        mapping={0: ['cormax']},
        options=plot_types,
        label='Plot type: ',
    )
    cormax = twf.TextField(
        help_text='Maximum distance in bp to compute correlations',
        value=_cormax,
        validator=twc.IntValidator(),
        label='Spatial range for correlation: ',
    )
    submit = twf.SubmitButton(value="Plot", id="submit")
Esempio n. 6
0
 def __call__(self, **kw):
     """Draw signals and features along the genome into a PDF file.

     Reads ``assembly`` plus the ``SigMultiP``/``SigMultiM``/``FeatMulti``
     groups from ``kw``, opens every listed file that exists on disk as a
     track, and passes their contents to ``genomeGraph``.

     :param kw: form values; ``assembly`` falls back to ``'guess'``.
     :returns: the value of ``self.display_time()``.
     :raises ValueError: if none of the given files could be opened.
     """
     assembly = kw.get('assembly') or 'guess'

     def _listed(group, field):
         # The stored value may be a single item rather than a list, and
         # the group itself may be missing or None: always hand back a list.
         values = (kw.get(group) or {}).get(field, [])
         return values if isinstance(values, list) else [values]

     def _open_existing(paths):
         # Paths that do not exist on disk are silently skipped.
         return [track(p, chrmeta=assembly) for p in paths
                 if os.path.exists(p)]

     sptracks = _open_existing(_listed('SigMultiP', 'signals_plus'))
     smtracks = _open_existing(_listed('SigMultiM', 'signals_minus'))
     ftracks = _open_existing(_listed('FeatMulti', 'features'))
     snames = [t.name for t in sptracks + smtracks + ftracks]

     # Chromosome metadata comes from the first track actually opened
     # (positive signals, then negative signals, then features). Testing
     # the opened tracks rather than the requested paths avoids an
     # IndexError when the listed feature files do not exist.
     opened = sptracks or smtracks or ftracks
     if not opened:
         raise ValueError("No data provided")
     chrmeta = opened[0].chrmeta

     if assembly in [x[0] for x in genrep.GenRep().assemblies_available()]:
         chrnames = genrep.Assembly(assembly).chrnames
     else:
         # Unknown assembly: order chromosomes by decreasing length.
         by_length = sorted(((v['length'], c) for c, v in chrmeta.items()),
                            reverse=True)
         chrnames = [c for _, c in by_length]

     pdf = self.temporary_path(fname='genome_graph.pdf')
     _fs = ['chr', 'start', 'end', 'score']
     _ff = ['chr', 'start', 'end', 'name']
     genomeGraph([(c, chrmeta[c]['length']) for c in chrnames],
                 [sig.read(fields=_fs) for sig in sptracks],
                 [sig.read(fields=_fs) for sig in smtracks],
                 [feat.read(fields=_ff) for feat in ftracks],
                 output=pdf,
                 new=True,
                 last=True,
                 legend=snames)
     self.new_file(pdf, 'genome_graph')
     return self.display_time()
Esempio n. 7
0
 def _get_chrmeta(self, chrmeta=None):
     """:param chrmeta: (str or dict) assembly name, or dict of the type {chr: {'length': 1234}}."""
     if isinstance(chrmeta, dict):
         return chrmeta
     if isinstance(chrmeta, basestring) and not (str(chrmeta) == "guess"):
         self.assembly = chrmeta
     if self.assembly is None:
         return {}
     from bbcflib import genrep
     if genrep.GenRep().assemblies_available(self.assembly):
         self.assembly = genrep.Assembly(self.assembly)
         return self.assembly.chrmeta
     else:
         self.assembly = None
         return {}
Esempio n. 8
0
class RatiosForm(BaseForm):
    """Form made of two required tracks and the ratio computation settings.

    Numerator and denominator files are required. Window size is validated
    as an integer; pseudo-count and threshold as floats between 0 and 1000.
    """
    numerator = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select the track with the numerators',
        label='Numerator: ',
    )
    denominator = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select the track with the denominators',
        label='Denominator: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    format = twf.SingleSelectField(
        help_text='Format of the output file',
        validator=twc.Validator(required=True),
        options=["bedGraph", "sql", "wig", "bigWig", "sga"],
        prompt_text=None,
        label='Output format ',
    )
    # Numeric settings; default values are module-level constants.
    window_size = twf.TextField(
        help_text='Size of the sliding window in bp (default: 1)',
        value=size_def,
        validator=twc.IntValidator(),
        label='Window size: ',
    )
    pseudo = twf.TextField(
        help_text='Value to be added to both signals (default: 0.5)',
        value=pseudo_def,
        validator=twb.FloatValidator(min=0, max=1000),
        label='Pseudo-count: ',
    )
    threshold = twf.TextField(
        help_text='This sets ratio=1 at each genomic position satisfying numerator value < threshold (default: 0)',
        value=threshold_def,
        validator=twb.FloatValidator(min=0, max=1000),
        label='Threshold: ',
    )
    # Optional post-processing switches, both off by default.
    log = twf.CheckBox(
        help_text='Computes the log2 of the ratios',
        value=False,
        label='Log ratios: ',
    )
    distribution = twf.CheckBox(
        help_text='Creates a graphical representation of the distributions of the ratios based on a sample of genomic regions',
        value=False,
        label='Plot distribution: ',
    )
    submit = twf.SubmitButton(value="Submit", id="submit")
Esempio n. 9
0
class OverlapForm(BaseForm):
    """Form made of a features file, a filter file, a format and an assembly.

    Both uploads and the output format are required.
    """
    child = twd.HidingTableLayout()
    features = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Upload your own file',
        label='Features file: ',
    )
    filter = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Upload your own file',
        label='Filter file: ',
    )
    format = twf.SingleSelectField(
        help_text='Format of the output file',
        validator=twc.Validator(required=True),
        options=["txt", "bed", "sql", "bedGraph", "bigWig"],
        label='Output format: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    submit = twf.SubmitButton(value="Submit", id="submit")
Esempio n. 10
0
class CombineForm(BaseForm):
    """Form made of a required group of tracks, a format and an assembly."""
    child = twd.HidingTableLayout()

    # Files to combine; at least one is required by the validator.
    class TrackMulti(twb.BsMultiple):
        label = 'Tracks: '
        tracks = twb.BsFileField(
            validator=twb.BsFileFieldValidator(required=True),
            help_text='Select files to combine',
            label=' ',
        )

    format = twf.SingleSelectField(
        help_text='Format of the output file',
        validator=twc.Validator(required=True),
        prompt_text=None,
        options=["sql", "bed", "sga"],
        label='Output format: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    submit = twf.SubmitButton(value="Quantify", id="submit")
Esempio n. 11
0
class Table2TracksForm(BaseForm):
    """Form made of a table file, column ids, a format and an assembly.

    The table and the column id list are required; the output format is
    explicitly optional.
    """
    table = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select table',
        label='Table: ',
    )
    # Free-text, comma separated list of column ids.
    id_columns = twf.TextField(
        help_text='comma separated list of columns id for which signal tracks will be generated (e.g. 3,5)',
        value='',
        validator=twc.Validator(required=True),
        label='columns id: ',
    )
    format = twf.SingleSelectField(
        help_text='Output file(s) format (default: bedGraph)',
        validator=twc.Validator(required=False),
        options=["sql", "bedgraph", "bigwig", "wig"],
        label='Output format: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    submit = twf.SubmitButton(value="Submit", id="submit")
Esempio n. 12
0
class FileConvertForm(BaseForm):
    """Form made of an input file and a target format with dependent fields.

    The ``to`` selector maps 'sql' to the ``dtype`` and ``assembly`` fields
    and 'bigwig' to the ``assembly`` field.
    """
    hover_help = True
    show_errors = True
    infile = twb.BsFileField(
        validator=twb.BsFileFieldValidator(required=True),
        help_text='Select file.',
        label='File: ',
    )
    child = twd.HidingTableLayout()
    to = twd.HidingSingleSelectField(
        help_text='Select the format of your result',
        validator=twc.Validator(required=True),
        mapping={
            'sql': ['dtype', 'assembly'],
            'bigwig': ['assembly'],
        },
        prompt_text=None,
        options=format_list,
        label='Output format: ',
    )
    dtype = twf.SingleSelectField(
        help_text='Choose sql data type attribute',
        options=['quantitative', 'qualitative'],
        prompt_text=None,
        label='Output datatype: ',
    )
    assembly = twf.SingleSelectField(
        help_text='Reference genome',
        options=genrep.GenRep().assemblies_available(),
        label='Assembly: ',
    )
    submit = twf.SubmitButton(value="Convert", id="submit")
Esempio n. 13
0
class MergeTracksForm(BaseForm):
    """Form made of forward/reverse density files and merge settings.

    Both density files and the integer shift are required. The merge method
    defaults to 'mean'.
    """
    forward = twb.BsFileField(label='Forward: ',
                              help_text='Select forward density file',
                              validator=twb.BsFileFieldValidator(required=True))
    reverse = twb.BsFileField(label='Reverse: ',
                              help_text='Select reverse density file',
                              validator=twb.BsFileFieldValidator(required=True))
    assembly = twf.SingleSelectField(label='Assembly: ',
                                     options=genrep.GenRep().assemblies_available(),
                                     help_text='Reference genome')
    # The help text is built by implicit string concatenation: a backslash
    # continuation inside the literal would embed the indentation of the
    # next source line in the text shown to the user.
    shift = twf.TextField(label='Shift: ',
                          validator=twc.IntValidator(required=True),
                          value=0,
                          help_text='Enter positive downstream shift ([fragment_size-read_length]/2), '
                                    'or a negative value to estimate shift by cross-correlation')
    format = twf.SingleSelectField(label='Output format: ',
                                   options=['sql','bed','bedGraph','wig','bigWig','sga'],
                                   prompt_text=None,
                                   help_text='Format of the output file', )
    # How the forward and reverse scores are combined.
    method = twf.RadioButtonList(label='Method: ',
                                 options=['mean','min','max','geometric','median','sum'],
                                 value='mean',
                                 help_text='Select the score combination method')
    submit = twf.SubmitButton(id="submit", value='Merge tracks')
Esempio n. 14
0
    def __call__(self,opts):
        self.opts = opts
        if os.path.exists(self.opts.wdir):
            os.chdir(self.opts.wdir)
        else:
            raise Usage("Working directory '%s' does not exist." %self.opts.wdir)

##### Connect to Minilims, recover global variables, fetch job info
        self.minilims = os.path.join(self.opts.basepath,self.name+"_minilims")
        M = MiniLIMS(self.minilims)
        if not((self.opts.key != None or (self.opts.config and os.path.exists(self.opts.config)))):
            raise Usage("Need a job key or a configuration file")
        if self.opts.key:
            self.globals = use_pickle(M, "global variables")
            htss = frontend.Frontend( url=self.globals['hts_mapseq']['url'] )
            self.job = htss.job( self.opts.key )
            [M.delete_execution(x) for x in \
                 M.search_executions(with_description=self.opts.key,fails=True)]
            if self.job.options.get("config_file"):
                if os.path.exists(self.job.options["config_file"]):
                    self.opts.config = os.path.abspath(self.job.options["config_file"])
                elif os.path.exists("config.txt"):
                    self.opts.config = os.path.abspath("config.txt")
            if self.opts.config and os.path.exists(self.opts.config):
                (self.job,self.globals) = frontend.parseConfig( self.opts.config, self.job, self.globals )
        elif os.path.exists(self.opts.config):
            (self.job,self.globals) = frontend.parseConfig( self.opts.config )
            self.opts.key = self.job.description
        else:
            raise Usage("Need either a job key (-k) or a configuration file (-c).")
##### Genrep instance
        if 'fasta_file' in self.job.options:
            if os.path.exists(self.job.options['fasta_file']):
                self.job.options['fasta_file'] = os.path.abspath(self.job.options['fasta_path'])
            else:
                for ext in (".fa",".fa.gz",".tar.gz"):
                    if os.path.exists("ref_sequence"+ext):
                        self.job.options['fasta_file'] = os.path.abspath("ref_sequence"+ext)
            if not os.path.exists(self.job.options['fasta_file']):
                raise Usage("Don't know where to find fasta file %s." %self.job.options["fasta_file"])
        g_rep = genrep.GenRep( url=self.globals.get("genrep_url"),
                               root=self.globals.get("bwt_root") )
##### Configure facility LIMS
        if 'lims' in self.globals:
            from bbcflib import daflims
            self.job.dafl = dict((loc,daflims.DAFLIMS( username=self.globals['lims']['user'],
                                                       password=pwd ))
                                 for loc,pwd in self.globals['lims']['passwd'].iteritems())
########################################################################
##########################  EXECUTION  #################################
########################################################################
##### Logging
        logfile_name = os.path.abspath(self.opts.key+".log")
        debugfile_name = os.path.abspath(self.opts.key+".debug")
        self.logfile = open(logfile_name,'w')
        self.debugfile = open(debugfile_name,'w')
        self.debug_write(json.dumps(self.globals)+"\n")
        with execution( M, description=self.opts.key,
                        remote_working_directory=self.opts.wdir ) as ex:
            self.log_write("Enter execution. Current working directory: %s" %ex.working_directory)
            self.job.assembly = genrep.Assembly( assembly=self.job.assembly_id,
                                                 genrep=g_rep,
                                                 fasta=self.job.options.get('fasta_file'),
                                                 annot=self.job.options.get('annot_file'),
                                                 intype=self.job.options.get('input_type_id',0),
                                                 ex=ex, via=self.opts.via,
                                                 bowtie2=self.job.options.get("bowtie2",True) )
##### Check all the options
            if not self.check_options():
                raise Usage("Problem with options %s" %self.opts)
            self.debug_write(json.dumps(self.job.options))
            self.init_files( ex )
##### Run workflow
            self.log_write("Starting workflow.")
            self.main_func(ex,**self.main_args)
##### Add logs to the LIMS in admin mode
            self.logfile.flush()
            self.debugfile.flush()
            log_desc = set_file_descr('logfile.txt', step='log', type='txt', view="admin")
            debug_desc = set_file_descr('debug.txt', step='log', type='txt', view="admin")
            ex.add(os.path.join(logfile_name), description=log_desc)
            ex.add(os.path.join(debugfile_name), description=debug_desc)
##### Create GDV project
            if self.job.options['create_gdv_project']: self.gdv_create(ex)

########################################################################
########################  POSTPROCESSING  ##############################
########################################################################
        allfiles = get_files( ex.id, M )
        if self.job.options['create_gdv_project'] and \
                self.job.options['gdv_project'].get('project',{}).get('id',0)>0:
            allfiles['url'] = self.gdv_upload(allfiles.get('sql',{}))
        self.logfile.close()
        self.debugfile.close()
        print json.dumps(allfiles)
        with open(self.opts.key+".done",'w') as done: json.dump(allfiles,done)
        self.send_email()
        return 0
Esempio n. 15
0
from bsPlugins import *
from bein import execution
from bbcflib import genrep
from bbcflib.common import fasta_composition
from bbcflib.motif import save_motif_profile
from bbcflib.track import track, FeatureStream
import os


# Lists fetched once from GenRep when the module is imported.
g = genrep.GenRep()
available_motifs = g.motifs_available()
assembly_list = g.assemblies_available()

# Choices for the input type, and the field names mapped to each choice.
input_types = [
    (0, 'Fasta upload'),
    (1, 'Select regions from genome'),
]
input_map = {
    0: ['fastafile'],
    1: ['regions'],
}

# Plugin metadata.
meta = {
    'version': "1.0.0",
    'author': "BBCF",
    'contact': "*****@*****.**",
}

# Declared input and output parameters (id and type of each).
in_parameters = [
    {'id': 'input_type', 'type': 'radio'},
    {'id': 'fastafile', 'type': 'userfile'},
    {'id': 'background', 'type': 'txt'},
    {'id': 'assembly', 'type': 'assembly'},
    {'id': 'regions', 'type': 'track'},
    {'id': 'motifs', 'type': 'list'},
    {'id': 'customMotif', 'type': 'txt'},
    {'id': 'threshold', 'type': 'float', 'required': True},
]
out_parameters = [
    {'id': 'motif_track', 'type': 'track'},
]

class MotifScanForm(BaseForm):
Esempio n. 16
0
def main():
    """Command-line entry point querying GenRep for one assembly.

    Parses the options declared in the module-level ``opts`` list, then
    writes the requested information (assembly or chromosome list, region
    sequences, index/fasta/sqlite paths, gene coordinates, feature track,
    base statistics) to the output file or to standard output. With the
    convert option, a SAM/BAM file is rewritten with its sequence
    accessions replaced by chromosome names.

    :returns: 0 on success or after printing the help, 2 on a usage error.
    """
    try:
        # Parse args
        parser = optparse.OptionParser(usage=usage, description=descr)
        for spec in opts:
            parser.add_option(spec[0], spec[1], help=spec[2], **spec[3])

        # Get variables
        (opt, args) = parser.parse_args()
        # Only the first run of word, dot, underscore or dash characters of
        # a user-supplied name is kept.
        safe_name = re.compile(r'([._\-\w]+)')
        if opt.assembly:
            found = safe_name.search(str(opt.assembly))
            if found is None:
                raise Usage("Invalid assembly name: %s." % opt.assembly)
            assembly_id = found.group(1)
        genrep_root = os.path.abspath(opt.root)
        genrep_url = normalize_url(opt.url)
        output_name = None
        if opt.output:
            found = safe_name.search(str(opt.output))
            if found is None:
                raise Usage("Invalid output file name: %s." % opt.output)
            output_name = found.group(1)
            fout = open(output_name, 'w')
        else:
            fout = sys.stdout

        def close_output():
            # sys.stdout belongs to the interpreter: flush it, never close it.
            if fout is sys.stdout:
                fout.flush()
            else:
                fout.close()

        regions = None
        if opt.regions:
            if os.path.exists(opt.regions):
                regions = opt.regions
            else:
                # Not a file: expect a comma-separated "chr:start-end" list.
                region_re = re.compile(r'(\S+):(\d+)\-(\d+)')
                regions = []
                for x in str(opt.regions).split(","):
                    found = region_re.search(x)
                    if found is None:
                        raise Usage("Invalid region '%s', expected chr:start-end." % x)
                    chrom, start, end = found.groups()
                    regions.append([chrom, int(start), int(end)])

        # Program body
        g_rep = genrep.GenRep(url=genrep_url, root=genrep_root)
        if opt.assembly:
            assembly = genrep.Assembly(assembly=assembly_id, genrep=g_rep, intype=opt.intype)
        if opt.list:
            if opt.assembly:
                table = ["\t".join((v['ac'], k, str(v['length'])))
                         for k, v in assembly.chrmeta.items()]
                fout.write("\n".join(table) + "\n")
            else:
                fout.write("\n".join(v[1] for v in g_rep.assemblies_available()) + "\n")
            close_output()
            return 0
        if not opt.assembly:
            parser.print_help()
            close_output()
            return 0
        if regions:
            assembly.fasta_from_regions(regions=regions, out=fout)
        if opt.bowtie:
            fout.write(">" + str(assembly.id) + ":" + assembly.name + " bowtie index prefix\n")
            fout.write(assembly.index_path + "\n")
        if opt.bowtie2:
            fout.write(">" + str(assembly.id) + ":" + assembly.name + " bowtie2 index prefix\n")
            fout.write(re.sub(r'bowtie/', 'bowtie2/', assembly.index_path) + "\n")
        if opt.fasta:
            fout.write(">" + str(assembly.id) + ":" + assembly.name + " fasta file\n")
            fout.write(assembly.fasta_path() + "\n")
        if opt.db:
            fout.write(">" + str(assembly.id) + ":" + assembly.name + " sqlite file\n")
            fout.write(assembly.sqlite_path + "\n")
        if opt.genes:
            if os.path.exists(opt.genes):
                glist = _parse_list(opt.genes)
            else:
                glist = opt.genes.split(",")
            for gcoord in assembly.gene_coordinates(glist):
                fout.write("\t".join([str(x) for x in gcoord]) + "\n")
        if opt.all:
            from bbcflib.track import track
            if opt.intype == 1:
                feats = assembly.exon_track()
            elif opt.intype == 2:
                feats = assembly.transcript_track()
            else:
                feats = assembly.gene_track()
            with track(fout, format='bed', fields=['strand']) as _tfeat:
                _tfeat.write(feats)
        if opt.stats:
            stats = assembly.statistics(frequency=True)
            bases = ["A", "C", "G", "T"]
            fout.write("#Assembly: %s\n" % assembly.name)
            for x in bases:
                fout.write("%s\t%s\n" % (x, stats[x]))
            fout.write("#N\t%s\n" % stats["N"])
            # Dinucleotide frequencies, in AA, AC, AG, AT, CA, ... order.
            for x in bases:
                for y in bases:
                    fout.write("%s\t%s\n" % (x + y, stats[x + y]))
        close_output()
        if opt.convert:
            if not os.path.exists(opt.convert):
                raise Usage("No such file: %s."%opt.convert)
            if not opt.output:
                raise Usage("Need an output file name.")
            import pysam
            infile = pysam.Samfile( opt.convert )
            header = infile.header
            # Map sequence accessions to chromosome names in the header.
            chromosomes = dict((v['ac'], k) for k, v in assembly.chrmeta.items())
            for h in header["SQ"]:
                if h["SN"] in chromosomes:
                    h["SN"] = chromosomes[h["SN"]]
            outfile = pysam.Samfile(output_name, 'wb', header=header )
            for read in infile:
                outfile.write(read)
            outfile.close()
            infile.close()

        return 0
    except Usage as err:
        sys.stderr.write("%s\n" % err.msg)
        sys.stderr.write("%s\n" % usage)
        return 2
Esempio n. 17
0
from bsPlugins import *
from bein import execution
from bbcflib.common import fasta_length
from bbcflib.motif import meme
from bbcflib import genrep
from bbcflib.track import track, FeatureStream
import os, tarfile

# Choices for the input type, and the field names mapped to each choice.
input_types = [
    (0, 'Fasta upload'),
    (1, 'Select regions from genome'),
]
input_map = {
    0: ['fastafile'],
    1: ['assembly', 'regions'],
}
_nm = 4

# Fetched once from GenRep when the module is imported.
assembly_list = genrep.GenRep().assemblies_available()

# Plugin metadata.
meta = {'version': "1.0.0",
        'author': "BBCF",
        'contact': "*****@*****.**"}

in_parameters = [{
    'id': 'input_type',
    'type': 'radio'
}, {
    'id': 'fastafile',
    'type': 'userfile'
}, {
    'id': 'assembly',
    'type': 'assembly'
}, {
    'id': 'regions',
Esempio n. 18
0
#!/usr/bin/env python

from bbcflib import genrep
import os, getopt, sys

# Convert the numbered GTF files of every valid assembly into sql tracks,
# skipping the ones already converted.
opts = dict(getopt.getopt(sys.argv[1:], "d:", [])[0])
basepath = opts.get('-d') or "/data/epfl/bbcf/genrep/nr_assemblies"
basepath += "/%s"
for _a, info in genrep.GenRep().assemblies_available():
    # Built once per assembly rather than once per candidate file index.
    assembly = genrep.Assembly(_a)
    if not assembly.bbcf_valid:
        continue
    for n in range(100):
        gtf_path = os.path.join(basepath % "gtf",
                                "%s_%i.gtf.gz" % (assembly.md5, n))
        # Files are numbered consecutively: stop at the first gap.
        if not os.path.exists(gtf_path):
            break
        sql_path = os.path.join(basepath % "annot_tracks",
                                "%s_%i.sql" % (assembly.md5, n))
        if os.path.exists(sql_path):
            continue
        # Formatted explicitly so the line is the same on Python 2 and 3.
        print("%s %s %s" % (info, gtf_path, sql_path))
        assembly.gtf_to_sql(gtf_path=gtf_path, sql_path=sql_path)