Beispiel #1
0
    def handle(self, *args, **options):
        """Register the Prodigal 2.60 standalone tool and its command-line options.

        If ``self.already_exists`` reports the tool/version pair, a notice is
        printed and nothing else happens.  Otherwise the ``Prodigal 2.60``
        section of ``settings.ini`` (looked up two directories above this
        file) is read, and a ``FlowBlueprint``, a ``StandaloneTool``, one
        ``CommandBlueprint`` and one ``CommandBlueprintParam`` per Prodigal
        option are created and saved.  Finally ``tool.needs`` and
        ``tool.can_create`` are called for the input and output file types.

        Args:
            *args: Not read by this method.
            **options: Not read by this method.

        Returns:
            True when the tool already exists; None after a registration.

        Raises:
            KeyError: If the settings section or its ``exec_path`` entry is
                missing.  Raised before any object is saved.
        """
        tool_name = 'Prodigal'
        tool_version = '2.60'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # ConfigParser.read() silently skips files it cannot open, so a missing
        # settings.ini surfaces as a KeyError on the section lookup below.
        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))
        tool_settings = settings["{0} {1}".format(tool_name, tool_version)]

        # Resolve every required setting up front: a missing key then fails
        # before the first save() instead of leaving a half-registered tool.
        exec_path = tool_settings['exec_path']

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(name=tool_name,
                              version=tool_version,
                              primary_site='https://code.google.com/p/prodigal/',
                              flow_bp=flow_bp)
        tool.save()

        command_bp = CommandBlueprint(parent=flow_bp,
                                      name='Run prodigal',
                                      exec_path=exec_path)
        command_bp.save()

        # One entry per Prodigal option, saved in this order.  Fields that are
        # left out fall back to whatever CommandBlueprintParam defaults to.
        param_specs = (
            dict(name='-a', prefix='-a ', position=1,
                 short_desc='Write protein translations to the selected file'),
            dict(name='-c', prefix='-c ', position=2, has_no_value=True,
                 short_desc='Closed ends.  Do not allow genes to run off edges'),
            dict(name='-d', prefix='-d ', position=3,
                 short_desc='Write nucleotide sequences of genes to the selected file'),
            ## TODO: limit choices to (gbk, gff, or sco)
            dict(name='-f', prefix='-f ', position=4, default_value='gbk',
                 short_desc='Select output format (gbk, gff, or sco).  Default is gbk'),
            dict(name='-g', prefix='-g ', position=5, default_value='11',
                 short_desc='Specify a translation table to use (default 11)'),
            dict(name='-i', prefix='-i ', position=6, is_optional=False,
                 short_desc='Specify input file (default reads from stdin).'),
            dict(name='-m', prefix='-m ', position=7, has_no_value=True,
                 short_desc='Treat runs of Ns as masked sequence and do not build genes across them'),
            dict(name='-n', prefix='-n ', position=8, has_no_value=True,
                 short_desc='Bypass the Shine-Dalgarno trainer and force the program to scan for motifs'),
            dict(name='-o', prefix='-o ', position=9, is_optional=False,
                 short_desc='Specify output file'),
            dict(name='-p', prefix='-p ', position=10, default_value='single',
                 short_desc='Select procedure (single or meta).  Default is single.'),
            dict(name='-s', prefix='-s ', position=11,
                 short_desc='Write all potential genes (with scores) to the selected file'),
            dict(name='-t', prefix='-t ', position=12,
                 short_desc='Write or read the specified training file',
                 long_desc='Write a training file (if none exists); otherwise, read and use the specified training file'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='-i')
        tool.can_create(filetype_name='GenBank Flat File Format', via_command=command_bp, via_params=['-o', '-f=gbk'])
        tool.can_create(filetype_name='GFF3', via_command=command_bp, via_params=['-o', '-f=gff'])
    def handle(self, *args, **options):
        """Register the Trinity in silico read normalization tool (r2013-02-25).

        If ``self.already_exists`` reports the tool/version pair, a notice is
        printed and nothing else happens.  Otherwise the ``Trinity r2013-02-25``
        section of ``settings.ini`` (looked up two directories above this
        file) is read, and a ``FlowBlueprint``, a ``StandaloneTool``, one
        ``CommandBlueprint`` and one ``CommandBlueprintParam`` per script
        option are created and saved.  Finally ``tool.can_use`` and
        ``tool.can_create`` are called for the FASTQ file types.

        Args:
            *args: Not read by this method.
            **options: Not read by this method.

        Returns:
            True when the tool already exists; None after a registration.

        Raises:
            KeyError: If the settings section or its ``normalization_script``
                entry is missing.  Raised before any object is saved.
        """
        tool_name = 'Trinity in silico read normalization'
        tool_version = 'r2013-02-25'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # ConfigParser.read() silently skips files it cannot open, so a missing
        # settings.ini surfaces as a KeyError on the section lookup below.
        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        # The settings section is keyed by 'Trinity', not by the longer tool name.
        tool_settings = settings["{0} {1}".format('Trinity', tool_version)]

        # Resolve every required setting up front: a missing key then fails
        # before the first save() instead of leaving a half-registered tool.
        normalization_script = tool_settings['normalization_script']

        flow_bp = FlowBlueprint(
            type='s',
            description='Large RNA-Seq data sets, such as those exceeding 300M pairs, are best suited for in silico normalization prior to running Trinity, in order to reduce memory requirements and greatly improve upon runtimes. Before running the normalization, be sure that in the case of paired reads, the left read names end with suffix /1 and the right read names end with /2')
        flow_bp.save()

        tool = StandaloneTool(name=tool_name,
                              version=tool_version,
                              primary_site='http://trinityrnaseq.sourceforge.net/trinity_insilico_normalization.html',
                              flow_bp=flow_bp)
        tool.save()

        command_bp = CommandBlueprint(parent=flow_bp,
                                      name='Run Trinity read normalization',
                                      exec_path=normalization_script)
        command_bp.save()

        def add_param(name, position, short_desc, **extra):
            # Every option's prefix is its name followed by a single space.
            CommandBlueprintParam(command=command_bp, name=name, prefix=name + ' ',
                                  position=position, short_desc=short_desc,
                                  **extra).save()

        # Shared long description for the two list-file options.
        list_file_help = 'If you have read collections in different files you can use list files, where each line in a list file is the full path to an input file.  This saves you the time of combining them just so you can pass a single file for each direction.'

        add_param('--seqType', 1, 'Type of reads: (fa, or fq)', is_optional=False)
        add_param('--JM', 2,
                  'Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.',
                  is_optional=False)
        add_param('--left', 3, 'Left reads')
        add_param('--right', 4, 'Right reads')
        add_param('--single', 5, 'Single (unpaired) reads')

        # NOTE(review): the list-file variants reuse positions 3 and 4 of
        # --left / --right; presumably intentional as alternatives -- confirm.
        add_param('--left_list', 3,
                  'Left reads, if using a list file.  One file path per line',
                  long_desc=list_file_help)
        add_param('--right_list', 4,
                  'Right reads, if using a list file.  One file path per line',
                  long_desc=list_file_help)

        add_param('--pairs_together', 6,
                  'Process paired reads by averaging stats between pairs and retaining linking info',
                  has_no_value=True)
        add_param('--SS_lib_type', 7,
                  'Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)')
        add_param('--output', 8,
                  "Name of directory for output (will be created if doesn't already exist.",
                  default_value='normalized_reads')
        add_param('--JELLY_CPU', 9, 'Number of threads for Jellyfish to use', default_value='2')
        add_param('--PARALLEL_STATS', 10,
                  'Generate read stats in parallel for paired reads (Figure 2X Inchworm memory requirement)',
                  has_no_value=True)
        add_param('--KMER_SIZE', 11, 'K-mer size for de Bruijn graph construction', default_value='25')
        add_param('--min_kmer_cov', 12, 'Minimum kmer coverage for catalog construction', default_value='1')
        add_param('--max_pct_stdev', 13,
                  'Maximum pct of mean for stdev of kmer coverage across read',
                  default_value='100')

        # File type name paired with the option that supplies it as input.
        read_types = (
            ('FASTQ (Sanger, paired reads, left)', '--left'),
            ('FASTQ (Sanger, paired reads, right)', '--right'),
            ('FASTQ (Sanger, unpaired reads)', '--single'),
        )

        # TODO: parameter grouping needs to be applied here.
        for filetype_name, input_flag in read_types:
            tool.can_use(filetype_name=filetype_name, via_command=command_bp, via_param=input_flag)

        # TODO: parameter grouping needs to be applied here.
        # TODO: needs improving.  Unfortunately, Trinity currently only supports output definition
        #  at the directory level, and the file names under that are created by convention.
        #  I've written Brian to see if I can add this
        for filetype_name, _input_flag in read_types:
            tool.can_create(filetype_name=filetype_name, via_command=command_bp, via_param='--output')
    def handle(self, *args, **options):
        """Register the Prodigal 2.60 standalone tool and its command-line options.

        When ``self.already_exists`` reports the tool/version pair, a notice
        is printed and the method returns early.  Otherwise it reads the
        ``Prodigal 2.60`` section of ``settings.ini`` (two directories above
        this file), then creates and saves a ``FlowBlueprint``, a
        ``StandaloneTool``, a ``CommandBlueprint`` and one
        ``CommandBlueprintParam`` per option, and calls ``tool.needs`` /
        ``tool.can_create`` for the input and output file types.

        Args:
            *args: Not read by this method.
            **options: Not read by this method.

        Returns:
            True when the tool already exists; None after a registration.

        Raises:
            KeyError: If the settings section or its ``exec_path`` entry is
                missing.  Raised before any object is saved.
        """
        tool_name = 'Prodigal'
        tool_version = '2.60'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(
                tool_name, tool_version))
            return True

        here = os.path.abspath(os.path.dirname(__file__))
        settings = configparser.ConfigParser()
        # read() silently skips unreadable files; a missing settings.ini shows
        # up as a KeyError on the section lookup that follows.
        settings.read(os.path.join(here, '../../settings.ini'))
        tool_settings = settings["{0} {1}".format(tool_name, tool_version)]

        # Look up the executable before the first save() so that a missing
        # key cannot leave a partially registered tool behind.
        exec_path = tool_settings['exec_path']

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='https://code.google.com/p/prodigal/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run prodigal',
            exec_path=exec_path,
        )
        command_bp.save()

        # (flag, short description, extra fields).  Positions are assigned
        # 1..12 in table order, and each prefix is the flag plus one space.
        option_table = (
            ('-a', 'Write protein translations to the selected file', {}),
            ('-c', 'Closed ends.  Do not allow genes to run off edges',
             {'has_no_value': True}),
            ('-d', 'Write nucleotide sequences of genes to the selected file', {}),
            ## TODO: limit choices to (gbk, gff, or sco)
            ('-f', 'Select output format (gbk, gff, or sco).  Default is gbk',
             {'default_value': 'gbk'}),
            ('-g', 'Specify a translation table to use (default 11)',
             {'default_value': '11'}),
            ('-i', 'Specify input file (default reads from stdin).',
             {'is_optional': False}),
            ('-m', 'Treat runs of Ns as masked sequence and do not build genes across them',
             {'has_no_value': True}),
            ('-n', 'Bypass the Shine-Dalgarno trainer and force the program to scan for motifs',
             {'has_no_value': True}),
            ('-o', 'Specify output file', {'is_optional': False}),
            ('-p', 'Select procedure (single or meta).  Default is single.',
             {'default_value': 'single'}),
            ('-s', 'Write all potential genes (with scores) to the selected file', {}),
            ('-t', 'Write or read the specified training file',
             {'long_desc': 'Write a training file (if none exists); otherwise, read and use the specified training file'}),
        )
        for position, (flag, short_desc, extra) in enumerate(option_table, start=1):
            CommandBlueprintParam(
                command=command_bp,
                name=flag,
                prefix=flag + ' ',
                position=position,
                short_desc=short_desc,
                **extra
            ).save()

        tool.needs(filetype_name='FASTA (nucleotide)',
                   via_command=command_bp,
                   via_param='-i')

        # Both output types are written through -o; only the -f value differs.
        for filetype_name, format_arg in (('GenBank Flat File Format', '-f=gbk'),
                                          ('GFF3', '-f=gff')):
            tool.can_create(filetype_name=filetype_name,
                            via_command=command_bp,
                            via_params=['-o', format_arg])