def handle(self, *args, **options):
        """Register the Prodigal 2.60 standalone tool and its command blueprint.

        When ``self.already_exists`` reports the tool/version pair, an INFO
        line is printed and nothing else happens.  Otherwise the method:

        1. reads the ``Prodigal 2.60`` section of ``../../settings.ini``
           (resolved relative to this file);
        2. saves a flow blueprint, the tool record and a single
           ``Run prodigal`` command blueprint whose executable comes from the
           section's ``exec_path`` entry;
        3. saves one parameter record per supported command-line option;
        4. declares the tool's input and output file types.

        Returns:
            True when the tool already exists; None otherwise.

        Raises:
            KeyError: if the settings section or its ``exec_path`` key is
                missing (``ConfigParser.read`` itself ignores unreadable
                files, so a missing ini file surfaces here).
        """
        tool_name = 'Prodigal'
        tool_version = '2.60'

        if self.already_exists(tool_name, tool_version):
            notice = "INFO: tool {0} {1} already exists.  Skipping.".format(
                tool_name, tool_version)
            print(notice)
            return True

        # The ini file lives two directories above this module.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))
        tool_settings = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='https://code.google.com/p/prodigal/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run prodigal',
            exec_path=tool_settings['exec_path'],
        )
        command_bp.save()

        # (option, extra keyword arguments) in command-line position order.
        # The position (1-based) and the prefix (option plus a trailing
        # space) are derived in the loop below.
        param_specs = [
            ('-a', dict(
                short_desc='Write protein translations to the selected file')),
            ('-c', dict(
                has_no_value=True,
                short_desc='Closed ends.  Do not allow genes to run off edges')),
            ('-d', dict(
                short_desc='Write nucleotide sequences of genes to the selected file')),
            # TODO: limit choices to (gbk, gff, or sco)
            ('-f', dict(
                default_value='gbk',
                short_desc='Select output format (gbk, gff, or sco).  Default is gbk')),
            ('-g', dict(
                default_value='11',
                short_desc='Specify a translation table to use (default 11)')),
            ('-i', dict(
                is_optional=False,
                short_desc='Specify input file (default reads from stdin).')),
            ('-m', dict(
                has_no_value=True,
                short_desc='Treat runs of Ns as masked sequence and do not build genes across them')),
            ('-n', dict(
                has_no_value=True,
                short_desc='Bypass the Shine-Dalgarno trainer and force the program to scan for motifs')),
            ('-o', dict(
                is_optional=False,
                short_desc='Specify output file')),
            ('-p', dict(
                default_value='single',
                short_desc='Select procedure (single or meta).  Default is single.')),
            ('-s', dict(
                short_desc='Write all potential genes (with scores) to the selected file')),
            ('-t', dict(
                short_desc='Write or read the specified training file',
                long_desc='Write a training file (if none exists); otherwise, read and use the specified training file')),
        ]
        for position, (option, extras) in enumerate(param_specs, start=1):
            CommandBlueprintParam(
                command=command_bp,
                name=option,
                prefix=option + ' ',
                position=position,
                **extras
            ).save()

        # File type declarations: FASTA in through -i; GenBank or GFF3 out
        # through -o, selected by the -f value.
        tool.needs(filetype_name='FASTA (nucleotide)',
                   via_command=command_bp,
                   via_param='-i')
        for filetype_name, format_param in (
                ('GenBank Flat File Format', '-f=gbk'),
                ('GFF3', '-f=gff')):
            tool.can_create(filetype_name=filetype_name,
                            via_command=command_bp,
                            via_params=['-o', format_param])
# Example #2
# 0
    def handle(self, *args, **options):
        """Register the Trinity in silico read normalization tool (r2013-02-25).

        When ``self.already_exists`` reports the tool/version pair, an INFO
        line is printed and nothing else happens.  Otherwise the method reads
        the ``Trinity r2013-02-25`` section of ``../../settings.ini``
        (resolved relative to this file; note the section is keyed on
        'Trinity', not on this tool's own name), then saves a flow blueprint,
        the tool record, its file type associations, one command blueprint
        (executable taken from the section's ``normalization_script`` entry)
        and one parameter record per supported command-line option.

        Returns:
            True when the tool already exists; None otherwise.

        Raises:
            KeyError: if the settings section or its ``normalization_script``
                key is missing.
        """
        tool_name = 'Trinity in silico read normalization'
        tool_version = 'r2013-02-25'

        if self.already_exists(tool_name, tool_version):
            notice = "INFO: tool {0} {1} already exists.  Skipping.".format(
                tool_name, tool_version)
            print(notice)
            return True

        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))
        # The normalization script ships with Trinity, so its path is stored
        # in the Trinity section rather than one named after this tool.
        tool_settings = config["{0} {1}".format('Trinity', tool_version)]

        flow_bp = FlowBlueprint(
            type='s',
            description='Large RNA-Seq data sets, such as those exceeding 300M pairs, are best suited for in silico normalization prior to running Trinity, in order to reduce memory requirements and greatly improve upon runtimes. Before running the normalization, be sure that in the case of paired reads, the left read names end with suffix /1 and the right read names end with /2',
        )
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://trinityrnaseq.sourceforge.net/trinity_insilico_normalization.html',
            flow_bp=flow_bp,
        )
        tool.save()

        # The same three read file types are registered first under 'i' and
        # then under 'o' (presumably input and output -- confirm against
        # add_toolfiletype), each with a False final argument.
        read_filetypes = (
            'FASTQ (Sanger, paired reads, left)',
            'FASTQ (Sanger, paired reads, right)',
            'FASTQ (Sanger, unpaired reads)',
        )
        for direction in ('i', 'o'):
            for filetype_name in read_filetypes:
                self.add_toolfiletype(tool, direction, filetype_name, False)

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run Trinity read normalization',
            exec_path=tool_settings['normalization_script'],
        )
        command_bp.save()

        # Shared long description for the two list-file options.
        list_file_help = 'If you have read collections in different files you can use list files, where each line in a list file is the full path to an input file.  This saves you the time of combining them just so you can pass a single file for each direction.'

        # (option, position, extra keyword arguments), in save order.  The
        # prefix is always the option followed by one space.
        # NOTE(review): --left_list/--right_list reuse positions 3 and 4,
        # which --left/--right already occupy -- presumably because they are
        # alternatives to each other; confirm this is intended.
        param_specs = [
            ('--seqType', 1, dict(
                is_optional=False,
                short_desc='Type of reads: (fa, or fq)')),
            ('--JM', 2, dict(
                is_optional=False,
                short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.')),
            ('--left', 3, dict(short_desc='Left reads')),
            ('--right', 4, dict(short_desc='Right reads')),
            ('--single', 5, dict(short_desc='Single (unpaired) reads')),
            ('--left_list', 3, dict(
                short_desc='Left reads, if using a list file.  One file path per line',
                long_desc=list_file_help)),
            ('--right_list', 4, dict(
                short_desc='Right reads, if using a list file.  One file path per line',
                long_desc=list_file_help)),
            ('--pairs_together', 6, dict(
                has_no_value=True,
                short_desc='Process paired reads by averaging stats between pairs and retaining linking info')),
            ('--SS_lib_type', 7, dict(
                short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)')),
            ('--output', 8, dict(
                short_desc="Name of directory for output (will be created if doesn't already exist.",
                default_value='normalized_reads')),
            ('--JELLY_CPU', 9, dict(
                short_desc='Number of threads for Jellyfish to use',
                default_value='2')),
            ('--PARALLEL_STATS', 10, dict(
                has_no_value=True,
                short_desc='Generate read stats in parallel for paired reads (Figure 2X Inchworm memory requirement)')),
            ('--KMER_SIZE', 11, dict(
                short_desc='K-mer size for de Bruijn graph construction',
                default_value='25')),
            ('--min_kmer_cov', 12, dict(
                short_desc='Minimum kmer coverage for catalog construction',
                default_value='1')),
            ('--max_pct_stdev', 13, dict(
                short_desc='Maximum pct of mean for stdev of kmer coverage across read',
                default_value='100')),
        ]
        for option, position, extras in param_specs:
            CommandBlueprintParam(
                command=command_bp,
                name=option,
                prefix=option + ' ',
                position=position,
                **extras
            ).save()
    def handle(self, *args, **options):
        """Register the Trinity r2013-02-25 standalone tool and its command blueprint.

        When ``self.already_exists`` reports the tool/version pair, an INFO
        line is printed and nothing else happens.  Otherwise the method reads
        the ``Trinity r2013-02-25`` section of ``../../settings.ini``
        (resolved relative to this file), then saves a flow blueprint, the
        tool record, its file type associations, a single ``Run Trinity``
        command blueprint (executable taken from the section's ``exec_path``
        entry) and one parameter record per supported command-line option.

        Returns:
            True when the tool already exists; None otherwise.

        Raises:
            KeyError: if the settings section or its ``exec_path`` key is
                missing (``ConfigParser.read`` itself ignores unreadable
                files, so a missing ini file surfaces here).
        """
        tool_name = 'Trinity'
        tool_version = 'r2013-02-25'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(
                tool_name, tool_version))
            return True

        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))
        tool_settings = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://trinityrnaseq.sourceforge.net/',
            flow_bp=flow_bp,
        )
        tool.save()

        # Three read file types registered under 'i', one FASTA type under
        # 'o' (presumably input/output -- confirm against add_toolfiletype).
        # The meaning of the final boolean is not visible from this method.
        for filetype_name in ('FASTQ (Sanger, paired reads, left)',
                              'FASTQ (Sanger, paired reads, right)',
                              'FASTQ (Sanger, unpaired reads)'):
            self.add_toolfiletype(tool, 'i', filetype_name, False)
        self.add_toolfiletype(tool, 'o', 'FASTA (nucleotide)', True)

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run Trinity',
            exec_path=tool_settings['exec_path'],
        )
        command_bp.save()

        # (option, extra keyword arguments) in command-line position order.
        # The position (1-based) and the prefix (option plus a trailing
        # space) are derived in the loop below.
        param_specs = [
            ('--seqType', dict(
                is_optional=False,
                short_desc='Type of reads: (cfa, cfq, fa, or fq)')),
            ('--JM', dict(
                is_optional=False,
                short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.')),
            ('--left', dict(short_desc='Left reads')),
            ('--right', dict(short_desc='Right reads')),
            ('--single', dict(short_desc='Single (unpaired) reads')),
            ('--SS_lib_type', dict(
                short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)')),
            ('--output', dict(
                short_desc="Name of directory for output (will be created if doesn't already exist.",
                default_value='trinity_out_dir')),
            ('--CPU', dict(
                short_desc='Number of CPUs to use',
                default_value='2')),
            ('--min_contig_length', dict(
                short_desc='Minimum assembled contig length to report',
                default_value='200')),
            ('--jaccard_clip', dict(
                has_no_value=True,
                short_desc='Set if you have paired reads and expect high gene density with UTR overlap.  This is an expensive operation.')),
            ('--no_cleanup', dict(
                has_no_value=True,
                short_desc='Retain all intermediate input files')),

            # ---- Inchworm and K-mer counting-related options ----
            ('--min_kmer_cov', dict(
                short_desc='Min count for K-mers to be assembled by Inchworm',
                default_value='1')),
            # Should later add the --no_run_quantifygraph option and process
            # the rest via an iterator.

            # ---- Butterfly-related options ----
            ('--max_number_of_paths_per_node', dict(
                short_desc='Only most supported (N) paths are extended from node A->B, mitigating combinatoric path explorations',
                default_value='10')),
            ('--group_pairs_distance', dict(
                short_desc='Maximum length expected between fragment pairs.  Reads outside this will be treated as single-end',
                default_value='500')),
            ('--path_reinforcement_distance', dict(
                short_desc='Minimum overlap of reads with growing transcript path (default: PE: 75, SE: 25)')),
            ('--no_triplet_lock', dict(
                has_no_value=True,
                short_desc='Do not lock triplet-supported nodes')),
            ('--bflyHeapSpaceMax', dict(
                default_value='20G',
                short_desc='Java max heap space setting for butterfly')),
            ('--bflyHeapSpaceInit', dict(
                default_value='1G',
                short_desc='Java initial heap space settings for butterfly')),
            ('--bflyGCThreads', dict(
                short_desc='Threads for garbage collection')),
            ('--bflyCPU', dict(
                short_desc='CPUs to use.  Default will match --CPU value')),
            # Trinity's usage lists --bflyCalculateCPU as a bare switch, so
            # it is marked has_no_value like the other switches above.  The
            # description previously read "805" where "80%" was meant.
            ('--bflyCalculateCPU', dict(
                has_no_value=True,
                short_desc='Calculate CPUs based on 80% of max_memory divided by bflyHeapSpaceMax')),
        ]
        for position, (option, extras) in enumerate(param_specs, start=1):
            CommandBlueprintParam(
                command=command_bp,
                name=option,
                prefix=option + ' ',
                position=position,
                **extras
            ).save()