def getToolDescription(cls):
        '''
        Specifies a help text in HTML that is displayed below the tool.

        The text consists of an introduction, the steps needed to use the
        tool, a note listing the standard GSuite columns that cannot be
        removed, and the GSuite file description appended by
        cls._addGSuiteFileDescription. The HtmlCore object is returned
        converted to a string.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore
        from gold.gsuite.GSuiteConstants import URI_COL, FILE_FORMAT_COL, \
                                                TRACK_TYPE_COL, TITLE_COL

        core = HtmlCore()
        core.paragraph('This tool provides the option of filtering metadata columns '
                       'in a GSuite file, generating as output a GSuite file with the '
                       'same tracks (rows), but with only a subset of '
                       'metadata columns as selected by the user. ')
        core.divider()
        core.paragraph('To filter a GSuite file, please follow these steps: ')
        core.orderedList(['Select the input GSuite file from history',
                          'Select the metadata columns that should be kept',
                          'Click the "Execute" button'])

        core.divider()
        core.smallHeader('Note')
        core.paragraph('The standard metadata columns of a GSuite file cannot be '
                       'removed by this tool: ')
        core.unorderedList([URI_COL, FILE_FORMAT_COL, TRACK_TYPE_COL])
        core.paragraph('The exception to this rule is the "%s" column.' % TITLE_COL)

        cls._addGSuiteFileDescription(core,
                                      alwaysShowRequirements=True,
                                      alwaysShowOutputFile=True)

        # The old plain-text description that used to follow this return was
        # unreachable and has been removed.
        return str(core)
    def getToolDescription(cls):
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has a general section, a section on file format conversion,
        and the GSuite file description that cls._addGSuiteFileDescription
        appends based on the GSUITE_* class attributes.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore

        generalText = (
            'This tool exports selected tracks in a local GSuite file to the Galaxy history. '
            'The tool creates a new GSuite file with points to the exported tracks. '
            ' In addition, the tool can provide a preview of the the beginning of any track '
            'directly from the tool interface.')
        conversionText = (
            'As part of the export process, the tracks can be converted into another file format. '
            'The list of file formats are dynamically created based upon the selected tracks. The list '
            'contains only file formats that allow conversion from all the selected tracks.')

        htmlDoc = HtmlCore()

        htmlDoc.smallHeader('General')
        htmlDoc.paragraph(generalText)

        htmlDoc.divider()
        htmlDoc.smallHeader('Conversion')
        htmlDoc.paragraph(conversionText)

        # Input requirements and output properties of the GSuite file
        gsuiteKwArgs = dict(
            allowedLocations=cls.GSUITE_ALLOWED_LOCATIONS,
            allowedFileFormats=cls.GSUITE_ALLOWED_FILE_FORMATS,
            allowedTrackTypes=cls.GSUITE_ALLOWED_TRACK_TYPES,
            outputLocation=cls.GSUITE_OUTPUT_LOCATION,
            outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
            outputTrackType=cls.GSUITE_OUTPUT_TRACK_TYPE)
        cls._addGSuiteFileDescription(htmlDoc, **gsuiteKwArgs)

        return str(htmlDoc)
Example #3
0
    def getToolDescription():
        '''
        Specifies a help text in HTML that is displayed below the tool.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore
        core = HtmlCore()
        core.paragraph('''
This tool combines elements from two separate data sets into a single track where
the elements are denoted as case (target) or control, depending on their source.
This allows analyses of how other tracks preferentially interact with case
elements as opposed to control elements.''')
        core.divider()
        core.smallHeader('Shared regions')
        core.paragraph('''
This tool supports four ways of handling regions that are found in both case and
control tracks. These regions do not need to be full segments, as defined in the
input tracks; the tool also correctly handles shared parts of segments.''')
        core.descriptionLine('Shared regions should be removed', \
            'the shared regions are not returned, neither as case nor as control segments')
        core.descriptionLine('Shared regions should be returned as case regions', \
            'the shared regions are all denoted as case regions and returned')
        core.descriptionLine('Shared regions should be returned as control regions', \
            'the shared regions are all denoted as control regions and returned')
        core.descriptionLine('Shared regions should be returned as they are', \
            '''
the shared regions are not handled in any way, but denoted according to whether
they were found in the case or the control input track. Note that this may
result in case and control regions that overlap. The subsequent analysis may not
handle this correctly.''')
        return str(core)
Example #4
0
    def getToolDescription(cls):
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction, a note on the homogeneity of the
        selected history elements, and the GSuite file description appended
        by cls._addGSuiteFileDescription.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore

        introText = (
            'This tool can be used to compile a GSuite file referring to a selection '
            'of files from the current Galaxy history. Selection of a genome build is optional.')
        noteText = (
            'Even though this tool can be used to build GSuite compilations of any history elements, '
            'the resulting GSuite file will be more usable if the elements are somewhat homogeneous '
            'in file format and/or track type.')

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(introText)

        htmlDoc.divider()
        htmlDoc.smallHeader('Note')
        htmlDoc.paragraph(noteText)

        # Only output properties are passed on for this tool
        outputKwArgs = dict(
            outputLocation=cls.GSUITE_OUTPUT_LOCATION,
            outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
            outputTrackType=cls.GSUITE_OUTPUT_TRACK_TYPE)
        cls._addGSuiteFileDescription(htmlDoc, **outputKwArgs)

        return str(htmlDoc)
    def getToolDescription(cls):
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction with the usage steps, a note on the
        homogeneity of the selected tracks, and the GSuite file description
        appended by cls._addGSuiteFileDescription.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore

        introText = (
            'This tool can be used to compile a GSuite file referring to a subset of the tracks '
            'in the HyperBrowser repository. To use the tool, please follow these steps:')
        usageSteps = [
            'Select the genome build',
            'Select the parent track, i.e. the top level in the track hierarchy that '
            'you are interested in. The tool will return all tracks that are found in the '
            'hierarchy underneath the selected parent track, descending into all sublevels.',
        ]
        noteText = (
            'Even though this tool can be used to build GSuite compilations of any tracks, '
            'the resulting GSuite file will be more usable if the track are somewhat homogeneous '
            'in track type.')

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(introText)
        htmlDoc.orderedList(usageSteps)

        htmlDoc.divider()
        htmlDoc.smallHeader('Note')
        htmlDoc.paragraph(noteText)

        # Only output properties are passed on for this tool
        outputKwArgs = dict(
            outputLocation=cls.GSUITE_OUTPUT_LOCATION,
            outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
            outputTrackType=cls.GSUITE_OUTPUT_TRACK_TYPE)
        cls._addGSuiteFileDescription(htmlDoc, **outputKwArgs)

        return str(htmlDoc)
    def getToolDescription(cls):
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction, a numbered list of instructions (with
        the keys of cls.ALL_OPERATIONS embedded as a bullet list), and the
        GSuite file description appended by cls._addGSuiteFileDescription.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(
            'This tool contains various operations for manipulating textual '
            'track files referred to in a GSuite file.')

        htmlDoc.divider()
        htmlDoc.smallHeader('Instructions')

        # The operation names are rendered as a nested bullet list inside
        # the third instruction.
        operationNames = list(cls.ALL_OPERATIONS.keys())
        operationListHtml = str(HtmlCore().unorderedList(operationNames))

        selectGSuiteStep = 'Select a GSuite file referring to local textual track files'
        exampleTrackStep = (
            'The first track referred to in the GSuite is automatically selected '
            'as an example track, with the beginning of the file shown in a '
            'text box. The example file is later used to show the results of the '
            'selected operation. If you want to use another file as the example track '
            'please select it in the selection box. ')
        selectOperationStep = (
            'Select the required operation from the list of operations '
            '(click the info box if you need further description of each operation '
            'and its parameters):') + operationListHtml
        parametersStep = (
            'Each parameter for the selected operation is shown in a selection box, '
            'with the default value indicated. If another value than the default is '
            'needed, please select the parameter and change its value. '
            'the order in which the parameters is selected is unimportant.')
        outputPreviewStep = (
            'The output of the selected operation with the selected parameter values '
            'on the beginning of the selected example track is shown in a text box.')
        fileSuffixStep = (
            'If the file format (e.g. "bed") of the track is changed as a result of '
            'carrying out the operation, please indicate the new file suffix. '
            'It is important for the tracks to have the correct file suffix for further '
            'analysis.')

        htmlDoc.orderedList([
            selectGSuiteStep,
            exampleTrackStep,
            selectOperationStep,
            parametersStep,
            outputPreviewStep,
            fileSuffixStep,
        ])

        gsuiteKwArgs = dict(
            allowedLocations=cls.GSUITE_ALLOWED_LOCATIONS,
            allowedFileFormats=cls.GSUITE_ALLOWED_FILE_FORMATS,
            outputLocation=cls.GSUITE_OUTPUT_LOCATION,
            outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
            outputTrackType=cls.GSUITE_OUTPUT_TRACK_TYPE,
            errorFile=True)
        cls._addGSuiteFileDescription(htmlDoc, **gsuiteKwArgs)

        return str(htmlDoc)
    def getToolDescription(cls):
        '''
        Specifies a help text in HTML that is displayed below the tool.

        The text lists the repositories that can be searched, the steps
        needed to use the tool, a note on the homogeneity of the selected
        files, and the GSuite file description appended by
        cls._addGSuiteFileDescription. The HtmlCore object is returned
        converted to a string.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore
        core = HtmlCore()

        core.paragraph('The tool provides a structured search for genomic tracks stored in '
                       'the following repositories:')
        core.unorderedList(['ENCODE tracks which are located in both UCSC and Ensembl databases.',
                            'Roadmap Epigenomics',
                            'Cancer Genome Atlas (TCGA)',
                            'FANTOM 5',
                            'International Cancer Genome Consortium (ICGC)',
                            'Ensembl BLUEPRINT epigenome project',
                            'NHGRI-EBI GWAS Catalog'])

        core.paragraph('The tool generates a metadata file in the GSuite format which contains '
                       'the URL and other metadata associated with each of the track files that '
                       'match the search criteria. To use the tool, please follow these steps:')
        # The fourth and fifth steps embed HTML rendered by separate HtmlCore
        # objects (a nested bullet list and highlighted format names).
        core.orderedList(['Select an attribute from the attribute list to search with',
                          'Select the value associated with this attribute from the associated attribute list',
                          'Repeat steps 1 and 2 to filter using more attributes',
                          'Select whether to compile a GSuite using:' +
                          str(HtmlCore().unorderedList(['All search results',
                                                        'Present results as a file list and have '
                                                        'the option of selecting a subset of those '
                                                        'result to compile the GSuite'])),
                          'Specify  the format of the output (' + str(HtmlCore().highlight('gsuite')) +
                              ' for GSuite or ' + str(HtmlCore().highlight('HTML')) +
                              ' for a more human readable format)'])

        core.divider()
        core.smallHeader('Note')
        core.paragraph('Even though this tool can be used to build GSuite compilations of any files, '
                       'the resulting GSuite file will be more usable if the files are somewhat homogeneous '
                       'in file format and/or track type.')

        cls._addGSuiteFileDescription(core,
                                      outputLocation=cls.GSUITE_OUTPUT_LOCATION,
                                      outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
                                      outputTrackType=cls.GSUITE_OUTPUT_TRACK_TYPE)

        return str(core)
    def getToolDescription(cls):
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction, an emphasized notice on which file
        type to select when uploading the archive, a section on how the
        directory hierarchy is handled, and the GSuite file description
        appended by cls._addGSuiteFileDescription.
        '''
        introText = (
            'This tool creates a new GSuite file based upon the contents of an '
            'archive file. Currently, the tool supports ".tar" (with and without '
            '".gz") and ".zip" files. To use the tool, please upload an archive file '
            'to your history using the Galaxy import tool (click the icon to the right '
            'of the "Tools" header in the top left of the window). You will need to '
            'manually select the file type as "gsuite.zip" or "gsuite.tar", as '
            'described below. After uploading, you then open this tool, select the '
            'archive, and click "Execute".')
        uploadTypeNotice = (
            'In order to circumvent Galaxy\'s integrated archive decompresssion (which '
            'for zip files only extracts a single file), you must manually select '
            '"gsuite.tar" or "gsuite.zip" in the "Type" column in the upload file '
            'dialog box.')
        hierarchyHeader = (
            'Keep directory hierarchy intact and present as columns in the GSuite '
            'file?')
        hierarchyText = (
            'By default, any directory structure in the archive is kept intact in '
            'the file path in the URIs in the output GSuite file. In addition, one '
            'column is added per level in the directory hierarchy, with the values '
            'denoting the directory where the file resides. In this way, one can '
            'make use of the directory structure as metadata categories. Optionally, '
            'one can ignore the hierarchy, flattening the archive into a single '
            'directory level, with no extra columns added.')

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(introText)

        htmlDoc.divider()
        htmlDoc.smallHeader('* IMPORTANT *')
        # The notice is emphasized by a separate HtmlCore object and embedded
        # as a string
        emphasizedNotice = str(HtmlCore().emphasize(uploadTypeNotice))
        htmlDoc.paragraph(emphasizedNotice)

        htmlDoc.divider()
        htmlDoc.smallHeader(hierarchyHeader)
        htmlDoc.paragraph(hierarchyText)

        gsuiteKwArgs = dict(
            outputLocation=cls.GSUITE_OUTPUT_LOCATION,
            outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
            outputTrackType=cls.GSUITE_OUTPUT_TRACK_TYPE,
            errorFile=False)
        cls._addGSuiteFileDescription(htmlDoc, **gsuiteKwArgs)

        return str(htmlDoc)
    def getToolDescription(cls):
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction, a section on genome build selection,
        a list of supported file types (built from the classes returned by
        getAllGenomeElementSourceClasses(forPreProcessor=True), plus two
        fixed entries), and the GSuite file description appended by
        cls._addGSuiteFileDescription.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore
        from gold.origdata.GenomeElementSource import getAllGenomeElementSourceClasses

        introText = (
            'This tool is used to preprocess the textual track files referred to by '
            'a GSuite file into a indexed, binary format that is needed for '
            'efficient analysis by the HyperBrowser analysis tools.')
        genomeText = (
            'Preprocessing tracks requires that the specific genome build is selected. '
            'If a genome build is defined within the GSuite file, it can still be overridden '
            'by the user if another build is selected. If the GSuite file contains no '
            'genome, the selection of a genome build is required.')
        fileTypesText = (
            'The HyperBrowser preprocessor supports the following file types (name: file suffix):')

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(introText)

        htmlDoc.divider()
        htmlDoc.smallHeader('Genome')
        htmlDoc.paragraph(genomeText)

        htmlDoc.divider()
        htmlDoc.smallHeader('Supported file types')
        htmlDoc.paragraph(fileTypesText)

        # One 'name: "suffixes"' entry per genome element source class ...
        fileTypeEntries = []
        for sourceCls in getAllGenomeElementSourceClasses(forPreProcessor=True):
            suffixes = ', '.join(sourceCls.FILE_SUFFIXES)
            fileTypeEntries.append('%s: "%s"' % (sourceCls.FILE_FORMAT_NAME, suffixes))
        # ... followed by two hard-coded entries
        fileTypeEntries.extend(['broadPeak: ".broadpeak"', 'narrowPeak: ".narrowpeak"'])
        htmlDoc.unorderedList(fileTypeEntries)

        gsuiteKwArgs = dict(
            allowedFileFormats=cls.GSUITE_ALLOWED_FILE_FORMATS,
            allowedLocations=cls.GSUITE_ALLOWED_LOCATIONS,
            outputLocation=cls.GSUITE_OUTPUT_LOCATION,
            outputFileFormat=cls.GSUITE_OUTPUT_FILE_FORMAT,
            errorFile=True)
        cls._addGSuiteFileDescription(htmlDoc, **gsuiteKwArgs)

        return str(htmlDoc)
    def getToolDescription(cls):
        '''
        Specifies a help text in HTML that is displayed below the tool.

        The text has an introduction, the steps needed to carry out the
        analysis, a section on visualizing the resulting BED file, the
        allowed track types of the query track (from
        cls.TRACK_ALLOWED_TRACK_TYPES), and the GSuite file description
        appended by cls._addGSuiteFileDescription. The HtmlCore object is
        returned converted to a string.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore

        core = HtmlCore()

        core.paragraph('This tool computes the proportions of overlap between the segments '
                       'of a query track against each track in a collection of reference tracks '
                       'described in a GSuite file. The overlap proportions are output in a '
                       'BED file with the query segments, where each query segment is partitioned '
                       'and colored according to the overlap with each reference track.')

        core.divider()
        core.paragraph('To carry out the analysis, please follow these steps:')
        # Adjacent string literals are joined verbatim, so the first item
        # needs an explicit trailing space before its continuation line.
        core.orderedList(['Select a genome build. Both the query track and the reference tracks '
                          'need to use the same genome build.',
                          'Select a query track from the HyperBrowser repository',
                          'Select a reference track collection as a GSuite file from history',
                          'Every track from the GSuite file is represented by a separate color, '
                          'presented to the user in a table.',
                          'Click "Execute"'])

        core.divider()
        core.paragraph('The resulting BED file can be visualized in external genome browsers. '
                       'To browse the segments, click the title of the resulting history element '
                       'and click "display at UCSC main". The UCSC Genome Browser will appear, '
                       'with the segments with color-coding imported as a track.')

        core.divider()
        core.smallHeader('Requirements for query track')
        core.descriptionLine('Track types', ', '.join(cls.TRACK_ALLOWED_TRACK_TYPES), emphasize=True)


        cls._addGSuiteFileDescription(core,
                                      allowedLocations=cls.GSUITE_ALLOWED_LOCATIONS,
                                      allowedFileFormats=cls.GSUITE_ALLOWED_FILE_FORMATS,
                                      allowedTrackTypes=cls.GSUITE_ALLOWED_TRACK_TYPES,
                                      disallowedGenomes=cls.GSUITE_DISALLOWED_GENOMES)

        return str(core)
    def getToolDescription():
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction followed by a "References" section
        with a linked literature reference.
        '''
        introText = (
            'Analyze a selected track of genome locations for spatial co-localization with '
            'respect to the three-dimensional structure of the genome, as defined using '
            'results from recent Hi-C experiments. The Hi-C data has been corrected for bias '
            'using a method presented in a recent manuscript (submitted), and further '
            'normalized by subtracting the expected signal given the sequential distance '
            'between elements.')

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(introText)

        htmlDoc.divider()
        htmlDoc.smallHeader('References')

        # The article title is rendered as a link by a separate HtmlCore
        # object and placed between the quotes of the reference.
        articleLink = str(HtmlCore().link(
            'Handling realistic assumptions in hypothesis testing of 3D co-localization of genomic elements.',
            'http://nar.oxfordjournals.org/content/41/10/5164.full'))
        referenceText = (
            'Paulsen, Jonas, Tonje G. Lien, Geir Kjetil Sandve, Lars Holden, Ørnulf Borgan, '
            'Ingrid K. Glad, and Eivind Hovig. "' +
            articleLink +
            '" Nucleic acids research 41, no. 10 (2013): 5164-5174.')
        htmlDoc.paragraph(referenceText)

        return str(htmlDoc)
    def getToolDescription():
        '''
        Returns the HTML help text that is displayed below the tool.

        The text has an introduction followed by a "Genome" and a "Notice"
        section, each preceded by a divider.
        '''
        introText = (
            'Checks a GTrack file for correspondence to the GTrack specification. '
            'For the latest version of the specification, see the "Show GTrack specification" tool.')

        # (header, paragraph) pairs, rendered in this order
        sections = [
            ('Genome',
             'If a genome build is selected, the tool will check whether all coordinates '
             'fall within the coordinate system of the genome build (i.e. based on the sequence names and lengths). '
             'Also, some GTrack files require a specified genome to be valid, e.g. if bounding regions '
             'are specified without explicit end coordinates.'),
            ('Notice',
             'The results of the validation is output as a new history element. '
             'A correctly run history item, i.e. one colored green, does not mean that the GTrack file '
             'has been found valid. One must click the eye icon of the history item to see the correct '
             'conclusion.'),
        ]

        htmlDoc = HtmlCore()
        htmlDoc.paragraph(introText)
        for header, text in sections:
            htmlDoc.divider()
            htmlDoc.smallHeader(header)
            htmlDoc.paragraph(text)

        return str(htmlDoc)
Example #13
0
    def getToolDescription(cls):
        '''
        Specifies a help text in HTML that is displayed below the tool.

        The text has an introduction, the steps needed to carry out the
        analysis, the allowed track types of the query track (from
        cls.TRACK_ALLOWED_TRACK_TYPES), and the GSuite file description
        appended by cls._addGSuiteFileDescription. The HtmlCore object is
        returned converted to a string.
        '''
        from proto.hyperbrowser.HtmlCore import HtmlCore

        core = HtmlCore()

        core.paragraph('This tool computes the proportions of overlap between the segments '
                       'of a query track against each track in a collection of reference tracks '
                       'described in a GSuite file. The overlap proportions are output in an '
                       'interactive heatmap, where each cell is colored according to the '
                       'overlap between each query segment (column) with each reference '
                       'track (row).')

        core.divider()
        core.paragraph('To carry out the analysis, please follow these steps:')
        # Adjacent string literals are joined verbatim, so the first item
        # needs an explicit trailing space before its continuation line.
        core.orderedList(['Select a genome build. Both the query track and the reference tracks '
                          'need to use the same genome build.',
                          'Select a query track from the HyperBrowser repository',
                          'Select a reference track collection as a GSuite file from history',
                          'Select the color map, going from no overlap to full overlap.',
                          'Click "Execute"'])

        core.divider()
        core.smallHeader('Requirements for query track')
        core.descriptionLine('Track types', ', '.join(cls.TRACK_ALLOWED_TRACK_TYPES), emphasize=True)


        cls._addGSuiteFileDescription(core,
                                      allowedLocations=cls.GSUITE_ALLOWED_LOCATIONS,
                                      allowedFileFormats=cls.GSUITE_ALLOWED_FILE_FORMATS,
                                      allowedTrackTypes=cls.GSUITE_ALLOWED_TRACK_TYPES,
                                      disallowedGenomes=cls.GSUITE_DISALLOWED_GENOMES)

        return str(core)
    def getToolDescription():
        '''
        Specifies a help text in HTML that is displayed below the tool.
        '''
        core = HtmlCore()
        core.paragraph('''
This tool extracts segments from an existing track that has floating point
values associated with the genome coordinates. The segments are extracted
according to a threshold value.''')
        core.divider()
        core.smallHeader('Input track')
        core.paragraph('''
The input track can be fetched either from the history or from the HyperBrowser
repository. The only requirement is that each track elements has an associated
floating point value. The track type thus has to be one of "Valued Points",
"Valued Segments", "Step Function", "Function", or one of the linked variants of
these. Note that tracks of type "Function" will work, but will be very slow.'''
                       )

        core.divider()
        core.smallHeader('Threshold and rule')
        core.paragraph('''
Type in a floating point value as the threshold and select an associated rule.
When the rule is true, the corresponding segment will be written to the output
file.''')

        core.divider()
        core.smallHeader('Merge adjacent segments')
        core.paragraph('''
If "Merge adjacent segments" is set to "Yes", any resulting segments that are
adjacent (''' + str(HtmlCore().emphasize('i.e.')) +
                       ''' that have no gaps between
them), are merged into the same segment.''')

        core.divider()
        core.smallHeader('Output format')
        core.paragraph('''
Several output file formats are available. These are all file formats that can
represent segment tracks.''')

        core.divider()
        core.smallHeader('Example')
        core.paragraph('Input file:')
        core.styleInfoBegin(styleClass='debug')
        core.append('''track type=bedGraph
chr21	10042712	10080194	-0.3655
chr21	10042712	10080194	0.2621
chr21	10079666	10080197	-1.047
chr21	13664826	13665788	-0.1566
chr21	13904368	13935777	1.396
chr21	13973462	13975927	0.007720
chr21	14403007	14439210	-1.021
chr21	14406599	14407013	2.022
chr21	14438228	14438658	-0.2405
chr21	14510257	14522024	-0.1010''')
        core.styleInfoEnd()

        core.paragraph('Output file:')
        core.styleInfoBegin(styleClass='debug')
        core.append('''chr21	10042712	10080194
chr21	13904368	13935777
chr21	13973462	13975927
chr21	14406599	14407013''')
        core.styleInfoEnd()
        return str(core)
    def getToolDescription(cls):
        '''
        Specifies a help text in HTML that is displayed below the tool.
        '''

        core = HtmlCore()
        core.paragraph('This tool is used to specify a set of analyses to be run in sequence, defined via a set of ' + \
                       str(HtmlCore().emphasize('batch command lines')) + '.')
        core.paragraph('Batch command lines can be found by ' +\
                       'clicking the "Inspect parameters of this analysis" link, either from the "Analyze genomic tracks" tool ' +\
                       'interface, or from the history element of previously run analyses. These batch run lines can then ' +\
                       'be copied to this tool and duplicated or modified if needed.')
        core.paragraph('The batch command lines are executed in sequence, that is, ' +\
                       'each batch command line is started only after the previous line has been fully executed.')

        core.divider()
        core.smallHeader('Batch command line format')
        core.styleInfoBegin(styleClass='debug')
        core.append('genome|regions|binsize|track1|track2|statistic')
        core.styleInfoEnd()
        core.descriptionLine('genome', 'The short name of the reference genome for the analysis (can be found in ' +\
                                        'the genome info box of the "Analyze genomic tracks" tool)', emphasize=True)
        core.descriptionLine('regions', 'The regions where the analysis should be performed, in the following format:' +\
                                        cls._exampleText('seqid[:start-[end]],seqid[:start-[end]],...') +\
                                        str(HtmlCore().descriptionLine('seqid', 'The sequence id of the region, e.g. "chr1" for chromosome 1', \
                                                                       emphasize=True)) +\
                                        str(HtmlCore().descriptionLine('start', 'The start position of the region. If the start position ' +\
                                                                                'is omitted, the region starts at posision 1.', emphasize=True)) +\
                                        str(HtmlCore().descriptionLine('seqid', 'The end position of the region. If the end position is ' +\
                                                                                'omitted, the region ends at the end of the specified ' +\
                                                                                'sequence (e.g. chromosome 1).', emphasize=True)) +\
                                        'Note:' + str(HtmlCore().unorderedList( \
                                            ['All positions are 1-based, and end-inclusive (i.e. the first base pair ' +\
                                                'of the sequence has position 1, and the end position is included in the region). ', \
                                             str(HtmlCore().emphasize('k')) + ' and ' + str(HtmlCore().emphasize('m')) +\
                                                ' can be used for specifying thousand (kilo) and million (mega) base pairs, ' +\
                                                'respectively (e.g. "chr1:1k-2k" corresponds to "chr1:1001-2000").', \
                                             '* denotes all (standard) sequences of the reference genome, e.g. all chromosomes'])) +\
                                        'Examples: ' +\
                                        cls._exampleText('chr1,chr2\nchr1:1001-1500\nchr2:1m-2m,chr2:3m-\n*'), emphasize=True)
        core.descriptionLine('binsize', 'The regions are further divided into smaller bins of this size. ' +\
                                        str(HtmlCore().indent( \
                                        'Note:' + str(HtmlCore().unorderedList( \
                                            [str(HtmlCore().emphasize('k')) + ' and ' + str(HtmlCore().emphasize('m')) +\
                                                ' denotes thousand and million base pairs, respectively', \
                                             '* denotes that the regions are not subdivided.'])) +\
                                        'Examples: ' +\
                                        cls._exampleText('5000\n100k\n*'))), emphasize=True)
        core.descriptionLine('track1', 'The first track of the analysis. ' +\
                                        str(HtmlCore().indent( \
                                        'Note:' + str(HtmlCore().unorderedList( \
                                            ['Colon, ":", is used to separate the different levels of the track hierarchy', \
                                             str(HtmlCore().link('URL-encoding', 'http://www.w3schools.com/tags/ref_urlencode.asp')) + \
                                                ' is supported, as non-ASCII characters will not work', \
                                            'A special format, starting with "galaxy", is used to represent a track from history'])) +\
                                        'Examples: ' +\
                                        cls._exampleText('Genes and gene subsets:Genes:CCDS\n' +\
                                                                  'Sequence:Repeating%20elements:SINE\ngalaxy:bed:%2Fusit%2Finvitro' +\
                                                                  '%2Fdata%2Fgalaxy%2Fgalaxy-dist-hg-stable%2Fdatabase%2Ffiles%2F028' +\
                                                                  '%2Fdataset_28276.dat:3%20-%20Extract%20track'))), emphasize=True)
        core.descriptionLine('track2', 'The second track of the analysis, specified in the same way as the first track. ' +\
                                        'If only a single track is to be analyzed, this field should be left empty', emphasize=True)
        core.descriptionLine('statistic', 'The specification of the analysis to run, and its parameters, in the following format:' +
                                        str(HtmlCore().indent( \
                                        cls._exampleText('statisticClass(paramA=valueA,paramB=valueB,...)') +\
                                        'The exact specification possibilities, with different statistic classes and parameter values, ' +\
                                        'are diverse and extensive. However, one may easily find a particular specification by ' +\
                                        'specifying the analysis and parameters in the "Analyze genomic tracks" tool and clicking the ' +\
                                        '"Inspect parameters of this analysis" link.')), emphasize=True)

        core.divider()
        core.smallHeader('Region specification variants')
        core.paragraph(
            'Other specifications of analysis regions are also supported, using both "regions" and "binsize" fields, as follows:'
        )
        core.tableHeader([
            'regions',
            'binsize (example)',
            'description',
        ])
        core.tableLine([cls._exampleText('__brs__'), cls._exampleText(''), \
            'Use the bounding region of the track if only one track is specified, else use the intersection '
            'of the bounding regions of the two tracks'])
        core.tableLine([cls._exampleText('__chrs__'), cls._exampleText('chr1,chr2'), \
            'List of complete sequence ids of the reference genome, e.g. chromosome names'])
        core.tableLine([cls._exampleText('__chrArms__'), cls._exampleText('chr1q,chr2p'), \
            'List of chromosome arm names. (Note: not supported for all reference genomes)'])
        core.tableLine([cls._exampleText('__genes__'), cls._exampleText('ENSG00000208234,ENSG00000199674'), \
            'List of Ensembl gene ids. (Note: not supported for all reference genomes)'])
        core.tableLine([''.join([cls._exampleText(x) for x in ['bed','gff','wig','bedgraph','gtrack']]), \
                        cls._exampleText('/usit/invitro/data/galaxy/galaxy-dist-hg-stable/database/files/028/dataset_28276.dat'), \
                        'Internal path to a file of the specified format, containing custom regions'])
        core.tableFooter()

        core.divider()
        core.smallHeader('Multiple run expansion')
        core.paragraph('The batch command line format supports two options for automatic expansion of a single batch ' +\
                       'command line into multiple batch lines, thus supporting multiple analyses from a single line:')
        core.orderedList(['Multiple values of the ' + str(HtmlCore().emphasize('track1')) + ', ' + str(HtmlCore().emphasize('track2')) + ', and' +\
                          str(HtmlCore().emphasize('statistic')) + ' fields can be specified using the slash character, "/", as ' +\
                          'separator. If more than one field is specified this way, all combinations of the values are expanded and ' +\
                          'executed. Example expansion:' + \
                          cls._exampleText('hg18|*|*|Genes and gene subsets:Genes:CCDS/Genes and gene subsets:Genes:Refseq||ProportionCountStat()/CountPointAllowingOverlapStat()') + \
                          'This expands to the following lines internally:' + \
                          cls._exampleText('hg18|*|*|Genes and gene subsets:Genes:CCDS||ProportionCountStat()\n' +\
                                                    'hg18|*|*|Genes and gene subsets:Genes:CCDS||CountPointAllowingOverlapStat()\n' +\
                                                    'hg18|*|*|Genes and gene subsets:Genes:Refseq||ProportionCountStat()\n' +\
                                                    'hg18|*|*|Genes and gene subsets:Genes:Refseq||CountPointAllowingOverlapStat()'),
                          'All tracks under a specific hierarchy can be specified using the "*" character. This works for ' +\
                          'values of the ' + str(HtmlCore().emphasize('track1')) + ' and ' + str(HtmlCore().emphasize('track2')) +\
                          ' fields. This expansion is also combinatorical, in the same manner as with the "/" character, and can ' +\
                          'be freely combined with such notation. Example:' + \
                          cls._exampleText('hg19|*|*|Genes and gene subsets:Genes:*|Chromatin:Chromatin State Segmentation:wgEncodeBroadHmmK562HMM:*|DerivedOverlapStat()') +\
                          'This batch command line will result in 30 analyses (2 gene tracks * 15 chromatin tracks).'])
        core.divider()
        core.smallHeader('Defining variables')
        core.paragraph('The batch command line format allows definition of variables using the following format:' +\
                       cls._exampleText('@variable=value') +\
                       'To use a variable, simply enter the variable name with a starting "@" character in a batch ' +\
                       'command line. Example:' +\
                       cls._exampleText('@trackname=Genes and gene subsets:Genes:CCDS\nhg18|*|*|@trackname||ProportionCountStat()') +\
                       'Nested variable declarations, i.e. defining a variable using previously defined variables, are also allowed. Example:' +\
                       cls._exampleText('@TN1=Genes and gene subsets:Genes:CCDS\n' +\
                                  '@TN2=Genes and gene subsets:Genes:Refseq\n' +\
                                  '@TNs=@TN1/@TN2\n' +\
                                  'hg18|*|*|@TNs||ProportionCountStat()/CountPointAllowingOverlapStat()') +\
                       'Note:' +\
                        str(HtmlCore().unorderedList(['The variable names are case sensitive', \
                                                      '"=" characters are allowed in variable values'])))

        return str(core)
Example #16
0
    def getToolDescription():
        core = HtmlCore()
        core.paragraph(
            'This tool is used to complement the data of a GTrack '
            'file with additional columns from another GTrack file. '
            'Note that all data lines of the first GTrack file is '
            'retained, but the contents of the second is only used if '
            'the tool finds a match.')
        core.divider()
        core.smallHeader('Genome')
        core.paragraph(
            'Some GTrack files require a specified genome to be valid, e.g. if bounding regions '
            'are specified without explicit end coordinates.')

        core.divider()
        core.smallHeader('Intersecting factor')
        core.paragraph('This choice governs how a the data lines of the two '
                       'GTrack files are matched.')
        core.descriptionLine('Element ID', 'the data lines are matched on the ' +\
                             str(HtmlCore().emphasize('id')) + ' column.', emphasize=True)
        core.descriptionLine('Positional information', \
                             'the matching is done using any of the ' + \
                             '%s, %s, %s, and %s columns ' % \
                             tuple(str(HtmlCore().emphasize(x)) for x in \
                              ['genome', 'seqid', 'start', 'end']) +\
                             'that are defined in both ' +\
                             'GTrack files. Note that the match must be complete, ' +\
                             'e.g. matching both start and end if both are ' +\
                             'defined for one of the GTrack files.', emphasize=True)
        core.divider()
        core.smallHeader('Example')
        core.paragraph('File 1:')

        core.styleInfoBegin(styleClass='debug')
        core.append('''##track type: valued segments
###seqid  start  end  value  id
chrM      100    120  2.5    A
chrM      200    220  1.2    B''')
        core.styleInfoEnd()

        core.paragraph('File 2:')

        core.styleInfoBegin(styleClass='debug')
        core.append('''##track type: points
###seqid  start  strand  sth  other  id
chrM      300    +       b    yes    B
chrM      400    -       c    yes    C
chrM      500    -       a    no     A''')
        core.styleInfoEnd()

        core.paragraph('Complementing on "Element ID" and choosing the ' +\
                        'additional columns %s and %s, gives:' % \
                        tuple(str(HtmlCore().emphasize(x)) for x in ('strand', 'other')))

        core.styleInfoBegin(styleClass='debug')
        core.append('''##gtrack version: 1.0
##track type: valued segments
##uninterrupted data lines: true
##sorted elements: true
##no overlapping elements: true
###seqid  start  end  value  strand  id  other
chrM      100    120  2.5    -       A   no
chrM      200    220  1.2    +       B   yes''')
        core.styleInfoEnd()

        return str(core)
    def getToolDescription():
        core = HtmlCore()
        core.paragraph(
            'The GTrack format requires a set of header lines to be a valid GTrack file. '
            '(See the "Show GTrack specification" tool for the specification of the format.) '
            'This tools tries to generate missing GTrack header lines based on the contents '
            'of the GTrack file selected. The "fixed" GTrack file is returned as a new '
            'history element.')
        core.divider()

        core.smallHeader(
            'The following header lines are affected by this tool')
        core.paragraph(
            'Header lines that are guaranteed to be properly generated:')
        core.unorderedList([x.capitalize() for x in EXPANDABLE_HEADERS])
        core.paragraph(
            'Header lines that are generated, but not guaranteed to get the correct value:'
        )
        core.unorderedList(
            [x.capitalize() for x in NOT_GUARANTEED_EXPANDABLE_HEADERS])
        core.paragraph(
            'Header lines that may change as part of the expansion (but are part of the '
            'extended GTrack definition, and thus superfluous):')
        core.unorderedList([x.capitalize() for x in VALUE_NOT_KEPT_HEADERS])
        core.divider()

        core.smallHeader('GTrack subtypes')
        core.paragraph(
            'If the header "subtype url" is specified, the corresponding subtype '
            'is read and the headers defined by the subtype are explicitly included. '
            'Also if the input file contains any headers from the extended specification, '
            'GTrack subtype information may be added to the output file. '
            'The following GTrack subtypes are automatically detected '
            'from the contents of the input file: ')

        from gold.origdata.GtrackComposer import StdGtrackComposer
        core.unorderedList(
            str(HtmlCore().link(x, x))
            for x in StdGtrackComposer.GTRACK_PRIORITIZED_SUBTYPE_LIST)
        core.divider()

        core.smallHeader('Genome')
        core.paragraph(
            'Some GTrack files require a specified genome to be valid, e.g. if bounding regions '
            'are specified without explicit end coordinates.')
        core.divider()

        core.smallHeader('Notice')
        core.paragraph(
            'This tool requires that the GTrack file already has a column specification '
            'line. If your file does not have this, please use the "Convert tabular file to '
            'GTrack" tool, where you can specify the column specification line. That tool '
            'also carries out the same header expansion as this tool.')
        core.divider()

        core.smallHeader('Example')
        core.paragraph('Input file')
        core.styleInfoBegin(styleClass='debug')
        core.append('''##1-indexed: true
##end inclusive: true
###seqid  start  end   value
chrM      100    165   0
chrM      200    2900  1
chrM      3000   3900  1''')
        core.styleInfoEnd()

        core.paragraph('Output file (with only non-default headers)')
        core.styleInfoBegin(styleClass='debug')
        core.append('''##gtrack version: 1.0
##track type: valued segments
##value type: binary
##uninterrupted data lines: true
##sorted elements: true
##no overlapping elements: true
##1-indexed: true
##end inclusive: true
###seqid  start  end   value
chrM      100    165   0
chrM      200    2900  1
chrM      3000   3900  1''')
        core.styleInfoEnd()

        core.paragraph('Output file (with all headers)')
        core.styleInfoBegin(styleClass='debug')
        core.append('''##gtrack version: 1.0
##track type: valued segments
##value type: binary
##value dimension: scalar
##undirected edges: false
##edge weights: false
##edge weight type: number
##edge weight dimension: scalar
##uninterrupted data lines: true
##sorted elements: true
##no overlapping elements: true
##circular elements: false
##1-indexed: true
##end inclusive: true
###seqid  start  end   value
chrM      100    165   0
chrM      200    2900  1
chrM      3000   3900  1''')
        core.styleInfoEnd()

        return str(core)
    def getToolDescription():
        core = HtmlCore()
        core.paragraph('The GTrack format permits the use of variable column names and order, and '
                       'correspondingly, variable track types. This variation comes at a price, '
                       'increasing the complexity of parsing GTrack files. All GTrack files can, '
                       'however, be represented as the same track type: Linked Valued Segments (LVS). '
                       'This tool converts all GTrack files to the same standardized version of the GTrack format.')
        core.divider()
        
        core.smallHeader('Specification of the standardized GTrack format')
        core.paragraph('The following columns are always present, in the following order:')
        core.orderedList(['seqid ' + str(HtmlCore().emphasize('(sequence ID)')),
                          'start',
                          'end',
                          'value',
                          'strand',
                          'id',
                          'edges'])
        core.paragraph('Any additional columns will then follow, in the order specified in the original GTrack file.')
        core.paragraph('The following header lines are also changed to standardized settings:')
        core.unorderedList(['Track type: linked valued segments', \
                            'Uninterrupted data lines: true ' + \
                                str(HtmlCore().emphasize('(any bounding specification lines are thus removed)')), \
                            '0-indexed: false', \
                            'end inclusive: false'])
        
        core.divider()
        
        core.smallHeader('Genome')
        core.paragraph('Some GTrack files require a specified genome to be valid, e.g. if bounding regions '
                       'are specified without explicit end coordinates.')
        core.divider()
        
        core.smallHeader('Notice')
        core.paragraph('The "value type", "value dimension", "edge weights", "edge weight type" and "edge weight dimension" '
                       'header lines are not standardized. The "value" and "edges" columns may therefore contain all types '
                       'of values supported by the GTrack format. It is, however, simple to assert particular configurations '
                       'of these header lines in specialized parsers.')
        
        core.divider()
        
        core.smallHeader('Example')
        core.paragraph('Input file')
        core.styleInfoBegin(styleClass='debug')
        core.append(
'''##gtrack version: 1.0
##track type: linked genome partition
##edge weights: true
##edge weight type: binary
##1-indexed: true
##end inclusive: true
###end  id      edges
####seqid=chrM; end=500
200     aaa     aab=0;aac=1
500     aab     aaa=0
####seqid=chr21; end=300
200     aac     .
300     aad     aaa=1
####seqid=chr21; start=302; end=400
400     aae     aad=0''')
        core.styleInfoEnd()
        
        core.paragraph('Output file')
        core.styleInfoBegin(styleClass='debug')
        core.append(
'''##gtrack version: 1.0
##track type: linked valued segments
##edge weights: true
##edge weight type: binary
##uninterrupted data lines: true
##no overlapping elements: true
###seqid        start   end     value   strand  id      edges
chrM    0       200     .       .       aaa     aab=0;aac=1
chrM    200     500     .       .       aab     aaa=0
chr21   0       200     .       .       aac     .
chr21   200     300     .       .       aad     aaa=1
chr21   301     400     .       .       aae     aad=0''')
        core.styleInfoEnd()
        
        return str(core)
    def getToolDescription():
        core = HtmlCore()
        core.paragraph(
            'This tool converts files between the following file formats:')
        core.descriptionLine('GTrack',
                             "See the 'Show GTrack specification' tool",
                             emphasize=True)
        core.descriptionLine('BED', str(HtmlCore().link('BED specification', \
                                        'http://genome.ucsc.edu/FAQ/FAQformat.html')), emphasize=True)
        core.descriptionLine('WIG', str(HtmlCore().link('WIG specification', \
                                        'http://genome.ucsc.edu/goldenPath/help/wiggle.html')), emphasize=True)
        core.descriptionLine('bedGraph', str(HtmlCore().link('bedGraph specification', \
                                            'http://genome.ucsc.edu/goldenPath/help/bedgraph.html')), emphasize=True)
        core.descriptionLine('GFF', str(HtmlCore().link('GFF version 3 specification', \
                                            'http://www.sequenceontology.org/gff3.shtml')), emphasize=True)
        core.descriptionLine('FASTA', str(HtmlCore().link('bedGraph specification', \
                                            'http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml')), emphasize=True)
        core.paragraph(
            'The input data type is defined by the format field of the history element of the data. '
            'The available conversions for the selected format are automatically '
            'shown in the conversion selection box')

        core.divider()

        core.smallHeader('Genome')
        core.paragraph(
            'Some GTrack files require a specified genome to be valid, e.g. if bounding regions '
            'are specified without explicit end coordinates.')

        core.divider()

        core.smallHeader('GTrack subtypes')
        core.paragraph(
            'If the conversion to extended GTrack is selected, GTrack subtype information may be '
            'added to the output file. The following GTrack subtypes are automatically detected from '
            'the contents of the input file:')

        from gold.origdata.GtrackComposer import StdGtrackComposer
        core.unorderedList(
            str(HtmlCore().link(x, x))
            for x in StdGtrackComposer.GTRACK_PRIORITIZED_SUBTYPE_LIST)
        core.divider()

        core.smallHeader('Notice')
        core.paragraph(
            "The GFF support is somewhat preliminary. For conversions between BED and GFF, we "
            "recommend the specialized Galaxy tools: 'BED-to-GFF converter' and 'GFF-to-BED converter'."
        )

        core.divider()

        core.smallHeader('Example 1')
        core.paragraph('Input file (BED)')
        core.styleInfoBegin(styleClass='debug')
        core.append(
            '''chrM    71      82      A       1000    +       71      79      0,0,255 2       4,4,    0,8
chrM    103     105     B       800     .       103     105     0,255,0 1       2       0
chr21   3       13      C       645     -       3       13      255,0,0 3       2,2,2   0,5,8'''
        )
        core.styleInfoEnd()

        core.paragraph('Output file (Extended GTrack)')
        core.styleInfoBegin(styleClass='debug')
        core.append('''##gtrack version: 1.0
##track type: valued segments
##uninterrupted data lines: true
###seqid	start	end	value	strand	name	thickstart	thickend	itemrgb	blockcount	blocksizes	blockstarts
chrM	71	82	1000	+	A	71	79	0,0,255	2	4,4,	0,8
chrM	103	105	800	.	B	103	105	0,255,0	1	2	0
chr21	3	13	645	-	C	3	13	255,0,0	3	2,2,2	0,5,8''')
        core.styleInfoEnd()

        core.divider()

        core.smallHeader('Example 2')
        core.paragraph('Input file (WIG)')
        core.styleInfoBegin(styleClass='debug')
        core.append('''track type=wiggle_0
fixedStep chrom=chrM start=11 step=10 span=5
4.500
-3.700
fixedStep chrom=chrM start=1013 step=10 span=5
2.100
11.00
fixedStep chrom=chr21 start=201 step=10 span=5
21.10''')
        core.styleInfoEnd()

        core.paragraph('Output file (bedGraph)')
        core.styleInfoBegin(styleClass='debug')
        core.append('''track type=bedGraph
chrM	10	15	4.500
chrM	20	25	-3.700
chrM	1012	1017	2.100
chrM	1022	1027	11.00
chr21	200	205	21.10''')
        core.styleInfoEnd()

        core.paragraph('Output file (GTrack)')
        core.styleInfoBegin(styleClass='debug')
        core.append('''##gtrack version: 1.0
##track type: valued segments
##1-indexed: true
##end inclusive: true
##fixed length: 5
##fixed gap size: 5
##gtrack subtype: wig fixedstep
##subtype url: http://gtrack.no/wig_fixedstep.gtrack
##subtype adherence: strict
###value
####seqid=chrM; start=11; end=25
4.5
-3.7
####seqid=chrM; start=1013; end=1027
2.1
11.0
####seqid=chr21; start=201; end=205
21.1''')
        core.styleInfoEnd()
        return str(core)
Example #20
0
    def getToolDescription():
        core = HtmlCore()
        core.paragraph('This tool is used to create GTrack files from any tabular input file. The '
                       'user must select the column names for the table, enabling the GTrack '
                       'header expander to automatically expand the headers, effectively converting '
                       'the file to a GTrack file. Custom column names are also supported.')
        core.divider()

        core.smallHeader('The following column names are part of the GTrack specification')
        core.descriptionLine('seqid', "An identifier of the underlying sequence of "
                                      "the track element (i.e. the row). Example: 'chr1'", emphasize=True)
        core.descriptionLine('start', 'The start position of the track element', emphasize=True)
        core.descriptionLine('end', 'The end position of the track element', emphasize=True)
        core.descriptionLine('value', 'A value associated to the track element. '
                                      'The value type is automatically found by the tool.', emphasize=True)
        core.descriptionLine('strand', "The strand of the track element, either '+', '-' or '.'", emphasize=True)
        core.descriptionLine('id', "An unique identifier of the track element, e.g. 'a'", emphasize=True)
        core.descriptionLine('edges', "A semicolon-separated list of id's, representing "
                                      "edges from this track element to other elements. "
                                      "Weights may also be specified. Example: 'a=1.0;b=0.9'", emphasize=True)
        core.paragraph("See the 'Show GTrack specification' tool for more information.")
        core.divider()

        core.smallHeader('Column selection method')
        core.paragraph('The tool supports two ways of selecting column names. First, you can select '
                       'the column names manually. The other option is to select a GTrack file in the '
                       'the history. The tool will then use the same column names (only using the first '
                       'columns if the number of columns in the current tabular file is less than in the '
                       'GTrack file.')
        core.divider()

        core.smallHeader('Genome')
        core.paragraph("Some GTrack files require a specified genome to be valid, e.g. if bounding regions "
                       "are specified without explicit end coordinates. A genome build must thus be selected if "
                       "such a GTrack file is to be used as template file for column specification. "
                       "Also, auto-correction of the sequence id ('seqid') column requires the selection of a "
                       "genome build. The resulting GTrack file in the history will be associated with the "
                       "selected genome.")
        core.divider()

        core.smallHeader('Track type')
        core.paragraph('According to the columns selected, the tool automatically finds the '
                       'corresponding track type according to the GTrack specification. '
                       'Note that dense track types are noe supported yet byt this tool.')
        core.divider()

        core.smallHeader('Indexing standard')
        core.paragraph('Two common standards of coordinate indexing are common in bioinformatics. A track '
                       'element covering the first 10 base pairs of chr1 are represented in two ways:' )
        core.descriptionLine('0-indexed, end exclusive', "seqid=chr1, start=0, end=10", emphasize=True)
        core.descriptionLine('1-indexed, end inclusive', "seqid=chr1, start=1, end=10", emphasize=True)
        core.paragraph('The GTrack format supports both standards, but the user must inform the system '
                       'which standard is used for each particular case.')
        core.divider()

        core.smallHeader('Auto-correction of sequence id')
        core.paragraph("The tool supports auto-correction of the sequence id ('seqid') column. "
                       "If this is selected, a search is carried out on the sequence id's defined "
                       "for the current genome build. The nearest match, if unique, is inserted in "
                       "the new GTrack file. If no unique match is found, the original value is "
                       "used. The algorithm also handles roman numbers. Example: 'IV' -> 'chr4'")
        core.divider()

        core.smallHeader('Example')
        core.paragraph('Input table')
        core.tableHeader(['start','','id','something','seqid'])
        core.tableLine(['100','.','a','yes','chr1'])
        core.tableLine(['250','.','b','yes','chr1'])
        core.tableLine(['120','.','c','no','chr2'])
        core.tableFooter()

        core.paragraph('Output file')
        core.styleInfoBegin(styleClass='debug')
        core.append(
'''##gtrack version: 1.0
##track type: points
##uninterrupted data lines: true
##sorted elements: true
##no overlapping elements: true
###seqid  start  id	 something
chr1	  100	 a	 yes
chr1	  250	 b	 yes
chr2	  120	 c	 no''')
        core.styleInfoEnd()

        return str(core)