def default( self, obj ):
     """ Encode an HDA, default encoding for everything else. """
     if isinstance( obj, trans.app.model.HistoryDatasetAssociation ):
         return {
             "__HistoryDatasetAssociation__" : True,
             "create_time" : obj.create_time.__str__(),
             "update_time" : obj.update_time.__str__(),
             "hid" : obj.hid,
             "name" : to_unicode( obj.name ),
             "info" : to_unicode( obj.info ),
             "blurb" : obj.blurb,
             "peek" : obj.peek,
             "extension" : obj.extension,
             "metadata" : prepare_metadata( dict( obj.metadata.items() ) ),
             "parent_id" : obj.parent_id,
             "designation" : obj.designation,
             "deleted" : obj.deleted,
             "visible" : obj.visible,
             "file_name" : obj.file_name,
             "annotation" : to_unicode( getattr( obj, 'annotation', '' ) ),
             "tags" : get_item_tag_dict( obj ),
         }
     if isinstance( obj, UnvalidatedValue ):
         return obj.__str__()
     return simplejson.JSONEncoder.default( self, obj )
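For orientation, here is a minimal, self-contained sketch of how a custom encoder like this is wired into serialization. The stand-in class and field values are invented; only the json.dumps(..., cls=...) mechanism is the point.

import json

class _FakeHDA(object):
    """Hypothetical stand-in for a HistoryDatasetAssociation (illustration only)."""
    def __init__(self, hid, name):
        self.hid = hid
        self.name = name

class _SketchEncoder(json.JSONEncoder):
    def default(self, obj):
        # Dispatch on type, as in the encoder above; defer to the base class otherwise.
        if isinstance(obj, _FakeHDA):
            return {"__HistoryDatasetAssociation__": True, "hid": obj.hid, "name": obj.name}
        return json.JSONEncoder.default(self, obj)

# json.dumps calls default() for any object it cannot serialize natively.
print(json.dumps([_FakeHDA(1, "reads.fastq")], cls=_SketchEncoder))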
Example #2
 def default( self, obj ):
     """ Encode an HDA, default encoding for everything else. """
     if isinstance( obj, trans.app.model.HistoryDatasetAssociation ):
         rval = {
             "__HistoryDatasetAssociation__": True,
             "create_time": obj.create_time.__str__(),
             "update_time": obj.update_time.__str__(),
             "hid": obj.hid,
             "name": to_unicode( obj.name ),
             "info": to_unicode( obj.info ),
             "blurb": obj.blurb,
             "peek": obj.peek,
             "extension": obj.extension,
             "metadata": prepare_metadata( dict( obj.metadata.items() ) ),
             "parent_id": obj.parent_id,
             "designation": obj.designation,
             "deleted": obj.deleted,
             "visible": obj.visible,
             "file_name": obj.file_name,
             "uuid":  ( lambda uuid: str( uuid ) if uuid else None )( obj.dataset.uuid ),
             "annotation": to_unicode( getattr( obj, 'annotation', '' ) ),
             "tags": get_item_tag_dict( obj ),
         }
         if not obj.visible and not include_hidden:
             rval['exported'] = False
         elif obj.deleted and not include_deleted:
             rval['exported'] = False
         else:
             rval['exported'] = True
         return rval
     if isinstance( obj, UnvalidatedValue ):
         return obj.__str__()
     return json.JSONEncoder.default( self, obj )
 def default(self, obj):
     """ Encode an HDA, default encoding for everything else. """
     if isinstance(obj, trans.app.model.HistoryDatasetAssociation):
         return {
             "__HistoryDatasetAssociation__": True,
             "create_time": obj.create_time.__str__(),
             "update_time": obj.update_time.__str__(),
             "hid": obj.hid,
             "name": to_unicode(obj.name),
             "info": to_unicode(obj.info),
             "blurb": obj.blurb,
             "peek": obj.peek,
             "extension": obj.extension,
             "metadata":
             prepare_metadata(dict(obj.metadata.items())),
             "parent_id": obj.parent_id,
             "designation": obj.designation,
             "deleted": obj.deleted,
             "visible": obj.visible,
             "file_name": obj.file_name,
             "annotation":
             to_unicode(getattr(obj, 'annotation', '')),
             "tags": get_item_tag_dict(obj),
         }
     if isinstance(obj, UnvalidatedValue):
         return obj.__str__()
     return simplejson.JSONEncoder.default(self, obj)
Example #4
 def default(self, obj):
     """ Encode an HDA, default encoding for everything else. """
     if isinstance(obj, trans.app.model.HistoryDatasetAssociation):
         rval = {
             "__HistoryDatasetAssociation__": True,
             "create_time": obj.create_time.__str__(),
             "update_time": obj.update_time.__str__(),
             "hid": obj.hid,
             "name": to_unicode(obj.name),
             "info": to_unicode(obj.info),
             "blurb": obj.blurb,
             "peek": obj.peek,
             "extension": obj.extension,
             "metadata": prepare_metadata(dict(obj.metadata.items())),
             "parent_id": obj.parent_id,
             "designation": obj.designation,
             "deleted": obj.deleted,
             "visible": obj.visible,
             "file_name": obj.file_name,
             "uuid": (lambda uuid: str(uuid) if uuid else None)(obj.dataset.uuid),
             "annotation": to_unicode(getattr(obj, 'annotation', '')),
             "tags": get_item_tag_dict(obj),
             "extra_files_path": obj.extra_files_path
         }
         if not obj.visible and not include_hidden:
             rval['exported'] = False
         elif obj.deleted and not include_deleted:
             rval['exported'] = False
         else:
             rval['exported'] = True
         return rval
     return json.JSONEncoder.default(self, obj)
Example #5
 def build_index( self, index_help=True ):
     log.debug( 'Starting to build toolbox index.' )
     self.storage = RamStorage()
     self.index = self.storage.create_index( schema )
     writer = self.index.writer()
     for id, tool in self.toolbox.tools():
         #  Do not add data managers to the public index
         if tool.tool_type == 'manage_data':
             continue
         add_doc_kwds = {
             "id": id,
             "name": to_unicode( tool.name ),
             "description": to_unicode( tool.description ),
             "section": to_unicode( tool.get_panel_section()[1] if len( tool.get_panel_section() ) == 2 else '' ),
             "help": to_unicode( "" )
         }
         if index_help and tool.help:
             try:
                 add_doc_kwds['help'] = to_unicode( tool.help.render( host_url="", static_path="" ) )
             except Exception:
                 # Don't fail to build index just because a help message
                 # won't render.
                 pass
         writer.add_document( **add_doc_kwds )
     writer.commit()
     log.debug( 'Toolbox index finished.' )
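As background for the build_index variants, a self-contained sketch of the Whoosh pattern they follow: create a RamStorage index over a schema, add documents, and commit. The schema and the document here are assumptions for illustration; the real schema object is defined elsewhere in the Galaxy module.

from whoosh.fields import ID, TEXT, Schema
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import QueryParser

# Hypothetical schema; Galaxy defines its own module-level `schema`.
schema = Schema(id=ID(stored=True), name=TEXT, description=TEXT, help=TEXT)

storage = RamStorage()
index = storage.create_index(schema)
writer = index.writer()
writer.add_document(id=u"bowtie2", name=u"Bowtie2",
                    description=u"Fast and sensitive read alignment", help=u"")
writer.commit()

# Quick sanity check that the document is searchable.
with index.searcher() as searcher:
    hits = searcher.search(QueryParser("name", schema=schema).parse(u"bowtie2"))
    print([hit["id"] for hit in hits])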
Example #6
 def build_index(self, index_help):
     self.storage = RamStorage()
     self.index = self.storage.create_index(schema)
     writer = self.index.writer()
     for id, tool in self.toolbox.tools():
         add_doc_kwds = {
             "id":
             id,
             "title":
             to_unicode(tool.name),
             "description":
             to_unicode(tool.description),
             "section":
             to_unicode(tool.get_panel_section(
             )[1] if len(tool.get_panel_section()) == 2 else ''),
             "help":
             to_unicode(""),
         }
         if index_help and tool.help:
             try:
                 add_doc_kwds['help'] = to_unicode(
                     tool.help.render(host_url="", static_path=""))
             except Exception:
                 # Don't fail to build index just because a help message
                 # won't render.
                 pass
         writer.add_document(**add_doc_kwds)
     writer.commit()
Example #7
 def build_index( self ):
     self.storage = RamStorage()
     self.index = self.storage.create_index( schema )
     writer = self.index.writer()
     ## TODO: would also be nice to search section headers.
     for id, tool in self.toolbox.tools_by_id.iteritems():
         writer.add_document( id=id, title=to_unicode(tool.name), description=to_unicode(tool.description), help=to_unicode(tool.help) )
     writer.commit()
Example #9
 def get_item_tag_dict(item):
     """ Create dictionary of an item's tags. """
     tags = {}
     for tag in item.tags:
         tag_user_tname = to_unicode(tag.user_tname)
         tag_user_value = to_unicode(tag.user_value)
         tags[tag_user_tname] = tag_user_value
     return tags
 def _create_doc(self, tool_id, tool, index_help=True):
     #  Do not add data managers to the public index
     if tool.tool_type == 'manage_data':
         return {}
     add_doc_kwds = {
         "id": tool_id,
         "description": to_unicode(tool.description),
         "section": to_unicode(tool.get_panel_section()[1] if len(tool.get_panel_section()) == 2 else ''),
         "help": to_unicode("")
     }
     if tool.name.find('-') != -1:
         # Hyphens are wildcards in Whoosh causing bad things
         add_doc_kwds['name'] = (' ').join([token.text for token in self.rex(to_unicode(tool.name))])
     else:
         add_doc_kwds['name'] = to_unicode(tool.name)
     if tool.guid:
         # Create a stub consisting of owner, repo, and tool from guid
         slash_indexes = [m.start() for m in re.finditer('/', tool.guid)]
         id_stub = tool.guid[(slash_indexes[1] + 1): slash_indexes[4]]
         add_doc_kwds['stub'] = (' ').join([token.text for token in self.rex(to_unicode(id_stub))])
     else:
          add_doc_kwds['stub'] = to_unicode(tool_id)
     if tool.labels:
         add_doc_kwds['labels'] = to_unicode(" ".join(tool.labels))
     if index_help and tool.help:
         try:
             add_doc_kwds['help'] = to_unicode(tool.help.render(host_url="", static_path=""))
         except Exception:
             # Don't fail to build index just because a help message
             # won't render.
             pass
     return add_doc_kwds
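To make the guid slicing above concrete, the same index arithmetic applied to a hypothetical Tool Shed guid keeps only the owner/repo/tool part and drops the host prefix and trailing version:

import re

# Hypothetical guid of the usual Tool Shed form: host/repos/owner/repo/tool/version
guid = "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2"
slash_indexes = [m.start() for m in re.finditer('/', guid)]
id_stub = guid[(slash_indexes[1] + 1): slash_indexes[4]]
print(id_stub)  # devteam/bowtie2/bowtie2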
Example #12
 def _create_doc(self, tool_id, tool, index_help=True):
     #  Do not add data managers to the public index
     if tool.tool_type == 'manage_data':
         return {}
     add_doc_kwds = {
         "id": tool_id,
         "description": to_unicode(tool.description),
         "section": to_unicode(tool.get_panel_section()[1] if len(tool.get_panel_section()) == 2 else ''),
         "help": to_unicode("")
     }
     if tool.name.find('-') != -1:
         # Hyphens are wildcards in Whoosh causing bad things
         add_doc_kwds['name'] = (' ').join([token.text for token in self.rex(to_unicode(tool.name))])
     else:
         add_doc_kwds['name'] = to_unicode(tool.name)
     if tool.guid:
         # Create a stub consisting of owner, repo, and tool from guid
         slash_indexes = [m.start() for m in re.finditer('/', tool.guid)]
         id_stub = tool.guid[(slash_indexes[1] + 1): slash_indexes[4]]
         add_doc_kwds['stub'] = (' ').join([token.text for token in self.rex(to_unicode(id_stub))])
     else:
          add_doc_kwds['stub'] = to_unicode(tool_id)
     if tool.labels:
         add_doc_kwds['labels'] = to_unicode(" ".join(tool.labels))
     if index_help and tool.help:
         try:
             raw_html = tool.help.render(host_url="", static_path="")
             cleantext = clean(raw_html, tags=[''], strip=True).replace('\n', ' ')
             add_doc_kwds['help'] = to_unicode(cleantext)
         except Exception:
             # Don't fail to build index just because a help message
             # won't render.
             pass
     return add_doc_kwds
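The clean() used here comes from the bleach library; a small sketch of its effect on rendered help, with an invented HTML string (older bleach releases accept a list for the tags argument, as in the call above):

from bleach import clean

raw_html = "<p>Map reads with <b>Bowtie2</b>.</p>\nSee the tool manual for options."
# Strip every tag and flatten newlines, mirroring the call in the snippet above.
cleantext = clean(raw_html, tags=[''], strip=True).replace('\n', ' ')
print(cleantext)  # Map reads with Bowtie2. See the tool manual for options.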
Example #13
 def _create_doc(self, tool_id: str, tool, index_help: bool = True) -> Dict[str, str]:
     #  Do not add data managers to the public index
     if tool.tool_type == 'manage_data':
         return {}
     add_doc_kwds = {
         "id": tool_id,
         "description": to_unicode(tool.description),
         "section": to_unicode(tool.get_panel_section()[1] if len(tool.get_panel_section()) == 2 else ''),
         "help": to_unicode("")
     }
     if tool.name.find('-') != -1:
         # Replace hyphens, since they are wildcards in Whoosh causing false positives
         add_doc_kwds['name'] = (' ').join(token.text for token in self.rex(to_unicode(tool.name)))
     else:
         add_doc_kwds['name'] = to_unicode(tool.name)
     if tool.guid:
         # Create a stub consisting of owner, repo, and tool from guid
         slash_indexes = [m.start() for m in re.finditer('/', tool.guid)]
         id_stub = tool.guid[(slash_indexes[1] + 1): slash_indexes[4]]
         add_doc_kwds['stub'] = (' ').join(token.text for token in self.rex(to_unicode(id_stub)))
     else:
          add_doc_kwds['stub'] = to_unicode(tool_id)
     if tool.labels:
         add_doc_kwds['labels'] = to_unicode(" ".join(tool.labels))
     if index_help:
         raw_help = tool.raw_help
         if raw_help:
             try:
                 add_doc_kwds['help'] = to_unicode(raw_help)
             except Exception:
                 # Don't fail to build index just because help can't be converted.
                 pass
     return add_doc_kwds
Example #14
 def build_index(self, index_help=True):
     """Prepare search index for tools loaded in toolbox."""
     RamStorage.temp_storage = _temp_storage
     # Works around https://bitbucket.org/mchaput/whoosh/issues/391/race-conditions-with-temp-storage
     self.storage = RamStorage()
     self.index = self.storage.create_index(self.schema)
     writer = self.index.writer()
     start_time = datetime.now()
     log.debug('Starting to build toolbox index.')
     for id, tool in self.toolbox.tools():
         #  Do not add data managers to the public index
         if tool.tool_type == 'manage_data':
             continue
         add_doc_kwds = {
             "id":
             id,
             "description":
             to_unicode(tool.description),
             "section":
             to_unicode(tool.get_panel_section(
             )[1] if len(tool.get_panel_section()) == 2 else ''),
             "help":
             to_unicode("")
         }
         if tool.name.find('-') != -1:
             # Hyphens are wildcards in Whoosh causing bad things
             add_doc_kwds['name'] = (' ').join(
                 [token.text for token in self.rex(to_unicode(tool.name))])
         else:
             add_doc_kwds['name'] = to_unicode(tool.name)
         if tool.guid:
             # Create a stub consisting of owner, repo, and tool from guid
             slash_indexes = [
                 m.start() for m in re.finditer('/', tool.guid)
             ]
             id_stub = tool.guid[(slash_indexes[1] + 1):slash_indexes[4]]
             add_doc_kwds['stub'] = (' ').join(
                 [token.text for token in self.rex(to_unicode(id_stub))])
         else:
             add_doc_kwds['stub'] = to_unicode(id)
         if tool.labels:
             add_doc_kwds['labels'] = to_unicode(" ".join(tool.labels))
         if index_help and tool.help:
             try:
                 add_doc_kwds['help'] = to_unicode(
                     tool.help.render(host_url="", static_path=""))
             except Exception:
                 # Don't fail to build index just because a help message
                 # won't render.
                 pass
         writer.add_document(**add_doc_kwds)
     writer.commit()
     stop_time = datetime.now()
     log.debug('Toolbox index finished. It took: ' +
               str(stop_time - start_time))
Example #15
 def search(self, q, tool_name_boost, tool_section_boost,
            tool_description_boost, tool_label_boost, tool_stub_boost,
            tool_help_boost, tool_search_limit):
     """
     Perform search on the in-memory index. Weight in the given boosts.
     """
     # Change field boosts for searcher
     searcher = self.index.searcher(weighting=BM25F(
         field_B={
             'name_B': float(tool_name_boost),
             'section_B': float(tool_section_boost),
             'description_B': float(tool_description_boost),
             'labels_B': float(tool_label_boost),
             'stub_B': float(tool_stub_boost),
             'help_B': float(tool_help_boost)
         }))
     # Set query to search name, description, section, help, and labels.
     parser = MultifieldParser(
         ['name', 'description', 'section', 'help', 'labels', 'stub'],
         schema=self.schema)
     # Hyphens are wildcards in Whoosh causing bad things
     if q.find('-') != -1:
         q = (' ').join([token.text for token in self.rex(to_unicode(q))])
     # Perform the search
     hits = searcher.search(parser.parse('*' + q + '*'),
                            limit=float(tool_search_limit))
     return [hit['id'] for hit in hits]
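For reference, a self-contained sketch of the query side of this pattern: a tiny in-memory index, a MultifieldParser over several fields, and a wildcard search with a result limit. The schema, documents, and boost values are placeholders; fieldboosts on the parser is Whoosh's documented way to weight fields at query time.

from whoosh.fields import ID, TEXT, Schema
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import MultifieldParser

schema = Schema(id=ID(stored=True), name=TEXT, description=TEXT)
index = RamStorage().create_index(schema)
writer = index.writer()
writer.add_document(id=u"bowtie2", name=u"Bowtie2",
                    description=u"Fast and sensitive read alignment")
writer.commit()

# Weight name matches over description matches when parsing the query.
parser = MultifieldParser(['name', 'description'], schema=schema,
                          fieldboosts={'name': 9.0, 'description': 1.0})
with index.searcher() as searcher:
    hits = searcher.search(parser.parse(u'*bowtie*'), limit=20)
    print([hit['id'] for hit in hits])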
Example #16
 def build_index( self, index_help ):
     self.storage = RamStorage()
     self.index = self.storage.create_index( schema )
     writer = self.index.writer()
     for id, tool in self.toolbox.tools():
         add_doc_kwds = {
             "id": id,
             "title": to_unicode( tool.name ),
             "description": to_unicode( tool.description ),
             "section": to_unicode( tool.get_panel_section()[1] if len( tool.get_panel_section() ) == 2 else '' ),
             "help": to_unicode( "" ),
         }
         if index_help and tool.help:
             try:
                 add_doc_kwds['help'] = to_unicode(tool.help.render( host_url="", static_path="" ))
             except Exception:
                 # Don't fail to build index just because a help message
                 # won't render.
                 pass
         writer.add_document( **add_doc_kwds )
     writer.commit()
 def search( self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_label_boost, tool_stub_boost, tool_help_boost, tool_search_limit, tool_enable_ngram_search, tool_ngram_minsize, tool_ngram_maxsize ):
     """
     Perform search on the in-memory index. Weight in the given boosts.
     """
     # Change field boosts for searcher
     searcher = self.index.searcher(
         weighting=BM25F(
             field_B={ 'name_B': float( tool_name_boost ),
                       'section_B': float( tool_section_boost ),
                       'description_B': float( tool_description_boost ),
                       'labels_B': float( tool_label_boost ),
                       'stub_B': float( tool_stub_boost ),
                       'help_B': float( tool_help_boost ) }
         )
     )
     # Set query to search name, description, section, help, and labels.
     parser = MultifieldParser( [ 'name', 'description', 'section', 'help', 'labels', 'stub' ], schema=self.schema )
     # Hyphens are wildcards in Whoosh causing bad things
     if q.find( '-' ) != -1:
         q = (' ').join( [ token.text for token in self.rex( to_unicode( q ) ) ] )
     # Perform tool search with ngrams if set to true in the config file
     if ( tool_enable_ngram_search is True or tool_enable_ngram_search == "True" ):
         hits_with_score = {}
         token_analyzer = StandardAnalyzer() | analysis.NgramFilter( minsize=int( tool_ngram_minsize ), maxsize=int( tool_ngram_maxsize ) )
         ngrams = [ token.text for token in token_analyzer( q ) ]
         for query in ngrams:
             # Get the tool list with respective scores for each qgram
             curr_hits = searcher.search( parser.parse( '*' + query + '*' ), limit=float( tool_search_limit ) )
             for i, curr_hit in enumerate( curr_hits ):
                 is_present = False
                 for prev_hit in hits_with_score:
                     # Check if the tool appears again for the next qgram search
                     if curr_hit[ 'id' ] == prev_hit:
                         is_present = True
                         # Add the current score with the previous one if the
                         # tool appears again for the next qgram
                         hits_with_score[ prev_hit ] = curr_hits.score(i) + hits_with_score[ prev_hit ]
                 # Add the tool if not present to the collection with its score
                 if not is_present:
                     hits_with_score[ curr_hit[ 'id' ] ] = curr_hits.score(i)
         # Sort the results based on aggregated BM25 score in decreasing order of scores
         hits_with_score = sorted( hits_with_score.items(), key=lambda x: x[1], reverse=True )
         # Return the tool ids
         return [ item[0] for item in hits_with_score[ 0:int( tool_search_limit ) ] ]
     else:
         # Perform the search
         hits = searcher.search( parser.parse( '*' + q + '*' ), limit=float( tool_search_limit ) )
         return [ hit[ 'id' ] for hit in hits ]
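To see what the n-gram branch actually feeds into the per-gram searches, a tiny standalone sketch of the analyzer composition used above; the query text and sizes are made up:

from whoosh.analysis import NgramFilter, StandardAnalyzer

# Lowercase/tokenize, then emit overlapping character n-grams of each token.
token_analyzer = StandardAnalyzer() | NgramFilter(minsize=3, maxsize=4)
ngrams = [token.text for token in token_analyzer(u"Bowtie aligner")]
print(ngrams)  # e.g. 'bow', 'owt', ..., 'bowt', ... for each token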
Example #18
 def search(self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_label_boost, tool_stub_boost, tool_help_boost, tool_search_limit, tool_enable_ngram_search, tool_ngram_minsize, tool_ngram_maxsize):
     """
     Perform search on the in-memory index. Weight in the given boosts.
     """
     # Change field boosts for searcher
     searcher = self.index.searcher(
         weighting=BM25F(
             field_B={'name_B': float(tool_name_boost),
                      'section_B': float(tool_section_boost),
                      'description_B': float(tool_description_boost),
                      'labels_B': float(tool_label_boost),
                      'stub_B': float(tool_stub_boost),
                      'help_B': float(tool_help_boost)}
         )
     )
     # Set query to search name, description, section, help, and labels.
     parser = MultifieldParser(['name', 'description', 'section', 'help', 'labels', 'stub'], schema=self.schema)
     # Hyphens are wildcards in Whoosh causing bad things
     if q.find('-') != -1:
         q = (' ').join([token.text for token in self.rex(to_unicode(q))])
     # Perform tool search with ngrams if set to true in the config file
     if (tool_enable_ngram_search is True or tool_enable_ngram_search == "True"):
         hits_with_score = {}
         token_analyzer = StandardAnalyzer() | analysis.NgramFilter(minsize=int(tool_ngram_minsize), maxsize=int(tool_ngram_maxsize))
         ngrams = [token.text for token in token_analyzer(q)]
         for query in ngrams:
             # Get the tool list with respective scores for each qgram
             curr_hits = searcher.search(parser.parse('*' + query + '*'), limit=float(tool_search_limit))
             for i, curr_hit in enumerate(curr_hits):
                 is_present = False
                 for prev_hit in hits_with_score:
                     # Check if the tool appears again for the next qgram search
                     if curr_hit['id'] == prev_hit:
                         is_present = True
                         # Add the current score with the previous one if the
                         # tool appears again for the next qgram
                         hits_with_score[prev_hit] = curr_hits.score(i) + hits_with_score[prev_hit]
                 # Add the tool if not present to the collection with its score
                 if not is_present:
                     hits_with_score[curr_hit['id']] = curr_hits.score(i)
         # Sort the results based on aggregated BM25 score in decreasing order of scores
         hits_with_score = sorted(hits_with_score.items(), key=lambda x: x[1], reverse=True)
         # Return the tool ids
         return [item[0] for item in hits_with_score[0:int(tool_search_limit)]]
     else:
         # Perform the search
         hits = searcher.search(parser.parse('*' + q + '*'), limit=float(tool_search_limit))
         return [hit['id'] for hit in hits]
Example #19
 def search(self, q, tool_name_boost, tool_section_boost,
            tool_description_boost, tool_label_boost, tool_stub_boost,
            tool_help_boost, tool_search_limit, tool_enable_ngram_search,
            tool_ngram_minsize, tool_ngram_maxsize):
     """
     Perform search on the in-memory index. Weight in the given boosts.
     """
     # Change field boosts for searcher
     self.searcher = self.index.searcher(weighting=BM25F(
         field_B={
             'name_B': float(tool_name_boost),
             'section_B': float(tool_section_boost),
             'description_B': float(tool_description_boost),
             'labels_B': float(tool_label_boost),
             'stub_B': float(tool_stub_boost),
             'help_B': float(tool_help_boost)
         }))
     # Use OrGroup to change the default operation for joining multiple terms to logical OR.
     # This means e.g. for search 'bowtie of king arthur' a document that only has 'bowtie' will be a match.
     # https://whoosh.readthedocs.io/en/latest/api/qparser.html#whoosh.qparser.MultifieldPlugin
     # However this changes scoring i.e. searching 'bowtie of king arthur' a document with 'arthur arthur arthur'
     # would have a higher score than a document with 'bowtie arthur' which is usually unexpected for a user.
     # Hence we introduce a bonus on multi-hits using the 'factory()' method using a scaling factor between 0-1.
     # https://whoosh.readthedocs.io/en/latest/parsing.html#searching-for-any-terms-instead-of-all-terms-by-default
     og = OrGroup.factory(0.9)
     self.parser = MultifieldParser(
         ['name', 'description', 'section', 'help', 'labels', 'stub'],
         schema=self.schema,
         group=og)
     cleaned_query = q.lower()
     # Replace hyphens, since they are wildcards in Whoosh causing false positives
     if cleaned_query.find('-') != -1:
         cleaned_query = (' ').join(
             token.text for token in self.rex(to_unicode(cleaned_query)))
     if tool_enable_ngram_search is True:
         rval = self._search_ngrams(cleaned_query, tool_ngram_minsize,
                                    tool_ngram_maxsize, tool_search_limit)
         return rval
     else:
         # Use asterisk Whoosh wildcard so e.g. 'bow' easily matches 'bowtie'
         parsed_query = self.parser.parse(cleaned_query + '*')
         hits = self.searcher.search(parsed_query,
                                     limit=float(tool_search_limit),
                                     sortedby='')
         return [hit['id'] for hit in hits]
def get_hda_dict(trans, history, hda, for_editing):
    hda_dict = hda.get_api_value(view="element")

    hda_dict["id"] = trans.security.encode_id(hda.id)
    hda_dict["history_id"] = trans.security.encode_id(history.id)
    hda_dict["hid"] = hda.hid

    hda_dict["file_ext"] = hda.ext
    if trans.user_is_admin() or trans.app.config.expose_dataset_path:
        hda_dict["file_name"] = hda.file_name

    if not hda_dict["deleted"]:
        # Problem: Method url_for cannot use the dataset controller
        # Get the environment from DefaultWebTransaction
        #   and use default webapp mapper instead of webapp API mapper
        web_url_for = routes.URLGenerator(trans.webapp.mapper, trans.environ)
        # http://routes.groovie.org/generating.html
        # url_for is being phased out, so new applications should use url
        hda_dict["download_url"] = web_url_for(
            controller="dataset", action="display", dataset_id=trans.security.encode_id(hda.id), to_ext=hda.ext
        )

    can_access_hda = trans.app.security_agent.can_access_dataset(trans.get_current_user_roles(), hda.dataset)
    hda_dict["accessible"] = trans.user_is_admin() or can_access_hda
    hda_dict["api_type"] = "file"

    if not (hda.purged or hda.deleted or hda.dataset.purged):
        meta_files = []
        for meta_type in hda.metadata.spec.keys():
            if isinstance(hda.metadata.spec[meta_type].param, FileParameter):
                meta_files.append(dict(file_type=meta_type))
        if meta_files:
            hda_dict["meta_files"] = meta_files

    hda_dict["display_apps"] = get_display_apps(trans, hda)
    # hda_dict[ 'display_types' ] = get_display_types( trans, hda )
    hda_dict["visualizations"] = hda.get_visualizations()
    hda_dict["peek"] = to_unicode(hda.display_peek())

    if hda.creating_job and hda.creating_job.tool_id:
        tool_used = trans.app.toolbox.get_tool(hda.creating_job.tool_id)
        if tool_used and tool_used.force_history_refresh:
            hda_dict["force_history_refresh"] = True

    return hda_dict
Example #21
 def build_index( self, index_help=True ):
     # Works around https://bitbucket.org/mchaput/whoosh/issues/391/race-conditions-with-temp-storage
     RamStorage.temp_storage = _temp_storage
     self.storage = RamStorage()
     self.index = self.storage.create_index( self.schema )
     writer = self.index.writer()
     start_time = datetime.now()
     log.debug( 'Starting to build toolbox index.' )
     for id, tool in self.toolbox.tools():
         #  Do not add data managers to the public index
         if tool.tool_type == 'manage_data':
             continue
         add_doc_kwds = {
             "id": id,
             "description": to_unicode( tool.description ),
             "section": to_unicode( tool.get_panel_section()[1] if len( tool.get_panel_section() ) == 2 else '' ),
             "help": to_unicode( "" )
         }
         # Hyphens are wildcards in Whoosh causing bad things
         if tool.name.find( '-' ) != -1:
             add_doc_kwds['name'] = (' ').join( [ token.text for token in self.rex( to_unicode( tool.name ) ) ] )
         else:
             add_doc_kwds['name'] = to_unicode( tool.name )
         # We do not want to search Tool Shed or version parts
         # of the long ids
         if id.find( '/' ) != -1:
             slash_indexes = [ m.start() for m in re.finditer( '/', id ) ]
             id_stub = id[ ( slash_indexes[1] + 1 ): slash_indexes[4] ]
             add_doc_kwds['stub'] = (' ').join( [ token.text for token in self.rex( to_unicode( id_stub ) ) ] )
         else:
             add_doc_kwds['stub'] = to_unicode( id )
         if tool.labels:
             add_doc_kwds['labels'] = to_unicode( " ".join( tool.labels ) )
         if index_help and tool.help:
             try:
                 add_doc_kwds['help'] = to_unicode( tool.help.render( host_url="", static_path="" ) )
             except Exception:
                 # Don't fail to build index just because a help message
                 # won't render.
                 pass
         writer.add_document( **add_doc_kwds )
     writer.commit()
     stop_time = datetime.now()
     log.debug( 'Toolbox index finished. It took: ' + str(stop_time - start_time) )
Example #22
 def search( self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_label_boost, tool_stub_boost, tool_help_boost, tool_search_limit ):
     """
     Perform search on the in-memory index. Weight in the given boosts.
     """
     # Change field boosts for searcher
     searcher = self.index.searcher(
         weighting=BM25F(
             field_B={ 'name_B': float( tool_name_boost ),
                       'section_B': float( tool_section_boost ),
                       'description_B': float( tool_description_boost ),
                       'labels_B': float( tool_label_boost ),
                       'stub_B': float( tool_stub_boost ),
                       'help_B': float( tool_help_boost ) }
         )
     )
     # Set query to search name, description, section, help, and labels.
     parser = MultifieldParser( [ 'name', 'description', 'section', 'help', 'labels', 'stub' ], schema=self.schema )
     # Hyphens are wildcards in Whoosh causing bad things
     if q.find( '-' ) != -1:
         q = (' ').join( [ token.text for token in self.rex( to_unicode( q ) ) ] )
     # Perform the search
     hits = searcher.search( parser.parse( '*' + q + '*' ), limit=float( tool_search_limit ) )
     return [ hit[ 'id' ] for hit in hits ]
Example #23
    def setup_job(self, trans, jeha, include_hidden=False, include_deleted=False):
        """ Perform setup for job to export a history into an archive. Method generates
            attribute files for export, sets the corresponding attributes in the jeha
            object, and returns a command line for running the job. The command line
            includes the command, inputs, and options; it does not include the output
            file because it must be set at runtime. """

        #
        # Helper methods/classes.
        #

        def get_item_tag_dict(item):
            """ Create dictionary of an item's tags. """
            tags = {}
            for tag in item.tags:
                tag_user_tname = to_unicode(tag.user_tname)
                tag_user_value = to_unicode(tag.user_value)
                tags[tag_user_tname] = tag_user_value
            return tags

        def prepare_metadata(metadata):
            """ Prepare metatdata for exporting. """
            for name, value in list(metadata.items()):
                # Metadata files are not needed for export because they can be
                # regenerated.
                if isinstance(value, trans.app.model.MetadataFile):
                    del metadata[name]
            return metadata

        class HistoryDatasetAssociationEncoder(json.JSONEncoder):
            """ Custom JSONEncoder for a HistoryDatasetAssociation. """

            def default(self, obj):
                """ Encode an HDA, default encoding for everything else. """
                if isinstance(obj, trans.app.model.HistoryDatasetAssociation):
                    rval = {
                        "__HistoryDatasetAssociation__": True,
                        "create_time": obj.create_time.__str__(),
                        "update_time": obj.update_time.__str__(),
                        "hid": obj.hid,
                        "name": to_unicode(obj.name),
                        "info": to_unicode(obj.info),
                        "blurb": obj.blurb,
                        "peek": obj.peek,
                        "extension": obj.extension,
                        "metadata": prepare_metadata(dict(obj.metadata.items())),
                        "parent_id": obj.parent_id,
                        "designation": obj.designation,
                        "deleted": obj.deleted,
                        "visible": obj.visible,
                        "file_name": obj.file_name,
                        "uuid": (lambda uuid: str(uuid) if uuid else None)(obj.dataset.uuid),
                        "annotation": to_unicode(getattr(obj, 'annotation', '')),
                        "tags": get_item_tag_dict(obj),
                        "extra_files_path": obj.extra_files_path
                    }
                    if not obj.visible and not include_hidden:
                        rval['exported'] = False
                    elif obj.deleted and not include_deleted:
                        rval['exported'] = False
                    else:
                        rval['exported'] = True
                    return rval
                return json.JSONEncoder.default(self, obj)

        #
        # Create attributes/metadata files for export.
        #
        temp_output_dir = tempfile.mkdtemp()

        # Write history attributes to file.
        history = jeha.history
        history_attrs = {
            "create_time": history.create_time.__str__(),
            "update_time": history.update_time.__str__(),
            "name": to_unicode(history.name),
            "hid_counter": history.hid_counter,
            "genome_build": history.genome_build,
            "annotation": to_unicode(self.get_item_annotation_str(trans.sa_session, history.user, history)),
            "tags": get_item_tag_dict(history),
            "includes_hidden_datasets": include_hidden,
            "includes_deleted_datasets": include_deleted
        }
        history_attrs_filename = tempfile.NamedTemporaryFile(dir=temp_output_dir).name
        history_attrs_out = open(history_attrs_filename, 'w')
        history_attrs_out.write(dumps(history_attrs))
        history_attrs_out.close()
        jeha.history_attrs_filename = history_attrs_filename

        # Write datasets' attributes to file.
        datasets = self.get_history_datasets(trans, history)
        included_datasets = []
        datasets_attrs = []
        provenance_attrs = []
        for dataset in datasets:
            dataset.annotation = self.get_item_annotation_str(trans.sa_session, history.user, dataset)
            if (not dataset.visible and not include_hidden) or (dataset.deleted and not include_deleted):
                provenance_attrs.append(dataset)
            else:
                datasets_attrs.append(dataset)
                included_datasets.append(dataset)
        datasets_attrs_filename = tempfile.NamedTemporaryFile(dir=temp_output_dir).name
        datasets_attrs_out = open(datasets_attrs_filename, 'w')
        datasets_attrs_out.write(dumps(datasets_attrs, cls=HistoryDatasetAssociationEncoder))
        datasets_attrs_out.close()
        jeha.datasets_attrs_filename = datasets_attrs_filename

        provenance_attrs_out = open(datasets_attrs_filename + ".provenance", 'w')
        provenance_attrs_out.write(dumps(provenance_attrs, cls=HistoryDatasetAssociationEncoder))
        provenance_attrs_out.close()

        #
        # Write jobs attributes file.
        #

        # Get all jobs associated with included HDAs.
        jobs_dict = {}
        for hda in included_datasets:
            # Get the associated job, if any. If this hda was copied from another,
            # we need to find the job that created the original hda
            job_hda = hda
            while job_hda.copied_from_history_dataset_association:  # should this check library datasets as well?
                job_hda = job_hda.copied_from_history_dataset_association
            if not job_hda.creating_job_associations:
                # No viable HDA found.
                continue

            # Get the job object.
            job = None
            for assoc in job_hda.creating_job_associations:
                job = assoc.job
                break
            if not job:
                # No viable job.
                continue

            jobs_dict[job.id] = job

        # Get jobs' attributes.
        jobs_attrs = []
        for id, job in jobs_dict.items():
            job_attrs = {}
            job_attrs['tool_id'] = job.tool_id
            job_attrs['tool_version'] = job.tool_version
            job_attrs['state'] = job.state
            job_attrs['info'] = job.info
            job_attrs['traceback'] = job.traceback
            job_attrs['command_line'] = job.command_line
            job_attrs['stderr'] = job.stderr
            job_attrs['stdout'] = job.stdout
            job_attrs['exit_code'] = job.exit_code
            job_attrs['create_time'] = job.create_time.isoformat()
            job_attrs['update_time'] = job.update_time.isoformat()

            # Get the job's parameters
            try:
                params_objects = job.get_param_values(trans.app)
            except:
                # Could not get job params.
                continue

            params_dict = {}
            for name, value in params_objects.items():
                params_dict[name] = value
            job_attrs['params'] = params_dict

            # -- Get input, output datasets. --

            input_datasets = []
            input_mapping = {}
            for assoc in job.input_datasets:
                # Optional data inputs will not have a dataset.
                if assoc.dataset:
                    input_datasets.append(assoc.dataset.hid)
                    input_mapping[assoc.name] = assoc.dataset.hid
            job_attrs['input_datasets'] = input_datasets
            job_attrs['input_mapping'] = input_mapping
            output_datasets = [assoc.dataset.hid for assoc in job.output_datasets]
            job_attrs['output_datasets'] = output_datasets

            jobs_attrs.append(job_attrs)

        jobs_attrs_filename = tempfile.NamedTemporaryFile(dir=temp_output_dir).name
        jobs_attrs_out = open(jobs_attrs_filename, 'w')
        jobs_attrs_out.write(dumps(jobs_attrs, cls=HistoryDatasetAssociationEncoder))
        jobs_attrs_out.close()
        jeha.jobs_attrs_filename = jobs_attrs_filename

        #
        # Create and return command line for running tool.
        #
        options = ""
        if jeha.compressed:
            options = "-G"
        return "%s %s %s %s" % (options, history_attrs_filename,
                                datasets_attrs_filename,
                                jobs_attrs_filename)
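For orientation, the three filenames returned above point at JSON attribute files written by this method. A made-up example of the history attributes payload (values invented; the key set matches the dictionary built above) and of the returned command-line fragment:

# Hypothetical contents written to jeha.history_attrs_filename:
history_attrs_example = {
    "create_time": "2013-01-01 12:00:00",
    "update_time": "2013-01-02 12:00:00",
    "name": "RNA-seq analysis",
    "hid_counter": 5,
    "genome_build": "hg19",
    "annotation": "",
    "tags": {"group": "tutorial"},
    "includes_hidden_datasets": False,
    "includes_deleted_datasets": False,
}
# And the return value looks like (paths are temporary files):
# "-G /tmp/tmpXXXX/history_attrs /tmp/tmpXXXX/datasets_attrs /tmp/tmpXXXX/jobs_attrs"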
Example #24
    def setup_job(self,
                  trans,
                  jeha,
                  include_hidden=False,
                  include_deleted=False):
        """ Perform setup for job to export a history into an archive. Method generates
            attribute files for export, sets the corresponding attributes in the jeha
            object, and returns a command line for running the job. The command line
            includes the command, inputs, and options; it does not include the output
            file because it must be set at runtime. """

        #
        # Helper methods/classes.
        #

        def get_item_tag_dict(item):
            """ Create dictionary of an item's tags. """
            tags = {}
            for tag in item.tags:
                tag_user_tname = to_unicode(tag.user_tname)
                tag_user_value = to_unicode(tag.user_value)
                tags[tag_user_tname] = tag_user_value
            return tags

        def prepare_metadata(metadata):
            """ Prepare metatdata for exporting. """
            for name, value in list(metadata.items()):
                # Metadata files are not needed for export because they can be
                # regenerated.
                if isinstance(value, trans.app.model.MetadataFile):
                    del metadata[name]
            return metadata

        class HistoryDatasetAssociationEncoder(json.JSONEncoder):
            """ Custom JSONEncoder for a HistoryDatasetAssociation. """
            def default(self, obj):
                """ Encode an HDA, default encoding for everything else. """
                if isinstance(obj, trans.app.model.HistoryDatasetAssociation):
                    rval = {
                        "__HistoryDatasetAssociation__":
                        True,
                        "create_time":
                        obj.create_time.__str__(),
                        "update_time":
                        obj.update_time.__str__(),
                        "hid":
                        obj.hid,
                        "name":
                        to_unicode(obj.name),
                        "info":
                        to_unicode(obj.info),
                        "blurb":
                        obj.blurb,
                        "peek":
                        obj.peek,
                        "extension":
                        obj.extension,
                        "metadata":
                        prepare_metadata(dict(obj.metadata.items())),
                        "parent_id":
                        obj.parent_id,
                        "designation":
                        obj.designation,
                        "deleted":
                        obj.deleted,
                        "visible":
                        obj.visible,
                        "file_name":
                        obj.file_name,
                        "uuid": (lambda uuid: str(uuid)
                                 if uuid else None)(obj.dataset.uuid),
                        "annotation":
                        to_unicode(getattr(obj, 'annotation', '')),
                        "tags":
                        get_item_tag_dict(obj),
                        "extra_files_path":
                        obj.extra_files_path
                    }
                    if not obj.visible and not include_hidden:
                        rval['exported'] = False
                    elif obj.deleted and not include_deleted:
                        rval['exported'] = False
                    else:
                        rval['exported'] = True
                    return rval
                return json.JSONEncoder.default(self, obj)

        #
        # Create attributes/metadata files for export.
        #
        temp_output_dir = tempfile.mkdtemp()

        # Write history attributes to file.
        history = jeha.history
        history_attrs = {
            "create_time":
            history.create_time.__str__(),
            "update_time":
            history.update_time.__str__(),
            "name":
            to_unicode(history.name),
            "hid_counter":
            history.hid_counter,
            "genome_build":
            history.genome_build,
            "annotation":
            to_unicode(
                self.get_item_annotation_str(trans.sa_session, history.user,
                                             history)),
            "tags":
            get_item_tag_dict(history),
            "includes_hidden_datasets":
            include_hidden,
            "includes_deleted_datasets":
            include_deleted
        }
        history_attrs_filename = tempfile.NamedTemporaryFile(
            dir=temp_output_dir).name
        history_attrs_out = open(history_attrs_filename, 'w')
        history_attrs_out.write(dumps(history_attrs))
        history_attrs_out.close()
        jeha.history_attrs_filename = history_attrs_filename

        # Write datasets' attributes to file.
        datasets = self.get_history_datasets(trans, history)
        included_datasets = []
        datasets_attrs = []
        provenance_attrs = []
        for dataset in datasets:
            dataset.annotation = self.get_item_annotation_str(
                trans.sa_session, history.user, dataset)
            if (not dataset.visible
                    and not include_hidden) or (dataset.deleted
                                                and not include_deleted):
                provenance_attrs.append(dataset)
            else:
                datasets_attrs.append(dataset)
                included_datasets.append(dataset)
        datasets_attrs_filename = tempfile.NamedTemporaryFile(
            dir=temp_output_dir).name
        datasets_attrs_out = open(datasets_attrs_filename, 'w')
        datasets_attrs_out.write(
            dumps(datasets_attrs, cls=HistoryDatasetAssociationEncoder))
        datasets_attrs_out.close()
        jeha.datasets_attrs_filename = datasets_attrs_filename

        provenance_attrs_out = open(datasets_attrs_filename + ".provenance",
                                    'w')
        provenance_attrs_out.write(
            dumps(provenance_attrs, cls=HistoryDatasetAssociationEncoder))
        provenance_attrs_out.close()

        #
        # Write jobs attributes file.
        #

        # Get all jobs associated with included HDAs.
        jobs_dict = {}
        for hda in included_datasets:
            # Get the associated job, if any. If this hda was copied from another,
            # we need to find the job that created the original hda
            job_hda = hda
            while job_hda.copied_from_history_dataset_association:  # should this check library datasets as well?
                job_hda = job_hda.copied_from_history_dataset_association
            if not job_hda.creating_job_associations:
                # No viable HDA found.
                continue

            # Get the job object.
            job = None
            for assoc in job_hda.creating_job_associations:
                job = assoc.job
                break
            if not job:
                # No viable job.
                continue

            jobs_dict[job.id] = job

        # Get jobs' attributes.
        jobs_attrs = []
        for id, job in jobs_dict.items():
            job_attrs = {}
            job_attrs['tool_id'] = job.tool_id
            job_attrs['tool_version'] = job.tool_version
            job_attrs['state'] = job.state
            job_attrs['info'] = job.info
            job_attrs['traceback'] = job.traceback
            job_attrs['command_line'] = job.command_line
            job_attrs['stderr'] = job.stderr
            job_attrs['stdout'] = job.stdout
            job_attrs['exit_code'] = job.exit_code
            job_attrs['create_time'] = job.create_time.isoformat()
            job_attrs['update_time'] = job.update_time.isoformat()

            # Get the job's parameters
            try:
                params_objects = job.get_param_values(trans.app)
            except:
                # Could not get job params.
                continue

            params_dict = {}
            for name, value in params_objects.items():
                params_dict[name] = value
            job_attrs['params'] = params_dict

            # -- Get input, output datasets. --

            input_datasets = []
            input_mapping = {}
            for assoc in job.input_datasets:
                # Optional data inputs will not have a dataset.
                if assoc.dataset:
                    input_datasets.append(assoc.dataset.hid)
                    input_mapping[assoc.name] = assoc.dataset.hid
            job_attrs['input_datasets'] = input_datasets
            job_attrs['input_mapping'] = input_mapping
            output_datasets = [
                assoc.dataset.hid for assoc in job.output_datasets
            ]
            job_attrs['output_datasets'] = output_datasets

            jobs_attrs.append(job_attrs)

        jobs_attrs_filename = tempfile.NamedTemporaryFile(
            dir=temp_output_dir).name
        jobs_attrs_out = open(jobs_attrs_filename, 'w')
        jobs_attrs_out.write(
            dumps(jobs_attrs, cls=HistoryDatasetAssociationEncoder))
        jobs_attrs_out.close()
        jeha.jobs_attrs_filename = jobs_attrs_filename

        #
        # Create and return command line for running tool.
        #
        options = ""
        if jeha.compressed:
            options = "-G"
        return "%s %s %s %s" % (options, history_attrs_filename,
                                datasets_attrs_filename, jobs_attrs_filename)