class TestAll(unittest.TestCase):
    """Integration tests for the Mongo-backed tool documents.

    Every test talks to the ``unittest`` database of a MongoDB server on
    ``localhost:27017``; that database is dropped again in ``tearDown``.
    """

    def setUp(self):
        """Connect to the test database and store two fixture tools."""
        connect("mongodb://localhost:27017/unittest")

        self.tool1 = MongoTool()
        self.tool1.id = "tool-1"
        self.tool1.name = "tool-1"
        self.tool1.description = (
            "PeptideShaker is a search engine independent platform for interpretation of proteomics identification results from multiple search engines,"
            "currently supporting X!Tandem, MS-GF+, MS Amanda, OMSSA, MyriMatch, Comet, Tide, Mascot, Andromeda and mzIdentML. "
            "By combining the results from multiple search engines, while re-calculating PTM localization scores and redoing the protein inference,"
            "PeptideShaker attempts to give you the best possible understanding of your proteomics data"
        )
        self.tool1.save()

        self.tool2 = MongoTool()
        self.tool2.id = "tool-2"
        self.tool2.name = "tool-2"
        self.tool2.tool_classes = ['TOOL']
        self.tool2.save()
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(self.tool2.tool_classes), 1)

    def test_01_func(self):
        """A saved tool version that references tool-1 is listed by that tool."""
        self.tool_version = MongoToolVersion()
        self.tool_version.id = "version-1"
        self.tool_version.name = "tool-1"
        self.tool_version.ref_tool = self.tool1
        self.tool_version.save()

        self.assertEqual(self.tool1.id, "tool-1")
        self.assertEqual(len(self.tool1.get_tool_versions()), 1)

    def tearDown(self):
        """Drop the test database so every test starts from an empty store."""
        _get_db().client.drop_database('unittest')
def transform_mongo_tool_dict(mongo_tool):
    """Build an API ``Tool`` from a tool record given as a mapping.

    :param mongo_tool: mapping holding at least the keys ``id``,
        ``description``, ``organization``, ``authors``, ``name``,
        ``tool_classes`` and ``tool_versions``; ``license``, ``total_pulls``,
        ``tool_tags``, ``home_url``, ``additional_identifiers`` and
        ``publications`` are used when present.
    :return: the populated ``Tool``
    """
    api_tool = Tool()
    api_tool.id = mongo_tool["id"]
    api_tool.description = mongo_tool["description"]
    api_tool.organization = mongo_tool["organization"]
    if 'license' in mongo_tool:
        api_tool.license = mongo_tool['license']

    # By default all our tools will be declared as verified
    api_tool.verified = True
    api_tool.author = MongoTool.get_main_author_dict(mongo_tool["authors"])
    api_tool.name = mongo_tool["name"]
    api_tool.url = _PUBLIC_REGISTRY_URL + "tools/" + api_tool.id

    # Tools without a pull counter are reported with zero pulls.
    api_tool.pulls = mongo_tool['total_pulls'] if 'total_pulls' in mongo_tool else 0

    if 'tool_tags' in mongo_tool and len(mongo_tool['tool_tags']) > 0:
        api_tool.tool_tags = mongo_tool['tool_tags']

    # Set the Tool Class
    main_class = MongoTool.get_main_tool_class_dict(mongo_tool["tool_classes"])
    api_tool.toolclass = transform_dic_tool_class(main_class)

    if 'home_url' in mongo_tool:
        api_tool.tool_url = mongo_tool['home_url']

    api_tool.versions = []

    # Only prefixed identifiers ("prefix:value") are exposed; PubMed ids are
    # added with a "PMID:" prefix.
    identifiers = []
    if 'additional_identifiers' in mongo_tool:
        identifiers.extend(
            identifier for identifier in mongo_tool['additional_identifiers']
            if ':' in identifier)
    if 'publications' in mongo_tool:
        identifiers.extend(
            "PMID:" + publication['pubmed_id']
            for publication in mongo_tool['publications']
            if 'pubmed_id' in publication)
    api_tool.identifiers = identifiers

    contained = []
    for version_record in mongo_tool["tool_versions"]:
        api_tool.versions.append(
            transform_tool_version_dict(version_record, mongo_tool["id"]))
        if 'contains' not in version_record:
            continue
        for spec in version_record['contains']:
            # Keep the part before the first "=" and skip entries with "#".
            package = spec.split("=")[0]
            if '#' not in package:
                contained.append(package)
    api_tool.contains = contained

    return api_tool
def setUp(self):
    """Connect to the ``unittest`` database and store two fixture tools."""
    connect("mongodb://localhost:27017/unittest")

    self.tool1 = MongoTool()
    self.tool1.id = "tool-1"
    self.tool1.name = "tool-1"
    self.tool1.description = (
        "PeptideShaker is a search engine independent platform for interpretation of proteomics identification results from multiple search engines,"
        "currently supporting X!Tandem, MS-GF+, MS Amanda, OMSSA, MyriMatch, Comet, Tide, Mascot, Andromeda and mzIdentML. "
        "By combining the results from multiple search engines, while re-calculating PTM localization scores and redoing the protein inference,"
        "PeptideShaker attempts to give you the best possible understanding of your proteomics data"
    )
    self.tool1.save()

    self.tool2 = MongoTool()
    self.tool2.id = "tool-2"
    self.tool2.name = "tool-2"
    self.tool2.tool_classes = ['TOOL']
    self.tool2.save()
    # assertEqual reports the actual length when the check fails.
    self.assertEqual(len(self.tool2.tool_classes), 1)
def tools_get_common(id=None, alias=None, registry=None, organization=None, name=None, toolname=None, toolclass=None,
                     description=None, author=None, checker=None, license=None, tool_tags=None, facets=None,
                     offset=0, limit=1000, is_all_field_search=False, sort_field=None, sort_order=None):
    """Query the tools and convert the matches into API tools.

    All arguments are forwarded unchanged to ``MongoTool.get_tools``.

    :param checker: when not ``None``, only tools whose ``has_checker``
        equals this value are kept
    :return: the response of ``MongoTool.get_tools`` with its ``tools``
        replaced by the converted API tools, or ``None`` when the query
        returned ``None``
    """
    resp = MongoTool.get_tools(
        id=id, alias=alias, registry=registry, organization=organization,
        name=name, toolname=toolname, toolclass=toolclass,
        description=description, author=author, checker=checker,
        license=license, tool_tags=tool_tags, facets=facets,
        offset=offset, limit=limit,
        is_all_field_search=is_all_field_search,
        sort_field=sort_field, sort_order=sort_order)
    if resp is None:
        return None

    api_tools = []
    records = resp.tools
    if records is not None:
        for record in records:
            # don't include tools that have 'anchor_tool' field
            is_anchored = ('anchor_tool' in record) and (len(record['anchor_tool']) > 0)
            if not is_anchored:
                # Transform the mongo tool to API tool
                api_tools.append(transform_mongo_tool_dict(record))

        # If the checker is provided, we filter for checker tools.
        if checker is not None:
            api_tools = [tool for tool in api_tools if tool.has_checker == checker]

    resp.tools = api_tools
    return resp
def main(ctx, find_missing_annotations, find_duplicate_tools, find_invalid_annotations, annotations_yml_url,
         db_name, db_host, db_auth_database, db_user, db_password, db_port, slack_token):
    """Run the requested curation checks.

    Help is printed when the database name, host or user is missing, or when
    an annotation check is requested without ``annotations_yml_url``.
    Otherwise the database configuration is assembled and each enabled check
    is executed.
    """
    config = {}
    wants_annotation_check = (find_missing_annotations is True) or (find_invalid_annotations is True)

    if (db_name is None) or (db_host is None) or (db_user is None):
        print_help(ctx, value=True)
    elif wants_annotation_check and (annotations_yml_url is None):
        print_help(ctx, value=True)
    else:
        config['BIOCONT_DB_NAME'] = db_name
        config['MONGODB_HOST'] = db_host
        config['MONGO_PORT'] = db_port
        config['MONGODB_USER'] = db_user
        config['MONGODB_ADMIN_DB'] = db_auth_database
        config['MONGODB_PASS'] = db_password
        config['DATABASE_URI'] = get_database_uri(config)

        tools = []
        # Only the missing-annotation and duplicate checks read the tools.
        needs_tools = find_missing_annotations is True or find_duplicate_tools is True
        if needs_tools:
            db_uri = get_database_uri(config)
            connect(db_uri)
            tools = list(MongoTool.get_all_tools())
            print("Total tools: {}".format(len(tools)))

        if find_missing_annotations is True:
            missing_annotations(annotations_yml_url, tools, slack_token)

        if find_duplicate_tools is True:
            duplicate_tools(tools, slack_token)

        if find_invalid_annotations is True:
            invalid_annotations(annotations_yml_url)
def annotate_docker_containers(docker_recipes):
    """Copy recipe metadata onto the matching tool version and tool.

    :param docker_recipes: iterable of entries with a ``name`` of the form
        ``<tool>/<version>`` and a ``recipe`` object exposing the
        ``get_*`` accessors used below
    """
    for entry in docker_recipes:
        recipe_name = entry['name']
        logger.info("Annotating the recipe -- " + recipe_name)

        parts = recipe_name.split("/")
        tool_id = parts[0]
        tool_version_id = parts[0] + "-v" + parts[1]

        tool_version = MongoToolVersion.get_tool_version_by_id(tool_version_id)
        tool = MongoTool.get_tool_by_id(tool_id)

        if tool_version is not None:
            recipe = entry['recipe']
            description = recipe.get_description()
            if description is not None:
                tool_version.description = description.capitalize()
            home_url = recipe.get_home_url()
            if home_url is not None:
                tool_version.home_url = home_url
            version_license = recipe.get_license()
            # A recipe without a license is recorded as not available.
            tool_version.license = NOT_AVAILABLE if version_license is None else version_license
            tool_version.save()
            logger.info("Updated tool version description of -- " + tool_version_id)

        if tool is not None:
            recipe = entry['recipe']
            description = recipe.get_description()
            if description is not None:
                tool.description = description.capitalize()
            home_url = recipe.get_home_url()
            if home_url is not None:
                tool.home_url = home_url
            tool_license = recipe.get_license()
            tool.license = NOT_AVAILABLE if tool_license is None else tool_license
            tags = recipe.get_tags()
            if tags is not None:
                tool.tool_tags = tags
            additional_ids = recipe.get_additional_ids()
            if additional_ids is not None:
                tool.add_additional_identifiers(additional_ids)
            tool.save()
            logger.info("Updated tool description of -- " + tool_version_id)
def stats():
    """
    Collect the counters exposed by the API.

    :return: list of ``Stat`` objects: number of tools, number of versions,
        total containers, conda containers and docker containers (in that
        order); every value is rendered as a string
    """
    tools = MongoTool.get_all_tools()
    collected = [Stat('num_tools', str(len(tools)))]

    tool_versions = MongoToolVersion.get_all_tool_versions()
    collected.append(Stat('num_versions', str(len(tool_versions))))

    total_containers = 0
    docker_containers = 0
    conda_containers = 0
    for version in tool_versions:
        total_containers += len(version.image_containers)
        for image in version.image_containers:
            kind = image.container_type
            if kind == 'DOCKER':
                docker_containers += 1
            elif kind == 'CONDA':
                conda_containers += 1

    collected.extend([
        Stat('num_containers', str(total_containers)),
        Stat('num_conda_containers', str(conda_containers)),
        Stat('num_docker_containers', str(docker_containers)),
    ])
    return collected
def compute_similarity():
    """Compute and store, for every tool, the tools with similar metadata.

    The ``additional_metadata`` text of each tool is vectorised with TF-IDF
    and every pair of tools is compared through the product of the TF-IDF
    matrix with its transpose. Pairs scoring above 0.2 are stored, with the
    score scaled by 100, in one ``SimilarTool`` record per tool.
    """
    tools = list(MongoTool.get_all_tools())

    tool_ids = []
    descriptions = []
    for tool in tools:
        tool.build_complete_metadata()
        tool_ids.append(tool.id)
        descriptions.append(tool.additional_metadata)

    if not descriptions:
        # The vectorizer cannot be fitted on an empty corpus.
        return

    vect = TfidfVectorizer(min_df=1)
    tfidf = vect.fit_transform(descriptions)
    # toarray() replaces the ``.A`` shorthand, which newer SciPy releases
    # no longer provide on sparse containers.
    results = (tfidf * tfidf.T).toarray()
    print(results)

    # Both loops cover every tool, including the last one.
    for i, current_id in enumerate(tool_ids):
        similar = SimilarTool()
        similar.id = current_id
        for j, other_id in enumerate(tool_ids):
            if i != j and results[i][j] > 0.2:
                similar.add_similar(other_id, results[i][j] * 100)
        print(i)
        similar.save()
def tools_id_versions_get(id):  # noqa: E501
    """List versions of a tool

    Returns all versions of the specified tool, followed by the versions of
    every tool returned by ``MongoTool.get_tool_with_anchor_tool_field`` for
    the same id. # noqa: E501

    :param id: A unique identifier of the tool, scoped to this registry, for example `123456`.
    :type id: str

    :rtype: List[ToolVersion]
    """
    mongo_tool = MongoTool.get_tool_by_id(id)
    anchor_tools = MongoTool.get_tool_with_anchor_tool_field(id)

    if mongo_tool is None:
        return []

    version_documents = mongo_tool.get_tool_versions()
    has_anchor_tools = (anchor_tools is not None) and (len(anchor_tools) > 0)
    if has_anchor_tools:
        for anchored in anchor_tools:
            version_documents.extend(anchored.get_tool_versions())

    # Every version is reported under the id of the requested tool.
    return [
        transform_tool_version(version_document, mongo_tool.id)
        for version_document in version_documents
    ]
def annotate_biotools_metadata(tools_recipes):
    """Annotate stored tools with the metadata of their bio.tools recipe.

    For each entry whose recipe has an id, the tool registered under the
    additional identifier ``biotools:<id>`` is looked up. When the tool
    carries exactly that identifier, its description, home URL, license and
    publications are updated from the recipe and the tool is saved.

    :param tools_recipes: iterable of entries with a ``name`` and a
        ``recipe`` object exposing the ``get_*`` accessors used below
    """
    # NOTE(review): ``tool_id`` is kept module-level because it was declared
    # global here; presumably nothing else reads it -- confirm before removing.
    global tool_id

    for entry in tools_recipes:
        logger.info("Annotating the recipe -- " + entry['name'])
        recipe = entry['recipe']
        recipe_id = recipe.get_id()

        if recipe_id is not None:
            tool_id = recipe_id
            biotools_id = "biotools:" + recipe_id
            tools = MongoTool.get_tool_by_additional_id(biotools_id)
            if len(tools) > 0 and tools[0] is not None:
                tool = tools[0]
                # Exact comparison: a substring test would also accept
                # identifiers such as "biotools:" or "tools".
                if any(identifier == biotools_id for identifier in tool.additional_identifiers):
                    description = recipe.get_description()
                    if description is not None:
                        tool.description = description.capitalize()
                    home_url = recipe.get_home_url()
                    if home_url is not None:
                        tool.home_url = home_url
                    recipe_license = recipe.get_license()
                    if recipe_license:
                        tool.license = recipe_license
                    references = recipe.get_references()
                    if references is not None:
                        for reference in references:
                            tool.add_publication(_publication_from_reference(reference))
                    tool.build_complete_metadata()
                    tool.save()
                    logger.info("Updated tool description of -- " + recipe_id)

        # Log the id of this recipe (``None`` when it has none) rather than
        # whatever id an earlier iteration left behind.
        logger.info("The following tool has been analyzed -- " + str(recipe_id))


def _publication_from_reference(reference):
    """Build a ``Publication`` from one reference mapping of a recipe."""
    publication = Publication()
    if reference.get('pmcid') is not None:
        publication.pmc_id = reference['pmcid']
    if reference.get('pmid') is not None:
        publication.pubmed_id = reference['pmid']
    if reference.get('doi') is not None:
        publication.doi = reference['doi']

    metadata = reference.get('metadata')
    if metadata is None:
        return publication

    if metadata.get('title'):
        publication.title = metadata['title']
    abstract = metadata.get('abstract')
    # Whitespace-only abstracts are treated as missing.
    if abstract is not None and len(abstract.strip()) > 0:
        publication.abstract = abstract
    if metadata.get('citationCount') is not None:
        publication.citation_count = metadata['citationCount']
    if metadata.get('journal') is not None:
        publication.journal = metadata['journal']
    if metadata.get('date'):
        publication.publication_date = metadata['date']
    if metadata.get('authors') is not None:
        for author in metadata['authors']:
            publication.add_author(author['name'])
    return publication
def get_missing_info_tools(self):
    """Return the tools that lack a description, a license or a home URL.

    A license equal to 'NOT AVAILABLE' (case-insensitive) counts as missing.

    :return: list of the incomplete tools
    """
    incomplete = []
    for tool in MongoTool.get_all_tools():
        missing_description = tool.description is None or len(tool.description) == 0
        missing_license = tool.license is None or tool.license.upper() == 'NOT AVAILABLE'
        missing_home_url = tool.home_url is None or len(tool.home_url) == 0
        # Only tools with at least one gap are reported.
        if missing_description or missing_license or missing_home_url:
            incomplete.append(tool)
    return incomplete
def tools_get_similars(id=None):
    """Return the API tools recorded as similar to the given tool.

    :param id: identifier used to look up the stored similarity record
    :return: list of API tools, each with ``similar_score`` set to the stored
        score; empty when no similarity record exists. Tools with a
        non-empty ``anchor_tool`` are left out.
    """
    similar_tool = SimilarTool.get_similars_by_id(id)
    if similar_tool is None:
        # No similarity record for this id.
        return []

    ids = [similar.id for similar in similar_tool.similars]
    # For a repeated id the last score wins, as it does in a sequential scan.
    scores = {similar.id: similar.score for similar in similar_tool.similars}

    tools = MongoTool.get_all_tools_by_id(ids)
    result_tools = []
    if tools is not None:
        for mongo_tool in tools:
            if ('anchor_tool' in mongo_tool) and (len(mongo_tool.anchor_tool) > 0):
                continue  # don't include tools that have 'anchor_tool' field
            tool = transform_mongo_tool(mongo_tool)
            tool.similar_score = scores.get(mongo_tool.id, 0)
            result_tools.append(tool)
    return result_tools
def annotate_conda_recipes():
    """Refresh conda download metrics on tool versions and their tools.

    Every CONDA image of every tool version gets its downloads, size and
    (when available) last-update date from ``CondaMetrics``. The downloads
    are summed per tool name and stored on the tool under the "conda" pull
    provider.
    """
    conda_helper = CondaMetrics()
    # Tool name -> accumulated downloads; insertion order is the order in
    # which tools first received a non-zero count.
    pulls_by_tool = {}

    for tool_version in MongoToolVersion.get_all_tool_versions():
        tool_name = tool_version.name
        already_counted = tool_name in pulls_by_tool
        count = pulls_by_tool.get(tool_name, 0)

        images = []
        for image in tool_version.image_containers:
            if image.container_type == 'CONDA':
                annotations = conda_helper.get_number_downloas_by_version(tool_name, tool_version.version)
                image.downloads = annotations['downloads']
                count = count + image.downloads
                image.size = annotations['size']
                # Check the value itself before slicing; a missing date is
                # skipped instead of failing on the slice.
                last_update = (annotations['last_update'] or "")[0:10]
                if last_update.strip():
                    image.last_updated = last_update
                print(annotations)
            images.append(image)
        tool_version.image_containers = images

        # A tool is tracked once it has downloads and is updated afterwards.
        if already_counted or count > 0:
            pulls_by_tool[tool_name] = count
        tool_version.save()

    print([{"id": name, "count": total} for name, total in pulls_by_tool.items()])

    for name, total in pulls_by_tool.items():
        tool = MongoTool.get_tool_by_id(name)
        if tool is None:
            # A version without a stored tool must not abort the remaining updates.
            print("Tool not found, conda pulls not stored -- " + str(name))
            continue
        tool.add_pull_provider("conda", total)
        tool.save()
def annotate_quayio_containers(conda_recipes):
    """Copy conda recipe metadata onto the matching tool version and tool.

    Recipes whose name or version is missing, or still contains a "{" or "|"
    character, are only logged. For the others the tool version
    ``<name>-<version>`` and the tool ``<name>`` (both lower-cased) are
    updated with the recipe's description, home URL and license; the tool
    also receives the recipe's biotools identifiers.

    :param conda_recipes: iterable of entries with a ``name`` and a
        ``recipe`` object exposing the ``get_*`` accessors used below
    """
    for entry in conda_recipes:
        logger.info("Annotating the recipe -- " + entry['name'])
        recipe = entry['recipe']
        tool_version_id = None

        name = recipe.get_name()
        version = recipe.get_version()
        usable = all(
            value is not None and "{" not in value and "|" not in value
            for value in (name, version))

        if usable:
            tool_version_id = (name + "-" + version).lower()
            tool_id = name.lower()
            tool_version = MongoToolVersion.get_tool_version_by_id(tool_version_id)
            tool = MongoTool.get_tool_by_id(tool_id)

            if tool_version is not None:
                description = recipe.get_description()
                if description is not None:
                    tool_version.description = description.capitalize()
                home_url = recipe.get_home_url()
                if home_url is not None:
                    tool_version.home_url = home_url
                version_license = recipe.get_license()
                if version_license is not None and len(version_license) > 0:
                    tool_version.license = version_license
                else:
                    tool_version.license = NOT_AVAILABLE
                tool_version.save()
                logger.info("Updated tool version description of -- " + tool_version_id)

            if tool is not None:
                description = recipe.get_description()
                if description is not None:
                    tool.description = description.capitalize()
                home_url = recipe.get_home_url()
                if home_url is not None:
                    tool.home_url = home_url
                # The license fallback depends on the license only ...
                tool_license = recipe.get_license()
                if tool_license is not None and bool(tool_license):
                    tool.license = tool_license
                else:
                    tool.license = NOT_AVAILABLE
                # ... and identifiers are registered whenever the recipe has them.
                biotool_ids = recipe.get_biotool_ids()
                if biotool_ids is not None:
                    tool.add_additional_identifiers(biotool_ids)
                tool.save()
                logger.info("Updated tool description of -- " + tool_version_id)

        logger.info("The following tool has been analyzed -- " + str(tool_version_id))
def update_from_file(self, file_annotations):
    """
    Update the metadata of stored tools from an annotations file:
    https://github.com/BioContainers/tools-metadata/blob/master/annotations.yaml

    Only entries flagged with ``manually_check`` are applied, and a tool is
    saved only when at least one of its fields changed.

    :param file_annotations: mapping of tool id to its annotation mapping
    """
    for key in file_annotations:
        tool_file = file_annotations[key]
        manually_checked = 'manually_check' in tool_file and tool_file['manually_check'] == True  # noqa: E712
        if not manually_checked:
            continue

        mongo_tool = MongoTool.get_tool_by_id(key)
        if mongo_tool is None:
            continue

        changed = False
        for field in ('description', 'license', 'home_url'):
            if getattr(mongo_tool, field) != tool_file[field]:
                setattr(mongo_tool, field, tool_file[field])
                changed = True

        if 'identifiers' in tool_file and tool_file['identifiers'] != mongo_tool.additional_identifiers:
            # Call the method; assigning to it would replace the method on
            # the instance and store nothing.
            mongo_tool.add_additional_identifiers(tool_file['identifiers'])
            changed = True

        if 'keywords' in tool_file and len(tool_file['keywords']) > 0:
            tags = mongo_tool.tool_tags if mongo_tool.tool_tags is not None else []
            # dict.fromkeys drops duplicates while keeping the first occurrence order.
            mongo_tool.tool_tags = list(dict.fromkeys(list(tags) + list(tool_file['keywords'])))
            changed = True

        if ('anchor_tool' in tool_file) and (mongo_tool.anchor_tool != tool_file['anchor_tool']):
            mongo_tool.anchor_tool = tool_file['anchor_tool']
            changed = True

        if changed:
            mongo_tool.save()
            logger.info("The tool has been updated " + key)
def compute_facets():
    """Recount the tool tags and licenses of all tools and store both facets.

    The counts are written to the ``Facet`` documents with ids "tool_tags"
    and "licenses"; a document is created when it does not exist yet.
    """
    # Counting starts from zero on every run: all tools are visited, so
    # seeding from the stored values would add to the previous totals.
    tag_counts = {}
    license_counts = {}

    for tool in MongoTool.get_all_tools():
        for keyword in tool.tool_tags or []:
            # NOTE(review): tags are encoded like licenses so the facet can be
            # stored; presumably tags can contain "." as well -- confirm.
            tag_key = mongo_encode_key(keyword)
            tag_counts[tag_key] = tag_counts.get(tag_key, 0) + 1

        current_license = tool.license
        if current_license is not None:
            # MongoDB keys cannot hold every character (e.g. "."), so the
            # license is encoded before it is used as a key.
            license_key = mongo_encode_key(current_license)
            license_counts[license_key] = license_counts.get(license_key, 0) + 1

    license_facet = Facet.get_facet_by_id("licenses")
    if license_facet is None:
        license_facet = Facet()
        license_facet.id = "licenses"
    license_facet.values = license_counts
    license_facet.save()

    tags_facet = Facet.get_facet_by_id("tool_tags")
    if tags_facet is None:
        tags_facet = Facet()
        tags_facet.id = "tool_tags"
    tags_facet.values = tag_counts
    # The tag facet has to be saved too, otherwise its counts are lost.
    tags_facet.save()
def tools_id_versions_version_id_get(id, version_id):  # noqa: E501
    """List one specific tool version, acts as an anchor for self references

    Looks the tool up by id, converts all of its versions and returns the
    first one whose id equals ``version_id``. # noqa: E501

    :param id: A unique identifier of the tool, scoped to this registry, for example `123456`.
    :type id: str
    :param version_id: An identifier of the tool version, scoped to this registry, for example `v1`. We recommend that versions use semantic versioning https://semver.org/spec/v2.0.0.html (For example, `1.0.0` instead of `develop`)
    :type version_id: str

    :rtype: ToolVersion
    """
    mongo_tool = MongoTool.get_tool_by_id(id)
    if mongo_tool is None:
        return None

    candidates = [
        transform_tool_version(version_document, mongo_tool.id)
        for version_document in mongo_tool.get_tool_versions()
    ]
    # None when the tool has no version with the requested id.
    return next(
        (candidate for candidate in candidates if candidate.id == version_id),
        None)
def insert_dockerhub_containers(dockerhub_containers):
    """
    Insert DockerHub containers into the Mongo database.

    For every tag of every container, the matching tool version and tool are
    created (or reused when their id is already known), updated and saved.
    After the tags of a container are processed, its pull count is stored on
    the last successfully saved tool under the "dockerhub" provider.

    :param dockerhub_containers: List of DockerHub containers
    :return: None
    """
    # Index the stored versions and tools by id so that existing documents
    # are reused instead of being created again.
    list_versions = list(MongoToolVersion.get_all_tool_versions())
    tool_versions_dic = {}
    for tool_version in list_versions:
        tool_versions_dic[tool_version.id] = tool_version

    tools_dic = {}
    list_tools = list(MongoTool.get_all_tools())
    for tool in list_tools:
        tools_dic[tool.id] = tool

    for container in dockerhub_containers:
        # The version is read from the container tag.
        current_tool = None
        for key in container.tags:
            # First insert the tool version containers. The version is the
            # part of the tag name before the first "_", as defined by the
            # container provider (Docker or Quay.io).
            version = key['name'].split("_", 1)[0]
            tool_version_id = container.name() + TOOL_VERSION_SPLITTER + version
            if tool_version_id not in tool_versions_dic:
                mongo_tool_version = MongoToolVersion()
                mongo_tool_version.name = container.name()
                mongo_tool_version.version = version
                mongo_tool_version.description = container.description()
                mongo_tool_version.tool_classes = [_CONSTANT_TOOL_CLASSES['CommandLineTool']]
                mongo_tool_version.id = tool_version_id
                mongo_tool_version.add_author(BIOCONTAINERS_USER)
                mongo_tool_version.organization = container.organization()
            else:
                mongo_tool_version = tool_versions_dic[tool_version_id]

            # Get the tag information (container image) and add it to the tool version.
            container_image = ContainerImage()
            # NOTE(review): ``key`` is the whole tag mapping here, while the
            # full tag below uses key['name'] -- confirm this is intended.
            container_image.tag = key
            container_image.full_tag = DOCKER_DOMAIN + container.name() + ":" + key['name']
            container_image.container_type = 'DOCKER'
            # Only the leading YYYY-MM-DD part of the timestamp is parsed.
            datetime_object = datetime.datetime.strptime(key['last_updated'][0:10], '%Y-%m-%d')
            container_image.last_updated = datetime_object
            container_image.size = int(int(key['full_size']))
            mongo_tool_version.add_image_container(container_image)
            tool_versions_dic[tool_version_id] = mongo_tool_version

            # Insert the corresponding tool
            tool_id = container.name()
            if tool_id not in tools_dic:
                mongo_tool = MongoTool()
                mongo_tool.name = container.name()
                mongo_tool.id = container.name()
                mongo_tool.description = container.description()
                mongo_tool.tool_classes = [_CONSTANT_TOOL_CLASSES['CommandLineTool']]
                tools_dic[tool_id] = mongo_tool
                mongo_tool.add_authors(mongo_tool_version.authors)
                mongo_tool.organization = container.organization()
                mongo_tool.checker = container.checker()
            else:
                mongo_tool = tools_dic[tool_id]

            mongo_tool.add_registry(container.registry())
            mongo_tool.add_alias(container.alias())
            tools_dic[tool_id] = mongo_tool

            try:
                mongo_tool.save()
                # Remember the tool only when it was saved successfully.
                current_tool = mongo_tool
            except DuplicateKeyError as error:
                logger.error(" A tool with same name is already in the database -- " + tool_id)

            # The tool is saved before the version that references it.
            mongo_tool_version.ref_tool = mongo_tool
            # mongo_versions = mongo_tool.get_tool_versions()
            try:
                mongo_tool_version.save()
            except DuplicateKeyError as error:
                logger.error(
                    " A tool version with a same name and version is in the database -- " + tool_version_id)

        if current_tool is not None:
            current_tool.add_pull_provider("dockerhub", container.get_pull_count())
            current_tool.save()

    # NOTE(review): this list is built but never used or returned.
    containers_list = list(tool_versions_dic.values())
def facets_get(id=None, alias=None, tool_class=None, registry=None, organization=None, name=None, toolname=None,
               description=None, author=None, checker=None, facets=None, all_fields_search=None):  # noqa: E501
    """Facets all the properties from tools

    This endpoint returns all facets properties available or a filtered subset using metadata query parameters. # noqa: E501

    :param id: A unique identifier of the tool, scoped to this registry, for example `123456`.
    :type id: str
    :param alias: Support for this parameter is optional for tool registries that support aliases. If provided will only return entries with the given alias.
    :type alias: str
    :param tool_class: Filter tools by the name of the subclass (#/definitions/ToolClass)
    :type tool_class: str
    :param registry: The image registry that contains the image.
    :type registry: str
    :param organization: The organization in the registry that published the image.
    :type organization: str
    :param name: The name of the image.
    :type name: str
    :param toolname: The name of the tool.
    :type toolname: str
    :param description: The description of the tool.
    :type description: str
    :param author: The author of the tool (TODO a thought occurs, are we assuming that the author of the CWL and the image are the same?).
    :type author: str
    :param checker: Return only checker workflows.
    :type checker: bool
    :param facets: Comma-separated list of `name:value` pairs used as facet filters.
    :type facets: str
    :param all_fields_search: When given, this single term replaces id, license, tool tags, alias, organization, name, toolname, description and author, and an all-field search is requested.
    :type all_fields_search: str

    :rtype: List[Facet]
    """
    is_all_field_search = False
    license = None
    tool_tags = None
    if all_fields_search is not None:
        id = license = tool_tags = alias = organization = name = toolname = description = author = all_fields_search
        is_all_field_search = True

    # Group the requested facet values by facet name.
    facets_dic = {}
    if facets is not None:
        for facet in facets.split(","):
            value_list = facet.split(":")
            if len(value_list) < 2:
                # An entry without ":" carries no value and is ignored.
                continue
            facets_dic.setdefault(value_list[0], []).append(value_list[1])

    # The query is not paged (offset 0, limit 100000) so the facets cover
    # every matching tool; the tool class filter is forwarded as well.
    resp = MongoTool.get_tools(id=id, alias=alias, registry=registry, organization=organization, name=name,
                               toolname=toolname, toolclass=tool_class, description=description, author=author,
                               checker=checker, license=license, tool_tags=tool_tags, facets=facets_dic,
                               offset=0, limit=100000, is_all_field_search=is_all_field_search)
    if resp is None:
        return None

    return get_facets(resp.tools)
def insert_quayio_containers(quayio_containers):
    """
    Insert Quay.io containers into the Mongo database.

    For every tag of every container, the matching tool version and tool are
    created (or reused when their id is already known), a CONDA image and a
    DOCKER image are attached to the version, and both documents are saved.
    After the tags of a container are processed, the sum of its pull counts
    is stored on the last successfully saved tool under the "quay.io"
    provider.

    :param quayio_containers: List of Quay.io containers
    :return: None
    """
    # Index the stored versions and tools by id so that existing documents
    # are reused instead of being created again.
    list_versions = list(MongoToolVersion.get_all_tool_versions())
    tool_versions_dic = {}
    for tool_version in list_versions:
        tool_versions_dic[tool_version.id] = tool_version

    tools_dic = {}
    list_tools = list(MongoTool.get_all_tools())
    for tool in list_tools:
        tools_dic[tool.id] = tool

    for container in quayio_containers:
        # The version is read from the container tag.
        version_list = []
        current_tool = None
        for key, val in container.tags().items():
            # First insert the tool version containers. The version is the
            # part of the tag before the first "--", as defined by the
            # container provider (Docker or Quay.io).
            version = key.split("--", 1)[0]
            tool_version_id = container.name() + TOOL_VERSION_SPLITTER + version
            if tool_version_id not in tool_versions_dic:
                mongo_tool_version = MongoToolVersion()
                mongo_tool_version.name = container.name()
                mongo_tool_version.version = version
                mongo_tool_version.description = container.description()
                mongo_tool_version.organization = container.organization()
                # Names containing "mulled-v2" are classed as multi-tool containers.
                if "mulled-v2" not in mongo_tool_version.name:
                    mongo_tool_version.tool_classes = [_CONSTANT_TOOL_CLASSES['CommandLineTool']]
                else:
                    mongo_tool_version.tool_classes = [_CONSTANT_TOOL_CLASSES['CommandLineMultiTool']]
                mongo_tool_version.id = tool_version_id
                mongo_tool_version.add_author(BIOCONTAINERS_USER)
                mongo_tool_version.add_author(BICONDA_USER)
            else:
                mongo_tool_version = tool_versions_dic[tool_version_id]

            # Add only one conda package for each version.
            # NOTE(review): the guard tracks the full tag (``key``), not
            # ``version``; presumably one entry per tag is added -- confirm
            # against the comment above.
            if key not in version_list:
                container_image = ContainerImage()
                container_image.tag = "conda:" + key
                container_image.full_tag = container.name() + "==" + key
                container_image.container_type = 'CONDA'
                container_image.size = 0
                container_image.downloads = 0
                mongo_tool_version.add_image_container(container_image)
                version_list.append(key)

            # Add container
            container_image = ContainerImage()
            container_image.tag = key
            container_image.full_tag = QUAYIO_DOMAIN + container.name() + ":" + key
            container_image.container_type = 'DOCKER'
            # The last 15 characters are cut off so that only the
            # "<weekday>, <day> <month> <year>" part is parsed.
            datetime_object = datetime.datetime.strptime(val['last_modified'][0:-15], '%a, %d %b %Y')
            container_image.last_updated = datetime_object
            container_image.size = int(int(val['size']))
            container_image.downloads = 0
            mongo_tool_version.add_image_container(container_image)
            tool_versions_dic[tool_version_id] = mongo_tool_version

            # Insert the corresponding tool
            tool_id = container.name()
            if tool_id not in tools_dic:
                mongo_tool = MongoTool()
                mongo_tool.name = container.name()
                if "mulled-v2" not in mongo_tool_version.name:
                    mongo_tool.tool_classes = [_CONSTANT_TOOL_CLASSES['CommandLineTool']]
                else:
                    mongo_tool.tool_classes = [_CONSTANT_TOOL_CLASSES['CommandLineMultiTool']]
                mongo_tool.id = container.name()
                mongo_tool.description = container.description()
                mongo_tool.add_authors(mongo_tool_version.authors)
                mongo_tool.organization = container.organization()
                mongo_tool.checker = container.checker()
            else:
                mongo_tool = tools_dic[tool_id]

            mongo_tool.add_registry(container.registry())
            mongo_tool.add_alias(container.alias())
            tools_dic[tool_id] = mongo_tool

            try:
                mongo_tool.save()
                # Remember the tool only when it was saved successfully.
                current_tool = mongo_tool
            except DuplicateKeyError as error:
                logger.error(" A tool with same name is already in the database -- " + tool_id)

            # The tool is saved before the version that references it.
            mongo_tool_version.ref_tool = mongo_tool
            try:
                mongo_tool_version.save()
            except DuplicateKeyError as error:
                logger.error(
                    " A tool version with a same name and version is in the database -- " + tool_version_id)

        if current_tool is not None:
            # Sum the counts of all pull statistics of the container.
            count = 0
            for stat in container.pulls():
                count = count + stat['count']
            current_tool.add_pull_provider("quay.io", count)
            current_tool.save()

    # NOTE(review): this list is built but never used or returned.
    containers_list = list(tool_versions_dic.values())