# --- Example #1 (score: 0) ---
def update(e_set_config, rebuild=False):
    """Refresh the identifier sets of one e_set from its NDEx networks.

    Queries the configured NDEx server for candidate networks, decides
    which of them are new or stale (all of them when *rebuild* is True),
    re-extracts each network's gene symbols into an identifier set, and
    saves the rebuilt e_set, replacing the previously cached id_sets.

    :param e_set_config: configuration object with ``name``, ``ndex``,
        ``account_name`` and ``password`` attributes — TODO confirm exact
        contract against the config module (not visible here).
    :param rebuild: when True, re-index every network found regardless of
        cached modification dates.
    """
    # Create a fresh data model and load the e_set if it was saved before.
    e_data = dm.EnrichmentData("e_sets")

    print("Updating e_set: " + e_set_config.name)
    e_set = e_data.add_e_set(e_set_config.name)
    storage.ensure_e_set_dir(e_set_config.name)
    e_set.load()

    na = ndex_access.NdexAccess(e_set_config.ndex, e_set_config.account_name, e_set_config.password)

    # Find the candidate networks on the server.
    networks = na.find_networks(e_set_config)

    print("Found " + str(len(networks)) + " networks")

    # TODO: remove e_sets that are not found in the query

    # Figure out which networks need (re)indexing: everything on a full
    # rebuild, otherwise only networks that have no cached id_set or whose
    # NDEx modification time differs from the cached one.
    networks_to_update = {}
    for network in networks:
        network_id = network.get("externalId")
        network_name = network.get("name")
        if rebuild:
            networks_to_update[network_name] = network_id
            continue
        modification_date = network.get("modificationTime")
        id_set = e_set.get_id_set_by_network_id(network_id)
        if not id_set or id_set.modificationDate != modification_date:
            networks_to_update[network_name] = network_id

    print("Updating " + str(len(networks_to_update)) + " networks")

    # Do the updates.
    counter = 0
    term_mapper = term2gene_mapper.Term2gene_mapper()
    for network_name, network_id in networks_to_update.items():
        # Strip non-ASCII characters before logging (decode back to str so
        # the concatenation stays a text, not bytes, operation).
        safe_name = network_name.encode("ascii", "ignore").decode("ascii")
        safe_id = network_id.encode("ascii", "ignore").decode("ascii")
        print(safe_name + " : " + safe_id)
        node_table = na.get_nodes_from_cx(network_id)
        term_mapper.add_network_nodes(node_table)
        gene_symbols = get_genes_for_network(node_table, term_mapper)
        id_set_dict = {
            "name": network_name,
            "ids": gene_symbols,
            "network_id": network_id,
            "ndex": e_set_config.ndex,
            "e_set": e_set_config.name,
        }
        id_set = dm.IdentifierSet(id_set_dict)
        e_set.add_id_set(id_set)
        counter += 1
        print(str(counter) + " networks indexed.")

    # Now that the updated e_set is ready to save, clear the old cached data.
    storage.remove_all_id_sets(e_set_config.name)

    print("Saving e_set with " + str(len(e_set.get_id_set_names())) + " id_sets")
    e_set.save()
# --- Example #2 (score: 0) ---
# NOTE(review): this fragment opens with the `else:` arm of a conditional
# (presumably guarding a sif-to-cx conversion step) whose `if` lies before
# this excerpt — confirm against the full script.
else:
    print "skipping run sif to cx"

#===================================
#===================================
# Generate similarity files
#===================================
#===================================
# Connect to the NDEx server that networks were uploaded to; the
# credentials come from variables defined earlier in the script.
na = ndex_access.NdexAccess(upload_to_server, upload_to_username,
                            upload_to_password)

term_mapper = term2gene_mapper.Term2gene_mapper()
# Create a fresh data model with a single 'pipeline' e_set backed by disk.
e_data = dm.EnrichmentData("e_sets")
e_set = e_data.add_e_set('pipeline')
storage.ensure_e_set_dir('pipeline')
counter = 0
if run_generate_gsea_files:
    # For each network selected earlier: pull its nodes from CX, map the
    # node terms to gene symbols, and collect them into an identifier set.
    for network_name, network_id in networks_to_update.iteritems():
        print str(network_id)
        # Log name/id with non-ASCII characters stripped.
        print network_name.encode(
            'ascii', 'ignore') + " : " + network_id.encode('ascii', 'ignore')
        node_table = na.get_nodes_from_cx(network_id)
        term_mapper.add_network_nodes(node_table)
        gene_symbols = get_genes_for_network(node_table, term_mapper)
        # Flag networks that yielded no gene symbols at all.
        if (len(gene_symbols) < 1):
            print gene_symbols
        id_set_dict = {
            'name': network_name,
            'ids': gene_symbols,
            'network_id': network_id,
            # NOTE(review): the dict literal continues beyond the end of
            # this excerpt — remaining keys are not visible here.
 def save(self, alt_grp_path=None):
     """Persist every identifier set in this e_set to storage.

     Ensures the e_set directory exists, then saves each id_set.

     :param alt_grp_path: optional alternate path forwarded verbatim to
         each ``id_set.save`` call (semantics defined by IdentifierSet).
     """
     storage.ensure_e_set_dir(self.name)
     # Set names are only the dict keys; iterate the values directly.
     # (.values() replaces Python 2's .iteritems(), removed in Python 3.)
     for id_set in self.id_set_map.values():
         id_set.save(alt_grp_path)
# --- Example #4 (score: 0) ---
 def save(self):
     """Persist every identifier set in this e_set to storage.

     Ensures the e_set directory exists, then saves each id_set with its
     default destination.
     """
     storage.ensure_e_set_dir(self.name)
     # Set names are only the dict keys; iterate the values directly.
     # (.values() replaces Python 2's .iteritems(), removed in Python 3.)
     for id_set in self.id_set_map.values():
         id_set.save()