コード例 #1
0
def find_imp_neig(imp_neigs, g1, perc, model, scaler, inputs, explore_prob,
                  rand_flag):  # rename to epsilon-greedy
    """Pick the next neighbour to add, epsilon-greedy style.

    With probability ``explore_prob`` a random neighbour is chosen
    (exploration); otherwise every candidate is scored by temporarily
    adding it to ``g1`` and the highest-scoring one wins (exploitation).

    Returns ``(chosen_node, score, comp_bool, rand_flag)``; score and
    comp_bool stay ``None`` unless the exploitation branch ran.
    """
    best_score = None
    best_bool = None
    candidates = list(imp_neigs.keys())
    if len(candidates) == 1:
        chosen = candidates[0]
    elif rand_uniform(low=0.0, high=1.0) <= explore_prob:
        # Exploration: pick uniformly at random and flag the randomness.
        logging_debug("Exploring with low probability"
                      )  # Move to top for efficiency and remove del max
        chosen = random_choice(candidates)
        rand_flag = 1
    else:
        if inputs["use_all_neigs"] == 0:
            imp_neigs = pick_top_weight_neigs(imp_neigs, perc, inputs)
            # Return none when imp_neigs is None

        # Score each candidate by adding it to the graph, scoring, then
        # removing it again.
        for cand in imp_neigs:
            g1 = add_newnode(g1, cand, imp_neigs[cand]['graph_neigs'])
            cand_score, cand_bool = get_score(g1, model, scaler,
                                              inputs['model_type'])
            imp_neigs[cand]['compScore'] = cand_score
            imp_neigs[cand]['compBool'] = cand_bool
            g1.remove_node(cand)
        chosen, attrs = max(imp_neigs.items(),
                            key=lambda item: item[1]['compScore'])
        best_score = attrs['compScore']
        best_bool = attrs['compBool']
    return chosen, best_score, best_bool, rand_flag
コード例 #2
0
def add_top_neig_update(g1, thres_neig, folNm, inputs, model, scaler,
                        neig_list):
    """Add the best candidate neighbour to g1 and refresh the neighbour list.

    Returns ``(g1, success, added_node, score, comp_bool, rand_flag,
    neig_list)``; ``success`` is 0 when there was nothing left to add.
    """
    full_neig_list = None
    rand_flag = 0
    # Nothing left to extend the complex with.
    if not neig_list:
        logging_debug("No more neighbors to add")
        return g1, 0, None, None, None, rand_flag, neig_list
    # When not checking all neighbours, sample a manageable subset of a
    # very large neighbourhood (keeping the full list for the update step).
    if inputs["use_all_neigs"] == 0 and len(neig_list) > thres_neig:
        full_neig_list = neig_list
        neig_list = dict(random_sample(neig_list.items(), thres_neig))
        rand_flag = 1

    node_to_add, score, compBool, rand_flag = find_imp_neig(
        neig_list, g1, inputs['perc'], model, scaler, inputs,
        inputs['explore_prob'], rand_flag)

    g1 = add_newnode(g1, node_to_add, neig_list[node_to_add]['graph_neigs'])

    # Refresh against the full (unsampled) list when we sampled above.
    base_list = neig_list if full_neig_list is None else full_neig_list
    neig_list = update_neig_list(base_list, node_to_add, folNm, g1.nodes())
    return g1, 1, node_to_add, score, compBool, rand_flag, neig_list
コード例 #3
0
def random_walk(seed_node, n, folNm):  # in graph G
    """Grow a random-walk subgraph of up to n nodes starting at seed_node.

    Per-node neighbour dicts are read from pickled files under folNm.
    The walk runs for at most n + 10 steps so that revisited nodes do
    not stall it forever.
    """
    walk_graph = nx_Graph()
    walk_graph.add_node(seed_node)
    current = seed_node
    slack = 10  # extra steps to tolerate revisits
    for _ in range(1, n + slack):
        with open(folNm + "/" + current, 'rb') as fh:
            neighbours = pickle_load(fh)
        if not neighbours:
            logging_debug("No neighbours")
            break
        keys = list(neighbours.keys())
        nxt = keys[0] if len(keys) == 1 else rand_choice(keys)
        walk_graph.add_edge(current, nxt, weight=neighbours[nxt]['weight'])
        if len(walk_graph.nodes()) == n:
            break
        current = nxt
    return walk_graph
コード例 #4
0
def find_imp_neig(neig_list, explore_prob):
    """Epsilon-greedy choice of the next neighbour.

    With probability ``explore_prob`` a random key of ``neig_list`` is
    returned; otherwise the key whose entry has the highest 'compScore'.
    """
    keys = list(neig_list.keys())
    if len(keys) == 1:
        return keys[0]
    if rand_uniform(low=0.0, high=1.0) <= explore_prob:
        logging_debug("Exploring with low probability") # Move to top for efficiency and remove del max
        return random_choice(keys)
    # Exploitation: highest complex score wins.
    return max(neig_list, key=lambda k: neig_list[k]['compScore'])
コード例 #5
0
ファイル: sample.py プロジェクト: marcottelab/super.complex
def search_top_neigs(seed_node, scaler, par_inputs_fn):
    """Grow a candidate complex around seed_node by greedy neighbour addition.

    Run parameters and the trained classifier are loaded from pickles.
    Growth stops when the complex reaches max_size, no neighbour is left,
    or the classifier score falls below classi_thresh; the resulting
    (node set, score) pair is pickled to the output folder.
    """
    with open(par_inputs_fn, 'rb') as fh:
        cfg = pickle_load(fh)
    with open(cfg['modelfname'], 'rb') as fh:
        clf = pickle_load(fh)
    in_dir = cfg['folNm']
    out_dir = cfg['folNm_out']
    ok, comp = starting_edge(in_dir, seed_node)
    if ok == 0:
        return

    best = 0
    size_cap = cfg["max_size"]
    # Threshold on number of neighbors to consider
    neig_cap = cfg["thres_neig"]

    while len(comp) < size_cap:
        prior = best
        logging_debug("Adding next node")
        comp, added, new_node, best, is_comp, rand_flag = add_top_neig(
            comp, neig_cap, in_dir, cfg, clf, scaler)
        # add_top_neig may skip scoring; compute the score here then.
        if best is None or is_comp is None:
            best, is_comp = get_score(comp, clf, scaler, cfg['model_type'])
        if added == 0:
            break
        if best < cfg["classi_thresh"]:
            logging_debug("Complex found")
            # Drop the last node; it pushed the score below the threshold.
            comp.remove_node(new_node)
            best = prior
            break

    with open(out_dir + "/" + seed_node, 'wb') as fh:
        pickle_dump((frozenset(comp.nodes()), best), fh)
コード例 #6
0
ファイル: sample.py プロジェクト: marcottelab/super.complex
def search_max_neig(seed_node, scaler, par_inputs_fn):
    """Grow a complex from seed_node, always taking the heaviest-edge
    neighbour, until the classifier score drops below threshold or the
    size cap is reached; pickle (node set, score) to the output folder.
    """
    with open(par_inputs_fn, 'rb') as fh:
        cfg = pickle_load(fh)
    with open(cfg['modelfname'], 'rb') as fh:
        clf = pickle_load(fh)  # Seed node
    logging_debug("Seed node is", seed_node)
    in_dir = cfg['folNm']
    out_dir = cfg['folNm_out']
    size_cap = cfg["max_size"]
    best = 0
    ok, comp = starting_edge(in_dir, seed_node)
    if ok == 0:
        return
    while len(comp) < size_cap:
        logging_debug("Adding next node")

        candidates = read_neigs(comp.nodes(), in_dir)
        if not candidates:  # nothing left to extend with
            logging_debug("No more neighbors to add")
            break

        # Heaviest-weight neighbour wins.
        new_node = max(candidates,
                       key=lambda cand: candidates[cand]['weight'])
        comp = add_newnode(comp, new_node,
                           candidates[new_node]['graph_neigs'])

        prior = best
        best, is_comp = get_score(comp, clf, scaler, cfg['model_type'])

        if best < cfg["classi_thresh"]:
            logging_debug("Complex found")
            # Undo the score-lowering addition.
            comp.remove_node(new_node)
            best = prior
            break
    with open(out_dir + "/" + seed_node, 'wb') as fh:
        pickle_dump((frozenset(comp.nodes()), best), fh)
コード例 #7
0
    def close(self, *args, **kwargs):
        """
        Engine closed, copy file to DB if it has changed.

        Reads the local SQLite file, compares its md5 with the hash
        recorded when it was downloaded, and uploads it to the configured
        S3 bucket only when it changed.
        """
        super(DatabaseWrapper, self).close(*args, **kwargs)

        signature_version = self.settings_dict.get("SIGNATURE_VERSION", "s3v4")
        s3 = boto3.resource(
            's3',
            config=botocore.client.Config(signature_version=signature_version),
        )

        try:
            with open(self.settings_dict['NAME'], 'rb') as f:
                fb = f.read()

                # Skip the upload when the on-disk DB matches the copy we
                # originally loaded (db_hash is set by load_remote_db).
                m = md5()
                m.update(fb)
                if self.db_hash == m.hexdigest():
                    logging_debug(
                        "Database unchanged, not saving to remote DB!")
                    return

                bytesIO = BytesIO()
                bytesIO.write(fb)
                bytesIO.seek(0)

                s3_object = s3.Object(
                    self.settings_dict['BUCKET'],
                    self.settings_dict['REMOTE_NAME'],
                )
                # BUG FIX: boto3 Object.put() accepts keyword arguments
                # only; the stray positional 'rb' raised TypeError and the
                # upload never happened.
                s3_object.put(Body=bytesIO)
        except Exception as e:
            logging_debug(e)
            # BUG FIX: don't fall through and claim success after a
            # failed upload.
            return

        logging_debug("Saved to remote DB!")
コード例 #8
0
ファイル: sample.py プロジェクト: marcottelab/super.complex
def search_isa(
    seed_node, scaler, par_inputs_fn
):  # Picks out of a subset of its neighbors and adds the best node
    """Grow a complex from seed_node using iterative simulated annealing.

    Score-decreasing additions are accepted with probability
    exp((score_curr - score_prev) / T); T is divided by alpha after each
    round. The final (frozenset of nodes, score) is pickled to the
    output folder, keyed by the seed node name.
    """
    with open(par_inputs_fn, 'rb') as f:
        inputs = pickle_load(f)
    with open(inputs['modelfname'], 'rb') as f:
        model = pickle_load(f)

    folNm = inputs['folNm']
    folNm_out = inputs['folNm_out']

    score_prev = 0
    cd, g1 = starting_edge(folNm, seed_node)
    if cd == 0:
        return
    max_nodes = inputs["max_size"] - len(g1)

    num_iter = 1
    last_iter_imp = 0  # iteration index of the most recent score improvement
    thres_neig = inputs["thres_neig"]
    T = inputs["T0"]  # T0 value
    alpha = inputs["alpha"]

    while num_iter < max_nodes:  # Limiting number of iteration rounds

        logging_debug("Adding next node")
        # neig_list_old = neig_list
        # g1, cc, node_to_add, score_curr, comp_bool, rand_flag, neig_list = add_top_neig(g1, thres_neig, folNm, inputs, model, scaler, neig_list)
        g1, cc, node_to_add, score_curr, comp_bool, rand_flag = add_top_neig(
            g1, thres_neig, folNm, inputs, model, scaler)
        # add_top_neig may skip scoring (e.g. random exploration); score here.
        if (score_curr is None) or (comp_bool is None):
            score_curr, comp_bool = get_score(g1, model, scaler,
                                              inputs['model_type'])

        if cc == 0:
            break
        if score_curr < inputs["classi_thresh"]:
            logging_debug("Complex found")

            # Remove the node last added
            g1.remove_node(node_to_add)
            break

        cur_trial = rand_uniform(low=0.0, high=1.0)
        if score_curr < score_prev:
            # Annealing acceptance: a worse move survives with prob e^(delta/T).
            prob_isa = np_exp((score_curr - score_prev) / T)
            if cur_trial > prob_isa:
                # Remove the node last added
                g1.remove_node(node_to_add)
                # neig_list = neig_list_old
            else:
                logging_debug("Accepting with low probability")
                rand_flag = 1
        elif score_curr > score_prev:
            last_iter_imp = num_iter

        if (num_iter - last_iter_imp
            ) > 10:  # Has been a long time since a score improvement
            logging_debug("Long time since score improvement")
            break

        score_prev = score_curr
        num_iter += 1
        T = float(T) / alpha  # cool the temperature

    # If number of nodes is less than 2, don't write.
    with open(folNm_out + "/" + seed_node, 'wb') as f:
        pickle_dump((frozenset(g1.nodes()), score_prev), f)
コード例 #9
0
ファイル: sample.py プロジェクト: marcottelab/super.complex
def met(g1, model, scaler, inputs, score_prev):
    """Metropolis-style refinement of complex g1.

    Each round the best candidate neighbour is added; additions that
    lower the score are kept only with probability prob_metropolis.
    Returns (frozenset of nodes, final score).
    """
    max_nodes = inputs["max_size"] - len(g1)

    num_iter = 1
    last_iter_imp = 0  # iteration index of the most recent score improvement
    thres_neig = inputs["thres_neig"]
    prob_metropolis = inputs["prob_metropolis"]
    folNm = inputs['folNm']
    met_low_prob_acc = 0  # count of score-lowering moves that were accepted
    while num_iter < max_nodes:  # Limiting number of iteration rounds

        logging_debug("Adding next node")
        # neig_list_old = neig_list
        # g1, cc, node_to_add, score_curr, comp_bool, rand_flag, neig_list = add_top_neig(g1, thres_neig, folNm, inputs, model, scaler, neig_list)
        g1, cc, node_to_add, score_curr, comp_bool, rand_flag = add_top_neig(
            g1, thres_neig, folNm, inputs, model, scaler)
        # add_top_neig may skip scoring (e.g. random exploration); score here.
        if (score_curr is None) or (comp_bool is None):
            score_curr, comp_bool = get_score(g1, model, scaler,
                                              inputs['model_type'])

        if cc == 0:
            break
        if score_curr < inputs["classi_thresh"]:
            logging_debug("Complex found")

            # Remove the node last added
            g1.remove_node(node_to_add)
            break

        cur_trial = rand_uniform(low=0.0, high=1.0)
        if score_curr < score_prev:
            if cur_trial > prob_metropolis:
                # Remove the node last added
                g1.remove_node(node_to_add)
                # neig_list = neig_list_old
            else:
                logging_debug("Accepting with low probability")
                met_low_prob_acc += 1
                rand_flag = 1
        elif score_curr > score_prev:
            last_iter_imp = num_iter

        if (num_iter - last_iter_imp
            ) > 10:  # Has been a long time since a score improvement
            logging_debug("Long time since score improvement")
            break

        score_prev = score_curr

        num_iter += 1
    logging_debug("No. of low probability acceptances = ")
    logging_debug(str(met_low_prob_acc))

    # print(g1.nodes())
    # print(g1.edges())
    return frozenset(g1.nodes()), score_prev
コード例 #10
0
def debug(msg: str) -> None:
    """Log *msg* at debug level (thin wrapper around logging_debug)."""

    logging_debug(msg)
コード例 #11
0
def search_max_neig(seed_node, scaler, par_inputs_fn):
    """Greedily grow a complex from seed_node by repeatedly adding the
    neighbour with the heaviest edge, until the classifier rejects the
    complex or max_size is exceeded. Pickles (node list, score) to the
    output folder, keyed by the seed node name.
    """
    with open(par_inputs_fn, 'rb') as f:
        inputs = pickle_load(f)
    with open(inputs['modelfname'], 'rb') as f:
        model = pickle_load(f)    # Seed node
    logging_debug("Seed node is", seed_node)
    folNm = inputs['folNm']
    with open(folNm + "/" + seed_node, 'rb') as f:
        neig_list = pickle_load(f)
    folNm_out = inputs['folNm_out']

    if not neig_list:
        return

    # Largest weight neighbor - gives the most confident graphs.
    # BUG FIX: max(neig_list) compared node names lexicographically;
    # select by edge weight, as the comment (and the formatted sibling
    # implementation) intends.
    imp_neig = max(neig_list, key=lambda node: neig_list[node]['weight'])
    wt = neig_list[imp_neig]
    wt_edge = wt['weight']

    score_curr = 0
    g1 = nx_Graph()
    g1.add_edge(seed_node, imp_neig, weight=wt_edge)

    max_nodes = inputs["max_size"]

    while True:

        logging_debug("Adding next node")

        imp_neigs = dict()
        g1_nodes = g1.nodes()
        for node in g1_nodes:
            # get its max neighbor and weight and store in dict
            with open(folNm + "/" + node, 'rb') as f:
                neig_list = pickle_load(f)

            # Remove neighbors already in graph - one small computation to save memory
            neig_fin = set(neig_list) - set(g1_nodes)
            neig_list = {k: v for k, v in neig_list.items()
                         if k in neig_fin}

            # NOTE(review): this break abandons the remaining nodes'
            # neighbours as soon as one node has none left — confirm it
            # isn't meant to be `continue`.
            if not neig_list:  # Checking if empty
                break
            # BUG FIX: choose by edge weight, not by node-name ordering.
            imp_neig = max(neig_list,
                           key=lambda cand: neig_list[cand]['weight'])
            imp_neigs[imp_neig] = neig_list[imp_neig]['weight']

        if not imp_neigs:
            logging_debug("No more neighbors to add")
            break

        # BUG FIX: pick the candidate with the largest weight; the old
        # max(imp_neigs) compared keys alphabetically (the original
        # comment "Check again that this is the max" flagged this).
        node_to_add = max(imp_neigs, key=imp_neigs.get)
        # ADD ALL EDGES OF NEW NODE TO ORIG GRAPH
        with open(folNm + "/" + node_to_add, 'rb') as f:
            its_neig_list = pickle_load(f)

        orig_nodes = g1.nodes()
        for node in orig_nodes:
            if node in its_neig_list:
                g1.add_edge(node_to_add, node,
                            weight=its_neig_list[node]['weight'])

        if len(g1) > max_nodes:
            logging_debug("Max size exceeded")
            break

        score_prev = score_curr

        (score_curr, comp_bool) = get_score(g1, model, scaler,
                                            inputs['model_type'])

        if comp_bool == 0:
            logging_debug("Complex found")

            # Remove the node last added
            g1.remove_node(node_to_add)
            score_curr = score_prev
            break
    with open(folNm_out + "/" + seed_node, 'wb') as f:
        pickle_dump((list(g1.nodes()), score_curr), f)
コード例 #12
0
def met(g1, model, scaler, inputs, score_prev):
    """Metropolis-style growth of complex g1.

    Each round the best candidate neighbour is added; additions that
    lower the score are kept only with probability prob_metropolis,
    otherwise the node is removed and blacklisted in rem_nodes.
    Returns (nodes, final score).
    """
    # BUG FIX: removed a dead loop that unpacked every edge of g1 into
    # variables that were never used.
    max_nodes = inputs["max_size"] - len(g1)

    num_iter = 1
    last_iter_imp = 0  # iteration index of the most recent score improvement
    thres_neig = inputs["thres_neig"]
    prob_metropolis = inputs["prob_metropolis"]
    rem_nodes = []  # rejected nodes, excluded from future candidate sets
    folNm = inputs['folNm']

    while num_iter < max_nodes:  # Limiting number of iteration rounds

        logging_debug("Adding next node")

        imp_neigs = dict()
        g1_nodes = g1.nodes()
        for node in g1_nodes:
            # get its max neighbor and weight and store in dict
            with open(folNm + "/" + node, 'rb') as f:
                neig_list = pickle_load(f)

            # Remove neighbors already in graph - one small computation to save memory
            neig_fin = set(neig_list) - set(list(g1_nodes) + rem_nodes)
            neig_list = dict([neig for neig in neig_list.items()
                              if neig[0] in neig_fin])

            # Don't check all neighbors - just a subset if number of neighbors is large
            if len(neig_list) > thres_neig:  # Make 500
                neig_list = dict(random_sample(neig_list.items(), thres_neig))
            # NOTE(review): this break abandons the remaining nodes'
            # neighbours as soon as one node has none left — confirm it
            # isn't meant to be `continue`.
            if not neig_list:  # Checking if empty
                break
            imp_neig, max_score = find_max_neig(neig_list, g1,
                                                inputs['perc'], model,
                                                scaler, inputs)

            wt = neig_list[imp_neig]
            wt_edge = wt['weight']

            imp_neigs[imp_neig] = {'weight': wt_edge, 'compScore': max_score}

        if not imp_neigs:
            logging_debug("No more neighbors to add")
            break

        node_to_add = find_imp_neig(imp_neigs, inputs['explore_prob'])
        # ADD ALL EDGES OF NEW NODE TO ORIG GRAPH
        with open(folNm + "/" + node_to_add, 'rb') as f:
            its_neig_list = pickle_load(f)
        orig_nodes = g1.nodes()
        all_nodesWedges = set(orig_nodes).intersection(its_neig_list)

        for node in all_nodesWedges:
            wt = its_neig_list[node]
            wt_edge = wt['weight']
            g1.add_edge(node_to_add, node, weight=wt_edge)

        (score_curr, comp_bool) = get_score(g1, model, scaler,
                                            inputs['model_type'])

        if comp_bool == 0:
            logging_debug("Complex found")

            # Remove the node last added
            g1.remove_node(node_to_add)
            break

        cur_trial = rand_uniform(low=0.0, high=1.0)
        if score_curr < score_prev:
            if cur_trial > prob_metropolis:
                # Remove the node last added
                g1.remove_node(node_to_add)
                rem_nodes.append(node_to_add)
            # since edges from this node to complex have been removed from tempG it will not be revisited
            else:
                logging_debug("Accepting with low probability")
        elif score_curr > score_prev:
            last_iter_imp = num_iter

        if (num_iter - last_iter_imp) > 10:  # Has been a long time since a score improvement
            # BUG FIX: corrected "imporovement" typo in the log message.
            logging_debug("Long time since score improvement")
            break

        score_prev = score_curr

        num_iter += 1

    # print(g1.nodes())
    # print(g1.edges())
    return (g1.nodes(), score_prev)
コード例 #13
0
    def load_remote_db(self):
        """
        Load the database from the S3 storage bucket into the current AWS Lambda
        instance.

        Uses a conditional GET (IfNoneMatch with the local file's md5) so
        the download is skipped when the local /tmp copy already matches
        the bucket object; records the downloaded file's md5 in
        self.db_hash so close() can detect later changes.
        """
        signature_version = self.settings_dict.get("SIGNATURE_VERSION", "s3v4")
        s3 = boto3.resource(
            's3',
            config=botocore.client.Config(signature_version=signature_version),
        )

        # Only fetch when NAME has not yet been rewritten to a /tmp/ path,
        # i.e. the DB has not been localised in this Lambda instance.
        if '/tmp/' not in self.settings_dict['NAME']:
            try:
                etag = ''
                if path.isfile('/tmp/' + self.settings_dict['NAME']):
                    m = md5()
                    with open('/tmp/' + self.settings_dict['NAME'], 'rb') as f:
                        m.update(f.read())

                    # In general the ETag is the md5 of the file, in some cases it's
                    # not, and in that case we will just need to reload the file,
                    # I don't see any other way
                    etag = m.hexdigest()

                obj = s3.Object(
                    self.settings_dict['BUCKET'],
                    self.settings_dict['NAME'],
                )
                obj_bytes = obj.get(
                    IfNoneMatch=etag, )["Body"]  # Will throw E on 304 or 404

                with open('/tmp/' + self.settings_dict['NAME'], 'wb') as f:
                    f.write(obj_bytes.read())

                # Remember the hash of what was downloaded so close() can
                # tell whether the DB changed before re-uploading it.
                m = md5()
                with open('/tmp/' + self.settings_dict['NAME'], 'rb') as f:
                    m.update(f.read())

                self.db_hash = m.hexdigest()

            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] == "304":
                    logging_debug(
                        "ETag matches md5 of local copy, using local copy of DB!",
                    )
                    self.db_hash = etag
                else:
                    logging_debug("Couldn't load remote DB object.")
            except Exception as e:
                # Weird one
                logging_debug(e)

        # SQLite DatabaseWrapper will treat our tmp as normal now
        # Check because Django likes to call this function a lot more than it should
        if '/tmp/' not in self.settings_dict['NAME']:
            self.settings_dict['REMOTE_NAME'] = self.settings_dict['NAME']
            self.settings_dict['NAME'] = '/tmp/' + self.settings_dict['NAME']

        # Make sure it exists if it doesn't yet
        if not path.isfile(self.settings_dict['NAME']):
            open(self.settings_dict['NAME'], 'a').close()

        logging_debug("Loaded remote DB!")