def find_imp_neig(imp_neigs, g1, perc, model, scaler, inputs, explore_prob, rand_flag):
    # rename to epsilon-greedy
    """Epsilon-greedy pick of the next neighbour to merge into complex g1.

    With probability ``explore_prob`` a random candidate is returned
    (exploration).  Otherwise each (optionally weight-filtered) candidate is
    tentatively added to ``g1``, scored with ``get_score``, removed again,
    and the highest-scoring candidate is returned (exploitation).

    Args:
        imp_neigs: dict mapping candidate node -> info dict holding at least
            'graph_neigs'; the exploitation branch adds 'compScore' and
            'compBool' to each value in place.
        g1: current complex graph; temporarily mutated, restored before return.
        perc: passed through to pick_top_weight_neigs for filtering.
        model, scaler: classifier and feature scaler used by get_score.
        inputs: parameter dict ('use_all_neigs', 'model_type', ...).
        explore_prob: probability of picking a random candidate.
        rand_flag: incoming flag; set to 1 when a random choice was made.

    Returns:
        (imp_neig, score_fin, comp_bool_fin, rand_flag); score_fin and
        comp_bool_fin are None unless the exploitation branch ran (callers
        recompute the score in that case).
    """
    score_fin = None
    comp_bool_fin = None
    if len(imp_neigs) == 1:
        # Single candidate: nothing to compare, no scoring done here.
        imp_neig = list(imp_neigs.keys())[0]
    else:
        cur_trial = rand_uniform(low=0.0, high=1.0)
        if cur_trial <= explore_prob:
            logging_debug("Exploring with low probability")
            # Move to top for efficiency and remove del max
            imp_neig = random_choice(list(imp_neigs.keys()))
            rand_flag = 1
        else:
            if inputs["use_all_neigs"] == 0:
                imp_neigs = pick_top_weight_neigs(imp_neigs, perc, inputs)
                # Return none when imp_neigs is None
                # NOTE(review): if pick_top_weight_neigs can return None, the
                # loop below would raise TypeError - confirm it always returns
                # a dict here.
            for neig in imp_neigs:
                # Add to graph
                g1 = add_newnode(g1, neig, imp_neigs[neig]['graph_neigs'])
                # Check score
                (score_curr, comp_bool) = get_score(g1, model, scaler, inputs['model_type'])
                imp_neigs[neig]['compScore'] = score_curr
                imp_neigs[neig]['compBool'] = comp_bool
                g1.remove_node(neig)  # undo the tentative addition
            imp_neig_tup = max(imp_neigs.items(), key=lambda elem: elem[1]['compScore'])
            imp_neig = imp_neig_tup[0]
            score_fin = imp_neig_tup[1]['compScore']
            comp_bool_fin = imp_neig_tup[1]['compBool']
    return imp_neig, score_fin, comp_bool_fin, rand_flag
def add_top_neig_update(g1, thres_neig, folNm, inputs, model, scaler, neig_list):
    """Attach the epsilon-greedy best neighbour to g1 and refresh neig_list.

    Returns (g1, cc, node_to_add, score, compBool, rand_flag, neig_list),
    where cc == 0 signals that no neighbour was available to add.
    """
    rand_flag = 0
    full_neig_list = None
    # Nothing left to try: report failure via cc == 0.
    if not neig_list:
        logging_debug("No more neighbors to add")
        return g1, 0, None, None, None, rand_flag, neig_list
    # Optionally down-sample a large candidate set before scoring.
    if inputs["use_all_neigs"] == 0 and len(neig_list) > thres_neig:
        full_neig_list = neig_list
        neig_list = dict(random_sample(neig_list.items(), thres_neig))
        rand_flag = 1
    node_to_add, score, compBool, rand_flag = find_imp_neig(
        neig_list, g1, inputs['perc'], model, scaler, inputs,
        inputs['explore_prob'], rand_flag)
    g1 = add_newnode(g1, node_to_add, neig_list[node_to_add]['graph_neigs'])
    # Refresh from the full list when we sampled, else from the sampled one.
    base_list = neig_list if full_neig_list is None else full_neig_list
    neig_list = update_neig_list(base_list, node_to_add, folNm, g1.nodes())
    return g1, 1, node_to_add, score, compBool, rand_flag, neig_list
def random_walk(seed_node, n, folNm):  # in graph G
    """Random walk from seed_node over pickled per-node neighbour lists.

    Builds and returns a graph of the visited edges; stops once n nodes have
    been collected, when a node has no neighbours, or after n + 10 steps.
    """
    walk_graph = nx_Graph()
    walk_graph.add_node(seed_node)
    cur_node = seed_node
    slack = 10  # allow a few revisits beyond n steps
    steps = 1
    while steps < n + slack:
        with open(folNm + "/" + cur_node, 'rb') as fh:
            neighbours = pickle_load(fh)
        if not neighbours:
            logging_debug("No neighbours")
            break
        choices = list(neighbours.keys())
        nxt = choices[0] if len(choices) == 1 else rand_choice(choices)
        walk_graph.add_edge(cur_node, nxt, weight=neighbours[nxt]['weight'])
        if len(walk_graph.nodes()) == n:
            break
        cur_node = nxt
        steps += 1
    # print(pres_node)
    return walk_graph
def find_imp_neig(neig_list, explore_prob):
    """Epsilon-greedy selection of one neighbour from a scored candidate dict.

    Args:
        neig_list: non-empty dict mapping node -> dict with a 'compScore' key.
        explore_prob: probability of returning a uniformly random candidate
            instead of the best-scoring one.

    Returns:
        The chosen node key.
    """
    if len(neig_list) == 1:
        # Only one candidate: no randomness needed.
        return next(iter(neig_list))
    cur_trial = rand_uniform(low=0.0, high=1.0)
    if cur_trial <= explore_prob:
        logging_debug("Exploring with low probability")
        return random_choice(list(neig_list.keys()))
    # Exploit: highest classifier score wins (redundant iter() removed).
    return max(neig_list.items(), key=lambda elem: elem[1]['compScore'])[0]
def search_top_neigs(seed_node, scaler, par_inputs_fn):
    # Picks out of a subset of its neighbors and adds the best node
    # logging_debug("No. of nodes in g = ",len(G))
    # Assigning original graph to temporary variable
    """Grow a complex from seed_node by repeatedly adding the best neighbour.

    Loads run parameters and the trained model from pickles, starts from the
    seed edge, and calls add_top_neig until the graph reaches max_size, no
    neighbours remain, or the classifier score drops below classi_thresh
    (in which case the last node is removed and the previous score kept).
    Writes (frozenset(nodes), score) to folNm_out/seed_node.
    """
    with open(par_inputs_fn, 'rb') as f:
        inputs = pickle_load(f)
    with open(inputs['modelfname'], 'rb') as f:
        model = pickle_load(f)
    folNm = inputs['folNm']
    folNm_out = inputs['folNm_out']
    cd, g1 = starting_edge(folNm, seed_node)
    if cd == 0:
        # No starting edge could be formed for this seed.
        return
    score_curr = 0
    max_nodes = inputs["max_size"]
    thres_neig = inputs["thres_neig"]  # Threshold on number of neighbors to consider
    while len(g1) < max_nodes:
        score_prev = score_curr
        logging_debug("Adding next node")
        g1, cc, node_to_add, score_curr, comp_bool, rand_flag = add_top_neig(
            g1, thres_neig, folNm, inputs, model, scaler)
        if (score_curr is None) or (comp_bool is None):
            # find_imp_neig only scores in its exploitation branch; recompute.
            score_curr, comp_bool = get_score(g1, model, scaler, inputs['model_type'])
        if cc == 0:
            # No neighbour was available to add.
            break
        if score_curr < inputs["classi_thresh"]:
            logging_debug("Complex found")
            # Remove the node last added
            g1.remove_node(node_to_add)
            score_curr = score_prev
            break
    with open(folNm_out + "/" + seed_node, 'wb') as f:
        pickle_dump((frozenset(g1.nodes()), score_curr), f)
def search_max_neig(seed_node, scaler, par_inputs_fn):
    """Grow a candidate complex from seed_node by always attaching the
    neighbour with the largest edge weight.

    Stops when max_size nodes are reached, no neighbours remain, or the
    classifier score falls below classi_thresh (last node is then removed
    and the previous score kept).  Writes (frozenset(nodes), score) to
    folNm_out/seed_node.
    """
    with open(par_inputs_fn, 'rb') as fh:
        inputs = pickle_load(fh)
    with open(inputs['modelfname'], 'rb') as fh:
        model = pickle_load(fh)
    # Seed node
    logging_debug("Seed node is", seed_node)
    folNm = inputs['folNm']
    folNm_out = inputs['folNm_out']
    size_cap = inputs["max_size"]
    cur_score = 0
    cd, g1 = starting_edge(folNm, seed_node)
    if cd == 0:
        return
    while len(g1) < size_cap:
        # print(len(g1))
        logging_debug("Adding next node")
        candidates = read_neigs(g1.nodes(), folNm)
        if not candidates:
            # Nothing left to attach.
            logging_debug("No more neighbors to add")
            break
        best = max(candidates.items(), key=lambda kv: kv[1]['weight'])[0]
        g1 = add_newnode(g1, best, candidates[best]['graph_neigs'])
        prev_score = cur_score
        cur_score, comp_bool = get_score(g1, model, scaler, inputs['model_type'])
        if cur_score < inputs["classi_thresh"]:
            logging_debug("Complex found")
            # Undo the last addition and keep the previous score.
            g1.remove_node(best)
            cur_score = prev_score
            break
    with open(folNm_out + "/" + seed_node, 'wb') as fh:
        pickle_dump((frozenset(g1.nodes()), cur_score), fh)
def close(self, *args, **kwargs):
    """ Engine closed, copy file to DB if it has changed.

    Compares the md5 of the local DB file against ``self.db_hash`` (recorded
    when the remote DB was loaded) and skips the upload when they match.
    Any failure is logged and swallowed so closing the engine never raises.
    """
    super(DatabaseWrapper, self).close(*args, **kwargs)
    signature_version = self.settings_dict.get("SIGNATURE_VERSION", "s3v4")
    s3 = boto3.resource(
        's3',
        config=botocore.client.Config(signature_version=signature_version),
    )
    try:
        with open(self.settings_dict['NAME'], 'rb') as f:
            fb = f.read()
        m = md5()
        m.update(fb)
        if self.db_hash == m.hexdigest():
            logging_debug(
                "Database unchanged, not saving to remote DB!")
            return
        bytesIO = BytesIO()
        bytesIO.write(fb)
        bytesIO.seek(0)
        s3_object = s3.Object(
            self.settings_dict['BUCKET'],
            self.settings_dict['REMOTE_NAME'],
        )
        # BUG FIX: Object.put() accepts keyword arguments only; the old
        # positional 'rb' argument made boto3 raise TypeError, which was
        # swallowed below, so the upload silently never happened.
        s3_object.put(Body=bytesIO)
    except Exception as e:
        logging_debug(e)
    else:
        # BUG FIX: only claim success when the upload actually went through
        # (previously logged even after an exception).
        logging_debug("Saved to remote DB!")
def search_isa(
        seed_node, scaler, par_inputs_fn
):  # Picks out of a subset of its neighbors and adds the best node
    """Grow a complex from seed_node using iterative simulated annealing.

    Each round add_top_neig proposes the (epsilon-greedy) best neighbour.
    A score decrease is accepted with probability exp(dScore / T); the
    temperature T is divided by alpha every round (cooling).  Stops when the
    classifier score drops below classi_thresh, no neighbours remain, the
    iteration budget (max_size - |g1|) is spent, or 10 rounds pass with no
    score improvement.  Writes (frozenset(nodes), score) to
    folNm_out/seed_node.
    """
    with open(par_inputs_fn, 'rb') as f:
        inputs = pickle_load(f)
    with open(inputs['modelfname'], 'rb') as f:
        model = pickle_load(f)
    folNm = inputs['folNm']
    folNm_out = inputs['folNm_out']
    score_prev = 0
    cd, g1 = starting_edge(folNm, seed_node)
    if cd == 0:
        # No starting edge could be formed for this seed.
        return
    max_nodes = inputs["max_size"] - len(g1)
    num_iter = 1
    last_iter_imp = 0  # round index of the last score improvement
    thres_neig = inputs["thres_neig"]
    T = inputs["T0"]  # T0 value (initial annealing temperature)
    alpha = inputs["alpha"]  # cooling factor: T := T / alpha each round
    while num_iter < max_nodes:  # Limiting number of iteration rounds
        logging_debug("Adding next node")
        # neig_list_old = neig_list
        # g1, cc, node_to_add, score_curr, comp_bool, rand_flag, neig_list = add_top_neig(g1, thres_neig, folNm, inputs, model, scaler, neig_list)
        g1, cc, node_to_add, score_curr, comp_bool, rand_flag = add_top_neig(
            g1, thres_neig, folNm, inputs, model, scaler)
        if (score_curr is None) or (comp_bool is None):
            # find_imp_neig only scores in its exploitation branch; recompute.
            score_curr, comp_bool = get_score(g1, model, scaler, inputs['model_type'])
        if cc == 0:
            # No neighbour was available to add.
            break
        if score_curr < inputs["classi_thresh"]:
            logging_debug("Complex found")
            # Remove the node last added
            g1.remove_node(node_to_add)
            break
        cur_trial = rand_uniform(low=0.0, high=1.0)
        if score_curr < score_prev:
            # Annealing acceptance probability for a worse score.
            prob_isa = np_exp((score_curr - score_prev) / T)
            if cur_trial > prob_isa:
                # Remove the node last added
                g1.remove_node(node_to_add)
                # neig_list = neig_list_old
            else:
                logging_debug("Accepting with low probability")
                rand_flag = 1
        elif score_curr > score_prev:
            last_iter_imp = num_iter
        if (num_iter - last_iter_imp
            ) > 10:  # Has been a long time since a score improvement
            logging_debug("Long time since score improvement")
            break
        # NOTE(review): score_prev is updated even when the move was rejected
        # and the node removed - confirm this bookkeeping is intended.
        score_prev = score_curr
        num_iter += 1
        T = float(T) / alpha
    # If number of nodes is less than 2, don't write.
    with open(folNm_out + "/" + seed_node, 'wb') as f:
        pickle_dump((frozenset(g1.nodes()), score_prev), f)
def met(g1, model, scaler, inputs, score_prev):
    """Grow complex g1 with a Metropolis-style acceptance rule.

    Each round add_top_neig proposes the (epsilon-greedy) best neighbour; a
    score decrease is accepted with fixed probability prob_metropolis,
    otherwise the node is removed again.  Stops when the classifier score
    drops below classi_thresh, no neighbours remain, the iteration budget
    (max_size - |g1|) is spent, or 10 rounds pass with no improvement.

    Returns:
        (frozenset of g1's nodes, last accepted score).
    """
    max_nodes = inputs["max_size"] - len(g1)
    num_iter = 1
    last_iter_imp = 0  # round index of the last score improvement
    thres_neig = inputs["thres_neig"]
    prob_metropolis = inputs["prob_metropolis"]
    folNm = inputs['folNm']
    met_low_prob_acc = 0  # count of low-probability acceptances (for logging)
    while num_iter < max_nodes:  # Limiting number of iteration rounds
        logging_debug("Adding next node")
        # neig_list_old = neig_list
        # g1, cc, node_to_add, score_curr, comp_bool, rand_flag, neig_list = add_top_neig(g1, thres_neig, folNm, inputs, model, scaler, neig_list)
        g1, cc, node_to_add, score_curr, comp_bool, rand_flag = add_top_neig(
            g1, thres_neig, folNm, inputs, model, scaler)
        if (score_curr is None) or (comp_bool is None):
            # find_imp_neig only scores in its exploitation branch; recompute.
            score_curr, comp_bool = get_score(g1, model, scaler, inputs['model_type'])
        if cc == 0:
            # No neighbour was available to add.
            break
        if score_curr < inputs["classi_thresh"]:
            logging_debug("Complex found")
            # Remove the node last added
            g1.remove_node(node_to_add)
            break
        cur_trial = rand_uniform(low=0.0, high=1.0)
        if score_curr < score_prev:
            if cur_trial > prob_metropolis:
                # Remove the node last added
                g1.remove_node(node_to_add)
                # neig_list = neig_list_old
            else:
                logging_debug("Accepting with low probability")
                met_low_prob_acc += 1
                rand_flag = 1
        elif score_curr > score_prev:
            last_iter_imp = num_iter
        if (num_iter - last_iter_imp
            ) > 10:  # Has been a long time since a score improvement
            logging_debug("Long time since score improvement")
            break
        # NOTE(review): score_prev is updated even when the move was rejected
        # and the node removed - confirm this bookkeeping is intended.
        score_prev = score_curr
        num_iter += 1
    logging_debug("No. of low probability acceptances = ")
    logging_debug(str(met_low_prob_acc))
    # print(g1.nodes())
    # print(g1.edges())
    return frozenset(g1.nodes()), score_prev
def debug(msg: str):
    """Forward *msg* to the module-level debug logger."""
    logging_debug(msg)
def search_max_neig(seed_node, scaler, par_inputs_fn):
    """Grow a complex from seed_node by repeatedly attaching the neighbour
    with the largest edge weight.

    Reads per-node neighbour dicts ({node: {'weight': w, ...}}) pickled under
    folNm, grows the graph until max_size is exceeded, no neighbours remain,
    or the classifier flags a complex (comp_bool == 0), then writes
    (list(nodes), score) to folNm_out/seed_node.
    """
    with open(par_inputs_fn, 'rb') as f:
        inputs = pickle_load(f)
    with open(inputs['modelfname'], 'rb') as f:
        model = pickle_load(f)
    # Seed node
    logging_debug("Seed node is", seed_node)
    folNm = inputs['folNm']
    with open(folNm + "/" + seed_node, 'rb') as f:
        neig_list = pickle_load(f)
    folNm_out = inputs['folNm_out']
    if not neig_list:
        return
    # BUG FIX: max(neig_list) compared node names lexicographically; pick the
    # neighbour with the largest edge weight, as the comment always intended.
    imp_neig = max(neig_list, key=lambda nd: neig_list[nd]['weight'])
    wt_edge = neig_list[imp_neig]['weight']
    score_curr = 0
    g1 = nx_Graph()
    g1.add_edge(seed_node, imp_neig, weight=wt_edge)
    max_nodes = inputs["max_size"]
    while True:
        logging_debug("Adding next node")
        imp_neigs = dict()
        g1_nodes = g1.nodes()
        for node in g1_nodes:
            # Best external neighbour of each member node.
            with open(folNm + "/" + node, 'rb') as f:
                neig_list = pickle_load(f)
            # Remove neighbors already in graph - one small computation to save memory
            neig_fin = set(neig_list) - set(g1_nodes)
            neig_list = {k: v for k, v in neig_list.items() if k in neig_fin}
            if not neig_list:  # Checking if empty
                # NOTE(review): this only stops scanning the remaining member
                # nodes (original behaviour preserved).
                break
            # BUG FIX: select by weight, not by node-name ordering.
            best = max(neig_list, key=lambda nd: neig_list[nd]['weight'])
            imp_neigs[best] = neig_list[best]['weight']
        if not imp_neigs:
            logging_debug("No more neighbors to add")
            break
        # BUG FIX: highest-weight candidate, not lexicographically largest key.
        node_to_add = max(imp_neigs, key=imp_neigs.get)
        # ADD ALL EDGES OF NEW NODE TO ORIG GRAPH
        with open(folNm + "/" + node_to_add, 'rb') as f:
            its_neig_list = pickle_load(f)
        # Materialize the node view: adding edges while iterating a live
        # NodeView would raise RuntimeError in networkx 2.x.
        orig_nodes = list(g1.nodes())
        for node in orig_nodes:
            if node in its_neig_list:
                g1.add_edge(node_to_add, node, weight=its_neig_list[node]['weight'])
        if len(g1) > max_nodes:
            logging_debug("Max size exceeded")
            break
        score_prev = score_curr
        (score_curr, comp_bool) = get_score(g1, model, scaler, inputs['model_type'])
        if comp_bool == 0:
            logging_debug("Complex found")
            # Remove the node last added
            g1.remove_node(node_to_add)
            score_curr = score_prev
            break
    with open(folNm_out + "/" + seed_node, 'wb') as f:
        pickle_dump((list(g1.nodes()), score_curr), f)
def met(g1, model, scaler, inputs, score_prev):
    # Assigning original graph to temporary variable
    """Grow complex g1 with a Metropolis acceptance rule (older variant).

    Per round: for every member node, read its pickled neighbour dict, drop
    members and previously rejected nodes, optionally down-sample, and let
    find_max_neig pick its best external neighbour; find_imp_neig then
    chooses among those candidates epsilon-greedily.  Score decreases are
    accepted with fixed probability prob_metropolis, otherwise the node is
    removed and blacklisted via rem_nodes.

    Returns:
        (view/list of g1's nodes, last accepted score).
    """
    # NOTE(review): this loop has no effect - node1/node2 are overwritten and
    # never used; looks like dead code left from an earlier version.
    for edge in g1.edges():
        (node1, node2) = edge
    max_nodes = inputs["max_size"] - len(g1)
    num_iter = 1
    last_iter_imp = 0  # round index of the last score improvement
    thres_neig = inputs["thres_neig"]
    prob_metropolis = inputs["prob_metropolis"]
    rem_nodes = []  # rejected nodes, excluded from future candidate sets
    folNm = inputs['folNm']
    while num_iter < max_nodes:  # Limiting number of iteration rounds
        logging_debug("Adding next node")
        imp_neigs = dict()
        g1_nodes = g1.nodes()
        for node in g1_nodes:
            # get its max neighbor and weight and store in dict
            with open(folNm + "/" + node, 'rb') as f:
                neig_list = pickle_load(f)
            # Remove neighbors already in graph - one small computation to save memory
            neig_fin = set(neig_list) - set(list(g1_nodes) + rem_nodes)
            neig_list = dict([neig for neig in neig_list.items() if neig[0] in neig_fin])
            # Don't check all neighbors - just a subset if number of neighbors is large
            if len(neig_list) > thres_neig:  # Make 500
                neig_list = dict(random_sample(neig_list.items(), thres_neig))
            if not neig_list:  # Checking if empty
                break
            imp_neig, max_score = find_max_neig(neig_list, g1, inputs['perc'], model, scaler, inputs)
            wt = neig_list[imp_neig]
            wt_edge = wt['weight']
            imp_neigs[imp_neig] = {'weight': wt_edge, 'compScore': max_score}
        if not imp_neigs:
            logging_debug("No more neighbors to add")
            break
        node_to_add = find_imp_neig(imp_neigs, inputs['explore_prob'])
        # ADD ALL EDGES OF NEW NODE TO ORIG GRAPH
        with open(folNm + "/" + node_to_add, 'rb') as f:
            its_neig_list = pickle_load(f)
        orig_nodes = g1.nodes()
        all_nodesWedges = set(orig_nodes).intersection(its_neig_list)
        for node in all_nodesWedges:
            wt = its_neig_list[node]
            wt_edge = wt['weight']
            g1.add_edge(node_to_add, node, weight=wt_edge)
        (score_curr, comp_bool) = get_score(g1, model, scaler, inputs['model_type'])
        if comp_bool == 0:
            logging_debug("Complex found")
            # Remove the node last added
            g1.remove_node(node_to_add)
            break
        cur_trial = rand_uniform(low=0.0, high=1.0)
        if score_curr < score_prev:
            if cur_trial > prob_metropolis:
                # Remove the node last added
                g1.remove_node(node_to_add)
                rem_nodes.append(node_to_add)  # since edges from this node to complex have been removed from tempG it will not be revisited
            else:
                logging_debug("Accepting with low probability")
        elif score_curr > score_prev:
            last_iter_imp = num_iter
        if (num_iter - last_iter_imp) > 10:  # Has been a long time since a score improvement
            logging_debug("Long time since score imporovement")
            break
        # NOTE(review): score_prev is updated even when the move was rejected
        # and the node removed - confirm this bookkeeping is intended.
        score_prev = score_curr
        num_iter += 1
    # print(g1.nodes())
    # print(g1.edges())
    return (g1.nodes(), score_prev)
def load_remote_db(self):
    """
    Load the database from the S3 storage bucket into the current
    AWS Lambda instance.

    Downloads the object only when the local copy's md5 differs from the
    remote ETag (conditional GET via IfNoneMatch), records the resulting
    md5 in ``self.db_hash``, then rewrites ``settings_dict`` so SQLite
    uses the /tmp/ copy from here on.
    """
    signature_version = self.settings_dict.get("SIGNATURE_VERSION", "s3v4")
    s3 = boto3.resource(
        's3',
        config=botocore.client.Config(signature_version=signature_version),
    )
    # Only fetch if NAME has not yet been rewritten to the /tmp/ path below.
    if '/tmp/' not in self.settings_dict['NAME']:
        try:
            etag = ''
            if path.isfile('/tmp/' + self.settings_dict['NAME']):
                m = md5()
                with open('/tmp/' + self.settings_dict['NAME'], 'rb') as f:
                    m.update(f.read())
                # In general the ETag is the md5 of the file, in some cases it's
                # not, and in that case we will just need to reload the file,
                # I don't see any other way
                etag = m.hexdigest()
            obj = s3.Object(
                self.settings_dict['BUCKET'],
                self.settings_dict['NAME'],
            )
            obj_bytes = obj.get(
                IfNoneMatch=etag,
            )["Body"]  # Will throw E on 304 or 404
            with open('/tmp/' + self.settings_dict['NAME'], 'wb') as f:
                f.write(obj_bytes.read())
            # Remember the hash of what we just wrote, so close() can tell
            # whether the DB changed.
            m = md5()
            with open('/tmp/' + self.settings_dict['NAME'], 'rb') as f:
                m.update(f.read())
            self.db_hash = m.hexdigest()
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "304":
                # Conditional GET said Not Modified: local copy is current.
                logging_debug(
                    "ETag matches md5 of local copy, using local copy of DB!",
                )
                self.db_hash = etag
            else:
                logging_debug("Couldn't load remote DB object.")
        except Exception as e:
            # Weird one
            logging_debug(e)
    # SQLite DatabaseWrapper will treat our tmp as normal now
    # Check because Django likes to call this function a lot more than it should
    if '/tmp/' not in self.settings_dict['NAME']:
        self.settings_dict['REMOTE_NAME'] = self.settings_dict['NAME']
        self.settings_dict['NAME'] = '/tmp/' + self.settings_dict['NAME']
    # Make sure it exists if it doesn't yet
    if not path.isfile(self.settings_dict['NAME']):
        open(self.settings_dict['NAME'], 'a').close()
    logging_debug("Loaded remote DB!")