def get_model_filter_worms(p_dict):
    which_model = p_dict['nn_filter_to_use']
    if which_model != 'custom':
        if p_dict['path_to_custom_pytorch_model'] != '':
            warnings.warn('A path to a custom model was provided, '
                          + f'but "nn_filter_to_use" was set to {which_model}.'
                          + ' The custom path will be ignored.')
    if which_model == 'tensorflow_default':
        model_filter_worms = os.path.join(AUX_FILES_DIR,
                                          'model_isworm_20170407_184845.h5')
    elif which_model == 'pytorch_default':
        model_filter_worms = os.path.join(AUX_FILES_DIR,
                                          'model_state_isworm_20200615.pth')
    elif which_model == 'custom':
        model_filter_worms = p_dict['path_to_custom_pytorch_model']
        if model_filter_worms == '':
            warnings.warn('The path to the custom pytorch model to filter '
                          + 'spurious particles was not given. '
                          + 'This step will not be done.')
    elif which_model == 'none':
        model_filter_worms = ''
    else:
        raise Exception('Invalid option for model_filter_worms')
    if model_filter_worms != '':
        if not os.path.exists(model_filter_worms):
            warnings.warn('The selected model file to filter '
                          + 'spurious particles was not found. '
                          + 'This step will not be done.')
            model_filter_worms = ''
    return model_filter_worms
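# Illustrative usage sketch for get_model_filter_worms. The dict keys are the ones the
# function above reads; the values chosen here are assumptions for illustration, not
# project defaults, and AUX_FILES_DIR is assumed to be defined at module level.
# An empty return value means the spurious-particle filtering step will be skipped.
example_p_dict = {
    'nn_filter_to_use': 'pytorch_default',   # 'tensorflow_default', 'custom' or 'none' also valid
    'path_to_custom_pytorch_model': '',
}
model_path = get_model_filter_worms(example_p_dict)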
def increase_ionic_strength(inwhichfilename, nions, outfilename, topfilename):
    # <nions> NA ions and <nions> CL ions are added to increase the ionic concentration
    if nions == 0:
        warnings.warn('Zero ions requested to be added. Doing nothing.')
        return
    add_ions(inwhichfilename, 'NA', nions, outfilename, topfilename)
    add_ions(inwhichfilename, 'CL', nions, outfilename, topfilename)
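# Hypothetical call (file names are placeholders, not real project files): add 10 NA and
# 10 CL ions to a solvated system, keeping the topology in sync via the add_ions calls above.
# increase_ionic_strength('solvated.gro', 10, 'solvated_ions.gro', 'topol.top')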
def verify(self):
    '''load the saved module and verify the data

    This tries several ways of comparing the saved and the attached data, but
    might not work for all possible data structures.

    Returns
    -------
    all_correct : bool
        true if no differences are found; for floating point numbers
        rtol=1e-16, atol=1e-16 is used to determine equality (allclose)
    correctli : list
        list of attribute names that compare as equal
    incorrectli : list
        list of attribute names that did not compare as equal, either because
        they differ or because the comparison does not handle the data
        structure correctly

    '''
    module = __import__(self._filename.replace('.py', ''))
    if not self._useinstance:
        raise NotImplementedError('currently only implemented when '
                                  'useinstance is true')

    data = getattr(module, self.name)
    correctli = []
    incorrectli = []

    for d in self._what:
        self_item = getattr(self, d)    # the attached data
        saved_item = getattr(data, d)   # the data reloaded from the saved module
        #try simple equality
        correct = np.all(self_item == saved_item)
        #try allclose
        if not correct and not self_item.dtype == np.dtype('object'):
            correct = np.allclose(self_item, saved_item, rtol=1e-16, atol=1e-16)
            if not correct:
                import warnings
                warnings.warn("inexact precision in " + d)
        #try iterating, if object array
        if not correct:
            correlem = [np.all(data[d].item()[k] ==
                               getattr(testsave.var_results, d).item()[k])
                        for k in data[d].item().keys()]
            if not np.all(correlem):
                incorrectli.append(d)
                continue
        correctli.append(d)

    return len(incorrectli) == 0, correctli, incorrectli
def verify(self):
    '''load the saved module and verify the data

    This tries several ways of comparing the saved and the attached data, but
    might not work for all possible data structures.

    Returns
    -------
    all_correct : bool
        true if no differences are found; for floating point numbers
        rtol=1e-16, atol=1e-16 is used to determine equality (allclose)
    correctli : list
        list of attribute names that compare as equal
    incorrectli : list
        list of attribute names that did not compare as equal, either because
        they differ or because the comparison does not handle the data
        structure correctly

    '''
    module = __import__(self._filename.replace('.py', ''))
    if not self._useinstance:
        raise NotImplementedError('currently only implemented when '
                                  'useinstance is true')

    data = getattr(module, self.name)
    correctli = []
    incorrectli = []

    for d in self._what:
        self_item = getattr(self, d)    # the attached data
        saved_item = getattr(data, d)   # the data reloaded from the saved module
        #try simple equality
        correct = np.all(self_item == saved_item)
        #try allclose
        if not correct and not self_item.dtype == np.dtype('object'):
            correct = np.allclose(
                self_item, saved_item, rtol=1e-16, atol=1e-16)
            if not correct:
                import warnings
                warnings.warn("inexact precision in " + d)
        #try iterating, if object array
        if not correct:
            correlem = [np.all(data[d].item()[k] ==
                               getattr(testsave.var_results, d).item()[k])
                        for k in list(data[d].item().keys())]
            if not np.all(correlem):
                incorrectli.append(d)
                continue
        correctli.append(d)

    return len(incorrectli) == 0, correctli, incorrectli
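# Minimal standalone sketch of the comparison cascade verify() relies on: exact equality
# first, then np.allclose with the same very tight tolerances, which flags any difference
# beyond float round-off. The example arrays below are made up for illustration.
import numpy as np

attached = np.array([1.0, 2.0, 3.0])
saved_exact = attached.copy()
saved_noisy = attached * (1 + 1e-12)   # tiny relative error, e.g. from a lossy round-trip

exact_match = np.all(attached == saved_exact)                             # True
tight_match = np.allclose(attached, saved_noisy, rtol=1e-16, atol=1e-16)  # False: beyond the tight tolerance
loose_match = np.allclose(attached, saved_noisy)                          # True with numpy's default rtol=1e-05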
def scale_data(self):
    '''ImageJ results are in pixels; apply unit conversion from metadata'''
    if not self.params['scaled']:
        self.image_data['area'] = self.image_data['area'] \
            * self.pixel_size ** 2
        for measurement in ['x', 'y', 'perimeter', 'bx', 'by', 'width', 'height',
                            'major_axis', 'minor_axis', 'xstart', 'ystart',
                            'radius', 'diameter']:
            self.image_data[measurement] = self.image_data[measurement] * \
                self.pixel_size
        self.params['scaled'] = True
    else:
        warnings.warn('Image data has already been scaled, scale not applied',
                      RuntimeWarning, stacklevel=2)
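# Worked example of the conversion scale_data() applies (the numbers are illustrative
# assumptions, not metadata from a real image): linear measurements scale by pixel_size,
# areas by pixel_size squared.
pixel_size = 0.5                      # um per pixel (assumed)
area_px, diameter_px = 400.0, 20.0    # raw ImageJ results in pixels
area_um2 = area_px * pixel_size ** 2  # 400 px^2 -> 100.0 um^2
diameter_um = diameter_px * pixel_size  # 20 px -> 10.0 um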
def k_nearest_neighbors(data, predict, k=3):
    if len(data) >= k:
        warnings.warn('K is set to a value less than total voting groups')

    distances = []
    for group in data:
        for features in data[group]:
            # numpy one-liner for the euclidean distance (terser, but harder to read)
            euclidean_distance = np.linalg.norm(
                np.array(features) - np.array(predict))
            distances.append([euclidean_distance, group])

    votes = [i[1] for i in sorted(distances)[:k]]
    vote_result = Counter(votes).most_common(1)[0][0]
    confidence = Counter(votes).most_common(1)[0][1] / k

    return vote_result, confidence
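# Toy usage sketch: `data` maps a group label to its feature vectors, as the loops in
# k_nearest_neighbors above expect. The dataset and query point are made up for illustration.
train = {'k': [[1, 2], [2, 3], [3, 1]],
         'r': [[6, 5], [7, 7], [8, 6]]}
query = [5, 7]
result, confidence = k_nearest_neighbors(train, query, k=3)
# result -> 'r', confidence -> 1.0, since all 3 nearest neighbours vote 'r'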
def k_nearest_neighbors(data, predict, k=3):
    # warn if k is not greater than the number of voting groups in the data set
    if len(data) >= k:
        warnings.warn('u dumb')

    # find the euclidean distance of every point to the query and collect them in a list
    distances = []
    for group in data:
        for features in data[group]:
            euclidean_distance = np.linalg.norm(
                np.array(features) - np.array(predict))
            distances.append([euclidean_distance, group])

    # count the number of votes for each group among the k nearest points
    votes = [i[1] for i in sorted(distances)[:k]]
    print(Counter(votes).most_common(1))
    vote_result = Counter(votes).most_common(1)[0][0]

    return vote_result
def add_ions(inwhichfilename, ionname, nions, outfilename, topfilename):
    """ Replaces <nions> number of W in <inwhichfilename> with ion <ionname>.
    Updates the topfile to the correct number of W and ions.

    ionname: Currently only accepts NA or CL

    This method is used by neutralize_system and increase_ionic_strength
    """
    if nions == 0:
        warnings.warn('Zero %s ions requested to be added. Doing nothing.' % ionname)
        return

    if 'NA' in ionname.upper():
        ionname = 'NA'
        ion = 'NA+'
    elif 'CL' in ionname.upper():
        ionname = 'CL'
        ion = 'CL-'
    else:
        raise ValueError('%s not recognized' % ionname)

    # include martini_ionitp in topfilename if not already included
    os.system('cp %s/%s ./' % (this_path, martini_ionitp))
    with open(topfilename, 'r') as f:
        data = f.read()
    if '#include "%s"' % martini_ionitp not in data:
        with open(topfilename, 'r') as f:
            data = ''
            for line in f:
                if '#include "%s"' % martini_itp in line:
                    data += line + '#include "%s"\n' % martini_ionitp
                else:
                    data += line
        with open(topfilename, 'w') as f:
            f.write(data)

    # replace last <nions> W atoms with <ionname>
    with open(inwhichfilename, 'r') as f:
        lines = f.readlines()
    added_ions = 0
    lines_reversed = lines[::-1]
    for i, line in enumerate(lines_reversed):
        if added_ions == nions:
            break
        if 'W ' in line:
            # pad the replacement so the fixed-width columns stay aligned
            lines_reversed[i] = lines_reversed[i].replace(' ' * (len(ionname) - 1) + 'W', ionname)
            lines_reversed[i] = lines_reversed[i].replace('W' + ' ' * (len(ion) - 1), ion)
            added_ions += 1
    lines = lines_reversed[::-1]

    # write outfile
    with open(outfilename, 'w') as f:
        f.write(''.join(lines))

    # recalculate the number of W atoms and ions actually present in the new file
    net_W = _actual_atoms_added(outfilename, 'W ')
    net_nions = _actual_atoms_added(outfilename, ion)

    # Append/Update number of ions in .top file
    added = False
    with open(topfilename, 'r') as f:
        data = ''
        for line in f:
            if len(line.split()) != 0 and line.split()[0] == ion:
                data += ion + ' ' * (9 - len(ion)) + '%s\n' % net_nions
                added = True
            elif len(line.split()) != 0 and line.split()[0] == 'W':
                data += 'W' + ' ' * (9 - len('W')) + '%s\n' % net_W
            else:
                data += line
    if not added:
        data += '%s' % ion + ' ' * (9 - len(ion)) + '%s\n' % net_nions
    with open(topfilename, 'w') as f:
        f.write(data)
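# Small standalone sketch of the padded string replacement used above: the 'W' bead name is
# swapped for the ion name while keeping the fixed-width .gro columns aligned, because both
# replacements substitute strings of equal length. The coordinate line below is made up.
ionname, ion = 'NA', 'NA+'
line = '  934W          W 2241   1.234   5.678   9.012\n'
replaced = line.replace(' ' * (len(ionname) - 1) + 'W', ionname)  # ' W'  -> 'NA'
replaced = replaced.replace('W' + ' ' * (len(ion) - 1), ion)      # 'W  ' -> 'NA+'
assert len(replaced) == len(line)   # column widths are preserved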
def add_ions(inwhichfilename, nions, vdwradius, outfilename, topfilename):
    # <nions> NA ions and <nions> CL ions are added to increase the ionic concentration
    if nions == 0:
        warnings.warn('Zero ions requested to be added. Doing nothing.')
        return

    os.system('cp %s/%s ./' % (this_path, martini_ionitp))

    # add extra ions
    os.system('gmx insert-molecules \
        -f %s \
        -nmol %s \
        -ci %s/ion_NA.pdb \
        -radius %s \
        -o %s' % (inwhichfilename, nions, this_path, vdwradius, outfilename))
    # insert the CL ions into the file that already contains the added NA ions
    os.system('gmx insert-molecules \
        -f %s \
        -nmol %s \
        -ci %s/ion_CL.pdb \
        -radius %s \
        -o %s' % (outfilename, nions, this_path, vdwradius, outfilename))

    ## update topfilename
    # include martini_ionitp in topfilename if not already included
    with open(topfilename, 'r') as f:
        data = f.read()
    if '#include "%s"' % martini_ionitp not in data:
        with open(topfilename, 'r') as f:
            data = ''
            for line in f:
                if '#include "%s"' % martini_itp in line:
                    data += line + '#include "%s"\n' % martini_ionitp
                else:
                    data += line
        with open(topfilename, 'w') as f:
            f.write(data)

    # Append/Update number of ions in .top file,
    # adding to the ion count already present
    ion = 'NA+'
    added = False
    with open(topfilename, 'r') as f:
        data = ''
        for line in f:
            if len(line.split()) != 0 and line.split()[0] == ion:
                data += ion + ' ' * (9 - len(ion)) + '%s\n' % (
                    nions + int(line.split()[1]))
                added = True
            else:
                data += line
    if not added:
        data += '%s' % ion + ' ' * (9 - len(ion)) + '%s\n' % nions
    with open(topfilename, 'w') as f:
        f.write(data)

    ion = 'CL-'
    added = False
    with open(topfilename, 'r') as f:
        data = ''
        for line in f:
            if len(line.split()) != 0 and line.split()[0] == ion:
                data += ion + ' ' * (9 - len(ion)) + '%s\n' % (
                    nions + int(line.split()[1]))
                added = True
            else:
                data += line
    if not added:
        data += '%s' % ion + ' ' * (9 - len(ion)) + '%s\n' % nions
    with open(topfilename, 'w') as f:
        f.write(data)
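# Standalone sketch of the .top bookkeeping above (the counts are made up): the molecule
# name is left-padded to a 9-character column before the ion count is appended.
ion, nions = 'NA+', 10
top_line = ion + ' ' * (9 - len(ion)) + '%s\n' % nions
# top_line == 'NA+      10\n'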
def probabilistic_returns(scientist, with_funding, *lock):
    # optimistic, disregarding marginal effort available

    # Array: keeping track of all the returns of investing in each available idea
    slope_ideas, effort_ideas, score_ideas, exp_val = [], [], [], []
    p_slope, p_effort, z_slope, z_effort, data, slopes, prob_slope, bins = \
        None, None, None, None, None, None, None, None

    # Loops over all the ideas the scientist is allowed to invest in
    # condition checks ideas where scientist.avail_ideas is TRUE
    for idea in np.where(scientist.avail_ideas)[0]:
        effort = int(scientist.total_effort_start[idea])
        effort_ideas.append(effort)
        # scientist invested one unit of effort into perceived returns matrix for idea
        slope = get_returns(idea, scientist.perceived_returns_matrix, effort, effort + 1)
        # CALCULATE THE ACTUAL SLOPE AS WELL!
        slope_ideas.append(slope)
        del effort, slope

    if config.switch == 0:  # percentiles
        p_slope = [stats.percentileofscore(slope_ideas, slope_ideas[i]) / 100
                   for i in range(len(slope_ideas))]
        p_effort = [stats.percentileofscore(effort_ideas, effort_ideas[i]) / 100
                    for i in range(len(effort_ideas))]
    elif config.switch == 1:  # z score
        z_slope = stats.zscore(slope_ideas)
        # catches divide by 0 error in beginning step TP 2 where all effort will be 0 --> std dev is 0
        z_effort = [0] * len(effort_ideas) if scientist.model.schedule.time == 2 \
            else stats.zscore(effort_ideas)
    elif config.switch == 2:  # bayesian
        unpack_model_lists(scientist.model, lock[0])
        slopes = [scientist.model.final_slope[i][scientist.unique_id - 1]
                  for i in range(0, 2)]
        if config.use_equal:
            exp_val = [sum(scientist.model.exp_bayes[idea]) /
                       len(scientist.model.exp_bayes[idea])
                       for idea in range(len(scientist.model.exp_bayes))]
            exp_val = np.log(exp_val)
        else:
            exp_val = [sum(scientist.model.exp_bayes[scientist.unique_id - 1][idea]) /
                       len(scientist.model.exp_bayes[scientist.unique_id - 1][idea])
                       for idea in range(len(scientist.model.exp_bayes[scientist.unique_id - 1]))]
            # 0.5 shifts range from 0-1 to 0.5-1.5 so even if scientist is not oldest he does not despair
            exp_val = [0.5 + get_bayesian_formula([a, 1 - a]) for a in exp_val]
        store_model_lists(scientist.model, False, lock[0])

        # 0 = 'm > M', 1 = 'm <= M'
        data = np.asarray([np.asarray([None, None])] * len(slope_ideas))
        prob_slope = [np.asarray([]), np.asarray([])]
        bins = [np.asarray([]), np.asarray([])]
        for i in range(0, 2):
            # scientist has never invested, so he has no data for bayesian update
            if len(slopes[i]) == 0:
                for idea in range(len(slope_ideas)):
                    data[idea][i] = 0.5
                del idea
            else:
                # prob_slope is the probability of each slope bin, bins are the interval edges
                prob_slope[i], bins[i] = np.histogram(slopes[i], bins=len(slopes[i]),
                                                      density=True)
                prob_slope[i] /= sum(prob_slope[i])  # normalize so the bin probabilities sum to 1
                # for all zero elements take average of adjacent elements
                for idx, val in enumerate(prob_slope[i]):
                    # idx here should never be 0 or last value since those intervals cover min/max
                    # temp fix for above: use try statements and treat outside as 0 (NOTE: problem is still unresolved)
                    if val == 0:
                        try:
                            left = prob_slope[i][idx - 1]
                            # IndexError should not happen if program runs smoothly
                        except Exception as e:
                            left = 0
                            w.warn('check prob_slope in optimize.py')
                        try:
                            right = prob_slope[i][idx + 1]
                            # IndexError should not happen if program runs smoothly
                        except Exception as e:
                            right = 0
                            w.warn('check prob_slope in optimize.py')
                        prob_slope[i][idx] = (left + right) / 2
                bins[i][0] = -100000   # so the least value is included in the first bin
                bins[i][-1] = 100000   # so the greatest value is included in the last bin
                data[np.arange(len(slope_ideas)), i] = \
                    prob_slope[i][np.digitize(slope_ideas, bins[i]) - 1]

    p_score, z_score, bayes_score = 0, 0, 0
    for idea in range(len(slope_ideas)):
        # penalize low slope, high effort (a higher score means a better idea to invest in)
        if config.switch == 0:
            p_score = p_slope[idea] * (1 - p_effort[idea]) * \
                scientist.model.f_mult[idea]  # idea indices are equal?
        elif config.switch == 1:
            z_score = z_slope[idea] - z_effort[idea] * \
                scientist.model.f_mult[idea]  # idea indices should be the same?
        elif config.switch == 2:
            power_scale = 15
            # flawed because it only calculates the probability of greater returns given the current
            # slope, not the best returns --> implemented additional multipliers to account for it
            # NOTE: still need to balance out factors so that not one is dominant
            # NOTE: add possible slides?
            bayes_score = (get_bayesian_formula(data[idea]) ** power_scale) * \
                (slope_ideas[idea] / (exp_val[idea] ** power_scale)) * \
                scientist.model.f_mult[idea]  # idea indices should be the same?
        score_ideas.append([p_score, z_score, bayes_score][config.switch])
    del p_score, z_score, bayes_score

    # Scalar: finds the maximum return over all the available ideas
    max_return = max(score_ideas)
    # Array: finds the indices of the maximum return over all the available ideas
    idx_max_return = np.where(np.asarray(score_ideas) == max_return)[0]
    # choosing a random value out of all possible values (starts at index 2+10 = 12)
    random.seed(config.seed_array[scientist.unique_id][scientist.model.schedule.time + 10])
    idea_choice = idx_max_return[random.randint(0, len(idx_max_return) - 1)]

    del idx_max_return, slope_ideas, effort_ideas, z_slope, z_effort, score_ideas, p_slope, p_effort, slopes, \
        prob_slope, bins, exp_val

    return process_idea(scientist, with_funding, idea_choice, None, lock)
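# Standalone numpy sketch of the histogram lookup used in the bayesian branch above
# (config.switch == 2): past slopes are binned into a normalized histogram, the outer
# bin edges are widened, and np.digitize maps each candidate slope to the probability
# of its bin. The slope values below are made up for illustration.
import numpy as np

past_slopes = np.array([0.2, 0.5, 0.9, 1.4, 2.0])
candidate_slopes = np.array([0.4, 1.8])

prob_slope, bins = np.histogram(past_slopes, bins=len(past_slopes), density=True)
prob_slope = prob_slope / prob_slope.sum()   # normalize so the bin probabilities sum to 1
bins[0], bins[-1] = -100000, 100000          # widen the outer edges so every value falls in a bin
prob_for_candidates = prob_slope[np.digitize(candidate_slopes, bins) - 1]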