def find_solution_set(self, components, return_one=False, require_hit=False):
    '''Find stored solutions matching the classification of any component.

    For each entry of ``components`` the database is queried for documents
    whose own ``classification`` equals the component's classification, or
    which list a component with that classification. Each distinct document
    (keyed on ``_id``) is passed through ``strip_internal``, wrapped in a
    ``Solution`` and has ``update_units()`` called on it.

    Documents whose top-level ``concentration`` is one of the "unknown"
    placeholder strings are skipped. Concentrations of the documents'
    individual components are not inspected.

    Args:
        components: iterable of component dicts, each with a
            'classification' entry.
        return_one: when true and at least one solution was found, return a
            single ``Solution`` instead of a list.
        require_hit: when true, raise if nothing was found.

    Returns:
        A list of ``Solution`` objects (possibly empty), or one ``Solution``
        when ``return_one`` is set and there was at least one hit.

    Raises:
        Exception: if ``require_hit`` is set and no solution was found.
    '''
    print(components)

    # Placeholder values that mean "concentration not known".
    unknown_concentrations = ['unknown', 'Unknown', 'unknown:micromolar',
                              'Unknown:micromolar', '?:x']
    all_hits = {}

    for comp in components:
        classif = dict_to_str(strip_internal(comp))['classification']
        sol_query = {'classification': classif}
        comp_query = {'components': {'$elemMatch': {'classification': classif}}}
        query = {'$or': [sol_query, comp_query]}

        for hit in self.db.find(query):
            unique_identifier = hit['_id']
            if unique_identifier in all_hits:
                continue
            # Previously a flag was only ever set to 0 here, so filtered
            # hits either raised NameError or were accepted anyway.
            if hit['concentration'] in unknown_concentrations:
                continue
            found = Solution(**strip_internal(hit))
            all_hits[unique_identifier] = found
            print(found.name)
            print(found.components)
            found.update_units()

    # list() keeps this working where dict.values() returns a view.
    solutions = list(all_hits.values())

    if require_hit and not solutions:
        raise Exception('Did not find DB hit for query with require_hit enabled.')

    if return_one and solutions:
        return solutions[0]
    return solutions
def compatible(self, solution):
    '''Return whether ``solution`` can be combined with this solution.

    The result is False when any component of ``solution`` has no
    'concentration' entry, or when a component that is also present in this
    solution (same serialized classification) has a concentration whose base
    units differ from this solution's. Otherwise the result is True.

    Args:
        solution: the ``Solution`` to check against this one.

    Returns:
        bool

    Raises:
        AssertionError: if ``solution`` is not a ``Solution`` or a present
            concentration is not a ``ureg.Quantity``.
    '''
    assert isinstance(solution, Solution)

    is_compatible = True
    self.build_component_index()

    for c in solution.components:
        if 'concentration' not in c:
            # Unknown concentration: report incompatibility. The old code
            # fell through to c['concentration'] and raised KeyError.
            is_compatible = False
            continue

        assert isinstance(c['concentration'], ureg.Quantity)
        key = serialize(strip_internal(c['classification']))
        if key not in self.comp_index:
            continue

        c_target_conc = self.components[self.comp_index[key]]['concentration']
        source_units = c['concentration'].to_base_units().units
        if source_units != c_target_conc.to_base_units().units:
            is_compatible = False

    return is_compatible
def build_component_index(self):
    '''Rebuild ``self.comp_index`` from ``self.components``.

    ``self.comp_index`` maps the serialized, ``strip_internal``-ed
    classification of a component to that component's position in
    ``self.components``.

    Components that are not dicts or have no 'classification' are not
    indexed. This allows keeping a placeholder for something ambiguous
    like "stabilizers" that has no rigorous classification yet.

    Raises:
        Exception: if two components share the same classification; in
            that case ``self.comp_index`` is left untouched.
    '''
    cindex = {}
    # enumerate() gives the true list position. A counter that only
    # advanced for classified components gave wrong positions as soon as
    # an unclassified placeholder preceded a classified component.
    for position, c in enumerate(self.components):
        if not (isinstance(c, dict) and 'classification' in c):
            continue
        key = serialize(strip_internal(c['classification']))
        if key in cindex:
            raise Exception('warning, found duplicate components' +
                            ' in solution object')
        cindex[key] = position

    self.comp_index = cindex
def intersection_dist(self, another_solution):
    '''Mean relative concentration difference over shared components.

    Only components of this solution that are also indexed in
    ``another_solution`` (same serialized classification) are compared.
    Components whose '_ignore_concentration' entry is present and not equal
    to False are skipped. Each compared pair contributes ``1 - min / max``
    of the base-unit magnitudes; a pair where both are zero contributes 0.

    Args:
        another_solution: solution to compare with; must provide
            ``build_component_index()``, ``components`` and ``comp_index``.

    Returns:
        float: the summed contributions divided by the number of compared
        pairs, or 0.0 when no pair was compared (previously this raised
        ZeroDivisionError).

    Raises:
        AssertionError: if a concentration of this solution is not a
            ``ureg.Quantity`` or a compared pair has different base units.
    '''
    self.build_component_index()
    another_solution.build_component_index()

    compared = 0
    total = 0.0

    for c in self.components:
        assert isinstance(c['concentration'], ureg.Quantity)
        key = serialize(strip_internal(c['classification']))

        # '== False' is kept on purpose: a value such as None is neither
        # missing nor equal to False, and therefore still means "ignore".
        if not (c.get('_ignore_concentration', False) == False):
            continue
        if key not in another_solution.comp_index:
            continue

        compared += 1
        ind = another_solution.comp_index[key]
        c1 = c['concentration']
        c2 = another_solution.components[ind]['concentration']
        assert c1.to_base_units().units == c2.to_base_units().units

        mn = min(c1, c2).to_base_units().magnitude
        mx = max(c1, c2).to_base_units().magnitude
        if mx > 0:
            # float() avoids floor division of integer magnitudes.
            total += 1 - mn / float(mx)
        # mx == 0 means both are zero: identical, nothing to add.

    if compared == 0:
        return 0.0
    return total / compared
def find_by_location(self, location, return_one=True, require_hit=True):
    '''Return the stored solution whose container is at ``location``.

    The database is queried on 'container.location'. The first hit is
    passed through ``strip_internal``, wrapped in a ``Solution`` and has
    ``update_units()`` called on it. If there is more than one hit a
    warning is printed and the first one is still used.

    TODO: Deal with whether to have a return one flag or always return an
    array. To accept a solution as a query or to require a naked location.
    I prefer to be able to have as much of the logic that is repeated
    across different protocols handled by the underlying framework and thus
    favor find(solution, by="location") or find(solution, by="components")
    over location=sample['container']['location'];
    find_by_location(location)[0].

    Args:
        location: value matched against 'container.location'.
        return_one: currently unused (see TODO above).
        require_hit: when true, raise if nothing is stored at ``location``.

    Returns:
        The ``Solution`` built from the first hit, or None when there is no
        hit and ``require_hit`` is false.

    Raises:
        Exception: if there is no hit and ``require_hit`` is true.
    '''
    print(location)

    hits = list(self.db.find({'container.location': location}))

    # Check for "no hit" before touching hits[0]; doing it afterwards made
    # this check unreachable because the indexing raised IndexError first.
    if not hits:
        if require_hit:
            raise Exception('Did not find DB hit for query with require_hit enabled.')
        return None

    if len(hits) > 1:
        print('Found more than one DB entry for location query.')
        print('Theres probably something wrong with your database.')

    s = Solution(**strip_internal(hits[0]))
    # Note - this will fail if the thing being passed in
    # has a dna concentration of unknown or string None...
    s.update_units()
    return s
def dist_self_to_target(self, target_solution, safety_first=True):
    '''Mean Canberra-style distance from this solution to a target.

    For every component of ``target_solution`` that is not flagged with
    '_ignore_concentration' the term ``|v1 - v2| / (v1 + v2)`` is summed,
    where ``v1`` is the target's base-unit magnitude and ``v2`` is this
    solution's magnitude for the same classification (zero when this
    solution does not contain it). A pair with ``v1 + v2 == 0`` adds 0.

    NOTE(review): components flagged to be ignored are excluded from the
    averaging count; confirm this matches the intended pairing of the
    original ``ignore_count`` bookkeeping.

    Args:
        target_solution: solution to measure against; must provide
            ``build_component_index()`` and ``components``.
        safety_first: when true, assert that compared concentrations share
            base units.

    Returns:
        float: the summed terms divided by the number of non-ignored target
        components, or 0.0 when there are none (previously this raised
        ZeroDivisionError).

    Raises:
        AssertionError: if a target concentration is not a
            ``ureg.Quantity``, or (with ``safety_first``) units differ.
    '''
    self.build_component_index()
    target_solution.build_component_index()

    total = 0.0
    counted = 0

    for c in target_solution.components:
        assert isinstance(c['concentration'], ureg.Quantity)
        key = serialize(strip_internal(c['classification']))

        # '== False' is kept on purpose: only a missing flag or a value
        # equal to False means "do not ignore".
        if not (c.get('_ignore_concentration', False) == False):
            continue
        counted += 1

        c1 = c['concentration']
        if key in self.comp_index:
            c2 = self.components[self.comp_index[key]]['concentration']
        else:
            # Absent here: compare against zero in the same units.
            c2 = c1 * 0

        if safety_first:
            assert c1.to_base_units().units == c2.to_base_units().units

        v1 = c1.to_base_units().magnitude
        v2 = c2.to_base_units().magnitude
        denom = float(v1 + v2)
        if denom != 0:
            total += abs(v1 - v2) / denom

    if counted == 0:
        return 0.0
    return total / counted
def load_sample(self, db, sample):
    '''Load the stored solution for ``sample`` and append it to
    ``self.solutions``.

    The lookup goes through ``db.db.find`` on 'container.location', using
    ``sample['container']['location']``. The first match is passed through
    ``strip_internal``, wrapped in a ``Solution`` and has ``update_units()``
    called on it. More than one match only prints a warning. With no match
    the indexing of the first element raises IndexError.
    '''
    location = sample['container']['location']
    matches = list(db.db.find({'container.location': location}))

    if len(matches) > 1:
        print('Found more than one DB entry for location query.')
        print('Theres probably something wrong with your database.')

    first_match = matches[0]
    loaded = Solution(**strip_internal(first_match))

    # Note - this will fail if the thing being passed in
    # has a dna concentration of unknown or string None...
    loaded.update_units()

    self.solutions.append(loaded)
def map_input(self, update, reference):
    '''Copy the entries of ``update`` onto matching target components.

    Every component of ``self.target_solution`` whose '_reference' equals
    ``reference`` receives ``update[key]`` for each key yielded by
    ``strip_internal(update)``. An AssertionError ('Couldnt map input.') is
    raised when no component matched.
    '''
    matched = False

    for position, component in enumerate(self.target_solution.components):
        if component['_reference'] != reference:
            continue
        for field in strip_internal(update):
            self.target_solution.components[position][field] = update[field]
        matched = True

    assert matched, 'Couldnt map input.'
def compatible(self, solution):
    '''Check that ``solution`` can be mixed into this solution.

    A solution is reported as incompatible (False) when one of its
    components has no 'concentration' entry, or when a component that this
    solution also contains (same serialized classification) has a
    concentration with different base units. Otherwise True is returned.

    Args:
        solution: the ``Solution`` to check.

    Returns:
        bool

    Raises:
        AssertionError: if ``solution`` is not a ``Solution`` or a present
            concentration is not a ``ureg.Quantity``.
    '''
    assert isinstance(solution, Solution)

    self.build_component_index()
    result = True

    for c in solution.components:
        if 'concentration' not in c:
            # A component of unknown concentration makes the pair
            # incompatible. Skip the unit check, which would otherwise
            # raise KeyError on the missing entry.
            result = False
            continue

        assert isinstance(c['concentration'], ureg.Quantity)
        key = serialize(strip_internal(c['classification']))

        if key in self.comp_index:
            c_target_conc = self.components[self.comp_index[key]]['concentration']
            units_here = c_target_conc.to_base_units().units
            if c['concentration'].to_base_units().units != units_here:
                result = False

    return result
def dist(self, another_solution, compare_volumes=True, safety_first=True):
    '''Compute a normalized composition distance between two solutions.

    Every classified component contributes to the total:
      * present in only one of the two solutions: 1
      * present in both but with a 'concentration' missing on either: 1
      * present in both with concentrations: ``1 - min / max`` of the
        base-unit magnitudes, and 0 when both are zero.

    The total is divided by ``dmax``, which starts as the sum of the sizes
    of both component indexes and is decremented once per component that is
    not a dict with a 'classification'.

    NOTE(review): the docstring of the previous version described skipping
    pairs flagged with '_ignore' (e.g. diluents such as water); no such
    handling exists in the code.

    Args:
        another_solution: solution to compare with; must provide
            ``build_component_index()``, ``components`` and ``comp_index``.
        compare_volumes: currently unused.
        safety_first: when true, assert unit agreement for shared
            components and Quantity-typed concentrations for components
            only found in ``another_solution``.

    Returns:
        ``total / dmax`` as a float when ``dmax > 0``, otherwise the
        un-normalized total. There are two different uses, that of getting
        a usable dist and that of testing whether two are equivalent.
    '''
    total = 0
    self.build_component_index()
    another_solution.build_component_index()
    dmax = len(self.comp_index) + len(another_solution.comp_index)

    for c in self.components:
        # isinstance() first, so non-dict placeholders are never probed
        # with the 'in' operator.
        if not (isinstance(c, dict) and 'classification' in c):
            # NOTE(review): unclassified components are not part of the
            # indexes that seeded dmax, so this decrement may under-count;
            # kept as in the original pending confirmation.
            dmax -= 1
            continue

        key = serialize(strip_internal(c['classification']))
        if key not in another_solution.comp_index:
            total += 1
            continue

        other = another_solution.components[another_solution.comp_index[key]]
        if ('concentration' not in c) or ('concentration' not in other):
            total += 1
            continue

        c1 = c['concentration']
        c2 = other['concentration']
        if safety_first:
            assert c1.to_base_units().units == c2.to_base_units().units

        mn = min(c1, c2).to_base_units().magnitude
        mx = max(c1, c2).to_base_units().magnitude
        if mx > 0:
            # float() avoids floor division of integer magnitudes.
            total += 1 - mn / float(mx)
        # otherwise, they are both zero, add nothing to the dist

    for c in another_solution.components:
        if not (isinstance(c, dict) and 'classification' in c):
            dmax -= 1
            continue

        key = serialize(strip_internal(c['classification']))
        if key not in self.comp_index:
            if safety_first:
                assert isinstance(c['concentration'], ureg.Quantity)
            total += 1

    if dmax > 0:
        return total / float(dmax)
    return total
def add(self, solution, volume, hypothetical=False, safety_first=True):
    '''Add a specified volume of a solution to this solution.

    Concentrations are volume-weighted: a component present in both
    solutions becomes
    ``volume/vtot * added + self.volume/vtot * existing`` (converted to the
    added component's units); a component only in ``solution`` is copied
    in at ``volume/vtot`` of its concentration; a classified component only
    in this solution is scaled by ``self.volume/vtot``. Finally
    ``self.volume`` becomes ``vtot``.

    Only components of ``solution`` that have both a 'classification' and
    a 'concentration' are transferred.

    TODO: any time add is called it should update the histories of both
    solutions involved, adding a simple dict that specifies what operation
    was performed, so that given only one of the solutions you could
    reconstruct which chemicals were added to it, and when.

    Args:
        solution: the ``Solution`` to draw from.
        volume: amount to add, as a ``ureg.Quantity``.
        hypothetical: when true, skip the container capacity check and do
            not call ``solution.remove(volume)``. This allows working with
            solutions that have no container.
        safety_first: when true, assert argument types, concentration
            types and unit agreement.

    Returns:
        None. Returns early, changing nothing, when the resulting total
        volume is zero.

    Raises:
        AssertionError: on failed safety checks, or when the new total
            volume exceeds ``max_volume`` of the container's 'ctype'.
    '''
    if safety_first:
        assert isinstance(volume, ureg.Quantity)
        assert isinstance(solution, Solution)

    vtot = self.volume + volume
    if vtot == 0.0 * ureg.microliter:
        return

    # The final volume must fit the well that holds this solution. Only
    # relevant for solutions that are contained in some container.
    if not hypothetical:
        if self.container is not None and 'ctype' in self.container:
            assert vtot <= max_volume(self.container['ctype'])

    self.build_component_index()
    solution.build_component_index()

    for c in solution.components:
        if not (isinstance(c, dict) and
                'classification' in c and 'concentration' in c):
            continue
        if safety_first:
            assert isinstance(c['concentration'], ureg.Quantity)

        key = serialize(strip_internal(c['classification']))
        if key in self.comp_index:
            target = self.components[self.comp_index[key]]
            c_target_conc = target['concentration']
            if safety_first:
                assert c['concentration'].to_base_units().units == c_target_conc.to_base_units().units, \
                    'attempted to add two incompatible soutions, use solution.compatible(other_solution) to '+\
                    'check compatibility before combining'
            conc = volume/vtot * c['concentration'] + self.volume/vtot * c_target_conc
            target['concentration'] = conc.to(c['concentration'].units)
        else:
            added = copy(c)
            added['concentration'] = volume/vtot * c['concentration']
            self.components.append(added)
            # Index the position just appended. The old expression,
            # len(self.comp_index) - 1, pointed at an existing component.
            self.comp_index[key] = len(self.components) - 1

    # Dilute what was already here and did not come from `solution`.
    # The component is updated directly instead of through comp_index.
    for c in self.components:
        if isinstance(c, dict) and 'classification' in c:
            key = serialize(strip_internal(c['classification']))
            if key not in solution.comp_index:
                c['concentration'] = self.volume/vtot * c['concentration']

    self.volume = vtot

    if not hypothetical:
        solution.remove(volume)

    assert isinstance(self.volume, ureg.Quantity)