def test_create_small_molecule(self):
    """
    Check that a SmallMolecule can be saved and fetched again by primary key.

    A single entity is created from a facility id only, saved, and then
    looked up through ``SmallMolecule.objects.get``; the fetched object's
    facility id must match the one used at creation time.
    """
    expected_facility_id = "HMSL10001"

    created = SmallMolecule(facility_id=expected_facility_id)
    created.save()
    print("New ID: %d" % created.id)
    self.assertTrue(created.id is not None, "No ID was assigned")

    # Round-trip through the database using the freshly assigned key.
    fetched = SmallMolecule.objects.get(pk=created.id)
    self.assertTrue(fetched is not None, "Couldn't find the object")
    self.assertTrue(fetched.facility_id == expected_facility_id)

    print(" ".join(["Created the Small Molecule: ", str(created)]))
def test_create_multiple_small_molecules(self):
    """
    Tests that we can create a few small molecule objects in the db,
    and select a subset.

    100 entities are saved with facility ids "HMSL10000" .. "HMSL10099".
    The test then checks the total row count and that filtering on
    ``id__lt`` with the 50th assigned id returns exactly the 49 rows
    created before it.
    """
    facility_id_base = "HMSL"
    total = 100

    created_ids = []
    for offset in range(total):
        sm = SmallMolecule(facility_id=facility_id_base + str(10000 + offset),
                           pub_date=timezone.now())
        sm.save()
        created_ids.append(sm.id)

    actual_total = SmallMolecule.objects.all().count()
    self.assertEqual(total, actual_total,
                     "Should be 100 items, but was %d" % actual_total)

    # read back a small sample
    # Use the 50th assigned id as the cutoff instead of the literal 50, so
    # the check does not depend on primary keys starting at 1 (database
    # sequences are not necessarily reset between tests).  When ids do run
    # 1..100 the cutoff is 50, which is the value previously hard-coded.
    cutoff = sorted(created_ids)[49]
    result_set = SmallMolecule.objects.filter(id__lt=cutoff)
    subset_size = result_set.count()
    self.assertEqual(subset_size, 49,
                     "Actual size of subqery return is %d" % subset_size)

    for y in result_set:
        print("Small Molecule: id: %d, facilityId: %s, pubDate: %s"
              % (y.id, y.facility_id, y.pub_date))
def test_create_small_molecule(self):
    """
    Tests that we can create a small molecule entity.

    The entity is saved with a facility id and a publication date, then
    fetched again by primary key; the assertions are made against the
    fetched copy so that the database round trip is what gets verified.
    """
    facility_id = "HMSL10001"
    pub_date = timezone.now()

    sm = SmallMolecule(facility_id=facility_id, pub_date=pub_date)
    sm.save()
    print("New ID: %d" % sm.id)
    self.assertTrue(sm.id is not None, "No ID was assigned")

    # Check the object read back from the database (sm2), not the
    # in-memory instance we just built, otherwise the checks are vacuous.
    sm2 = SmallMolecule.objects.get(pk=sm.id)
    self.assertTrue(sm2 is not None, "Couldn't find the object")
    self.assertEqual(sm2.facility_id, facility_id)
    # NOTE(review): assumes the database backend stores the timestamp at
    # full precision; a backend that truncates microseconds would make this
    # comparison fail - confirm against the configured test database.
    self.assertEqual(sm2.pub_date, pub_date)

    print("Created the Small Molecule: id: %d, facilityId: %s, pubDate: %s"
          % (sm2.id, sm2.facility_id, sm2.pub_date))
def build_schema(self):
    """
    Build the resource schema, overriding its 'fields' entry.

    Starts from the parent class's schema and replaces the value stored
    under 'fields' with the result of ``get_detail_schema`` for a blank
    SmallMolecule instance and the 'smallmolecule' scope list.
    """
    resource_schema = super(SmallMoleculeResource, self).build_schema()
    detail_fields = get_detail_schema(SmallMolecule(), ['smallmolecule'])
    resource_schema['fields'] = detail_fields
    return resource_schema
def main(path):
    """
    Read in the sdf file at `path` and save one SmallMolecule per record.

    The file contents are decoded with DEFAULT_ENCODING and parsed by
    ``s2p.parse_sdf``.  For every record, each SDF label listed in `labels`
    is looked up, optionally converted, defaulted, and stored under its
    model field name; the collected values are then passed to the
    SmallMolecule constructor and saved.

    Raises:
        Exception: when a field marked as required has no value.  Any
            exception raised by a converter or by the save is logged and
            re-raised, which stops the load at that record.
    """
    # map field labels to model fields
    properties = ('model_field', 'required', 'default', 'converter')
    get_primary_name = lambda x: x.split(';')[0].strip()
    get_alternate_names = lambda x: ';'.join([x.strip() for x in x.split(';')[1:]])

    labels = {
        s2p.MOLDATAKEY: ('molfile', True),
        # NOTE: even though these db field are not integers,
        # it is convenient to convert the read in values to INT to make
        # sure they are not interpreted as float values
        'facility_reagent_id': (
            'facility_id', True, None,
            lambda x: util.convertdata(x[x.index('HMSL')+4:], int)),
        'salt_id': ('salt_id', True, None, lambda x: util.convertdata(x, int)),
        'lincs_id': ('lincs_id', False),  # None,lambda x:util.convertdata(x,int)),
        'chemical_name': ('name', True),
        'alternative_names': 'alternative_names',
        'pubchem_cid': 'pubchem_cid',
        'chembl_id': 'chembl_id',
        'chebi_id': 'chebi_id',
        'inchi': '_inchi',
        'inchi_key': '_inchi_key',
        'smiles': ('_smiles', True),
        'molecular_mass': (
            '_molecular_mass', False, None,
            lambda x: round(util.convertdata(x, float), 2)),
        'molecular_formula': '_molecular_formula',
        'software': 'software',
        # 'concentration':'concentration',
        # 'well_type':('well_type',False,'experimental'),
        'is_restricted': ('is_restricted', False, False, util.bool_converter),
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    labels = util.fill_in_column_definitions(properties, labels)

    assert typecheck.isstring(path)
    with open(path) as fh:
        data = fh.read().decode(DEFAULT_ENCODING)

    records = s2p.parse_sdf(data)
    logger.info(str(('read rows: ', len(records))))

    count = 0
    for record in records:
        # Debug messages are passed as a lazy '%s' argument so the tuple is
        # only rendered when DEBUG is enabled; the emitted text is the same
        # as str(tuple).
        logger.debug('%s', ('record', record))
        initializer = {}
        # `definition` (not `properties`) so the tuple of property names
        # defined above is not shadowed by the per-field dict.
        for key, definition in labels.items():
            logger.debug(
                '%s', ('look for key: ', key, ', properties: ', definition))
            required = definition['required']
            default = definition['default']
            converter = definition['converter']
            model_field = definition['model_field']

            value = record.get(key)

            # Todo, refactor to a method
            try:
                logger.debug('%s', ('raw value', value))
                if converter is not None:
                    value = converter(value)
                if value is None and default is not None:
                    value = default
                # 'n/a' in the input means "no value"
                if value == 'n/a':
                    value = None
                if value is None and required:
                    raise Exception(str((
                        'Field is required: ', key, initializer,
                        'record:', count)))
                logger.debug(
                    '%s', ('model_field: ', model_field, ', value: ', value))
                initializer[model_field] = value
            except Exception as e:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                logger.error(str((exc_type, fname, exc_tb.tb_lineno)))
                logger.error(str(('invalid input', e, 'count', count)))
                # bare raise keeps the original traceback
                raise

        # follows is a kludge, to split up the entered "chemical_name"
        # field, on ';' - TODO: just have two fields that get entered
        if initializer['name']:
            initializer['alternative_names'] = get_alternate_names(
                initializer['name'])
            initializer['name'] = get_primary_name(initializer['name'])

        logger.debug('%s', ('initializer: ', initializer))
        try:
            sm = SmallMolecule(**initializer)
            sm.save()
            logger.info(str(('sm created:', sm)))
            count += 1
        except Exception as e:
            logger.error(str((
                'save failed for: ', initializer, 'error', e,
                'count: ', count)))
            # bare raise keeps the original traceback
            raise
filename = path.basename(inputFile) attachedFile = AttachedFile(filename=filename, facility_id_for=facilityId, relative_path=relativePath, is_restricted=args.isRestricted) # lookup the Entity if (facilityId <= 30000): # SM or Screen logger.info('look for the small molecule for:' + str(facilityId)) saltId = util.int_converter(args.saltId) if (saltId is not None): logger.info('look for the small molecule for saltId ' + str(saltId)) try: sm = SmallMolecule(facility_id=facilityId, salt_id=saltId) attachedFile.salt_id_for = saltId batchId = util.int_converter(args.batchId) if (batchId is not None): logger.info('look for the batch Id: ' + str(batchId)) attachedFile.batch_id_for = batchId try: smb = SmallMoleculeBatch(smallmolecule=sm, facility_batch_id=batchId) except ObjectDoesNotExist, e: logger.error( str(('No such SmallMoleculeBatch found', facilityId, saltId, batchId, e))) raise e except ObjectDoesNotExist, e: logger.error(
def main(path):
    """
    Read the sdf file at `path` and save one SmallMolecule per record.

    The file contents are decoded with DEFAULT_ENCODING and parsed by
    ``s2p.parse_sdf``.  For every record, each SDF label listed in `labels`
    is looked up, optionally converted, defaulted, and stored under its
    model field name; the collected values are passed to the SmallMolecule
    constructor and saved, and a default SmallMoleculeBatch with
    ``batch_id=0`` is created for the new reagent.

    Raises:
        Exception: when a field marked as required has no value.  Any
            exception raised by a converter, by the save, or by the batch
            creation is logged with its traceback and re-raised, which
            stops the load at that record.
    """
    properties = ('model_field', 'required', 'default', 'converter')
    get_primary_name = lambda x: x.split(';')[0].strip()
    get_alternate_names = (
        lambda x: '; '.join([x.strip() for x in x.split(';')[1:]]))

    labels = {
        s2p.MOLDATAKEY: ('molfile', True),
        'facility_reagent_id': (
            'facility_id', True, None,
            lambda x: util.convertdata(x[x.index('HMSL')+4:], int)),
        'salt_id': ('salt_id', True, None, lambda x: util.convertdata(x, int)),
        'lincs_id': ('lincs_id', False),
        'chemical_name': ('name', True),
        'alternative_names': 'alternative_names',
        'pubchem_cid': 'pubchem_cid',
        'chembl_id': 'chembl_id',
        'chebi_id': 'chebi_id',
        'inchi': '_inchi',
        'inchi_key': '_inchi_key',
        'smiles': ('_smiles', False),
        'molecular_mass': (
            '_molecular_mass', False, None,
            lambda x: round(util.convertdata(x, float), 2)),
        'relevant_citations': '_relevant_citations',
        'molecular_formula': '_molecular_formula',
        'software': 'software',
        'date_data_received': (
            'date_data_received', False, None, util.date_converter),
        'date_loaded': ('date_loaded', False, None, util.date_converter),
        'date_publicly_available': (
            'date_publicly_available', False, None, util.date_converter),
        'date_updated': ('date_updated', False, None, util.date_converter),
        'is_restricted': ('is_restricted', False, False, util.bool_converter),
    }
    # Expand each entry into a dict keyed by the names in `properties`.
    labels = util.fill_in_column_definitions(properties, labels)

    assert typecheck.isstring(path)
    with open(path) as fh:
        data = fh.read().decode(DEFAULT_ENCODING)

    records = s2p.parse_sdf(data)
    logger.info('rows read: %d ', len(records))

    count = 0
    for record in records:
        initializer = {}
        # `definition` (not `properties`) so the tuple of property names
        # defined above is not shadowed by the per-field dict.
        for key, definition in labels.items():
            required = definition['required']
            default = definition['default']
            converter = definition['converter']
            model_field = definition['model_field']

            value = record.get(key)

            try:
                if converter is not None:
                    value = converter(value)
                if value is None and default is not None:
                    value = default
                # 'n/a' in the input means "no value"
                if value == 'n/a':
                    value = None
                if value is None and required:
                    raise Exception(
                        'Field is required: %r, values: %r, row: %d'
                        % (key, initializer, count))
                initializer[model_field] = value
            except Exception:
                logger.exception('invalid input, row: %d', count)
                # bare raise keeps the original traceback, matching the
                # save handler below
                raise

        # follows is a kludge, to split up the entered "chemical_name" field,
        # on ';' - TODO: just have two fields that get entered
        if initializer['name']:
            initializer['alternative_names'] = get_alternate_names(
                initializer['name'])
            initializer['name'] = get_primary_name(initializer['name'])

        try:
            sm = SmallMolecule(**initializer)
            sm.save()
            # create a default batch - 0
            SmallMoleculeBatch.objects.create(reagent=sm, batch_id=0)
        except Exception:
            logger.exception('save failed for: %r, row: %d', initializer, count)
            raise
        # Only count the row once both the reagent and its default batch
        # exist, so every failure message reports the same row number.
        count += 1