Beispiel #1
0
    def _init(this, source, target, encoding=DEFAULT_ENCODING):
        """Run the parent class's `_init` with `target`, then load `source`.

        `source` must be a string (checked with an assertion) naming a file.
        The file's contents are decoded using `encoding` and passed to
        `s2p.parse_sdf`; the parsed result is stored on `this._sdfrecords`.
        """
        super(populate_from_sdf, this)._init(target)

        assert tc.isstring(source)
        with open(source) as sdf_file:
            raw_contents = sdf_file.read()

        this._sdfrecords = s2p.parse_sdf(raw_contents.decode(encoding))
Beispiel #2
0
    def __init__(self, data=None, types=None,
                 fielddelimiter=FIELDDELIMITER,
                 recorddelimiter=RECORDDELIMITER,
                 prefix=PREFIX, suffix=SUFFIX,
                 prologue=PROLOGUE, epilogue=EPILOGUE):
        """Build the rows from `data` and remember the formatting options.

        The formatting options are taken from this call's local variables
        via `_subdict` (presumably selecting the names listed in
        `self._FORMATTING_ATTRS`), kept as `self._format`, and also copied
        onto the instance as attributes.

        When `types` is given it is normalised before being handed to
        `self._makerows`: an entry that is `None`, or whose no-argument call
        yields a string (per `isstring`), is replaced by `None`; every other
        entry is passed through unchanged.

        `self._width` ends up as the length of the longest row, or 0 when
        there are no rows.
        """
        # locals() is read first, while only self and the arguments exist.
        self._format = _subdict(locals(), self._FORMATTING_ATTRS)
        self.__dict__.update(self._format)

        if types is None:
            coltypes = None
        else:
            coltypes = tuple([t if (t is not None and not isstring(t()))
                              else None
                              for t in types])
        self._rows = rows = self._makerows(data, coltypes)

        _h = len(rows)
        if _h > 0:
            _w = max(len(r) for r in rows)
        else:
            _w = 0
        self._width = _w
Beispiel #3
0
    def __init__(self,
                 data=None,
                 types=None,
                 fielddelimiter=FIELDDELIMITER,
                 recorddelimiter=RECORDDELIMITER,
                 prefix=PREFIX,
                 suffix=SUFFIX,
                 prologue=PROLOGUE,
                 epilogue=EPILOGUE):
        """Build the rows from `data` and remember the formatting options.

        The formatting options are taken from this call's local variables
        via `_subdict` (presumably selecting the names listed in
        `self._FORMATTING_ATTRS`), kept as `self._format`, and also copied
        onto the instance as attributes.

        When `types` is given it is normalised before being handed to
        `self._makerows`: an entry that is `None`, or whose no-argument call
        yields a string (per `isstring`), is replaced by `None`; every other
        entry is passed through unchanged.

        `self._width` ends up as the length of the longest row, or 0 when
        there are no rows.
        """
        # locals() is read first, while only self and the arguments exist.
        self._format = _subdict(locals(), self._FORMATTING_ATTRS)
        self.__dict__.update(self._format)

        if types is None:
            coltypes = None
        else:
            coltypes = tuple([t if (t is not None and not isstring(t()))
                              else None
                              for t in types])
        self._rows = rows = self._makerows(data, coltypes)

        _h = len(rows)
        if _h > 0:
            _w = max(len(r) for r in rows)
        else:
            _w = 0
        self._width = _w
Beispiel #4
0
    def __init__(self, data,
                 fielddelimiter=FIELDDELIMITER,
                 recorddelimiter=RECORDDELIMITER,
                 prefix=PREFIX, suffix=SUFFIX,
                 prologue=PROLOGUE, epilogue=EPILOGUE,
                 keep_empty=False):
        """Open the workbook named by `data` and wrap each of its sheets.

        `data` must be a string (checked with an assertion); it is stored as
        `self._path` and passed to `xl.open_workbook`.  Formatting options
        are picked out of this call's local variables via `_subdict` and
        `Worksheet._FORMATTING_ATTRS` and copied onto the instance.

        Sheets whose `nrows` is not positive are dropped unless `keep_empty`
        is true.  The remaining sheets are wrapped in `Worksheet` objects
        (with this object as `parent`) and stored as the tuple
        `self._sheets`; `self._label_2_index` maps each wrapped sheet's
        `name` to its position in that tuple.
        """
        # locals() is read first, while only self and the arguments exist.
        self.__dict__.update(_subdict(locals(), Worksheet._FORMATTING_ATTRS))

        assert isstring(data)
        self._path = data

        candidates = xl.open_workbook(data).sheets()
        if not keep_empty:
            candidates = [sh for sh in candidates if sh.nrows > 0]

        worksheets = tuple([Worksheet(sh, parent=self) for sh in candidates])
        self._sheets = worksheets

        # On duplicate names the later sheet's position wins.
        label_2_index = {}
        for position, worksheet in enumerate(worksheets):
            label_2_index[worksheet.name] = position
        self._label_2_index = label_2_index
Beispiel #5
0
    def __init__(self,
                 data,
                 fielddelimiter=FIELDDELIMITER,
                 recorddelimiter=RECORDDELIMITER,
                 prefix=PREFIX,
                 suffix=SUFFIX,
                 prologue=PROLOGUE,
                 epilogue=EPILOGUE,
                 keep_empty=False):
        """Open the workbook named by `data` and wrap each of its sheets.

        `data` must be a string (checked with an assertion); it is stored as
        `self._path` and passed to `xl.open_workbook`.  Formatting options
        are picked out of this call's local variables via `_subdict` and
        `Worksheet._FORMATTING_ATTRS` and copied onto the instance.

        Sheets whose `nrows` is not positive are dropped unless `keep_empty`
        is true.  The remaining sheets are wrapped in `Worksheet` objects
        (with this object as `parent`) and stored as the tuple
        `self._sheets`; `self._label_2_index` maps each wrapped sheet's
        `name` to its position in that tuple.
        """
        # locals() is read first, while only self and the arguments exist.
        self.__dict__.update(_subdict(locals(), Worksheet._FORMATTING_ATTRS))

        assert isstring(data)
        self._path = data

        candidates = xl.open_workbook(data).sheets()
        if not keep_empty:
            candidates = [sh for sh in candidates if sh.nrows > 0]

        worksheets = tuple([Worksheet(sh, parent=self) for sh in candidates])
        self._sheets = worksheets

        # On duplicate names the later sheet's position wins.
        label_2_index = {}
        for position, worksheet in enumerate(worksheets):
            label_2_index[worksheet.name] = position
        self._label_2_index = label_2_index
def main(path):
    """
    Read the SDF file at `path` and save one SmallMolecule per record.

    Each SDF field named in `labels` is mapped to a model field.  The
    per-field rules (model_field, required, default, converter) are expanded
    by `util.fill_in_column_definitions`.  For every record the field values
    are converted, defaulted and checked, then a SmallMolecule is created
    from them and saved.

    Processing stops at the first record that fails to convert or save: the
    error is logged with its traceback and the original exception is
    re-raised.
    """
    # map field labels to model fields
    properties = ('model_field','required','default','converter')

    labels = { s2p.MOLDATAKEY:('molfile',True),
              # NOTE: even though these db field are not integers, 
              # it is convenient to convert the read in values to INT to make sure they are not interpreted as float values
               'facility_reagent_id': ('facility_id',True,None, lambda x: util.convertdata(x[x.index('HMSL')+4:],int)), 
               'salt_id': ('salt_id',True,None, lambda x: util.convertdata(x,int)),
               'lincs_id':('lincs_id',False), #None,lambda x:util.convertdata(x,int)),
               'chemical_name':('name',True),
               'alternative_names':'alternative_names',
               'pubchem_cid':'pubchem_cid',
               'chembl_id':'chembl_id',
               'chebi_id':'chebi_id',
               'inchi':'_inchi',
               'inchi_key':'_inchi_key',
               'smiles': ('_smiles',True),
               'molecular_mass':('_molecular_mass',False,None, lambda x: round(util.convertdata(x, float),2)),
               'molecular_formula':'_molecular_formula',
               'software':'software',
               # 'concentration':'concentration',
               #'well_type':('well_type',False,'experimental'),
               'is_restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    labels = util.fill_in_column_definitions(properties,labels)

    assert typecheck.isstring(path)
    with open(path) as fh:
        data = fh.read().decode(DEFAULT_ENCODING)

    records = s2p.parse_sdf(data)
    logger.info(str(('read rows: ', len(records))))

    # count is the number of records saved so far; because any failure
    # aborts the run, it is also the zero-based index of the current record.
    count = 0
    for record in records:
        logger.debug(str(('record', record)))
        initializer = {}
        for key, field_props in labels.items():
            logger.debug(str(('look for key: ', key, ', properties: ', field_props)))
            required = field_props['required']
            default = field_props['default']
            converter = field_props['converter']
            model_field = field_props['model_field']

            value = record.get(key)

            # Todo, refactor to a method
            try:
                logger.debug(str(('raw value', value)))
                # The converter is called even when the field is absent
                # (value is None), exactly as before.
                if converter is not None:
                    value = converter(value)
                if value is None and default is not None:
                    value = default
                # 'n/a' is treated as "no value"; note that this is checked
                # after the default has been applied.
                if value == 'n/a':
                    value = None
                if value is None and required:
                    raise Exception(str(('Field is required: ', key, initializer, 'record:', count)))
                logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
                initializer[model_field] = value
            except Exception as e:
                # logger.exception records the full traceback; the bare
                # raise keeps the original traceback for the caller.
                logger.exception(str(('invalid input', e, 'count', count)))
                raise
        # follows is a kludge, to split up the entered "chemical_name" field, on ';' - TODO: just have two fields that get entered
        # NOTE: this overwrites any value read from the 'alternative_names' field.
        full_name = initializer['name']
        if full_name:
            name_parts = [part.strip() for part in full_name.split(';')]
            initializer['name'] = name_parts[0]
            initializer['alternative_names'] = ';'.join(name_parts[1:])

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(str(('initializer: ', initializer)))
        try:
            sm = SmallMolecule(**initializer)
            sm.save()
            logger.info(str(('sm created:', sm)))
            count += 1
        except Exception as e:
            logger.exception(str(('save failed for: ', initializer, 'error',e, 'count: ', count)))
            raise
Beispiel #7
0
def main(path):
    """
    Read the SDF file at `path` and save one SmallMolecule per record.

    Each SDF field named in `labels` is mapped to a model field.  The
    per-field rules (model_field, required, default, converter) are expanded
    by `util.fill_in_column_definitions`.  For every record the field values
    are converted, defaulted and checked, a SmallMolecule is created from
    them and saved, and a default SmallMoleculeBatch with batch_id 0 is
    created for it.

    Processing stops at the first record that fails to convert or save: the
    error is logged with its traceback and the original exception is
    re-raised.
    """
    properties = ('model_field','required','default','converter')

    labels = { s2p.MOLDATAKEY:('molfile',True),
        'facility_reagent_id': (
            'facility_id',True,None, 
            lambda x: util.convertdata(x[x.index('HMSL')+4:],int)), 
        'salt_id': ('salt_id',True,None, lambda x: util.convertdata(x,int)),
        'lincs_id':('lincs_id',False), 
        'chemical_name':('name',True),
        'alternative_names':'alternative_names',
        'pubchem_cid':'pubchem_cid',
        'chembl_id':'chembl_id',
        'chebi_id':'chebi_id',
        'inchi':'_inchi',
        'inchi_key':'_inchi_key',
        'smiles': ('_smiles',False),
        'molecular_mass':(
            '_molecular_mass',False,None, 
            lambda x: round(util.convertdata(x, float),2)),
        'relevant_citations': '_relevant_citations',
        'molecular_formula':'_molecular_formula',
        'software':'software',
        'date_data_received':('date_data_received',False,None,
                              util.date_converter),
        'date_loaded': ('date_loaded',False,None,util.date_converter),
        'date_publicly_available': ('date_publicly_available',False,None,
                                    util.date_converter),
        'date_updated': ('date_updated',False,None,util.date_converter),
        'is_restricted':('is_restricted',False,False,util.bool_converter)
    }
    labels = util.fill_in_column_definitions(properties,labels)

    assert typecheck.isstring(path)
    with open(path) as fh:
        data = fh.read().decode(DEFAULT_ENCODING)

    records = s2p.parse_sdf(data)
    logger.info('rows read: %d ', len(records))

    # count is the number of records fully processed so far; because any
    # failure aborts the run, it is also the zero-based index of the record
    # currently being handled.
    count = 0
    for record in records:
        initializer = {}
        for key, field_props in labels.items():
            required = field_props['required']
            default = field_props['default']
            converter = field_props['converter']
            model_field = field_props['model_field']

            value = record.get(key)

            try:
                # The converter is called even when the field is absent
                # (value is None), exactly as before.
                if converter is not None:
                    value = converter(value)
                if value is None and default is not None:
                    value = default
                # 'n/a' is treated as "no value"; note that this is checked
                # after the default has been applied.
                if value == 'n/a':
                    value = None
                if value is None and required:
                    raise Exception(
                        'Field is required: %r, values: %r, row: %d'
                        % (key,initializer,count))
                initializer[model_field] = value
            except Exception:
                logger.exception('invalid input, row: %d', count)
                # bare raise keeps the original traceback for the caller
                raise
        # follows is a kludge, to split up the entered "chemical_name" field, 
        # on ';' - TODO: just have two fields that get entered
        # NOTE: this overwrites any value read from the 'alternative_names' field.
        full_name = initializer['name']
        if full_name:
            name_parts = [part.strip() for part in full_name.split(';')]
            initializer['name'] = name_parts[0]
            initializer['alternative_names'] = '; '.join(name_parts[1:])

        try:
            sm = SmallMolecule(**initializer)
            sm.save()

            # create a default batch - 0
            SmallMoleculeBatch.objects.create(reagent=sm,batch_id=0)

            # Only count the row once both the molecule and its default
            # batch exist, so a failure in either step logs the same row.
            count += 1

        except Exception:
            logger.exception('save failed for: %r, row: %d', initializer, count)
            raise