Esempio n. 1
0
def insert_compounds(
    filename,
    lib_name,
    namekey,
    idkey,
    lib_ver=0,
    ):
    """Read an SDF file and insert every record it contains.

    The file is read line by line and decoded as latin1.  A record ends at
    a line starting with '$$$$'; each completed record is parsed with
    parse_annotation() and stored with insert_single_compound() in the
    library returned by get_or_create_library(lib_name, lib_ver).

    If you are updating an existing library, please use
    update_compounds() instead.

    Text following the last '$$$$' terminator is ignored.  Exceptions
    raised while parsing or inserting a record propagate to the caller;
    the input file is closed in every case.
    """

    library = get_or_create_library(lib_name, lib_ver)

    count = 0
    record_lines = []  # lines of the record currently being accumulated
    f = open(filename)
    try:
        for raw_line in f:
            line = unicode(raw_line, 'latin1')
            record_lines.append(line)
            if not line.startswith('$$$$'):
                continue

            # record terminator reached: the accumulated text (terminator
            # included) is one complete SDF entry
            sdffile = u''.join(record_lines)
            moldata = parse_annotation(sdffile, namekey, idkey)
            insert_single_compound(moldata, sdffile, library, namekey,
                                   idkey)
            record_lines = []
            count += 1
    finally:
        f.close()
    root.warning('  -->  count:%s' % count)
Esempio n. 2
0
def gen_joelib_property(sdf):
    """Run JOELib's moleculeConversion.sh on `sdf` and parse its output.

    `sdf` is written (UTF-8 encoded) to a temporary input file, the
    conversion script is run through the shell with a temporary output
    file, and the output is decoded as UTF-8 and handed to get_sdf_tags().

    Returns whatever get_sdf_tags() returns for the converted output.
    Raises RuntimeError when the command exits with a non-zero status.
    Both temporary files are removed whether or not the call succeeds.
    """

    # save the input in FS; the named temp file is deleted when closed

    t = tempfile.NamedTemporaryFile(suffix='.sdf')
    try:
        t.write(codecs.encode(sdf, 'utf-8'))
        t.flush()

        # prepare the output file (only the path is needed, not the fd)

        (fd, out) = tempfile.mkstemp(suffix='.sdf')
        os.close(fd)
        try:

            # convert

            cmd = \
                """JAVA_HOME=/opt/jre/ JOELIB2=/opt/JOELib2-alpha-20070303/ /opt/JOELib2-alpha-20070303/moleculeConversion.sh +d +h -iSDF -osdf "%s" "%s" > /dev/null""" \
                % (t.name, out)
            root.warning('  -->   running:%s' % cmd)
            if os.system(cmd) != 0:
                # a string cannot be raised on Python >= 2.6, so use a
                # real exception class
                raise RuntimeError('cannot run JOELib')

            # read and parse

            f = open(out)
            try:
                tags = get_sdf_tags(codecs.decode(f.read(), 'utf-8'))
            finally:
                f.close()
        finally:

            # clean

            os.unlink(out)
    finally:
        t.close()
    return tags
Esempio n. 3
0
def parse_annotation(sdf, namekey, idkey):
    """Parse the annotation tags of one SDF record and add derived data.

    The tags come from get_sdf_tags(sdf).  Spaces in the id value are
    replaced by '-'.  Unless the id contains '(noMol)', the record is read
    with the shared `inchiconv` converter; the keys 'inchi', 'formula',
    'weight' and 'smiles' are then added to the tag mapping ('smiles' is
    '' for '(noMol)' records).  The name entry is set to '' when `namekey`
    is '__NA__' or is absent from the tags.

    Returns the tag mapping.
    Raises KeyError when `idkey` is missing from the tags, and Exception
    when the structure cannot be read from `sdf`.
    """

    # parse the sdf tags

    moldata = get_sdf_tags(sdf)

    # The id is needed for everything below, so validate it first.
    # KeyError is what a plain missing-key lookup raises as well.
    if idkey not in moldata:
        raise KeyError('compound data error: the idkey is not found')
    moldata[idkey] = moldata[idkey].replace(' ', '-')
    has_structure = '(noMol)' not in moldata[idkey]

    # --- inchi

    inchiconv.SetInAndOutFormats('sdf', 'Inchi')
    mol = openbabel.OBMol()
    if has_structure:
        inchiconv.ReadString(mol, codecs.encode(sdf, 'utf-8'))
        if mol.Empty():
            root.warning('  -->  ERROR on sdf')
            raise Exception('cannot read structure from sdf')

    # standard data generated
    # --- inchi/formula/weight (computed on the empty molecule for
    # '(noMol)' records)

    moldata['inchi'] = inchiconv.WriteString(mol).strip()
    moldata['formula'] = mol.GetFormula()
    mol.AddHydrogens()
    moldata['weight'] = str(mol.GetMolWt())

    # smiles

    inchiconv.SetInAndOutFormats('sdf', 'smi')
    if has_structure:
        mol = openbabel.OBMol()
        inchiconv.ReadString(mol, codecs.encode(sdf, 'utf-8'))
        moldata['smiles'] = inchiconv.WriteString(mol).strip()
    else:
        moldata['smiles'] = ''

    # the name is optional: blank it when the caller has no name tag
    # ('__NA__') or the record simply lacks it

    if namekey == '__NA__' or namekey not in moldata:
        moldata[namekey] = ''

    return moldata
Esempio n. 4
0
 def do_process(compounds, filep, tags):
     """Generate and store JOELib properties for each compound.

     For every compound the stored SDF text is fetched, passed to
     gen_joelib_property(), and the result is handed to
     _insert_property() together with `tags`.  A failure is logged and
     re-raised.  `filep` is accepted but not used here.
     """
     for compound in compounds:
         sdf_text = SDFFile.objects.get(compound=compound).sdffile
         print(' running compound:%s' % compound.id)
         try:
             _insert_property(compound, gen_joelib_property(sdf_text),
                              tags)
         except:
             # log which compound failed, then let the error propagate
             root.warning('  -->  cannot run property on %s'
                          % compound.id)
             raise
Esempio n. 5
0
def insert_single_compound(
    moldata,
    sdf,
    library,
    namekey,
    idkey,
    username='',
    ):
    """Store one compound with its SDF text and annotations.

    A Compound is built from `moldata` (id, name, 'formula', 'weight',
    'inchi', 'smiles') and `username`, saved, and added to `library`.
    The raw `sdf` text is saved as an SDFFile linked to the compound, and
    every remaining `moldata` entry (all keys except `namekey` and
    `idkey`) is saved as an Annotation.

    Always returns False.
    """

    cid = moldata[idkey]
    full_name = moldata[namekey]
    # keep only the first line of a multi-line name
    short_name = (full_name.split('\n')[0] if '\n' in full_name
                  else full_name)

    # compound

    compound = Compound(
        cid=cid,
        name=short_name,
        formula=moldata['formula'],
        weight=moldata['weight'],
        inchi=moldata['inchi'],
        smiles=moldata['smiles'],
        username=username,
        )
    compound.save()
    compound.library.add(library)
    root.warning('  -->new compound inserted: c_id=%s, cid=%s'
                 % (compound.id, cid))

    # sdf file

    SDFFile(sdffile=sdf, compound=compound).save()

    # annotation: everything except the name and id entries

    reserved = (namekey, idkey)
    for (tag, value) in moldata.items():
        if tag not in reserved:
            Annotation(name=tag, value=value, compound=compound).save()

    return False
Esempio n. 6
0
def insert_plate_from_db(
    library_name,
    library_ver,
    format,
    plate_key,
    plate_base,
    well_key,
    row_key,
    col_key,
    ):
    """Create Plate rows from plate annotations already stored in the db.

    Use this when there is no separate plate-mapping file.

    'library_name', 'library_ver': passed to get_library()
    'format': 24/48/96/384 (converted with int())
    'plate_key': required, annotation name holding the plate
    'plate_base': '__NA__' or a prefix such as 'LAT' that is removed from
                  the plate value before it is converted to an integer
    'well_key': annotation name holding the well, or '__NA__'
    'row_key', 'col_key': used only when well_key is '__NA__'; the well is
                  built as row + column, a one-character column being
                  left-padded with '0'

    e.g.:
    insert_plate_from_db('Latca', 0, 96, 'PLATE', 'LAT', '__NA__',
                         'ROW', 'COL')

    Raises ValueError when a plate value is not an integer (after prefix
    removal); lookup and save errors propagate unchanged.
    """

    library = get_library(library_name, library_ver)
    compounds = Compound.objects.filter(library=library)
    format = int(format)

    count = 0
    for compound in compounds:
        plate = Annotation.objects.get(compound=compound,
                name=plate_key).value

        # compare by value: identity ('is') on strings only works by
        # accident of interning
        if plate_base != '__NA__':
            stripped = plate
            # remove the base as a prefix; str.lstrip() would strip any
            # leading characters contained in plate_base instead
            if stripped.startswith(plate_base):
                stripped = stripped[len(plate_base):]
            try:
                plate = int(stripped)
            except ValueError:
                root.warning('  -->  plate:%s, base:%s' % (plate,
                             plate_base))
                raise

        if well_key != '__NA__':
            well = Annotation.objects.get(compound=compound,
                    name=well_key).value
        else:
            row = Annotation.objects.get(compound=compound,
                    name=row_key).value
            col = Annotation.objects.get(compound=compound,
                    name=col_key).value
            if len(col) == 1:
                col = '0%s' % col
            well = '%s%s' % (row, col)

        p = Plate(format=format, plate=int(plate), well=well,
                  compound=compound)
        p.save()
        count += 1
        root.warning('  -->  count:%s, compound:%s' % (count, compound))

    root.warning('  -->  total count:%s' % count)
Esempio n. 7
0
def parse_annotation(sdf, namekeys):
    """Parse the tags of one SDF record and add derived data.

    Adds 'inchi', 'formula', 'id', 'weight', 'name' and 'smiles' to the
    mapping returned by get_sdf_tags(sdf) and returns it.  'id' is the
    molecule title, or 'unspecified_' plus six random digits when the
    title is empty.  'name' is the value of the first entry of `namekeys`
    found among the tags; when none is found and no 'name' value exists,
    it is ''.

    Raises Exception when the structure cannot be read from `sdf`.
    """

    def load_molecule(out_format):
        # point the shared converter at sdf -> out_format and read the
        # record into a fresh molecule
        inchiconv.SetInAndOutFormats('sdf', out_format)
        molecule = openbabel.OBMol()
        inchiconv.ReadString(molecule, sdf)
        if molecule.Empty():
            root.warning('  -->  ERROR on sdf')
            raise Exception
        return molecule

    # tags as present in the record

    moldata = get_sdf_tags(sdf)

    # inchi / formula / id / weight

    inchi_mol = load_molecule('Inchi')
    moldata['inchi'] = inchiconv.WriteString(inchi_mol).strip()
    moldata['formula'] = inchi_mol.GetFormula()
    title = inchi_mol.GetTitle()
    if title == '':
        title = 'unspecified_' + ''.join(random.sample(string.digits, 6))
    moldata['id'] = title
    inchi_mol.AddHydrogens()
    moldata['weight'] = str(inchi_mol.GetMolWt())

    # display name: first candidate key present in the tags wins

    present = [key for key in namekeys if key in moldata]
    if present:
        moldata['name'] = moldata[present[0]]
    if moldata.get('name') is None:
        moldata['name'] = ''

    # smiles, from a second read of the record

    smiles_mol = load_molecule('smi')
    moldata['smiles'] = inchiconv.WriteString(smiles_mol).strip()

    return moldata
Esempio n. 8
0
def fast_insert_plate_from_file(
    library_name,
    library_ver,
    platefile,
    format,
    ):
    """Create Plate rows from a comma-separated plate-mapping file.

    'library_name', 'library_ver': passed to get_library()
    'platefile' format, one compound per line:
        1st column: compound id
        2nd column: plate (integer)
        3rd column: well
      extra columns are ignored; lines whose compound id is 'NA' and
      blank lines are skipped
    'format': 24/48/96/384 (converted with int())

    Raises ValueError on a line with fewer than three columns or a
    non-integer plate; lookup and save errors propagate unchanged.  The
    file is closed in every case.
    """

    format = int(format)
    library = get_library(library_name, library_ver)

    count = 0
    fp = open(platefile)
    try:
        for line in fp:
            line = line.strip()
            if not line:
                # tolerate empty lines (e.g. a trailing newline)
                continue

            (cid, plate, well) = line.split(',')[0:3]
            if cid == 'NA':
                continue
            plate = int(plate)

            compound = Compound.objects.get(library=library, cid=cid)
            try:
                p = Plate(format=format, plate=plate, well=well,
                          compound=compound)
                p.save()
            except Exception:
                # report how far we got before the failing row
                print('  -->  count:%s, cid:%s' % (count, cid))
                raise
            count += 1
    finally:
        fp.close()
    root.warning('  -->  count:%s' % (count, ))
Esempio n. 9
0
def parse_annotation(sdf, namekey):
    """Parse the tags of one SDF record and add derived data.

    Adds 'inchi', 'formula', 'id', 'weight' and 'smiles' to the mapping
    returned by get_sdf_tags(sdf) and returns it.  'id' is the molecule
    title, or 'unspecified_' plus six random digits when the title is
    empty.  If `namekey` is not among the tags it is added with value ''.

    Raises Exception when the structure cannot be read from `sdf`.
    """

    tags = get_sdf_tags(sdf)

    # first read: InChI, formula, id and weight

    inchiconv.SetInAndOutFormats('sdf', 'Inchi')
    structure = openbabel.OBMol()
    inchiconv.ReadString(structure, sdf)
    if structure.Empty():
        root.warning('  -->  ERROR on sdf')
        raise Exception

    tags['inchi'] = inchiconv.WriteString(structure).strip()
    tags['formula'] = structure.GetFormula()
    title = structure.GetTitle()
    tags['id'] = (title if title != '' else 'unspecified_'
                  + ''.join(random.sample(string.digits, 6)))
    structure.AddHydrogens()
    tags['weight'] = str(structure.GetMolWt())

    # a record without the name tag gets an empty name

    tags.setdefault(namekey, '')

    # second read: SMILES

    inchiconv.SetInAndOutFormats('sdf', 'smi')
    structure = openbabel.OBMol()
    inchiconv.ReadString(structure, sdf)
    if structure.Empty():
        root.warning('  -->  ERROR on sdf')
        raise Exception
    tags['smiles'] = inchiconv.WriteString(structure).strip()

    return tags
Esempio n. 10
0
def insert_single_property_field(
    name,
    description,
    is_integer,
    source_tag,
    ):
    """Save one PropertyField and log its id.

    `is_integer` is reduced to a bool by truthiness before it is stored.
    Always returns False.
    """

    field = PropertyField(
        name=name,
        description=description,
        is_integer=bool(is_integer),
        source_tag=source_tag,
        )
    field.save()
    root.warning('  -->  id=%s' % field.id)

    return False
Esempio n. 11
0
def parse_annotation(sdf, namekey):
    """Parse the tags of one SDF record and add derived data.

    The record is UTF-8 encoded before being handed to the converter.
    Adds 'inchi', 'formula', 'id', 'weight' and 'smiles' to the mapping
    returned by get_sdf_tags(sdf) and returns it.  'id' is the molecule
    title, or 'unspecified_' plus six random digits when the title is
    empty.  If `namekey` is not among the tags it is added with value ''.

    Raises Exception when the structure cannot be read from `sdf`.
    """

    moldata = get_sdf_tags(sdf)

    # --- inchi pass

    inchiconv.SetInAndOutFormats('sdf', 'Inchi')
    mol = openbabel.OBMol()
    encoded_sdf = codecs.encode(sdf, 'utf-8')
    inchiconv.ReadString(mol, encoded_sdf)
    if mol.Empty():
        root.warning('  -->  ERROR on sdf')
        raise Exception

    # derived values; weight is taken after hydrogens are added

    inchi = inchiconv.WriteString(mol).strip()
    formula = mol.GetFormula()
    mol_id = mol.GetTitle()
    if mol_id == '':
        mol_id = 'unspecified_' + ''.join(random.sample(string.digits, 6))
    mol.AddHydrogens()
    weight = str(mol.GetMolWt())
    moldata.update({
        'inchi': inchi,
        'formula': formula,
        'id': mol_id,
        'weight': weight,
        })

    # a record without the name tag gets an empty name

    if namekey not in moldata:
        moldata[namekey] = ''

    # --- smiles pass

    inchiconv.SetInAndOutFormats('sdf', 'smi')
    mol = openbabel.OBMol()
    inchiconv.ReadString(mol, encoded_sdf)
    if mol.Empty():
        root.warning('  -->  ERROR on sdf')
        raise Exception
    moldata['smiles'] = inchiconv.WriteString(mol).strip()

    return moldata
Esempio n. 12
0
def update_compounds(
    filename,
    library_name,
    version,
    namekey,
    idkey,
    ):
    """Update an existing library from an SDF file, if anything changed.

    check_or_update_compounds() is first run with check_only=True; only
    when it reports a change is it run again with check_only=False.
    Either outcome is logged.
    """

    library = get_library(library_name, version)

    has_update = check_or_update_compounds(filename, library, namekey,
            idkey, check_only=True)
    if not has_update:
        root.warning('  --> : No compound update found.')
        return

    root.warning('  --> : compound update found. Will update now.')
    check_or_update_compounds(filename, library, namekey, idkey,
                              check_only=False)
Esempio n. 13
0
def create_library_w_header(header):
    """Create, save and return a new Library for `header`.

    The version is 0 when the header has no library yet, otherwise the
    version of header.library_set's latest() entry plus one.
    """

    existing = header.library_set.all()

    if len(existing) != 0:
        # continue the version sequence of this header
        version = existing.latest().version + 1
    else:
        # first library for this header
        version = 0

    new_library = Library(header=header, version=version)
    new_library.save()
    root.warning('  --> : New library created: %s' % new_library)

    return new_library
Esempio n. 14
0
def insert_property_fields(jfile):
    """Insert one PropertyField per line of a comma-separated file.

    file format: NAME, DESCRIPTION, IS_INTEGER?, SOURCE_TAG
    (columns beyond the fourth are ignored).

    Each line is passed to insert_single_property_field().  A line with
    fewer than four columns is reported by (1-based) line number and the
    ValueError is re-raised.  The file is closed in every case.

    Always returns False.
    """

    fp = open(jfile)
    try:
        for (index, line) in enumerate(fp):
            try:
                (name, description, is_integer, source_tag) = \
                    line.strip().split(',')[0:4]
            except ValueError:
                root.warning('  -->  line %s' % (index + 1))
                raise
            # NOTE(review): is_integer is forwarded as the raw column
            # text, and any non-empty string is truthy -- confirm how the
            # IS_INTEGER? column is meant to be encoded.
            insert_single_property_field(name, description, is_integer,
                    source_tag)
    finally:
        fp.close()

    return False
Esempio n. 15
0
def gen_joelib_property(sdf):
    """Run JOELib's moleculeConversion.sh on `sdf` and parse its output.

    `sdf` is written (UTF-8 encoded) to a temporary input file, the
    conversion script under `cur_dir`/support is run through the shell
    with a temporary output file, and the output is decoded as UTF-8 and
    handed to get_sdf_tags().  JAVA_HOME depends on the host name
    reported by os.uname(): 'chemmineweb' and 'biocluster' use system
    JVMs, any other host uses the JRE bundled under `cur_dir`/support.

    Returns whatever get_sdf_tags() returns for the converted output.
    Raises RuntimeError when the command exits with a non-zero status.
    Both temporary files are removed whether or not the call succeeds.
    """

    joelib_dir = '%s/support/JOELib2-alpha-20070303/' % cur_dir
    host_java_homes = {
        'chemmineweb': '/usr/lib/jvm/java-6-sun',
        'biocluster': '/usr/lib/jvm/java-1.5.0-sun-1.5.0.14/',
        }
    java_home = host_java_homes.get(os.uname()[1],
                                    '%s/support/jre1.6.0_06/' % cur_dir)

    # save the input in FS; the named temp file is deleted when closed

    t = tempfile.NamedTemporaryFile(suffix='.sdf')
    try:
        t.write(codecs.encode(sdf, 'utf-8'))
        t.flush()

        # prepare the output file (only the path is needed, not the fd)

        (fd, out) = tempfile.mkstemp(suffix='.sdf')
        os.close(fd)
        try:

            # convert

            cmd = \
                """JAVA_HOME=%s JOELIB2=%s %smoleculeConversion.sh +d +h -iSDF -osdf "%s" "%s" > /dev/null""" \
                % (java_home, joelib_dir, joelib_dir, t.name, out)
            root.warning('  -->   running:%s' % cmd)
            if os.system(cmd) != 0:
                # a string cannot be raised on Python >= 2.6, so use a
                # real exception class
                raise RuntimeError('cannot run JOELib')

            # read and parse

            f = open(out)
            try:
                tags = get_sdf_tags(codecs.decode(f.read(), 'utf-8'))
            finally:
                f.close()
        finally:

            # clean

            os.unlink(out)
    finally:
        t.close()
    return tags
Esempio n. 16
0
def insert_single_compound(
    moldata,
    sdf,
    namekey,
    idkey,
    user,
    ):
    """Store one compound and its SDF text; return the compound's id.

    A Compound is built from `moldata` (id, name, 'formula', 'weight',
    'inchi', 'smiles') and `user` and saved; the raw `sdf` text is then
    saved as an SDFFile linked to it.
    """

    cid = moldata[idkey]
    full_name = moldata[namekey]
    # keep only the first line of a multi-line name
    short_name = (full_name.split('\n')[0] if '\n' in full_name
                  else full_name)

    # compound

    compound = Compound(
        cid=cid,
        name=short_name,
        formula=moldata['formula'],
        weight=moldata['weight'],
        inchi=moldata['inchi'],
        smiles=moldata['smiles'],
        user=user,
        )
    compound.save()
    root.warning('  -->new compound inserted: c_id=%s, cid=%s'
                 % (compound.id, cid))

    # sdf file

    SDFFile(sdffile=sdf, compound=compound).save()

    return compound.id
Esempio n. 17
0
    plates = []
    plate = compound.plate_set.all()
    for p in plate:
        plates.append(dict(format=p.format, plate=p.plate, well=p.well))

    # --- get duplicates via fingerprint

    try:
        fpt = Fingerprint.objects.get(compound=compound)
        dup_fpts = \
            Fingerprint.objects.filter(fingerprint=fpt.fingerprint)
        if len(dup_fpts) == 1:
            dup_fpts = []
    except:
        root.warning('failed to get duplicates for %s' % compound)
        dup_fpts = []

    if 'addWorkbench' in request.POST:
        input_mode = 'view'
        addToWorkbench(compound=compound,
                       username=request.user.username)
        matches = None
        request.user.message_set.create(message='Compound added to workbench'
                )

    return render_to_response('compound.html', dict(
        libname=library,
        compound=compound,
        annotations=annotations,
        annotations_head=annotations.items()[:11],
Esempio n. 18
0
def insert_property(
    library_name,
    library_ver,
    offset,
    limit,
    filename,
    ):
    """Generate and store JOELib properties for a slice of a library.

    Good for small libraries, as it is slow.  For a library with a large
    number of compounds:
      1. use extra scripts to generate property files
      2. use insert_property_from_file()

    Compounds [offset, offset + limit) of the library are processed in
    batches of at most 1000.  Before each batch the range being worked
    on is written to `filename` + '.progress'.  `filename` itself is
    opened for writing (and thereby truncated) and passed to the batch
    worker, which does not write to it.

    `offset` and `limit` are converted with int(); a ValueError or
    TypeError from that conversion propagates, as does any error raised
    while processing a compound.  Both files are closed in every case.

    Always returns False.
    """

    def do_process(compounds, filep, tags):
        # run JOELib on each compound's SDF text and store the result
        for compound in compounds:
            sdf = SDFFile.objects.get(compound=compound).sdffile
            print(' running compound:%s' % compound.id)
            try:
                propertydata = gen_joelib_property(sdf)
                _insert_property(compound, propertydata, tags)
            except:
                root.warning('  -->  cannot run property on %s'
                             % compound.id)
                raise

    batch_size = 1000

    library = get_library(library_name, library_ver)
    offset = int(offset)
    limit = int(limit)

    filep = open(filename, 'w')
    try:

        # number of compounds

        compounds = library.compound_set.all()
        root.warning('  -->  %s compounds to be processed'
                     % len(compounds))

        tags = [row[0] for row in
                PropertyField.objects.values_list('source_tag')]

        end = offset + limit
        while offset < end:
            # never run past the requested end: the last batch is
            # shortened to whatever remains
            cur_limit = min(batch_size, end - offset)
            root.warning('  --> cur_limit:%s' % cur_limit)

            filep_progress = open(filename + '.progress', 'w')
            try:
                filep_progress.write('working on %s - %s\n' % (offset,
                                     offset + cur_limit))
                filep_progress.flush()

                # process

                do_process(compounds[offset:offset + cur_limit], filep,
                           tags)
            finally:
                filep_progress.close()

            offset += cur_limit
    finally:
        filep.close()
    return False