예제 #1
0
def create_molecules_from_smiles_file(file: BinaryIO) -> MoleculeSet:
    """
    Reads in a .smiles file and constructs :class:`Molecule` instances from it, all linked to a
    single new :class:`MoleculeSet` instance.

    Every line is decoded as UTF-8. Lines starting with ``#`` and lines that are empty or
    consist only of whitespace are skipped. On all other lines, the first whitespace-separated
    token is taken as the SMILES pattern and the (stripped) remainder of the line, if any, as
    the molecule name; the name defaults to an empty string.

    The new objects are added to the current database session, which is committed before
    returning.

    :param file: A file handle to a .smiles file, yielding ``bytes`` lines.
    :returns: The newly created :class:`MoleculeSet` instance. The created :class:`Molecule`
      instances are available as its ``.molecules`` property.
    """
    session = get_session()
    nof_mol = 0
    molset = MoleculeSet()
    # Register the set itself so that it is persisted even when the file yields no molecules.
    session.add(molset)

    for line in file:
        # Rudimentary SMILES parsing
        line = line.decode('utf-8')
        if line.startswith('#'):
            continue

        line_contents = line.split(None, 1)
        if not line_contents:
            # Blank / whitespace-only line: there is no pattern to read.
            continue

        # Splitting on whitespace already leaves the first token free of surrounding
        # whitespace; only the remainder (which keeps the line ending) must be stripped.
        pattern = line_contents[0]
        name = line_contents[1].strip() if len(line_contents) == 2 else ''

        molecule = Molecule(pattern=pattern, name=name, molset=molset)
        session.add(molecule)
        nof_mol += 1
    session.commit()
    logging.info(f"Added {nof_mol} Molecules to the database.")
    return molset
예제 #2
0
def matches_for_molecule_set(id: int):
    """
    A route that lists every match belonging to one MoleculeSet instance.

    The response body is a JSON object with the key ``molecule_set_id`` and an array under
    the key ``matches``. Each entry of ``matches`` carries a ``molecule_id``, a
    ``molecule_name`` and a ``smarts_id``.

    :param id: The ID of the MoleculeSet instance.
    :return: A ``(body, status)`` tuple: the JSON described above with status 200, or an
      ``error`` object with status 404 if no MoleculeSet with this ID exists.
    """
    db = get_session()
    molecule_set = db.query(MoleculeSet).get(id)
    if molecule_set is None:
        return {'error': 'Unknown molecule set.'}, 404

    # Select the IDs of the set's molecules as a query, so the IN filter below can use it
    # directly instead of a materialized Python list.
    ids_in_set = db.query(Molecule.id).filter_by(molset_id=molecule_set.id)
    match_query = db.query(Match).filter(Match.molecule_id.in_(ids_in_set))

    payload = {'molecule_set_id': molecule_set.id, 'matches': []}
    for hit in match_query:
        payload['matches'].append({
            'molecule_id': hit.molecule_id,
            'molecule_name': hit.molecule.name,
            'smarts_id': hit.smarts_id
        })
    return payload, 200
예제 #3
0
def upload_molecule_set():
    """
    A route for uploading a set of molecules, given as a single SMILES file in the 'file' parameter.

    On success, redirects to the route :func:`matches_for_molecule_set` for the newly created
    MoleculeSet. Therefore, on success this returns the matches associated with the
    uploaded molecule set.

    On a missing or invalid file, responds with a 400 error and JSON containing a descriptive
    error string (key 'error'). This string can be displayed directly in the frontend.

    On any other failure while matching or drawing the molecules, the error is logged together
    with its traceback, a molecule set that was already created is deleted again, and the
    response is a 500 error with a generic message under the key 'error'.
    """
    if 'file' not in request.files or not request.files['file'].filename:
        return {'error': 'Request seems to be missing a molecule file.'}, 400

    plain_file = request.files['file']
    try:
        file = _check_valid_file(plain_file)
    except ValueError as e:
        return {'error': str(e)}, 400

    mol_set = None
    try:
        mol_set = calculate_molecule_matches(file)
        draw_molecules_from_molset(mol_set)
        return redirect(url_for('molecules.matches_for_molecule_set', id=mol_set.id))
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error(e) would drop.
        logging.exception(e)
        if mol_set is not None:
            session = get_session()
            # Discard whatever the failed step left pending, so the session is usable
            # again and only the deletion below gets committed.
            session.rollback()
            session.delete(mol_set)
            session.commit()
        return {'error': 'Unknown error occurred'}, 500
예제 #4
0
def from_db(min_similarity: float, max_similarity: float) -> dict:
    """Builds a dict representation of the directed graph data stored in the database.

    The result contains all stored SMARTS as nodes (key 'nodes') and those stored directed
    edges (key 'edges') whose spsim property lies in the closed interval
    min_similarity <= spsim <= max_similarity.

    :param min_similarity: The minimum similarity of the returned edges (inclusive).
    :param max_similarity: The maximum similarity of the returned edges (inclusive).
    :return: A dict with the keys 'nodes' and 'edges', each holding a list of plain dicts.
    """
    db = get_session()
    all_smarts = db.query(SMARTS).all()
    edge_query = db.query(DirectedEdge).filter(
        DirectedEdge.spsim >= min_similarity,
        DirectedEdge.spsim <= max_similarity)
    edges_in_range = edge_query.all()

    nodes = []
    for node in all_smarts:
        nodes.append({
            'id': node.id,
            'name': node.name,
            'library': node.library,
            'pattern': node.pattern
        })

    links = []
    for link in edges_in_range:
        links.append({
            'id': link.id,
            'source': link.from_id,
            'target': link.to_id,
            'mcssim': link.mcssim,
            'spsim': link.spsim
        })

    return {'nodes': nodes, 'edges': links}
예제 #5
0
def add_library_command(name, filename):
    """
    Add a SMARTS library (name & .smarts file) to the db.

    If SMARTS with the same library name are already stored, the user is asked on the
    console to confirm; anything other than ``y``/``Y`` aborts without inserting.

    :param name: The library name to store the SMARTS under.
    :param filename: Path of the .smarts file to read.
    :return: Whatever :func:`add_library` returns, or ``None`` if the user declined.
    """
    db = get_session()
    library_exists = db.query(SMARTS).filter_by(library=name).count() > 0
    if library_exists:
        prompt = (
            f"There are already SMARTS with this library name ({name}) in the database.\n"
            f"Are you sure you want to add them? Enter Y to continue [y/N] ")
        confirmed = input(prompt).lower() == 'y'
        if not confirmed:
            click.echo(f"{name} was *not* inserted.")
            return None

    return add_library(name, filename)
예제 #6
0
def draw_all_smarts_command():
    """
    Draws all SMARTS in the db to the serving directory.

    This is a required action before serving the application in
    production, for the frontend to work correctly.

    The viewer executable is read from the app config key ``SMARTSCOMPARE_VIEWER_PATH`` and
    the output directory from ``STATIC_SMARTSVIEW_PATH``; the output directory is created
    if it does not exist yet.

    :raises ValueError: If the configured viewer path is not an existing file.
    """
    import os

    db = get_session()
    smarts_to_draw = db.query(SMARTS).all()

    settings = current_app.config
    viewer_path = settings['SMARTSCOMPARE_VIEWER_PATH']
    output_path = settings['STATIC_SMARTSVIEW_PATH']

    # The output directory is created first, i.e. even if the viewer check below fails.
    os.makedirs(output_path, exist_ok=True)
    if os.path.isfile(viewer_path):
        return draw_multiple_smarts(smarts_to_draw, viewer_path, output_path)

    raise ValueError(
        f"Viewer path {viewer_path} does not point to a file...!")
예제 #7
0
def deliver_molecule_image(id):
    """
    A route that serves the SVG image of a molecule, looked up by the molecule's ID.

    The file is expected at ``<STATIC_MOL2SVG_MOLECULE_SETS_PATH>/<molset id>/<molecule id>.svg``,
    where the base path comes from the app config.

    :param id: The ID of the molecule.
    :return: The result of ``send_from_directory`` for the image file, or an ``error`` object
      with status 404 if no molecule with this ID exists.
    """
    db = get_session()
    mol = db.query(Molecule).get(id)
    if mol is None:
        return {'error': 'Molecule not found'}, 404

    # Both path components are passed through secure_filename before touching the filesystem.
    set_dir = secure_filename(str(mol.molset_id))
    svg_name = secure_filename(f'{mol.id}.svg')
    image_dir = os.path.join(
        current_app.config['STATIC_MOL2SVG_MOLECULE_SETS_PATH'], set_dir)
    return send_from_directory(image_dir, svg_name)
예제 #8
0
def add_library(name: str, filename: str) -> None:
    """
    Add a SMARTS library (name and a .smarts file) to the db, by adding corresponding SMARTS
    objects to the database.

    Each line of the file is stripped of surrounding whitespace. Lines starting with ``#`` are
    skipped silently. A line is accepted if it consists of a pattern without whitespace,
    followed by whitespace and a non-empty label; the label becomes the name of the created
    SMARTS object. All other lines are not stored, and their 1-based line numbers are
    reported on the console at the end.

    :param name: The name of the library to add. Will be stored on the created SMARTS instances.
    :param filename: The filename of the .smarts file to create and store SMARTS from.
    """
    import re
    db = get_session()

    # <pattern><whitespace><label>, anchored to the whole (stripped) line.
    line_format = re.compile(r'(^[^\s]+)\s+(.+)$')

    with open(filename, 'r') as stream:
        skipped_line_numbers = []
        added_count = 0
        for line_number, raw_line in enumerate(stream, start=1):
            text = raw_line.strip()
            if text.startswith('#'):
                continue

            parsed = line_format.search(text)
            if parsed is None:
                skipped_line_numbers.append(line_number)
                continue

            smarts_pattern, smarts_name = parsed.groups()
            db.add(SMARTS(name=smarts_name,
                          pattern=smarts_pattern,
                          library=name))
            added_count += 1

    db.commit()
    click.echo(
        f"Added {added_count} SMARTS to the database as library {name}.")
    if skipped_line_numbers:
        click.echo("Ignored lines: " + ", ".join(str(n) for n in skipped_line_numbers))
예제 #9
0
def draw_all_subsets_command():
    """
    Draws all DirectedEdges in the db to the serving directory.

    This is a required action before serving the application in
    production, for the frontend to work correctly.

    The viewer path is the app config value ``SMARTSCOMPARE_VIEWER_PATH`` joined onto the
    application root path; the output directory is the config value
    ``STATIC_SMARTSVIEW_SUBSETS_PATH`` and is created if it does not exist yet.

    :raises ValueError: If the resulting viewer path is not an existing file.
    """
    import os

    db = get_session()
    # Load both endpoint SMARTS together with the edges instead of lazily per edge.
    eager_endpoints = (
        subqueryload(DirectedEdge.from_smarts),
        subqueryload(DirectedEdge.to_smarts),
    )
    edges_to_draw = db.query(DirectedEdge).options(*eager_endpoints).all()

    viewer_path = os.path.join(current_app.root_path,
                               current_app.config['SMARTSCOMPARE_VIEWER_PATH'])
    output_path = current_app.config['STATIC_SMARTSVIEW_SUBSETS_PATH']

    # The output directory is created first, i.e. even if the viewer check below fails.
    os.makedirs(output_path, exist_ok=True)
    if os.path.isfile(viewer_path):
        return draw_multiple_smarts_subset_relations(edges_to_draw, viewer_path,
                                                     output_path)

    raise ValueError(
        f"Viewer path {viewer_path} does not point to a file...!")
예제 #10
0
def calculate_molecule_matches(
        uploaded_molecules_file: BinaryIO) -> MoleculeSet:
    """
    Calculate molecule matches of all SMARTS in the database given a molecule file,
    and store the Molecule and Match instances in the database.

    The uploaded file is first turned into a new MoleculeSet. The set's molecules and all
    stored SMARTS are then written to temporary files, the external match tool configured
    under ``MATCHTOOL_PATH`` is run on them, and one Match is stored per
    (SMARTS id, molecule id) pair parsed from its output. If the database contains no
    SMARTS, the set is returned without any matches.

    On any error, uncommitted changes are rolled back, an already created MoleculeSet is
    deleted again, and the original exception is re-raised. All file handles, including
    ``uploaded_molecules_file``, are closed before returning or raising.

    :param uploaded_molecules_file: An open file handle to a molecule file to match
    :returns: The newly created MoleculeSet.
    """
    import tempfile
    import sys
    from smartsexplore.util import run_process
    moleculefile, smartsfile, moleculematchfile = [None] * 3

    mol_set = None
    try:
        session = get_session()
        # Store the uploaded molecules, then dump them and all SMARTS to temporary files
        # that the match tool can read.
        mol_set = create_molecules_from_smiles_file(uploaded_molecules_file)
        moleculefile, _ = molecules_to_temporary_smiles_file(mol_set.molecules)
        try:
            smartsfile = write_smarts_to_tempfile()
        except NoSMARTSException:
            # Nothing to match against: keep the molecule set, without matches.
            session.commit()
            return mol_set

        # Run moleculematch on the temporary SMARTS file, and write the
        # stdout to a new temporary result output file.
        moleculematchfile = tempfile.NamedTemporaryFile(mode='w+')
        match_cmd = [
            current_app.config['MATCHTOOL_PATH'], '-i', '2', '-m',
            moleculefile.name, '-s', smartsfile.name
        ]
        run_process(match_cmd,
                    stdout=moleculematchfile,
                    stderr=sys.stderr,
                    reraise_exceptions=True)
        moleculematchfile.seek(0)  # rewind so the parser reads from the start

        # Parse the moleculematch output
        parse_iterator = parse_moleculematch(moleculematchfile)

        # --- Code to store results in the database starts here ---
        for (smartsid, moleculeid) in parse_iterator:
            mmol = session.query(Molecule).get(moleculeid)
            msmarts = session.query(SMARTS).get(smartsid)
            newmatch = Match(molecule=mmol, smarts=msmarts)
            session.add(newmatch)

        # Commit the session
        session.commit()
        return mol_set
    except Exception:
        if mol_set is not None:  # clean up molset if exception occurred
            session = get_session()
            # Drop everything the aborted run left pending (e.g. a partial list of
            # matches) so that only the deletion is committed, and so that a session
            # whose flush failed becomes usable again.
            session.rollback()
            session.delete(mol_set)
            session.commit()
        raise
    finally:  # close all open file handles
        for handle in (uploaded_molecules_file, moleculefile, smartsfile,
                       moleculematchfile):
            if handle:
                handle.close()
예제 #11
0
def calculate_edges(mode):
    """
    Calculate and add edges between all SMARTS in the database.

    Currently implements modes 'Similarity' and
    'SubsetOfFirst'. 'SubsetOfSecond' is redundant, and 'Identical' is
    currently just not implemented.

    When 'Similarity' mode is chosen, 0.1 is picked as a fixed
    similarity value lower bound; otherwise a too large number of
    edges for our purposes would (generally) be generated.

    In 'Similarity' mode the results are stored as UndirectedEdge instances, with the SMARTS
    of the lower ID as ``low_smarts``. In 'SubsetOfFirst' mode they are stored as
    DirectedEdge instances pointing from the second SMARTS of each result line to the first.
    Edges that already exist for the mode are not added again.

    If the database contains no SMARTS, a warning is logged and nothing else happens.
    The temporary files are closed on success as well as on failure.

    :param mode: The comparison mode, 'Similarity' or 'SubsetOfFirst'.
    :raises ValueError: If ``mode`` is not one of the implemented modes.
    """
    import os
    import sys
    import tempfile
    from smartsexplore.util import run_process

    # Check validity of chosen mode
    implemented_modes = ('Similarity', 'SubsetOfFirst')
    if mode not in implemented_modes:
        raise ValueError(
            f"{mode} is not an implemented mode. Implemented modes are: "
            f"{', '.join(implemented_modes)}")

    # Get a DB session, retrieve all SMARTS patterns, and write them into file
    session = get_session()
    try:
        smartsfile = write_smarts_to_tempfile()
    except NoSMARTSException:
        logging.warning(
            "No SMARTS in the database! Exiting the edge calculation process..."
        )
        # Actually exit: without a SMARTS file there is nothing to compare.
        return

    # Get mode ID
    mode_map = {
        'Identical': 1,
        'SubsetOfFirst': 2,
        'SubsetOfSecond': 3,
        'Similarity': 4
    }
    mode_id = mode_map[mode]

    # Use half of the available cores, but at least one. os.cpu_count() may return None
    # when the count cannot be determined, and halving a single core would give 0.
    nof_workers = max(1, (os.cpu_count() or 1) // 2)

    smartscomparefile = None
    try:
        # Run SMARTScompare on the temporary SMARTS file, and write the
        # stdout to a new temporary result output file.
        smartscomparefile = tempfile.NamedTemporaryFile(mode='w+')
        compare_cmd = [
            current_app.config['SMARTSCOMPARE_PATH'],
            smartsfile.name,
            '-M',
            '-1',
            # discard edges with <= 0.1 similarity when using (undirected) mode "Similarity"
            *(['-t', '0.1'] if mode == 'Similarity' else []),
            '-p',
            str(nof_workers),
            '-d',
            '|',
            '-D',
            '`',
            '-m',
            str(mode_id)
        ]
        run_process(compare_cmd, stdout=smartscomparefile, stderr=sys.stderr)
        smartscomparefile.seek(0)  # must rewind before further usage

        # Parse the SMARTScompare output; the first item yielded is the mode of the output
        parse_iterator = parse_smartscompare(smartscomparefile)
        resultfile_mode = next(parse_iterator)
        assert resultfile_mode == mode,\
            f"Mode of the SMARTScompare output, {resultfile_mode}, does not match specified mode, {mode}!"

        # --- Code to store results in the database starts here ---

        # Get the existing edges in the database and store them in memory, for efficient checks
        existing_edges = _get_existing_edges(mode, session)
        nof_added_edges = 0
        duplicate_edges = []

        # Define a function to check for duplicates; duplicates are recorded as a side effect
        def _check_for_duplicates(l, r):
            if (l, r) in existing_edges:
                duplicate_edges.append((l, r))
                return True
            return False

        # Different loops and logic based on mode
        if mode == 'Similarity':
            for (_line_no, lname, rname, mcssim, spsim) in parse_iterator:
                lsmarts = session.query(SMARTS).filter_by(id=int(lname)).first()
                rsmarts = session.query(SMARTS).filter_by(id=int(rname)).first()
                # Undirected edges are stored with the lower SMARTS ID first
                losmarts, hismarts = (lsmarts, rsmarts) if lsmarts.id < rsmarts.id\
                    else (rsmarts, lsmarts)
                assert losmarts.id != hismarts.id, f'   {lname} {lsmarts}\n== {rname} {rsmarts}'
                assert losmarts.id < hismarts.id

                if not _check_for_duplicates(losmarts.id, hismarts.id):
                    edge = UndirectedEdge(low_smarts=losmarts,
                                          high_smarts=hismarts,
                                          mcssim=mcssim,
                                          spsim=spsim)
                    existing_edges.add((losmarts.id, hismarts.id))
                    session.add(edge)
                    nof_added_edges += 1
        elif mode == 'SubsetOfFirst':
            for (_line_no, lname, rname, mcssim, spsim) in parse_iterator:
                lsmarts = session.query(SMARTS).filter_by(id=int(lname)).first()
                rsmarts = session.query(SMARTS).filter_by(id=int(rname)).first()
                # The directed edge points from the second SMARTS of the line to the first
                fromsmarts, tosmarts = rsmarts, lsmarts

                if not _check_for_duplicates(fromsmarts.id, tosmarts.id):
                    edge = DirectedEdge(from_smarts=fromsmarts,
                                        to_smarts=tosmarts,
                                        mcssim=mcssim,
                                        spsim=spsim)
                    existing_edges.add((fromsmarts.id, tosmarts.id))
                    session.add(edge)
                    nof_added_edges += 1

        # Commit the session
        session.commit()
    finally:
        # Close all temporary files, also when one of the steps above failed
        smartsfile.close()
        if smartscomparefile is not None:
            smartscomparefile.close()