def prepare_dataset(sdffile, dest=None, rename=True, conformations=False, overwrite=False):
    """ Bootstraps the analysis of the dataset stored in an SDF file.

    If the prepared SDF is already in "dest" and "overwrite" is False, nothing
    is recomputed. Otherwise this function:
       - Reads all the molecules in "sdffile"
       - Creates the "dest" directory if it does not exist
       - Renames the compounds (if "rename"), using "<name>-" as prefix,
         where <name> is the base name of "sdffile" without extension
       - Generates 3D conformations (if "conformations")
       - Saves the compounds to "<dest>/<name>-prepared.sdf"
       - Saves the "master" and "saliviewer" tables next to it

    Parameters:
       - sdffile: path to the input SDF file
       - dest: directory for the results; defaults to the directory of sdffile
       - rename: whether to rename the molecules via rename_mols_by_index
       - conformations: whether to call make3D() on the molecules
       - overwrite: whether to redo the work when the prepared SDF exists

    Returns the tuple (path to the prepared SDF, path to the master table);
    the path to the saliviewer table is not returned.
    """
    root, name = op.split(sdffile)
    name = op.splitext(name)[0]

    if not dest:
        dest = root

    dest_sdf = op.join(dest, name + '-prepared.sdf')
    master_table = op.join(dest, name + '-prepared-master.csv')
    sali_table = op.join(dest, name + '-prepared-saliviewer.csv')

    # Single-argument print(...) behaves the same as the print statement in
    # python 2 and keeps this block parseable by python 3 too.
    if op.exists(dest_sdf) and not overwrite:
        print('%s is already there and not overwriting requested' % dest_sdf)
        return dest_sdf, master_table

    print('Reading %s' % sdffile)
    mols = list(pybel.readfile('sdf', sdffile))

    print('\tCreating dataset root: %s' % dest)
    # dest is '' when sdffile is a bare file name and no dest was given; that
    # means "current directory", for which os.makedirs('') would raise.
    if dest and not op.exists(dest):
        os.makedirs(dest)

    if rename:
        print('\tRenaming the compounds to keep track of the provenance')
        rename_mols_by_index(mols, name + '-')

    if conformations:
        print('\tGenerating conformations')
        # Molecules whose title contains any of these ids are left without 3D.
        # NOTE(review): presumably make3D misbehaves on them - confirm.
        # NOTE(review): this is a substring match, so e.g. "train-39880" is
        # skipped as well - confirm this is intended.
        skipped_ids = ('train-3988', 'train-4205', 'dsstox-4205', 'dsstox-4206')
        for mol in mols:
            if any(skipped in mol.title for skipped in skipped_ids):
                continue
            try:
                print('Conformation for %s' % mol.title)
                mol.make3D()
            except Exception:
                # Best effort: report and keep the molecule as it was read
                print('Error computing a 3D conformation for %s' % mol.title)

    print('\tSaving compounds')
    save_mols(mols, dest_sdf)

    print('\tCreating \"master\" table: %s' % master_table)
    create_master_table(dest_sdf, master_table, fields=['Activity'])

    print('\tCreating \"saliviewer\" table: %s' % sali_table)
    create_saliviewer_input(master_table, sali_table)

    return dest_sdf, master_table
    mols_dsstox = list(pybel.readfile('sdf', dsstox_original))

    print 'Num molecules ames=%d, bursi=%d, dsstox=%d' % (len(mols_ames), len(mols_bursi), len(mols_dsstox))

    #The activity is always stored in the same field
    for mol in mols_bursi:
        activity = '1' if mol.data['Ames test categorisation'] == 'mutagen' else '0'
        mol.data['Activity'] = activity
        mol.data['CAS_NO'] = mol.title

    for mol in mols_dsstox:
        mol.data['Activity'] = mol.data['Tox']
        mol.data['CAS_NO'] = mol.data['CAS']

    print '\tRenaming the compounds to keep track of the provenance'
    rename_mols_by_index(mols_ames, 'ames-')
    rename_mols_by_index(mols_bursi, 'bursi-')
    rename_mols_by_index(mols_dsstox, 'dsstox-')

    print '\tComputing and analyzing the union of the datasets'
    cas_dupes = duplicates_by_field(mols_ames + mols_bursi + mols_dsstox)
    #inchi_dupes = duplicates_by_format(mols_ames + mols_bursi + mols_dsstox,)
    can_dupes = duplicates_by_format(mols_ames + mols_bursi + mols_dsstox, 'can')

    #Quick and dirty retrieval of compounds for unit-tests
    #Canonical smiles that are different due to
    # - missing hydrogens (report)
    # - bad perception of stereochemistry
    # - charges
    # - ...
    can_dupes2 = {}