def generate_df_results(molid, importances, dset, feats, model, calibration, lso):
    """Build the molecule-influence table for ``molid``.

    One row is generated for every molecule id returned by
    ``molecules_coocurrences_df`` except ``molid`` itself.

    Parameters
    ----------
    molid : the molecule the table is built for; it is excluded from the rows.
    importances : indexable, positionally aligned with the ``molids`` returned by
        ``molecules_coocurrences_df`` (the importance of a molecule is read at the
        position of its first occurrence in ``molids``).
    dset, feats, model, calibration, lso : forwarded untouched to
        ``molecules_coocurrences_df``, ``read_losses`` and ``ManysourcesDataset``.

    Returns
    -------
    pandas.DataFrame indexed by ``molid`` with the columns, in this order:
      - ``relabsimportance``: ``|importance|`` divided by the sum of ``|importance|`` over all rows
      - ``importance``: the entry of ``importances`` for that molecule
      - ``smiles``: ``MolToSmiles`` of the molecule
      - ``cooc_loss``: whatever ``average_loss`` returns for the pair (molid, other molecule)
    """
    # The folds returned by both readers are not needed to build the table.
    cooccurrences, molids, expids, _ = molecules_coocurrences_df(dset=dset, feats=feats, model=model, lso=lso)
    df_losses, _ = read_losses(dset=dset,
                               feats=feats,
                               model=model,
                               calibration=calibration,
                               lso=lso,
                               also_folds=True)
    mols = ManysourcesDataset(dset).mols()

    # Position of the first occurrence of each molid, computed once.
    # This selects the same index as np.where(molids == other_molid)[0][0]
    # without rescanning the whole array for every molecule.
    first_position = {}
    for position, known_molid in enumerate(molids):
        first_position.setdefault(known_molid, position)

    dico_for_df = defaultdict(dict)
    for other_molid in molids:
        if other_molid == molid:
            continue
        dico_for_df[other_molid] = {'importance': importances[first_position[other_molid]],
                                    'cooc_loss': average_loss(molid,
                                                              other_molid,
                                                              cooccurrences,
                                                              molids,
                                                              expids,
                                                              df_losses),
                                    'smiles': MolToSmiles(mols.molid2mol(other_molid))}

    df = pd.DataFrame.from_dict(dico_for_df, orient='index')
    df.index.names = ['molid']
    # Normalise the absolute importances so they sum to one over the table
    abs_importance = df.importance.abs()
    df['relabsimportance'] = abs_importance / abs_importance.sum()
    return df[['relabsimportance', 'importance', 'smiles', 'cooc_loss']]
def _hide_grid_and_ticks(ax):
    """Remove the grid and both axis ticks from ``ax`` (used for axes that only hold an image)."""
    ax.grid(False)
    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])


def _sort_by_relabsimportance(df):
    """Return ``df`` sorted by decreasing ``relabsimportance``, on both old and new pandas."""
    # DataFrame.sort was deprecated in pandas 0.17 in favour of sort_values and removed in 0.20
    if hasattr(df, 'sort_values'):
        return df.sort_values('relabsimportance', ascending=False)
    return df.sort('relabsimportance', ascending=False)


def do_for_one_molid(calibration, dset, feats, lso, model, molid, results_dir, rm_factory, by_source=False):
    """Fit a regression model on the losses of ``molid`` and save model, tables and plots.

    Everything is written to a new directory ``<results_dir>/r2=<rsquared>__<molid>``:
      - ``model_trained_rsquare=<rsquared>.pkl``: the pickled trained model
      - ``smiles.txt``: the smiles of ``molid``
      - ``results_df.pkl`` / ``results_df.html`` (or the ``*_bysource`` variants when
        ``by_source`` is True): the influence table and its top 20 rows
      - ``y_dist.png``: the distribution of ``y``
      - ``verde_que_te_quiero_verde.png`` (only when ``by_source`` is False): the molecule,
        the distribution of ``y`` and the most influential molecules

    Parameters
    ----------
    calibration, dset, feats, lso, model : forwarded to the data readers
        (``get_y``, ``get_X`` / ``get_X_source``, ``generate_df_results*``).
    molid : the molecule to analyse.
    results_dir : directory under which the per-molecule directory is created.
    rm_factory : passed as ``model_factory`` to ``build_and_validate_regression_model``.
    by_source : if True, use ``get_X_source`` / ``generate_df_results_source`` and skip
        the molecule-level figure.

    Returns
    -------
    None
    """
    print(molid)
    MRDK = ManysourcesDataset(dset).mols()  # FIXME: this is read on each job, so once per molecule ATM...

    # Train and evaluate the model
    y, expids = get_y(molid, dset, feats, model, calibration, lso)
    if not by_source:
        X = get_X(molid, expids, dset, feats, model, lso)
    else:
        X = get_X_source(molid, expids, dset, feats, model)  # makes no sense to run by source on LSO=False
    X = ~X  # coocurrences in train, less sparse, but better interpretation unless we tweak well the numbers...
    rsquared, feat_weights, trained_model = build_and_validate_regression_model(X, y, model_factory=rm_factory)
    rsquared = float(rsquared)

    # The directory name shows the r2 (marked as "REMOVE" by the original author)
    moldir = op.join(results_dir, 'r2=%.2f__%s' % (rsquared, molid))
    ensure_dir(moldir)

    # Save the model
    pd.to_pickle(trained_model, op.join(moldir, 'model_trained_rsquare=%.2f.pkl' % rsquared))

    # Save the smiles
    smiles = MolToSmiles(MRDK.molid2mol(molid))
    with open(op.join(moldir, 'smiles.txt'), 'w') as writer:
        writer.write(smiles)

    # Save the molecule-influence table
    if not by_source:
        df = generate_df_results(molid, feat_weights, dset, feats, model, calibration, lso)
        pd.to_pickle(df, op.join(moldir, 'results_df.pkl'))
        # Add (after pickling) a row for the molecule itself; the huge relabsimportance
        # makes it sort first, the "importance" cell holds the r2 and the "cooc_loss"
        # cell holds the mean loss.
        df.loc[molid] = (1E16, rsquared, smiles, np.mean(y))  # FIXME
        # NB: the loop variable must not be called "molid", comprehension variables leak in python 2
        df['label'] = [MRDK.molid2label(row_molid) for row_molid in df.index]
        df = df[['label', 'relabsimportance', 'importance', 'smiles', 'cooc_loss']]
        df = _sort_by_relabsimportance(df)
        df.head(20).to_html(op.join(moldir, 'results_df.html'))
    else:
        df = generate_df_results_source(molid, feat_weights, dset, feats, model, calibration, lso)
        pd.to_pickle(df, op.join(moldir, 'results_df_bysource.pkl'))
        df = _sort_by_relabsimportance(df)
        df.head(20).to_html(op.join(moldir, 'results_df_bysource.html'))

    # Plot the distribution of losses (y)
    plt.figure()
    seaborn.distplot(y, bins=40)
    plt.xlim((-0.05, 1.05))
    plt.title('molid=%s, r2=%.2f' % (molid, rsquared))
    plt.savefig(op.join(moldir, 'y_dist.png'), bbox_inches='tight')
    plt.close()

    # --- WIP gridspec with chemdeco pics and things like that
    if not by_source:
        show_top = 4
        half = show_top // 2  # explicit floor division: slice bounds must be integers
        gs = gridspec.GridSpec(show_top, 2)
        fig = plt.figure(figsize=(24, 16))

        # Plot the molecule itself (upper half of the left column)
        ax_mol = fig.add_subplot(gs[0:half, 0])
        _hide_grid_and_ticks(ax_mol)
        mol = MRDK.molid2mol(molid)
        AllChem.Compute2DCoords(mol)
        frame_color = 'red' if df.loc[molid]['label'] == 'INHIBITOR' else 'green'
        ax_mol.imshow(artdeco2(rdkit2im(mol, size=(400, 400)), color=frame_color, chorrada=5))

        # Plot the distribution of losses (lower half of the left column).
        # The slice used to be "half:0", which selects no rows at all.
        ax_distr = fig.add_subplot(gs[half:, 0])
        seaborn.distplot(y, bins=40, ax=ax_distr)

        # Plot the top (we should align all to a common scaffold and maybe highlight substructures that matter)
        # Row 0 of the sorted table is the molecule itself (relabsimportance=1E16), so skip it.
        for rank, (inf_molid, row) in enumerate(df.iloc[1:show_top + 1].iterrows()):
            ax_influential_mol = fig.add_subplot(gs[rank, 1])
            _hide_grid_and_ticks(ax_influential_mol)
            mol_color = 'red' if row['label'] == 'INHIBITOR' else 'green'
            good_or_bad_color = 'red' if row['importance'] > 0 else 'green'
            # add decos
            mol = MRDK.molid2mol(inf_molid)
            AllChem.Compute2DCoords(mol)
            image = rdkit2im(mol)
            image = artdeco1(image, decos=(('black', good_or_bad_color),))
            image = artdeco2(image, color=mol_color)
            ax_influential_mol.imshow(image)
            ax_influential_mol.set_title('%s, inf=%.4f, cooc_loss=%.4f' %
                                         (inf_molid, row['importance'], row['cooc_loss']))
            # FIXME: cooc_loss also with stddev and standard error

        fig.suptitle('%s, r2=%.2f, cooc_loss=%.4f +/- %.4f' % (molid, rsquared, float(np.mean(y)), float(np.std(y))))
        plt.savefig(op.join(moldir, 'verde_que_te_quiero_verde.png'), bbox_inches='tight')
        plt.close()