Esempio n. 1
0
def mean(resultrootdir, normal, mutant, layerlist, savebinary,
         normal_timesteps, mutant_timesteps):
    """
    Per-node mean score of both worms and the difference between them.

    For every layer, the node scores of each trajectory are averaged along
    axis 1 (the time axis) and then averaged over all trajectories.  The
    result is saved as ``<layer>_mean.csv`` inside
    ``resultrootdir/<normal>_vs_<mutant>`` with the columns
    node index, normal mean, mutant mean, mutant - normal, normal - mutant.

    NOTE(review): the last two column labels written to the file header
    contain '/' although the stored values are differences, not ratios.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps, mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores`` for the normal
        and the mutant worm (presumably the time length of each
        trajectory -- TODO confirm)
    """
    pair_dir = os.path.join(resultrootdir, normal + '_vs_' + mutant)
    worms = ((normal, normal_timesteps), (mutant, mutant_timesteps))

    for layer in layerlist:
        print(layer)

        node_means = []
        for worm, timesteps in worms:
            trajectories = io_utils.get_nodescores(
                os.path.join(pair_dir, worm), layer, savebinary, timesteps)
            # first average along the time axis of every trajectory ...
            per_trajectory = np.array(
                [np.mean(trajectory, axis=1) for trajectory in trajectories])
            # ... then over all trajectories
            node_means.append(np.mean(per_trajectory, axis=0))
        normal_mean, mutant_mean = node_means

        columns = (
            np.arange(len(normal_mean)),
            normal_mean,
            mutant_mean,
            mutant_mean - normal_mean,
            normal_mean - mutant_mean,
        )
        column_names = [
            'node',
            'mean_' + normal,
            'mean_' + mutant,
            'mean_' + mutant + '/' + normal,
            'mean_' + normal + '/' + mutant,
        ]
        np.savetxt(os.path.join(pair_dir, layer + '_mean.csv'),
                   np.stack(columns, axis=1),
                   header=io_utils.delimited_list(column_names, ' '))
Esempio n. 2
0
def _node_feature_correlation(feature, score):
    """Return the (node x feature) block of the joint correlation matrix."""
    n_feature = len(feature)
    # np.corrcoef stacks the rows of both inputs as variables; the rows after
    # the first n_feature belong to the nodes, the first n_feature columns to
    # the features.
    return np.corrcoef(feature, score)[n_feature:, :n_feature]


def _write_correlation_csv(path, header, layerlist, corlist):
    """Write one correlation table, one line per (layer, node)."""
    # 'with' guarantees the file is closed even if writing fails half-way.
    with open(path, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(header))
        for layer, cor in zip(layerlist, corlist):
            for node, row in enumerate(cor):
                line = [layer, str(node)] + list(row)
                io_utils.writeline(f, io_utils.delimited_list(line))


def correlation(datasetrootdir, resultrootdir, normal, mutant, layerlist,
                savebinary):
    """
    correlation between activation and existing feature

    For every layer the node scores of all trajectories are concatenated
    along axis 1 and correlated with the equally concatenated features.
    Three files are written into ``resultrootdir/<normal>_vs_<mutant>``:
    ``correlation_<normal>.csv``, ``correlation_<mutant>.csv`` and
    ``correlation.csv`` (normal and mutant data concatenated).  Each line
    holds the layer name, the node index and one correlation coefficient
    per feature.

    Parameters
    =======================================
    datasetrootdir : str
        root directory of dataset
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    """
    tag = normal + '_vs_' + mutant

    # Only the header returned by the second call is used for the output,
    # exactly as before.
    normalfeaturelist, _ = io_utils.get_features(
        os.path.join(datasetrootdir, normal, const.allfeature))
    mutantfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, mutant, const.allfeature))

    normal_timesteps = [x.shape[1] for x in normalfeaturelist]
    mutant_timesteps = [x.shape[1] for x in mutantfeaturelist]

    normalfeature = np.concatenate(normalfeaturelist, axis=1)
    mutantfeature = np.concatenate(mutantfeaturelist, axis=1)
    # Does not depend on the layer, so it is built once instead of per layer.
    concatenated_feature = np.concatenate((normalfeature, mutantfeature),
                                          axis=1)

    normalcorlist = []
    mutantcorlist = []
    conccorlist = []
    for layer in layerlist:
        print(layer)
        normalscorelist = io_utils.get_nodescores(
            os.path.join(resultrootdir, tag, normal), layer, savebinary,
            normal_timesteps)
        mutantscorelist = io_utils.get_nodescores(
            os.path.join(resultrootdir, tag, mutant), layer, savebinary,
            mutant_timesteps)

        # correlation between raw features and activations
        normalscore = np.concatenate(normalscorelist, axis=1)
        normalcorlist.append(
            _node_feature_correlation(normalfeature, normalscore))

        mutantscore = np.concatenate(mutantscorelist, axis=1)
        mutantcorlist.append(
            _node_feature_correlation(mutantfeature, mutantscore))

        concatenated_score = np.concatenate((normalscore, mutantscore), axis=1)
        conccorlist.append(
            _node_feature_correlation(concatenated_feature,
                                      concatenated_score))

    csv_header = ['layer', 'node'] + header
    outputs = (
        ('correlation_' + normal + '.csv', normalcorlist),
        ('correlation_' + mutant + '.csv', mutantcorlist),
        ('correlation.csv', conccorlist),
    )
    for filename, corlist in outputs:
        _write_correlation_csv(os.path.join(resultrootdir, tag, filename),
                               csv_header, layerlist, corlist)
Esempio n. 3
0
def calc_attention_var_each_file(resultrootdir,
                                 normal,
                                 mutant,
                                 layerlist,
                                 savebinary,
                                 normal_timesteps,
                                 mutant_timesteps,
                                 num_node=const.num_node):
    """
    Write the per-trajectory variance of every node score to CSV.

    Two files are created in ``resultrootdir/<normal>_vs_<mutant>``:
    ``<normal>_each_attn_total.csv`` and ``<mutant>_each_attn_total.csv``.
    The header line is ``layer,node`` followed by the names of the score
    files found for the FIRST layer in ``layerlist``; every following line
    holds the layer, the node index and, per trajectory, the variance
    (``np.var`` with ``ddof=1``) of that node's scores.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer (must not be empty, the first entry is used to build
        the header)
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps, mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores``
    num_node : int, optional
        the number of node in a hidden layer
    """
    tag = normal + '_vs_' + mutant

    normal_path = os.path.join(resultrootdir, tag,
                               normal + '_each_attn_total.csv')
    mutant_path = os.path.join(resultrootdir, tag,
                               mutant + '_each_attn_total.csv')

    # 'with' closes both files even when reading the scores raises.
    with open(normal_path, 'w') as nf, open(mutant_path, 'w') as mf:
        for out, worm in ((nf, normal), (mf, mutant)):
            files = io_utils.get_filelist(
                os.path.join(resultrootdir, tag, worm, layerlist[0]),
                savebinary)
            out.write(','.join(
                ['layer', 'node'] +
                [io_utils.filename_from_fullpath(x, False) for x in files]))
            out.write('\n')

        for layer in layerlist:
            print(layer)

            normal_eachdata = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, normal), layer, savebinary,
                normal_timesteps)
            mutant_eachdata = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, mutant), layer, savebinary,
                mutant_timesteps)

            for n in range(num_node):
                # one variance per trajectory for node n
                normal_resultlist = [layer, n] + [
                    np.var(data[n], ddof=1) for data in normal_eachdata
                ]
                mutant_resultlist = [layer, n] + [
                    np.var(data[n], ddof=1) for data in mutant_eachdata
                ]

                nf.write(','.join(map(str, normal_resultlist)))
                nf.write('\n')
                mf.write(','.join(map(str, mutant_resultlist)))
                mf.write('\n')
Esempio n. 4
0
def comparehist_each_file(resultrootdir,
                          normal,
                          mutant,
                          layerlist,
                          savebinary,
                          normal_timesteps,
                          mutant_timesteps,
                          num_node=const.num_node,
                          method=2):
    """
    Compare the score histogram of every trajectory with the other worm.

    For each layer and node, the histogram of every single normal
    trajectory is compared with the histogram of ALL mutant trajectories
    pooled together, and the histogram of every single mutant trajectory is
    compared with the pooled normal histogram.  Results are written to
    ``<normal>_each_histdist.csv`` and ``<mutant>_each_histdist.csv`` in
    ``resultrootdir/<normal>_vs_<mutant>``.  The header line is
    ``layer,node`` followed by the names of the score files found for the
    FIRST layer in ``layerlist``.

    Histograms use 200 bins over [-1, 1]; for layers whose name starts with
    "attention" the range is the min/max of the node's pooled scores of
    both worms.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer (must not be empty, the first entry is used to build
        the header)
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps, mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores``
    num_node : int, optional
        the number of node in a hidden layer
    method : int, optional
        comparison method forwarded to ``histogram.compareHist``
        (presumably 2 means histogram intersection -- TODO confirm)
    """
    tag = normal + '_vs_' + mutant

    normal_path = os.path.join(resultrootdir, tag,
                               normal + '_each_histdist.csv')
    mutant_path = os.path.join(resultrootdir, tag,
                               mutant + '_each_histdist.csv')

    # 'with' closes both files even when a later step raises.
    with open(normal_path, 'w') as nf, open(mutant_path, 'w') as mf:
        for out, worm in ((nf, normal), (mf, mutant)):
            files = io_utils.get_filelist(
                os.path.join(resultrootdir, tag, worm, layerlist[0]),
                savebinary)
            out.write(','.join(
                ['layer', 'node'] +
                [io_utils.filename_from_fullpath(x, False) for x in files]))
            out.write('\n')

        hdims = 200
        for layer in layerlist:
            print(layer)

            normal_eachdata = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, normal), layer, savebinary,
                normal_timesteps)
            data1 = np.concatenate(normal_eachdata, axis=1)

            mutant_eachdata = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, mutant), layer, savebinary,
                mutant_timesteps)
            data2 = np.concatenate(mutant_eachdata, axis=1)

            for n in range(num_node):
                hranges = [-1.0, 1.0]
                if layer.startswith("attention"):
                    # attention layers: use the observed range of this node
                    att_max = max(data1[n].max(), data2[n].max())
                    att_min = min(data1[n].min(), data2[n].min())
                    hranges = [att_min, att_max]

                # pooled histograms of each worm for node n
                hist1 = histogram.calcHist(data1[n], hdims, hranges)
                hist2 = histogram.calcHist(data2[n], hdims, hranges)

                normal_resultlist = [layer, n]
                for each in normal_eachdata:
                    each_hist = histogram.calcHist(each[n], hdims, hranges)
                    # single normal trajectory vs. pooled mutant
                    normal_resultlist.append(
                        histogram.compareHist(each_hist, hist2, method))

                mutant_resultlist = [layer, n]
                for each in mutant_eachdata:
                    each_hist = histogram.calcHist(each[n], hdims, hranges)
                    # pooled normal vs. single mutant trajectory
                    mutant_resultlist.append(
                        histogram.compareHist(hist1, each_hist, method))

                nf.write(','.join(map(str, normal_resultlist)))
                nf.write('\n')
                mf.write(','.join(map(str, mutant_resultlist)))
                mf.write('\n')
Esempio n. 5
0
def comparehist(resultrootdir,
                normal,
                mutant,
                layerlist,
                savebinary,
                normal_timesteps,
                mutant_timesteps,
                num_node=const.num_node):
    """
    Compare the pooled score histograms of normal and mutant per node.

    For every layer a ``<layer>_histdist.csv`` is written to
    ``resultrootdir/<normal>_vs_<mutant>`` with one row per node holding the
    node index, four ``histogram.compareHist`` results (methods 0-3) and an
    area based distance of the normalized histograms.  When the
    ``gmm_model/<worm>/<layer>`` directories of both worms exist, a 'PDF'
    column is added: the same area based distance computed on the densities
    of the stored mixture models, sampled at 100000 uniformly random points
    inside the histogram range.  If the directories exist but the model
    file of a node is missing, NaN is written for that node so every row
    matches the header.

    Histograms use 200 bins over [-1, 1]; for layers whose name starts with
    "attention" the range is the min/max of the node's pooled scores of
    both worms.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps, mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores``
    num_node : int, optional
        the number of node in a hidden layer
    """
    tag = normal + '_vs_' + mutant
    hdims = 200

    for layer in layerlist:
        print(layer)

        resultlist = []
        scorelist = io_utils.get_nodescores(
            os.path.join(resultrootdir, tag, normal), layer, savebinary,
            normal_timesteps)
        data1 = np.concatenate(scorelist, axis=1)

        scorelist = io_utils.get_nodescores(
            os.path.join(resultrootdir, tag, mutant), layer, savebinary,
            mutant_timesteps)
        data2 = np.concatenate(scorelist, axis=1)

        savemodeldir_normal = os.path.join(resultrootdir, tag, 'gmm_model',
                                           normal, layer)
        savemodeldir_mutant = os.path.join(resultrootdir, tag, 'gmm_model',
                                           mutant, layer)

        header = [
            'node', 'CORREL', 'CHISQR', 'INTERSECT', 'BHATTACHARYYA',
            'histogram'
        ]

        has_pdf_column = os.path.exists(
            savemodeldir_normal) and os.path.exists(savemodeldir_mutant)
        if has_pdf_column:
            header += ['PDF']

        for n in range(num_node):
            hranges = [-1.0, 1.0]
            if layer.startswith("attention"):
                # attention layers: use the observed range of this node
                att_max = max(data1[n].max(), data2[n].max())
                att_min = min(data1[n].min(), data2[n].min())
                hranges = [att_min, att_max]

            tmplist = [n]
            print('node-' + str(n))
            hist1 = histogram.calcHist(data1[n], hdims, hranges)
            hist2 = histogram.calcHist(data2[n], hdims, hranges)

            normalized_hist1 = hist1 / np.sum(hist1)
            normalized_hist2 = hist2 / np.sum(hist2)

            dist0 = histogram.compareHist(hist1, hist2,
                                          0)  # CV_COMP_CORREL [-1, 1]
            dist1 = histogram.compareHist(normalized_hist1, normalized_hist2,
                                          1)  # CV_COMP_CHISQR [0, inf)
            dist2 = histogram.compareHist(normalized_hist1, normalized_hist2,
                                          2)  # CV_COMP_INTERSECT [0, 1]
            dist3 = histogram.compareHist(hist1, hist2,
                                          3)  # CV_COMP_BHATTACHARYYA

            # max(a, b) - |a - b| equals min(a, b), so 'match' is the
            # overlapping area and 'diff' the non-overlapping area.
            diff = np.sum(np.abs(normalized_hist1 - normalized_hist2))
            match = np.sum(
                np.maximum(normalized_hist1, normalized_hist2) -
                np.abs(normalized_hist1 - normalized_hist2))
            area_hist = diff / match

            tmplist += [dist0, dist1, dist2, dist3, area_hist]

            model_name = 'node-' + str(n) + '.pkl'
            model_normal = os.path.join(savemodeldir_normal, model_name)
            model_mutant = os.path.join(savemodeldir_mutant, model_name)

            if os.path.exists(model_normal) and os.path.exists(model_mutant):
                # NOTE: joblib.load unpickles the file; only load model
                # files from a trusted result directory.
                gmm1 = joblib.load(model_normal)
                gmm2 = joblib.load(model_mutant)

                # uniformly random sample points inside the histogram range
                samples = np.random.rand(100000) * (hranges[1] -
                                                    hranges[0]) + hranges[0]

                log_pdf1 = gmm1.score_samples(samples.reshape(-1, 1))
                log_pdf2 = gmm2.score_samples(samples.reshape(-1, 1))

                pdf1 = np.exp(log_pdf1)
                pdf2 = np.exp(log_pdf2)

                diff = np.sum(np.abs(pdf1 - pdf2))
                match = np.sum(np.maximum(pdf1, pdf2) - np.abs(pdf1 - pdf2))

                area_pdf = diff / match
                tmplist.append(area_pdf)
            elif has_pdf_column:
                # The header announces a PDF column but this node has no
                # stored model: pad with NaN so all rows keep equal length.
                tmplist.append(np.nan)

            resultlist.append(tmplist)

        np.savetxt(os.path.join(resultrootdir, tag, layer + '_histdist.csv'),
                   np.array(resultlist),
                   header=' '.join(header))
Esempio n. 6
0
def gmm_fit(resultrootdir, normal, mutant, layerlist, savebinary,
            normal_timesteps, mutant_timesteps):
    """
    fit GMM to histogram of activation

    For every layer and node, a 5-component Gaussian mixture with diagonal
    covariance is fitted to the pooled scores of the normal worm and to the
    pooled scores of the mutant worm.  Inside
    ``resultrootdir/<normal>_vs_<mutant>`` the function writes

    * ``gmm/<layer>/node-<n>.png`` : both fitted mixtures side by side
    * ``gmm/<layer>/gmmparams.txt`` : per node the node index followed by
      means, covariances and weights of the normal and then of the mutant
      mixture
    * ``gmm_model/<worm>/<layer>/node-<n>.pkl`` : the fitted models

    The plot range is [-1, 1]; for layers whose name starts with
    "attention" it is the min/max of the node's pooled scores of both worms.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps, mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores``
    """
    tag = normal + '_vs_' + mutant
    n_components = 5

    fig = plt.figure()
    try:
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        for layer in layerlist:
            print(layer)

            scorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, normal), layer, savebinary,
                normal_timesteps)
            data1 = np.concatenate(scorelist, axis=1)

            scorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, mutant), layer, savebinary,
                mutant_timesteps)
            data2 = np.concatenate(scorelist, axis=1)

            savedir = os.path.join(resultrootdir, tag, 'gmm', layer)
            savemodeldir_normal = os.path.join(resultrootdir, tag,
                                               'gmm_model', normal, layer)
            savemodeldir_mutant = os.path.join(resultrootdir, tag,
                                               'gmm_model', mutant, layer)

            for directory in (savedir, savemodeldir_normal,
                              savemodeldir_mutant):
                if not os.path.exists(directory):
                    os.makedirs(directory)

            params = []

            for n in range(len(data1)):
                node_name = 'node-' + str(n)
                print(node_name)
                # the two axes are reused for every node
                ax1.cla()
                ax2.cla()

                graph_max = 1
                graph_min = -1
                if layer.startswith("attention"):
                    graph_max = max(data1[n].max(), data2[n].max())
                    graph_min = min(data1[n].min(), data2[n].min())

                gmm1 = mixture.GaussianMixture(n_components,
                                               covariance_type='diag')
                gmm1.fit(data1[n].reshape(-1, 1))
                plot_gmm(gmm1, ax1, graph_min, graph_max)
                ax1.set_xlabel(normal)

                gmm2 = mixture.GaussianMixture(n_components,
                                               covariance_type='diag')
                gmm2.fit(data2[n].reshape(-1, 1))
                plot_gmm(gmm2, ax2, graph_min, graph_max)
                ax2.set_xlabel(mutant)

                params.append(
                    np.concatenate(
                        ([n], gmm1.means_[:, 0], gmm1.covariances_[:, 0],
                         gmm1.weights_, gmm2.means_[:, 0],
                         gmm2.covariances_[:, 0], gmm2.weights_)))

                # save this figure explicitly instead of whatever figure
                # happens to be the current one
                fig.savefig(os.path.join(savedir, node_name + '.png'))

                joblib.dump(
                    gmm1, os.path.join(savemodeldir_normal,
                                       node_name + '.pkl'))
                joblib.dump(
                    gmm2, os.path.join(savemodeldir_mutant,
                                       node_name + '.pkl'))

            np.savetxt(os.path.join(savedir, 'gmmparams.txt'),
                       np.array(params),
                       delimiter=',')
    finally:
        # release the figure; otherwise every call leaves one open in pyplot
        plt.close(fig)
Esempio n. 7
0
def plot_hist(resultrootdir, normal, mutant, layerlist, savebinary,
              normal_timesteps, mutant_timesteps):
    """
    save histogram of activation

    For every layer and node, the pooled scores of the normal worm and of
    the mutant worm are drawn as two histograms (100 bins each) side by
    side.  Inside ``resultrootdir/<normal>_vs_<mutant>/hist/<layer>`` the
    function writes ``node-<n>.png`` (the figure) and ``node-<n>.csv`` with
    the columns bin start, bin end, normal count, mutant count.

    The histogram range is [-1, 1]; for layers whose name starts with
    "attention" it is the min/max of the node's pooled scores of both worms.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps, mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores``
    """
    tag = normal + '_vs_' + mutant

    fig = plt.figure()
    try:
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        for layer in layerlist:
            print(layer)

            scorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, normal), layer, savebinary,
                normal_timesteps)
            data1 = np.concatenate(scorelist, axis=1)

            scorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, mutant), layer, savebinary,
                mutant_timesteps)
            data2 = np.concatenate(scorelist, axis=1)

            savedir = os.path.join(resultrootdir, tag, 'hist', layer)
            if not os.path.exists(savedir):
                os.makedirs(savedir)

            for n in range(len(data1)):
                # the two axes are reused for every node
                ax1.cla()
                ax2.cla()

                graph_max = 1
                graph_min = -1
                if layer.startswith("attention"):
                    graph_max = max(data1[n].max(), data2[n].max())
                    graph_min = min(data1[n].min(), data2[n].min())

                # 101 edges -> 100 bins, identical for both worms
                bin_edges = np.linspace(graph_min, graph_max, 101)
                hist1 = ax1.hist(data1[n], bins=bin_edges)
                hist2 = ax2.hist(data2[n], bins=bin_edges)
                ax1.set_xlabel(normal)
                ax2.set_xlabel(mutant)
                ax1.set_xlim(graph_min, graph_max)
                ax2.set_xlim(graph_min, graph_max)

                node_name = 'node-' + str(n)
                # save this figure explicitly instead of whatever figure
                # happens to be the current one
                fig.savefig(os.path.join(savedir, node_name + '.png'))
                # ax.hist returns (counts, edges, patches)
                np.savetxt(os.path.join(savedir, node_name + '.csv'),
                           np.vstack((hist1[1][:-1], hist1[1][1:], hist1[0],
                                      hist2[0])).T,
                           header='start,end,' + normal + ',' + mutant,
                           delimiter=',')
    finally:
        # release the figure; otherwise every call leaves one open in pyplot
        plt.close(fig)
Esempio n. 8
0
def _attended_values(featurelist, masks, feat_idx):
    """Collect the values of one feature at the masked steps of all trajectories."""
    values = []
    for features, mask in zip(featurelist, masks):
        feature = features[feat_idx]
        # the mask is cut to the length of the feature before indexing
        values.extend(feature[mask[:len(feature)]].tolist())
    return values


def compare_attended(datasetrootdir, resultrootdir, normal, mutant, layerlist,
                     savebinary):
    """
    compare existing features in attended segments

    Only layers whose name starts with "attention" and does not contain
    "last" are processed.  For such a layer a threshold is placed half-way
    between the smallest and the (see note below) largest score; the time
    steps where the first score row of a trajectory exceeds the threshold
    are the "attended" ones.  For every feature the values at the attended
    steps of normal and mutant are turned into normalized 100-bin
    histograms over their common range and compared.

    Output inside ``resultrootdir/<normal>_vs_<mutant>``:

    * ``attended_feature_diff.csv`` : one line per processed layer with
      ``1 - histogram.compareHist(hist1, hist2, 2)`` for every feature
    * ``attended_feature_hist/<layer>/<idx>-<feature>.csv`` : bin start,
      bin end, normal histogram, mutant histogram

    Parameters
    =======================================
    datasetrootdir : str
        root directory of dataset
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    """
    tag = normal + '_vs_' + mutant

    # Only the header returned by the second call is used, exactly as before.
    normalfeaturelist, _ = io_utils.get_features(
        os.path.join(datasetrootdir, normal, const.allfeature))
    mutantfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, mutant, const.allfeature))

    normal_timesteps = [x.shape[1] for x in normalfeaturelist]
    mutant_timesteps = [x.shape[1] for x in mutantfeaturelist]

    feature_hist_file = os.path.join(resultrootdir, tag,
                                     "attended_feature_diff.csv")
    # 'with' closes the file even when processing a layer raises.
    with open(feature_hist_file, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(['layer'] + header))
        for layer in layerlist:
            if not layer.startswith("attention") or "last" in layer:
                continue

            one_line = [layer]
            hist_dir = os.path.join(resultrootdir, tag,
                                    "attended_feature_hist", layer)
            if not os.path.exists(hist_dir):
                os.makedirs(hist_dir)
            print(layer)
            normalscorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, normal), layer, savebinary,
                normal_timesteps)
            mutantscorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, mutant), layer, savebinary,
                mutant_timesteps)

            normalscore_all = np.concatenate(normalscorelist, axis=1)
            mutantscore_all = np.concatenate(mutantscorelist, axis=1)
            # NOTE(review): attn_max takes the SMALLER of the two maxima
            # (min, not max).  This may be a copy-paste slip or a deliberate
            # choice to keep the threshold reachable for both worms; the
            # behavior is left unchanged -- confirm the intent.
            attn_max = min(np.nanmax(normalscore_all),
                           np.nanmax(mutantscore_all))
            attn_min = min(np.nanmin(normalscore_all),
                           np.nanmin(mutantscore_all))
            attn_th = (attn_max - attn_min) * 0.5 + attn_min
            print("attn_max", attn_max)
            print("attn_min", attn_min)
            print("attn_th", attn_th)

            # The masks depend only on the scores, so they are built once
            # per layer instead of once per feature.
            normal_masks = [scores[0] > attn_th for scores in normalscorelist]
            mutant_masks = [scores[0] > attn_th for scores in mutantscorelist]

            for feat_idx, feature in enumerate(header):
                print(feature)
                norm_attended_features = _attended_values(
                    normalfeaturelist, normal_masks, feat_idx)
                mutant_attended_features = _attended_values(
                    mutantfeaturelist, mutant_masks, feat_idx)

                # common value range of both worms for this feature
                feat_max = max(np.max(norm_attended_features),
                               np.max(mutant_attended_features))
                feat_min = min(np.min(norm_attended_features),
                               np.min(mutant_attended_features))
                hist1 = histogram.calcHist(norm_attended_features, 100,
                                           [feat_min, feat_max])
                hist1 = hist1 / np.sum(hist1)
                hist2 = histogram.calcHist(mutant_attended_features, 100,
                                           [feat_min, feat_max])
                hist2 = hist2 / np.sum(hist2)
                bins = np.linspace(feat_min, feat_max, 101)
                inverse_overlap = 1.0 - histogram.compareHist(hist1, hist2, 2)
                one_line.append(str(inverse_overlap))
                print("inverse_overlap", inverse_overlap)

                # replace characters that are unsafe in a file name
                feature_fname = re.sub(r'[\\|/|:|?|.|"|<|>|\|]', '-', feature)
                np.savetxt(os.path.join(
                    hist_dir,
                    str(feat_idx) + '-' + feature_fname + '.csv'),
                           np.vstack((bins[:-1], bins[1:], hist1, hist2)).T,
                           header='start,end,' + normal + ',' + mutant,
                           delimiter=',')
            io_utils.writeline(f, io_utils.delimited_list(one_line))