def tortoise_coefficient_statistics(pickle_output=None, generate_graphs=True):
    """Aggregate the cluster status reports stored in /tmp/baistats/.

    Every file in ``/tmp/baistats/`` whose name starts with
    ``cluster_status_report_pid`` is loaded with ``SER.load`` and folded into
    two tables of running statistics:

    * ``coeff_stats[coefficient]``
    * ``cluster_stats[cluster][coefficient]``

    Each table entry is a six element list holding, in order: number of
    reports seen, average coefficient, minimum coefficient, maximum
    coefficient, average number of clusters and average number of clusters
    divided by the maximum length stored in the report.

    Reports whose coefficient (first element) is falsy are skipped.

    :param pickle_output: when truthy, path of the file both tables are
        written to with ``SER.dump`` after all the reports were processed.
    :param generate_graphs: when true, ``_gen_plot`` is called on the
        coefficient table every 1000 files and on every table at the end;
        the graphs are written below ``/tmp/graphs/``.
    :raise AssertionError: when a report fails one of the sanity checks.
    """
    override_stdout_config(stdout=True)

    files = ['/tmp/baistats/' + x for x in os.listdir('/tmp/baistats/')
             if x.startswith('cluster_status_report_pid')]
    fnum = float(len(files))
    # Without report files there is no per-file progress slice to compute;
    # dividing unconditionally would raise ZeroDivisionError.
    quanta = .1 / fnum if fnum else 0.

    #av_counter, avg, min, max, nclus, normalized_avg
    cluster_stats = defaultdict(lambda: defaultdict(lambda: [0., 0., 0., 0., 0., 0.]))
    coeff_stats = defaultdict(lambda: [0., 0., 0., 0., 0., 0.])

    def _accumulate(slot, avg, low, high, nclus, norm_nclus):
        """Fold the figures of one report into the six element list slot."""
        seen = slot[0]
        total = seen + 1
        slot[0] = total
        slot[1] = (slot[1] * seen + avg) / total
        # A fresh slot only holds 0. placeholders: comparing against them
        # would pin the minimum to 0 forever, so the first report seeds the
        # extremes directly.
        slot[2] = min(slot[2], low) if seen else low
        slot[3] = max(slot[3], high) if seen else high
        slot[4] = (slot[4] * seen + nclus) / total
        slot[5] = (slot[5] * seen + norm_nclus) / total

    def gen_graphs(only_synthetic=False):
        """Plot the coefficient table and, unless only_synthetic, every cluster table."""
        update_status(0, 'Generating coefficients graph...')
        _gen_plot(coeff_stats, '/tmp/graphs/AAAAA-coefficients.svg')
        if not only_synthetic:
            cn = cluster_stats.keys()
            l = float(len(cn))
            for i, c in enumerate(cn):
                update_status(i / l, 'Generating name graphs... %s' % str(c))
                _gen_plot(cluster_stats[c], '/tmp/graphs/CS-%s.png' % str(c))

    for i, fi in enumerate(files):
        if generate_graphs and i % 1000 == 0:
            gen_graphs(True)

        # NOTE(review): SER looks like a pickle-style serializer and /tmp is
        # usually world-writable; confirm these files come from a trusted
        # producer before loading them.
        with open(fi, 'r') as f:
            status = i / fnum
            update_status(status, 'Loading ' + fi[fi.find('lastname') + 9:])
            contents = SER.load(f)

        cur_coef = contents[0]
        cur_clust = contents[1]
        cur_maxlen = float(contents[3])

        if not cur_coef:
            continue

        update_status(status + 0.2 * quanta, '  Computing averages...')

        records = contents[2]
        cur_clen = len(records)
        cur_coeffs = [x[2] for x in records]
        cur_clustnumber = float(len(set([x[0] for x in records])))

        assert 0 < cur_clustnumber < cur_maxlen, "Error, found log with strange clustnumber! %s %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_maxlen), str(cur_clustnumber))

        if not cur_coeffs:
            continue

        assert len(cur_coeffs) == cur_clen and cur_coeffs, "Error, there is a cluster witohut stuff? %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))
        assert all(0 <= x <= 1 for x in cur_coeffs), "Error, a coefficient is wrong here! Check me! %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))

        cur_min = min(cur_coeffs)
        cur_max = max(cur_coeffs)
        # float() keeps the average exact even if the coefficients are
        # integers on an interpreter with integer division.
        cur_avg = sum(cur_coeffs) / float(cur_clen)
        cur_normclust = cur_clustnumber / cur_maxlen

        update_status(status + 0.4 * quanta, '  comulative per coeff...')
        _accumulate(coeff_stats[cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

        update_status(status + 0.6 * quanta, '  comulative per cluster per coeff...')
        _accumulate(cluster_stats[cur_clust][cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

    update_status_final('Done!')

    if generate_graphs:
        gen_graphs()

    if pickle_output:
        update_status(0, 'Dumping to file...')
        # Plain dicts are dumped: the defaultdict factories are lambdas.
        with open(pickle_output, 'w') as f:
            SER.dump({'cluster_stats': dict((x, dict(cluster_stats[x])) for x in cluster_stats),
                      'coeff_stats': dict(coeff_stats)}, f)
# Example #2
# 0
def tortoise_coefficient_statistics(pickle_output=None, generate_graphs=True):
    """Aggregate the cluster status reports stored in /tmp/baistats/.

    Every file in ``/tmp/baistats/`` whose name starts with
    ``cluster_status_report_pid`` is loaded with ``SER.load`` and folded into
    two tables of running statistics:

    * ``coeff_stats[coefficient]``
    * ``cluster_stats[cluster][coefficient]``

    Each table entry is a six element list holding, in order: number of
    reports seen, average coefficient, minimum coefficient, maximum
    coefficient, average number of clusters and average number of clusters
    divided by the maximum length stored in the report.

    Reports whose coefficient (first element) is falsy are skipped.

    :param pickle_output: when truthy, path of the file both tables are
        written to with ``SER.dump`` after all the reports were processed.
    :param generate_graphs: when true, ``_gen_plot`` is called on the
        coefficient table every 1000 files and on every table at the end;
        the graphs are written below ``/tmp/graphs/``.
    :raise AssertionError: when a report fails one of the sanity checks.
    """
    override_stdout_config(stdout=True)

    files = [
        '/tmp/baistats/' + x for x in os.listdir('/tmp/baistats/')
        if x.startswith('cluster_status_report_pid')
    ]
    fnum = float(len(files))
    # Without report files there is no per-file progress slice to compute;
    # dividing unconditionally would raise ZeroDivisionError.
    quanta = .1 / fnum if fnum else 0.

    #av_counter, avg, min, max, nclus, normalized_avg
    cluster_stats = defaultdict(
        lambda: defaultdict(lambda: [0., 0., 0., 0., 0., 0.]))
    coeff_stats = defaultdict(lambda: [0., 0., 0., 0., 0., 0.])

    def _accumulate(slot, avg, low, high, nclus, norm_nclus):
        """Fold the figures of one report into the six element list slot."""
        seen = slot[0]
        total = seen + 1
        slot[0] = total
        slot[1] = (slot[1] * seen + avg) / total
        # A fresh slot only holds 0. placeholders: comparing against them
        # would pin the minimum to 0 forever, so the first report seeds the
        # extremes directly.
        slot[2] = min(slot[2], low) if seen else low
        slot[3] = max(slot[3], high) if seen else high
        slot[4] = (slot[4] * seen + nclus) / total
        slot[5] = (slot[5] * seen + norm_nclus) / total

    def gen_graphs(only_synthetic=False):
        """Plot the coefficient table and, unless only_synthetic, every cluster table."""
        update_status(0, 'Generating coefficients graph...')
        _gen_plot(coeff_stats, '/tmp/graphs/AAAAA-coefficients.svg')
        if not only_synthetic:
            cn = cluster_stats.keys()
            l = float(len(cn))
            for i, c in enumerate(cn):
                update_status(i / l, 'Generating name graphs... %s' % str(c))
                _gen_plot(cluster_stats[c], '/tmp/graphs/CS-%s.png' % str(c))

    for i, fi in enumerate(files):
        if generate_graphs and i % 1000 == 0:
            gen_graphs(True)

        # NOTE(review): SER looks like a pickle-style serializer and /tmp is
        # usually world-writable; confirm these files come from a trusted
        # producer before loading them.
        with open(fi, 'r') as f:
            status = i / fnum
            update_status(status, 'Loading ' + fi[fi.find('lastname') + 9:])
            contents = SER.load(f)

        cur_coef = contents[0]
        cur_clust = contents[1]
        cur_maxlen = float(contents[3])

        if not cur_coef:
            continue

        update_status(status + 0.2 * quanta, '  Computing averages...')

        records = contents[2]
        cur_clen = len(records)
        cur_coeffs = [x[2] for x in records]
        cur_clustnumber = float(len(set([x[0] for x in records])))

        assert 0 < cur_clustnumber < cur_maxlen, "Error, found log with strange clustnumber! %s %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_maxlen),
            str(cur_clustnumber))

        if not cur_coeffs:
            continue

        assert len(
            cur_coeffs
        ) == cur_clen and cur_coeffs, "Error, there is a cluster witohut stuff? %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))
        assert all(
            0 <= x <= 1 for x in cur_coeffs
        ), "Error, a coefficient is wrong here! Check me! %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))

        cur_min = min(cur_coeffs)
        cur_max = max(cur_coeffs)
        # float() keeps the average exact even if the coefficients are
        # integers on an interpreter with integer division.
        cur_avg = sum(cur_coeffs) / float(cur_clen)
        cur_normclust = cur_clustnumber / cur_maxlen

        update_status(status + 0.4 * quanta,
                      '  comulative per coeff...')
        _accumulate(coeff_stats[cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

        update_status(status + 0.6 * quanta,
                      '  comulative per cluster per coeff...')
        _accumulate(cluster_stats[cur_clust][cur_coef], cur_avg, cur_min,
                    cur_max, cur_clustnumber, cur_normclust)

    update_status_final('Done!')

    if generate_graphs:
        gen_graphs()

    if pickle_output:
        update_status(0, 'Dumping to file...')
        # Plain dicts are dumped: the defaultdict factories are lambdas.
        with open(pickle_output, 'w') as f:
            SER.dump(
                {
                    'cluster_stats':
                    dict((x, dict(cluster_stats[x])) for x in cluster_stats),
                    'coeff_stats':
                    dict(coeff_stats)
                }, f)
def tortoise_coefficient_statistics(pickle_output=None, generate_graphs=True):
    """Aggregate the cluster status reports stored in /tmp/baistats/.

    Every file in ``/tmp/baistats/`` whose name starts with
    ``cluster_status_report_pid`` is loaded with ``SER.load`` and folded into
    two tables of running statistics:

    * ``coeff_stats[coefficient]``
    * ``cluster_stats[cluster][coefficient]``

    Each table entry is a six element list holding, in order: number of
    reports seen, average coefficient, minimum coefficient, maximum
    coefficient, average number of clusters and average number of clusters
    divided by the maximum length stored in the report.

    Reports whose coefficient (first element) is falsy are skipped.

    :param pickle_output: when truthy, path of the file both tables are
        written to with ``SER.dump`` after all the reports were processed.
    :param generate_graphs: when true, the coefficient table is plotted every
        1000 files and every table is plotted at the end; the graphs are
        written below ``/tmp/graphs/``.
    :raise AssertionError: when a report fails one of the sanity checks.
    """
    # Imported here so that matplotlib is only needed when this function runs.
    import matplotlib.pyplot as plt
    plt.ioff()

    def _gen_plot(data, filename):
        """Plot the six statistics of every key of data and save them to filename."""
        plt.clf()
        ax = plt.subplot(111)
        ax.grid(visible=True)
        x = sorted(data.keys())

        # Sample counts, scaled by the largest count. An empty table has no
        # maximum (max() would raise ValueError), so the factor falls back
        # to 0 and nothing is scaled.
        w = [data[k][0] for k in x]
        wscf = max(w) if w else 0
        if wscf:
            w = [float(i) / wscf for i in w]
        y = [data[k][1] for k in x]
        maxi = [data[k][3] for k in x]
        mini = [data[k][2] for k in x]

        # Average cluster numbers, scaled by the largest one in the same way.
        lengs = [data[k][4] for k in x]
        ml = float(max(lengs)) if lengs else 1
        if ml:
            lengs = [k / ml for k in lengs]

        normalengs = [data[k][5] for k in x]

        ax.plot(x, y, '-o', label='avg')
        ax.plot(x, maxi, '-o', label='max')
        ax.plot(x, mini, '-o', label='min')
        ax.plot(x, w, '-x', label='norm %s' % str(wscf))
        ax.plot(x, lengs, '-o', label='acl %s' % str(int(ml)))
        ax.plot(x, normalengs, '-o', label='ncl')
        plt.ylim(ymax = 1., ymin = -0.01)
        plt.xlim(xmax = 1., xmin = -0.01)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=6, mode="expand", borderaxespad=0.)
        plt.savefig(filename)

    override_stdout_config(stdout=True)

    files = ['/tmp/baistats/' + x for x in os.listdir('/tmp/baistats/')
             if x.startswith('cluster_status_report_pid')]
    fnum = float(len(files))
    # Without report files there is no per-file progress slice to compute;
    # dividing unconditionally would raise ZeroDivisionError.
    quanta = .1 / fnum if fnum else 0.

    #av_counter, avg, min, max, nclus, normalized_avg
    cluster_stats = defaultdict(lambda: defaultdict(lambda: [0., 0., 0., 0., 0., 0.]))
    coeff_stats = defaultdict(lambda: [0., 0., 0., 0., 0., 0.])

    def _accumulate(slot, avg, low, high, nclus, norm_nclus):
        """Fold the figures of one report into the six element list slot."""
        seen = slot[0]
        total = seen + 1
        slot[0] = total
        slot[1] = (slot[1] * seen + avg) / total
        # A fresh slot only holds 0. placeholders: comparing against them
        # would pin the minimum to 0 forever, so the first report seeds the
        # extremes directly.
        slot[2] = min(slot[2], low) if seen else low
        slot[3] = max(slot[3], high) if seen else high
        slot[4] = (slot[4] * seen + nclus) / total
        slot[5] = (slot[5] * seen + norm_nclus) / total

    def gen_graphs(only_synthetic=False):
        """Plot the coefficient table and, unless only_synthetic, every cluster table."""
        update_status(0, 'Generating coefficients graph...')
        _gen_plot(coeff_stats, '/tmp/graphs/AAAAA-coefficients.svg')
        if not only_synthetic:
            cn = cluster_stats.keys()
            l = float(len(cn))
            for i, c in enumerate(cn):
                update_status(i / l, 'Generating name graphs... %s' % str(c))
                _gen_plot(cluster_stats[c], '/tmp/graphs/CS-%s.png' % str(c))

    for i, fi in enumerate(files):
        if generate_graphs and i % 1000 == 0:
            gen_graphs(True)

        # NOTE(review): SER looks like a pickle-style serializer and /tmp is
        # usually world-writable; confirm these files come from a trusted
        # producer before loading them.
        f = filehandler.open(fi, 'r')
        # try/finally rather than "with": whether the object returned by
        # filehandler.open is a context manager is not visible from here.
        try:
            status = i / fnum
            update_status(status, 'Loading ' + fi[fi.find('lastname') + 9:])
            contents = SER.load(f)
        finally:
            f.close()

        cur_coef = contents[0]
        cur_clust = contents[1]
        cur_maxlen = float(contents[3])

        if not cur_coef:
            continue

        update_status(status + 0.2 * quanta, '  Computing averages...')

        records = contents[2]
        cur_clen = len(records)
        cur_coeffs = [x[2] for x in records]
        cur_clustnumber = float(len(set([x[0] for x in records])))

        assert 0 < cur_clustnumber < cur_maxlen, "Error, found log with strange clustnumber! %s %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_maxlen), str(cur_clustnumber))

        if not cur_coeffs:
            continue

        assert len(cur_coeffs) == cur_clen and cur_coeffs, "Error, there is a cluster witohut stuff? %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))
        assert all(0 <= x <= 1 for x in cur_coeffs), "Error, a coefficient is wrong here! Check me! %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))

        cur_min = min(cur_coeffs)
        cur_max = max(cur_coeffs)
        # float() keeps the average exact even if the coefficients are
        # integers on an interpreter with integer division.
        cur_avg = sum(cur_coeffs) / float(cur_clen)
        cur_normclust = cur_clustnumber / cur_maxlen

        update_status(status + 0.4 * quanta, '  comulative per coeff...')
        _accumulate(coeff_stats[cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

        update_status(status + 0.6 * quanta, '  comulative per cluster per coeff...')
        _accumulate(cluster_stats[cur_clust][cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

    update_status_final('Done!')

    if generate_graphs:
        gen_graphs()

    if pickle_output:
        update_status(0, 'Dumping to file...')
        # Plain dicts are dumped: the defaultdict factories are lambdas.
        with open(pickle_output, 'w') as f:
            SER.dump({'cluster_stats': dict((x, dict(cluster_stats[x])) for x in cluster_stats),
                      'coeff_stats': dict(coeff_stats)}, f)
import gc
import numpy as np

#This is supposed to defeat a bit of the python vm performance losses:
# setcheckinterval raises the number of bytecode instructions executed between
# two periodic interpreter checks (thread switches, signal handlers).
import sys
sys.setcheckinterval(1000000)

# Fall back to Invenio's own defaultdict when collections does not provide one.
# NOTE(review): the bare except also hides any unrelated error raised by the import.
try:
    from collections import defaultdict
except:
    from invenio.containerutils import defaultdict

from itertools import groupby, chain, repeat
# NOTE(review): override_stdout_config is listed twice in the import below;
# redundant, but harmless.
from invenio.bibauthorid_general_utils import update_status, update_status_final, override_stdout_config, override_stdout_config

# Module-level side effect: the output configuration is changed at import time,
# before the remaining invenio modules are imported. Presumably this routes the
# status output to a file instead of stdout -- confirm against
# bibauthorid_general_utils.
override_stdout_config(fileout=True, stdout=False)

from invenio.bibauthorid_cluster_set import delayed_cluster_sets_from_marktables
from invenio.bibauthorid_cluster_set import delayed_cluster_sets_from_personid
from invenio.bibauthorid_wedge import wedge
from invenio.bibauthorid_name_utils import generate_last_name_cluster_str
from invenio.bibauthorid_backinterface import empty_tortoise_results_table
from invenio.bibauthorid_backinterface import remove_clusters_by_name
from invenio.bibauthorid_general_utils import bibauthor_print
from invenio.bibauthorid_prob_matrix import prepare_matirx
#Scheduler is [temporarily] deprecated in favour of the much simpler schedule_workers
#from invenio.bibauthorid_scheduler import schedule, matrix_coefs
from invenio.bibauthorid_least_squares import to_function as create_approx_func


from invenio.bibauthorid_general_utils import schedule_workers
def tortoise_coefficient_statistics(pickle_output=None, generate_graphs=True):
    """Aggregate the cluster status reports stored in /tmp/baistats/.

    Every file in ``/tmp/baistats/`` whose name starts with
    ``cluster_status_report_pid`` is loaded with ``SER.load`` and folded into
    two tables of running statistics:

    * ``coeff_stats[coefficient]``
    * ``cluster_stats[cluster][coefficient]``

    Each table entry is a six element list holding, in order: number of
    reports seen, average coefficient, minimum coefficient, maximum
    coefficient, average number of clusters and average number of clusters
    divided by the maximum length stored in the report.

    Reports whose coefficient (first element) is falsy are skipped.

    :param pickle_output: when truthy, path of the file both tables are
        written to with ``SER.dump`` after all the reports were processed.
    :param generate_graphs: when true, the coefficient table is plotted every
        1000 files and every table is plotted at the end; the graphs are
        written below ``/tmp/graphs/``.
    :raise AssertionError: when a report fails one of the sanity checks.
    """
    # Imported here so that matplotlib is only needed when this function runs.
    import matplotlib.pyplot as plt
    plt.ioff()

    def _gen_plot(data, filename):
        """Plot the six statistics of every key of data and save them to filename."""
        plt.clf()
        ax = plt.subplot(111)
        ax.grid(visible=True)
        x = sorted(data.keys())

        # Sample counts, scaled by the largest count. An empty table has no
        # maximum (max() would raise ValueError), so the factor falls back
        # to 0 and nothing is scaled.
        w = [data[k][0] for k in x]
        wscf = max(w) if w else 0
        if wscf:
            w = [float(i) / wscf for i in w]
        y = [data[k][1] for k in x]
        maxi = [data[k][3] for k in x]
        mini = [data[k][2] for k in x]

        # Average cluster numbers, scaled by the largest one in the same way.
        lengs = [data[k][4] for k in x]
        ml = float(max(lengs)) if lengs else 1
        if ml:
            lengs = [k / ml for k in lengs]

        normalengs = [data[k][5] for k in x]

        ax.plot(x, y, '-o', label='avg')
        ax.plot(x, maxi, '-o', label='max')
        ax.plot(x, mini, '-o', label='min')
        ax.plot(x, w, '-x', label='norm %s' % str(wscf))
        ax.plot(x, lengs, '-o', label='acl %s' % str(int(ml)))
        ax.plot(x, normalengs, '-o', label='ncl')
        plt.ylim(ymax = 1., ymin = -0.01)
        plt.xlim(xmax = 1., xmin = -0.01)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=6, mode="expand", borderaxespad=0.)
        plt.savefig(filename)

    override_stdout_config(stdout=True)

    files = ['/tmp/baistats/' + x for x in os.listdir('/tmp/baistats/')
             if x.startswith('cluster_status_report_pid')]
    fnum = float(len(files))
    # Without report files there is no per-file progress slice to compute;
    # dividing unconditionally would raise ZeroDivisionError.
    quanta = .1 / fnum if fnum else 0.

    #av_counter, avg, min, max, nclus, normalized_avg
    cluster_stats = defaultdict(lambda: defaultdict(lambda: [0., 0., 0., 0., 0., 0.]))
    coeff_stats = defaultdict(lambda: [0., 0., 0., 0., 0., 0.])

    def _accumulate(slot, avg, low, high, nclus, norm_nclus):
        """Fold the figures of one report into the six element list slot."""
        seen = slot[0]
        total = seen + 1
        slot[0] = total
        slot[1] = (slot[1] * seen + avg) / total
        # A fresh slot only holds 0. placeholders: comparing against them
        # would pin the minimum to 0 forever, so the first report seeds the
        # extremes directly.
        slot[2] = min(slot[2], low) if seen else low
        slot[3] = max(slot[3], high) if seen else high
        slot[4] = (slot[4] * seen + nclus) / total
        slot[5] = (slot[5] * seen + norm_nclus) / total

    def gen_graphs(only_synthetic=False):
        """Plot the coefficient table and, unless only_synthetic, every cluster table."""
        update_status(0, 'Generating coefficients graph...')
        _gen_plot(coeff_stats, '/tmp/graphs/AAAAA-coefficients.svg')
        if not only_synthetic:
            cn = cluster_stats.keys()
            l = float(len(cn))
            for i, c in enumerate(cn):
                update_status(i / l, 'Generating name graphs... %s' % str(c))
                _gen_plot(cluster_stats[c], '/tmp/graphs/CS-%s.png' % str(c))

    for i, fi in enumerate(files):
        if generate_graphs and i % 1000 == 0:
            gen_graphs(True)

        # NOTE(review): SER looks like a pickle-style serializer and /tmp is
        # usually world-writable; confirm these files come from a trusted
        # producer before loading them.
        f = filehandler.open(fi, 'r')
        # try/finally rather than "with": whether the object returned by
        # filehandler.open is a context manager is not visible from here.
        try:
            status = i / fnum
            update_status(status, 'Loading ' + fi[fi.find('lastname') + 9:])
            contents = SER.load(f)
        finally:
            f.close()

        cur_coef = contents[0]
        cur_clust = contents[1]
        cur_maxlen = float(contents[3])

        if not cur_coef:
            continue

        update_status(status + 0.2 * quanta, '  Computing averages...')

        records = contents[2]
        cur_clen = len(records)
        cur_coeffs = [x[2] for x in records]
        cur_clustnumber = float(len(set([x[0] for x in records])))

        assert 0 < cur_clustnumber < cur_maxlen, "Error, found log with strange clustnumber! %s %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_maxlen), str(cur_clustnumber))

        if not cur_coeffs:
            continue

        assert len(cur_coeffs) == cur_clen and cur_coeffs, "Error, there is a cluster witohut stuff? %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))
        assert all(0 <= x <= 1 for x in cur_coeffs), "Error, a coefficient is wrong here! Check me! %s %s %s" % (
            str(cur_clust), str(cur_coef), str(cur_coeffs))

        cur_min = min(cur_coeffs)
        cur_max = max(cur_coeffs)
        # float() keeps the average exact even if the coefficients are
        # integers on an interpreter with integer division.
        cur_avg = sum(cur_coeffs) / float(cur_clen)
        cur_normclust = cur_clustnumber / cur_maxlen

        update_status(status + 0.4 * quanta, '  comulative per coeff...')
        _accumulate(coeff_stats[cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

        update_status(status + 0.6 * quanta, '  comulative per cluster per coeff...')
        _accumulate(cluster_stats[cur_clust][cur_coef], cur_avg, cur_min, cur_max,
                    cur_clustnumber, cur_normclust)

    update_status_final('Done!')

    if generate_graphs:
        gen_graphs()

    if pickle_output:
        update_status(0, 'Dumping to file...')
        # Plain dicts are dumped: the defaultdict factories are lambdas.
        with open(pickle_output, 'w') as f:
            SER.dump({'cluster_stats': dict((x, dict(cluster_stats[x])) for x in cluster_stats),
                      'coeff_stats': dict(coeff_stats)}, f)
import gc
import numpy as np

#This is supposed to defeat a bit of the python vm performance losses:
# setcheckinterval raises the number of bytecode instructions executed between
# two periodic interpreter checks (thread switches, signal handlers).
import sys
sys.setcheckinterval(1000000)

# Fall back to Invenio's own defaultdict when collections does not provide one.
# NOTE(review): the bare except also hides any unrelated error raised by the import.
try:
    from collections import defaultdict
except:
    from invenio.containerutils import defaultdict

from itertools import groupby, chain, repeat
# NOTE(review): override_stdout_config is listed twice in the import below;
# redundant, but harmless.
from invenio.bibauthorid_general_utils import update_status, update_status_final, override_stdout_config, override_stdout_config

# Module-level side effect: the output configuration is changed at import time,
# before the remaining invenio modules are imported. Presumably this routes the
# status output to a file instead of stdout -- confirm against
# bibauthorid_general_utils.
override_stdout_config(fileout=True, stdout=False)

from invenio.bibauthorid_cluster_set import delayed_cluster_sets_from_marktables
from invenio.bibauthorid_cluster_set import delayed_cluster_sets_from_personid
from invenio.bibauthorid_wedge import wedge
from invenio.bibauthorid_name_utils import generate_last_name_cluster_str
from invenio.bibauthorid_backinterface import empty_tortoise_results_table
from invenio.bibauthorid_backinterface import remove_clusters_by_name
from invenio.bibauthorid_general_utils import bibauthor_print
from invenio.bibauthorid_prob_matrix import prepare_matirx
#Scheduler is [temporarily] deprecated in favour of the much simpler schedule_workers
#from invenio.bibauthorid_scheduler import schedule, matrix_coefs
from invenio.bibauthorid_least_squares import to_function as create_approx_func


from invenio.bibauthorid_general_utils import schedule_workers