Ejemplo n.º 1
0
def main(proc_num, lock, out_dir, in_dir, years):
    random.shuffle(years)
    print proc_num, "Start loop"
    while True:
        lock.acquire()
        work_left = False
        for year in years:
            dirs = set(os.listdir(out_dir))
            if str(year) + ".bin" in dirs:
                continue
            work_left = True
            print proc_num, "year", year
            fname = out_dir + str(year) + ".bin"
            with open(fname, "w") as fp:
                fp.write("")
            fp.close()
            break
        lock.release()
        if not work_left:
            print proc_num, "Finished"
            break

        print proc_num, "Loading  matrix", year
        coo_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin",
                                               min_size=230000)
        csr_mat = coo_mat.tocsr()
        sum_mat = (csr_mat + csr_mat.T)
        sum_mat = sum_mat.tocoo()
        print proc_num, "Writing counts for year", year
        matstore.export_mat_eff(sum_mat.row, sum_mat.col, sum_mat.data, year,
                                out_dir)
Ejemplo n.º 2
0
def main(proc_num, lock, in_dir, years, k):
    random.shuffle(years)
    print proc_num, "Start loop"
    tmp_pref = in_dir + "dknn-" + str(k) + "/"
    while True:
        lock.acquire()
        work_left = False
        for year in years:
            dirs = set(os.listdir(tmp_pref))
            if str(year) + ".bin" in dirs:
                continue
            work_left = True
            print proc_num, "year", year
            fname = tmp_pref + str(year) + ".bin"
            with open(fname, "w") as fp:
                fp.write("")
            fp.close()
            break
        lock.release()
        if not work_left:
            print proc_num, "Finished"
            break

        print proc_num, "Making knn net for year", year
        old_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin")
        row_d, col_d, data_d = make_knn_mat(old_mat, k)
        
        print proc_num, "Writing counts for year", year
        matstore.export_mat_eff(row_d, col_d, data_d, year, tmp_pref)
Ejemplo n.º 3
0
def main(proc_num, lock, in_dir, years, k):
    random.shuffle(years)
    print proc_num, "Start loop"
    tmp_pref = in_dir + "dknn-" + str(k) + "/"
    while True:
        lock.acquire()
        work_left = False
        for year in years:
            dirs = set(os.listdir(tmp_pref))
            if str(year) + ".bin" in dirs:
                continue
            work_left = True
            print proc_num, "year", year
            fname = tmp_pref + str(year) + ".bin"
            with open(fname, "w") as fp:
                fp.write("")
            fp.close()
            break
        lock.release()
        if not work_left:
            print proc_num, "Finished"
            break

        print proc_num, "Making knn net for year", year
        old_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin")
        row_d, col_d, data_d = make_knn_mat(old_mat, k)

        print proc_num, "Writing counts for year", year
        matstore.export_mat_eff(row_d, col_d, data_d, year, tmp_pref)
Ejemplo n.º 4
0
def main(proc_num, queue, out_dir, in_dir):
    random.shuffle(years)
    print proc_num, "Start loop"
    while True:
        try: 
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break
        print proc_num, "Loading  matrix", year
        coo_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin", min_size=10**6)
        csr_mat = coo_mat.tocsr()
        sum_mat = (csr_mat + csr_mat.T) 
        sum_mat = sum_mat.tocoo()
        for i in xrange(len(sum_mat.data)):
            sum_mat.data[i] = max(csr_mat[sum_mat.row[i], sum_mat.col[i]], csr_mat[sum_mat.col[i], sum_mat.row[i]])
        
        print proc_num, "Writing counts for year", year
        matstore.export_mat_eff(sum_mat.row, sum_mat.col, sum_mat.data, year, out_dir)
Ejemplo n.º 5
0
def worker(proc_num, queue, in_dir):
    print proc_num, "Start loop"
    while True:
        try: 
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break

        print proc_num, "Making second orders for year", year
        old_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin")
        row_d, col_d, data_d, keep_rows = make_secondorder_mat(old_mat)
        old_index = list(ioutils.load_pickle(in_dir + str(year) + "-index.pkl"))
        new_index = collections.OrderedDict()
        for i in xrange(len(keep_rows)):
            new_index[old_index[keep_rows[i]]] = i
        ioutils.write_pickle(new_index, in_dir + "/second/" + str(year) + "-index.pkl")
        print proc_num, "Writing counts for year", year
        matstore.export_mat_eff(row_d, col_d, data_d, year, in_dir + "/second/")
Ejemplo n.º 6
0
def worker(proc_num, queue, in_dir):
    print proc_num, "Start loop"
    while True:
        try:
            year = queue.get(block=False)
        except Empty:
            print proc_num, "Finished"
            break

        print proc_num, "Making second orders for year", year
        old_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin")
        row_d, col_d, data_d, keep_rows = make_secondorder_mat(old_mat)
        old_index = list(ioutils.load_pickle(in_dir + str(year) +
                                             "-index.pkl"))
        new_index = collections.OrderedDict()
        for i in xrange(len(keep_rows)):
            new_index[old_index[keep_rows[i]]] = i
        ioutils.write_pickle(new_index,
                             in_dir + "/second/" + str(year) + "-index.pkl")
        print proc_num, "Writing counts for year", year
        matstore.export_mat_eff(row_d, col_d, data_d, year,
                                in_dir + "/second/")