def main(proc_num, lock, out_dir, in_dir, years):
    """Claim unprocessed years one at a time and export each matrix plus its transpose.

    A year counts as taken once ``<year>.bin`` is listed in ``out_dir``.  To
    claim a year the worker creates an empty file of that name while holding
    ``lock``; the loading, summing and exporting happen outside the lock.

    Note that the placeholder is written before the work is done and is never
    removed here, so if processing a claimed year fails, later runs will
    still skip that year.

    Args:
        proc_num: Identifier printed in front of every progress message.
        lock: Object with ``acquire()``/``release()`` guarding the claim step
            (presumably a lock shared by all workers -- confirm with caller).
        out_dir: Output directory.  File names are appended by plain string
            concatenation, so it must end with a path separator.
        in_dir: Input directory holding ``<year>.bin``; same trailing
            separator requirement as ``out_dir``.
        years: Iterable of years to process.  It is copied before shuffling,
            so the caller's sequence is left untouched.

    Raises:
        OSError: If ``out_dir`` cannot be listed or the placeholder cannot be
            created.  The lock is released before the error propagates.
    """
    # Shuffle a private copy so workers start at different years without
    # mutating the caller's list.
    years = list(years)
    random.shuffle(years)
    print("%s Start loop" % (proc_num,))
    while True:
        work_left = False
        lock.acquire()
        try:
            # One listing per claim attempt: only the placeholder written
            # below changes the set of claimed names while the lock is held.
            taken = set(os.listdir(out_dir))
            for year in years:
                if str(year) + ".bin" in taken:
                    continue
                work_left = True
                print("%s year %s" % (proc_num, year))
                # An empty placeholder marks the year as claimed.
                with open(out_dir + str(year) + ".bin", "w"):
                    pass
                break
        finally:
            # Always release, otherwise one failing worker would block the
            # claim step of every other worker forever.
            lock.release()
        if not work_left:
            print("%s Finished" % (proc_num,))
            break

        print("%s Loading matrix %s" % (proc_num, year))
        # NOTE(review): meaning of min_size is defined by matstore -- confirm.
        coo_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin", min_size=230000)
        csr_mat = coo_mat.tocsr()
        # Add the matrix to its transpose, then go back to COO so the
        # row/col/data arrays can be handed to the exporter.
        sum_mat = (csr_mat + csr_mat.T).tocoo()
        print("%s Writing counts for year %s" % (proc_num, year))
        matstore.export_mat_eff(sum_mat.row, sum_mat.col, sum_mat.data, year, out_dir)
def main(proc_num, lock, in_dir, years, k):
    """Claim unprocessed years one at a time and export a k-NN matrix for each.

    Results go to ``in_dir + "dknn-<k>/"``.  A year counts as taken once
    ``<year>.bin`` is listed in that directory.  To claim a year the worker
    creates an empty file of that name while holding ``lock``; the matrix is
    then built outside the lock by ``make_knn_mat(old_mat, k)``.

    Note that the placeholder is written before the work is done and is never
    removed here, so if processing a claimed year fails, later runs will
    still skip that year.

    Args:
        proc_num: Identifier printed in front of every progress message.
        lock: Object with ``acquire()``/``release()`` guarding the claim step
            (presumably a lock shared by all workers -- confirm with caller).
        in_dir: Input directory holding ``<year>.bin``.  File names are
            appended by plain string concatenation, so it must end with a
            path separator.  The ``dknn-<k>/`` subdirectory must already
            exist, because it is listed before anything is written.
        years: Iterable of years to process.  It is copied before shuffling,
            so the caller's sequence is left untouched.
        k: Passed through to ``make_knn_mat`` and used in the output
            directory name.

    Raises:
        OSError: If the output directory cannot be listed or the placeholder
            cannot be created.  The lock is released before the error
            propagates.
    """
    # Shuffle a private copy so workers start at different years without
    # mutating the caller's list.
    years = list(years)
    random.shuffle(years)
    print("%s Start loop" % (proc_num,))
    tmp_pref = in_dir + "dknn-" + str(k) + "/"
    while True:
        work_left = False
        lock.acquire()
        try:
            # One listing per claim attempt: only the placeholder written
            # below changes the set of claimed names while the lock is held.
            taken = set(os.listdir(tmp_pref))
            for year in years:
                if str(year) + ".bin" in taken:
                    continue
                work_left = True
                print("%s year %s" % (proc_num, year))
                # An empty placeholder marks the year as claimed.
                with open(tmp_pref + str(year) + ".bin", "w"):
                    pass
                break
        finally:
            # Always release, otherwise one failing worker would block the
            # claim step of every other worker forever.
            lock.release()
        if not work_left:
            print("%s Finished" % (proc_num,))
            break

        print("%s Making knn net for year %s" % (proc_num, year))
        old_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin")
        row_d, col_d, data_d = make_knn_mat(old_mat, k)
        print("%s Writing counts for year %s" % (proc_num, year))
        matstore.export_mat_eff(row_d, col_d, data_d, year, tmp_pref)
def main(proc_num, queue, out_dir, in_dir):
    """Drain ``queue`` of years and export an element-wise-max symmetric matrix for each.

    For every year taken from the queue, the matrix is loaded, added to its
    transpose to obtain the set of stored positions, and each stored value is
    then replaced by the larger of the two mirrored entries of the loaded
    matrix before exporting.

    The queue alone determines the processing order; this function takes no
    ``years`` argument and does not shuffle anything.

    Args:
        proc_num: Identifier printed in front of every progress message.
        queue: Object whose ``get(block=False)`` returns the next year and
            raises ``Empty`` when nothing is left.
        out_dir: Output directory handed to ``matstore.export_mat_eff``.
        in_dir: Input directory holding ``<year>.bin``.  File names are
            appended by plain string concatenation, so it must end with a
            path separator.
    """
    print("%s Start loop" % (proc_num,))
    while True:
        try:
            year = queue.get(block=False)
        except Empty:
            print("%s Finished" % (proc_num,))
            break

        print("%s Loading matrix %s" % (proc_num, year))
        # NOTE(review): meaning of min_size is defined by matstore -- confirm.
        coo_mat = matstore.retrieve_mat_as_coo(in_dir + str(year) + ".bin", min_size=10**6)
        csr_mat = coo_mat.tocsr()
        # The sum is only used for its sparsity pattern (row/col arrays);
        # the values are overwritten below.
        sum_mat = (csr_mat + csr_mat.T).tocoo()
        rows, cols, data = sum_mat.row, sum_mat.col, sum_mat.data
        # NOTE(review): this indexes the CSR matrix twice per stored entry in
        # pure Python; csr_mat.maximum(csr_mat.T) may be a vectorized
        # equivalent for non-negative data -- confirm before switching.
        for i, row in enumerate(rows):
            col = cols[i]
            data[i] = max(csr_mat[row, col], csr_mat[col, row])
        print("%s Writing counts for year %s" % (proc_num, year))
        matstore.export_mat_eff(rows, cols, data, year, out_dir)
def worker(proc_num, queue, in_dir):
    """Drain ``queue`` of years and write a second-order matrix and index for each.

    For every year taken from the queue, the matrix ``<year>.bin`` is loaded
    from ``in_dir`` and passed to ``make_secondorder_mat``.  The rows that
    function reports as kept are renumbered 0..n-1, the matching entries of
    the year's existing index are written as a new ordered index, and the
    matrix is exported.  Both outputs go to ``in_dir + "/second/"``.

    Args:
        proc_num: Identifier printed in front of every progress message.
        queue: Object whose ``get(block=False)`` returns the next year and
            raises ``Empty`` when nothing is left.
        in_dir: Directory holding ``<year>.bin`` and ``<year>-index.pkl``;
            file names are appended by plain string concatenation.
    """
    print("%s %s" % (proc_num, "Start loop"))
    while True:
        try:
            year = queue.get(block=False)
        except Empty:
            print("%s %s" % (proc_num, "Finished"))
            break

        print("%s %s %s" % (proc_num, "Making second orders for year", year))
        year_prefix = in_dir + str(year)
        old_mat = matstore.retrieve_mat_as_coo(year_prefix + ".bin")
        row_d, col_d, data_d, keep_rows = make_secondorder_mat(old_mat)

        # Map each surviving entry of the old index to its new position.
        old_index = list(ioutils.load_pickle(year_prefix + "-index.pkl"))
        new_index = collections.OrderedDict()
        for new_pos, old_pos in enumerate(keep_rows):
            new_index[old_index[old_pos]] = new_pos

        second_dir = in_dir + "/second/"
        ioutils.write_pickle(new_index, second_dir + str(year) + "-index.pkl")
        print("%s %s %s" % (proc_num, "Writing counts for year", year))
        matstore.export_mat_eff(row_d, col_d, data_d, year, second_dir)