def calcEField(mol, wfn, file='eField'):
    """Compute field vectors on the culled grid points of ``mol`` and save them.

    The grid returned by ``generatePoints(mol)`` is filtered with ``molCull``,
    the surviving points are split into one chunk per Psi4 thread, and each
    chunk is handed to ``palFieldCompute`` through ``Parallel``/``delayed``.
    The per-chunk results are concatenated in chunk order and written to disk.

    Args:
        mol: Molecule object forwarded to ``generatePoints`` and ``molCull``.
        wfn: Wavefunction object. It is written to ``'wfn'`` with
            ``wfn.to_file`` before the workers start (presumably so that
            ``palFieldCompute`` can reload it -- confirm against that helper).
        file: Prefix for the output files. ``np.save`` writes
            ``<file>-vecs.npy`` and ``<file>-points.npy``.

    Returns:
        None. The results are only written to disk.
    """
    # The first value returned by generatePoints is not needed here.
    _, X, Y, Z = generatePoints(mol)
    culled = molCull(mol, X, Y, Z)

    # One row per surviving grid point, columns are the x, y, z coordinates.
    culledPoints = np.column_stack(
        (X[culled].flat, Y[culled].flat, Z[culled].flat))

    numThreads = psi4.get_num_threads()

    # np.array_split returns exactly numThreads contiguous chunks that cover
    # every row. The previous hand-rolled slicing computed the remainder as
    # culledPoints[count:n - count], which is empty whenever count > n / 2 and
    # therefore silently dropped the trailing points.
    pointsThreadList = np.array_split(culledPoints, numThreads)

    wfn.to_file('wfn')
    results = Parallel(n_jobs=numThreads)(
        delayed(palFieldCompute)(chunk) for chunk in pointsThreadList)

    # Each result is indexable: [0] holds the field values, [1] the points.
    field = np.concatenate([res[0] for res in results])
    points = np.concatenate([res[1] for res in results])

    np.save(f'{file}-vecs', field)
    np.save(f'{file}-points', points)
def test_threaded_blas(args):
    """Time square matrix products with Psi4 ``gemm`` and ``np.dot``.

    The benchmark runs once with 1 thread and once with ``args.nthread``
    threads, over several matrix sizes. The repetition count is
    ``max(1, int(1e10 / size**3))``, so larger matrices get fewer
    repetitions. The average times of the largest size (4000) are used to
    print two ratios: NumPy vs. Psi4 at the high thread count, and Psi4
    single-threaded vs. Psi4 multi-threaded.

    Args:
        args: Object providing ``nthread`` (converted with ``int``) and
            ``passfail``; when ``passfail`` is truthy the two ratios are
            asserted against their targets with ``math.isclose``.
    """
    max_threads = int(args.nthread)
    thread_counts = [1, max_threads]
    matrix_sizes = [200, 500, 2000, 4000]
    timings = {}

    for nthreads in thread_counts:
        psi4.set_num_threads(nthreads)

        for dim in matrix_sizes:
            repeats = max(1, int(1.e10 / (dim**3)))

            lhs = psi4.core.Matrix(dim, dim)
            rhs = psi4.core.Matrix(dim, dim)
            product = psi4.core.Matrix(dim, dim)

            # Average wall time of one Psi4 matrix multiply.
            started = time.time()
            for _ in range(repeats):
                product.gemm(False, False, 1.0, lhs, rhs, 0.0)
            psi4_secs = (time.time() - started) / repeats

            # Average wall time of the same product through NumPy.
            started = time.time()
            for _ in range(repeats):
                np.dot(lhs, rhs, out=np.asarray(product))
            numpy_secs = (time.time() - started) / repeats

            print("Time for threads %2d, size %5d: Psi4: %12.6f NumPy: %12.6f" %
                  (nthreads, dim, psi4_secs, numpy_secs))

            # Only the 4000 x 4000 timings feed the ratios below.
            if dim != 4000:
                continue
            timings["p4-n{}".format(nthreads)] = psi4_secs
            timings["np-n{}".format(nthreads)] = numpy_secs
            assert psi4.get_num_threads() == nthreads

    low = thread_counts[0]
    high = thread_counts[-1]
    rat1 = timings["np-n" + str(high)] / timings["p4-n" + str(high)]
    rat2 = timings["p4-n" + str(low)] / timings["p4-n" + str(high)]

    print(" NumPy@n%d : Psi4@n%d ratio (want ~1): %.2f" % (high, high, rat1))
    print(" Psi4@n%d : Psi4@n%d ratio (want ~%d): %.2f" % (low, high, high, rat2))

    if not args.passfail:
        return
    assert math.isclose(rat1, 1.0, rel_tol=0.2), 'PsiAPI:NumPy speedup {} !~= 1.0'.format(rat1)
    assert math.isclose(rat2, high, rel_tol=0.4), 'PsiAPI speedup {} !~= {}'.format(rat2, high)
def disabled_test_threaded_blas():
    """Time square matrix products with Psi4 ``gemm`` and ``np.dot``.

    The benchmark runs once with 1 thread and once with half of
    ``multiprocessing.cpu_count()`` threads (at least 1), over several matrix
    sizes. The repetition count is ``max(1, int(1e10 / size**3))``, so larger
    matrices get fewer repetitions. The average times of the largest size
    (5000) are turned into two ratios that are printed and then asserted:
    NumPy vs. Psi4 at the high thread count (target 1) and Psi4
    single-threaded vs. Psi4 multi-threaded (target: the high thread count).
    """
    # Floor at one thread: on a single-CPU host halving the CPU count
    # truncates to 0, which would request zero threads from Psi4.
    half_cpus = max(1, multiprocessing.cpu_count() // 2)

    times = {}
    size = [200, 500, 2000, 5000]
    threads = [1, half_cpus]

    for th in threads:
        psi4.set_num_threads(th)

        for sz in size:
            nruns = max(1, int(1.e10 / (sz**3)))

            a = psi4.core.Matrix(sz, sz)
            b = psi4.core.Matrix(sz, sz)
            c = psi4.core.Matrix(sz, sz)

            # Average wall time of one Psi4 matrix multiply.
            tp4 = time.time()
            for _ in range(nruns):
                c.gemm(False, False, 1.0, a, b, 0.0)
            retp4 = (time.time() - tp4) / nruns

            # Average wall time of the same product through NumPy.
            tnp = time.time()
            for _ in range(nruns):
                np.dot(a, b, out=np.asarray(c))
            retnp = (time.time() - tnp) / nruns

            print("Time for threads %2d, size %5d: Psi4: %12.6f NumPy: %12.6f" %
                  (th, sz, retp4, retnp))

            # Only the 5000 x 5000 timings feed the ratios below.
            if sz == 5000:
                times["p4-n{}".format(th)] = retp4
                times["np-n{}".format(th)] = retnp
                assert psi4.get_num_threads() == th

    rat1 = times["np-n" + str(threads[-1])] / times["p4-n" + str(threads[-1])]
    rat2 = times["p4-n" + str(threads[0])] / times["p4-n" + str(threads[-1])]

    print(" NumPy@n%d : Psi4@n%d ratio (want ~1): %.2f" % (threads[-1], threads[-1], rat1))
    print(" Psi4@n%d : Psi4@n%d ratio (want ~%d): %.2f" %
          (threads[0], threads[-1], threads[-1], rat2))

    # NOTE(review): pytest.approx takes the expected value first, so the
    # relative tolerance here scales with the measured ratio rather than with
    # the target. Kept as written; confirm that this is intended.
    assert pytest.approx(rat1, 0.2) == 1.0
    assert pytest.approx(rat2, 0.8) == threads[-1]
def test_threaded_blas(args):
    """Benchmark Psi4 ``gemm`` against ``np.dot`` at two thread counts.

    Square matrices of several sizes are multiplied with 1 thread and with
    ``args.nthread`` threads. Each size is repeated
    ``max(1, int(1e10 / size**3))`` times and the mean wall time is printed.
    The means for size 4000 are kept and combined into two printed ratios:
    NumPy over Psi4 at the high thread count, and single-threaded Psi4 over
    multi-threaded Psi4.

    Args:
        args: Object providing ``nthread`` (converted with ``int``) and
            ``passfail``; a truthy ``passfail`` turns the ratio checks into
            assertions via ``math.isclose``.
    """
    requested = int(args.nthread)
    results = {}
    dims = [200, 500, 2000, 4000]
    counts = [1, requested]

    for count in counts:
        psi4.set_num_threads(count)

        for n in dims:
            reps = max(1, int(1.e10 / (n ** 3)))

            mat_a = psi4.core.Matrix(n, n)
            mat_b = psi4.core.Matrix(n, n)
            mat_c = psi4.core.Matrix(n, n)

            # Psi4 timing: mean seconds per gemm call.
            t0 = time.time()
            for _ in range(reps):
                mat_c.gemm(False, False, 1.0, mat_a, mat_b, 0.0)
            mean_psi4 = (time.time() - t0) / reps

            # NumPy timing: mean seconds per np.dot call on the same matrices.
            t0 = time.time()
            for _ in range(reps):
                np.dot(mat_a, mat_b, out=np.asarray(mat_c))
            mean_numpy = (time.time() - t0) / reps

            report = (count, n, mean_psi4, mean_numpy)
            print("Time for threads %2d, size %5d: Psi4: %12.6f NumPy: %12.6f" % report)

            # Record only the 4000 x 4000 case; it drives the ratios below.
            if n == 4000:
                results["p4-n{}".format(count)] = mean_psi4
                results["np-n{}".format(count)] = mean_numpy
                assert psi4.get_num_threads() == count

    first, last = counts[0], counts[-1]
    rat1 = results["np-n" + str(last)] / results["p4-n" + str(last)]
    rat2 = results["p4-n" + str(first)] / results["p4-n" + str(last)]

    print(" NumPy@n%d : Psi4@n%d ratio (want ~1): %.2f" % (last, last, rat1))
    print(" Psi4@n%d : Psi4@n%d ratio (want ~%d): %.2f" % (first, last, last, rat2))

    if args.passfail:
        numpy_msg = 'PsiAPI:NumPy speedup {} !~= 1.0'.format(rat1)
        assert math.isclose(rat1, 1.0, rel_tol=0.2), numpy_msg
        assert math.isclose(rat2, last, rel_tol=0.4), 'PsiAPI speedup {} !~= {}'.format(rat2, last)
def disabled_test_threaded_blas():
    """Benchmark Psi4 ``gemm`` against ``np.dot`` at two thread counts.

    Square matrices of several sizes are multiplied with 1 thread and with
    half of ``multiprocessing.cpu_count()`` threads (never fewer than 1).
    Each size is repeated ``max(1, int(1e10 / size**3))`` times and the mean
    wall time is printed. The means for size 5000 are combined into two
    ratios, which are printed and asserted with ``pytest.approx``: NumPy over
    Psi4 at the high thread count (target 1) and single-threaded Psi4 over
    multi-threaded Psi4 (target: the high thread count).
    """
    # A host reporting a single CPU would otherwise yield 0 threads after
    # halving, so clamp the high thread count to at least 1.
    high_threads = max(1, multiprocessing.cpu_count() // 2)

    times = {}
    size = [200, 500, 2000, 5000]
    threads = [1, high_threads]

    for th in threads:
        psi4.set_num_threads(th)

        for sz in size:
            nruns = max(1, int(1.e10 / (sz ** 3)))

            a = psi4.core.Matrix(sz, sz)
            b = psi4.core.Matrix(sz, sz)
            c = psi4.core.Matrix(sz, sz)

            # Psi4 timing: mean seconds per gemm call.
            tp4 = time.time()
            for _ in range(nruns):
                c.gemm(False, False, 1.0, a, b, 0.0)
            retp4 = (time.time() - tp4) / nruns

            # NumPy timing: mean seconds per np.dot call on the same matrices.
            tnp = time.time()
            for _ in range(nruns):
                np.dot(a, b, out=np.asarray(c))
            retnp = (time.time() - tnp) / nruns

            print("Time for threads %2d, size %5d: Psi4: %12.6f NumPy: %12.6f" %
                  (th, sz, retp4, retnp))

            # Record only the 5000 x 5000 case; it drives the ratios below.
            if sz == 5000:
                times["p4-n{}".format(th)] = retp4
                times["np-n{}".format(th)] = retnp
                assert psi4.get_num_threads() == th

    rat1 = times["np-n" + str(threads[-1])] / times["p4-n" + str(threads[-1])]
    rat2 = times["p4-n" + str(threads[0])] / times["p4-n" + str(threads[-1])]

    print(" NumPy@n%d : Psi4@n%d ratio (want ~1): %.2f" % (threads[-1], threads[-1], rat1))
    print(" Psi4@n%d : Psi4@n%d ratio (want ~%d): %.2f" %
          (threads[0], threads[-1], threads[-1], rat2))

    # NOTE(review): the measured ratio is passed as the "expected" argument of
    # pytest.approx, so the relative tolerance scales with the measurement and
    # not with the target value. Left unchanged; confirm that this is intended.
    assert pytest.approx(rat1, 0.2) == 1.0
    assert pytest.approx(rat2, 0.8) == threads[-1]