def get_fortran_manglings():
    '''
    Returns the list of define sets used to test Fortran name manglings.

    Each list entry is a single string holding two defines: the
    BLAS++/LAPACK++ name (FORTRAN_ADD_, FORTRAN_LOWER, FORTRAN_UPPER)
    followed by the traditional LAPACK name (ADD_, NOCHANGE, UPCASE).

    Setting one or more of:
        fortran_mangling=add_, fortran_mangling=lower, fortran_mangling=upper
    limits which entries are returned: an entry is kept when its keyword
    occurs in the `fortran_mangling` setting. If no keyword occurs, all
    three entries are returned, preceded by a warning when the setting
    is non-empty.

    The obsolete variables fortran_add_, fortran_lower, and fortran_upper
    only produce a warning here; they do not influence the result.

    Ex: with `fortran_mangling` empty, get_fortran_manglings() returns
        ['-DFORTRAN_ADD_ -DADD_',
         '-DFORTRAN_LOWER -DNOCHANGE',
         '-DFORTRAN_UPPER -DUPCASE']
    '''
    # Warn about obsolete settings.
    obsolete = (
        ('fortran_add_',
         'Variable `fortran_add_` is obsolete; use fortran_mangling=add_'),
        ('fortran_lower',
         'Variable `fortran_lower` is obsolete; use fortran_mangling=lower'),
        ('fortran_upper',
         'Variable `fortran_upper` is obsolete; use fortran_mangling=upper'),
    )
    for (key, message) in obsolete:
        if config.environ[key]:
            print_warn(message)

    # keyword in fortran_mangling => defines to test, in the order tested.
    known = (
        ('add_',  '-DFORTRAN_ADD_ -DADD_'),
        ('lower', '-DFORTRAN_LOWER -DNOCHANGE'),
        ('upper', '-DFORTRAN_UPPER -DUPCASE'),
    )

    requested = config.environ['fortran_mangling']
    manglings = [flags for (keyword, flags) in known if keyword in requested]
    if manglings:
        return manglings

    # Nothing recognized: complain if something was given, then test all.
    if requested:
        print_warn('Unknown fortran_mangling: '+ requested)
    return [flags for (_, flags) in known]
def main():
    '''
    Runs the BLAS++ configuration steps in order and writes the output files.

    Steps: initialize config with the default install prefix, detect the
    C++ compiler and usable flags, OpenMP, and the BLAS library with its
    return conventions and vendor version. CBLAS, LAPACK, cuBLAS, and
    TestSweeper are optional: when one is not found, a warning is printed
    and configuration continues. Finally, defines are extracted from
    CXXFLAGS and make.inc and blas_defines.h are generated.

    An Error raised by any step that is not wrapped in try/except below
    propagates to the caller.
    '''
    config.init(prefix='/usr/local/blaspp')
    config.prog_cxx()

    # Stricter flags currently left disabled:
    # -Wmissing-declarations, -Wconversion, -Werror
    cxx_flags = [
        '-O2',
        '-std=c++11',
        '-MMD',
        '-Wall',
        '-pedantic',
        '-Wshadow',
        '-Wno-unused-local-typedefs',
        '-Wno-unused-function',
    ]
    config.prog_cxx_flags(cxx_flags)

    config.openmp()

    config.lapack.blas()
    print()
    config.lapack.blas_float_return()
    config.lapack.blas_complex_return()

    # Vendor detection must run before the CBLAS check, to define HAVE_MKL.
    config.lapack.vendor_version()

    # CBLAS and LAPACK are optional; a missing one only produces a warning.
    try:
        config.lapack.cblas()
    except Error:
        print_warn('BLAS++ needs CBLAS only in testers.')

    try:
        config.lapack.lapack()
    except Error:
        print_warn('BLAS++ needs LAPACK only in testers.')

    # cuBLAS is optional; the CUDA target is recorded only when it is found.
    try:
        config.cublas_library()
        config.environ.merge({'devtarget': 'cuda'})
        config.environ.append('CXXFLAGS', '-DBLASPP_WITH_CUBLAS')
    except Error:
        print_warn('BLAS++ CUDA wrappers will not be compiled.')

    search_dirs = ['../testsweeper', './testsweeper']
    testsweeper = config.get_package(
        'testsweeper',
        search_dirs,
        'https://bitbucket.org/icl/testsweeper',
        'https://bitbucket.org/icl/testsweeper/get/tip.tar.gz',
        'testsweeper.tar.gz')
    if not testsweeper:
        print_warn('BLAS++ needs TestSweeper only in testers.')

    config.extract_defines_from_flags('CXXFLAGS')
    config.output_files(['make.inc', 'blas_defines.h'])
    print('log in config/log.txt')

    print('-' * 80)
def blas_complex_return():
    '''
    Detects how complex-valued BLAS functions like zdotc return their result.

    GNU gfortran returns the complex value directly, while Intel ifort and
    f2c return it in a hidden first argument. Two test programs are tried
    in that order:
      - if the first succeeds (rc == 0), nothing is changed;
      - if only the second succeeds, -DBLAS_COMPLEX_RETURN_ARGUMENT is
        appended to CXXFLAGS;
      - if neither succeeds, a warning is printed.
    '''
    rc, _out, _err = config.compile_run(
        'config/return_complex.cc', {},
        'BLAS (zdotc) returns complex (GNU gfortran convention)')
    if rc == 0:
        # Direct return works; no extra define is needed.
        return

    rc, _out, _err = config.compile_run(
        'config/return_complex_argument.cc', {},
        'BLAS (zdotc) returns complex as hidden argument (Intel ifort convention)')
    if rc != 0:
        print_warn('unexpected error!')
        return

    config.environ.append('CXXFLAGS', '-DBLAS_COMPLEX_RETURN_ARGUMENT')
def blas_float_return():
    '''
    Detects whether float BLAS functions like sdot return float or double.

    Normally sdot returns float. f2c and g77 always returned double, even
    for float functions; this affects clapack and MacOS Accelerate.
    Two test programs are tried in that order:
      - if the first succeeds (rc == 0), nothing is changed;
      - if only the second succeeds, -DHAVE_F2C is appended to CXXFLAGS;
      - if neither succeeds, a warning is printed.
    '''
    rc, _out, _err = config.compile_run(
        'config/return_float.cc', {},
        'BLAS (sdot) returns float as float (standard)')
    if rc == 0:
        # Standard convention works; no extra define is needed.
        return

    rc, _out, _err = config.compile_run(
        'config/return_float_f2c.cc', {},
        'BLAS (sdot) returns float as double (f2c convention)')
    if rc != 0:
        print_warn('unexpected error!')
        return

    config.environ.append('CXXFLAGS', '-DHAVE_F2C')
def blas():
    '''
    Searches for a usable BLAS library and records it in config.environ.

    Candidates, in the order tried: default libraries (no extra flags),
    Intel MKL (int / int64_t, Intel / GNU conventions, threaded /
    sequential), IBM ESSL, OpenBLAS, MacOS Accelerate, and finally the
    deprecated AMD ACML. Each candidate is handed to compile_with_manglings
    together with the Fortran manglings (get_fortran_manglings) and
    integer sizes (get_int_sizes) to test.

    Setting one or more of:
        blas=mkl, blas=acml, blas=essl, blas=openblas, blas=accelerate;
        fortran_mangling=add_, fortran_mangling=lower, fortran_mangling=upper;
        lp64=1, ilp64=1
    in the environment or on the command line limits the search space.
    When no known name occurs in `blas`, every candidate is tested.

    In non-interactive mode the search stops at the first candidate that
    passes; otherwise all passing candidates are offered via config.choose.
    The chosen candidate's environment is merged into config.environ and
    -DHAVE_BLAS is appended to CXXFLAGS.
    '''
    print_header( 'BLAS library' )
    print_msg( 'Also detects Fortran name mangling and BLAS integer size.' )

    # Warn about obsolete settings.
    obsolete = (
        ('mkl',        'Variable `mkl` is obsolete; use blas=mkl'),
        ('acml',       'Variable `acml` is obsolete; use blas=acml'),
        ('essl',       'Variable `essl` is obsolete; use blas=essl'),
        ('openblas',   'Variable `openblas` is obsolete; use blas=openblas'),
        ('accelerate', 'Variable `accelerate` is obsolete; use blas=accelerate'),
    )
    for (key, message) in obsolete:
        if config.environ[key]:
            print_warn(message)

    requested = config.environ['blas']
    vendors = ('mkl', 'acml', 'essl', 'openblas', 'accelerate')

    # If no known vendor was requested, test all of them.
    test_all = not any(vendor in requested for vendor in vendors)

    def wanted(vendor):
        '''Returns true if candidates for the given vendor should be tested.'''
        return test_all or (vendor in requested)

    # Build list of [label, env] choices to test.
    choices = []

    if test_all:
        # Sometimes BLAS is in default libraries (e.g., on Cray).
        choices.append(['Default', {}])

    if wanted('mkl'):
        # Each pair has Intel conventions, then GNU conventions.
        choices += [
            # int, threaded
            ['Intel MKL (int, Intel conventions, threaded)',
             {'LIBS':     '-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -lm',
              'CXXFLAGS': '-fopenmp',
              'LDFLAGS':  '-fopenmp'}],
            ['Intel MKL (int, GNU conventions, threaded)',
             {'LIBS':     '-lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lpthread -lm',
              'CXXFLAGS': '-fopenmp',
              'LDFLAGS':  '-fopenmp'}],

            # int64_t, threaded
            ['Intel MKL (int64_t, Intel conventions, threaded)',
             {'LIBS':     '-lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -lpthread -lm',
              'CXXFLAGS': '-fopenmp -DMKL_ILP64',
              'LDFLAGS':  '-fopenmp'}],
            ['Intel MKL (int64_t, GNU conventions, threaded)',
             {'LIBS':     '-lmkl_gf_ilp64 -lmkl_gnu_thread -lmkl_core -lpthread -lm',
              'CXXFLAGS': '-fopenmp -DMKL_ILP64',
              'LDFLAGS':  '-fopenmp'}],

            # int, sequential
            ['Intel MKL (int, Intel conventions, sequential)',
             {'LIBS':     '-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm',
              'CXXFLAGS': ''}],
            ['Intel MKL (int, GNU conventions, sequential)',
             {'LIBS':     '-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lm',
              'CXXFLAGS': ''}],

            # int64_t, sequential
            ['Intel MKL (int64_t, Intel conventions, sequential)',
             {'LIBS':     '-lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lm',
              'CXXFLAGS': '-DMKL_ILP64'}],
            ['Intel MKL (int64_t, GNU conventions, sequential)',
             {'LIBS':     '-lmkl_gf_ilp64 -lmkl_sequential -lmkl_core -lm',
              'CXXFLAGS': '-DMKL_ILP64'}],
        ]

    if wanted('essl'):
        choices.append(['IBM ESSL', {'LIBS': '-lessl'}])

    if wanted('openblas'):
        choices.append(['OpenBLAS', {'LIBS': '-lopenblas'}])

    if wanted('accelerate'):
        # Add the vecLib header directory only when it exists on this system.
        path = '/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers'
        if os.path.exists( path ):
            inc = '-I' + path
        else:
            inc = ''
        choices.append(
            ['MacOS Accelerate',
             {'LIBS': '-framework Accelerate', 'CXXFLAGS': inc}])

    #--------------------
    # Deprecated libraries last.
    if wanted('acml'):
        choices += [
            ['AMD ACML (threaded)',   {'LIBS': '-lacml_mp'}],
            ['AMD ACML (sequential)', {'LIBS': '-lacml'}],
        ]

    manglings = get_fortran_manglings()
    int_sizes = get_int_sizes()

    # Collect (label, resulting env) for every candidate that passes.
    passed = []
    for (label, env) in choices:
        if 'LIBS' in env:
            title = label + '\n ' + env['LIBS']
        else:
            title = label
        print_subhead( title )

        rc, _out, _err, found_env = compile_with_manglings(
            'config/blas.cc', env, manglings, int_sizes )
        if rc != 0:
            continue

        passed.append( (label, found_env) )
        # Non-interactive runs take the first candidate that works.
        if not config.interactive():
            break

    # NOTE(review): when nothing passed, the behavior depends on what
    # config.choose does with an empty list of labels -- confirm there.
    labels = map( lambda entry: entry[0], passed )
    index = config.choose( 'Choose BLAS library:', labels )
    config.environ.merge( passed[index][1] )
    config.environ.append( 'CXXFLAGS', '-DHAVE_BLAS' )
config.cublas_library() config.environ.merge({'devtarget': 'cuda'}) config.environ.append('CXXFLAGS', '-DBLASPP_WITH_CUBLAS') except Error: print_warn('BLAS++ CUDA wrappers will not be compiled.') testsweeper = config.get_package( 'testsweeper', ['../testsweeper', './testsweeper'], 'https://bitbucket.org/icl/testsweeper', 'https://bitbucket.org/icl/testsweeper/get/tip.tar.gz', 'testsweeper.tar.gz') if (not testsweeper): print_warn('BLAS++ needs TestSweeper only in testers.') config.extract_defines_from_flags('CXXFLAGS') config.output_files(['make.inc', 'blas_defines.h']) print('log in config/log.txt') print('-' * 80) # end #------------------------------------------------------------------------------- try: main() except Error as ex: print_warn('A fatal error occurred. ' + str(ex) + '\nBLAS++ could not be configured. Log in config/log.txt') exit(1)