def readGWAS(args):
    """Read every matching GWAS file in ``args.gwas_folder`` into one DataFrame.

    The folder listing is optionally filtered by ``args.gwas_file_pattern``
    (compiled as a regular expression when set), each file is processed with
    ``build_betas`` and the per-file results are concatenated.

    Args:
        args: parsed command-line arguments. This function reads
            ``gwas_folder`` and ``gwas_file_pattern``; ``validate``,
            ``GWASUtilities.gwas_format_from_args`` and ``build_betas``
            receive the whole object.

    Returns:
        The ``pandas`` concatenation of what ``build_betas`` returned for
        each file, in sorted file-name order.

    Raises:
        Exceptions.ReportableException: if no file in the folder matches.
    """
    start = timer()
    validate(args)

    regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
    names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    names.sort()  # cosmetic, because different filesystems/OS yield folders in different order
    if len(names) == 0:
        msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
        raise Exceptions.ReportableException(msg)

    # Parenthesised form prints the same text on Python 2 and also parses on
    # Python 3; it matches the other print call at the end of this function.
    print("INFO: Reading GWAS data")

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)

    # No prediction model is loaded here: build_betas is always called with
    # model=None, even when args.model_db_path is set.
    model = None

    # Gather all per-file frames and concatenate once instead of re-copying
    # the accumulated frame on every iteration. The empty seed frame the
    # original accumulation started from is kept as the first element so the
    # concatenation sees exactly the same inputs as before.
    frames = [pandas.DataFrame()]
    for name in names:
        frames.append(build_betas(args, model, gwas_format, name))
    r = pandas.concat(frames)

    end = timer()
    elapsed = str(end - start)
    logging.info("Successfully parsed input gwas in %s seconds", elapsed)
    print("Successfully parsed input gwas in %s seconds" % elapsed)
    return r
def run(args):
    """Process GWAS input files, writing them to disk or returning them.

    Input files are either every entry of ``args.gwas_folder`` (optionally
    filtered by the regular expression ``args.gwas_file_pattern``) or the
    single ``args.gwas_file``. Each one is passed to ``build_betas``.

    * When ``args.output_folder`` is set, each result is written there as a
      tab-separated file and nothing is returned. Outputs that already exist
      are skipped.
    * Otherwise the per-file results are concatenated and returned.

    Args:
        args: parsed command-line arguments. This function reads
            ``gwas_folder``, ``gwas_file_pattern``, ``gwas_file``,
            ``model_db_path``, ``model_db_snp_key`` and ``output_folder``.

    Returns:
        ``None`` when ``args.output_folder`` is set; otherwise the ``pandas``
        concatenation of all ``build_betas`` results.

    Raises:
        Exceptions.ReportableException: if ``args.gwas_folder`` is set and no
            file in it matches.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        names.sort()  # cosmetic, because different filesystems/OS yield folders in different order
        if len(names) == 0:
            msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)

    model = None
    if args.model_db_path:
        model = PredictionModel.load_model(args.model_db_path, args.model_db_snp_key)

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            # NOTE(review): os.path.join drops args.output_folder when `name`
            # is an absolute path (possible for args.gwas_file) -- confirm
            # that gwas_file is expected to be a bare file name here.
            output_path = os.path.join(args.output_folder, name)
            if ".gz" not in output_path:
                output_path += ".gz"

            if os.path.exists(output_path):
                logging.info("%s already exists, delete it if you want it to be done again", output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # The file content must agree with the name it is written under:
            # previously a plain-text input got the ".gz" suffix appended
            # above but was still written uncompressed. Inputs whose name
            # already contains ".gz" keep being gzipped as before.
            c = "gzip" if (".gz" in name or output_path.endswith(".gz")) else None
            b.to_csv(output_path, sep="\t", index=False, compression=c)

        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds", str(end - start))
    else:
        # Concatenate once rather than re-copying the accumulated frame for
        # every file. The empty seed frame the original accumulation started
        # from is kept so the concatenation inputs are unchanged.
        frames = [pandas.DataFrame()]
        for name in names:
            frames.append(build_betas(args, model, gwas_format, name))
        r = pandas.concat(frames)

        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds", str(end - start))
        return r
def run(args):
    """Build betas for the selected GWAS files and save or return them.

    The files to process come from ``args.gwas_folder`` (every entry, or only
    those matching the regular expression ``args.gwas_file_pattern`` when it
    is set); if no folder is given, the single ``args.gwas_file`` is used.
    ``build_betas`` is called once per file.

    With ``args.output_folder`` set, every result is saved into that folder
    as a tab-separated file, skipping outputs that already exist, and the
    function returns ``None``. Without it, all results are concatenated into
    one frame, which is returned.

    Args:
        args: parsed command-line arguments; the attributes read here are
            ``gwas_folder``, ``gwas_file_pattern``, ``gwas_file``,
            ``model_db_path``, ``model_db_snp_key`` and ``output_folder``.

    Returns:
        The concatenated ``build_betas`` results, or ``None`` when the
        results were written to ``args.output_folder`` instead.

    Raises:
        Exceptions.ReportableException: when ``args.gwas_folder`` is set but
            yields no matching file.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        names.sort()  # cosmetic, because different filesystems/OS yield folders in different order
        if len(names) == 0:
            msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)

    # The prediction model is optional; build_betas receives None without it.
    model = PredictionModel.load_model(args.model_db_path, args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            output_path = os.path.join(args.output_folder, name)
            if ".gz" not in output_path:
                output_path += ".gz"

            # Existing outputs are never overwritten; the user has to delete
            # them to force a re-run.
            if os.path.exists(output_path):
                logging.info("%s already exists, delete it if you want it to be done again", output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # Gzip whenever the file is written under a ".gz" name. Before,
            # compression was decided from the input name alone, so an
            # uncompressed input produced an uncompressed file that still
            # carried the ".gz" suffix appended above.
            gzip_output = ".gz" in name or output_path.endswith(".gz")
            b.to_csv(output_path, sep="\t", index=False, compression="gzip" if gzip_output else None)

        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds", str(end - start))
    else:
        # One concatenation over all frames avoids copying the growing result
        # once per file. The list starts with the same empty frame the
        # original loop accumulated into, so the inputs to concat are the same.
        frames = [pandas.DataFrame()]
        frames.extend(build_betas(args, model, gwas_format, name) for name in names)
        r = pandas.concat(frames)

        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds", str(end - start))
        return r