def readGWAS(args):
    """Read every matching GWAS file in ``args.gwas_folder`` into one data frame.

    The folder is listed through ``Utilities.contentsWithRegexpFromFolder``,
    optionally filtered by ``args.gwas_file_pattern``. Each matching name is
    handed to ``build_betas`` and the results are concatenated.

    Args:
        args: Parsed arguments. This function reads ``gwas_folder`` and
            ``gwas_file_pattern`` directly and forwards the whole object to
            ``validate``, ``GWASUtilities.gwas_format_from_args`` and
            ``build_betas``.

    Returns:
        The ``pandas.concat`` of the ``build_betas`` result of every file,
        taken in sorted file-name order.

    Raises:
        Exceptions.ReportableException: If no file in the folder matches.
    """
    start = timer()
    validate(args)
    regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
    names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    names.sort()  # cosmetic, because different filesystems/OS yield folders in different order

    if len(names) == 0:
        msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
        raise Exceptions.ReportableException(msg)

    # Function-call form: valid on Python 3 and prints the same text on Python 2.
    print("INFO: Reading GWAS data")
    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    # NOTE: loading a prediction model from args.model_db_path is disabled here,
    # so build_betas always receives None as its model.
    model = None

    # Collect every per-file frame and concatenate once; re-concatenating the
    # accumulator inside the loop copies all previously read rows each time.
    # The empty seed frame is kept so the inputs to concat match the original
    # incremental construction.
    frames = [pandas.DataFrame()]
    frames.extend(build_betas(args, model, gwas_format, name) for name in names)
    r = pandas.concat(frames)

    end = timer()
    elapsed = str(end - start)
    logging.info("Successfully parsed input gwas in %s seconds", elapsed)
    print("Successfully parsed input gwas in %s seconds" % (elapsed,))
    return r
Example #2
0
    def run(self):
        """Build betas for every GWAS file found in ``self.gwas_folder``.

        A weight-db logic object is created only when
        ``self.args.weight_db_path`` is set; otherwise ``None`` is passed to
        ``self.buildBetas``. The output folder is created if missing, and this
        happens before the check for an empty file list.

        Raises:
            Exceptions.ReportableException: If no file in ``self.gwas_folder``
                matches ``self.gwas_regexp``.
        """
        weight_db_logic = None
        if self.args.weight_db_path:
            logging.info("Loading weight model")
            weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.weight_db_path)

        gwas_files = Utilities.contentsWithRegexpFromFolder(self.gwas_folder, self.gwas_regexp)

        output_present = os.path.exists(self.output_folder)
        if not output_present:
            os.makedirs(self.output_folder)

        file_count = len(gwas_files)
        if file_count == 0:
            message = "No GWAS files found on %s with pattern %s" % (
                self.gwas_folder,
                self.gwas_regexp.pattern,
            )
            raise Exceptions.ReportableException(message)

        for gwas_file in gwas_files:
            try:
                self.buildBetas(weight_db_logic, gwas_file)
            except Exceptions.BadFilename as bad_name:
                # Unrelated extra content inside the folder: report it and
                # carry on with the remaining files.
                logging.info("Wrong file name: %s, skipping", bad_name.msg)
Example #3
0
def run(args):
    """Build betas for a single GWAS file or for every matching file in a folder.

    When ``args.gwas_folder`` is set, the folder is listed through
    ``Utilities.contentsWithRegexpFromFolder`` (optionally filtered by
    ``args.gwas_file_pattern``); otherwise ``args.gwas_file`` is the only
    input. Each input is processed with ``build_betas``.

    Args:
        args: Parsed arguments. This function reads ``gwas_folder``,
            ``gwas_file_pattern``, ``gwas_file``, ``model_db_path``,
            ``model_db_snp_key`` and ``output_folder``, and forwards the whole
            object to ``validate``, ``GWASUtilities.gwas_format_from_args``
            and ``build_betas``.

    Returns:
        ``None`` when ``args.output_folder`` is set: each result is written
        there as a gzip-compressed, tab-separated file, and inputs whose
        output already exists are skipped. Otherwise the ``pandas.concat`` of
        all per-file results.

    Raises:
        Exceptions.ReportableException: If ``args.gwas_folder`` is set and no
            file in it matches.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        names.sort()  # cosmetic, because different filesystems/OS yield folders in different order

        if len(names) == 0:
            msg = "No GWAS files found on %s with pattern %s" % (
                args.gwas_folder,
                args.gwas_file_pattern,
            )
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(
        args.model_db_path,
        args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            # Only the file name is used: args.gwas_file may carry directories,
            # and os.path.join discards the output folder altogether when its
            # second argument is an absolute path.
            output_path = os.path.join(args.output_folder, os.path.basename(name))
            # Test the suffix, not a substring: ".gz" elsewhere in the path
            # (e.g. in a folder name) must not suppress the extension.
            if not output_path.endswith(".gz"):
                output_path += ".gz"
            if os.path.exists(output_path):
                logging.info(
                    "%s already exists, delete it if you want it to be done again",
                    output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # The output path always ends in ".gz", so the content is always
            # gzip-compressed, whatever the input file was called.
            b.to_csv(output_path, sep="\t", index=False, compression="gzip")
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds",
                     str(end - start))
    else:
        # Collect every per-file frame and concatenate once; re-concatenating
        # the accumulator inside the loop copies all earlier rows each time.
        # The empty seed frame is kept so the inputs to concat match the
        # original incremental construction.
        frames = [pandas.DataFrame()]
        frames.extend(build_betas(args, model, gwas_format, name) for name in names)
        r = pandas.concat(frames)
        end = timer()
        logging.info("Successfully parsed input gwas in %s seconds",
                     str(end - start))

        return r
Example #4
0
def expression_from_args(args, prediction_results = None):
    """Pick and build the expression source described by the arguments.

    Sources are tried in this order:
      1. ``prediction_results`` (when truthy), which must be a
         ``BasicPredictionRepository``; its ``genes`` are wrapped with
         ``Expression.ExpressionFromData``.
      2. ``args.hdf5_expression_file``, opened with ``HDF5Expression.Expression``.
      3. ``args.expression_file``, opened with ``PlainTextExpression.Expression``.

    Raises:
        Exceptions.ReportableException: If ``prediction_results`` is truthy but
            is not a ``BasicPredictionRepository``.
        RuntimeError: If none of the three sources is available.
    """
    if prediction_results:
        if not isinstance(prediction_results, BasicPredictionRepository):
            raise Exceptions.ReportableException("Invalid prediction results")
        logging.info("Preparing PrediXcan context from data")
        return Expression.ExpressionFromData(prediction_results.genes)

    if args.hdf5_expression_file:
        logging.info("Preparing PrediXcan HDF5 context")
        return HDF5Expression.Expression(args.hdf5_expression_file)

    if args.expression_file:
        logging.info("Preparing PrediXcan context")
        return PlainTextExpression.Expression(args.expression_file)

    raise RuntimeError("Could not build context from arguments")