def add_glow(doctest_namespace, spark):
    # Register Glow on the shared session (new_session=False) and expose
    # common names to doctests; `functions` is pyspark.sql.functions.
    glow.register(spark, new_session=False)
    doctest_namespace['Row'] = Row
    doctest_namespace['spark'] = spark
    doctest_namespace['lit'] = functions.lit
    doctest_namespace['col'] = functions.col
    doctest_namespace['glow'] = glow
def add_spark(doctest_namespace, spark):
    # Same as add_glow, but lets glow.register use its default new_session behavior.
    glow.register(spark)
    doctest_namespace['Row'] = Row
    doctest_namespace['spark'] = spark
    doctest_namespace['lit'] = functions.lit
    doctest_namespace['col'] = functions.col
    doctest_namespace['glow'] = glow
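# A hedged sketch of how these fixtures are consumed (hypothetical module code,
# not from the source): with the names above injected through pytest's
# doctest_namespace, a collected doctest can use `spark` and `Row` directly
# without importing them.
def count_rows(df):
    """Counts the rows of a DataFrame.

    >>> count_rows(spark.createDataFrame([Row(id=1), Row(id=2)]))
    2
    """
    return df.count()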
def test_register(spark):
    glow.register(spark)
    df = spark.read.format("vcf") \
        .load("test-data/1kg_sample.vcf")
    stats = df.selectExpr("expand_struct(dp_summary_stats(genotypes))") \
        .select("min", "max") \
        .head()
    assert stats.asDict() == Row(min=1.0, max=23).asDict()
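# A hedged aside (not from the source): the same aggregation via Glow's Python
# function wrappers instead of SQL strings, assuming glow.expand_struct and
# glow.dp_summary_stats as exposed by the glow package.
def dp_stats(df):
    # Expand the depth summary struct (min/max/mean/stdDev) into top-level columns
    return df.select(glow.expand_struct(glow.dp_summary_stats('genotypes')))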
def test_register(spark):
    glow.register(spark)
    row_one = Row(Row(str_col='foo', int_col=1, bool_col=True))
    row_two = Row(Row(str_col='bar', int_col=2, bool_col=False))
    df = spark.createDataFrame([row_one, row_two], schema=['base_col'])
    added_col_row = df.selectExpr(
        "add_struct_fields(base_col, 'float_col', 3.14, "
        "'rev_str_col', reverse(base_col.str_col)) as added_col") \
        .filter("added_col.str_col = 'foo'") \
        .head()
    assert added_col_row.added_col.rev_str_col == 'oof'
# MAGIC #!/usr/bin/env bash
# MAGIC rm -r /opt/liftover
# MAGIC mkdir /opt/liftover
# MAGIC curl https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/funcotator/data_sources/gnomAD/b37ToHg38.over.chain --output /opt/liftover/b37ToHg38.over.chain
# MAGIC ```
# MAGIC In this demo, we perform coordinate and variant liftover from b37 to hg38.
# MAGIC
# MAGIC To perform variant liftover, you must download a reference file to each node of the cluster. Here, we use the FUSE mount to access the reference genome at
# MAGIC ```/dbfs/databricks-datasets/genomics/grch38/data/GRCh38_full_analysis_set_plus_decoy_hla.fa```

# COMMAND ----------

# DBTITLE 1,Import glow and define path variables
import glow
spark = glow.register(spark)

chain_file = '/opt/liftover/b37ToHg38.over.chain'
reference_file = '/dbfs/databricks-datasets/genomics/grch38/data/GRCh38_full_analysis_set_plus_decoy_hla.fa'
vcf_file = 'dbfs:/databricks-datasets/genomics/1kg-vcfs/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz'

# COMMAND ----------

# DBTITLE 1,First, read in a VCF from a flat file or Delta Lake table.
input_df = (spark.read
            .format("vcf")
            .load(vcf_file)
            .limit(1)
            .cache())

# COMMAND ----------

# MAGIC %md
# MAGIC
# MAGIC Now apply the `lift_over_coordinates` UDF, with the parameters as follows:
# MAGIC - chromosome (`string`)
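# COMMAND ----------

# DBTITLE 1,A hedged liftover sketch (not part of the original notebook)
# One plausible way to apply the lift_over_coordinates expression to input_df.
# The contigName/start/end columns follow Glow's VCF schema; the 0.95 minimum
# match ratio is an illustrative assumption, and the exact signature may differ
# across Glow versions.
lifted_df = input_df.selectExpr(
    "*",
    f"lift_over_coordinates(contigName, start, end, '{chain_file}', 0.95) as lifted")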
import sys

import glow
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, DoubleType

# Command-line arguments
root = sys.argv[1]
freeze = sys.argv[2][1:-1] + "/"  # drop the enclosing characters and ensure a trailing slash
pheno = sys.argv[3]
covar = sys.argv[4]
split = sys.argv[5]
offsets = sys.argv[6]
jobname = sys.argv[7]
splitctg = sys.argv[8]
repart = sys.argv[9]

spark = SparkSession \
    .builder \
    .appName(jobname) \
    .getOrCreate()
glow.register(spark, False)
spark.udf.registerJavaFunction("chartodoublearray",
                               "org.gorpipe.spark.udfs.CharToDoubleArray",
                               ArrayType(DoubleType()))

rootfreeze = root + freeze

label_df = pd.read_csv(root + pheno, sep='\t', index_col=0)

covariate_df = None
if len(covar) > 0:
    covariates = pd.read_csv(root + covar, sep='\t', index_col=0)
    # Mean-impute missing values, then standardize each covariate
    covariate_df = covariates.fillna(covariates.mean())
    covariate_df = (covariate_df - covariate_df.mean()) / covariate_df.std()
covariate_df
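# A hedged usage sketch (illustration only, not from the original script): the
# registered Java UDF is callable from SQL expressions. The toy input below is
# hypothetical; the real encoding is whatever CharToDoubleArray expects, and the
# UDF jar must already be on the cluster classpath.
demo_df = spark.createDataFrame([("abc",)], ["gt_chars"])
dosage_df = demo_df.selectExpr("chartodoublearray(gt_chars) AS dosages")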
def register_glow(spark):
    glow.register(spark, new_session=False)
spark = (
    SparkSession.builder
    .appName('desmi_inject_gnomad')
    .config("spark.jars.packages", ",".join([
        "io.projectglow:glow-spark3_2.12:1.0.0",
    ]))
    .config("spark.local.dir", os.environ.get("TMP"))
    .config("spark.master", f"local[{N_CPU},{MAX_FAILURES}]")
    .config("spark.sql.shuffle.partitions", "2001")
    .config("spark.sql.execution.arrow.enabled", "true")
    .config("spark.driver.maxResultSize", "48G")
    .config("spark.task.maxFailures", MAX_FAILURES)
    .getOrCreate()
)
glow.register(spark)
spark
# -

INPUT_VCF = snakemake.input["vcf"]
INPUT_VCF

OUTPUT_PQ = snakemake.output["vep"]
OUTPUT_PQ

# +
FASTA = snakemake.input["fasta"]
GTF = snakemake.input["gtf"]
HUMAN_GENOME_VERSION = snakemake.params["human_genome_version"]
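# +
# A hedged sketch of one plausible next step (not from the original source):
# load the VCF with Glow's reader and persist it to the Parquet path above.
vcf_df = spark.read.format("vcf").load(INPUT_VCF)
vcf_df.write.mode("overwrite").parquet(OUTPUT_PQ)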
def spark_session():
    spark = (SparkSession.builder
             .config("spark.jars.packages", "io.projectglow:glow_2.11:0.5.0")
             .config("spark.sql.execution.arrow.pyspark.enabled", "true")
             .getOrCreate())
    glow.register(spark)
    return spark
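# A hedged usage sketch (assumes spark_session is exposed as a pytest fixture;
# the VCF path is hypothetical): a test consuming the Glow-enabled session.
def test_read_vcf(spark_session):
    df = spark_session.read.format("vcf").load("test-data/sample.vcf")
    assert "genotypes" in df.columns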
def test_new_session(spark):
    # Registering with new_session=False should return the same underlying JVM session
    sess = glow.register(spark, new_session=False)
    assert sess._jsparkSession.equals(spark._jsparkSession)

    # Registering with new_session=True should hand back a distinct session
    sess = glow.register(spark, new_session=True)
    assert not sess._jsparkSession.equals(spark._jsparkSession)