Example #1
    Collect rates in a Pandas dataframe
    >>> colnames = ["inputRowsPerSecond", "processedRowsPerSecond", "timestamp"]
    >>> monitor_progress_webui(
    ...     countquery, 1, colnames, ".", "test.csv", "live", True)

    Stop the sink
    >>> countquery.stop()
    """
    t = threading.Timer(tinterval,
                        monitor_progress_webui,
                        args=(countquery, tinterval, colnames, outpath,
                              outputname, mode, test))

    # Start it as a daemon
    t.daemon = True
    t.start()

    # Monitor the progress of the stream, and save data for the webUI
    save_monitoring(outpath, outputname, countquery, colnames, mode)

    if test:
        t.cancel()


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    # Run the Spark test suite
    spark_unit_tests(globals(), withstreaming=True)
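
The function above re-schedules itself with threading.Timer so that stream statistics are written every tinterval seconds without blocking the driver. Below is a minimal, self-contained sketch of that recurring daemon-timer pattern, with a hypothetical poll_metrics callback standing in for save_monitoring:

import threading
import time

def poll_metrics(state):
    # Hypothetical stand-in for save_monitoring: record one data point per tick.
    state["n"] += 1
    print("tick", state["n"])

def monitor(state, tinterval, max_ticks=None):
    # Re-arm the timer first so the next tick is scheduled before doing work,
    # and mark it as a daemon so it dies with the main process.
    t = threading.Timer(tinterval, monitor, args=(state, tinterval, max_ticks))
    t.daemon = True
    t.start()

    poll_metrics(state)

    # Equivalent of the `test` flag above: cancel the pending timer to stop.
    if max_ticks is not None and state["n"] >= max_ticks:
        t.cancel()

if __name__ == "__main__":
    monitor({"n": 0}, tinterval=1, max_ticks=3)
    time.sleep(5)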
Example #2
    >>> df_flat = explodearrayofstruct(df, "prv_candidates")
    >>> "prv_candidates_ra" in df_flat.schema.fieldNames()
    True

    # Each new column contains the array elements cast to string
    >>> s_flat = df_flat.schema
    >>> typeOf = {i.name: i.dataType.typeName() for i in s_flat.fields}
    >>> typeOf['prv_candidates_ra'] == 'string'
    True
    """
    sc = get_spark_context()
    obj = sc._jvm.com.astrolabsoftware.fink_broker.catalogUtils
    _df = obj.explodeArrayOfStruct(df._jdf, columnname)
    df_flatten = _java2py(sc, _df)
    return df_flatten


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    globs = globals()
    root = os.environ['FINK_HOME']
    globs["ztf_alert_sample"] = os.path.join(
        root, "schemas/template_schema_ZTF_3p3.avro")

    globs["ztf_alert_sample_rawdatabase"] = os.path.join(
        root, "schemas/template_schema_ZTF_rawdatabase.parquet")

    # Run the Spark test suite
    spark_unit_tests(globs, withstreaming=False)
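
The heavy lifting here is delegated through py4j to the Scala helper explodeArrayOfStruct. As an illustration only (an assumption about the intended result, not the fink-broker implementation), a similar flattening can be sketched in pure PySpark by pulling each struct field out of the array and casting it to string:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()

# Toy frame with an array-of-struct column (hypothetical data, not the ZTF schema).
df = spark.createDataFrame(
    [("ZTF18abc", [(20.39, -25.47), (20.40, -25.46)])],
    "objectId string, prv_candidates array<struct<ra:double,dec:double>>")

# One new column per struct field, cast to string as in the doctest above.
element_type = df.schema["prv_candidates"].dataType.elementType
flat_cols = [
    F.col("prv_candidates.{}".format(name)).cast("string").alias(
        "prv_candidates_{}".format(name))
    for name in element_type.fieldNames()]

df_flat = df.select("objectId", *flat_cols)
df_flat.printSchema()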
Example #3
    # Apply level one processor: nalerthist
    logger.info("New processor: nalerthist")
    df = df.withColumn('nalerthist', nalerthist(df['cmagpsf']))

    # Apply level one processor: kilonova detection
    logger.info("New processor: kilonova")
    knscore_args = ['cjd', 'cfid', 'cmagpsf', 'csigmapsf']
    knscore_args += [
        F.col('candidate.jdstarthist'),
        F.col('cdsxmatch'),
        F.col('candidate.ndethist')
    ]
    df = df.withColumn('rf_kn_vs_nonkn', knscore(*knscore_args))

    # Drop temp columns
    df = df.drop(*expanded)

    return df


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    globs = globals()
    root = os.environ['FINK_HOME']
    globs["ztf_alert_sample"] = os.path.join(
        root, "online/raw")

    # Run the Spark test suite
    spark_unit_tests(globs)
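
nalerthist and knscore are pandas UDFs imported from fink_science. A toy series-to-series pandas UDF in the same style (a sketch only, not the fink_science implementation) that counts the valid magnitude measurements in each alert's history could look like this:

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf

@pandas_udf("bigint")
def nvalid_mag(cmagpsf: pd.Series) -> pd.Series:
    # Each element of `cmagpsf` is one alert's list of historical magnitudes;
    # count the measurements that are not NaN.
    return cmagpsf.apply(lambda mags: int(pd.Series(mags).notna().sum()))

if __name__ == "__main__":
    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame(
        [([18.2, float("nan"), 18.5],), ([17.9],)], "cmagpsf array<double>")
    df.withColumn("nvalid", nvalid_mag("cmagpsf")).show()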
Example #4
    >>> df_filtered.show()
    +------------+------------+-------------+----------------+----------------------------+
    |    objectId|candidate_ra|candidate_dec|candidate_magpsf|cross_match_alerts_per_batch|
    +------------+------------+-------------+----------------+----------------------------+
    |ZTF18aceatkx|   20.393772|  -25.4669463|       16.074839|                        Star|
    +------------+------------+-------------+----------------+----------------------------+
    <BLANKLINE>
    """
    # Get all the columns in the DataFrame
    df_cols = df.columns

    # Parse the xml file
    cols_to_distribute, rules_list = parse_xml_rules(rules_xml, df_cols)

    # Obtain the Filtered DataFrame:
    # Select cols to distribute
    df_filtered = df.select(cols_to_distribute)

    # Apply filters
    for rule in rules_list:
        df_filtered = df_filtered.filter(rule)

    return df_filtered


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    # Run the Spark test suite
    spark_unit_tests(globals())
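
parse_xml_rules is specific to fink-broker; judging from how its results are used above, it is assumed here to return a list of column names and a list of SQL filter expressions. Under that assumption, the select-then-chain-filters step boils down to the following sketch:

from functools import reduce
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical parser output: columns to keep and filter expressions to AND together.
cols_to_distribute = ["objectId", "candidate_magpsf", "cross_match_alerts_per_batch"]
rules_list = ["candidate_magpsf < 17.0", "cross_match_alerts_per_batch = 'Star'"]

df = spark.createDataFrame(
    [("ZTF18aceatkx", 16.07, "Star"), ("ZTF18aaaaaaa", 19.50, "Unknown")],
    cols_to_distribute)

# Each .filter call narrows the previous result, so chaining them is
# equivalent to AND-ing all the rules.
df_filtered = reduce(lambda d, rule: d.filter(rule),
                     rules_list,
                     df.select(cols_to_distribute))
df_filtered.show()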
Example #5

            else:
                tracklet_names[tracklet_positions] = 'TRCK_{}_{:02d}'.format(
                    time_str, index_tracklet)
                index_tracklet += 1

        return pdf.assign(tracklet=tracklet_names)

    # extract tracklet information - beware there could be duplicated rows
    # so we use dropDuplicates to avoid these.
    df_trck = df_filt_tracklet\
        .cache()\
        .dropDuplicates(['jd', 'xpos', 'ypos'])\
        .groupBy('jd')\
        .apply(extract_tracklet_number)\
        .select(['candid', 'tracklet'])\
        .filter(F.col('tracklet') != '')

    return df_trck


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    globs = globals()
    root = os.environ['FINK_HOME']
    globs["ztf_alert_sample"] = os.path.join(
        root, "ztf_alerts/tracklet_TRCK1615_00")

    # Run the Spark test suite
    spark_unit_tests(globs, withstreaming=True)
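
extract_tracklet_number is truncated above; it is a grouped-map pandas UDF applied per exposure (per jd). A toy sketch of that groupBy().apply() pattern, with a dummy labelling function in place of the actual tracklet logic:

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, PandasUDFType

spark = SparkSession.builder.getOrCreate()

df = spark.createDataFrame(
    [(2459300.5, 1, 10.0), (2459300.5, 2, 11.0), (2459301.5, 3, 12.0)],
    "jd double, candid long, xpos double")

# Grouped-map pandas UDF: each group (one exposure, keyed by jd) arrives as a
# pandas DataFrame, and a pandas DataFrame with the declared schema is returned.
@pandas_udf("candid long, tracklet string", PandasUDFType.GROUPED_MAP)
def label_group(pdf):
    # Dummy labelling: tag every candidate of the exposure with its jd.
    return pdf[["candid"]].assign(tracklet="TRCK_{}".format(pdf["jd"].iloc[0]))

df.groupBy("jd").apply(label_group).show()

In recent Spark versions the same thing is usually written with groupBy(...).applyInPandas(func, schema), which avoids the deprecated PandasUDFType constant.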