    Collect rates in a Pandas dataframe
    >>> colnames = ["inputRowsPerSecond", "processedRowsPerSecond", "timestamp"]
    >>> monitor_progress_webui(
    ...     countquery, 1, colnames, ".", "test.csv", "live", True)

    Stop the sink
    >>> countquery.stop()
    """
    # Re-arm: schedule this same function to run again after `tinterval`
    # seconds, with identical arguments (periodic monitoring loop).
    t = threading.Timer(
        tinterval,
        monitor_progress_webui,
        args=(countquery, tinterval, colnames, outpath, outputname, mode, test))

    # Start it as a daemon so it does not block interpreter shutdown
    t.daemon = True
    t.start()

    # Monitor the progress of the stream, and save data for the webUI
    save_monitoring(outpath, outputname, countquery, colnames, mode)

    # In test mode (doctest), cancel the pending timer so the loop terminates
    if test:
        t.cancel()


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    # Run the Spark test suite
    spark_unit_tests(globals(), withstreaming=True)
    >>> df_flat = explodearrayofstruct(df, "prv_candidates")
    >>> "prv_candidates_ra" in df_flat.schema.fieldNames()
    True

    # Each new column contains array element cast to string
    >>> s_flat = df_flat.schema
    >>> typeOf = {i.name: i.dataType.typeName() for i in s_flat.fields}
    >>> typeOf['prv_candidates_ra'] == 'string'
    True
    """
    # Delegate the actual flattening to the Scala implementation
    # (com.astrolabsoftware.fink_broker.catalogUtils) through the JVM gateway,
    # then convert the returned Java DataFrame back to a Python DataFrame.
    sc = get_spark_context()
    obj = sc._jvm.com.astrolabsoftware.fink_broker.catalogUtils
    _df = obj.explodeArrayOfStruct(df._jdf, columnname)
    df_flatten = _java2py(sc, _df)

    return df_flatten


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """
    globs = globals()

    # Inject alert schema samples for the doctests; requires FINK_HOME to be set
    root = os.environ['FINK_HOME']
    globs["ztf_alert_sample"] = os.path.join(
        root, "schemas/template_schema_ZTF_3p3.avro")
    globs["ztf_alert_sample_rawdatabase"] = os.path.join(
        root, "schemas/template_schema_ZTF_rawdatabase.parquet")

    # Run the Spark test suite
    spark_unit_tests(globs, withstreaming=False)
# Apply level one processor: nalerthist logger.info("New processor: nalerthist") df = df.withColumn('nalerthist', nalerthist(df['cmagpsf'])) # Apply level one processor: kilonova detection logger.info("New processor: kilonova") knscore_args = ['cjd', 'cfid', 'cmagpsf', 'csigmapsf'] knscore_args += [ F.col('candidate.jdstarthist'), F.col('cdsxmatch'), F.col('candidate.ndethist') ] df = df.withColumn('rf_kn_vs_nonkn', knscore(*knscore_args)) # Drop temp columns df = df.drop(*expanded) return df if __name__ == "__main__": """ Execute the test suite with SparkSession initialised """ globs = globals() root = os.environ['FINK_HOME'] globs["ztf_alert_sample"] = os.path.join( root, "online/raw") # Run the Spark test suite spark_unit_tests(globs)
    >>> df_filtered.show()
    +------------+------------+-------------+----------------+----------------------------+
    |    objectId|candidate_ra|candidate_dec|candidate_magpsf|cross_match_alerts_per_batch|
    +------------+------------+-------------+----------------+----------------------------+
    |ZTF18aceatkx|   20.393772|  -25.4669463|       16.074839|                        Star|
    +------------+------------+-------------+----------------+----------------------------+
    <BLANKLINE>
    """
    # Get all the columns in the DataFrame
    df_cols = df.columns

    # Parse the xml file
    cols_to_distribute, rules_list = parse_xml_rules(rules_xml, df_cols)

    # Obtain the Filtered DataFrame:
    # Select cols to distribute
    df_filtered = df.select(cols_to_distribute)

    # Apply filters: each rule is applied in sequence (logical AND of all rules)
    for rule in rules_list:
        df_filtered = df_filtered.filter(rule)

    return df_filtered


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """

    # Run the Spark test suite
    spark_unit_tests(globals())
            else:
                # Assign a tracklet label of the form TRCK_<time>_<counter>
                # to every alert belonging to this tracklet.
                tracklet_names[tracklet_positions] = 'TRCK_{}_{:02d}'.format(
                    time_str, index_tracklet)
                index_tracklet += 1

        return pdf.assign(tracklet=tracklet_names)

    # extract tracklet information - beware there could be duplicated rows
    # so we use dropDuplicates to avoid these.
    # Alerts with an empty tracklet name (not part of any tracklet) are
    # discarded by the final filter.
    df_trck = df_filt_tracklet\
        .cache()\
        .dropDuplicates(['jd', 'xpos', 'ypos'])\
        .groupBy('jd')\
        .apply(extract_tracklet_number)\
        .select(['candid', 'tracklet'])\
        .filter(F.col('tracklet') != '')

    return df_trck


if __name__ == "__main__":
    """ Execute the test suite with SparkSession initialised """
    globs = globals()

    # Tracklet alert sample for the doctests; requires FINK_HOME to be set
    root = os.environ['FINK_HOME']
    globs["ztf_alert_sample"] = os.path.join(
        root, "ztf_alerts/tracklet_TRCK1615_00")

    # Run the Spark test suite
    spark_unit_tests(globs, withstreaming=True)