def build_iter(self):
    """
    Iteratively build the model, yielding any problems as :class:`ValidationError` instances.

    For debugging, the unified model at :attr:`model` may contain intermediate results
    at any time, even if construction has failed.  Check the :attr:`_errored` flag if
    necessary.
    """
    steps = [
        (self._prevalidate, 'prevalidate'),
        (self._merge, 'merge'),
        (self._validate, 'validate'),
        (self._curate, 'curate'),
    ]

    self._errored = False
    self.model = None
    for (i, (step, step_name)) in enumerate(steps, start=1):
        try:
            for err in step():
                yield err
        except ValidationError as ex:
            self._errored = True
            yield ex
        except Exception as ex:
            self._errored = True
            nex = ValidationError(
                'Model step-%i(%s) failed due to: %s' % (i, step_name, ex))
            nex.cause = ex
            yield nex

        if self._errored:
            yield ValidationError(
                'Gave up building model after step %i.%s (out of %i).' %
                (i, step_name, len(steps)))
            break
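# A minimal sketch of how a generator-style builder such as build_iter() might be
# driven: collect every reported problem, then decide whether to abort.  The
# `builder` object, its step methods, and the jsonschema import are illustrative
# assumptions, not part of the original API.
from jsonschema import ValidationError


def build_or_raise(builder):
    """Run builder.build_iter(), report every problem, and raise on failure."""
    problems = list(builder.build_iter())  # the generator yields ValidationError instances
    for problem in problems:
        print("PROBLEM:", problem)
    if builder._errored:
        raise ValidationError(
            "Model construction failed with %i problem(s)." % len(problems))
    return builder.model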
def _rule_properties_draft3(self, properties, instance, schema):
    if not self.is_type(instance, "object"):
        return

    for prop, subschema in self._iter_iprop_pairs(properties):
        if self._is_iprop_in(instance, prop):
            for error in self.descend(
                    self._get_iprop(instance, prop),
                    subschema,
                    path=prop,
                    schema_path=prop,
            ):
                yield error
        elif subschema.get("required", False):
            error = ValidationError("%r is a required property" % prop)
            error._set(
                validator="required",
                validator_value=subschema["required"],
                instance=instance,
                schema=schema,
            )
            error.path.appendleft(prop)
            error.schema_path.extend([prop, "required"])
            yield error
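# For context, the draft-3 `required` keyword handled by the rule above is a
# boolean inside each property subschema, whereas draft 4 and later use an array
# on the parent object.  The two schemas below are illustrative only.
draft3_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "required": True},  # draft-3: per-property flag
    },
}

draft4_schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}},
    "required": ["name"],  # draft-4+: list of required property names
}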
def convert_duke_haase(schema, encoding, input_file, verbose=True, output=True, output_file=None, config={}, enforce_validation=True, reactor=None): DEFAULT_BEAD_MODEL = "SpheroTech URCP-38-2K" DEFAULT_BEAD_BATCH = "AJ02" duke_cytometer_configuration = """{ "channels": [{ "emission_filter": { "center": 488, "type": "bandpass", "width": 10 }, "excitation_wavelength": 488, "name": "FSC-A" }, { "emission_filter": { "center": 488, "type": "bandpass", "width": 10 }, "excitation_wavelength": 488, "name": "SSC-A" }, { "emission_filter": { "center": 530, "type": "bandpass", "width": 30 }, "excitation_wavelength": 488, "name": "BL1-A" }, { "emission_filter": { "center": 590, "type": "bandpass", "width": 40 }, "excitation_wavelength": 488, "name": "BL2-A" }, { "emission_filter": { "center": 695, "type": "bandpass", "width": 40 }, "excitation_wavelength": 488, "name": "BL3-A" }, { "emission_filter": { "center": 780, "type": "bandpass", "width": 60 }, "excitation_wavelength": 561, "name": "YL4-A" }, { "emission_filter": { "center": 695, "type": "bandpass", "width": 40 }, "excitation_wavelength": 561, "name": "YL3-A" }, { "emission_filter": { "center": 620, "type": "bandpass", "width": 15 }, "excitation_wavelength": 561, "name": "YL2-A" }, { "emission_filter": { "center": 585, "type": "bandpass", "width": 16 }, "excitation_wavelength": 561, "name": "YL1-A" }, { "emission_filter": { "center": 488, "type": "bandpass", "width": 10 }, "excitation_wavelength": 488, "name": "FSC-H" }, { "emission_filter": { "center": 488, "type": "bandpass", "width": 10 }, "excitation_wavelength": 488, "name": "SSC-H" }, { "emission_filter": { "center": 530, "type": "bandpass", "width": 30 }, "excitation_wavelength": 488, "name": "BL1-H" }, { "emission_filter": { "center": 590, "type": "bandpass", "width": 40 }, "excitation_wavelength": 488, "name": "BL2-H" }, { "emission_filter": { "center": 695, "type": "bandpass", "width": 40 }, "excitation_wavelength": 488, "name": "BL3-H" }, { "emission_filter": { "center": 780, "type": "bandpass", "width": 60 }, "excitation_wavelength": 561, "name": "YL4-H" }, { "emission_filter": { "center": 695, "type": "bandpass", "width": 40 }, "excitation_wavelength": 561, "name": "YL3-H" }, { "emission_filter": { "center": 620, "type": "bandpass", "width": 15 }, "excitation_wavelength": 561, "name": "YL2-H" }, { "emission_filter": { "center": 585, "type": "bandpass", "width": 16 }, "excitation_wavelength": 561, "name": "YL1-H" }, { "emission_filter": { "center": 488, "type": "bandpass", "width": 10 }, "excitation_wavelength": 488, "name": "FSC-W" }, { "emission_filter": { "center": 488, "type": "bandpass", "width": 10 }, "excitation_wavelength": 488, "name": "SSC-W" }, { "emission_filter": { "center": 530, "type": "bandpass", "width": 30 }, "excitation_wavelength": 488, "name": "BL1-W" }, { "emission_filter": { "center": 590, "type": "bandpass", "width": 40 }, "excitation_wavelength": 488, "name": "BL2-W" }, { "emission_filter": { "center": 695, "type": "bandpass", "width": 40 }, "excitation_wavelength": 488, "name": "BL3-W" }, { "emission_filter": { "center": 780, "type": "bandpass", "width": 60 }, "excitation_wavelength": 561, "name": "YL4-W" }, { "emission_filter": { "center": 695, "type": "bandpass", "width": 40 }, "excitation_wavelength": 561, "name": "YL3-W" }, { "emission_filter": { "center": 620, "type": "bandpass", "width": 15 }, "excitation_wavelength": 561, "name": "YL2-W" }, { "emission_filter": { "center": 585, "type": "bandpass", "width": 16 }, "excitation_wavelength": 561, "name": 
"YL1-W" } ] }""" duke_cytometer_configuration_object = json.loads( duke_cytometer_configuration) cytometer_channels = [] for channel in duke_cytometer_configuration_object['channels']: if channel['name'].endswith("-A"): cytometer_channels.append(channel['name']) if reactor is not None: helper = AgaveHelper(reactor.client) print("Helper loaded") else: print("Helper not loaded") # for SBH Librarian Mapping sbh_query = SynBioHubQuery(SD2Constants.SD2_SERVER) sbh_query.login(config["sbh"]["user"], config["sbh"]["password"]) input_fp_csvreader = csv.reader(open(input_file)) output_doc = {} lab = SampleConstants.LAB_DUKE_HAASE output_doc[SampleConstants.LAB] = lab output_doc[SampleConstants.SAMPLES] = [] headers = None is_cfu = False doe_format = "%Y%m%d" # This converter reads both CFU and FCS formatted metadata from Duke. They have different sets of fields # We key on the presence of a CFU column to determine which we are parsing # CFU Fields # 0 strain # 1 replicate # 2 treatment # 3 treatment_concentration # 4 treatment_concentration_unit # 5 treatment_time # 6 treatment_time_unit # 7 CFU # 8 culture_cells/ml # 9 date_of_experiment # 10 experiment_reference_url # 11 experiment_reference # 12 experiment_id # 13 parent_id # 14 estimated_cells_plated # 15 estimated_cells/ml # 16 percent_killed # 17 strain_class # 18 control_type # 19 sample_id # # FCS Fields # 0 strain # 1 replicate # 2 treatment # 3 treatment_concentration # 4 treatment_concentration_unit # 5 treatment_time # 6 treatment_time_unit # 7 culture_cells/ml # 8 date_of_experiment # 9 experiment_reference_url # 10 experiment_reference # 11 experiment_id # 12 parent_id # 13 strain_class # 14 control_type # 15 fcs_filename # 16 sytox_color # 17 sytox_concentration # 18 sytox_concentration_unit # 19 sample_id header_map = {} for row in input_fp_csvreader: if row[0] == "strain": headers = row for header_index, header in enumerate(headers): header_map[header] = header_index if "CFU" in header_map: is_cfu = True continue else: # Lookup experiment id, separate by measurement type if SampleConstants.EXPERIMENT_REFERENCE not in output_doc: if is_cfu: mt = SampleConstants.MT_CFU else: mt = SampleConstants.MT_FLOW output_doc[SampleConstants.EXPERIMENT_REFERENCE_URL] = row[ header_map["experiment_reference_url"]] # without measurement type - for filenames experiment_id_bak = row[header_map["experiment_id"]] output_doc[ SampleConstants.EXPERIMENT_ID] = namespace_experiment_id( experiment_id_bak + "_" + mt, lab) map_experiment_reference(config, output_doc) experiment_id = output_doc.get(SampleConstants.EXPERIMENT_ID) sample_doc = {} contents = [] strain = row[header_map["strain"]] replicate = row[header_map["replicate"]] treatment = row[header_map["treatment"]] sample_doc[SampleConstants.SAMPLE_ID] = namespace_sample_id( row[header_map["sample_id"]], lab, output_doc) sample_doc[ SampleConstants.REFERENCE_SAMPLE_ID] = namespace_sample_id( row[header_map["parent_id"]], lab, output_doc) sample_doc[SampleConstants.STRAIN] = create_mapped_name( experiment_id, strain, strain, lab, sbh_query, strain=True) sample_doc[SampleConstants.REPLICATE] = int(float(replicate)) m_time = None if len(treatment) > 0: treatment_concentration = row[ header_map["treatment_concentration"]] treatment_concentration_unit = row[ header_map["treatment_concentration_unit"]] if treatment == "heat": treatment_concentration_unit = treatment_concentration_unit.strip( ) if treatment_concentration_unit in ["C", "celsius"]: sample_doc[ SampleConstants.TEMPERATURE] = create_value_unit( 
treatment_concentration + ":celsius") else: raise ValueError("Unknown temperature {}".format( treatment_concentration_unit)) else: contents_append_value = create_media_component( experiment_id, treatment, treatment, lab, sbh_query, treatment_concentration + ":" + treatment_concentration_unit) contents.append(contents_append_value) treatment_time = row[header_map["treatment_time"]] treatment_time_unit = row[header_map["treatment_time_unit"]] # normalize to hours if treatment_time_unit in ["minute", "minutes"]: treatment_time = float(treatment_time) / 60.0 treatment_time_unit = "hour" if len(treatment_time_unit) > 0: m_time = create_value_unit( str(treatment_time) + ":" + treatment_time_unit) # controls strain_class = row[header_map["strain_class"]] control_type = row[header_map["control_type"]] if strain_class == "Control" and control_type == "Negative": sample_doc[SampleConstants. CONTROL_TYPE] = SampleConstants.CONTROL_EMPTY_VECTOR if strain_class == "Process": if control_type == SampleConstants.STANDARD_BEAD_FLUORESCENCE: sample_doc[ SampleConstants. STANDARD_TYPE] = SampleConstants.STANDARD_BEAD_FLUORESCENCE sample_doc[SampleConstants.STANDARD_ATTRIBUTES] = {} sample_doc[SampleConstants.STANDARD_ATTRIBUTES][ SampleConstants.BEAD_MODEL] = DEFAULT_BEAD_MODEL sample_doc[SampleConstants.STANDARD_ATTRIBUTES][ SampleConstants.BEAD_BATCH] = DEFAULT_BEAD_BATCH elif control_type == SampleConstants.STANDARD_BEAD_SIZE: sample_doc[ SampleConstants. STANDARD_TYPE] = SampleConstants.STANDARD_BEAD_SIZE # Styox if not is_cfu: sytox_color = row[header_map["sytox_color"]] if len(sytox_color) > 0: # concentration contents.append( create_media_component( experiment_id, "Sytox", "Sytox", lab, sbh_query, row[header_map["sytox_concentration"]] + ":" + row[header_map["sytox_concentration_unit"]])) #color sytox_color_content = create_media_component( experiment_id, "Sytox_color", "Sytox_color", lab, sbh_query) sytox_color_content["value"] = sytox_color contents.append(sytox_color_content) # Default Media yepd_media = create_media_component(experiment_id, "Media", "Media", lab, sbh_query) yepd_media["value"] = "YEPD" contents.append(yepd_media) if len(contents) > 0: sample_doc[SampleConstants.CONTENTS] = contents if not SampleConstants.TEMPERATURE in sample_doc: # default if not specified sample_doc[SampleConstants.TEMPERATURE] = create_value_unit( "22:celsius") measurement_doc = {} measurement_doc[SampleConstants.FILES] = [] if is_cfu: measurement_doc[ SampleConstants.MEASUREMENT_TYPE] = SampleConstants.MT_CFU else: measurement_doc[ SampleConstants.MEASUREMENT_TYPE] = SampleConstants.MT_FLOW measurement_doc[ SampleConstants.M_CHANNELS] = cytometer_channels # add default duke cytometer configuration if SampleConstants.CYTOMETER_CONFIG not in output_doc: output_doc[ SampleConstants. CYTOMETER_CONFIG] = duke_cytometer_configuration_object measurement_doc[ SampleConstants.MEASUREMENT_ID] = namespace_measurement_id( 1, lab, sample_doc, output_doc) measurement_doc[ SampleConstants. 
MEASUREMENT_GROUP_ID] = namespace_measurement_id( measurement_doc[SampleConstants.MEASUREMENT_TYPE] + "_1", lab, sample_doc, output_doc) if m_time is not None: measurement_doc[SampleConstants.TIMEPOINT] = m_time #CFU 305 #culture_cells_ml 2.33E+07 #estimated_cells_plated 583 #estimated_cells/ml 1.22E+07 #percent_killed 47.60% #date_of_experiment 6/10/20 cfu_data = {} doe = row[header_map["date_of_experiment"]] # excel trailing zeroes on strings: 20210430.0 if type(doe) == float: doe = str(int(doe)) if type(doe) == str and doe.endswith(".0"): doe = str(int(float(doe))) if is_cfu: if len(row[header_map["CFU"]]) > 0: cfu_data[headers[header_map["CFU"]]] = int( float(row[header_map["CFU"]])) cfu_data[headers[header_map["culture_cells/ml"]]] = int( float(row[header_map["culture_cells/ml"]])) cfu_data[headers[header_map["estimated_cells_plated"]]] = int( row[header_map["estimated_cells_plated"]]) cfu_data[headers[header_map["estimated_cells/ml"]]] = int( float(row[header_map["estimated_cells/ml"]])) cfu_data[headers[header_map["percent_killed"]]] = float( row[header_map["percent_killed"]]) cfu_data[headers[header_map[ "date_of_experiment"]]] = datetime.datetime.strptime( doe, doe_format).strftime(doe_format) else: #culture_cells/ml #date_of_experiment if len(row[header_map["culture_cells/ml"]]) > 0: cfu_data[headers[header_map["culture_cells/ml"]]] = int( float(row[header_map["culture_cells/ml"]])) cfu_data[headers[header_map[ "date_of_experiment"]]] = datetime.datetime.strptime( doe, doe_format).strftime(doe_format) measurement_doc["cfu_data"] = cfu_data file_id = namespace_file_id(1, lab, measurement_doc, output_doc) if is_cfu: file_type = SampleConstants.infer_file_type(input_file) measurement_doc[SampleConstants.FILES].append({ SampleConstants.M_NAME: experiment_id_bak + "__cfu_and_meta.csv", SampleConstants.M_TYPE: file_type, SampleConstants.M_LAB_LABEL: [SampleConstants.M_LAB_LABEL_RAW], SampleConstants.FILE_ID: file_id, SampleConstants.FILE_LEVEL: SampleConstants.F_LEVEL_0 }) else: filename = row[header_map["fcs_filename"]] file_type = SampleConstants.infer_file_type(filename) measurement_doc[SampleConstants.FILES].append({ SampleConstants.M_NAME: filename, SampleConstants.M_TYPE: file_type, SampleConstants.M_LAB_LABEL: [SampleConstants.M_LAB_LABEL_RAW], SampleConstants.FILE_ID: file_id, SampleConstants.FILE_LEVEL: SampleConstants.F_LEVEL_0 }) if SampleConstants.MEASUREMENTS not in sample_doc: sample_doc[SampleConstants.MEASUREMENTS] = [] sample_doc[SampleConstants.MEASUREMENTS].append(measurement_doc) output_doc[SampleConstants.SAMPLES].append(sample_doc) try: validate(output_doc, schema) if output is True or output_file is not None: if output_file is None: path = os.path.join("output/duke_haase", os.path.basename(input_file)) else: path = output_file if path.endswith(".csv"): path = path[:-4] + ".json" with open(path, 'w') as outfile: json.dump(output_doc, outfile, indent=4) return True except ValidationError as err: if enforce_validation: if verbose: print("Schema Validation Error: {0}\n".format(err)) raise ValidationError("Schema Validation Error", err) else: if verbose: print("Schema Validation Error: {0}\n".format(err)) return False return False
def format_expose(instance):
    if isinstance(instance, six.string_types):
        if not re.match(VALID_EXPOSE_FORMAT, instance):
            raise ValidationError("should be of the format 'PORT[/PROTOCOL]'")

    return True
def _check_higher_from_n_min_drive_set(n, n_min_drive_set):
    if n < n_min_drive_set:
        raise ValidationError(
            f"Must be higher than `{m.n_min_drive_set}`({n_min_drive_set})!")
def checkPlusAttributeConsistency(inDict):
    if 'confidentiality' in inDict.get('plus', {}).get('attribute', {}):
        if 'confidentiality' not in inDict.get('attribute', {}):
            raise ValidationError(
                "plus.attribute.confidentiality present but confidentiality is not an affected attribute."
            )
import io
import json
import sys

from jsonschema import validate, ValidationError

schema = json.load(io.open('schema.json', encoding='utf-8'))

seen_ids = set()
for file in sys.argv[1:]:
    source = json.load(io.open(file, encoding='utf-8'))
    try:
        validate(source, schema)
        id = source['properties']['id']
        if id in seen_ids:
            raise ValidationError('Id %s used multiple times' % id)
        seen_ids.add(id)
        sys.stdout.write('.')
        sys.stdout.flush()
    except ValidationError as e:
        print(file)
        raise
print('')
def checkSocialIntegrity(inDict):
    if 'social' in inDict['action']:
        if 'Alter behavior' not in inDict.get('attribute', {}).get('integrity', {}).get('variety', []):
            raise ValidationError(
                "action.social present, but Alter behavior not in attribute.integrity.variety")
    return True
if not os.path.exists(filename):
    logger.debug("{} does not exist, skip".format(filename))
    continue

try:
    ## dict_raise_on_duplicates raises error on duplicate keys in geojson
    source = json.load(io.open(filename, encoding='utf-8'),
                       object_pairs_hook=dict_raise_on_duplicates)

    ## jsonschema validate
    validator.validate(source, schema)
    sourceid = source['properties']['id']
    if sourceid in seen_ids:
        raise ValidationError('Id %s used multiple times' % sourceid)
    seen_ids.add(sourceid)

    ## {z} instead of {zoom}
    if '{z}' in source['properties']['url']:
        raise ValidationError('{z} found instead of {zoom} in tile url')

    ## Check for license url. Too many missing to mark as required in schema.
    if 'license_url' not in source['properties']:
        logger.debug("{} has no license_url".format(filename))

    if 'attribution' not in source['properties']:
        logger.debug("{} has no attribution".format(filename))

    ## Check for big fat embedded icons
    if 'icon' in source['properties']:
def validate_params(self):
    if not self.event.get('body'):
        raise ValidationError('Request parameter is required')
    validate(self.params, self.get_schema())
def _check_harvested(record):
    """A harvested document cannot be linked to an order line."""
    related_document = record.document
    if related_document and related_document.harvested:
        msg = _('Cannot link to a harvested document')
        raise ValidationError(msg)
def validate(json_to_validate, schema):
    validator = Draft7Validator(schema, format_checker=format_checker)
    errors = list(validator.iter_errors(json_to_validate))
    if errors:
        raise ValidationError(build_error_message(errors))
    return json_to_validate
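# A hedged usage sketch of the pattern above: Draft7Validator.iter_errors()
# reports every violation instead of stopping at the first one.  The schema and
# payload are invented for illustration.
from jsonschema import Draft7Validator

example_schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    "required": ["name", "age"],
}
example_payload = {"age": "forty-two"}  # missing "name", wrong type for "age"

for error in sorted(Draft7Validator(example_schema).iter_errors(example_payload), key=str):
    print(error.message)  # both problems are reported in one pass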
def _maybe_validate_schema(instance: Dict[str, Any], schema: Dict[str, Any],
                           validate_schema: bool) -> None:
    if validate_schema:
        try:
            jsonschema.validate(instance, schema)
        except TypeError:
            raise ValidationError("Invalid schema")
def validate_schema_postage(instance):
    if isinstance(instance, str):
        if instance not in ["first", "second"]:
            raise ValidationError("invalid. It must be either first or second.")
    return True
def validate_content_type(swagger: Mapping, content_type: str):
    consumes = swagger.get('consumes')
    if consumes and not any(content_type == consume for consume in consumes):
        raise ValidationError(
            message='Unsupported content type: {}'.format(content_type))
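# A hedged usage example for the check above with a minimal Swagger/OpenAPI 2.0
# style mapping; the values are invented for illustration.
swagger_stub = {"consumes": ["application/json"]}

validate_content_type(swagger_stub, "application/json")  # accepted silently
try:
    validate_content_type(swagger_stub, "text/xml")  # not listed in `consumes`
except ValidationError as err:
    print(err)  # Unsupported content type: text/xml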
    'memory_swap': 'memswap_limit',
    'port': 'ports',
    'privilege': 'privileged',
    'priviliged': 'privileged',
    'privilige': 'privileged',
    'volume': 'volumes',
    'workdir': 'working_dir',
}

VALID_NAME_CHARS = r'[a-zA-Z0-9\._\-]'


@FormatChecker.cls_checks(
    format="ports",
    raises=ValidationError(
        "Invalid port formatting, it should be "
        "'[[remote_ip:]remote_port:]port[/protocol]'"))
def format_ports(instance):
    try:
        split_port(instance)
    except ValueError:
        return False
    return True


def validate_service_names(func):
    @wraps(func)
    def func_wrapper(config):
        for service_name in config.keys():
            if type(service_name) is int:
                raise ConfigurationError(
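# A hedged sketch of how a custom format check such as format_ports() above is
# actually enforced: register it on a FormatChecker and build a validator with
# that checker, so schemas using {"format": "ports"} reject bad values.  The
# simplified check body below stands in for split_port() and is an assumption.
from jsonschema import Draft7Validator, FormatChecker

example_checker = FormatChecker()


@example_checker.checks("ports", raises=ValueError)
def example_ports_format(value):
    # Stand-in for split_port(): accept "80" or "8080:80" style values only.
    for part in str(value).split(":"):
        if not part.isdigit():
            raise ValueError("invalid port: %r" % value)
    return True


ports_validator = Draft7Validator({"format": "ports"}, format_checker=example_checker)
ports_validator.validate("8080:80")   # passes
# ports_validator.validate("eighty")  # would raise ValidationError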
def checkSecurityIncident(inDict):
    if inDict['security_incident'] == "Confirmed":
        if 'attribute' not in inDict:
            raise ValidationError(
                "security_incident Confirmed but attribute section not present")
    return True
def checkMisuseActor(inDict):
    if ('misuse' in inDict['action']
            and 'internal' not in inDict['actor']
            and 'partner' not in inDict['actor']):
        yield ValidationError(
            "Misuse in action, but no internal or partner actor defined. "
            "Per VERIS issue #229, there should always be an internal or "
            "partner actor if there is a misuse action."
        )
def checkMalwareIntegrity(inDict):
    if 'malware' in inDict['action']:
        if 'Software installation' not in inDict.get('attribute', {}).get('integrity', {}).get('variety', []):
            raise ValidationError(
                "Malware present, but no Software installation in attribute.integrity.variety")
    return True
def checkYear(inDict):
    if inDict.get('plus', {}).get('dbir_year', None):
        dbir_year = inDict['plus']['dbir_year']
        nyear = inDict.get('plus', {}).get('timeline', {}).get('notification', {}).get('year', None)
        nmonth = inDict.get('plus', {}).get('timeline', {}).get('notification', {}).get('month', None)
        nday = inDict.get('plus', {}).get('timeline', {}).get('notification', {}).get('day', None)
        iyear = inDict.get('timeline', {}).get('incident', {}).get('year', None)
        imonth = inDict.get('timeline', {}).get('incident', {}).get('month', None)
        iday = inDict.get('timeline', {}).get('incident', {}).get('day', None)
        discovered = inDict.get('timeline', {}).get('discovered', {}).get('unit', "(no discovery unit)")

        if nyear is not None:
            source = "notification"
            tyear = nyear
            tmonth = nmonth
        else:
            source = "incident"
            tyear = iyear
            tmonth = imonth

        if tyear >= dbir_year:
            yield ValidationError(
                "DBIR year of {0} from {5} runs from Nov 1, {1} to Oct 31, {2}. "
                "Incident year {3} and month {4} is too late to be in this DBIR year."
                .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear, tmonth, source))

        if tyear == dbir_year - 1:
            if tmonth is not None and tmonth > 10:
                yield ValidationError(
                    "DBIR year of {0} from {5} runs from Nov 1, {1} to Oct 31, {2}. "
                    "Incident year {3} and month {4} is too late to be in this DBIR year."
                    .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear, tmonth, source))
        elif tyear == dbir_year - 2:
            if tmonth is not None and tmonth < 11:
                if discovered in ["Months", "Years"]:
                    yield ValidationError(
                        "DBIR year of {0} from {5} runs from Nov 1, {1} to Oct 31, {2}. "
                        "Incident year {3}, month {4}, and discovery unit {6} is before this range."
                        .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear, tmonth, source, discovered))
        else:
            if discovered != "Years":
                yield ValidationError(
                    "DBIR year of {0} from {4} runs from Nov 1, {1} to Oct 31, {2}. "
                    "Incident year {3} and discovery unit {5} is before this range."
                    .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear, source, discovered))

        # check whether incident or notification dates are in the future
        ndate = None
        if nyear is not None:
            try:
                ndate = date(*[x if x else 1 for x in [nyear, nmonth, nday]])
            except ValueError as e:
                yield ValidationError("Problem with notification date: {0}".format(e))
            if ndate is not None and ndate > date.today():
                yield ValidationError(
                    "Notification date {0} is greater than today's date {1}.".format(
                        ndate, date.today()))

        idate = None
        try:
            idate = date(*[x if x else 1 for x in [iyear, imonth, iday]])
        except ValueError as e:
            yield ValidationError("Problem with incident date: {0}".format(e))
        if idate is not None:
            if idate > date.today():
                yield ValidationError(
                    "Incident date {0} is greater than today's date {1}.".format(
                        idate, date.today()))
            if nyear is not None and ndate is not None and idate > ndate:
                yield ValidationError(
                    "Notification date {0} appears to be earlier than incident date {1}. "
                    "This may be due to incomplete dates.".format(ndate, idate))
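# The VERIS-style checks above mix two conventions: some raise a single
# ValidationError, others are generators that yield several.  A hedged sketch of
# a caller that runs both kinds uniformly; the check list and incident dict are
# illustrative, not part of the original code.
import inspect


def run_checks(incident, checks):
    """Collect ValidationErrors from raise-style and yield-style check functions."""
    problems = []
    for check in checks:
        try:
            result = check(incident)
            if inspect.isgenerator(result):
                problems.extend(result)  # generator checks yield ValidationErrors
        except ValidationError as err:
            problems.append(err)  # raise-style checks
    return problems


# Example call (names refer to the check functions defined above):
# problems = run_checks(incident_dict, [checkMalwareIntegrity, checkMisuseActor, checkYear])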
    cycle = cycler.PhaseMarker().add_phase_markers(cb.cycle, cb.V, cb.A)
    n_stops = (cycle["stop"].astype(int).diff() < 0).sum(None)
    n_decels = (cycle["decel"].astype(int).diff() < 0).sum(None)
    n_stopdecels = (cycle["stopdecel"].astype(int).diff() < 0).sum(None)
    assert n_stopdecels < n_decels
    ## The initial stop has no deceleration before it BUT no diff >0 either!
    assert n_stopdecels == n_stops


@pytest.mark.parametrize(
    "wltc_class, t_cold_end, err",
    zip(
        range(4),
        (800, 150),
        (
            ValidationError("before the 1st cycle-part"),
            ValidationError("on a cycle stop"),
        ),
    ),
)
def test_validate_t_start(wltc_class, t_cold_end, err):
    """
    .. TODO:: move `t_cold_end` check in validations pipeline.
    """
    V = datamodel.get_class_v_cycle(wltc_class)
    wltc_parts = datamodel.get_class_parts_limits(wltc_class)

    cb = CycleBuilder(V)
    cb.cycle = cycler.PhaseMarker().add_phase_markers(cb.cycle, cb.V, cb.A)
    with pytest.raises(type(err), match=str(err)):
        for err in cb.validate_nims_t_cold_end(t_cold_end, wltc_parts):
def convert_tulane(schema, encoding, input_file, verbose=True, output=True, output_file=None, config={}, enforce_validation=True, reactor=None): if reactor is not None: helper = AgaveHelper(reactor.client) print("Helper loaded") else: print("Helper not loaded") # for SBH Librarian Mapping sbh_query = SynBioHubQuery(SD2Constants.SD2_SERVER) sbh_query.login(config["sbh"]["user"], config["sbh"]["password"]) tulane_doc = json.load(open(input_file, encoding=encoding)) output_doc = {} lab = SampleConstants.LAB_TULANE original_experiment_id = tulane_doc[SampleConstants.EXPERIMENT_ID] output_doc[SampleConstants.EXPERIMENT_ID] = namespace_experiment_id( original_experiment_id, lab) output_doc[SampleConstants.CHALLENGE_PROBLEM] = tulane_doc[ SampleConstants.CHALLENGE_PROBLEM] output_doc[SampleConstants.EXPERIMENT_REFERENCE_URL] = tulane_doc[ SampleConstants.EXPERIMENT_REFERENCE] map_experiment_reference(config, output_doc) output_doc[SampleConstants.LAB] = lab output_doc[SampleConstants.SAMPLES] = [] samples_w_data = 0 if SampleConstants.CYTOMETER_CONFIG in tulane_doc: output_doc[SampleConstants.CYTOMETER_CONFIG] = tulane_doc[ SampleConstants.CYTOMETER_CONFIG] cytometer_channels = [] for channel in output_doc[ SampleConstants.CYTOMETER_CONFIG]['channels']: cytometer_channels.append(channel['name']) for tulane_sample in tulane_doc["tulane_samples"]: sample_doc = {} sample_id = tulane_sample["sample_id"] sample_doc[SampleConstants.SAMPLE_ID] = namespace_sample_id( sample_id, lab, output_doc) sample_doc[SampleConstants.LAB_SAMPLE_ID] = namespace_sample_id( sample_id, lab, None) if SampleConstants.STRAIN in tulane_sample: strain = tulane_sample[SampleConstants.STRAIN] sample_doc[SampleConstants.STRAIN] = create_mapped_name( original_experiment_id, strain, strain, lab, sbh_query, strain=False) if SampleConstants.CONTROL_TYPE in tulane_sample: sample_doc[SampleConstants.CONTROL_TYPE] = tulane_sample[ SampleConstants.CONTROL_TYPE] if SampleConstants.CONTROL_CHANNEL in tulane_sample: sample_doc[SampleConstants.CONTROL_CHANNEL] = tulane_sample[ SampleConstants.CONTROL_CHANNEL] measurement_counter = 1 for file in tulane_sample[SampleConstants.FILES]: measurement_doc = {} measurement_doc[SampleConstants.FILES] = [] measurement_type = file[SampleConstants.M_TYPE] file_name = file[SampleConstants.M_NAME] # same logic as uploads manager file_name = safen_filename(file_name) measurement_doc[ SampleConstants.MEASUREMENT_TYPE] = measurement_type # apply channels, if nothing mapped if measurement_type == SampleConstants.MT_FLOW: if SampleConstants.M_CHANNELS not in measurement_doc: measurement_doc[ SampleConstants.M_CHANNELS] = cytometer_channels # append the type so we have a distinct id per actual grouped measurement typed_measurement_id = '.'.join( [str(measurement_counter), measurement_type]) # generate a measurement id unique to this sample measurement_doc[ SampleConstants.MEASUREMENT_ID] = namespace_measurement_id( str(measurement_counter), output_doc[SampleConstants.LAB], sample_doc, output_doc) # record a measurement grouping id to find other linked samples and files measurement_doc[SampleConstants. 
MEASUREMENT_GROUP_ID] = namespace_measurement_id( typed_measurement_id, output_doc[SampleConstants.LAB], sample_doc, output_doc) file_type = SampleConstants.infer_file_type(file_name) file_name_final = file_name if file_name.startswith('s3') or file_name.count("/") >= 2: file_name_final = file_name.split(original_experiment_id)[-1] if file_name_final.startswith("/"): file_name_final = file_name_final[1:] measurement_doc[SampleConstants.FILES].append({ SampleConstants.M_NAME: file_name_final, SampleConstants.M_TYPE: file_type, SampleConstants.M_LAB_LABEL: [SampleConstants.M_LAB_LABEL_RAW], # measurements and files here are 1:1 SampleConstants.FILE_ID: namespace_file_id("1", output_doc[SampleConstants.LAB], measurement_doc, output_doc), SampleConstants.FILE_LEVEL: SampleConstants.F_LEVEL_0 }) if SampleConstants.MEASUREMENTS not in sample_doc: sample_doc[SampleConstants.MEASUREMENTS] = [] sample_doc[SampleConstants.MEASUREMENTS].append(measurement_doc) samples_w_data = samples_w_data + 1 #print('sample {} / measurement {} contains {} files'.format(sample_doc[SampleConstants.SAMPLE_ID], file_name, len(measurement_doc[SampleConstants.FILES]))) measurement_counter = measurement_counter + 1 if SampleConstants.MEASUREMENTS not in sample_doc: sample_doc[SampleConstants.MEASUREMENTS] = [] output_doc[SampleConstants.SAMPLES].append(sample_doc) print('Samples in file: {}'.format(len(tulane_doc))) print('Samples with data: {}'.format(samples_w_data)) try: validate(output_doc, schema) # if verbose: # print(json.dumps(output_doc, indent=4)) if output is True or output_file is not None: if output_file is None: path = os.path.join("output/tulane", os.path.basename(input_file)) else: path = output_file with open(path, 'w') as outfile: json.dump(output_doc, outfile, indent=4) return True except ValidationError as err: if enforce_validation: if verbose: print("Schema Validation Error: {0}\n".format(err)) raise ValidationError("Schema Validation Error", err) else: if verbose: print("Schema Validation Error: {0}\n".format(err)) return False
    print(engine.parse_wot(wot))


@pytest.mark.parametrize(
    "wot, n_idle, n_rated, p_rated, err",
    [
        # ([[1, 2], [3, 4]], None, None, None, ValueError("Too few points in wot")),
        (
            {"p": _P, "n": _N},
            None,
            None,
            92,
            ValidationError(
                re.escape("`p_wot_max`(78) much lower than p_rated(92)!")),
        ),
        (
            {"p": _P, "n": _N},
            None,
            None,
            22,
            ValidationError(
                re.escape("`p_wot_max`(78) much bigger than p_rated(22)!")),
        ),
    ],
)
def test_validate_wot_errors(mdl, wot, n_idle, n_rated, p_rated, err):
borkenbuild = False
spacesave = 0

for filename in arguments.path:
    try:
        ## dict_raise_on_duplicates raises error on duplicate keys in geojson
        source = json.load(io.open(filename, encoding='utf-8'),
                           object_pairs_hook=dict_raise_on_duplicates)

        ## jsonschema validate
        validator.validate(source, schema)
        sourceid = source['properties']['id']
        if sourceid in seen_ids:
            raise ValidationError('Id %s used multiple times' % sourceid)
        seen_ids.add(sourceid)

        ## {z} instead of {zoom}
        if '{z}' in source['properties']['url']:
            raise ValidationError('{z} found instead of {zoom} in tile url')

        if 'license' in source['properties']:
            license = source['properties']['license']
            if not spdx_lookup.by_id(license):
                raise ValidationError('Unknown license %s' % license)
        else:
            logger.debug("{} has no license property".format(filename))

        ## Check for license url. Too many missing to mark as required in schema.
        if 'license_url' not in source['properties']:
            logger.debug("{} has no license_url".format(filename))
if not filename.lower()[-8:] == ".geojson":
    logger.debug("{} is not a geojson file, skip".format(filename))
    continue

if not os.path.exists(filename):
    logger.debug("{} does not exist, skip".format(filename))
    continue

try:
    ## dict_raise_on_duplicates raises error on duplicate keys in geojson
    source = json.load(io.open(filename, encoding="utf-8"),
                       object_pairs_hook=dict_raise_on_duplicates)
except Exception as e:
    logger.exception(f"Could not parse file: {filename}: {e}")
    raise ValidationError(f"Could not parse file: {filename}: {e}")

try:
    ## dict_raise_on_duplicates raises error on duplicate keys in geojson
    source = json.load(io.open(filename, encoding="utf-8"),
                       object_pairs_hook=dict_raise_on_duplicates)

    ## jsonschema validate
    validator.validate(source, schema)
    sourceid = source["properties"]["id"]
    if sourceid in seen_ids:
        raise ValidationError("Id %s used multiple times" % sourceid)
    seen_ids.add(sourceid)

    ## {z} instead of {zoom}
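# Several of the scripts above pass object_pairs_hook=dict_raise_on_duplicates to
# json.load() without showing the hook itself.  A hedged sketch of what such a
# hook typically looks like; the exact error type raised in the original scripts
# may differ.
def dict_raise_on_duplicates(ordered_pairs):
    """object_pairs_hook for json.load(): reject duplicate keys instead of keeping the last one."""
    result = {}
    for key, value in ordered_pairs:
        if key in result:
            raise ValidationError("Duplicate key: {}".format(key))
        result[key] = value
    return result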
def main(): rx = Reactor() m = AttrDict(rx.context.message_dict) if m == {}: try: jsonmsg = json.loads(rx.context.raw_message) m = jsonmsg except Exception: pass # ['event', 'agavejobs', 'create', 'delete'] action = "emptypost" try: for a in ["aloejobs", "event", "agavejobs"]: try: rx.logger.info("Testing against {} schema".format(a)) rx.validate_message(m, messageschema="/schemas/" + a + ".jsonschema", permissive=False) action = a break except Exception as exc: print("Validation error: {}".format(exc)) if action is None: pprint(m) raise ValidationError("Message did not a known schema") except Exception as vexc: rx.on_failure("Failed to process message", vexc) # rx.logger.debug("SCHEMA DETECTED: {}".format(action)) # store = PipelineJobStore(mongodb=rx.settings.mongodb) # Process the event # Get URL params from Abaco context # # These can be overridden by the event body or custom # code implemented to process the message. This has a # side effect of allowing the manager to process empty # POST bodies so long as the right values are presented # as URL params. # # cb_* variables are always overridden by the contents of # the POST body # cb_event_name = rx.context.get("event", None) cb_job_uuid = rx.context.get("uuid", None) cb_token = rx.context.get("token", "null") # Accept a 'note' as a URL parameter # TODO - urldecode the contents of 'note' cb_note = rx.context.get("note", "Event had no JSON payload") # NOTE - contents of cb_data will be overridden in create, event. aloejob cb_data = {"note": cb_note} # Accept 'status', the Aloe-centric name for job.state # as well as 'state' cb_agave_status = rx.context.get("status", rx.context.get("state", None)) # Prepare template PipelineJobsEvent event_dict = { "uuid": cb_job_uuid, "name": cb_event_name, "token": cb_token, "data": cb_data, } # This is the default message schema 'event' if action == "event": # Filter message and override values in event_dict with its contents for k in ["uuid", "name", "token", "data"]: event_dict[k] = m.get(k, event_dict.get(k)) # AgaveJobs can update the status of an existing job but cannot # create one. To do so, an Agave job must be launched # using the PipelineJobsAgaveProxy resource. if action == "agavejobs": rx.on_failure("Agave job callbacks are no longer supported") elif action == "aloejobs": try: # Aloe jobs POST their current JSON representation to # callback URL targets. The POST body contains a 'status' key. # If for some reason it doesn't, job status is determined by # the 'state' or 'status' URL parameter. if cb_agave_status is None: cb_agave_status = m.get("status", None) # Agave job message bodies include 'id' which is the jobId mes_agave_job_id = m.get("id", None) rx.logger.debug("aloe_status: {}".format(cb_agave_status)) if cb_agave_status is not None: cb_agave_status = cb_agave_status.upper() except Exception as exc: rx.on_failure( "Aloe callback POST and associated URL parameters were missing some required fields", exc, ) # If the job status is 'RUNNING' then use a subset of the POST for # event.data. Otherwise, create an event.data from the most recent # entry in the Agave job history. One small detail to note is that # callbacks are sent at the beginning of event processing in the # Agave jobs service and so a handful of fields in the job record # that are late bound are not yet populated when the event is sent. 
if cb_agave_status == "RUNNING": cb_data = minify_job_dict(dict(m)) else: cb_data = {"status": cb_agave_status} # Fetch latest history entry to put in event.data try: # Is there a better way than grabbing entire history that can # be implemented in a pure Agave call? Alternatively, we could # cache last offset for this job in rx.state but that will # limit our scaling to one worker # agave_job_latest_history = rx.client.jobs.getHistory( jobId=mes_agave_job_id, limit=100)[-1].get("description", None) if agave_job_latest_history is not None: cb_data["description"] = agave_job_latest_history except Exception as agexc: rx.logger.warning("Failed to get history for {}: {}".format( mes_agave_job_id, agexc)) # Map the Agave job status to an PipelineJobsEvent name if cb_event_name is None and cb_agave_status is not None: cb_event_name = AgaveEvents.agavejobs.get(cb_agave_status, "update") rx.logger.debug("Status: {} => Event: {}".format( cb_agave_status, cb_event_name)) # Event name and data can be updated as part of processing an Agave POST # so apply the current values to event_dict here event_dict["name"] = cb_event_name event_dict["data"] = cb_data # Sanity check event_dict and token if event_dict["uuid"] is None or event_dict[ "name"] is None or cb_token is None: rx.on_failure("No actionable event was received.") # Instantiate a job instance to leverage the MPJ framework store = ManagedPipelineJobInstance(rx.settings.mongodb, event_dict["uuid"], agave=rx.client) # Handle event... try: # First, proxy events. This code forwards index and indexed events to the jobs-indexer # Proxy 'index' if event_dict["name"] == "index": rx.logger.info("Forwarding 'index'") index_mes = { "name": "index", "uuid": event_dict["uuid"], "token": event_dict["token"], } rx.send_message(rx.settings.pipelines.job_indexer_id, index_mes, retryMaxAttempts=10) # Disable this since it should be picked up via events-manager subscription # message_control_annotator(up_job, ["INDEXING"], rx) # Proxy 'indexed' elif event_dict["name"] == "indexed": rx.logger.info("Forwarding 'indexed'") index_mes = { "name": "indexed", "uuid": event_dict["uuid"], "token": event_dict["token"], } rx.send_message(rx.settings.pipelines.job_indexer_id, index_mes, retryMaxAttempts=10) # Disable this since it should be picked up via events-manager subscription # message_control_annotator(up_job, ["FINISHED"], rx) # Handle all other events else: rx.logger.info("Handling '{}'".format(event_dict["name"])) # Get the current state of the MPJ. We use this to detect if # handling the event has resulted in a change of state store_state = store.state last_event = store.last_event # Send event at the beginning of state change so subscribers can pick # up, for instance, a case where the job receives an index event and # is in the FINISHED state. if rx.settings.state_enter: forward_event(event_dict["uuid"], event_dict['name'], store_state, {'last_event': last_event}, rx) up_job = store.handle(event_dict, cb_token) if rx.settings.state_exit: forward_event(up_job["uuid"], event_dict['name'], up_job["state"], {"last_event": up_job["last_event"]}, rx) except Exception as exc: rx.on_failure("Event not processed", exc) rx.on_success("Processed event in {} usec".format(rx.elapsed()))
def format_ports(instance):
    try:
        split_port(instance)
    except ValueError as e:
        raise ValidationError(six.text_type(e))
    return True
def provisionList(items, database_name, overwrite=False, clear=False, skip_user_check=False): """Provisions a list of items according to their schema :param items: A list of provisionable items. :param database_name: :param overwrite: Causes existing items to be overwritten :param clear: Clears the collection first (Danger!) :param skip_user_check: Skips checking if a system user is existing already (for user provisioning) :return: """ log("Provisioning", items, database_name, lvl=debug) def get_system_user(): """Retrieves the node local system user""" user = objectmodels["user"].find_one({"name": "System"}) try: log("System user uuid: ", user.uuid, lvl=verbose) return user.uuid except AttributeError as system_user_error: log("No system user found:", system_user_error, lvl=warn) log( "Please install the user provision to setup a system user or " "check your database configuration", lvl=error, ) return False # TODO: Do not check this on specific objects but on the model (i.e. once) def needs_owner(obj): """Determines whether a basic object has an ownership field""" for privilege in obj._fields.get("perms", None): if "owner" in obj._fields["perms"][privilege]: return True return False import pymongo from isomer.database import objectmodels, dbhost, dbport, dbname database_object = objectmodels[database_name] log(dbhost, dbname) # TODO: Fix this to make use of the dbhost client = pymongo.MongoClient(dbhost, dbport) db = client[dbname] if not skip_user_check: system_user = get_system_user() if not system_user: return else: # TODO: Evaluate what to do instead of using a hardcoded UUID # This is usually only here for provisioning the system user # One way to avoid this, is to create (instead of provision) # this one upon system installation. system_user = "******" col_name = database_object.collection_name() if clear is True: log("Clearing collection for", col_name, lvl=warn) db.drop_collection(col_name) counter = 0 for no, item in enumerate(items): new_object = None item_uuid = item["uuid"] log("Validating object (%i/%i):" % (no + 1, len(items)), item_uuid, lvl=debug) if database_object.count({"uuid": item_uuid}) > 0: log("Object already present", lvl=warn) if overwrite is False: log("Not updating item", item, lvl=warn) else: log("Overwriting item: ", item_uuid, lvl=warn) new_object = database_object.find_one({"uuid": item_uuid}) new_object._fields.update(item) else: new_object = database_object(item) if new_object is not None: try: if needs_owner(new_object): if not hasattr(new_object, "owner"): log("Adding system owner to object.", lvl=verbose) new_object.owner = system_user except Exception as e: log("Error during ownership test:", e, type(e), exc=True, lvl=error) try: new_object.validate() new_object.save() counter += 1 except ValidationError as e: raise ValidationError( "Could not provision object: " + str(item_uuid), e) log("Provisioned %i out of %i items successfully." % (counter, len(items)))
def validate_n_rated_above_n_idle(n_idle_R, n_rated_R):
    if n_rated_R <= n_idle_R:
        raise ValidationError(
            f"{m.n_rated}({n_rated_R}) must be higher than {m.n_idle}({n_idle_R})!"
        )
def checkSQLiRepurpose(inDict):
    if 'SQLi' in inDict.get('action', {}).get('hacking', {}).get('variety', []):
        if 'Repurpose' not in inDict.get('attribute', {}).get('integrity', {}).get('variety', []):
            raise ValidationError(
                "action.hacking.SQLi present but Repurpose not in attribute.integrity.variety")
    return True
def convert_caltech(schema, encoding, input_file, verbose=True, output=True, output_file=None, config={}, enforce_validation=True, reactor=None): if reactor is not None: helper = AgaveHelper(reactor.client) print("Helper loaded") else: print("Helper not loaded") # for SBH Librarian Mapping sbh_query = SynBioHubQuery(SD2Constants.SD2_SERVER) sbh_query.login(config["sbh"]["user"], config["sbh"]["password"]) # TODO sheet name may change? caltech_df = pandas.read_excel(input_file, sheet_name='IDs') output_doc = {} lab = SampleConstants.LAB_CALTECH output_doc[SampleConstants.LAB] = lab output_doc[SampleConstants.SAMPLES] = [] # We don't navtively know which experiment contains which columns - they can all be different # Idea: build up a map that relates column names to mapping functions # columns for exp exp_columns = {} # column functions exp_column_functions = {} # exp measurement type exp_mt = {} # exp measurement key exp_mk = {} # exp relative path to files exp_rel_path = {} # exp column units exp_column_units = {} # time exp_time = {} # temp exp_temp = {} # flow cytometer channels, configuration and controls exp_cytometer_channels = {} exp_cytometer_configuration = {} exp_negative_controls = {} exp_positive_controls = {} flow_1 = "20181009-top-4-A-B-cell-variants-A-B-sampling-exp-1" exp_columns[flow_1] = ["well", "a", "b", "ba ratio", "atc", "iptg"] exp_column_functions[flow_1] = [ SampleConstants.SAMPLE_ID, SampleConstants.STRAIN_CONCENTRATION, SampleConstants.STRAIN_CONCENTRATION, None, SampleConstants.REAGENT_CONCENTRATION, SampleConstants.REAGENT_CONCENTRATION ] exp_mt[flow_1] = [SampleConstants.MT_FLOW] exp_mk[flow_1] = ["0_flow"] exp_rel_path[flow_1] = ["0"] exp_time[flow_1] = ["0:hour"] exp_temp[flow_1] = ["37:celsius"] exp_cytometer_channels[flow_1] = [ "FSC-A", "SSC-A", "CFP/VioBlue-A", "GFP/FITC-A" ] exp_cytometer_configuration[ flow_1] = "agave://data-sd2e-projects.sd2e-project-21/ReedM-index/A_eq_B/20190214_A_eq_B_mar_1/20190214-A-B-mar-1-cc.json" exp_negative_controls[flow_1] = ["0/blank-RDM2019-02-14.0001.fcs"] exp_positive_controls[flow_1] = {} exp_positive_controls[flow_1]["CFP/VioBlue-A"] = [ "0/bfp-RDM2019-02-14.0001.fcs" ] exp_positive_controls[flow_1]["GFP/FITC-A"] = [ "0/yfp-RDM2019-02-14.0002.fcs" ] flow_2 = "20190214-A-B-mar-1" exp_columns[flow_2] = ["well", "iptg", "sal", "a", "b"] exp_column_functions[flow_2] = [ SampleConstants.SAMPLE_ID, SampleConstants.REAGENT_CONCENTRATION, SampleConstants.REAGENT_CONCENTRATION, SampleConstants.STRAIN_CONCENTRATION, SampleConstants.STRAIN_CONCENTRATION ] exp_mt[flow_2] = [SampleConstants.MT_FLOW, SampleConstants.MT_FLOW] exp_mk[flow_2] = ["0_flow", "18_flow"] exp_rel_path[flow_2] = ["0_flow", "18_flow"] exp_column_units[flow_2] = [None, "micromole", "micromole", None, None] exp_time[flow_2] = ["0:hour", "18:hour"] exp_temp[flow_2] = ["37:celsius", "37:celsius"] exp_cytometer_channels[flow_2] = [ "FSC-A", "SSC-A", "CFP/VioBlue-A", "GFP/FITC-A" ] exp_cytometer_configuration[ flow_2] = "agave://data-sd2e-projects.sd2e-project-21/ReedM-index/A_eq_B/20190214_A_eq_B_mar_1/20190214-A-B-mar-1-cc.json" exp_negative_controls[flow_2] = ["0_flow/blank-RDM2019-02-14.0001.fcs"] exp_positive_controls[flow_2] = {} exp_positive_controls[flow_2]["CFP/VioBlue-A"] = ["0_flow/A5.csv"] exp_positive_controls[flow_2]["GFP/FITC-A"] = [ "0_flow/yfp-RDM2019-02-14.0002.fcs" ] matched_exp_key = None matched_exp_cols = None matched_exp_functions = None header_row_values = list(caltech_df.columns.values) for exp_key in exp_columns: exp_col_list = 
exp_columns[exp_key] match_header = all( [header in header_row_values for header in exp_col_list]) if match_header: matched_exp_key = exp_key matched_exp_cols = exp_col_list matched_exp_functions = exp_column_functions[exp_key] break if matched_exp_key == None: raise ValueError( "Could not match caltech experiment headers {}".format(input_file)) # use the matched_exp_key as the reference output_doc[SampleConstants.EXPERIMENT_REFERENCE] = matched_exp_key map_experiment_reference(config, output_doc) # use matching exp key, e.g. 20181009-top-4-A-B-cell-variants-A--B-sampling-exp-1 output_doc[SampleConstants.EXPERIMENT_ID] = namespace_experiment_id( matched_exp_key, lab) replicate_count = {} for caltech_index, caltech_sample in caltech_df.iterrows(): measurement_key = exp_mk[matched_exp_key] for measurement_key_index, measurement_key_value in enumerate( measurement_key): # skip if this is a control skip = False sample_doc = {} contents = [] well_id = None value_string = "" for index, column_name in enumerate(matched_exp_cols): value = caltech_sample[column_name] function = matched_exp_functions[index] if function == SampleConstants.SAMPLE_ID: # 1:1 sample measurements sample_doc[ SampleConstants.SAMPLE_ID] = namespace_sample_id( value + "_" + str(measurement_key_index), lab, output_doc) well_id = value elif function == SampleConstants.STRAIN_CONCENTRATION: # add as reagent with concentration value # 'x' = not present/0 if value == 'x': value = 0 contents.append( create_media_component( output_doc.get(SampleConstants.EXPERIMENT_ID), column_name, column_name, lab, sbh_query, value)) # build up a string of values that define this sample value_string = value_string + str(value) elif function == SampleConstants.REAGENT_CONCENTRATION: if matched_exp_key in exp_column_units: unit = exp_column_units[matched_exp_key][index] value_unit = str(value) + ":" + str(unit) contents.append( create_media_component( output_doc.get(SampleConstants.EXPERIMENT_ID), column_name, column_name, lab, sbh_query, value_unit)) else: contents.append( create_media_component( output_doc.get(SampleConstants.EXPERIMENT_ID), column_name, column_name, lab, sbh_query, value)) value_string = value_string + str(value) elif function == None: # skip continue else: raise ValueError("Unknown function {}".format(function)) # have we seen this value before? if not value_string in replicate_count: replicate_count[value_string] = 0 sample_doc[SampleConstants.REPLICATE] = 0 else: replicate = replicate_count[value_string] replicate = replicate + 1 replicate_count[value_string] = replicate sample_doc[SampleConstants.REPLICATE] = replicate if len(contents) > 0: sample_doc[SampleConstants.CONTENTS] = contents measurement_doc = {} measurement_doc[SampleConstants.FILES] = [] measurement_doc[SampleConstants.MEASUREMENT_TYPE] = exp_mt[ matched_exp_key][measurement_key_index] measurement_doc[ SampleConstants.MEASUREMENT_NAME] = measurement_key_value # Fill in Flow information, if known if measurement_doc[SampleConstants. MEASUREMENT_TYPE] == SampleConstants.MT_FLOW: if matched_exp_key in exp_cytometer_channels: measurement_doc[ SampleConstants. M_CHANNELS] = exp_cytometer_channels[matched_exp_key] if matched_exp_key in exp_cytometer_configuration: measurement_doc[ SampleConstants. 
M_INSTRUMENT_CONFIGURATION] = exp_cytometer_configuration[ matched_exp_key] if matched_exp_key in exp_time: time = exp_time[matched_exp_key][measurement_key_index] measurement_doc[SampleConstants.TIMEPOINT] = create_value_unit( time) if SampleConstants.TEMPERATURE not in sample_doc: if matched_exp_key in exp_temp: temp = exp_temp[matched_exp_key][measurement_key_index] sample_doc[ SampleConstants.TEMPERATURE] = create_value_unit(temp) # generate a measurement id unique to this sample measurement_doc[ SampleConstants.MEASUREMENT_ID] = namespace_measurement_id( str(measurement_key_index + 1), output_doc[SampleConstants.LAB], sample_doc, output_doc) # record a measurement grouping id to find other linked samples and files measurement_doc[SampleConstants. MEASUREMENT_GROUP_ID] = namespace_measurement_id( measurement_key_value, output_doc[SampleConstants.LAB], sample_doc, output_doc) # sample id -> well name -> filename.csv? # TODO this may not hold fn_well = well_id + ".csv" if matched_exp_key in exp_negative_controls: for negative_control in exp_negative_controls[matched_exp_key]: if negative_control.endswith(fn_well): skip = True if matched_exp_key in exp_positive_controls: for positive_control_channel in exp_positive_controls[ matched_exp_key]: for positive_control in exp_positive_controls[ matched_exp_key][positive_control_channel]: if positive_control.endswith(fn_well): skip = True if skip: continue filename = os.path.join( exp_rel_path[matched_exp_key][measurement_key_index], fn_well) file_id = namespace_file_id(str(1), output_doc[SampleConstants.LAB], measurement_doc, output_doc) file_type = SampleConstants.infer_file_type(filename) measurement_doc[SampleConstants.FILES].append({ SampleConstants.M_NAME: filename, SampleConstants.M_TYPE: file_type, SampleConstants.M_LAB_LABEL: [SampleConstants.M_LAB_LABEL_RAW], SampleConstants.FILE_ID: file_id, SampleConstants.FILE_LEVEL: SampleConstants.F_LEVEL_0 }) if SampleConstants.MEASUREMENTS not in sample_doc: sample_doc[SampleConstants.MEASUREMENTS] = [] sample_doc[SampleConstants.MEASUREMENTS].append(measurement_doc) output_doc[SampleConstants.SAMPLES].append(sample_doc) # Add flow controls, if known if matched_exp_key in exp_negative_controls: for negative_control in exp_negative_controls[matched_exp_key]: create_flow_control_sample(negative_control, "negative flow control", \ exp_cytometer_channels[matched_exp_key], exp_cytometer_configuration[matched_exp_key], output_doc, \ True, False, None) if matched_exp_key in exp_positive_controls: for positive_control_channel in exp_positive_controls[matched_exp_key]: for positive_control in exp_positive_controls[matched_exp_key][ positive_control_channel]: create_flow_control_sample(positive_control, "positive flow control", \ exp_cytometer_channels[matched_exp_key], exp_cytometer_configuration[matched_exp_key], output_doc, \ False, True, positive_control_channel) try: validate(output_doc, schema) if output is True or output_file is not None: if output_file is None: path = os.path.join("output/caltech", os.path.basename(input_file)) else: path = output_file if path.endswith(".xlsx"): path = path[:-5] + ".json" with open(path, 'w') as outfile: json.dump(output_doc, outfile, indent=4) return True except ValidationError as err: if enforce_validation: if verbose: print("Schema Validation Error: {0}\n".format(err)) raise ValidationError("Schema Validation Error", err) else: if verbose: print("Schema Validation Error: {0}\n".format(err)) return False return False
def validate_schema_postage(instance):
    if isinstance(instance, str):
        if instance not in ["first", "second", "europe", "rest-of-world"]:
            raise ValidationError(
                "invalid. It must be first, second, europe or rest-of-world.")
    return True