def generate_feature_matrix(mapobject_type_id, ref_type):
    """Stream mapobject metadata and tool result values as CSV text.

    This is a generator. The first chunk is the header row
    (``mapobject_id``, the metadata columns that belong to `ref_type`,
    then one column per tool result ordered by tool result ID). Every
    following chunk is the CSV row of a single mapobject.

    ``experiment_id``, ``tpoint``, ``plate_name``, ``well_name``,
    ``well_pos_y`` and ``well_pos_x`` are not parameters; they are read
    from the enclosing scope.

    Parameters
    ----------
    mapobject_type_id:
        ID used to select the tool results and the mapobjects.
    ref_type:
        Kind of reference position to iterate over; one of ``'Plate'``,
        ``'Well'`` or ``'Site'``.

    Yields
    ------
    str
        One chunk of CSV text per written row.

    Raises
    ------
    ValueError
        If `ref_type` is not one of the supported values.
    """
    nan = str(np.nan)
    data = StringIO()
    w = csv.writer(data)

    def _drain():
        # Return what was written since the last call and empty the
        # buffer, so that every row is handed out exactly once.
        chunk = data.getvalue()
        data.seek(0)
        data.truncate(0)
        return chunk

    with tm.utils.ExperimentSession(experiment_id) as session:
        layer_lut = {
            r.id: {'tpoint': r.tpoint, 'zplane': r.zplane}
            for r in _get_matching_layers(session, tpoint)
        }

        # Ordered, so that rows are emitted in the order in which the
        # reference positions were returned by the query.
        ref_position_lut = OrderedDict()
        if ref_type == 'Plate':
            metadata_names = ['plate_name']
            for r in _get_matching_plates(session, plate_name):
                ref_position_lut[r.id] = {'plate_name': r.plate_name}
        elif ref_type == 'Well':
            metadata_names = ['plate_name', 'well_name']
            for r in _get_matching_wells(session, plate_name, well_name):
                ref_position_lut[r.id] = {
                    'plate_name': r.plate_name,
                    'well_name': r.well_name,
                }
        elif ref_type == 'Site':
            metadata_names = [
                'plate_name', 'well_name', 'well_pos_y', 'well_pos_x',
                'tpoint', 'zplane', 'label', 'is_border',
            ]
            sites = _get_matching_sites(
                session, plate_name, well_name, well_pos_y, well_pos_x
            )
            for r in sites:
                ref_position_lut[r.id] = {
                    'well_pos_y': r.well_pos_y,
                    'well_pos_x': r.well_pos_x,
                    'plate_name': r.plate_name,
                    'well_name': r.well_name,
                }
        else:
            # Fail early with a clear message instead of a NameError on
            # `metadata_names` further down.
            raise ValueError('unsupported reference type: %r' % (ref_type,))

        tool_results = session.query(tm.ToolResult.id, tm.ToolResult.name).\
            filter_by(mapobject_type_id=mapobject_type_id).\
            order_by(tm.ToolResult.id).\
            all()
        tool_result_names = [t.name for t in tool_results]
        # Label values are looked up by the stringified tool result ID.
        tool_result_keys = [str(t.id) for t in tool_results]

        ref_mapobject_type = session.query(tm.MapobjectType.id).\
            filter_by(ref_type=ref_type).\
            order_by(tm.MapobjectType.id).\
            first()

    w.writerow(['mapobject_id'] + metadata_names + tool_result_names)
    yield _drain()

    layer_ids = list(layer_lut)
    is_site = ref_type == 'Site'

    for ref_id in ref_position_lut:
        ref_position = ref_position_lut[ref_id]
        logger.debug('collect metadata for %s %d', ref_type, ref_id)
        with tm.utils.ExperimentSession(experiment_id) as session:
            mapobjects = _get_mapobjects_at_ref_position(
                session, mapobject_type_id, ref_id, layer_ids
            )
            mapobject_ids = [m.id for m in mapobjects]
            if not mapobject_ids:
                logger.warning(
                    'no mapobjects found for %s %d', ref_type, ref_id
                )
                continue

            # Border objects are only looked up for sites. A set keeps the
            # per-row membership test cheap.
            border_mapobject_ids = set()
            if is_site:
                border_segmentations = _get_border_mapobjects_at_ref_position(
                    session, mapobject_ids, ref_mapobject_type.id, ref_id
                )
                border_mapobject_ids.update(
                    s.mapobject_id for s in border_segmentations
                )

            label_values = session.query(
                    tm.LabelValues.mapobject_id, tm.LabelValues.values
                ).\
                filter(tm.LabelValues.mapobject_id.in_(mapobject_ids)).\
                all()
            label_values_lut = dict(label_values)
            # When there are no label values at all for this position,
            # warning once per mapobject would only flood the log.
            warn_if_missing = bool(label_values_lut)

            # These values are identical for all mapobjects at this
            # reference position, so build them once.
            position_values = [ref_position['plate_name']]
            if 'well_name' in ref_position:
                position_values.append(ref_position['well_name'])
            if 'well_pos_y' in ref_position:
                position_values.extend([
                    str(ref_position['well_pos_y']),
                    str(ref_position['well_pos_x']),
                ])

            for mapobject_id, label, segmentation_layer_id in mapobjects:
                row = [mapobject_id] + position_values

                # The per-layer columns exist in the header only for
                # sites, and the border lookup is only done for sites.
                # NOTE(review): when the layer has no tpoint these four
                # columns are left out of the row although the header
                # lists them -- confirm that this is intended.
                if (is_site and
                        layer_lut[segmentation_layer_id]['tpoint']
                        is not None):
                    layer = layer_lut[segmentation_layer_id]
                    row.extend([
                        str(layer['tpoint']),
                        str(layer['zplane']),
                        str(label),
                        str(1 if mapobject_id in border_mapobject_ids else 0),
                    ])

                if mapobject_id not in label_values_lut:
                    if warn_if_missing:
                        logger.warning(
                            'no label values found for mapobject %d',
                            mapobject_id
                        )
                    row.extend([nan] * len(tool_result_keys))
                else:
                    vals = label_values_lut[mapobject_id]
                    for key in tool_result_keys:
                        try:
                            row.append(vals[key])
                        except KeyError:
                            # No value stored for this tool result.
                            row.append(nan)

                w.writerow(row)
                yield _drain()
def generate_feature_matrix(mapobject_type_id, ref_type):
    """Stream mapobject metadata and tool result values as CSV text.

    This is a generator. The first chunk is the header row
    (``mapobject_id``, the metadata columns that belong to `ref_type`,
    then one column per tool result ordered by tool result ID). Every
    following chunk is the CSV row of a single mapobject.

    ``experiment_id``, ``tpoint``, ``plate_name``, ``well_name``,
    ``well_pos_y`` and ``well_pos_x`` are not parameters; they are read
    from the enclosing scope.

    Parameters
    ----------
    mapobject_type_id:
        ID used to select the tool results and the mapobjects.
    ref_type:
        Kind of reference position to iterate over; one of ``'Plate'``,
        ``'Well'`` or ``'Site'``.

    Yields
    ------
    str
        One chunk of CSV text per written row.

    Raises
    ------
    ValueError
        If `ref_type` is not one of the supported values.
    """
    nan = str(np.nan)
    data = StringIO()
    w = csv.writer(data)

    def _drain():
        # Return what was written since the last call and empty the
        # buffer, so that every row is handed out exactly once.
        chunk = data.getvalue()
        data.seek(0)
        data.truncate(0)
        return chunk

    with tm.utils.ExperimentSession(experiment_id) as session:
        layer_lut = {
            r.id: {'tpoint': r.tpoint, 'zplane': r.zplane}
            for r in _get_matching_layers(session, tpoint)
        }

        # Ordered, so that rows are emitted in the order in which the
        # reference positions were returned by the query.
        ref_position_lut = OrderedDict()
        if ref_type == 'Plate':
            metadata_names = ['plate_name']
            for r in _get_matching_plates(session, plate_name):
                ref_position_lut[r.id] = {'plate_name': r.plate_name}
        elif ref_type == 'Well':
            metadata_names = ['plate_name', 'well_name']
            for r in _get_matching_wells(session, plate_name, well_name):
                ref_position_lut[r.id] = {
                    'plate_name': r.plate_name,
                    'well_name': r.well_name,
                }
        elif ref_type == 'Site':
            metadata_names = [
                'plate_name', 'well_name', 'well_pos_y', 'well_pos_x',
                'tpoint', 'zplane', 'label', 'is_border',
            ]
            sites = _get_matching_sites(
                session, plate_name, well_name, well_pos_y, well_pos_x
            )
            for r in sites:
                ref_position_lut[r.id] = {
                    'well_pos_y': r.well_pos_y,
                    'well_pos_x': r.well_pos_x,
                    'plate_name': r.plate_name,
                    'well_name': r.well_name,
                }
        else:
            # Fail early with a clear message instead of a NameError on
            # `metadata_names` further down.
            raise ValueError('unsupported reference type: %r' % (ref_type,))

        tool_results = session.query(tm.ToolResult.id, tm.ToolResult.name).\
            filter_by(mapobject_type_id=mapobject_type_id).\
            order_by(tm.ToolResult.id).\
            all()
        tool_result_names = [t.name for t in tool_results]
        # Label values are looked up by the stringified tool result ID.
        tool_result_keys = [str(t.id) for t in tool_results]

        ref_mapobject_type = session.query(tm.MapobjectType.id).\
            filter_by(ref_type=ref_type).\
            order_by(tm.MapobjectType.id).\
            first()

    w.writerow(['mapobject_id'] + metadata_names + tool_result_names)
    yield _drain()

    layer_ids = list(layer_lut)
    is_site = ref_type == 'Site'

    for ref_id in ref_position_lut:
        ref_position = ref_position_lut[ref_id]
        logger.debug('collect metadata for %s %d', ref_type, ref_id)
        with tm.utils.ExperimentSession(experiment_id) as session:
            mapobjects = _get_mapobjects_at_ref_position(
                session, mapobject_type_id, ref_id, layer_ids
            )
            mapobject_ids = [m.id for m in mapobjects]
            if not mapobject_ids:
                logger.warning(
                    'no mapobjects found for %s %d', ref_type, ref_id
                )
                continue

            # Border objects are only looked up for sites. A set keeps the
            # per-row membership test cheap.
            border_mapobject_ids = set()
            if is_site:
                border_segmentations = _get_border_mapobjects_at_ref_position(
                    session, mapobject_ids, ref_mapobject_type.id, ref_id
                )
                border_mapobject_ids.update(
                    s.mapobject_id for s in border_segmentations
                )

            label_values = session.query(
                    tm.LabelValues.mapobject_id, tm.LabelValues.values
                ).\
                filter(tm.LabelValues.mapobject_id.in_(mapobject_ids)).\
                all()
            label_values_lut = dict(label_values)
            # When there are no label values at all for this position,
            # warning once per mapobject would only flood the log.
            warn_if_missing = bool(label_values_lut)

            # These values are identical for all mapobjects at this
            # reference position, so build them once.
            position_values = [ref_position['plate_name']]
            if 'well_name' in ref_position:
                position_values.append(ref_position['well_name'])
            if 'well_pos_y' in ref_position:
                position_values.extend([
                    str(ref_position['well_pos_y']),
                    str(ref_position['well_pos_x']),
                ])

            for mapobject_id, label, segmentation_layer_id in mapobjects:
                row = [mapobject_id] + position_values

                # The per-layer columns exist in the header only for
                # sites, and the border lookup is only done for sites.
                # NOTE(review): when the layer has no tpoint these four
                # columns are left out of the row although the header
                # lists them -- confirm that this is intended.
                if (is_site and
                        layer_lut[segmentation_layer_id]['tpoint']
                        is not None):
                    layer = layer_lut[segmentation_layer_id]
                    row.extend([
                        str(layer['tpoint']),
                        str(layer['zplane']),
                        str(label),
                        str(1 if mapobject_id in border_mapobject_ids else 0),
                    ])

                if mapobject_id not in label_values_lut:
                    if warn_if_missing:
                        logger.warning(
                            'no label values found for mapobject %d',
                            mapobject_id
                        )
                    row.extend([nan] * len(tool_result_keys))
                else:
                    vals = label_values_lut[mapobject_id]
                    for key in tool_result_keys:
                        try:
                            row.append(vals[key])
                        except KeyError:
                            # No value stored for this tool result.
                            row.append(nan)

                w.writerow(row)
                yield _drain()
def generate_feature_matrix(mapobject_type_id, ref_type):
    """Stream the feature values of mapobjects as CSV text.

    This is a generator. The first chunk is the header row
    (``mapobject_id`` followed by the feature names ordered by feature
    ID). Every following chunk is the CSV row of a single mapobject.
    A feature without a stored value is written as ``nan``.

    ``experiment_id``, ``tpoint``, ``plate_name``, ``well_name``,
    ``well_pos_y`` and ``well_pos_x`` are not parameters; they are read
    from the enclosing scope.

    Parameters
    ----------
    mapobject_type_id:
        ID used to select the features and the mapobjects.
    ref_type:
        Kind of reference position to iterate over; one of ``'Plate'``,
        ``'Well'`` or ``'Site'``.

    Yields
    ------
    str
        One chunk of CSV text per written row.

    Raises
    ------
    ValueError
        If `ref_type` is not one of the supported values.
    """
    nan = str(np.nan)
    data = StringIO()
    w = csv.writer(data)

    def _drain():
        # Return what was written since the last call and empty the
        # buffer, so that every row is handed out exactly once.
        chunk = data.getvalue()
        data.seek(0)
        data.truncate(0)
        return chunk

    with tm.utils.ExperimentSession(experiment_id) as session:
        layer_ids = [r.id for r in _get_matching_layers(session, tpoint)]

        if ref_type == 'Plate':
            results = _get_matching_plates(session, plate_name)
        elif ref_type == 'Well':
            results = _get_matching_wells(session, plate_name, well_name)
        elif ref_type == 'Site':
            results = _get_matching_sites(
                session, plate_name, well_name, well_pos_y, well_pos_x
            )
        else:
            # Without this branch the layer query result would silently
            # be reused as the list of reference positions.
            raise ValueError('unsupported reference type: %r' % (ref_type,))
        ref_ids = [r.id for r in results]

        features = session.query(tm.Feature.id, tm.Feature.name).\
            filter_by(mapobject_type_id=mapobject_type_id).\
            order_by(tm.Feature.id).\
            all()
        feature_ids = [f.id for f in features]
        feature_names = [f.name for f in features]

        # The returned row is not used. `one()` raises unless exactly one
        # row matches -- presumably a sanity check that the mapobject type
        # has the requested reference type; confirm against the caller.
        session.query(tm.MapobjectType.id).\
            filter_by(ref_type=ref_type, id=mapobject_type_id).\
            one()

    w.writerow(['mapobject_id'] + feature_names)
    yield _drain()

    for ref_id in ref_ids:
        logger.debug('collect feature values for %s %d', ref_type, ref_id)
        with tm.utils.ExperimentSession(experiment_id) as session:
            mapobjects = _get_mapobjects_at_ref_position(
                session, mapobject_type_id, ref_id, layer_ids
            )
            mapobject_ids = [m.id for m in mapobjects]
            if not mapobject_ids:
                logger.warning(
                    'no mapobjects found for %s %d', ref_type, ref_id
                )
                continue

            feature_values = session.query(
                    tm.FeatureValues.mapobject_id, tm.FeatureValues.values
                ).\
                filter(tm.FeatureValues.mapobject_id.in_(mapobject_ids)).\
                all()
            feature_values_lut = dict(feature_values)
            if not feature_values_lut:
                logger.warning(
                    'no feature values found for %s %d', ref_type, ref_id
                )
                continue

            for mapobject_id, _label, _layer_id in mapobjects:
                if mapobject_id not in feature_values_lut:
                    logger.warning(
                        'no feature values found for mapobject %d',
                        mapobject_id
                    )
                    # Keep the ID so that the placeholder row can still
                    # be attributed to its mapobject.
                    row = [mapobject_id] + [nan] * len(feature_ids)
                else:
                    vals = feature_values_lut[mapobject_id]
                    # Values are keyed by feature ID in string form.
                    # Place each value in the column of its feature
                    # rather than relying on the row's own key order, so
                    # that a missing or extra key cannot shift the
                    # columns relative to the header.
                    by_feature_id = {int(k): vals[k] for k in vals}
                    row = [mapobject_id] + [
                        by_feature_id.get(fid, nan) for fid in feature_ids
                    ]

                w.writerow(row)
                yield _drain()
def generate_feature_matrix(mapobject_type_id, ref_type):
    """Stream the feature values of mapobjects as CSV text.

    This is a generator. The first chunk is the header row
    (``mapobject_id`` followed by the feature names ordered by feature
    ID). Every following chunk is the CSV row of a single mapobject.
    A feature without a stored value is written as ``nan``.

    ``experiment_id``, ``tpoint``, ``plate_name``, ``well_name``,
    ``well_pos_y`` and ``well_pos_x`` are not parameters; they are read
    from the enclosing scope.

    Parameters
    ----------
    mapobject_type_id:
        ID used to select the features and the mapobjects.
    ref_type:
        Kind of reference position to iterate over; one of ``'Plate'``,
        ``'Well'`` or ``'Site'``.

    Yields
    ------
    str
        One chunk of CSV text per written row.

    Raises
    ------
    ValueError
        If `ref_type` is not one of the supported values.
    """
    nan = str(np.nan)
    data = StringIO()
    w = csv.writer(data)

    def _drain():
        # Return what was written since the last call and empty the
        # buffer, so that every row is handed out exactly once.
        chunk = data.getvalue()
        data.seek(0)
        data.truncate(0)
        return chunk

    with tm.utils.ExperimentSession(experiment_id) as session:
        layer_ids = [r.id for r in _get_matching_layers(session, tpoint)]

        if ref_type == 'Plate':
            results = _get_matching_plates(session, plate_name)
        elif ref_type == 'Well':
            results = _get_matching_wells(session, plate_name, well_name)
        elif ref_type == 'Site':
            results = _get_matching_sites(
                session, plate_name, well_name, well_pos_y, well_pos_x
            )
        else:
            # Without this branch the layer query result would silently
            # be reused as the list of reference positions.
            raise ValueError('unsupported reference type: %r' % (ref_type,))
        ref_ids = [r.id for r in results]

        features = session.query(tm.Feature.id, tm.Feature.name).\
            filter_by(mapobject_type_id=mapobject_type_id).\
            order_by(tm.Feature.id).\
            all()
        feature_ids = [f.id for f in features]
        feature_names = [f.name for f in features]

        # The returned row is not used. `one()` raises unless exactly one
        # row matches -- presumably a sanity check that the mapobject type
        # has the requested reference type; confirm against the caller.
        session.query(tm.MapobjectType.id).\
            filter_by(ref_type=ref_type, id=mapobject_type_id).\
            one()

    w.writerow(['mapobject_id'] + feature_names)
    yield _drain()

    for ref_id in ref_ids:
        logger.debug('collect feature values for %s %d', ref_type, ref_id)
        with tm.utils.ExperimentSession(experiment_id) as session:
            mapobjects = _get_mapobjects_at_ref_position(
                session, mapobject_type_id, ref_id, layer_ids
            )
            mapobject_ids = [m.id for m in mapobjects]
            if not mapobject_ids:
                logger.warning(
                    'no mapobjects found for %s %d', ref_type, ref_id
                )
                continue

            feature_values = session.query(
                    tm.FeatureValues.mapobject_id, tm.FeatureValues.values
                ).\
                filter(tm.FeatureValues.mapobject_id.in_(mapobject_ids)).\
                all()
            feature_values_lut = dict(feature_values)
            if not feature_values_lut:
                logger.warning(
                    'no feature values found for %s %d', ref_type, ref_id
                )
                continue

            for mapobject_id, _label, _layer_id in mapobjects:
                if mapobject_id not in feature_values_lut:
                    logger.warning(
                        'no feature values found for mapobject %d',
                        mapobject_id
                    )
                    # Keep the ID so that the placeholder row can still
                    # be attributed to its mapobject.
                    row = [mapobject_id] + [nan] * len(feature_ids)
                else:
                    vals = feature_values_lut[mapobject_id]
                    # Values are keyed by feature ID in string form.
                    # Place each value in the column of its feature
                    # rather than relying on the row's own key order, so
                    # that a missing or extra key cannot shift the
                    # columns relative to the header.
                    by_feature_id = {int(k): vals[k] for k in vals}
                    row = [mapobject_id] + [
                        by_feature_id.get(fid, nan) for fid in feature_ids
                    ]

                w.writerow(row)
                yield _drain()