Example #1
    def _build(self, **kwargs):

        # Manage parameters
        kwargs = kwargs.copy()
        # Force required families: Quetzal workspace must have the following
        # families: (nb: None means "latest" version)
        required_families = dict(
            iguazu=None,
            omind=None,
            standard=None,
            protocol=None
        )
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # Manage connections to other flows
        dataset_flow = GenericDatasetFlow(**kwargs)
        self.update(dataset_flow)
        features_files = dataset_flow.terminal_tasks().pop()

        # instantiate tasks. Use separate tasks for a classic ETL approach:
        # E: read features from HDF5 file
        # T and L: merge features into a single dataframe, then save as CSV
        read_features = LoadDataframe(
            key='/iguazu/features/survey_meta',
        )
        merge_features = MergeDataframes(
            filename='surveys_summary.csv',
            path='datasets',
        )

        notify = SlackTask(message='VR surveys features summarization finished!')

        with self:
            feature_dataframes = read_features.map(file=features_files)
            merged_dataframe = merge_features(parents=features_files, dataframes=feature_dataframes)
            # Send slack notification
            notify(upstream_tasks=[merged_dataframe])
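
The required-families and default-query preamble above reappears almost verbatim in every example below. A minimal refactoring sketch, using a hypothetical `_prepare_dataset_kwargs` helper that is not part of iguazu, of how that shared block could be factored out:

    # Hypothetical helper, not part of iguazu: normalize the kwargs that each
    # flow forwards to GenericDatasetFlow.
    REQUIRED_FAMILIES = {'iguazu': None, 'omind': None, 'standard': None, 'protocol': None}

    def _prepare_dataset_kwargs(kwargs, default_query):
        kwargs = kwargs.copy()
        # Force required families; None means "latest" version
        families = kwargs.get('families') or {}
        for name, version in REQUIRED_FAMILIES.items():
            families.setdefault(name, version)
        kwargs['families'] = families
        # Fall back to the default query and dialect when none was given
        if not kwargs.get('query'):
            kwargs['query'] = default_query
            kwargs['dialect'] = 'postgresql_json'
        return kwargs
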
Example #2
    def _build(self, *, base_url=DEFAULT_BASE_URL, form_id=None, **kwargs):
        required_families = dict(
            iguazu=None,
            omind=None,
            protocol=None,
            standard=None,
        )
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # First part of this flow: obtain a dataset of files
        dataset_flow = GenericDatasetFlow(**kwargs)

        json_files = dataset_flow.terminal_tasks().pop()
        self.update(dataset_flow)

        create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)
        read_json = LoadJSON()
        read_form = GetForm(form_id=form_id, base_url=base_url)
        extract_scores = ExtractScores(
            output_hdf5_key='/iguazu/features/typeform/subject',
        )
        propagate_metadata = PropagateMetadata(
            propagate_families=['omind', 'protocol'])
        update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)

        with self:
            create_noresult = create_flow_metadata.map(parent=json_files)
            form = read_form()
            responses = read_json.map(file=json_files,
                                      upstream_tasks=[create_noresult])
            scores = extract_scores.map(parent=json_files,
                                        response=responses,
                                        form=unmapped(form))
            scores_with_metadata = propagate_metadata.map(parent=json_files,
                                                          child=scores)
            _ = update_flow_metadata.map(parent=json_files,
                                         child=scores_with_metadata)
Example #3
    def click_options():
        return GenericDatasetFlow.click_options() + (
            click.option('--base-url',
                         required=False,
                         type=click.STRING,
                         default=DEFAULT_BASE_URL,
                         help='Base URL for the typeform API.'),
            click.option('--form-id',
                         required=False,
                         type=click.STRING,
                         help='ID of the form (questionnaire) on typeform.'),
        )
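
Each entry in the returned tuple is a decorator produced by `click.option`, so a CLI entry point can apply them in a loop. A minimal sketch, assuming a hypothetical `run` command:

    import click

    @click.command()
    def run(**kwargs):
        """Hypothetical entry point receiving --base-url and --form-id."""
        click.echo(kwargs)

    # click.option(...) returns a decorator, so the tuple composes in a loop
    for option in click_options():
        run = option(run)
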
Example #4
    def _build(self, **kwargs):
        required_families = dict(
            iguazu=None,
            omind=None,
            protocol=None,
            standard=None,
        )
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # First part of this flow: obtain a dataset of files
        dataset_flow = GenericDatasetFlow(**kwargs)

        features_files = dataset_flow.terminal_tasks().pop()
        self.update(dataset_flow)

        read_features = LoadDataframe(
            key='/iguazu/features/typeform/subject',
        )
        merge_features = MergeDataframes(
            filename='typeform_summary.csv',
            path='datasets',
        )
        notify = SlackTask(message='Typeform feature summarization finished!')

        with self:
            feature_dataframes = read_features.map(file=features_files)
            merged_dataframe = merge_features(parents=features_files,
                                              dataframes=feature_dataframes)
            # Send slack notification
            notify(upstream_tasks=[merged_dataframe])
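
In Prefect 1.x, `upstream_tasks` creates a pure state dependency: `notify` waits for the merge to finish but receives none of its output. A minimal standalone sketch (demo names, unrelated to iguazu):

    from prefect import Flow, task

    @task
    def merge():
        return 'merged'

    @task
    def notify():
        print('done')

    with Flow('state-dep-demo') as flow:
        merged = merge()
        # notify runs only after merge succeeds, but gets no data from it
        notify(upstream_tasks=[merged])
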
Example #5
    def _build(self, **kwargs):
        # Force required families: Quetzal workspace must have the following
        # families: (nb: None means "latest" version)
        required_families = dict(
            iguazu=None,
            omind=None,
            standard=None,
            protocol=None
        )
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # The cardiac features flow requires an upstream dataset flow in order
        # to provide the input files. Create one and deduce the tasks to
        # plug the cardiac flow to the output of the dataset flow
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_signals = dataset_flow.terminal_tasks().pop()
        events = raw_signals
        self.update(dataset_flow)

        create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

        # Instantiate tasks
        survey_report = ExtractReportFeatures(
            events_hdf5_key='/iguazu/events/standard',
            output_hdf5_key='/iguazu/features/survey_report',
            graceful_exceptions=(NoSurveyReport,
                                 SoftPreconditionFailed)
        )
        survey_meta = ExtractMetaFeatures(
            features_hdf5_key='/iguazu/features/survey_report',
            output_hdf5_key='/iguazu/features/survey_meta'
        )

        propagate_metadata = PropagateMetadata(propagate_families=['omind', 'protocol'])

        update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        report = Report()

        notify = SlackTask(preamble='Survey feature extraction finished\n'
                                    'Task report:')

        with self:

            create_noresult = create_flow_metadata.map(parent=events)
            # Feature extraction
            features_reports = survey_report.map(events=events, upstream_tasks=[create_noresult])
            features_metas = survey_meta.map(features=features_reports, parent=raw_signals,
                                             upstream_tasks=[create_noresult])

            features_with_metadata = propagate_metadata.map(parent=raw_signals, child=features_metas)
            update_noresult = update_flow_metadata.map(parent=raw_signals, child=features_with_metadata)
            # Send slack notification
            message = report(files=features_with_metadata, upstream_tasks=[update_noresult])
            notify(message=message)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
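
The `graceful_exceptions` tuple suggests these tasks convert the listed failures into soft failures so that the rest of the mapped pipeline keeps running. A minimal sketch of the idea with a hypothetical wrapper; iguazu's actual Task base class is assumed to implement this differently:

    def run_gracefully(func, graceful_exceptions, *args, **kwargs):
        # Hypothetical: turn whitelisted exceptions into a soft-failure marker
        # instead of letting them fail the whole mapped branch
        try:
            return func(*args, **kwargs)
        except graceful_exceptions as exc:
            return {'status': 'failed', 'reason': str(exc)}
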
Example #6
    def click_options():
        return GenericDatasetFlow.click_options()
Example #7
    def _build(self, **kwargs):
        required_families = dict(
            iguazu=None,
            omind=None,
            standard=None,
            protocol=None,
        )
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # First part of this flow: obtain a dataset of files
        dataset_flow = GenericDatasetFlow(**kwargs)

        raw_files = dataset_flow.terminal_tasks().pop()
        self.update(dataset_flow)
        create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)
        standardize_events = ExtractStandardEvents(
            name='UnityToStandardEvents',
            events_hdf5_key='/unity/events/unity_events',
            output_hdf5_key='/iguazu/events/standard',
        )
        # filter_vr = FilterVRSequences()
        standardize_ppg_signals = ExtractNexusSignal(
            name='NexusToStandardPPG',
            signals_hfd5_key='/nexus/signal/nexus_signal_raw',
            output_hdf5_key='/iguazu/signal/ppg/standard',
            source_column='G',
            target_column='PPG',
        )
        standardize_gsr_signals = ExtractNexusGSRSignal(
            name='NexusToStandardGSR',
            signals_hfd5_key='/nexus/signal/nexus_signal_raw',
            output_hdf5_key='/iguazu/signal/gsr/standard',
            source_column='F',
            target_column='GSR',
        )
        standardize_pzt_signals = ExtractNexusSignal(
            name='NexusToStandardPZT',
            signals_hfd5_key='/nexus/signal/nexus_signal_raw',
            output_hdf5_key='/iguazu/signal/pzt/standard',
            source_column='H',
            target_column='PZT',
        )
        merge = MergeHDF5(
            suffix='_standard',
            temporary=False,
            verify_status=True,
            hdf5_family='standard',
            meta_keys=['standard'],
            propagate_families=['omind', 'protocol'],
        )
        update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        report = Report()
        notify = SlackTask(
            preamble='Standardization of VR flow status finished.\n'
            'Task report:')

        # Build flow
        with self:
            create_noresult = create_flow_metadata.map(parent=raw_files)
            standard_events = standardize_events.map(
                events=raw_files, upstream_tasks=[create_noresult])
            # vr_sequences = filter_vr.map(events=standard_events)
            standard_ppg = standardize_ppg_signals.map(
                signals=raw_files, upstream_tasks=[create_noresult])
            standard_gsr = standardize_gsr_signals.map(
                signals=raw_files, upstream_tasks=[create_noresult])
            standard_pzt = standardize_pzt_signals.map(
                signals=raw_files, upstream_tasks=[create_noresult])
            merged = merge.map(
                parent=raw_files,
                events=standard_events,
                PPG=standard_ppg,
                GSR=standard_gsr,
                PZT=standard_pzt,
            )
            update_noresult = update_flow_metadata.map(parent=raw_files,
                                                       child=merged)
            message = report(files=merged, upstream_tasks=[update_noresult])
            notify(message=message)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
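
Every downstream flow reads the HDF5 keys this flow writes. An illustrative sketch of that per-file layout with pandas, assuming a hypothetical file name:

    import pandas as pd

    # Hypothetical file name; the keys are those written by the flow above
    with pd.HDFStore('subject_standard.h5', mode='r') as store:
        events = store['/iguazu/events/standard']
        ppg = store['/iguazu/signal/ppg/standard']
        gsr = store['/iguazu/signal/gsr/standard']
        pzt = store['/iguazu/signal/pzt/standard']
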
Example #8
    def _build(self, *, plot=False, **kwargs):
        # Force required families: Quetzal workspace must have the following
        # families: (nb: None means "latest" version)
        required_families = dict(iguazu=None,
                                 omind=None,
                                 standard=None,
                                 protocol=None)
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # The cardiac features flow requires an upstream dataset flow in order
        # to provide the input files. Create one and deduce the tasks to
        # plug the cardiac flow to the output of the dataset flow
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_signals = dataset_flow.terminal_tasks().pop()
        events = raw_signals
        self.update(dataset_flow)
        create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

        # Instantiate tasks
        clean = CleanPPGSignal(
            signals_hdf5_key='/iguazu/signal/ppg/standard',
            output_hdf5_key='/iguazu/signal/ppg/clean',
        )
        detect_peaks = SSFPeakDetect(
            signals_hdf5_key='/iguazu/signal/ppg/clean',
            ssf_output_hdf5_key='/iguazu/signal/ppg/ssf',
            nn_output_hdf5_key='/iguazu/signal/ppg/NN',
            nni_output_hdf5_key='/iguazu/signal/ppg/NNi',
        )
        extract_features = ExtractHRVFeatures(
            nn_hdf5_key='/iguazu/signal/ppg/NN',
            nni_hdf5_key='/iguazu/signal/ppg/NNi',
            output_hdf5_key='/iguazu/features/ppg/sequence',
        )
        propagate_metadata = PropagateMetadata(
            propagate_families=['omind', 'protocol'])

        update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        report = Report()
        notify = SlackTask(preamble='Cardiac feature extraction finished.\n'
                           'Task report:')
        with self:
            create_noresult = create_flow_metadata.map(parent=raw_signals)
            # Signal processing branch
            clean_signals = clean.map(signals=raw_signals,
                                      upstream_tasks=[create_noresult])
            preprocessed_signals = detect_peaks.map(signals=clean_signals)
            # Feature extraction
            features = extract_features.map(nn=preprocessed_signals,
                                            nni=preprocessed_signals,
                                            events=events,
                                            parent=raw_signals)

            features_with_metadata = propagate_metadata.map(parent=raw_signals,
                                                            child=features)
            update_noresult = update_flow_metadata.map(
                parent=raw_signals, child=features_with_metadata)
            # Send slack notification
            message = report(files=features_with_metadata,
                             upstream_tasks=[update_noresult])
            notify(message=message)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
Example #9
    def _build(self, **kwargs):
        # Force required families: Quetzal workspace must have the following
        # families: (nb: None means "latest" version)
        required_families = dict(
            iguazu=None,
            omind=None,
            standard=None,
            protocol=None,
        )
        families = kwargs.get('families', {}) or {}  # Could be None by default args
        for name in required_families:
            families.setdefault(name, required_families[name])
        kwargs['families'] = families

        # When the query is set by kwargs, leave the query and dialect as they
        # come. Otherwise, set to the default defined just above
        if not kwargs.get('query', None):
            kwargs['query'] = self.DEFAULT_QUERY
            kwargs['dialect'] = 'postgresql_json'

        # The galvanic features flow requires an upstream dataset flow in order
        # to provide the input files. Create one and deduce the tasks to
        # plug the galvanic flow to the output of the dataset flow
        dataset_flow = GenericDatasetFlow(**kwargs)
        raw_signals = dataset_flow.terminal_tasks().pop()
        events = raw_signals
        self.update(dataset_flow)

        create_flow_metadata = CreateFlowMetadata(flow_name=self.REGISTRY_NAME)

        # Instantiate tasks
        clean = CleanGSRSignal(signals_hdf5_key='/iguazu/signal/gsr/standard',
                               events_hdf5_key='/iguazu/events/standard',
                               output_hdf5_key='/iguazu/signal/gsr/clean',
                               graceful_exceptions=(GSRArtifactCorruption,
                                                    SoftPreconditionFailed))
        downsample = Downsample(
            signals_hdf5_key='/iguazu/signal/gsr/clean',
            output_hdf5_key='/iguazu/signal/gsr/downsampled',
        )
        cvx = ApplyCVX(
            signals_hdf5_key='/iguazu/signal/gsr/downsampled',
            output_hdf5_key='/iguazu/signal/gsr/cvx',
        )
        scrpeaks = DetectSCRPeaks(
            signals_hdf5_key='/iguazu/signal/gsr/cvx',
            output_hdf5_key='/iguazu/signal/gsr/scrpeaks',
        )

        extract_features = ExtractGSRFeatures(
            cvx_hdf5_key='/iguazu/signal/gsr/cvx',
            scrpeaks_hdf5_key='/iguazu/signal/gsr/scrpeaks',
            events_hdf5_key='/iguazu/events/standard',
            output_hdf5_key='/iguazu/features/gsr/sequence',
        )
        propagate_metadata = PropagateMetadata(
            propagate_families=['omind', 'protocol'])

        update_flow_metadata = UpdateFlowMetadata(flow_name=self.REGISTRY_NAME)
        report = Report()
        notify = SlackTask(preamble='Galvanic feature extraction finished.\n'
                           'Task report:')
        with self:
            create_noresult = create_flow_metadata.map(parent=raw_signals)
            # Signal processing branch
            clean_signals = clean.map(signals=raw_signals,
                                      annotations=raw_signals,
                                      events=events,
                                      upstream_tasks=[create_noresult])
            downsample_signals = downsample.map(
                signals=clean_signals,
                annotations=clean_signals,
                upstream_tasks=[create_noresult])
            cvx_signals = cvx.map(signals=downsample_signals,
                                  annotations=downsample_signals,
                                  upstream_tasks=[create_noresult])
            scr_peaks = scrpeaks.map(signals=cvx_signals,
                                     annotations=cvx_signals,
                                     upstream_tasks=[create_noresult])

            # Feature extraction
            features = extract_features.map(cvx=cvx_signals,
                                            scrpeaks=scr_peaks,
                                            events=events,
                                            parent=raw_signals)
            features_with_metadata = propagate_metadata.map(parent=raw_signals,
                                                            child=features)
            update_noresult = update_flow_metadata.map(
                parent=raw_signals, child=features_with_metadata)
            # Send slack notification
            message = report(files=features_with_metadata,
                             upstream_tasks=[update_noresult])
            notify(message=message)

        logger.debug('Built flow %s with tasks %s', self, self.tasks)
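
A minimal sketch of how one of these flow classes might be driven locally, assuming a hypothetical `GalvanicFeaturesFlow` class whose constructor forwards its keyword arguments to `_build`:

    # Hypothetical class and workspace names; kwargs reach GenericDatasetFlow
    flow = GalvanicFeaturesFlow(workspace_name='my-workspace')
    state = flow.run()  # Prefect 1.x local execution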