Code example #1
    def handle_import_dataset(self):
        msgBox = tk.messagebox.askyesnocancel(
            "Import Dataset", "Does your dataset contain multiple signals?")
        if msgBox is None:
            return
        else:
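            # select_raw_data presumably returns the chosen path, the dataset
            # name, and a flag indicating whether the dataset already exists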
            path, dataset, datasetex = self.select_raw_data(msgBox)

            # bail out if the user cancelled selection in select_raw_data;
            # otherwise os.path.join(path, ...) below would fail on None
            if path is None:
                return

            # get list of files to import
            files_to_import = []
            if not msgBox:
                files_to_import.append(path)
            else:
                folder_contents = os.listdir(path)
                csv_files = [
                    os.path.join(path, item) for item in folder_contents
                    if item.endswith(".csv")
                ]
                files_to_import += csv_files

            # import and load dataset
            if path is not None:
                if not datasetex:
                    labelfilenames = [
                        _folder.file_abspath('common', labelfile)
                        for labelfile in [
                            'labels_SWaN.csv', 'labels_MUSS.csv',
                            'labels_ambsed.csv', 'labels_goodbad.csv',
                            'labels_unknown.csv'
                        ]
                    ]
                    import import_dataset
                    import_dataset.main(files_to_import,
                                        name=dataset,
                                        labelfilenames=labelfilenames)
                else:
                    print('dataset %s already imported' % dataset)
Code example #2
def main(filenames,
         *,
         name=None,
         labelfilenames=None,
         zoom=None,
         mag=DEFAULT_MAGNITUDE,
         sample=None,
         day=None):
    if len(filenames) > 1 and not name:
        _helper.errorExit(
            'Must specify a custom dataset --name when importing multiple files'
        )

    if mag <= 0:
        _helper.errorExit('magnitude must be positive')

    if sample is not None and day is not None:
        _helper.errorExit('Can only provide one of --sample and --day')

    start_sample, end_sample = None, None
    if sample is not None:
        start_sample, end_sample = parseRange('sample', sample)

    start_day, end_day = None, None
    if day is not None:
        start_day, end_day = parseRange('day', day)

    # load labels
    if not labelfilenames:
        labelfilenames = [
            _folder.file_abspath('common', 'labels_test.csv'),
            _folder.file_abspath('common', 'labels_unknown.csv')
        ]

    labels = []
    labels_names = set()

    for labelfile in labelfilenames:
        print('Reading labels from %s...' % labelfile)

        with open(labelfile, 'rt') as csvfile:
            reader = csv.DictReader(csvfile)

            if set(reader.fieldnames) != {'label', 'red', 'green', 'blue'}:
                _helper.errorExit('Incorrect label csv headers')

            for row in reader:
                label_name = row['label'].strip()
                rr = float(row['red'].strip())
                gg = float(row['green'].strip())
                bb = float(row['blue'].strip())

                if re.search(r'[^\w\- ]', label_name, re.ASCII):
                    _helper.errorExit(
                        'Only alphanumeric, underscore, dash, and space allowed in label names: '
                        + label_name)
                if label_name in labels_names:
                    _helper.errorExit('Duplicate label: ' + label_name)

                labels.append((label_name, rr, gg, bb))
                labels_names.add(label_name)

    # process arguments
    signal_names = [
        _helper.makeIdFromFilename(filename) for filename in filenames
    ]
    if len(signal_names) != len(set(signal_names)):
        _helper.errorExit('Duplicate signal names')

    if name:
        if not _helper.checkId(name, False):
            _helper.errorExit(
                'Only alphanumeric and underscore allowed in dataset names')
        dataset = name
    else:
        dataset = signal_names[0]

    if start_sample is not None or end_sample is not None:
        dataset = dataset + strRange('sample', start_sample, end_sample)
    if start_day is not None or end_day is not None:
        dataset = dataset + strRange('day', start_day, end_day)

    out_folder = _helper.datasetDir(dataset)
    tile_folder = _helper.datasetTileDir(dataset)

    if os.path.exists(out_folder):
        _helper.errorExit('Please remove output folder ' + out_folder)

    print('Using output folder ' + out_folder)

    _helper.ensureDirExists(out_folder, False)
    _helper.ensureDirExists(tile_folder, False)

    # read in data
    print('reading header...')

    # open files
    csvfiles = []
    for filename in filenames:
        if filename.endswith('.gz'):
            use_open = gzip.open
        else:
            use_open = open

        csvfiles.append(use_open(filename, 'rt'))

    # read headers
    files_start_ms = []
    dataset_rate = None

    for filename, csvfile in zip(filenames, csvfiles):
        header_rate, header_start_ms = _helper.process_actigraph_header(
            csvfile)

        if dataset_rate is None:
            dataset_rate = int(header_rate)
        elif dataset_rate != int(header_rate):
            _helper.errorExit('Multiple sample rates found')

        files_start_ms.append(header_start_ms)

    # determine sample range
    dataset_start_ms = min(files_start_ms)
    dataset_start_date = datetime.datetime.utcfromtimestamp(dataset_start_ms /
                                                            1000).date()

    # --sample ranges are already expressed as sample indices; only --day
    # ranges need converting to sample indices (done below)

    if start_day is not None or end_day is not None:
        if start_day is not None:
            output_min_ms = 1000 * calendar.timegm(
                (dataset_start_date +
                 datetime.timedelta(days=(start_day - 1))).timetuple())
            start_sample = (max(output_min_ms, dataset_start_ms) -
                            dataset_start_ms) * dataset_rate / 1000
            if start_sample != int(start_sample):
                _helper.errorExit('day start sample error')
            start_sample = int(start_sample)
        else:
            start_sample = None

        if end_day is not None:
            output_max_ms = 1000 * calendar.timegm(
                (dataset_start_date +
                 datetime.timedelta(days=(end_day))).timetuple())
            end_sample = (output_max_ms -
                          dataset_start_ms) * dataset_rate / 1000
            if end_sample != int(end_sample):
                _helper.errorExit('day end sample error')
            end_sample = int(end_sample)
        else:
            end_sample = None

    # determine starting day index
    start_day_index = 1
    if start_sample:
        start_day_index = 1 + (datetime.datetime.utcfromtimestamp(
            dataset_start_ms / 1000 + start_sample / dataset_rate).date() -
                               dataset_start_date).days

    # print header summary
    if len(filenames) > 1:
        for filename, signalname, file_start_ms in zip(filenames, signal_names,
                                                       files_start_ms):
            print('file start:   ',
                  _helper.timeMillisecondToTimeString(file_start_ms),
                  signalname, filename)
    print('input start:  ',
          _helper.timeMillisecondToTimeString(dataset_start_ms), dataset)

    # read data
    sample_len = 3 * len(filenames)
    sample_data = []

    min_smp = float('inf')
    max_smp = float('-inf')

    for fileindex, (filename, file_start_ms, csvfile) in enumerate(
            zip(filenames, files_start_ms, csvfiles)):
        print('reading ' + filename + '...')

        # Checks if csv header is absent and adds the header if needed
        csvstartpos = csvfile.tell()
        firstrow = next(csvfile)
        csvfile.seek(csvstartpos)

        fieldnames = None
        if 'Accelerometer' not in firstrow:
            # No headers present
            DEFAULT_FIELDNAMES = [
                'Timestamp', 'Accelerometer X', 'Accelerometer Y',
                'Accelerometer Z'
            ]
            no_of_fields = len(firstrow.split(','))
            if no_of_fields == 4:
                fieldnames = DEFAULT_FIELDNAMES
            elif no_of_fields == 3:
                fieldnames = DEFAULT_FIELDNAMES[1:]
            else:
                _helper.errorExit(
                    'missing header has unrecognized number of fields')

        if fieldnames is not None:
            _helper.warning('input file missing field names, using ' +
                            ','.join(fieldnames))

        reader = csv.DictReader(csvfile, fieldnames=fieldnames)

        if 'Timestamp' in reader.fieldnames:
            _helper.warning(
                'input file has Timestamp field, but it will be ignored')

        # process rows
        reader_sample_index = 0

        sample_offset = (file_start_ms -
                         dataset_start_ms) * dataset_rate / 1000
        if sample_offset != int(sample_offset):
            _helper.errorExit('sample offset error')
        sample_offset = int(sample_offset)

        if start_sample is not None:
            sample_offset -= start_sample

        for row in reader:
            data_sample_index = reader_sample_index + sample_offset
            reader_sample_index += 1

            if data_sample_index < 0:
                continue
            if end_sample is not None and data_sample_index >= end_sample - (
                    start_sample if start_sample is not None else 0):
                break

            x = float(row['Accelerometer X'])
            y = float(row['Accelerometer Y'])
            z = float(row['Accelerometer Z'])

            min_smp = min(min_smp, x, y, z)
            max_smp = max(max_smp, x, y, z)

            while data_sample_index >= len(sample_data):
                sample_data.append([None] * sample_len)

            sample_data[data_sample_index][3 * fileindex + 0] = x
            sample_data[data_sample_index][3 * fileindex + 1] = y
            sample_data[data_sample_index][3 * fileindex + 2] = z

            if reader_sample_index % (60 * 60 * dataset_rate) == 0:
                print('read %d hours...' % (reader_sample_index /
                                            (60 * 60 * dataset_rate)))

    if min_smp < -mag or mag < max_smp:
        _helper.warning('sample exceeds magnitude')

    output_start_ms = dataset_start_ms
    if start_sample is not None:
        output_start_ms_offset = start_sample * 1000 / dataset_rate
        if output_start_ms_offset != int(output_start_ms_offset):
            _helper.errorExit('output start offset sample error')
        output_start_ms += int(output_start_ms_offset)
    output_end_ms = output_start_ms + (len(sample_data) -
                                       1) * 1000 / dataset_rate

    # figure out max zoom level, if needed
    if zoom is None:
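        # choose the smallest zoom such that one further subsampling level
        # would fit the whole signal in at most two tiles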
        for zz in range(10):
            zoom = zz
            if len(sample_data) / math.pow(SUBSAMPLE, zz + 1) <= 2 * TILE_SIZE:
                break

    # print summary
    print('length:       ', len(sample_data))
    print('rate:         ', dataset_rate)
    print('max zoom:     ', zoom)
    print('output start: ',
          _helper.timeMillisecondToTimeString(output_start_ms))
    print('output end:   ', _helper.timeMillisecondToTimeString(output_end_ms))

    # write tiles
    for zoom_level in range(zoom + 1):
        print('writing zoom %d...' % zoom_level)

        zoom_subsample = SUBSAMPLE**zoom_level
        zoom_tile_size = TILE_SIZE * zoom_subsample

        ntiles = len(sample_data) // zoom_tile_size
        if len(sample_data) % zoom_tile_size != 0:
            ntiles += 1

        for tt in range(ntiles):
            tile_id = 'z%02dt%06d' % (zoom_level, tt)

            outfilename = os.path.join(tile_folder, tile_id + '.json')

            with open(outfilename, 'wt') as outfile:
                write_startfile(outfile, zoom_subsample,
                                dataset + ':' + tile_id)

                prev = False
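                # each tile covers TILE_SIZE + 1 samples; the extra sample
                # presumably overlaps the next tile so plotted segments join
                # across tile boundaries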
                for ss in range(tt * TILE_SIZE, (tt + 1) * TILE_SIZE + 1):
                    rangesmp = sample_data[ss * zoom_subsample:(ss + 1) *
                                           zoom_subsample]
                    write_sample(outfile, rangesample(rangesmp, sample_len),
                                 prev, sample_len)
                    prev = True

                write_endfile(outfile)

            if (tt + 1) % 1000 == 0:
                print('wrote %d tiles...' % (tt + 1))

    print('writing origin...')

    outfilename = _helper.datasetOriginFilename(dataset)

    with open(outfilename, 'wt') as outfile:
        outfile.write("{\n")
        outfile.write('    "origin": %s\n' % json.dumps(filenames))
        outfile.write('}\n')

    print('writing config...')

    outfilename = _helper.datasetConfigFilename(dataset)

    with open(outfilename, 'wt') as outfile:
        outfile.write('{\n')
        outfile.write('    "title": "%s",\n' % dataset)
        outfile.write('    "tile_size": %d,\n' % TILE_SIZE)
        outfile.write('    "tile_subsample": %d,\n' % SUBSAMPLE)
        outfile.write('    "zoom_max": %d,\n' % zoom)
        outfile.write('    "length": %d,\n' % len(sample_data))
        outfile.write('    "start_time_ms": %s,\n' % output_start_ms)
        outfile.write('    "sample_rate": %d,\n' % dataset_rate)
        outfile.write('    "start_day_idx": %d,\n' % start_day_index)
        outfile.write('    "magnitude": %d,\n' % mag)
        outfile.write('    "signals": ["%s"],\n' % ('", "'.join(signal_names)))
        outfile.write('    "labels": [\n')
        for ii, (ll, rr, gg, bb) in enumerate(labels):
            outfile.write(
                '        { "label": "%s", "color": [ %0.2f, %0.2f, %0.2f ] }%s\n'
                % (ll, rr, gg, bb, ',' if ii + 1 < len(labels) else ''))
        outfile.write('    ]\n')
        outfile.write('}\n')

    print('dataset written to ' + out_folder)
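
For reference, a minimal sketch of invoking main directly, mirroring the call in code example #1 (the file name 'subject1.csv' and the dataset name 'demo' are placeholders; with labelfilenames omitted, the labels_test.csv/labels_unknown.csv defaults shown above apply):

    import import_dataset

    # hypothetical single-file import; a custom name is only required when
    # importing multiple files
    import_dataset.main(['subject1.csv'], name='demo')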
Code example #3
    def _process_request(self, path, vars):
        global _debug_delay
        if _debug_delay:
            time.sleep(_debug_delay)

        if path == '/signaligner.html':
            if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
                dataset = vars['dataset']
            else:
                dataset = 'null'

            if 'session' in vars and ALNUMUN_RE.match(vars['session']):
                session = vars['session']
            else:
                session = SESSION_ERROR

            def replace_data(data):
                return replace_vars(data, session, False)

            self._send_header_and_file_data(
                _folder.file_abspath('signaligner/signaligner.html'), False,
                CTYPE_HTML, replace_data)

        elif path == '/signaligner.js':
            self._send_header_and_file_data(
                _folder.file_abspath('signaligner/signaligner.js'), False,
                CTYPE_JS, replace_mode_config)

        elif path == '/fetchdatasetlist':
            datasets = _helper.getDatasetList()
            self._send_header(200, CTYPE_PLAIN)
            self._send_data(json.dumps(datasets), False)

        elif path == '/fetchdataset':
            if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
                dataset_name = vars['dataset']

                if 'type' in vars and vars['type'] == 'config':
                    file_path = _helper.datasetConfigFilename(dataset_name)
                elif ('type' in vars and vars['type'] == 'tile'
                      and 'id' in vars and ALNUMUN_RE.match(vars['id'])):
                    file_path = os.path.join(
                        _helper.datasetTileDir(dataset_name),
                        vars['id'] + '.json')
                else:
                    self._send_header(404, CTYPE_PLAIN)
                    return

                if not os.path.exists(file_path):
                    self._send_header(404, CTYPE_PLAIN)
                    return

                self._send_header_and_file_data(file_path, False, CTYPE_PLAIN)
            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/fetchlabels':
            if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
                dataset = vars['dataset']

                self._send_header(200, CTYPE_PLAIN)
                labels = _helper.getLabelsLatest(dataset)
                if labels:
                    self._send_data(json.dumps(labels), False)
            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/reportlabels':
            if 'data' in vars:
                data = json.loads(vars['data'])

                if ('dataset' in data and ALNUMUN_RE.match(data['dataset'])
                        and 'session' in data
                        and ALNUMUN_RE.match(data['session'])):
                    dataset = data['dataset']
                    session = data['session']

                    with open(
                            _helper.ensureDirExists(
                                _helper.logLabelsFilename(dataset, session),
                                True), 'at') as dfile:
                        dfile.write(json.dumps(data) + '\n')

                    with open(
                            _helper.ensureDirExists(
                                _helper.latestLabelsFilename(dataset, session),
                                True), 'wt') as dfile:
                        dfile.write(json.dumps(data) + '\n')

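                    # read back the labels just written so the response
                    # echoes the stored state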
                    with open(
                            _helper.ensureDirExists(
                                _helper.latestLabelsFilename(dataset, session),
                                True), 'rt') as dfile:
                        response = json.loads(dfile.read())

                    self._send_header(200, CTYPE_PLAIN)
                    self._send_data(json.dumps(response), False)

                else:
                    self._send_header(404, CTYPE_PLAIN)

            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/mturksubmit' or path == '/mturksubmissions':
            if 'data' in vars:
                data = json.loads(vars['data'])

                if ('dataset' in data and ALNUMUN_RE.match(data['dataset'])
                        and 'session' in data
                        and ALNUMUN_RE.match(data['session'])):
                    dataset = data['dataset']
                    session = data['session']

                    if path == '/mturksubmit':
                        mturk_submit = _helper.mturkSubmitLabelsFilename(
                            dataset, session)
                        if not os.path.exists(mturk_submit):
                            with open(
                                    _helper.ensureDirExists(
                                        mturk_submit, True), 'wt') as dfile:
                                dfile.write(json.dumps(data) + '\n')

                    submissions = _helper.mturkGetSubmissions(session)

                    total = 0
                    datasets = []
                    for submission in submissions:
                        score = submission['score'] / 100.0
                        score = score**2
                        score *= submission['daysofdata']
                        # minimum of 1 cent for tutorial levels, 20 cents for challenge
                        score = max(score, 0.20)
                        if submission['istutorial']:
                            score *= 0.05
                        total += score
                        datasets.append(submission['dataset'])

                    total = int(total * 100)
                    if session not in _mturk_session_codes:
                        _mturk_session_codes[session] = _helper.makeId()[:3]

                    code = _mturk_session_codes[session]
                    code = code + ('%03d' % total).upper()
                    code = code + hashlib.md5(
                        code.encode('utf-8')).hexdigest()[:3].upper()

                    response = {
                        'amount': '$%d.%02d' % (total // 100, total % 100),
                        'code': code,
                        'datasets': datasets
                    }

                    self._send_header(200, CTYPE_PLAIN)
                    self._send_data(json.dumps(response), False)

                else:
                    self._send_header(404, CTYPE_PLAIN)

            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/log':
            if 'data' in vars:
                with open(
                        _helper.ensureDirExists(
                            _folder.data_abspath('playlog'), True),
                        'at') as dfile:
                    dfile.write(vars['data'] + '\n')

            self._send_header(200, CTYPE_PLAIN)

        elif HTML_RE.match(path):
            if path == '/mturk_start.html':
                global _mode
                if _mode != 'MTURK':
                    self._send_header(200, CTYPE_PLAIN)
                    self._send_data(
                        'mode must be MTURK to request mturk_start.html',
                        False)
                    return

            if 'session' in vars and ALNUMUN_RE.match(vars['session']):
                session = vars['session']
            else:
                session = SESSION_ERROR

            def replace_data(data):
                return replace_vars(data, session, True)

            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), False, CTYPE_HTML,
                replace_data)

        elif PNG_RE.match(path):
            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), True, CTYPE_PNG)

        elif JS_RE.match(path):
            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), False, CTYPE_JS)

        elif CSS_RE.match(path):
            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), False, CTYPE_CSS)

        else:
            self._send_header(404, CTYPE_PLAIN)
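
The regular expressions referenced above (ALNUMUN_RE, HTML_RE, PNG_RE, JS_RE, CSS_RE) are defined elsewhere in the module. A plausible sketch, inferred from how they validate identifiers and route static paths (these exact patterns are assumptions, not the project's verbatim definitions):

    import re

    # assumed: identifiers limited to alphanumerics and underscore
    ALNUMUN_RE = re.compile(r'^[A-Za-z0-9_]+$')
    # assumed: static asset routes such as /index.html or /img/icon.png
    HTML_RE = re.compile(r'^/[\w\-/]+\.html$')
    PNG_RE = re.compile(r'^/[\w\-/]+\.png$')
    JS_RE = re.compile(r'^/[\w\-/]+\.js$')
    CSS_RE = re.compile(r'^/[\w\-/]+\.css$')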
Code example #4
def datasetDir(dataset):
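    # prefer a dataset bundled under common/datasets; otherwise fall back to
    # the user data directory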
    common_dir = _folder.file_abspath('common', 'datasets', dataset)
    if os.path.exists(common_dir):
        return common_dir
    return _folder.data_abspath(_get_dataset_folder(), dataset)
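
A usage sketch, matching the call in code example #2 ('demo' is a placeholder dataset name):

    out_folder = _helper.datasetDir('demo')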
Code example #5
    def handle_run_algo(self):
        if self.datasetSelected is None:
            tk.messagebox.showerror("Alert", "Please import a dataset first.")
            return

        import import_mhealth
        import main
        import import_labels

        algorithm = self.algorithmSelected.get()
        swan = algorithm == 'SWaN'
        muss = algorithm == 'MUSS'
        qc = algorithm == 'QC'

        dataset = self.datasetSelected.get()
        dataset_raw_csv_paths = get_dataset_raw_file_paths(dataset)

        csv_selected = self.algorithmCSVSelected.get()
        run_algo_csv_list = []
        missing_files = []

        for filepath in dataset_raw_csv_paths:
            if csv_selected == "ALL" or _helper.makeIdFromFilename(
                    filepath) == csv_selected:
                run_algo_csv_list.append(filepath)

        for filepath in run_algo_csv_list:
            if not os.path.exists(filepath):
                missing_files.append(filepath)
            else:
                signal_name = _helper.makeIdFromFilename(filepath)
                mhealth_folder = mhealthfolder(dataset, signal_name)
                algo_folder = algofolder(dataset, signal_name)
                if not os.path.exists(mhealth_folder):
                    import_mhealth.main(filepath, mhealth_folder)

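                # mdcas-python presumably expects to run from its own
                # directory, so temporarily switch the working directory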
                old_cwd = os.path.abspath(os.path.realpath(os.getcwd()))
                os.chdir(_folder.file_abspath('mdcas-python'))
                main.main(mhealth_folder + '/default/',
                          algo_folder + '/default/',
                          80,
                          profiling=False,
                          swan=swan,
                          muss=muss,
                          qc=qc)
                os.chdir(old_cwd)

                # the algorithms themselves already ran in main.main above;
                # here their output files are imported as labels
                if swan:
                    print("Importing SWaN labels...")
                    import_labels.main(dataset,
                                       algo_folder +
                                       '/default/SWaN_output.csv',
                                       source='Algo',
                                       session='SWaN_' + signal_name)
                elif muss:
                    print("Importing MUSS labels...")
                    import_labels.main(dataset,
                                       algo_folder +
                                       '/default/muss_output.csv',
                                       source='Algo',
                                       session='MUSS_' + signal_name)
                elif qc:
                    print("Importing QC labels...")
                    import_labels.main(dataset,
                                       algo_folder + '/default/qc_output.csv',
                                       source='Algo',
                                       session='QC_' + signal_name,
                                       qcfix=True)

        if len(run_algo_csv_list) > 0:
            tk.messagebox.showinfo(
                "Run Algorithm",
                "Algorithm labels successfully added for the following files: "
                + ", ".join([
                    _helper.makeIdFromFilename(file)
                    for file in run_algo_csv_list
                ]))

        if len(missing_files) > 0:
            tk.messagebox.showerror(
                "Run Algorithm",
                "The algorithm was not run on the following missing files. Please "
                "move the files back to their locations from when the dataset was "
                "imported: " + ", ".join(missing_files))