def main(dataset):
    # Read start time from config
    with open(_helper.datasetConfigFilename(dataset), 'rt') as json_file:
        data = json.load(json_file)
        start_time_ms = data['start_time_ms']
        sample_rate = data['sample_rate']

    # Read the last stored label of each unique player or session
    session_labels = _helper.getLabelsLatest(dataset)

    # Write to csv
    csvOutputPath = _helper.exportFilename(dataset)
    _helper.ensureDirExists(csvOutputPath, True)
    with open(csvOutputPath, 'wt') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(
            ['START_TIME', 'STOP_TIME', 'PREDICTION', 'SOURCE', 'SESSION'])

        for session_data in session_labels:
            session = session_data['session']
            source = session_data['source']
            for label in session_data['labels']:
                start_time_in_ms = start_time_ms + label[
                    'lo'] * 1000.0 / sample_rate
                start_time = _helper.timeMillisecondToTimeString(
                    start_time_in_ms)
                stop_time_in_ms = start_time_ms + label[
                    'hi'] * 1000.0 / sample_rate
                stop_time = _helper.timeMillisecondToTimeString(
                    stop_time_in_ms)
                prediction = label['label']
                writer.writerow(
                    [start_time, stop_time, prediction, source, session])

    print('output written to', csvOutputPath)
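# A minimal, self-contained sketch of the sample-index-to-timestamp arithmetic
# used in main() above. All values below are hypothetical and not taken from a
# real dataset config.
def _example_label_to_times(start_time_ms=1000000, sample_rate=80, lo=160, hi=240):
    # a sample index maps to start_time_ms + index * 1000 / sample_rate
    start_ms = start_time_ms + lo * 1000.0 / sample_rate  # 1002000.0
    stop_ms = start_time_ms + hi * 1000.0 / sample_rate   # 1003000.0
    return start_ms, stop_ms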
def main(filename, outfolder):
    if filename.endswith('.gz'):
        use_open = gzip.open
    else:
        use_open = open

    with use_open(filename, 'rt') as csvfile:
        header_rate, header_start_ms = _helper.process_actigraph_header(
            csvfile)

        header_start_sec = header_start_ms / 1000
        if header_start_sec != int(header_start_sec):
            _helper.errorExit('start time can only have second precision')
        header_start_sec = int(header_start_sec)

        csv_header = csvfile.readline().strip()

        if csv_header != 'Accelerometer X,Accelerometer Y,Accelerometer Z':
            _helper.errorExit(
                'unrecognized CSV header: only "Accelerometer X,Accelerometer Y,Accelerometer Z" supported'
            )

        tm = datetime.datetime.utcfromtimestamp(header_start_sec)
        tm_sample = 0

        outfile = None

        for row in csvfile:
            tm_msec = int(1000 * tm_sample / header_rate + 0.5)

            if outfile is None:
                outfilecsvname = 'NONE-NONE-NA.NONE-NONE.%04d-%02d-%02d-%02d-%02d-%02d-%03d-P0000.sensor.csv' % (
                    tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second,
                    tm_msec)
                outfilename = os.path.join(outfolder, 'default',
                                           'MasterSynced', '%04d' % tm.year,
                                           '%02d' % tm.month, '%02d' % tm.day,
                                           '%02d' % tm.hour, outfilecsvname)
                print('Create new hourly file: %s' % outfilecsvname)

                outfile = open(_helper.ensureDirExists(outfilename, True),
                               'wt')
                outfile.write(
                    'HEADER_TIME_STAMP,X_ACCELERATION_METERS_PER_SECOND_SQUARED,Y_ACCELERATION_METERS_PER_SECOND_SQUARED,Z_ACCELERATION_METERS_PER_SECOND_SQUARED\n'
                )

            tm_str = '%04d-%02d-%02d %02d:%02d:%02d.%03d' % (
                tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second,
                tm_msec)
            outfile.write(tm_str + ',' +
                          (','.join([(e if ('.' in e) else (e + '.0'))
                                     for e in row.strip().split(',')])) + '\n')

            tm_sample += 1
            if tm_sample == header_rate:
                prev_tm = tm
                tm = tm + datetime.timedelta(seconds=1)
                tm_sample = 0

                if prev_tm.year != tm.year or prev_tm.month != tm.month or prev_tm.day != tm.day or prev_tm.hour != tm.hour:
                    outfile.close()
                    outfile = None

        if outfile is not None:
            outfile.close()
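# Illustrative sketch (hypothetical values) of the per-sample millisecond
# offset computed above: at 30 Hz, the 7th sample within the current second
# rounds to an offset of 233 ms.
def _example_sample_msec(tm_sample=7, header_rate=30):
    return int(1000 * tm_sample / header_rate + 0.5)  # -> 233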
def main(filenames,
         *,
         name=None,
         labelfilenames=None,
         zoom=None,
         mag=DEFAULT_MAGNITUDE,
         sample=None,
         day=None):
    if len(filenames) > 1 and not name:
        _helper.errorExit(
            'Must specify a custom dataset --name when importing multiple files'
        )

    if mag <= 0:
        _helper.errorExit('magnitude must be positive')

    if sample is not None and day is not None:
        _helper.errorExit('Can only provide one of --sample and --day')

    start_sample, end_sample = None, None
    if sample is not None:
        start_sample, end_sample = parseRange('sample', sample)

    start_day, end_day = None, None
    if day is not None:
        start_day, end_day = parseRange('day', day)

    # load labels
    if not labelfilenames:
        labelfilenames = [
            _folder.file_abspath('common', 'labels_test.csv'),
            _folder.file_abspath('common', 'labels_unknown.csv')
        ]

    labels = []
    labels_names = set()

    for labelfile in labelfilenames:
        print('Reading labels from %s...' % labelfile)

        with open(labelfile, 'rt') as csvfile:
            reader = csv.DictReader(csvfile)

            if set(reader.fieldnames) != set(['label', 'red', 'green', 'blue'
                                              ]):
                _helper.errorExit('Incorrect label csv headers')

            for row in reader:
                label_name = row['label'].strip()
                rr = float(row['red'].strip())
                gg = float(row['green'].strip())
                bb = float(row['blue'].strip())

                if re.search(r'[^\w\- ]', label_name, re.ASCII):
                    _helper.errorExit(
                        'Only alphanumeric, underscore, dash, and space allowed in label names: '
                        + label_name)
                if label_name in labels_names:
                    _helper.errorExit('Duplicate label: ' + label_name)

                labels.append((label_name, rr, gg, bb))
                labels_names.add(label_name)

    # process arguments
    signal_names = []
    for filename in filenames:
        signal_names.append(_helper.makeIdFromFilename(filename))
    if len(signal_names) != len(set(signal_names)):
        _helper.errorExit('Duplicate signal names')

    if name:
        if not _helper.checkId(name, False):
            _helper.errorExit(
                'Only alphanumeric and underscore allowed in dataset names')
        dataset = name
    else:
        dataset = signal_names[0]

    if start_sample is not None or end_sample is not None:
        dataset = dataset + strRange('sample', start_sample, end_sample)
    if start_day is not None or end_day is not None:
        dataset = dataset + strRange('day', start_day, end_day)

    out_folder = _helper.datasetDir(dataset)
    tile_folder = _helper.datasetTileDir(dataset)

    if os.path.exists(out_folder):
        _helper.errorExit('Please remove output folder ' + out_folder)

    print('Using output folder ' + out_folder)

    _helper.ensureDirExists(out_folder, False)
    _helper.ensureDirExists(tile_folder, False)

    # read in data
    print('reading header...')

    # open files
    csvfiles = []
    for filename in filenames:
        if filename.endswith('.gz'):
            use_open = gzip.open
        else:
            use_open = open

        csvfiles.append(use_open(filename, 'rt'))

    # read headers
    files_start_ms = []
    dataset_rate = None

    for filename, csvfile in zip(filenames, csvfiles):
        header_rate, header_start_ms = _helper.process_actigraph_header(
            csvfile)

        if dataset_rate is None:
            dataset_rate = int(header_rate)
        elif dataset_rate != int(header_rate):
            _helper.errorExit('Multiple sample rates found')

        files_start_ms.append(header_start_ms)

    # determine sample range
    dataset_start_ms = min(files_start_ms)
    dataset_start_date = datetime.datetime.utcfromtimestamp(dataset_start_ms /
                                                            1000).date()

    if start_sample is not None or end_sample is not None:
        pass

    if start_day is not None or end_day is not None:
        if start_day is not None:
            output_min_ms = 1000 * calendar.timegm(
                (dataset_start_date +
                 datetime.timedelta(days=(start_day - 1))).timetuple())
            start_sample = (max(output_min_ms, dataset_start_ms) -
                            dataset_start_ms) * dataset_rate / 1000
            if start_sample != int(start_sample):
                _helper.errorExit('day start sample error')
            start_sample = int(start_sample)
        else:
            start_sample = None

        if end_day is not None:
            output_max_ms = 1000 * calendar.timegm(
                (dataset_start_date +
                 datetime.timedelta(days=(end_day))).timetuple())
            end_sample = (output_max_ms -
                          dataset_start_ms) * dataset_rate / 1000
            if end_sample != int(end_sample):
                _helper.errorExit('day end sample error')
            end_sample = int(end_sample)
        else:
            end_sample = None

    # determine starting day index
    start_day_index = 1
    if start_sample:
        start_day_index = 1 + (datetime.datetime.utcfromtimestamp(
            dataset_start_ms / 1000 + start_sample / dataset_rate).date() -
                               dataset_start_date).days

    # print header summary
    if len(filenames) > 1:
        for filename, signalname, file_start_ms in zip(filenames, signal_names,
                                                       files_start_ms):
            print('file start:   ',
                  _helper.timeMillisecondToTimeString(file_start_ms),
                  signalname, filename)
    print('input start:  ',
          _helper.timeMillisecondToTimeString(dataset_start_ms), dataset)

    # read data
    sample_len = 3 * len(filenames)
    sample_data = []

    min_smp = 1e100
    max_smp = -1e100

    for fileindex, (filename, file_start_ms, csvfile) in enumerate(
            zip(filenames, files_start_ms, csvfiles)):
        print('reading ' + filename + '...')

        # Checks if csv header is absent and adds the header if needed
        csvstartpos = csvfile.tell()
        firstrow = next(csvfile)
        csvfile.seek(csvstartpos)

        fieldnames = None
        if 'Accelerometer' not in firstrow:
            # No headers present
            DEFAULT_FIELDNAMES = [
                'Timestamp', 'Accelerometer X', 'Accelerometer Y',
                'Accelerometer Z'
            ]
            no_of_fields = len(firstrow.split(','))
            if no_of_fields == 4:
                fieldnames = DEFAULT_FIELDNAMES
            elif no_of_fields == 3:
                fieldnames = DEFAULT_FIELDNAMES[1:]
            else:
                _helper.errorExit(
                    'missing header has unrecognized number of fields')

        if fieldnames is not None:
            _helper.warning('input file missing field names, using ' +
                            ','.join(fieldnames))

        reader = csv.DictReader(csvfile, fieldnames=fieldnames)

        if 'Timestamp' in reader.fieldnames:
            _helper.warning(
                'input file has Timestamp field, but it will be ignored')

        # process rows
        reader_sample_index = 0

        sample_offset = (file_start_ms -
                         dataset_start_ms) * dataset_rate / 1000
        if sample_offset != int(sample_offset):
            _helper.errorExit('sample offset error')
        sample_offset = int(sample_offset)

        if start_sample is not None:
            sample_offset -= start_sample

        for row in reader:
            data_sample_index = reader_sample_index + sample_offset
            reader_sample_index += 1

            if data_sample_index < 0:
                continue
            if end_sample is not None and data_sample_index >= end_sample - (
                    start_sample if start_sample is not None else 0):
                break

            x = float(row['Accelerometer X'])
            y = float(row['Accelerometer Y'])
            z = float(row['Accelerometer Z'])

            min_smp = min(min_smp, x, y, z)
            max_smp = max(max_smp, x, y, z)

            while data_sample_index >= len(sample_data):
                sample_data.append([None] * sample_len)

            sample_data[data_sample_index][3 * fileindex + 0] = x
            sample_data[data_sample_index][3 * fileindex + 1] = y
            sample_data[data_sample_index][3 * fileindex + 2] = z

            if reader_sample_index % (60 * 60 * dataset_rate) == 0:
                print('read %d hours...' % (reader_sample_index /
                                            (60 * 60 * dataset_rate)))

    if min_smp < -mag or mag < max_smp:
        _helper.warning('sample exceeds magnitude')
    output_start_ms = dataset_start_ms
    if start_sample is not None:
        output_start_ms_offset = start_sample * 1000 / dataset_rate
        if output_start_ms_offset != int(output_start_ms_offset):
            _helper.errorExit('output start offset sample error')
        output_start_ms += int(output_start_ms_offset)
    output_end_ms = output_start_ms + (len(sample_data) -
                                       1) * 1000 / dataset_rate

    # figure out max zoom level, if needed
    if zoom is None:
        for zz in range(10):
            zoom = zz
            if len(sample_data) / math.pow(SUBSAMPLE, zz + 1) <= 2 * TILE_SIZE:
                break

    # print summary
    print('length:       ', len(sample_data))
    print('rate:         ', dataset_rate)
    print('max zoom:     ', zoom)
    print('output start: ',
          _helper.timeMillisecondToTimeString(output_start_ms))
    print('output end:   ', _helper.timeMillisecondToTimeString(output_end_ms))

    # write tiles
    for zoom_level in range(zoom + 1):
        print('writing zoom %d...' % zoom_level)

        zoom_subsample = SUBSAMPLE**zoom_level
        zoom_tile_size = TILE_SIZE * zoom_subsample

        ntiles = int(len(sample_data) / zoom_tile_size)
        if len(sample_data) % zoom_tile_size != 0:
            ntiles += 1

        for tt in range(ntiles):
            tile_id = 'z%02dt%06d' % (zoom_level, tt)

            outfilename = os.path.join(tile_folder, tile_id + '.json')

            with open(outfilename, 'wt') as outfile:
                write_startfile(outfile, zoom_subsample,
                                dataset + ':' + tile_id)

                prev = False
                for ss in range(tt * TILE_SIZE, (tt + 1) * TILE_SIZE + 1):
                    rangesmp = sample_data[ss * zoom_subsample:(ss + 1) *
                                           zoom_subsample]
                    write_sample(outfile, rangesample(rangesmp, sample_len),
                                 prev, sample_len)
                    prev = True

                write_endfile(outfile)

            if (tt + 1) % 1000 == 0:
                print('wrote %d tiles...' % (tt + 1))

    print('writing origin...')

    outfilename = _helper.datasetOriginFilename(dataset)

    with open(outfilename, 'wt') as outfile:
        outfile.write("{\n")
        outfile.write('    "origin": %s\n' % json.dumps(filenames))
        outfile.write('}\n')

    print('writing config...')

    outfilename = _helper.datasetConfigFilename(dataset)

    with open(outfilename, 'wt') as outfile:
        outfile.write('{\n')
        outfile.write('    "title": "%s",\n' % dataset)
        outfile.write('    "tile_size": %d,\n' % TILE_SIZE)
        outfile.write('    "tile_subsample": %d,\n' % SUBSAMPLE)
        outfile.write('    "zoom_max": %d,\n' % zoom)
        outfile.write('    "length": %d,\n' % len(sample_data))
        outfile.write('    "start_time_ms": %s,\n' % output_start_ms)
        outfile.write('    "sample_rate": %d,\n' % dataset_rate)
        outfile.write('    "start_day_idx": %d,\n' % start_day_index)
        outfile.write('    "magnitude": %d,\n' % mag)
        outfile.write('    "signals": ["%s"],\n' % ('", "'.join(signal_names)))
        outfile.write('    "labels": [\n')
        for ii, (ll, rr, gg, bb) in enumerate(labels):
            outfile.write(
                '        { "label": "%s", "color": [ %0.2f, %0.2f, %0.2f ] }%s\n'
                % (ll, rr, gg, bb, ',' if ii + 1 < len(labels) else ''))
        outfile.write('    ]\n')
        outfile.write('}\n')

    print('dataset written to ' + out_folder)
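# Rough sketch of the tile bookkeeping used by the importer above, assuming
# TILE_SIZE and SUBSAMPLE keep their meanings from this module. With a
# hypothetical TILE_SIZE of 256, SUBSAMPLE of 8, and 1,000,000 samples, zoom
# level z covers 256 * 8**z raw samples per tile.
def _example_tile_counts(n_samples=1000000, tile_size=256, subsample=8, zoom=3):
    counts = []
    for zoom_level in range(zoom + 1):
        zoom_tile_size = tile_size * subsample**zoom_level
        ntiles = n_samples // zoom_tile_size
        if n_samples % zoom_tile_size != 0:
            ntiles += 1
        counts.append(ntiles)
    return counts  # -> [3907, 489, 62, 8]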
def main(dataset,
         filename,
         *,
         source=None,
         session=None,
         stdout=False,
         trim=False,
         qcfix=False):
    dataset_config_filename = _helper.datasetConfigFilename(dataset)

    if not os.path.exists(dataset_config_filename):
        _helper.errorExit('could not find dataset config file: ' +
                          dataset_config_filename)

    with open(dataset_config_filename, 'rt') as configfile:
        config = json.load(configfile)

    sample_rate = config['sample_rate']
    length = config['length']

    start_millisecond = config['start_time_ms']
    print('start time:',
          _helper.timeMillisecondToTimeString(start_millisecond))

    FORMAT_NOTES = 'NOTES'
    FORMAT_NOTES_TIME_FORMAT = '%a %b %d %H:%M:%S %Z %Y'
    FORMAT_NOTES_LENGTH_SECONDS = 10  # how long a note label should try to be
    FORMAT_ACTIVITY_GROUP = 'ACTIVITY_GROUP'
    FORMAT_PREDICTION = 'PREDICTION'
    FORMAT_PREDICTED = 'PREDICTED'
    FORMAT_PREDICTED_LABEL_SECONDS = 30

    with open(filename, 'rt') as csvfile:
        reader = csv.DictReader(csvfile)

        # check if file contains session and source columns
        if 'SESSION' in reader.fieldnames and 'SOURCE' in reader.fieldnames and (
                session or source):
            _helper.errorExit(
                'Session and source info detected in file; do not also provide session and source arguments.'
            )
        elif ('SESSION' in reader.fieldnames or 'SOURCE'
              in reader.fieldnames) and ('SESSION' not in reader.fieldnames
                                         or 'SOURCE' not in reader.fieldnames):
            _helper.errorExit(
                'Must provide both session and source fields in file or neither.'
            )
        elif (session is None or source is None) and (session or source):
            _helper.errorExit(
                'Must provide both session and source arguments or neither.')

        if session is None and 'SESSION' not in reader.fieldnames:
            _helper.errorExit(
                "No session argument provided and no session info in file. Cannot import labels."
            )
        if source is None and 'SOURCE' not in reader.fieldnames:
            _helper.errorExit(
                'No source argument provided and no source info in file. Cannot import labels.'
            )

        use_source_session_from_file = ('SESSION' in reader.fieldnames
                                        and 'SOURCE' in reader.fieldnames)

        # figure out format
        format = None
        format_meta = None
        if ('TIME' in reader.fieldnames) and ('TAG' in reader.fieldnames) and (
                'NOTE' in reader.fieldnames):
            format = FORMAT_NOTES
        elif ('START_TIME' in reader.fieldnames) and (
                'STOP_TIME' in reader.fieldnames) and ('ACTIVITY_GROUP.y'
                                                       in reader.fieldnames):
            format = FORMAT_ACTIVITY_GROUP
        elif ('START_TIME' in reader.fieldnames) and (
                'STOP_TIME' in reader.fieldnames) and ('PREDICTION'
                                                       in reader.fieldnames):
            format = FORMAT_PREDICTION
        elif ('HEADER_START_TIME'
              in reader.fieldnames) and ('PREDICTED' in reader.fieldnames):
            format = FORMAT_PREDICTED
            # get label names from header
            format_meta = []
            for field in reader.fieldnames[2:]:
                label = field.split('_')
                if label[0] != 'PROB' or len(label) < 2:
                    sys.stderr.write(
                        'unrecognized field in header: expected PROB_...\n')
                    sys.exit(-1)
                label = ' '.join([word.capitalize() for word in label[1:]])
                format_meta.append(label)
        else:
            sys.stderr.write('could not determine format from header fields\n')
            sys.exit(-1)

        sys.stderr.write('detected %s format\n' % format)
        if use_source_session_from_file:
            sys.stderr.write('reading source and session from file\n')
        else:
            sys.stderr.write('using source %s and session %s\n' %
                             (source, session))

        # process rows
        sessions = set()
        session_labels = {}
        session_sources = {}

        # this will keep track of the time the last label started to make sure they are sorted
        last_label_start_millisecond = 0

        for row in reader:
            # figure out sample range
            if format == FORMAT_NOTES:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['TIME'], FORMAT_NOTES_TIME_FORMAT)
                label_stop_millisecond = label_start_millisecond + FORMAT_NOTES_LENGTH_SECONDS * 1000
                label_value = row['TAG']
                label_detail = row['NOTE']
            elif format == FORMAT_ACTIVITY_GROUP:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(
                    row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['ACTIVITY_GROUP.y']
                label_detail = None
            elif format == FORMAT_PREDICTION:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(
                    row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['PREDICTION']
                label_detail = None
            elif format == FORMAT_PREDICTED:
                if int(row['PREDICTED']) >= len(format_meta):
                    sys.stderr.write('PREDICTED index out of range\n')
                    sys.exit(-1)
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['HEADER_START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = label_start_millisecond + 1000 * FORMAT_PREDICTED_LABEL_SECONDS
                label_value = format_meta[int(row['PREDICTED'])]
                label_detail = None
            else:
                _helper.errorExit('unknown format error')

            # check labels are in order
            if label_start_millisecond <= last_label_start_millisecond:
                _helper.errorExit('label start times not sorted')
            last_label_start_millisecond = label_start_millisecond

            # apply fix for QC end times, if needed
            if qcfix:
                if label_stop_millisecond % 100 == 88:
                    label_stop_millisecond += 12

            # convert from ms to sample
            label_start_sample_thousand = (label_start_millisecond -
                                           start_millisecond) * sample_rate
            label_stop_sample_thousand = (label_stop_millisecond -
                                          start_millisecond) * sample_rate

            if label_start_sample_thousand % 1000 != 0 or label_stop_sample_thousand % 1000 != 0:
                _helper.errorExit('sample precision error')

            # divisibility was checked above, so integer division is exact
            label_start_sample = label_start_sample_thousand // 1000
            label_stop_sample = label_stop_sample_thousand // 1000

            # figure out source and session
            if use_source_session_from_file:
                current_session = row['SESSION']
                current_source = row['SOURCE']
            else:
                current_session = session
                current_source = source

            if current_source not in VALID_SOURCES:
                _helper.errorExit('unrecognized source: ' + current_source)

            # for notes, go back and make sure any previous note doesn't overlap this one
            if format == FORMAT_NOTES:
                if current_session in sessions and len(
                        session_labels[current_session]) > 0:
                    session_labels[current_session][-1][1] = min(
                        session_labels[current_session][-1][1],
                        label_start_sample)

            # append this label to the session
            if current_session not in sessions:
                sessions.add(current_session)
                session_labels[current_session] = []
                session_sources[current_session] = current_source

            if session_sources[current_session] != current_source:
                _helper.errorExit('Session with multiple sources detected.')

            session_labels[current_session].append([
                label_start_sample, label_stop_sample, label_value,
                label_detail
            ])

        # write labels out
        for session in sessions:
            labels = session_labels[session]
            source = session_sources[session]

            # this will be used to merge adjacent time windows that have the same label
            last_activity = None

            # keep track of information about labels output
            was_prev = False
            any_outside = False
            any_far_outside = False

            output = ''
            output += '{"session":"%s", "source": "%s", "labels":[' % (session,
                                                                       source)

            for label_start_sample, label_stop_sample, label_value, label_detail in session_labels[
                    session]:
                # see if the label extends beyond the dataset time
                if label_start_sample < 0 or length < label_stop_sample:
                    any_outside = True
                if label_start_sample < 0 - 0.1 * length or length + 0.1 * length < label_stop_sample:
                    any_far_outside = True

                # merge adjacent labels that match
                if not last_activity:
                    last_activity = [
                        label_start_sample, label_stop_sample, label_value,
                        label_detail
                    ]
                elif last_activity[1] == label_start_sample and last_activity[
                        2] == label_value and last_activity[3] == label_detail:
                    last_activity[1] = label_stop_sample
                else:
                    if trimActivity(last_activity, trim, 0, length):
                        output += _helper.activityJSON(last_activity, was_prev)
                        was_prev = True
                    last_activity = [
                        label_start_sample, label_stop_sample, label_value,
                        label_detail
                    ]

            # account for any remaining label
            if last_activity:
                if trimActivity(last_activity, trim, 0, length):
                    output += _helper.activityJSON(last_activity, was_prev)
                    was_prev = True

            output += ']}\n'

            # display warnings about labels
            if any_far_outside:
                _helper.warning('label found FAR OUTSIDE signal in ' + session)
            elif any_outside:
                _helper.warning('label found outside signal in ' + session)

            # do output
            if stdout:
                sys.stdout.write(output)

            else:
                labels_filename = _helper.latestLabelsFilename(
                    dataset, session)
                with open(_helper.ensureDirExists(labels_filename, True),
                          'wt') as labelsfile:
                    labelsfile.write(output)

                print('labels added to', labels_filename)
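# Minimal sketch of the millisecond-to-sample conversion and precision check
# performed above, using made-up numbers (80 Hz, dataset starting at 0 ms).
def _example_ms_to_sample(label_ms=2000, start_ms=0, sample_rate=80):
    thousandths = (label_ms - start_ms) * sample_rate
    if thousandths % 1000 != 0:
        raise ValueError('sample precision error')
    return thousandths // 1000  # -> 160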
    def _process_request(self, path, vars):
        global _debug_delay
        if _debug_delay:
            time.sleep(_debug_delay)

        if path == '/signaligner.html':
            if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
                dataset = vars['dataset']
            else:
                dataset = 'null'

            if 'session' in vars and ALNUMUN_RE.match(vars['session']):
                session = vars['session']
            else:
                session = SESSION_ERROR

            def replace_data(data):
                data = replace_vars(data, session, False)
                return data

            self._send_header_and_file_data(
                _folder.file_abspath('signaligner/signaligner.html'), False,
                CTYPE_HTML, replace_data)

        elif path == '/signaligner.js':

            def replace_data(data):
                data = replace_mode_config(data)
                return data

            self._send_header_and_file_data(
                _folder.file_abspath('signaligner/signaligner.js'), False,
                CTYPE_JS, replace_data)

        elif path == '/fetchdatasetlist':
            datasets = _helper.getDatasetList()
            self._send_header(200, CTYPE_PLAIN)
            self._send_data(json.dumps(datasets), False)

        elif path == '/fetchdataset':
            if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
                dataset_name = vars['dataset']

                if 'type' in vars and vars['type'] == 'config':
                    file_path = _helper.datasetConfigFilename(dataset_name)
                elif 'type' in vars and vars[
                        'type'] == 'tile' and 'id' in vars and ALNUMUN_RE.match(
                            vars['id']):
                    file_path = os.path.join(
                        _helper.datasetTileDir(dataset_name),
                        vars['id'] + '.json')
                else:
                    self._send_header(404, CTYPE_PLAIN)
                    return

                if not os.path.exists(file_path):
                    self._send_header(404, CTYPE_PLAIN)
                    return

                self._send_header_and_file_data(file_path, False, CTYPE_PLAIN)
            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/fetchlabels':
            if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
                dataset = vars['dataset']

                self._send_header(200, CTYPE_PLAIN)
                labels = _helper.getLabelsLatest(dataset)
                if labels:
                    self._send_data(json.dumps(labels), False)
            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/reportlabels':
            if 'data' in vars:
                data = json.loads(vars['data'])

                if 'dataset' in data and ALNUMUN_RE.match(
                        data['dataset']
                ) and 'session' in data and ALNUMUN_RE.match(data['session']):
                    dataset = data['dataset']
                    session = data['session']

                    with open(
                            _helper.ensureDirExists(
                                _helper.logLabelsFilename(dataset, session),
                                True), 'at') as dfile:
                        dfile.write(json.dumps(data) + '\n')

                    with open(
                            _helper.ensureDirExists(
                                _helper.latestLabelsFilename(dataset, session),
                                True), 'wt') as dfile:
                        dfile.write(json.dumps(data) + '\n')

                    with open(
                            _helper.ensureDirExists(
                                _helper.latestLabelsFilename(dataset, session),
                                True), 'rt') as dfile:
                        response = json.loads(dfile.read())

                    self._send_header(200, CTYPE_PLAIN)
                    self._send_data(json.dumps(response), False)

                else:
                    self._send_header(404, CTYPE_PLAIN)

            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/mturksubmit' or path == '/mturksubmissions':
            if 'data' in vars:
                data = json.loads(vars['data'])

                if 'dataset' in data and ALNUMUN_RE.match(
                        data['dataset']
                ) and 'session' in data and ALNUMUN_RE.match(data['session']):
                    dataset = data['dataset']
                    session = data['session']

                    if path == '/mturksubmit':
                        mturk_submit = _helper.mturkSubmitLabelsFilename(
                            dataset, session)
                        if not os.path.exists(mturk_submit):
                            with open(
                                    _helper.ensureDirExists(
                                        mturk_submit, True), 'wt') as dfile:
                                dfile.write(json.dumps(data) + '\n')

                    submissions = _helper.mturkGetSubmissions(session)

                    total = 0
                    datasets = []
                    for submission in submissions:
                        score = submission['score'] / 100.0
                        score = score**2
                        score *= submission['daysofdata']
                        # minimum of 1 cent for tutorial levels, 20 cents for challenge
                        score = max(score, 0.20)
                        if submission['istutorial']:
                            score *= 0.05
                        total += score
                        datasets.append(submission['dataset'])

                    total = int(total * 100)
                    if session not in _mturk_session_codes:
                        _mturk_session_codes[session] = _helper.makeId()[:3]

                    code = _mturk_session_codes[session]
                    code = code + ('%03d' % total).upper()
                    code = code + hashlib.md5(
                        code.encode('utf-8')).hexdigest()[:3].upper()

                    response = {
                        'amount': '$%d.%02d' % (total // 100, total % 100),
                        'code': code,
                        'datasets': datasets
                    }

                    self._send_header(200, CTYPE_PLAIN)
                    self._send_data(json.dumps(response), False)

                else:
                    self._send_header(404, CTYPE_PLAIN)

            else:
                self._send_header(404, CTYPE_PLAIN)

        elif path == '/log':
            if 'data' in vars:
                with open(
                        _helper.ensureDirExists(
                            _folder.data_abspath('playlog'), True),
                        'at') as dfile:
                    dfile.write(vars['data'] + '\n')

            self._send_header(200, CTYPE_PLAIN)

        elif HTML_RE.match(path):
            if path == '/mturk_start.html':
                global _mode
                if _mode != 'MTURK':
                    self._send_header(200, CTYPE_PLAIN)
                    self._send_data(
                        'mode must be MTURK to request mturk_start.html',
                        False)
                    return

            if 'session' in vars and ALNUMUN_RE.match(vars['session']):
                session = vars['session']
            else:
                session = SESSION_ERROR

            def replace_data(data):
                return replace_vars(data, session, True)

            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), False, CTYPE_HTML,
                replace_data)

        elif PNG_RE.match(path):
            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), True, CTYPE_PNG)

        elif JS_RE.match(path):
            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), False, CTYPE_JS)

        elif CSS_RE.match(path):
            self._send_header_and_file_data(
                _folder.file_abspath('static' + path), False, CTYPE_CSS)

        else:
            self._send_header(404, CTYPE_PLAIN)
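# Hypothetical walk-through of the completion-code arithmetic in the
# /mturksubmit branch above: a 90% score on two days of data gives
# 0.9**2 * 2 = 1.62 dollars, i.e. 162 cents, before any tutorial discount.
import hashlib

def _example_mturk_code(prefix='abc', total_cents=162):
    code = prefix + ('%03d' % total_cents)
    return code + hashlib.md5(code.encode('utf-8')).hexdigest()[:3].upper()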
                session_slideshow_interactions.append(
                    mturk_slideshow_count_dict[page])

    session_row_data += session_slideshow_interactions

    session_zoom_time_spent = []

    if 'major_zoom_levels' not in info:
        session_zoom_time_spent += ["NA"] * len(major_zoom_levels)
    else:
        major_zoom_time_dict = info['major_zoom_levels']
        for zoom in sorted(major_zoom_levels):
            if zoom not in major_zoom_time_dict:
                session_zoom_time_spent.append(0.0)
            else:
                session_zoom_time_spent.append(major_zoom_time_dict[zoom] /
                                               1000.0)

    session_row_data += session_zoom_time_spent

    writer.writerow(session_row_data)

if args.stdout:
    sys.stdout.write(buffer.getvalue())
else:
    csvOutputPath = _helper.exportFilename('log_analysis')
    _helper.ensureDirExists(csvOutputPath, True)
    with open(csvOutputPath, 'wt') as csv_file:
        csv_file.write(buffer.getvalue())
    print('output written to', csvOutputPath)
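# Sketch of the in-memory buffer pattern used above, assuming `buffer` was an
# io.StringIO that the csv writer filled earlier in this script.
import csv
import io

def _example_buffer():
    buf = io.StringIO()
    csv.writer(buf).writerow(['SESSION', 'ZOOM_SECONDS'])
    return buf.getvalue()  # 'SESSION,ZOOM_SECONDS\r\n'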
def main(source_dataset, dest_dataset, *, notrim=False):
    # Process arguments to get name of dataset

    source_config = _helper.datasetConfigFilename(source_dataset)
    dest_config = _helper.datasetConfigFilename(dest_dataset)

    source = {}
    dest = {}

    with open(source_config, 'rt') as configfile:
        config = json.load(configfile)
        source['sample_rate'] = config['sample_rate']
        source['start_time_ms'] = config['start_time_ms']

    with open(dest_config, 'rt') as configfile:
        config = json.load(configfile)
        dest['sample_rate'] = config['sample_rate']
        dest['length'] = config['length']
        dest['start_time_ms'] = config['start_time_ms']

    if source['sample_rate'] != dest['sample_rate']:
        _helper.errorExit("Source and dest datasets should have the same sample rate")

    start_sample = ((dest['start_time_ms'] - source['start_time_ms']) / 1000) * source['sample_rate']

    if start_sample != int(start_sample):
        _helper.errorExit("Source and dest datasets are not offset by an integer number of samples")

    start_sample = int(start_sample)
    end_sample = int(start_sample + dest['length'])

    source_labels = _helper.getLabelsLatest(source_dataset)
    if source_labels:
        for session in source_labels:

            session_name = session['session']
            source_name = session['source']
            session_labels = session['labels']
            label_filename = _helper.latestLabelsFilename(dest_dataset, session_name)

            output = ''
            output += ('{"session":"%s", "source": "%s", "labels":[' % (session_name, source_name))
            was_prev = False

            for ll in session_labels:
                label_start = ll['lo']
                label_end = ll['hi']
                label_name = ll['label']
                label = [label_start, label_end, label_name]

                if notrim:
                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True

                elif label_end > start_sample and label_start < end_sample:

                    # Trim label start if needed
                    if label_start < start_sample:
                        label[0] = start_sample

                    # Trim label end if needed
                    if label_end > end_sample:
                        label[1] = end_sample

                    # Start label offset from 0
                    label[0] -= start_sample
                    label[1] -= start_sample

                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True

            output += ']}\n'

            _helper.ensureDirExists(label_filename, True)
            with open(label_filename, 'wt') as labelsfile:
                labelsfile.write(output)
            print('labels added to', label_filename)
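# Minimal sketch (hypothetical sample numbers) of the trim-and-shift step
# above: a label spanning samples [50, 250] copied into a destination window
# [100, 300) is clipped and re-based to [0, 150].
def _example_trim_label(lo=50, hi=250, start_sample=100, end_sample=300):
    lo = max(lo, start_sample)
    hi = min(hi, end_sample)
    return [lo - start_sample, hi - start_sample]  # -> [0, 150]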
class Logger:
    def __init__(self, stream1, stream2):
        self.stream1 = stream1
        self.stream2 = stream2

    def write(self, data):
        self.stream1.write(data)
        self.stream2.write(data)

    def flush(self):
        self.stream1.flush()
        self.stream2.flush()


logfilename = _folder.data_abspath(
    'log', 'signalauncher.' + str(os.getpid()) + '.txt')
logfile = open(_helper.ensureDirExists(logfilename, True), 'wt')
sys.stdout = Logger(sys.stdout, logfile)
sys.stderr = Logger(sys.stderr, logfile)


# utility functions
def datasetexists(dataset):
    out_folder = _helper.datasetDir(dataset)
    return os.path.exists(out_folder)


def mhealthfolder(dataset, signal):
    return _folder.data_abspath('algo', dataset, 'mhealth', signal)


def algofolder(dataset, signal):