def main(dataset):
    # Read start time from config
    with open(_helper.datasetConfigFilename(dataset), 'rt') as json_file:
        data = json.load(json_file)
        start_time_ms = data['start_time_ms']
        sample_rate = data['sample_rate']

    # Read the last stored label of each unique player or session
    session_labels = _helper.getLabelsLatest(dataset)

    # Write to csv
    csvOutputPath = _helper.exportFilename(dataset)
    _helper.ensureDirExists(csvOutputPath, True)
    with open(csvOutputPath, 'wt') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['START_TIME', 'STOP_TIME', 'PREDICTION', 'SOURCE', 'SESSION'])

        for session_data in session_labels:
            session = session_data['session']
            source = session_data['source']

            for label in session_data['labels']:
                start_time_in_ms = start_time_ms + label['lo'] * 1000.0 / sample_rate
                start_time = _helper.timeMillisecondToTimeString(start_time_in_ms)
                stop_time_in_ms = start_time_ms + label['hi'] * 1000.0 / sample_rate
                stop_time = _helper.timeMillisecondToTimeString(stop_time_in_ms)
                prediction = label['label']

                writer.writerow([start_time, stop_time, prediction, source, session])

    print('output written to', csvOutputPath)
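# Illustrative sketch (not part of the exporter above): the sample-index to
# wall-clock conversion used when writing each CSV row, with made-up values.
# start_time_ms, sample_rate, lo, and hi below are assumptions chosen only to
# make the arithmetic visible.
def _example_label_times():
    start_time_ms = 1_500_000_000_000   # assumed dataset start (epoch milliseconds)
    sample_rate = 80                    # assumed samples per second
    lo, hi = 2400, 4800                 # assumed label sample range
    start_ms = start_time_ms + lo * 1000.0 / sample_rate  # 30,000 ms after start
    stop_ms = start_time_ms + hi * 1000.0 / sample_rate   # 60,000 ms after start
    return start_ms, stop_ms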
def main(filename, outfolder):
    if filename.endswith('.gz'):
        use_open = gzip.open
    else:
        use_open = open

    with use_open(filename, 'rt') as csvfile:
        header_rate, header_start_ms = _helper.process_actigraph_header(csvfile)

        header_start_sec = header_start_ms / 1000
        if header_start_sec != int(header_start_sec):
            _helper.errorExit('start time can only have second precision')
        header_start_sec = int(header_start_sec)

        csv_header = csvfile.readline().strip()
        if csv_header != 'Accelerometer X,Accelerometer Y,Accelerometer Z':
            _helper.errorExit(
                'unrecognized CSV header: only "Accelerometer X,Accelerometer Y,Accelerometer Z" supported')

        tm = datetime.datetime.utcfromtimestamp(header_start_sec)
        tm_sample = 0

        outfile = None
        for row in csvfile:
            tm_msec = int(1000 * tm_sample / header_rate + 0.5)

            if outfile is None:
                outfilecsvname = 'NONE-NONE-NA.NONE-NONE.%04d-%02d-%02d-%02d-%02d-%02d-%03d-P0000.sensor.csv' % (
                    tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second, tm_msec)
                outfilename = os.path.join(outfolder, 'default', 'MasterSynced',
                                           '%04d' % tm.year, '%02d' % tm.month,
                                           '%02d' % tm.day, '%02d' % tm.hour,
                                           outfilecsvname)
                print('Create new hourly file: %s' % outfilecsvname)
                outfile = open(_helper.ensureDirExists(outfilename, True), 'wt')
                outfile.write(
                    'HEADER_TIME_STAMP,X_ACCELERATION_METERS_PER_SECOND_SQUARED,Y_ACCELERATION_METERS_PER_SECOND_SQUARED,Z_ACCELERATION_METERS_PER_SECOND_SQUARED\n')

            tm_str = '%04d-%02d-%02d %02d:%02d:%02d.%03d' % (
                tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second, tm_msec)
            outfile.write(tm_str + ',' +
                          (','.join([(e if ('.' in e) else (e + '.0'))
                                     for e in row.strip().split(',')])) + '\n')

            tm_sample += 1
            if tm_sample == header_rate:
                prev_tm = tm
                tm = tm + datetime.timedelta(seconds=1)
                tm_sample = 0

                if prev_tm.year != tm.year or prev_tm.month != tm.month or prev_tm.day != tm.day or prev_tm.hour != tm.hour:
                    outfile.close()
                    outfile = None

        if outfile is not None:
            outfile.close()
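# Illustrative sketch (assumed rate, start time, and row count; not part of the
# converter above): how the converter advances the timestamp one second for
# every header_rate rows and starts a new hourly file when the hour changes.
import datetime

def _example_rollover(header_rate=80, n_rows=80 * 3600 + 5):
    tm = datetime.datetime(2020, 1, 2, 13, 59, 58)  # assumed header start time
    tm_sample = 0
    new_files = 1  # the first row always opens a file
    for _ in range(n_rows):
        tm_sample += 1
        if tm_sample == header_rate:
            prev_tm, tm, tm_sample = tm, tm + datetime.timedelta(seconds=1), 0
            if prev_tm.hour != tm.hour:
                new_files += 1  # converter closes the current file and opens a new one
    return new_files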
def main(filenames, *, name=None, labelfilenames=None, zoom=None, mag=DEFAULT_MAGNITUDE, sample=None, day=None):
    if len(filenames) > 1 and not name:
        _helper.errorExit('Must specify a custom dataset --name when importing multiple files')

    if mag <= 0:
        _helper.errorExit('magnitude must be positive')

    if sample is not None and day is not None:
        _helper.errorExit('Can only provide one of --sample and --day')

    start_sample, end_sample = None, None
    if sample is not None:
        start_sample, end_sample = parseRange('sample', sample)

    start_day, end_day = None, None
    if day is not None:
        start_day, end_day = parseRange('day', day)

    # load labels
    if not labelfilenames:
        labelfilenames = [
            _folder.file_abspath('common', 'labels_test.csv'),
            _folder.file_abspath('common', 'labels_unknown.csv')
        ]

    labels = []
    labels_names = set()
    for labelfile in labelfilenames:
        print('Reading labels from %s...' % labelfile)
        with open(labelfile, 'rt') as csvfile:
            reader = csv.DictReader(csvfile)
            if set(reader.fieldnames) != set(['label', 'red', 'green', 'blue']):
                _helper.errorExit('Incorrect label csv headers')

            for row in reader:
                label_name = row['label'].strip()
                rr = float(row['red'].strip())
                gg = float(row['green'].strip())
                bb = float(row['blue'].strip())

                if re.search(r'[^\w\- ]', label_name, re.ASCII):
                    _helper.errorExit('Only alphanumeric, underscore, dash, and space allowed in label names: ' + label_name)
                if label_name in labels_names:
                    _helper.errorExit('Duplicate label: ' + label_name)

                labels.append((label_name, rr, gg, bb))
                labels_names.add(label_name)

    # process arguments
    signal_names = []
    for filename in filenames:
        signal_names.append(_helper.makeIdFromFilename(filename))
    if len(signal_names) != len(set(signal_names)):
        _helper.errorExit('Duplicate signal names')

    if name:
        if not _helper.checkId(name, False):
            _helper.errorExit('Only alphanumeric and underscore allowed in dataset names')
        dataset = name
    else:
        dataset = signal_names[0]

    if start_sample is not None or end_sample is not None:
        dataset = dataset + strRange('sample', start_sample, end_sample)
    if start_day is not None or end_day is not None:
        dataset = dataset + strRange('day', start_day, end_day)

    out_folder = _helper.datasetDir(dataset)
    tile_folder = _helper.datasetTileDir(dataset)

    if os.path.exists(out_folder):
        _helper.errorExit('Please remove output folder ' + out_folder)

    print('Using output folder ' + out_folder)
    _helper.ensureDirExists(out_folder, False)
    _helper.ensureDirExists(tile_folder, False)

    # read in data
    print('reading header...')

    # open files
    csvfiles = []
    for filename in filenames:
        if filename.endswith('.gz'):
            use_open = gzip.open
        else:
            use_open = open
        csvfiles.append(use_open(filename, 'rt'))

    # read headers
    files_start_ms = []
    dataset_rate = None
    for filename, csvfile in zip(filenames, csvfiles):
        header_rate, header_start_ms = _helper.process_actigraph_header(csvfile)
        if dataset_rate is None:
            dataset_rate = int(header_rate)
        elif dataset_rate != int(header_rate):
            _helper.errorExit('Multiple sample rates found')
        files_start_ms.append(header_start_ms)

    # determine sample range
    dataset_start_ms = min(files_start_ms)
    dataset_start_date = datetime.datetime.utcfromtimestamp(dataset_start_ms / 1000).date()

    if start_sample is not None or end_sample is not None:
        pass

    if start_day is not None or end_day is not None:
        if start_day is not None:
            output_min_ms = 1000 * calendar.timegm(
                (dataset_start_date + datetime.timedelta(days=(start_day - 1))).timetuple())
            start_sample = (max(output_min_ms, dataset_start_ms) - dataset_start_ms) * dataset_rate / 1000
            if start_sample != int(start_sample):
                _helper.errorExit('day start sample error')
            start_sample = int(start_sample)
        else:
            start_sample = None

        if end_day is not None:
            output_max_ms = 1000 * calendar.timegm(
                (dataset_start_date + datetime.timedelta(days=(end_day))).timetuple())
            end_sample = (output_max_ms - dataset_start_ms) * dataset_rate / 1000
            if end_sample != int(end_sample):
                _helper.errorExit('day end sample error')
            end_sample = int(end_sample)
        else:
            end_sample = None

    # determine starting day index
    start_day_index = 1
    if start_sample:
        start_day_index = 1 + (datetime.datetime.utcfromtimestamp(
            dataset_start_ms / 1000 + start_sample / dataset_rate).date() - dataset_start_date).days

    # print header summary
    if len(filenames) > 1:
        for filename, signalname, file_start_ms in zip(filenames, signal_names, files_start_ms):
            print('file start: ', _helper.timeMillisecondToTimeString(file_start_ms), signalname, filename)
    print('input start: ', _helper.timeMillisecondToTimeString(dataset_start_ms), dataset)

    # read data
    sample_len = 3 * len(filenames)
    sample_data = []

    min_smp = 1e100
    max_smp = -1e100

    for fileindex, (filename, file_start_ms, csvfile) in enumerate(
            zip(filenames, files_start_ms, csvfiles)):
        print('reading ' + filename + '...')

        # Checks if csv header is absent and adds the header if needed
        csvstartpos = csvfile.tell()
        firstrow = next(csvfile)
        csvfile.seek(csvstartpos)

        fieldnames = None
        if 'Accelerometer' not in firstrow:
            # No headers present
            DEFAULT_FIELDNAMES = ['Timestamp', 'Accelerometer X', 'Accelerometer Y', 'Accelerometer Z']
            no_of_fields = len(firstrow.split(','))
            if no_of_fields == 4:
                fieldnames = DEFAULT_FIELDNAMES
            elif no_of_fields == 3:
                fieldnames = DEFAULT_FIELDNAMES[1:]
            else:
                _helper.errorExit('missing header has unrecognized number of fields')

        if fieldnames is not None:
            _helper.warning('input file missing field names, using ' + ','.join(fieldnames))

        reader = csv.DictReader(csvfile, fieldnames=fieldnames)

        if 'Timestamp' in reader.fieldnames:
            _helper.warning('input file has Timestamp field, but it will be ignored')

        # process rows
        reader_sample_index = 0

        sample_offset = (file_start_ms - dataset_start_ms) * dataset_rate / 1000
        if sample_offset != int(sample_offset):
            _helper.errorExit('sample offset error')
        sample_offset = int(sample_offset)

        if start_sample is not None:
            sample_offset -= start_sample

        for row in reader:
            data_sample_index = reader_sample_index + sample_offset
            reader_sample_index += 1

            if data_sample_index < 0:
                continue
            if end_sample is not None and data_sample_index >= end_sample - (start_sample if start_sample is not None else 0):
                break

            x = float(row['Accelerometer X'])
            y = float(row['Accelerometer Y'])
            z = float(row['Accelerometer Z'])

            min_smp = min(min_smp, x, y, z)
            max_smp = max(max_smp, x, y, z)

            while data_sample_index >= len(sample_data):
                sample_data.append([None] * sample_len)

            sample_data[data_sample_index][3 * fileindex + 0] = x
            sample_data[data_sample_index][3 * fileindex + 1] = y
            sample_data[data_sample_index][3 * fileindex + 2] = z

            if reader_sample_index % (60 * 60 * dataset_rate) == 0:
                print('read %d hours...' % (reader_sample_index / (60 * 60 * dataset_rate)))

    if min_smp < -mag or mag < max_smp:
        _helper.warning('sample exceeds magnitude')

    output_start_ms = dataset_start_ms
    if start_sample is not None:
        output_start_ms_offset = start_sample * 1000 / dataset_rate
        if output_start_ms_offset != int(output_start_ms_offset):
            _helper.errorExit('output start offset sample error')
        output_start_ms += int(output_start_ms_offset)
    output_end_ms = output_start_ms + (len(sample_data) - 1) * 1000 / dataset_rate

    # figure out max zoom level, if needed
    if zoom is None:
        for zz in range(10):
            zoom = zz
            if len(sample_data) / math.pow(SUBSAMPLE, zz + 1) <= 2 * TILE_SIZE:
                break

    # print summary
    print('length: ', len(sample_data))
    print('rate: ', dataset_rate)
    print('max zoom: ', zoom)
    print('output start: ', _helper.timeMillisecondToTimeString(output_start_ms))
    print('output end: ', _helper.timeMillisecondToTimeString(output_end_ms))

    # write tiles
    for zoom_level in range(zoom + 1):
        print('writing zoom %d...' % zoom_level)

        zoom_subsample = SUBSAMPLE**zoom_level
        zoom_tile_size = TILE_SIZE * zoom_subsample

        ntiles = int(len(sample_data) / zoom_tile_size)
        if len(sample_data) % zoom_tile_size != 0:
            ntiles += 1

        for tt in range(ntiles):
            tile_id = 'z%02dt%06d' % (zoom_level, tt)
            outfilename = os.path.join(tile_folder, tile_id + '.json')
            with open(outfilename, 'wt') as outfile:
                write_startfile(outfile, zoom_subsample, dataset + ':' + tile_id)

                prev = False
                for ss in range(tt * TILE_SIZE, (tt + 1) * TILE_SIZE + 1):
                    rangesmp = sample_data[ss * zoom_subsample:(ss + 1) * zoom_subsample]
                    write_sample(outfile, rangesample(rangesmp, sample_len), prev, sample_len)
                    prev = True

                write_endfile(outfile)

            if (tt + 1) % 1000 == 0:
                print('wrote %d tiles...' % (tt + 1))

    print('writing origin...')
    outfilename = _helper.datasetOriginFilename(dataset)
    with open(outfilename, 'wt') as outfile:
        outfile.write("{\n")
        outfile.write(' "origin": %s\n' % json.dumps(filenames))
        outfile.write('}\n')

    print('writing config...')
    outfilename = _helper.datasetConfigFilename(dataset)
    with open(outfilename, 'wt') as outfile:
        outfile.write('{\n')
        outfile.write(' "title": "%s",\n' % dataset)
        outfile.write(' "tile_size": %d,\n' % TILE_SIZE)
        outfile.write(' "tile_subsample": %d,\n' % SUBSAMPLE)
        outfile.write(' "zoom_max": %d,\n' % zoom)
        outfile.write(' "length": %d,\n' % len(sample_data))
        outfile.write(' "start_time_ms": %s,\n' % output_start_ms)
        outfile.write(' "sample_rate": %d,\n' % dataset_rate)
        outfile.write(' "start_day_idx": %d,\n' % start_day_index)
        outfile.write(' "magnitude": %d,\n' % mag)
        outfile.write(' "signals": ["%s"],\n' % ('", "'.join(signal_names)))
        outfile.write(' "labels": [\n')
        for ii, (ll, rr, gg, bb) in enumerate(labels):
            outfile.write(' { "label": "%s", "color": [ %0.2f, %0.2f, %0.2f ] }%s\n' %
                          (ll, rr, gg, bb, ',' if ii + 1 < len(labels) else ''))
        outfile.write(' ]\n')
        outfile.write('}\n')

    print('dataset written to ' + out_folder)
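# Illustrative sketch (assumed sample count and constants; not part of the
# importer above): the zoom and tile arithmetic used when writing tiles. With
# SUBSAMPLE samples merged per zoom level and TILE_SIZE samples per tile, the
# max zoom is the first level whose subsampled length fits in two tiles.
import math

def _example_tile_counts(n_samples=2_000_000, subsample=4, tile_size=1024):
    zoom = 0
    for zz in range(10):
        zoom = zz
        if n_samples / math.pow(subsample, zz + 1) <= 2 * tile_size:
            break
    tiles_per_level = [
        -(-n_samples // (tile_size * subsample ** level))  # ceiling division
        for level in range(zoom + 1)
    ]
    return zoom, tiles_per_level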
def main(dataset, filename, *, source=None, session=None, stdout=False, trim=False, qcfix=False):
    dataset_config_filename = _helper.datasetConfigFilename(dataset)
    if not os.path.exists(dataset_config_filename):
        _helper.errorExit('could not find dataset config file: ' + dataset_config_filename)

    with open(dataset_config_filename, 'rt') as configfile:
        config = json.load(configfile)
        sample_rate = config['sample_rate']
        length = config['length']
        start_millisecond = config['start_time_ms']

    print('start time:', _helper.timeMillisecondToTimeString(start_millisecond))

    FORMAT_NOTES = 'NOTES'
    FORMAT_NOTES_TIME_FORMAT = '%a %b %d %H:%M:%S %Z %Y'
    FORMAT_NOTES_LENGTH_SECONDS = 10  # how long a note label should try to be
    FORMAT_ACTIVITY_GROUP = 'ACTIVITY_GROUP'
    FORMAT_PREDICTION = 'PREDICTION'
    FORMAT_PREDICTED = 'PREDICTED'
    FORMAT_PREDICTED_LABEL_SECONDS = 30

    with open(filename, 'rt') as csvfile:
        reader = csv.DictReader(csvfile)

        # check if file contains session and source columns
        if 'SESSION' in reader.fieldnames and 'SOURCE' in reader.fieldnames and (session or source):
            _helper.errorExit(
                'Session and source info detected in file; do not also provide session and source arguments.')
        elif ('SESSION' in reader.fieldnames) != ('SOURCE' in reader.fieldnames):
            _helper.errorExit('Must provide both session and source fields in file or neither.')
        elif (session is None or source is None) and (session or source):
            _helper.errorExit('Must provide both session and source arguments or neither.')

        if session is None and 'SESSION' not in reader.fieldnames:
            _helper.errorExit('No session argument provided and no session info in file. Cannot import labels.')
        if source is None and 'SOURCE' not in reader.fieldnames:
            _helper.errorExit('No source argument provided and no source info in file. Cannot import labels.')

        use_source_session_from_file = ('SESSION' in reader.fieldnames and 'SOURCE' in reader.fieldnames)

        # figure out format
        format = None
        format_meta = None

        if ('TIME' in reader.fieldnames) and ('TAG' in reader.fieldnames) and ('NOTE' in reader.fieldnames):
            format = FORMAT_NOTES
        elif ('START_TIME' in reader.fieldnames) and ('STOP_TIME' in reader.fieldnames) and ('ACTIVITY_GROUP.y' in reader.fieldnames):
            format = FORMAT_ACTIVITY_GROUP
        elif ('START_TIME' in reader.fieldnames) and ('STOP_TIME' in reader.fieldnames) and ('PREDICTION' in reader.fieldnames):
            format = FORMAT_PREDICTION
        elif ('HEADER_START_TIME' in reader.fieldnames) and ('PREDICTED' in reader.fieldnames):
            format = FORMAT_PREDICTED

            # get label names from header
            format_meta = []
            for field in reader.fieldnames[2:]:
                label = field.split('_')
                if label[0] != 'PROB' or len(label) < 2:
                    sys.stderr.write('unrecognized field in header: expected PROB_...\n')
                    sys.exit(-1)
                label = ' '.join([word.capitalize() for word in label[1:]])
                format_meta.append(label)
        else:
            sys.stderr.write('could not determine format from header fields\n')
            sys.exit(-1)

        sys.stderr.write('detected %s format\n' % format)
        if use_source_session_from_file:
            sys.stderr.write('reading source and session from file\n')
        else:
            sys.stderr.write('using source %s and session %s\n' % (source, session))

        # process rows
        sessions = set()
        session_labels = {}
        session_sources = {}

        # this will keep track of the time the last label started to make sure they are sorted
        last_label_start_millisecond = 0

        for row in reader:
            # figure out sample range
            if format == FORMAT_NOTES:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['TIME'], FORMAT_NOTES_TIME_FORMAT)
                label_stop_millisecond = label_start_millisecond + FORMAT_NOTES_LENGTH_SECONDS * 1000
                label_value = row['TAG']
                label_detail = row['NOTE']
            elif format == FORMAT_ACTIVITY_GROUP:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['ACTIVITY_GROUP.y']
                label_detail = None
            elif format == FORMAT_PREDICTION:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['PREDICTION']
                label_detail = None
            elif format == FORMAT_PREDICTED:
                if int(row['PREDICTED']) >= len(format_meta):
                    sys.stderr.write('PREDICTED index out of range\n')
                    sys.exit(-1)
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['HEADER_START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = label_start_millisecond + 1000 * FORMAT_PREDICTED_LABEL_SECONDS
                label_value = format_meta[int(row['PREDICTED'])]
                label_detail = None
            else:
                _helper.errorExit('unknown format error')

            # check labels are in order
            if label_start_millisecond <= last_label_start_millisecond:
                _helper.errorExit('label start times not sorted')
            last_label_start_millisecond = label_start_millisecond

            # apply fix for QC end times, if needed
            if qcfix:
                if label_stop_millisecond % 100 == 88:
                    label_stop_millisecond += 12

            # convert from ms to sample
            label_start_sample_thousand = (label_start_millisecond - start_millisecond) * sample_rate
            label_stop_sample_thousand = (label_stop_millisecond - start_millisecond) * sample_rate
            if label_start_sample_thousand % 1000 != 0 or label_stop_sample_thousand % 1000 != 0:
                _helper.errorExit('sample precision error')

            label_start_sample = (label_start_sample_thousand / 1000)
            label_stop_sample = (label_stop_sample_thousand / 1000)

            # figure out source and session
            if use_source_session_from_file:
                current_session = row['SESSION']
                current_source = row['SOURCE']
            else:
                current_session = session
                current_source = source

            if current_source not in VALID_SOURCES:
                _helper.errorExit('unrecognized source: ' + current_source)

            # for notes, go back and make sure any previous note doesn't overlap this one
            if format == FORMAT_NOTES:
                if current_session in sessions and len(session_labels[current_session]) > 0:
                    session_labels[current_session][-1][1] = min(
                        session_labels[current_session][-1][1], label_start_sample)

            # append this label to the session
            if current_session not in sessions:
                sessions.add(current_session)
                session_labels[current_session] = []
                session_sources[current_session] = current_source

            if session_sources[current_session] != current_source:
                _helper.errorExit('Session with multiple sources detected.')

            session_labels[current_session].append(
                [label_start_sample, label_stop_sample, label_value, label_detail])

    # write labels out
    for session in sessions:
        labels = session_labels[session]
        source = session_sources[session]

        # this will be used to merge adjacent time windows that have the same label
        last_activity = None

        # keep track of information about labels output
        was_prev = False
        any_outside = False
        any_far_outside = False

        output = ''
        output += '{"session":"%s", "source": "%s", "labels":[' % (session, source)

        for label_start_sample, label_stop_sample, label_value, label_detail in session_labels[session]:
            # see if the label extends beyond the dataset time
            if label_start_sample < 0 or length < label_stop_sample:
                any_outside = True
            if label_start_sample < 0 - 0.1 * length or length + 0.1 * length < label_stop_sample:
                any_far_outside = True

            # merge adjacent labels that match
            if not last_activity:
                last_activity = [label_start_sample, label_stop_sample, label_value, label_detail]
            elif last_activity[1] == label_start_sample and last_activity[2] == label_value and last_activity[3] == label_detail:
                last_activity[1] = label_stop_sample
            else:
                if trimActivity(last_activity, trim, 0, length):
                    output += _helper.activityJSON(last_activity, was_prev)
                    was_prev = True
                last_activity = [label_start_sample, label_stop_sample, label_value, label_detail]

        # account for any remaining label
        if last_activity:
            if trimActivity(last_activity, trim, 0, length):
                output += _helper.activityJSON(last_activity, was_prev)
                was_prev = True

        output += ']}\n'

        # display warnings about labels
        if any_far_outside:
            _helper.warning('label found FAR OUTSIDE signal in ' + session)
        elif any_outside:
            _helper.warning('label found outside signal in ' + session)

        # do output
        if stdout:
            sys.stdout.write(output)
        else:
            labels_filename = _helper.latestLabelsFilename(dataset, session)
            with open(_helper.ensureDirExists(labels_filename, True), 'wt') as labelsfile:
                labelsfile.write(output)
            print('labels added to', labels_filename)
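# Illustrative sketch (assumed values; not part of the importer above): the
# millisecond-to-sample conversion and precision check used when importing
# labels. Multiplying by the rate before dividing by 1000 lets a non-integer
# sample index be detected exactly.
def _example_ms_to_sample(label_ms=1_500_000_030_000,
                          start_ms=1_500_000_000_000,
                          sample_rate=80):
    thousandths = (label_ms - start_ms) * sample_rate
    if thousandths % 1000 != 0:
        raise ValueError('sample precision error')
    return thousandths // 1000  # 2400 samples into the dataset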
def _process_request(self, path, vars):
    global _debug_delay
    if _debug_delay:
        time.sleep(_debug_delay)

    if path == '/signaligner.html':
        if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
            dataset = vars['dataset']
        else:
            dataset = 'null'
        if 'session' in vars and ALNUMUN_RE.match(vars['session']):
            session = vars['session']
        else:
            session = SESSION_ERROR

        def replace_data(data):
            data = replace_vars(data, session, False)
            return data

        self._send_header_and_file_data(
            _folder.file_abspath('signaligner/signaligner.html'), False, CTYPE_HTML, replace_data)

    elif path == '/signaligner.js':
        def replace_data(data):
            data = replace_mode_config(data)
            return data

        self._send_header_and_file_data(
            _folder.file_abspath('signaligner/signaligner.js'), False, CTYPE_JS, replace_data)

    elif path == '/fetchdatasetlist':
        datasets = _helper.getDatasetList()
        self._send_header(200, CTYPE_PLAIN)
        self._send_data(json.dumps(datasets), False)

    elif path == '/fetchdataset':
        if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
            dataset_name = vars['dataset']

            if 'type' in vars and vars['type'] == 'config':
                file_path = _helper.datasetConfigFilename(dataset_name)
            elif 'type' in vars and vars['type'] == 'tile' and 'id' in vars and ALNUMUN_RE.match(vars['id']):
                file_path = os.path.join(_helper.datasetTileDir(dataset_name), vars['id'] + '.json')
            else:
                self._send_header(404, CTYPE_PLAIN)
                return

            if not os.path.exists(file_path):
                self._send_header(404, CTYPE_PLAIN)
                return

            self._send_header_and_file_data(file_path, False, CTYPE_PLAIN)
        else:
            self._send_header(404, CTYPE_PLAIN)

    elif path == '/fetchlabels':
        if 'dataset' in vars and ALNUMUN_RE.match(vars['dataset']):
            dataset = vars['dataset']
            self._send_header(200, CTYPE_PLAIN)
            labels = _helper.getLabelsLatest(dataset)
            if labels:
                self._send_data(json.dumps(labels), False)
        else:
            self._send_header(404, CTYPE_PLAIN)

    elif path == '/reportlabels':
        if 'data' in vars:
            data = json.loads(vars['data'])
            if 'dataset' in data and ALNUMUN_RE.match(data['dataset']) and 'session' in data and ALNUMUN_RE.match(data['session']):
                dataset = data['dataset']
                session = data['session']

                with open(_helper.ensureDirExists(_helper.logLabelsFilename(dataset, session), True), 'at') as dfile:
                    dfile.write(json.dumps(data) + '\n')
                with open(_helper.ensureDirExists(_helper.latestLabelsFilename(dataset, session), True), 'wt') as dfile:
                    dfile.write(json.dumps(data) + '\n')
                with open(_helper.ensureDirExists(_helper.latestLabelsFilename(dataset, session), True), 'rt') as dfile:
                    response = json.loads(dfile.read())

                self._send_header(200, CTYPE_PLAIN)
                self._send_data(json.dumps(response), False)
            else:
                self._send_header(404, CTYPE_PLAIN)
        else:
            self._send_header(404, CTYPE_PLAIN)

    elif path == '/mturksubmit' or path == '/mturksubmissions':
        if 'data' in vars:
            data = json.loads(vars['data'])
            if 'dataset' in data and ALNUMUN_RE.match(data['dataset']) and 'session' in data and ALNUMUN_RE.match(data['session']):
                dataset = data['dataset']
                session = data['session']

                if path == '/mturksubmit':
                    mturk_submit = _helper.mturkSubmitLabelsFilename(dataset, session)
                    if not os.path.exists(mturk_submit):
                        with open(_helper.ensureDirExists(mturk_submit, True), 'wt') as dfile:
                            dfile.write(json.dumps(data) + '\n')

                submissions = _helper.mturkGetSubmissions(session)

                total = 0
                datasets = []
                for submission in submissions:
                    score = submission['score'] / 100.0
                    score = score**2
                    score *= submission['daysofdata']
                    # minimum of 1 cent for tutorial levels, 20 cents for challenge
                    score = max(score, 0.20)
                    if submission['istutorial']:
                        score *= 0.05
                    total += score
                    datasets.append(submission['dataset'])
                total = int(total * 100)

                if session not in _mturk_session_codes:
                    _mturk_session_codes[session] = _helper.makeId()[:3]
                code = _mturk_session_codes[session]
                code = code + ('%03d' % total).upper()
                code = code + hashlib.md5(code.encode('utf-8')).hexdigest()[:3].upper()

                response = {
                    'amount': '$%d.%02d' % (total // 100, total % 100),
                    'code': code,
                    'datasets': datasets
                }

                self._send_header(200, CTYPE_PLAIN)
                self._send_data(json.dumps(response), False)
            else:
                self._send_header(404, CTYPE_PLAIN)
        else:
            self._send_header(404, CTYPE_PLAIN)

    elif path == '/log':
        if 'data' in vars:
            with open(_helper.ensureDirExists(_folder.data_abspath('playlog'), True), 'at') as dfile:
                dfile.write(vars['data'] + '\n')
        self._send_header(200, CTYPE_PLAIN)

    elif HTML_RE.match(path):
        if path == '/mturk_start.html':
            global _mode
            if _mode != 'MTURK':
                self._send_header(200, CTYPE_PLAIN)
                self._send_data('mode must be MTURK to request mturk_start.html', False)
                return

        if 'session' in vars and ALNUMUN_RE.match(vars['session']):
            session = vars['session']
        else:
            session = SESSION_ERROR

        def replace_data(data):
            return replace_vars(data, session, True)

        self._send_header_and_file_data(
            _folder.file_abspath('static' + path), False, CTYPE_HTML, replace_data)

    elif PNG_RE.match(path):
        self._send_header_and_file_data(_folder.file_abspath('static' + path), True, CTYPE_PNG)
    elif JS_RE.match(path):
        self._send_header_and_file_data(_folder.file_abspath('static' + path), False, CTYPE_JS)
    elif CSS_RE.match(path):
        self._send_header_and_file_data(_folder.file_abspath('static' + path), False, CTYPE_CSS)
    else:
        self._send_header(404, CTYPE_PLAIN)
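# Illustrative sketch (assumed submission values and session prefix; not part of
# the request handler above): the MTurk payout and confirmation-code arithmetic
# used by the /mturksubmit and /mturksubmissions responses.
import hashlib

def _example_mturk_payout(session_prefix='abc'):
    submissions = [
        {'score': 80, 'daysofdata': 2, 'istutorial': False},  # assumed
        {'score': 90, 'daysofdata': 1, 'istutorial': True},   # assumed
    ]
    total = 0
    for sub in submissions:
        score = (sub['score'] / 100.0) ** 2 * sub['daysofdata']
        score = max(score, 0.20)        # 20-cent floor before the tutorial discount
        if sub['istutorial']:
            score *= 0.05               # tutorials pay 5% of the floored score
        total += score
    total = int(total * 100)            # total payout in cents
    code = session_prefix + ('%03d' % total)
    code += hashlib.md5(code.encode('utf-8')).hexdigest()[:3].upper()
    return '$%d.%02d' % (total // 100, total % 100), code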
            session_slideshow_interactions.append(mturk_slideshow_count_dict[page])
        session_row_data += session_slideshow_interactions

        session_zoom_time_spent = []
        if 'major_zoom_levels' not in info:
            session_zoom_time_spent += ["NA"] * len(major_zoom_levels)
        else:
            major_zoom_time_dict = info['major_zoom_levels']
            for zoom in sorted(major_zoom_levels):
                if zoom not in major_zoom_time_dict:
                    session_zoom_time_spent.append(0.0)
                else:
                    session_zoom_time_spent.append(major_zoom_time_dict[zoom] / 1000.0)
        session_row_data += session_zoom_time_spent

        writer.writerow(session_row_data)

    if args.stdout:
        sys.stdout.write(buffer.getvalue())
    else:
        csvOutputPath = _helper.exportFilename('log_analysis')
        _helper.ensureDirExists(csvOutputPath, True)
        with open(csvOutputPath, 'wt') as csv_file:
            csv_file.write(buffer.getvalue())
        print('output written to', csvOutputPath)
def main(source_dataset, dest_dataset, *, notrim=False):
    # Process arguments to get name of dataset
    source_config = _helper.datasetConfigFilename(source_dataset)
    dest_config = _helper.datasetConfigFilename(dest_dataset)

    source = {}
    dest = {}

    with open(source_config, 'rt') as configfile:
        config = json.load(configfile)
        source['sample_rate'] = config['sample_rate']
        source['start_time_ms'] = config['start_time_ms']

    with open(dest_config, 'rt') as configfile:
        config = json.load(configfile)
        dest['sample_rate'] = config['sample_rate']
        dest['length'] = config['length']
        dest['start_time_ms'] = config['start_time_ms']

    if source['sample_rate'] != dest['sample_rate']:
        _helper.errorExit("Source and dest datasets should have the same sample rate")

    start_sample = ((dest['start_time_ms'] - source['start_time_ms']) / 1000) * source['sample_rate']
    if start_sample != int(start_sample):
        _helper.errorExit("Source and dest datasets are not offset by an integer number of samples")
    start_sample = int(start_sample)
    end_sample = int(start_sample + dest['length'])

    source_labels = _helper.getLabelsLatest(source_dataset)
    if source_labels:
        for session in source_labels:
            session_name = session['session']
            source_name = session['source']
            session_labels = session['labels']

            label_filename = _helper.latestLabelsFilename(dest_dataset, session_name)

            output = ''
            output += ('{"session":"%s", "source": "%s", "labels":[' % (session_name, source_name))

            was_prev = False
            for ll in session_labels:
                label_start = ll['lo']
                label_end = ll['hi']
                label_name = ll['label']
                label = [label_start, label_end, label_name]

                if notrim:
                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True
                elif label_end > start_sample and label_start < end_sample:
                    # Trim label start if needed
                    if label_start < start_sample:
                        label[0] = start_sample
                    # Trim label end if needed
                    if label_end > end_sample:
                        label[1] = end_sample
                    # Start label offset from 0
                    label[0] -= start_sample
                    label[1] -= start_sample

                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True

            output += ']}\n'

            _helper.ensureDirExists(label_filename, True)
            with open(label_filename, 'wt') as labelsfile:
                labelsfile.write(output)
            print('labels added to ', label_filename)
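# Illustrative sketch (assumed label and sample range; not part of the copy tool
# above): the offset and trimming applied when moving a source label into the
# destination dataset's sample range, so the copied label starts from sample 0.
def _example_copy_label(label=(1000, 5000, 'Walking'),
                        start_sample=2000, end_sample=10000):
    lo, hi, name = label
    if hi <= start_sample or lo >= end_sample:
        return None  # label lies entirely outside the destination dataset
    lo = max(lo, start_sample) - start_sample
    hi = min(hi, end_sample) - start_sample
    return [lo, hi, name]  # -> [0, 3000, 'Walking']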
class Logger:
    def __init__(self, stream1, stream2):
        self.stream1 = stream1
        self.stream2 = stream2

    def write(self, data):
        self.stream1.write(data)
        self.stream2.write(data)

    def flush(self):
        self.stream1.flush()
        self.stream2.flush()

logfilename = _folder.data_abspath('log', 'signalauncher.' + str(os.getpid()) + '.txt')
logfile = open(_helper.ensureDirExists(logfilename, True), 'wt')
sys.stdout = Logger(sys.stdout, logfile)
sys.stderr = Logger(sys.stderr, logfile)

# utility functions
def datasetexists(dataset):
    out_folder = _helper.datasetDir(dataset)
    return os.path.exists(out_folder)

def mhealthfolder(dataset, signal):
    return _folder.data_abspath('algo', dataset, 'mhealth', signal)

def algofolder(dataset, signal):