def checkDataType(dataFilePath):
    """
    Classify a dataset as transactional or non-transactional.

    The decision is based on a data type indicator, defined as the ratio
    between the median absolute deviation and the median of the sampling
    interval. An indicator strictly greater than 0.2 is labelled
    "transactional"; anything else is labelled "non-transactional".

    @param dataFilePath: Name of a csv file, the file must have two columns
                         with header "timestamp", and "value"
    @return (dataType, dataTypeIndicator): dataType is a string with value
            "transactional" or "non-transactional"; dataTypeIndicator is the
            ratio the decision was based on
    """
    # Only the timestamps matter here; the value column is read but not used.
    timestamp, _ = readCSVfiles(dataFilePath)
    medianInterval, intervalMAD = estimateMedianAbsoluteDeviation(timestamp)

    dataTypeIndicator = intervalMAD / medianInterval
    dataType = ("transactional" if dataTypeIndicator > 0.2
                else "non-transactional")
    return dataType, dataTypeIndicator
def checkDataType(dataFilePath):
    """
    Classify a dataset as transactional or non-transactional.

    The decision is based on a data type indicator, defined as the ratio
    between the median absolute deviation and the median of the sampling
    interval. An indicator strictly greater than 0.2 is labelled
    "transactional"; anything else is labelled "non-transactional".

    @param dataFilePath: Name of a csv file, the file must have two columns
                         with header "timestamp", and "value"
    @return (dataType, dataTypeIndicator): dataType is a string with value
            "transactional" or "non-transactional"; dataTypeIndicator is the
            ratio the decision was based on
    """
    # Only the timestamps matter here; the value column is read but not used.
    timestamp, _ = readCSVfiles(dataFilePath)
    medianInterval, intervalMAD = estimateMedianAbsoluteDeviation(timestamp)

    dataTypeIndicator = intervalMAD / medianInterval
    dataType = ("transactional" if dataTypeIndicator > 0.2
                else "non-transactional")
    return dataType, dataTypeIndicator
score_value = loadNABscore("value_only") score_time_of_day = loadNABscore("time_of_day") score_day_of_week = loadNABscore("day_of_week") # score_both = loadNABscore("time_of_day_and_day_of_week") fileList = score_value['File'].values better_with_time_of_day = (score_time_of_day['Score'] > score_value['Score']) better_with_day_of_week = (score_day_of_week['Score'] > score_value['Score']) dataPath = NABPath + '/data' useTimeOfDayEncoder = [] useDayOfWeekEncoder = [] for i in xrange(len(score_value)): filename = join(dataPath, score_value['File'][i]) (timestamp, value) = readCSVfiles(filename) (new_sampling_interval, useTimeOfDay, useDayOfWeek) = get_suggested_timescale_and_encoder(timestamp, value) useTimeOfDayEncoder.append(useTimeOfDay) useDayOfWeekEncoder.append(useDayOfWeek) print " file: ", filename, " useTimeOfDay: ", useTimeOfDay, " useDayOfWeek: ", useDayOfWeek, \ " aggregation window: ", new_sampling_interval useTimeOfDayEncoder = np.array(useTimeOfDayEncoder) useDayOfWeekEncoder = np.array(useDayOfWeekEncoder) resultMat = np.array( [score_time_of_day['Score'] > score_value['Score'], useTimeOfDayEncoder]) result = pd.DataFrame(np.transpose([
score_value = loadNABscore("value_only") score_time_of_day = loadNABscore("time_of_day") score_day_of_week = loadNABscore("day_of_week") # score_both = loadNABscore("time_of_day_and_day_of_week") fileList = score_value['File'].values better_with_time_of_day = (score_time_of_day['Score'] > score_value['Score']) better_with_day_of_week = (score_day_of_week['Score'] > score_value['Score']) dataPath = NABPath+'/data' useTimeOfDayEncoder = [] useDayOfWeekEncoder = [] for i in xrange(len(score_value)): filename = join(dataPath, score_value['File'][i]) (timestamp, value) = readCSVfiles(filename) (new_sampling_interval, useTimeOfDay, useDayOfWeek) = get_suggested_timescale_and_encoder(timestamp, value) useTimeOfDayEncoder.append(useTimeOfDay) useDayOfWeekEncoder.append(useDayOfWeek) print " file: ", filename, " useTimeOfDay: ", useTimeOfDay, " useDayOfWeek: ", useDayOfWeek, \ " aggregation window: ", new_sampling_interval useTimeOfDayEncoder = np.array(useTimeOfDayEncoder) useDayOfWeekEncoder = np.array(useDayOfWeekEncoder) resultMat = np.array([score_time_of_day['Score'] > score_value['Score'], useTimeOfDayEncoder]) result = pd.DataFrame(np.transpose([score_day_of_week.File.values, useTimeOfDayEncoder,