def main():
    args = get_args()
    centroids = {}
    df_breakpoints = pd.read_csv(args.src_breakpoint)
    for column in args.columns:
        log('parsing column %s ...' % (column, ))
        # gather the column's values from every source file
        data_points = np.array([])
        for src in args.srcs:
            log('parsing %s ...' % (src, ))
            df = pd.read_csv(src, usecols=[column])
            data_points = np.concatenate((data_points, df[column]), axis=0)
        # seed k-means with centroids derived from the precomputed breakpoints
        init_centroids = breakpoints_to_centroids(df_breakpoints[column].values)
        kmeans = KMeans(
            init=np.array(init_centroids).reshape((-1, 1)),
            n_clusters=args.symbol_size,
            random_state=0,
            n_jobs=1,
            verbose=0,
            max_iter=500
        ).fit(np.array(data_points).reshape((-1, 1)))
        centroids[column] = np.array(kmeans.cluster_centers_).reshape(-1)
    df_centroids = pd.DataFrame(centroids)
    dest_dir = prepare_directory(args.dest_dir)
    df_centroids.to_csv(
        os.path.join(dest_dir, 'centroid-{0}.csv'.format(args.symbol_size)),
        header=True,
        index=False
    )
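# `breakpoints_to_centroids` is not defined in this excerpt. One reading
# consistent with its use above (N - 1 breakpoints must yield N = symbol_size
# initial centroids) is: take the midpoint of each bounded region and
# extrapolate the two open-ended regions by their neighbors' widths. The name
# and exact rule here are assumptions about the repo's actual helper, and the
# sketch assumes at least two breakpoints.
def breakpoints_to_centroids(breakpoints):
    inner = (breakpoints[:-1] + breakpoints[1:]) / 2.0  # midpoints of bounded regions
    first = breakpoints[0] - (breakpoints[1] - breakpoints[0]) / 2.0
    last = breakpoints[-1] + (breakpoints[-1] - breakpoints[-2]) / 2.0
    return np.concatenate(([first], inner, [last]))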
def main():
    dest_dir = prepare_directory(
        os.path.join('../../build/data', SCOPE, 'initialized'))
    for dataset in DATASETS_TO_PARSE:
        filenames = glob.glob(os.path.join(SRC_DIR, dataset, '*.csv'))
        for filename in filenames:
            log('parsing ' + filename + '...')
            dfChunks = pd.read_csv(filename, header=None,
                                   names=INPUT_CSV_COLUMNS,
                                   chunksize=CHUNK_SIZE)
            for i, dfChunk in enumerate(dfChunks, start=1):
                header = i == 1               # only the first chunk writes the header
                mode = 'a' if i > 1 else 'w'  # first chunk (over)writes, the rest append
                log('parsing chunk %d' % i)
                dfChunk['datetime'] = pd.to_datetime(dfChunk['timestamp'], unit='ms')
                # apply per-channel scaling factors to the raw readings
                dfChunk['u'] = dfChunk['u'] * 0.1
                dfChunk['v'] = dfChunk['v'] * 0.1
                dfChunk['w'] = dfChunk['w'] * 0.1
                dfChunk['x'] = dfChunk['x'] * 0.004
                dfChunk['y'] = dfChunk['y'] * 0.004
                dfChunk['z'] = dfChunk['z'] * 0.004
                dfChunk.to_csv(
                    os.path.join(dest_dir,
                                 '%s-%s' % (dataset, os.path.basename(filename))),
                    mode=mode, header=header, index=False,
                    columns=OUTPUT_CSV_COLUMNS)
def visualize(model, sess, epoch, train_mse):
    dest_dir = prepare_directory(os.path.join(
        '../build/plots', args.scope, args.name
    ))
    if args.sample_size:
        x_axis = np.linspace(
            0, len(dataset_in_order['y']) - 1,
            num=args.sample_size, dtype=int
        )
        ground_truth = np.reshape(
            np.array(dataset_in_order['y'])[x_axis], (args.sample_size)
        )
        ps = model.prediction.eval(
            session=sess,
            feed_dict={
                model.xs: np.array(dataset_in_order['x'])[x_axis],
            }
        )
        predicted = np.reshape(ps, (args.sample_size))
    else:
        x_axis = np.arange(data_size)
        ground_truth = np.reshape(
            dataset_in_order['y'][0:data_size], (data_size)
        )
        ps = np.empty(shape=[0, 1])
        for batch_idx in range(0, batch_count):
            begin_idx = batch_idx * args.batch_size
            end_idx = min(begin_idx + args.batch_size, data_size)
            p = model.prediction.eval(
                session=sess,
                feed_dict={
                    model.xs: dataset_in_order['x'][begin_idx:end_idx],
                }
            )
            ps = np.concatenate((ps, p), axis=0)
            if (batch_idx + 1) % 5000 == 0:
                log('drawing %d' % (batch_idx + 1))
        predicted = np.reshape(ps, (data_size))
    plt.ylim(Y_LIMIT)
    plt.plot(x_axis, ground_truth, 'g.')
    plt.plot(x_axis, predicted, color='red', linestyle='--', linewidth=1)
    title = 'epoch-{0}\nmse = {1}'.format(epoch, train_mse)
    plt.title(title)
    plt.savefig(
        os.path.join(dest_dir, 'epoch-{0}.png'.format(epoch)),
        dpi=400, format='png'
    )
    plt.clf()
def visualize(mses):
    dest_dir = prepare_directory(
        os.path.join('../build/plots', args.scope, args.name,
                     os.path.basename(args.test_src).rsplit('.', 1)[0]))
    f, axarr = plt.subplots(2, sharex=True, figsize=(7, 5))
    axarr[0].set_title(args.title)
    axarr[0].set_ylabel('Vibration Signal (g)')
    axarr[1].set_ylabel('Reconstruction Error (MSE)')
    plt.xlabel('Bearing Life (390ms)')
    threshold = args.threshold
    # color each signal segment by whether its reconstruction error crosses the threshold
    anomaly_flags = mses >= threshold
    colors = ['red' if a else 'green' for a in anomaly_flags]
    linestyles = ['dotted' if a else 'solid' for a in anomaly_flags]
    lines = [((x0, y0), (x1, y1))
             for x0, y0, x1, y1 in zip(xs[:-1], ys[:-1], xs[1:], ys[1:])]
    colored_lines = LineCollection(lines, colors=colors,
                                   linestyles=linestyles, linewidths=(1, ))
    axarr[0].add_collection(colored_lines)
    axarr[0].autoscale_view()
    # empty plots exist only to provide legend entries for the two segment styles
    axarr[0].plot([], [], c='green', label='predicted normal')
    axarr[0].plot([], [], c='red', linestyle='dotted',
                  label='predicted anomalous')
    bound = xs[int(len(xs) * 0.9)]
    axarr[0].plot([bound, bound], [np.amin(ys), np.amax(ys)], color='blue',
                  linestyle='--', linewidth=1, label='actual anomalous')
    axarr[0].legend()
    axarr[1].plot(xs, mses, color='blue', label='reconstruction error')
    axarr[1].plot([xs[0], xs[-1]], [threshold, threshold], color='blue',
                  linestyle='--', linewidth=1, label='anomaly threshold')
    axarr[1].legend()
    plt.savefig(os.path.join(
        dest_dir,
        '{0}(seed={1}, smooth={2}).eps'.format(args.name, args.seed,
                                               args.smooth)),
        dpi=800, format='eps')
    plt.clf()
def main():
    args = get_args()
    filename = args.src
    first_datetime, last_datetime, _ = get_datetime(filename)
    # alarms only count if they fire at least `alarm_minutes` before the end of the run
    bound_datetime = last_datetime - pd.Timedelta(minutes=args.alarm_minutes)
    for column in args.columns:
        log('\n=====================')
        log('Feature "%s":' % column)
        thresholds = np.arange(args.thresholds[0], args.thresholds[1],
                               args.thresholds[2])
        rates = []
        for threshold in thresholds:
            df_chunks = pd.read_csv(filename, chunksize=args.chunk_size)
            n_total = 0
            n_above_threshold = 0
            for chunk_idx, df_chunk in enumerate(df_chunks):
                df_chunk['datetime'] = pd.to_datetime(
                    df_chunk['datetime'], infer_datetime_format=True)
                if args.abs:
                    df_chunk[column] = abs(df_chunk[column])
                df_before_time_bound = df_chunk[
                    df_chunk.datetime < bound_datetime]
                df_above_threshold = df_before_time_bound[
                    df_before_time_bound[column] > threshold]
                n_above_threshold += len(df_above_threshold)
                n_total += len(df_before_time_bound)
            rate = float(n_above_threshold) / n_total * 100
            rates.append(rate)
            log('threshold %f = %f%% (%d / %d)' %
                (threshold, rate, n_above_threshold, n_total))

        # visualization
        basename = os.path.basename(filename)
        dest_dir = prepare_directory(os.path.join(args.dest_dir, basename))
        plt.title('%s\n%d minutes alarm of feature "%s"' %
                  (basename, args.alarm_minutes, column))
        plt.xlabel('feature thresholds')
        plt.ylabel('rates above threshold (%)')
        plt.ylim([0, 100])
        plt.plot(thresholds, rates, 'bx-')
        plt.savefig(os.path.join(dest_dir, '%dmin-%s.png' %
                                 (args.alarm_minutes, column)),
                    dpi=400, format='png')
        plt.clf()
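# `get_datetime` is used by several scripts here but not included in this
# excerpt. A minimal sketch consistent with its call sites (it returns the
# first datetime, the last datetime, and the number of chunks in the file);
# the chunk size default and single-pass scan are assumptions, and the repo's
# actual implementation may differ:
def get_datetime(filename, chunk_size=100000):
    first_datetime = last_datetime = None
    chunk_count = 0
    for df_chunk in pd.read_csv(filename, chunksize=chunk_size):
        datetimes = pd.to_datetime(df_chunk['datetime'],
                                   infer_datetime_format=True)
        if first_datetime is None:
            first_datetime = datetimes.iloc[0]
        last_datetime = datetimes.iloc[-1]
        chunk_count += 1
    return first_datetime, last_datetime, chunk_count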
def main():
    datasets = os.listdir(SRC_DIR)
    for dataset in datasets:
        workingTypes = os.listdir(os.path.join(SRC_DIR, dataset))
        for workingType in workingTypes:
            log('parsing ' + dataset + '/' + workingType + '...')
            destDir = prepare_directory(os.path.join(DEST_DIR, dataset))
            # 6-way merge of the per-channel files, ordered by timestamp
            fdInputs = []
            fdOutput = open(
                os.path.join(destDir, '{0}.csv'.format(workingType)), 'w')
            lines = []
            filenames = ['x', 'y', 'z', 'u', 'v', 'w']
            for i in range(0, 6):
                fdInputs.append(None)
                lines.append(None)
                fdInputs[i] = open(
                    os.path.join(SRC_DIR, dataset, workingType,
                                 '{0}.csv'.format(filenames[i])))
                lines[i] = readline(fdInputs[i])
            count = 0
            while all(lines):
                count = count + 1
                if count % 100000 == 0:
                    log(count)
                updateIndices = None
                if all(line[0] == lines[0][0] for line in lines):
                    # all six channels share a timestamp: emit one merged row
                    updateIndices = list(range(0, 6))
                    fdOutput.write('{0},{1},{2},{3},{4},{5},{6}\n'.format(
                        lines[0][0],
                        lines[0][1],
                        lines[1][1],
                        lines[2][1],
                        lines[3][1],
                        lines[4][1],
                        lines[5][1],
                    ))
                else:
                    # otherwise advance only the channels holding the smallest timestamp
                    timestamps = np.array(lines)[:, 0].astype(np.int64)
                    updateIndices = np.where(timestamps == timestamps.min())[0]
                for updateIndex in updateIndices:
                    lines[updateIndex] = readline(fdInputs[updateIndex])
            for i in range(0, 6):
                fdInputs[i].close()
            fdOutput.close()
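# `readline` is a repo helper not included in this excerpt. Its use above
# implies it returns one parsed CSV row (a [timestamp, value] list) per call
# and something falsy at end-of-file, so `while all(lines)` stops once any
# input is exhausted. A minimal sketch under those assumptions:
def readline(fd):
    line = fd.readline()
    if not line:
        return None  # falsy at EOF ends the merge loop
    return line.strip().split(',')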
def main():
    args = get_args()
    src_dir = os.path.join('../build/data', args.scope, 'initialized')
    dest_dir = prepare_directory(
        os.path.join('../build/data', args.scope, 'labeled'))
    filenames = glob.glob(os.path.join(src_dir, '*.csv'))
    threshold_step = args.thresholds[2]
    thresholds = np.arange(args.thresholds[0], args.thresholds[1],
                           threshold_step)
    df_breakpoints = pd.read_csv(args.src_breakpoint)
    for filename in filenames:
        log('parsing %s in scope %s' %
            (os.path.basename(filename), args.scope))
        first_datetime, last_datetime, chunk_count = get_datetime(filename)
        total_seconds = (last_datetime - first_datetime).total_seconds()
        df_chunks = pd.read_csv(filename, chunksize=args.chunk_size)
        for chunk_idx, df_chunk in enumerate(df_chunks):
            header = chunk_idx == 0
            mode = 'a' if chunk_idx > 0 else 'w'
            df_chunk['datetime'] = pd.to_datetime(df_chunk['datetime'],
                                                  infer_datetime_format=True)
            log('parsing chunk %d/%d' % (chunk_idx, chunk_count))
            # remaining useful life in seconds, and as a proportion of total life
            df_chunk['rul'] = (last_datetime - df_chunk['datetime']
                               ).astype('timedelta64[us]') / 1000000
            df_chunk['rulp'] = df_chunk['rul'] / total_seconds
            df_chunk['level_x'] = [
                bisect_left(thresholds, element)
                for element in df_chunk['x']
            ]
            df_chunk['level_y'] = [
                bisect_left(thresholds, element)
                for element in df_chunk['y']
            ]
            df_chunk['symbol_x'] = [
                bisect_left(df_breakpoints['x'], element)
                for element in df_chunk['x']
            ]
            df_chunk['symbol_y'] = [
                bisect_left(df_breakpoints['y'], element)
                for element in df_chunk['y']
            ]
            df_chunk.to_csv(os.path.join(dest_dir,
                                         os.path.basename(filename)),
                            mode=mode, header=header, index=False)
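# For reference, `bisect_left` maps a raw value to its level/symbol index by
# counting the breakpoints strictly below it; values equal to a breakpoint
# fall into the lower region. A quick illustration with three breakpoints
# (i.e. symbol_size = 4):
from bisect import bisect_left

print(bisect_left([-0.5, 0.0, 0.5], -0.7))  # 0
print(bisect_left([-0.5, 0.0, 0.5], -0.2))  # 1
print(bisect_left([-0.5, 0.0, 0.5], 0.3))   # 2
print(bisect_left([-0.5, 0.0, 0.5], 0.9))   # 3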
def visualize_dataset(model, sess, epoch, dataset_name):
    dest_dir = prepare_directory(os.path.join(
        '../build/plots', args.scope, args.name
    ))
    if args.sample_size:
        # round the sample size down to a multiple of the batch size
        args.sample_size = args.sample_size - (args.sample_size % args.batch_size)
        x_axis = np.linspace(
            0, len(dataset[dataset_name]) - 1,
            num=args.sample_size, dtype=int
        )
        ground_truth = dataset[dataset_name][x_axis, 0]
        assignments, ps = sess.run(
            [model.expanded_assignments, model.prediction],
            feed_dict={
                model.xs: dataset[dataset_name][x_axis],
                model.ys: dataset[dataset_name][x_axis],
                model.feed_previous: True,
            })
        # ps = model.prediction.eval(
        #     session=sess,
        #     feed_dict={
        #         model.xs: dataset[dataset_name][x_axis],
        #         model.ys: dataset[dataset_name][x_axis],
        #         model.feed_previous: True,
        #     }
        # )
        predicted = np.array(ps)[:, 0]
        assigned = np.array(assignments)[:, 0]
    plt.ylim(Y_LIMIT)
    plt.scatter(x_axis, assigned, color='green', marker='x', s=12)
    plt.scatter(x_axis, predicted, color='blue', s=10, linewidth=0)
    plt.plot(x_axis, abs(predicted - assigned), color='red',
             linestyle='--', linewidth=1)
    mse = eval_mse(model, sess, dataset_name)
    title = 'epoch-{0}\n{1} mse = {2}'.format(epoch, dataset_name, mse)
    plt.title(title)
    plt.savefig(
        os.path.join(dest_dir,
                     'epoch-{0}-{1}.png'.format(epoch, dataset_name)),
        dpi=400, format='png'
    )
    plt.clf()
    return mse
def main():
    args = get_args()
    df_breakpoints = pd.read_csv(args.src_breakpoint)
    columns = df_breakpoints.columns.values
    for src, dest in zip(args.srcs, args.dests):
        log('parsing %s ...' % (src, ))
        prepare_directory(os.path.dirname(dest))
        df_chunks = pd.read_csv(src, chunksize=args.chunk_size)
        for chunk_idx, df_chunk in enumerate(df_chunks):
            print(chunk_idx)  # progress
            for column in columns:
                df_chunk['level_' + column] = [
                    bisect_left(df_breakpoints[column], element)
                    for element in df_chunk[column]
                ]
            header = chunk_idx == 0
            mode = 'a' if chunk_idx > 0 else 'w'
            df_chunk.to_csv(dest, mode=mode, header=header, index=False)
def visualize(model, sess):
    dest_dir = prepare_directory(
        os.path.join('../build/plots', args.scope, args.name,
                     os.path.basename(args.test_src).rsplit('.', 1)[0]))
    plt.figure(figsize=(6, 4))
    plt.ylim(args.ylim)
    plt.ylabel('Health Indicator (%)')
    plt.xlabel('Data Entry')
    title = 'HI Prediction Result'
    x_axis = np.linspace(0, len(dataset_in_order['y']) - 1,
                         num=args.sample_size, dtype=int)
    ground_truth = np.reshape(
        np.array(dataset_in_order['y'])[x_axis], (args.sample_size))
    ps = model.prediction.eval(session=sess, feed_dict={
        model.xs: np.array(dataset_in_order['x'])[x_axis],
    })
    predicted = np.reshape(ps, (args.sample_size))
    if args.smooth:
        predicted = smooth(predicted, args.smooth)
    plt.plot(x_axis, ground_truth * 100, color='green', linewidth=2,
             label='real HI')
    plt.plot(x_axis, predicted * 100, color='blue', linestyle='--',
             linewidth=2, label='predicted HI')
    plt.legend()
    plt.title(title)
    plt.savefig(os.path.join(
        dest_dir,
        'test-health-index-batch_step-{0}.eps'.format(args.batch_step)),
        dpi=800, format='eps')
    plt.clf()
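# `smooth` is not included in this excerpt. A plausible reading of
# `smooth(predicted, args.smooth)` is a moving average with window size
# `args.smooth`; treat this sketch as an assumption about the repo's helper:
def smooth(values, window):
    kernel = np.ones(window) / window
    # mode='same' keeps the output aligned with x_axis (edges are damped)
    return np.convolve(values, kernel, mode='same')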
def visualize_dataset(model, sess, epoch, dataset_name):
    dest_dir = prepare_directory(os.path.join(
        '../build/plots', args.scope, args.name
    ))
    if args.sample_size:
        args.sample_size = args.sample_size - (args.sample_size % args.batch_size)
        x_axis = np.linspace(
            0, len(dataset[dataset_name + '_feature']) - 1,
            num=args.sample_size, dtype=int
        )
        ground_truth = dataset[dataset_name + '_label'][x_axis, 0]
        ps = model.prediction.eval(
            session=sess,
            feed_dict={
                model.xs: dataset[dataset_name + '_feature'][x_axis],
                model.ys: dataset[dataset_name + '_label'][x_axis],
            }
        )
        predicted = np.array(ps)[:, 0]
    plt.ylim(Y_LIMIT)
    plt.scatter(x_axis, ground_truth, color='green', marker='x', s=12)
    plt.scatter(x_axis, predicted, color='blue', s=10, linewidth=0)
    plt.plot(x_axis, abs(predicted - ground_truth), color='red',
             linestyle='--', linewidth=1)
    acc, entropy = eval_metric(model, sess, dataset_name)
    title = 'epoch-{0}\n{1} accuracy = {2}'.format(epoch, dataset_name, acc)
    plt.title(title)
    plt.savefig(
        os.path.join(dest_dir,
                     'epoch-{0}-{1}.png'.format(epoch, dataset_name)),
        dpi=400, format='png'
    )
    plt.clf()
    return acc, entropy
def visualize(xs, ys):
    dest_dir = prepare_directory(
        os.path.join('../build/plots', args.scope, args.name,
                     os.path.basename(args.test_src).rsplit('.', 1)[0]))
    plt.ylim(args.ylim)
    plt.ylabel('Accuracy')
    plt.xlabel('Index')
    title = 'Test Accuracy'
    if args.batch_step < 200:
        plt.scatter(xs, ys, color='purple', s=0.1)
    else:
        plt.plot(xs, ys, color='purple', linestyle='--', linewidth=1)
    plt.title(title)
    plt.savefig(os.path.join(
        dest_dir,
        'test-accuracy-batch_step-{0}.png'.format(args.batch_step)),
        dpi=400, format='png')
    plt.clf()
def main():
    dest_dir = prepare_directory(
        os.path.join('../../build/data', SCOPE, 'initialized'))
    for dataset in DATASETS_TO_PARSE:
        instances = os.listdir(os.path.join(SRC_DIR, dataset))
        for prefix in PREFIXES_TO_PARSE:
            for instance in instances:
                filenames = sorted(
                    glob.glob(
                        os.path.join(SRC_DIR, dataset, instance,
                                     prefix + '_*.csv')))
                length = len(filenames)
                for i, filename in enumerate(filenames, start=1):
                    header = i == 1
                    mode = 'a' if i > 1 else 'w'
                    log('parsing ' + dataset + '/' + instance + '...' +
                        str(i) + '/' + str(length))
                    df = pd.read_csv(filename, sep=detectSep(filename),
                                     header=None, names=INPUT_CSV_COLUMNS)
                    # hard-code the recording date before assembling full datetimes
                    df['year'] = 2017
                    df['month'] = 5
                    df['day'] = 26
                    df['datetime'] = pd.to_datetime(df[DATETIME_FIELDS])
                    df.to_csv(os.path.join(
                        dest_dir,
                        '%s-%s-%s.csv' % (dataset, instance, prefix)),
                        mode=mode, header=header, index=False,
                        columns=OUTPUT_CSV_COLUMNS)
def visualize_dataset(model, sess, epoch, dataset_name):
    dest_dir = prepare_directory(
        os.path.join('../build/plots', args.scope, args.name))
    if args.sample_size:
        x_axis = np.linspace(0, len(dataset[dataset_name]) - 1,
                             num=args.sample_size, dtype=int)
        ground_truth = dataset[dataset_name][x_axis, 0, 0]
        ps = model.prediction.eval(session=sess, feed_dict={
            model.xs: dataset[dataset_name][x_axis],
            model.ys: dataset[dataset_name][x_axis],
        })
        predicted = np.array(ps)[:, 0, 0]
    plt.ylim(Y_LIMIT)
    plt.scatter(x_axis, ground_truth, color='green', marker='x', s=12)
    plt.scatter(x_axis, predicted, color='blue', s=10, linewidth=0)
    plt.plot(x_axis, np.absolute(predicted - ground_truth), color='red',
             linestyle='--', linewidth=1)
    mse = eval_mse(model, sess, dataset_name)
    title = '{0}\nepoch-{1}\nmse = {2}'.format(dataset_name, epoch, mse)
    plt.title(title)
    plt.savefig(os.path.join(dest_dir,
                             'epoch-{0}-{1}.png'.format(epoch, dataset_name)),
                dpi=400, format='png')
    plt.clf()
    return mse
# log y-axis
if args.log_y_axis:
    # plot an invisible curve purely to switch the y-axis to log scale
    dy = 0.00001
    t = np.arange(dy, 1.0, dy)
    plt.semilogy(t, np.exp(-t / 5.0), alpha=0.0)
if args.grid:
    plt.grid(True)
if args.legend_outside:
    lgd = plt.legend(loc='center right',
                     bbox_to_anchor=(args.legend_outside, 0.5),
                     fontsize=10)
elif args.legend_location:
    lgd = plt.legend(fontsize=10, loc=args.legend_location)
else:
    lgd = plt.legend(fontsize=10)
dest_dir = prepare_directory(os.path.dirname(args.dest))  # ensure the output directory exists
if args.legend_outside:
    # keep the legend placed outside the axes within the saved bounding box
    plt.savefig(args.dest, dpi=1200, format='eps',
                bbox_extra_artists=(lgd, ), bbox_inches='tight')
else:
    plt.savefig(args.dest, dpi=1200, format='eps')
plt.clf()
def get_anomaly_flags(df):
    # label the first 90% of the run as normal (0) and the final 10% as anomalous (1)
    length = len(df)
    normal_length = int(length * 0.9)
    anomalous_length = length - normal_length
    normal_flags = np.repeat(0, normal_length)
    anomalous_flags = np.repeat(1, anomalous_length)
    anomaly_flags = np.concatenate((normal_flags, anomalous_flags))
    return anomaly_flags


if __name__ == '__main__':
    args = get_args()
    for src, dest in zip(args.srcs, args.dests):
        log('parsing %s ...' % (src, ))
        prepare_directory(os.path.dirname(dest))
        df_chunks = pd.read_csv(src, chunksize=args.chunk_size)
        df_result = pd.DataFrame({
            'avg': [],
            'max': [],
            'min': [],
            'fft1': [],
            'fft2': [],
            'paa': [],
        })
        for batch_idx, df_batch in get_batch(df_chunks, args.batch_size):
            if batch_idx % 1000 == 0:
                print(batch_idx)
            values = np.array(df_batch['x'])
            fft1, fft2 = get_fft(values, args.batch_size)
            paa_value = get_paa_value(values)
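# Neither `get_fft` nor `get_paa_value` appears in this excerpt. Hedged
# sketches consistent with their use above: `get_fft` returning the magnitudes
# of the two lowest non-DC FFT bins of a batch, and `get_paa_value` returning
# the Piecewise Aggregate Approximation of a batch (its mean over the window).
# Both are assumptions about the repo's helpers, not their actual definitions.
def get_fft(values, batch_size):
    spectrum = np.abs(np.fft.rfft(values, n=batch_size))  # needs batch_size >= 4
    return spectrum[1], spectrum[2]


def get_paa_value(values):
    # PAA collapses one batch-sized window to its average value
    return np.mean(values)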
# start session
sess = tf.InteractiveSession(
    # config=tf.ConfigProto(intra_op_parallelism_threads=N_THREADS)
)

# prepare model import or export
if args.src:
    importSaver = tf.train.Saver()
    importSaver.restore(sess, args.src)
else:
    # initialize variables
    sess.run(tf.global_variables_initializer())
if args.dest:
    exportSaver = tf.train.Saver()
    prepare_directory(os.path.dirname(args.dest))

filename = args.log or os.path.join(
    prepare_directory(os.path.join('../build/plots', args.scope, args.name)),
    'log.csv')
min_validate_mse = 999999
batch_count, data_size = get_batch_count(dataset['train'], args.batch_size)
with open(filename, 'w') as fd_log:
    start_time = time.time()

    # before training
    validate_mse = visualize_dataset(model, sess, 0, 'validate')
    anomalous_mse = visualize_dataset(model, sess, 0, 'anomalous')
    print(
        'Epoch\t%d, Batch\t%d, Elapsed time\t%.1fs, '
        'Validate MSE\t%s, Anomalous MSE\t%s, Min Validate MSE\t%s' %
        (0, 0, 0, validate_mse, anomalous_mse, min_validate_mse))
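# `get_batch_count` is not part of this excerpt; from its call site it returns
# the number of batches and the total sample count. A minimal sketch, assuming
# a final partial batch is counted:
def get_batch_count(data, batch_size):
    data_size = len(data)
    batch_count = (data_size + batch_size - 1) // batch_size  # ceiling division
    return batch_count, data_size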
while True:
    dataset = read_dataset()
    for batch_idx in range(0, 1):  # score only the most recent batch
        begin_idx = batch_idx * args.batch_size
        end_idx = begin_idx + args.batch_size
        xs = dataset[begin_idx:end_idx]
        # run the restored graph by tensor name: reconstruction vs. target
        restored_predictions = sess.run(
            'compute_cost/Reshape_1:0',
            feed_dict={
                'input_layer/xs:0': xs,
                'input_layer/ys:0': xs,
                'input_layer/feed_previous:0': True,
            })
        restored_ys = sess.run(
            'compute_cost/Reshape_3:0',
            feed_dict={
                'input_layer/xs:0': xs,
                'input_layer/ys:0': xs,
                'input_layer/feed_previous:0': True,
            })
        mse = np.mean((restored_ys - restored_predictions) ** 2, axis=1)
        is_anomaly = mse[0] > args.threshold
        dest_dir = prepare_directory(
            os.path.join(args.src, '../../inference-result'))
        with open(os.path.join(dest_dir, 'last.txt'), 'w') as fd_result:
            print('anomaly' if is_anomaly else 'normal')
            fd_result.write('anomaly' if is_anomaly else 'normal')
    time.sleep(5)  # poll for new data every five seconds
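# `read_dataset` is not shown in this excerpt. The loop above implies it
# re-reads the newest sensor readings on every poll and returns an array of
# model-ready windows. A hedged sketch; the path, column name, and window
# length below are illustrative assumptions, not the repo's actual values:
def read_dataset(src='../build/data/latest.csv', step_size=32):
    df = pd.read_csv(src, usecols=['x'])
    values = df['x'].values
    usable = len(values) - (len(values) % step_size)
    # shape into (n_windows, step_size) sequences for the seq2seq graph
    return values[:usable].reshape(-1, step_size)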
def main():
    datasets = os.listdir(SRC_DIR)
    for dataset in datasets:
        dataDirs = os.listdir(os.path.join(SRC_DIR, dataset))
        for dataDir in dataDirs:
            if not os.path.isdir(os.path.join(SRC_DIR, dataset, dataDir)):
                continue
            log('parsing ' + dataset + '/' + dataDir + '...')
            workingType = dataDir.split('_')[0]
            dataType = dataDir.split('_')[1]
            readDir = os.path.join(
                SRC_DIR, dataset, workingType + '_' + dataType
            )
            # default wiring: channels 1-3 carry x/y/z, channels 4-6 carry u/v/w
            channel_map = [None, 'x', 'y', 'z', 'u', 'v', 'w']
            channels = [1, 2, 3] if dataType == 'acc' else [4, 5, 6]
            # these runs use the opposite channel-to-axis assignment and a
            # different timestamp format
            swapped_datasets = [
                '2017-07-18-168000rpm',
                '2017-08-17-0.35mm',
                '2017-08-21-0.5mm',
                '2017-08-21-0.8mm',
                '2017-08-21-1.0mm',
                '2017-08-21-1.55mm',
                '2017-08-21-2.0mm',
                '2017-08-21-3.175mm',
            ]
            if dataset in swapped_datasets:
                channels = [4, 5, 6] if dataType == 'acc' else [1, 2, 3]
                channel_map = [None, 'u', 'v', 'w', 'x', 'y', 'z']
            for channel in channels:
                filenames = glob.glob(os.path.join(
                    readDir, 'Channel{0}_*.csv'.format(channel)
                ))
                for filename in filenames:
                    df = pd.read_csv(
                        filename, names=INPUT_CSV_COLUMNS, header=None
                    )
                    # normalize timestamps to epoch milliseconds
                    if dataset in swapped_datasets:
                        df['timestamp'] = pd.to_datetime(
                            df['timestamp'], format='%m/%d/%Y %H:%M:%S.%f'
                        ).astype(np.int64) // int(1e6)
                    else:
                        df['timestamp'] = pd.to_datetime(
                            df['timestamp'], format='%Y%m%d%H%M%S%f'
                        ).astype(np.int64) // int(1e6)
                    destDir = prepare_directory(os.path.join(
                        '../../build/data/', SCOPE, 'merged', dataset,
                        workingType
                    ))
                    df.to_csv(
                        os.path.join(destDir, channel_map[channel] + '.csv'),
                        mode='a', header=False, index=False
                    )
def main():
    thresholds = np.arange(args.thresholds[0], args.thresholds[1],
                           args.thresholds[2])
    table_true_alarm = np.empty([
        len(args.columns),
        len(args.srcs),
        len(thresholds),
    ])
    table_false_alarm = np.empty([
        len(args.columns),
        len(args.srcs),
        len(thresholds),
    ])
    for src_idx, src in enumerate(args.srcs):
        log('\n=====================')
        log('File %d "%s":' % (src_idx, src))
        _, last_datetime, _ = get_datetime(src)
        bound_datetime = last_datetime - pd.Timedelta(
            minutes=args.alarm_minutes)
        df_chunks = pd.read_csv(src, chunksize=args.chunk_size)
        df_featured = add_df_feature(df_chunks)
        for column_idx, column in enumerate(args.columns):
            log('\n\tFeature "%s":' % column)
            for threshold_idx, threshold in enumerate(thresholds):
                df_before_time_bound = df_featured[
                    df_featured.datetime < bound_datetime]
                df_total_alarm = df_before_time_bound[
                    df_before_time_bound[column] > threshold]
                n_total_alarm = len(df_total_alarm)
                # at most one alarm per run counts as true; the rest are false
                n_true_alarm = 1 if n_total_alarm > 0 else 0
                n_false_alarm = n_total_alarm - n_true_alarm
                table_true_alarm[column_idx][src_idx][
                    threshold_idx] = n_true_alarm
                table_false_alarm[column_idx][src_idx][
                    threshold_idx] = n_false_alarm
                log('\tthreshold = %f, n_true_alarm = %d, n_total_alarm = %d' %
                    (threshold, n_true_alarm, n_true_alarm + n_false_alarm))
    log('\ntrue alarm table')
    log('================\n')
    log(table_true_alarm)
    log('\nfalse alarm table')
    log('=================\n')
    log(table_false_alarm)
    table_total_alarm = table_true_alarm + table_false_alarm
    prevent_zero_division = np.vectorize(
        lambda total_alarm: 1 if total_alarm == 0 else total_alarm)
    table_true_alarm_indicator = table_true_alarm.astype(bool).astype(float)
    log('\ntrue alarm indicator table')
    log('==========================\n')
    log(table_true_alarm_indicator)
    for column_idx, column in enumerate(args.columns):
        true_alarms = np.average(table_true_alarm[column_idx], axis=0)
        total_alarms = np.average(table_total_alarm[column_idx], axis=0)
        true_alarm_indicators = np.average(
            table_true_alarm_indicator[column_idx], axis=0)
        log('\ntrue_alarm_indicators')
        log('=====================\n')
        log(true_alarm_indicators)

        # visualization
        dest_dir = prepare_directory(args.dest_dir)
        fig, ax_true_alarm_indicator = plt.subplots()
        ax_true_alarm_indicator.set_xlabel('Feature Thresholds')
        ax_true_alarm_indicator.set_ylabel('True Alarm Indicator (%)',
                                           color='blue')
        ax_true_alarm_indicator.plot(thresholds, true_alarm_indicators * 100,
                                     color='blue', marker='.')
        ax_true_alarm_indicator.set_ylim([0, 100])
        ax_true_alarm_indicator.tick_params('y', colors='blue')
        ax_n_total_alarm = ax_true_alarm_indicator.twinx()
        # draw the indicator axis above the alarm-count axis
        ax_true_alarm_indicator.set_zorder(ax_n_total_alarm.get_zorder() + 1)
        ax_true_alarm_indicator.patch.set_visible(False)
        ax_n_total_alarm.yaxis.tick_right()
        ax_n_total_alarm.set_ylabel('Total Alarm Count', color='red')
        ax_n_total_alarm.tick_params('y', colors='red')
        ax_n_total_alarm.plot(thresholds, total_alarms, color='red',
                              marker='.')
        plt.title('%d minutes alarm of feature "%s"' %
                  (args.alarm_minutes, column))
        plt.savefig(os.path.join(
            dest_dir,
            '%dmin-%f,%f,%fth-%s.png' % (args.alarm_minutes,
                                         args.thresholds[0],
                                         args.thresholds[1],
                                         args.thresholds[2], column)),
            dpi=400, format='png')
        plt.clf()
        breakpoints[column].append(n.ppf(probability))
    equal_breakpoints[column] = np.linspace(minValue, maxValue,
                                            args.symbol_size + 1)[1:-1]
    print('==== Report ====')
    print('lens\t', lens)
    print('sums\t', sums)
    print('mean\t', mean)
    print('std\t', std)
    print('minValue\t', minValue)
    print('maxValue\t', maxValue)
    print('step\t', step)
    print('len(breakpoints)\t', len(breakpoints[column]))
    print('breakpoints\t', breakpoints[column])

df_breakpoints = pd.DataFrame(breakpoints)
df_equal_breakpoints = pd.DataFrame(equal_breakpoints)
dest_dir = prepare_directory(args.dest_dir)
df_breakpoints.to_csv(
    os.path.join(dest_dir,
                 'breakpoint-{0}.csv'.format(args.symbol_size)),
    header=True, index=False
)
df_equal_breakpoints.to_csv(
    os.path.join(dest_dir,
                 'equal-breakpoint-{0}.csv'.format(args.symbol_size)),
    header=True, index=False
)
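# For context on the fragment above: `n` is evidently a fitted Gaussian
# (e.g. scipy.stats.norm(mean, std)) and `n.ppf` its quantile function, so the
# breakpoints cut the fitted distribution into equiprobable regions, the
# standard SAX recipe. A worked example with symbol_size = 4 on standardized
# data:
from scipy.stats import norm

n = norm(0.0, 1.0)
probabilities = [i / 4.0 for i in range(1, 4)]      # 0.25, 0.50, 0.75
print([round(n.ppf(p), 4) for p in probabilities])  # [-0.6745, 0.0, 0.6745]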