Example #1
0
def compute_events(data_dir, start_key, end_key, label='', output_dir=None):
    """Annotate every CSV file in ``data_dir`` and return the merged frames.

    Each directory entry whose name ends in ``.csv`` is read with pandas and
    handed to a fresh ``HeadAnnotator``. Columns present in the raw frame but
    absent from the annotator's frame (``ha.df``) are copied across, and the
    per-file frames are concatenated into the return value.

    When ``output_dir`` is given, the following is written for each file:

    * ``<output_dir>/events/<stem>-<i>.csv`` -- the rows between the i-th
      ``start_key`` event and the i-th ``end_key`` event, with the extra
      columns ``original_index`` and ``turn_sentiment`` (set to ``label``);
    * ``<output_dir>/<file name>`` -- the full annotated frame;
    * the output of ``visualize.plot_diagnostics`` for the path prefix
      ``<output_dir>/<stem>``.

    ``<stem>`` is the file name up to its first dot.

    Args:
        data_dir: directory scanned (non-recursively) for ``.csv`` files.
        start_key: key into the annotator's event mapping for window starts.
        end_key: key into the annotator's event mapping for window ends.
        label: value stored in the ``turn_sentiment`` column of each window.
        output_dir: destination directory, or ``None`` to write nothing.

    Returns:
        A ``pandas.DataFrame`` with the annotated rows of all files; empty
        when ``data_dir`` holds no ``.csv`` file.
    """
    if output_dir is not None:
        make_dir(output_dir)

    df_merged = pd.DataFrame()
    for fname in os.listdir(data_dir):
        # Match on the suffix: a substring test would also pick up entries
        # such as 'x.csv.bak' and feed them to read_csv.
        if not fname.endswith('.csv'):
            continue

        df = pd.read_csv('%s/%s' % (data_dir, fname))
        # One annotator per file, created only once we know the entry is a CSV.
        ha = HeadAnnotator()
        event_hash, _ = ha.annotate_events(df)
        df_p = ha.df
        print(event_hash)

        # Carry over raw columns that the annotated frame does not have.
        annotated_cols = set(df_p.columns)
        for col in df.columns:
            if col not in annotated_cols:
                df_p[col] = df[col]

        if output_dir is not None:
            stem = fname.split('.')[0]
            sub_dir = '%s/events' % output_dir
            make_dir(sub_dir)
            # Start and end events are paired by position; start events
            # without a matching end event are skipped.
            for i, start in enumerate(event_hash[start_key]):
                ends = event_hash[end_key]
                if i >= len(ends):
                    break
                # .loc label slices include both endpoints. Copy so the new
                # columns are added to an independent frame rather than to a
                # slice of df_p (avoids pandas' SettingWithCopy ambiguity).
                df_sub = df_p.loc[start:ends[i]].copy()
                df_sub['original_index'] = df_sub.index
                # add a class so the training data is 'labeled'
                df_sub['turn_sentiment'] = label
                print(fname)
                df_sub.to_csv('%s/%s-%s.csv' % (sub_dir, stem, i), index=False)

            df_p.to_csv('%s/%s' % (output_dir, fname), index=False)
            visualize.plot_diagnostics(df_p, ha.active_features, '%s/%s' % (output_dir, stem), y_col='noseX')

        df_merged = pd.concat([df_merged, df_p])
    return df_merged
Example #2
0
 def print_test_data(m_df, cf, cf_string):
     Y_test = cf.predict(m_df[active_cols])
     print cf_string, ' accuracy', \
             np.sum(Y_test == m_df['class']) / float(len(m_df))
     df_test['class'] = Y_test
     plot_diagnostics(df_test, active_cols, '%s/head-turn-test-%s' % (base_dir, cf_string))
Example #3
0
    # NOTE(review): this is the body of an enclosing definition whose header is
    # not visible here; `window_size`, `relevant_features` and `config` are
    # read below but assigned outside the visible lines.
    # the directory (labeled data) we are looking to load head turns from
    data_dir = sys.argv[1]
    # number of clusters to cluster the data on
    k = int(sys.argv[2])
    base_dir = "data"  # dir to read data from
    # input directory is data/<data_dir>
    m_dir = "%s/%s" % (base_dir, data_dir)
    output_dir = "data/merged"
    # NOTE(review): ignore_columns is not referenced again in the visible
    # lines -- presumably related to how relevant_features is built; confirm
    # against the rest of the file or remove.
    ignore_columns = [
        "date",
        "frameIndex",
        "class",
        "time",
        "noseX_raw",
        "noseY_raw",
        "faceBottom",
        "faceTop",
        "faceLeft",
        "faceRight",
        "noseX",
        "noseY",
        "isFrontFace",
    ]
    # generate_training_set returns the training frame together with the list
    # of feature names that is plotted and stored in the config below
    df, active_features = generate_training_set(
        m_dir, k=k, window_size=window_size, relevant_features=relevant_features
    )
    # write the training set to data/merged/<data_dir>.csv without the index
    df.to_csv("%s/%s.csv" % (output_dir, data_dir), index=False)
    # same path without the .csv extension is passed to plot_diagnostics
    plot_diagnostics(df, active_features, "%s/%s" % (output_dir, data_dir))
    # store the feature list in config and (over)write config.json in the
    # current working directory
    config["active_features"] = active_features
    with open("config.json", "w") as outfile:
        json.dump(config, outfile)