Ejemplo n.º 1
0
def plot_station(directory, station_id):
    """Plot the charging profile of every pickled session for one station.

    Sessions rejected by the cleaner as too short (dc.ShortTime) are drawn
    on figure 1; all other sessions go on figure 2. A legend maps each
    curve to the file it came from.
    """
    # Build the glob pattern that matches this station's pickle files.
    id_caltech = "0003"
    month = "*"
    day = "*-"
    time = "*"
    filename = id_caltech + station_id + "_" + month + day + time + ".pkl"
    path_regx = directory + filename

    handles = []
    names = []
    for pathname in glob.glob(path_regx):
        df = dir_c.load_obj(pathname)
        data = dc.df_to_entry(df)
        reason = dc.clean_data(data, more_info=True)[1]
        # Short sessions get their own figure; everything else shares one.
        figure_id = 1 if reason == dc.ShortTime else 2
        plt.figure(figure_id)
        handles.append(plot_profile(df))
        names.append(pathname)
    plt.legend(handles, names)
    plt.show()
Ejemplo n.º 2
0
    def sans_query(self, query):
        """Return the de-duplicated documents matching any word of *query*."""
        cleaned_query = data_cleaner.clean_data(query)

        matches = []
        for word in cleaned_query:
            matches.extend(
                Query.query_one_word(word, self.inverted_index))

        # Collapse duplicates before handing the list back.
        return list(set(matches))
Ejemplo n.º 3
0
 def process_files(self):
     """Read and clean every corpus file listed in ``self.filenames``.

     Returns:
         dict mapping each filename to the cleaned, lower-cased result of
         ``data_cleaner.clean_data`` applied to that file's text.
     """
     file_to_tokens = {}
     for file in self.filenames:
         # Use a context manager so the handle is closed deterministically
         # (the original leaked the file object returned by open()).
         with open('./sample-corpus/' + file, "r") as handle:
             raw_text = handle.read().lower()
         file_to_tokens[file] = data_cleaner.clean_data(raw_text)
     return file_to_tokens
Ejemplo n.º 4
0
def datapoint_length_vals(df):
    """Run clean_data on one session with length-related extra tests.

    The extra tests report session length in hours, average gap in
    seconds, max gap in minutes, and the session's energy demand.
    """
    entry = dc.df_to_entry(df)
    extra_tests = (
        lambda e: dc.session_length(e) / np.timedelta64(1, 'h'),
        lambda e: dc.average_gap(e) / np.timedelta64(1, 's'),
        lambda e: dc.max_gap(e) / np.timedelta64(1, 'm'),
        lambda e: e.energyDemand,
    )
    return list(dc.clean_data(entry, other_tests=extra_tests))
Ejemplo n.º 5
0
def model_with_all_preprocessing_and_features(no_bag_of_words=False):
    """Run the full pipeline: load, clean, extract features, then train."""
    print('## Download and load the data')
    dataset = get_dataset()

    print('## Clean the data')
    cleaned_X = clean_data(dataset['X'])

    print('## Feature Extraction')
    all_features_X = extract_features(
        cleaned_X, dataset['X'], no_bag_of_words=no_bag_of_words)

    print('## Training and Evaluation')
    train_the_model(all_features_X, dataset['Y'])
Ejemplo n.º 6
0
def model_with_only_preprocessing():
    """Run the pipeline with bag-of-words as the only feature step."""
    print('## Download and load the data')
    dataset = get_dataset()

    print('## Clean the data')
    cleaned_X = clean_data(dataset['X'])

    print('## Only perform BoW')
    all_features_X = perform_bag_of_words(cleaned_X)

    print('## Training and Evaluation')
    train_the_model(all_features_X, dataset['Y'])
Ejemplo n.º 7
0
def gap_vals(df):
    """Run clean_data on one session with gap-related extra tests.

    The max_gap test (called with more_info=True) yields a tuple; its
    first element is converted to minutes in place and its third element
    is appended to the end of the results.
    """
    entry = dc.df_to_entry(df)
    extra_tests = (
        lambda e: dc.session_length(e) / np.timedelta64(1, 'h'),
        lambda e: dc.max_gap(e, more_info=True),
        lambda e: e.energyDemand,
    )
    results = list(dc.clean_data(entry, other_tests=extra_tests))

    # NOTE(review): index 3 is assumed to hold the max_gap tuple — the
    # position depends on dc.clean_data's output ordering; confirm there.
    gap_info = results[3]
    results[3] = gap_info[0] / np.timedelta64(1, 'm')
    results.append(gap_info[2])

    return results