Example #1
    def save(self, dirname):
        self._check_if_fitted()

        mkdir(dirname)
        save_tree(self.tree, self.node_to_class, self.node_to_classes,
                  self.class_maps, join(dirname, 'tree'))

        models_dct = {}
        models_dirname = join(dirname, 'models')
        mkdir(models_dirname)
        for node_id, model in self.models.items():
            fname = f'model{node_id}'
            model.save(join(models_dirname, fname))
            models_dct[node_id] = fname
        with open(join(models_dirname, 'models_fnames.yaml'),
                  'w',
                  encoding='utf-8') as file:
            yaml_dump(models_dct, file)

        encoders_dct = {}
        encoders_dirname = join(dirname, 'encoders')
        mkdir(encoders_dirname)
        for node_id, encoder in self.encoders.items():
            fname = f'encoder{node_id}.sav'
            joblib_dump(encoder, join(encoders_dirname, fname))
            encoders_dct[node_id] = fname
        with open(join(encoders_dirname, 'encoders_fnames.yaml'),
                  'w',
                  encoding='utf-8') as file:
            yaml_dump(encoders_dct, file)
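For reference, a hedged sketch of reading one of these mappings back; `yaml.safe_load` and `joblib.load` are standard library calls, and the directory layout mirrors what `save()` writes above. `load_encoders` itself is a hypothetical helper, not part of the source class.

from os.path import join
import joblib
import yaml

def load_encoders(dirname):
    # Read the node_id -> filename mapping written by save() above
    encoders_dirname = join(dirname, 'encoders')
    with open(join(encoders_dirname, 'encoders_fnames.yaml'),
              encoding='utf-8') as file:
        encoders_dct = yaml.safe_load(file)
    # Deserialize each encoder with joblib, keyed by its node id
    return {node_id: joblib.load(join(encoders_dirname, fname))
            for node_id, fname in encoders_dct.items()}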
Example #2
def main():
    print("[INFO] Start train script...")
    
    print("[INFO] Loading the data...")
    X, y = load_data()
    
    print("[INFO] Training the model...")
    pipeline = get_model_pipeline()
    pipeline.fit(X, y)
    
    print("[INFO] Saving the model pipeline artifact...")
    joblib_dump(pipeline, 'model/model_v1.joblib')
    
    print("[INFO] train script ended")
Example #3
File: start.py Project: lphan/ML-Practices
    def saveDependencies(cls, model, filename, joblib=False):
        # Collect the installed library versions (pip freeze style)
        lib = list(freeze(local_only=True))
        dependencies = dict(li.split('==') for li in lib)

        # Export the {package: version} manifest alongside the model
        with open('dependencies.json', 'w') as outfile:
            json.dump(dependencies, outfile)

        if joblib:
            # serialize models with large numpy arrays via joblib
            joblib_dump(model, filename)
        else:
            # serialize standard Python objects with pickle
            with open(filename, 'wb') as outfile:
                dump(model, outfile)
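A hedged counterpart for loading, mirroring the `joblib` flag above; `load_model` and `load_dependencies` are hypothetical helpers, not part of the project.

import json
import pickle
import joblib

def load_model(filename, use_joblib=False):
    if use_joblib:
        # joblib round-trips models that embed large numpy arrays
        return joblib.load(filename)
    with open(filename, 'rb') as infile:
        return pickle.load(infile)

def load_dependencies(path='dependencies.json'):
    # Read back the {package: version} manifest written above
    with open(path) as infile:
        return json.load(infile)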
Example #4
def train_one_model_per_frequency(model_dirname, frequency, num_epochs=DEFAULT_EPOCHS, batch_size=DEFAULT_BATCH_SIZE):
    model_frequency_dirname = os_path_join(model_dirname, 'k_'+str(frequency))
    os_mkdir(model_frequency_dirname)
    print('train_one_model_per_frequency. model_dirname={}, frequency={}'.format(model_dirname, frequency))

    X_all, y_all = get_training_data()
    X_all_frequency, y_all_frequency = X_all[:, frequency, :, :], y_all[:, frequency, :, :]
    print('train_one_model_per_frequency: got data')
    (X_frequency_train, X_frequency_valid, _), (y_frequency_train, y_frequency_valid, _) = split_examples(X_all_frequency, y_all_frequency)
    X_frequency_train = X_frequency_train.reshape(-1, X_frequency_train.shape[-2] * X_frequency_train.shape[-1])
    X_frequency_valid = X_frequency_valid.reshape(-1, X_frequency_valid.shape[-2] * X_frequency_valid.shape[-1])
    y_frequency_train = y_frequency_train.reshape(-1, y_frequency_train.shape[-2] * y_frequency_train.shape[-1])
    y_frequency_valid = y_frequency_valid.reshape(-1, y_frequency_valid.shape[-2] * y_frequency_valid.shape[-1])
    print('X_frequency_train.shape =', X_frequency_train.shape)
    print('y_frequency_train.shape =', y_frequency_train.shape)
    min_max_scaler = MinMaxScaler()
    model = KerasRegressor(build_fn=get_model_keras,
                           epochs=num_epochs,
                           batch_size=batch_size,
                           verbose=1)

    estimators = []
    # estimators.append(('standardize', StandardScaler()))
    estimators.append(('standardize', min_max_scaler))
    estimators.append(('mlp', model))
    pipeline = Pipeline(estimators)
    # kfold = KFold(n_splits=10, random_state=DEFAULT_RANDOM_SEED)

    print('train_one_model_per_frequency: begin training frequency={}'.format(frequency))
    # results = cross_val_score(pipeline, X_frequency_train, y_frequency_train, cv=kfold, verbose=1, error_score='raise')
    # print("Larger: %.4f (%.4f) MSE" % (results.mean(), results.std()))

    pipeline.fit(X_frequency_train, y_frequency_train)

    # Export the fitted pipeline to a file
    model_k_save_path = os_path_join(model_frequency_dirname, MODEL_DATA_SAVE_FNAME)
    joblib_dump(pipeline, model_k_save_path)

    prediction = pipeline.predict(X_frequency_valid)
    print('mean_squared_error(y_valid, prediction) =', mean_squared_error(y_frequency_valid, prediction))
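Restoring one per-frequency pipeline is the mirror image; a minimal sketch assuming the same `'k_' + str(frequency)` directory convention and the `MODEL_DATA_SAVE_FNAME` constant used above.

from os.path import join
import joblib

def load_model_for_frequency(model_dirname, frequency):
    # Rebuild the path used by train_one_model_per_frequency()
    model_k_save_path = join(model_dirname, 'k_' + str(frequency),
                             MODEL_DATA_SAVE_FNAME)
    return joblib.load(model_k_save_path)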
Example #5
def serialize(filename, obj, format=DEFAULT_FORMAT):
    if format & JOBLIB_FORMAT:
        if not has_joblib:
            raise RuntimeError(
                    'Missing library. Format (JOBLIB_FORMAT) not available.')
        joblib_dump(obj, filename, compress=3 if format & BZIP2_FORMAT else 0)
        return
    if format & BZIP2_FORMAT:
        open_fn = bz2.BZ2File
    else:
        open_fn = open
    with open_fn(filename, 'wb') as f:
        if format & PICKLE_FORMAT:
            pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        elif format & YAML_FORMAT:
            if not has_yaml:
                raise RuntimeError(
                        'Missing library. Format (YAML_FORMAT) not available.')
            # f is opened in binary mode, so ask yaml to emit bytes
            yaml.dump(obj, stream=f, encoding='utf-8')
        else:
            raise ValueError('Unknown format value.')
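Usage sketch, assuming the module defines the bit flags referenced above (`PICKLE_FORMAT`, `YAML_FORMAT`, `JOBLIB_FORMAT`, `BZIP2_FORMAT`); flags combine bitwise.

obj = {'weights': [0.1, 0.2, 0.3]}

serialize('model.pkl', obj, format=PICKLE_FORMAT)                     # plain pickle
serialize('model.pkl.bz2', obj, format=PICKLE_FORMAT | BZIP2_FORMAT)  # bzip2-wrapped pickle
serialize('model.joblib', obj, format=JOBLIB_FORMAT | BZIP2_FORMAT)   # joblib, compress=3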
Example #6
def write_graph_stats_neig_lists(G, inputs):
    out_comp_nm = inputs['dir_nm'] + inputs['out_comp_nm']
    n_nodes_net = nx.number_of_nodes(G)
    n_edges_net = nx.number_of_edges(G)

    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print("No. of nodes in network = ", n_nodes_net, file=fid)
        print("No. of edges in network = ", n_edges_net, file=fid)

    myGraphdict = nx.to_dict_of_dicts(G)

    folNm = inputs['dir_nm'] + inputs['graph_files_dir'] + "/neig_dicts"
    if not os_path.exists(folNm):
        os_mkdir(folNm)
    neig_lens = []
    for node, val in myGraphdict.items():
        with open(folNm + "/" + node, 'wb') as f:
            joblib_dump(val, f)
        neig_lens.append(len(val))
    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print("Max number of neighbors = ", max(neig_lens), file=fid)
        print("Avg number of neighbors = %.2f " % np_mean(neig_lens), file=fid)
    logging_info("Finished writing neighbor lists.")
Example #7
# round() does not accept a full NumPy array; use ndarray.round() instead
pred = clf.predict(X=X_test).round(2)
pred

# In[ ]:

savetxt(fname='pred.csv', X=pred, fmt='%f', delimiter=',')

# # Saving the model

# In[ ]:

joblib_dump(value=clf, filename='model.pkl', compress=9)
joblib_dump(value=enc_categ, filename='enc_categ.pkl', compress=9)
joblib_dump(value=enc_text, filename='enc_text.pkl', compress=9)

# In[ ]:

valid_sets = {
    'location': data_train[strCols[0]].unique().tolist(),
    'contract': data_train[strCols[1]].unique().tolist()
}

with open('valid_sets.json', 'w') as fp:
    dump(valid_sets, fp)
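At inference time the three artifacts and the valid sets can be restored together; a minimal sketch using `joblib.load` and `json.load` with the filenames saved above.

import json
import joblib

# Restore the classifier and both encoders saved above
clf = joblib.load('model.pkl')
enc_categ = joblib.load('enc_categ.pkl')
enc_text = joblib.load('enc_text.pkl')

# Reload the allowed categorical values for input validation
with open('valid_sets.json') as fp:
    valid_sets = json.load(fp)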
Example #8
    def dump_model_joblib(self, model, name):
        # Persist a fitted model under output_dir as <name>.joblib
        model_path = os.path.join(self.output_dir, name + ".joblib")
        joblib_dump(model, model_path)
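A hypothetical companion loader following the same naming convention (not part of the original class); it assumes a `joblib_load` alias imported the same way as `joblib_dump`.

    def load_model_joblib(self, name):
        # Counterpart to dump_model_joblib() above
        model_path = os.path.join(self.output_dir, name + ".joblib")
        return joblib_load(model_path)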