コード例 #1
0
ファイル: manually_label.py プロジェクト: rbinrais/py-ml
def ask_for_labels(folder, filename):
    """Label the CSV ``folder/filename`` and publish the labelled copy.

    The frame returned by ``get_csv`` is passed through ``hand_label`` and
    then ``auto_label`` (which receives the index returned by
    ``hand_label``). The result is written via ``write_csv`` under the
    original name with ``_with_labels`` inserted before the extension, and
    that file is then handed to ``azure_helper.upload_to_blob``.

    Args:
        folder: Folder passed to ``get_csv`` / ``write_csv`` and used as the
            prefix of the path given to the upload helper.
        filename: Name of the input CSV inside ``folder``.
    """
    df = get_csv(folder, filename)
    df, index = hand_label(df)
    df = auto_label(df, index)

    # Split on the LAST dot only, so names such as "comments.v2.csv" no
    # longer fail to unpack; a name without any dot simply gets the suffix
    # appended and keeps no extension.
    stem, dot, ext = filename.rpartition(".")
    if not dot:
        stem, ext = filename, ""
    labelled_filename = stem + "_with_labels" + dot + ext

    write_csv(df, folder, labelled_filename)
    azure_helper.upload_to_blob(labelled_filename,
                                folder + "/" + labelled_filename)
コード例 #2
0
ファイル: predict_labels.py プロジェクト: rbinrais/py-ml
def generate_predictions():
    """Predict a label for every row of ``comments.csv`` and upload the result.

    Steps, in order:
      1. Load ``comments.csv`` through ``load_data_from_blob``.
      2. Build features with ``generate_features`` and ``feature_size_pad``.
      3. Fetch ``clf.joblib`` and ``label_encoder.joblib`` through
         ``azure_helper.download_from_blob`` and load them with joblib.
      4. Predict, map the predictions back through the encoder's
         ``inverse_transform`` and store them in a ``labels`` column.
      5. Write ``predicted_labels.csv`` (without the index) and pass it to
         ``azure_helper.upload_to_blob``.
    """
    model_path = "clf.joblib"
    encoder_path = "label_encoder.joblib"
    output_path = "predicted_labels.csv"

    comments = load_data_from_blob("comments.csv")
    padded_features = feature_size_pad(generate_features(comments))

    # Both artifacts are fetched before either one is loaded.
    azure_helper.download_from_blob(model_path, model_path)
    azure_helper.download_from_blob(encoder_path, encoder_path)

    predicted = joblib.load(model_path).predict(padded_features)
    encoder = joblib.load(encoder_path)
    comments["labels"] = encoder.inverse_transform(predicted)

    comments.to_csv(output_path, index=False)
    azure_helper.upload_to_blob(output_path, output_path)
コード例 #3
0
ファイル: modeling.py プロジェクト: rbinrais/py-ml
def generate_clf(csv):
    """Fit an SVC on the data loaded from ``csv`` and upload the artifacts.

    The ``comments`` column is turned into features by ``do_word2vec``
    followed by ``feature_post_processing``; the ``labels`` column is
    converted by ``transform_labels``, which also returns the encoder.
    The fitted classifier and the encoder are dumped with joblib to
    ``clf.joblib`` and ``label_encoder.joblib`` and then passed to
    ``azure_helper.upload_to_blob``.

    Args:
        csv: Identifier handed to ``load_data_from_blob``.
    """
    frame = load_data_from_blob(csv)
    vectors = feature_post_processing(do_word2vec(frame["comments"]))
    targets, label_encoder = transform_labels(frame["labels"])

    hyperparameters = {
        "class_weight": "balanced",
        "tol": 1e-5,
        "gamma": "scale",
        "kernel": "sigmoid",
        "random_state": 42,  # fixed seed
        "C": 0.8,
    }
    classifier = SVC(**hyperparameters)
    classifier.fit(vectors, targets)

    artifacts = (
        (classifier, 'clf.joblib'),
        (label_encoder, 'label_encoder.joblib'),
    )
    # Dump everything first, then upload, in the same order for both passes.
    for obj, path in artifacts:
        joblib.dump(obj, path)
    for _, path in artifacts:
        azure_helper.upload_to_blob(path, path)
コード例 #4
0
def _save_data(df, file_name):
    """Merge ``df`` with any existing CSV at ``file_name``, save and upload.

    If ``file_name`` already exists locally, its rows are read and placed
    after the rows of ``df``. The combined frame is written back to
    ``file_name`` without the index and the file is then passed to
    ``azure_helper.upload_to_blob``.

    Args:
        df: DataFrame holding the new rows.
        file_name: Local CSV path; also used as the name given to the
            upload helper.
    """
    if os.path.exists(file_name):
        previous = pd.read_csv(file_name)
        # DataFrame.append was removed in pandas 2.0; pd.concat keeps the
        # same row order (new rows first, previously saved rows after).
        df = pd.concat([df, previous])
    df.to_csv(file_name, index=False)
    azure_helper.upload_to_blob(file_name, file_name)