df = encode_and_bind(df, 'CodeSonar Rule') df = encode_and_bind(df, 'Severity') df = encode_and_bind(df, 'CWE') df = df[np.isfinite(df['True Positive'])] X = df.drop('True Positive', axis=1) y = df.loc[:, 'True Positive'] #parameters for models defined here, simply change init_labels, trn_tst_split, splits to change experiment stop = 300 #stopping value for number of queries init_labels = 0.005 #initially labelled portion of the dataset trn_tst_split = 0.2 #train test split to use for each fold splits = 5 #number of k folds al_unc = AlExperiment(X, y, model=LogisticRegression(penalty='l1', solver='liblinear'), performance_metric='accuracy_score', stopping_criteria='num_of_queries', stopping_value=stop) al_unc.split_AL(test_ratio=trn_tst_split, initial_label_rate=init_labels, split_count=splits, all_class=True) al_unc.set_query_strategy(strategy='QueryInstanceUncertainty') al_unc.set_performance_metric(performance_metric='accuracy_score') al_unc.start_query(multi_thread=False) # print(al.get_experiment_result()) # al.plot_learning_curve() analyser = ExperimentAnalyser(x_axis='num_of_queries') analyser.add_method('uncertainty', al_unc.get_experiment_result()) al_qbc = AlExperiment(X,
from sklearn.datasets import load_iris from alipy.experiment import AlExperiment # Get the data X, y = load_iris(return_X_y=True) for strategy in [ 'QueryInstanceQBC', 'QueryInstanceUncertainty', 'QueryInstanceRandom', 'QureyExpectedErrorReduction', 'QueryInstanceGraphDensity', 'QueryInstanceQUIRE', 'QueryInstanceBMDR', 'QueryInstanceSPAL', 'QueryInstanceLAL', 'QueryExpectedErrorReduction' ]: # init the AlExperiment al = AlExperiment(X, y, stopping_criteria='num_of_queries', stopping_value=50) # split the data by using split_AL() al.split_AL(split_count=5) # al.set_query_strategy(strategy=strategy) # al.set_performance_metric('accuracy_score') # al.start_query(multi_thread=True) # or set the data split indexes by input the specific parameters from alipy.data_manipulate import split train, test, lab, unlab = split(X=X,
from sklearn.feature_extraction.text import TfidfVectorizer as tfidfvec

# Load the pickled documents and the matching array of class labels.
phy_data = pd.read_pickle("./phy_2go.pkl")
classes = np.load("./phy_2go_class.npy")
X = phy_data['Body']
y = classes

# Fit a unigram TF-IDF vocabulary (at most 5000 terms, min_df=10) on the
# 'Body' column, then turn that same column into the feature matrix.
vectorizer = tfidfvec(max_features=5000, min_df=10, ngram_range=(1, 1))
vectorizer.fit(X)
X_Vect = vectorizer.transform(X)
print("vectorized")

# Active-learning experiment over the vectorized text: 100 queries,
# 5 instances per batch, Crammer-Singer multi-class linear SVM.
al = AlExperiment(
    X_Vect,
    y,
    model=LinearSVC(multi_class='crammer_singer'),
    stopping_criteria='num_of_queries',
    stopping_value=100,
    batch_size=5,
)
print(classes)
print("constructed")

# Build a single train/test/labelled/unlabelled index split with the
# stand-alone split() helper. The initial label rate is 50 divided by the
# number of rows in X.
from alipy.data_manipulate import split

x = 50 / X.shape[0]
print(x)
train, test, lab, unlab = split(
    X=X,
    y=y,
    test_ratio=0.3,
    initial_label_rate=x,
    split_count=1,
)
data_cumh = pd.concat([temp2, temp1]) X, y = data_cumh['Review'], data_cumh['Rating (Star)'] print(y) vectorizer = tfidfvec(max_features=5000, min_df=10, ngram_range=(1, 2)) vectorizer.fit(X) X_Vect = vectorizer.transform(X) scaler.fit(X_Vect) X_Vect = scaler.transform(X_Vect) print("vectorized") al = AlExperiment(X_Vect, y, stopping_criteria='num_of_queries', stopping_value=250, batch_size=1) print(data_cumh.shape) print("constructed") # split the data by using split_AL() from alipy.data_manipulate import split x = 20 / X.shape[0] print(x) train, test, lab, unlab = split(X=X, y=y, test_ratio=0.3, initial_label_rate=x,
from sklearn.datasets import load_iris

from alipy.data_manipulate import split
from alipy.experiment import AlExperiment

# Get the data
X, y = load_iris(return_X_y=True)

# Create the experiment on the iris features and labels.
al = AlExperiment(X, y)

# Option 1: let the experiment generate its own splits.
al.split_AL(test_ratio=0.3, initial_label_rate=0.05, split_count=10)

# Option 2: compute the split indexes explicitly with split() and hand
# them to the experiment.
train, test, lab, unlab = split(
    X=X,
    y=y,
    test_ratio=0.3,
    initial_label_rate=0.05,
    split_count=10,
)
al.set_data_split(
    train_idx=train,
    test_idx=test,
    label_idx=lab,
    unlabel_idx=unlab,
)

# Choose the query strategy; here one of the pre-defined strategies is
# selected by name.
al.set_query_strategy(strategy="QueryInstanceUncertainty")

# or using your own query strategy
# class my_qs_class: