Example #1
File: lof.py Project: david1983/rtads_ml
def fit():
    req = request.get_json()
    neighbours = 2
    if "params" in req:
        neighbours = req["params"].get("neighbours", neighbours)
        user_id = req["params"].get("user_id")
        project_id = req["params"].get("project_id")
        filename = req["params"].get("filename")
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    clf = LocalOutlierFactor(n_neighbors=int(neighbours))
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    y_pred = clf.fit_predict(X)  # 1 = inlier, -1 = outlier

    # persist the fitted model for later reuse
    s = pickle.dumps(clf)
    write_file(user_id, project_id, "pickle.pkl", s)
    resultObj = {
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(pd.DataFrame(y_pred_outliers).to_json()),
    }

    return json.dumps(resultObj)
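
For reference, a minimal standalone sketch of the same LocalOutlierFactor call with the Flask and file-storage plumbing stripped away; the toy array is made up for illustration:

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

X = np.array([[1.0, 1.1], [1.2, 0.9], [0.9, 1.0], [10.0, 10.0]])  # last row is an obvious outlier
clf = LocalOutlierFactor(n_neighbors=2)
labels = clf.fit_predict(X)  # 1 = inlier, -1 = outlier
print(labels)
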
Example #2
File: knn.py Project: david1983/rtads_ml
def fit():
    req = request.get_json()
    neighbours = 2
    algorithm = "ball_tree"
    metric = "euclidean"
    if ("params" in req):
        if("neighbours" in req["params"]): neighburs = req["params"]["neighbours"]
        if("algorithm" in req["params"]): algorithm = req["params"]["algorithm"]
        if("metric" in req["params"]): metric = req["params"]["metric"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if (user_id == None or project_id == None or filename == None):  return apierrors.NoData()
    else:
        return apierrors.NoData();

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)

    NN = NearestNeighbors(n_neighbors=int(neighbours), algorithm=algorithm, metric=metric)
    nbrs = NN.fit(X)
    # persist the fitted model (pickling before fit would store an untrained estimator)
    s = pickle.dumps(NN)
    write_file(user_id, project_id, "pickle.pkl", s)
    distances, indices = nbrs.kneighbors(X)
    return json.dumps({
        "dataset": json.loads(rawX.to_json()),
        "indexes": json.loads(pd.DataFrame(indices).to_json()),
        "distances": json.loads(pd.DataFrame(distances).to_json()),
    })
Example #3
File: svm.py Project: david1983/rtads_ml
def fit():
    nu=0.3
    kernel="rbf"
    gamma=0.1   
    degree=3
    coef0=0.0
    tol=0.001
    shrinking=True
    req = request.get_json()
    if "params" in req:
        if("nu" in req["params"]): nu = req["params"]["nu"]
        if("kernel" in req["params"]): kernel = req["params"]["kernel"]
        if("gamma" in req["params"]): gamma = req["params"]["gamma"]        
        if("degree" in req["params"]): degree = req["params"]["degree"]        
        if("coef0" in req["params"]): coef0 = req["params"]["coef0"]        
        if("tol" in req["params"]): tol = req["params"]["tol"]        
        if("shrinking" in req["params"]): shrinking = req["params"]["shrinking"]        
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
    else:
        return apierrors.NoData()

    print("start")
    fullPath = user_id + "/"+project_id+"/" + filename
    print(fullPath)
    dataset = read_file(fullPath)
    print(dataset)
    if(dataset==None): return apierrors.ErrorMessage("dataset not found")
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    X_train = X[0:int(len(X) * 0.66)]  # first 66% of rows as the training split
    # fit the one-class model
    clf = svm.OneClassSVM(degree=int(degree), coef0=float(coef0), tol=float(tol),
                          shrinking=bool(shrinking), nu=float(nu), kernel=kernel,
                          gamma=float(gamma))
    clf.fit(X_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X)
    n_error_train = y_pred_train[y_pred_train == -1].size  # points flagged as outliers
    n_error_test = y_pred_test[y_pred_test == -1].size
    s = pickle.dumps(clf)
    write_file(user_id, project_id, "pickle.pkl", s)

    return json.dumps({          
        "dataset": json.loads(rawX.to_json()),        
        "labels": y_pred_test.tolist()
    })
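
A minimal standalone sketch of the same OneClassSVM train/score flow, with synthetic data in place of the project's CSV pipeline:

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
X_train = 0.3 * rng.randn(100, 2)            # tight inlier cloud
X_test = np.vstack([X_train, [[4.0, 4.0]]])  # plus one obvious outlier

clf = svm.OneClassSVM(nu=0.3, kernel="rbf", gamma=0.1)
clf.fit(X_train)
y_pred = clf.predict(X_test)                 # 1 = inlier, -1 = outlier
print((y_pred == -1).sum(), "points flagged as outliers")
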
Example #4
def fit():
    req = request.get_json()
    eps = 0.7
    min_samples = 4
    if ("params" in req):
        eps = float(req["params"]["eps"])
        min_samples = float(req["params"]["min"])
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if (user_id == None or project_id == None or filename == None):
            return apierrors.NoData()
        if "max" in req:
            max_samples = req["params"]["max"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None: return apierrors.ErrorMessage("dataset not found")

    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    DB = DBSCAN(eps=eps, min_samples=min_samples)
    db = DB.fit(X)
    # persist the fitted model (pickling before fit would store an untrained estimator)
    s = pickle.dumps(DB)
    write_file(user_id, project_id, "pickle.pkl", s)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)  # -1 labels noise, not a cluster

    resultObj = {
        "clusters": n_clusters_,
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(pd.DataFrame(labels).to_json())
    }
    return json.dumps(resultObj)
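
The clustering step itself, without the endpoint plumbing, is essentially the standard scikit-learn DBSCAN pattern; the toy points below are invented for illustration:

import numpy as np
from sklearn.cluster import DBSCAN

X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]], dtype=float)
db = DBSCAN(eps=3.0, min_samples=2).fit(X)
labels = db.labels_                         # -1 marks noise
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print(n_clusters, labels)                   # 2 clusters; [25, 80] ends up as noise
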
Example #5
File: pca.py Project: david1983/rtads_ml
def fit():
    req = request.get_json()
    dimensions = 2
    if "params" in req:
        dimensions = int(req["params"].get("dimensions", dimensions))
        user_id = req["params"].get("user_id")
        project_id = req["params"].get("project_id")
        filename = req["params"].get("filename")
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None: return apierrors.ErrorMessage("dataset not found")

    # parse and preprocess the CSV before projecting, as in the other endpoints;
    # the raw bytes from read_file cannot be fed to PCA directly
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    pca = PCA(n_components=dimensions)
    transformed = pca.fit_transform(X)
    resultObj = {
        "original": json.loads(rawX.to_json()),
        "transformed": transformed.tolist(),
    }
    return json.dumps(resultObj)
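
The projection step alone looks roughly like this sketch (random data assumed for illustration):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(10, 5)   # 10 samples, 5 features
pca = PCA(n_components=2)
transformed = pca.fit_transform(X)         # shape (10, 2)
print(pca.explained_variance_ratio_)       # variance captured by each component
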
Example #6
def analyse():
    from pandas.plotting import scatter_matrix
    req = request.get_json()
    user_id = req["params"]["user_id"]
    project_id = req["params"]["project_id"]
    filename = req["params"]["filename"]
    fullPath = user_id + "/" + project_id + "/" + filename
    dataset_file = read_file(fullPath)
    if dataset_file is None:
        return apierrors.ErrorMessage("dataset not found")

    file = StringIO(dataset_file.decode('utf-8'))
    dataset = pd.read_csv(file)
    if "label_encode" in req:
        dataset = pd.read_csv(file, dtype="unicode")
        dataset = dataset.apply(le().fit_transform)
    dataset = dataset.fillna(0)

    hp = plt.subplot()
    dataset.hist(ax=hp, figsize=(12, 12))
    dp = dataset.plot(kind='density')
    bp = dataset.plot(kind='box')
    sm = scatter_matrix(dataset, figsize=(12, 12))

    resultset = {
        "plot": write_base64_img(user_id, project_id, "plot.png", plot(dataset.plot())),
        "hp_plot": write_base64_img(user_id, project_id, "hp.png", plot(hp)),
        "dp_plot": write_base64_img(user_id, project_id, "dp.png", plot(dp)),
        "bp_plot": write_base64_img(user_id, project_id, "bp.png", plot(bp)),
        "sm_plot": write_base64_img(user_id, project_id, "sm.png", plot(sm[0][0])),
    }
    return json.dumps(resultset)
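
The project's plot() and write_base64_img() helpers are not shown here; a common way to get the same effect (a pandas plot rendered to a base64 PNG string) is sketched below, assuming only matplotlib and pandas:

import base64
from io import BytesIO
import matplotlib
matplotlib.use("Agg")                      # headless backend, no display needed
import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [4, 3, 2, 1]})
ax = df.plot(kind="box")
buf = BytesIO()
ax.get_figure().savefig(buf, format="png")
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
plt.close(ax.get_figure())
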
Example #7
def fit():
    req = request.get_json()
    if ("params" in req):
        eps = req["params"]["eps"]
        min_samples = req["params"]["min"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["project_id"]
        if (user_id == None or project_id == None or filename == None):
            return apierrors.NoData()
        if "max" in req:
            max_samples = req["params"]["max"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename

    dataset = read_file(fullPath)
    if dataset is None: return apierrors.ErrorMessage("dataset not found")
    y = pd.read_csv(StringIO(dataset.decode('utf-8')))
    y = y.fillna(y.bfill())  # backfill missing observations

    p = d = q = range(0, 2)

    # Generate all different combinations of (p, d, q) triplets
    pdq = list(itertools.product(p, d, q))

    # Generate all different combinations of seasonal (p, d, q, s) quadruplets with period 12
    seasonal_pdq = [(x[0], x[1], x[2], 12)
                    for x in list(itertools.product(p, d, q))]

    # test every combination of (p, d, q) parameters and record its AIC

    warnings.filterwarnings("ignore")  # specify to ignore warning messages

    pdqparams = []
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = sm.tsa.statespace.SARIMAX(
                    y,
                    order=param,
                    seasonal_order=param_seasonal,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

                results = mod.fit()
                pdqparams.append([param, param_seasonal, results.aic])
            except Exception:
                continue

    # pick the combination with the lowest AIC
    minval = None
    minparams = None
    for i in pdqparams:
        if minval is None or i[2] < minval:
            minval = i[2]
            minparams = i

    mod = sm.tsa.statespace.SARIMAX(y,
                                    order=minparams[0],
                                    seasonal_order= minparams[1],
                                    enforce_stationarity=False,
                                    enforce_invertibility=False)

    results = mod.fit()

    # persist the fitted results object rather than the unfitted model specification
    s = pickle.dumps(results)
    write_file(user_id, project_id, "pickle.pkl", s)

    return ""