Example #1
def fit():
    req = request.get_json()
    neighbours = 2
    if "params" in req:
        neighbours = req["params"]["neighbours"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    clf = LocalOutlierFactor(n_neighbors=int(neighbours))
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    y_pred = clf.fit_predict(X)

    s = pickle.dumps(clf)
    write_file(user_id, project_id, "pickle.pkl", s)
    resultObj = {
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(pd.DataFrame(y_pred_outliers).to_json()),
    }

    return json.dumps(resultObj)
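
For reference, a minimal standalone sketch of the LocalOutlierFactor call this endpoint wraps (the Flask plumbing and the project-specific read_file/preProcess helpers are omitted; the sample points are made up):

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

# Four tight points plus one far-away point; LOF should flag the last one.
X = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1], [0.1, 0.1], [10.0, 10.0]])
clf = LocalOutlierFactor(n_neighbors=2)
y_pred = clf.fit_predict(X)  # 1 = inlier, -1 = outlier
print(y_pred)                # e.g. [ 1  1  1  1 -1]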
Example #2
def fit():
    req = request.get_json()
    neighbours = 2
    algorithm = "ball_tree"
    metric = "euclidean"
    if "params" in req:
        if "neighbours" in req["params"]: neighbours = req["params"]["neighbours"]
        if "algorithm" in req["params"]: algorithm = req["params"]["algorithm"]
        if "metric" in req["params"]: metric = req["params"]["metric"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)

    NN = NearestNeighbors(n_neighbors=int(neighbours), algorithm=algorithm, metric=metric)
    nbrs = NN.fit(X)
    # Pickle the model only after fitting so predict() can reuse it as-is.
    s = pickle.dumps(NN)
    write_file(user_id, project_id, "pickle.pkl", s)
    distances, indices = nbrs.kneighbors(X)
    return json.dumps({
        "dataset": json.loads(rawX.to_json()),
        "indexes": json.loads(pd.DataFrame(indices).to_json()),
        "distances": json.loads(pd.DataFrame(distances).to_json()),
    })
Example #3
def predict():
    
    req = request.get_json()
    if "params" in req:
        data = req["params"]["data"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    P = Projects(user_id, project_id)
    project = P.read(id=project_id)
    P.addDataset(data)
    dataset = P.getDataset()
    # Reshape the stored dataset into a dict of columns (a DataFrame-like object).
    X = {}
    if isinstance(dataset[0]["data"], str):
        dataset[0]["data"] = json.loads(dataset[0]["data"])

    for k in dataset[0]["data"]:
        X[k] = []

    for i in dataset:
        if i["data"] == "data":
            continue
        obj = json.loads(i["data"]) if isinstance(i["data"], str) else i["data"]
        for k in obj:
            X[k].append(obj[k])

    
    # Convert the column dict to a pandas DataFrame.
    rawX = pd.DataFrame(X)
    X = preProcess(dataset=rawX)
    pkl_file = get_pickle(user_id + "/" + project_id + "/pickle.pkl")
    if pkl_file is None:
        return apierrors.ErrorMessage("No pickle file found, maybe you should train the model first")
    model = pickle.load(pkl_file)
    nbrs = model.fit(X)
    distances, indices = nbrs.kneighbors(X)

    return json.dumps({
        "data": json.loads(rawX.to_json()),
        "indexes": json.loads(pd.DataFrame(indices).to_json()),
    })
Example #4
def fit():
    req = request.get_json()
    eps = 0.7
    min_samples = 4
    if "params" in req:
        eps = float(req["params"]["eps"])
        min_samples = int(req["params"]["min"])  # DBSCAN expects an integer here
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
        if "max" in req["params"]:
            max_samples = req["params"]["max"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if (dataset == None): return apierrors.ErrorMessage("dataset not found")

    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    DB = DBSCAN(eps=eps, min_samples=min_samples)
    db = DB.fit(X)
    # Pickle the model only after fitting so predict() can reuse it as-is.
    s = pickle.dumps(DB)
    write_file(user_id, project_id, "pickle.pkl", s)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    resultObj = {
        "clusters": n_clusters_,
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(pd.DataFrame(labels).to_json())
    }
    return json.dumps(resultObj)
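
A toy, self-contained version of the DBSCAN cluster count computed above (the data and parameters are illustrative):

import numpy as np
from sklearn.cluster import DBSCAN

# Two compact groups plus one stray point (labelled -1 as noise).
X = np.array([[0.0, 0.0], [0.1, 0.1], [0.2, 0.0],
              [5.0, 5.0], [5.1, 5.1], [5.0, 5.2],
              [20.0, 20.0]])
db = DBSCAN(eps=0.7, min_samples=2).fit(X)
labels = db.labels_                                   # e.g. [0 0 0 1 1 1 -1]
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print(n_clusters)                                     # 2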
Example #5
def fit():
    req = request.get_json()
    dimensions = 2
    if "params" in req:
        dimensions = req["params"]["dimensions"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None:
        return apierrors.ErrorMessage("dataset not found")

    # Parse and preprocess the CSV as in the other endpoints; PCA cannot
    # operate on the raw file bytes returned by read_file.
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    pca = PCA(n_components=int(dimensions))
    transformed = pca.fit_transform(X)
    resultObj = {
        "original": json.loads(rawX.to_json()),
        "transformed": json.loads(pd.DataFrame(transformed).to_json()),
    }
    return json.dumps(resultObj)
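
A minimal sketch of the PCA reduction this endpoint performs, on made-up data:

import numpy as np
from sklearn.decomposition import PCA

# Three 3-D points projected down to 2 components.
X = np.array([[1.0, 2.0, 3.0], [2.0, 4.0, 6.1], [3.0, 6.0, 8.9]])
pca = PCA(n_components=2)
transformed = pca.fit_transform(X)
print(transformed.shape)              # (3, 2)
print(pca.explained_variance_ratio_)  # how much variance each component keeps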
Example #6
def fit():
    nu = 0.3
    kernel = "rbf"
    gamma = 0.1
    degree = 3
    coef0 = 0.0
    tol = 0.001
    shrinking = True
    req = request.get_json()
    if "params" in req:
        if "nu" in req["params"]: nu = req["params"]["nu"]
        if "kernel" in req["params"]: kernel = req["params"]["kernel"]
        if "gamma" in req["params"]: gamma = req["params"]["gamma"]
        if "degree" in req["params"]: degree = req["params"]["degree"]
        if "coef0" in req["params"]: coef0 = req["params"]["coef0"]
        if "tol" in req["params"]: tol = req["params"]["tol"]
        if "shrinking" in req["params"]: shrinking = req["params"]["shrinking"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    print("start")
    fullPath = user_id + "/"+project_id+"/" + filename
    print(fullPath)
    dataset = read_file(fullPath)
    print(dataset)
    if(dataset==None): return apierrors.ErrorMessage("dataset not found")
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    X_train = X[0:int(len(X) * 0.66)]
    print(nu,kernel,gamma)
    # fit the model
    clf = svm.OneClassSVM(degree=int(degree), coef0=float(coef0),tol=float(tol),shrinking=bool(shrinking), nu=float(nu), kernel=kernel, gamma=float(gamma))
    clf.fit(X_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X)
    n_error_train = y_pred_train[y_pred_train == -1].size
    n_error_test = y_pred_test[y_pred_test == -1].size
    s = pickle.dumps(clf)
    write_file(user_id, project_id, "pickle.pkl", s)

    return json.dumps({          
        "dataset": json.loads(rawX.to_json()),        
        "labels": y_pred_test.tolist()
    })
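
A standalone sketch of the same train-on-a-slice, predict-on-everything pattern with OneClassSVM (random toy data; the parameters mirror the defaults above):

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
X_train = 0.3 * rng.randn(100, 2)            # "normal" training data
X_test = np.array([[0.1, 0.1], [4.0, 4.0]])  # one normal point, one anomaly
clf = svm.OneClassSVM(nu=0.3, kernel="rbf", gamma=0.1)
clf.fit(X_train)
print(clf.predict(X_test))  # e.g. [ 1 -1]: 1 = inlier, -1 = outlier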
Example #7
def predict():
    req = request.get_json()
    if "params" in req:
        data = req["params"]["data"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
    else:
        return apierrors.NoData()

    P = Projects(user_id, project_id)
    project = P.read(id=project_id)
    P.addDataset(data)
    dataset = P.getDataset()
    # Reshape the stored dataset into a dict of columns (a DataFrame-like object).
    X = {}
    if isinstance(dataset[0]["data"], str):
        dataset[0]["data"] = json.loads(dataset[0]["data"])

    for k in dataset[0]["data"]:
        X[k] = []

    for i in dataset:
        obj = json.loads(i["data"]) if isinstance(i["data"], str) else i["data"]
        for k in obj:
            X[k].append(obj[k])
    # Convert the column dict to a pandas DataFrame.
    rawX = pd.DataFrame(X)
    X = preProcess(dataset=rawX)

    pkl_file = get_pickle(user_id + "/" + project_id + "/pickle.pkl")
    if pkl_file is None:
        return apierrors.ErrorMessage("No pickle file found, maybe you should train the model first")

    model = pickle.load(pkl_file)
    labels = pd.DataFrame(model.predict(X)).to_json()
    return json.dumps({
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(labels)
    })
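
In isolation, the pickle round-trip these fit/predict pairs rely on; the handlers push the bytes through the project's write_file/get_pickle storage helpers instead of keeping them in memory, and the DBSCAN model here is only an example:

import pickle
from sklearn.cluster import DBSCAN

model = DBSCAN(eps=0.7, min_samples=4)
blob = pickle.dumps(model)      # the bytes that fit() persists
restored = pickle.loads(blob)   # the object that predict() recovers
print(type(restored).__name__)  # DBSCAN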
Example #8
def fit():
    req = request.get_json()
    if ("params" in req):
        eps = req["params"]["eps"]
        min_samples = req["params"]["min"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["project_id"]
        if (user_id == None or project_id == None or filename == None):
            return apierrors.NoData()
        if "max" in req:
            max_samples = req["params"]["max"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename

    dataset = read_file(fullPath)
    if dataset is None:
        return apierrors.ErrorMessage("dataset not found")
    y = pd.read_csv(StringIO(dataset.decode('utf-8')))
    # Backfill missing values before fitting the time-series model.
    y = y.fillna(y.bfill())

    p = d = q = range(0, 2)

    # Generate all combinations of (p, d, q) triplets
    pdq = list(itertools.product(p, d, q))

    # Generate all combinations of seasonal (p, d, q) triplets with period 12
    seasonal_pdq = [(x[0], x[1], x[2], 12)
                    for x in list(itertools.product(p, d, q))]
    # Fit every combination and record its AIC.

    warnings.filterwarnings("ignore")  # specify to ignore warning messages

    pdqparams = []
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = sm.tsa.statespace.SARIMAX(
                    y,
                    order=param,
                    seasonal_order=param_seasonal,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

                results = mod.fit()
                pdqparams.append([param, param_seasonal, results.aic])
            except Exception:
                continue

    # Pick the parameter combination with the lowest AIC.
    minval = None
    minparams = None
    for i in pdqparams:
        if minval is None or i[2] < minval:
            minval = i[2]
            minparams = i

    mod = sm.tsa.statespace.SARIMAX(y,
                                    order=minparams[0],
                                    seasonal_order= minparams[1],
                                    enforce_stationarity=False,
                                    enforce_invertibility=False)

    results = mod.fit()

    # Persist the fitted results rather than the bare, unfitted model object.
    s = pickle.dumps(results)
    write_file(user_id, project_id, "pickle.pkl", s)

    return ""