import logging
from time import time

from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")


def nnalgo(X):
    """Fit a 5-nearest-neighbours model on X and return, for every row,
    the distances to and the indices of its 5 nearest neighbours."""
    nbrs = NearestNeighbors(n_neighbors=5, algorithm="auto").fit(X)
    distances, indices = nbrs.kneighbors(X)
    return distances, indices
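
# Example usage (hypothetical): given a TF-IDF document-term matrix X,
#   distances, indices = nnalgo(X)
# indices[i] lists the 5 documents closest to document i (each point is its own
# nearest neighbour, so indices[i][0] == i), and distances[i] the matching distances.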


print("Loading Data from database")

from database import getdataset

sql = "SELECT * FROM news ORDER BY RAND()"
dataset = getdataset(sql)


print("%d documents" % len(dataset.data))
print("%d categories" % len(dataset.target_names))
print()

print("Extracting features from the training dataset using a sparse vectorizer")
t0 = time()

# Maximum number of features (vocabulary size) kept by the vectorizer.
n_features = 10000
use_idf = True

# Drop terms appearing in more than 60% of documents, plus English stop words.
vectorizer = TfidfVectorizer(max_df=0.6, max_features=n_features,
                             stop_words="english", use_idf=use_idf)
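
# A minimal sketch of the extraction step this block sets up (hypothetical,
# not in the original; the actual call may happen later in the script):
#
#   X = vectorizer.fit_transform(dataset.data)
#   print("done in %fs" % (time() - t0))
#   print("n_samples: %d, n_features: %d" % X.shape)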
categories = dataset.target_names
print(categories)
# Alternative loader kept for reference: read 20 newsgroups style data from disk
# instead of the database.
'''
dataset = fetch_20newsgroups2(data_home=r'C:\ml_datasets\classification', subset='train',
                              categories=categories, shuffle=True, random_state=42)

dataset_test = fetch_20newsgroups2(data_home='C:/ml_datasets/classification/test', subset='train',
                                   categories=categories, shuffle=True, random_state=42)
'''


sql="SELECT * from news where subset=0  order by RAND()"
#sql="SELECT * from news order by RAND()" 
dataset_train=getdataset(sql)

#sqlsub="SELECT id from news where subset=0  order by RAND() LIMIT 40 "

sql2="SELECT * from news where subset=1 order by RAND()"

dataset_test=getdataset(sql2)
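
# Quick sanity check on the split (assumes getdataset() returns objects with a
# .data list, as used above for the full corpus).
print("%d training / %d test documents" % (len(dataset_train.data), len(dataset_test.data)))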



from stemming import stems

# Stem both splits in place (stems() is assumed to modify dataset.data directly,
# since its return value is not used).
stems(dataset_train)
stems(dataset_test)
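
# For reference, a minimal sketch of what such a stems() helper might look like
# (hypothetical; the real implementation lives in stemming.py):
#
#   from nltk.stem.snowball import SnowballStemmer
#   _stemmer = SnowballStemmer("english")
#
#   def stems(dataset):
#       dataset.data = [" ".join(_stemmer.stem(w) for w in doc.split())
#                       for doc in dataset.data]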