コード例 #1
0
ファイル: kdiag.py プロジェクト: Dabz/kdiag
def main():
    """Parse the command line and run the requested kdiag command.

    Commands:
        gather:  (default) validate the output directory, call ``gather()``
                 and write the collected environment with ``writer.write``.
                 When ``--directory`` is omitted a fresh temporary
                 directory is created.
        display: call ``display(directory)``.
        test:    call ``reader.read(directory)`` followed by ``test()``.

    Raises:
        Exception: if ``display`` or ``test`` is requested without
            ``--directory``.
    """
    parser = argparse.ArgumentParser(
        description='Gather information about the Apache Kafka host.')
    parser.add_argument(
        'command',
        nargs='?',
        default="gather",
        choices=["gather", "display", "test"],
        help='command to perform, either gather (default), display or test')

    parser.add_argument('--directory', help='input or output directory')

    # A plain on/off flag.  The previous nargs="?" + type=bool combination
    # turned any non-empty value (including "False") into True and let
    # "-f display" swallow the positional command.
    parser.add_argument(
        '--force',
        '-f',
        action='store_true',
        help='force the output even if the directory contains data')

    ns = parser.parse_args(sys.argv[1:])
    directory = ns.directory

    # Both read-only commands need an existing input directory.
    if ns.command in ("display", "test") and directory is None:
        raise Exception("require input directory to be specified")

    if ns.command == "gather":
        if directory is None:
            directory = tempfile.mkdtemp('_output', 'kdiag_')

        # Validate before gathering so a bad directory is reported before
        # the gather step runs.
        writer.validate(directory, force=ns.force)
        gather()
        sys.stdout.write("\n")
        writer.write(directory,
                     environment.Environment.getInstance(),
                     force=ns.force)
    elif ns.command == "display":
        display(directory)
    elif ns.command == "test":
        reader.read(directory)
        test()
コード例 #2
0
ファイル: clf.py プロジェクト: ataki/hospitalfinder
def main(argv):
    """Refuse to run as a script; this module is meant for interactive use.

    The usage check and the two ``reader.read`` calls that used to follow
    the ``raise`` could never execute, so they have been removed.

    Args:
        argv: Command-line argument list.  Accepted for interface
            compatibility; it is never inspected.

    Raises:
        RuntimeError: Always (a subclass of ``Exception``, which is what
            this function raised before).
    """
    raise RuntimeError(
        "Don't call this from main. Instead, open up a Python interpreter "
        "in the project directory and type `from clf import *` ")
コード例 #3
0
ファイル: pca.py プロジェクト: ataki/hospitalfinder
def main(argv):
	"""Read the training data named in ``argv[1]`` and run PCA on its features.

	Prints a usage line and exits with status 1 when fewer than three
	arguments are supplied.  ``argv[2]`` (the test data path) is required
	by the usage check but is not read here.
	"""
	argCount = len(argv)
	if argCount < 3:
		print("Usage: python pca.py <train_data> <test_data>")
		sys.exit(1)

	readOptions = {
		'extractFeaturesFn': extractFeatures,
		'extractLabelsFn': extractLabel,
		'limit': LIMIT,
	}
	y, x = reader.read(argv[1], **readOptions)
	# Reading of the test set is currently disabled:
	# testY, testX = reader.read(argv[2], extractFeaturesFn=extractFeatures, extractLabelsFn=extractLabel, limit=LIMIT)

	# Value handed to the Model constructor.
	k = 3
	model = Model(k)

	compressedFeatures = model.pca(x)
コード例 #4
0
ファイル: test.py プロジェクト: LucyLu717/Algorithms
def test_weightedquickunion_compression():
    """Union every pair read from PATH, then check connectivity, roots and count.

    The structure is built as ``WeightedQuickUnion(10, True)``; judging by
    the test name the flag presumably enables path compression (confirm
    against UFapi).
    """
    uf = UFapi.WeightedQuickUnion(10, True)
    for pair in reader.read(PATH):
        left, right = pair
        uf.union(left, right)

    # Expected state noted by the original author: 1 1 1 3 3 1 1 1 3 3
    for a, b in ((0, 1), (4, 9)):
        assert uf.connected(a, b)
    for site, root in ((6, 1), (2, 1), (8, 3)):
        assert uf.find(site) == root
    assert uf.count() == 2
コード例 #5
0
ファイル: test.py プロジェクト: LucyLu717/Algorithms
def test_quickunion():
    """Union every pair read from PATH, then check connectivity, roots and count."""
    uf = UFapi.QuickUnion(10)
    for pair in reader.read(PATH):
        left, right = pair
        uf.union(left, right)

    # Expected state noted by the original author: 1 1 1 8 8 1 1 1 8 8
    for a, b in ((0, 1), (4, 9)):
        assert uf.connected(a, b)
    for site, root in ((6, 1), (2, 1), (8, 8)):
        assert uf.find(site) == root
    assert uf.count() == 2
コード例 #6
0
ファイル: kmeans.py プロジェクト: ataki/hospitalfinder
def main(argv):
	"""Read the training data named in ``argv[1]``, then train and run k-means.

	Prints a usage line and exits with status 1 when fewer than three
	arguments are supplied.  ``argv[2]`` (the test data path) is required
	by the usage check but is not read here.
	"""
	argCount = len(argv)
	if argCount < 3:
		print("Usage: python kmeans.py <train_data> <test_data>")
		sys.exit(1)

	readOptions = {
		'extractFeaturesFn': extractFeatures,
		'extractLabelsFn': extractLabel,
		'limit': LIMIT,
	}
	y, x = reader.read(argv[1], **readOptions)
	# Reading of the test set is currently disabled:
	# testY, testX = reader.read(argv[2], extractFeaturesFn=extractFeatures, extractLabelsFn=extractLabel, limit=LIMIT)

	# Report the dimensions of what was loaded.
	print(np.shape(x))
	print(np.shape(y))

	model = Model()
	model.train(x, y)
	model.runKmeans(x)
	distances = model.distanceToCentroids(x)
コード例 #7
0
ファイル: final.py プロジェクト: ataki/hospitalfinder
if __name__ == "__main__":
    # Running as a script is refused (the old entry point was
    # ``main(sys.argv)``); print a hint and exit with a failure status.
    print(WARNING + "Don't call this via the command line; instead, open up ipython and type in `from final import *`")
    sys.exit(1)
else:

    # -----------------------------------------------------------
    # Import-time setup: load the 2009 data as the training set and the
    # 2010 data as the test set, announcing progress on stdout.

    print(OKBLUE)
    print("Reading in data")
    print(ENDC)

    y, X = reader.read(
        "data/2009",
        extractFeaturesFn=extractFeatures2009,
        extractLabelsFn=extractLabel,
        limit=LIMIT)
    testY, testX = reader.read(
        "data/2010",
        extractFeaturesFn=extractFeatures2010,
        extractLabelsFn=extractLabel,
        limit=LIMIT)

    print(OKGREEN)
    print("Done reading data")
    print(ENDC)

    # -----------------------------------------------------------
    # Preprocess for linear regression
コード例 #8
0
ファイル: clf.py プロジェクト: ataki/hospitalfinder
        'limit': LIMIT
    })

# ----------------------------------------------------------
# Exec

if __name__ == "__main__":
    main(sys.argv)
else:

    # ------- Actual ---------

    # When imported (e.g. `from clf import *`), load the 2009 data so that
    # `y` and `X` are available for scripting.
    y, X = reader.read(
        "data/2009",
        extractFeaturesFn=extractFeatures2009,
        extractLabelsFn=extractLabel,
        limit=LIMIT)

    # Disabled experiments, kept for reference.
    #
    # Keep only rows whose column 3 is zero:
    # idxs = np.where(X[:, 3] == 0)
    # X = X[idxs]
    # y = y[idxs]

    # Process and remove drugs and procedures and
    # leave it up to PCA to reduce dimensionality
    # procedures = X[:, 3:11]
    # drugTypes = X[:, 12:19]
    # X = np.delete(X, range(3, 19), axis=1)

    # Drop the first 25 rows labelled 10:
    # idxs = np.where(y==10)[0][:25]
    # X = np.delete(X, idxs, axis=0)
コード例 #9
0
ファイル: linreg.py プロジェクト: ataki/hospitalfinder
def main(argv):
	"""Load a data set, clean invalid cells and fit a cross-validated ridge model.

	Steps, in order:
	  1. Read labels and features from ``argv[1]`` via ``reader.read``.
	  2. Shuffle the rows.
	  3. Replace every cell ``<= -7`` with its column average, drop rows
	     with too many such cells, then drop columns with too many.
	  4. Append K-means features, fit ``RidgeCV``, run feature selection
	     and plot the training/test error.

	Args:
		argv: Command-line argument list; ``argv[1]`` is the data path.

	Prints a usage line and exits with status 1 when the path is missing.
	"""
	if len(argv) < 2:
		print("Usage: python linear_regression.py <data>")
		sys.exit(1)

	Y, X = reader.read(argv[1], **{
		'extractFeaturesFn': extractFeatures,
		'extractLabelsFn': extractTarget,
		'limit': LIMIT
	})

	# Take out invalid values.  With the flag False the filter keeps every
	# row; the statement then only glues X and Y together for the shuffle.
	takeOutInvalid = False
	XY = np.array([xy for xy in np.hstack((X, Y.reshape(-1, 1)))
			if not takeOutInvalid or all([i > -7 for i in xy])])
	XY = np.random.permutation(XY)
	X = XY[:, :-1]
	Y = XY[:, -1]
	print(len(Y))

	numRows = len(X)
	numFeatures = len(X[0])

	# Applying average feature values to invalid values
	# NOTE(review): the averages are taken over all cells, including the
	# invalid (<= -7) ones they later replace -- confirm this is intended.
	averages = [sum([x[j] for x in X]) / numRows for j in range(numFeatures)]
	numInvalidByFeature = [0] * numFeatures
	numInvalid = 0
	numBadX = 0
	cleanX = []
	cleanY = []
	# Floor division spelled out; for integer counts the comparison below
	# gives the same result as with true division.
	maxInvalidPerRow = numFeatures // 15
	for i in range(numRows):
		numInvalidInX = 0
		for j in range(numFeatures):
			if X[i][j] <= -7:
				X[i][j] = averages[j]
				numInvalid += 1
				numInvalidInX += 1
				numInvalidByFeature[j] += 1
		if numInvalidInX > maxInvalidPerRow:
			numBadX += 1
		else:
			cleanX.append(X[i])
			cleanY.append(Y[i])
	print(numInvalid)
	# 107123
	print(numBadX)
	# 9854 xs have >1/10 invalid cells
	# 14582 xs have >1/15 invalid cells
	# 19452 xs have >1/20 invalid cells

	# Take out invalid features (i.e. features with too many invalid values)
	maxInvalidPerFeature = numRows // 10
	invalidFeatures = [j for j in range(numFeatures)
			if numInvalidByFeature[j] > maxInvalidPerFeature]  # Indices of invalid features
	cleanX = [np.delete(row, invalidFeatures) for row in cleanX]

	X = np.array(cleanX)
	Y = np.array(cleanY)
	print('%s %s' % (X.shape, Y.shape))

	# Add K-means features
	X = addKMeansFeatures(X, Y)

	# Create model.  Alternatives that were tried:

	# model = linear_model.LinearRegression()

	# model = linear_model.Lasso(alpha=.01)

	# model = linear_model.LassoCV(alphas=[0.001, 0.01, 0.1, 1, 10, 100])
	# model.fit(X, Y)
	# print model.alpha_

	# model = linear_model.Ridge(alpha=100)

	# model = linear_model.ElasticNet(alpha=.1, l1_ratio=.1)

	# NOTE(review): recent scikit-learn releases no longer accept the
	# `normalize` argument -- confirm the installed version.
	model = linear_model.RidgeCV(normalize=True, alphas=[0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5, 1, 10, 100, 1000, 10000, 100000])
	# Fit once; the model used to be fitted a second time on the very same
	# data right after printing alpha.
	model.fit(X, Y)
	print('alpha: %s' % (model.alpha_,))
	print('intercept: %s' % (model.intercept_,))
	print('coef: %s' % (model.coef_,))

	# Feature selection, increases performance a lot
	X = fsel(model, X, Y)

	# Error over m
	plotTrainingTestError(model, X, Y)
コード例 #10
0
ファイル: kmeans.py プロジェクト: ataki/hospitalfinder
		sys.exit(1)

	y, x = reader.read(argv[1], extractFeaturesFn=extractFeatures, extractLabelsFn=extractLabel, limit=LIMIT)
	# testY, testX = reader.read(argv[2], extractFeaturesFn=extractFeatures, extractLabelsFn=extractLabel, limit=LIMIT)

	print np.shape(x)
	print np.shape(y)

	model = Model()
	model.train(x, y)
	model.runKmeans(x)
	distances = model.distanceToCentroids(x)
	

if __name__ == "__main__":
	main(sys.argv)
else:
	# On import, train a model on the default training set and then run
	# its test step after loading the default test set.
	DEFAULT_TRAIN = './data/2009'
	DEFAULT_TEST = './data/2010'

	y, x = reader.read(
		DEFAULT_TRAIN,
		extractFeaturesFn=extractFeatures,
		extractLabelsFn=extractLabel,
		limit=LIMIT)

	model = Model()
	model.train(x, y)

	# The test set is read without a `limit` argument.
	# NOTE(review): `ty`/`tx` are not passed to `model.test()` -- confirm
	# whether the model is expected to receive them.
	ty, tx = reader.read(
		DEFAULT_TEST,
		extractFeaturesFn=extractFeatures,
		extractLabelsFn=extractLabel)
	model.test()
コード例 #11
0
ファイル: to_csv.py プロジェクト: ataki/hospitalfinder
# Output CSV, opened once at import time and shared by writeline().
outfile = open('./2010.csv', 'w')

def writeline(arr):
	"""Write the strings in *arr* to the output file as one comma-separated line.

	Returns whatever ``outfile.write`` returns.
	"""
	row = ",".join(arr)
	return outfile.write(row + "\n")

def extractTimeWithMd(line):
	"""Parse the two characters at offsets 291-292 of *line* as an integer."""
	field = line[291:293]
	return int(field)

def extractFeatures2010(line):
	"""
	Return the list of values pulled from *line*, one per entry of
	features["2010"], by calling extract() with that entry's spec.
	"""
	values = []
	for _name, spec in features["2010"]:
		values.append(extract(line, spec))
	return values

def extractLabel(line):
	"""
	Label extraction entry point: the label of *line* is whatever
	extractTimeWithMd() returns for it.
	"""
	label = extractTimeWithMd(line)
	return label

# Convert the 2010 data set to CSV: one header row, then one row per
# example with the label appended as the last column.
# try/finally guarantees the output file is closed (and flushed) even when
# reading the input or writing a row raises.
try:
	y, X = read("./data/2010", **{
		'extractFeaturesFn': extractFeatures2010,
		'extractLabelsFn': extractLabel
	})

	# Header: the first element of every 2010 feature entry, plus the label.
	writeline([spec[0] for spec in features["2010"]] + ['timeWithMD(y)'])
	for example in np.hstack([X, y.reshape(-1, 1)]):
		writeline([str(a) for a in example])
finally:
	outfile.close()