Example #1
import csv
import os
import shutil

from pattern.vector import Document, Model, HIERARCHICAL, COSINE


def extract():
    print 'Extracting features from app descriptions...\n'
    # Start from a clean output directory.
    if os.path.exists(OUTPUT_PATH):
        shutil.rmtree(OUTPUT_PATH)
    os.makedirs(OUTPUT_PATH)

    for folder in os.listdir(INPUT_PATH):
        if folder.startswith('.'):
            continue
        os.makedirs(os.path.join(OUTPUT_PATH, folder))
        for filename in os.listdir(os.path.join(INPUT_PATH, folder)):
            with open(os.path.join(INPUT_PATH, folder, filename), 'rb') as f:
                reader = csv.reader(f)
                next(reader)  # skip the header row
                with open(os.path.join(OUTPUT_PATH, folder, filename),
                          'wb') as r:
                    writer = csv.writer(r)
                    for app in reader:
                        name = app[0]
                        description = app[2]

                        # Prepare an app description string for NLTK and
                        # LDA processing.
                        preparedDescription = prepare_description(description)

                        # Extract 3-word featurelets from the description.
                        featurelets = featurelet_extraction(preparedDescription)

                        documents = []
                        for feature in featurelets:
                            featurelet = '{} {} {}'.format(
                                feature[0], feature[1], feature[2])
                            documents.append(
                                Document(featurelet, name=featurelet))

                        # Perform hierarchical clustering.
                        m = Model(documents)
                        cluster = m.cluster(method=HIERARCHICAL,
                                            k=3,
                                            iterations=1000,
                                            distance=COSINE)

                        # Organize clusters into features and alternative tokens.
                        features, alterTokens = group(cluster, [], [], [])

                        # Write the results to file.
                        writer.writerow(
                            [name, description, features, alterTokens])
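
# extract() assumes module-level INPUT_PATH / OUTPUT_PATH constants and the
# helpers prepare_description(), featurelet_extraction() and group(). A
# minimal, hypothetical scaffold for running it (the stub bodies below are
# placeholders, not the original implementations):
INPUT_PATH = 'data/input'
OUTPUT_PATH = 'data/output'

def prepare_description(description):
    # Placeholder: lower-case and tokenize the raw description text.
    return description.lower().split()

def featurelet_extraction(tokens):
    # Placeholder: emit consecutive 3-word windows as candidate featurelets.
    return [tokens[i:i + 3] for i in range(len(tokens) - 2)]

def group(cluster, features, alterTokens, seen):
    # Placeholder: walk the cluster and split it into features and
    # alternative tokens.
    return features, alterTokens

extract()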
Example #2
	def kmeansCluster(self, documentList, k, iteration, distance, seed, p):
		# Requires: from pattern.vector import Model, KMEANS, COSINE, EUCLIDEAN, MANHATTAN, KMPP, RANDOM
		if distance.lower() == "cosine":
			distance = COSINE
		elif distance.lower() == "euclidean":
			distance = EUCLIDEAN
		elif distance.lower() == "manhattan":
			distance = MANHATTAN
		else:
			return "invalid distance"

		if seed.lower() == "kmpp":
			seed = KMPP
		elif seed.lower() == "random":
			seed = RANDOM
		else:
			return "invalid random"
		
		if type(k) is not int:
			return "k is not int"

		if type(iteration) is not int:
			return "iterartion is not int"

		if type(p) is not float and type(p) is not int:
			return "p is not float"

		if type(documentList) is not list:
			return "document List is not list"

		self.iteration = iteration
		self.seed = seed
		self.p = p
		self.distance = distance
		
		model = Model(documentList)
		cluster = model.cluster(method=KMEANS, k=k, iterations=iteration, distance=distance, seed=seed, p=p)
		return cluster
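
# A minimal, hypothetical usage sketch for kmeansCluster() above; "clusterer"
# stands in for an instance of whatever class defines the method.
from pattern.vector import Document

docs = [Document('Cats are independent pets.', name='cat'),
        Document('Dogs are trustworthy pets.', name='dog'),
        Document('Boxes are made of cardboard.', name='box')]
clusters = clusterer.kmeansCluster(docs, k=2, iteration=100,
                                   distance='cosine', seed='kmpp', p=0.8)
print clusters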
# latent semantic analysis
from pattern.vector import Document, Model, Cluster, NB, csv, HIERARCHICAL

d1 = Document('The cat purrs.', name='cat1')
d2 = Document('Curiosity killed the cat.', name='cat2')
d3 = Document('The dog wags his tail.', name='dog1')
d4 = Document('The dog is happy.', name='dog2')
m = Model([d1, d2, d3, d4])
m.reduce(2)
for d in m.documents:
    print
    print d.name
    for concept, w1 in m.lsa.vectors[d.id].items():
        for feature, w2 in m.lsa.concepts[concept].items():
            if w1 != 0 and w2 != 0:
                print feature, w1 * w2
# clustering
d1 = Document('Cats are independent pets.', name='cat')
d2 = Document('Dogs are trustworthy pets.', name='dog')
d3 = Document('Boxes are made of cardboard.', name='box')
m = Model((d1, d2, d3))
print m.cluster(method=HIERARCHICAL, k=2)
# hierarchical clustering
cluster = Cluster((1, Cluster((2, Cluster((3, 4))))))
print cluster.depth
print cluster.flatten(1)
# training a classifier
nb = NB()
for review, rating in csv('data/input/reviews.csv'):
    v = Document(review, type=int(rating), stopwords=True)
    nb.train(v)
print nb.classes
print nb.classify(Document('A good movie!'))
# testing a classifier
data = csv('data/input/reviews.csv')
data = [(review, int(rating)) for review, rating in data]
data = [Document(review, type=rating, stopwords=True) for review, rating in data]
nb = NB(train=data[:500])
accuracy, precision, recall, f1 = nb.test(data[500:])
print accuracy, precision, recall, f1

import pattern.vector
from pattern.vector import Document, Model, HIERARCHICAL


class ClusterLSI(object):
	def __init__(self):
		"""Set up the ClusterLSI environment."""
		self.field = "scopeAndContent"
		self.limit = False
		self.identifier = "idDoc"

		self.model = False
		self.cluster = False

		self.depth = 0

		self.outputNodes = "./clus-nodes.csv"
		self.outputEdges = "./clus-edges.csv"

	def normalize(self, s):
		"""Normalize a string to a UTF-8 encoded byte string.

		Keyword arguments:
		s	---	string

		"""
		if isinstance(s, unicode):
			return s.encode('utf8', 'ignore')
		else:
			return str(s)

	def modeling(self, descriptions, field = False, limit = False):
		"""Return a pattern.vector.Model, i.e. a list of pattern.vector.Document, built from EHRI.get() descriptions.

		Keyword arguments:
		descriptions	---	EHRI.get() description object
		field	---	Field to look into, overrides default self.field
		limit	---	Debug option. Limits the model to $limit items

		"""
		if field:
			self.field = field
		if limit:
			self.limit = limit
		D = []

		# Create a pattern Document for each description we got from Neo4j.
		# For debugging, self.limit caps the number of documents.
		if self.limit:
			i = 0
		for description in descriptions:
			D.append(Document(description[self.field], name=description[self.identifier]))
			# Stop iterating when i reaches the limit.
			if self.limit:
				i += 1
				if i == self.limit:
					break
		# Then create a model from our list.
		self.model = Model(D)

		return self.model
		
	def clusterize(self, model = False):
		"""Return a hierarchical cluster of the given model.

		Keyword arguments:
		model	---	If set, overrides the instance model

		"""
		if model:
			self.model = model
		self.cluster = self.model.cluster(method=HIERARCHICAL, k=2)
		return self.cluster

	def flatten(self, array, typeOf = "str"):
		"""Return the elements of the given type found at the first level of the given array.

		Keyword arguments:
		array	---	A list of items
		typeOf	---	Type of item the function should return

		"""
		if typeOf == "str":
			return [element for element in array if isinstance(element, basestring)]
		elif typeOf == "list":
			return [element for element in array if isinstance(element, list)]
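	# For example (hypothetical data):
	#   self.flatten(['a', ['b', 'c'], 'd'], "str")  -> ['a', 'd']
	#   self.flatten(['a', ['b', 'c'], 'd'], "list") -> [['b', 'c']]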

	def csv(self, array, parents = False, fake = 0):
		"""Return a (CSV string, fake counter) tuple for the given items and number of fake items.

		Keyword arguments:
		array	---	A list of items
		parents	---	A list of parents
		fake	---	An index for fake parents

		"""
		string = ""
		# List elements and sub-lists once, to avoid calling flatten() again.
		currents = self.flatten(array, "str")
		children = self.flatten(array, "list")

		# A cluster without direct documents gets a "fake" parent node.
		if len(currents) == 0:
			fake += 1
		Ffake = fake
		# If we have parents, we have parent connections.
		if parents:
			for element in currents:
				for parent in parents:
					string += self.normalize(element) + ";" + parent + "\n"

		# Take care of the children.
		for child in children:
			if len(currents) > 0:
				Sstring, Ffake = self.csv(child, currents, Ffake)
			else:
				Sstring, Ffake = self.csv(child, ["fake-" + str(fake)], Ffake)
			string += Sstring

		return string, Ffake
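	# For example (hypothetical nested array):
	#   self.csv(['child', ['grandchild']], ['root'])
	#   -> ("child;root\ngrandchild;child\n", 0)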
		

	def clusterToArray(self, Graph):
		"""Convert a cluster object to a nested list whose depth matches cluster.depth.

		Keyword arguments:
		Graph	---	Cluster or list

		"""
		array = []

		Docs = [element for element in Graph if isinstance(element, pattern.vector.Document)]
		Clusts = [element for element in Graph if isinstance(element, list)]

		# Keep document names as leaves, recurse into sub-clusters.
		for node in Docs:
			array.append(node.name)
		for node in Clusts:
			array.append(self.clusterToArray(node))
		return array
	
	def save(self, descriptions, csv, fakes = 0, nodesName = False, edgesName = False):
		"""Output the cluster into CSV files.

		Keyword arguments:
		descriptions	---	EHRI.get() description item
		csv	---	Edge CSV string, as returned by self.csv()
		fakes	---	Number of fake parents
		nodesName	---	Filename for the nodes' CSV file
		edgesName	---	Filename for the edges' CSV file

		"""
		if nodesName:
			self.outputNodes = nodesName
		if edgesName:
			self.outputEdges = edgesName

		# Write the nodes file: one row per document, plus the fake parents.
		f = open(self.outputNodes, "wt")
		f.write("id;label;type\n")
		for description in descriptions:
			f.write(self.normalize(description[self.identifier] + ";" + description[self.identifier] + ";1\n"))
		i = 0
		while i <= fakes:
			f.write("fake-" + str(i) + ";fake" + str(i) + ";0\n")
			i += 1
		f.close()

		# Write the edges file from the string built by self.csv().
		f = open(self.outputEdges, "wt")
		f.write("source;target\n")
		f.write(csv)
		f.close()
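
# A minimal end-to-end sketch of how ClusterLSI fits together. The
# descriptions list is a hypothetical stand-in for EHRI.get() output:
# an iterable of dicts keyed by 'idDoc' and 'scopeAndContent'.
descriptions = [
    {'idDoc': 'doc-1', 'scopeAndContent': 'Records of the relief committee.'},
    {'idDoc': 'doc-2', 'scopeAndContent': 'Correspondence of the relief committee.'},
    {'idDoc': 'doc-3', 'scopeAndContent': 'Photographs of the city archive.'},
]

lsi = ClusterLSI()
lsi.modeling(descriptions)              # build the pattern.vector.Model
cluster = lsi.clusterize()              # hierarchical clustering, k=2
array = lsi.clusterToArray(cluster)     # nested lists of document names
edges, fakes = lsi.csv(array)           # edge list as a CSV string
lsi.save(descriptions, edges, fakes)    # write clus-nodes.csv and clus-edges.csv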