Example #1
# ExampleInstance, FeatureSet, WarningTypes and ErrorTypes come from companion modules (not shown).
class ExampleInstanceExamer(object):
    """Checks an ExampleInstance against a FeatureSet, both loaded from XML."""

    def __init__(self):
        self.example_instance = None
        self.feature_set = None

    def load(self, example_file, feature_set_file):
        self.example_instance = ExampleInstance()
        self.example_instance.from_xml_file(example_file)

        self.feature_set = FeatureSet()
        self.feature_set.from_xml_file(feature_set_file)

    def exam(self):
        factor_list = self.example_instance.get_factors()
        for factor in factor_list:
            # Check the words in the factor.
            if len(factor.words) == 0:
                print(WarningTypes["NO_WORD"] + "\n    " + str(factor) + "\n")
            for w in factor.words:
                if not self.feature_set.has_word_feature(w):
                    return ErrorTypes["NO_WORD_FEATURE"] + "\n    " + str(factor) + "\n    " + str(w) + "\n"

            # Check the groundings in the factor.
            if len(factor.groundings.groundings) == 0:
                print(WarningTypes["NO_GROUNDING"] + "\n    " + str(factor) + "\n")
            for g in factor.groundings.groundings:
                if g.grounding_type == "object":
                    if not self.example_instance.world.has_object(g.type, g.name):
                        return ErrorTypes["NO_OBJECT_IN_WORLD"] + "\n    " + str(factor) + "\n    " + str(g) + "\n"
                    #elif not self.feature_set.has_object_type_feature(g):
                    #    return ErrorTypes["NO_OBJECT_FEATURE"] + "\n    " + str(factor) + "\n    " + str(g) + "\n"
                elif g.grounding_type == "constraint":
                    # A constraint grounding needs both a parent and a child object.
                    if g.parent is None or g.child is None:
                        return ErrorTypes["NO_OBJECT"] + "\n    " + str(factor) + "\n    " + str(g) + "\n"
                    elif not self.example_instance.world.has_object(g.parent.type, g.parent.name):
                        return ErrorTypes["NO_OBJECT_IN_WORLD"] + "\n    [parent]    " + str(factor) + "\n    " + str(g) + "\n"
                    elif not self.example_instance.world.has_object(g.child.type, g.child.name):
                        return ErrorTypes["NO_OBJECT_IN_WORLD"] + "\n    [child]    " + str(factor) + "\n    " + str(g) + "\n"
                elif g.grounding_type == "func_kernel":
                    if g.object is None:
                        return ErrorTypes["NO_OBJECT"] + "\n    " + str(factor) + "\n    " + str(g) + "\n"
                    elif not self.example_instance.world.has_object(g.object.type, g.object.name):
                        return ErrorTypes["NO_OBJECT_IN_WORLD"] + "\n    " + str(factor) + "\n    " + str(g) + "\n"

        return "Good"
                    
                

        
Example #2
def runFull():
    # KNNSearch, FeatureSet, Model and runModel are defined in companion modules (not shown).
    nbrUserids = KNNSearch.Search([1.0, -1.2, 1.0, 7.79], 2000)

    split = FeatureSet.featureExtract(nbrUserids)

    featureSet = split[0][0]
    interested = split[0][1]
    notinterested = split[0][2]

    # One flag per feature column.
    z = [True] * len(featureSet[0])
    w = [True] * len(featureSet[0])

    C = 0.03  # 0.3 was also tried in the original
    model = Model(compress=z, has_none=w, C=C)
    model.fit(featureSet, interested)

    testData = getTestData()

    result = runModel(model, testData)
    print(result)
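
runModel is not shown in this snippet; a purely illustrative sketch of its likely shape, assuming Model exposes a predict method:

# Hypothetical helper, not from the original code: score each user's events.
def runModel(model, testData):
    results = {}
    for uid, data in testData.items():
        preds = model.predict(data['X'])  # assumes Model has a predict() method
        results[uid] = list(zip([e['eid'] for e in data['events']], preds))
    return results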
Example #3
import pandas as pd

def getTestData():
    test = pd.read_csv("../Input/train2.csv")
    testDict = {}

    # Group each user's (event, invited) records together.
    for _, record in test.iterrows():
        uid = record['user']
        if uid not in testDict:
            testDict[uid] = []
        testDict[uid].append({
            'eid': record['event'],
            'invited': record['invited']
        })

    # Build the per-user feature matrix via FeatureSet (companion module, not shown).
    testData = {}
    for uid, events in testDict.items():
        eDict = {e['eid']: e['invited'] for e in events}
        features_dict = FeatureSet.process(uid, eDict)
        X = []
        for e in events:
            eid = e['eid']
            X.append(features_dict[eid])
        testData[uid] = {
            'X': X,
            'events': events
        }

    return testData
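
For reference, the returned structure maps each user id to its feature rows and raw events (values here are invented):

# {
#     1234: {
#         'X': [[0.1, 0.7, ...], ...],               # one feature vector per event
#         'events': [{'eid': 42, 'invited': 0}, ...]
#     },
# }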
Example #4
	def Features(self, mode = None, filterExpression = None):
		"""
		Return an iterator over the Features in this feature class.
		Parameters:
			mode: open mode.
				OpenMode.ReadOnly: open via pGP.SearchCursor
				OpenMode.WriteOnly: open via pGP.InsertCursor
				OpenMode.ReadWrite: open via pGP.UpdateCursor
			filterExpression: row filter. See ESRI's WhereClause option.
		"""
		if not self.isValid_:
			return FeatureSet(None, OpenMode.Invalid, self)

		# Only one FeatureSet is kept open at a time; invalidate the previous one.
		if self.currentFeatureSet_:
			self.currentFeatureSet_.SetInvalid()
		if mode is None:
			mode = OpenMode.ReadOnly
		if mode == OpenMode.ReadOnly:
			if filterExpression:
				rows = pGP.SearchCursor(self.path_, filterExpression)
			else:
				rows = pGP.SearchCursor(self.path_)
			self.currentFeatureSet_ = FeatureSet(rows, mode, self)
		elif mode == OpenMode.WriteOnly:
			if filterExpression:
				rows = pGP.InsertCursor(self.path_, filterExpression)
			else:
				rows = pGP.InsertCursor(self.path_)
			self.currentFeatureSet_ = FeatureSet(rows, mode, self)
		elif mode == OpenMode.ReadWrite:
			if filterExpression:
				rows = pGP.UpdateCursor(self.path_, filterExpression)
			else:
				rows = pGP.UpdateCursor(self.path_)
			self.currentFeatureSet_ = FeatureSet(rows, mode, self)
	
		return self.currentFeatureSet_
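
A hypothetical read-only pass over the rows, assuming fc is a valid FeatureClass (see Example #5); the where clause is a placeholder:

# Hypothetical usage, not from the original code.
rows = fc.Features(OpenMode.ReadOnly, '"AREA" > 100')
for feature in rows:
    ...  # process each feature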
Example #5
class FeatureClass:
	"""Thin wrapper over an ArcGIS geoprocessing (pGP) feature class."""
	def __init__(self, featclsName):
		self._Reset()
		self.path_ = featclsName
		
		if pGP.Exists(featclsName):
			desc = pGP.Describe(featclsName)
			fcprops = get_featureclass_properties(featclsName)
			if desc.DatasetType.lower() != "featureclass":
				return
			
			self.isValid_ = True
			
			self.idFieldName_ = fcprops.sFID
			self.shapeFieldName_ = fcprops.sShapeField
			self.shapeType_ = fcprops.sShapeType
			self.spatialRef_ = fcprops.pSpatialRef
			
			self.dir_ = os.path.split(self.path_)[0]
			self.baseName_ = os.path.split(self.path_)[1]
			self.count_ = pGP.GetCount_management(self.path_)
			self.desc_ = desc
	
	def _Reset(self):
		self.isValid_ = False
		
		self.dir_ = None
		self.baseName_ = None
		self.count_ = None
		
		self.idFieldName_ = None
		self.shapeFieldName_ = None
		self.shapeType_ = None
		self.spatialRef_ = None
		
		self.desc_ = None
		
		self.currentFeatureSet_ = None
		
	def Directory(self):
		return self.dir_
	
	def BaseName(self):
		return self.baseName_
	
	def ShapeTypeName(self):
		return self.shapeType_
	
	def SetShapeTypeName(self, typeName):
		# The shape type can only be set while the feature class does not yet exist.
		if self.isValid_:
			return False
		self.shapeType_ = typeName
		return True
	
	def ShapeFieldName(self):
		return self.shapeFieldName_
	
	def IDFieldName(self):
		return self.idFieldName_
	
	def SpatialRef(self):
		return self.spatialRef_
	
	def SetSpatialRef(self, spatialRef):
		self.spatialRef_ = spatialRef
		
	def ArcGISDescription(self):
		return self.desc_
	
	def AddField(self, field_name, data_type, *args):
		validFieldName = pGP.ValidateFieldName(field_name)
		pGP.AddField_management(self.path_, validFieldName, data_type, *args)
		return validFieldName
	
	def Features(self, mode = None, filterExpression = None):
		"""
		获取一个包含多个Feature的迭代器。
		参数:
			mode: 打开方式。
				OpenMode.ReadOnly: 以pGP.SearchCursor的方式打开
				OpenMode.WriteOnly: 以pGP.InsertCursor的方式打开
				OpenMode.ReadWrite: 以pGP.UpdateCursor的方式打开
			filter:集合过滤器。参见ESRI的WhereClause选项
		"""
		if not self.isValid_:
			return FeatureSet(None, OpenMode.Invalid, self)

		if self.currentFeatureSet_:
			self.currentFeatureSet_.SetInvalid()
		if mode == None:
			mode = OpenMode.ReadOnly
		if mode == OpenMode.ReadOnly:
			if filterExpression:
				rows = pGP.SearchCursor(self.path_, filterExpression)
			else:
				rows = pGP.SearchCursor(self.path_)
			self.currentFeatureSet_ = FeatureSet(rows, mode, self)
		elif mode == OpenMode.WriteOnly:
			if filterExpression:
				rows = pGP.InsertCursor(self.path_, filterExpression)
			else:
				rows = pGP.InsertCursor(self.path_)
			self.currentFeatureSet_ = FeatureSet(rows, mode, self)
		elif mode == OpenMode.ReadWrite:
			if filterExpression:
				rows = pGP.UpdateCursor(self.path_, filterExpression)
			else:
				rows = pGP.UpdateCursor(self.path_)
			self.currentFeatureSet_ = FeatureSet(rows, mode, self)
	
		return self.currentFeatureSet_
	
	def Flush(self):
		if self.currentFeatureSet_:
			self.currentFeatureSet_.Flush()
		
	def Count(self):
		if self.isValid_:
			return self.count_
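
A hypothetical end-to-end use of FeatureClass (the path and field name are placeholders, not from the original):

# Hypothetical usage: open a feature class, add a field, and scan its rows.
fc = FeatureClass("C:/data/demo.gdb/parcels")
name = fc.AddField("SCORE", "DOUBLE")
for feature in fc.Features(OpenMode.ReadOnly):
    ...  # read each feature
fc.Flush()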
Example #7
'''
Created on Oct 9, 2015

@author: ydq
'''

from FeatureSet import *

if __name__ == '__main__':

    FILE = "feature_set.xml"
    feature_set = FeatureSet()
    feature_set.from_xml_file(FILE)

    print(feature_set)
Example #8
import os

import numpy as np

# sequences, fs, get_kwargs and dump are companion helpers (not shown here).
def generate_synthetic_data(data_path,
                            seq_lengths,
                            samples_per_class,
                            num_files,
                            sequences_type="default",
                            features_set="default",
                            kwargs="default"):
    """generating synthetic time series data with our simple, uniqe method.

    Parameters:
    data_path : str.
        Path to the where the generated data will be saved.
        
    seq_lengths : list.
        List of all time series sequences length to be generated. e.g [50, 100] will generate data with 50 and 100 sequence length.
        
    samples_per_class : int
        How many instances will be generated to each class.
        
    num_files : int
        How many files to be created. e.g if we are using 10 sequences, 10 samples per class, 2 files, we will have totally 2 * (10 * 10) samples of synthetic data.
        Why not create all samples in same file? because of memory issues. we split so that each file will contain ~500K samples.
        
    sequences_type : list or str, default = "default" 
        Which kind of sequnces type to be created. e.g ["Up", "HighPeak", "SinWave"].
        if default - take all possible sequences. this is the recommended option.
        
    features_set : list or str, default = "default" 
        Same to sequences_type but regarding which features to be used as target for training (the "y"). e.g ["Max", "Min", "Peaks"]
        if default - take all possible features. this is the recommended option.

    Returns
    -------
    None, cause all files will be written to disk.

   """
    path = data_path
    num_samples = samples_per_class
    num_times = num_files
    all_sequences_type = (sequences.get_all_sequences()
                          if sequences_type == "default" else sequences_type)
    feature_set = (fs.get_all_features()
                   if features_set == "default" else features_set)
    for i in range(num_times):
        for seq in seq_lengths:
            if not os.path.exists(path + str(seq)):
                os.makedirs(path + str(seq))
            if not os.path.exists(path + "{}/{}".format(seq, num_samples)):
                os.makedirs(path + "{}/{}".format(seq, num_samples))
            y = []
            for sequence_type in all_sequences_type:
                kwargs = get_kwargs(sequence_type, num_samples, seq)
                # Look up the generator class by name instead of using eval().
                y_samples = getattr(sequences, sequence_type)(**kwargs).generate_data()
                y.append(y_samples)

            y = np.array([np.array(yi) for yi in y])
            y = y.reshape(len(y) * y[0].shape[0], y[0].shape[1])
            file_name = path + "{}/{}/{}_part{}_x_test.gz".format(
                seq, num_samples, seq, i)
            dump(y, file_name)

            y = fs.create_regression_tastks_no_multi(y, feature_set)
            file_name = path + "{}/{}/{}_part{}_y_test.gz".format(
                seq, num_samples, seq, i)
            dump(y, file_name)
            y = None
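
A hypothetical invocation (the path and sizes are placeholders, not from the original):

# Hypothetical call: two files, 10 samples per class, lengths 50 and 100.
generate_synthetic_data(data_path="./synthetic/",
                        seq_lengths=[50, 100],
                        samples_per_class=10,
                        num_files=2)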
Example #9
# Method fragment; see Example #1 for the enclosing ExampleInstanceExamer class.
def load(self, example_file, feature_set_file):
    self.example_instance = ExampleInstance()
    self.example_instance.from_xml_file(example_file)

    self.feature_set = FeatureSet()
    self.feature_set.from_xml_file(feature_set_file)