Exemple #1
0
 def distribute(path, signs):
     """Dispatch records from a line-oriented input file into per-sign files.

     Every line of ``path`` is decoded with ``JsonReaderWriter.decode``.
     Records without a ``'text'`` key are ignored.  For the others, the
     lower-cased text is tested against each entry of ``signs``; on every
     match a dict with the keys ``'text'``, ``'datetime'`` and ``'username'``
     is appended (via ``JsonReaderWriter.append``) to the file
     ``<directory of this module>/data_center/<sign>``.

     Processing is best effort: a line that fails to decode, lacks
     ``'created_at'``, or fails while being written is skipped.

     :param path: path of the input file, read line by line.
     :param signs: iterable of substrings to look for in the lower-cased
         text; each one is also used as the output file name.
         NOTE(review): matching is done against the *encoded, lower-cased*
         text, so signs are presumably expected to be lower-case byte
         strings -- confirm against the caller.
     :raises IOError: if ``path`` cannot be opened.
     """
     data_center = os.path.dirname(os.path.realpath(__file__)) + "/data_center"
     # The context manager guarantees the input is closed even when reading
     # fails part-way through the file.
     with open(path, "r") as source:
         for line in source:
             try:
                 record = JsonReaderWriter.decode(line)
                 if 'text' not in record:
                     continue
                 created_at = dtu.to_s(
                     dtu.parse_to_obj(record['created_at'].encode(),
                                      dtu.WEEK_MON_TIME_ZONE_YEAR),
                     dtu.YEAR_MON_DAY_TIME)
                 text = record['text'].encode().lower()
                 for com in signs:
                     if com in text:
                         # Missing user information degrades to an empty name.
                         if 'user' in record and 'name' in record['user']:
                             name = record['user']['name']
                         else:
                             name = ''
                         dist_path = data_center + "/" + com
                         JsonReaderWriter.append(
                             {'text': rp.replace_http(text, rp.HTTP_TAG),
                              'datetime': created_at,
                              'username': name},
                             dist_path)
             except Exception:
                 # Deliberate best effort: skip the malformed record, but let
                 # KeyboardInterrupt / SystemExit propagate (a bare ``except``
                 # used to swallow those as well).
                 continue
	def create_dates_vector(self, start_date, end_date, coocurring_precision):
		"""Build the list of time slots covering [start_date, end_date].

		For both supported precisions the start is rounded with
		``DateTimeUtils.round_to_the_hour_before`` and the end with
		``DateTimeUtils.round_to_the_hour_after`` before the slots are
		enumerated.  A short summary (rounded bounds, precision and number of
		slots) is printed to stdout.

		:param start_date: lower bound of the period.
		:param end_date: upper bound of the period.
		:param coocurring_precision: either
			``MatrixFeatureTransformer.hour_precision`` or
			``MatrixFeatureTransformer.quarter_hour_precision``.
		:return: the result of ``DateTimeUtils.hours_between`` for the hour
			precision, or of ``DateTimeUtils.quarterhours_between`` without
			its last element for the quarter-hour precision.
		:raises Exception: if the precision is not one of the two above.
		"""
		if coocurring_precision == MatrixFeatureTransformer.hour_precision:
			start_date = DateTimeUtils.round_to_the_hour_before(start_date)
			end_date = DateTimeUtils.round_to_the_hour_after(end_date)
			time_vector = DateTimeUtils.hours_between(start_date, end_date)
		elif coocurring_precision == MatrixFeatureTransformer.quarter_hour_precision:
			start_date = DateTimeUtils.round_to_the_hour_before(start_date)
			end_date = DateTimeUtils.round_to_the_hour_after(end_date)
			# The last quarter-hour slot is dropped.
			# NOTE(review): presumably because it starts exactly at the
			# rounded end bound -- confirm against quarterhours_between.
			time_vector = DateTimeUtils.quarterhours_between(start_date, end_date)[0:-1]
		else:
			# %-formatting keeps the message identical for string values and
			# no longer fails with TypeError when the precision is not a
			# string (e.g. an integer constant).
			raise Exception(
				"NOT IMPLEMENTED EXCEPTION : Branch coocurring_precision %s not yet implemented "
				% (coocurring_precision,))

		# Single-argument parenthesised prints behave the same under the
		# Python 2 print statement and the Python 3 print function.
		print("start_date " + str(start_date))
		print("end_date " + str(end_date))
		print(coocurring_precision)
		print("number of date values " + str(len(time_vector)))

		return time_vector
    def create_dates_vector(self, start_date, end_date, coocurring_precision):
        """Build the list of time slots covering [start_date, end_date].

        The start is rounded with ``DateTimeUtils.round_to_the_hour_before``
        and the end with ``DateTimeUtils.round_to_the_hour_after`` before the
        slots are enumerated.

        :param start_date: lower bound of the period.
        :param end_date: upper bound of the period.
        :param coocurring_precision: either
            ``MatrixFeatureTransformer.hour_precision`` or
            ``MatrixFeatureTransformer.quarter_hour_precision``.
        :return: the result of ``DateTimeUtils.hours_between`` for the hour
            precision, or of ``DateTimeUtils.quarterhours_between`` without
            its last element for the quarter-hour precision.
        :raises Exception: if the precision is not one of the two above.
        """
        hourly = coocurring_precision == MatrixFeatureTransformer.hour_precision
        if not hourly and not (
                coocurring_precision ==
                MatrixFeatureTransformer.quarter_hour_precision):
            # %-formatting keeps the message identical for string values and
            # no longer fails with TypeError when the precision is not a
            # string (e.g. an integer constant).
            raise Exception(
                "NOT IMPLEMENTED EXCEPTION : Branch coocurring_precision %s not yet implemented "
                % (coocurring_precision,))

        # Both precisions share the same hour-aligned bounds.
        start_date = DateTimeUtils.round_to_the_hour_before(start_date)
        end_date = DateTimeUtils.round_to_the_hour_after(end_date)

        if hourly:
            return DateTimeUtils.hours_between(start_date, end_date)

        # The last quarter-hour slot is dropped.
        # NOTE(review): presumably because it starts exactly at the rounded
        # end bound -- confirm against quarterhours_between.
        return DateTimeUtils.quarterhours_between(start_date, end_date)[0:-1]
    def transform(self):
        """Fill ``self.matrix_data`` row by row from the realizations in ``self.data``.

        Row ``i`` of the matrix corresponds to ``self.time_vector[i]``.  For
        each target date, the realizations of a working deep copy of
        ``self.data`` are scanned: the interval of every realization is
        expanded into time slots (hours or quarter hours, according to
        ``self.coocurring_precision``) and, for each slot equal to the target
        date, the realization vector is merged into the row with
        ``self.vector_transformer_operator``.  A realization whose slots never
        exceed the target date is removed from the working copy; scanning for
        the current target stops at the first realization that has a slot
        after it.
        NOTE(review): this early stop assumes the keys of ``self.data``
        iterate in chronological order -- confirm.

        Afterwards, unless ``self.let_importance_scores_to_1`` is set,
        ``self.realization_importance_score`` is recomputed from the matrix.
        For the quarter-hour precision, ``self.matrix_data`` and
        ``self.time_vector`` are then both replaced by the result of
        ``_do_aggregated_special_transformation_with_replative_importance``.
        Diagnostics are printed to stdout.

        :raises Exception: if ``self.coocurring_precision`` is neither
            ``MatrixFeatureTransformer.hour_precision`` nor
            ``MatrixFeatureTransformer.quarter_hour_precision``.
        """
        precision = self.coocurring_precision
        if precision == MatrixFeatureTransformer.hour_precision:
            quarter_hourly = False
            round_before = DateTimeUtils.round_to_the_hour_before
            slots_between = DateTimeUtils.hours_between
        elif precision == MatrixFeatureTransformer.quarter_hour_precision:
            quarter_hourly = True
            round_before = DateTimeUtils.round_to_the_quarter_before
            slots_between = DateTimeUtils.quarterhours_between
        else:
            # The original referenced the undefined name
            # ``coocurring_precision`` here and died with a NameError.
            raise Exception(
                "NOT IMPLEMENTED EXCEPTION : Branch coocurring_precision %s not yet implemented "
                % (precision,))

        data_copy = copy.deepcopy(self.data)
        for time_id, target_date in enumerate(self.time_vector):
            # Iterate over a snapshot of the keys: entries are deleted from
            # data_copy inside the loop.
            for date_interval in list(data_copy.keys()):
                realization = data_copy[date_interval]
                # All the slots covered by the interval of this realization.
                first_slot = round_before(
                    DataExtractor.start_date_of_realization(date_interval))
                last_slot = round_before(
                    DataExtractor.end_date_of_realization(date_interval))

                target_passed = False
                for slot in slots_between(first_slot, last_slot):
                    if slot == target_date:
                        # Merge the realization into row time_id.
                        vector = self.transform_realization_to_vector(
                            realization)
                        self.matrix_data[time_id, :] = (
                            self.vector_transformer_operator(
                                self.matrix_data[time_id, :], vector))
                    if slot > target_date:
                        # This realization extends beyond the target date:
                        # move on to the next target date.
                        target_passed = True
                        break

                if target_passed:
                    break

                # Entirely at or before the target date: nothing left to use.
                del data_copy[date_interval]

        if not self.let_importance_scores_to_1:
            self.realization_importance_score = self.ir_importance_score(
                self.matrix_data)

        if quarter_hourly:
            # NOTE(review): the literal 4 is presumably the number of quarter
            # hours aggregated per hour -- confirm against the helper.
            self.matrix_data, self.time_vector = (
                self._do_aggregated_special_transformation_with_replative_importance(
                    self.matrix_data, 4, self.feature_coef, self.time_vector,
                    self.do_normalize))
            print(np.shape(self.matrix_data))
            print(len(self.time_vector))

        # Single-argument parenthesised prints behave the same under the
        # Python 2 print statement and the Python 3 print function.
        print("the number of non zeros for " + self.feature_name +
              " values is : " + str(np.count_nonzero(self.matrix_data)) +
              "/" + str(np.size(self.matrix_data)))
	def transform(self):
		"""Fill ``self.matrix_data`` row by row from the realizations in ``self.data``.

		Row ``i`` of the matrix corresponds to ``self.time_vector[i]``.  For
		each target date, the realizations of a working deep copy of
		``self.data`` are scanned: the interval of every realization is
		expanded into time slots (hours or quarter hours, according to
		``self.coocurring_precision``) and, for each slot equal to the target
		date, the realization vector is merged into the row with
		``self.vector_transformer_operator``.  A realization whose slots never
		exceed the target date is removed from the working copy; scanning for
		the current target stops at the first realization that has a slot
		after it.
		NOTE(review): this early stop assumes the keys of ``self.data``
		iterate in chronological order -- confirm.

		Afterwards, unless ``self.let_importance_scores_to_1`` is set,
		``self.realization_importance_score`` is recomputed from the matrix.
		For the quarter-hour precision, ``self.matrix_data`` and
		``self.time_vector`` are then both replaced by the result of
		``_do_aggregated_special_transformation_with_replative_importance``.
		Diagnostics are printed to stdout.

		:raises Exception: if ``self.coocurring_precision`` is neither
			``MatrixFeatureTransformer.hour_precision`` nor
			``MatrixFeatureTransformer.quarter_hour_precision``.
		"""
		precision = self.coocurring_precision
		if precision == MatrixFeatureTransformer.hour_precision:
			aggregate_quarters = False
			round_down = DateTimeUtils.round_to_the_hour_before
			enumerate_slots = DateTimeUtils.hours_between
		elif precision == MatrixFeatureTransformer.quarter_hour_precision:
			aggregate_quarters = True
			round_down = DateTimeUtils.round_to_the_quarter_before
			enumerate_slots = DateTimeUtils.quarterhours_between
		else:
			# The original referenced the undefined name
			# ``coocurring_precision`` here and died with a NameError.
			raise Exception(
				"NOT IMPLEMENTED EXCEPTION : Branch coocurring_precision %s not yet implemented "
				% (precision,))

		data_copy = copy.deepcopy(self.data)
		time_id = 0
		for target_date in self.time_vector:
			# Iterate over a snapshot of the keys: entries are deleted from
			# data_copy inside the loop.
			for date_interval in list(data_copy.keys()):
				realization = data_copy[date_interval]
				# All the slots covered by the interval of this realization.
				start_realization_time = round_down(
					DataExtractor.start_date_of_realization(date_interval))
				end_realization_time = round_down(
					DataExtractor.end_date_of_realization(date_interval))
				slots = enumerate_slots(start_realization_time, end_realization_time)

				do_break = False
				for slot in slots:
					if slot == target_date:
						# Merge the realization into row time_id.
						vector = self.transform_realization_to_vector(realization)
						self.matrix_data[time_id, :] = self.vector_transformer_operator(
							self.matrix_data[time_id, :], vector)
					if slot > target_date:
						# This realization extends beyond the target date:
						# move on to the next target date.
						do_break = True
						break

				if do_break:
					break

				# Entirely at or before the target date: nothing left to use.
				del data_copy[date_interval]
			time_id += 1

		if not self.let_importance_scores_to_1:
			self.realization_importance_score = self.ir_importance_score(self.matrix_data)

		if aggregate_quarters:
			# NOTE(review): the literal 4 is presumably the number of quarter
			# hours aggregated per hour -- confirm against the helper.
			self.matrix_data, self.time_vector = (
				self._do_aggregated_special_transformation_with_replative_importance(
					self.matrix_data, 4, self.feature_coef, self.time_vector,
					self.do_normalize))
			print(np.shape(self.matrix_data))
			print(len(self.time_vector))

		# Single-argument parenthesised prints behave the same under the
		# Python 2 print statement and the Python 3 print function.
		print("the number of non zeros for " + self.feature_name + " values is : " +
			str(np.count_nonzero(self.matrix_data)) + "/" + str(np.size(self.matrix_data)))