def update_file(self):
    '''Apply one synthetic update to a local file and return its path.

    The file selected by a previous call keeps being edited while no more
    than 30 seconds have elapsed since it was selected; otherwise a new file
    is requested from self.file_system.get_file_based_on_type_popularity().
    When a file is available, the update location is drawn from
    self.file_update_location_probabilities and, unless DEBUG is set, the
    update size is computed and the modification is delegated to
    self.file_update_manager.modify_file().

    Returns:
        The path of the locally updated file (to be transferred to the
        sandbox), or None when no file could be selected.
    '''
    print("--------------------------------------------------------")
    # We have to respect both temporal and spatial localities, as well as to
    # model updates themselves (the UpdateManager handles the last aspect).
    # 1) If there is a file that has been updated, check whether we should
    #    continue editing it.
    # TODO: This threshold should be changed by a real distribution
    if (self.current_updated_file is None
            or time.time() - self.last_update_time > 30):
        # 2) Select a random file of the given type to update (a simple
        #    approach that could be refined with per-file edit probabilities).
        self.current_updated_file, self.current_updated_file_type = \
            self.file_system.get_file_based_on_type_popularity(
                self.file_type_update_probabilities,
                self.stereotype_file_types_extensions)
        self.last_update_time = time.time()

    if self.current_updated_file is not None:
        # Single pre-formatted argument: same output as the former
        # multi-item print statement.
        print("FILE TO EDIT:  %s" % (self.current_updated_file,))
        # 3) Select the type of update to be done (Prepend, Middle or Append).
        #    This draw happens even when DEBUG is set.
        update_type = self.file_system.get_fitness_proportionate_element(
            self.file_update_location_probabilities)
        if not DEBUG:
            # 4) Select the size of the update to be done (1%, 40% of the
            #    content).
            file_size = os.path.getsize(self.current_updated_file)
            (function, kv_params) = \
                self.file_update_sizes[self.current_updated_file_type]
            relative_size = float(
                get_random_value_from_fitting(function, kv_params))
            # TODO: At the moment we only consider additions of content in
            # updates
            updated_bytes = abs(int(file_size - (file_size * relative_size)))
            if updated_bytes > FILE_SIZE_MAX:
                updated_bytes = FILE_SIZE_MAX
            print("UPDATE TYPE:  %s  UPDATE SIZE:  %s"
                  % (update_type, updated_bytes))
            content_type = DATA_CHARACTERIZATIONS_PATH + \
                self.file_system.get_type_of_file(
                    self.current_updated_file,
                    self.stereotype_file_types_extensions)
            self.file_update_manager.modify_file(
                self.current_updated_file, update_type, content_type,
                updated_bytes)
    else:
        print("WARNING: No files to update!")

    # 5) Return the path to the locally updated file to be transferred to
    #    the sandbox.
    return self.current_updated_file
def get_file_size(self, mime):
    '''Draw a file size for the given mime type, bounded to the allowed range.

    The fitting (function name and parameters) is looked up in
    self.file_size_fitting[mime] and sampled through
    get_random_value_from_fitting().

    Returns:
        The sampled size, limited first to self.file_size_max and then to
        self.file_size_min.
    '''
    fitting_name, fitting_params = self.file_size_fitting[mime]
    sampled_size = get_random_value_from_fitting(fitting_name, fitting_params)
    # Avoid extremely large or small file sizes due to statistical functions:
    # cap at the maximum first, then raise to the minimum.
    return max(min(sampled_size, self.file_size_max), self.file_size_min)
def get_waiting_time(self, state1, state2):
    '''Return the waiting time for the transition from state1 to state2.

    When TO_WAIT_STATIC is non-zero it is returned as-is. Otherwise the
    fitting stored in self.transition_interarrival_fittings[state1][state2]
    is sampled through get_random_value_from_fitting().

    Returns:
        TO_WAIT_STATIC, or the sampled value limited first to
        MAX_WAITING_TIME and then to MIN_WAITING_TIME.
    '''
    # Use the static interval if it is defined.
    if TO_WAIT_STATIC != 0:
        return TO_WAIT_STATIC

    fitting_name, fitting_params = \
        self.transition_interarrival_fittings[state1][state2]
    sampled_wait = get_random_value_from_fitting(fitting_name, fitting_params)
    # Avoid extremely large or small waiting times due to statistical
    # functions: cap at the maximum first, then raise to the minimum.
    return max(min(sampled_wait, MAX_WAITING_TIME), MIN_WAITING_TIME)
def get_file_size(self, mime):
    '''Draw a file size for the given mime type, bounded to the allowed range.

    The fitting (function name and parameters) is looked up in
    self.file_size_fitting[mime] and sampled through
    get_random_value_from_fitting().

    Returns:
        The sampled size, limited first to self.file_size_max and then to
        self.file_size_min.
    '''
    fitting_name, fitting_params = self.file_size_fitting[mime]
    sampled_size = get_random_value_from_fitting(fitting_name, fitting_params)
    # Avoid extremely large or small file sizes due to statistical functions:
    # cap at the maximum first, then raise to the minimum.
    return max(min(sampled_size, self.file_size_max), self.file_size_min)
def create_file(self):
    '''Create a new synthetic file inside the file system snapshot.

    The file type is drawn from self.stereotype_file_types_probabilities, the
    size from the fitting in self.file_types_sizes (capped at FILE_SIZE_MAX),
    and the path is a random directory under FS_SNAPSHOT_PATH plus a random
    alphanumeric name and one of the extensions of the chosen type. Unless
    DEBUG is set, the content is either generated by the external data
    generator (java -jar DATA_GENERATOR_PATH) or copied from an existing file
    to produce deduplicated content.

    Returns:
        The path of the new file after registering it with
        self.file_system.add_node_to_fs(), or None when no source file was
        available for deduplicated content, the generator exited with a
        non-zero status, or an exception was raised while producing the
        content.
    '''
    # Prior to creating a file, we first decide which type of file to create.
    file_type = self.file_system.get_fitness_proportionate_element(
        self.stereotype_file_types_probabilities)

    # After choosing the type, we proceed by generating the size of the file.
    (function, kv_params) = self.file_types_sizes[file_type]
    size = int(get_random_value_from_fitting(function, kv_params))
    # Ensure that files are not huge.
    if size > FILE_SIZE_MAX:
        size = FILE_SIZE_MAX

    # After generating the file size, decide the path for the new file.
    synthetic_file_base_path = \
        self.file_system.get_random_fs_directory(FS_SNAPSHOT_PATH)
    # Create a realistic name.
    # NOTE(review): the name length comes from the module-level `random`
    # generator while the extension comes from self.r -- confirm that mixing
    # the two generators is intended.
    synthetic_file_base_path += \
        get_random_alphanumeric_string(random.randint(1, 20)) + \
        self.r.choice(self.stereotype_file_types_extensions[file_type])

    # Invoke SDGen to generate realistic file contents.
    characterization = DATA_CHARACTERIZATIONS_PATH + file_type
    success = True
    if not DEBUG:
        try:
            # Decide whether we have to create a new file or to take
            # deduplicated content.
            if self.file_level_deduplication_ratio < self.r.random():
                exit_code = subprocess.call(
                    ['java', '-jar', DATA_GENERATOR_PATH, characterization,
                     str(size), synthetic_file_base_path],
                    cwd=DATA_GENERATOR_PROPERTIES_DIR)
                print("--------------------------------------------------------")
                print("CREATING [NEW] FILE:  %s %s"
                      % (synthetic_file_base_path, size))
                # A non-zero exit status means the generator failed; do not
                # register a file that was never (fully) written.
                if exit_code != 0:
                    raise RuntimeError(
                        "data generator exited with status %s" % exit_code)
            else:
                # Get a random file as content and store it with a new name.
                # The type of the source file is not needed here, so the
                # chosen file_type is left untouched.
                src_path, _ = \
                    self.file_system.get_file_based_on_type_popularity(
                        self.stereotype_file_types_probabilities,
                        self.stereotype_file_types_extensions)
                if src_path is None:
                    return None
                print("--------------------------------------------------------")
                print("CREATING [DEDUPLICATED] FILE:  %s %s"
                      % (synthetic_file_base_path, size))
                shutil.copyfile(src_path, synthetic_file_base_path)
        except Exception as ex:
            # Best effort: report the problem and signal failure with None.
            print(ex)
            success = False

    if success:
        self.file_system.add_node_to_fs(synthetic_file_base_path)
        return synthetic_file_base_path
    return None
def create_file_system_snapshot(self):
    '''Create the initial file system snapshot for this user.

    The number of directories is sampled from the fitting stored in
    self.directory_count_distribution. The configuration file at
    FS_IMAGE_CONFIG_PATH is rewritten in place so that its "Parent_Path" line
    points to FS_SNAPSHOT_PATH and its "Numdirs" line carries the sampled
    value. Unless DEBUG is set, FS_SNAPSHOT_PATH is created if missing and
    the FS_IMAGE_PATH executable is run on the configuration file.
    '''
    print("Creating initial file system snapshot...")
    # Get initial number of directories for this user.
    (function, kv_params) = self.directory_count_distribution
    num_dirs = get_random_value_from_fitting(function, kv_params)

    # Change config file of Impressions. The file is read completely and
    # closed before it is reopened for writing.
    config_lines = []
    with open(FS_IMAGE_CONFIG_PATH, 'r') as config_in:
        for line in config_in:
            if "Parent_Path: " in line:
                line = "Parent_Path: " + FS_SNAPSHOT_PATH + " 1\n"
                if not DEBUG and not os.path.exists(FS_SNAPSHOT_PATH):
                    os.makedirs(FS_SNAPSHOT_PATH)
            if "Numdirs" in line:
                line = "Numdirs: " + str(num_dirs) + " N\n"
            config_lines.append(line)
    fs_config = ''.join(config_lines)

    with open(FS_IMAGE_CONFIG_PATH, 'w') as fs_config_file:
        # Drop the last character of the content and terminate it with a
        # single newline, exactly as the former `print >>` statement did.
        fs_config_file.write(fs_config[:-1] + "\n")

    # Create the file system.
    time.sleep(1)
    if not DEBUG:
        subprocess.call([FS_IMAGE_PATH, FS_IMAGE_CONFIG_PATH])
    time.sleep(1)
# Reads a comma-separated fittings file and, for every row, writes 1000
# samples drawn from the row's distribution into csv/<profile>_<op1>_<op2>.dat.
# NOTE(review): this expression evaluates to "operation_inter.csv_scipy";
# confirm that this is the intended file name.
filename = "operation_inter{}_scipy".format('.csv')
# For each line of the file, generate a .dat file.
index_line = 0
with open(filename, 'r') as fittings_file:
    for index_line, fs_line in enumerate(fittings_file, 1):
        line = fs_line.split(',')
        print("%s %s" % (index_line, line))
        title = line[0]
        profile = line[1]
        op1 = line[2]
        op2 = line[3]
        dist = line[4]
        # The parameters may themselves contain commas, so everything after
        # the distribution name is glued back together.
        args = ','.join(line[5:])
        print("%s %s" % (dist, args))
        # SECURITY: eval() executes whatever expression the input file
        # contains; only run this script on trusted fittings files.
        ob = eval(args)
        file_out_name = "csv/{}_{}_{}.dat".format(profile, op1, op2)
        with open(file_out_name, "w") as test:
            print(file_out_name)
            for i in range(1000):
                value = get_random_value_from_fitting(dist, ob)
                test.write("%s\n" % (value,))
print("End read!")
print fs_line items = fs_line.split(',') line = items index_line+=1 print line title = line[0] mime = line[1] dist = line[2] args = ','.join(line[3:]) print args ob = eval(args) outfile = "test_{}{}.dat".format(mime, index_line) test = open(outfile, "w") print outfile for i in range(2000): value = get_random_value_from_fitting(dist,ob) print >> test, value test.close() """ test = open("test.dat", "w") # generar un archio dat for i in range(1000): value = get_random_value_from_fitting('genpareto',{'shape':3.776394061,'scale':67176.96527,'threshold':-2.220446049e-15}) print >> test, value # print stats.genpareto(2.9948, scale=2.4671, loc=0.0250).rvs() #c=[0.7180, 0.9328, 1.2021] #print stats.fisk.rvs(0.5201, 0.4190) test.close() """
fatiguelife OK inversegaussian OK lognormal OK generalized pareto NOT EXACT logistic NO loglogistic/fisk NO ''' numpy.random.seed(RANDOM_SEED) function = "genextreme" kv_params = {'shape':-0.954000701932126,'scale':0.0455224790093405,'loc':0.0691909139925137} for i in range(10): print get_random_value_from_fitting(function, kv_params) #v = numpy.random.gumbel(loc=1.20212649309532, scale=0.932804666751013, size=10000) #fitting = genextreme(-0.5471, loc=0.1189, scale=0.1105) #fitting = genpareto(2.1686, scale=2.6006e+003, loc=-2.2204e-015) #fitting = stats.genextreme(-0.698811055279666, scale=942.089026948802, loc=1200.79721156363) #fitting = lognorm(1.38272913665692, scale=math.exp(8.222)) #print math.exp(8.222) #fitting = stats.invgauss(5.3146e+06, scale=927.7) #fitting = stats.fatiguelife(20.6005318028672, scale=559477.198848146) #(1.4064e+006, scale=34.0631) #fitting = stats.cauchy(1488.0640353570552, 596.42456464706072) #fitting = stats.fisk(0.4190, shape=0.5201) #mu=9.31524829249769 sigma=41.5219061720147 #fitting = lognorm(2.8638, scale=math.exp(7.003)) #print math.exp(7.003) #fitting = lognorm(2.0881, scale=math.exp(8.8915))