import cv2
from numpy import random


def SaltAndPepper(src, percentage):
    # Work on a copy so the caller's image is not modified in place.
    NoiseImg = src.copy()
    NoiseNum = int(percentage * src.shape[0] * src.shape[1])
    for i in range(NoiseNum):
        # random_integers is inclusive on both ends (deprecated in NumPy;
        # randint / Generator.integers are the current equivalents)
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        if random.random_integers(0, 1) == 0:
            NoiseImg[randX, randY] = 0    # pepper: black pixel
        else:
            NoiseImg[randX, randY] = 255  # salt: white pixel
    cv2.imshow('PepperandSalt', NoiseImg)
    cv2.waitKey(0)  # imshow needs a waitKey to actually render the window
from numpy import random


def PepperandSalt(src, percentage):
    NoiseImg = src.copy()
    NoiseNum = int(percentage * src.shape[0] * src.shape[1])
    for i in range(NoiseNum):
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        # random_integers(0, 1) returns 0 or 1, so test for equality
        # instead of comparing an integer against 0.5
        if random.random_integers(0, 1) == 0:
            NoiseImg[randX, randY] = 0
        else:
            NoiseImg[randX, randY] = 255
    return NoiseImg
from numpy import random


def PepperandSalt(src, pepperPer, saltPer):  # salt-and-pepper noise
    NoiseImg = src.copy()
    pepperNum = int(pepperPer * src.shape[0] * src.shape[1])
    saltNum = int(saltPer * src.shape[0] * src.shape[1])
    for i in range(pepperNum):
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        NoiseImg[randX, randY] = 0    # pepper: black pixels
    for i in range(saltNum):
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        NoiseImg[randX, randY] = 255  # salt: white pixels
    return NoiseImg
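A minimal usage sketch for the noise functions above; the file name lena.png is a placeholder assumption, and because pixels are sampled with replacement the effective noise density is slightly below pepperPer + saltPer.

import cv2

img = cv2.imread('lena.png', cv2.IMREAD_GRAYSCALE)  # hypothetical input file
noisy = PepperandSalt(img, pepperPer=0.01, saltPer=0.01)
cv2.imshow('noisy', noisy)
cv2.waitKey(0)
cv2.destroyAllWindows()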
def step(self, received):
    """Called every frame to get commands."""
    if self.send_name:
        self.send_name = False
        return "SimpleBot"
    else:
        # For now, always move the first ship in a random direction (0-359 degrees)
        return "t 0 1 {}".format(random.random_integers(0, 359))
def set_data(self, data, subset_data=None, **args):
    if args.get("skipIfSame", 1):
        if checksum(data) == checksum(self.raw_data) and \
           checksum(subset_data) == checksum(self.raw_subset_data):
            return

    self.domain_data_stat = []
    self.attr_values = {}
    self.original_data = self.original_subset_data = None
    self.scaled_data = self.scaled_subset_data = None
    self.no_jittering_scaled_data = self.no_jittering_scaled_subset_data = None
    self.valid_data_array = self.valid_subset_data_array = None

    self.raw_data = None
    self.raw_subset_data = None
    self.have_data = False
    self.have_subset_data = False

    self.data_has_class = False
    self.data_has_continuous_class = False
    self.data_has_discrete_class = False
    self.data_class_name = None
    self.data_domain = None
    self.data_class_index = None

    if data is None:
        return

    full_data = self.merge_data_sets(data, subset_data)

    self.raw_data = data
    self.raw_subset_data = subset_data

    len_data = data and len(data) or 0

    self.attribute_names = [attr.name for attr in full_data.domain]
    self.attribute_name_index = dict([(full_data.domain[i].name, i)
                                      for i in range(len(full_data.domain))])
    self.attribute_flip_info = {}

    self.data_domain = full_data.domain
    self.data_has_class = bool(full_data.domain.class_var)
    self.data_has_continuous_class = bool(
        self.data_has_class and
        full_data.domain.class_var.var_type == VarTypes.Continuous)
    self.data_has_discrete_class = bool(
        self.data_has_class and
        full_data.domain.class_var.var_type == VarTypes.Discrete)
    self.data_class_name = self.data_has_class and full_data.domain.class_var.name
    if self.data_has_class:
        self.data_class_index = self.attribute_name_index[self.data_class_name]
    self.have_data = bool(self.raw_data and len(self.raw_data) > 0)
    self.have_subset_data = bool(self.raw_subset_data and
                                 len(self.raw_subset_data) > 0)

    self.domain_data_stat = getCached(full_data, DomainBasicStats, (full_data,))

    sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs", 1)

    for index in range(len(full_data.domain)):
        attr = full_data.domain[index]
        if attr.var_type == VarTypes.Discrete:
            self.attr_values[attr.name] = [0, len(attr.values)]
        elif attr.var_type == VarTypes.Continuous:
            self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                           self.domain_data_stat[index].max]

    # the original_data, no_jittering_scaled_data and validArray are arrays
    # that we can cache so that other visualization widgets don't need to
    # compute them. The scaled_data, on the other hand, has to be computed
    # for each widget separately because of the different jitter_continuous
    # and jitter_size values
    if getCached(data, "visualizationData") and subset_data is None:
        self.original_data, self.no_jittering_scaled_data, self.valid_data_array = \
            getCached(data, "visualizationData")
        self.original_subset_data = self.no_jittering_scaled_subset_data = \
            self.valid_subset_data_array = \
            np.array([]).reshape([len(self.original_data), 0])
    else:
        no_jittering_data = np.hstack((full_data.X, full_data.Y)).T
        # `!= np.NaN` is always True because NaN compares unequal to
        # everything; ~np.isnan() is the correct validity mask
        valid_data_array = ~np.isnan(no_jittering_data)
        original_data = no_jittering_data.copy()

        for index in range(len(data.domain)):
            attr = data.domain[index]
            if attr.var_type == VarTypes.Discrete:
                # see if the values for discrete attributes have to be resorted
                variable_value_indices = get_variable_value_indices(
                    data.domain[index], sort_values_for_discrete_attrs)
                if 0 in [i == variable_value_indices[attr.values[i]]
                         for i in range(len(attr.values))]:
                    # make the array contiguous, otherwise the putmask
                    # function does not work
                    line = no_jittering_data[index].copy()
                    indices = [np.where(line == val, 1, 0)
                               for val in range(len(attr.values))]
                    for i in range(len(attr.values)):
                        np.putmask(line, indices[i],
                                   variable_value_indices[attr.values[i]])
                    no_jittering_data[index] = line  # save the changed array
                    original_data[index] = line  # reorder also the values in the original data
                no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                            / float(2 * len(attr.values)))
            elif attr.var_type == VarTypes.Continuous:
                diff = self.domain_data_stat[index].max - \
                    self.domain_data_stat[index].min or 1  # if all values are the same then prevent division by zero
                no_jittering_data[index] = (no_jittering_data[index] -
                                            self.domain_data_stat[index].min) / diff

        self.original_data = original_data[:, :len_data]
        self.original_subset_data = original_data[:, len_data:]
        self.no_jittering_scaled_data = no_jittering_data[:, :len_data]
        self.no_jittering_scaled_subset_data = no_jittering_data[:, len_data:]
        self.valid_data_array = valid_data_array[:, :len_data]
        self.valid_subset_data_array = valid_data_array[:, len_data:]

    if data:
        setCached(data, "visualizationData",
                  (self.original_data, self.no_jittering_scaled_data,
                   self.valid_data_array))
    if subset_data:
        setCached(subset_data, "visualizationData",
                  (self.original_subset_data, self.no_jittering_scaled_subset_data,
                   self.valid_subset_data_array))

    # compute the scaled_data arrays
    scaled_data = np.concatenate([self.no_jittering_scaled_data,
                                  self.no_jittering_scaled_subset_data], axis=1)

    # Random generators for jittering; note that seeds drawn up to
    # sys.maxsize - 1 can exceed RandomState's 2**32 - 1 seed limit on 64-bit
    # platforms (a later variant of this code draws from [0, 2**30 - 1])
    random = np.random.RandomState(seed=self.jitter_seed)
    rand_seeds = random.random_integers(0, sys.maxsize - 1, size=len(data.domain))
    for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
        # Need to use a different seed for each feature
        random = np.random.RandomState(seed=rseed)
        attr = data.domain[index]
        if attr.var_type == VarTypes.Discrete:
            scaled_data[index] += (self.jitter_size /
                                   (50.0 * max(1, len(attr.values)))) * \
                                  (random.rand(len(full_data)) - 0.5)
        elif attr.var_type == VarTypes.Continuous and self.jitter_continuous:
            scaled_data[index] += self.jitter_size / 50.0 * \
                (0.5 - random.rand(len(full_data)))
            scaled_data[index] = np.absolute(scaled_data[index])  # fix values below zero
            ind = np.where(scaled_data[index] > 1.0, 1, 0)  # fix values above 1
            np.putmask(scaled_data[index], ind,
                       2.0 - np.compress(ind, scaled_data[index]))

    if self.have_subset_data:
        # Fix all subset instances which are also in the main data
        # to have the same jittered values
        ids_to_indices = dict((inst.id, i)
                              for i, inst in enumerate(self.raw_data))
        subset_ids_map = [[i, ids_to_indices[s.id]]
                          for i, s in enumerate(self.raw_subset_data)
                          if s.id in ids_to_indices]
        if len(subset_ids_map):
            subset_ids_map = np.array(subset_ids_map)
            subset_ids_map[:, 0] += len_data
            scaled_data[:, subset_ids_map[:, 0]] = \
                scaled_data[:, subset_ids_map[:, 1]]

    self.scaled_data = scaled_data[:, :len_data]
    self.scaled_subset_data = scaled_data[:, len_data:]
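The jitter step above keeps values inside [0, 1] by reflecting out-of-range values back in rather than clipping them. A minimal standalone sketch of that trick, independent of the Orange API:

import numpy as np

rng = np.random.RandomState(seed=42)
x = np.linspace(0.0, 1.0, 11) + 0.1 * (rng.rand(11) - 0.5)  # jittered values, roughly in [-0.05, 1.05]
x = np.absolute(x)                  # a value below 0 reflects to -x
x = np.where(x > 1.0, 2.0 - x, x)   # a value above 1 reflects to 2 - x
assert ((0.0 <= x) & (x <= 1.0)).all()

Unlike clipping, reflection does not pile jittered points up exactly on the 0 and 1 boundaries.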
for geom in link.GetGeometries():
    geom.SetDiffuseColor([0.862745, 0.862745, 0.862745, 0.2])
    geom.SetTransparency(0.2)
ind = ind + 1

print("Number of joints:------", repr(robot.GetActiveDOF()))
pdb.set_trace()

h = 1
phi = 2.5
xxrange = array([-0.8, -0.4])
yyrange = array([-0.6, 0.6])
nb = 20
noise = 0.0
Psurf, xx, yy, zz = GenerateSurface(h, phi, xxrange, yyrange, nb)
Psurfnoise, xxnoise, yynoise, zznoise = GenerateSurface(h, phi, xxrange, 0.5 * yyrange, nb, noise)

# sample 100 random surface point indices (with replacement)
id = random.random_integers(0, shape(Psurfnoise)[0] - 1, 100)
numpy.savetxt('KUKASurf.txt', Psurfnoise[id, :])  # save the data for learning
handles.append(env.plot3(points=Psurfnoise[id, :], pointsize=0.015,
                         colors=array(((1, 0.5, 0))), drawstyle=1))

# draw the surface mesh as green line strips along both grid directions
for i in range(nb):
    pxmesh = np.vstack([xx[i, :], yy[i, :], zz[i, :]]).T
    handles.append(env.drawlinestrip(points=pxmesh, linewidth=2.5,
                                     colors=array(((0, 1, 0, 0.5)))))
for i in range(nb):
    pxmesh = np.vstack([xx[:, i], yy[:, i], zz[:, i]]).T
    handles.append(env.drawlinestrip(points=pxmesh, linewidth=2.5,
                                     colors=array(((0, 1, 0, 0.5)))))
env.UpdatePublishedBodies()
# raw_input('press enter to continue')

# Joints = numpy.zeros(7)
# Joints[0] = radians(-30)
def set_data(self, data, **args):
    if args.get("skipIfSame", 1):
        if checksum(data) == checksum(self.raw_data):
            return

    self.domain_data_stat = []
    self.attr_values = {}
    self.original_data = None
    self.scaled_data = None
    self.no_jittering_scaled_data = None
    self.valid_data_array = None

    self.raw_data = None
    self.have_data = False
    self.data_has_class = False
    self.data_has_continuous_class = False
    self.data_has_discrete_class = False
    self.data_class_name = None
    self.data_domain = None
    self.data_class_index = None

    if data is None:
        return

    full_data = data
    self.raw_data = data

    len_data = data and len(data) or 0

    self.attribute_names = [attr.name for attr in full_data.domain]
    self.attribute_name_index = dict([(full_data.domain[i].name, i)
                                      for i in range(len(full_data.domain))])
    self.attribute_flip_info = {}

    self.data_domain = full_data.domain
    self.data_has_class = bool(full_data.domain.class_var)
    self.data_has_continuous_class = full_data.domain.has_continuous_class
    self.data_has_discrete_class = full_data.domain.has_discrete_class
    self.data_class_name = self.data_has_class and full_data.domain.class_var.name
    if self.data_has_class:
        self.data_class_index = self.attribute_name_index[self.data_class_name]
    self.have_data = bool(self.raw_data and len(self.raw_data) > 0)

    self.domain_data_stat = getCached(full_data, DomainBasicStats, (full_data,))

    sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs", 1)

    for index in range(len(full_data.domain)):
        attr = full_data.domain[index]
        if attr.is_discrete:
            self.attr_values[attr.name] = [0, len(attr.values)]
        elif attr.is_continuous:
            self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                           self.domain_data_stat[index].max]

    if 'no_data' in args:
        return

    # the original_data, no_jittering_scaled_data and validArray are arrays
    # that we can cache so that other visualization widgets don't need to
    # compute them. The scaled_data, on the other hand, has to be computed
    # for each widget separately because of the different jitter_continuous
    # and jitter_size values
    if getCached(data, "visualizationData"):
        self.original_data, self.no_jittering_scaled_data, self.valid_data_array = \
            getCached(data, "visualizationData")
    else:
        no_jittering_data = np.c_[full_data.X, full_data.Y].T
        valid_data_array = ~np.isnan(no_jittering_data)
        original_data = no_jittering_data.copy()

        for index in range(len(data.domain)):
            attr = data.domain[index]
            if attr.is_discrete:
                # see if the values for discrete attributes have to be resorted
                variable_value_indices = get_variable_value_indices(
                    data.domain[index], sort_values_for_discrete_attrs)
                if 0 in [i == variable_value_indices[attr.values[i]]
                         for i in range(len(attr.values))]:
                    # make the array contiguous, otherwise the putmask
                    # function does not work
                    line = no_jittering_data[index].copy()
                    indices = [np.where(line == val, 1, 0)
                               for val in range(len(attr.values))]
                    for i in range(len(attr.values)):
                        np.putmask(line, indices[i],
                                   variable_value_indices[attr.values[i]])
                    no_jittering_data[index] = line  # save the changed array
                    original_data[index] = line  # reorder also the values in the original data
                no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                            / float(2 * len(attr.values)))
            elif attr.is_continuous:
                diff = self.domain_data_stat[index].max - \
                    self.domain_data_stat[index].min or 1  # if all values are the same then prevent division by zero
                no_jittering_data[index] = (no_jittering_data[index] -
                                            self.domain_data_stat[index].min) / diff

        self.original_data = original_data
        self.no_jittering_scaled_data = no_jittering_data
        self.valid_data_array = valid_data_array

    if data:
        setCached(data, "visualizationData",
                  (self.original_data, self.no_jittering_scaled_data,
                   self.valid_data_array))

    # compute the scaled_data arrays
    # (note: this aliases, rather than copies, the cached no-jittering array)
    scaled_data = self.no_jittering_scaled_data

    # Random generators for jittering
    random = np.random.RandomState(seed=self.jitter_seed)
    rand_seeds = random.random_integers(0, 2 ** 30 - 1, size=len(data.domain))
    for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
        # Need to use a different seed for each feature
        random = np.random.RandomState(seed=rseed)
        attr = data.domain[index]
        if attr.is_discrete:
            scaled_data[index] += (self.jitter_size /
                                   (50.0 * max(1, len(attr.values)))) * \
                                  (random.rand(len(full_data)) - 0.5)
        elif attr.is_continuous and self.jitter_continuous:
            scaled_data[index] += self.jitter_size / 50.0 * \
                (0.5 - random.rand(len(full_data)))
            scaled_data[index] = np.absolute(scaled_data[index])  # fix values below zero
            ind = np.where(scaled_data[index] > 1.0, 1, 0)  # fix values above 1
            np.putmask(scaled_data[index], ind,
                       2.0 - np.compress(ind, scaled_data[index]))

    self.scaled_data = scaled_data[:, :len_data]
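RandomState.random_integers has been deprecated in NumPy since 1.11 in favor of randint (and Generator.integers in the newer Generator API). A minimal sketch of the per-feature seeding above with the current API, assuming nothing beyond NumPy itself:

import numpy as np

root = np.random.default_rng(seed=0)             # stand-in for jitter_seed
rand_seeds = root.integers(0, 2 ** 30, size=5)   # high is exclusive, so this matches random_integers(0, 2**30 - 1)
feature_rngs = [np.random.default_rng(seed=int(s)) for s in rand_seeds]
jitter = [rng.random(3) - 0.5 for rng in feature_rngs]  # one independent stream per feature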
def set_data(self, data, subset_data=None, **args):
    if args.get("skipIfSame", 1):
        if checksum(data) == checksum(self.raw_data) and \
           checksum(subset_data) == checksum(self.raw_subset_data):
            return

    self.domain_data_stat = []
    self.attr_values = {}
    self.original_data = self.original_subset_data = None
    self.scaled_data = self.scaled_subset_data = None
    self.no_jittering_scaled_data = self.no_jittering_scaled_subset_data = None
    self.valid_data_array = self.valid_subset_data_array = None

    self.raw_data = None
    self.raw_subset_data = None
    self.have_data = False
    self.have_subset_data = False

    self.data_has_class = False
    self.data_has_continuous_class = False
    self.data_has_discrete_class = False
    self.data_class_name = None
    self.data_domain = None
    self.data_class_index = None

    if data is None:
        return

    full_data = self.merge_data_sets(data, subset_data)

    self.raw_data = data
    self.raw_subset_data = subset_data

    len_data = data and len(data) or 0

    self.attribute_names = [attr.name for attr in full_data.domain]
    self.attribute_name_index = dict([(full_data.domain[i].name, i)
                                      for i in range(len(full_data.domain))])
    self.attribute_flip_info = {}

    self.data_domain = full_data.domain
    self.data_has_class = bool(full_data.domain.class_var)
    self.data_has_continuous_class = \
        isinstance(full_data.domain.class_var, ContinuousVariable)
    self.data_has_discrete_class = \
        isinstance(full_data.domain.class_var, DiscreteVariable)
    self.data_class_name = self.data_has_class and full_data.domain.class_var.name
    if self.data_has_class:
        self.data_class_index = self.attribute_name_index[self.data_class_name]
    self.have_data = bool(self.raw_data and len(self.raw_data) > 0)
    self.have_subset_data = bool(self.raw_subset_data and
                                 len(self.raw_subset_data) > 0)

    self.domain_data_stat = getCached(full_data, DomainBasicStats, (full_data,))

    sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs", 1)

    for index in range(len(full_data.domain)):
        attr = full_data.domain[index]
        if isinstance(attr, DiscreteVariable):
            self.attr_values[attr.name] = [0, len(attr.values)]
        elif isinstance(attr, ContinuousVariable):
            self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                           self.domain_data_stat[index].max]

    # the original_data, no_jittering_scaled_data and validArray are arrays
    # that we can cache so that other visualization widgets don't need to
    # compute them. The scaled_data, on the other hand, has to be computed
    # for each widget separately because of the different jitter_continuous
    # and jitter_size values
    if getCached(data, "visualizationData") and subset_data is None:
        self.original_data, self.no_jittering_scaled_data, self.valid_data_array = \
            getCached(data, "visualizationData")
        self.original_subset_data = self.no_jittering_scaled_subset_data = \
            self.valid_subset_data_array = \
            np.array([]).reshape([len(self.original_data), 0])
    else:
        no_jittering_data = np.hstack((full_data.X, full_data.Y)).T
        # `!= np.NaN` is always True because NaN compares unequal to
        # everything; ~np.isnan() is the correct validity mask
        valid_data_array = ~np.isnan(no_jittering_data)
        original_data = no_jittering_data.copy()

        for index in range(len(data.domain)):
            attr = data.domain[index]
            if isinstance(attr, DiscreteVariable):
                # see if the values for discrete attributes have to be resorted
                variable_value_indices = get_variable_value_indices(
                    data.domain[index], sort_values_for_discrete_attrs)
                if 0 in [i == variable_value_indices[attr.values[i]]
                         for i in range(len(attr.values))]:
                    # make the array contiguous, otherwise the putmask
                    # function does not work
                    line = no_jittering_data[index].copy()
                    indices = [np.where(line == val, 1, 0)
                               for val in range(len(attr.values))]
                    for i in range(len(attr.values)):
                        np.putmask(line, indices[i],
                                   variable_value_indices[attr.values[i]])
                    no_jittering_data[index] = line  # save the changed array
                    original_data[index] = line  # reorder also the values in the original data
                no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                            / float(2 * len(attr.values)))
            elif isinstance(attr, ContinuousVariable):
                diff = self.domain_data_stat[index].max - \
                    self.domain_data_stat[index].min or 1  # if all values are the same then prevent division by zero
                no_jittering_data[index] = (no_jittering_data[index] -
                                            self.domain_data_stat[index].min) / diff

        self.original_data = original_data[:, :len_data]
        self.original_subset_data = original_data[:, len_data:]
        self.no_jittering_scaled_data = no_jittering_data[:, :len_data]
        self.no_jittering_scaled_subset_data = no_jittering_data[:, len_data:]
        self.valid_data_array = valid_data_array[:, :len_data]
        self.valid_subset_data_array = valid_data_array[:, len_data:]

    if data:
        setCached(data, "visualizationData",
                  (self.original_data, self.no_jittering_scaled_data,
                   self.valid_data_array))
    if subset_data:
        setCached(subset_data, "visualizationData",
                  (self.original_subset_data, self.no_jittering_scaled_subset_data,
                   self.valid_subset_data_array))

    # compute the scaled_data arrays
    scaled_data = np.concatenate([self.no_jittering_scaled_data,
                                  self.no_jittering_scaled_subset_data], axis=1)

    # Random generators for jittering; note that seeds drawn up to
    # sys.maxsize - 1 can exceed RandomState's 2**32 - 1 seed limit on 64-bit
    # platforms (a later variant of this code draws from [0, 2**30 - 1])
    random = np.random.RandomState(seed=self.jitter_seed)
    rand_seeds = random.random_integers(0, sys.maxsize - 1, size=len(data.domain))
    for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
        # Need to use a different seed for each feature
        random = np.random.RandomState(seed=rseed)
        attr = data.domain[index]
        if isinstance(attr, DiscreteVariable):
            scaled_data[index] += (self.jitter_size /
                                   (50.0 * max(1, len(attr.values)))) * \
                                  (random.rand(len(full_data)) - 0.5)
        elif isinstance(attr, ContinuousVariable) and self.jitter_continuous:
            scaled_data[index] += self.jitter_size / 50.0 * \
                (0.5 - random.rand(len(full_data)))
            scaled_data[index] = np.absolute(scaled_data[index])  # fix values below zero
            ind = np.where(scaled_data[index] > 1.0, 1, 0)  # fix values above 1
            np.putmask(scaled_data[index], ind,
                       2.0 - np.compress(ind, scaled_data[index]))

    if self.have_subset_data:
        # Fix all subset instances which are also in the main data
        # to have the same jittered values
        ids_to_indices = dict((inst.id, i)
                              for i, inst in enumerate(self.raw_data))
        subset_ids_map = [[i, ids_to_indices[s.id]]
                          for i, s in enumerate(self.raw_subset_data)
                          if s.id in ids_to_indices]
        if len(subset_ids_map):
            subset_ids_map = np.array(subset_ids_map)
            subset_ids_map[:, 0] += len_data
            scaled_data[:, subset_ids_map[:, 0]] = \
                scaled_data[:, subset_ids_map[:, 1]]

    self.scaled_data = scaled_data[:, :len_data]
    self.scaled_subset_data = scaled_data[:, len_data:]
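The subset-alignment step above copies jittered columns from the main block into subset columns that share an instance id, using fancy indexing on both sides of the assignment. A tiny standalone sketch of that column remapping; the data layout (features as rows, main columns followed by subset columns) is assumed:

import numpy as np

scaled = np.arange(12.0).reshape(2, 6)        # 2 features, 4 main + 2 subset columns
len_data = 4
subset_ids_map = np.array([[0, 1], [1, 3]])   # subset column i mirrors main column j
subset_ids_map[:, 0] += len_data              # shift subset indices into the subset block
scaled[:, subset_ids_map[:, 0]] = scaled[:, subset_ids_map[:, 1]]
assert (scaled[:, 4] == scaled[:, 1]).all() and (scaled[:, 5] == scaled[:, 3]).all()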
def DistortAN(self, movechance=.15):
    '''
    Randomly replace atom types.
    '''
    for i in range(0, self.atoms.shape[0]):
        if (random.uniform(0, 1) < movechance):
            self.atoms[i] = random.random_integers(
                1, PARAMS["MAX_ATOMIC_NUMBER"])
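A vectorized standalone sketch of the same distortion, outside the class; the function name, the explicit max_z argument (replacing PARAMS["MAX_ATOMIC_NUMBER"]), and the rng parameter are all illustrative assumptions:

import numpy as np

def distort_atom_types(atoms, max_z, movechance=0.15, rng=None):
    # Each atom is re-typed with probability `movechance`; replacements are
    # uniform over atomic numbers 1..max_z (randint's upper bound is exclusive).
    rng = rng or np.random.RandomState()
    atoms = atoms.copy()
    mask = rng.uniform(0.0, 1.0, size=atoms.shape[0]) < movechance
    atoms[mask] = rng.randint(1, max_z + 1, size=mask.sum())
    return atoms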
line = fin.readline()
line = line.replace("[&R]", "")  # reformat rate multiplier for STEM
line = line.replace("'", "")
line = line.replace(":0.0;", ";")  # remove the 0.0 branch length at the end
treeVector.append(line)
fin.close()

# save the k trees into a file
try:
    treeDirectory = "RandomlyChosenTrees_T1_100k/"
    os.mkdir(treeDirectory)
except OSError:
    pass  # directory already exists
treeFileOut = (treeDirectory + "Ne" + str(n) + "_t" + str(t) +
               "_k" + str(k) + "_rep" + str(r) + ".tre")
fout = open(treeFileOut, 'w')
fout.writelines(treeVector)
fout.close()
# with open(filename) as f:  # automatically closes input file as soon as it's done
#     fileData = f.readlines()


if __name__ == '__main__':
    # K = [2, 4, 8, 16, 32, 64]
    # picks 126 random integers between 0 and 100 (inclusive), with replacement
    randNums = random.random_integers(0, 100, 126)
    getTrees(100, randNums)
    # reps = 100
    # for k in K:
    #     getTrees(k, 100, randNums)
    #     reps = 100
def get_list(key, page_size, page_num, param_dict):
    re_login()
    page_data = {
        'curpage': page_num,
        'RecordsPerPage': page_size,
        'QueryID': random.random_integers(1, 9),
        'ID': '',
        'turnpage': page_num - 1 if page_num - 1 > 0 else page_num + 1,
        'tpagemode': 'L',
        'Fields': '',
        'DisplayMode': 'listmode',
        'dbPrefix': param_dict['dbPrefix'],
        'PageName': param_dict['pagename'],
        'sorttype': "(FFD,'RANK') desc",
        'isinEn': param_dict['isinEn'],
    }
    # fetch the search result list
    list_url = 'https://kns.cnki.net/kns/brief/brief.aspx?' + urllib.parse.urlencode(page_data)
    r_list_doc = session.get(list_url, headers=headers, timeout=global_timeout)
    r_list_doc.encoding = 'utf-8'
    log.info(list_url)
    soup = BeautifulSoup(r_list_doc.text, 'lxml', from_encoding='utf-8')
    headers['Referer'] = list_url
    trs = soup.select('.GridTableContent tr')
    # skip the header row
    err_cn = 0
    for tr in trs[1:]:
        tds = tr.select('td')
        # row number
        tr_order = tds[0].text
        # title
        tr_title = tds[1].select('a')[0].text
        tr_title = tr_title.replace("'", "-")
        # authors
        tr_authors = ""
        authors_a = tds[2].select('a')
        for author_a in authors_a:
            tr_authors = tr_authors + "_" + author_a.text
        # first author
        tr_author = ""
        if len(authors_a) > 0:
            tr_author = authors_a[0].text
            tr_author = tr_author.replace("'", "-")
        # journal / source
        from_source = ""
        if len(tds) > 3:
            if len(tds[3].select('a')) > 0:
                from_source = tds[3].select('a')[0].text
        # publication date
        tr_time = ""
        if len(tds) > 4:
            tr_time = tds[4].text
        # citation count
        tr_db = ""
        if len(tds) > 5:
            tr_db = tds[5].text
        # download link, https://kns.cnki.net/kns/download.aspx
        tr_down_url = ""
        tr_down_title = ""
        if len(tds) > 6:
            if len(tds[6].select('a')) > 0:
                tr_down_url = tds[6].select('a')[0].attrs['href']
                tr_down_title = tds[6].select('a')[0].attrs['title']
        # reading mode; the compared titles are the site's Chinese link labels
        type = ""
        if len(tds) > 7:
            if len(tds[7].select('a')) > 0:
                type = tds[7].select('a')[0].attrs['title']
        if type == "HTML阅读":
            tr_file_type = ".pdf"
        elif type == "阅读":
            tr_file_type = ".caj"
        else:
            tr_file_type = ""
            log.info("unknown file type, original type {}".format(type))
        # log the parsed table row
        log.info("{},{},{},{},{},{}".format(tr_order, tr_title, tr_author,
                                            tr_file_type, tr_time.strip(), tr_db.strip()))
        # deduplicate downloaded files
        file_will_write = os.path.join(file_dir, tr_title)
        if check_if_preserve(tr_title, tr_author):
            log.info('\tno permission to download this article, moving on ... {}'.format(tr_down_title))
            time.sleep(15)
            continue
        if check_before_download(tr_title, tr_author, from_source):
            log.info('\tfile not present, starting download ... {}'.format(file_will_write))
            article_url = 'https://kns.cnki.net' + tds[1].select('a')[0].attrs['href']
            article_response = session.get(article_url, headers=headers, timeout=global_timeout)
            article_soup = BeautifulSoup(article_response.text, 'lxml', from_encoding='utf-8')
            pdf_down = article_soup.select_one("#pdfDown")
            # only proceed when the page has a pdf download button
            if pdf_down:
                download_url = pdf_down.attrs['href']
                if not str(download_url).startswith("http"):
                    download_url = 'https://kns.cnki.net' + download_url
                if str(download_url).startswith("https://chkdx.cnki.net"):
                    log.info('\tno permission for the pdf download link ... article link {}'.format(download_url))
                else:
                    log.info('\tdownload link ... {}'.format(download_url))
                    try:
                        download(tr_title, tr_author, download_url, tr_authors)
                    except:
                        log.error(traceback.format_exc())
                        log.error("download failed: {0},{1}".format(tr_title, download_url))
                        err_cn = err_cn + 1
                        if err_cn >= 10:
                            exit()
                time.sleep(15)
            else:
                log.info('\tno pdf download link ... article link {}'.format(article_url))
def get_list(key, page_num, param_dict):
    page_data = {
        'curpage': page_num,
        'RecordsPerPage': '20',
        'QueryID': random.random_integers(1, 9),
        'ID': '',
        'turnpage': page_num - 1 if page_num - 1 > 0 else page_num + 1,
        'tpagemode': 'L',
        'Fields': '',
        'DisplayMode': 'listmode',
        'dbPrefix': param_dict['dbPrefix'],
        'PageName': param_dict['pagename'],
        'sorttype': "(FFD,'RANK') desc",
        'isinEn': param_dict['isinEn'],
    }
    # fetch the search result list
    list_url = 'http://kns.cnki.net/kns/brief/brief.aspx?' + urllib.parse.urlencode(page_data)
    r_list_doc = session.get(list_url, headers=headers)
    r_list_doc.encoding = 'utf-8'
    # print(r_list_doc.text)
    print(list_url)
    soup = BeautifulSoup(r_list_doc.text, 'lxml', from_encoding='utf-8')
    headers['Referer'] = list_url
    trs = soup.select('.GridTableContent tr')
    # skip the header row
    for tr in trs[1:]:
        tds = tr.select('td')
        # row number
        tr_order = tds[0].text
        # title
        tr_title = tds[1].select('a')[0].text
        # authors
        tr_authors = ""
        authors_a = tds[2].select('a')
        for author_a in authors_a:
            tr_authors = tr_authors + "_" + author_a.text
        # first author
        tr_author = ""
        if len(authors_a) > 0:
            tr_author = authors_a[0].text
        # publication date
        tr_time = tds[4].text
        # database
        tr_db = tds[5].text
        # download link, http://kns.cnki.net/kns/download.aspx
        tr_down_url = tds[7].select('a')[0].attrs['href']
        # file type; the compared titles are the site's Chinese link labels
        type = tds[8].select('a')[0].attrs['title']
        if type == "HTML阅读":
            tr_file_type = ".pdf"
        elif type == "阅读":
            tr_file_type = ".caj"
        else:
            tr_file_type = ""
            log.info("unknown file type, original type {}".format(type))
        # log the parsed table row
        log.info("{},{},{},{},{},{}".format(tr_order, tr_title, tr_author,
                                            tr_file_type, tr_time.strip(), tr_db.strip()))
        # deduplicate downloaded files
        file_will_write = os.path.join(file_dir, tr_title)
        if_down = True
        # skip titles containing any ignore keyword (tables of contents, indexes)
        key_ignore = ["总目次", "索引", "总目录"]
        for key_i in key_ignore:
            if key_i in tr_title:
                log.info('\ttitle contains keyword {}, download skipped'.format(key_i))
                if_down = False
                break
        # same-site dedup, keyed on title plus first author
        if tr_title + "_" + tr_author in files_m:
            log.info('\tfile already in this site\'s list ... {}'.format(os.path.join(file_dir, tr_title)))
            if_down = False
        # cross-site dedup, keyed on title only
        if tr_title in other_list:
            log.info('\tfile already in another site\'s list ... {}'.format(os.path.join(file_dir, tr_title)))
            if_down = False
        # for f in file_dir_files:
        #     if f.startswith(tr_title):
        #         print('\t{},{}'.format(f, tr_title))
        #         print('\tfile already exists ... {}'.format(os.path.join(file_dir, f)))
        #         if_down = False
        #         with open(file_m, "a") as fm:
        #             fm.write(tr_title + "," + os.path.join(file_dir, f) + "\n")
        #         continue
        if if_down:
            log.info('\tfile not present, starting download ... {}'.format(file_will_write))
            download_url = 'http://kns.cnki.net/kns' + tr_down_url[2:] + '&dflag=pdfdown'
            log.info('\tdownload link ... {}'.format(download_url))
            download(tr_title, tr_author, download_url)
            time.sleep(6)