def sim_red_data(reds, gains=None, shape=(10, 10), gain_scatter=.1):
    """Simulate noise-free random but redundant (up to differing gains) visibilities.

    Args:
        reds: list of lists of baseline-pol tuples (ind1, ind2, pol) where each
            sublist has only redundant pairs.
        gains: optional pre-specified gains in the {(index, antpol): value}
            format. Entries present here are used as given (broadcast to
            `shape`); antennas that are missing get
            ``1 + gain_scatter * noise((1,))``. The input dict is deep-copied
            and never modified.
        shape: tuple of (Ntimes, Nfreqs). Default is (10, 10).
        gain_scatter: relative amplitude of the per-antenna complex gain
            scatter applied to antennas without a pre-specified gain.
            Default is 0.1.

    Returns:
        gains: true gains used in the simulation in the
            {(index, antpol): np.array} format.
        true_vis: true underlying visibilities, keyed by the first baseline of
            each redundant group, in the {(ind1, ind2, pol): np.array} format.
        data: simulated visibilities in the {(ind1, ind2, pol): np.array} format.
    """
    data, true_vis = {}, {}
    # Every (antenna index, single-antenna polarization) that appears in any baseline.
    ants = list({
        ant
        for bls in reds
        for bl in bls
        for ant in [(bl[0], bl[2][0]), (bl[1], bl[2][1])]
    })
    gains = {} if gains is None else deepcopy(gains)
    for ant in ants:
        # The scatter term is drawn for every antenna (even pre-specified ones)
        # so the sequence of noise() calls matches the historical behaviour.
        base = gains.get(ant, 1 + gain_scatter * noise((1, )))
        # Builtin `complex` replaces the `np.complex` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        gains[ant] = base * np.ones(shape, dtype=complex)
    for bls in reds:
        # One random true visibility per redundant group, shared by its baselines.
        true_vis[bls[0]] = noise(shape)
        for (i, j, pol) in bls:
            data[(i, j, pol)] = (true_vis[bls[0]]
                                 * gains[(i, pol[0])]
                                 * gains[(j, pol[1])].conj())
    return gains, true_vis, data
def TaylorSeries(H,t1,t2,order,noise,args=()):
    '''Estimate H from two noisy unitaries via a truncated series for log(U).

    The propagator between the two measurement times is formed as
    U = U(t2) . dagger(U(t1)); with A = identity - U the partial sum
    -sum_{n=1..order} A^n / n is accumulated and the result
    1j * sum / (t2 - t1) is returned.

    H     : exact Hamiltonian underlying the unitary evolution
    t1    : time of the first measurement of U
    t2    : time of the second measurement of U
    order : number of series terms to keep
    noise : noise function applied to each measured unitary
    args  : extra positional arguments forwarded to the noise function
    return: (numpy array) estimate of the Hamiltonian'''
    size = H.shape[0]
    elapsed = t2 - t1
    # Noisy measurements, taken in the order t1 then t2.
    measured_start = noise(expi(H, t1), *args)
    measured_end = noise(expi(H, t2), *args)
    # Propagator from t1 to t2.
    propagator = numpy.dot(measured_end, dagger(measured_start))
    deviation = Identity(size) - propagator
    # Running power deviation**k, starting at k = 1.
    power = deviation
    series = numpy.zeros((size, size), dtype=complex)
    k = 1
    while k <= order:
        series = series - power / float(k)
        power = numpy.dot(power, deviation)
        k += 1
    return 1j * series / elapsed
def make_hera_obs(aa, lsts=DEFAULT_LSTS, fqs=DEFAULT_FQS, pols=['xx', 'yy'], T_rx=150., inttime=10.7, rfi_impulse=.02, rfi_scatter=.001, nsrcs=200, gain_spread=.1, dly_rng=(-20, 20), xtalk=3.):
    """Simulate redundant visibilities with per-antenna gains for an array.

    The redundant baseline groups are obtained from
    ``hera_cal.omni.aa_to_info(aa).get_reds()``. Each antenna gets a gain of
    ``1 + gain_spread * noise((1,))`` broadcast to the data shape, each
    redundant group gets one random true visibility from ``noise(shape)``, and
    every baseline's data is ``true_vis * g_i * conj(g_j)``.

    NOTE(review): the previous body read `gains` before assigning it (an
    UnboundLocalError on every call) and used `gain_scatter` and `shape`,
    which are not parameters of this function. They are mapped here to an
    empty gain dict, `gain_spread`, and ``(len(lsts), len(fqs))`` -- confirm
    this is the intended behaviour.

    Args:
        aa: antenna-array object passed to ``hera_cal.omni.aa_to_info``.
        lsts: sequence of times; only its length is used (first data axis).
        fqs: sequence of frequencies; only its length is used (second data axis).
        gain_spread: relative amplitude of the per-antenna gain scatter.
        pols, T_rx, inttime, rfi_impulse, rfi_scatter, nsrcs, dly_rng, xtalk:
            accepted for interface compatibility; not used by this body.

    Returns:
        gains: {(index, antpol): np.array} gains used in the simulation.
        true_vis: {(ind1, ind2, pol): np.array} true visibility per redundant
            group, keyed by the group's first baseline.
        data: {(ind1, ind2, pol): np.array} simulated visibilities.
    """
    info = hera_cal.omni.aa_to_info(aa)
    reds = info.get_reds()
    # Every (antenna index, single-antenna polarization) that appears in any baseline.
    ants = list({
        ant
        for bls in reds
        for bl in bls
        for ant in [(bl[0], bl[2][0]), (bl[1], bl[2][1])]
    })
    data, true_vis = {}, {}
    shape = (len(lsts), len(fqs))
    gains = {}
    for ant in ants:
        # Builtin `complex` replaces the `np.complex` alias removed in NumPy 1.24.
        gains[ant] = (1 + gain_spread * noise((1, ))) * np.ones(shape, dtype=complex)
    for bls in reds:
        true_vis[bls[0]] = noise(shape)
        for (i, j, pol) in bls:
            data[(i, j, pol)] = (true_vis[bls[0]]
                                 * gains[(i, pol[0])]
                                 * gains[(j, pol[1])].conj())
    return gains, true_vis, data
def document_features(self, document):
    """Build the feature dict for one document via the on-disk IE pipeline.

    The words of `document` are written space-separated to ``xyz.txt``, the
    file is read back and handed to ``ie_preprocess`` together with
    ``self.t`` and ``self.chunker``, then ``noise()`` is called. Afterwards
    every line of ``efgh.txt`` becomes a ``'contains(<line>)'`` feature set
    to True.

    NOTE(review): ``efgh.txt`` is presumably produced by ``ie_preprocess`` /
    ``noise``; that is not visible from this method. Each line is used as
    read, i.e. including its trailing newline.

    document : iterable of word strings
    return   : dict mapping feature name to True
    """
    # print(...) with a single string argument behaves the same on Python 2 and 3.
    print("document_features entered")
    features = {}
    # `with` guarantees the handles are closed even if a write/read raises.
    # (The former unused `set(document)` was removed: it would have exhausted
    # a one-shot iterable before the loop below could see any word.)
    with open("xyz.txt", "wb") as sink:
        for word in document:
            sink.write(word + " ")
    with open("xyz.txt", "rb") as source:
        doc = source.read()
    print("ie_process called")
    ie_preprocess(doc, self.t, self.chunker)
    noise()
    with open("efgh.txt", "rb") as extracted:
        for st in extracted:
            features['contains(%s)' % st] = True
    return features
def Derivative(H,t,dt,noise,args=()):
    '''Estimate H with a three-point (central difference) time derivative.

    Three noisy unitaries are measured at t, t-dt and t+dt (in that order) and
    combined as (0.5j/dt) * (U(t+dt) - U(t-dt)) . dagger(U(t)).

    H     : exact Hamiltonian underlying the unitary
    t     : central time at which measurements are taken
    dt    : timestep between neighbouring measurements
    noise : noise function applied to each measured unitary
    args  : extra positional arguments forwarded to the noise function
    return: numpy array containing the estimate of the Hamiltonian'''
    centre = noise(expi(H, t), *args)
    earlier = noise(expi(H, t - dt), *args)
    later = noise(expi(H, t + dt), *args)
    prefactor = 0.5j / dt
    difference = later - earlier
    return prefactor * numpy.dot(difference, dagger(centre))
def createStrata(heightMap, height=64, noise=lambda x, y: pnoise2(x, y, octaves=8)):
    """Fill a new Chunk with lava / stone / dirt / air columns from a height map.

    For each (i, j) column the dirt surface is ``heightMap[i, j]`` and the
    stone surface is that value plus ``noise(x, z) / 24 - 4``, where x and z
    run over [0, 1] across the width and depth. Level 0 is LAVA, levels up to
    the stone surface are STONE, levels up to the dirt surface are DIRT and
    everything above is AIR.

    heightMap : 2-D map with ``.shape`` and ``.metadata["waterLevel"]``
    height    : number of vertical levels in the chunk
    noise     : callable (x, z) -> number used for the dirt thickness
    return    : the populated Chunk
    """
    width, depth = heightMap.shape
    emptyVolume = np.full((width, depth, height), AIR)
    block = Chunk(emptyVolume, waterLevel=heightMap.metadata["waterLevel"])
    zCoords = np.linspace(0, 1, num=depth)
    for i, x in enumerate(tqdm(np.linspace(0, 1, num=width), "Soiling...")):
        for j, z in enumerate(zCoords):
            thickness = noise(x, z) / 24 - 4
            dirtTop = heightMap[i, j]
            stoneTop = dirtTop + thickness
            for y in range(height):
                # First matching rule wins, from the bottom of the column up.
                block[i, j, y] = (LAVA if y == 0
                                  else STONE if y <= stoneTop
                                  else DIRT if y <= dirtTop
                                  else AIR)
    return block
def createSurfaceLayer(block, heightMap, noise=lambda x, y: pnoise2(x, y, octaves=8)):
    """Replace the top block of each column with gravel, sand or grass.

    For each column the surface level is ``heightMap[i, j]`` and the block
    directly above it is inspected: under WATER the surface becomes GRAVEL
    when ``noise(x, z) > 12``; under AIR it becomes SAND when it lies at or
    below the water level and ``noise(x, z) > 8``, otherwise GRASS. Columns
    with anything else above them are left untouched.

    block     : chunk with ``.shape`` (width, depth, height) and
                ``.metadata["waterLevel"]``; modified in place
    heightMap : 2-D map of surface levels indexed as heightMap[i, j]
    noise     : callable (x, z) -> number compared against the thresholds
    return    : the same `block` object
    """
    width, depth, _ = block.shape
    waterLevel = block.metadata["waterLevel"]
    zCoords = np.linspace(0, 1, num=depth)
    for i, x in enumerate(tqdm(np.linspace(0, 1, num=width), "Growing...")):
        for j, z in enumerate(zCoords):
            # noise is sampled once per threshold, as before.
            sandy = noise(x, z) > 8
            gravelly = noise(x, z) > 12
            surface = heightMap[i, j]
            above = block[i, j, surface + 1]
            if above == WATER and gravelly:
                block[i, j, surface] = GRAVEL
            elif above == AIR:
                block[i, j, surface] = SAND if (surface <= waterLevel and sandy) else GRASS
    return block
# NOTE(review): the next three statements are the tail of a definition whose
# header lies outside this excerpt (`num` and `numBit` are bound there); their
# original indentation is not visible here and must match that definition.
# They zero-pad the binary form of `num` to `numBit` digits and print it next
# to its channel-coded form.
binary = bin(num)[2:].zfill(numBit)
print(binary + " convert To: " + channelCoding(binary))
print("\n")

# Demo: Huffman source coding followed by channel coding, decoded once from a
# clean code word and once from a code word passed through noise().
if __name__ == '__main__':
    # Build the Huffman code table and a decoder for it.
    Nodes = generateNodes()
    HuffmanCode = generateHuffman(Nodes)
    decoder = HuffmanDecoder(HuffmanCode)
    plainText = "alirezazarenejad"
    # Source coding: plain text -> Huffman bit string.
    cipherText = sourceCoding(HuffmanCode, plainText)
    print("encode huffman code is: " + cipherText)
    # Channel coding of the Huffman bit string.
    codeWord = channelCoding(cipherText)
    print("channelCoding: " + codeWord)
    # Clean path: channel-decode, then Huffman-decode.
    decodedWord = channelDecoding(codeWord)
    print("decoded word:" + decodedWord)
    decodedData = destinationDecoding(decoder, decodedWord)
    print("decodedData : ", decodedData)
    print("\n\nwith noise:")
    # Noisy path: the same two decoding steps applied to noise(codeWord).
    noisyCodeWord = noise(codeWord)
    print("noisy codeword: " + noisyCodeWord)
    decodedWordNoisy = channelDecoding(noisyCodeWord)
    print("decoded word:" + decodedWordNoisy)
    decodedDataNoisy = destinationDecoding(decoder, decodedWordNoisy)
    print("decodedData : ", decodedDataNoisy)
def detect(filename, folder, file_no):
    """OCR an ID string from an image, splitting it into segments first.

    Pipeline, as visible in this function:
      1. Read `filename`, median-blur it, convert to grayscale, crop() it and
         resize to 880x100; run noise() on the result and round-trip it
         through 'E:\\Results\\res.jpg'.
      2. Run pytesser with makebox=True and parse its output into one token
         list per text line; tokens 1-4 of each line, when numeric, become
         that character's box coordinates.
      3. Drop lines whose first token starts with neither a digit nor a letter.
      4. Derive x split positions (`line_coordinates`) from the character boxes.
      5. OCR every slice between consecutive split positions, keep only
         alphanumerics and map look-alike letters to digits.
      6. Pass the collected string to check(), which yields
         (final, alternative, possible).

    filename : path of the image to process
    folder   : directory that receives 'error<file_no>.jpg' / '.txt' when
               check() does not report a final result (the process chdir()s
               into it)
    file_no  : number used in the error file names
    return   : `possible` from check() when `final` is truthy, otherwise None
               (also None when no characters were recognised)

    NOTE(review): this is Python 2 code (print statement; `/` on ints floors).
    The nesting below was reconstructed from a whitespace-collapsed source;
    spots where the original indentation could not be determined are marked.
    """
    # print 'in'
    img = cv2.imread(filename)
    img = cv2.medianBlur(img, 1)  # smoothing image
    # extracting white characters
    image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgray = crop(image)
    # imgray = image
    imgray = cv2.resize(imgray, (880, 100))
    (h, w) = imgray.shape
    (img_h, img_w) = (h, w)
    drawing = noise(imgray, imgray.shape, 1000, 25000)
    cv2.imshow('win1', drawing)
    cv2.waitKey()  # blocks until a key is pressed
    cv2.imwrite('E:\Results\\res.jpg', drawing)
    # character recognition: the image is reloaded through the legacy cv API
    image = cv.LoadImage("E:\Results\\res.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
    lang = ['eng']
    result = 0
    for i in lang:
        data = pytesser.iplimage_to_string(image, i, pytesser.PSM_SINGLE_LINE, makebox=True)
    # print data
    # Split the OCR output into tokens (on spaces/newlines) and lines (on newlines).
    word = ''
    section = []  # all the characters and coordinates in a list
    line = []
    for i in data:
        if i == ' ' or i == '\n':
            line.append(word)
            word = ''
        elif (i != '\n'):
            word += i
        if i == '\n':
            section.append(line)
            line = []
    coord = []
    chars = []  # coordinates of the characters
    count = 0
    for i in section:
        for j in i:
            # print count
            # token 0 is skipped; tokens 1..4 are kept when they are digit strings
            if j.isdigit() and count < 5 and count > 0:
                coord.append(int(j))
            count += 1
        count = 0
        chars.append(coord)
        coord = []
    # removing all symbols
    dele = 0
    delete = []
    for i in section:
        if not i[0].isdigit() and not i[0].isalpha():
            delete.append(dele)
        dele += 1
    # pop from the highest index down so earlier indices stay valid
    delete.reverse()
    for i in delete:
        section.pop(i)
        chars.pop(i)
    c = 0
    point = []
    for i in chars:
        cv2.rectangle(drawing, (i[0], i[1]), (i[2], i[3]), (255, 255, 255), 2)
    # NOTE(review): placed after the loop; the original nesting is not recoverable.
    cv2.imshow('rec', drawing)
    cv2.waitKey(1)
    cv2.imwrite('E:\\report\\res.jpg', drawing)
    counter = 0
    line_coordinates = [0]  # x positions at which the image is split; starts at the left edge
    if len(chars) == 1:
        for i in chars:
            if i[0] > 400:
                line_coordinates.append(i[0] / 2)
            # NOTE(review): assumed to run for every box, not only when i[0] > 400.
            line_coordinates.append(i[0])
    # for debugging, draws box using coordinates received from make box function
    for i in chars:
        counter += 1
        # Boxes are consumed in pairs: remember the first box's i[2], then
        # split halfway between it and the second box's i[0].
        if c == 0:
            point.append(i[2])
            c = 1
        elif c == 1:
            point.append(i[0])
            line = ((point[0] + point[1]) / 2)
            line_coordinates.append(line)
            cv2.line(drawing, (line, img_h), (line, 0), (255, 255, 255), 2)
            c = 0
            point = []
        # After the last box, close the final segment at the right image edge.
        if counter == len(chars):
            if img_w - i[2] < 150:
                line_coordinates.append(img_w)
            else:
                line_coordinates.append(i[2])
                line_coordinates.append(img_w)
                # NOTE(review): assumed to belong to the else branch.
                cv2.line(drawing, (i[2], img_h), (i[2], 0), (255, 255, 255), 2)
    # splitting image and processing
    iterator = 1
    id = ''
    while iterator < len(line_coordinates):
        # slice between two consecutive split positions, full height
        roi = imgray[0:img_h, line_coordinates[iterator - 1]:line_coordinates[iterator]]
        iterator += 1
        corrected = noise(roi, roi.shape, 300, 20000)
        cv2.imwrite('E:\Results\\res.jpg', corrected)
        cv2.imshow('win1', corrected)
        cv2.waitKey(1)
        image = cv.LoadImage("E:\Results\\res.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
        data = pytesser.iplimage_to_string(image, 'enm', 7)
        # print data
        for i in data:
            # map characters that look like digits onto those digits
            if i == '!':
                i = '1'
            if i.isalpha() or i.isdigit():
                if i == 'O' or i == 'o':
                    i = '0'
                if i == 'L' or i == 'l':
                    i = '1'
                if i == 'i' or i == 'I':
                    i = '1'
                id = id + i
    # print id
    if id != '':
        print id
        sol = check(id)
        final = sol[0]
        alternative = sol[1]
        possible = sol[2]
    else:
        return None
    # print sol
    if not final:
        # Save the input image (and any alternatives) under `folder` for inspection.
        os.chdir(folder)
        filename = 'error' + str(file_no)
        img_name = filename + '.jpg'
        txt_name = filename + '.txt'
        cv2.imwrite(img_name, img)
        if len(alternative) > 1:
            # storing alternatives in a text file
            f = open(txt_name, 'w')
            for i in alternative:
                f.write(i + '\n')
            f.close()
        return None
    else:
        return possible
#!/usr/bin/python
import sys
import re
from Bio import AlignIO
from noise import *

# For every alignment file named on the command line, run noise() on it and
# print each record restricted to the column indices noise() returned.
# A one-line status message per file goes to stderr.
for path in sys.argv[1:]:
    # noise() returns four values: the column indices to keep, the alignment
    # records, a count reported as "columns removed", and a fourth value
    # whose emptiness marks an empty input file.
    kept_cols, alignment, removed_count, contents = noise(path)
    for record in alignment:
        filtered = ''.join(str(record.seq[col]) for col in kept_cols)
        sys.stdout.write(filtered + '\n')
    kept_total = len(kept_cols)
    if kept_total > 0:
        sys.stderr.write(str(removed_count) + ' columns were removed from this alignment\n')
    elif len(contents) == 0:
        sys.stderr.write('Error: Empty file\n')
    elif kept_total == 0:
        sys.stderr.write('All columns were removed from this alignment\n')
from noise import *
import os
import cv2

# Load the Lena test image, show it in grayscale, and build a salt-and-pepper
# and a Gaussian noise object from the image scaled to [0, 1].
if __name__ == '__main__':
    path = 'C:/Users/Steffany/Documents/Javeriana/Semestre 9/Procesamiento de Imagenes/imagenes'
    image_name = 'lena.png'
    path_file = os.path.join(path, image_name)  # build the image path
    image = cv2.imread(path_file)
    imagegris = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cv2.imshow('image', imagegris)  # show the Lena image in grayscale
    cv2.waitKey(0)
    # Builtin `float` replaces the `np.float` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24 (it was the same type).
    ns = noise("s&p", imagegris.astype(float)/255)  # noise class on grayscale Lena with s&p noise
    ns = noise("gauss", imagegris.astype(float)/255)  # noise class on grayscale Lena with gaussian noise
def detect(filename, folder, file_no):
    """OCR an ID string from an image, splitting it into segments first.

    Pipeline, as visible in this function:
      1. Read `filename`, median-blur it, convert to grayscale, crop() it and
         resize to 880x100; run noise() on the result and round-trip it
         through 'E:\\Results\\res.jpg'.
      2. Run pytesser with makebox=True and parse its output into one token
         list per text line; tokens 1-4 of each line, when numeric, become
         that character's box coordinates.
      3. Drop lines whose first token starts with neither a digit nor a letter.
      4. Derive x split positions (`line_coordinates`) from the character boxes.
      5. OCR every slice between consecutive split positions, keep only
         alphanumerics and map look-alike letters to digits.
      6. Pass the collected string to check(), which yields
         (final, alternative, possible).

    filename : path of the image to process
    folder   : directory that receives 'error<file_no>.jpg' / '.txt' when
               check() does not report a final result (the process chdir()s
               into it)
    file_no  : number used in the error file names
    return   : `possible` from check() when `final` is truthy, otherwise None
               (also None when no characters were recognised)

    NOTE(review): this is Python 2 code (print statement; `/` on ints floors).
    The nesting below was reconstructed from a whitespace-collapsed source;
    spots where the original indentation could not be determined are marked.
    """
    # print 'in'
    img = cv2.imread(filename)
    img = cv2.medianBlur(img, 1) # smoothing image
    # extracting white characters
    image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgray = crop(image)
    # imgray = image
    imgray = cv2.resize(imgray, (880,100))
    (h,w) = imgray.shape
    (img_h,img_w) = (h,w)
    drawing = noise(imgray, imgray.shape, 1000, 25000)
    cv2.imshow('win1', drawing)
    cv2.waitKey()  # blocks until a key is pressed
    cv2.imwrite('E:\Results\\res.jpg', drawing)
    # character recognition: the image is reloaded through the legacy cv API
    image = cv.LoadImage("E:\Results\\res.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
    lang = ['eng']
    result = 0
    for i in lang:
        data = pytesser.iplimage_to_string(image, i, pytesser.PSM_SINGLE_LINE,makebox=True)
    # print data
    # Split the OCR output into tokens (on spaces/newlines) and lines (on newlines).
    word = ''
    section = [] # all the characters and coordinates in a list
    line = []
    for i in data:
        if i == ' ' or i=='\n':
            line.append(word)
            word = ''
        elif (i!='\n'):
            word+=i
        if i == '\n':
            section.append(line)
            line = []
    coord = []
    chars = [] # coordinates of the characters
    count = 0
    for i in section:
        for j in i:
            # print count
            # token 0 is skipped; tokens 1..4 are kept when they are digit strings
            if j.isdigit() and count < 5 and count > 0:
                coord.append(int(j))
            count += 1
        count = 0
        chars.append(coord)
        coord = []
    # removing all symbols
    dele= 0
    delete=[]
    for i in section:
        if not i[0].isdigit() and not i[0].isalpha():
            delete.append(dele)
        dele += 1
    # pop from the highest index down so earlier indices stay valid
    delete.reverse()
    for i in delete:
        section.pop(i)
        chars.pop(i)
    c = 0
    point = []
    for i in chars:
        cv2.rectangle(drawing,(i[0],i[1]),(i[2],i[3]),(255,255,255),2)
    # NOTE(review): placed after the loop; the original nesting is not recoverable.
    cv2.imshow('rec', drawing)
    cv2.waitKey(1)
    cv2.imwrite('E:\\report\\res.jpg', drawing)
    counter = 0
    line_coordinates = [0]  # x positions at which the image is split; starts at the left edge
    if len(chars) == 1:
        for i in chars:
            if i[0] > 400:
                line_coordinates.append(i[0]/2)
            # NOTE(review): assumed to run for every box, not only when i[0] > 400.
            line_coordinates.append(i[0])
    # for debugging, draws box using coordinates received from make box function
    for i in chars:
        counter += 1
        # Boxes are consumed in pairs: remember the first box's i[2], then
        # split halfway between it and the second box's i[0].
        if c == 0:
            point.append(i[2])
            c = 1
        elif c == 1:
            point.append(i[0])
            line = ((point[0] + point[1]) / 2)
            line_coordinates.append(line)
            cv2.line(drawing,(line,img_h),(line,0),(255,255,255),2)
            c = 0
            point = []
        # After the last box, close the final segment at the right image edge.
        if counter == len(chars):
            if img_w - i[2] < 150:
                line_coordinates.append(img_w)
            else:
                line_coordinates.append(i[2])
                line_coordinates.append(img_w)
                # NOTE(review): assumed to belong to the else branch.
                cv2.line(drawing,(i[2],img_h),(i[2],0),(255,255,255),2)
    # splitting image and processing
    iterator = 1
    id = ''
    while iterator < len(line_coordinates):
        # slice between two consecutive split positions, full height
        roi = imgray[0:img_h, line_coordinates[iterator-1]:line_coordinates[iterator]]
        iterator += 1
        corrected = noise(roi, roi.shape, 300, 20000)
        cv2.imwrite('E:\Results\\res.jpg', corrected)
        cv2.imshow('win1', corrected)
        cv2.waitKey(1)
        image = cv.LoadImage("E:\Results\\res.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
        data = pytesser.iplimage_to_string(image, 'enm', 7 )
        # print data
        for i in data:
            # map characters that look like digits onto those digits
            if i == '!':
                i = '1'
            if i.isalpha() or i.isdigit():
                if i == 'O' or i =='o':
                    i = '0'
                if i == 'L' or i == 'l':
                    i = '1'
                if i == 'i' or i == 'I':
                    i = '1'
                id = id + i
    # print id
    if id != '':
        print id
        sol = check(id)
        final = sol[0]
        alternative = sol[1]
        possible = sol[2]
    else:
        return None
    # print sol
    if not final:
        # Save the input image (and any alternatives) under `folder` for inspection.
        os.chdir(folder)
        filename = 'error' + str(file_no)
        img_name = filename + '.jpg'
        txt_name = filename + '.txt'
        cv2.imwrite(img_name, img)
        if len(alternative) > 1:
            # storing alternatives in a text file
            f = open(txt_name, 'w')
            for i in alternative:
                f.write(i+'\n')
            f.close()
        return None
    else:
        return possible
#!/usr/bin/python
import sys
import re
from Bio import AlignIO
from noise import *

# For every alignment file named on the command line, run noise() on it and
# print each record in FASTA form ('>' + record id, then the sequence
# restricted to the column indices noise() returned). A one-line status
# message per file goes to stderr.
for i in range(1,len(sys.argv)):
    k,align,l,m = noise(sys.argv[i])
    for record in align:
        # Build the filtered sequence in one pass instead of repeated `+=`;
        # the former unused `id = ''` (which shadowed the builtin) is gone.
        out = ''.join(str(record.seq[col]) for col in k)
        sys.stdout.write('>' + record.id + '\n' + out +'\n')
    if len(k) > 0:
        # show how many columns are removed from the alignment
        sys.stderr.write(str(l)+' columns were removed from this alignment\n')
    elif len(m) == 0:
        # error message when there is no record in the given alignment file
        sys.stderr.write('Error: Empty file\n')
    elif len(k) == 0:
        # return a message if all columns are removed
        sys.stderr.write('All columns were removed from this alignment\n')
def main():
    """Generate a synthetic trace and save three diagnostic plots.

    Steps: build a footprint descriptor with genArbitFD(), derive the stack
    distance distribution with sd(), draw `no_objects` random object sizes in
    [1, 100], fit the noise model, call generate_trace(), and write
    "obj_sz_dst.png", "stack_distance.png" and "ObjInWay_2.png".
    """
    ## Footprint descriptor (parsing one from file is disabled in favour of a generated one)
    fd = genArbitFD()

    ## Stack distance distribution
    SD = sd(fd)

    ## Object size distribution: the loaded distribution is kept but sizes
    ## are currently drawn uniformly instead of via obj_dst.sample()
    obj_dst = obj_size("data/akamai1.bin.sizeCntObj.json")
    size_counts = defaultdict(lambda : 0)
    for _ in range(no_objects):
        size_counts[random.randint(1, 100)] += 1

    obj_sizes = sorted(size_counts.keys())
    count = [size_counts[size] for size in obj_sizes]
    total_count = sum(count)
    size_dst = [float(c)/total_count for c in count]

    plt.plot(sorted(obj_sizes))
    plt.savefig("obj_sz_dst.png")

    # index -> object size, handed to the trace generator
    sz_dict = defaultdict(lambda : 0)
    for idx, size in enumerate(obj_sizes):
        sz_dict[idx] = size

    nn = noise(SD, obj_sizes, size_dst)
    nn.modelNoise()

    trace, trace_prop, obj_hit_dst = generate_trace(SD, sz_dict)

    # Normalised histogram of the values in trace_prop[4000]
    trace_count = defaultdict(lambda : 0)
    for value in trace_prop[4000]:
        trace_count[value] += 1
    trace_keys = sorted(trace_count.keys())
    raw_vals = [trace_count[key] for key in trace_keys]
    sum_vals = sum(raw_vals)
    trace_vals = [float(v)/sum_vals for v in raw_vals]

    plt.clf()
    plt.plot(trace_keys, trace_vals)
    plt.xlabel("stack_distance")
    plt.ylabel("Count")
    plt.savefig("stack_distance.png")

    # Normalised obj_hit_dst, plotted against its sorted keys
    allvals = sorted(obj_hit_dst.keys())
    plot_vals = [obj_hit_dst[key] for key in allvals]
    sum_vals = sum(plot_vals)
    plt_vals = [float(x)/sum_vals for x in plot_vals]

    plt.clf()
    plt.plot(allvals, plt_vals)
    plt.savefig("ObjInWay_2.png")
def app2(uploaded_file):
    """Streamlit page: missing-value handling, column pruning and outlier reduction.

    When `uploaded_file` is given, the CSV is loaded, string columns are
    replaced by category codes (+1), columns with missing values are reported
    and median-imputed, weakly correlated columns can be dropped, IQR outliers
    can be replaced per column or for all columns, and columns can be dropped
    manually. Download links are rendered after each stage.

    uploaded_file : file-like object accepted by pd.read_csv, or None
    return        : the resulting DataFrame, or None when no file was uploaded

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; confirm the widget layout against the
    running app.
    """
    st.header("Missing value and Noise detection")

    # missing value
    def noise(data):
        """Return the names of the columns that contain at least one null."""
        missing = data.isnull().sum()
        # Pair names with counts positionally; integer indexing into a
        # label-indexed Series (`missing[i]`) is deprecated in pandas.
        return [col for col, n_missing in zip(data.columns, missing)
                if n_missing != 0]

    # correct those column
    def correct(data, noisy_col):
        """Median-impute each column named in `noisy_col` (modifies `data`)."""
        imputer = SimpleImputer(strategy="median")
        for col in noisy_col:
            imputer.fit(data[[col]])
            data[[col]] = imputer.transform(data[[col]])
        return data

    # unnecessary data columns
    def unecessary_col(data, label):
        """Return (columns with |corr to label| < 0.05, data).

        The first element is the string "No unnecessary column" when no
        column qualifies. If `label` is not a column, the last column is used.
        """
        l = len(data.columns)
        col_list = list(data.columns)
        index = l-1
        for i in range(l):
            if label == col_list[i]:
                index = i
        corr_value = []
        unused_col = []
        for i in range(l):
            cor_val = data[col_list[index]].corr(data[col_list[i]])
            corr_value.append(cor_val)
        for i in range(l):
            if(corr_value[i] < 0.05 and corr_value[i] > -0.05):
                unused_col.append(col_list[i])
        if(len(unused_col) == 0):
            return "No unnecessary column", data
        return unused_col, data

    # download the correct dataset
    def filedownload(df, filename):
        """Return an HTML link that downloads `df` as '<filename>.csv'."""
        csv = df.to_csv(index=None)
        # strings <-> bytes conversions
        filename = filename+".csv"
        b64 = base64.b64encode(csv.encode()).decode()
        href = f'<a href="data:file/csv;base64,{b64}" download={filename}>Download {filename} File</a>'
        return href

    # column data noise reduction
    def noise_reduction(new_data, user_input):
        """Replace values outside the 1.5*IQR fences of one column.

        Each outlier is overwritten in place with a random integer between
        the (truncated) lower and upper fence.
        """
        Q1 = new_data[user_input].quantile(0.25)
        Q3 = new_data[user_input].quantile(0.75)
        IQR = Q3 - Q1
        lower_limit = Q1 - 1.5*IQR
        upper_limit = Q3 + 1.5*IQR
        for i in range(len(new_data)):
            curr_value = new_data.iloc[i][user_input]
            if(curr_value < lower_limit or curr_value > upper_limit):
                # NOTE(review): `.at[i, ...]` is label-based while `.iloc[i]`
                # is positional; they agree only for a default RangeIndex.
                new_data.at[i, user_input] = random.randint(
                    int(lower_limit), int(upper_limit))
        return new_data

    # noise present or not
    def noise_check(new_data, user_input):
        """Return "Noise present" if any value lies outside the 1.5*IQR fences."""
        Q1 = new_data[user_input].quantile(0.25)
        Q3 = new_data[user_input].quantile(0.75)
        IQR = Q3 - Q1
        lower_limit = Q1 - 1.5*IQR
        upper_limit = Q3 + 1.5*IQR
        check = False
        stg = "Noise not present"
        for i in range(len(new_data)):
            curr_value = new_data.iloc[i][user_input]
            if(curr_value < lower_limit or curr_value > upper_limit):
                check = True
            if(check):
                stg = "Noise present"
                break
        return stg

    def drop_col(data, string):
        """Return `data` without the columns listed in `string`."""
        for i in range(len(string)):
            data = data.drop([string[i]], axis=1)
        return data

    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.markdown('**1.1. Glimpse of dataset**')
        st.write(df)
        # Drop index columns that pandas named "Unnamed..." on load.
        df = df.loc[:, ~df.columns.str.match("Unnamed")]
        # Encode string columns as integer category codes starting at 1.
        for label, content in df.items():
            if pd.api.types.is_string_dtype(content):
                df[label] = content.astype("category").cat.codes + 1
        st.markdown("**New prepared dataset**")
        st.write(df)

        noise_col = noise(df)
        if(len(noise_col) == 0):
            st.success("No missing value present")
        else:
            st.warning("These column contain missing values :\n")
            for i in range(len(noise_col)):
                st.error(noise_col[i])
        # Bound on both branches: the widgets below read `col_list` and
        # `new_data` even when no column had missing values.
        col_list = list(df.columns)
        new_data = correct(df, noise_col)
        st.markdown(filedownload(new_data, "Corrected_dataset"),
                    unsafe_allow_html=True)
        # missing values case ends

        label = st.selectbox("Enter Your label name", col_list)
        if label != "":
            string, new_data = unecessary_col(df, label)
            if(string == "No unnecessary column"):
                st.success("No unnecessary column")
            else:
                st.warning("Unecessary columns are : ")
                for i in range(len(string)):
                    st.error(string[i])
            # Drop unnecessary columns
            if string != "No unnecessary column":
                drop = st.checkbox("Drop unncessary column")
                if drop:
                    new_data = drop_col(new_data, string)
                    st.write(new_data)
                    st.markdown(filedownload(new_data, "New_corrected_dataset"),
                                unsafe_allow_html=True)

        agree = st.checkbox("Detect column noise")
        col_list = list(df.columns)
        if agree:
            a = 0
            st.markdown("Dataset anamolies")
            user_input = st.selectbox("Select the column", col_list)
            if user_input != "":
                if user_input not in col_list:
                    st.write("Enter valid column")
                else:
                    plt.figure(figsize=(9, 3))
                    sns.set_theme(style="whitegrid")
                    ax = sns.boxplot(x=new_data[user_input])
                    st.pyplot(plt)
                    stg = noise_check(new_data, user_input)
                    if(stg == "Noise not present"):
                        st.success(stg)
                    else:
                        st.error(stg)
                        a = 1
            if a == 1:
                # noise remove part
                if st.button("Reduction of noise in that column"):
                    noise_free_data = noise_reduction(new_data, user_input)
                    st.write(new_data)
                    st.markdown(filedownload(
                        noise_free_data, "Noise_reduced_dataset"), unsafe_allow_html=True)
                if st.button("Reduce noise from all the columns"):
                    # Fix: clean each column in turn. The loop previously
                    # passed `user_input` on every iteration, so only the
                    # selected column was ever processed.
                    for col in col_list:
                        # Columns dropped in an earlier step are skipped.
                        if col in new_data.columns:
                            new_data = noise_reduction(new_data, col)
                    noise_free_data = new_data
                    st.write(new_data)
                    st.markdown(filedownload(
                        noise_free_data, "Noise_reduced_dataset"), unsafe_allow_html=True)

        st.markdown("Custom Removing Columns")
        col_list = list(new_data.columns)
        l=len(col_list)
        c = st.slider('Select How many times you want to drop columns', 0, l, 0)
        # One selectbox per requested drop; `key=c` keeps the widget keys unique.
        while(c>0):
            c1=list(new_data.columns)
            l1 = st.selectbox("Select the column to be dropped", col_list, key=c)
            c=c-1
            if(l1 not in c1):
                st.write("Column already dropped")
            else:
                new_data=new_data.drop(l1, axis=1)
        st.write(new_data)
        st.markdown(filedownload(new_data, "New_corrected_dataset"),
                    unsafe_allow_html=True)
        return new_data
if __name__ == "__main__":
    #############################################
    # 1. Read lena and create noisy lena Images #
    #############################################
    path = "C:/Users/ACER/Desktop/Semestre10/Imagenes/Presentaciones/Semana 6/Imagenes"
    name = "lena.png"
    path_name = os.path.join(path, name)  # Join path and name
    lena = cv2.imread(path_name)  # Read Image
    lena = cv2.cvtColor(lena, cv2.COLOR_BGR2GRAY)  # Change lena to Gray CS
    #cv2.imshow("lena", lena) #If you want to see the Image.

    ###########################################################################################
    # 1.1 Generate lena_gauss_noisy & lena_s&p_noisy using noise() provided by Julian Quiroga #
    ###########################################################################################
    # The image is scaled to [0, 1] before noise() and back to uint8 afterwards.
    # Builtin `float` replaces the `np.float` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24 (it was the same type).
    lena_gauss_noisy = noise("gauss", lena.astype(float) / 255)  # Generate gaussian noisy lena
    lena_gauss_noisy = (255 * lena_gauss_noisy).astype(np.uint8)
    lena_sp_noisy = noise("s&p", lena.astype(float) / 255)
    lena_sp_noisy = (255 * lena_sp_noisy).astype(np.uint8)  # Generate Salt and pepper noisy lena
    #cv2.imshow("lena gauss noise", lena_gauss_noisy) #If you want to see the Image.
    #cv2.imshow("lena s&p noise", lena_sp_noisy) #If you want to see the Image.

    #################################
    # 1.2 Filter Noisy lena´s with: #
    #################################
    ###########################################