def make_drugs_vs_genes_array(self, plot_heatmap=False):
    """Build the binary drug-by-gene matrix, store it and write it to disk.

    A cell ``[drug_id, gene_id]`` is set to 1 for every (drug, gene) pair
    obtained by zipping ``self.drugs`` with ``self.genes_with_drugs``.
    Pairs whose ``self.max_clinic_stage`` entry is NaN are printed.

    The matrix is stored on ``self.drugs_genes_array`` and written to
    ``drugs_vs_genes.pickle`` and ``drugs_vs_genes.csv`` in the working
    directory.

    Args:
        plot_heatmap: when true, also save a heatmap of the matrix to
            ``./drugsVsGenes.png``.
    """
    print('making drug vs genes array')
    n_drugs = len(self.drug_ids.keys())
    n_genes = len(self.gene_ids.keys())
    matrix = np.zeros((n_drugs, n_genes))

    n_pairs = len(self.drugs)
    pairs = zip(self.drugs, self.genes_with_drugs)
    for idx, pair in enumerate(pairs):
        drug, gene = pair
        progress(idx, n_pairs)
        if np.isnan(self.max_clinic_stage[idx]):
            # Surface pairs that have no recorded clinical stage.
            print(drug, gene)
        row = self.drug_ids[drug]
        col = self.gene_ids[gene]
        matrix[row, col] = 1

    if plot_heatmap:
        fig = plt.figure()
        sns.heatmap(matrix)
        fig.suptitle('drug vs gene array', fontsize=14)
        plt.xlabel('gene id', fontsize=12)
        plt.ylabel('drug id', fontsize=12)
        fig.savefig('./drugsVsGenes.png')

    self.drugs_genes_array = matrix
    with open('drugs_vs_genes.pickle', 'wb') as handle:
        pickle.dump(matrix, handle, protocol=pickle.HIGHEST_PROTOCOL)
    np.savetxt('drugs_vs_genes.csv', matrix, delimiter=',')
def main():
    """Look up every host on Shodan and dump its alternate names as JSON.

    Hosts come from ``build_host_list(args)``.  For each host whose Shodan
    page answers with HTTP 200, the names returned by
    ``check_alternate_names`` are collected under that host.  The resulting
    mapping is written to ``args.output``.

    A failure while fetching or parsing one host is best-effort: the host
    is skipped, but the progress bar is still advanced and the skipped
    hosts are reported once the loop has finished.
    """
    data = {}
    host_list = build_host_list(args)
    print("================================================")
    print("Checking Live Host and Alternate Names -> Shodan")
    print("================================================\n")

    total = len(host_list)
    failed = []
    for checked, host in enumerate(host_list, start=1):
        try:
            r = requests.get("https://www.shodan.io/host/%s" % host)
            if r.status_code == 200:
                alternates = check_alternate_names(r.text)
                names = data.setdefault(host, [])
                if alternates:
                    names.extend(alternates)
        except Exception:
            # Keep going on a per-host failure, but do not hide
            # KeyboardInterrupt / SystemExit as a bare ``except`` would.
            failed.append(host)
        # Always advance, so the bar reaches 100% even when hosts fail.
        progress.progress(checked, total)

    if failed:
        print("\nSkipped %d host(s) due to errors: %s"
              % (len(failed), ", ".join(str(h) for h in failed)))

    with open(args.output, "w") as handle:
        handle.write(json.dumps(data, indent=4))
def handle1(data):
    """Account for a received chunk and append it to the output file.

    Adds ``len(data)`` to the module-level byte counter ``prog``, reports
    the new total through ``progress`` and writes the chunk to the
    module-level file object ``f``.
    """
    global prog
    prog = prog + len(data)
    progress(prog)
    f.write(data)
def retrieve_export_request(scan_list):
    """Request an export of every scan in every configured format.

    For each ``(name, id)`` in ``scan_list`` and each entry of ``FORMATS``
    an export request is POSTed to the Nessus instance and the decoded
    JSON answer is handed to ``export_request_check``.

    ``nessus`` and ``csv`` exports only need the format; ``pdf`` and
    ``html`` exports additionally request the ``vuln_hosts_summary``
    chapter.  Any other format is reported and skipped instead of reusing
    the response of a previous iteration.

    Args:
        scan_list: mapping of scan name to scan id.
    """
    increment = 0
    total = len(FORMATS) * len(scan_list)
    print("[*] Save Directory: %s" % PATH)
    print("[*] Formats => " + str(FORMATS))
    print("[*] Downloading Files....")
    for name, scan_id in scan_list.items():
        export_url = NESSUS_INSTANCE + "/scans/%s/export" % str(scan_id)
        for fmt in FORMATS:
            if fmt in ("nessus", "csv"):
                payload = {"format": fmt}
            elif fmt in ("pdf", "html"):
                payload = {"format": fmt, "chapters": "vuln_hosts_summary"}
            else:
                payload = None

            if payload is None:
                print("[!] Unsupported export format skipped: %s" % fmt)
            else:
                # NOTE(review): verify=False disables TLS certificate
                # checks; kept because the original call relies on it.
                response = requests.post(export_url,
                                         headers=HEADERS,
                                         verify=False,
                                         data=json.dumps(payload))
                export_request = json.loads(response.content.decode())
                export_request_check(name, scan_id, export_request, fmt)

            # Show progress (one step per scan/format combination).
            increment += 1
            progress.progress(increment, total)
def p(self, epoch, mb, NBatches, mul=None):
    """Render the training status line together with the "combo" bar.

    The bar fraction is ``self.combo`` modulo a window of
    ``self.combolength * mul`` (or just ``self.combolength`` once the combo
    has reached that window); a remainder of zero is shown as a full bar.

    Args:
        epoch: current epoch number (formatted as a 3-digit integer).
        mb: current mini-batch number (formatted as a 5-digit integer).
        NBatches: total number of mini-batches (5-digit integer).
        mul: window multiplier; defaults to ``self.NSaves``.
    """
    template = "".join((
        "{:>03d} {:>05d}/{:>05d}, ",
        "{} saved. ",
        " Success Combo?!? -->",
    ))

    if mul is None:
        mul = self.NSaves
    window = self.combolength * mul
    if self.combo >= window:
        window = self.combolength

    remainder = self.combo % window
    fraction = 1.0 if remainder == 0 else 1.0 * remainder / window

    message = template.format(epoch, mb, NBatches, self.saves)
    progress.progress(fraction, message)
def comparison(self, name, func, num_checks, input):
    """Fingerprint candidate files concurrently and group mods by result.

    For every path in ``input`` and every group of mods that still holds
    more than one entry, ``func`` is run in a thread pool on
    ``os.path.join(self.base, mod, path)``.  Mods that yield the same key
    for the same path are grouped together.

    Two reports are written to the working directory: ``_<name>.txt`` with
    every group, and ``_<name>_conflicts.txt`` with only the groups that
    contain more than one mod.

    Args:
        name: label used for the progress display and the report names.
        func: callable taking a file path and returning a hashable key.
        num_checks: expected number of files, used for the progress display.
        input: mapping ``path -> {key: [mods]}`` from the previous round.

    Returns:
        ``(dupes, out)`` where ``dupes`` is the number of mods that share a
        key with another mod and ``out`` maps ``path -> key -> [mods]``.
    """
    checked = 0
    out = collections.defaultdict(lambda: collections.defaultdict(list))
    futures = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # Start the read operations and mark each future with its origin.
        for path, values in input.items():
            for mods in values.values():
                # Don't bother if the last round didn't have duplicates.
                if len(mods) <= 1:
                    continue
                for mod in mods:
                    filepath = os.path.join(self.base, mod, path)
                    futures[executor.submit(func, filepath)] = (mod, path)

        for future in concurrent.futures.as_completed(futures):
            mod, path = futures[future]
            key = future.result()
            out[path][key].append(mod)
            checked += 1
            progress(checked, num_checks, name)

    dupes = 0
    # The report names were previously left as the literal '_%s.txt'.
    with open('_%s.txt' % name, 'w') as f, \
            open('_%s_conflicts.txt' % name, 'w') as fc:
        for path, values in out.items():
            for key, mods in values.items():
                key_str = ','.join([path, str(key)] + mods) + '\n'
                f.write(key_str)
                if len(mods) > 1:
                    dupes += len(mods)
                    fc.write(key_str)
    return dupes, out
def add_reactassoc_relations(self, weight=0.1):
    """Blend gene/disease scores along the relations in REACTASSOC_DATA.

    Each line of the file is tab-separated: a primary gene followed by
    associated genes.  For at most the first three associated genes, the
    gene's row in ``self.genes_vs_disease_array`` is replaced by::

        weight * mean(primary_row, gene_row) + (1 - weight) * gene_row

    Lines whose primary gene is unknown are reported and skipped;
    unknown associated genes are skipped silently.

    Args:
        weight: share of the averaged score mixed into the gene's row.
    """
    # Context manager guarantees the handle is closed even if reading fails.
    with open(REACTASSOC_DATA, 'r') as f_in:
        contents = f_in.readlines()

    tot_count = len(contents)
    for i, ln in enumerate(contents):
        progress(i, tot_count)
        fields = ln.replace('\n', '').split('\t')
        primary_gene = fields[0]
        other_genes = fields[1:]
        try:
            prim_gene_id = self.gene_ids[primary_gene]
        except KeyError:
            print('Primary gene not found.. skipping')
            continue
        contrib_from_primary = self.genes_vs_disease_array[prim_gene_id, :]
        # Only the first three associated genes are considered.
        for other_gene in other_genes[:3]:
            try:
                oth_gene_id = self.gene_ids[other_gene]
            except KeyError:
                continue
            orig_oth_score = self.genes_vs_disease_array[oth_gene_id, :]
            average_score = np.mean([contrib_from_primary, orig_oth_score],
                                    axis=0)
            self.genes_vs_disease_array[oth_gene_id, :] = (
                average_score*weight + (1-weight)*orig_oth_score)
def cve_plugin_search(plugins, inputname=""):
    """Find which Nessus plugins reference one of the wanted CVEs.

    Every plugin id in ``plugins`` is fetched from the Nessus REST API and
    its attributes are scanned; an attribute value whose lower-cased form
    is in the module-level ``cve_list`` counts as a match.

    Args:
        plugins: sequence of plugin ids (anything ``int()`` accepts).
        inputname: path of the CVE file, only used in the banner when
            ``CONFIG_MODE`` is enabled.

    Returns:
        JSON text of ``{CVE: {plugin_id: {"name": ..., "hosts": []}}}``
        with CVE identifiers upper-cased.
    """
    check_auth()
    cve_to_lower()
    cve_data_dict = {}
    print("")
    banner = (bcolors.OKGREEN + "[+]" + bcolors.ENDC +
              " Parsing each plugin for possible match on CVEs")
    if CONFIG_MODE == True:
        banner += " for CVE FILE: %s" % os.path.basename(inputname)
    print(banner)

    last_index = len(plugins) - 1
    for count, plugin in enumerate(plugins):
        # NOTE(review): verify=False disables TLS certificate checks.
        details = json.loads(
            requests.get("https://%s:8834/plugins/plugin/%d" %
                         (host, int(plugin)),
                         headers=headers,
                         verify=False).content)
        for attribute in details['attributes']:
            value = attribute['attribute_value']
            if value.lower() not in cve_list:
                continue
            per_cve = cve_data_dict.setdefault(value.upper(), {})
            # Keep the first record for a plugin; later duplicates are ignored.
            if plugin not in per_cve:
                per_cve[plugin] = {"name": details["name"], "hosts": []}
        progress.progress(count, last_index)
    print("\n")
    return json.dumps(cve_data_dict)
def naive_bayes(cache_model):
    """Train a Naive-Bayes gender classifier and report 8-fold accuracy.

    Pre-processing walks the module-level ``data`` (pairs of gender and
    comment), extends the module-level ``list_of_gender`` /
    ``list_of_words`` and appends one feature dict per known gender to the
    module-level ``train_data_gender``: the "word present" features for
    the sample's own gender and the "word absent" features for the others.

    Args:
        cache_model: when true, the trained classifier is stored through
            ``cache.cache_model``.

    Returns:
        The classifier trained on the complete training data.
    """
    print("Running Naive-Bayes Classifier Training")
    total = len(data)
    start_progress("Pre-processing {} of data".format(total))
    for idx, (gender, comment) in enumerate(data, start=1):
        word_exist = {}
        word_not_exist = {}
        if gender not in list_of_gender:
            list_of_gender.append(gender)
        for word in word_tokenize(comment):
            word_exist[word] = True
            word_not_exist[word] = False
            if word not in list_of_words:
                list_of_words.append(word)
        for gen in list_of_gender:
            if gen == gender:
                train_data_gender.append((word_exist, gen))
            else:
                train_data_gender.append((word_not_exist, gen))
        progress(idx / total * 100)
    end_progress()
    print("\nFinished pre-processing ({} data)".format(total))

    print("Training {} gender data".format(total))
    main_gender_classifier = NaiveBayesClassifier.train(train_data_gender)
    if cache_model:
        cache.cache_model(main_gender_classifier,
                          "model/gender_classifier_{}.p".format(total))

    print("Cross validation")
    n_folds = 8
    average_accuracy = 0
    size = len(train_data_gender)
    for i in range(1, n_folds + 1):
        lower = round((i - 1) * size / n_folds)
        upper = round(i * size / n_folds)
        test_set = train_data_gender[lower:upper]
        training_set = train_data_gender[:lower] + train_data_gender[upper:]
        gender_classifier = NaiveBayesClassifier.train(training_set)
        # Score each fold once and reuse the value for report and average.
        accuracy = classify.accuracy(gender_classifier, test_set)
        print("Test-{0}: {1:.2%}".format(i, accuracy))
        average_accuracy += accuracy
    average_accuracy /= n_folds
    print("Average accuracy: " + "{0:.2%}\n".format(average_accuracy))
    return main_gender_classifier
def calculateHashes(filespaths):
    """Hash every file in ``filespaths`` while reporting progress.

    Returns:
        A list of ``{"filepath": path, "digest": calculateHash(path)}``
        dicts, in input order.
    """
    total = len(filespaths)
    fileInfos = []
    for position, filepath in enumerate(filespaths, 1):
        progress.progress(position, total, "Calculating Hashes")
        digest = calculateHash(filepath)
        fileInfos.append(dict(filepath=filepath, digest=digest))
    return fileInfos
def Menu():
    """Ask which screen to open and dispatch to it.

    ``"1"`` opens the history, ``"2"`` the progress screen; any other
    answer falls back to the main screen.
    """
    choice = input("What you option you want to use?\n1 = history\n2 = progress\n3 = main\n")
    screens = {"1": History, "2": progress}
    handler = screens.get(choice, Main)
    handler()
def playHalf(p1, p2, numG, games):
    """Play ``numG`` games between ``p1`` and ``p2`` and tally the results.

    NOTE(review): ``self`` and ``numGames`` are not parameters; they are
    presumably captured from an enclosing method's scope -- confirm.

    Args:
        p1, p2: the two players handed to ``TicTacToe``.
        numG: number of games to play in this call.
        games: number of games already played, used as the progress offset.

    Returns:
        A length-3 array; each game increments the slot ``winner + 1``
        (presumably ``winner`` is -1, 0 or 1 -- verify against
        ``TicTacToe.play``).
    """
    t = TicTacToe(p1, p2, verbose=self.VERBOSE)
    result = np.zeros(3)
    for g in range(numG):
        # Refresh the progress display roughly every 5% of all games.
        if g % math.ceil(0.05 * numGames) == 0:
            progress(games + g, numGames, status="Playing Games")
            print()
        winner = t.play()
        t.reset()
        result[winner + 1] += 1
    return result
def merge(pdfs, course_code):
    """Concatenate ``pdfs`` into ``<course_code>\\merged.pdf``.

    Args:
        pdfs: iterable of PDF sources accepted by ``PdfFileMerger.append``.
        course_code: name of the course folder; also used in the status
            text.  The output path is built with a backslash separator.
    """
    print('\n')
    progress(0, 1, status='merging '+course_code)
    from PyPDF2 import PdfFileMerger
    merger = PdfFileMerger()
    try:
        for pdf in pdfs:
            merger.append(pdf)
        merger.write(course_code+'\\'+'merged.pdf')
    finally:
        # Release the input files held by the merger, even on failure.
        merger.close()
    progress(1, 1, status='merged '+course_code)
    print('\n')
def conv2_mexh_var(patch, scales, dx):
    """Compute the wavelet variance of ``patch`` at each scale.

    No-data markers (-9999.0, -32767, NaN) are zeroed in ``patch`` in place,
    the patch is normalised to unit variance, and the 2-D transform from
    ``conv2_mexh`` is evaluated per scale.  A fringe of
    ``ceil(4 * max(scales))`` nodes on every edge is masked so that the
    same number of nodes contributes at every scale.

    Args:
        patch: 2-D array; modified in place by the no-data clean-up.
        scales: array of wavelet scales (must support ``dx * scales``).
        dx: grid spacing.

    Returns:
        ``(Vcwt, frq, wave)``: the variance per scale (shape
        ``(1, len(scales))``) and the frequency and wavelength vectors.
    """
    import progress
    import numpy as np
    from conv2_mexh import conv2_mexh

    patch[patch == -9999.0] = 0
    patch[patch == -32767] = 0
    patch[np.isnan(patch)] = 0

    nrows, ncols = np.shape(patch)
    nodes = nrows * ncols

    # Normalize patch to have unit variance.
    patch = patch / np.nanstd(patch)

    # Initialize the output vector.
    Vcwt = np.zeros((1, np.size(scales)))

    # Extent of edge effects at the largest scale sampled; the index sets
    # are loop-invariant, so they are built once.
    max_scale = np.max(scales)
    fringeEval = np.ceil(4 * max_scale)
    leading = np.arange(0, fringeEval).astype(int)
    trailing_rows = np.arange(nrows - fringeEval, nrows).astype(int)
    trailing_cols = np.arange(ncols - fringeEval, ncols).astype(int)

    # Frequency and wavelength vectors depend only on the inputs.
    wave = 2 * np.pi * dx * scales / (5 / 2) ** (1 / 2)
    frq = 1 / wave

    for k, a in enumerate(scales):
        progress.progress(a, max_scale, 'Doing long job')

        # Compute the 2-D CWT at the current scale.
        C = conv2_mexh(patch, a, dx)

        # Mask edge effects with NaN (np.NaN was removed in NumPy 2.0).
        C[leading, :] = np.nan
        C[:, leading] = np.nan
        C[trailing_rows, :] = np.nan
        C[:, trailing_cols] = np.nan

        # Count the masked nodes, then zero them so they add nothing.
        ind = np.argwhere(np.isnan(C))
        C[np.isnan(C)] = 0

        # Wavelet variance using the number of real-valued nodes.
        Vcwt[0, k] = 1 / (2 * (nodes - ind.shape[0])) * np.sum(np.sum(C ** 2, 1), 0)

    return (Vcwt, frq, wave)
def make_results(self):
    """Yield every result from ``self.pool`` while updating the progress.

    ``progress`` is called before each item is yielded and returns the new
    lap start time; a final call with ``done=True`` follows once the pool
    is exhausted.
    """
    lap_start = time.time()
    # Keep ``i`` bound for the final call when the pool yields nothing
    # (-1 is "one before the first index").
    i = -1
    for i, result in enumerate(self.pool):
        lap_start = progress(i, lap_start, self.input_len, self.start_time,
                             self.message)
        yield result
    progress(i, lap_start, self.input_len, self.start_time, self.message,
             done=True)
def sync(D,M,DM,idc=True,stub=False):
    """Propagate names known in the other dumps into each dump of ``D``.

    For every ``dest`` in ``D`` the names of all other dumps are collected
    and ``FindBestMatch`` is asked for the best matching address in
    ``dest``.  When several names map to the same address the one with the
    higher score is kept.  Results are written next to ``dest.bin`` as
    ``-new`` / ``-existing`` files: IDC scripts when ``idc`` is true, stub
    (``.S``) files when ``stub`` is true.

    NOTE(review): the output files opened here are never closed
    explicitly; flushing relies on interpreter clean-up.
    """
    for dest in D:
        # Names defined in every dump except the destination itself.
        names = []
        for d in D:
            if d != dest:
                names += list(d.N2A)
        if not names:
            print "Unchanged: %s" % dest.bin
            continue
        b = fileutil.change_ext(dest.bin, "")
        progress.progress("Sync'ing %s with the others..." % dest.bin)
        if idc:
            newnames_idc = open("%s-new.idc" % b, "w")
            existingnames_idc = open("%s-existing.idc" % b, "w")
            print >> newnames_idc, idch
            print >> existingnames_idc, idch
        if stub:
            newnames_s = open("%s-new.S" % b, "w")
            existingnames_s = open("%s-existing.S" % b, "w")
        # A maps matched address -> (name, score, c, fp); the last two
        # fields are passed through from FindBestMatch unchanged.
        A = {}
        for k,n in enumerate(sorted(names)):
            progress.progress(float(k) / len(names))
            # Every (dump, address) pair that defines this name.
            da = []
            for d in D:
                if n in d.N2A:
                    da.append((d, d.N2A[n]))
            m,s,c,fp = FindBestMatch(da, dest, M, DM)
            if m:
                if m in A:
                    # Address already claimed: keep the higher score.
                    if s > A[m][1]:
                        print "better match, replacing"
                        A[m] = (n,s,c,fp)
                else:
                    A[m] = (n,s,c,fp)
        print "saving..."
        for m,(n,s,c,fp) in A.iteritems():
            if m in dest.A2N:
                # Address already has a name in dest: record it separately.
                if stub:
                    print >> existingnames_s, "NSTUB(%10s, %s)%s // [already defined as %s] %s" % ("0x%X"%m,n," " * (30-len(n)), (dest.A2N[m]), c)
                if idc:
                    print >> existingnames_idc, ' MakeName(%10s, "%s");%s // [already defined as %s] %s' % ("0x%X"%m,n," " * (30-len(n)), (dest.A2N[m]), c)
            else:
                if stub:
                    print >> newnames_s, "NSTUB(%10s, %s)%s // %s" % ("0x%X"%m,n," " * (30-len(n)),c)
                if idc:
                    print >> newnames_idc, ' MakeName(%10s, "%s");%s // %s' % ("0x%X"%m,n," " * (30-len(n)),c)
        if idc:
            # Close the block opened by the idch header.
            print >> newnames_idc, "}"
            print >> existingnames_idc, "}"
def download_files(course_links, course_code):
    """Download every URL in ``course_links`` into the course folder.

    The local name is the last URL segment.  If that name already exists
    in the folder, the text after the final ``_`` of the stem is replaced
    by a running counter until the name is free.  Afterwards every month
    and year from ``get_range`` that occurs in the URL is prefixed to the
    name.  Connection failures and timeouts are shown in the progress
    status and the URL is skipped.

    Args:
        course_links: sequence of file URLs.
        course_code: name of the target folder (created on demand); paths
            are built with a backslash separator.

    Returns:
        List of local paths of the files that were downloaded.
    """
    local_file_path = []
    path = course_code
    total = len(course_links)
    chunk_bytes = 64 * 1024
    print('\n')
    # One pool for the whole batch so connections can be reused.
    http = urllib3.PoolManager()
    for i, url in enumerate(course_links, start=1):
        parts = url.rsplit('/', 1)[1].rsplit('.', 1)
        stem, ext = parts[0], parts[1]
        j = 0
        while os.path.exists(path+'\\'+stem+'.'+ext):
            stem = stem.rsplit('_', 1)[0] + '_' + str(j)
            j += 1
        filename = stem+'.'+ext
        for month in get_range('month'):
            if re.search(month, url, re.IGNORECASE):
                filename = month+'_'+filename
        for year in get_range('year'):
            if re.search(year, url, re.IGNORECASE):
                filename = year+'_'+filename
        pathlib.Path(path).mkdir(parents=True, exist_ok=True)
        try:
            response = http.request('GET', url, preload_content=False,
                                    retries=False, timeout=10.0)
        except urllib3.exceptions.NewConnectionError:
            msg = 'Connection failed for url: '+url
        except urllib3.exceptions.TimeoutError:
            msg = 'Connection timed out for url: '+url
        else:
            target = path+'\\'+filename
            try:
                with open(target, 'wb') as out:
                    while True:
                        data = response.read(chunk_bytes)
                        if not data:
                            break
                        out.write(data)
            finally:
                # Hand the connection back even if writing failed.
                response.release_conn()
            local_file_path.append(target)
            msg = 'downloaded '+str(i)+'/'+str(total)+' of '+str(total)+' files'
        progress(i, total, status=msg)
    return local_file_path
def handle(data):
    """Write incoming bytes to ``f`` once a run of four 0xfb bytes is seen.

    State is kept in the module-level ``start`` (marker found), ``expect``
    (length of the current run of 0xfb bytes while searching) and ``prog``
    (bytes written so far).

    Until the marker is found the chunk is scanned byte by byte; when the
    fourth consecutive 0xfb arrives, the data is trimmed (or padded with
    0xfb bytes when part of the marker came in an earlier chunk) so that
    it begins with the marker, and the function calls itself to write it.
    """
    global start, expect, prog
    array = np.fromstring(data, dtype=np.uint8)
    length = array.size
    if start:
        # Marker already seen: everything is payload.
        f.write(data)
        prog += length
        progress(prog)
        # Disabled frame validation, kept for reference:
        # if expect >= length:
        #     expect -= length
        # else:
        #     if expect < 0:
        #         i = -expect
        #         expect = 0
        #     else:
        #         i = 0
        #     while expect < length:
        #         while i < 8 and expect < length:
        #             if (i < 4 and array[expect] == 0xfb) or (i >= 4 and array[expect] == 0xff):
        #                 expect += 1
        #                 i += 1
        #             else:
        #                 print('*********error*********')
        #                 f.close()
        #                 sys.exit(1)
        #         if i < 8:
        #             expect = -i
        #             return
        #         else:
        #             expect += 940
        #     expect -= length
    else:
        i = 0
        while i < length:
            if array[i] == 0xfb:
                expect += 1
                if expect == 4:
                    start = True
                    if i < 3:
                        # Marker began in a previous chunk: restore the
                        # missing leading bytes.
                        head = '\xfb' * (3 - i)
                        data = head + data
                    else:
                        # Drop everything before the marker.
                        data = data[i - 3:]
                    expect = 0
                    handle(data)
                    return
            else:
                # Run broken: start counting again.
                expect = 0
            i += 1
def send_data():
    """Send numbered 100-byte test frames over the serial port forever.

    Each frame is 95 ``@`` characters followed by a 5-digit, zero-padded
    sequence number.  One frame is written every 0.1 s and the running
    byte count is passed to ``progress``.  The loop only ends through an
    exception (for example Ctrl-C); the port is closed on the way out.
    """
    ser = serial.Serial('/dev/ttyUSB0', 57600, timeout=10)
    string = '\x40' * 95
    i = 0
    k = 0
    try:
        while True:
            i += 1
            s = string + format(i, '05d')
            k += len(s)
            progress(k)
            ser.write(s.encode())
            sleep(0.1)
    finally:
        # Previously unreachable: make sure the port is released.
        ser.close()
def run(self, start_frame=0, end_frame=None):
    """Export resized video frames and build the index page.

    Frames ``start_frame, start_frame + frame_step, ...`` up to (but not
    including) ``end_frame`` are scaled down by ``self.resize_factor`` and
    saved as ``static/frame-NNNNNN.jpeg`` below ``self.output_path``.

    Args:
        start_frame: first frame number to export.
        end_frame: frame number to stop before; a falsy value means
            ``self.total_frames``.

    Raises:
        Exception: if ``self.input_video_path`` does not exist.
    """
    # Check input video exists.
    if not os.path.exists(self.input_video_path):
        raise Exception('Video does not exist!!')

    # Set up output directory.
    self._make_output_directory()

    last_frame = end_frame if end_frame else self.total_frames
    frames_to_get = numpy.arange(start_frame, last_frame, self.frame_step)

    # Save out frames to output directory.
    frame_path_list = []
    prog = progress(len(frames_to_get))
    for i, frame in enumerate(frames_to_get):
        prog.update(i)
        image = self.vs.get_frame_no(frame).image()
        width, height = image.size
        scaled_size = (int(width/self.resize_factor),
                       int(height/self.resize_factor))
        image = image.resize(scaled_size)
        frame_name = 'frame-%06d.jpeg' % frame
        image.save(os.path.join(self.output_path, 'static', frame_name))
        frame_path_list.append(os.path.join('static', frame_name))
    frame_to_get_str_list = list(map(str, frames_to_get))
    prog.end()

    # Make webpage.
    make_page(self.output_path, 'index', frame_path_list,
              frame_to_get_str_list)
async def _dump_page(campaign_id, filename, pageno, playerid, is_gm):
    """Download one chat-archive page and feed it to a ``ChatParser``.

    The body is streamed in 64 KiB chunks.  An incremental decoder is
    used so that a multi-byte character split across two chunks is decoded
    correctly instead of raising ``UnicodeDecodeError``.

    Increments the module-level ``done`` counter and updates the progress
    display when the page has been processed.

    Raises:
        HTTPError: if the server does not answer with status 200.
    """
    global session, pages, done
    import codecs

    response = await session.get(
        'https://app.roll20.net/campaigns/chatarchive/{}/?p={}'.format(
            campaign_id, pageno),
        allow_redirects=False)
    async with response:
        if response.status != 200:
            raise HTTPError(response.status)

        parser = ChatParser(filename, playerid, is_gm)
        encoding = response.charset or 'utf-8'
        decoder = codecs.getincrementaldecoder(encoding)()
        async for chunk in response.content.iter_chunked(64 * 1024):
            text = decoder.decode(chunk)
            if text:
                parser.process(text)
        # Flush the decoder; raises if the stream ends mid-character.
        tail = decoder.decode(b'', final=True)
        if tail:
            parser.process(tail)
        parser.finalize()

    done = done + 1
    progress(done, pages)
def make_chunks(is_verbose=False, show_progress=False):
    """Make chunks of the file passed.

    The source path comes from ``rem('grab')``.  The file is read in
    pieces of the size returned by ``rem_read_size('grab')`` and each
    piece is handed to ``copy.copy_chunks`` with its 1-based number.

    Args:
        is_verbose: print a line for every chunk copied.
        show_progress: start the ``'chunk'`` progress thread.

    Returns:
        ``True`` once the whole file has been read.
    """
    src, des = rem('grab')

    # Open the file in binary; the context manager closes it on exit.
    with open(src, 'rb') as file_stream:
        rem_read_size('register', 104857600)

        # After registering show progress.
        if show_progress:
            progress_bar = progress.progress('chunk')
            progress_bar.start()

        count = 0
        while True:
            read_size = rem_read_size('grab')
            read_chunk = file_stream.read(read_size)
            count += 1
            if not read_chunk:
                # NOTE(review): the original had an unreachable
                # ``progress_bar.join()`` after this loop; the thread is
                # still not joined here to keep runtime behaviour unchanged.
                return True
            if is_verbose:
                print("Copying chunk: {}".format(count))
            copy.copy_chunks(read_chunk, count)
def send_file():
    """Stream the recorded data file over the serial port.

    The file is sent in 100-byte pieces with a 0.1 s pause after each one;
    the running byte count is passed to ``progress``.  Both the file and
    the serial port are released even if the transfer fails.
    """
    ser = serial.Serial('/dev/ttyUSB0', 57600, timeout=10)
    try:
        with open(
                '/home/longzhou/Data/chaoyangxiyuan/2017-09-08/1504850902_left.dat',
                'rb') as source:
            size = 0
            while True:
                data = source.read(100)
                if len(data) == 0:
                    break
                ser.write(data)
                size += len(data)
                progress(size)
                sleep(0.1)
    finally:
        ser.close()
    print("done")
def import_upx(args, opts):
    """ Import the contents of a upx file. Does not import the upx file itself.
        Passes a list of imported oids.
    """
    if not args:
        raise ShellSyntaxError("No files/dir passed")

    oids = []
    newfiles = []
    for arg in args:
        if os.path.isdir(arg):
            print(" - Processing upx files in directory %s" % arg)
            # Tolerate a None listing from the directory helper.
            files = sys_utils.get_files_from_directory(arg) or []
            p = progress.progress(len(files))
            for f in files:
                uoids, noids = import_upxfile(f)
                oids.extend(uoids)
                newfiles.extend(noids)
                p.tick()
        elif os.path.isfile(arg):
            # The file name was previously never substituted into the text.
            print(" - Processing file %s ..." % arg)
            uoids, noids = import_upxfile(arg)
            oids.extend(uoids)
            newfiles.extend(noids)
        else:
            print(" - %s not found" % (arg))
    print(" - Extracted %d files %d are new" % (len(oids), len(newfiles)))
    return oids
def multiply_arrays(self, plot_heatmap=True, save_pickle=True, operation='mult'):
    """Combine the drug/gene and gene/disease arrays into drug/disease scores.

    Args:
        plot_heatmap: save heatmaps of the result and of
            ``self.drugs_straight_to_diseases_array`` as PNG files.
        save_pickle: write the result to ``drugs_vs_disease_<operation>``
            ``.pickle`` and ``.csv`` files.
        operation: ``'mult'`` averages the gene scores over the genes
            linked to each drug (matrix product divided by the number of
            linked genes); ``'max'`` takes, per drug and disease, the
            maximum of ``gene score * drug/gene entry`` over all genes.

    The result is stored on ``self.drugs_disease_mult_array`` or
    ``self.drugs_disease_max_array``.

    Raises:
        ValueError: if ``operation`` is neither ``'mult'`` nor ``'max'``.
    """
    if operation not in ('mult', 'max'):
        raise ValueError(
            "operation must be 'mult' or 'max', got {!r}".format(operation))

    if operation == 'mult':
        drugsVdisease = np.matmul(self.drugs_genes_array,
                                  self.genes_vs_disease_array)
        # Number of genes linked per drug.
        num_genes_per_drug = np.sum(self.drugs_genes_array, axis=1)
        # Average drug-disease score across the intermediate genes.
        # NOTE(review): a drug with no linked genes divides by zero here.
        drugsVdisease = drugsVdisease/num_genes_per_drug[:, None]
    else:
        tot_count = self.drugs_genes_array.shape[0]
        drugsVdisease = np.zeros((tot_count,
                                  self.genes_vs_disease_array.shape[1]))
        for drug_id in range(tot_count):
            progress(drug_id, tot_count)
            drug_to_genes = self.drugs_genes_array[drug_id, :]
            # All diseases at once: weight each gene row by the drug's
            # link to that gene, then take the per-disease maximum.
            drugsVdisease[drug_id, :] = np.max(
                self.genes_vs_disease_array * drug_to_genes[:, None], axis=0)

    if plot_heatmap:
        fig = plt.figure()
        sns.heatmap(drugsVdisease)
        fig.suptitle('drugs vs diseases array', fontsize=14)
        plt.xlabel('disease id', fontsize=12)
        plt.ylabel('drug id', fontsize=12)
        fig.savefig('./drugsVsDiseases-{}.png'.format(operation))

        fig = plt.figure()
        sns.heatmap(self.drugs_straight_to_diseases_array)
        fig.suptitle('drugs vs diseases array', fontsize=14)
        plt.xlabel('disease id', fontsize=12)
        plt.ylabel('drug id', fontsize=12)
        fig.savefig('./drugsVsDiseases-chembl.png')

    if save_pickle:
        with open('drugs_vs_disease_{}.pickle'.format(operation), 'wb') as f:
            pickle.dump(drugsVdisease, f, protocol=pickle.HIGHEST_PROTOCOL)
        np.savetxt('drugs_vs_disease_{}.csv'.format(operation),
                   drugsVdisease, delimiter=',')

    if operation == 'mult':
        self.drugs_disease_mult_array = drugsVdisease
    else:
        self.drugs_disease_max_array = drugsVdisease
def do(src, des, is_verbose=False, show_progress=False):
    """Copy ``src`` to ``des`` by splitting it into chunks and rejoining them.

    When ``sep.check_filesize()`` is falsy the chunking is skipped in
    favour of ``copy.simple_copy()`` and ``True`` is returned.  Otherwise
    a ``cpf_temp`` folder is created next to ``des``, the chunks are made
    and combined, and the folder is removed again (best effort).

    Args:
        src: source path.
        des: destination path.
        is_verbose: print each stage.
        show_progress: show progress bars for the chunk and copy stages.
    """
    announce = is_verbose or show_progress
    if announce:
        print("{} -> {}".format(src, des))

    # If the filesize is zero then use simple_copy.
    if not sep.check_filesize():
        input('Simple copy')
        copy.simple_copy()
        return True

    if show_progress:
        rem_size('register', os.path.getsize(src))

    # Temp folder that keeps the chunk files.
    des_parent = os.path.dirname(des)
    tmp_dir = os.path.join(des_parent, 'cpf_temp')
    if is_verbose:
        print("Making temp directory..")
    os.mkdir(tmp_dir)
    rem_dir('register', tmp_dir)

    # Break the source into chunks.
    if is_verbose:
        print("Making chunks...")
    sep.make_chunks(is_verbose, show_progress)

    if is_verbose:
        print("Combining chunks..")
    if show_progress:
        progress_bar = progress.progress('copy')
        progress_bar.start()

    # Combine the chunks into the destination.
    combine.combine_chunks()

    if announce:
        if show_progress:
            # A newline is necessary after showing the progress.
            print('')
        print('Cleaning up...')

    # Wait for the progress_bar thread to end.
    if show_progress:
        progress_bar.join()

    cleanup.pass_names(tmp_dir)
    try:
        rmtree(tmp_dir)
    except Exception:
        pass

    rem('unregister')
    rem_dir('unregister')
def test_tick(self, mock_stdout):
    """tick() raises the count from 0 to 1 and calls write() exactly once."""
    bar = progress()
    bar.write = MagicMock()

    self.assertEqual(bar.count, 0)
    bar.tick()
    self.assertEqual(bar.count, 1)
    bar.write.assert_called_once_with()
def read_prescriptions(Prescriptions, gplookup, chemlookup):
    """Convert every row of the prescription CSV into a ``pdata`` record.

    For each data row the first ten columns are copied onto a new
    ``pdata`` object, the period and the BNF code are split into their
    components, the chemical name is looked up in ``chemlookup`` and the
    practice location in ``gplookup``; the record is then written with
    ``writeline()``.  Progress is reported every 1000 rows from the file
    position.

    Args:
        Prescriptions: path of the prescription CSV (first row is a header).
        gplookup: mapping practice code -> ``(xgrid, ygrid, postcode)``.
        chemlookup: mapping chemical code -> chemical name.
    """
    raw_fields = ("sha", "pct", "practice", "bnfcode", "bnfname",
                  "items", "nic", "act_cost", "quantity", "period")
    size = os.stat(Prescriptions).st_size

    with open(Prescriptions, "rb") as csvfile:
        preader = csv.reader(csvfile)
        next(preader)  # skip header
        pdata.writeheader()
        for linecount, line in enumerate(preader, 1):
            if linecount % 1000 == 0:
                progress.progress(csvfile.tell(), size)

            o = pdata()
            for column, field in enumerate(raw_fields):
                setattr(o, field, line[column])

            # Period is YYYYMM.
            o.year = o.period[0:4]
            o.month = o.period[4:6]

            # BNF code components.
            o.chemical_code = o.bnfcode[0:9]
            o.chemical_name = chemlookup[o.chemical_code]
            o.product = o.bnfcode[9:11]
            o.generic = (o.product == "AA")
            suffix = o.bnfcode[13:15]
            o.equivalent = o.chemical_code + "AA" + suffix + suffix

            (o.xgrid, o.ygrid, o.postcode) = gplookup[o.practice]
            o.writeline()
def main(pop_size, graph_file, cross_pb, mut_pb, num_gen, hof_size):
    """Run the genetic algorithm on a random graph and print the best results.

    Sets the module-level ``TOURNAMENT_SIZE``, ``NUM_NODES``, ``ROOT`` and
    ``CAPACITY`` before building the random adjacency matrix.

    Args:
        pop_size: population size (rounded up to an even number).
        graph_file: number of nodes; converted with ``int()``.
        cross_pb: crossover probability.
        mut_pb: mutation probability.
        num_gen: number of generations to run.
        hof_size: number of individuals kept in the hall of fame.
    """
    global TOURNAMENT_SIZE
    TOURNAMENT_SIZE = 20
    global NUM_NODES
    NUM_NODES = int(graph_file)
    global ROOT
    ROOT = random.randint(1, NUM_NODES)
    global CAPACITY
    CAPACITY = random.randint(3, NUM_NODES)

    random_adjacency_matrix()

    # Hall Of Fame is terminology from deap but
    # I like it so I am giving it credit here
    hof = hall_of_fame.hof(hof_size)

    population = generate_population(pop_size + (pop_size % 2),
                                     int(NUM_NODES * (NUM_NODES + 1) / 2))
    fitness_evaluation(population)
    hof.update(population)

    start = time.clock()
    progress.startProgress("Generation Progress")

    # GA Execution
    # Mutation function can be changed from here
    for cur_gen in range(num_gen):
        children = crossover(population, cross_pb)
        population = mutation(children, mut_pb)
        fitness_evaluation(population)
        hof.update(population)
        # Multiply before dividing: ``cur_gen / num_gen`` truncates to 0
        # under Python 2 integer division, freezing the bar at 0%.
        progress.progress((cur_gen * 100) // num_gen)
    progress.endProgress()

    print(hof)
    print("Time:  %s" % (time.clock() - start))
    print("ROOT:  %s" % ROOT)
    print("CAPACITY:  %s" % CAPACITY)
def test_defaults(self, mock_stdout):
    """A progress built without arguments exposes the documented defaults."""
    bar = progress()

    expected_defaults = (
        ('count_format', '{:>5}'),
        ('render_order', bar.renderable_components()),
        ('progress_format', 'Processed: {count}'),
        ('stream', stdout),
        ('width', None),
        # Computed from args.
        ('computed_render_order', ['count']),
    )
    for attribute, expected in expected_defaults:
        self.assertEqual(getattr(bar, attribute), expected)
def test_blank_components(self, mock_stdout):
    """blank_components() maps each renderable component to ''."""
    component_names = ['foo', 'bar', 'buzz']
    bar = progress()
    bar.renderable_components = MagicMock(return_value=component_names)

    expected = {'foo': '', 'bar': '', 'buzz': ''}
    self.assertEqual(bar.blank_components(), expected)
def handle(data):
    """Write incoming bytes to ``f`` and validate the frame headers.

    State lives in module-level globals: ``start`` (sync marker found),
    ``expect`` (offset of the next header byte to check, or the length of
    the 0xfb run while searching), ``i`` (header bytes matched so far) and
    ``prog`` (bytes written).

    Once started, every header is checked to be four 0xfb bytes followed
    by four 0xff bytes; after a full header the next one is expected 500
    bytes further on.  A mismatch prints an error, closes ``f`` and exits
    the process with status 1.

    Before the start, the chunk is scanned for four consecutive 0xfb
    bytes; the remainder of the chunk after them is then handled
    recursively with ``i`` preset to 4.
    """
    global start, expect, i, prog
    array = np.fromstring(data, dtype=np.uint8)
    length = array.size
    if start:
        f.write(data)
        prog += length
        progress(prog)
        while expect < length:
            # Match up to 8 header bytes: 0xfb for the first four,
            # 0xff for the last four.
            while i < 8 and expect < length:
                if array[expect] == 0xfb if i < 4 else array[expect] == 0xff:
                    expect += 1
                    i += 1
                else:
                    print('*********error*********')
                    f.close()
                    sys.exit(1)
            if i < 8:
                # Header continues in the next chunk.
                expect = 0
                return
            else:
                i = 0
                expect += 500
        # Make the offset relative to the start of the next chunk.
        expect -= length
    else:
        i = 0
        while i < length:
            if array[i] == 0xfb:
                expect += 1
                if expect == 4:
                    start = True
                    expect = 0
                    i += 1
                    if i < length:
                        # Validate the rest of this chunk; the four 0xfb
                        # bytes of the header are already consumed.
                        data = data[i:]
                        i = 4
                        handle(data)
                    i = 4
                    return
            else:
                expect = 0
            i += 1
def do_import_folder(self):
    """Ask for a folder and import every file found beneath it.

    The folder is walked recursively (following symlinks); each file is
    passed to ``import_file`` and the returned status is printed.  Nothing
    happens when the dialog is cancelled.
    """
    chosen = QtGui.QFileDialog.getExistingDirectory(None, "Import Folder")
    fname = str(chosen)
    if not fname:
        return

    # Get a list of all files to be imported.
    flist = []
    for root, _dirs, names in os.walk(fname, followlinks = True):
        flist.extend(os.path.join(root, name) for name in names)

    for path in progress(flist, "Importing Files","Stop"):
        status = import_file(path)
        print(status)
def loadFile(self):
    """
    Let the user pick a JPEG and show its progressive scans.

    Steps:
        - ask for an image file (``*.jpg`` / ``*.jpeg``)
        - pass the image path to ``pr.progress`` for extraction
        - save the extracted images to ``results`` and show them in the
          photo widgets

    A ``TypeError`` from ``pr.progress`` is treated as "not a progressive
    image": a warning box is shown and nothing is displayed.
    """
    self.statusbar.showMessage("Loading Image File")
    self.loaded_image, self.loaded_image_format = QtWidgets.QFileDialog.getOpenFileName(
        None, "Load Image File", filter="*.jpg;; *.jpeg")
    self.logger.debug("Image File Loaded")

    # Dialog cancelled: nothing to do.
    if self.loaded_image == "":
        self.logger.debug("loading cancelled")
        self.statusbar.showMessage("Loading cancelled")
        return

    self.logger.debug("starting extraction of data")
    try:
        self.logger.debug("Progressive Image ..")
        self.jpeg_extracted = pr.progress(self.loaded_image)
        self.progressive = True
    except TypeError:
        self.logger.debug("Loaded image %s is not progressive " %
                          self.loaded_image)
        self.showMessage("Warning !",
                         "You need to load a progressive Image",
                         QtWidgets.QMessageBox.Ok,
                         QtWidgets.QMessageBox.Warning)
        self.progressive = False

    if not self.progressive:
        return

    preview = QtGui.QPixmap(self.loaded_image).scaled(250, 250)
    self.imageLoaded.setPixmap(preview)
    self.logger.debug("Loaded Image %s" % self.loaded_image)
    self.statusbar.showMessage("Extracting Image ... ")
    self.logger.debug("saving ")
    pr.save_images(self.jpeg_extracted, "results")
    self.logger.debug("Done")
    self.scrollArea.show()

    # Widgets are filled in object-name order with out0.jpg, out1.jpg, ...
    ordered_widgets = sorted(self.photos, key=lambda x: x.objectName())
    for indx, widget in enumerate(ordered_widgets):
        self.logger.debug("Showing results/out%s.jpg" % indx)
        widget.setPixmap(
            QtGui.QPixmap("results/out%s.jpg" % indx).scaled(250, 250))
    self.statusbar.clearMessage()
    self.statusbar.showMessage("Images are saved in results/ ")
def test_render_count(self, mock_stdout):
    """render_count() formats the count with the current count_format."""
    bar = progress()
    width = randint(0, 100)
    bar.count = randint(0, 100)

    # Default format.
    rendered_default = bar.count_format.format(bar.count)
    self.assertEqual(bar.render_count(width), rendered_default)

    # Custom format.
    bar.count_format = '{:>10}'
    rendered_custom = bar.count_format.format(bar.count)
    self.assertEqual(bar.render_count(width), rendered_custom)
def process(mod_name, oid_list, opts=None, force=False):
    """ Calls a module over an oid_list without returning results.

        Oids that already have results for ``mod_name``/``opts`` are
        skipped unless ``force`` is set.  Returns False when the module is
        unknown, the options do not validate, or a module call returns a
        falsy value; True when there was nothing left to process.  On any
        exception the datastore is cleaned up and the exception re-raised.
    """
    logger.debug("process %s %s", mod_name, oid_list)
    if not opts:
        opts = {}

    # Clean up and validate inputs
    mod_type = get_mod_type(mod_name)
    if not mod_type:
        logger.error("Module %s not found", mod_name)
        return False
    oid_list = cleanup_oid_list(mod_name, oid_list)
    if not options.validate_opts(mod_name, opts):
        logger.error("Failed to validate opts for %s : %s", mod_name, opts)
        return False

    try:
        # Prune analysis that already exists
        pending = [oid for oid in oid_list
                   if not exists(mod_name, oid, opts) or force]
        if not pending:
            # Everything was already processed
            return True

        run_serially = (len(pending) == 1
                        or not config.multiproc_on
                        or mod_type == "analyzers")
        if run_serially:
            succeeded = True
            if mod_type in ("extractors", "source"):
                ticker = progress.progress(len(pending))
                for oid in pending:
                    if not single_call_module(mod_type, mod_name, oid, opts):
                        succeeded = False
                    ticker.tick()
                return succeeded
            # Don't keep the return value of analyzers and map_reducers,
            # return False if they return None
            if not single_call_module(mod_type, mod_name, pending, opts):
                succeeded = False
            return succeeded

        # Multiprocessing is on and not an analysis module
        if mod_type in ("extractors", "source"):
            func = initialized_modules[mod_name].process
        elif mod_type == "map_reducers":
            func = initialized_modules[mod_name].mapper
        else:
            raise otypes.UnrecognizedModule("Attempt to call module not of known type.")
        return mp.multi_map(func, pending, opts, True)
    except:
        datastore.cleanup()
        raise
def write_video(video_url, full_path, filename, chunk_size=4096):
    """Download ``video_url`` to ``full_path/filename`` unless already complete.

    The remote size comes from the ``Content-Length`` header of a HEAD
    request.  If the file on disk (as reported by ``check_if_file_exists``)
    is at least that large, the download is skipped; otherwise the file is
    rewritten from scratch while a progress line is printed.

    Args:
        video_url: URL of the video.
        full_path: target directory.
        filename: target file name.
        chunk_size: number of bytes requested per streamed chunk.
    """
    size = int(requests.head(video_url).headers['Content-Length'])
    size_on_disk = check_if_file_exists(full_path, filename)
    if size_on_disk < size:
        with open(full_path + "/" + filename, 'wb') as fd:
            r = requests.get(video_url, stream=True)
            try:
                current_size = 0
                # Stays empty when the server sends no body at all.
                s = ''
                for chunk in r.iter_content(chunk_size=chunk_size):
                    fd.write(chunk)
                    # Count the bytes actually received; the last chunk is
                    # usually shorter than chunk_size.
                    current_size += len(chunk)
                    s = progress(current_size, size, filename)
                    print(s, end='', flush=True)
            finally:
                r.close()
            print(s)
    else:
        print("{0} already downloaded, skipping...".format(filename))
def single_call_module(type, mod_name, oid_list, opts):
    """ Calls any module type with one oid_list

        Extractors and sources are run through ``process``, analyzers
        through ``results``; map reducers call ``mapper`` once per oid
        (with a progress tick) and return the ``reducer`` output.
        Raises otypes.UnrecognizedModule for any other type.
    """
    if type in ("extractors", "source"):
        return initialized_modules[mod_name].process(oid_list, opts)

    if type == "analyzers":
        return initialized_modules[mod_name].results(oid_list, opts)

    if type == "map_reducers":
        ticker = progress.progress(len(oid_list))
        jobid = get_cid_from_oid_list(oid_list)
        mapped = []
        for oid in oid_list:
            mapped.append(initialized_modules[mod_name].mapper(oid, opts, jobid))
            ticker.tick()
        return initialized_modules[mod_name].reducer(mapped, opts, jobid)

    raise otypes.UnrecognizedModule("Attempt to call module not in module list")
def unarchive(args, opts):
    """ Try to unarchive (unzip and untar), passes a list of unarchived oids
        Syntax: unarchive <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")
    oids = api.expand_oids(valid)
    unarchived = []
    newfiles = []

    print(" - Attempting to unarchive (zip, tar) %d files" % len(oids))
    p = progress.progress(len(oids))
    for oid in oids:
        data = api.get_field(api.source(oid), oid, "data")
        if not data:
            print("Not able to process %s" % (oid))
            p.tick()
            continue

        tmp = tmp_file(oid, data)
        if not tmp:
            # Count the item so the progress display still completes.
            p.tick()
            continue

        aoids = []
        noids = []
        try:
            if tarfile.is_tarfile(tmp):  # tar
                print(" - Unpacking a tar file")
                aoids, noids = import_tarfile(tmp, parent_oid=oid)
            elif zipfile.is_zipfile(tmp):  # zip
                print(" - Unpacking a zip file")
                aoids, noids = import_zipfile(tmp, parent_oid=oid)
        finally:
            # Remove the temp file even if unpacking raised.
            os.remove(tmp)

        unarchived.extend(aoids)
        newfiles.extend(noids)
        p.tick()

    if unarchived:
        unarchived.extend(unarchive(unarchived, opts))  # Unpacked children

    print(" - Extracted %d files %d are new" % (len(unarchived), len(newfiles)))
    return unarchived
def import_directory(self, directory):
    """ Process the local directory calling the local import on each file

        Returns ``(oids, num_new_files)`` where ``oids`` holds the unique
        oids returned by ``self.import_file``, or ``(None, 0)`` when the
        directory listing is None.
    """
    files_list = sys_utils.get_files_from_directory(directory)
    # Identity test: the listing may be an empty list, which is valid.
    if files_list is None:
        return None, 0

    oids = []
    num_new_files = 0
    p = progress.progress(len(files_list))
    for file_location in files_list:
        oid, new_file = self.import_file(file_location)
        p.tick()
        if oid:
            oids.append(oid)
            if new_file:
                num_new_files += 1
    oids = list(set(oids))  # assert uniqueness
    return oids, num_new_files
def import_files(files_list):
    """ Import every file in files_list.

        Returns ``(oids, new_file_count)`` with the unique oids returned
        by ``import_file``, or ``(None, 0)`` when ``files_list`` is not a
        list.  On any exception the datastore is cleaned up and the
        exception is re-raised.
    """
    if not isinstance(files_list, list):
        logger.error("files must be of type list.")
        return None, 0

    try:
        new_file_count = 0
        oids = []
        ticker = progress.progress(len(files_list))
        for file_location in files_list:
            oid, new_file = import_file(file_location)
            ticker.tick()
            if not oid:
                continue
            oids.append(oid)
            if new_file:
                new_file_count += 1
    except:
        datastore.cleanup()
        raise

    unique_oids = list(set(oids))  # assert uniqueness
    return unique_oids, new_file_count
def untar(args, opts):
    """ Try to untar items passed, passes a list of untarred oids
        Syntax: untar <oid_1> <oid_2> ... <oid_n>

        Each oid's "data" field is written to a temporary file and, when it
        is a tar archive, unpacked with import_tarfile.  Extracted oids are
        then untarred recursively.

        args -- oids to process; validated with api.valid_oids
        opts -- passed through unchanged to the recursive call
        Returns the list of extracted oids, nested children included.
        Raises ShellSyntaxError when none of the oids is valid.
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")
    oids = api.expand_oids(valid)
    untarred = []
    newfiles = []

    p = progress.progress(len(oids))
    print(" - Attempting to untar %d files" % len(oids))
    for oid in oids:
        src = api.source(oid)
        data = api.get_field(src, oid, "data")
        if not data:
            print("No data found for %s" % (oid))
            p.tick()
            continue

        tmpname = oid + ".tar.tmp"
        tmp = tmp_file(tmpname, data)
        if not tmp:
            # Keep the progress bar in step with the number of oids.
            p.tick()
            continue

        try:
            if tarfile.is_tarfile(tmp):
                toids, nfiles = import_tarfile(tmp, parent_oid=oid)
                untarred.extend(toids)
                newfiles.extend(nfiles)
        finally:
            # Never leave the temporary file behind, even if the import fails.
            os.remove(tmp)
        p.tick()

    if untarred:
        untarred.extend(untar(untarred, opts))  # Untar children

    print(" - %d files extracted, %d files are new" % (len(untarred), len(newfiles)))
    return untarred
def extrapolate(d):
    """Rebuild d.SRCFILES, a mapping filled from findgrp() results.

    First pass: call sourcefile(d, f, partial=False) for every entry of
    d.FUNCS and collect the non-empty results.  Second pass: for each distinct
    result s, look up findgrp(s) and record s under every member of the
    returned group.  A group member that is already recorded is reported as
    an ERROR and then overwritten.

    d -- the dump to process; it is selected with idapy.select_dump first and
         its SRCFILES attribute is replaced.
    """
    idapy.select_dump(d)
    srcfiles = []
    d.SRCFILES = {}

    progress("finding source file names...")
    func_count = len(d.FUNCS)
    for k, f in enumerate(sorted(d.FUNCS)):
        progress(float(k) / func_count)
        s = sourcefile(d, f, partial=False)
        if s:
            srcfiles.append(s)

    progress("finding source file groups...")
    srcfiles = list(set(srcfiles))
    file_count = len(srcfiles)
    for k, s in enumerate(srcfiles):
        progress(float(k) / file_count)
        g = findgrp(s)
        if g is None:
            # Single-argument print: same output on Python 2 and Python 3.
            print("WARNING: source file not found: %s" % (s,))
            continue
        for a in g:
            if a in d.SRCFILES:
                print("ERROR: %s %s" % (d.SRCFILES[a], s))
            d.SRCFILES[a] = s

    print("Found source files for %d subs; %d subs reference file name directly"
          % (len(d.SRCFILES), len(srcfiles)))
def upx(args, opts):
    """ Try to upx unpack items passed, passes a list of unpacked oids
        Syntax: upx <oid_1> <oid_2> ... <oid_n>

        Each oid's "data" field is written to a temporary file named after
        one of the file's recorded names and, when is_upx() accepts it,
        unpacked with import_upxfile.

        args -- oids to process; validated with api.valid_oids
        opts -- unused here
        Returns the list of unpacked oids.
        Raises ShellSyntaxError when none of the oids is valid.
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")
    oids = api.expand_oids(valid)
    unupx = []
    newfiles = []

    p = progress.progress(len(oids))
    print(" - Attempting to UPX unpack %d files" % len(oids))
    for oid in oids:
        data = api.get_field(api.source(oid), oid, "data")
        if not data:
            print("No data found for %s" % (oid))
            p.tick()
            continue

        meta = api.retrieve("file_meta", oid)
        # NOTE(review): pop() removes the name from meta["names"]; confirm
        # that api.retrieve hands out a private copy.
        name = meta["names"].pop()
        tmpname = name + ".unpacked_upx"
        tmp = tmp_file(tmpname, data)
        if not tmp:
            # Keep the progress bar in step with the number of oids.
            p.tick()
            continue

        try:
            if is_upx(tmp):
                uoids, noids = import_upxfile(tmp, parent_oid=oid)
                unupx.extend(uoids)
                newfiles.extend(noids)
        finally:
            # Never leave the temporary file behind, even if the import fails.
            os.remove(tmp)
        p.tick()

    print(" - %d files extracted, %d are new" % (len(unupx), len(newfiles)))
    return unupx
def test_names():
    """Go through the collection and show possible new names

    Dry run: look at every field of every note for a file name matching
    hash_name_pat, ask new_media_name() what it would be renamed to and
    collect one 'old → new' line per field.  Nothing is renamed or written.
    The collected text is shown with showText() when it is not empty and is
    returned in any case.
    """
    note_ids = mw.col.db.list("select id from notes")
    report_lines = []
    for nid in progress(note_ids, "Dehashilating", "This is all wrong!"):
        note = mw.col.getNote(nid)
        for (name, value) in note.items():
            found = re.search(hash_name_pat, value)
            if found is None:
                continue
            base, ext = found.group(1), found.group(2)
            try:
                new_name_ = new_media_name(base, ext, note)
            except ValueError:
                # No usable new name for this field; leave it out of the list.
                continue
            report_lines.append(u'{0}{1} → {2}\n'.format(base, ext, new_name_))
    test_string = u''.join(report_lines)
    if test_string:
        showText('These new names will be used:\n' + test_string)
    return test_string
# Training / evaluation script fragment.  It relies on names defined outside
# this excerpt (learningRate, cost, X, Y, drop_prob, input_drop_prob,
# hypothesis, x_train, y_train, x_test, y_test, batch, pg).
optimizer = tf.train.AdamOptimizer(learningRate)
train = optimizer.minimize(cost)

init = tf.initialize_all_variables()

LEARNING_COUNT = 10000  # number of training steps
BATCH_SIZE = 100        # examples requested from the batcher per step

with tf.Session() as sess:
    sess.run(init)
    bat = batch.Batch(x_train, y_train)
    for i in range(LEARNING_COUNT):
        batch_xs, batch_ys = bat.next_batch(BATCH_SIZE)
        sess.run(train, feed_dict={X:batch_xs, Y:batch_ys, drop_prob:0.6, input_drop_prob:0.7})
        # The cost is evaluated in a second run on the same batch, with the
        # same drop_prob / input_drop_prob values as the training step, and is
        # only used for the progress display.
        pg.progress(LEARNING_COUNT, i, sess.run(cost, feed_dict={X:batch_xs, Y:batch_ys, drop_prob:0.6, input_drop_prob:0.7}))
    pg.complete()

    # Accuracy on the held-out set: compares argmax of y_test with argmax of
    # the model output; Y is not fed here.
    correct = tf.equal(tf.argmax(y_test, 1), tf.argmax(hypothesis, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, "float"))
    print "accuracy: ", sess.run(accuracy, feed_dict={X:x_test, drop_prob:1., input_drop_prob:1.})

    # Predict labels for ./data/test.csv (first row skipped), reshaped to
    # (rows, 28, 28, 1).
    test_set = np.loadtxt('./data/test.csv', delimiter=',', skiprows= 1)
    test_set = np.reshape(test_set, (len(test_set), 28, 28, 1))
    result = sess.run(hypothesis, feed_dict={X:test_set, drop_prob:1., input_drop_prob:1.})
    result = sess.run(tf.argmax(result, 1))

    # NOTE(review): only the CSV header is written in this excerpt; `result`
    # is not written out here -- the remainder may have been cut off.
    with open("result.csv", "w") as f:
        f.write("ImageId,Label\n")
def main():
    """Print the accumulated OGD street type -> OSM highway type mapping.

    For every row of the source table, the OSM lines (planet_osm_line) whose
    10-unit buffer intersects the street are queried, and the intersection
    length is summed per (edgecatego, highway) pair.  The result, a dict of
    dicts ``{edgecatego: {highway: total_length}}``, is pretty-printed.

    Connection parameters, table and primary key come from the command line.
    The process exits with status 1 when the connection or a query fails.
    """
    parser = argparse.ArgumentParser(description="Calculate the statistical mapping of OGD street types (edgecatego) to "
                                                 "OSM street types (highway).")
    parser.add_argument("-H", "--hostname", dest="hostname", required=True, help="Host name or IP Address")
    parser.add_argument("-d", "--database", dest="database", required=True, help="The name of the database")
    parser.add_argument("-t", "--table", dest="table", required=True, help="The database table to read from")
    parser.add_argument("-P", "--primary-key", dest="primary_key", required=True, help="The name of the primary key column")
    parser.add_argument("-u", "--user", dest="user", required=False, help="The database user")
    parser.add_argument("-p", "--password", dest="password", required=False, help="The database password")
    args = parser.parse_args()

    # Try to connect
    try:
        conn = psycopg2.connect(
            host=args.hostname,
            database=args.database,
            user=args.user,
            password=args.password
        )
    except Exception as e:
        # Exceptions have no .message attribute on Python 3; format e itself.
        print("I am unable to connect to the database (%s)." % e)
        sys.exit(1)

    cur = conn.cursor()

    # NOTE(review): table and column names are spliced into the SQL text
    # because identifiers cannot be bound as query parameters; they come
    # straight from the command line.
    try:
        cur.execute("select %s from %s" % (args.primary_key, args.table))
    except Exception as e:
        print("I can't SELECT (%s)!" % e)
        # Nothing can be fetched after a failed query.
        sys.exit(1)

    rows = cur.fetchall()
    total = len(rows)
    street_type_mapping = {}

    # The statement is the same for every street; only the key value changes.
    statement = """
        select
            s.edgecatego as source_type,
            l.highway as target_type,
            sum(ST_Length(ST_Intersection(ST_Buffer(l.way, 10, 'endcap=flat join=round'), s.geom2))) as length
        from planet_osm_line l
        left join """ + args.table + """ s on (
            ST_Intersects(l.way, ST_Envelope(s.geom2)) and
            ST_Intersects(s.geom2, ST_Buffer(l.way, 10, 'endcap=flat join=round'))
        )
        where l.highway is not null and s.""" + args.primary_key + """ = %s
        group by edgecatego, highway
    """

    progress.startprogress("Processing all streets")
    for processed, source_street in enumerate(rows):
        # Multiply by the float first so Python 2 does not truncate to 0.
        percent = 100.0 * processed / total
        progress.progress(round(percent, 0))

        objectid = source_street[0]
        try:
            cur.execute(statement, (objectid,))
            results = cur.fetchall()
        except Exception as e:
            print("I can't SELECT (%s)!" % e)
            sys.exit(1)

        for source_type, target_type, length in results:
            targets = street_type_mapping.setdefault(source_type, {})
            if target_type in targets:
                targets[target_type] += length
            else:
                targets[target_type] = length

    progress.endprogress()
    pprint(street_type_mapping)
def train_test(epochs, eta, save_weights, save_errors, resume, init_name,
               nonlinearity_name, use_cifar10, batchsize=128):
    """Train the network from build_vgg() on CIFAR-10/100, then test it.

    epochs            -- total number of epochs (resume continues up to it)
    eta               -- initial learning rate; multiplied by 0.1 for epochs
                         100..124 and by 0.01 from epoch 125 on
    save_weights      -- file name prefix for weight dumps, or a false value
    save_errors       -- file name for the learning curves, or a false value
    resume            -- if True, restore the most recent
                         '<save_weights>_<epoch>.npz' and the saved curves
    init_name         -- passed through to build_vgg
    nonlinearity_name -- 'relu', 'elu' or 'gelu'
    use_cifar10       -- True for CIFAR-10, anything else for CIFAR-100
    batchsize         -- minibatch size

    Raises ValueError for an unknown nonlinearity_name and AssertionError
    when resume is True but no checkpoint could be restored.
    """
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    if use_cifar10 is True:
        print('Using CIFAR-10')
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    if nonlinearity_name == 'relu':
        f = lasagne.nonlinearities.rectify
    elif nonlinearity_name == 'elu':
        f = lasagne.nonlinearities.elu
    elif nonlinearity_name == 'gelu':
        def gelu(x):
            return 0.5 * x * (1 + T.tanh(T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))))
        f = gelu
    else:
        # Previously this fell through and failed later with an unbound `f`.
        raise ValueError("unknown nonlinearity: %r" % (nonlinearity_name,))

    network = build_vgg(input_var, num_classes, f, init_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))

    # define training function
    print("Compiling training function...")
    prediction = ll.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    l2_loss = 5e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'regularizable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.adam(
        loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction, target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []

    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        # Look for the newest checkpoint, counting down from the last epoch.
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    # Use a separate index name: on Python 2 a comprehension
                    # variable leaks and would overwrite the epoch index i.
                    param_values = [f['arr_%d' % k] for k in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print('Restored!', i, start_epoch)
                break
            except Exception:
                # No usable checkpoint for this epoch; try the previous one.
                continue
        if start_epoch == 0:
            # Explicit raise: a bare assert would be stripped under -O.
            raise AssertionError("could not resume")

    # Finally, launch the training loop.
    print("Starting training...")
    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # restoration friendly code
        # drop at half and then at three fourths through training
        if 100 <= epoch < 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        # augmentation is mandatory!
        batches = dataset.augment_minibatches(batches)
        batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And a full pass over the test data:
        test_loss = 0
        test_err = 0
        test_batches = len(X_test) // batchsize
        for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
            loss, err = test_fn(inputs, targets)
            test_loss += loss
            test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print(" CE loss:\t%.6f" % train_loss)
        print(" L2 loss: \t%.6f" % l2_loss)
        print(" Loss: \t%.6f" % (train_loss+l2_loss))
        if save_errors:
            errors.extend([train_loss, l2_loss])

        test_loss /= test_batches
        test_err /= test_batches
        print(" test loss:\t%.6f" % test_loss)
        print(" test error:\t%.2f%%" % (test_err * 100))
        if save_errors:
            errors.extend([test_loss, test_err])

        if epoch % 25 == 0 and epoch > 100:
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights + '_' + str(epoch), *lasagne.layers.get_all_param_values(network))
            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors, errors=np.asarray(errors).reshape(epoch+1, -1))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print(" test loss:\t\t%.6f" % (test_loss / test_batches))
    print(" test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file; `errors` only exists
    # when save_errors is set.
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
def main():
    """Copy OGD streets and their OSM coverage into '<table>_uncovered'.

    For every street of the source table that is not yet present in
    '<table>_uncovered', one row is inserted holding the street's id, name,
    mapped highway/fixme values, geometry, source tag and the percentage of
    its length that intersects osm_street_buffer.  The highway/fixme mapping
    for the chosen region is read from 'street-type-mapping.json' next to
    this script; without an entry for the region both columns are null.

    Connection parameters, table and column names come from the command line.
    The process exits with status 1 when the connection or a query fails.
    """
    parser = argparse.ArgumentParser(description="Look for streets in the OGD table that are not covered by "
                                                 "OpenStreetMap and write them into another table.")
    parser.add_argument("-H", "--hostname", dest="hostname", required=False, help="Host name or IP Address")
    parser.add_argument("-d", "--database", dest="database", required=True, help="The name of the database")
    parser.add_argument("-r", "--region", dest="region", required=True, help="The region to extract streets for")
    parser.add_argument("-t", "--table", dest="table", required=True, help="The database table to read from")
    parser.add_argument("-P", "--primary-key", dest="primary_key", required=True, help="The name of the primary key column")
    parser.add_argument("-n", "--name-column", dest="name_column", required=True, help="The name column")
    parser.add_argument("-s", "--source-tag", dest="source_tag", required=True, help="The text that should be written into the OSM source tag")
    parser.add_argument("-u", "--user", dest="user", required=False, help="The database user")
    parser.add_argument("-p", "--password", dest="password", required=False, help="The database password")
    args = parser.parse_args()

    show_progress = False

    # Read and parse the street type mapping file
    street_mapping_select = "null, null,"
    mapping_path = os.path.join(
        os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))),
        "street-type-mapping.json")
    with open(mapping_path) as data_file:
        data = json.load(data_file)
        if args.region in data:
            region_data = data[args.region]
            # NOTE(review): mapping keys and values from the JSON file are
            # spliced into the SQL text unescaped.
            street_mapping_select = "case "
            for highway_mapping_key in region_data["highway"]:
                street_mapping_select += "when s.%s='%s' then '%s'\n" % (
                    "edgecatego", highway_mapping_key, region_data["highway"][highway_mapping_key])
            street_mapping_select += "end as highway, "
            street_mapping_select += "case "
            for fixme_mapping_key in region_data["fixme"]:
                street_mapping_select += "when s.%s='%s' then '%s'\n" % (
                    "edgecatego", fixme_mapping_key, region_data["fixme"][fixme_mapping_key])
            street_mapping_select += "end as fixme, "

    # Try to connect
    try:
        conn = psycopg2.connect(
            host=args.hostname,
            database=args.database,
            user=args.user,
            password=args.password
        )
    except Exception as e:
        # Exceptions have no .message attribute on Python 3; format e itself.
        print("I am unable to connect to the database (%s)." % e)
        sys.exit(1)

    cur = conn.cursor()

    # Table and column names cannot be bound as parameters, so they are
    # spliced into the SQL text.
    try:
        cur.execute("""
            select """ + args.primary_key + """
            from """ + args.table + """
            where """ + args.primary_key + """ not in (
                select """ + args.primary_key + """
                from """ + args.table + """_uncovered
            )
        """)
    except Exception as e:
        print("I can't SELECT the not-yet-calculated streets (%s)!" % e)
        # Nothing can be fetched after a failed query.
        sys.exit(1)

    rows = cur.fetchall()
    total = len(rows)

    # The source tag is a value, not an identifier, so it is bound as a query
    # parameter (first %s); the street id is the second %s.
    statement = """
        insert into """ + args.table + """_uncovered
        select
            objectid, name, highway, fixme, geom, source,
            round(cast((sum(intersection_length) / ogd_length * 100.0) as numeric), 0) as coverage
        from (select
                s.""" + args.primary_key + """ as objectid,
                s.""" + args.name_column + """ as name,
                """ + street_mapping_select + """
                ST_AsEWKT(s.geom2) as geom,
                cast(%s as text) as source,
                ST_Length(ST_Intersection(l.buffer, s.geom2)) as intersection_length,
                ST_Length(s.geom2) as ogd_length
            from osm_street_buffer l
            right join """ + args.table + """ s on (
                ST_Intersects(l.way, ST_Envelope(s.geom2)))
            where s.""" + args.primary_key + """ = %s
            group by """ + args.primary_key + """, """ + args.name_column + """, s.edgecatego, s.geom2, intersection_length) as subquery
        group by objectid, name, highway, fixme, geom, source, ogd_length;
    """

    if show_progress:
        progress.startprogress("Processing all streets")

    for processed, source_street in enumerate(rows):
        if show_progress:
            # Multiply by the float first so Python 2 does not truncate to 0.
            percent = 100.0 * processed / total
            progress.progress(round(percent, 0))

        objectid = source_street[0]
        try:
            cur.execute(statement, (args.source_tag, objectid))
            conn.commit()  # commit per street so an aborted run can resume
        except Exception as e:
            print("I can't INSERT the data (%s)!" % e)
            sys.exit(1)

    if show_progress:
        progress.endprogress()
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, resume, nonlinearity_name, use_cifar10, batchsize=64):
    """Load a saved DenseNet, run update_var_fn over the training data, test.

    Despite its name this variant never calls train_fn: the weights come from
    "./wider_07_100.npz" and each of the 5 hard-coded epochs only calls
    update_var_fn, which is built with batch_norm_update_averages=True and
    has no parameter updates attached.
    NOTE(review): presumably this re-estimates the batch-norm running
    averages for the loaded weights -- confirm.

    `epochs` is only used in the progress-bar caption, `resume` is unused,
    and `eta` only feeds the compiled-but-unused train_fn.
    """
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast_custom as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth, classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout, nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        # hold out the last 5000 training examples for validation
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
        loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    # updates = lasagne.updates.adam(
    #     loss + l2_loss, params, learning_rate=eta)
    # train_fn is compiled but not called anywhere below.
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)

    # Forward pass that uses batch statistics instead of the stored averages
    # and asks the batch-norm layers to update those averages.
    update_var_prediction = lasagne.layers.get_output(network, deterministic=True,
                                                      batch_norm_update_averages=True,
                                                      batch_norm_use_averages=False)
    loss_var_update = lasagne.objectives.categorical_crossentropy(update_var_prediction, target_var)
    loss_var_update = loss_var_update.mean()
    update_var_fn = theano.function([input_var, target_var], loss_var_update)

    test_loss = test_loss.mean()
    test_acc = lasagne.objectives.categorical_accuracy(test_prediction, target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    l2_fn = theano.function([], l2_loss)

    # Hard-coded checkpoint: the weights are always loaded from this file.
    with np.load("./wider_07_100.npz") as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []

    # Baseline: evaluate the freshly loaded weights on the test set.
    val_err = 0
    val_acc = 0
    val_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
        err, acc = test_fn(inputs, targets)
        val_err += err
        val_acc += acc
    if validate or True:  # HACK: validate on test set, for debugging
        print(" validation loss:\t%.6f" % (val_err / val_batches))
        print(" validation error:\t%.2f%%" % (
            100 - val_acc / val_batches * 100))

    # The number of passes is hard-coded to 5, independent of `epochs`.
    for epoch in range(5):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            # no parameter updates here, see the docstring
            train_err += update_var_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_err = 0
            val_acc = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc
        else:  # HACK: validate on test set, for debugging
            val_err = 0
            val_acc = 0
            val_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc

        # Then we print the results for this epoch:
        print(" training loss:\t%.6f" % (train_err / train_batches))
        l2_err = l2_fn()
        print(" L2 loss: \t%.6f" % l2_err)
        if save_errors:
            errors.extend([train_err / train_batches, l2_err])
        if validate or True:  # HACK: validate on test set, for debugging
            print(" validation loss:\t%.6f" % (val_err / val_batches))
            print(" validation error:\t%.2f%%" % (
                100 - val_acc / val_batches * 100))
            if save_errors:
                errors.extend([val_err / val_batches, 100 - val_acc / val_batches * 100])

        # With range(5) this only triggers for epoch 0.
        if save_weights and epoch % 20 == 0:
            np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
            print('Saved')

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
        err, acc = test_fn(inputs, targets)
        test_err += err
        test_acc += acc
    print("Final results:")
    print(" test loss:\t\t%.6f" % (test_err / test_batches))
    print(" test error:\t\t%.2f%%" % (
        100 - test_acc / test_batches * 100))
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, resume, nonlinearity_name, use_cifar10, batchsize):
    """Train a DenseNet on CIFAR-10/100 and print the final test error.

    depth, growth_rate, dropout, nonlinearity_name -- passed to
                      densenet.build_densenet
    augment        -- if true, augment minibatches in a background generator
    validate       -- 'test' validates on the test set, any other true value
                      holds out the last 5000 training examples, a false
                      value reports test loss/error per epoch instead
    epochs         -- total number of epochs (resume continues up to it)
    eta            -- initial learning rate; multiplied by 0.1 from 50% and
                      by 0.01 from 75% of the epochs
    save_weights   -- file name prefix for weight dumps, or a false value
    save_errors    -- file name for the learning curves, or a false value
    resume         -- if True, restore the most recent
                      '<save_weights>_<epoch>.npz' and the saved curves
    use_cifar10    -- True for CIFAR-10, anything else for CIFAR-100
    batchsize      -- minibatch size

    Raises AssertionError when resume is True but nothing could be restored.
    """
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth, classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout, nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
        loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction, target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []

    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        # Look for the newest checkpoint, counting down from the last epoch.
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    # Use a separate index name: on Python 2 a comprehension
                    # variable leaks and would overwrite the epoch index i.
                    param_values = [f['arr_%d' % k] for k in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print(i, start_epoch)
                break
            except Exception:
                # No usable checkpoint for this epoch; try the previous one.
                continue
        if start_epoch == 0:
            # Explicit raise: a bare assert would be stripped under -O.
            raise AssertionError("could not resume")

    # Finally, launch the training loop.
    print("Starting training...")
    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # shrink learning rate at 50% and 75% into training
        if epochs // 2 <= epoch < epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err
        else:
            test_loss = 0
            test_err = 0
            test_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                test_loss += loss
                test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print(" training loss:\t%.6f" % train_loss)
        print(" L2 loss: \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])
        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print(" validation loss:\t%.6f" % val_loss)
            print(" validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])
        else:
            test_loss /= test_batches
            test_err /= test_batches
            print(" test loss:\t%.6f" % test_loss)
            print(" test error:\t%.2f%%" % (test_err * 100))
            if save_errors:
                errors.extend([test_loss, test_err])

        if epoch % 1 == 0:  # checkpoint interval in epochs (currently every epoch)
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights + '_' + str(epoch), *lasagne.layers.get_all_param_values(network))
            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors, errors=np.asarray(errors).reshape(-1, 4))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print(" test loss:\t\t%.6f" % (test_loss / test_batches))
    print(" test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
def main(files, module, **extra):
    '''Main driver for a generic file processor

    The workflow proceeds as follows.

        #. For the root process, call :py:func:`init_root` and update any input files
        #. Test if a subset of files have already been processed and remove these
        #. Broadcast new set of files to be processed to all nodes (if necessary)
        #. Broadcast files already processed to all nodes (if necessary)
        #. ??
        #. Initialize data for each process including root
        #. If no files, finalize and return
        #. Initialize data for each worker process
        #. Process files
        #. Reduce data to root worker
        #. Finalize data on root worker

    .. sourcecode:: py

        >>> from core.app import file_processor
        >>> from arachnid.util import crop
        >>> file_processor.main(['stack_01.spi'], crop)

    :Parameters:

    files : list
            List of filenames, tuple groups or lists of filenames
    module : module
             Main module containing entry points
    extra : dict
            Unused extra keyword arguments

    Raises ValueError("Error in root process") when the root process hit an
    error while handling a result (the file list is emptied to signal this).
    '''
    progname = os.path.basename(sys.argv[0])
    if progname[:4] == 'ara-': progname = progname[4:]
    if progname[:3] == 'sp-': progname = progname[3:]
    restart_file = os.path.join(os.path.dirname(extra['output']), '.restart.'+progname) if 'output' in extra else None

    if extra['worker_count'] > multiprocessing.cpu_count():
        _logger.warning("Number of workers exceeds number of cores: %d > %d"%(extra['worker_count'], multiprocessing.cpu_count()))
    _logger.debug("File processer - begin")

    # Entry points of the module; only `process` is mandatory.
    process = getattr(module, "process")
    initialize = getattr(module, "initialize", None)
    finalize = getattr(module, "finalize", None)
    reduce_all = getattr(module, "reduce_all", None)
    init_process = getattr(module, "init_process", None)
    init_root = getattr(module, "init_root", None)

    monitor = None
    # Only the root process opens the restart file; every process reads the
    # name below, so it must be bound everywhere.
    restart_fout = None

    if mpi_utility.is_root(**extra):
        if init_root is not None:
            _logger.debug("Init-root")
            f = init_root(files, extra)
            if f is not None: files = f
        _logger.debug("Test dependencies1: %d"%len(files))
        files, finished = check_dependencies(files, restart_file, **extra)
        extra['finished'] = finished
        _logger.debug("Test dependencies2: %d"%len(files))
    else:
        extra['finished'] = None

    _logger.debug("Start processing1")
    tfiles = mpi_utility.broadcast(files, **extra)
    # Why?
    if not mpi_utility.is_root(**extra):
        # Workers keep only those of their own files that the root still lists.
        tfiles = set([os.path.basename(f) for f in tfiles])
        files = [f for f in files if f in tfiles]
    _logger.debug("Start processing2")
    extra['finished'] = mpi_utility.broadcast(extra['finished'], **extra)

    if initialize is not None:
        _logger.debug("Init")
        f = initialize(files, extra)
        _logger.debug("Init-2")
        if f is not None: files = f
    #files = mpi_utility.broadcast(files, **extra)
    _logger.debug("Start processing3")

    if len(files) == 0:
        if mpi_utility.is_root(**extra):
            _logger.debug("No files to process")
            if finalize is not None: finalize(files, **extra)
        return

    if mpi_utility.is_root(**extra):
        _logger.debug("Setup progress monitor")
        monitor = progress(len(files))
        if restart_file is not None: tracing.backup(restart_file)
        restart_fout = open(restart_file, 'w') if restart_file is not None else None
        if restart_fout is not None:
            # Start the new restart file with everything already finished.
            for f in finished:
                fileid = spider_utility.spider_id(f) if spider_utility.is_spider_filename(f) else f
                restart_fout.write(str(fileid)+'\n')

    current = 0
    _logger.debug("Start processing")
    ignored_errors = [0]  # one-element list so mpi_reduce can update the count
    for index, filename in mpi_utility.mpi_reduce(process, files, init_process=init_process, ignored_errors=ignored_errors, **extra):
        if mpi_utility.is_root(**extra):
            try:
                monitor.update()
                if reduce_all is not None:
                    current += 1
                    try:
                        filename = reduce_all(filename, file_index=index, file_count=len(files), file_completed=current, **extra)
                    except:
                        # Best effort: count the failure and keep going.
                        ignored_errors[0] += 1
                        # `or 1 == 1` forces the full traceback at every log level.
                        if _logger.getEffectiveLevel()==logging.DEBUG or 1 == 1:
                            _logger.exception("Reduce to root failed")
                        else:
                            _logger.warning("Reduce to root failed - report this problem to the developer")
                    if isinstance(filename, tuple):
                        filename, msg = filename
                    else:
                        msg = filename
                    _logger.info("Finished: %d,%d - Time left: %s - %s"%(current, len(files), monitor.time_remaining(True), str(msg)))
                else:
                    _logger.info("Finished: %d,%d - Time left: %s"%(current, len(files), monitor.time_remaining(True)))
            except:
                _logger.exception("Error in root process")
                # Emptying the list is the failure signal checked after the loop.
                del files[:]
            else:
                if restart_fout is not None:
                    if spider_utility.is_spider_filename(filename):
                        filename = spider_utility.spider_id(filename)
                    restart_fout.write(str(filename)+'\n')
                    restart_fout.flush()

    if ignored_errors[0] > 0:
        see_also = "\n\nSee .%s.crash_report for more details"%os.path.basename(sys.argv[0])
        _logger.warning("Errors occurred during run"+see_also)
    if restart_fout is not None: restart_fout.close()
    if len(files) == 0:
        raise ValueError("Error in root process")
    if mpi_utility.is_root(**extra):
        if finalize is not None: finalize(files, **extra)
def dehashilate():
    """Go through the collection and clean up MD5-ish names

    Search the cards for sounds or images with file names that look
    like MD5 hashes, rename the files and change the notes.

    Every match of hash_name_pat in every field of every note is handled:
    the file is renamed in the media directory (once per old name) and the
    old name is replaced by the new one in the field.  Files that could not
    be renamed are listed with showText() at the end.
    """
    mdir = mw.col.media.dir()
    new_names_dict = {}  # old file name -> new name, for files already renamed
    rename_exec_list = []
    bad_mv_text = u''
    mw.checkpoint(_("Dehashilate"))
    nids = mw.col.db.list("select id from notes")
    for nid in progress(nids, "Dehashilating", "This is all wrong!"):
        n = mw.col.getNote(nid)
        for (name, value) in n.items():
            # finditer walks over *every* hash-like name in the field.  The
            # previous findall/search combination looked at the first match
            # again on each pass, so further names in a field were skipped.
            for rs in re.finditer(hash_name_pat, value):
                old_name = '{0}{1}'.format(rs.group(1), rs.group(2))
                try:
                    new_name = new_names_dict[old_name]
                except KeyError:
                    try:
                        new_name = new_media_name(rs.group(1), rs.group(2), n)
                    except ValueError:
                        continue
                    do_rename = True
                else:
                    do_rename = False
                if do_rename:
                    src = os.path.join(mdir, old_name)
                    dst = os.path.join(mdir, new_name)
                    try:
                        os.rename(src, dst)
                    except OSError:
                        # print u'Problem movivg {0} → {1}\n'.format(src, dst)
                        bad_mv_text += u'{0} → {1}\n'.format(src, dst)
                    else:
                        new_names_dict[old_name] = new_name
                # Carry the replacement forward so that a later match in the
                # same field does not undo it.  (finditer keeps scanning the
                # original string; rebinding `value` does not disturb it.)
                value = value.replace(old_name, new_name)
                n[name] = value
                n.flush()
                rename_exec_list.append(dict(nid=nid, flds=n.joinedFields()))
    mw.col.db.executemany("update notes set flds =:flds where id =:nid",
                          rename_exec_list)
    # This is a bit of voodo code. Without it the cards weren't
    # synced. Maybe this helps. (Cribbed from anki.find, but don't
    # keep extra list of nids.) RAS 2012-06-20
    # And it doesn't work. RAS 2012-07-13
    # """File
    # "/home/roland/Anki-tests/addons/dehashilator/dehashilator.py",
    # line 268, in dehashilate
    # mw.col.updateFieldCache([re_dict[nids] for re_dict in
    # rename_exec_list])
    # TypeError: unhashable type: 'list'"""
    # mw.col.updateFieldCache([re_dict[nids] for re_dict in rename_exec_list])
    mw.reset()
    if bad_mv_text:
        showText(_(u'These files weren’t renamed:\n') + bad_mv_text)