def test_watch(self):
    seq = [1, 2, 3, 4, 5]
    w = lambda: 1
    bar = frogress.bar(seq, watch=w)
    self.assertIsInstance(bar, frogress.WatchBar)
    self.assertEqual(bar.watch, w)
    self.assertIs(bar.iterable, seq)
def execute():
    with global_connection():
        result = []
        for doc in frogress.bar(load_documents(Courts.CONSTITUTIONAL)):
            doc_info = doc.content_info
            result.append(doc_info)
        output.save_csv(pd.DataFrame(result), 'data')
def download_images(image_data, n_images, output_dir):
    """
    Download a specified number of images to output_dir.

    :param _elementtree._element_iterator image_data: information to download images
    :param int n_images: number of images to download
    :param str output_dir: directory to store the images
    """
    urls = (make_thumb_url(image) for image in image_data)
    reqs = (grequests.get(url) for url in urls)
    responses = grequests.imap(reqs)
    responses = frogress.bar(responses, steps=n_images)
    print('\nDownloading {} images'.format(n_images))
    os.makedirs(output_dir, exist_ok=True)
    for r in responses:
        try:
            url = urllib.parse.urlparse(r.url)
            filename, _ = os.path.splitext(os.path.basename(url.path))
            output_file_path = os.path.join(output_dir, filename + '.jpg')
            with open(output_file_path, 'wb') as output_file:
                output_file.write(r.content)
        finally:
            r.close()
def get_hash(self, filename):
    # hasher = hashlib.md5()
    hasher = xxhash.xxh64()
    widgets = [
        frogress.PercentageWidget,
        frogress.BarWidget,
        frogress.TransferWidget(filename + ' '),
        frogress.EtaWidget,
        frogress.TimerWidget
    ]
    filesize = self._get_file_size(filename)
    try:
        with open(filename, 'rb') as f:
            chunks = self._iter_file(f, self.blocksize)
            if filesize == 0:
                for chunk in chunks:
                    hasher.update(chunk)
                print("100.0%% | [##########] | %s 0-bytes | ETA: -- | Time: 0.0s" % filename)
            else:
                for chunk in frogress.bar(chunks, source=f, widgets=widgets):
                    hasher.update(chunk)
    except IOError:
        return None
    return hasher.hexdigest()
def brute_force(self):
    """
    Experimental method running a brute force approach to decrypt text encrypted by the Vigenere cipher.
    """
    self.logger.info("Trying to decrypt text '{}' by brute force".format(self._encrypted_text))
    if self._key_len:
        possible_key_lens = [self._key_len]
    else:
        possible_key_lens = self._get_possible_key_lens()
    if not possible_key_lens:
        self.logger.error("There is no possible key length (or it is very short), application ends")
        return
    # Increase the threshold in comparison to the threshold used in the GAs.
    threshold_score = (THRESHOLD_SCORE_PER_WORD * 1.25) * len(self._encrypted_text.split())
    final_key = None
    for key_len in possible_key_lens:
        space_dim = 26 ** key_len  # 26 is the size of the alphabet
        population = Population(self._encrypted_text, self._force_train, POPULATION_SIZE, key_len)
        for key in frogress.bar(self._bruteforce_keys_generation(key_len), steps=space_dim):
            score = population.evaluate_fitness_of_member(key)
            self.logger.info("Key '{}' (score: {})".format(key, score))
            if score > threshold_score:
                final_key = key
                break
    final_plaintext = Vigenere(final_key).decipher(self._encrypted_text)
    self.logger.info("Final key: '{}'".format(final_key))
    self.logger.info("Final plaintext: '{}'".format(self._correct_plaintext(final_plaintext)))
def part1():
    print("Reading dustmap...")
    dust = pd.read_csv(dust_path, sep=' ', usecols=[0, 1, -1],
                       names=['procell', 'skycell', 'SDSS_i_avg'], header=None)
    dustprocells = np.asarray(dust["procell"].values).astype(int)
    dustskycells = np.asarray(dust["skycell"].values).astype(int)
    dust_avg = np.asarray(dust["SDSS_i_avg"].values)

    print("Reading Mag lim catalog")
    data = Table.read(cut_path, format='fits')
    data = data.to_pandas()
    skycells = np.asarray(data["skyCellID"].values).astype(int)
    procells = np.asarray(data["projectionid"].values).astype(int)
    mag = np.asarray(data["max_iKronMag"].values)

    for ii in frogress.bar(range(skycells.shape[0])):
        skycell = skycells[ii]
        procell = procells[ii]
        idx = np.where((dustprocells == procell) & (dustskycells == skycell))[0]
        if idx.size != 1:
            print(idx, procell, skycell)
            print("Failed")
            break
        if dust_avg[idx] > -20.0:
            mag[ii] -= dust_avg[idx]
        else:
            print("No Mask")
            mag[ii] = 0.0

    idx = (mag < mag_cut)
    outpro = procells[idx]
    outpro = outpro.reshape((outpro.size, 1))
    outsky = skycells[idx]
    outsky = outsky.reshape((outsky.size, 1))
    inpu = np.hstack((outpro, outsky))

    print("Calculating ra, dec borders...")
    outpro = inpu[:, 0]
    outsky = inpu[:, 1]
    minras, maxras, mindecs, maxdecs = tools.get_centers(outpro, outsky, borders=True)

    print("Check fully missing masks...")
    idx = np.where(np.abs(minras - (-99)) < 0.0001)[0]
    idbroken = np.where(np.abs(minras - (-999)) < 0.0001)[0]
    print("There are %s masks that cannot be processed" % idbroken.size)
    brokenpro = np.take(outpro, idx)
    startcell, M, zone, xsub, ysub, xsize, ysize, mindec, maxdec, prodec = tools.get_cell_info(brokenpro)
    step = 360.0 / M
    cen = step * (brokenpro - startcell)
    highra = cen + step / 2.0
    lowra = cen - step / 2.0
    bx = lowra < 0.0
    lowra[bx] = lowra[bx] + 360.0
    minras[idx] = lowra
    mindecs[idx] = mindec
    maxras[idx] = highra
    maxdecs[idx] = maxdec

    output = pd.DataFrame({'pro': outpro, 'sky': outsky, 'minras': minras,
                           'maxras': maxras, 'mindecs': mindecs, 'maxdecs': maxdecs})
    output.to_csv(outfile_1, index=False)
def part2(rank):
    steps = 2643 - 635
    chunky = int(steps / size)
    rest = steps - chunky * size
    mini = chunky * rank
    maxi = chunky * (rank + 1)
    if rank >= (size - 1) - rest:
        maxi += 2 + rank - size + rest
        mini += rank - size + 1 + rest
    if rank == size - 1:
        maxi = steps + 1
    mini += 635
    maxi += 635
    mini = int(mini)
    maxi = int(maxi)

    bad_pixels = np.zeros(0)
    for pro in frogress.bar(range(mini, maxi)):
        flag = False
        for i in range(0, 100):
            try:
                foohdu = fits.open("%s/mask_%04d.%03d.fits" % (mask_directory, pro, i),
                                   ignore_missing_end=True)
                break
            except:
                pass
        if i == 99:
            flag = True
        if flag == True:
            continue
        w = wcs.WCS(foohdu[1].header)
        for sky in range(0, 100):
            try:
                mask = fits.open('%s/mask_%04d.%03d.fits' % (mask_directory, pro, sky),
                                 ignore_missing_end=True)
                body = mask[1].data
                width = body.shape[1]
                height = body.shape[0]
            except:
                continue
            # cut off borders of skycell
            body = body[20:-20, 20:-20]
            badpix = np.where(body != 0)
            bady = badpix[0]
            badx = badpix[1]
            bady += 20
            badx += 20
            world = w.all_pix2world(badx, bady, 0, ra_dec_order=True)
            ra = np.radians(world[0])
            dec = np.radians(np.subtract(90.0, world[1]))
            pix = hp.pixelfunc.ang2pix(NSIDE, dec, ra)
            pix = np.unique(pix)
            pix = pix.astype(int)
            bad_pixels = np.append(bad_pixels, pix)
    bad_pixels = np.unique(bad_pixels)
    np.savetxt("%s/mask_pixel_save_%s.dat" % (mid_directory, rank), bad_pixels)
def main():
    output = ''
    count = 1000
    widgets = [frogress.BarWidget, frogress.PercentageWidget, frogress.ProgressWidget('vCard ')]
    for i in frogress.bar(range(count), steps=count, widgets=widgets):
        output += generate_vCard()
    with codecs.open('output.vcf', 'w', 'utf-8') as f:
        f.write(output)
def test_file(self):
    with tempfile.NamedTemporaryFile('w') as tmp:
        text = 'foobar\n' * 25
        tmp.write(text)
        tmp.flush()
        f = open(tmp.name)
        bar = frogress.bar([], source=f)
        self.assertIsInstance(bar, frogress.TransferBar)
        self.assertEqual(bar.step_callback, f.tell)
        self.assertEqual(bar.steps, len(text))
def bin_redshifts(redshift, min_z, max_z, z_steps):
    z_edges = np.linspace(min_z, max_z, num=z_steps)
    bin_num = np.zeros(redshift.size)
    for ii, z in frogress.bar(enumerate(redshift)):
        it = 0
        upper_z = z_edges[it]
        while z > upper_z:
            it += 1
            upper_z = z_edges[it]
        bin_num[ii] = it
    return bin_num
def main(path_input):
    # read the original fragments file
    df = pd.read_csv(path_input, sep="\t", header=None,
                     names=["chr", "start", "end", "cb", "counts"])

    # generate a table describing each chromosome's length
    chrs = "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY chrM"
    ends = "197195432 181748087 159599783 155630120 152537259 149517037 152524553 131738871 124076172 129993255 121843856 121257530 120284312 125194864 103494974 98319150 95272651 90772031 61342430 166650296 15902555 16299"
    chrs = chrs.split(" ")
    ends = ends.split(" ")
    df_contigs = pd.DataFrame({"chromosome": chrs, "len": ends})
    df_contigs.len = df_contigs.len.astype(int)

    # find problematic fragments that exceed contig size (due to cell ranger bug)
    problematics = []
    with pysam.Tabixfile(path_input) as tbx:
        for i, contig in df_contigs[df_contigs.chromosome != "chrM"].iterrows():
            for row in tbx.fetch(contig.chromosome, contig.len - 1, contig.len,
                                 parser=pysam.asBed()):
                problematics.append(row)

    # write to problematics.csv
    df_problematics = pd.DataFrame(
        map(lambda item: (item.contig, item.start, item.end, item.name), problematics))
    df_problematics.to_csv("problematics.csv", sep="\t", header=False, index=False)

    # iterate through problematic fragments and correct the coordinates
    for p in bar(problematics):
        # retrieve the proper end of the contig
        proper_end = df_contigs.loc[df_contigs.chromosome == p.contig, "len"].values[0]
        # correct
        df.loc[(df.chr == p.contig) & (df.start == p.start) & (df.cb == p.name), "end"] = proper_end

    # write to disk
    df.to_csv("fragments.tsv", sep="\t", header=False, index=False)
def fit(self, train_data, test_data, n_iter=1, prefix=None):
    learning_curve = []
    for i in range(n_iter):
        n_error = 0
        n_predicted = 0
        train_der = GreedyDiarizationErrorRate()
        shuffle(train_data)
        for example in bar(train_data):
            if len(example) == 2:
                observations, labels = example
                partial_labels = [None] * len(labels)
            else:
                observations, labels, partial_labels = example
            predicted = [
                pl for pl, state in self._infer(observations, labels, partial_labels)
            ]
            assert len(predicted) == len(observations)
            train_der(*convert_labels(labels, predicted))
            err, length = binary_loss_with_matching(predicted, labels, return_rational=True)
            n_predicted += length
            n_error += err
        print("iteration {}".format(i))
        print("error: {:.2%}/{:.2%}".format(n_error / n_predicted, abs(train_der)))
        w = deepcopy(self.model.weight)
        self.model.avg()
        test_loss, test_der = self.test(
            test_data, dump=prefix.format(i) if prefix is not None else None)
        print("test: {:.2%}/{:.2%}".format(test_loss, test_der))
        self.model.weight = w
        learning_curve.append({
            "train_loss": n_error / n_predicted,
            "train_der": abs(train_der),
            "test_loss": test_loss,
            "test_der": test_der
        })
    return learning_curve
def parse_file(xml_file, db):
    context = etree.iterparse(xml_file, tag=oca_tag('Index'))
    for action, case in frogress.bar(context):
        # If case already exists in DB delete it,
        # if we have delete instructions don't re-add it,
        # otherwise parse the case and insert it into the various tables.
        parse_case(case, db)
        # Clear the case element to free memory
        case.clear()
def show_progress(title, iterable, **kwargs):
    sleep = kwargs.pop('sleep', 0.01)
    timeout = kwargs.pop('timeout', 2)
    line = ' %s ' % title
    print(line.center(80, '=') + '\n')
    started = datetime.datetime.now()
    for item in frogress.bar(iterable, **kwargs):
        if sleep:
            time.sleep(sleep)
        if (datetime.datetime.now() - started).seconds > timeout:
            print('\n[Timeout reached: %.1fs]' % timeout)
            break
    print('\n')
def test_watch_file(self):
    seq = [1, 2, 3, 4, 5]
    w = lambda: 1
    with tempfile.NamedTemporaryFile('w') as tmp:
        text = 'foobar\n' * 25
        tmp.write(text)
        tmp.flush()
        f = open(tmp.name)
        bar = frogress.bar(seq, watch=w, source=f)
        self.assertIsInstance(bar, frogress.TransferWatchBar)
        self.assertIs(bar.watch, w)
        self.assertEqual(bar.step_callback, f.tell)
        self.assertEqual(bar.steps, len(text))
        self.assertIs(bar.iterable, seq)
def main():
    output = ''
    # generate the requested number of vCards
    if not args.quiet:
        for i in frogress.bar(range(args.count), steps=args.count, widgets=widgets):
            output += generate_vcard()
        print("\n")
    else:
        for i in range(args.count):
            output += generate_vcard()
    # write the vCards to the file
    with open(args.filename, 'w') as f:
        f.write(output)
def clean_index(dirname):
    # Always create the index from scratch
    if not os.path.exists(dirname):
        os.mkdir(dirname)
    print(dirname, get_schema())
    ix = index.create_in(dirname, get_schema())
    writer = ix.writer()
    doclist = [f for f in listdir(datapath) if isfile(join(datapath, f))]

    # Assume we have a function that gathers the filenames of the
    # documents to be indexed
    #for path in my_docs():
    l = len(doclist)
    #for i, filename in enumerate(doclist):
    for filename in frogress.bar(doclist):
        add_doc(writer, u"" + datapath + filename)
    writer.commit()
def get_perturbed_magnitudes():
    # Read the matched file for SDSS z and PS magnitudes
    df = pandas.read_csv(
        "/work/dominik.zuercher/Output/match_PS_GAMA/matched_spec_new.dat",
        delim_whitespace=1, usecols=(0, 3, 4, 5, 6), header=None,
        names=(["zred", "rPS", "gPS", "iPS", "blue"]))

    # Read in Dominik's median errors as a function of g band magnitude and then perturb
    gmag, gmag_err = np.loadtxt("/work/dominik.zuercher/g_band_error.txt", unpack=1)
    rmag, rmag_err = np.loadtxt("/work/dominik.zuercher/r_band_error.txt", unpack=1)
    gspl = UnivariateSpline(gmag, np.log10(gmag_err))
    rspl = UnivariateSpline(rmag, np.log10(rmag_err))

    # Check the splines
    # xx = np.arange(15.0, 24.0, 0.01)
    # np.savetxt("test_gspl.dat", zip(xx, 10.0**gspl(xx)))
    # np.savetxt("test_rspl.dat", zip(xx, 10.0**rspl(xx)))

    df["gPS_pert"] = df.zred.values * 0.0
    df["rPS_pert"] = df.zred.values * 0.0

    import frogress
    for ii in frogress.bar(range(df.zred.size)):
        # First get the apparent magnitude at the redshift
        appmag_i = get_app_magnitude(df.zred.values[ii])
        appmag_g = df.gPS.values[ii] + (appmag_i - df.iPS.values[ii])
        appmag_r = df.rPS.values[ii] + (appmag_i - df.iPS.values[ii])

        # Now perturb according to the errors
        g_err = 10.**gspl(appmag_g)
        r_err = 10.**rspl(appmag_r)
        appmag_g += np.random.normal() * g_err
        appmag_r += np.random.normal() * r_err

        df.gPS_pert.values[ii] = appmag_g
        df.rPS_pert.values[ii] = appmag_r

    df.to_csv("Perturbed_match.dat", sep=" ", index=False)
def _run_evolution(self, key_len):
    """
    Method for running the evolution process. It works with one set of probable solutions
    (determined by key_len).

    :param int key_len: a length of key (solution - population member)
    :return: fittest member with its statistics
    :rtype: OrderedDict
    """
    self.logger.info("Evolution started")
    fittest_member_stats = OrderedDict()
    population = Population(self._encrypted_text, self._force_train, POPULATION_SIZE, key_len)
    population.init_new()
    threshold_score = THRESHOLD_SCORE_PER_WORD * len(self._encrypted_text.split())
    for gen_nr in frogress.bar(range(1, MAX_GENERATION_AMOUNT + 1)):
        population.calculate_fitness()
        self.logger.debug("Generation number {}.".format(gen_nr))
        self.logger.debug(population)  # change this line if you want to see all members of the population with their scores!
        member, score = population.get_fittest_member()
        if gen_nr == MAX_GENERATION_AMOUNT or score >= threshold_score:
            self.logger.info(
                "Fittest member from all generations is: '{}' (score: {}, from generation: {})".format(
                    member, score, gen_nr)
            )
            fittest_member_stats["member"] = member
            fittest_member_stats["fitness_score"] = score
            fittest_member_stats["generation"] = gen_nr
            fittest_member_stats["key_len"] = key_len
            fittest_member_stats["reached_threshold"] = True if score >= threshold_score else False
            return fittest_member_stats
        # Forming the new population - pick pairs of parents, do crossover and do mutation.
        parent_pairs = population.perform_roulette_wheel()
        # crossovered_pairs = population.perform_1p_crossover(parent_pairs)
        crossovered_pairs = population.perform_2p_crossover(parent_pairs)
        population.perform_mutation(crossovered_pairs)
def get_cuts(color, bin_num, bins, z_steps, confidence, include_PS_errors):
    cuts = np.zeros(z_steps - 1)
    for it in frogress.bar(range(1, z_steps)):
        colors = color[bin_num == it]
        hist, edges = np.histogram(colors, bins=bins)
        try:
            popt, pcov = curve_fit(double_gaussian, edges[:-1] + (edges[1] - edges[0]) / 2., hist)
        except:
            cuts[it - 1] = -1000
            continue
        red_ind = np.argmax([popt[1], popt[4]])
        if red_ind == 0:
            red_mu = popt[1]
            red_sigma = popt[2]
        else:
            red_mu = popt[4]
            red_sigma = popt[5]
        if include_PS_errors == False:
            cut = red_mu - confidence * np.abs(red_sigma)
        else:
            cut = red_mu - confidence * np.sqrt(red_sigma**2.0 + red_err**2.0 + green_err**2.0)
        cuts[it - 1] = cut
        if (it % 10 == 0):
            plt.figure(it / 10)
            plt.hist(colors, bins=bins)
            gr_range = edges[:-1] + (edges[1] - edges[0]) / 2.
            plt.plot(gr_range, double_gaussian(gr_range, popt[0], popt[1], popt[2],
                                               popt[3], popt[4], popt[5]), 'k-', lw=0.3)
            plt.axvline(cut, color='r', lw=0.3)
            plt.savefig("%s/test_hist_%s.pdf" % (output_dir, it / 10))
    return cuts
def get_cuts(color, bin_num, bins, z_steps, confidence, include_PS_errors):
    cuts = np.zeros(z_steps - 1)
    for it in frogress.bar(range(1, z_steps)):
        colors = color[bin_num == it]
        hist, edges = np.histogram(colors, bins=bins)
        try:
            popt, pcov = curve_fit(double_gaussian, edges[:-1] + (edges[1] - edges[0]) / 2., hist)
        except:
            cuts[it - 1] = -1000
            continue
        red_ind = np.argmax([popt[1], popt[4]])
        if red_ind == 0:
            red_mu = popt[1]
            blue_mu = popt[4]
            red_sigma = popt[2]
        else:
            red_mu = popt[4]
            blue_mu = popt[1]
            red_sigma = popt[5]
        xx = edges[:-1] + (edges[1] - edges[0]) / 2.
        res = double_gaussian(xx, popt[0], popt[1], popt[2], popt[3], popt[4], popt[5])
        cut = xx[np.argmin(res[(xx < red_mu) & (xx > blue_mu)])]
        cuts[it - 1] = cut
        """
        plt.figure(it)
        plt.hist(colors, bins=bins)
        gr_range = edges[:-1] + (edges[1] - edges[0]) / 2.
        plt.plot(gr_range, double_gaussian(gr_range, popt[0], popt[1], popt[2],
                                           popt[3], popt[4], popt[5]), 'k-', lw=0.3)
        plt.axvline(cut, color='r', lw=0.3)
        plt.savefig("%s/test_hist_%s.pdf" % ("/work/dominik.zuercher/Output/match_PS_GAMA", it))
        """
    return cuts
def tosankey(self):
    '''db to sankey json format. needs to be multiprocessed'''
    # define needed constants
    self.constants()

    # :( mp will throw a PicklingError
    # PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
    # pool = mp.Pool(processes=4)
    # pool.apply_async(self.profiletosankey, self.browsers)
    # pool.close()
    # pool.join()

    print 'Counting needed values with database commands. Please wait.'
    for b in frogress.bar(self.browsers):
        self.profiletosankey(b)

    # write to the dictionary and json
    print '\nWriting nodes to json.'
    nodesncv = self.nodemaker(self.nonconversion, self.ncv[0])
    nodescv = self.nodemaker(self.conversion, self.cv[0])
    self.nodes = nodesncv + nodescv

    print 'Writing links to json.'
    linksncv = self.linkmaker(self.nonconversion)
    linkscv = self.linkmaker(self.conversion)
    self.links = linksncv + linkscv

    print 'The final file is at %s' % self.jsonfile

    def writejson(links, nodes, outfile):
        '''lol so much repeating and helper functions'''
        linksandnodes = {"links": links, "nodes": nodes}
        with open(outfile, 'w') as f:
            json.dump(linksandnodes, f)

    writejson(self.links, self.nodes, self.jsonfile)
def main():
    if args.file:
        check_available_storage()
        output = ''
        if not args.quiet:
            print('{} {}'.format(name, __version__))
            print('Generiere {} ({})'.format(args.file, sizeof_fmt(total_vcard_size)))
            for i in frogress.bar(range(args.count), steps=args.count, widgets=widgets):
                output += generate_vcard()
                if (i % 250) == 0 or i == args.count - 1:
                    # always write 250 vCards to the file at a time
                    with open(args.file, mode='a', encoding='utf-8') as f:
                        f.write(output)
                    output = ''
            print("\n")
        else:
            for i in range(args.count):
                output += generate_vcard()
                if (i % 250) == 0 or i == args.count - 1:
                    # always write 250 vCards to the file at a time
                    with open(args.file, mode='a', encoding='utf-8') as f:
                        f.write(output)
                    output = ''
    else:
        for i in range(args.count):
            print(generate_vcard(), end='')
def main(f_old, f_new, f_out_prefix):
    START_TIME = time.time()
    parser = ET.XMLParser(remove_blank_text=True)
    tree_old = ET.parse(f_old, parser)
    tree_new = ET.parse(f_new, parser)
    all_new_ways = tree_new.findall('way')
    all_old_ways = tree_old.findall('way')

    removeways = []
    # loop through all ways in old file and look for deleted ways
    print('Scanning for deleted ways')
    for i, way_old in frogress.bar(enumerate(all_old_ways), steps=len(all_old_ways)):
        if reportprogress:
            print('{}/{} ways done'.format(i + 1, len(all_new_ways)), end='\r')
        transid = way_old.find('tag[@k="TRANSID"]').attrib['v']
        way_new = tree_new.find('way/tag[@k="TRANSID"][@v="' + transid + '"]/..')
        if way_new is not None:
            # corresponding new way exists, so remove it
            removeways.append(way_old)

    # loop through all ways in new file and look for changed/new ways
    print('Scanning for changed/new ways')
    for i, way_new in frogress.bar(enumerate(all_new_ways), steps=len(all_new_ways)):
        if reportprogress:
            print('{}/{} ways done'.format(i + 1, len(all_new_ways)), end='\r')
        changes = []
        # dict with tags of new way
        transid = way_new.find('./tag[@k="TRANSID"]').get('v')
        # way from old file with matching TRANSID
        way_old = tree_old.find('.//way/tag[@k="TRANSID"][@v="' + transid + '"]/..')
        # check if way is new (nonexistent in old)
        if way_old is None:
            ET.SubElement(way_new, 'tag', dict(k='ELVEGDIFF_CHANGES', v='NEW WAY'))
            continue  # skip the rest of the checks
        # check if tags have changed
        changes.extend(tag_changes(way_old, way_new))
        # check if nodes have changed
        nds_old = way_old.findall('nd')
        nds_new = way_new.findall('nd')
        nodes_old = []
        nodes_new = []
        for nds, nodelist in [(nds_old, nodes_old), (nds_new, nodes_new)]:
            for nd in nds:
                for node in nd.getparent().itersiblings(tag='node', preceding=True):
                    if node.get('id') == nd.attrib['ref']:
                        nodelist.append(node)
                        break
        changes.extend(node_changes(nodes_old, nodes_new))
        if changes:
            ET.SubElement(way_new, 'tag', dict(k='ELVEGDIFF_CHANGES', v='\n'.join(changes)))
        else:
            # remove element
            way_new.getparent().remove(way_new)

    # remove the ways to be removed
    for way_old in removeways:
        way_old.getparent().remove(way_old)

    # loop through all childless nodes and check if they're referenced anywhere
    allnodes = tree_old.findall('node') + tree_new.findall('node')
    print('Removing unreferenced nodes without tags')
    for i, node in frogress.bar(enumerate(allnodes), steps=len(allnodes)):
        if reportprogress:
            print('{}/{} nodes done'.format(i + 1, len(allnodes)), end='\r')
        if len(node):
            # node has children, skip
            continue
        # assume the node is unreferenced and should be removed,
        # and check for references below
        remove = True
        id_ = node.attrib['id']
        next_ways = node.itersiblings(tag='way')  # node should be referenced here
        # TODO: if it is possible that nodes are referenced ABOVE where they are in the file,
        # include the following line and use itertools.chain to chain next_ways and prev_ways
        #prev_ways = node.itersiblings(tag='way', preceding=True)  # included just in case
        for way in next_ways:
            for nd in way.iterchildren(tag='nd'):
                if nd.get('ref') == id_:
                    # <node> is referenced
                    remove = False
                    break
            # if we found a referencing <nd>, break out of loop
            if not remove:
                break
        # if no nd is found referencing this node, remove it
        if remove:
            node.getparent().remove(node)

    print('Scanning for changes in tagged nodes')
    old_node_list = [n for n in tree_old.findall('node') if len(n)]
    new_node_list = [n for n in tree_new.findall('node') if len(n)]
    old_nodes = {}
    new_nodes = {}
    for nodes, dct in [(old_node_list, old_nodes), (new_node_list, new_nodes)]:
        for node in nodes:
            latlon = (node.attrib['lat'], node.attrib['lon'])
            dct[latlon] = node

    # added/deleted nodes
    added_nodes_latlon = set(new_nodes.keys()).difference(old_nodes.keys())
    deleted_nodes_latlon = set(old_nodes.keys()).difference(new_nodes.keys())

    # add change description tag to added nodes
    for latlon in added_nodes_latlon:
        ET.SubElement(new_nodes[latlon], 'tag', dict(k='ELVEGDIFF_CHANGES', v='NEW TAGGED NODE'))

    # changed nodes
    changed_nodes_latlon = []
    for latlon in set(new_nodes.keys()).intersection(old_nodes.keys()):
        changes = tag_changes(old_nodes[latlon], new_nodes[latlon])
        if changes:
            changed_nodes_latlon.append(latlon)
            ET.SubElement(new_nodes[latlon], 'tag', dict(k='ELVEGDIFF_CHANGES', v='\n'.join(changes)))

    # if nodes are neither changed nor added, remove from new file
    unchanged_nodes_latlon = set(new_nodes.keys()).difference(added_nodes_latlon, changed_nodes_latlon)
    for latlon in unchanged_nodes_latlon:
        # TODO: don't remove if it's referenced somewhere
        new_nodes[latlon].getparent().remove(new_nodes[latlon])

    # if nodes are not deleted, remove from old file
    undeleted_nodes_latlon = set(old_nodes.keys()).difference(deleted_nodes_latlon)
    for latlon in undeleted_nodes_latlon:
        # TODO: don't remove if it's referenced somewhere
        old_nodes[latlon].getparent().remove(old_nodes[latlon])

    print('Finished in {:.1f} seconds'.format(time.time() - START_TIME))
    tree_new.write(r'{}changed.osm'.format(f_out_prefix), pretty_print=True,
                   xml_declaration=True, encoding='UTF-8')
    tree_old.write(r'{}deleted.osm'.format(f_out_prefix), pretty_print=True,
                   xml_declaration=True, encoding='UTF-8')
def test_bar_passes_parameters(self):
    seq = [1, 2, 3, 4, 5]
    progressbar = frogress.bar(seq)
    self.assertEqual(progressbar.iterable, seq)
def iter(what):
    import frogress
    return frogress.bar(what)
def test_context(self):
    with mock.patch('%s.open' % __name__, mock.mock_open(read_data='first line'),
                    create=True) as m:
        with frogress.bar(open('fake_file')) as f:
            result = f.read()
        self.assertEqual(result, 'first line')
def iterable2dataframe(iterable, field_name='public_info'):
    result = []
    for row in frogress.bar(iterable):
        result.append(getattr(row, field_name))
    return pd.DataFrame(result)
def es_index_construction(relative_list, index_name, corpus):
    '''
    :param relative_list: list of URLs (relative paths) to get traffic for and upload to ElasticSearch
    :param index_name: name of the ElasticSearch index to create
    :param corpus: corpus of documents to upload
    :return:
    '''
    def chunks(l, n):
        # For item i in a range that is a length of l,
        for i in range(0, len(l), n):
            # Create an index range for l of n items:
            yield l[i:i + n]

    # iterate through chunks of URLs to get traffic
    appended_data = []
    for url_chunk in frogress.bar(list(chunks(relative_list, 500))):
        urls = format(url_chunk).replace('[', '').replace(']', '')
        query1 = """SELECT relative_path, timelessness, content_type, title, primary_sub_channel, workflow_status,
                           avg(entrances) AS avg_organic_entrances_last_3_months
                    FROM (SELECT content.relative_path, timelessness, content_type, title,
                                 content.primary_sub_channel, content.workflow_status,
                                 trunc(traffic_date, 'mm') AS month, sum(entrances) AS entrances
                          FROM investopedia.page_traffic
                          LEFT JOIN investopedia.content using(relative_path)
                          WHERE content.relative_path IN ({urls})
                            AND traffic_channel = 'organic search'
                            AND traffic_date >= '2017-08-01' AND traffic_date < '2017-11-01'
                          GROUP BY trunc(traffic_date, 'mm'), timelessness, content_type, title,
                                   content.relative_path, content.primary_sub_channel, content.workflow_status) traffic
                    GROUP BY relative_path, timelessness, content_type, title, primary_sub_channel, workflow_status""".format(urls=urls)
        temp_df = run_ibi_query(query1)
        print(len(temp_df))
        appended_data.append(temp_df)

    traffic_df = pd.concat(appended_data, ignore_index=True)
    traffic_df['url'] = traffic_df['relative_path'].apply(lambda x: x.strip('/'))

    # merge corpus with traffic_df to include traffic
    corpus_df = pd.DataFrame.from_dict(corpus)
    merged_df = corpus_df.merge(traffic_df[[
        'url', 'avg_organic_entrances_last_3_months', 'primary_sub_channel', 'workflow_status'
    ]], how='left', on='url', copy=False)
    merged_df.rename(columns={
        'avg_organic_entrances_last_3_months': 'avg_entrances',
        'primary_sub_channel': 'actual_sub_channel'
    }, inplace=True)
    merged_df['avg_entrances'] = merged_df['avg_entrances'].apply(lambda x: 0 if np.isnan(x) else x)
    merged_df['actual_sub_channel'] = merged_df['actual_sub_channel'].fillna('')
    merged_df = merged_df.fillna(0)
    es_corpus = merged_df.to_dict(orient='records')
    #import pdb;pdb.set_trace()

    # select a subset of keys we wish to upload to ElasticSearch
    es_list = [{
        your_key: dic[your_key]
        for your_key in [
            'advertising_channel', 'author', 'avg_entrances', 'actual_sub_channel',
            'bodyTEXT', 'channel', 'created', 'sitedate', 'sub_advertising_channel',
            'sub_channel', 'subtype', 'summary', 'syndate', 'timelessness', 'title',
            'type', 'updated', 'url', 'workflow_status'
        ] if your_key in dic
    } for dic in es_corpus]
    for new in es_list:
        if new['syndate'] != None:
            new['syndate'] = datetime.datetime.utcfromtimestamp(
                int(new['syndate'])).strftime('%Y-%m-%dT%H:%M:%S+00:00')
            new['sitedate'] = datetime.datetime.utcfromtimestamp(
                int(new['sitedate'])).strftime('%Y-%m-%dT%H:%M:%S+00:00')

    # run configuration from investopedia_recirc to establish connection to the ElasticSearch service
    configuration()
    # create index with provided name
    InvestopediaRecirc.create_index(index_name=index_name, force=True, using='default')
    print('index with name: {} created.'.format(index_name))
    # upload subsetted corpus to ES
    InvestopediaRecirc.bulk_update([InvestopediaRecirc(**doc) for doc in es_list])
    print('corpus uploaded to {}.'.format(index_name))
np.savetxt(outdir + "widths.txt", widths)
np.savetxt(outdir + "heights.txt", heights)
np.savetxt(outdir + "pros.txt", pros)
np.savetxt(outdir + "skys.txt", skys)
np.savetxt(outdir + "outs.txt", outs)

names = np.genfromtxt(outdir + "names.txt", dtype=str)
ras = np.genfromtxt(outdir + "ras.txt")
decs = np.genfromtxt(outdir + "decs.txt")
widths = np.genfromtxt(outdir + "widths.txt")
heights = np.genfromtxt(outdir + "heights.txt")
pros = np.genfromtxt(outdir + "pros.txt")
skys = np.genfromtxt(outdir + "skys.txt")
outs = np.genfromtxt(outdir + "outs.txt")

names = names[it:]
ras = ras[it:]
decs = decs[it:]
widths = widths[it:]
heights = heights[it:]
pros = pros[it:]
skys = skys[it:]
outs = outs[it:]

for j in range(len(names)):
    names[j] = str(outdir) + str(os.path.basename(names[j]))

for i in frogress.bar(range(len(names))):
    tools.edge_correct(names[i], pros[i], skys[i], ras[i], decs[i], outs[i],
                       widths[i], heights[i], indir, outdir)
endung = "." + endung break elif endung.lower() == "x": print "Aufwiedersehen\n" exit() else: print "Bitte Eingabe richtig machen " except ValueError: print "Bitte richtige Eingabe" #hier beginntder Prozess der einzelnen Ordner path = wd + '/%s' for Buchstabe in ALPHA: if not os.path.exists(path % Buchstabe): os.makedirs(path % Buchstabe) # Listet alle Dateien im Suchverzeichnis auf mit mit der Endung txt for file in frogress.bar(os.listdir(hiermachen)): # wenn der Dateiname (wenn groß dann automatisch klein ".lower()" endet mit *.bin # und der erste Buchtabe (Groß zum vergleich mit Alpha) file[0].upper() in ALPHA vorkommt if file.lower().endswith(endung.lower()) and file[0].upper() in ALPHA: # zum einsparen einer Schleife nehme den 1.Buschtaben der Datei in Groß um das zielverzeichniss zu finden #datei = open("datenbank.csv", "a") with open("datenbank.csv", "a") as datei: logdatei = open("copy.log", "a") Buchstabe = file[0].upper() sourcefile = os.path.join(hiermachen, file) destinationfile = os.path.join(path % Buchstabe, file) #Debug Ausgabe print"%s ==> %s" % (file,destinationfile) shutil.move(sourcefile, destinationfile) datei.write("\t%s,\t%s\n" % (file, destinationfile)) logdatei.write("%s nach %s verschoben\n" % (file, destinationfile)) logdatei.close()
def procedure(rank, size):
    steps = 2643.0 - 635.0
    chunky = int(steps / size)
    rest = steps - chunky * size
    mini = chunky * rank
    maxi = chunky * (rank + 1)
    if rank >= (size - 1 - rest):
        maxi += 2 + rank - size + rest
        mini += rank - size + 1 + rest
    mini += 635
    maxi += 635
    mini = int(mini)
    maxi = int(maxi)

    if color == "iband":
        filter_ = "SDSS i"
    elif color == "red":
        filter_ = "SDSS r"
    elif color == "green":
        filter_ = "SDSS g"
    else:
        print("Defined filter not available. Choose red, green or iband")
        return
    imap = mwdust.Combined15(filter=filter_, sf10=True)

    print("Creating Grid...")
    foos = np.arange(mini, maxi, 1)
    full = tools.create_grid(foos)
    print("Kernel %s doing %s up to %s" % (rank, mini, maxi))

    print("Calculating borders of the cells")
    minras, maxras, mindecs, maxdecs = tools.get_centers(full[:, 0], full[:, 1],
                                                         borders=True, maskdetect=True)
    print("Using %s objects in Kernel %s" % (full.shape[0], rank))

    print("Converting coordinates to galactic system")
    idy = np.where(np.abs(minras + 99.0) < 0.0001)[0]
    idy2 = np.where(np.abs(minras + 999.0) < 0.0001)[0]
    idy = np.append(idy, idy2)
    idy = np.unique(idy)
    for it in idy:
        minras[it] = 0.0
        mindecs[it] = 0.0
        maxras[it] = 0.0
        maxdecs[it] = 0.0
    c_maxs = SkyCoord(ra=maxras * u.degree, dec=maxdecs * u.degree, frame='icrs')
    c_mins = SkyCoord(ra=minras * u.degree, dec=mindecs * u.degree, frame='icrs')
    c_maxs = c_maxs.galactic
    c_mins = c_mins.galactic
    ramins = c_mins.l.degree
    ramaxs = c_maxs.l.degree
    decmaxs = c_mins.b.degree
    decmins = c_maxs.b.degree

    print("Calculating extinction...")
    dustyness = np.zeros(0)
    for idx in frogress.bar(range(full.shape[0])):
        if idx in idy:
            dustyness = np.append(dustyness, -99.0)
            continue
        dustynesscolor = np.zeros(0)
        for b in np.linspace(decmins[idx], decmaxs[idx], 5):
            for l in np.linspace(ramins[idx], ramaxs[idx], 5):
                if color == "red":
                    foo = rmap(l, b, depth)
                elif color == "iband":
                    foo = imap(l, b, depth)
                elif color == "green":
                    foo = gmap(l, b, depth)
                dustynesscolor = np.append(dustynesscolor, foo)
        dustynesscolor = np.average(dustynesscolor)
        dustyness = np.append(dustyness, dustynesscolor)

    np.savetxt(str(outdir) + str(color) + "parts/dust_catalog_" + str(color) + "_" + str(rank) + ".csv",
               np.hstack((full, dustyness.reshape(dustyness.size, 1))))
def db2json(credentials, name, browsers, operating):
    '''Format data from MySQLdb to Sankey json file,
    see sankeygreenhouse.json for an example.'''
    # connect
    (MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB) = credentials
    db = MySQLdb.connect(host=MYSQL_HOST, passwd=MYSQL_PASSWORD, user=MYSQL_USER, db=MYSQL_DB)
    cur = db.cursor()

    def repeatcmd(name, *args):
        '''Return int count value.
        Create the command by adding together strings.
        '''
        cmd = 'SELECT COUNT(*) FROM %s' % name
        n = len(args)
        if n / 2:
            cmd += ' WHERE '
        for i in xrange(n):
            # this makes me unhappy
            if i % 2:
                continue
            if i != 0:
                cmd += 'and '
            cmd += '%s = %s ' % (args[i], args[i + 1])
        cur.execute(cmd)
        return cur.fetchone()[0]

    # strings
    name = scrub(name)
    outfile = name + '.json'
    # variables, defaults
    cv, CV, conversion, outcv = 'conversion', '1', defaultdict(int), name + 'ncv.json'
    ncv, NCV, nonconversion, outncv = 'nonconversion', '0', defaultdict(int), name + 'cv.json'
    # distinguishing
    s = ' '  # ncv has addl variable s

    # os -> browser -> conversion
    print 'Converting data to links.'
    for b in frogress.bar(browsers):
        # browser to nonconversion
        nonconversion[browsers[b] + s, ncv] += repeatcmd(name, cv, NCV, 'browser', b)
        # browser to conversion
        conversion[browsers[b], cv] += repeatcmd(name, cv, CV, 'browser', b)
        # os to browser
        # differentiating by ncv/cv slows it down by 3 times
        for os in operating:
            # os to browser to nonconversion
            nonconversion[operating[os]+s+browsers[b][0], browsers[b]+s] += \
                repeatcmd(name, 'OS', os, 'browser', b, cv, NCV)
            # os to browser to conversion
            conversion[operating[os]+browsers[b][0], browsers[b]] += \
                repeatcmd(name, 'OS', os, 'browser', b, cv, CV)

    # write to the dictionary and json
    print '\nWriting nodes to json.'

    def nodemaker(links, extra=[]):
        '''Convert integer 2-keyed links dictionary to nodes list.
        Optional extra (SINGLE) parameter to add.'''
        nodes = [s for (s, t), v in links.items() if v > 0]
        nodes.append(extra)
        nodes = set(nodes)
        return [{"name": n} for n in nodes]

    nodesncv = nodemaker(nonconversion, ncv)
    nodescv = nodemaker(conversion, cv)
    nodes = nodesncv + nodescv

    print 'Writing links to json.'

    def linkmaker(links):
        '''Convert integer 2-keyed links dictionary to links list'''
        return [{
            "source": s,
            "target": t,
            "value": v
        } for (s, t), v in links.items() if v > 0]

    linksncv = linkmaker(nonconversion)
    linkscv = linkmaker(conversion)
    links = linksncv + linkscv

    def writejson(links, nodes, outfile):
        '''lol so much repeating and helper functions'''
        linksandnodes = {"links": links, "nodes": nodes}
        with open(outfile, 'w') as f:
            json.dump(linksandnodes, f)

    #writejson(linksncv, nodesncv, outncv)
    #writejson(linkscv, nodescv, outcv)
    writejson(links, nodes, outfile)
import os
import random
import string
import subprocess
import time
import frogress

# clear the screen
os.system('clear')
program = raw_input("Was wollen Sie machen Dateien (e)rstellen oder (l)öschen : ")

# ask for the number of files to create
if program.lower() == ('e'):
    anzahl = input("Wieviel files sollen erstellt werden :")
    # ask for the extension of the files to be created
    endungen = raw_input("Welche Endung soll die Datei haben :")
    # i is just a variable for the iterator
    #for i in range(0, anzahl):
    for i in frogress.bar(range(0, anzahl)):
        ran = random.choice(string.ascii_letters)
        ran1 = random.choice(string.ascii_letters)
        subprocess.call("touch %s%s.%s" % (ran, ran1, endungen), shell=True)
        #open('%s%s.txt' % (ran, ran1), 'a').close()
        #print ("touch %s%s.txt") % (ran, ran1)
elif program.lower() == ('l'):
    # build a list of files and directories in the current directory, excluding .py files and directories
    filelist = [
        f for f in os.listdir(".")
        if not f.endswith(".py") and not os.path.isdir(f)
    ]
    for f in filelist:
        #print f
        # delete the files
        os.remove(f)
def main():
    args = parse_args()
    db_conn = pymysql.connect(**args.mysql_url)
    insert_tpl = '''
        INSERT INTO `mag_papers` (
            `paper_id`,
            `original_paper_title`,
            `normalized_paper_title`,
            `paper_publish_year`,
            `paper_publish_date`,
            `paper_doi`,
            `original_venue_name`,
            `normalized_venue_name`,
            `journal_id_mapped_to_venue_name`,
            `conference_series_id_mapped_to_venue_name`,
            `paper_rank`
        )
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    '''
    if args.create_tables:
        print('Creating tables and indexes')
        create_tables_and_indexes(db_conn.cursor())
        db_conn.commit()

    print('Reading', args.input_csv, '...')
    input_file = utils.open_compressed_file(args.input_csv)
    cursor = db_conn.cursor()
    with input_file, cursor:
        csvreader = csv.reader(
            input_file,
            delimiter='\t',
            quoting=csv.QUOTE_NONE,
        )
        records = (parse_papers_record(r) for r in csvreader)
        records_truncated = ((
            r.paper_id[:50],
            r.original_paper_title[:255],
            r.normalized_paper_title[:255],
            r.paper_publish_year,
            r.paper_publish_date,
            r.paper_doi[:255],
            r.original_venue_name[:255],
            r.normalized_venue_name[:255],
            r.journal_id_mapped_to_venue_name[:255],
            r.converence_series_id_mapped_to_venue_name[:255],
            r.paper_rank,
        ) for r in records)
        records_with_progress = frogress.bar(
            records_truncated,
            steps=args.expected_records,
        )
        cursor.executemany(insert_tpl, records_with_progress)
        db_conn.commit()