def handle(self, *args, **options):
    def parse_buscador(r):
        pq = PyQuery(r.content)
        return pq('div.storelocator_result')

    suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
    if not os.path.exists(suc_dir):
        os.makedirs(suc_dir)

    FILENAME = 'carrefour_%s.csv' % datetime.now().strftime("%Y-%m-%d-%H%M%S")
    FILENAME = os.path.join(suc_dir, FILENAME)
    writer = unicodecsv.DictWriter(open(FILENAME, 'wb'), SUCURSAL_COLS)
    writer.writeheader()

    ciudades = City.objects.filter(country__name='Argentina',
                                   population__gt=DESDE)
    results = []
    bar = Bar('Obteniendo sucursales de Carrefour', suffix='%(percent)d%%')
    for city in bar.iter(ciudades):
        # the address appears to be a fixed placeholder; the geocode
        # parameter is what varies per city
        r = requests.post(
            'http://www.carrefour.com.ar/storelocator/index/search/',
            {'search[address]': 'Mendoza, Argentina',
             'search[geocode]': '%s, %s' % (city.latitude, city.longitude)})
        results.extend(parse_buscador(r))

    # html = '\n\n'.join(PyQuery(r).html() for r in results)
    # f = open(FILENAME + '.html', 'w')
    # f.write(html.encode('utf8'))

    CONOCIDOS = []
    nuevas = 0
    bar = Bar('Extrayendo información de nuevas sucursales',
              suffix='%(percent)d%%')
    for suc in bar.iter(results):
        supermercado = self.parse_suc(suc)
        nombre = supermercado['nombre']
        if nombre in CONOCIDOS:
            # print("%s ya cargado" % nombre)
            continue
        CONOCIDOS.append(nombre)
        # print(supermercado)
        writer.writerow(supermercado)
        nuevas += 1

    # note: report the raw result count, not the number of cities queried
    print "Se encontraron %d sucursales únicas de Carrefour (%d resultados)" \
        % (nuevas, len(results))
def handle(self, *args, **options):
    if len(args) != 1:
        raise CommandError('dame el geojson, pa')
    geojson = args[0]
    if geojson.startswith('http'):
        fh = urllib2.urlopen(geojson)
    else:
        fh = open(args[0])
    self.data = json.load(fh)

    suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
    if not os.path.exists(suc_dir):
        os.makedirs(suc_dir)

    FILENAME = self.FILENAME % datetime.now().strftime("%Y-%m-%d-%H%M%S")
    FILENAME = os.path.join(suc_dir, FILENAME)
    writer = unicodecsv.DictWriter(open(FILENAME, 'wb'),
                                   fieldnames=self.get_columnas())
    writer.writeheader()

    bar = Bar('Convirtiendo ', suffix='%(percent)d%%')
    for feature in bar.iter(self.entrada()):
        sucursal = self.parse_sucursal(feature)
        writer.writerow(sucursal)
def backPropDifWay(ts):
    w0 = numpy.random.uniform(-1, 1, (2, 2))
    w1 = numpy.random.uniform(-1, 1, (2, 1))
    b0 = numpy.random.uniform(-1, 1, (1, 2))
    b1 = numpy.random.uniform(-1, 1, (1, 1))
    lamb = 5
    error = 0
    bar = Bar(
        "Learning",
        suffix="Iterations: %(index)d/%(max)d, Elapsed: %(elapsed_td)s, %(error)s")
    bar.error = error
    for epochs in bar.iter(range(500000)):
        output = numpy.matrix([numpy.array(y) for x, y in ts]).T
        a0 = numpy.matrix([x for x, y in ts])
        dot1 = numpy.dot(a0, w0) + b0
        a1 = sigmoid(dot1)
        dot2 = numpy.dot(a1, w1) + b1
        a2 = sigmoid(dot2)
        delta1 = numpy.multiply(sigmoidDeriv(dot2), (output - a2))
        delta0 = numpy.multiply(sigmoidDeriv(dot1), numpy.dot(delta1, w1.T))
        # Weight gradients use the *inputs* to each layer (a0 and a1); the
        # original multiplied by the layer outputs (a1 and a2), which is the
        # wrong gradient for w0 and a shape mismatch for w1.
        w0 = w0 + lamb * numpy.dot(a0.T, delta0)
        w1 = w1 + lamb * numpy.dot(a1.T, delta1)
        # Bias gradients are summed over the batch so the bias shapes stay
        # (1, 2) and (1, 1) instead of silently broadcasting to batch size.
        b0 = b0 + lamb * delta0.sum(axis=0)
        b1 = b1 + lamb * delta1.sum(axis=0)
        bar.error = sum(abs(a2 - output)) / 4
    print(delta0.shape, delta1.shape)
    print(w0.shape, w1.shape)
    return a2
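# The snippet above assumes `sigmoid` and `sigmoidDeriv` exist at module
# level. A minimal sketch of what they would look like; these definitions
# are assumptions inferred from the calls above, not the originals:
def sigmoid(x):
    # element-wise logistic function; works on numpy arrays and matrices
    return 1.0 / (1.0 + numpy.exp(-x))

def sigmoidDeriv(x):
    # derivative of the logistic, evaluated at the pre-activation x
    s = sigmoid(x)
    return numpy.multiply(s, 1 - s)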
def _rforest_plot(self, pen_params):
    bar = Bar(width=40, suffix='%(percent)d%%')
    X, Y = np.meshgrid(pen_params['n_estimators'], pen_params['max_depth'])
    print 'Getting errors for {}...'.format(self.method)
    # note: the bar wraps the inner loop, so it restarts once per outer value
    Z = np.array([
        self.k_fold_results(**{
            'n_estimators': x,
            'max_depth': y
        }).mean()
        for x in pen_params['n_estimators']
        for y in bar.iter(pen_params['max_depth'])
    ])
    # The flat array varies over n_estimators in the outer loop, so reshape
    # to (n_estimators, max_depth) and transpose to match meshgrid's layout;
    # the original (len(X), len(Y)) only worked for square grids.
    Z = Z.reshape(
        len(pen_params['n_estimators']), len(pen_params['max_depth'])
    ).T
    fig, ax = plt.subplots()
    p = ax.contourf(X, Y, Z, cmap='RdYlBu')
    ax.set_xlabel('n_estimators')
    ax.set_ylabel('max_depth')
    ax.set_title('rforest test error rate')
    plt.colorbar(p)
    plt.savefig('test_error_rforest.png')
def handle(self, *args, **options):
    re_summary = re.compile(r"<(\w*) class=\"?summary\"?>.*?</\1>",
                            flags=re.I | re.S)
    re_section = re.compile(r"<h2>(.*?)(</h2>|<br>)", flags=re.I | re.S)
    bar = Bar(width=20,
              suffix="%(percent)d%% %(index)d/%(max)d %(elapsed_td)s ETA %(eta_td)s")
    qs = Article.objects.filter(source="BHC")
    for a in bar.iter(qs):
        a.section_set.all().delete()
        s = a.content
        n = len(s)
        summary_match = re_summary.search(s)
        start = summary_match.end() + 1 if summary_match else 0
        section_no = 0
        section_name = "Introduction"

        def add_section(content):
            section = Section(section_no=section_no, title=section_name)
            section.article = a
            section.content = content.strip()
            section.save()

        for h2 in re_section.finditer(s):
            if h2.start() >= start:
                add_section(s[start:h2.start() - 1])
                section_no += 1
                section_name = h2.group(1)
                start = h2.end()
        else:
            if start < n:
                add_section(s[start:n])
def make_me_a_rockstar(self):
    self.repo = git.Repo.init(self.repo_path)
    progress_msg = 'Making you a Rockstar Programmer'
    bar = Bar(progress_msg, suffix='%(percent)d%%')
    for commit_date in bar.iter(self._get_dates_list()):
        self._edit_and_commit(str(uuid.uuid1()), commit_date)
    self._make_last_commit()
    print('\nYou are now a Rockstar Programmer!')
def main():
    dialect = csv.Sniffer().sniff(EJEMPLO)
    reader = csv.reader(open(sys.argv[1]), dialect=dialect)
    writer = csv.DictWriter(open('productos.csv', 'w'),
                            fieldnames=PRODUCTO_COLS)
    writer.writeheader()
    bar = Bar('Normalizando CSV', suffix='%(percent)d%%')
    for l in bar.iter(reader):
        data = normalizar(dict(zip(headers, l)))
        writer.writerow(data)
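# The normalizer above relies on several module-level names that are not
# shown. A hedged sketch of plausible definitions; the column names and the
# normalizar body are illustrative assumptions, not the originals:
EJEMPLO = 'upc|descripcion|precio'  # sample line used to sniff the dialect
headers = ['upc', 'descripcion', 'precio']  # column order of the input file
PRODUCTO_COLS = headers  # columns written to productos.csv

def normalizar(row):
    # placeholder normalization: trim whitespace from every value
    return dict((k, v.strip()) for k, v in row.items())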
def tune_model(self):
    """
    Tunes self.model using a GridSearch.
    """
    grid = {
        'ridge': ParameterGrid({
            'alpha': [i / 2. for i in range(1, 21)],
        }),
        'svc': ParameterGrid({
            'C': [i / 2. for i in range(1, 21)],
            'gamma': [i / 10. for i in range(1, 10)]
        }),
        'svr': ParameterGrid({
            'C': [i / 2. for i in range(1, 21)],
            'epsilon': [i / 10. for i in range(11)]
        }),
        'rforest': ParameterGrid({
            'n_estimators': range(25, 501, 25),
            'max_depth': range(2, 10),
            'min_samples_split': range(5, 101, 5)
        })
    }[self.method]
    best = {'params': None, 'score': 0}
    bar = Bar(message='Searching...', width=40, suffix='%(percent)d%%')
    for params in bar.iter(grid):
        # RUNTIME REDUCER
        # if np.random.uniform() > 0.975:
        #     break
        score = cross_val_score(
            self.model(**params),
            self.dirty_X,
            np.squeeze(self.dirty_y),
            cv=8,
            n_jobs=4
        ).mean()
        if score > best['score']:
            best.update({'params': params, 'score': score})
    bestmod = self.model(n_jobs=-1, **best['params'])
    bestmod.fit(self.dirty_X, np.squeeze(self.dirty_y))
    self.bestmod = bestmod
def all_pers(self):
    """
    Calculates PER for the whole dataset.

    See per, team_pers for method on calculating PER.
    """
    bar = Bar(width=40)
    print 'Calculating PERs...'
    return pd.concat(
        [self.team_pers(s) for s in bar.iter(self.seasons)]
    ).groupby(level=self.data.index.names).first()
def handle(self, *args, **options):
    if len(args) == 0 or not args[0]:
        raise CommandError("No path is provided.")
    path = args[0]
    bar = Bar("Importing...",
              suffix="%(percent)d%% %(index)d/%(max)d ETA %(eta_td)s")
    for filename in bar.iter(os.listdir(path)):
        basename = os.path.basename(filename)
        filename = os.path.join(path, filename)
        try:
            self.do_file(filename)
        except Exception as e:
            traceback.print_exc()
            raise CommandError("{0}: {1}".format(basename, e))
def update_plugin():
    setup_repo()
    if args.zip_name.lower() == 'all':
        zips = [z for z in os.listdir(repo.upload_dir)
                if (os.path.isfile(os.path.join(repo.upload_dir, z))
                    and z.lower().endswith('.zip'))]
    else:
        zips = [args.zip_name]
    if not zips:
        if args.zip_name.lower() == 'all':
            print('No plugins archives found in uploads directory')
        else:
            print('No plugin archive name defined')
        return False
    repo.output = False  # nix qgis_repo output, since using progress bar
    up_bar = Bar("Updating plugins in '{0}'".format(repo.repo_name),
                 fill='=', max=len(zips))
    up_bar.start()
    for i in up_bar.iter(range(0, len(zips))):
        try:
            repo.update_plugin(
                zips[i],
                name_suffix=args.name_suffix,
                auth=args.auth,
                auth_role=args.auth_role,
                git_hash=args.git_hash,
                versions=args.versions,
                keep_zip=args.keep_zip,
                untrusted=args.untrusted,
                invalid_fields=args.invalid_fields
            )
        except KeyboardInterrupt:
            return False
    if args.sort_xml:
        print("Sorting repo plugins.xml")
        post_sort = QgisPluginTree.plugins_sorted_by_name(
            repo.plugins_tree.plugins())
        repo.plugins_tree.set_plugins(post_sort)
    return True
def score_friends(self, friends, agg=True):
    if not friends:
        raise ValueError('{} has no friends.'.format(self.user))
    users = [NetworkParser(u, self.path_to_keys, self.ntweets)
             for u in friends]
    bar = Bar(width=40, suffix='%(percent)d%%')
    print 'Scoring {}\'s network...'.format(self.user)
    tweet_scores = pd.concat(
        [u.mean_scores(agg=agg) for u in bar.iter(users)]
    )
    return tweet_scores.groupby('user').mean().reset_index()
def main():
    prods13 = Producto.objects.extra(where=["CHAR_LENGTH(upc) = 13"])
    prods12 = Producto.objects.extra(where=["CHAR_LENGTH(upc) = 12"])
    antes = DescripcionAlternativa.objects.count()
    bar = Bar('Migrando', suffix='%(percent)d%%')
    with transaction.atomic():
        for p13 in bar.iter(prods13):
            try:
                p12 = prods12.get(upc=p13.upc[:12])
            except Producto.DoesNotExist:
                continue
            p13.agregar_descripcion(descripcion=p12.descripcion, ignorar=True)
            for precio in p12.precios.all():
                precio.producto = p13
                precio.save(update_fields=['producto'])
            p12.delete()
    despues = DescripcionAlternativa.objects.count()
    print "se unificaron %d productos" % (despues - antes)
def _non_forest_plot(self, pen_params):
    bar = Bar(width=40, suffix='%(percent)d%%')
    values = pen_params.values()[0]
    print 'Getting errors for {}...'.format(self.method)
    errors = np.array([
        (alpha,
         1 - self.k_fold_results(**{pen_params.keys()[0]: alpha}).mean())
        for alpha in bar.iter(values)
    ])
    fig, ax = plt.subplots()
    ax.plot(errors[:, 0], errors[:, 1])
    ax.set_title('{} test error rate'.format(self.method))
    ax.set_xlabel('penalty parameter {}'.format(pen_params.keys()[0]))
    ax.set_ylabel('test error')
    plt.savefig('test_error_{METHOD}_{PARAM}.png'.format(
        METHOD=self.method,
        PARAM=pen_params.keys()[0]
    ))
args = parser.parse_args()
seasons = sorted(args.seasons)
sel = [
    # indexing and general
    'date', 'team', 'o:team', 'game_number', 't:points', 'to:points',
    # PER volume stats
    't:minutes', 't:three pointers made', 't:assists', 't:field goals made',
    't:turnovers', 't:field goals attempted', 't:free throws attempted',
    't:free throws made', 't:defensive rebounds', 't:offensive rebounds',
    't:steals', 't:blocks', 't:fouls',
    # PER pace stats
    'to:defensive rebounds', 'to:offensive rebounds',
    'to:free throws attempted', 'to:turnovers',
    # for clustering
    't:three pointers attempted', 't:LSP', 'to:field goals attempted',
    'to:field goals made', 'to:three pointers made',
    'to:three pointers attempted',
    # gambling stats
    't:ats margin', 't:site'
]
print 'Fetching game data...'
bar = Bar(width=40)
for season in bar.iter(seasons):
    season_team_game_stats(sel, season)
data = pd.concat([data_from_json(season) for season in seasons])
# assumes 0 minute games were a normal length and just errors in
# data entry
data['t.minutes'].replace(0, 240, inplace=True)
data.to_pickle('data/team_data_{0}_{1}.pkl'.format(seasons[0], seasons[-1]))
data.to_csv('data/team_data_{0}_{1}.csv'.format(seasons[0], seasons[-1]))
def generate(chordlist, definitions, destdir="chords",
             template="external_chord.svg.j2"):
    """
    Generate chord diagrams based on a definitions file

    Args:
        chordlist(list [str]): list of chord names to generate
        definitions(dict): dictionary describing chords (fret positions etc)

    Kwargs:
        destdir(str): output directory for chord diagrams
    """
    if not os.path.isdir(destdir):
        try:
            os.makedirs(destdir)
        except (IOError, OSError) as E:
            print("Cannot create output directory {0.filename} "
                  "({0.strerror})".format(E))
            destdir = os.path.realpath(os.curdir)
    cfg = {}
    try:
        with codecs.open('fretboard.yml', mode="r", encoding="utf-8") as cfile:
            cfg.update(yaml.safe_load(cfile))
    except:
        raise
    env = Environment(loader=FileSystemLoader('templates'))
    tpl = env.get_template(template)
    missing = set([])
    print("progress")
    pbar = Bar("{:20}".format("Rendering Chords:"), max=len(chordlist))
    try:
        for chordname in pbar.iter(chordlist):
            if chordname in definitions:
                ch = definitions.get(chordname)
            else:
                # fall back to the alternative name (the original looked up
                # chordname again here, so the fallback could never succeed)
                altname = get_alt_name(chordname)
                ch = definitions.get(altname)
            if ch is None:
                missing.add(chordname)
                continue
            if 'name' not in ch:
                ch['name'] = symbolise(chordname)
            # replaces characters that cause shell problems
            chordfile = safe_name(chordname)
            with codecs.open("{}/{}.svg".format(destdir, chordfile),
                             mode='w', encoding="utf-8") as output:
                output.write(tpl.render(merge_ctx(cfg, **ch)))
    except:
        print("Failed to render {}".format(chordname))
        raise
    return missing
def with_progress_bar(queryset, message='', total=None):
    progress_bar = Bar(message, max=total or queryset.count())
    for instance in progress_bar.iter(queryset):
        yield instance
    progress_bar.finish()
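# A minimal usage sketch for the helper above; `Article` and `process` are
# hypothetical stand-ins. Passing total= skips the extra COUNT query when
# the size is already known.
def process_all_articles():
    qs = Article.objects.all()
    for article in with_progress_bar(qs, message='Processing articles'):
        process(article)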
from progress.bar import Bar

it = []
bar = Bar("test")
for elem in bar.iter(it):
    pass
def mirror_repo():
    setup_repo()
    mirror_temp = 'mirror-temp'
    mirror_dir = os.path.join(SCRIPT_DIR, mirror_temp)
    merge_xml = 'merged.xml'
    if args.only_download and args.skip_download:
        print('Both --only-download and --skip-download specified! '
              'Choose either, but not both.')
        return False
    if args.skip_download:
        tree = QgisPluginTree(os.path.join(mirror_dir, merge_xml))
    else:
        xml_url = args.plugins_xml_url
        if not xml_url or not xml_url.lower().endswith('.xml'):
            print('Missing plugins.xml or URL does not end with .xml')
            return False
        url_parts = urlparse(xml_url)
        b_name = '{0}_{1}'.format(
            url_parts.hostname.replace('.', '-'),
            os.path.splitext(os.path.basename(xml_url))[0])
        if not os.path.exists(mirror_dir):
            os.mkdir(mirror_dir)
        repo.remove_dir_contents(mirror_dir, strict=False)
        q_vers = args.qgis_versions.replace(' ', '').split(',') \
            if args.qgis_versions is not None else None
        if q_vers is None:
            urls = [xml_url]
            names = ['{0}.xml'.format(b_name)]
        else:
            urls = ['{0}?qgis={1}'.format(xml_url, v) for v in q_vers]
            names = ['{0}_{1}.xml'.format(b_name, v.replace('.', '-'))
                     for v in q_vers]
        tree = QgisPluginTree()
        dl_bar = Bar('Downloading/merging xml', fill='=', max=len(urls))
        dl_bar.start()
        try:
            for i in dl_bar.iter(range(0, len(urls))):
                out_xml = os.path.join(mirror_dir, names[i])
                download(urls[i], out=out_xml, bar=None)
                tree.merge_plugins(out_xml)
        except KeyboardInterrupt:
            return False
        print("Sorting merged plugins")
        name_sort = QgisPluginTree.plugins_sorted_by_name(tree.plugins())
        tree.set_plugins(name_sort)
        xml = tree.to_xml()
        print("Writing merged plugins to '{0}/{1}'"
              .format(mirror_temp, merge_xml))
        with open(os.path.join(mirror_dir, merge_xml), 'w') as f:
            f.write(xml)
    if args.only_xmls:
        return True
    downloads = {}
    elements = {}
    for p in tree.plugins():
        dl_url = p.findtext("download_url")
        file_name = p.findtext("file_name")
        if all([file_name, dl_url, dl_url not in downloads]):
            downloads[file_name] = dl_url
            elements[file_name] = p
        # for testing against plugins.qgis.org
        # if len(downloads) == 10:
        #     break
    if not args.skip_download:
        repo.remove_dir_contents(repo.upload_dir)
        dl_bar = Bar('Downloading plugins', fill='=', max=len(downloads))
        dl_bar.start()
        try:
            for f_name, dl_url in dl_bar.iter(downloads.iteritems()):
                out_dl = os.path.join(repo.upload_dir, f_name)
                download(dl_url, out=out_dl, bar=None)
        except KeyboardInterrupt:
            return False
    if args.only_download:
        print("Downloads complete, exiting since --only-download specified")
        return True
    zips = [z for z in os.listdir(repo.upload_dir)
            if (os.path.isfile(os.path.join(repo.upload_dir, z))
                and z.lower().endswith('.zip'))]
    if not zips:
        print('No plugins archives found in uploads directory')
        return False
    repo.output = False  # nix qgis_repo output, since using progress bar
    up_bar = Bar("Adding plugins to '{0}'".format(repo.repo_name),
                 fill='=', max=len(downloads))
    up_bar.start()
    try:
        for zip_name in up_bar.iter(downloads.iterkeys()):
            # plugins are 'untrusted,' until overwritten with mirrored
            # repo data
            repo.update_plugin(
                zip_name,
                name_suffix=args.name_suffix,
                auth=args.auth,
                auth_role=args.auth_role,
                # don't remove existing or just-added plugins when mirroring
                versions='none',
                untrusted=True,
                invalid_fields=(not args.validate_fields)
            )
    except KeyboardInterrupt:
        return False
    print("Sort plugins in '{0}'".format(repo.repo_name))
    # Sorting is the right thing to do here, plus...
    # Helps ensure 'startswith' finding of plugins will find earliest
    # occurrence of a partial version, e.g.
    # plugin.1.0 is found before plugin.1.0.1
    init_sort = QgisPluginTree.plugins_sorted_by_name(
        repo.plugins_tree.plugins())
    repo.plugins_tree.set_plugins(init_sort)
    up_bar = Bar("Updating '{0}' plugins with mirrored repo data"
                 .format(repo.repo_name),
                 fill='=', max=len(elements))
    up_bar.start()
    cp_tags = ['about', 'average_vote', 'author_name', 'create_date',
               'deprecated', 'description', 'downloads', 'experimental',
               'external_dependencies', 'homepage', 'rating_votes',
               'repository', 'tags', 'tracker', 'trusted', 'update_date',
               'uploaded_by']
    maybe_missing = []
    needs_resorted = False
    try:
        for file_name, el in up_bar.iter(elements.iteritems()):
            nam, _ = os.path.splitext(file_name)
            p = repo.plugins_tree.find_plugin_by_package_name(
                nam, starts_with=True)
            if not p:
                # maybe the base version has been adjusted, try again
                temp_nam = re.sub(r'((\d+\.)?(\d+\.)?(\d+))', r'.\1', nam)
                p = repo.plugins_tree.find_plugin_by_package_name(
                    temp_nam, starts_with=True)
            if not p:
                maybe_missing.append(file_name)
                continue
            else:
                p = p[0]
            # print("Updating '{0}'...".format(p[0].get('name')))
            for tag in cp_tags:
                tag_el = el.find(tag)
                tag_p = p.find(tag)
                if tag_el is not None and tag_p is not None:
                    txt = tag_el.text
                    # print("  {0}: {1} <- {2}".format(tag, tag_p.text, txt))
                    if tag in QgisPlugin.metadata_types('cdata'):
                        if tag_el.text is not None:
                            txt = etree.CDATA(tag_el.text)
                    tag_p.text = txt
            # update plugin name
            ns = args.name_suffix if args.name_suffix is not None \
                else repo.plugin_name_suffix
            if el.get('name') is not None:
                el_name = u"{0}{1}".format(el.get('name'), ns)
                if p.get('name') != el_name:
                    needs_resorted = True
                    p.set('name', el_name)
    except KeyboardInterrupt:
        return False
    if needs_resorted:
        print("Re-sorting plugins in '{0}'".format(repo.repo_name))
        re_sort = QgisPluginTree.plugins_sorted_by_name(
            repo.plugins_tree.plugins())
        repo.plugins_tree.set_plugins(re_sort)
    print("Writing '{0}' {1}".format(repo.repo_name, repo.plugins_xml_name))
    repo.write_plugins_xml(repo.plugins_tree_xml())
    print('\nDone mirroring...')
    print("Plugin results:\n  attempted: {0}\n  mirrored: {1}"
          .format(len(tree.plugins()), len(repo.plugins_tree.plugins())))
    if maybe_missing:
        print('\nWARNING (version conflicts): plugins downloaded but MAY not '
              'be in XML after update:\n  {0}\n'
              .format(', '.join(maybe_missing)))
    return True
def crack_compression(secret_length=16, matched=b"", target=0,
                      working_product=None, candidate_length=1):
    global total_compressions
    # If we've got enough characters, go on and exit out
    if len(matched) >= secret_length:
        # (the original had a stray trailing comma here, which returned a
        # 1-tuple instead of the matched bytes)
        return matched
    # If we don't already have a list of stuff to begin with, then start
    # with printables
    if working_product is None:
        current_product = set(CHARSET[:])
    # Otherwise try every combination of printables with the best matches
    # from the parent caller
    else:
        current_product = set(product(working_product, CHARSET))
        current_product = set(
            reduce(lambda a, b: a + b, item) for item in current_product)
    # Get our baseline size
    if target == 0:
        c = zlib.compressobj()
        target = len(c.compress(TOTAL + matched) + c.flush(zlib.Z_SYNC_FLUSH))
    # What we already know is matched
    print("Current matched: %s" % matched)
    # Candidate length
    print("Trying candidates of length: %s" % candidate_length)
    # Total number of things to try
    # Num of previous matches * len(printables)
    # Or just len(printables)
    print("Number of permutations: %s" % len(current_product))
    # Target to achieve. This is the length that was achieved the last try
    print("Target: %s" % target)
    # This is going to be a dictionary of all the compressed lengths of our
    # tries
    length_dict = {}
    # Loading bar
    loading = Bar("Processing")
    # For every group of characters
    for group in loading.iter(current_product):
        total_compressions += 1
        # Compress it and add the length to our table
        c = zlib.compressobj()
        grouplen = len(
            c.compress(TOTAL + matched + group) + c.flush(zlib.Z_SYNC_FLUSH))
        length_dict[group] = grouplen
    # Best length
    # Can be shared by multiple keys
    best_length = min(length_dict.values())
    # Grab all our keys that have the min length
    best_keys = list(
        filter(lambda x: length_dict[x] == best_length, length_dict.keys()))
    best_keys.sort()
    # Number of keys
    # If it's one then we've matched a character
    unique = len(best_keys)
    # Print some data
    print("Best length is: %s" % best_length)
    print("Number of candidates: %s" % unique)
    print("Candidates: %s" % best_keys[:256])
    # Only one character is shorter
    # Huzzah!
    if unique == 1:
        print("Found Unique!")
        print()
        # Now work from what we already had matched + what we just matched
        matched = matched + list(best_keys)[0]
        return crack_compression(secret_length, matched, best_length)
    elif candidate_length >= 8 * secret_length:
        return best_keys
    # Multiple things could work
    # Try and distinguish with another character
    else:
        print("No Unique!")
        print()
        if unique <= 5:
            second_best_keys = list(
                filter(lambda x: length_dict[x] == best_length + 1,
                       length_dict.keys()))
            second_best_keys.sort()
            best_keys.extend(second_best_keys[:30])
        # Same as above except we're not matching anything
        # Instead we're supplying all our possible matches so we can brute
        # force them with another character
        return crack_compression(secret_length, matched, best_length,
                                 best_keys, candidate_length + 1)
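# The cracker above depends on module-level state that is not shown. A
# hedged sketch of plausible definitions; the values are assumptions, not
# the originals:
import string
import zlib
from itertools import product
from functools import reduce  # the snippet uses reduce as a bare name
from progress.bar import Bar

CHARSET = [c.encode() for c in string.printable]  # single-byte candidates
TOTAL = b""  # known plaintext compressed alongside the secret (assumed)
total_compressions = 0  # running count of compression-oracle queries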
main_repo = git.Repo('.')

# Update the main repository.
main_repo.git.fetch('origin')

# Gather the list of merged MRs.
MERGE_REQUEST_TRAILER_PREFIX = 'Merge-request: !'
merged_mr_ids = set()
branch_point = None
bar = Bar('Searching for merged merge requests... %(hexsha)s')
bar.hexsha = '0000000000000000000000000000000000000000'
parents = main_repo.git.rev_list('--first-parent', '--min-parents=2',
                                 'origin/release')
for sha in bar.iter(parents.split('\n')):
    sha = sha.rstrip()
    if not sha:
        continue
    commit = main_repo.commit(sha)
    bar.hexsha = commit.hexsha

    # See if we're still tracking merges into the release branch.
    if not commit.summary.endswith('into release') and branch_point is None:
        branch_point = commit.hexsha

    for line in commit.message.split('\n'):
        if line.startswith(MERGE_REQUEST_TRAILER_PREFIX):
            mr_id = int(line[len(MERGE_REQUEST_TRAILER_PREFIX):])
            merged_mr_ids.add(mr_id)

bar = Bar('Searching for the first ineligible commit... %(hexsha)s')
bar.hexsha = '0000000000000000000000000000000000000000'
def generator(n):
    bar = Bar(msg)
    for i in bar.iter(range(int(n))):
        yield
    yield
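# Driving sketch for the generator above; `msg` is a free variable, so it
# must come from an enclosing or module scope (the value here is assumed).
# Each next() advances the bar one step, and the trailing yield fires once
# more after the bar completes.
msg = 'working'
for _ in generator(10):
    pass  # one unit of hypothetical work per step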
def rate_stats(self):
    """
    Calculates rate stats for each team through self.as_of.

    Offensive rate stats are all per 100 possessions. Defensive rate
    stats are a mix of per 100 possessions and of success rates.

    OFFENSE
    - assist rate
    - three point attempt rate
    - free throw attempt rate
    - field goal attempt rate

    DEFENSE
    - steals + blocks
    - three point attempts allowed
    - fouls
    - field goal percentage allowed
    """
    idx = pd.IndexSlice
    working = pd.read_sql(
        """
        SELECT season, team, date, "o.team", "t.assists",
               "t.three_pointers_attempted", "t.free_throws_attempted",
               "t.field_goals_attempted", "t.steals", "t.blocks", "t.fouls",
               "to.three_pointers_attempted", "to.field_goals_made",
               "to.field_goals_attempted"
        FROM game_data;
        """,
        self.db
    )
    working['date'] = working['date'].apply(self.make_date)
    working.set_index(['season', 'team', 'date'], inplace=True)
    working.sort(inplace=True)
    print 'Calculating rate stats...'
    bar = Bar(width=40, suffix='%(percent)d%%')
    for i in bar.iter(working.index):
        working.loc[i, 'o.poss'] = self.poss_table.loc[
            idx[i[0], self.poss_table.loc[i, 'o.team'], i[2]], 'poss'
        ]
    o_stats = pd.DataFrame({
        'assist_rate':
            working['t.assists'] / self.poss_table['poss'] * 100,
        '3p_attempt_rate':
            working['t.three_pointers_attempted'] /
            self.poss_table['poss'] * 100,
        'FT_attempt_rate':
            working['t.free_throws_attempted'] /
            self.poss_table['poss'] * 100,
        'FG_attempt_rate':
            working['t.field_goals_attempted'] /
            self.poss_table['poss'] * 100
    }).sort()
    d_stats = pd.DataFrame({
        'st+bl_rate':
            (working['t.steals'] + working['t.blocks']) /
            working['o.poss'] * 100,
        '3PA_allowed':
            working['to.three_pointers_attempted'] /
            working['o.poss'] * 100,
        'foul_rate':
            working['t.fouls'] / working['o.poss'] * 100,
        'FG%_allowed':
            working['to.field_goals_made'] /
            working['to.field_goals_attempted'] * 100
    }).sort()
    return [o_stats, d_stats]
def team_pers(self, s):
    """
    Returns (season, team, date)-indexed team efficiency ratings for
    dataset through self.as_of for season.

    Team efficiency rating calculated as player efficiency rating, but
    for whole teams. Player efficiency rating calculation taken from:
    http://www.basketball-reference.com/about/per.html
    """
    idx = pd.IndexSlice
    try:
        working = self.data.xs(s, level='season')
    except KeyError:
        return pd.DataFrame()
    if len(working.dropna(how='any')) == 0:
        return pd.DataFrame()
    dates = working.index.get_level_values('date').unique()
    teams = working.index.get_level_values('team').unique()
    factors, vops, drb_percs, lg_pace = self.season_per_adjusters(s)
    # limits dates to those not already present in database
    # if latest handles case where table exists but season has not
    # yet been processed
    try:
        yearscheck = pd.read_sql(
            'SELECT * FROM aPERs WHERE season={};'.format(s), self.db
        )
        if s in yearscheck['season']:
            latest = self.get_max_date(s, 'aPERs')
            if latest:
                dates = dates[dates > latest]
    except sql.OperationalError:
        pass
    if len(dates) == 0:
        out = pd.read_sql(
            """
            SELECT * FROM aPERs WHERE season={};
            """.format(s),
            self.db
        )
        out['date'] = out['date'].apply(
            lambda x: dt.datetime(
                *time.strptime(x, '%Y-%m-%d %H:%M:%S')[:6]
            )
        )
        return out.set_index(['season', 'team', 'date'])
    bar = Bar(width=40, suffix='%(percent)d%%')
    print 'Calculating PERs for season {}...'.format(s)
    out = pd.concat(
        [self.per(s, t, d, factors, vops, drb_percs, lg_pace)
         for t, d in bar.iter(
             [(te, da) for te in teams for da in dates]
         )]
    ).sort()
    out.to_sql('aPERs', self.db, if_exists='append')
    return out