Example #1
    def handle(self, *args, **options):

        def parse_buscador(r):
            pq = PyQuery(r.content)
            return pq('div.storelocator_result')

        suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
        if not os.path.exists(suc_dir):
            os.makedirs(suc_dir)

        FILENAME = 'carrefour_%s.csv' % datetime.now().strftime("%Y-%m-%d-%H%M%S")
        FILENAME = os.path.join(suc_dir, FILENAME)

        writer = unicodecsv.DictWriter(open(FILENAME, 'wb'), SUCURSAL_COLS)
        writer.writeheader()

        ciudades = City.objects.filter(country__name='Argentina',
                                       population__gt=DESDE)
        results = []

        bar = Bar('Obteniendo sucursales de Carrefour', suffix='%(percent)d%%')
        for city in bar.iter(ciudades):
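            # note: the posted address is constant; only the per-city geocode varies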
            r = requests.post('http://www.carrefour.com.ar/storelocator/index/search/',
                              {'search[address]': 'Mendoza, Argentina',
                               'search[geocode]': '%s, %s' % (city.latitude,
                                                              city.longitude)})
            results.extend(parse_buscador(r))

        # html = '\n\n'.join(PyQuery(r).html() for r in results)
        # f = open(FILENAME + '.html', 'w')
        # f.write(html.encode('utf8'))

        CONOCIDOS = set()
        nuevas = 0
        bar = Bar('Extrayendo información de nuevas sucursales', suffix='%(percent)d%%')
        for suc in bar.iter(results):
            supermercado = self.parse_suc(suc)
            nombre = supermercado['nombre']
            if nombre in CONOCIDOS:
                # print("%s ya cargado" % nombre)
                continue
            CONOCIDOS.add(nombre)
            # print(supermercado)
            writer.writerow(supermercado)
            nuevas += 1

        print "Se encontraron %d sucursales únicas de Carrefour (%d resultados)" % (nuevas,
                                                                                    len(ciudades))
Example #2
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError(
                'dame el geojson, pa'
            )

        geojson = args[0]
        if geojson.startswith('http'):
            fh = urllib2.urlopen(geojson)
        else:
            fh = open(args[0])
        self.data = json.load(fh)

        suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
        if not os.path.exists(suc_dir):
            os.makedirs(suc_dir)

        FILENAME = self.FILENAME % datetime.now().strftime("%Y-%m-%d-%H%M%S")
        FILENAME = os.path.join(suc_dir, FILENAME)
        writer = unicodecsv.DictWriter(open(FILENAME, 'wb'),
                                       fieldnames=self.get_columnas())
        writer.writeheader()
        bar = Bar('Convirtiendo ', suffix='%(percent)d%%')
        for feature in bar.iter(self.entrada()):
            sucursal = self.parse_sucursal(feature)
            writer.writerow(sucursal)
Example #3
def backPropDifWay(ts):
    w0 = numpy.random.uniform(-1, 1, (2, 2))
    w1 = numpy.random.uniform(-1, 1, (2, 1))
    b0 = numpy.random.uniform(-1, 1, (1, 2))
    b1 = numpy.random.uniform(-1, 1, (1, 1))
    lamb = 5
    bar = Bar(
        "Learning",
        suffix="Iterations: %(index)d/%(max)d, Elapsed: %(elapsed_td)s, %(error)s")
    bar.error = 0
    # the training set is fixed, so build the input/target matrices once
    output = numpy.matrix([numpy.array(y) for x, y in ts]).T
    a0 = numpy.matrix([x for x, y in ts])
    for epochs in bar.iter(range(500000)):
        dot1 = numpy.dot(a0, w0) + b0
        a1 = sigmoid(dot1)
        dot2 = numpy.dot(a1, w1) + b1
        a2 = sigmoid(dot2)
        delta1 = numpy.multiply(sigmoidDeriv(dot2), (output - a2))
        delta0 = numpy.multiply(sigmoidDeriv(dot1), numpy.dot(delta1, w1.T))
        # weight gradients use the activations feeding each layer
        w0 = w0 + lamb * numpy.dot(a0.T, delta0)
        w1 = w1 + lamb * numpy.dot(a1.T, delta1)
        # bias gradients are summed over the batch to keep their shapes
        b0 = b0 + lamb * delta0.sum(axis=0)
        b1 = b1 + lamb * delta1.sum(axis=0)
        bar.error = numpy.abs(a2 - output).mean()
    print(delta0.shape, delta1.shape)
    print(w0.shape, w1.shape)
    return a2
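
Note: the %(error)s field above works because progress formats the message and suffix against the bar object itself, so any attribute set on the bar can appear there. A minimal sketch of the same trick, assuming the standard progress package (names illustrative):

from progress.bar import Bar

bar = Bar('Training', suffix='%(percent)d%% loss=%(loss).4f')
bar.loss = 0.0
for step in bar.iter(range(100)):
    bar.loss = 1.0 / (step + 1)  # stand-in for a real loss value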
Example #4
        def _rforest_plot(self, pen_params):
            bar = Bar(
                width=40,
                suffix='%(percent)d%%'
            )

            X, Y = np.meshgrid(pen_params['n_estimators'],
                               pen_params['max_depth'])
            print 'Getting errors for {}...'.format(self.method)
            # progress tracks the outer loop; Z is filled x-major
            Z = np.array([
                self.k_fold_results(**{
                    'n_estimators': x,
                    'max_depth': y
                }).mean() for x in bar.iter(pen_params['n_estimators'])
                for y in pen_params['max_depth']
            ])
            # reshape to the meshgrid layout: rows vary with max_depth
            Z = Z.reshape(
                len(pen_params['n_estimators']),
                len(pen_params['max_depth'])
            ).T
            fig, ax = plt.subplots()

            p = ax.contourf(X, Y, Z,
                            cmap='RdYlBu')
            ax.set_xlabel('n_estimators')
            ax.set_ylabel('max_depth')
            ax.set_title('rforest test error rate')
            plt.colorbar(p)
            plt.savefig('test_error_rforest.png')
Example #5
    def handle(self, *args, **options):
        re_summary = re.compile(r"<(\w*) class=\"?summary\"?>.*?</\1>", flags=re.I | re.S)
        re_section = re.compile(r"\<h2>(.*?)(</h2>|<br>)", flags=re.I | re.S)
        bar = Bar(width=20, suffix="%(percent)d%% %(index)d/%(max)d %(elapsed_td)s ETA %(eta_td)s")

        qs = Article.objects.filter(source="BHC")
        for a in bar.iter(qs):
            a.section_set.all().delete()
            s = a.content
            n = len(s)
            summary_match = re_summary.search(s)
            start = summary_match.end() + 1 if summary_match else 0
            section_no = 0
            section_name = "Introduction"

            def add_section(content):
                section = Section(section_no=section_no, title=section_name)
                section.article = a
                section.content = content.strip()
                section.save()

            for h2 in re_section.finditer(s):
                if h2.start() >= start:
                    add_section(s[start : h2.start() - 1])
                    section_no += 1
                section_name = h2.group(1)
                start = h2.end()
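            # no break occurs in this loop, so the else below always runs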
            else:
                if start < n:
                    add_section(s[start:n])
Example #6
    def make_me_a_rockstar(self):
        self.repo = git.Repo.init(self.repo_path)
        progress_msg = 'Making you a Rockstar Programmer'
        bar = Bar(progress_msg, suffix='%(percent)d%%')
        for commit_date in bar.iter(self._get_dates_list()):
            self._edit_and_commit(str(uuid.uuid1()), commit_date)
        self._make_last_commit()
        print('\nYou are now a Rockstar Programmer!')
Example #7
    def make_me_a_rockstar(self):
        self.repo = git.Repo.init(self.repo_path)
        progress_msg = 'Making you a Rockstar Programmer'
        bar = Bar(progress_msg, suffix='%(percent)d%%')
        for commit_date in bar.iter(self._get_dates_list()):
            self._edit_and_commit(str(uuid.uuid1()), commit_date)
        self._make_last_commit()
        print('\nYou are now a Rockstar Programmer!')
Example #8
def main():
    dialect = csv.Sniffer().sniff(EJEMPLO)
    reader = csv.reader(open(sys.argv[1]), dialect=dialect)
    writer = csv.DictWriter(open('productos.csv', 'w'), fieldnames=PRODUCTO_COLS)
    writer.writeheader()
    bar = Bar('Normalizando CSV', suffix='%(percent)d%%')
    for l in bar.iter(reader):
        data = normalizar(dict(zip(headers, l)))
        writer.writerow(data)
Example #9
def main():
    dialect = csv.Sniffer().sniff(EJEMPLO)
    reader = csv.reader(open(sys.argv[1]), dialect=dialect)
    writer = csv.DictWriter(open('productos.csv', 'w'),
                            fieldnames=PRODUCTO_COLS)
    writer.writeheader()
    bar = Bar('Normalizando CSV', suffix='%(percent)d%%')
    for l in bar.iter(reader):
        data = normalizar(dict(zip(headers, l)))
        writer.writerow(data)
Example #10
    def tune_model(self):
        """
        Tunes self.model using a GridSearch.
        """

        grid = {
            'ridge': ParameterGrid({
                'alpha': [i / 2. for i in range(1, 21)],
            }),
            'svc': ParameterGrid({
                'C': [i / 2. for i in range(1, 21)],
                'gamma': [i / 10. for i in range(1, 10)]
            }),
            'svr': ParameterGrid({
                'C': [i / 2. for i in range(1, 21)],
                'epsilon': [i / 10. for i in range(11)]
            }),
            'rforest': ParameterGrid({
                'n_estimators': range(25, 501, 25),
                'max_depth': range(2, 10),
                'min_samples_split': range(5, 101, 5)
            })
        }[self.method]

        best = {'params': None, 'score': 0}

        bar = Bar(message='Searching...',
                  width=40,
                  suffix='%(percent)d%%')
        for params in bar.iter(grid):
            # RUNTIME REDUCER
#            if np.random.uniform() > 0.975:
#                break
            score = cross_val_score(
                self.model(**params),
                self.dirty_X,
                np.squeeze(self.dirty_y),
                cv=8,
                n_jobs=4
            ).mean()
            if score > best['score']:
                best.update({
                    'params': params,
                    'score': score
                })

        bestmod = self.model(
            n_jobs=-1,
            **best['params']
        )
        bestmod.fit(
            self.dirty_X,
            np.squeeze(self.dirty_y)
        )
        self.bestmod = bestmod
Example #11
    def all_pers(self):
        """
        Calculates PER for the whole dataset. See per, team_pers for
        method on calculating PER.
        """


        bar = Bar(width=40)
        print 'Calculating PERs...'
        return pd.concat(
            [self.team_pers(s) for s in bar.iter(self.seasons)]
        ).groupby(level=self.data.index.names).first()
Example #12
    def handle(self, *args, **options):
        if len(args) == 0 or not args[0]:
            raise CommandError("No path is provided.")

        path = args[0]
        bar = Bar("Importing...",
                  suffix="%(percent)d%% %(index)d/%(max)d ETA %(eta_td)s")
        for filename in bar.iter(os.listdir(path)):
            basename = os.path.basename(filename)
            filename = os.path.join(path, filename)
            try:
                self.do_file(filename)
            except Exception as e:
                traceback.print_exc()
                raise CommandError("{0}: {1}".format(basename, e))
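
Note: this example combines the built-in suffix fields seen across this page; a short sketch, assuming the standard progress package:

from progress.bar import Bar

bar = Bar('Working',
          suffix='%(index)d/%(max)d %(percent)d%% elapsed %(elapsed_td)s ETA %(eta_td)s')
for _ in bar.iter(range(50)):
    pass  # work goes here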
Example #13
def update_plugin():
    setup_repo()
    if args.zip_name.lower() == 'all':
        zips = [
            z for z in os.listdir(repo.upload_dir)
            if (os.path.isfile(os.path.join(repo.upload_dir, z))
                and z.lower().endswith('.zip'))
        ]
    else:
        zips = [args.zip_name]

    if not zips:
        if args.zip_name.lower() == 'all':
            print('No plugins archives found in uploads directory')
        else:
            print('No plugin archive name defined')
        return False

    repo.output = False  # nix qgis_repo output, since using progress bar
    up_bar = Bar("Updating plugins in '{0}'".format(repo.repo_name),
                 fill='=',
                 max=len(zips))
    up_bar.start()
    for i in up_bar.iter(range(0, len(zips))):
        try:
            repo.update_plugin(zips[i],
                               name_suffix=args.name_suffix,
                               auth=args.auth,
                               auth_role=args.auth_role,
                               git_hash=args.git_hash,
                               versions=args.versions,
                               keep_zip=args.keep_zip,
                               untrusted=args.untrusted,
                               invalid_fields=args.invalid_fields)
        except KeyboardInterrupt:
            return False

    if args.sort_xml:
        print("Sorting repo plugins.xml")
        post_sort = QgisPluginTree.plugins_sorted_by_name(
            repo.plugins_tree.plugins())
        repo.plugins_tree.set_plugins(post_sort)

    return True
Example #14
def update_plugin():
    setup_repo()
    if args.zip_name.lower() == 'all':
        zips = [z for z in os.listdir(repo.upload_dir)
                if (os.path.isfile(os.path.join(repo.upload_dir, z))
                    and z.lower().endswith('.zip'))]
    else:
        zips = [args.zip_name]

    if not zips:
        if args.zip_name.lower() == 'all':
            print('No plugins archives found in uploads directory')
        else:
            print('No plugin archive name defined')
        return False

    repo.output = False  # nix qgis_repo output, since using progress bar
    up_bar = Bar("Updating plugins in '{0}'".format(repo.repo_name),
                 fill='=', max=len(zips))
    up_bar.start()
    for i in up_bar.iter(range(0, len(zips))):
        try:
            repo.update_plugin(
                zips[i],
                name_suffix=args.name_suffix,
                auth=args.auth,
                auth_role=args.auth_role,
                git_hash=args.git_hash,
                versions=args.versions,
                keep_zip=args.keep_zip,
                untrusted=args.untrusted,
                invalid_fields=args.invalid_fields
            )
        except KeyboardInterrupt:
            return False

    if args.sort_xml:
        print("Sorting repo plugins.xml")
        post_sort = QgisPluginTree.plugins_sorted_by_name(
            repo.plugins_tree.plugins())
        repo.plugins_tree.set_plugins(post_sort)

    return True
Example #15
    def score_friends(self, friends, agg=True):
        if not friends:
            raise ValueError('{} has no friends.'.format(self.user))

        users = [
            NetworkParser(u, self.path_to_keys, self.ntweets)
            for u in friends
        ]

        bar = Bar(
            width=40,
            suffix='%(percent)d%%'
        )
        print 'Scoring {}\'s network...'.format(self.user)
        tweet_scores = pd.concat(
            [u.mean_scores(agg=agg) for u in bar.iter(users)]
        )

        return tweet_scores.groupby('user').mean().reset_index()
Example #16
def main():
    prods13 = Producto.objects.extra(where=["CHAR_LENGTH(upc) = 13"])
    prods12 = Producto.objects.extra(where=["CHAR_LENGTH(upc) = 12"])

    antes = DescripcionAlternativa.objects.count()
    bar = Bar('Migrando', suffix='%(percent)d%%')
    with transaction.atomic():
        for p13 in bar.iter(prods13):
            try:
                p12 = prods12.get(upc=p13.upc[:12])
            except Producto.DoesNotExist:
                continue

            p13.agregar_descripcion(descripcion=p12.descripcion, ignorar=True)
            for precio in p12.precios.all():
                precio.producto = p13
                precio.save(update_fields=['producto'])
            p12.delete()
    despues = DescripcionAlternativa.objects.count()
    print "se unificaron %d productos" % (despues - antes)
Example #17
def main():
    prods13 = Producto.objects.extra(where=["CHAR_LENGTH(upc) = 13"])
    prods12 = Producto.objects.extra(where=["CHAR_LENGTH(upc) = 12"])

    antes = DescripcionAlternativa.objects.count()
    bar = Bar('Migrando', suffix='%(percent)d%%')
    with transaction.atomic():
        for p13 in bar.iter(prods13):
            try:
                p12 = prods12.get(upc=p13.upc[:12])
            except Producto.DoesNotExist:
                continue

            p13.agregar_descripcion(descripcion=p12.descripcion,
                                    ignorar=True)
            for precio in p12.precios.all():
                precio.producto = p13
                precio.save(update_fields=['producto'])
            p12.delete()
    despues = DescripcionAlternativa.objects.count()
    print "se unificaron %d productos" % (despues - antes)
Example #18
    def _non_forest_plot(self, pen_params):
        bar = Bar(
            width=40,
            suffix='%(percent)d%%'
        )
        values = pen_params.values()[0]
        print 'Getting errors for {}...'.format(self.method)
        errors = np.array([
            (alpha, 1 - self.k_fold_results(**{
                pen_params.keys()[0]: alpha
            }).mean()) for alpha in bar.iter(values)
        ])
        fig, ax = plt.subplots()
        ax.plot(errors[:, 0], errors[:, 1])
        ax.set_title('{} test error rate'.format(self.method))
        ax.set_xlabel(
            'penalty parameter {}'.format(pen_params.keys()[0])
        )
        ax.set_ylabel('test error')
        plt.savefig('test_error_{METHOD}_{PARAM}.png'.format(
            METHOD=self.method,
            PARAM=pen_params.keys()[0]
        ))
Example #19
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError('dame el geojson, pa')

        geojson = args[0]
        if geojson.startswith('http'):
            fh = urllib2.urlopen(geojson)
        else:
            fh = open(args[0])
        self.data = json.load(fh)

        suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
        if not os.path.exists(suc_dir):
            os.makedirs(suc_dir)

        FILENAME = self.FILENAME % datetime.now().strftime("%Y-%m-%d-%H%M%S")
        FILENAME = os.path.join(suc_dir, FILENAME)
        writer = unicodecsv.DictWriter(open(FILENAME, 'wb'),
                                       fieldnames=self.get_columnas())
        writer.writeheader()
        bar = Bar('Convirtiendo ', suffix='%(percent)d%%')
        for feature in bar.iter(self.entrada()):
            sucursal = self.parse_sucursal(feature)
            writer.writerow(sucursal)
Example #20
    args = parser.parse_args()
    seasons = sorted(args.seasons)

    sel = ['date', 'team', 'o:team', 'game_number', 't:points', 'to:points',  # indexing and general
           't:minutes', 't:three pointers made', 't:assists', 't:field goals made',
           't:turnovers', 't:field goals attempted', 't:free throws attempted', 't:free throws made',
           't:defensive rebounds', 't:offensive rebounds', 't:steals', 't:blocks', 't:fouls',  # PER volume stats
           'to:defensive rebounds', 'to:offensive rebounds', 'to:free throws attempted', 'to:turnovers',  # PER pace stats
           't:three pointers attempted', 't:LSP', 'to:field goals attempted', 'to:field goals made',
           'to:three pointers made', 'to:three pointers attempted',  # for clustering
           't:ats margin', 't:site']  # gambling stats

    print 'Fetching game data...'
    bar = Bar(width=40)
    for season in bar.iter(seasons):
        season_team_game_stats(sel, season)

    data = pd.concat(
        [data_from_json(season) for season in seasons]
    )
    # assumes 0-minute games were actually normal length and that the zeros
    # are just data-entry errors
    data['t.minutes'].replace(0, 240, inplace=True)

    data.to_pickle('data/team_data_{0}_{1}.pkl'.format(
        seasons[0], seasons[-1]
    ))
    data.to_csv('data/team_data_{0}_{1}.csv'.format(
        seasons[0], seasons[-1]
    ))
Example #21
def generate(chordlist,
             definitions,
             destdir="chords",
             template="external_chord.svg.j2"):
    """
    Generate chord diagrams based on a definitions file

    Args:
        chordlist(list [str]): list of chord names to generate
        definitions(dict): dictionary describing chords (fret positions etc)

    Kwargs:
        destdir(str): output directory for chord diagrams
        template(str): jinja2 template used to render each diagram
    """
    if not os.path.isdir(destdir):
        try:
            os.makedirs(destdir)
        except (IOError, OSError) as E:
            print("Cannot create output directory {0.filename} ({0.strerror})"
                  .format(E))
            destdir = os.path.realpath(os.curdir)

    cfg = {}

    with codecs.open('fretboard.yml', mode="r", encoding="utf-8") as cfile:
        cfg.update(yaml.safe_load(cfile))

    env = Environment(loader=FileSystemLoader('templates'))
    tpl = env.get_template(template)

    missing = set([])

    print("progress")
    pbar = Bar("{:20}".format("Rendering Chords:"), max=len(chordlist))
    try:
        for chordname in pbar.iter(chordlist):
            if chordname in definitions:
                ch = definitions.get(chordname)
            else:
                # look up under the alternate name, not the original one
                altname = get_alt_name(chordname)
                ch = definitions.get(altname)

            if ch is None:
                missing.add(chordname)
                continue

            if 'name' not in ch:
                ch['name'] = symbolise(chordname)

            # replaces characters that cause shell problems
            chordfile = safe_name(chordname)

            with codecs.open("{}/{}.svg".format(destdir, chordfile),
                             mode='w',
                             encoding="utf-8") as output:
                output.write(tpl.render(merge_ctx(cfg, **ch)))
    except:
        print("Failed to render {}".format(chordname))
        raise

    return missing
Example #22
def with_progress_bar(queryset, message='', total=None):
    progress_bar = Bar(message, max=total or queryset.count())
    for instance in progress_bar.iter(queryset):
        yield instance
    progress_bar.finish()
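
A usage sketch for the helper above (the Article model and message are illustrative):

for article in with_progress_bar(Article.objects.all(), message='Reindexing'):
    article.save()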
Example #23
from progress.bar import Bar

it = []

bar = Bar("test")
for elem in bar.iter(it):
    pass
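
With an empty iterable the loop body never runs and the bar simply finishes. The same idiom over real items, as a sketch:

items = list(range(200))
bar = Bar('test', suffix='%(percent)d%%')
for elem in bar.iter(items):
    pass  # process elem here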
Example #24
def mirror_repo():
    setup_repo()
    mirror_temp = 'mirror-temp'
    mirror_dir = os.path.join(SCRIPT_DIR, mirror_temp)
    merge_xml = 'merged.xml'

    if args.only_download and args.skip_download:
        print('Both --only-download and --skip-download specified! '
              'Choose either, but not both.')
        return False

    if args.skip_download:
        tree = QgisPluginTree(os.path.join(mirror_dir, merge_xml))
    else:
        xml_url = args.plugins_xml_url
        if not xml_url or not xml_url.lower().endswith('.xml'):
            print('Missing plugins.xml or URL does not end with .xml')
            return False
        url_parts = urlparse(xml_url)
        b_name = '{0}_{1}'.format(
            url_parts.hostname.replace('.', '-'),
            os.path.splitext(os.path.basename(xml_url))[0])

        if not os.path.exists(mirror_dir):
            os.mkdir(mirror_dir)
        repo.remove_dir_contents(mirror_dir, strict=False)

        q_vers = args.qgis_versions.replace(' ', '').split(',') \
            if args.qgis_versions is not None else None
        if q_vers is None:
            urls = [xml_url]
            names = ['{0}.xml'.format(b_name)]
        else:
            urls = ['{0}?qgis={1}'.format(xml_url, v)
                    for v in q_vers]
            names = ['{0}_{1}.xml'.format(b_name, v.replace('.', '-'))
                     for v in q_vers]

        tree = QgisPluginTree()
        dl_bar = Bar('Downloading/merging xml', fill='=', max=len(urls))
        dl_bar.start()
        try:
            for i in dl_bar.iter(range(0, len(urls))):
                out_xml = os.path.join(mirror_dir, names[i])
                download(urls[i], out=out_xml, bar=None)
                tree.merge_plugins(out_xml)
        except KeyboardInterrupt:
            return False

        print("Sorting merged plugins")
        name_sort = QgisPluginTree.plugins_sorted_by_name(tree.plugins())
        tree.set_plugins(name_sort)

        xml = tree.to_xml()

        print("Writing merged plugins to '{0}/{1}'".format(mirror_temp,
                                                           merge_xml))
        with open(os.path.join(mirror_dir, merge_xml), 'w') as f:
            f.write(xml)
        if args.only_xmls:
            return True

    downloads = {}
    elements = {}
    for p in tree.plugins():
        dl_url = p.findtext("download_url")
        file_name = p.findtext("file_name")
        if all([file_name, dl_url, dl_url not in downloads]):
            downloads[file_name] = dl_url
            elements[file_name] = p
            # for testing against plugins.qgis.org
            # if len(downloads) == 10:
            #     break

    if not args.skip_download:
        repo.remove_dir_contents(repo.upload_dir)

        dl_bar = Bar('Downloading plugins', fill='=', max=len(downloads))
        dl_bar.start()
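        # max is set explicitly because iteritems() returns an iterator with no len()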
        try:
            for f_name, dl_url in dl_bar.iter(downloads.iteritems()):
                out_dl = os.path.join(repo.upload_dir, f_name)
                download(dl_url, out=out_dl, bar=None)
        except KeyboardInterrupt:
            return False

    if args.only_download:
        print("Downloads complete, exiting since --only-download specified")
        return True

    zips = [z for z in os.listdir(repo.upload_dir)
            if (os.path.isfile(os.path.join(repo.upload_dir, z))
                and z.lower().endswith('.zip'))]
    if not zips:
        print('No plugins archives found in uploads directory')
        return False

    repo.output = False  # nix qgis_repo output, since using progress bar
    up_bar = Bar("Adding plugins to '{0}'".format(repo.repo_name),
                 fill='=', max=len(downloads))
    up_bar.start()
    try:
        for zip_name in up_bar.iter(downloads.iterkeys()):
            repo.update_plugin(
                zip_name,
                name_suffix=args.name_suffix,
                auth=args.auth,
                auth_role=args.auth_role,
                # don't remove existing or just-added plugins when mirroring
                versions='none',
                untrusted=True,
                invalid_fields=(not args.validate_fields)
            )
            # plugins are 'untrusted' until overwritten with mirrored repo data
    except KeyboardInterrupt:
        return False

    print("Sort plugins in '{0}'".format(repo.repo_name))
    # Sorting is the right thing to do here, plus...
    # Helps ensure 'startswith' finding of plugins will find earliest occurrence
    # of a partial version, e.g. plugin.1.0 is found before plugin.1.0.1
    init_sort = QgisPluginTree.plugins_sorted_by_name(
        repo.plugins_tree.plugins())
    repo.plugins_tree.set_plugins(init_sort)

    up_bar = Bar("Updating '{0}' plugins with mirrored repo data"
                 .format(repo.repo_name),
                 fill='=', max=len(elements))
    up_bar.start()
    cp_tags = ['about', 'average_vote', 'author_name', 'create_date',
               'deprecated', 'description', 'downloads', 'experimental',
               'external_dependencies', 'homepage', 'rating_votes',
               'repository', 'tags', 'tracker', 'trusted', 'update_date',
               'uploaded_by']
    maybe_missing = []
    needs_resorted = False
    try:
        for file_name, el in up_bar.iter(elements.iteritems()):
            nam, _ = os.path.splitext(file_name)
            p = repo.plugins_tree.find_plugin_by_package_name(nam,
                                                              starts_with=True)
            if not p:  # maybe the base version has been adjusted, try again
                temp_nam = re.sub(r'((\d+\.)?(\d+\.)?(\d+))', r'.\1', nam)
                p = repo.plugins_tree.find_plugin_by_package_name(
                    temp_nam, starts_with=True)
            if not p:
                maybe_missing.append(file_name)
                continue
            else:
                p = p[0]

            # print("Updating '{0}'...".format(p[0].get('name')))
            for tag in cp_tags:
                tag_el = el.find(tag)
                tag_p = p.find(tag)
                if tag_el is not None and tag_p is not None:
                    txt = tag_el.text
                    # print("  {0}: {1} <- {2}".format(tag, tag_p.text, txt))
                    if tag in QgisPlugin.metadata_types('cdata'):
                        if tag_el.text is not None:
                            txt = etree.CDATA(tag_el.text)
                    tag_p.text = txt
            # update plugin name
            ns = args.name_suffix if args.name_suffix is not None \
                else repo.plugin_name_suffix
            if el.get('name') is not None:
                el_name = u"{0}{1}".format(el.get('name'), ns)
                if p.get('name') != el_name:
                    needs_resorted = True
                    p.set('name', el_name)
    except KeyboardInterrupt:
        return False

    if needs_resorted:
        print("Re-sorting plugins in '{0}'".format(repo.repo_name))
        re_sort = QgisPluginTree.plugins_sorted_by_name(
            repo.plugins_tree.plugins())
        repo.plugins_tree.set_plugins(re_sort)

    print("Writing '{0}' {1}".format(repo.repo_name, repo.plugins_xml_name))
    repo.write_plugins_xml(repo.plugins_tree_xml())

    print('\nDone mirroring...')

    print("Plugin results:\n  attempted: {0}\n  mirrored: {1}"
          .format(len(tree.plugins()), len(repo.plugins_tree.plugins())))

    if maybe_missing:
        print('\nWARNING (version conflicts): plugins downloaded but MAY not '
              'be in XML after update:\n  {0}\n'
              .format(', '.join(maybe_missing)))

    return True
Example #25
def crack_compression(secret_length=16,
                      matched=b"",
                      target=0,
                      working_product=None,
                      candidate_length=1):
    global total_compressions

    # If we've got enough characters, return the single matched candidate
    if len(matched) >= secret_length:
        return (matched,)

    # If we don't already have a list of stuff to begin with, then start with printables
    if working_product is None:
        current_product = set(CHARSET[:])

    # Otherwise try every combination of printables with the best matches from the parent caller
    else:
        current_product = set(product(working_product, CHARSET))

        current_product = set(
            reduce(lambda a, b: a + b, item) for item in current_product)

    # Get our baseline size
    if target == 0:
        c = zlib.compressobj()

        target = len(c.compress(TOTAL + matched) + c.flush(zlib.Z_SYNC_FLUSH))

    # What we already know is matched
    print("Current matched: %s" % matched)

    # Candidate length
    print("Trying candidates of length: %s" % candidate_length)

    # Total number of things to try
    # Num of previous matches * len(printables)
    # Or just len(printables)
    print("Number of permutations: %s" % len(current_product))

    # Target to achieve. This is the length that was achieved the last try
    print("Target: %s" % target)

    # This is going to be a dictionary of all the compressed lengths of our tries
    length_dict = {}

    # Loading bar
    loading = Bar("Processing")

    # For every group of characters
    for group in loading.iter(current_product):
        total_compressions += 1

        # Compress it and add the length to our table
        c = zlib.compressobj()

        grouplen = len(
            c.compress(TOTAL + matched + group) + c.flush(zlib.Z_SYNC_FLUSH))

        length_dict[group] = grouplen

    # Best length
    # Can be shared by multiple keys
    best_length = min(length_dict.values())

    # Grab all our keys that have the min length
    best_keys = list(
        filter(lambda x: length_dict[x] == best_length, length_dict.keys()))

    best_keys.sort()

    # Number of keys
    # If it's one then we've matched a character
    unique = len(best_keys)

    # Print some data
    print("Best length is: %s" % best_length)

    print("Number of candidates: %s" % unique)

    print("Candidates: %s" % best_keys[:256])

    # Only one character is shorter
    # Huzzah!
    if unique == 1:
        print("Found Unique!")
        print()

        # Now work from what we already had matched + what we just matched
        matched = matched + list(best_keys)[0]

        return crack_compression(secret_length, matched, best_length)

    elif candidate_length >= 8 * secret_length:
        return best_keys

    # Multiple things could work
    # Try and distinguish with another character
    else:
        print("No Unique!")
        print()

        if unique <= 5:
            second_best_keys = list(
                filter(lambda x: length_dict[x] == best_length + 1,
                       length_dict.keys()))

            second_best_keys.sort()

            best_keys.extend(second_best_keys[:30])

        # Same as above except we're not matching anything
        # Instead we're supplying all our possible matches so we can brute force them with another character
        return crack_compression(secret_length, matched, best_length,
                                 best_keys, candidate_length + 1)
Example #26
main_repo = git.Repo('.')

# Update the main repository.
main_repo.git.fetch('origin')

# Gather the list of merged MRs.
MERGE_REQUEST_TRAILER_PREFIX = 'Merge-request: !'
merged_mr_ids = set()
branch_point = None

bar = Bar('Searching for merged merge requests... %(hexsha)s')
bar.hexsha = '0000000000000000000000000000000000000000'
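# --min-parents=2 selects only merge commits; --first-parent follows the release branch's own history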
parents = main_repo.git.rev_list('--first-parent', '--min-parents=2',
                                 'origin/release')

for sha in bar.iter(parents.split('\n')):
    sha = sha.rstrip()
    if not sha:
        continue
    commit = main_repo.commit(sha)
    bar.hexsha = commit.hexsha
    # See if we're still tracking merges into the release branch.
    if not commit.summary.endswith('into release') and branch_point is None:
        branch_point = commit.hexsha
    for line in commit.message.split('\n'):
        if line.startswith(MERGE_REQUEST_TRAILER_PREFIX):
            mr_id = int(line[len(MERGE_REQUEST_TRAILER_PREFIX):])
            merged_mr_ids.add(mr_id)

bar = Bar('Searching for the first ineligible commit... %(hexsha)s')
bar.hexsha = '0000000000000000000000000000000000000000'
Example #27
    def generator(n):
        bar = Bar(msg)
        for i in bar.iter(range(int(n))):
            yield
        yield
Example #28
    def rate_stats(self):
        """
        Calculates rate stats for each team through self.as_of.

        Offensive rate stats are all per 100 possessions.
        Defensive rate stats are a mix of per 100 possessions and
        of success rates.

        OFFENSE
        - assist rate
        - three point attempt rate
        - free throw attempt rate
        - field goal attempt rate

        DEFENSE
        - steals + blocks
        - three point attempts allowed
        - fouls
        - field goal percentage allowed
        """

        idx = pd.IndexSlice
        working = pd.read_sql(
            """
            SELECT season, team, date, "o.team",
            "t.assists", "t.three_pointers_attempted",
            "t.free_throws_attempted", "t.field_goals_attempted",
            "t.steals", "t.blocks", "t.fouls", "to.three_pointers_attempted",
            "to.field_goals_made", "to.field_goals_attempted" FROM game_data;
            """,
            self.db
        )

        working['date'] = working['date'].apply(self.make_date)
        working.set_index(['season', 'team', 'date'], inplace=True)
        working.sort(inplace=True)

        print 'Calculating rate stats...'

        bar = Bar(
            width=40,
            suffix='%(percent)d%%'
        )

        for i in bar.iter(working.index):
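            # i is (season, team, date); fetch the opponent's possessions
            # from poss_table at (season, opponent, date)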
            working.loc[i, 'o.poss'] = self.poss_table.loc[
                idx[i[0], self.poss_table.loc[i, 'o.team'], i[2]],
                'poss'
            ]

        o_stats = pd.DataFrame(
            {
                'assist_rate': working['t.assists'] /
                               self.poss_table['poss'] * 100,
                '3p_attempt_rate': working['t.three_pointers_attempted'] /
                                   self.poss_table['poss'] * 100,
                'FT_attempt_rate': working['t.free_throws_attempted'] /
                                   self.poss_table['poss'] * 100,
                'FG_attempt_rate': working['t.field_goals_attempted'] /
                                   self.poss_table['poss'] * 100
            }
        ).sort()

        d_stats = pd.DataFrame(
            {
                'st+bl_rate': (working['t.steals'] + working['t.blocks']) /
                              working['o.poss'] * 100,
                '3PA_allowed': working['to.three_pointers_attempted'] /
                               working['o.poss'] * 100,
                'foul_rate': working['t.fouls'] / working['o.poss'] * 100,
                'FG%_allowed': working['to.field_goals_made'] /
                               working['to.field_goals_attempted'] * 100
            }
        ).sort()

        return [o_stats, d_stats]
Example #29
    def team_pers(self, s):
        """
        Returns (season, team, date)-indexed team efficiency ratings
        for dataset through self.as_of for season.

        Team efficiency rating calculated as player efficiency rating,
        but for whole teams. Player efficiency rating calculation taken
        from:
        http://www.basketball-reference.com/about/per.html
        """

        idx = pd.IndexSlice
        try:
            working = self.data.xs(s, level='season')
        except KeyError:
            return pd.DataFrame()
        if len(working.dropna(how='any')) == 0:
            return pd.DataFrame()
        dates = working.index.get_level_values('date').unique()
        teams = working.index.get_level_values('team').unique()
        factors, vops, drb_percs, lg_pace = self.season_per_adjusters(
            s
        )

        # limits dates to those not already present in database
        # if latest handles case where table exists but season has not
        # yet been processed
        try:
            yearscheck = pd.read_sql(
                'SELECT * FROM aPERs WHERE season={};'.format(s),
                self.db
            )
            if s in yearscheck['season']:
                latest = self.get_max_date(s, 'aPERs')
                if latest:
                    dates = dates[dates > latest]
        except sql.OperationalError:
            pass

        if len(dates) == 0:
            out = pd.read_sql(
                """
                SELECT * FROM aPERs WHERE season={};
                """.format(s),
                self.db
            )

            out['date'] = out['date'].apply(
                lambda x: dt.datetime(
                    *time.strptime(x, '%Y-%m-%d %H:%M:%S')[:6]
                )
            )

            return out.set_index(['season', 'team', 'date'])

        bar = Bar(
            width=40,
            suffix='%(percent)d%%'
        )

        print 'Calculating PERs for season {}...'.format(s)

        out = pd.concat(
            [self.per(s, t, d, factors, vops, drb_percs, lg_pace)
             for t, d in bar.iter(
                [(te, da) for te in teams for da in dates]
            )]
        ).sort()

        out.to_sql('aPERs', self.db, if_exists='append')

        return out