Example #1
def run():
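    # For each model, reuse rows that already exist and create the rest via the
    # matching populate_<model_name>() helper, exposing every instance as a
    # module-level global named '<model_name><index>'.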
    for model, num_to_create in to_create.items():
        model_name = model._meta.model_name
        bar = Bar('Creating {}'.format(model_name), max=num_to_create)
        model_count = model.objects.count()
        create_f = globals()['populate_{}'.format(model_name)]

        for i in range(num_to_create):
            ident = '{}{}'.format(model_name, i)
            if i < model_count:
                unit = model.objects.all()[i]
            else:
                unit = create_f(model, i)
            globals()[ident] = unit
            bar.next()
        bar.finish()

    # This bit is special: Associate all rpms with the first repo,
    # for maximum relational query fun
    
    num_units = platform.ContentUnit.objects.count() 
    repo = globals()['repository0']
    bar = Bar('Adding all units to {} repo'.format(repo.slug))
    bar.max = num_units
    for unit in platform.ContentUnit.objects.all():
        repo.add_units(unit)
        bar.next()
    bar.finish()
Example #2
def mismas_features_distinto_humor(corpus):
    print("Buscando tweets con mismos valores de features pero distinto de humor...")

    humoristicos = [tweet for tweet in corpus if tweet.es_humor]
    no_humoristicos = [tweet for tweet in corpus if not tweet.es_humor]

    res = []

    bar = IncrementalBar("Buscando en tweets\t\t", max=len(humoristicos) * len(no_humoristicos),
                         suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for tweet_humor in humoristicos:
        for tweet_no_humor in no_humoristicos:
            if tweet_humor.features == tweet_no_humor.features:
                res.append((tweet_humor, tweet_no_humor))
                if tweet_humor.texto_original == tweet_no_humor.texto_original:
                    print("-----MISMO TEXTO ORIGINAL------")
                if tweet_humor.texto == tweet_no_humor.texto:
                    print("----------MISMO TEXTO----------")
                if tweet_humor.id == tweet_no_humor.id:
                    print("-----------MISMO ID------------")
                if tweet_humor.cuenta == tweet_no_humor.cuenta:
                    print("----------MISMA CUENTA---------")
                print('')
                print(tweet_humor.id)
                print(tweet_humor.texto)
                print("------------")
                print(tweet_no_humor.id)
                print(tweet_no_humor.texto)
                print("------------")
                print('')
            bar.next()
    bar.finish()

    return res
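
All of these snippets share the same progress.bar pattern: construct a bar with a known max, call next() once per processed item, and finish() at the end. Below is a minimal, self-contained sketch of that pattern; process_item is a hypothetical stand-in for the per-item work, and the suffix format mirrors the one used in the examples on this page.

from progress.bar import IncrementalBar

def process_item(item):
    # hypothetical per-item work
    return item

def run_with_progress(items):
    bar = IncrementalBar('Processing', max=len(items),
                         suffix='%(index)d/%(max)d - %(eta)ds')
    for item in items:
        process_item(item)
        bar.next()
    bar.finish()

if __name__ == '__main__':
    run_with_progress(list(range(100)))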
Example #3
    def calcular_feature_thread(self, tweets, nombre_feature, identificador):
        if len(tweets) > 0:
            bar = IncrementalBar("Calculando feature " + nombre_feature + ' - ' + unicode(identificador),
                                 max=len(tweets),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)
            feature = self.features[nombre_feature]
            self.abortar_si_feature_no_es_thread_safe(feature)
            for tweet in tweets:
                tweet.features[feature.nombre] = feature.calcular_feature(tweet)
                bar.next()
            bar.finish()
Example #4
def save_frames(source, vertices, images_dir):
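    # Render one frame per vertex array, rotating the mlab view by an equal
    # angle increment each time, and save each frame as a numbered image file.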
    print('Saving frames...')
    if not os.path.isdir(images_dir):
        os.makedirs(images_dir)
    bar = IncrementalBar(max=len(vertices))
    angle_change = 360 // len(vertices)
    for i, v in enumerate(vertices):
        update(source, v, angle_change=angle_change)
        mlab.savefig(filename=os.path.join(images_dir, frame_fn(i)))
        bar.next()
    bar.finish()
    mlab.close()
Example #5
    def calcular_features_thread(self, tweets, identificador):
        if len(tweets) > 0:
            bar = IncrementalBar("Calculando features - " + unicode(identificador),
                                 max=len(tweets) * len(self.features),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)
            for tweet in tweets:
                for feature in list(self.features.values()):
                    self.abortar_si_feature_no_es_thread_safe(feature)
                    tweet.features[feature.nombre] = feature.calcular_feature(tweet)
                    bar.next()
            bar.finish()
Example #6
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(open_db()) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)" \
                       + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"

            bar = IncrementalBar("Guardando tweets parecidos\t", max=len(pares_parecidos_distinto_humor),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()

            conexion.commit()
            bar.finish()
Example #7
def cross_validation_y_reportar(clasificador, features, clases, numero_particiones):
    skf = cross_validation.StratifiedKFold(clases, n_folds=numero_particiones)
    features = np.array(features)
    clases = np.array(clases)
    matrices = []
    medidas = defaultdict(list)

    bar = IncrementalBar("Realizando cross-validation\t", max=numero_particiones, suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for entrenamiento, evaluacion in skf:
        clasificador.fit(features[entrenamiento], clases[entrenamiento])
        clases_predecidas = clasificador.predict(features[evaluacion])
        matriz_de_confusion = metrics.confusion_matrix(clases[evaluacion], clases_predecidas).flatten()
        matrices.append(matriz_de_confusion)
        for medida, valor_medida in calcular_medidas(*matriz_de_confusion).items():
            medidas[medida].append(valor_medida)
        bar.next()

    bar.finish()

    promedios = {}

    print('')
    print("Resultados de cross-validation:")
    print('')
    for medida, valor_medida in medidas.items():
        print("\t{medida: >18s}:\t{valor_medida}".format(medida=medida, valor_medida=valor_medida))
        promedio = np.mean(valor_medida)
        promedios[medida] = promedio
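        # Half-width of the 95% confidence interval via the normal
        # approximation (z = 1.96) over the cross-validation folds.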
        delta = np.std(valor_medida) * 1.96 / math.sqrt(numero_particiones)
        print("Intervalo de confianza 95%:\t{promedio:0.4f} ± {delta:0.4f} --- [{inf:0.4f}, {sup:0.4f}]".format(
            promedio=promedio, delta=delta, inf=promedio - delta, sup=promedio + delta))
        print('')

    imprimir_matriz_metricas(
        promedios['Precision No humor'],
        promedios['Recall No humor'],
        promedios['F1-score No humor'],
        promedios['Precision Humor'],
        promedios['Recall Humor'],
        promedios['F1-score Humor'],
    )

    print('')
    print('')
    print('')
Example #8
    def render(self, ctx, invert=False, filename=None, pbar=False):
        """ Generate image of layer.

        Parameters
        ----------
        ctx : :class:`GerberContext`
            GerberContext subclass used for rendering the image

        filename : string <optional>
            If provided, save the rendered image to `filename`

        pbar : bool <optional>
            If true, render a progress bar
        """
        ctx.set_bounds(self.bounds)
        ctx._paint_background()

        if invert:
            ctx.invert = True
            ctx._clear_mask()

        _pbar = None
        if pbar:
            try:
                from progress.bar import IncrementalBar
                _pbar = IncrementalBar(
                    self.filename, max=len(self.primitives)
                )
            except ImportError:
                pbar = False

        # Render each primitive exactly once, advancing the optional bar.
        for p in self.primitives:
            ctx.render(p)
            if pbar:
                _pbar.next()
        if pbar:
            _pbar.finish()

        if invert:
            ctx.invert = False
            ctx._render_mask()

        if filename is not None:
            ctx.dump(filename)
Example #9
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = (
                "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)"
                + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"
            )
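            # Upsert: insert the pair, or update id_tweet_no_humor on a
            # duplicate key.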

            bar = IncrementalBar(
                "Guardando tweets parecidos\t", max=len(pares_parecidos_distinto_humor), suffix=SUFIJO_PROGRESS_BAR
            )
            bar.next(0)

            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()

            conexion.commit()
            bar.finish()
Example #10
def _create_unfilled_voxel_data(
        model_id, edge_length_threshold=0.1, voxel_config=None,
        overwrite=False, example_ids=None):
    from template_ffd.data.ids import get_example_ids
    from shapenet.core import cat_desc_to_id
    from template_ffd.model import load_params
    import numpy as np
    from progress.bar import IncrementalBar
    if voxel_config is None:
        voxel_config = _default_config
    cat_id = cat_desc_to_id(load_params(model_id)['cat_desc'])
    if example_ids is None:
        example_ids = get_example_ids(cat_id, 'eval')
    mesh_dataset = get_inferred_mesh_dataset(model_id, edge_length_threshold)
    voxel_dataset = get_voxel_dataset(
        model_id, edge_length_threshold, voxel_config, filled=False,
        auto_save=False)
    if not overwrite:
        example_ids = [i for i in example_ids if i not in voxel_dataset]
    if len(example_ids) == 0:
        return
    print('Creating %d voxels for model %s' % (len(example_ids), model_id))

    kwargs = dict(
        voxel_dim=voxel_config.voxel_dim,
        exact=voxel_config.exact,
        dc=voxel_config.dc,
        aw=voxel_config.aw)

    with mesh_dataset:
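        # Convert each inferred mesh to an unfilled binvox voxelization and
        # write it to the path supplied by the voxel dataset.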
        bar = IncrementalBar(max=len(example_ids))
        for example_id in example_ids:
            bar.next()
            mesh = mesh_dataset[example_id]
            vertices, faces = (
                np.array(mesh[k]) for k in ('vertices', 'faces'))
            binvox_path = voxel_dataset.path(example_id)
            # x, z, y = vertices.T
            # vertices = np.stack([x, y, z], axis=1)
            bio.mesh_to_binvox(
                vertices, faces, binvox_path, **kwargs)
        bar.finish()
Example #11
    def render_deferred(self):

        if not len(self._deferred):
            return

        print("Optimizing deferred elements")
        paths = self._optimize_deferred().paths

        print("Rendering Paths")
        try:
            from progress.bar import IncrementalBar
            _pbar = IncrementalBar(max=len(paths))
        except ImportError:
            _pbar = None

        for path in paths:
            self._render_path(path)
            if _pbar:
                _pbar.next()
        if _pbar:
            _pbar.finish()
Example #12
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop("nombre_feature", None)
    conexion = mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)
    cursor = conexion.cursor()

    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"

    if nombre_feature:
        mensaje = "Guardando feature " + nombre_feature
    else:
        mensaje = "Guardando features"

    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature]),
                ),
            )
        else:
            # Use a separate loop variable so the outer nombre_feature (None here)
            # is not clobbered for the remaining tweets.
            for nombre, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre, unicode(valor_feature), unicode(valor_feature)))
        bar.next()

    conexion.commit()
    bar.finish()

    cursor.close()
    conexion.close()
Example #13
def cargar_parecidos_con_distinto_humor():
    with closing(open_db()) as conexion:
        # buffered=True so I know how many rows there are before iterating over them.
        with closing(conexion.cursor() if DB_ENGINE == 'sqlite3' else conexion.cursor(buffered=True)) as cursor:
            consulta = """
            SELECT id_tweet_humor,
                   id_tweet_no_humor
            FROM   tweets_parecidos_distinto_humor
            """

            cursor.execute(consulta)

            pares_ids_parecidos_con_distinto_humor = []

            bar = IncrementalBar("Cargando tweets parecidos\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()

            bar.finish()

            return pares_ids_parecidos_con_distinto_humor
Example #14
def cargar_parecidos_con_distinto_humor():
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)) as conexion:
        # buffered=True so I know how many rows there are before iterating over them.
        with closing(conexion.cursor(buffered=True)) as cursor:
            consulta = """
            SELECT id_tweet_humor,
                   id_tweet_no_humor
            FROM   tweets_parecidos_distinto_humor
            """

            cursor.execute(consulta)

            pares_ids_parecidos_con_distinto_humor = []

            bar = IncrementalBar("Cargando tweets parecidos\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()

            bar.finish()

            return pares_ids_parecidos_con_distinto_humor
Example #15
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop('nombre_feature', None)
    conexion = open_db()
    cursor = conexion.cursor()

    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"

    if nombre_feature:
        mensaje = 'Guardando feature ' + nombre_feature
    else:
        mensaje = 'Guardando features'

    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature])
                )
            )
        else:
            # Use a separate loop variable so the outer nombre_feature (None here)
            # is not clobbered for the remaining tweets.
            for nombre, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre, unicode(valor_feature), unicode(valor_feature)))
        bar.next()

    conexion.commit()
    bar.finish()

    cursor.close()
    conexion.close()
Example #16
def install(package_list):
    '''
    Install A Specified Package(s)
    '''
    if platform == 'linux' or platform == 'darwin':
        password = getpass('Enter your password: ')
    else:
        password = ''
        # otherwise the variable would be undefined..

    packages = package_list.split(',')
    turbocharge = Installer()

    click.echo('\n')

    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()

    for package_name in packages:
        package_name = package_name.strip(' ')

        if platform == 'linux':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...',
                                         max=1)

            if package_name in devpackages_linux:
                show_progress(finding_bar)
                turbocharge.install_task(
                    devpackages_linux[package_name],
                    f'{constant.apt_script} {package_name}', password,
                    f'{package_name} --version',
                    [f'{devpackages_linux[package_name]} Version'])

            if package_name in applications_linux:
                show_progress(finding_bar)
                turbocharge.install_task(
                    applications_linux[package_name],
                    f'{constant.snap_script} {package_name}', password, '', [])

            if package_name == 'chrome':
                show_progress(finding_bar)
                try:
                    click.echo('\n')
                    password = getpass("Enter your password: ")
                    # ****** the remainder of the Chrome install and the start
                    # of the Windows branch are masked in the source snippet
                except Exception:
                    pass

        if platform == 'win32':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...',
                                         max=1)

            if package_name in devpackages_windows:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=devpackages_windows[package_name],
                    script=f"choco install {package_name} -y",
                    password="",
                    test_script=f"{package_name} --version",
                    tests_passed=[
                        f'{devpackages_windows[package_name]} Version'
                    ])

            elif package_name in applications_windows:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=applications_windows[package_name],
                    script=f"choco install {package_name} -y",
                    password="",
                    test_script="",
                    tests_passed=[])

            elif package_name not in devpackages_windows and package_name not in applications_windows:
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))

        if platform == 'darwin':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...',
                                         max=1)

            if package_name in devpackages_macos:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=devpackages_macos[package_name],
                    script=f"brew install {package_name}",
                    password="",
                    test_script=f"{package_name} --version",
                    tests_passed=[
                        f'{devpackages_macos[package_name]} Version'
                    ])
                # test_script is just a string here..

            elif package_name in applications_macos:
                show_progress(finding_bar)
                turbocharge.install_task(
                    package_name=applications_macos[package_name],
                    script=f"brew cask install {package_name}",
                    password="",
                    test_script="",
                    tests_passed=[])

            elif package_name not in devpackages_macos and package_name not in applications_macos:
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
Example #17
class SysExParser(object):
    def __init__(self,send_func,debug=False):
        super(SysExParser,self).__init__()
        self.send_func  = send_func
        self.debug      = debug
        self.dump_file  = None
        self.dump_on    = False
        self.dump_ram   = False
        self.printer    = MessagePrinter(debug=self.debug)
        self.handlers   = {
            # FILE FUNCTIONS  FILE_F
            "F_DHDR":      self.handleFileDumpHeader,
            "F_DPKT":      self.handleFileDumpDataBlock,
            "DIR_HDR":     self.handleFileDumpHeader,
            "F_WAIT"     : noop,
            "F_CANCEL"   : cancel,
            "F_ERR"      : cancel,
            # DEVICE COMMAND  DEVICE_CMD
            "STAT_ANSWER": self.handleStatusAnswer,
            "DATA_HEADER": self.handleDirectoryAnswer,
            "DATA_DUMP"  : self.handleDataDump,
            "DIR_ANSWER" : self.handleDirectoryAnswer,
            "D_WAIT"     : noop,
            "D_ACK"      : noop,
            "D_CANCEL"   : cancel,
            "D_ERR"      : cancel,
        }

        self.dump_start = [ "F_DREQ", "DATA_REQUEST" ]
        self.dump_stop  = [ "F_CANCEL", "D_CANCEL"]

    def __del__(self):
        self.closeDumpFile()

    def createDumpFile(self,filename=None):
        if not filename:
            filename = "dump_%s.bin" % mktimestamp()
        self.dump_file = open(filename,"wb")
        
    def closeDumpFile(self):
        if not self.dump_file: return
        self.dump_file.close()
        self.dump_file = None

    def startDump(self,filename,size):
        if not self.dump_on: return
        self.dump_written = 0
        self.dump_size = size
        self.closeDumpFile()
        self.createDumpFile(filename)
        print "Dumping '%s'" % filename
        showsize = ' 0x%(index)06x' if self.dump_ram else ''
        self.bar = IncrementalBar(
            max=size,
            suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]' + showsize)

    def stopDump(self):
        if not self.dump_on: return
        self.bar.finish()
        self.closeDumpFile()
        self.dump_on = False
        
    def dump(self,data,filename=None):
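        # Write incoming payload bytes to the dump file, clamping at the
        # expected dump size and advancing the progress bar by the amount
        # actually written.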
        if not self.dump_on: return
        if not self.dump_file:
            self.createDumpFile()
        if self.dump_written == self.dump_size:
            print "Discarding", len(data), "bytes, dump has ended"
        elif len(data) + self.dump_written > self.dump_size:
            discard = len(data) + self.dump_written - self.dump_size
            self.dump_file.write(bytearray(data[:-discard]))
            self.bar.next(self.dump_size-self.dump_written)
            self.dump_written = self.dump_size
            self.bar.finish()
            leftover = data[-discard:]
            for i in leftover:
                if i != 0:
                    print "Discarding non-NUL data:", hexdump(leftover)
                    break
        else:
            self.dump_file.write(bytearray(data))
            self.dump_written += len(data)
            self.bar.next(len(data))
        
    # FILE FUNCTIONS  FILE_F
    def handleFileDumpHeader(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="F_WAIT"),timestamp=timestamp+1)
        offset=17
        data = []
        for i in xrange(2):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        location = ''
        while msg[offset] != 0:
            location += chr(msg[offset])
            offset += 1
        offset+=1
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            filename = str(bytearray(msg[5:16])).strip()
            length = struct.unpack('>I',list2str(data[4:8]))[0]
            self.startDump(filename,length)
            self.dump(data[8:])
            self.sendSysEx( MSCEIMessage(fromName="F_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="F_NACK"),
                            timestamp=timestamp+2)
        return True
        
    def handleFileDumpDataBlock(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="F_WAIT"),timestamp=timestamp+1)
        noctets = msg[5]
        offset=6
        data = []
        for i in xrange(noctets):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.dump(data)
            self.sendSysEx( MSCEIMessage(fromName="F_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="F_NACK"),
                            timestamp=timestamp+2)
        return True

    # DEVICE COMMAND  DEVICE_CMD
    def handleStatusAnswer(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="D_WAIT"),timestamp=timestamp+1)
        offset= 5 + 3*8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.sendSysEx( MSCEIMessage(fromName="D_ACK"),
                            timestamp=timestamp+2)
            if self.dump_ram:
                self.dump_on = True
                self.startDump("ramdump_%s.bin" % mktimestamp(), 2097060)
                time.sleep(0.1)
                self.sendSysEx( MSCEIMessage(fromName="F_ACK"),
                                timestamp=timestamp+3)
                return True
        else:
            self.sendSysEx( MSCEIMessage(fromName="D_NACK"),
                            timestamp=timestamp+2)
        return False

    def handleDataDump(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="D_WAIT"))
        noctets = msg[5]
        offset=6
        data = []
        for i in xrange(noctets):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.dump(data)
            self.sendSysEx( MSCEIMessage(fromName="D_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="D_NACK"),
                            timestamp=timestamp+2)
        return True

    def handleDirectoryAnswer(self,msg,timestamp):
        #time.sleep(0.1)
        self.sendSysEx( MSCEIMessage(fromName="D_WAIT"),timestamp=timestamp+1)
        offset = 8 + 11 + 1
        data = []
        for i in xrange(2):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        offset += 11
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            filename = str(bytearray(msg[8:19])).strip()
            length = struct.unpack('>I',list2str(data[4:8]))[0]
            self.startDump(filename,length)
            #time.sleep(0.1)
            self.sendSysEx( MSCEIMessage(fromName="D_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="D_NACK"),
                            timestamp=timestamp+2)
        return True
        
    def parse(self, msg, timestamp, acceptUnhandled=True):
        if msg[0] != 0xF0:
            print 'Non-sysex message'
            print [ hex(b) for b in msg ]
            print
            return acceptUnhandled
Example #18
    def train(self):
        T = 0
        print("start training: {}_{}_{}_{}_{}_{}".format(
            "Q_onpolicy", self.alpha, self.alpha_end, self.epsilon,
            self.epsilon_end, self.maxepisode))
        Q_value = self.get_Q_value()
        self.data.append([T, Q_value])
        s = self.game.reset()
        p_A, p_B = self.gen_policy(s)
        a = self.choose_action(p_A, p_B)
        bar = Bar('Training',
                  max=self.maxepisode,
                  suffix='%(index)d/%(max)d - %(elapsed)ds/%(eta)ds')
        while T < self.maxepisode:
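            # One on-policy step: act, observe the rewards, choose the next
            # action from the current policy, update Q, then decay alpha and
            # epsilon.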

            #take action:
            s_prime, r_A, r_B, done, _ = self.game.step_encoded_action(a)
            #self.game.render()
            p_A, p_B = self.gen_policy(s_prime)
            a_prime = self.choose_action(p_A, p_B)
            a_A, a_B = self.game.decode_action(a_prime)
            p_A = np.zeros(self.nA, dtype=float)
            p_B = np.zeros(self.nA, dtype=float)
            p_A[a_A] = 1
            p_B[a_B] = 1
            self.learn(s, a, s_prime, r_A, r_B, done, p_A, p_B)
            self.alpha *= self.alpha_decay
            self.epsilon *= self.epsilon_decay
            Q_value_prime = self.get_Q_value()
            if s == self.game.encode_state(
                    col_A=2, col_B=1, row_A=0, row_B=0,
                    ball=1) and a == self.game.encode_action(a_A=2, a_B=0):
                self.data.append([T + 1, Q_value_prime])
            #print("step: {}, Q: {}".format(T, Q_value_prime))
            err_Q = np.abs(Q_value_prime - Q_value)
            Q_value = Q_value_prime
            #print("step: {}, Err_Q: {}".format(T, err_Q))
            s = s_prime
            a = a_prime
            T += 1
            if done:
                #print("yes")
                #self.game.render()
                s = self.game.reset()
                a = self.choose_action(p_A, p_B)
            else:
                s = s_prime
            bar.next()
        bar.finish()

        #np.save("Qtable_Q_offpolicy.npy", self.Q)
        final_policy = self.gen_policy(
            self.game.encode_state(col_A=2, col_B=1, row_A=0, row_B=0, ball=1))
        self.final_policy = np.array(final_policy)
        print(final_policy[0])
        print(final_policy[1])
        print(final_policy[0].sum())
        print(final_policy[1].sum())
        #print(final_policy.reshape(5,5).sum(axis=1))
        #print(final_policy.reshape(5,5).sum(axis=0))
        #print(final_policy.sum())

        pass
Example #19
def find_metaclonotypes(
    project_path = "tutorial48",
    source_path = os.path.join(path_to_base,'tcrdist','data','covid19'),
    antigen_enriched_file = 'mira_epitope_48_610_YLQPRTFL_YLQPRTFLL_YYVGYLQPRTF.tcrdist3.csv',
    ncpus = 4, 
    seed = 3434):
    """
    This function encapsulates a complete
    workflow for finding meta-clonotypes in antigen-enriched data.
    """
    np.random.seed(seed)
    if not os.path.isdir(project_path):
        os.mkdir(project_path)
    ############################################################################
    # Step 1: Select and load an antigen-enriched (sub)repertoire.          ####
    ############################################################################
    print(f"INITIATING A TCRrep() with {antigen_enriched_file}")
    assert os.path.isfile(os.path.join(source_path, antigen_enriched_file))
        # Read file into a Pandas DataFrame <df>
    df = pd.read_csv(os.path.join(source_path, antigen_enriched_file))
        # Drop cells without any gene usage information
    df = df[( df['v_b_gene'].notna() ) & (df['j_b_gene'].notna()) ]
        # Initialize a TCRrep class, using ONLY columns that are complete and uniquely define a clone.
        # The class provides a 'count' column if none is present.
        # Counts of identical subject:VCDR3 'clones' will be aggregated into a TCRrep.clone_df.
    from tcrdist.repertoire import TCRrep
    tr = TCRrep(cell_df = df[['subject','cell_type','v_b_gene', 'j_b_gene', 'cdr3_b_aa']], 
                organism = "human", 
                chains = ['beta'], 
                compute_distances = True)
    tr.cpus = ncpus
    ############################################################################
    # Step 1.1: Estimate Probability of Generation                          ####
    ############################################################################
    ### It will be useful later to know the pgen of each
    from tcrdist.automate import auto_pgen
    print(f"COMPUTING PGEN WITH OLGA (Sethna et al 2018)")
    print("FOR ANTIGEN-ENRICHED CLONES TO BE USED FOR SUBSEQUENT ANALYSES")
    auto_pgen(tr)

    # Tip: Users of tcrdist3 should be aware that by default a <TCRrep.clone_df> 
    # DataFrame is created out of non-redundant cells in the cell_df, and 
    # pairwise distance matrices automatically computed.
    # Notice that attributes <tr.clone_df>  and  <tr.pw_beta> , <tr.pw_cdr3_b_aa>, 
    # are immediately accessible.
    # Attributes <tr.pw_pmhc_b_aa>, <tr.pw_cdr2_b_aa>, and <tr.pw_cdr1_b_aa>  
    # are also available if <TCRrep.store_all_cdr> is set to True.
    # For large datasets, i.e., >15,000 clones, this approach may consume too much 
    # memory so <TCRrep.compute_distances> is automatically set to False. 
                                    
    ############################################################################
    # Step 2: Synthesize an Inverse Probability Weighted VJ Matched Background #
    ############################################################################
    # Generating an appropriate set of unenriched reference TCRs is important; for
    # each set of antigen-associated TCRs, discovered by MIRA, we created a two part
    # background. One part consists of 100,000 synthetic TCRs whose V-gene and J-gene
    # frequencies match those in the antigen-enriched repertoire, using the software
    # OLGA (Sethna et al. 2019; Marcou et al. 2018). The other part consists of
    # 100,000 umbilical cord blood TCRs sampled uniformly from 8 subjects (Britanova
    # et al., 2017). This mix balances dense sampling of sequences near the
    # biochemical neighborhoods of interest with broad sampling of TCRs from an
    # antigen-naive repertoire. Importantly, we adjust for the biased sampling by
    # using the V- and J-gene frequencies observed in the cord-blood data (see
    # Methods for details about inverse probability weighting adjustment). Using this
    # approach we are able to estimate the abundance of TCRs similar to a centroid
    # TCR in an unenriched background repertoire of ~1,000,000 TCRs, using a
    # comparatively modest background dataset of 200,000 TCRs. While this estimate
    # may underestimate the true specificity, since some of the neighborhood TCRs in
    # the unenriched background repertoire may in fact recognize the antigen of
    # interest, it is useful for prioritizing neighborhoods and selecting a radius
    # for each neighborhood that balances sensitivity and specificity.
    # Initialize a TCRsampler -- human, beta, umbilical cord blood from 8 people.
    print(f"USING tcrsampler TO CONSTRUCT A CUSTOM V-J MATCHED BACKGROUND")
    from tcrsampler.sampler import TCRsampler
    ts = TCRsampler(default_background = 'britanova_human_beta_t_cb.tsv.sampler.tsv')
    # Stratify sample so that each subject contributes similarly to estimate of 
    # gene usage frequency
    from tcrdist.background import get_stratified_gene_usage_frequency
    ts = get_stratified_gene_usage_frequency(ts = ts, replace = True) 
    # Synthesize an inverse probability weighted V,J gene background that matches 
    # usage in your enriched repertoire 
    df_vj_background = tr.synthesize_vj_matched_background(ts = ts, chain = 'beta')
    # Get a randomly drawn stratified sampler of beta, cord blood from 
    # Britanova et al. 2016 
    # Dynamics of Individual T Cell Repertoires: From Cord Blood to Centenarians
    from tcrdist.background import  sample_britanova
    df_britanova_100K = sample_britanova(size = 100000)
    # Append frequency columns using, using sampler above
    df_britanova_100K = get_gene_frequencies(ts = ts, df = df_britanova_100K)
    df_britanova_100K['weights'] = 1
    df_britanova_100K['source'] = "stratified_random"
    # Combine the two parts of the background into a single DataFrame
    df_bkgd = pd.concat([df_vj_background.copy(), df_britanova_100K.copy()], axis = 0).\
        reset_index(drop = True)                                              
    # Assert that the backgrounds have the expected number of rows.
    assert df_bkgd.shape[0] == 200000
    # Save the background for future use
    background_outfile = os.path.join(project_path, f"{antigen_enriched_file}.olga100K_brit100K_bkgd.csv")
    print(f'WRITING {background_outfile}')
    df_bkgd.to_csv(background_outfile, index = False)
    # Load the background to a TCRrep without computing pairwise distances 
    # (i.e., compute_distances = False)
    tr_bkgd = TCRrep(
        cell_df = df_bkgd,
        organism = "human", 
        chains = ['beta'], 
        compute_distances = False)
    # Compute rectangular distances, that is, distances between each clone in
    # the antigen-enriched repertoire and each TCR in the background.
    # With a single 1 CPU and < 10GB RAM, 5E2x2E5 = 100 million pairwise distances, 
    # across CDR1, CDR2, CDR2.5, and CDR3 
    # 1min 34s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each) 
    # %timeit -r 1 tr.compute_rect_distances(df = tr.clone_df, df2 = tr_bkdg.clone_df, store = False)
    ############################################################################
    # Step 4: Calculate Distances                                          #####
    ############################################################################
    print(f"COMPUTING RECTANGULARE DISTANCE")
    tr.compute_sparse_rect_distances(
        df = tr.clone_df, 
        df2 = tr_bkgd.clone_df,
        radius=50,
        chunk_size = 100)
    scipy.sparse.save_npz(os.path.join(project_path, f"{antigen_enriched_file}.rw_beta.npz"), tr.rw_beta)
        # Tip: For larger dataset you can use a sparse implementation: 
        # 30.8 s ± 0 ns per loop ; tr.cpus = 6
        # %timeit -r tr.compute_sparse_rect_distances(df = tr.clone_df, df2 = tr_bkdg.clone_df,radius=50, chunk_size=85)
    ############################################################################
    # Step 5: Examine Density ECDFS                                        #####
    ############################################################################
        # Investigate the density of neighbors to each TCR, based on expanding 
        # distance radius.
    from tcrdist.ecdf import distance_ecdf, _plot_manuscript_ecdfs
    import matplotlib.pyplot as plt
        # Compute empirical cumulative density function (ecdf)
        # Compare Antigen Enriched TCRs (against itself).
    thresholds, antigen_enriched_ecdf = distance_ecdf(
        tr.pw_beta,
        thresholds=range(0,50,2))
        # Compute empirical cumulative density function (ecdf)
        # Compare Antigen Enriched TCRs (against) 200K probability 
        # inverse weighted background
    thresholds, background_ecdf = distance_ecdf(
        tr.rw_beta,
        thresholds=range(0,50,2),
        weights= tr_bkgd.clone_df['weights'], 
        absolute_weight = True)
        # plot_ecdf similar to tcrdist3 manuscript #
    antigen_enriched_ecdf[antigen_enriched_ecdf == antigen_enriched_ecdf.min()] = 1E-10
    f1 = _plot_manuscript_ecdfs(
        thresholds, 
        antigen_enriched_ecdf, 
        ylab= 'Proportion of Antigen Enriched TCRs', 
        cdr3_len=tr.clone_df.cdr3_b_aa.str.len(), 
        min_freq=1E-10)
    f1.savefig(os.path.join(project_path, f'{antigen_enriched_file}.ecdf_AER_plot.png'))
    f2 = _plot_manuscript_ecdfs(
        thresholds,
        background_ecdf,
        ylab= 'Proportion of Reference TCRs',
        cdr3_len=tr.clone_df.cdr3_b_aa.str.len(),
        min_freq=1E-10)
    f2.savefig(os.path.join(project_path, f'{antigen_enriched_file}.ecdf_BUR_plot.png'))
    ############################################################################
    # Step 6: Find optimal radii (theta = 1E5)                              #####
    ############################################################################
    # To ascertain which meta-clonotypes are likely to be most specific, 
    # take advantage of an existing function <bkgd_cntl_nn2>.
    #  d888   .d8888b.  8888888888     888888888  
    # d8888  d88P  Y88b 888            888        
    #   888  888    888 888            888        
    #   888  888    888 8888888        8888888b.  
    #   888  888    888 888                 "Y88b 
    #   888  888    888 888      888888       888 
    #   888  Y88b  d88P 888            Y88b  d88P 
    # 8888888 "Y8888P"  8888888888      "Y8888P"                                         
   
    level_tag = '1E5'
    from tcrdist.neighbors import bkgd_cntl_nn2
    centers_df  = bkgd_cntl_nn2(
        tr               = tr,
        tr_background    = tr_bkgd,
        weights          = tr_bkgd.clone_df.weights,
        ctrl_bkgd        = 10**-5, 
        col              = 'cdr3_b_aa',
        add_cols         = ['v_b_gene', 'j_b_gene'],
        ncpus            = 4,
        include_seq_info = True,
        thresholds       = [x for x in range(0,50,2)],
        generate_regex   = True,
        test_regex       = True,
        forced_max_radius = 36)

    ############################################################################
    # Step 6.2: (theta = 1E5) ALL meta-clonotypes .tsv file                   ##
    ############################################################################
    # save center to project_path for future use
    centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" )
    
    # Many of the meta-clonotypes contain redundant information.
    # We can winnow these down to a less-redundant list. We do this
    # by ranking clonotypes from most to least specific.
        # <min_nsubject> is minimum publicity of the meta-clonotype,  
        # <min_nr> is minimum non-redundancy
    # Add neighbors, K_neighbors, and nsubject columns
    from tcrdist.public import _neighbors_variable_radius, _neighbors_sparse_variable_radius
    centers_df['neighbors'] = _neighbors_variable_radius(pwmat=tr.pw_beta, radius_list = centers_df['radius'])
    centers_df['K_neighbors'] = centers_df['neighbors'].apply(lambda x : len(x))
    # We determine how many <nsubjects> are in the set of neighbors 
    centers_df['nsubject']  = centers_df['neighbors'].\
            apply(lambda x: tr.clone_df['subject'].iloc[x].nunique())
    centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" )

    from tcrdist.centers import rank_centers
    ranked_centers_df = rank_centers(
        centers_df = centers_df, 
        rank_column = 'chi2joint', 
        min_nsubject = 2, 
        min_nr = 1)
    ############################################################################
    # Step 6.3:  (theta = 1E5) NR meta-clonotypes .tsv file                  ###
    ############################################################################
    # Output, ready to search bulk data.
    ranked_centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" )
    ############################################################################
    # Step 6.4: (theta = 1E5) Output Meta-Clonotypes HTML Summary            ###
    ############################################################################
    # Here we can make an SVG logo for each NR meta-clonotype
    if ranked_centers_df.shape[0] > 0:
        from progress.bar import IncrementalBar
        from tcrdist.public import make_motif_logo
        cdr3_name = 'cdr3_b_aa'
        v_gene_name = 'v_b_gene'
        svgs = list()
        svgs_raw = list()
        bar = IncrementalBar('Processing', max = ranked_centers_df.shape[0])
        for i,r in ranked_centers_df.iterrows():
            bar.next()
            centroid = r[cdr3_name]
            v_gene   = r[v_gene_name]
            svg, svg_raw = make_motif_logo( tcrsampler = ts, 
                                            pwmat = tr.pw_beta,
                                            clone_df = tr.clone_df,
                                            centroid = centroid ,
                                            v_gene = v_gene ,
                                            radius = r['radius'],
                                            pwmat_str = 'pw_beta',
                                            cdr3_name = 'cdr3_b_aa',
                                            v_name = 'v_b_gene',
                                            gene_names = ['v_b_gene','j_b_gene'])
            svgs.append(svg)
            svgs_raw.append(svg_raw)
        bar.next();bar.finish()
        ranked_centers_df['svg']      = svgs
        ranked_centers_df['svg_raw'] = svgs_raw

        def shrink(s):
            return s.replace('height="100%"', 'height="20%"').replace('width="100%"', 'width="20%"')
        labels =['cdr3_b_aa','v_b_gene', 'j_b_gene', 'pgen',
                'radius', 'regex','nsubject','K_neighbors', 
                'bkgd_hits_weighted','chi2dist','chi2re','chi2joint']
        
        output_html_name = os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.html')
        # 888    888 88888888888 888b     d888 888      
        # 888    888     888     8888b   d8888 888      
        # 888    888     888     88888b.d88888 888      
        # 8888888888     888     888Y88888P888 888      
        # 888    888     888     888 Y888P 888 888      
        # 888    888     888     888  Y8P  888 888      
        # 888    888     888     888   "   888 888      
        # 888    888     888     888       888 88888888
        with open(output_html_name, 'w') as output_handle:
            for i,r in ranked_centers_df.iterrows():
                #import pdb; pdb.set_trace()
                svg, svg_raw = r['svg'],r['svg_raw']
                output_handle.write("<br></br>")
                output_handle.write(shrink(svg))
                output_handle.write(shrink(svg_raw))
                output_handle.write("<br></br>")
                output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
                output_handle.write("<br></br>")
    # To ascertain which meta-clonotypes are likely to be most specific, 
    # take advantage of an existing function <bkgd_cntl_nn2>.
    #  d888   .d8888b.  8888888888       .d8888b.  
    # d8888  d88P  Y88b 888             d88P  Y88b 
    #   888  888    888 888             888        
    #   888  888    888 8888888         888d888b.  
    #   888  888    888 888             888P "Y88b 
    #   888  888    888 888      888888 888    888 
    #   888  Y88b  d88P 888             Y88b  d88P 
    # 8888888 "Y8888P"  8888888888       "Y8888P" 
    ############################################################################
    # Step 6.5: Find optimal radii  (theta = 1E6)                            ###
    ############################################################################
    level_tag = '1E6'
    from tcrdist.neighbors import bkgd_cntl_nn2
    centers_df  = bkgd_cntl_nn2(
        tr               = tr,
        tr_background    = tr_bkgd,
        weights          = tr_bkgd.clone_df.weights,
        ctrl_bkgd        = 10**-6, 
        col              = 'cdr3_b_aa',
        add_cols         = ['v_b_gene', 'j_b_gene'],
        ncpus            = 4,
        include_seq_info = True,
        thresholds       = [x for x in range(0,50,2)],
        generate_regex   = True,
        test_regex       = True,
        forced_max_radius = 36)
    ############################################################################
    # Step 6.6: (theta = 1E6) ALL meta-clonotypes .tsv file                   ##
    ############################################################################
    # save center to project_path for future use
    centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" )
    
    # Many of the meta-clonotypes contain redundant information.
    # We can winnow these down to a less-redundant list. We do this
    # by ranking clonotypes from most to least specific.
        # <min_nsubject> is minimum publicity of the meta-clonotype,  
        # <min_nr> is minimum non-redundancy
    # Add neighbors, K_neighbors, and nsubject columns
    from tcrdist.public import _neighbors_variable_radius, _neighbors_sparse_variable_radius
    centers_df['neighbors'] = _neighbors_variable_radius(pwmat=tr.pw_beta, radius_list = centers_df['radius'])
    centers_df['K_neighbors'] = centers_df['neighbors'].apply(lambda x : len(x))
    # We determine how many <nsubjects> are in the set of neighbors 
    centers_df['nsubject']  = centers_df['neighbors'].\
            apply(lambda x: tr.clone_df['subject'].iloc[x].nunique())
    centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" )

    from tcrdist.centers import rank_centers
    ranked_centers_df = rank_centers(
        centers_df = centers_df, 
        rank_column = 'chi2joint', 
        min_nsubject = 2, 
        min_nr = 1)
    ############################################################################
    # Step 6.7:  (theta = 1E6) NR meta-clonotypes .tsv file                  ###
    ############################################################################
    # Output, ready to search bulk data.
    ranked_centers_df.to_csv( os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.tsv'), sep = "\t" )

    ############################################################################
    # Step 6.8: (theta = 1E6) Output Meta-Clonotypes HTML Summary            ###
    ############################################################################
    # Here we can make an SVG logo for each meta-clonotype
    from progress.bar import IncrementalBar
    from tcrdist.public import make_motif_logo
    if ranked_centers_df.shape[0] > 0:
        cdr3_name = 'cdr3_b_aa'
        v_gene_name = 'v_b_gene'
        svgs = list()
        svgs_raw = list()
        bar = IncrementalBar('Processing', max = ranked_centers_df.shape[0])
        for i,r in ranked_centers_df.iterrows():
            bar.next()
            centroid = r[cdr3_name]
            v_gene   = r[v_gene_name]
            svg, svg_raw = make_motif_logo( tcrsampler = ts, 
                                            pwmat = tr.pw_beta,
                                            clone_df = tr.clone_df,
                                            centroid = centroid ,
                                            v_gene = v_gene ,
                                            radius = r['radius'],
                                            pwmat_str = 'pw_beta',
                                            cdr3_name = 'cdr3_b_aa',
                                            v_name = 'v_b_gene',
                                            gene_names = ['v_b_gene','j_b_gene'])
            svgs.append(svg)
            svgs_raw.append(svg_raw)
        bar.next();bar.finish()
        ranked_centers_df['svg']      = svgs
        ranked_centers_df['svg_raw'] = svgs_raw

        def shrink(s):
            return s.replace('height="100%"', 'height="20%"').replace('width="100%"', 'width="20%"')
        labels =['cdr3_b_aa', 'v_b_gene', 'j_b_gene', 'pgen', 'radius', 'regex','nsubject','K_neighbors', 'bkgd_hits_weighted','chi2dist','chi2re','chi2joint']
        
        output_html_name = os.path.join(project_path, f'{antigen_enriched_file}.ranked_centers_bkgd_ctlr_{level_tag}.html')
        # 888    888 88888888888 888b     d888 888      
        # 888    888     888     8888b   d8888 888      
        # 888    888     888     88888b.d88888 888      
        # 8888888888     888     888Y88888P888 888      
        # 888    888     888     888 Y888P 888 888      
        # 888    888     888     888  Y8P  888 888      
        # 888    888     888     888   "   888 888      
        # 888    888     888     888       888 88888888     
        with open(output_html_name, 'w') as output_handle:
            for i,r in ranked_centers_df.iterrows():
                #import pdb; pdb.set_trace()
                svg, svg_raw = r['svg'],r['svg_raw']
                output_handle.write("<br></br>")
                output_handle.write(shrink(svg))
                output_handle.write(shrink(svg_raw))
                output_handle.write("<br></br>")
                output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
                output_handle.write("<br></br>")
Example #20
def analogy_exp(embs, hdataset, hparams):
    """
    Runs the two big analogy datasets on the set of embeddings passed
    to it. Calculates 3cosadd and 3cosmul.
    :param embs: Embeddings class, a hilbert embeddings object.
    :param hdataset: HilbertDataset object
    :param hparams: unused - kept for interface functionality
    :return: ResultsHolder object
    """
    results = ResultsHolder(ANALOGY)

    # normalize for faster sim calcs.
    embs.matrix = F.normalize(embs.matrix, p=2, dim=1)

    # for showing over time
    total_iter = sum(len(samples) for samples in hdataset.values())
    iter_step = 100
    bar = IncrementalBar('Running analogy experiments', max=total_iter)

    # iterate over the two analogy datasets
    for dname, samples in hdataset.items():
        correct_cosadd = 0
        correct_cosmul = 0
        missing_words = 0
        missing_answer = 0
        total_all_embeddings = 0

        # w1 is to w2 as w3 is to w4
        for i, (w1, w2, w3, w4) in enumerate(samples):
            if i % iter_step == 0: bar.next(n=iter_step)

            if not embs.has_w(w4):
                missing_answer += 1
                continue

            e1 = embs.get_vec(w1).reshape(-1, 1)
            e2 = embs.get_vec(w2).reshape(-1, 1)
            e3 = embs.get_vec(w3).reshape(-1, 1)

            # get cos sims for each of them with the dataset
            sim_all = embs.matrix.mm(torch.cat([e1, e2, e3], dim=1))
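            # sim_all[:, 0..2] hold the similarity of every vocabulary
            # embedding with w1, w2 and w3 respectively.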

            # calculate 3cosadd
            cos_add = sim_all[:, 1] + sim_all[:, 2] - sim_all[:, 0]

            # 3cosmul requires all similarities to be nonnegative, conveniently told to us in a footnote.
            # see footnote 7 in http://anthology.aclweb.org/W/W14/W14-1618.pdf
            sim_all = (sim_all + 1) / 2
            cos_mul = (sim_all[:, 1] * sim_all[:, 2]) / (
                sim_all[:, 0] + 0.001)  # add epsilon to avoid divide by 0

            # make sure we don't get the vecs themselves
            have_all_embs = True
            for wi in (w1, w2, w3):
                try:
                    w_id = embs.get_id(wi)
                    cos_add[w_id] = -np.inf
                    cos_mul[w_id] = -np.inf
                except KeyError:
                    missing_words += 1
                    have_all_embs = False

            # get the best with argmax
            best_w_add = embs.get_token(cos_add.argmax())
            best_w_mul = embs.get_token(cos_mul.argmax())

            # count up for final accuracy
            correct_cosadd += 1 if w4 == best_w_add else 0
            correct_cosmul += 1 if w4 == best_w_mul else 0
            total_all_embeddings += 1 if have_all_embs else 0

        # save the accuracies
        results.add_ds_results(
            dname, {
                '3cosadd':
                correct_cosadd / len(samples),
                '3cosmul':
                correct_cosmul / len(samples),
                '3cosadd_had_answer':
                correct_cosadd / (len(samples) - missing_answer),
                '3cosmul_had_answer':
                correct_cosmul / (len(samples) - missing_answer),
                '3cosadd_full_coverage':
                correct_cosadd / total_all_embeddings,
                '3cosmul_full_coverage':
                correct_cosmul / total_all_embeddings,
                'missing_words':
                missing_words / (3 * len(samples)),
                'missing_answer':
                missing_answer / len(samples),
                'coverage':
                total_all_embeddings / len(samples),
            })

    bar.finish()
    return results
Example #21
def plot_all_results(Usol, x, t):
    """
    Plot pressure, density, velocity and lambda
    :param U: Solution tensor
    :return:
    """
    Res = np.zeros_like(Usol)
    # Res[0] = P
    # Res[1] = Rho  # Density
    # Res[2] = velocity  #
    # Res[3] = Phi
    # Res[4] = lambda
    prog_bar = IncrementalBar(
        'Finished simulation. '
        'Computing plot variables...', max=len(t))

    for tind, time in enumerate(t):

        U = Usol[tind, :, :]

        # Compute physical values from U state vectors
        F, pp = f(U)
        S, C = s(U, F, pp)
        (u, E, PHI, LAMBD, V, P) = pp

        Res[tind, 0, :] = P
        Res[tind, 1, :] = U[0]
        Res[tind, 2, :] = u
        Res[tind, 3, :] = PHI
        Res[tind, 4, :] = LAMBD
        prog_bar.next()

    prog_bar.finish()

    Pressure_plot = np.minimum(Res[:, 0, :] * 1e3,
                               np.ones_like(Res[:, 0, :]) * 50)
    plot_u_t(
        x,
        t,
        Pressure_plot,
        #title=r'Pressure $P$ (GPa) $\times 10^{-3}$', fign=0)
        title=r'Pressure $P$ (GPa)',
        fign=0)

    Density_plot = np.minimum(Res[:, 1, :] * 1e3,
                              np.ones_like(Res[:, 0, :]) * 5)
    plot_u_t(
        x,
        t,
        Density_plot,
        #title=r'Density $\rho$ (g/mm$^3$)', fign=1)
        title=r'Density $\rho$ (g/cm$^3$)',
        fign=1)

    Velocity_plot = np.minimum(Res[:, 2, :], np.ones_like(Res[:, 0, :]) * 5)
    Velocity_plot = np.maximum(Velocity_plot, np.ones_like(Res[:, 0, :]) * -5)
    plot_u_t(x,
             t,
             Velocity_plot,
             title=r'Velocity $u$ (mm . $\mu$ s$^{-1}$)',
             fign=2)

    plot_u_t(x, t, Res[:, 3, :], title=r'$\phi$ ', fign=3)

    plot_u_t(x, t, Res[:, 4, :], title=r'$\lambda$ ', fign=4)

    return F
Beispiel #22
0
def _quasi_public_meta_clonotypes(
    clone_df,
    pwmat,
    tcrsampler,
    cdr3_name='cdr3_d_aa',
    v_gene_name='v_d_gene',
    nr_filter=True,
    output_html_name="quasi_public_clones.html",
    sort_columns=['nsubject', 'K_neighbors'],
    sort_ascending=False,
    labels=[
        'clone_id',
        'cdr3_d_aa',
        'v_d_gene',
        'j_d_gene',
        'radius',
        'neighbors',
        'K_neighbors',
        #'cdr3s',
        'nsubject',
        'qpublic',
        'cdr3_d_aa.summary',
        'v_d_gene.summary',
        'j_d_gene.summary',
        'subject.summary'
    ],
    fixed_radius=False,
    radius=None,
    query_str='qpublic == True & K_neighbors > 1',
    kargs_member_summ={
        'key_col': 'neighbors',
        'count_col': 'count',
        'addl_cols': ['subject'],
        'addl_n': 4
    },
    kargs_motif={
        'pwmat_str': 'pw_delta',
        'cdr3_name': 'cdr3_d_aa',
        'v_name': 'v_d_gene',
        'gene_names': ['v_d_gene', 'j_d_gene']
    }):
    """
	_quasi_public_meta_clonotypes


	Parameters
	----------
	clone_df : pd.DataFrame
		Clones information with standard tcrdist3 column names.
	pwmat : np.array
		Pairwise distances
	tcrsampler : tcrsampler.TCRsampler
		TCRSampler instance initialized with appropriate background
		set.
	cdr3_name : str
		Column name for amino acid CDR3 e.g., 'cdr3_d_aa'.
	v_gene_name : str
		Column name for TR[ABGD]V gene e.g., 'v_d_gene'.
	nr_filter : bool
		If True, sequences with the exact same neighbors as another set will be
		dropped.
	output_html_name : str
		Filename for the html output.
	labels : list
		List of columns to display on html output beneath each logo plot. 
	fixed_radius : bool
		If False, clone_df must have a column radius.
		If True, the argument radius will be used to define the
		maximum distance from centroid to neighboring TCR.
	radius : int or None
		Threshold distance (<=) for neighborhood membership.
		If int, then all centroids will be assigned the same
		radius. Alternatively a radius can be provided for each
		centroid sequence by including radius as a numeric column
		in clone_df.
	query_str : str
		Query string used to select sequences for the output. For instance
		'qpublic == True and K_neighbors > 3' implies that only
		groupings of 4 or more TCRs from at least two individuals
		will be retained. Alternatively, 'nsubject > 1' or
		'qpublic == True' could be used as true minimum requirements
		for quasi-publicity.
	kargs_member_summ : dict
		kwargs passed to member_summ.
	kargs_motif : dict
		kwargs passed to make_motif_logo for motif generation.

	Returns
	-------
	dict
		Dictionary of DataFrames:
		nn_summary : pd.DataFrame
			DataFrame matching clone_df with summary measures added.
		quasi_public_df : pd.DataFrame
			DataFrame with only those rows that match <query_str> and, if
			nr_filter is True, pass the non-redundancy filter.
		clone_df : pd.DataFrame
			Input clone_df with neighbor and summary columns added.

	Notes
	-----
	Importantly, an html file is written displaying the quasi-public meta-clonotypes.

	The easiest way to integrate this with an existing neighbor_diff result is to
	add 'neighbors' and 'K_neighbors' to the clone_df. Other columns
	could be added as well, and then displayed if added to the
	list of labels.

	nn_clone_df = pd.concat([tr.clone_df, ndif[['neighbors', 'K_neighbors','val_0','ct_0']] ], axis = 1)

	Examples
	--------
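	A minimal, hypothetical sketch (the objects `tr` and `ts` and the radius value
	are assumptions, not part of this module): given a tcrdist3 TCRrep instance
	`tr` with delta-chain distances and a TCRsampler `ts` built on a matching
	background set:

	>>> out = _quasi_public_meta_clonotypes(clone_df=tr.clone_df,
	...                                     pwmat=tr.pw_delta,
	...                                     tcrsampler=ts,
	...                                     fixed_radius=True,
	...                                     radius=18)
	>>> sorted(out.keys())
	['clone_df', 'nn_summary', 'quasi_public_df']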
	"""
    if 'neighbors' not in clone_df.columns:
        if fixed_radius:
            clone_df['radius'] = radius
            clone_df['neighbors'] = _neighbors_fixed_radius(pwmat=pwmat,
                                                            radius=radius)
        else:
            assert 'radius' in clone_df.columns, "if not using fixed_radius, the clone_df must have a numeric 'radius' column"
            clone_df['neighbors'] = _neighbors_variable_radius(
                pwmat=pwmat, radius_list=clone_df.radius)

    if 'K_neighbors' not in clone_df.columns:
        if fixed_radius:
            clone_df['K_neighbors'] = _K_neighbors_fixed_radius(pwmat=pwmat,
                                                                radius=radius)
        else:
            clone_df['K_neighbors'] = _K_neighbors_variable_radius(
                pwmat=pwmat, radius_list=clone_df.radius)

    if 'nsubject' not in clone_df.columns:
        clone_df['nsubject']   = clone_df['neighbors'].\
         apply(lambda x: clone_df['subject'].iloc[x].nunique())

    if 'qpublic' not in clone_df.columns:
        clone_df['qpublic']     = clone_df['nsubject'].\
         apply(lambda x: x > 1)

    nn_summary = member_summ(res_df=clone_df,
                             clone_df=clone_df,
                             **kargs_member_summ)
    nn_summary = nn_summary.rename(
        columns={k: f'{k}.summary'
                 for k in nn_summary.columns})

    clone_df['cdr3s'] = clone_df['neighbors'].apply(
        lambda x: clone_df[cdr3_name].iloc[x].to_list())

    clone_df = pd.concat([clone_df, nn_summary], axis=1)

    quasi_public_df = clone_df.query(query_str).\
     sort_values(sort_columns, ascending = sort_ascending).\
     reset_index(drop = True).\
     copy()

    if quasi_public_df.shape[0] == 0:
        raise ValueError(
            "UNFORTUNATELY NO QUASI PUBLIC CLOONES WERE FOUND, CONSIDER YOUR QUERY STRINGENCY"
        )

    quasi_public_df['unique_set'] = test_for_subsets(
        quasi_public_df['neighbors'])

    if nr_filter:
        quasi_public_df = filter_is(quasi_public_df, 'unique_set',
                                    1).reset_index(drop=True)

    print(
        f"GENERATING {quasi_public_df.shape[0]} QUASI-PUBLIC MOTIFS SATISFYING {query_str}"
    )
    bar = IncrementalBar('Processing', max=quasi_public_df.shape[0])
    svgs = list()
    svgs_raw = list()
    for i, r in quasi_public_df.iterrows():
        bar.next()
        centroid = r[cdr3_name]
        v_gene = r[v_gene_name]
        svg, svg_raw = make_motif_logo(tcrsampler=tcrsampler,
                                       pwmat=pwmat,
                                       clone_df=clone_df,
                                       centroid=centroid,
                                       v_gene=v_gene,
                                       radius=r['radius'],
                                       **kargs_motif)
        svgs.append(svg)
        svgs_raw.append(svg_raw)
    bar.next()
    bar.finish()

    quasi_public_df['svg'] = svgs
    quasi_public_df['svg_raw'] = svgs_raw

    def shrink(s):
        s = s.replace('height="100%"', 'height="20%"')
        s = s.replace('width="100%"', 'width="20%"')
        return s

    print(labels)

    with open(output_html_name, 'w') as output_handle:
        for i, r in quasi_public_df.iterrows():
            #import pdb; pdb.set_trace()
            svg, svg_raw = r['svg'], r['svg_raw']
            output_handle.write("<br></br>")
            output_handle.write(shrink(svg))
            output_handle.write(shrink(svg_raw))
            output_handle.write("<br></br>")
            output_handle.write(pd.DataFrame(r[labels]).transpose().to_html())
            output_handle.write("<br></br>")

    return {
        'nn_summary': nn_summary,
        'quasi_public_df': quasi_public_df,
        'clone_df': clone_df
    }
Beispiel #23
0
print('Phone #: ' + phone)  # Prints 'phone' variable.
print('Hours of Operation: ')  # Prints heading for hours.
# Prints variable hours.
print(hours[:9] + ': ' + hours[10:28])
print(hours[29:32] + ':       ' + hours[33:51])
print(hours[52:55] + ':       ' + hours[56:74])
print(hours[75:84] + ': ' + hours[85:103])
print('Review Overview: ')  # Prints heading for Reviews.
# Calls 'circles' function and prints what it returns and the variable 'rating'.
print(circles(rating) + ': ' + str(rating) + ' Rating')

excellentBar = IncrementalBar('Excellent: ',
                              max=total)  # Creates an IncrementalBar item.
for i in range(
        excellent):  # Iterates n times, n = number of excellent reviews.
    excellentBar.next()  # Updates bar length.
print(' : ' + str(int(round(excellent / total, 2) * 100)) +
      '%')  # Prints number of reviews and percentage.

verygoodBar = IncrementalBar('Very Good: ',
                             max=total)  # Creates an IncrementalBar item.
for i in range(verygood):  # Iterates n times, n = number of very good reviews.
    verygoodBar.next()  # Updates bar length.
print(' : ' + str(int(round(verygood / total, 2) * 100)) +
      '%')  # Prints number of reviews and percentage.

averageBar = IncrementalBar('Average:   ',
                            max=total)  # Creates an IncrementalBar item.
for i in range(average):  # Iterates n times, n = number of average reviews.
    averageBar.next()  # Updates bar length.
print(' : ' + str(int(round(average / total, 2) * 100)) +
      '%')  # Prints number of reviews and percentage.
Beispiel #24
0
def main(args):

    if not os.path.exists('results'):
        os.makedirs('results')

    if not os.path.exists('counters'):
        os.makedirs('counters')

    exp_type = utils.create_file_prefix(args.positive_fraction,
                                        args.with_delta, args.fraction,
                                        args.sampler_size, args.pop)

    send_strategy = SendStrategy.SendDelta(
    ) if args.with_delta else SendStrategy.SendVector()

    for dataset in args.datasets:
        print("Working on", dataset, "dataset")

        if not os.path.exists('results/{}'.format(dataset)):
            os.makedirs('results/{}'.format(dataset))

        if not os.path.exists('counters/{}'.format(dataset)):
            os.makedirs('counters/{}'.format(dataset))

        if args.create_dataset_files:
            # Read the dataset and prepare it for training, validation and test
            names = ['user_id', 'item_id', 'rating', 'utc']
            df = pd.read_csv('original_datasets/' + dataset + '.tsv',
                             sep='\t',
                             dtype={
                                 'rating': 'float64',
                                 'utc': 'int64'
                             },
                             header=0,
                             names=names)
            df = df.groupby('user_id').filter(lambda x: len(x) >= 20)
            print(df.shape[0], 'interactions read')
            df, _ = utils.convert_unique_idx(df, 'user_id')
            df, _ = utils.convert_unique_idx(df, 'item_id')
            user_size = len(df['user_id'].unique())
            item_size = len(df['item_id'].unique())
            print('Found {} users and {} items'.format(user_size, item_size))
            total_user_lists = utils.create_user_lists(df, user_size, 4)
            train_user_lists, validation_user_lists, test_user_lists = utils.split_train_test(
                total_user_lists,
                test_size=0.2,
                validation_size=args.validation_size)
            #train_interactions_size = sum([len(user_list) for user_list in train_user_lists])
            #print('{} interactions considered for training'.format(train_interactions_size))

            if not os.path.exists('sets'):
                os.makedirs('sets')
            with open('sets/{}_trainingset.tsv'.format(dataset), 'w') as out:
                for u, train_list in enumerate(train_user_lists):
                    for i in train_list:
                        out.write(
                            str(u) + '\t' + str(i) + '\t' + str(1) + '\n')
            with open('sets/{}_testset.tsv'.format(dataset), 'w') as out:
                for u, test_list in enumerate(test_user_lists):
                    for i in test_list:
                        out.write(
                            str(u) + '\t' + str(i) + '\t' + str(1) + '\n')
            continue

        df = pd.read_csv('sets/{}_trainingset.tsv'.format(dataset),
                         sep='\t',
                         names=['user_id', 'item_id', 'rating'])
        df, reverse_dict = utils.convert_unique_idx(df, 'item_id')
        user_size = len(df['user_id'].unique())
        item_size = len(df['item_id'].unique())
        print('Found {} users and {} items'.format(user_size, item_size))
        train_user_lists = utils.create_user_lists(df, user_size, 3)
        train_interactions_size = sum(
            [len(user_list) for user_list in train_user_lists])
        print('{} interactions considered for training'.format(
            train_interactions_size))

        if args.pop:
            print("Analyzing popularity... \r")
            most_popular_items = (args.pop,
                                  utils.get_popularity(train_user_lists))
            print("Done.")
        else:
            most_popular_items = None

        if args.pop == 3:
            splitting_epochs = [
                int(7 * args.n_epochs / 8),
                int(3 * args.n_epochs / 4),
                int(args.n_epochs / 2)
            ]

        # Set parameters based on arguments
        if args.fraction == 0:
            round_modifier = int(train_interactions_size)
        else:
            round_modifier = int(train_interactions_size /
                                 (args.fraction * user_size))
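        # For example (hypothetical numbers): with 100,000 training interactions,
        # 1,000 users and fraction = 0.1, round_modifier = 100,000 / (0.1 * 1,000)
        # = 1,000, i.e. one epoch corresponds to 1,000 federated rounds.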

        sampler_dict = {
            'single': 1,
            'uniform': int(train_interactions_size / user_size)
        }
        sampler_size = sampler_dict.get(args.sampler_size)

        # Build final triplet samplers
        triplet_samplers = [
            TripletSampler(train_user_lists[u], item_size, sampler_size)
            for u in range(user_size)
        ]

        for n_factors in args.n_factors:
            exp_setting_1 = "_F" + str(n_factors)
            for lr in args.lr:
                exp_setting_2 = exp_setting_1 + "_LR" + str(lr)

                # Create server and clients
                server_model = ServerModel(item_size, n_factors)
                server = Server(server_model, lr, args.fraction,
                                args.positive_fraction, args.mp, send_strategy,
                                most_popular_items)
                clients = [
                    Client(u, ClientModel(n_factors), triplet_samplers[u],
                           train_user_lists[u], sampler_size)
                    for u in range(user_size)
                ]

                # Start training
                for i in range(args.n_epochs * round_modifier):
                    if i % round_modifier == 0:
                        bar = IncrementalBar('Epoch ' +
                                             str(int(i / round_modifier + 1)),
                                             max=round_modifier)
                    bar.next()
                    server.train_model(clients)

                    if args.pop:
                        if args.pop == 3:
                            if len(splitting_epochs) > 0:
                                if (i + 1) % (splitting_epochs[-1] *
                                              round_modifier) == 0:
                                    splitting_epochs.pop()
                                    server.new_step()
                        else:
                            if (i + 1) % (args.step_every *
                                          round_modifier) == 0:
                                server.new_step()

                    # Evaluation
                    if ((i + 1) % (args.eval_every * round_modifier)) == 0:
                        exp_setting_3 = exp_setting_2 + "_I" + str(
                            (i + 1) / round_modifier)
                        results = server.predict(clients, max_k=100)
                        with open(
                                'results/{}/{}{}.tsv'.format(
                                    dataset, exp_type, exp_setting_3),
                                'w') as out:
                            for u in range(len(results)):
                                for e, p in results[u].items():
                                    out.write(
                                        str(u) + '\t' + str(reverse_dict[e]) +
                                        '\t' + str(p) + '\n')

                final_dict = {k: 0 for k in range(item_size)}
                for i in server.big_list:
                    final_dict[i] += 1

                with open('counters/{}/{}.tsv'.format(dataset, exp_type),
                          'w') as out:
                    for k, v in final_dict.items():
                        out.write(str(reverse_dict[k]) + '\t' + str(v) + '\n')
Beispiel #25
0
def laba3(db_file_name, count_range, schema, schema_data):
    results = {
        'linear': [],
        'binary': [],
        'binary+sort': [],
        'multimap': [],
        'hashtable_map_good': [],
        'hashtable_map_bad': [],
        'bad_collisions': [],
        'good_collisions': []
    }
    key = 'fio'
    max_count_iterations = 2
    iterations = len(count_range)
    bar = IncrementalBar('Countdown', max=iterations)
    bar.start()

    for count in count_range:
        bar.next()
        print('\n')

        for count_iterations in range(max_count_iterations):
            generate(db_file_name, count, schema, schema_data)
            fp_map = defaultdict(list)
            fp_list = load_fp_from_file(db_file_name)
            query_obj = random.choice(fp_list)
            query = getattr(query_obj, key)

            print('check lin')
            linear = check_time(linear_search)(fp_list, key, query)
            print('check sort+bin')
            sort_and_bin_search = check_time(sort_and_binary_seach)(fp_list,
                                                                    key, query)
            print('check bin')
            bin_search = check_time(binary_search)(fp_list, key, query)
            print('check multimap')
            map_search = check_time(fp_map.__getitem__)(query)

            print('check hashtable good')
            fp_custom_map_good = HashTable()
            for el in fp_list:
                el.set_hash_type('good')
                fp_map[getattr(el, key)].append(el)
                fp_custom_map_good.add(el)
            query_obj.set_hash_type('good')
            custom_map_good_search = check_time(fp_custom_map_good.get)(
                Hashes.good_hash(query))

            print('check hashtable bad')
            fp_custom_map_bad = HashTable()
            for el in fp_list:
                el.set_hash_type('bad')
                fp_custom_map_bad.add(el)
            query_obj.set_hash_type('bad')
            custom_map_bad_search = check_time(fp_custom_map_bad.get)(
                Hashes.bad_hash(query))

            results['linear'].append((count, linear))
            results['binary'].append((count, bin_search))
            results['binary+sort'].append((count, sort_and_bin_search))
            results['multimap'].append((count, map_search))
            results['hashtable_map_good'].append(
                (count, custom_map_good_search))
            results['hashtable_map_bad'].append((count, custom_map_bad_search))
            results['bad_collisions'].append(
                (count, fp_custom_map_bad.collision_count))
            results['good_collisions'].append(
                (count, fp_custom_map_good.collision_count))

    plot_graph(results, count_range, max_count_iterations)
    print('bad_collisions: ', results['bad_collisions'])
    print('good_collisions: ', results['good_collisions'])
    bar.finish()
    return results
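
The check_time helper used above is not part of this snippet; a plausible minimal sketch of such a timing wrapper (an assumption, not the original implementation) is:

import time

def check_time(func):
    """Wrap func so that calling the wrapper returns elapsed wall-clock seconds."""
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        func(*args, **kwargs)                    # the result is discarded, only the timing is kept
        return time.perf_counter() - start
    return wrapper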
Beispiel #26
0
def animate_pixels(imfile1,imfile2,outfile,color=False,verbose=False):
    """Animates a pixel-motion transition between two images. Images must have
    the exact same number of pixels. Animation is saved as "outfile".

    Parameters
    ----------
    imfile1 : str or file object
        The file name or file object for the first image
    imfile2 : str or file object
        The file name or file object for the second image
    outfile : str
        The output file name
    color : bool, optional
        If True, runs in color mode
    verbose : bool, optional
        If True, displays a progress bar in the console
    """

    # Read in images
    if color:
        img1 = np.array(imread(imfile1))/255
        img2 = np.array(imread(imfile2))/255
    else:
        img1 = np.array(imread(imfile1,as_gray=True))/255
        img2 = np.array(imread(imfile2,as_gray=True))/255

    # Check number of pixels
    if img1.shape[0]*img1.shape[1] != img2.shape[0]*img2.shape[1]:
        raise ValueError("Images must have the name number of pixels")

    # Sort pixels by saturation (if grayscale) or hue (if color)
    if verbose: bar1 = IncrementalBar("Sorting\t\t", max=2,suffix='%(percent)d%%')
    if color: rows1,cols1,colors1 = color_to_coords(img1)
    else: rows1,cols1,colors1 = grayscale_to_coords(img1)
    if verbose: bar1.next()
    if color: rows2,cols2,colors2 = color_to_coords(img2)
    else: rows2,cols2,colors2 = grayscale_to_coords(img2)
    if verbose: bar1.next(); bar1.finish()

    # n is number of frames of one-directional transition
    # buffer is number of stationary frames before and after the transitions
    # total is number of frames for two transitions with 2 buffer periods each
    n=100
    buffer = 10
    total = 2*n+4*buffer

    # np.linspace creates evenly spaced position and color arrays for transition
    if verbose: bar2 = IncrementalBar("Interpolating\t",max=4,suffix='%(percent)d%%')
    colors = np.linspace(colors1,colors2,n)
    if verbose: bar2.next()
    rows = np.linspace(rows1+.5,rows2+.5,n)
    if verbose: bar2.next()
    cols = np.linspace(cols1+.5,cols2+.5,n)
    if verbose: bar2.next()
    pos = np.dstack((rows,cols))
    if verbose: bar2.next(); bar2.finish()

    # Calculate the aspect ratio of the two images
    aspect_ratio1 = img1.shape[0]/img1.shape[1]
    aspect_ratio2 = img2.shape[0]/img2.shape[1]

    plt.ioff()
    # Figure will always have default matplotlib 6.4 inch width
    fig = plt.figure(figsize=(6.4,max(aspect_ratio1,aspect_ratio2)*6.4))
    ax = fig.add_subplot(111)
    ax.set_aspect("equal")
    plt.axis("off")
    plt.xlim((0,max(img1.shape[1],img2.shape[1])))
    plt.ylim((0,max(img1.shape[0],img2.shape[0])))

    # Markers are measured in points, which are 1/72nd of an inch. Calculates
    # pixel size in points
    pixels = max(img1.shape[1],img2.shape[1])
    pixels_per_inch = pixels/6.4
    size = 72/pixels_per_inch

    # core object is a scatter plot with square markers set to pixel size
    if color:
        points = ax.scatter(rows[0],cols[0],c=colors1,marker='s',s=size**2)
    else:
        points = ax.scatter(rows[0],cols[0],c=colors1,cmap="gray",marker='s',s=size**2,vmin=0,vmax=1)

    # update function changes the scatter plot at each frame
    # set_color works for rgb, set_array works for grayscale
    def update(j):
        if j >= buffer and j < buffer+n:
            i = j-buffer
            points.set_offsets(pos[i])
            if color: points.set_color(colors[i])
            else: points.set_array(colors[i])
        elif j >= 3*buffer+n and j < 3*buffer+2*n:
            i = n-(j-(3*buffer+n))-1
            points.set_offsets(pos[i])
            if color: points.set_color(colors[i])
            else: points.set_array(colors[i])
        if verbose: bar3.next()

    if verbose: bar3 = IncrementalBar("Rendering\t",max=total,suffix='%(percent)d%%')

    # Create FuncAnimation with a 60-millisecond interval between frames
    ani = animation.FuncAnimation(fig,update,frames=total,interval=60)

    # Save animation and close the figure
    ani.save(outfile)
    if verbose: bar3.next(); bar3.finish()
    plt.close(fig)
    plt.ion()
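
A hypothetical invocation of the function above (file names are placeholders; both images must contain the same number of pixels):

animate_pixels('portrait.png', 'landscape.png', 'transition.gif', color=True, verbose=True)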
Beispiel #27
0
def trash_videos(time_limit, extensions, trash_folder_name, sudo):
    """Trash the videos that are shorter than time_limit to get rid of
    the shooting errors.

    Parameters
    ----------
    time_limit : int
        Duration limit. If a video has a duration smaller than time_limit, it is
        moved into trash_folder_name.
    extensions : dict
        Contains the lists of extensions for each type of file.
    trash_folder_name : string
        Name of the folder where to put the trashed videos. Equal to 'Trash' by
        default but can be changed in the video-logging/data.yaml file.
    sudo : bool
        Whether sudo mode is activated or not.
    """
    def move_to_trash(file, duration, trash_folder_name):
        """Move a video to trash if it is too short.

        Check if a directory named trash_folder_name exists in current directory.
        If not, create it. Then, move `file` in trash_folder_name if `duration`
        is smaller than `time_limit`.

        Parameters
        ----------
        file : string
            File to check.
        duration : int
            Duration of video file.
        trash_folder_name : string
            Name of the folder where to put the trashed videos. Equal to 'Trash'
            by default but can be changed in the video-logging/data.yaml file.
        """
        if duration < time_limit:
            if os.path.exists(trash_folder_name
                              ):  # if 'trash_folder_name' already exists
                if os.path.isfile(
                        trash_folder_name
                ):  # if 'trash_folder_name' is a regular file
                    raise BadFolderName(
                        f"You have a file named '{trash_folder_name}' in the current working directory, which is not a valid file name because this tool uses it as a directory name. You may consider changing the 'trash_folder_name' default in 'data.yaml'."
                    )
                else:  # if 'trash_folder_name' is a directory
                    pass
            else:  # if 'trash_folder_name' does not exist
                os.mkdir(f'./{trash_folder_name}')

            os.rename(file, os.path.join(trash_folder_name, file))
            return True
        return False

    check_parent(sudo)
    n = get_number_files(extensions, directory='Videos')
    if n == 0:
        raise EmptyFolder(
            "Nothing to do here, this folder does not countain any video.")

    bar = IncrementalBar(f"Trashing videos of duration <= {time_limit}s...",
                         max=n)
    nb_trashed = 0
    for file in os.listdir():
        extension = os.path.splitext(file)[1]
        if extension in extensions['Videos']:
            with VideoFileClip(file) as clip:
                # we need to wait a little so that bad things do not happen
                time.sleep(.001)
                duration = clip.duration
            is_moved = move_to_trash(
                file, duration,
                trash_folder_name)  # warning: side effect happening here
            if is_moved:
                nb_trashed += 1
            bar.next()

    bar.finish()
    term = "s" if nb_trashed >= 2 else ""
    return f"{nb_trashed} video{term} trashed."
Beispiel #28
0
                    # Check the loss
                    loss = error_function(output, labels)
                    optimiser.zero_grad()

                    loss.backward()

                    return loss

                inputs, labels = inputs.to(device), labels.to(device)

                # Call the closure and read the loss
                loss = optimiser.step(closure)

                training_loss += loss.item() * inputs.size(0)

                training_bar.next()

            # Training timer
            training_time = helper.with_decimal_places(time() - training_timer,
                                                       2)
            print(f" | time taken: {training_time} seconds")

            validation_bar = IncrementalBar(
                message='Validating',
                max=len(validation_loader),
                suffix="%(percent)d%% [%(elapsed_td)s / %(eta_td)s]")
            validation_timer = time()

            # -------------------
            #   VALIDATION STEP
            # -------------------
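
For context, a minimal self-contained sketch of the closure-based optimiser.step() pattern used in this fragment (the model, data and optimiser below are hypothetical; any PyTorch optimiser that accepts a closure, such as torch.optim.LBFGS, follows the same protocol):

import torch

model = torch.nn.Linear(4, 1)                    # hypothetical model
error_function = torch.nn.MSELoss()
optimiser = torch.optim.LBFGS(model.parameters(), lr=0.1)
inputs, labels = torch.randn(8, 4), torch.randn(8, 1)

def closure():
    optimiser.zero_grad()                        # reset gradients before the backward pass
    loss = error_function(model(inputs), labels)
    loss.backward()                              # populate .grad for the optimiser
    return loss                                  # the optimiser may re-evaluate the closure

loss = optimiser.step(closure)                   # LBFGS calls closure() one or more times
print(loss.item())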
Beispiel #29
0
    # Get coordinate
    coord = C['coord']
    filepaths = C['filepaths']

    # Iterate over filepaths for this coordinate
    for filepath in filepaths:
        try:
            with h5py.File(filepath, 'r') as f:  # Open HDF5 file

                # Find tile coords, Slice tiles and write files
                ind = couple_indexer(f, coord)
                imgcnt += couple_slicer(f, ind, hdfcnt, coord, targetpath)

                # Finish up iteration
                hdfcnt += 1  # Count processed files
                bar.next()  # Show progress bar

        # Break out of loops in case of file error
        except OSError:
            print('Error opening file:\n' + filepath)
            fileerror = True
            break

    if fileerror:
        break

bar.finish()  # Finish progress bar

# Report done if no error occurred
if not fileerror:
    print('Done processing {} files. Wrote {} image files.'.format(
        hdfcnt, imgcnt))
Beispiel #30
0
def install(package_list):
    '''
    Install A Specified Package(s)
    '''
    password = getpass('Enter your password: ')
    packages = package_list.split(',')
    turbocharge = Installer()
    click.echo('\n')
    os_bar = IncrementalBar('Getting Operating System...', max = 1)
    os_bar.next()
    for package_name in packages:
        package_name = package_name.strip(' ')
        if platform == 'linux':
            click.echo('\n')
            finding_bar = IncrementalBar('Finding Requested Packages...', max = 1)

            if package_name in devpackages:
                show_progress(finding_bar)
                turbocharge.install_task(devpackages[package_name], f'sudo -S apt-get install -y {package_name}', password, f'{package_name} --version', [f'{devpackages[package_name]} Version'])

            if package_name in applications:
                show_progress(finding_bar)
                turbocharge.install_task(applications[package_name], f'sudo -S snap install --classic {package_name}', password, '', [])

            if package_name == 'chrome':
                show_progress(finding_bar)
                try:    
                    click.echo('\n')
                    password = getpass("Enter your password: "******"wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb".split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    proc.wait()
                    second = Popen("sudo -S apt-get install -y ./google-chrome-stable_current_amd64.deb".split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    # Popen only accepts byte-arrays so you must encode the string
                    second.communicate(password.encode())
                    
                    # stdoutput = (output)[0].decode('utf-8') 
                    click.echo(click.style('\n\n 🎉 Successfully Installed Chrome! 🎉 \n'))             
                    # Testing the successful installation of the package
                    testing_bar = IncrementalBar('Testing package...', max = 100)
                    for _ in range(1, 21):
                        time.sleep(0.045)
                        testing_bar.next()
                    os.system('cd --')
                    for _ in range(21, 60):
                        time.sleep(0.045)
                        testing_bar.next()
                    for _ in range(60, 101):
                        time.sleep(0.03)
                        testing_bar.next()
                    click.echo('\n')
                    click.echo(click.style('Test Passed: Chrome Launch ✅\n', fg='green'))
                except  subprocess.CalledProcessError as e:
                    click.echo(e.output)
                    click.echo('An Error Occurred During Installation...', err = True)
       
            if package_name == 'anaconda':
                show_progress(finding_bar)
                username = getuser()
                try:    
                    installer_progress = Spinner(message=f'Installing {package_name}...', max=100)
                    # sudo requires the flag '-S' in order to take input from stdin
                    for _ in range(1, 35):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system("wget https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh -O ~/anaconda.sh")
                    for _ in range(35, 61):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system('bash ~/anaconda.sh -b -p $HOME/anaconda3')
                    for _ in range(61, 91):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system(f'echo "export PATH="/home/{username}/anaconda3/bin:$PATH"" >> ~/.bashrc')
                    # Popen only accepts byte-arrays so you must encode the string
                    proc.communicate(password.encode())
                    for _ in range(90, 101):
                        time.sleep(0.01)
                        installer_progress.next()
                    # stdoutput = (output)[0].decode('utf-8')
                    click.echo(click.style(f'\n\n 🎉 Successfully Installed {package_name}! 🎉 \n'))
                except  subprocess.CalledProcessError as e:
                    click.echo(e.output)
                    click.echo('An Error Occurred During Installation...', err = True)

            if package_name == 'miniconda':
                show_progress(finding_bar)
                username = getuser()
                try:    
                    installer_progress = Spinner(message=f'Installing {package_name}...', max=100)
                    # sudo requires the flag '-S' in order to take input from stdin
                    for _ in range(1, 35):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system("wget https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh -O ~/miniconda.sh")
                    for _ in range(35, 61):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system('bash ~/anaconda.sh -b -p $HOME/anaconda3')
                    for _ in range(61, 91):
                        time.sleep(0.01)
                        installer_progress.next()
                    os.system(f'echo "export PATH="/home/{username}/anaconda3/bin:$PATH"" >> ~/.bashrc')
                    for _ in range(90, 101):
                        time.sleep(0.01)
                        installer_progress.next()
                    # stdoutput = (output)[0].decode('utf-8')
                    click.echo(click.style(f'\n\n 🎉 Successfully Installed {package_name}! 🎉 \n'))
                except  subprocess.CalledProcessError as e:
                    click.echo(e.output)
                    click.echo('An Error Occurred During Installation...', err = True)

            elif package_name not in devpackages and package_name not in applications and package_name != 'chrome' and package_name != 'anaconda' and package_name != 'miniconda':
                click.echo('\n')
                click.echo(click.style(':( Package Not Found! :(', fg='red'))
Beispiel #31
0
 def root_search(self):
     roots = []
     prestems = []
     poststems = []
     bar = IncrementalBar('Searching for prestems and poststems...',
                          max=len(self.text.split(' ')))
     for u in self.text.split(' '):
         if u in self.roots:
             bar.next()
             continue
         for s in self.suffixes:
             if u.endswith(s):
                 prestems.append(u.rstrip(s))
                 break
         for p in self.prefixes:
             if u.startswith(p):
                 poststems.append(u.lstrip(p))
                 break
         bar.next()
     bar.finish()
     bar = IncrementalBar('Splitting prestems and prefixes...',
                          max=len(prestems))
     for u in prestems:
         coincidence_found = False
         for p in self.prefixes:
             if u.startswith(p):
                 roots.append(u.lstrip(p))
                 coincidence_found = True
                 break
         if coincidence_found == True:
             bar.next()
             continue
         else:
             roots.append(u)
             bar.next()
     bar.finish()
     bar = IncrementalBar('Splitting poststems and suffixes...',
                          max=len(poststems))
     for u in poststems:
         coincidence_found = False
         for s in self.suffixes:
             if u.endswith(s):
                 roots.append(u.rstrip(s))
                 coincidence_found = True
                 break
         if coincidence_found == True:
             bar.next()
             continue
         else:
             roots.append(u)
             bar.next()
     bar.finish()
     return roots
Beispiel #32
0
def migrate(callback):
    connection = op.get_bind()

    s = sa.select([n.c.node, n.c.path])
    nodes = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating node paths...', max=len(nodes))
    for node, path in nodes:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = n.update().where(n.c.node == node).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([v.c.muser]).distinct()
    musers = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating version modification users...',
                         max=len(musers)
    )
    for muser, in musers:
        match = callback(muser)
        if not match:
            bar.next()
            continue
        u = v.update().where(v.c.muser == muser).values({'muser': match})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([p.c.public_id, p.c.path])
    public = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating public paths...', max=len(public))
    for id, path in public:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = p.update().where(p.c.public_id == id).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([x.c.feature_id, x.c.path])
    xfeatures = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission paths...', max=len(xfeatures))
    for id, path in xfeatures:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = x.update().where(x.c.feature_id == id).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([xvals.c.feature_id, xvals.c.key, xvals.c.value])
    s = s.where(xvals.c.value != '*')
    xfeaturevals = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission holders...',
                         max=len(xfeaturevals))
    for feature_id, key, value in xfeaturevals:
        account, sep, group = value.partition(':')
        match = callback(account)
        if not match:
            bar.next()
            continue
        new_value = sep.join([match, group])
        u = xvals.update()
        u = u.where(and_(
                xvals.c.feature_id == feature_id,
                xvals.c.key == key,
                xvals.c.value == value))
        u = u.values({'value':new_value})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([g.c.owner, g.c.name, g.c.member])
    groups = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating group owners & members...',
                         max=len(groups))
    for owner, name, member in groups:
        owner_match = callback(owner)
        member_match = callback(member)
        if owner_match or member_match:
            u = g.update()
            u = u.where(and_(
                g.c.owner == owner,
                g.c.name == name,
                g.c.member == member))
            values = {}
            if owner_match:
                values['owner'] = owner_match
            if member_match:
                values['member'] = member_match
            u = u.values(values)
            connection.execute(u)
            bar.next()
    bar.finish()
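
For context, a minimal sketch of a callback that migrate() above could accept (the account mapping is hypothetical): the callback receives an account name and returns the renamed account, or a falsy value to leave that row untouched.

ACCOUNT_MAP = {'old-account': 'user@example.com'}    # hypothetical rename table

def rename_account(account):
    return ACCOUNT_MAP.get(account)                  # None -> skip this row

# migrate(rename_account)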
Beispiel #33
0
def jds_wf_simple_reader(directory, no_of_spectra_to_average, skip_data_blocks,
                         VminNorm, VmaxNorm, colormap, custom_dpi,
                         save_long_file_aver, dyn_spectr_save_init,
                         dyn_spectr_save_norm):

    current_time = time.strftime("%H:%M:%S")
    current_date = time.strftime("%d.%m.%Y")

    # *** Creating a folder where all pictures and results will be stored (if it doesn't exist) ***
    result_folder = 'RESULTS_JDS_waveform_' + directory.split('/')[-2]
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    service_folder = result_folder + '/Service'
    if not os.path.exists(service_folder):
        os.makedirs(service_folder)
    if dyn_spectr_save_init == 1:
        initial_spectra_folder = result_folder + '/Initial spectra'
        if not os.path.exists(initial_spectra_folder):
            os.makedirs(initial_spectra_folder)

    # *** Search JDS files in the directory ***

    file_list = find_files_only_in_current_folder(directory, '.jds', 1)
    print('')

    if len(
            file_list
    ) > 1:  # Check if files have same parameters if there is more than one file in the list
        # Check if all files (except the last) have same size
        same_or_not = check_if_all_files_of_same_size(directory, file_list, 1)

        # Check if all files in this folder have the same parameters in headers
        equal_or_not = check_if_JDS_files_of_equal_parameters(
            directory, file_list)

        if same_or_not and equal_or_not:
            print(
                '\n\n\n        :-)  All files seem to be of the same parameters!  :-) \n\n\n'
            )
        else:
            print(
                '\n\n\n ************************************************************************************* '
            )
            print(
                ' *                                                                                   *'
            )
            print(
                ' *   Seems files in folders are different check the errors and restart the script!   *'
            )
            print(
                ' *                                                                                   *  '
                '\n ************************************************************************************* \n\n\n'
            )

            decision = int(
                input(
                    '* Enter "1" to start processing, or "0" to stop the script:     '
                ))
            if decision != 1:
                sys.exit(
                    '\n\n\n              ***  Program stopped! *** \n\n\n')

    # To print in console the header of first file
    print('\n  First file header parameters: \n')

    # *** Data file header read ***
    [
        df_filename, df_filesize, df_system_name, df_obs_place, df_description,
        CLCfrq, df_creation_timeUTC, Channel, ReceiverMode, Mode, Navr,
        TimeRes, fmin, fmax, df, frequency, freq_points_num, data_block_size
    ] = FileHeaderReaderJDS(directory + file_list[0], 0, 1)

    # Main loop by files start
    for file_no in range(len(file_list)):  # loop by files

        # *** Opening datafile ***
        fname = directory + file_list[file_no]

        # *********************************************************************************

        # *** Data file header read ***
        [
            df_filename, df_filesize, df_system_name, df_obs_place,
            df_description, CLCfrq, df_creation_timeUTC, Channel, ReceiverMode,
            Mode, Navr, TimeRes, fmin, fmax, df, frequency, freq_points_num,
            data_block_size
        ] = FileHeaderReaderJDS(fname, 0, 0)

        # Create long data files and copy first data file header to them
        if file_no == 0 and save_long_file_aver == 1:

            with open(fname, 'rb') as file:
                # *** Data file header read ***
                file_header = file.read(1024)

            # *** Creating a name for long timeline TXT file ***
            tl_file_name = df_filename + '_Timeline.txt'
            tl_file = open(
                tl_file_name,
                'w')  # Open and close to delete the file with the same name
            tl_file.close()

            # *** Creating a binary file with data for long data storage ***
            file_data_a_name = df_filename + '_Data_chA.dat'
            file_data_a = open(file_data_a_name, 'wb')
            file_data_a.write(file_header)
            file_data_a.seek(574)  # FFT size place in header
            file_data_a.write(np.int32(data_block_size).tobytes())
            file_data_a.seek(624)  # Lb place in header
            file_data_a.write(np.int32(0).tobytes())
            file_data_a.seek(628)  # Hb place in header
            file_data_a.write(np.int32(data_block_size / 2).tobytes())
            file_data_a.seek(632)  # Wb place in header
            file_data_a.write(np.int32(data_block_size / 2).tobytes())
            file_data_a.seek(636)  # Navr place in header
            file_data_a.write(
                bytes([np.int32(Navr * no_of_spectra_to_average)]))
            file_data_a.close()

            if Channel == 2:
                file_data_b_name = df_filename + '_Data_chB.dat'
                file_data_b = open(file_data_b_name, 'wb')
                file_data_b.write(file_header)
                file_data_b.seek(574)  # FFT size place in header
                file_data_b.write(np.int32(data_block_size).tobytes())
                file_data_b.seek(624)  # Lb place in header
                file_data_b.write(np.int32(0).tobytes())
                file_data_b.seek(628)  # Hb place in header
                file_data_b.write(np.int32(data_block_size / 2).tobytes())
                file_data_b.seek(632)  # Wb place in header
                file_data_b.write(np.int32(data_block_size / 2).tobytes())
                file_data_b.seek(636)  # Navr place in header
                file_data_b.write(
                    bytes([np.int32(Navr * no_of_spectra_to_average)]))
                file_data_b.close()

            del file_header

        # !!! Make automatic calculations of time and frequency resolutions for waveform mode!!!

        # Manually set frequencies for one channel mode

        if (Channel == 0 and int(CLCfrq / 1000000)
                == 66) or (Channel == 1 and int(CLCfrq / 1000000) == 66):
            freq_points_num = 8192
            frequency = np.linspace(0.0, 33.0, freq_points_num)

        # Manually set frequencies for two channels mode
        if Channel == 2 or (Channel == 0 and int(CLCfrq / 1000000) == 33) or (
                Channel == 1 and int(CLCfrq / 1000000) == 33):
            freq_points_num = 8192
            frequency = np.linspace(16.5, 33.0, freq_points_num)
        # For new receiver (temporary):
        if Channel == 2 and int(CLCfrq / 1000000) == 80:
            freq_points_num = 8192
            frequency = np.linspace(0.0, 40.0, freq_points_num)

        # Calculation of number of blocks and number of spectra in the file
        if Channel == 0 or Channel == 1:  # Single channel mode
            no_of_av_spectra_per_file = (df_filesize - 1024) / (
                2 * data_block_size * no_of_spectra_to_average)
        else:  # Two channels mode
            no_of_av_spectra_per_file = (df_filesize - 1024) / (
                4 * data_block_size * no_of_spectra_to_average)

        no_of_blocks_in_file = (df_filesize - 1024) / data_block_size

        no_of_av_spectra_per_file = int(no_of_av_spectra_per_file)
        fine_clock_frq = (int(CLCfrq / 1000000.0) * 1000000.0)

        # Real time resolution of averaged spectra
        real_av_spectra_dt = (1 / fine_clock_frq) * (
            data_block_size - 4) * no_of_spectra_to_average
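        # For example (hypothetical values): with fine_clock_frq = 66e6 Hz,
        # data_block_size = 16384 and no_of_spectra_to_average = 32, the time
        # step is (1 / 66e6) * (16384 - 4) * 32 ≈ 7.94 ms.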

        if file_no == 0:
            print(' Number of blocks in file:             ',
                  no_of_blocks_in_file)
            print(' Number of spectra to average:         ',
                  no_of_spectra_to_average)
            print(' Number of averaged spectra in file:   ',
                  no_of_av_spectra_per_file)
            print(' Time resolution of averaged spectrum: ',
                  round(real_av_spectra_dt * 1000, 3), ' ms.')
            print('\n  *** Reading data from file *** \n')

        # *******************************************************************************
        #                           R E A D I N G   D A T A                             *
        # *******************************************************************************

        with open(fname, 'rb') as file:
            # Skip the 1024-byte file header plus the requested number of data blocks
            file.seek(1024 + data_block_size * 4 * skip_data_blocks)

            # *** DATA READING process ***

            # Preparing arrays for dynamic spectra
            dyn_spectra_ch_a = np.zeros(
                (int(data_block_size / 2), no_of_av_spectra_per_file), float)
            if Channel == 2:  # Two channels mode
                dyn_spectra_ch_b = np.zeros(
                    (int(data_block_size / 2), no_of_av_spectra_per_file),
                    float)

            # !!! Fake timing. Real timing to be done!!!
            # TimeFigureScaleFig = np.linspace(0, no_of_av_spectra_per_file, no_of_av_spectra_per_file+1)
            # for i in range(no_of_av_spectra_per_file):
            #     TimeFigureScaleFig[i] = str(TimeFigureScaleFig[i])

            time_scale_fig = []
            time_scale_full = []

            bar = IncrementalBar(' File ' + str(file_no + 1) + ' of ' +
                                 str(len(file_list)) + ' reading: ',
                                 max=no_of_av_spectra_per_file,
                                 suffix='%(percent)d%%')

            for av_sp in range(no_of_av_spectra_per_file):

                bar.next()

                # Reading and reshaping all data with readers
                if Channel == 0 or Channel == 1:  # Single channel mode
                    wf_data = np.fromfile(file,
                                          dtype='i2',
                                          count=no_of_spectra_to_average *
                                          data_block_size)
                    wf_data = np.reshape(
                        wf_data, [data_block_size, no_of_spectra_to_average],
                        order='F')
                if Channel == 2:  # Two channels mode
                    wf_data = np.fromfile(file,
                                          dtype='i2',
                                          count=2 * no_of_spectra_to_average *
                                          data_block_size)
                    wf_data = np.reshape(
                        wf_data,
                        [data_block_size, 2 * no_of_spectra_to_average],
                        order='F')

                # Timing
                timeline_block_str = jds_waveform_time(wf_data, CLCfrq,
                                                       data_block_size)
                time_scale_fig.append(timeline_block_str[-1][0:12])
                time_scale_full.append(df_creation_timeUTC[0:10] + ' ' +
                                       timeline_block_str[-1][0:12])

                # Nulling the time blocks in waveform data
                wf_data[data_block_size - 4:data_block_size, :] = 0

                # Scaling of the data - seems to be wrong in absolute value
                wf_data = wf_data / 32768.0

                if Channel == 0 or Channel == 1:  # Single channel mode
                    wf_data_ch_a = wf_data  # All the data is channel A data
                    del wf_data  # Deleting unnecessary array to free the memory

                if Channel == 2:  # Two channels mode

                    # Resizing to obtain the matrix for separation of channels
                    wf_data_new = np.zeros(
                        (2 * data_block_size, no_of_spectra_to_average))
                    for i in range(2 * no_of_spectra_to_average):
                        if i % 2 == 0:
                            wf_data_new[0:data_block_size,
                                        int(i / 2)] = wf_data[:, i]  # Even
                        else:
                            wf_data_new[data_block_size:2 * data_block_size,
                                        int(i / 2)] = wf_data[:, i]  # Odd
                    del wf_data  # Deleting unnecessary array to free the memory

                    # Separating the data into two channels
                    wf_data_ch_a = np.zeros(
                        (data_block_size,
                         no_of_spectra_to_average))  # Preparing empty array
                    wf_data_ch_b = np.zeros(
                        (data_block_size,
                         no_of_spectra_to_average))  # Preparing empty array
                    wf_data_ch_a[:, :] = wf_data_new[0:(
                        2 * data_block_size):2, :]  # Separation to channel A
                    wf_data_ch_b[:, :] = wf_data_new[1:(
                        2 * data_block_size):2, :]  # Separation to channel B
                    del wf_data_new

                # preparing matrices for spectra
                spectra_ch_a = np.zeros_like(wf_data_ch_a)
                if Channel == 2:
                    spectra_ch_b = np.zeros_like(wf_data_ch_b)

                # Calculation of spectra
                for i in range(no_of_spectra_to_average):
                    spectra_ch_a[:, i] = np.power(
                        np.abs(np.fft.fft(wf_data_ch_a[:, i])), 2)
                    if Channel == 2:  # Two channels mode
                        spectra_ch_b[:, i] = np.power(
                            np.abs(np.fft.fft(wf_data_ch_b[:, i])), 2)

                # Storing only first (left) mirror part of spectra
                spectra_ch_a = spectra_ch_a[:int(data_block_size / 2), :]
                if Channel == 2:
                    spectra_ch_b = spectra_ch_b[:int(data_block_size / 2), :]

                # At 33 MHz the spectrum is usually upside down; to correct it we use flip up/down
                if int(CLCfrq / 1000000) == 33:
                    spectra_ch_a = np.flipud(spectra_ch_a)
                    if Channel == 2:
                        spectra_ch_b = np.flipud(spectra_ch_b)

                # Plotting first waveform block and first immediate spectrum in a file
                if av_sp == 0:  # First data block in a file
                    i = 0  # First immediate spectrum in a block

                    # Prepare parameters for plot
                    data_1 = wf_data_ch_a[:, i]
                    if Channel == 0 or Channel == 1:  # Single channel mode
                        no_of_sets = 1
                        data_2 = []
                    if Channel == 2:
                        no_of_sets = 2
                        data_2 = wf_data_ch_b[:, i]

                    suptitle = ('Waveform data, first block in file ' +
                                str(df_filename))
                    Title = (ReceiverMode + ', Fclock = ' +
                             str(round(CLCfrq / 1000000, 1)) +
                             ' MHz, Description: ' + str(df_description))

                    TwoOrOneValuePlot(
                        no_of_sets,
                        np.linspace(no_of_sets, data_block_size,
                                    data_block_size), data_1, data_2,
                        'Channel A', 'Channel B', 1, data_block_size, -0.6,
                        0.6, -0.6, 0.6, 'ADC clock counts', 'Amplitude, V',
                        'Amplitude, V', suptitle, Title, service_folder + '/' +
                        df_filename[0:14] + ' Waveform first data block.png',
                        current_date, current_time, software_version)

                    # Prepare parameters for plot
                    data_1 = 10 * np.log10(spectra_ch_a[:, i])
                    if Channel == 0 or Channel == 1:  # Single channel mode
                        no_of_sets = 1
                        data_2 = []
                    if Channel == 2:
                        no_of_sets = 2
                        data_2 = 10 * np.log10(spectra_ch_b[:, i])

                    suptitle = ('Immediate spectrum, first in file ' +
                                str(df_filename))
                    Title = (ReceiverMode + ', Fclock = ' +
                             str(round(CLCfrq / 1000000, 1)) +
                             ' MHz, Description: ' + str(df_description))

                    TwoOrOneValuePlot(
                        no_of_sets, frequency, data_1, data_2, 'Channel A',
                        'Channel B', frequency[0], frequency[-1], -80, 60, -80,
                        60, 'Frequency, MHz', 'Intensity, dB', 'Intensity, dB',
                        suptitle, Title,
                        service_folder + '/' + df_filename[0:14] +
                        ' Immediate spectrum first in file.png', current_date,
                        current_time, software_version)

                # Deleting the unnecessary matrices
                del wf_data_ch_a
                if Channel == 2:
                    del wf_data_ch_b

                # Calculation of the averaged spectrum
                aver_spectra_ch_a = spectra_ch_a.mean(axis=1)[:]
                if Channel == 2:
                    aver_spectra_ch_b = spectra_ch_b.mean(axis=1)[:]

                # Plotting only first averaged spectrum
                if av_sp == 0:

                    # Prepare parameters for plot
                    data_1 = 10 * np.log10(aver_spectra_ch_a)
                    if Channel == 0 or Channel == 1:  # Single channel mode
                        no_of_sets = 1
                        data_2 = []
                    if Channel == 2:
                        no_of_sets = 2
                        data_2 = 10 * np.log10(aver_spectra_ch_b)

                    suptitle = ('Average spectrum, first data block in file ' +
                                str(df_filename))
                    Title = (ReceiverMode + ', Fclock = ' +
                             str(round(CLCfrq / 1000000, 1)) +
                             ' MHz, Averaged spectra: ' +
                             str(no_of_spectra_to_average) +
                             ', Description: ' + str(df_description))

                    TwoOrOneValuePlot(
                        no_of_sets, frequency, data_1, data_2, 'Channel A',
                        'Channel B', frequency[0], frequency[-1], -80, 60, -80,
                        60, 'Frequency, MHz', 'Intensity, dB', 'Intensity, dB',
                        suptitle, Title,
                        service_folder + '/' + df_filename[0:14] +
                        ' Average spectrum first data block in file.png',
                        current_date, current_time, software_version)

                # Adding calculated averaged spectrum to dynamic spectra array
                dyn_spectra_ch_a[:, av_sp] = aver_spectra_ch_a[:]
                if Channel == 2:
                    dyn_spectra_ch_b[:, av_sp] = aver_spectra_ch_b[:]

            bar.finish()

        # file.close()  # Close the data file

        # Saving averaged spectra to long data files
        if save_long_file_aver == 1:
            temp = dyn_spectra_ch_a.transpose().copy(order='C')
            file_data_a = open(file_data_a_name, 'ab')
            file_data_a.write(temp)
            file_data_a.close()
            if Channel == 2:
                temp = dyn_spectra_ch_b.transpose().copy(order='C')
                file_data_b = open(file_data_b_name, 'ab')
                file_data_b.write(temp)
                file_data_b.close()

            # Saving time data to long timeline file
            with open(tl_file_name, 'a') as tl_file:
                for i in range(no_of_av_spectra_per_file):
                    tl_file.write((time_scale_full[i][:]) + ' \n')  # str
            del time_scale_full

        # Log data (make dB scale)
        with np.errstate(invalid='ignore', divide='ignore'):
            dyn_spectra_ch_a = 10 * np.log10(dyn_spectra_ch_a)
            if Channel == 2:
                dyn_spectra_ch_b = 10 * np.log10(dyn_spectra_ch_b)

        # If the data contains minus-infinity values, replace them with a fixed value
        dyn_spectra_ch_a[np.isinf(dyn_spectra_ch_a)] = 40
        if Channel == 2:
            dyn_spectra_ch_b[np.isinf(dyn_spectra_ch_b)] = 40

        # *******************************************************************************
        #             P L O T T I N G    D Y N A M I C    S P E C T R A                 *
        # *******************************************************************************

        # if dyn_spectr_save_init == 1 or dyn_spectr_save_norm == 1:
        #    print('\n  *** Making figures of dynamic spectra *** \n')

        if dyn_spectr_save_init == 1:
            # Plot of initial dynamic spectra

            v_min_a = np.min(dyn_spectra_ch_a)
            v_max_a = np.max(dyn_spectra_ch_a)
            v_min_b = v_min_a
            v_max_b = v_max_a
            if Channel == 2:
                v_min_b = np.min(dyn_spectra_ch_b)
                v_max_b = np.max(dyn_spectra_ch_b)

            if Channel == 0 or Channel == 1:  # Single channel mode
                dyn_spectra_ch_b = dyn_spectra_ch_a

            suptitle = ('Dynamic spectrum (initial) ' + str(df_filename) +
                        ' - Fig. ' + str(1) + ' of ' + str(1) +
                        '\n Initial parameters: dt = ' +
                        str(round(TimeRes * 1000., 3)) + ' ms, df = ' +
                        str(round(df / 1000., 3)) + ' kHz, Receiver: ' +
                        str(df_system_name) + ', Place: ' + str(df_obs_place) +
                        '\n' + ReceiverMode + ', Fclock = ' +
                        str(round(CLCfrq / 1000000, 1)) +
                        ' MHz, Averaged spectra: ' +
                        str(no_of_spectra_to_average) + ' (' +
                        str(round(no_of_spectra_to_average * TimeRes, 3)) +
                        ' sec.), Description: ' + str(df_description))

            fig_file_name = (initial_spectra_folder + '/' + df_filename[0:14] +
                             ' Initial dynamic spectrum fig.' + str(0 + 1) +
                             '.png')

            if Channel == 0 or Channel == 1:  # Single channel mode
                OneDynSpectraPlot(dyn_spectra_ch_a, v_min_a, v_max_a, suptitle,
                                  'Intensity, dB', no_of_av_spectra_per_file,
                                  time_scale_fig, frequency, freq_points_num,
                                  colormap, 'UTC Time, HH:MM:SS.msec',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)

            if Channel == 2:
                TwoDynSpectraPlot(dyn_spectra_ch_a, dyn_spectra_ch_b, v_min_a,
                                  v_max_a, v_min_b, v_max_b, suptitle,
                                  'Intensity, dB', 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig,
                                  time_scale_fig, frequency, freq_points_num,
                                  colormap, 'Channel A', 'Channel B',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)

        if dyn_spectr_save_norm == 1:

            # Normalization and cleaning of data

            Normalization_dB(dyn_spectra_ch_a.transpose(), freq_points_num,
                             no_of_av_spectra_per_file)
            if Channel == 2:
                Normalization_dB(dyn_spectra_ch_b.transpose(), freq_points_num,
                                 no_of_av_spectra_per_file)

            simple_channel_clean(dyn_spectra_ch_a, 8)
            if Channel == 2:
                simple_channel_clean(dyn_spectra_ch_b, 8)

            # Plot of normalized and cleaned dynamic spectra

            suptitle = ('Normalized and cleaned dynamic spectrum (initial) ' +
                        str(df_filename) + ' - Fig. ' + str(0 + 1) + ' of ' +
                        str(1) + '\n Initial parameters: dt = ' +
                        str(round(TimeRes * 1000, 3)) + ' ms, df = ' +
                        str(round(df / 1000., 3)) + ' kHz, Receiver: ' +
                        str(df_system_name) + ', Place: ' + str(df_obs_place) +
                        '\n' + ReceiverMode + ', Fclock = ' +
                        str(round(CLCfrq / 1000000, 1)) +
                        ' MHz, Averaged spectra: ' +
                        str(no_of_spectra_to_average) + ' (' +
                        str(round(no_of_spectra_to_average * TimeRes, 3)) +
                        ' sec.), Description: ' + str(df_description))

            fig_file_name = (result_folder + '/' + df_filename[0:14] +
                             ' Normalized and cleaned dynamic spectrum fig.' +
                             str(0 + 1) + '.png')

            if Channel == 0 or Channel == 1:  # Single channel mode
                OneDynSpectraPlot(dyn_spectra_ch_a, VminNorm, VmaxNorm,
                                  suptitle, 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig,
                                  frequency, freq_points_num, colormap,
                                  'UTC Time, HH:MM:SS.msec', fig_file_name,
                                  current_date, current_time, software_version,
                                  custom_dpi)
            if Channel == 2:
                TwoDynSpectraPlot(dyn_spectra_ch_a, dyn_spectra_ch_b, VminNorm,
                                  VmaxNorm, VminNorm, VmaxNorm, suptitle,
                                  'Intensity, dB', 'Intensity, dB',
                                  no_of_av_spectra_per_file, time_scale_fig,
                                  time_scale_fig, frequency, freq_points_num,
                                  colormap, 'Channel A', 'Channel B',
                                  fig_file_name, current_date, current_time,
                                  software_version, custom_dpi)
        del time_scale_fig, file_data_a
        if Channel == 2:
            del file_data_b

    results_files_list = []
    results_files_list.append(file_data_a_name)
    if Channel == 2:
        results_files_list.append(file_data_b_name)

    return results_files_list
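The core of the example above is a block-averaging pipeline: take a waveform block, compute the FFT power spectrum, keep the non-mirrored half, average the spectra and convert to dB. A minimal, self-contained sketch of that pattern, assuming the block is already in memory rather than read from the script's ADC file format:

# Minimal sketch of the block-averaging pattern above (assumes a real-valued
# (block_size, n_spectra) waveform array already loaded into memory).
import numpy as np
from progress.bar import IncrementalBar

def average_power_spectrum_db(wf_block):
    block_size, n_spectra = wf_block.shape
    bar = IncrementalBar('Averaging spectra', max=n_spectra)
    acc = np.zeros(block_size // 2)
    for i in range(n_spectra):
        power = np.abs(np.fft.fft(wf_block[:, i])) ** 2
        acc += power[:block_size // 2]          # keep the non-mirrored half
        bar.next()
    bar.finish()
    with np.errstate(divide='ignore'):
        return 10 * np.log10(acc / n_spectra)   # averaged spectrum in dB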
Beispiel #34
0
    def _run_epoch(
        self,
        model,
        dataloader,
        optimize=False,
        save_activations=False,
        reweight=None,
        bit_pretrained=False,
        adv_metrics=False,
    ):
        """Runs the model on a given dataloader.

        Note:
            The latter item in the returned tuple is what is necessary to run
            GEORGECluster.train and GEORGECluster.evaluate.

        Args:
            model(nn.Module): A PyTorch model.
            dataloader(DataLoader): The dataloader. The dataset within must
                subclass GEORGEDataset.
            optimize(bool, optional): If True, the model is trained on self.criterion.
            save_activations(bool, optional): If True, saves the activations in
                `outputs`. Default is False.
            reweight(Tensor, optional): Per-true-subclass weights applied when
                computing the reweighted metrics. Default is None.
            bit_pretrained(bool, optional): If True, assumes a BiT-pretrained
                model and skips evaluation of the performance metrics.
            adv_metrics(bool, optional): If True, logs additional metrics
                (per-subclass accuracies and average precision).

        Returns:
            metrics(Dict[str, Any]) A dictionary object that stores the metrics defined
                in self.config['metric_types'].
            outputs(Dict[str, Any]) A dictionary object that stores artifacts necessary
                for model analysis, including labels, activations, and predictions.
        """
        dataset = dataloader.dataset
        self._check_dataset(dataset)
        type_to_num_classes = {
            label_type: dataset.get_num_classes(label_type)
            for label_type in LABEL_TYPES
            if label_type in dataset.Y_dict.keys()
        }
        outputs = {
            "metrics": None,
            "activations": [],
            "superclass": [],
            "subclass": [],
            "true_subclass": [],
            "alt_subclass": [],
            "targets": [],
            "probs": [],
            "preds": [],
            "losses": [],
            "reweight": [],
        }
        activations_handle = self._init_activations_hook(
            model, outputs["activations"])
        if optimize:
            progress_prefix = "Training"
            model.train()
        else:
            progress_prefix = "Evaluation"
            model.eval()

        per_class_meters = self._init_per_class_meters(type_to_num_classes)
        metric_meters = {
            k: AverageMeter()
            for k in ["loss", "acc", "loss_rw", "acc_rw"]
        }

        progress = self.config["show_progress"]
        if progress:
            bar = ProgressBar(progress_prefix, max=len(dataloader), width=50)

        for batch_idx, (inputs, targets) in enumerate(dataloader):
            batch_size = len(inputs)
            if self.use_cuda:
                inputs, targets = move_to_device([inputs, targets],
                                                 device=self.device)

            type_to_labels = {}
            for label_type in type_to_num_classes.keys():
                type_to_labels[label_type] = targets[label_type]
                outputs[label_type].append(targets[label_type])

            if optimize and not bit_pretrained:
                logits = model(inputs)
                loss_targets = targets["superclass"]
                co = self.criterion(logits, loss_targets, targets["subclass"])
                loss, (losses, corrects), _ = co
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            else:
                with torch.no_grad():
                    logits = model(inputs)
                    loss_targets = targets["superclass"]
                    if bit_pretrained:
                        if progress:
                            bar.suffix = PROGRESS_BAR_SUFFIX.format(
                                batch=batch_idx + 1,
                                size=len(dataloader),
                                total=format_timedelta(bar.elapsed_td),
                                eta=format_timedelta(bar.eta_td),
                                **{k: 0
                                   for k in prog_metric_names},
                            )
                            bar.next()
                        continue
                    co = self.criterion(logits, loss_targets,
                                        targets["subclass"])
                    loss, (losses, corrects), _ = co

            if not save_activations:
                outputs["activations"].pop()  # delete activations

            reweight_vec = (None if reweight is None else
                            reweight[targets["true_subclass"]])

            metrics = self._compute_progress_metrics(
                losses,
                corrects,
                type_to_labels,
                type_to_num_classes,
                per_class_meters,
                reweight=reweight_vec,
            )
            acc, preds = compute_accuracy(logits.data,
                                          loss_targets.data,
                                          return_preds=True)

            outputs["probs"].append(
                F.softmax(logits, dim=1).detach().cpu()[:, 1])
            outputs["preds"].append(preds)
            outputs["losses"].append(losses.detach().cpu())
            outputs["targets"].append(loss_targets.detach().cpu())
            if reweight_vec is not None:
                outputs["reweight"].append(reweight_vec.cpu())

            self._update_metrics(metric_meters, acc, loss, losses, corrects,
                                 batch_size, reweight_vec)

            PROGRESS_BAR_STR = PROGRESS_BAR_SUFFIX

            if self.compute_auroc:
                sub_map = dataloader.dataset.get_class_map("subclass")
                assert set(sub_map.keys()) == {0,
                                               1}  # must be a binary problem
                targets_cat, probs_cat = torch.cat(
                    outputs["targets"]), torch.cat(outputs["probs"])
                auroc = compute_roc_auc(targets_cat, probs_cat)
                metrics["auroc"] = auroc
                has_alt_subclass = "alt_subclass" in dataloader.dataset.Y_dict
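                # Robust AUROC: for every (negative subclass, positive
                # subclass) pair, compute the AUROC restricted to examples of
                # those two subclasses and report the worst (minimum) value.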
                for key in ["subclass", "true_subclass"
                            ] + ["alt_subclass"] * has_alt_subclass:
                    sub_map = dataloader.dataset.get_class_map(key)
                    neg_subclasses = sub_map[0]
                    pos_subclasses = sub_map[1]
                    if len(neg_subclasses) == len(pos_subclasses) == 1:
                        # only one subclass in each superclass
                        rob_auroc = auroc
                    else:
                        subclass_labels = torch.cat(outputs[key])
                        paired_aurocs = []
                        for neg_subclass in neg_subclasses:
                            for pos_subclass in pos_subclasses:
                                inds = ((subclass_labels == neg_subclass)
                                        | (subclass_labels
                                           == pos_subclass)).cpu()
                                subset_pair_auroc = compute_roc_auc(
                                    targets_cat[inds], probs_cat[inds])
                                paired_aurocs.append(subset_pair_auroc)
                        rob_auroc = min(paired_aurocs)
                    metrics[f"{key}_rob_auroc"] = rob_auroc
                if not has_alt_subclass:
                    metrics["alt_subclass_rob_auroc"] = auroc
                PROGRESS_BAR_STR += (
                    " | AUROC: {auroc:.4f} | R AUROC: {subclass_rob_auroc:.4f} | "
                    "TR AUROC: {true_subclass_rob_auroc:.4f} | AR AUROC: {alt_subclass_rob_auroc:.4f}"
                )

            if progress:
                bar.suffix = PROGRESS_BAR_STR.format(
                    batch=batch_idx + 1,
                    size=len(dataloader),
                    total=format_timedelta(bar.elapsed_td),
                    eta=format_timedelta(bar.eta_td),
                    **{
                        **metrics,
                        **{k: v.avg
                           for k, v in metric_meters.items()}
                    },
                )
                bar.next()
        if progress:
            bar.finish()
        if activations_handle:
            activations_handle.remove()

        for k, v in outputs.items():
            if type(v) == list and len(v) > 0:
                outputs[k] = concatenate_iterable(v)

        if bit_pretrained:
            return outputs["metrics"], outputs

        outputs["metrics"] = metrics
        outputs["metrics"].update(
            {k: float(v.avg)
             for k, v in metric_meters.items()})
        outputs["metrics"].update(self._compute_aggregate_metrics(outputs))
        self._print_output_metrics(outputs)

        if adv_metrics:
            scaa = np.mean([
                ga.avg * 100
                for ga in np.array(per_class_meters["per_true_subclass_accs"])
            ])
            self.logger.info(
                f'All accs: {[ga.avg * 100 for ga in np.array(per_class_meters["per_true_subclass_accs"])]}'
            )
            self.logger.info(f"SCAA: {scaa:.3f}")
            ap = sklearn.metrics.average_precision_score(
                outputs["targets"],
                outputs["probs"],
                sample_weight=outputs["reweight"]
                if reweight_vec is not None else None,
            )
            self.logger.info(f"MaP: {ap:.4f}")

        return outputs["metrics"], outputs
Beispiel #35
0
class SampleDumpHandler(object):
    def __init__(self,debug=False,samplelist=None):
        super(SampleDumpHandler,self).__init__()
        self.debug=debug
        self.samplelist = samplelist
        self.reset()
        
    def __del__(self):
        if len(self.data):
            self.saveFile()

    def reset(self):
        self.header  = {}
        self.data = []
        self.lastpacket = 0
        self.raw = []
        self.packetcounter = 0
        self.dump_start = 0
        self.exppacket = 0
        self.starttime = 0
        
    def parse(self,msg):
        status = None
        if msg[3] == 0x1:
            status = self.parseHeader(msg)
        elif msg[3] == 0x2:
            status = self.parsePacket(msg)
        elif msg[3] == 0x3:
            status = self.parseRequest(msg)
        elif msg[3] == 0x7F and self.dump_start > 0:
            status = self.continueDump()
        return status
    
    def parseHeader(self, msg):
        self.reset()
        if len(msg) != 21:
            print "Size mismatch, is", len(msg)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        speriod = int(msg[9]  << 14 | msg[8]  << 7 | msg[7])
        srate   = 1./(speriod *1e-9)
        self.header = {
            "target_id"        : msg[2],
            "sample_number"    : msg[5] << 7 | msg[4],
            "sample_format"    : msg[6],
            "sample_period"    : speriod,
            "sample_rate"      : srate,
            "sample_length"    : msg[12] << 14 | msg[11] << 7 | msg[10],
            "sample_loop_start": msg[15] << 14 | msg[14] << 7 | msg[13],
            "sample_loop_end"  : msg[18] << 14 | msg[17] << 7 | msg[16],
            "loop_type"        : msg[19],
            }

        if self.debug:
            print "Sample Dump Header"
            print "  Data:"
            for k,v in self.header.iteritems():
                print "    %s:" % k, v

        self.raw += msg
        format = int(self.header["sample_format"])
        length = int(self.header["sample_length"])
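        # Expected packet count: each sample word takes ceil(format/7) 7-bit
        # data bytes and a Sample Dump data packet carries 120 data bytes, so
        # the dump needs roughly ceil(format/7) * length / 120 packets, plus
        # one for the trailing partial packet.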
        self.exppacket = (format+6)/7*length/120+1
        self.starttime = time.time()
        self.bar = IncrementalBar(
            "Receiving sample dump", max=self.exppacket,
            suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]')
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)
    
    def parsePacket(self, msg):
        if not 0xF7 in msg:
            print "printSampleDumpDataPacket: could not find EOX"
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)
        
        cs = msg.index(0xF7)-1
        calced_cs = checksum(msg[1:cs])
        if self.debug:
            print "Sample Dump Data Packet"
            print "  Data:"
            print "    Packet count", msg[4]
            print "  checksum:", hex(msg[cs]), \
                "(calculated 0x%x)" % calced_cs
        if msg[cs] != calced_cs:
            print "Checksum mismatch:", hex(msg[cs]), "should be", hex(calced_cs)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)
        offset = 5
        format = int(self.header['sample_format'])

        if format == 14:
            self.data += msg[offset:offset+120]
        else:
            print format, "bit samples are not supported"
        self.lastpacket = msg[4]
        self.raw += msg
        self.packetcounter += 1
        self.bar.next()
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)

    def parseRequest(self,msg):
        self.reset()
        if not 0xF7 in msg:
            print "printSampleDumpDataPacket: could not find EOX"
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        samplenumber = int(msg[5] << 7 | msg[4])

        print "Received Sample Dump Request for sample", samplenumber
        if self.debug:
            print "  Data:"
            print "        targetid:",  msg[2]
            print "    samplenumber:", samplenumber

        samplefile = None
        if self.samplelist and samplenumber < len(self.samplelist):
            samplefile = self.samplelist[samplenumber]
            print "Selected list index", samplenumber, repr(samplefile)
        if not samplefile or not os.path.exists(samplefile):
            samplefile = "sample.sds"
            print "Selected fallback", repr(samplefile)
        if not os.path.exists(samplefile):
            print "No sample to send"
            return HandshakeMessage.Cancel(packetnumber=self.lastpacket)
            
        f = open(samplefile, "rb")
        self.raw = [ ord(i) for i in f.read() ]
        f.close()
        n = self.raw.count(0xF7)
        if n > 0:
            print "Sending", n, "Sample Dump Packets (+ header)"
            self.starttime = time.time()
            self.dump_start = self.raw.index(0xF7)+1
            self.packetcounter += 1
            return self.raw[:self.dump_start]
        
        return HandshakeMessage.Cancel(packetnumber=self.lastpacket)

    def continueDump(self):
        n = self.raw[self.dump_start:].count(0xF7)
        if n == 0:
            elapsed = time.time()-self.starttime
            print "Sent %d packets in %.1f seconds (%.1f bytes/sec)" % (
                self.packetcounter, elapsed, len(self.raw)/elapsed)
            self.reset()
            return HandshakeMessage.EOF(packetnumber=self.lastpacket)
        
        ds = self.dump_start
        self.dump_start = self.raw.index(0xF7,self.dump_start)+1
        if self.packetcounter % 100 == 0:
            print "Sent %d packets" % self.packetcounter
        self.packetcounter += 1
        return self.raw[ds:self.dump_start]
        
    def saveFile(self, filename=None):
        self.bar.finish()
        if not filename:
            timestamp = time.strftime("%Y%m%d%H%M%S")
            filename = "sample_%s" % timestamp

        rate = self.packetcounter*120/(time.time()-self.starttime)
        print "Packets received: %d/%d" % (self.packetcounter, self.exppacket)
        print "Average rate:     %.1f bytes/sec" % rate
        print "Saving to", filename

        # concatenation of sysex messages
        with open(filename+".sds", "wb") as f:
            f.write(bytearray(self.raw))

        # adjust data size to sample length
        nsamples = int(self.header.get('sample_length',len(self.data)/2))
        self.data = self.data[:nsamples*2]
        
        # sample data only (7-in-8-bit chunks, big-endian: .dcba987 .6543210)
        with open(filename+".dmp", "wb") as f:
            f.write(bytearray(self.data))

        # decoded sample data
        format = int(self.header['sample_format'])
        out  = []
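        # 14-bit decode: two 7-bit bytes (MSB first) form a 14-bit word, which
        # is shifted left by 2 to fill 16 bits and mapped to signed s16le.
        # E.g. the byte pair 0x40 0x00 -> 0x2000, << 2 -> 0x8000, i.e. -32768.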
        if format == 14:
            pos  = 0
            while pos < len(self.data):
                # assume big-endian
                tmp = self.data[pos] << 7 | self.data[pos+1]
                # convert to s16le
                tmp = u2s(tmp<<2)
                out.append(tmp & 0xFF)
                out.append((tmp >> 8) & 0xFF)
                pos += 2
            print
        else:
            print format, "bit samples are not supported"
        
        if len(out):
            # write raw file
            with open(filename+".raw", "wb") as f:
                f.write(bytearray(out))
            # write WAV file
            writeWAV(filename+".wav",int(self.header.get("sample_rate", 22050)),
                     bytearray(out))
        # sample properties
        with open(filename+".txt", "w") as f:
            f.writelines( [ "%s: %s\n" % i for i in self.header.iteritems() ] )
            f.writelines(
                [ "file_%s: %s.%s\n" % (suffix,filename,suffix) for suffix in [
                    'sds', 'raw', 'dmp', 'wav' ] ])
        self.reset()
Beispiel #36
0
def download(url, path=DEFAULT_PATH):
    """Download HTML page and page assets (img, css files) from given 'url'."""
    # Generate output 'page_name' and 'file_path' and load page
    page_name = get_filename(url=url)
    file_path = get_full_path(path, page_name)

    # Make request, edit Soup object and save data into output file
    content = make_request(url)
    soup = BeautifulSoup(content, "html.parser")

    # Get list of links
    links = get_links(tag_meta=ASSET_TAGS, url=url, soup=soup)

    # Initiate download queue
    to_download = []

    # Edit Soup object and point links to local files
    if links:
        # Generate folder name and path
        folder_name = get_foldername(url=url)
        folder_path = get_full_path(path, folder_name)

        # Create output directory (if it doesn't exist)
        if not os.path.isdir(folder_path):
            create_dir(local_path=folder_path)

        # Iterate links and edit soup object
        for link_dict in links:
            # Destructure link's dict
            fact_link, abs_link, tag = itemgetter('fact_link', 'abs_link',
                                                  'tag')(link_dict)

            # Generate file_name, local path & local link for item
            file_name = get_filename(url=abs_link)
            local_path = get_full_path(path, folder_name, file_name)
            local_link = get_full_path(folder_name, file_name)

            # Edit soup object
            soup = edit_soup(url=fact_link,
                             tag=tag,
                             meta=ASSET_TAGS[tag],
                             local_link=local_link,
                             soup=soup)

            # Add asset's absolute url and local_path into queue
            to_download.append((abs_link, local_path))

    # Save modified soup
    save_file(data=soup.prettify(), local_path=file_path, mode='w')

    # Initiate progress bar and download assets
    progress_bar = IncrementalBar('Loading resources:', max=len(to_download))
    for abs_link, local_path in to_download:
        try:
            content = make_request(abs_link)
            save_file(data=content, local_path=local_path)
        except Exception:
            logger.error(f'Asset \'{abs_link}\' was not downloaded.')
        progress_bar.next()  # Iterate progress bar

    # Finish progress_bar & return the output file path
    progress_bar.finish()
    return file_path
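The asset loop above collects all links first, so the bar's max is known before any download starts. A condensed sketch of that queue-then-download pattern, with fetch and save standing in for the example's make_request/save_file helpers:

# Condensed sketch of the queue-then-download pattern above; fetch() and
# save() are stand-ins for the example's make_request()/save_file() helpers.
from progress.bar import IncrementalBar

def download_assets(to_download, fetch, save):
    bar = IncrementalBar('Loading resources:', max=len(to_download))
    for url, local_path in to_download:
        try:
            save(fetch(url), local_path)
        except Exception:
            pass  # the example logs the failed asset instead of raising
        bar.next()
    bar.finish()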
Beispiel #37
0
def cargar_tweets(limite=None, agregar_sexuales=False, cargar_features=True):
    """Carga todos los tweets, inclusive aquellos para evaluación, aunque no se quiera evaluar,
    y aquellos mal votados, así se calculan las features para todos. Que el filtro se haga luego."""
    conexion = open_db()
    if DB_ENGINE == 'sqlite3':
        cursor = conexion.cursor()
    else:
        cursor = conexion.cursor(buffered=True)  # buffered so the row count is known before iterating

    if agregar_sexuales:
        consulta_sexuales_tweets = ""
        consulta_limite_sexuales = ""
    else:
        consulta_sexuales_tweets = "censurado_tweet = 0"
        consulta_limite_sexuales = "AND " + consulta_sexuales_tweets
    consulta_sexuales_features = consulta_sexuales_tweets

    if limite:
        consulta = "SELECT id_tweet FROM tweets WHERE evaluacion = 0 " + consulta_limite_sexuales + " ORDER BY RAND() LIMIT "\
                   + unicode(limite)

        cursor.execute(consulta)

        bar = IncrementalBar("Eligiendo tweets aleatorios\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)

        ids = []

        for (tweet_id,) in cursor:
            ids.append(tweet_id)
            bar.next()

        bar.finish()

        str_ids = '(' + unicode(ids).strip('[]L') + ')'
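        # unicode(ids) renders the Python list as "[1, 2, ...]"; strip('[]L')
        # removes the surrounding brackets (and a trailing Python 2 long-int
        # 'L' suffix, if present) to build the value list for the SQL IN (...).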
        consulta_prueba_tweets = "T.id_tweet IN {ids}".format(ids=str_ids)
        consulta_prueba_features = "id_tweet IN {ids}".format(ids=str_ids)

    else:
        consulta_prueba_features = ""
        consulta_prueba_tweets = ""

    if not agregar_sexuales and limite:
        restricciones_tweets = "WHERE " + consulta_sexuales_tweets + " AND " + consulta_prueba_tweets
        restricciones_features = "WHERE " + consulta_sexuales_features + " AND " + consulta_prueba_features
    elif not agregar_sexuales:
        restricciones_tweets = "WHERE " + consulta_sexuales_tweets
        restricciones_features = "WHERE " + consulta_sexuales_features
    elif limite:
        restricciones_tweets = "WHERE " + consulta_prueba_tweets
        restricciones_features = "WHERE " + consulta_prueba_features
    else:
        restricciones_tweets = ""
        restricciones_features = ""

    if DB_ENGINE == 'sqlite3':
            consulta = """
    SELECT id_account,
           T.id_tweet,
           text_tweet,
           favorite_count_tweet,
           retweet_count_tweet,
           eschiste_tweet,
           censurado_tweet,
           name_account,
           followers_count_account,
           evaluacion,
           votos,
           votos_humor,
           promedio_votos,
           categoria_tweet
    FROM   tweets AS T
           NATURAL JOIN twitter_accounts
                        LEFT JOIN (SELECT id_tweet,
                                          Avg(voto) AS promedio_votos,
                                          Count(*) AS votos,
                                          Count(case when voto <> 'x' then 1 else NULL end) AS votos_humor
                                   FROM   votos
                                   WHERE voto <> 'n'
                                   GROUP  BY id_tweet) V
                               ON ( V.id_tweet = T.id_tweet )
    {restricciones}
    """.format(restricciones=restricciones_tweets)
    else:
        consulta = """
    SELECT id_account,
           T.id_tweet,
           text_tweet,
           favorite_count_tweet,
           retweet_count_tweet,
           eschiste_tweet,
           censurado_tweet,
           name_account,
           followers_count_account,
           evaluacion,
           votos,
           votos_humor,
           promedio_votos,
           categoria_tweet
    FROM   tweets AS T
           NATURAL JOIN twitter_accounts
                        LEFT JOIN (SELECT id_tweet,
                                          Avg(voto) AS promedio_votos,
                                          Count(*) AS votos,
                                          Count(If(voto <> 'x', 1, NULL)) AS votos_humor
                                   FROM   votos
                                   WHERE voto <> 'n'
                                   GROUP  BY id_tweet) V
                               ON ( V.id_tweet = T.id_tweet )
    {restricciones}
    """.format(restricciones=restricciones_tweets)

    cursor.execute(consulta)

    bar = IncrementalBar("Cargando tweets\t\t\t", max=(999999 if DB_ENGINE == 'sqlite3' else cursor.rowcount), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    resultado = {}

    for (id_account, tweet_id, texto, favoritos, retweets, es_humor, censurado, cuenta, seguidores, evaluacion, votos,
         votos_humor, promedio_votos, categoria) in cursor:
        tweet = Tweet()
        tweet.id = tweet_id
        tweet.texto_original = texto
        tweet.texto = texto
        tweet.favoritos = favoritos
        tweet.retweets = retweets
        tweet.es_humor = es_humor
        tweet.es_chiste = es_humor
        tweet.censurado = censurado
        tweet.cuenta = cuenta
        tweet.seguidores = seguidores
        tweet.evaluacion = evaluacion
        tweet.categoria = categoria
        if votos:
            tweet.votos = int(votos)  # This one and the next, since they come from count and sum, are Decimal.
        if votos_humor:
            tweet.votos_humor = int(votos_humor)
        if promedio_votos:
            tweet.promedio_de_humor = promedio_votos

        resultado[tweet.id] = tweet
        bar.next()

    bar.finish()

    if cargar_features:
        consulta = """
        SELECT id_tweet,
               nombre_feature,
               valor_feature
        FROM   features
               NATURAL JOIN tweets
        {restricciones}
        """.format(restricciones=restricciones_features)

        cursor.execute(consulta)

        bar = IncrementalBar("Cargando features\t\t", max=(9999999 if DB_ENGINE == 'sqlite3' else cursor.rowcount), suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)

        for (id_tweet, nombre_feature, valor_feature) in cursor:
            if id_tweet in resultado:
                resultado[id_tweet].features[nombre_feature] = valor_feature
            bar.next()

        bar.finish()

        cursor.close()
        conexion.close()

    return list(resultado.values())
Beispiel #38
0
#!/usr/bin/env python2
# coding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import sys

from progress.bar import IncrementalBar


sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from clasificador.herramientas.define import SUFIJO_PROGRESS_BAR
import clasificador.herramientas.utils

if __name__ == "__main__":
    largo = 1000

    bar = IncrementalBar('Calculando', max=largo, suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for i in xrange(largo):
        clasificador.herramientas.utils.ejecutar_comando("echo 1")
        bar.next()

    bar.finish()
Beispiel #39
0
def scrapezillowdata(zillow_urls, header_input):
    # Initialize progress bar
    bar = IncrementalBar(" Scraping Zillow", max=len(zillow_urls))

    # Initialize list to store home data during loop over each home's Zillow url
    home_data_list = []

    # Loop over each home Zillow URL and scrape pertinent details
    for url in zillow_urls:
        # First, obtain the HTML from the current home Zillow URL using gethtml.py
        home_html = gethtml(url, header_input)

        # The home address is simply taken directly from its own URL.
        home_address = (url.replace("https://www.zillow.com/homedetails/",
                                    "").replace("-", " ").split("/", 1)[0])

        # Next, we search for the home's sale price. In Zillow, this variable is under a
        # "span" class="ds-status-details" tag. The find method will find this variable and store it into a tag
        # (i.e. ds_status_details). Generally, Zillow will show "Sold" and the sell price in this tag. Therefore, we
        # check this tag for the key word "sold" that we know will generally be contained in the tag's text. If the key
        # word is found in the tag's text, then we store the text found in the tag into the appropriate variable while
        # removing the unwanted characters. If the key word is not found, then the appropriate variable will retain its
        # initialization value of "n/a".
        ds_status_details = home_html.find("span", class_="ds-status-details")
        sold_price = "n/a"
        if "sold" in ds_status_details.text.lower():
            sold_price = (ds_status_details.text.replace("Sold", "").replace(
                ": $", "").replace(",", ""))

        # Next, we search for the number of beds, baths, and the home's square footage. In Zillow, each one of these
        # variables is under a "span" class="ds-bed-bath-living-area" tag. The find_all method will find each one of
        # these variables and store them into a result set (i.e. ds_bed_bath_living_area). Each item of the result set
        # will either contain number of beds and "bd", number of baths and "ba", or the home's size and "Square Feet".
        # We loop over the result set checking each item for key words that we know will be contained in
        # the item's text. If the key word is found in the item's text, then we store the text found in the item into
        # the appropriate variable while removing the unwanted characters. If the key word is not found, then the
        # appropriate variable will retain its initialization value of "n/a".
        ds_bed_bath_living_area = home_html.find_all(
            "span", class_="ds-bed-bath-living-area")
        beds = "n/a"
        baths = "n/a"
        size = "n/a"
        for item in ds_bed_bath_living_area:
            if "bd" in item.text.lower():
                beds = item.text.replace(" bd", "")
                continue
            if "ba" in item.text.lower():
                baths = item.text.replace(" ba", "")
                continue
            if "square feet" in item.text.lower() or "sqft" in item.text.lower(
            ):
                size = item.text.replace(",",
                                         "").replace("Square Feet", "sqft")
                continue

        # Next, we search for the home type, year built, heating, cooling, parking, and lot size. In Zillow, each one of
        # these variables is under a "li" class="ds-home-fact-list-item" tag. The find_all method will find each one of
        # these variables and store them into a result set (i.e. ds_home_fact_list_items). Each item of the result set
        # has a child "span" class="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH" tag (i.e. the "label" tag)
        # AND a child "span" class="Text-c11n-8-11-1__aiai24-0 hqfqED" tag (i.e. the "value" tag).
        # For example, for "home type" information (generally the first item in the result set), there will be a
        # "label" tag that will contain the text "Type" and there will be a "value" tag that will contain the text
        # "Single Family". We loop over the result set checking each item's "label" tag for key words that we know will
        # be contained in that tag. If the key word is found in the item's "label" tag, then we store the text found in
        # the item's adjacent "value" tag into the appropriate variable while removing the unwanted characters.
        # If the key word is not found, then the appropriate variable will retain its initialization value of "n/a".
        ds_home_fact_list_items = home_html.find_all(
            "li", class_="ds-home-fact-list-item")
        home_type = "n/a"
        year_built = "n/a"
        heating = "n/a"
        cooling = "n/a"
        parking = "n/a"
        lot_size = "n/a"
        for item in ds_home_fact_list_items:
            if ("type" in item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
            ).text.lower()):
                home_type = item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text
                continue
            if ("year built" in item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
            ).text.lower()):
                year_built = item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text
                continue
            if ("heating" in item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
            ).text.lower()):
                heating = item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text
                continue
            if ("cooling" in item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
            ).text.lower()):
                cooling = item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text
                continue
            if ("parking" in item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
            ).text.lower()):
                parking = item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text
                continue
            if ("lot" in item.find(
                    "span", class_="Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
            ).text.lower()):
                lot_size = item.find(
                    "span",
                    class_="Text-c11n-8-11-1__aiai24-0 hqfqED").text.replace(
                        ",", "")
                continue

        # Append home data information to list
        home_data_list.append([
            home_address,
            sold_price,
            beds,
            baths,
            size,
            home_type,
            year_built,
            heating,
            cooling,
            parking,
            lot_size,
        ])

        bar.next()  # to advance progress bar
    bar.finish()  # to finish the progress bar
    print()  # to add space following progress bar

    # Convert home_data_list into pandas dataframe.
    home_data = list2frame(home_data_list)

    return home_data
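The fact-list parsing above repeats the same label/value lookup for each field. A sketch of a small helper that captures that pattern; the CSS class names are copied from the comments above and are assumptions, since Zillow's markup changes frequently:

# Sketch of a label/value helper for the fact-list parsing above; the class
# names come from the comments and may well be outdated on the live site.
LABEL_CLASS = "Text-c11n-8-11-1__aiai24-0 sc-pTWqp jMCspH"
VALUE_CLASS = "Text-c11n-8-11-1__aiai24-0 hqfqED"

def fact_value(fact_items, keyword, default="n/a"):
    """Return the 'value' text of the first fact item whose label contains keyword."""
    for item in fact_items:
        label = item.find("span", class_=LABEL_CLASS)
        if label and keyword in label.text.lower():
            value = item.find("span", class_=VALUE_CLASS)
            return value.text if value else default
    return default

# Usage: year_built = fact_value(ds_home_fact_list_items, "year built")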
Beispiel #40
0
def check_rds_instance(rds_name, states, connection, auto_name):

    # Create RDS client
    rds = boto3.client('rds')
    print(
        '\n' + tag +
        'Creating Database\nPlease wait as it typically takes 10-15 minutes before an instance is available.'
    )

    # Create progress bar and continuously update it
    bar = IncrementalBar(rds_name, max=len(states), suffix='')
    while True:
        global creating
        global backing_up
        global available
        global monitoring
        global logging
        global count

        # Check RDS instance
        response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
        instances = response.get('DBInstances')
        status = instances[0].get('DBInstanceStatus').title()

        # Handle 'Creating' status
        if status == 'Creating' and not creating:
            creating = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r',
                  flush=True)

        # Handle 'Backing-Up' status
        elif status == 'Backing-Up' and not backing_up:
            backing_up = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r',
                  flush=True)

        # Handle 'Available' status
        elif status == 'Available' and not available:
            available = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r',
                  flush=True)
            break

        # Handle 'Configuring-Enhanced-Monitoring' status
        elif status == 'Configuring-Enhanced-Monitoring' and not monitoring:
            monitoring = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r',
                  flush=True)

        # Handle 'Configuring-Log-Exports' status
        elif status == 'Configuring-Log-Exports' and not logging:
            logging = True
            bar.next()
            count += 1
            print(str(count) + '/' + str(len(states)) + ' | Status: ' + status,
                  end='\r',
                  flush=True)

        # Sleep for 30 seconds between checks
        time.sleep(30)

    # Finish progress bar
    bar.finish()

    # Check for schema and grab endpoint
    check_schema = False
    while not check_schema:

        # Automatically create postgresql based on auto_name
        if auto_name:
            check_schema = True
            response = rds.describe_db_instances(DBInstanceIdentifier=rds_name)
            instances = response.get('DBInstances')
            endpoint = instances[0].get('Endpoint').get('Address')

            cps.create_postgres_sql(rds_name, auto_name, endpoint, connection)

        # Ask for schema file if auto_name not provided
        else:
            print('\n' + tag + 'Database Ready\n' +
                  'Please specify the schema filename (excluding .json):',
                  end=' ')
            schema_name = input()
            if schema_name != '' and not schema_name.endswith('.json'):
                check_schema = True

                response = rds.describe_db_instances(
                    DBInstanceIdentifier=rds_name)
                instances = response.get('DBInstances')
                endpoint = instances[0].get('Endpoint').get('Address')

                cps.create_postgres_sql(rds_name, schema_name, endpoint,
                                        connection)

            # Handle invalid input
            else:
                print(
                    Style.BRIGHT +
                    'Invalid entry. Please enter a valid schema filename excluding the ".json" extension.\n'
                )
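The five status branches above differ only in which flag they set. A sketch of the same polling loop de-duplicated with a set of already-reported states, keeping boto3's describe_db_instances call and the 30-second sleep as in the example:

# Sketch of a de-duplicated status poll for the RDS wait above; the per-status
# globals are replaced by a local set of states that were already reported.
import time
import boto3
from progress.bar import IncrementalBar

def wait_for_rds(rds_name, states):
    rds = boto3.client('rds')
    bar = IncrementalBar(rds_name, max=len(states), suffix='')
    seen = set()
    while True:
        instance = rds.describe_db_instances(
            DBInstanceIdentifier=rds_name)['DBInstances'][0]
        status = instance['DBInstanceStatus'].title()
        if status not in seen:
            seen.add(status)
            bar.next()
            print('%d/%d | Status: %s' % (len(seen), len(states), status),
                  end='\r', flush=True)
        if status == 'Available':
            break
        time.sleep(30)
    bar.finish()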
Beispiel #41
0
    def download(self):
        bar = IncrementalBar('Downloading ', max=10)

        self.driver.get(CME_LINK +
                        '/tools-information/quikstrike/options-calendar.html')
        first_window = self.driver.window_handles[0]
        bar.next()
        sleep(5)
        self.driver.get(
            CME_TOOLS_LINK +
            '/User/QuikStrikeView.aspx?viewitemid=IntegratedCMEOptionExpirationCalendar'
        )
        bar.next()
        sleep(5)
        self.driver.find_element_by_xpath(
            '//a[@id="MainContent_ucViewControl_IntegratedCMEOptionExpirationCalendar_ucViewControl_hlCMEProducts"]'
        ).click()
        bar.next()
        sleep(5)
        for handle in self.driver.window_handles:
            if handle != first_window:
                self.driver.switch_to_window(handle)
                self.driver.find_element_by_xpath(
                    '//a[@id="ctl00_cphMain_lvTabs_ctrl3_lbTab"]').click()
                bar.next()
                sleep(3)
                self.driver.find_element_by_xpath(
                    '//a[@id="cphMain_ucProductBrowser_ucProductFilter_ucTrigger_lnkTrigger"]'
                ).click()
                bar.next()
                sleep(3)
                self.driver.find_element_by_xpath(
                    '//input[@id="cphMain_ucProductBrowser_ucProductFilter_ucGroupList_rblGroups_4"]'
                ).click()
                bar.next()
                sleep(3)
                self.driver.find_element_by_xpath(
                    '//input[@id="cphMain_ucProductBrowser_ucProductFilter_ucContractTypeList_rblContractType_1"]'
                ).click()
                bar.next()
                sleep(3)
                self.driver.find_element_by_xpath(
                    '//input[@id="cphMain_ucProductBrowser_ucProductFilter_btnApply"]'
                ).click()
                bar.next()
                sleep(3)
                self.driver.find_element_by_xpath(
                    '//a[@id="cphMain_ucProductBrowser_ucProductActions_ucTrigger_lnkTrigger"]'
                ).click()
                bar.next()
                sleep(3)
                self.driver.find_element_by_xpath(
                    '//a[@id="cphMain_ucProductBrowser_ucProductActions_lnkExport"]'
                ).click()
                bar.next()
                # self.driver.find_element_by_xpath(
                #     '//a[@id="cphMain_ucProductBrowser_ucProductActions_lnkShowExpirations"]').click()
                # bar.next()
                # sleep(4)
                # iframe = self.driver.find_element_by_xpath('//iframe[@id="mainFrame"]')
                # self.driver.switch_to_frame(iframe)
                # bar.next()
                # sleep(4)
                # self.driver.find_element_by_xpath('//a[@id="ctl03_ucExport_lnkTrigger"]').click()
                # bar.next()
                sleep(5)
                bar.finish()
Beispiel #42
0
    if include_wireframe:
        mlab.triangular_mesh(x,
                             y,
                             z,
                             faces,
                             color=(0, 0, 0),
                             representation='wireframe')
    mlab.show()


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    v, f = sess.run([verts, faces])
    vis_mesh(v, f)
    for _ in range(n2):

        try:
            bar = IncrementalBar(max=n1)
        except NameError:
            bar = None
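        # If IncrementalBar is not defined (e.g. an optional progress import
        # failed or was skipped), the NameError fallback disables the bar.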
        for i in range(n1):
            if bar is not None:
                bar.next()
            sess.run(opt)
            # print(loss_val)
        v, f, loss_val = sess.run([verts, faces, loss])
        vis_mesh(v, f)
        print(loss_val)
        if bar is not None:
            bar.finish()
Beispiel #43
0
def hyperpack(hyperpack_list):
    '''
    Install Large Packs Of Applications And Packages
    '''
    os_bar = IncrementalBar('Getting Operating System...', max=1)
    os_bar.next()

    installer = Installer()
    updater = Updater()
    cleaner = Uninstaller()

    hyperpacks = hyperpack_list.split(',')

    password = ""

    if platform == 'linux' or platform == 'darwin':
        password = getpass('Enter your password: ')

        password_bar = IncrementalBar('Verifying Password...', max=1)

        exitcode = is_password_valid(password)

        if exitcode == 1:
            click.echo('Wrong Password Entered... Aborting Installation!')
            return

        password_bar.next()

    click.echo('\n')
    if platform == 'linux':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]

            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            # Installing Required Packages
            for package in packages:
                installer.install_task(
                    devpackages_linux[package],
                    f'sudo -S apt-get install -y {package}', password,
                    f'{package} --version',
                    [f'{devpackages_linux[package]} Version'])

            # Installing Required Applications
            for app in apps:
                installer.install_task(
                    applications_linux[app],
                    f'sudo -S snap install --classic {app}', password, '', [])

            # Updating Required Packages
            for package in packages:
                updater.updatepack(package, password)

            for app in apps:
                updater.updateapp(app, password)

            cleaner.clean(password)

    elif platform == 'win32':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]

            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            for package in packages:
                installer.install_task(
                    package_name=devpackages_windows[package],
                    script=f'choco install {package} -y',
                    password="",
                    test_script=f'{package} --version',
                    tests_passed=[f'{devpackages_windows[package]} Version'])

            for package in packages:
                updater.updatepack(package, password="")

            for app in apps:
                installer.install_task(package_name=applications_windows[app],
                                       script=f'choco install {app} -y',
                                       password="",
                                       test_script='',
                                       tests_passed=[])

            for app in apps:
                updater.updateapp(app, password="")
    elif platform == 'darwin':
        for hyperpack in hyperpacks:
            hyper_pack = hyperpkgs[hyperpack]

            packages = hyper_pack.packages.split(',')
            apps = hyper_pack.applications.split(',')

            for package in packages:
                installer.install_task(
                    package_name=devpackages_macos[package],
                    script=f'brew install {package}',
                    password="",
                    test_script=f'{package} --version',
                    tests_passed=[f'{devpackages_macos[package]} Version'])

            for package in packages:
                updater.updatepack(package, password="")

            for app in apps:
                installer.install_task(package_name=applications_macos[app],
                                       script=f'brew cask install {app}',
                                       password="",
                                       test_script='',
                                       tests_passed=[])

            for app in apps:
                updater.updateapp(app, password="")
Beispiel #44
0
def populate_database(video_directory,
                      embedder,
                      pose_estimator,
                      cursor,
                      num_people=None,
                      frames_per_person=None):
    folders = [
        f for f in os.listdir(video_directory)
        if os.path.isdir(os.path.join(video_directory, f))
    ]
    for person_number, folder in enumerate(folders):
        if person_number == num_people:
            return

        frame_info = os.path.join(video_directory,
                                  folder + '.labeled_faces.txt')

        with open(frame_info) as info_file:
            csv_data = csv.reader(info_file, delimiter=',')
            embedding = None
            csv_data = list(csv_data)
            num_frames = len(csv_data)
            if frames_per_person:
                num_frames = min(len(csv_data), frames_per_person)
            bar = IncrementalBar(
                f'Adding person {person_number + 1:>3} of {num_people:>3}',
                max=num_frames)

            frame_indices = np.arange(len(csv_data))
            if frames_per_person and len(csv_data) > frames_per_person:
                frame_indices = np.linspace(0,
                                            len(csv_data) - 1,
                                            num=frames_per_person)
                # uint8 would overflow for clips longer than 255 frames
                frame_indices = frame_indices.astype(int)

            for frame_num in frame_indices:
                image_path = csv_data[frame_num][0].replace('\\', '/')
                image_path = os.path.join(video_directory, image_path)
                image = Image.open(image_path)
                image = crop_to_face(image)

                if image is None:
                    bar.next()
                    continue

                if embedding is None:
                    embedding = embedder.embed(image)
                    embedding = embedding.flatten()
                    cursor.execute(
                        'INSERT INTO videos (id, embedding) values' +
                        '  (?, ?)', (person_number, embedding))

                pose = pose_estimator.estimate_pose(image)
                landmarks = get_normalized_landmarks(image)

                if landmarks is None:
                    bar.next()
                    continue

                cursor.execute(
                    'INSERT INTO frames (video_id, image_path, pose, landmarks)'
                    + ' values (?, ?, ?, ?)',
                    (person_number, image_path, pose, landmarks))
                bar.next()
        print()
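The frame sampling above uses np.linspace to pick at most frames_per_person indices spread evenly over the clip; casting to a plain int avoids the uint8 overflow noted in the comment. A standalone sketch of that sampling step:

import numpy as np

def evenly_spaced_indices(total, max_count):
    """Pick at most max_count frame indices spread evenly over range(total)."""
    if max_count is None or total <= max_count:
        return np.arange(total)
    # endpoint is included, so the first and last frame are always sampled
    return np.linspace(0, total - 1, num=max_count).astype(int)

print(evenly_spaced_indices(10, 4))   # [0 3 6 9]
print(evenly_spaced_indices(3, 10))   # [0 1 2]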
class AuthorCrawler:
    visitedProfileURL = []
    queueProfileURL = []
    visitedArticleURL = []
    queueArticleURL = []
    numberOfCrawlerProfile = 0

    def __init__(self):
        self.baseURL = 'https://www.researchgate.net/'
        from progress.bar import IncrementalBar
        self.progress_bar = IncrementalBar('Crawling', max=MIN_NUMBER_OF_PROFILE, suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')

    def crawl(self):
        self.queueProfileURL.extend(START_PAGES)
        os.makedirs(AFTER_CRAWL_AUTHOR_DIR, exist_ok=True)
        while self.numberOfCrawlerProfile < MIN_NUMBER_OF_PROFILE:
            while len(self.queueProfileURL) == 0:
                if len(self.queueArticleURL) == 0:
                    self.progress_bar.finish()
                    return
                try:
                    new_profiles = self.getAuthorFromArticle(self.queueArticleURL.pop(0))
                    self.queueProfileURL.extend(
                        url for url in new_profiles
                        if url not in self.visitedProfileURL and url not in self.queueProfileURL)
                except Exception:
                    pass
            try:
                self.progress_bar.next()
                self.crawlProfile(self.queueProfileURL.pop(0))
            except Exception:
                pass
        self.progress_bar.finish()

    def getAuthorFromArticle(self, url):

        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')

        authors = s.findAll('a', class_='display-name')
        authorsList = []
        for author in authors:
            authorsList.append(self.baseURL + author['href'])
        return authorsList

    def getArticleIDFromURL(self, url):
        return re.findall(r'publication/(?P<id>\d+)_', url)[0]

    def crawlProfile(self, profURL):
        if not profURL.endswith('publications'):
            profURL += '/publications'
        r = requests.get(profURL)
        s = BeautifulSoup(r.text, 'html.parser')
        name = s.find('h1', class_='profile-header-name')
        name = name.text
        n = 1
        articles = []
        while True:
            url = profURL + '/' + str(n)
            n += 1
            res = self.parseProfilePage(url)
            if res is None or len(res) == 0:
                break
            articles.extend(res)
        self.queueArticleURL.extend(
            url for url, _ in articles
            if url not in self.visitedArticleURL and url not in self.queueArticleURL)
        js = {}
        js['Name'] = name
        js['Article'] = articles

        file_name = '{}.json'.format(name)
        with open(os.path.join(AFTER_CRAWL_AUTHOR_DIR , file_name), 'w') as outfile:
            json.dump(js, outfile)
        self.numberOfCrawlerProfile += 1
        print(self.numberOfCrawlerProfile)

    def parseProfilePage(self, url):  # return top 10 article url
        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')
        articles = s.findAll('a', class_='ga-publication-item')
        result = []
        for article in articles:
            result.append((self.baseURL + article['href'], self.getArticleIDFromURL(article['href'])))
        return result
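The crawler's progress bar relies on the suffix format tokens provided by the progress package (%(percent)d, %(remaining)d, %(eta_td)s and friends), which are %-formatted against the bar's own attributes. A minimal, self-contained sketch of the same configuration:

import time
from progress.bar import IncrementalBar

# The suffix string is %-formatted against the bar's attributes,
# e.g. index, max, percent, remaining, eta and eta_td.
bar = IncrementalBar('Crawling', max=50,
                     suffix='%(percent)d%% - %(remaining)d remaining - eta %(eta_td)s')
for _ in range(50):
    time.sleep(0.01)   # stand-in for crawling one profile
    bar.next()
bar.finish()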
def prepare_datasets(datapath,
                     workdirpath,
                     dataset_type,
                     image_shape=(256, 256),
                     input_mask=None,
                     fraction=None,
                     n_slice_per_file=None,
                     realimag_img=True,
                     realimag_kspace=True,
                     kspace_norm=None,
                     img_norm=None):
    """
    Prepares the work directory, and prepares data into easy-to-used, eventually masked data.
    """
    # getting the list of usable files
    files = [
        f for f in os.listdir(datapath)
        if (os.path.isfile(os.path.join(datapath, f)) and ('.h5' in f))
    ]
    if fraction:
        files = files[:int(np.floor(fraction * len(files)))]

    output_dir = os.path.join(workdirpath, dataset_type)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    index_dict = {}

    if files != []:
        bar = IncrementalBar('{} dataset_files'.format(dataset_type),
                             max=len(files))

    for f in files:

        filepath = os.path.join(output_dir, f)
        if os.path.isfile(filepath):
            bar.next()
            continue
        h5f = h5py.File(os.path.join(datapath, f), 'r')
        if 'kspace' not in h5f:
            h5f.close()
            bar.next()
            continue

        if n_slice_per_file is None:
            n_slices = h5f['kspace'].shape[0]
        else:
            n_slices = n_slice_per_file
        index_dict[filepath] = n_slices

        kdata_array = np.empty((n_slices, *image_shape, 2))
        kdata_clean_array = np.empty((n_slices, *image_shape, 2))
        image_array = np.empty((n_slices, *image_shape, 2))
        image_clean_array = np.empty((n_slices, *image_shape, 2))
        mask_array = np.empty((n_slices, *image_shape))
        inverse_mask_array = np.empty((n_slices, *image_shape))

        k = 0
        imin = int(np.floor(h5f['kspace'].shape[0] / 2 - n_slices / 2))
        imax = imin + n_slices
        for i, kdata_raw in enumerate(h5f['kspace']):
            if i < imin or i >= imax:
                continue

            ### cropping
            image_clean = ifft(kdata_raw)
            image_clean = crop(image_clean, size=image_shape)

            # normalize image (img_norm defaults to None, so guard before indexing)
            if img_norm and img_norm['np']:
                image_clean = img_norm['np'](image_clean)

            kdata_clean = fft(image_clean)

            if kspace_norm and kspace_norm['np']:
                kdata_clean = kspace_norm['np'](kdata_clean)

            ### apply mask
            if input_mask:
                mask = input_mask.get_mask(kdata_clean)
                kdata = kdata_clean * mask + 0.0
                mask_array[k, :, :] = mask
                inverse_mask = mask == 0
                inverse_mask = inverse_mask.astype(float)  # np.float is removed in modern NumPy
                inverse_mask_array[k, :, :] = inverse_mask
            else:
                kdata = kdata_clean

            image = ifft(kdata)
            image_clean = ifft(kdata_clean)

            # filling arrays
            if realimag_kspace:
                kdata_array[k, :, :, 0] = np.real(kdata)
                kdata_array[k, :, :, 1] = np.imag(kdata)
                kdata_clean_array[k, :, :, 0] = np.real(kdata_clean)
                kdata_clean_array[k, :, :, 1] = np.imag(kdata_clean)
            else:
                kdata_array[k, :, :, 0] = np.abs(kdata)
                kdata_array[k, :, :, 1] = np.angle(kdata)
                kdata_clean_array[k, :, :, 0] = np.abs(kdata_clean)
                kdata_clean_array[k, :, :, 1] = np.angle(kdata_clean)

            if realimag_img:
                image_array[k, :, :, 0] = np.real(image)
                image_array[k, :, :, 1] = np.imag(image)
                image_clean_array[k, :, :, 0] = np.real(image_clean)
                image_clean_array[k, :, :, 1] = np.imag(image_clean)
            else:
                image_array[k, :, :, 0] = np.abs(image)
                image_array[k, :, :, 1] = np.angle(image)
                image_clean_array[k, :, :, 0] = np.abs(image_clean)
                image_clean_array[k, :, :, 1] = np.angle(image_clean)

            k += 1
        h5f.close()
        outfile = h5py.File(filepath, 'w')
        outfile.create_dataset('kspace_masked', data=kdata_array)
        outfile.create_dataset('kspace_ground_truth', data=kdata_clean_array)
        outfile.create_dataset('image_masked', data=image_array)
        outfile.create_dataset('image_ground_truth', data=image_clean_array)
        outfile.create_dataset('mask', data=mask_array)
        outfile.create_dataset('inverse_mask', data=inverse_mask_array)
        outfile.close()
        bar.next()
    if index_dict != {}:
        with open(os.path.join(output_dir, 'index.json'), 'w') as fp:
            json.dump(index_dict, fp)
        with open(os.path.join(output_dir, 'format.json'), 'w') as fp:
            json.dump((*image_shape, 2), fp)
    return output_dir
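Each output file written above holds one dataset per array (kspace_masked, kspace_ground_truth, image_masked, image_ground_truth, mask, inverse_mask). A minimal sketch of reading one of them back with h5py; the file path is hypothetical and the channel interpretation assumes realimag_kspace=True:

import h5py

# Hypothetical path to one file produced by prepare_datasets().
with h5py.File('workdir/train/file_0001.h5', 'r') as f:
    kspace_masked = f['kspace_masked'][...]       # shape (n_slices, H, W, 2)
    image_gt = f['image_ground_truth'][...]       # shape (n_slices, H, W, 2)
    mask = f['mask'][...]                         # shape (n_slices, H, W)

# With realimag_kspace=True the two channels are real and imaginary parts.
kspace_complex = kspace_masked[..., 0] + 1j * kspace_masked[..., 1]
print(kspace_complex.shape, image_gt.shape, mask.shape)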
Beispiel #47
0
 def install_task(self, package_name : str, script : str, password : str, test_script : str, tests_passed):
     try:    
         installer_progress = Spinner(message=f'Installing {package_name}...', max=100)
         # sudo requires the flag '-S' in order to take input from stdin
         for _ in range(1, 75):
             time.sleep(0.01)
             installer_progress.next()
         
         proc = Popen(script.split(), stdin=PIPE, stdout=PIPE, stderr=PIPE)
         # communicate() only accepts bytes, so the password string must be encoded
         output, error = proc.communicate(password.encode())
         if proc.returncode != 0:
             click.echo(click.style('❎ Installation Failed... ❎', fg='red', blink=True, bold=True))
             debug = click.prompt('Would you like us to debug the failed installation?[y/n]')
             if debug == 'y':
                 debugger = Debugger()
                 debugger.debug(password, error)
                 logs = click.prompt('Would you like to see the logs?[y/n]', type=str)
                 if logs == 'y':
                     final_output = error.decode('utf-8')
                     if final_output == '':
                         click.echo('There were no logs found...')
                         return
                     else:
                         click.echo(final_output)
                         return
                 return
             else:
                 logs = click.prompt('Would you like to see the logs?[y/n]', type=str)
                 if logs == 'y':
                     final_output = output.decode('utf-8')
                     if final_output == '':
                         click.echo('There were no logs found...')
                         return
                     else:
                         click.echo(final_output)
                         return
                 return
         click.echo(click.style(f'\n\n 🎉 Successfully Installed {package_name}! 🎉 \n', fg='green', bold=True))
         # Testing the successful installation of the package
         testing_bar = IncrementalBar('Testing package...', max = 100)
         if tests_passed == [] and test_script == '':
             click.echo('\n')
             click.echo(click.style(f'Test Passed: {package_name} Launch ✅\n', fg='green'))
             return
         for _ in range(1, 21):
             time.sleep(0.002)
             testing_bar.next()
         os.system('cd --')
         for _ in range(21, 60):
             time.sleep(0.002)
             testing_bar.next()
         proc = Popen(test_script.split(), stdin=PIPE, stdout=PIPE, stderr=PIPE)
         for _ in range(60, 101):
             time.sleep(0.002)
             testing_bar.next()
         click.echo('\n')
         for test in tests_passed:
             click.echo(click.style(f'Test Passed: {test} ✅\n', fg='green'))
         return
     except subprocess.CalledProcessError as e:
         click.echo(e.output)
         click.echo('An Error Occurred During Installation...', err=True)
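The core of install_task is the Popen call that feeds the sudo password through stdin (hence the '-S' flag in the Linux scripts). A minimal sketch of that pattern on its own, without the spinner and prompts:

from subprocess import Popen, PIPE

def run_with_sudo(command, password):
    """Run a 'sudo -S ...' command, feeding the password through stdin."""
    proc = Popen(command.split(), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    # communicate() expects bytes; sudo -S reads the password from stdin.
    output, error = proc.communicate((password + '\n').encode())
    return proc.returncode, output.decode(), error.decode()

# returncode, out, err = run_with_sudo('sudo -S apt-get install -y git', 'secret')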
Beispiel #48
0
    def run(self, http_method):
        from progress.bar import IncrementalBar
        log20x = logging.getLogger("log20x")
        log40x = logging.getLogger("log40x")
        err_log = logging.getLogger("err_logger")

        case_total = self.sample.sample_total() * self.payload.sample_total()

        bar = IncrementalBar(u'RUNNING', max=case_total)

        # executor = ThreadPoolExecutor(max_workers=100)
        with ThreadPoolExecutor(max_workers=100) as executor:
            # for fn, ln, test_url in self._generate_url_req():
            #     bar.next()
            #     try:
            #         r = self.RequestMethod[http_method](test_url, headers={
            #             "User-Agent": r"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36",
            #             "WAF-Test-Case-ID": "%s (%d)" % (fn, ln)
            #         })
            #         if r.status_code / 200 == 1:
            #             log20x.info("[%d] %s (%s:%d)" % (r.status_code, test_url, fn, ln))
            #         elif r.status_code >= 500:
            #             err_log.error("[%d] %s" % (r.status_code, test_url))
            #         else:
            #             log40x.info("[%d] %s" % (r.status_code, test_url))
            #
            #     except requests.exceptions.ConnectionError, e:
            #         err_log.error("[%s] %s" % (e, test_url))
            #     self.test_total += 1
            # bar.finish()
            chrome_ua = r"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"

            fe = []
            for fn, ln, test_url in self._generate_url_req():
                test_header = {
                    "User-Agent": chrome_ua,
                    "Waf-Test-Case": "%s:%d" % (os.path.basename(fn), ln)
                }
                fe.append(
                    executor.submit(self.RequestMethod[http_method],
                                    test_url,
                                    headers=test_header))

            for f in as_completed(fe):
                try:
                    r = f.result()
                    test_case_id = 'unknown'
                    test_url = r.request.url
                    if 'Waf-Test-Case' in r.headers:
                        test_case_id = r.headers['Waf-Test-Case']
                    bar.next()
                    if r.status_code // 200 == 1:  # 2xx/3xx responses
                        log20x.info("[%d] %s (%s)" %
                                    (r.status_code, test_url, test_case_id))
                    elif r.status_code >= 500:
                        err_log.error("[%d] %s (%s)" %
                                      (r.status_code, test_url, test_case_id))
                    else:
                        log40x.info("[%d] %s (%s)" %
                                    (r.status_code, test_url, test_case_id))
                    self.test_total += 1
                except requests.exceptions.ConnectionError as e:
                    err_log.error("%s" % e)
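The request fan-out above follows the usual ThreadPoolExecutor pattern: submit every request, then advance the bar as futures complete via as_completed. A minimal sketch with placeholder URLs:

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from progress.bar import IncrementalBar

urls = ['https://example.com/?case={}'.format(i) for i in range(20)]  # placeholder cases
bar = IncrementalBar('RUNNING', max=len(urls))

with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(requests.get, url, timeout=10) for url in urls]
    for future in as_completed(futures):
        bar.next()
        try:
            response = future.result()
            print(response.status_code, response.request.url)
        except requests.exceptions.RequestException as exc:
            print('request failed:', exc)
bar.finish()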
Beispiel #49
0
        missing = json.loads(fault.data)
    elif isinstance(fault.data, types.ListType):
        missing = fault.data

    if '' in missing:
        del missing[missing.index(''):]

    bar = IncrementalBar('Uploading', max=len(missing))
    bar.suffix = '%(percent).1f%% - %(eta)ds'
    with open(path) as fp:
        for hash in missing:
            offset = hashes.index(unhexlify(hash)) * blocksize
            fp.seek(offset)
            block = fp.read(blocksize)
            client.update_container_data(container, StringIO(block))
            bar.next()
    bar.finish()

    return client.create_object_by_hashmap(container, object, map, **kwargs)


def download(client, container, object, path):

    res = client.retrieve_object_hashmap(container, object)
    blocksize = int(res['block_size'])
    blockhash = res['block_hash']
    bytes = res['bytes']
    map = res['hashes']

    if os.path.exists(path):
        h = HashMap(blocksize, blockhash)
Beispiel #50
0
    os.makedirs(tempDir, exist_ok=True)
    dir_ = os.path.join(args.path, "")
    print(f"{Fore.GREEN}All settings valid, proceeding...")
    print(f"Downloading {filename[0]}")
    chunkSize = 10240
    try:
        r = requests.get(url + filename[0], stream=True)
        with open(tempDir + filename[0], "wb") as f:
            pbar = IncrementalBar(
                "Downloading",
                max=int(r.headers["Content-Length"]) / chunkSize,
                suffix="%(percent)d%%",
            )
            for chunk in r.iter_content(chunk_size=chunkSize):
                if chunk:  # filter out keep-alive new chunks
                    pbar.next()
                    f.write(chunk)
            pbar.finish()
    except Exception:
        print(f"Download {Fore.RED}failed, please try again. Exiting.")
        sys.exit()
    print(f"Download {Fore.GREEN}done")

    # Extraction
    spinnerExtract = Spinner("Extracting... ")
    spinnerExtract.start()
    try:
        shutil.unpack_archive(tempDir + filename[0], tempDir)
    except Exception:
        print(f"Extraction {Fore.RED}failed, please try again. Exiting.")
        sys.exit()
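The download loop above derives the bar length from the Content-Length header and advances once per streamed chunk. A minimal sketch of the same pattern as a reusable function (URL and destination path are placeholders):

import requests
from progress.bar import IncrementalBar

def download(url, dest_path, chunk_size=10240):
    """Stream url to dest_path, advancing the bar once per received chunk."""
    response = requests.get(url, stream=True)
    total_chunks = int(response.headers.get('Content-Length', 0)) // chunk_size + 1
    bar = IncrementalBar('Downloading', max=total_chunks, suffix='%(percent)d%%')
    with open(dest_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
                bar.next()
    bar.finish()

# download('https://example.com/archive.zip', '/tmp/archive.zip')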
Beispiel #51
0
    def find_solutions(self, graph_setting_groups):
        results = {}
        # check for solutions for a specific set of interaction settings
        logging.info("Number of interaction settings groups being processed: "
                     + str(len(graph_setting_groups)))
        for strength, graph_setting_group in sorted(
                graph_setting_groups.items(), reverse=True):
            logging.info("processing interaction settings group with "
                         "strength " + str(strength))
            logging.info(str(len(graph_setting_group)) +
                         " entries in this group")
            logging.info("running with " +
                         str(self.number_of_threads) + " threads...")

            temp_results = []
            bar = IncrementalBar('Propagating quantum numbers...',
                                 max=len(graph_setting_group))
            bar.update()
            if self.number_of_threads > 1:
                with Pool(self.number_of_threads) as p:
                    for result in p.imap_unordered(
                            self.propagate_quantum_numbers,
                            graph_setting_group, 1):
                        temp_results.append(result)
                        bar.next()
            else:
                for graph_setting_pair in graph_setting_group:
                    temp_results.append(self.propagate_quantum_numbers(
                        graph_setting_pair))
                    bar.next()
            bar.finish()
            logging.info('Finished!')
            if strength not in results:
                results[strength] = []
            results[strength].extend(temp_results)

        for k, v in results.items():
            logging.info(
                "number of solutions for strength ("
                + str(k) + ") after qn propagation: "
                + str(sum([len(x[0]) for x in v])))

        # remove duplicate solutions, which only differ in the interaction qn S
        results = remove_duplicate_solutions(results, self.filter_remove_qns,
                                             self.filter_ignore_qns)

        node_non_satisfied_rules = []
        solutions = []
        for result in results.values():
            for (tempsolutions, non_satisfied_laws) in result:
                solutions.extend(tempsolutions)
                node_non_satisfied_rules.append(non_satisfied_laws)
        logging.info("total number of found solutions: " +
                     str(len(solutions)))
        violated_laws = []
        if len(solutions) == 0:
            violated_laws = analyse_solution_failure(node_non_satisfied_rules)
            logging.info("violated rules: " + str(violated_laws))

        # finally perform combinatorics of identical external edges
        # (initial or final state edges) and prepare graphs for
        # amplitude generation
        match_external_edges(solutions)
        final_solutions = []
        for sol in solutions:
            final_solutions.extend(
                perform_external_edge_identical_particle_combinatorics(sol)
            )

        return (final_solutions, violated_laws)
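When number_of_threads > 1, the example drives the bar from Pool.imap_unordered, which yields results as soon as each worker finishes. A minimal sketch of that pattern with a stand-in task:

from multiprocessing import Pool
from progress.bar import IncrementalBar

def propagate(item):
    return item * item   # stand-in for propagate_quantum_numbers()

if __name__ == '__main__':
    work_items = list(range(100))
    bar = IncrementalBar('Propagating quantum numbers...', max=len(work_items))
    results = []
    with Pool(4) as pool:
        # imap_unordered yields each result as soon as its worker finishes,
        # so the bar advances per completed item rather than per batch.
        for result in pool.imap_unordered(propagate, work_items, 1):
            results.append(result)
            bar.next()
    bar.finish()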
Beispiel #52
0
def main():
    parser = argparse.ArgumentParser()
    help_text_language = 'The language to which to translate e.g. "nl"'
    help_text_language += '\nCheck for language codes:'
    help_text_language += '\nhttps://sites.google.com/site/tomihasa/google-language-codes'
    help_text_service = 'The translation service to use; google (default) or deepl'
    parser.add_argument('-file', help='SRT subtitle file to translate')
    parser.add_argument('-language', help=help_text_language)
    parser.add_argument('-service', help=help_text_service)
    args = parser.parse_args()

    if args.file is None or args.language is None:
        parser.print_help()
        print('')
        raise SyntaxError('One or more argument is missing')

    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8

    input_file_name = args.file
    language = args.language

    try:
        print('\n')
        if args.service == 'deepl':
            print('Using www.deepl.com translation service.')
            srt_translator = DeeplTranslator(language)
        else:
            print('Using translate.google.com translation service.')
            srt_translator = GoogleTranslator(language)

        output_file_name, file_extension = path.splitext(input_file_name)
        output_file_name = output_file_name + '.' + args.language + file_extension
        file_encoding = get_file_encoding(args.file)

        print('Input file:          {}'.format(input_file_name))
        print('Input file encoding: {}'.format(file_encoding))
        print('Output file:         {}\n'.format(output_file_name))

        input_file = open(args.file, "r", encoding=file_encoding)
        input_file_data = input_file.read()

        subs = list(srt.parse(input_file_data))
        progress_bar = IncrementalBar('Translating', max=len(subs))

        for sub in subs:
            merge_is_needed = sub_merge_needed(sub.content)
            if merge_is_needed:
                text_to_be_translated, newline_count = remove_newline_char_from_line(
                    sub.content)
                line_to_add_newlines = srt_translator.translate(
                    text_to_be_translated)
                sub.content = add_newline_char_to_line(line_to_add_newlines,
                                                       newline_count)
            else:
                sub.content = srt_translator.translate(sub.content)
            # print('translated-sub: {}'.format(sub.content))
            progress_bar.next()

        progress_bar.finish()
        srt_translation = srt.compose(subs)

        with open(output_file_name, "w", encoding='utf-8') as output_file:
            output_file.write(srt_translation)

        t1 = time.perf_counter()

        print('\nSuccessfully translated the SRT file.')
        print('This translation took {:.2F} seconds to complete.'.format(t1 -
                                                                         t0))
        print('Output saved as: {}'.format(output_file_name))
    except Exception as exc:
        print('\nOperation failed due to an exception: {}'.format(exc))
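The translation loop relies on the srt package: srt.parse() yields Subtitle objects whose .content can be rewritten in place, and srt.compose() serialises them back. A minimal sketch with a stand-in "translation":

import srt

raw = """1
00:00:01,000 --> 00:00:03,000
Hello world!
"""

subs = list(srt.parse(raw))
for sub in subs:
    # a real call to srt_translator.translate() would go here
    sub.content = sub.content.upper()
print(srt.compose(subs))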
Beispiel #53
0
def tweets_parecidos_con_distinto_humor(corpus):
    print("Buscando tweets muy parecidos pero con distinto valor de humor...")

    parecidos_con_distinto_humor = set()

    ids_parecidos_con_distinto_humor = cargar_parecidos_con_distinto_humor()

    if ids_parecidos_con_distinto_humor:
        corpus_por_id = {tweet.id: tweet for tweet in corpus}
        for id_tweet_humor, id_tweet_no_humor in ids_parecidos_con_distinto_humor:
            parecidos_con_distinto_humor.add((corpus_por_id[id_tweet_humor], corpus_por_id[id_tweet_no_humor]))
    else:
        subcorpus_cuentas_de_humor = []
        subsubcorpus_cuentas_de_humor_humor = []
        subsubcorpus_cuentas_de_humor_no_humor = []
        for tweet in corpus:
            if tweet.es_chiste:
                subcorpus_cuentas_de_humor.append(tweet)
                if tweet.es_humor:
                    subsubcorpus_cuentas_de_humor_humor.append(tweet)
                else:
                    subsubcorpus_cuentas_de_humor_no_humor.append(tweet)

        subsubcorpus_cuentas_de_humor_no_humor_por_largo = defaultdict(list)

        bar = IncrementalBar("Tokenizando\t\t\t", max=len(subcorpus_cuentas_de_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_cuenta_humor in subcorpus_cuentas_de_humor:
            tweet_cuenta_humor.oraciones = Freeling.procesar_texto(tweet_cuenta_humor.texto_original)
            tweet_cuenta_humor.tokens = list(itertools.chain(*tweet_cuenta_humor.oraciones))
            bar.next()
        bar.finish()

        for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor:
            subsubcorpus_cuentas_de_humor_no_humor_por_largo[len(tweet_no_humor.tokens)].append(tweet_no_humor)

        bar = IncrementalBar("Buscando en tweets\t\t", max=len(subsubcorpus_cuentas_de_humor_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_humor in subsubcorpus_cuentas_de_humor_humor:
            margen = int(round(len(tweet_humor.tokens) / 5))
            largo_min = len(tweet_humor.tokens) - margen
            largo_max = len(tweet_humor.tokens) + margen

            for largo in range(largo_min, largo_max + 1):
                for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor_por_largo[largo]:
                    if distancia_edicion(tweet_humor.tokens, tweet_no_humor.tokens)\
                            <= max(len(tweet_humor.tokens), len(tweet_no_humor.tokens)) / 5:
                        parecidos_con_distinto_humor.add((tweet_humor, tweet_no_humor))
                        print('')
                        print(tweet_humor.id)
                        print(tweet_humor.texto_original)
                        print("------------")
                        print(tweet_no_humor.id)
                        print(tweet_no_humor.texto_original)
                        print("------------")
                        print('')
            bar.next()
        bar.finish()

        guardar_parecidos_con_distinto_humor(parecidos_con_distinto_humor)

    return parecidos_con_distinto_humor
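The search above buckets the non-humorous tweets by token count and only keeps pairs whose edit distance is at most a fifth of the longer tweet. The project's distancia_edicion helper is not shown here; a generic token-level Levenshtein distance like the following sketch would fit that role:

def edit_distance(tokens_a, tokens_b):
    """Levenshtein distance over token lists (insert/delete/substitute cost 1)."""
    previous = list(range(len(tokens_b) + 1))
    for i, token_a in enumerate(tokens_a, start=1):
        current = [i]
        for j, token_b in enumerate(tokens_b, start=1):
            cost = 0 if token_a == token_b else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution
        previous = current
    return previous[-1]

print(edit_distance('the cat sat'.split(), 'the cat sat down'.split()))  # 1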
def scrape_collins():
    global lx, lx_completed, lx_begins, lx_last_val
    global ly, ly_completed, ly_begins, ly_last_val
    global lz, lz_completed, lz_begins, lz_last_val
    #INITIALISE
    cache_file = read_cache()

    #SCRAPE METADATA
    if not lx_completed:
        print("Scraping meta-data")
        bar = IncrementalBar("Scraping stage 1/3",
                             max=len(ascii_lowercase),
                             suffix='%(percent).1f%% - %(index)s of %(max)s')
        for char in ascii_lowercase:
            data = BeautifulSoup(scraper.get(
                "https://www.collinsdictionary.com/browse/english/words-starting-with-"
                + char).content.decode("UTF-8"),
                                 features="html.parser")
            for d in data.body.find("ul", class_="columns2").find_all("a"):
                lx.append(d['href'])
            bar.next()
        cache_file.write("#0\n")
        for item in lx:
            cache_file.write(str(item) + "\n")
        cache_file.write("#END0\n")
        lx_completed = True
        cache_file.flush()
        bar.finish()
    else:
        print("Using cached data for stage 1/3.")
    #SCRAPE WORD LIST

    if not ly_completed:
        print("Building word list")
        if not ly_begins:
            cache_file.write("#1\n")
            data = BeautifulSoup(scraper.get(
                "https://www.collinsdictionary.com/browse/english/words-starting-with-digit"
            ).content.decode("UTF-8"),
                                 features="html.parser")
            for d in data.body.find("ul", class_="columns2").find_all("a"):
                ly.append(d['href'])
                cache_file.write(d['href'] + "\n")
                cache_file.flush()
        cache_file.close()
        cache_file = read_cache()
        bar = IncrementalBar("Scraping stage 2/3",
                             max=len(lx),
                             suffix='%(percent).1f%% - %(index)s of %(max)s')
        for url in lx:
            newrl = url.strip()
            newrl_c = strip_url(newrl)
            ly_last_val_c = strip_url(ly_last_val)
            if min(ly_last_val_c, newrl_c) == newrl_c:
                pass  # already cached from an earlier run; bar.next() runs after the if/else
            else:
                data = BeautifulSoup(
                    scraper.get(newrl).content.decode("UTF-8"),
                    features="html.parser")
                for d in data.body.find("ul", class_="columns2").find_all("a"):
                    ly.append(d['href'])
                    cache_file.write(d['href'] + "\n")
                cache_file.flush()
            bar.next()
        cache_file.write("#END1\n")
        bar.finish()
        ly_completed = True
        cache_file.flush()
    else:
        print("Using cached data for layer 2/3.")

    #SCRAPE DICTIONARY
    if not (lx_completed and ly_completed):
        print("Something went awry. Forcing a restart.")
        print("Clearing local cache...", end="", flush=True)
        os.remove("cache.data")
        print(" done.", end="\n")
    else:
        print("Scraping dictionary...")
        checked_file = open("checked.txt", mode="a+")
        cache_file = read_cache()
        bar = IncrementalBar("Scraping stage 3/3",
                             max=len(ly),
                             suffix='%(percent).1f%% - %(index)s of %(max)s')
        if not lz_begins:
            cache_file.write("#2\n")
        for url in ly:
            newrl = url.strip()
            if min(strip_url(newrl),
                   strip_url(lz_last_val.strip())) == strip_url(newrl):
                bar.next()
            else:
                bar.next()
                data = BeautifulSoup(
                    scraper.get(newrl).content.decode("utf-8"),
                    features="html.parser")
                essence = data.find_all("div", class_="dictentry dictlink")
                essence = str(essence)
                out_file.write(essence)
                out_file.flush()
                checked_file.write(strip_url(newrl) + "\n")
                checked_file.flush()
                cache_file.write(newrl + "\n")
                cache_file.flush()
        checked_file.flush()
        checked_file.close()
        out_file.flush()
        out_file.close()
        cache_file.write("#END2\n")
        cache_file.flush()
        cache_file.close()
        bar.finish()
        print(" done.")