Example No. 1
    def return_file_objects(self):
        """
		Function that returnss a list of imgur_object that can be later iterated
		"""
        print('Loading images from {}'.format(self._link))
        for links in progress_bar(self.generate_download_links(),
                                  unit='connections',
                                  total=self._supp_len,
                                  desc='Loading images'):
            try:
                req_obj = imgur_object(link=links, verbose=self.verbose)
                self.files.append(req_obj)
            except NetworkError as ne:
                print('NetworkError is raised in link {}'.format(
                    links)) if self.verbose else None
                print(ne)
            except TypeError as te:
                print(te)
        print(
            'Final list length is {length}, expected length is {explength}.\nSuccess rate is {percentage}'
            .format(length=len(self.files),
                    explength=self._supp_len,
                    percentage=(str(len(self.files) / self._supp_len * 100)) +
                    '%')) if self.verbose else None
        return self.files
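Throughout these examples, `progress_bar` is not a built-in name; Examples No. 25 and No. 26 show it being imported as an alias for `tqdm.tqdm`. Below is a minimal, self-contained sketch of that alias pattern, with a no-op fallback added here as an assumption for environments without tqdm:

try:
    from tqdm import tqdm as progress_bar
except ImportError:
    def progress_bar(iterable, **kwargs):
        # no-op fallback: hand the iterable back unchanged
        return iterable

for _ in progress_bar(range(1000), desc="demo", unit="item"):
    pass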
Example No. 2
def adversarial_fit(model: tf.keras.Model,
                    generator: tf.keras.Model,
                    discriminator: tf.keras.Model,
                    data_set,
                    latent_dim: int,
                    epochs: int,
                    batch_size=256,
                    silent: bool = False) -> None:
    """
    Run adversarial training of the model

    :param model: GAN model as joined generator and discriminator
    :param generator: generator model
    :param discriminator: discriminator model
    :param data_set: data set interface object generating desired data set
    :param latent_dim: dimension of latent space used to generate samples
    :param epochs: number of epochs to train for
    :param batch_size: number of data points in single batch
    :param silent: if True, suppress the progress bar
    """
    n_batches_in_epoch = int(data_set.n_data_points / batch_size)
    for epoch in progress_bar(range(epochs), disable=silent):
        for batch_index in range(n_batches_in_epoch):
            # set-up data batch
            samples = generator(
                data_set.latent_batch(batch_size // 2, latent_dim=latent_dim))
            x, y = data_set.batch(samples, size=batch_size)
            # train discriminator
            discriminator_loss, _ = discriminator.train_on_batch(x, y)
            # train generator
            inputs = data_set.latent_batch(size=batch_size,
                                           latent_dim=latent_dim)
            labels = np.ones([batch_size, 1])
            model.train_on_batch(inputs, labels)
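The trainer above relies on a `data_set` object with `latent_batch` and `batch` methods that are not shown. Below is a minimal sketch of what such an interface might look like; the class name, shapes, and the 1-real/0-fake labelling are assumptions for illustration, not taken from the original code base.

import numpy as np

class ToyDataSet:
    """Hypothetical stand-in for the data_set argument of adversarial_fit."""

    def __init__(self, points: np.ndarray):
        self.points = points
        self.n_data_points = len(points)

    def latent_batch(self, size: int, latent_dim: int) -> np.ndarray:
        # random noise fed to the generator
        return np.random.normal(size=(size, latent_dim))

    def real_batch(self, size: int) -> np.ndarray:
        idx = np.random.randint(0, self.n_data_points, size=size)
        return self.points[idx]

    def batch(self, generated, size: int):
        # mix real points with generated samples; label real as 1, fake as 0
        real = self.real_batch(size - len(generated))
        x = np.concatenate([real, np.asarray(generated)], axis=0)
        y = np.concatenate([np.ones((len(real), 1)),
                            np.zeros((len(generated), 1))], axis=0)
        return x, y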
Example No. 3
    def build_features(self, args, raw_data):
        features = {}

        for split, data in raw_data.items():
            self.split_feats = []
            print(f"Building features for {split}")

            for convo in progress_bar(data, total=len(data)):
                so_far = []

                for turn in convo['delexed']:
                    speaker, utt = turn['speaker'], turn['text']
                    _, _, action, values, _ = turn['targets']

                    if speaker in ['agent', 'customer']:
                        utt_str = f'{speaker}|{utt}'
                        so_far.append(utt_str)
                    else:  # create a training example during every action
                        context = so_far.copy()  # [::-1] to reverse
                        self.collect_examples(context, action, values)
                        action_str = f'action|{action}'
                        so_far.append(action_str)

            features[split] = self.split_feats
        return features
Example No. 4
def get_embeddings(file_):
    embs = dict()
    with open(file_, 'r') as f:
        for line in progress_bar(f):
            parts = line.strip().split()
            embs[parts[0]] = [float(em) for em in parts[1:]]
    print("Got {} embeddings from {}".format(len(embs), file_))

    return embs
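A hedged usage sketch: the loader assumes a GloVe/word2vec-style text file with one token per line followed by its vector components. The temporary file below is only for illustration, and `get_embeddings` is the function from Example No. 4 (it needs tqdm importable as `progress_bar`):

import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as tmp:
    tmp.write("cat 0.1 0.2 0.3\n")
    tmp.write("dog 0.4 0.5 0.6\n")
    path = tmp.name

embs = get_embeddings(path)          # prints "Got 2 embeddings from ..."
assert embs["cat"] == [0.1, 0.2, 0.3]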
Example No. 5
def run_eval(args, datasets, model, exp_logger, kb_labels, split='dev'):
    dataloader, num_examples = setup_dataloader(datasets, args.batch_size,
                                                split)
    exp_logger.start_eval(num_examples, kind=args.filename)
    loss_func = torch.nn.CrossEntropyLoss(ignore_index=-1)
    num_outputs = len(model.outputs)
    model.eval()

    preds, labels, convo_ids, turn_counts = [], [], [], []
    for batch in progress_bar(dataloader,
                              total=len(dataloader),
                              desc=f"Epoch {exp_logger.epoch}"):
        batch = tuple(t.to(device) for t in batch)
        full_history, batch_targets, context_tokens, tools = prepare_inputs(
            args, batch)

        with torch.no_grad():
            if args.task == 'ast':
                batch_scores = model(full_history, context_tokens)
                batch_loss = ast_loss(batch_scores, batch_targets, loss_func)
            elif args.task == 'cds':
                batch_scores = model(full_history, context_tokens, tools)
                batch_loss = cds_loss(batch_scores, batch_targets, loss_func)

        if args.cascade:
            batch_turn_count = batch_targets.pop()
            batch_convo_id = batch_targets.pop()

        if args.quantify or split == 'dev':
            exp_logger.eval_loss += batch_loss.mean().item()
            exp_logger.batch_steps += 1

        preds.append(batch_scores)
        labels.append(batch_targets)
        convo_ids.append(batch_convo_id if args.cascade else 0)
        turn_counts.append(batch_turn_count if args.cascade else 0)

        if args.debug:
            if len(turn_counts) > 10:
                break

    grouped_preds = [
        torch.cat([pred[i] for pred in preds], dim=0)
        for i in range(num_outputs)
    ]
    grouped_labels = [
        torch.cat([label[i] for label in labels], dim=0)
        for i in range(num_outputs)
    ]
    ci_and_tc = (torch.cat(convo_ids, dim=0),
                 torch.cat(turn_counts, dim=0)) if args.cascade else (0, 0)

    utils = {'kb_labels': kb_labels, 'ci_and_tc': ci_and_tc}
    metrics, res_name = quantify(args, grouped_preds, grouped_labels, utils)
    exp_logger.end_eval(metrics, kind=args.filename)
    return (metrics, res_name) if split == 'dev' else metrics
Example No. 6
  def micro_f1(self, classes, predictions, targets):
    true_positive, false_positive, false_negative, true_negative = 0,0,0,0

    for cls in progress_bar(classes):
      for pred, tar in zip(predictions, targets):
        if pred == cls and tar == cls:
          true_positive += 1
        elif pred == cls and tar != cls:
          false_positive += 1
        elif pred != cls and tar == cls:
          false_negative += 1
        elif pred != cls and tar != cls:
          true_negative += 1

    micro = Tester.single_f1(true_positive, false_positive, false_negative)
    print("Micro average is {:.3f}".format(micro))
    return micro
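The same counting logic as a self-contained sketch, without the `Tester` helper; for single-label predictions the pooled counts make micro-F1 coincide with plain accuracy (here 2 of 3 predictions are correct, so the result is 0.667):

def micro_f1(classes, predictions, targets):
    # pooled counts over all classes (true negatives are not needed for F1)
    tp = fp = fn = 0
    for cls in classes:
        for pred, tar in zip(predictions, targets):
            if pred == cls and tar == cls:
                tp += 1
            elif pred == cls and tar != cls:
                fp += 1
            elif pred != cls and tar == cls:
                fn += 1
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

print(micro_f1([0, 1, 2], [0, 1, 1], [0, 1, 2]))  # 0.666..., i.e. accuracy of 2/3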
Example No. 7
def preprocessing():
    # Read the local CSV subject export
    classifications = pd.read_csv("classification-export.csv")

    classifications['metadata'] = classifications['metadata'].apply(
        lambda x: json.loads(x))
    classifications['locations'] = classifications['locations'].apply(
        lambda x: json.loads(x))

    # Include in subject_set_ids all subject sets you want to keep
    subject_set_ids = []
    classifications = classifications.loc[
        classifications['subject_set_id'].isin(subject_set_ids)]

    classifications['smooth'] = 0
    classifications['features'] = 0
    classifications['star'] = 0

    # Copy data from metadata into correct/new CSV headers with progress bar
    with progress_bar(total=len(classifications)) as current_progress:
        for index, row in classifications.iterrows():
            current_progress.update(1)
            for column in row['metadata']:
                # Find the Right Ascension and Declination in the metadata and assign to columns
                if column in ['ra', 'dec', '!ra', '!dec']:
                    stripped_punctuation = column.strip(string.punctuation)
                    classifications.loc[
                        index, stripped_punctuation] = row['metadata'][column]
                # Find the image name, titled 'iauname', in the metadata and assign to a column
                if column in ['iauname', '!iauname']:
                    classifications.loc[index,
                                        'filename'] = row['metadata'][column]
            for column in row['locations']:
                classifications.loc[index, 'image'] = row['locations'][column]

    # Drop unnecessary columns and rearrange
    classifications = classifications[[
        'subject_id', 'classifications_count', 'ra', 'dec', 'image',
        'filename', 'smooth', 'features', 'star'
    ]]

    # Create a parsed CSV for DB import
    classifications.to_csv('parsed-subject-set.csv',
                           index=False,
                           encoding='utf-8')
Example No. 8
def CompressFiles(files, name, zip_extension="tar.gz", overwrite=False):
    zipped_file = zipped_contents_folder / get_current_date(
    ) / f"{name}.{zip_extension}"
    if zipped_file.exists():
        if overwrite:
            os.remove(
                zipped_file.absolute().as_posix())  # doesn't delete, unfortunately
        else:
            return zipped_file
    else:
        if not zipped_file.parent.exists():
            zipped_file.parent.mkdir(exist_ok=1)

    zip_file_client = z.ZipFile(zipped_file.as_posix(), "w", z.ZIP_DEFLATED)

    print(
        "\nUserWarnings for duplicate files in zip are supressed.\ncompressing ..."
    )
    compress_progress = progress_bar(iterable=files,
                                     desc="compress",
                                     total=len(files),
                                     unit="file",
                                     ncols=120)
    for file in files:
        compress_progress.update(1)
        if file in zip_file_client.namelist():
            print(f"{file} already exists...")
            continue

        if file == zipped_file.absolute().as_posix():
            print(f"skipped: {file}")
            continue

        try:
            zip_file_client.write(file, compress_type=z.ZIP_DEFLATED)
            compress_progress.desc = file
        except (PermissionError, FileNotFoundError):
            pass

    compress_progress.close()
    zip_file_client.close()
    print(f"files compressed at: {zipped_file.as_posix()}")

    return zipped_file
Example No. 9
    def backup_to_harddrive(self, less_24_mode=True, zip_mode=True):

        # gather what you need and what is the mode
        files = []

        external_drives = self.get_external_drives()
        if not external_drives:
            raise ConnectionError("there are no external hard drives ONLINE to backup to.")

        drive_chosen = external_drives[0]
        if len(external_drives) > 1:

            print(external_drives)
            while 1:
                try:
                    index = int(input("choose drive to backup to:"))
                    if 1 <= index <= len(external_drives):
                        drive_chosen = external_drives[index - 1]
                        break
                    print("index out of range. repeat")
                except ValueError:
                    print("invalid index. repeat")

        before = time.time()

        print(f"\nBackup to: {drive_chosen} started at: {get_current_datetime()}\n")
        input("_____")

        for backup_file in progress_bar(
            iterable=files,
            desc="backup",
            total=len(files),
            unit="file",
            ncols=120
        ):
            self.copy_file_to_harddrive(backup_file.file, backup_file.destination)
        print(f"\nBackup to: {drive_chosen} ended at: {get_current_datetime()}")

        execution_time = time.time() - before
        execution_time = seconds_to_time(int(execution_time))
        print(f"\nbackup duration: [ {execution_time} ] seconds")
Example No. 10
  def macro_f1(self, classes, predictions, targets):
    total_f1 = []
    for cls in progress_bar(classes):

      true_positive, false_positive, false_negative, true_negative = 0,0,0,0
      for pred, tar in zip(predictions, targets):
        if pred == cls and tar == cls:
          true_positive += 1
        elif pred == cls and tar != cls:
          false_positive += 1
        elif pred != cls and tar == cls:
          false_negative += 1
        elif pred != cls and tar != cls:
          true_negative += 1

      f1 = Tester.single_f1(true_positive, false_positive, false_negative)
      if f1 >= 0:
        total_f1.append(f1)

    macro = np.average(total_f1)
    print("Macro average is {:.3f}".format(macro))
    return macro
Example No. 11
    def build_features(self, args, raw_data):
        features = {}

        for split, data in raw_data.items():
            self.split_feats = []
            print(f"Building features for {split}")

            for convo in progress_bar(data, total=len(data)):
                so_far = []

                for turn in convo['delexed']:
                    speaker, text = turn['speaker'], turn['text']
                    utterance = f"{speaker}|{text}"

                    if speaker == 'agent':
                        context = so_far.copy()
                        support_items = turn['candidates'], convo[
                            'convo_id'], turn['turn_count']
                        self.collect_one_example(context, turn['targets'],
                                                 support_items)
                        so_far.append(utterance)
                    elif speaker == 'action':
                        context = so_far.copy()
                        self.collect_examples(context, turn['targets'],
                                              convo['convo_id'],
                                              turn['turn_count'])
                        so_far.append(utterance)
                    else:
                        so_far.append(utterance)

                context = so_far.copy()  # the entire conversation
                end_targets = turn['targets'].copy()
                end_targets[1] = 'end_conversation'
                end_targets[4] = -1
                support_items = convo['convo_id'], turn['turn_count']
                self.collect_one_example(context, end_targets, support_items)

            features[split] = self.split_feats
        return features
Example No. 12
  def accuracy(self, task):
    batch_test_loss, batch_bleu, batch_success = [], [], []
    bleu_scores, accuracy = [], []

    for test_pair in progress_bar(self.test_data):
      test_input, test_output = test_pair
      loss, predictions, visual = run_inference(self.model, test_input, \
                        test_output, criterion=NLLLoss(), teach_ratio=0)

      targets = test_output.data.tolist()
      predicted_tokens = [vocab.index_to_word(x, task) for x in predictions]
      target_tokens = [vocab.index_to_word(z[0], task) for z in targets]

      test_loss = loss.data[0] / test_output.size()[0]
      bleu_score = BLEU.compute(predicted_tokens, target_tokens)
      turn_success = all([pred == tar[0] for pred, tar in zip(predictions, targets)])

      batch_test_loss.append(test_loss)
      batch_bleu.append(bleu_score)
      batch_success.append(turn_success)

    return batch_processing(batch_test_loss, batch_bleu, batch_success)
Example No. 13
    def extract_features(self, examples, mode='train'):
        subarrays = []
        for i, method in enumerate(self.feature_methods):
            name = method.__name__
            feature_filename = get_result_filename('{}.{}.npy'.format(
                name, mode))
            try:
                os.mkdir(os.path.dirname(feature_filename))
            except FileExistsError:
                pass
            if os.access(feature_filename, os.R_OK):
                features = np.load(feature_filename)
            else:
                feature_list = []
                for example in progress_bar(examples, desc=name):
                    feature_list.append(method(example))
                features = np.vstack(feature_list)
                np.save(feature_filename, features)

            # Set a selected feature source to all zeroes
            if i in self.ablate:
                features *= 0
            subarrays.append(features)
        return np.hstack(subarrays)
Example No. 14
    def fit(self,
            data_set: DataSetI,
            n_epochs: int,
            batch_size: int = 256,
            silent: bool = False) -> None:
        """
        Fit model to generate given data set

        :param data_set: DataSetI object containing desired data set
        :param n_epochs: number of iterations run in training
        :param batch_size: size of data batch
        :param silent: if True, suppress the progress bar
        """
        n_batches_in_epoch = int(data_set.n_data_points / batch_size)

        for epoch in progress_bar(range(n_epochs), disable=silent):
            for batch_index in range(n_batches_in_epoch):
                # set-up data batch
                samples = data_set.latent_batch(batch_size, self._latent_dim)
                labels = data_set.real_batch(batch_size)

                _ = self._model.train_on_batch(samples, labels)

        return
Example No. 15
gain = args["--gain"]



run = dr.EventGenerator(args["--input"], max_events=args["--maxevents"])
NN = min(len(run), args["--maxevents"])

integral = np.zeros(NN, dtype='f4')
integral_weighted = np.zeros(NN, dtype='f4')
max_pos = np.zeros(NN, dtype='i4')
arrival_time = np.zeros(NN, dtype='f4')
arrival_time_no_calib = np.zeros(NN, dtype='f4')
trapz = np.zeros(NN, dtype='f4')
simps = np.zeros(NN, dtype='f4')

for i, event in enumerate(progress_bar(run, leave=True)):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    calibrated = raw_data - offset[stop_cell:stop_cell+run.roi]
    t = cell_width[stop_cell:stop_cell+run.roi].cumsum()

    max_pos[i] = np.argmax(calibrated)

    s = slice(max_pos[i]-half_integration_window, max_pos[i]+half_integration_window+1)
    samples = np.arange(s.start, s.stop)
    cells = dr.sample2cell(samples, stop_cell, total_cells=1024)    
    DLE = partial(digital_leading_edge_discriminator, data=calibrated, threshold=1000)

    arrival_time[i] = DLE(time=t)
    arrival_time_no_calib[i] = DLE(time=np.arange(len(calibrated)))
    integral[i] = calibrated[s].sum()
Example No. 16

if __name__ == '__main__':
    fig, axs = plt.subplots(1, 2, figsize=(14, 6))

    for ax in axs:
        low = ax is axs[0]

        lens = range(10, 300, 10) if low else range(1000, 30000, 1000)
        py_time = []
        np_time = []
        numba1_time = []
        numba2_time = []
        c_time = []

        for l in progress_bar(lens, desc='Lower' if low else 'Upper'):
            rands = [random.random() for _ in range(l)]
            numpy_rands = np.array(rands)

            numba1_time.append(
                timeit.timeit(lambda: numba_standard_deviation(numpy_rands),
                              number=1000))
            numba2_time.append(
                timeit.timeit(
                    lambda: numba_longer_standard_deviation(numpy_rands),
                    number=1000))
            np_time.append(
                timeit.timeit(lambda: np.std(numpy_rands), number=1000))
            c_time.append(
                timeit.timeit(lambda: std.standard_dev(rands), number=1000))
            if low:
Example No. 17
def main():
    args = get_parser().parse_args()

    storage_conn = get_native_storage_conn(args.native_metering_connection)
    total_amount = count_samples(storage_conn, args.start_timestamp,
                                 args.end_timestamp)
    print('%s samples will be migrated to Gnocchi.' % total_amount)

    # NOTE: we need service credentials to init gnocchiclient
    config_file = ([args.ceilometer_config_file]
                   if args.ceilometer_config_file else None)
    gnocchi_conf = service.prepare_service([], config_file)
    logger = log.getLogger()
    log_conf = cfg.ConfigOpts()
    log.register_options(log_conf)
    log_conf.set_override('log_file', args.log_file)
    log_conf.set_override('debug', True)
    log.setup(log_conf, 'ceilometer_migration')
    time_filters = []
    if args.start_timestamp:
        time_filters.append({">=": {'timestamp': args.start_timestamp}})
    if args.end_timestamp:
        time_filters.append({"<": {'timestamp': args.end_timestamp}})

    gnocchi_publisher = gnocchi.GnocchiPublisher(gnocchi_conf, "gnocchi://")

    batch_size = args.batch_migration_size
    if total_amount == 'Unknown':
        total_amount = None
    orderby = [{"message_id": "asc"}]
    last_message_id = None
    migrated_amount = 0
    if progress_bar:
        pbar = progress_bar(total=total_amount, ncols=100, unit='samples')
    else:
        pbar = None
    while migrated_amount < total_amount:
        if time_filters and last_message_id:
            filter_expr = {
                'and': time_filters + [{
                    ">": {
                        "message_id": last_message_id
                    }
                }]
            }
        elif time_filters and not last_message_id:
            if len(time_filters) == 1:
                filter_expr = time_filters[0]
            else:
                filter_expr = {'and': time_filters}
        elif not time_filters and last_message_id:
            filter_expr = {">": {"message_id": last_message_id}}
        else:
            filter_expr = None
        samples = storage_conn.query_samples(filter_expr=filter_expr,
                                             orderby=orderby,
                                             limit=batch_size)
        samples = list(samples)
        if not samples:
            break
        last_message_id = samples[-1].message_id
        for sample in samples:
            logger.info(
                'Migrating sample with message_id: %s, meter: %s, '
                'resource_id: %s' %
                (sample.message_id, sample.counter_name, sample.resource_id))
        samples_dict = [sample.as_dict() for sample in samples]
        gnocchi_publisher.publish_samples(samples_dict)
        length = len(samples)
        migrated_amount += length
        if pbar:
            pbar.update(length)
    logger.info("=========== %s metrics data migration done ============" %
                total_amount)
Example No. 18
    def backup_to_server(self, less_24_mode=True, zip_mode=True):
        if not self.is_server_online():
            self.logger_backup.info(
                f"warning: server {self.server_http_url} is OFFLINE")

        backup_start_time = time()
        backup_start_datetime = get_current_datetime()

        if zip_mode:
            # files generation
            self.logger_backup.info("generating files ...")
            files_collection = ExtractFiles(*GetMainContents(),
                                            less_24_mode=less_24_mode,
                                            verbose=1,
                                            overwrite=self.overwrite)
            self.logger_backup.info("generated.")

            if less_24_mode:
                _name = "less_24_files"
            else:
                _name = "all_files"

            # Path object
            tar_gz_file = CompressFiles(files_collection,
                                        _name,
                                        overwrite=self.overwrite)
            del _name

            if not self.is_server_online():
                if windows:
                    windows_notification(
                        "Backup Client",
                        "start python webserver for backup! (2 mins left until start)",
                        5, "assets/icons/backup.ico", 1)
                self.logger_backup.info(
                    f"server: {self.server_http_url} is OFFLINE (this time sleeping 2 minutes)"
                )
                sleep(2 * 60)

            # last chance
            if not self.is_server_online():
                self.logger_backup.exception(
                    f"server: {self.server_http_url} is OFFLINE")
                raise ConnectionError(
                    f"server: {self.server_http_url} is OFFLINE")

            self.logger_backup.info(
                f"\nBackup to: {self.server_http_url} started at: {backup_start_datetime}\n"
            )

            subprocess.call(
                self.pscp_command.safe_substitute(
                    file_path=tar_gz_file.absolute().as_posix(),
                    destination_folder=server_backup_folder))

            tar_gz_file.unlink()

            response = requests.post(self.server_http_url,
                                     json={
                                         "extract":
                                         1,
                                         "zip_path":
                                         server_backup_folder +
                                         tar_gz_file.name
                                     })
            if response.status_code != 200:
                try:
                    response.raise_for_status()
                except requests.RequestException:
                    self.logger_backup.exception(
                        f"Status code: {response.status_code}\n")
                    response.raise_for_status()

            if not response.json()["extract_result"] == "success":
                raise ValueError(response.json()["extract_result"])

            backup_finish_datetime = get_current_datetime()
            self.logger_backup.info(
                f"tar.gz extracted on server successfully\nBackup to: {self.server_http_url} ended at: {backup_finish_datetime}"
            )

            # tar_gz_file.unlink(missing_ok=False)
            self.logger_backup.info(f"{tar_gz_file.name} deleted")

        else:
            # NON ZIP MODE

            # files generation
            self.logger_backup.info("generating files ...")
            files_collection = ExtractFiles(*GetMainContents(),
                                            less_24_mode=less_24_mode,
                                            verbose=1,
                                            overwrite=1)
            self.logger_backup.info("generated.")
            if less_24_mode:
                _name = "less_24_files"
            else:
                _name = "all_files"

            self.logger_backup.info(
                "generating array with BackupApplicationFiles files ...")
            files_collection = InstantiateWithBackupApplicationFile(
                files_collection, server_backup_folder, _name)
            del _name
            self.logger_backup.info("generated.")

            if not self.is_server_online():
                if windows:
                    windows_notification(
                        "Backup Client",
                        "start python webserver for backup! (2 mins left until start)",
                        5, "assets/icons/backup.ico", 1)

                self.logger_backup.warning(
                    f"server: {self.server_http_url} is OFFLINE (this time sleeping 2 minutes)"
                )
                sleep(2 * 60)

            # last chance
            if not self.is_server_online():
                self.logger_backup.exception(
                    f"server: {self.server_http_url} is OFFLINE")
                raise ConnectionError(
                    f"server: {self.server_http_url} is OFFLINE")

            distance = len(files_collection)
            backup_progress = progress_bar(iterable=range(distance),
                                           desc="backup_to_server",
                                           total=distance,
                                           unit="file",
                                           ncols=120)

            self.logger_backup.info(
                f"\nBackup to: {self.server_http_url} started at: {get_current_datetime()}\n"
            )
            if less_24_mode:
                self.logger_backup.info("copying less 24 files ...")
            else:
                self.logger_backup.info("copying all files ...")

            backup_start_time = time()
            total_exception_during_backup = 0
            iterator = 0
            while iterator < distance:
                try:
                    self.copy_quiet(files_collection[iterator].file,
                                    files_collection[iterator].dirname)
                    backup_progress.update(1)

                except Exception as error:
                    total_exception_during_backup += 1
                    self.logger_backup.exception(
                        f"\nexception occurred at index={iterator}\nfile: {files_collection[iterator].file}\ndirname: {files_collection[iterator].dirname}\n",
                        print__=False)

                    backup_progress.update(-1)
                    iterator -= 1

                iterator += 1

            backup_progress.close()

            self.logger_backup.info(
                f"{total_exception_during_backup} exceptions occured during backup (see them in log)"
            )
            backup_finish_datetime = get_current_datetime()
            self.logger_backup.info(
                f"\nBackup to: {self.server_http_url} ended at: {backup_finish_datetime}"
            )

        backup_duration = seconds_to_time(int(time() - backup_start_time))
        self.logger_backup.info(f"\nbackup duration: {backup_duration}")

        if windows:
            windows_notification("Backup Client",
                                 f"backup finished:\n({backup_duration})", 5,
                                 "assets/icons/backup.ico", 1)

        size_in_bytes = 0
        for file in files_collection:
            try:
                size_in_bytes += os.path.getsize(file)
            except (FileNotFoundError, PermissionError):
                pass

        total_size = convert_size_in_bytes(size_in_bytes)
        orig_seconds = backup_duration.seconds
        try:
            orig_hours = backup_duration.hours
        except AttributeError:
            orig_hours = 0

        try:
            orig_minutes = backup_duration.minutes
        except AttributeError:
            orig_minutes = 0

        hours = orig_hours + orig_minutes / 60 + orig_seconds / 3600
        minutes = orig_hours * 60 + orig_minutes + orig_seconds / 60
        seconds = orig_hours * 3600 + orig_minutes * 60 + orig_seconds

        metadata = {
            "started_datetime": backup_start_datetime,
            "finish_datetime": backup_finish_datetime,
            "total_files": len(files_collection),
            "total_size": f"{total_size[0]}{total_size[1]}",
            "total_size_in_bytes": size_in_bytes,
            "total_exceptions": 0,
            "process_interrupted": False,
            "process_interrupted_times": 0,
            "duration_hours": fixed_set_precision_float(hours, 3),
            "duration_minutes": fixed_set_precision_float(minutes, 3),
            "duration_seconds": fixed_set_precision_float(seconds, 3)
        }

        metadata_name = f"backup_metadata_{get_current_date()}_{get_current_time().replace(':', '.')}.json"
        # remote location
        write_json_to_file(metadata,
                           current_date_remote_metadata_folder / metadata_name)

        # local project
        write_json_to_file(metadata, metadata_folder / metadata_name)
Example No. 19
    def minimize(
        self,
        coordinates: np.ndarray,
        num_steps: int,
        patience: int = 50,
        silent: bool = True,
    ) -> Tuple[Any, Any]:
        """
        Minimize TSP for given coordinate points

        :param coordinates: coordinates of cities to minimize TSP
        :param num_steps: number of iterations
        :param patience: number of epochs without improving solution before terminating
        :param silent: if True, suppress the progress bar during execution

        :return: tuple with best route and its length
        """
        self.distance_matrix = distance.cdist(coordinates, coordinates)

        population = self.initialize_population(coordinates.shape[0], self.population_size, self.extra_initialization_rate)
        self.history["min_fitness"] = np.zeros(num_steps)
        self.history["mean_fitness"] = np.zeros(num_steps)
        self.history["max_fitness"] = np.zeros(num_steps)

        crossover_schedule = self.crossover_schedule_type(num_steps, self.crossover_rate)
        mutation_schedule = self.mutation_schedule_type(num_steps, self.mutation_rate)

        for generation in progress_bar(range(num_steps), disable=silent):
            elite = self.selection(self.fitness, population, int(self.elitism_rate * self.population_size))

            self.validate_population(population.numpy())

            num_to_crossover = int(self.crossover_rate * self.population_size)
            mating_pool = self.selection(self.fitness, population, num_to_crossover)
            offspring = self.create_offspring(mating_pool, int((1 - self.elitism_rate) * self.population_size))

            self.validate_population(population.numpy())

            num_to_mutate = int(self.mutation_rate * self.population_size)
            to_mutate = tf.random.uniform(
                [num_to_mutate, ], maxval=int((1 - self.elitism_rate) * self.population_size), dtype="int32"
            )
            offspring = slice_update(offspring, indices=to_mutate, updates=self.mutate(tf.gather(offspring, to_mutate)))

            self.validate_population(population.numpy())

            # concatenate all solutions and create next generation
            population = tf.concat([elite, offspring], axis=0)

            fitness = tf.map_fn(self.fitness, population)

            self.history["mean_fitness"][generation] = fitness.numpy().mean()
            self.history["min_fitness"][generation] = fitness.numpy().min()
            self.history["max_fitness"][generation] = fitness.numpy().max()
            self.history["epoch"] = generation

            self.crossover_rate = crossover_schedule[generation]
            self.mutation_rate = mutation_schedule[generation]

            validation = self.history["min_fitness"][generation - patience: generation]
            if np.all(np.diff(validation) == 0) and generation >= patience:
                return (
                    np.array_split(population[fitness.numpy().argmin()].numpy(), self.n_agents),
                    fitness.numpy().min()
                )

        return np.array_split(population[fitness.numpy().argmin()].numpy(), self.n_agents), fitness.numpy().min()
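A hedged usage sketch for the minimizer above; the enclosing class is not shown in the snippet, so `solver` is a placeholder for whatever genetic-TSP object defines `minimize`:

import numpy as np

coordinates = np.random.uniform(0, 100, size=(25, 2))  # 25 random cities
best_route, best_length = solver.minimize(coordinates,
                                          num_steps=500,
                                          patience=50,
                                          silent=False)
print(f"best route length: {best_length:.2f}")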
Example No. 20
def scores_for_proteins(proteins: Iterable, genes_data: DataFrame,
                        big_wig_path: str) -> Tuple[Dict, Namespace]:
    """Load conservation scores, average when needed, and transform into protein space."""

    bw = pyBigWig.open(big_wig_path)

    score_tracks = {}
    skipped_premature = set()
    skipped_key_error = set()
    mapping_to_many = set()
    skipped_track_mismatch = set()

    for protein in progress_bar(proteins):

        if '*' in protein.sequence[:-1]:
            skipped_premature.add(protein)
            continue

        gene = protein.gene
        chrom = 'chr' + gene.chrom
        try:
            protein_data = genes_data.loc[[(chrom, protein.refseq)]]
        except KeyError:
            skipped_key_error.add(protein)
            continue

        protein_tracks = []

        # a transcript might map to more than one genomic location
        for genomic_location in protein_data.itertuples(index=False):

            try:
                track = extract_track(genomic_location, protein, chrom, bw)
            except MismatchError:
                skipped_track_mismatch.add(protein)
                continue
            except TypeError:
                skipped_key_error.add(protein)
                continue

            protein_tracks.append(track)

        protein_tracks = [track for track in protein_tracks if track]

        if not protein_tracks:
            continue
        elif len(protein_tracks) > 1:
            mapping_to_many.add(protein)
            protein_track = [mean(scores) for scores in zip(*protein_tracks)]
        else:
            protein_track = protein_tracks[0]

        score_tracks[protein] = convert_to_aa_scores(protein_track)

    print(
        f'Averaged data for {len(mapping_to_many)} proteins mapping to more than one genomic location.'
    )
    # print({protein.refseq for protein in mapping_to_many})

    print(
        f'Skipped {len(skipped_premature)} proteins with premature stop codons.'
    )
    # print({protein.gene.name for protein in skipped_premature})

    print(
        f'Failed to find genomic data for {len(skipped_key_error)} proteins.')
    # print({(protein.gene.name, protein.gene.chrom, protein.gene.strand) for protein in skipped_key_error})

    print(
        f'Conflicting genomic and protein level coordinates for {len(skipped_track_mismatch)} proteins.'
    )
    # print({protein.gene.name for protein in skipped_track_mismatch})

    details = Namespace(mapping_to_many_regions=mapping_to_many,
                        skipped=Namespace(
                            premature_stop_codon=skipped_premature,
                            no_genomic_data=skipped_key_error,
                            track_mismatch=skipped_track_mismatch))

    return score_tracks, details
Example No. 21
        for index, user_id in enumerate(user_ids):
            print("---------------------")
            print("USER ID:", index, user_id)

            lookup = {
                "user_id": user_id,
                "timeline_length": None,
                "error_type": None,
                "error_message": None,
                "start_at": generate_timestamp(),
                "end_at": None
            }
            timeline = []

            try:
                for status in progress_bar(job.fetch_statuses(user_id=user_id),
                                           total=job.status_limit):
                    timeline.append(job.parse_status(status))

                lookup["timeline_length"] = len(timeline)
            except Exception as err:
                lookup["error_type"] = err.__class__.__name__
                lookup["error_message"] = str(err)
            lookup["end_at"] = generate_timestamp()
            print(lookup)
            lookups.append(lookup)

            if any(timeline):
                print("SAVING", len(timeline), "TIMELINE TWEETS...")
                errors = job.save_timeline(timeline)
                if errors:
                    pprint(errors)
Example No. 22
cell_width = pd.read_csv(args["--tc"])["cell_width"].values
run = dr.EventGenerator(args["--input"], max_events=args["--maxevents"])
offset = np.genfromtxt(args["--offset"])[:, 0]
# trick to omit np.roll
offset = np.concatenate((offset, offset))
cell_width = np.concatenate([cell_width] * 5)
cell_width = np.roll(cell_width, 1)

ch = args["--channel"]
gain = args["--gain"]

bins = [np.linspace(50, 80, 301), np.linspace(-500, 2500, 601)]
histo, _, _ = np.histogram2d([], [], bins=bins)

for event in progress_bar(run, leave=True):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]

    calibrated = raw_data - offset[stop_cell:stop_cell + run.roi]
    t = cell_width[stop_cell:stop_cell + run.roi].cumsum()
    h, _, _ = np.histogram2d(t, calibrated, bins=bins)
    histo += h

# normalize histo along y
histo /= histo.mean(axis=1)[:, np.newaxis]

plt.figure()
plt.imshow(
    histo.T,
    cmap="viridis",
ch = args["--channel"]
gain = args["--gain"]

run = dr.EventGenerator(args["--input"], max_events=args["--maxevents"])
NN = min(len(run), args["--maxevents"])

integral = np.zeros(NN, dtype='f4')
integral_weighted = np.zeros(NN, dtype='f4')
max_pos = np.zeros(NN, dtype='i4')
arrival_time = np.zeros(NN, dtype='f4')
arrival_time_no_calib = np.zeros(NN, dtype='f4')
trapz = np.zeros(NN, dtype='f4')
simps = np.zeros(NN, dtype='f4')

for i, event in enumerate(progress_bar(run, leave=True)):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    calibrated = raw_data - offset[stop_cell:stop_cell + run.roi]
    t = cell_width[stop_cell:stop_cell + run.roi].cumsum()

    max_pos[i] = np.argmax(calibrated)

    s = slice(max_pos[i] - half_integration_window,
              max_pos[i] + half_integration_window + 1)
    samples = np.arange(s.start, s.stop)
    cells = dr.sample2cell(samples, stop_cell, total_cells=1024)
    DLE = partial(digital_leading_edge_discriminator,
                  data=calibrated,
                  threshold=1000)
Example No. 24
offset = np.genfromtxt(args["--offset"])[:,0]
# trick to omit np.roll
offset = np.concatenate((offset, offset))
cell_width = np.concatenate([cell_width]*5)
cell_width = np.roll(cell_width, 1)


ch = args["--channel"]
gain = args["--gain"]



bins = [np.linspace(50, 80, 301), np.linspace(-500, 2500, 601)]
histo, _, _ = np.histogram2d([],[], bins=bins)

for event in progress_bar(run, leave=True):
    raw_data = event.data[ch][gain]
    stop_cell = event.header.stop_cells[ch][gain]
    
    calibrated = raw_data - offset[stop_cell:stop_cell+run.roi]
    t = cell_width[stop_cell:stop_cell+run.roi].cumsum()
    h, _, _ = np.histogram2d(t, calibrated, bins=bins)
    histo += h




# normalize histo along y
histo /= histo.mean(axis=1)[:, np.newaxis]

Example No. 25
def _create_source(index, dictionary, tfidf, symmetric, dominant, nonzero_limit, dtype):
    """Build a sparse term similarity matrix using a term similarity index.

    Returns
    -------
    matrix : :class:`scipy.sparse.coo_matrix`
        The sparse term similarity matrix.

    """
    assert isinstance(index, TermSimilarityIndex)
    assert dictionary is not None
    matrix_order = len(dictionary)

    if matrix_order == 0:
        raise ValueError('Dictionary provided to SparseTermSimilarityMatrix must not be empty')

    logger.info("constructing a sparse term similarity matrix using %s", index)

    if nonzero_limit is None:
        nonzero_limit = matrix_order

    def tfidf_sort_key(term_index):
        if isinstance(term_index, tuple):
            term_index, *_ = term_index
        term_idf = tfidf.idfs[term_index]
        return (-term_idf, term_index)

    if tfidf is None:
        columns = sorted(dictionary.keys())
        logger.info("iterating over %i columns in dictionary order", len(columns))
    else:
        assert max(tfidf.idfs) == matrix_order - 1
        columns = sorted(tfidf.idfs.keys(), key=tfidf_sort_key)
        logger.info("iterating over %i columns in tf-idf order", len(columns))

    nonzero_counter_dtype = _shortest_uint_dtype(nonzero_limit)

    column_nonzero = np.array([0] * matrix_order, dtype=nonzero_counter_dtype)
    if dominant:
        column_sum = np.zeros(matrix_order, dtype=dtype)
    if symmetric:
        assigned_cells = set()
    row_buffer = array('Q')
    column_buffer = array('Q')
    if dtype is np.float16 or dtype is np.float32:
        data_buffer = array('f')
    elif dtype is np.float64:
        data_buffer = array('d')
    else:
        raise ValueError('Dtype %s is unsupported, use numpy.float16, float32, or float64.' % dtype)

    def cell_full(t1_index, t2_index, similarity):
        if dominant and column_sum[t1_index] + abs(similarity) >= 1.0:
            return True  # after adding the similarity, the matrix would cease to be strongly diagonally dominant
        assert column_nonzero[t1_index] <= nonzero_limit
        if column_nonzero[t1_index] == nonzero_limit:
            return True  # after adding the similarity, the column would contain more than nonzero_limit elements
        if symmetric and (t1_index, t2_index) in assigned_cells:
            return True  # a similarity has already been assigned to this cell
        return False

    def populate_buffers(t1_index, t2_index, similarity):
        column_buffer.append(t1_index)
        row_buffer.append(t2_index)
        data_buffer.append(similarity)
        column_nonzero[t1_index] += 1
        if symmetric:
            assigned_cells.add((t1_index, t2_index))
        if dominant:
            column_sum[t1_index] += abs(similarity)

    try:
        from tqdm import tqdm as progress_bar
    except ImportError:
        def progress_bar(iterable):
            return iterable

    for column_number, t1_index in enumerate(progress_bar(columns)):
        column_buffer.append(column_number)
        row_buffer.append(column_number)
        data_buffer.append(1.0)

        if nonzero_limit <= 0:
            continue

        t1 = dictionary[t1_index]
        num_nonzero = column_nonzero[t1_index]
        num_rows = nonzero_limit - num_nonzero
        most_similar = [
            (dictionary.token2id[term], similarity)
            for term, similarity in index.most_similar(t1, topn=num_rows)
            if term in dictionary.token2id
        ] if num_rows > 0 else []

        if tfidf is None:
            rows = sorted(most_similar)
        else:
            rows = sorted(most_similar, key=tfidf_sort_key)

        for t2_index, similarity in rows:
            if cell_full(t1_index, t2_index, similarity):
                continue
            if not symmetric:
                populate_buffers(t1_index, t2_index, similarity)
            elif not cell_full(t2_index, t1_index, similarity):
                populate_buffers(t1_index, t2_index, similarity)
                populate_buffers(t2_index, t1_index, similarity)

    data_buffer = np.frombuffer(data_buffer, dtype=dtype)
    row_buffer = np.frombuffer(row_buffer, dtype=np.uint64)
    column_buffer = np.frombuffer(column_buffer, dtype=np.uint64)
    matrix = sparse.coo_matrix((data_buffer, (row_buffer, column_buffer)), shape=(matrix_order, matrix_order))

    logger.info(
        "constructed a sparse term similarity matrix with %0.06f%% density",
        100.0 * matrix.getnnz() / matrix_order**2,
    )

    return matrix
Example No. 26
import json
import random
import torch

from tqdm import tqdm as progress_bar
from transformers import BertTokenizer, BertModel, RobertaTokenizer, RobertaModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

print("Loading data ...")
utt_texts = json.load(open(f'data/utterances.json', 'r'))
num_cands = len(utt_texts)
utt_vectors = []

cand_embeds, cand_segments, cand_masks = [], [], []
for cand_text in progress_bar(utt_texts, total=num_cands):
    cand_inputs = tokenizer(cand_text, return_tensors="pt")
    with torch.no_grad():
        cand_outputs = model(**cand_inputs)
    utt_vectors.append(cand_outputs.pooler_output)

utt_vectors = torch.cat(utt_vectors)
print("utt_vectors: {}".format(utt_vectors.shape))
torch.save(utt_vectors, 'data/utt_vectors.pt')
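A possible follow-up, sketched as an assumption rather than part of the original script: once `utt_vectors` is built, candidates can be ranked against a query by cosine similarity over the same pooled BERT outputs.

import torch.nn.functional as F

query = "can i get a refund for my order"  # illustrative query text
query_inputs = tokenizer(query, return_tensors="pt")
with torch.no_grad():
    query_vector = model(**query_inputs).pooler_output  # shape [1, 768]

scores = F.cosine_similarity(query_vector, utt_vectors)  # shape [num_cands]
top_indices = scores.topk(k=min(5, num_cands)).indices.tolist()
print([utt_texts[i] for i in top_indices])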
Example No. 27
def style_transfer(
    content_image_path,
    style_image_path,
    outputs_dir,
    n_epochs: int,
    content_weight: float = 3e-2,
    style_weights: tuple = (20000, 500, 12, 1, 1),
    smoothness_weight: float = 5e-2,
    content_layer: str = "block4_conv2",
    style_layers: tuple = ("block1_conv1", "block2_conv1", "block3_conv1",
                           "block4_conv1", "block5_conv1"),
    save_frequency: int = None,
):
    width, height = tf.keras.preprocessing.image.load_img(
        content_image_path).size
    save_frequency = save_frequency or n_epochs

    content_image = K.variable(
        read_image_as_tensor(content_image_path, (height, width)))
    style_image = K.variable(
        read_image_as_tensor(style_image_path, (height, width)))
    generated_image = K.placeholder(
        (1, height, width, 3))  # tensor placeholder for generated image

    input_as_tensor = tf.concat([content_image, style_image, generated_image],
                                axis=0)
    model = tf.keras.applications.vgg19.VGG19(input_tensor=input_as_tensor,
                                              weights="imagenet",
                                              include_top=False)
    layer_to_output_mapping = {
        layer.name: layer.output
        for layer in model.layers
    }

    # Extract features from the content layer
    content_features = layer_to_output_mapping[content_layer]
    base_image_features = content_features[0, :, :, :]  # 0 corresponds to base
    combination_features = content_features[
        2, :, :, :]  # 2 corresponds to generated

    # Compute total loss
    content_loss_value = content_weight * feature_reconstruction_loss(
        base_image_features, combination_features)
    style_loss_value = style_loss_for_all_layers(style_layers, style_weights,
                                                 layer_to_output_mapping)
    smoothness_loss_value = smoothness_weight * smoothness_loss(
        generated_image)
    total_loss = content_loss_value + style_loss_value + smoothness_loss_value

    # Compute gradients of output img with respect to total_loss
    grads = K.gradients(total_loss, generated_image)
    outputs = [total_loss] + grads
    loss_and_grads = K.function([generated_image], outputs)
    # Initialize the generated image from random noise
    x = np.random.uniform(0, 255, (1, height, width, 3)) - 128.

    # Fit over the total iterations
    for epoch in progress_bar(range(n_epochs)):
        x, min_val, info = fmin_l_bfgs_b(
            # extract loss function from tf model
            func=lambda x: loss_and_grads([x.reshape(
                (1, height, width, 3))])[0],
            x0=x.flatten(),
            # extract gradients from tf model
            fprime=lambda x: loss_and_grads([x.reshape(
                (1, height, width, 3))])[1].flatten().astype("float64"),
            maxfun=20,
        )

        if epoch % save_frequency == 0:
            generated_image = tensor_to_image(x.copy(), width, height)
            io.imsave(
                os.path.join(outputs_dir,
                             f"generated_image_at_{epoch}_epoch.jpg"),
                generated_image)

    return tensor_to_image(x.copy(), width, height)
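A hedged usage sketch for the function above; the paths are placeholders, and the final `io.imsave` mirrors the call already used inside the loop for intermediate frames.

final_image = style_transfer(
    content_image_path="content.jpg",  # placeholder paths
    style_image_path="style.jpg",
    outputs_dir="outputs",
    n_epochs=10,
    save_frequency=5,
)
io.imsave("outputs/final_image.jpg", final_image)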
Example No. 28
def main(config_path: str, resume_path=None):
    # Read the experiment configuration
    config = read_config(config_path)

    # Generate experiment file structure
    logs_dir, config_save_path, video_path, checkpoint_path, code_save_path = paths(config_path)

    # Read config
    save_config(config, config_save_path)
    save_code(code_save_path)

    # Set random seed
    torch.manual_seed(config.seed)

    # Instantiate components
    env, policies, storages = instantiate(config)

    # Resume from given checkpoint
    if resume_path is not None:
        policies = torch.load(resume_path)

        # Extract the iteration number from a checkpoint name like "<anything>-123.tar"
        start_iteration = int(re.findall(r'-(\d+)\.tar', resume_path)[0])
        for policy in policies:
            policy.scheduler.current_iteration = start_iteration
            # TODO also save scheduler values when serializing model
            policy.sync_scheduled_values()

        print(f'Resuming from iteration {start_iteration}')
    else:
        start_iteration = 0

    if config.viz_scripted_mode:
        [*env_history, end_reason] = simulate_episode(env, policies, SCRIPTED)
        create_animation(env_history, video_path % (0, 'scripted'))
        return

    with warnings.catch_warnings():
        # Silence tensorflow (2.0) deprecated usages of numpy
        warnings.simplefilter('ignore', FutureWarning)
        logs_writer = SummaryWriter(logs_dir)

    if config.compare_interval > 0:
        comparison_policies, _ = read_models(config.comparison_models_dir)

    try:
        start_time = time()

        # Main training loop
        for update_number in progress_bar(range(start_iteration, config.num_iterations), 'Training'):
            # Collect rollouts and update weights
            training_history = perform_update(config, env, policies, storages)

            # Write progress summaries
            if do_this_iteration(config.log_interval, update_number, config.num_iterations):
                log_layers(policies, logs_writer, update_number)
                log_scalars(training_history, logs_writer, update_number, env)

            # Evaluate and record video
            if do_this_iteration(config.eval_interval, update_number, config.num_iterations):
                for sampling_method in [SAMPLE, DETERMINISTIC]:
                    [*env_history, _] = simulate_episode(env, policies, sampling_method)
                    create_animation(env_history, video_path % (update_number, action_source_names[sampling_method]))

            # Checkpoint current model weights
            if do_this_iteration(config.save_interval, update_number, config.num_iterations):
                save_model(policies, checkpoint_path % update_number)

            # Evaluate against other models
            if do_this_iteration(config.compare_interval, update_number, config.num_iterations):
                won_statuses, rewards = play_against_others(env, policies, comparison_policies, config.comparison_num_episodes)
                log_comparisons(won_statuses, rewards, logs_writer, update_number)

                elapsed_time = time() - start_time  # in seconds
                elapsed_time /= 60  # in minutes
                elapsed_time /= 60  # in hours
                if elapsed_time >= config.max_run_time:
                    break

    except KeyboardInterrupt:
        print('Stopped training, finishing up...')

    # Save final weights
    if config.save_interval > 0:
        save_model(policies, checkpoint_path % update_number)

    if config.eval_interval > 0:
        [*env_history, _] = simulate_episode(env, policies, SAMPLE)
        create_animation(env_history, video_path % (update_number, action_source_names[SAMPLE]))

    # Save hyperparams and metrics (comparisons against others and themselves)
    selves_wons, selves_rewards     = play_against_others(env, policies, [policies],          config.comparison_num_episodes)
    if config.compare_interval > 0:
        others_wons, others_rewards = play_against_others(env, policies, comparison_policies, config.comparison_num_episodes)
    else:
        others_wons, others_rewards = None, None
    log_hyperparams_and_metrics(config,
                                selves_wons, selves_rewards,
                                others_wons, others_rewards,
                                logs_writer, start_time)

    # TODO log final
    # Flush logs
    logs_writer.close()