Ejemplo n.º 1
0
    def get_proba(self, sentences):
        """Return class probabilities for ``sentences``.

        When ``self.model_name`` is ``'bert'`` the rows of ``sentences`` are
        converted to BERT features in a process pool, pushed through a freshly
        loaded ``BertForSequenceClassification`` in batches of
        ``self.EVAL_BATCH_SIZE`` and soft-maxed along the class axis.  For any
        other model name the call is delegated to
        ``self.model.predict_proba``.

        Parameters
        ----------
        sentences
            For BERT: an object providing ``iterrows()`` and ``len()``
            (presumably a pandas DataFrame -- TODO confirm).  Otherwise
            whatever ``self.model.predict_proba`` accepts.

        Returns
        -------
        numpy.ndarray or None
            Per-batch probabilities concatenated along axis 0 for BERT
            (``None`` when the data loader yields no batch); otherwise the
            return value of ``self.model.predict_proba``.
        """
        if self.model_name != 'bert':
            return self.model.predict_proba(sentences)

        examples_for_processing = [(example[1], self.MAX_SEQ_LENGTH,
                                    self.bert_tokenizer)
                                   for example in sentences.iterrows()]
        # cpu_count() - 1 is 0 on a single-core machine and Pool(0) raises
        # ValueError, so always keep at least one worker.
        process_count = max(1, cpu_count() - 1)
        print(f'Preparing to convert {len(sentences)} examples..')
        print(f'Spawning {process_count} processes..')
        with Pool(process_count) as p:
            features = list(
                tqdm_notebook(p.imap(self.convert_example_to_bert_feature,
                                     examples_for_processing),
                              total=len(sentences)))
        all_input_ids, all_input_mask, all_segment_ids = self.get_bert_tensors(
            features)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=self.EVAL_BATCH_SIZE)

        model = BertForSequenceClassification.from_pretrained(
            'bert/', cache_dir='cache/', num_labels=len(self.classes))
        model.to(self.device)
        # Inference only: make sure dropout is disabled so the returned
        # probabilities are deterministic.
        model.eval()

        # Collect per-batch arrays and concatenate once; np.append inside the
        # loop re-copies everything gathered so far on every batch.
        batch_probas = []
        for input_ids, input_mask, segment_ids in tqdm_notebook(
                eval_dataloader, desc="Predicting"):
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                logits = model(input_ids,
                               segment_ids,
                               input_mask,
                               labels=None)
                prob = torch.nn.functional.softmax(logits, dim=1)

            batch_probas.append(prob.detach().cpu().numpy())

        if not batch_probas:
            return None
        return np.concatenate(batch_probas, axis=0)
Ejemplo n.º 2
0
def pos_analysis(df, group_cols=None, round_decimal=1):
    """Print part-of-speech statistics for the tags stored in ``df['pos']``.

    Each entry of ``df['pos']`` is iterated and its element at index 1 is
    compared with a universal POS tag (presumably ``(token, tag)`` pairs as
    produced by nltk -- TODO confirm).

    Without ``group_cols`` the mean number of matching tags per row is
    printed for every category.  With ``group_cols`` the frame is grouped,
    the distinct entries of ``tokens`` seen with each tag are collected per
    group, and two averages over the groups are printed: the number of
    distinct tokens and that number divided by the group's row count.

    Parameters
    ----------
    df
        Frame with a ``pos`` column (and a ``tokens`` column when grouping).
    group_cols
        Passed to ``df.groupby``; ``None`` selects the ungrouped statistics.
    round_decimal
        Number of decimals used in the printed values.
    """
    # (tag symbol, printed name) for the categories that are analysed.
    categories = (
        ('NOUN', 'Nouns'),
        ('PRON', 'Pronouns'),
        ('ADJ', 'Adjectives'),
        ('ADP', 'Adpositions'),
        ('VERB', 'Verbs'),
    )

    if group_cols is None:
        for symbol, label in categories:

            def count_tag(tagged, symbol=symbol):
                return sum(1 for entry in tagged if entry[1] == symbol)

            average = df.pos.apply(count_tag).mean()
            print(label, average.round(round_decimal))
        return

    vocab_per_group = []
    rows_per_group = []
    for _, frame in tqdm_notebook(df.groupby(group_cols)):
        rows_per_group.append(len(frame))
        vocab = defaultdict(set)
        for tokens, tags in zip(frame.tokens, frame.pos):
            for token, tag in zip(tokens, tags):
                vocab[tag[1]].add(token)
        vocab_per_group.append(vocab)

    for symbol, label in categories:
        unique_counts = [len(vocab[symbol]) for vocab in vocab_per_group]
        normalised = [
            count / rows for count, rows in zip(unique_counts, rows_per_group)
        ]
        print(label,
              f'{np.mean(unique_counts):.{round_decimal}f}',
              f'{np.mean(normalised):.{round_decimal}f}')
def outlier_dbscan(data):
    """Flag DBSCAN noise points per ``TRAIN_CLASS`` and plot their counts.

    For every distinct ``TRAIN_CLASS`` value the feature columns listed
    below are selected, rows with missing values are dropped, the remainder
    is row-normalised with ``Normalizer`` and clustered with
    ``DBSCAN(eps=.05, min_samples=10)``.  The cluster labels are written to
    ``data['OUTLIER']`` and then collapsed to ``0`` (label >= 0) or ``-1``
    (anything else, which includes rows that never received a label).  A bar
    chart with the number of ``-1`` rows per class is shown.

    Parameters
    ----------
    data
        Frame holding the feature columns, ``TRAIN_CLASS`` and ``x``.  It is
        modified in place (an ``OUTLIER`` column is written).

    Returns
    -------
    The same ``data`` object.
    """
    index_names = [
        'wet', 'green', 'bright', 'ARVI', 'SAVI', 'NDBI', 'mNDWI', 'NDWI',
        'mNDVI', 'NDVI'
    ]
    band_names = ['S2_B12', 'S2_B11', 'S2_B8', 'S2_B4', 'S2_B3', 'S2_B2']
    feature_cols = ([f'{name}_mean' for name in index_names] +
                    [f'{name}_p50' for name in index_names] +
                    [f'{band}mean' for band in band_names] +
                    [f'{band}med' for band in band_names])

    def to_flag(label):
        # NaN fails the comparison and therefore also maps to -1.
        return 0 if label >= 0 else -1

    class_values = data.TRAIN_CLASS.unique()
    for class_value in tqdm_notebook(
            class_values, desc='Processing Clustering Outlier data'):
        subset = data.loc[data.TRAIN_CLASS == class_value,
                          feature_cols].dropna()
        normalised = Normalizer().fit_transform(subset)
        clustering = DBSCAN(eps=.05, min_samples=10).fit(normalised)
        subset['label'] = clustering.labels_
        data.loc[subset.index, 'OUTLIER'] = subset.label

    data['OUTLIER'] = data.OUTLIER.apply(to_flag)

    outlier_rows = data.loc[data.OUTLIER < 0, ['x', 'TRAIN_CLASS']]
    data_outlier = outlier_rows.groupby('TRAIN_CLASS').agg('count')
    data_outlier = data_outlier.rename(columns={'x': 'COUNT_OUTLIER'})
    data_outlier = data_outlier.reset_index()

    fig = px.bar(data_outlier,
                 x="TRAIN_CLASS",
                 y="COUNT_OUTLIER",
                 title="OUTLIER")
    fig.show()
    return data
Ejemplo n.º 4
0
def _build_tqdm_iterator(iterable, verbose, **kwargs):
    """
    Wrap ``iterable`` in a tqdm progress bar when ``verbose`` is truthy.

    Parameters
    ----------
    iterable
        Object to iterate over.
    verbose
        If falsy, no progress bar is created.
    **kwargs
        Forwarded to the tqdm constructor, e.g. ``total`` (length of the
        iterator, helps in cases where tqdm is not detecting the total
        length).

    Returns
    -------
    ``iterable`` itself when ``verbose`` is falsy; otherwise a
    ``tqdm_notebook`` bar when running under an IPython
    ``ZMQInteractiveShell`` (Jupyter notebook or qtconsole) and a plain
    ``tqdm`` bar in every other environment.
    """
    if not verbose:
        return iterable

    try:
        # get_ipython is only defined when running inside IPython.
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # Probably the standard Python interpreter.
        in_notebook = False
    else:
        # 'TerminalInteractiveShell' and unknown shells use the text bar.
        in_notebook = shell_name == 'ZMQInteractiveShell'

    bar_factory = tqdm_notebook if in_notebook else tqdm
    return bar_factory(iterable, **kwargs)
def rfe_cat(train_x, train_y, valid_x, valid_y, min_):
    """Pick the CatBoost feature subset with the best validation micro-F1.

    For every target size from ``min_`` up to 35 a fresh
    ``CatBoostClassifier`` runs ``select_features`` (recursive elimination
    by SHAP values over features ``'0-34'``), trains the final model and is
    scored on the validation pool.  The best score, its feature count and
    the selected feature names are printed.

    Parameters
    ----------
    train_x, train_y
        Training data; column 0 is declared categorical.
    valid_x, valid_y
        Validation data; column 0 is declared categorical.
    min_
        Smallest number of features to try.

    Returns
    -------
    The ``selected_features_names`` of the first run that reached the best
    score.

    Raises
    ------
    ValueError
        If ``min_`` is 36 or larger, so that no run is performed.
    """
    train_pool = Pool(train_x, train_y, cat_features=[0])
    valid_pool = Pool(valid_x, valid_y, cat_features=[0])

    # One (micro-F1, number of features, selected names) entry per run.
    trials = []
    print('Start Recursive Feature Elimination')
    for n_features in tqdm_notebook(range(min_, 36),
                                    desc='Iterating Feature Elimination'):
        selector = CatBoostClassifier(iterations=50,
                                      random_seed=1234,
                                      used_ram_limit='10gb')
        summary = selector.select_features(
            train_pool,
            eval_set=valid_pool,
            features_for_select='0-34',
            num_features_to_select=n_features,
            steps=2,
            algorithm=EFeaturesSelectionAlgorithm.RecursiveByShapValues,
            shap_calc_type=EShapCalcType.Regular,
            train_final_model=True,
            logging_level='Silent',
        )
        predicted = selector.predict(valid_pool).tolist()
        score = f1_score(valid_y, predicted, average='micro')
        trials.append((score, n_features, summary['selected_features_names']))

    best_score = max(score for score, _, _ in trials)
    print('Best F-1 score: ', best_score)
    # Ties resolve to the earliest run, i.e. the smallest feature count.
    _, best_count, best_names = next(
        trial for trial in trials if trial[0] == best_score)
    print('Best Number feature: ', best_count)
    print('Selected of Feature names: \n', best_names)
    return best_names
def get_data(path, train=True):
    """Load the grayscale images under ``path`` (and optionally their masks).

    Every file in ``path + "/image"`` is loaded as grayscale, resized to
    ``(im_height, im_width, 1)`` (module-level settings) and scaled by
    ``1 / 255``.  With ``train=True`` the matching
    ``<first 6 chars of the file name>_gtFine_polygons.json`` annotation is
    read from ``path + "/annotation"``, rasterised with ``create_mask`` and
    processed the same way.

    Parameters
    ----------
    path
        Dataset directory containing ``image/`` (and ``annotation/`` when
        ``train`` is true).
    train
        Whether masks are loaded and returned as well.

    Returns
    -------
    ``X`` alone when ``train`` is false, otherwise the tuple ``(X, y)``;
    both are float32 arrays of shape ``(n_images, im_height, im_width, 1)``.
    """
    ids = next(os.walk(path + "/image"))[2]
    # Resize to the shape the buffers are allocated with; the previous
    # hard-coded (128, 128, 1) only worked when im_height == im_width == 128.
    target_shape = (im_height, im_width, 1)
    X = np.zeros((len(ids), ) + target_shape, dtype=np.float32)
    if train:
        y = np.zeros((len(ids), ) + target_shape, dtype=np.float32)
    print('Getting and resizing images ... ')
    for n, id_ in tqdm_notebook(enumerate(ids), total=len(ids)):
        # Load images
        img = load_img(path + '/image/' + id_, color_mode="grayscale")
        x_img = resize(img_to_array(img),
                       target_shape,
                       mode='constant',
                       preserve_range=True)

        # Load annotation
        if train:
            an_ = id_[:6] + "_gtFine_polygons.json"
            # Read from the directory that was passed in; the original used
            # the global ``path_train`` and ignored ``path`` here.
            with open(path + "/annotation/" + an_) as f:
                data = json.load(f)
            mask = resize(img_to_array(create_mask(data)),
                          target_shape,
                          mode='constant',
                          preserve_range=True)

        # Save images
        X[n, ..., 0] = x_img.squeeze() / 255
        if train:
            y[n] = mask / 255
    print('Done!')
    if train:
        return X, y
    return X
Ejemplo n.º 7
0
def evaluate_on_dataset(model,
                        data_loader,
                        criterion,
                        device,
                        detailed=True,
                        kl_div=True):
    """Evaluate ``model`` on ``data_loader`` and return the average loss.

    Parameters
    ----------
    model
        Network called as ``model(img)``; it is switched to eval mode.
    data_loader
        Iterable of batches with the keys ``'image'`` and ``'label'``.
    criterion
        Loss called as ``criterion(logits, labels)``.
    device
        Device the batch tensors are moved to.
    detailed
        If true, per-sample confidences are collected and returned.
    kl_div
        If true the model output is treated as log-probabilities and
        exponentiated; otherwise a softmax over the last axis is applied.

    Returns
    -------
    tuple
        ``(average loss, confidences)`` where confidences is a numpy array
        when ``detailed`` is true and an empty list otherwise.
    """
    epoch_loss = AverageMeter()
    model.eval()
    epoch_confidence = []
    # No autograd graph is needed for evaluation.  Without no_grad() the
    # collected confidences still require grad and the .numpy() call below
    # raises RuntimeError unless the caller disabled gradients itself.
    with torch.no_grad():
        for batch in tqdm_notebook(data_loader):
            img = batch['image'].to(device)
            labels = batch['label'].to(device)  # emotion_distribution
            logits = model(img)

            # Calculate loss
            loss = criterion(logits, labels)

            if detailed:
                if kl_div:
                    epoch_confidence.append(
                        torch.exp(logits).cpu())  # logits are log-soft-max
                else:
                    epoch_confidence.append(F.softmax(
                        logits, dim=-1).cpu())  # logits are pure logits

            b_size = len(labels)
            epoch_loss.update(loss.item(), b_size)

    if detailed:
        epoch_confidence = torch.cat(epoch_confidence).numpy()

    return epoch_loss.avg, epoch_confidence
Ejemplo n.º 8
0
def evaluate_on_dataset(model,
                        data_loader,
                        use_vision,
                        criterion,
                        device,
                        detailed=True):
    """Evaluate ``model`` on ``data_loader``; return loss, accuracy, confidences.

    Parameters
    ----------
    model
        Network called as ``model(tokens, img)`` when ``use_vision`` is true
        and as ``model(tokens)`` otherwise; it is switched to eval mode.
    data_loader
        Iterable of batches with the keys ``'emotion'``, ``'tokens'`` and,
        when ``use_vision`` is true, ``'image'``.
    use_vision
        Whether the image is fed to the model as well.
    criterion
        Loss called as ``criterion(logits, labels)``.
    device
        Device the batch tensors are moved to.
    detailed
        If true, softmax confidences are collected and returned.

    Returns
    -------
    tuple
        ``(average loss, average accuracy, confidences)`` where confidences
        is a numpy array when ``detailed`` is true and an empty list
        otherwise.
    """
    epoch_loss = AverageMeter()
    epoch_acc = AverageMeter()
    model.eval()
    epoch_confidence = []
    # No autograd graph is needed for evaluation.  Without no_grad() the
    # collected confidences still require grad and the .numpy() call below
    # raises RuntimeError unless the caller disabled gradients itself.
    with torch.no_grad():
        for batch in tqdm_notebook(data_loader):
            labels = batch['emotion'].to(device)
            tokens = batch['tokens'].to(device)
            if use_vision:
                img = batch['image'].to(device)
                logits = model(tokens, img)
            else:
                logits = model(tokens)

            # Calculate loss
            loss = criterion(logits, labels)
            guessed_correct = logits.argmax(1) == labels
            acc = torch.mean(guessed_correct.double())

            if detailed:
                epoch_confidence.append(F.softmax(logits, dim=-1).cpu())

            b_size = len(labels)
            epoch_loss.update(loss.item(), b_size)
            epoch_acc.update(acc.item(), b_size)

    if detailed:
        epoch_confidence = torch.cat(epoch_confidence).numpy()

    return epoch_loss.avg, epoch_acc.avg, epoch_confidence
Ejemplo n.º 9
0
def single_epoch_train(model, data_loader, use_vision, criterion, optimizer,
                       device):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch supplies ``'emotion'`` labels and ``'tokens'``; when
    ``use_vision`` is true its ``'image'`` entry is passed to the model as a
    second argument.  After every batch the optimizer takes one step.

    Returns
    -------
    tuple
        ``(average loss, average accuracy)`` as tracked by two
        ``AverageMeter`` instances weighted by batch size.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.train()

    for batch in tqdm_notebook(data_loader):
        labels = batch['emotion'].to(device)
        tokens = batch['tokens'].to(device)

        model_inputs = (tokens, )
        if use_vision:
            model_inputs = (tokens, batch['image'].to(device))
        logits = model(*model_inputs)

        # Loss and fraction of correctly predicted labels for this batch.
        loss = criterion(logits, labels)
        hits = logits.argmax(1) == labels
        acc = torch.mean(hits.double())

        # Back-propagation and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n_samples = len(labels)
        loss_meter.update(loss.item(), n_samples)
        acc_meter.update(acc.item(), n_samples)

    return loss_meter.avg, acc_meter.avg
Ejemplo n.º 10
0
def get_data(path, train=False):
    """Load the single grayscale image at ``path`` as a batch of one.

    The image is resized to ``(im_height, im_width, 1)`` (module-level
    settings) and scaled by ``1 / 255``.

    Parameters
    ----------
    path
        Path of the image file.
    train
        Kept for signature compatibility.  NOTE(review): no mask is loaded
        in this function, so ``train=True`` fails on the undefined name
        ``mask`` unless a module-level ``mask`` exists -- TODO confirm.

    Returns
    -------
    ``X`` of shape ``(1, im_height, im_width, 1)`` when ``train`` is false,
    otherwise the tuple ``(X, y)``.
    """
    ids = [1]  # single placeholder entry: exactly one image is processed
    # Resize to the shape the buffer is allocated with; the previous
    # hard-coded (128, 128, 1) only worked when im_height == im_width == 128.
    target_shape = (im_height, im_width, 1)
    X = np.zeros((1, ) + target_shape, dtype=np.float32)
    if train:
        y = np.zeros((len(ids), ) + target_shape, dtype=np.float32)
    print("Getting and resizing images ... ")
    for n, id_ in tqdm_notebook(enumerate(ids), total=len(ids)):

        # Load images
        img = load_img(path, color_mode="grayscale")
        print(img)
        x_img = img_to_array(img)
        x_img = resize(x_img, target_shape,
                       mode="constant", preserve_range=True)

        # Save images
        X[n, ..., 0] = x_img.squeeze() / 255
        if train:
            # NOTE(review): ``mask`` is never assigned in this function.
            y[n] = mask / 255

    print("Done!")
    if train:
        return X, y
    return X
Ejemplo n.º 11
0
def _default_bar_func_mapping():
    """Return the mapping from progress-bar name to wrapper factory.

    Every value takes one argument, ``args``.  The ``'tqdm'`` and
    ``'tqdm_notebook'`` factories return a one-argument callable that wraps
    an iterable in the respective bar, forwarding ``args`` as keyword
    arguments.  The ``'False'`` and ``'None'`` factories ignore ``args`` and
    return the builtin ``iter``.
    """

    def _tqdm_factory(args):
        def wrap(x):
            return tqdm(x, **args)

        return wrap

    def _tqdm_notebook_factory(args):
        def wrap(x):
            return tqdm_notebook(x, **args)

        return wrap

    def _no_bar_factory(args):
        return iter

    mapping = {}
    mapping['tqdm'] = _tqdm_factory
    mapping['tqdm_notebook'] = _tqdm_notebook_factory
    mapping['False'] = _no_bar_factory
    mapping['None'] = _no_bar_factory
    return mapping
Ejemplo n.º 12
0
def get_neocr_dicts(xml_dir):
    """Build one dataset record per annotation XML file in ``xml_dir``.

    Each ``*.xml`` file (processed in sorted order) is parsed with
    ``xmltodict``.  The referenced image is looked up under the module-level
    ``img_dir`` and read with ``cv2.imread`` to obtain its height and width.
    For every annotated object the axis-aligned bounding box of its polygon
    points is stored together with a four-corner segmentation that follows
    the box boundary.

    Parameters
    ----------
    xml_dir
        Directory containing the annotation XML files.

    Returns
    -------
    list of dict
        Records with the keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``.
    """
    xml_files = sorted(glob.glob(f'{xml_dir}/*.xml'))

    dataset_dicts = []
    for idx, xml_file in enumerate(tqdm_notebook(xml_files)):
        # Load XML format to Dict; the context manager closes the file
        # (the original left every handle open).
        with open(xml_file) as xml_handle:
            doc = xmltodict.parse(xml_handle.read())
        annotation = doc['annotation']

        filename = os.path.join(img_dir, annotation['filename'])
        height, width = cv2.imread(filename).shape[:2]

        record = {
            "file_name": filename,
            "image_id": idx,
            # Taken from the decoded image: different from
            # doc['annotation']['properties']['height'] / ['width'].
            "height": height,
            "width": width,
        }

        # A single object is not wrapped in a list by the parser.
        ann_objects = annotation['object']
        if not isinstance(ann_objects, list):
            ann_objects = [ann_objects]

        objs = []
        for ann_object in ann_objects:
            # float, because int could not be dumped to json file
            polygon = np.array([[float(pts['x']), float(pts['y'])]
                                for pts in ann_object['polygon']['pt']])

            # Get bbox of this object
            x_min, y_min = np.min(polygon, axis=0)
            x_max, y_max = np.max(polygon, axis=0)

            objs.append({
                "bbox": [x_min, y_min, x_max, y_max],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": 0,
                # Specify coordinates so that it goes around the boundary.
                "segmentation":
                [[x_min, y_min, x_min, y_max, x_max, y_max, x_max, y_min]],
            })

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts
Ejemplo n.º 13
0
 def modeling(list_of_df, base_estimator):
     """Fit an independent deep copy of ``base_estimator`` on every frame.

     The column named by ``self.target`` is the label; all remaining columns
     are the features.

     NOTE(review): ``self`` is not a parameter here, so it has to come from
     an enclosing scope -- TODO confirm.

     Returns
     -------
     list
         The fitted copies, in the order of ``list_of_df``.
     """
     fitted_models = []
     for position in tqdm_notebook(range(len(list_of_df))):
         frame = list_of_df[position]
         features = frame.drop(self.target, axis=1)
         labels = frame[self.target]
         estimator = copy.deepcopy(base_estimator)
         estimator.fit(features, labels)
         fitted_models.append(estimator)
     return fitted_models
Ejemplo n.º 14
0
 def __call__(self, current_size, max_size=None):
     """Pass ``current_size`` to the bar's ``update``, creating the bar on first use.

     A non-``None`` ``max_size`` is stored on the instance first and is used
     as the total when the bar is created.
     """
     if max_size is not None:
         self.max_size = max_size

     bar = self.pb
     if bar is None:
         # Created lazily so that the total is known before the first draw.
         bar = tqdm_notebook(total=self.max_size,
                             unit="B",
                             unit_scale=True)
         self.pb = bar
     bar.update(current_size)
Ejemplo n.º 15
0
def get_timeseries(df, y_index, time_steps):
    """Cut a 2-D array into sliding windows and next-step targets.

    Window ``i`` holds rows ``i`` to ``i + time_steps - 1`` of ``df``; its
    target is the value in column ``y_index`` of row ``i + time_steps``.
    ``df`` is indexed as ``df[row, col]`` (presumably a numpy array -- TODO
    confirm).

    Returns
    -------
    tuple
        ``x`` of shape ``(rows - time_steps, time_steps, columns)`` and ``y``
        of shape ``(rows - time_steps,)``.
    """
    n_rows, n_cols = df.shape[0], df.shape[1]
    n_windows = n_rows - time_steps

    x = np.zeros((n_windows, time_steps, n_cols))
    y = np.zeros((n_windows, ))

    for start in tqdm_notebook(range(n_windows)):
        stop = start + time_steps
        x[start] = df[start:stop]
        y[start] = df[stop, y_index]
    return x, y
def iterate_geom_(data_riox, train_vector):
    """Clip ``data_riox`` with every geometry of ``train_vector``.

    For each row index ``i`` the result of
    ``preprocessing_tif_vector(data_riox, train_vector.geometry[i])`` gets a
    ``TRAIN_CLASS`` column set to ``train_vector.id[i]``.  Geometries whose
    processing raises are skipped with a warning.

    Parameters
    ----------
    data_riox
        Raster passed through to ``preprocessing_tif_vector``.
    train_vector
        Object providing ``shape``, ``geometry`` and ``id``.

    Returns
    -------
    pandas.DataFrame
        All per-geometry results stacked, most recently processed geometry
        first; an empty frame if nothing succeeded.
    """
    import warnings

    pieces = []
    for i in tqdm_notebook(range(train_vector.shape[0]),
                           desc='Processing clipping raster with vector'):
        try:
            r_t = preprocessing_tif_vector(data_riox, train_vector.geometry[i])
            r_t['TRAIN_CLASS'] = train_vector.id[i]
        except Exception as exc:
            # Best effort as before, but no longer silent.  The old bare
            # ``except: pass`` also hid the AttributeError raised by
            # DataFrame.append on pandas >= 2.0, which made the function
            # return an empty frame.
            warnings.warn(f'Skipping geometry {i}: {exc!r}')
            continue
        pieces.append(r_t)

    if not pieces:
        return pd.DataFrame()
    # The original prepended each new result, so the latest comes first.
    return pd.concat(pieces[::-1])
Ejemplo n.º 17
0
def get_scores(data_loader, data_references):
  """Compute corpus-level BLEU-1 of generated outputs against the references.

  For every ``(v, inp)`` batch, ``evaluate(inp.to(device=DEVICE), 30)``
  produces one output string per sample.  Each output is split on
  whitespace and truncated to the floored mean length of the sample's
  references, which are looked up as ``data_references[v[i]]``.

  Returns
  -------
  dict
      ``{'BLEU1': score}`` as computed by ``corpus_bleu`` with weights
      ``(1.0, 0, 0, 0)``.
  """
  references, candidates = [], []
  for _, (v, inp) in notebook.tqdm_notebook(enumerate(data_loader)):
    o = evaluate(inp.to(device = DEVICE),30)
    for row in range(inp.shape[0]):
      sample_refs = data_references[v[row]]
      mean_len = sum(map(len, sample_refs))//len(sample_refs)
      candidates.append(o[row].split()[:mean_len])
      references.append(sample_refs)
  return {'BLEU1': corpus_bleu(references, candidates, weights=(1.0, 0, 0, 0))}
Ejemplo n.º 18
0
def read_BSSR1_scores_from_file(enrollees_id_filepath, users_id_filepath,
                                path):
    """Read similarity-score files into a DataFrame indexed by ``subject_id``.

    Parameters
    ----------
    enrollees_id_filepath
        XML file whose child elements carry a ``subject_id`` attribute; their
        order defines the column order.
    users_id_filepath
        XML file used to map a score file name (``name`` attribute) to the
        ``subject_id`` of the user.
    path
        Glob pattern selecting the similarity files.

    Returns
    -------
    tuple
        ``(df, enrollees, users, column_names)``: the score frame with one
        row per file, the two parsed XML trees and the enrollee subject ids.

    Raises
    ------
    AssertionError
        If a file does not contain exactly the announced number of scores,
        or that number is not 6000.
    """
    import os

    # parse the XML files
    enrollees = ET.parse(enrollees_id_filepath)
    users = ET.parse(users_id_filepath)

    rows = []

    # Iterate ``glob.iglob(path)`` directly instead if the tqdm package is
    # not installed.
    for filepath in tqdm_notebook(glob.glob(path)):
        # Close every file as soon as it is read.  The original closed only
        # the last one, and failed on ``file.close()`` when nothing matched.
        with open(filepath, 'r') as score_file:
            read_data = np.array(score_file.read().split('\n'))

        file_name = os.path.basename(filepath)

        # ``np.str`` was removed from NumPy; the builtin is equivalent.
        sims = read_data[2:-2].astype(str)
        n_cmp = int(read_data[1])
        assert sims.shape[0] == n_cmp
        assert sims.shape[0] == 6000

        # "The order of the elements in the similarity file are fixed for all similarity files in the tree.
        # They are not sorted on similarity value. The order corresponds to the entries in the enrollees.xml
        # file."

        # grab the subject_id of current user
        subject_id = users.find(
            "./*[@name='{}']".format(file_name)).attrib['subject_id']

        # Build the row as a list: np.insert into the fixed-width string
        # array would truncate a subject_id longer than the longest score.
        rows.append([subject_id, *sims])

    # extract the column names for later indexing
    column_names = [e.attrib['subject_id'] for e in enrollees.findall("./*")]
    column_names_ex = ['subject_id'] + column_names

    # convert to pandas dataframe
    df = pd.DataFrame(rows, columns=column_names_ex)

    # set index to subject_id and organise rows according to column order
    df = df.set_index('subject_id')

    return (df, enrollees, users, column_names)
Ejemplo n.º 19
0
def progressbar(*args, **kwargs):
    """Uses tqdm progressbar. This function exists for wrapping purposes only.
    Original docstring follows:
    ----------------------------------------
    %s
    %s
    """
    # NOTE(review): the two ``%s`` placeholders in the docstring look like
    # they are filled in elsewhere via %-formatting of ``__doc__``, so the
    # docstring text is deliberately left untouched -- TODO confirm.
    if preferences.General.nb_progressbar:
        try:
            return tqdm_notebook(*args, **kwargs)
        except Exception:
            # The notebook bar is unavailable: fall back to the text bar
            # below.  ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt and SystemExit are not swallowed.
            pass
    return tqdm(*args, **kwargs)
Ejemplo n.º 20
0
 def __call__(self, current_size, max_size=None):
     """Pass ``current_size`` to the bar's ``update``, creating the bar on first use.

     A non-``None`` ``max_size`` is stored on the instance first.  The bar is
     built from the instance's ``max_size``, ``unit``, ``unit_scale``,
     ``desc`` and ``position`` attributes.
     """
     if max_size is not None:
         self.max_size = max_size

     bar = self.pb
     if bar is None:
         # Created lazily so that the total is known before the first draw.
         bar_options = dict(
             total=self.max_size,
             unit=self.unit,
             unit_scale=self.unit_scale,
             desc=self.desc,
             position=self.position,
             dynamic_ncols=True,
         )
         bar = tqdm_notebook(**bar_options)
         self.pb = bar
     bar.update(current_size)
Ejemplo n.º 21
0
def load_data(test_size=0.2):
    """Extract MFCC features from the actor recordings and split them.

    Every ``/content/Actor_*/*.wav`` file is considered.  The third
    dash-separated field of the file name is mapped through ``emotions``;
    files whose emotion is not in ``observed_emotions`` are skipped.

    Parameters
    ----------
    test_size
        Forwarded to ``train_test_split``.

    Returns
    -------
    The result of ``train_test_split`` on the feature array and the emotion
    labels, using ``random_state=9``.
    """
    features, labels = [], []
    for wav_path in tqdm_notebook(glob.glob("/content/Actor_*/*.wav")):
        emotion_code = os.path.basename(wav_path).split("-")[2]
        emotion = emotions[emotion_code]
        if emotion in observed_emotions:
            features.append(
                extract_feature(wav_path, mfcc=True, chroma=False, mel=False))
            labels.append(emotion)
    return train_test_split(np.array(features),
                            labels,
                            test_size=test_size,
                            random_state=9)
Ejemplo n.º 22
0
    def _progress_register(self,
                           amount_of_work,
                           description='',
                           stage=0,
                           tqdm_args=None):
        """ Registers a progress which can be reported/displayed via a progress bar.

        Nothing happens when ``self.show_progress`` is falsy.

        Parameters
        ----------
        amount_of_work : int
            Amount of steps the underlying algorithm has to perform.
        description : str, optional
            This string will be displayed in the progress bar widget.
        stage : int, optional, default=0
            If the algorithm has multiple different stages (eg. calculate means
            in the first pass over the data, calculate covariances in the second),
            one needs to estimate different times of arrival.
        tqdm_args : dict, optional
            Extra keyword arguments forwarded to the tqdm constructor.

        Raises
        ------
        ValueError
            If ``amount_of_work`` is not an integer.
        """
        if not self.show_progress:
            return

        if not isinstance(amount_of_work, Integral):
            raise ValueError(
                'amount_of_work has to be of integer type. But is {}'.format(
                    type(amount_of_work)))

        extra_args = {} if tqdm_args is None else tqdm_args

        # No bar unless there is enough work to justify its overhead.
        pg = None
        if amount_of_work > ProgressReporterMixin._pg_threshold:
            args = dict(total=amount_of_work,
                        desc=description,
                        dynamic_ncols=True,
                        **extra_args)
            if _attached_to_ipy_notebook_with_widgets():
                from tqdm.notebook import tqdm_notebook
                pg = tqdm_notebook(leave=False, **args)
            else:
                import tqdm
                pg = tqdm.tqdm(leave=True, **args)

        self._prog_rep_progressbars[stage] = pg
        self._prog_rep_descriptions[stage] = description
        assert stage in self._prog_rep_progressbars
        assert stage in self._prog_rep_descriptions
Ejemplo n.º 23
0
def evaluate(model, tokenizer, eval_dataset, batch_size):
    """
    Compute the masked-LM loss and perplexity of ``model`` on ``eval_dataset``.

    Batches are drawn in dataset order.  Batch ``i`` is masked with
    ``custom_mask_tokens`` using ``eval_dataset.positions_to_mask[i]``, and
    the mean loss of every batch is averaged over the number of batches.

    :param model: Newly trained Bert model
    :param tokenizer: Newly trained Bert tokenizer
    :param eval_dataset: Dataset exposing ``positions_to_mask``, indexed per
        batch
    :param batch_size: More flexible than training, the user can get away
        with picking a higher batch_size
    :return: Dict with the keys ``'perplexity'`` and ``'eval_loss'``
    :raises ZeroDivisionError: If the dataset yields no batch
    """
    eval_sampler = SequentialSampler(eval_dataset)  # Same order sampling
    eval_dataloader = DataLoader(
        eval_dataset, sampler=eval_sampler, batch_size=batch_size)
    positions_to_mask = eval_dataset.positions_to_mask

    # Run on whatever device the model lives on instead of a hard-coded
    # 'cuda', which failed on CPU-only machines.
    device = next(model.parameters()).device

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    # Evaluation loop
    progress = tqdm_notebook(eval_dataloader, desc='Evaluating')
    for step, batch in enumerate(progress):
        inputs, labels = custom_mask_tokens(batch, tokenizer,
                                            positions_to_mask[step])
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            outputs = model(inputs, masked_lm_labels=labels)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    perplexity = torch.exp(torch.tensor(eval_loss)).item()

    result = {
        'perplexity': perplexity,
        'eval_loss': eval_loss
    }

    return result
Ejemplo n.º 24
0
 def bgp(self, labelled=None):
     """Run the global propagation loop for up to ``self.global_max_itr`` rounds.

     Each round resets ``self.max`` to 1, calls ``local_propag`` for every
     document (followed by ``suppress`` for documents not marked unlabeled
     when ``self.is_labeled`` is set) and then ``global_propag``.  Unless
     ``self.silence`` is set, the top topics are printed after every round
     and ``self.eval_func`` is called if present.

     ``labelled`` is accepted but not used here.
     """
     round_no = 0
     while round_no < self.global_max_itr:
         self.max = 1
         doc_bar = tqdm_notebook(
             range(self.ndocs),
             disable=self.silence,
             ascii=True,
             desc=f'docs processed (itr {round_no})')
         for doc_idx in doc_bar:
             self.local_propag(doc_idx)
             if not self.is_labeled:
                 continue
             if not self.unlabeled[doc_idx]:
                 self.suppress(doc_idx)
         self.global_propag()
         round_no += 1

         if self.silence:
             continue
         self.print_top_topics()
         if self.eval_func:
             self.eval_func(self)
def make_progress_bar(*args, **kwargs):
    """Create iterable as progress bar if available.

    Ensure simple loop is returned or tqdm_notebook progress bar when
    prerequisites are met.  All arguments are forwarded to ``tqdm_notebook``.

    Returns
    -------
    iterable or tqdm_notebook
        tqdm_notebook based progress bar.  If the bar cannot be created, a
        warning is logged and the plain iterable is returned instead: the
        first positional argument or, failing that, the ``iterable`` keyword
        argument (``None`` when neither was given).
    """
    try:
        from tqdm.notebook import tqdm_notebook

        return tqdm_notebook(*args, **kwargs)
    except Exception:
        logging.warning("No prerequisites installed for interactive progress bar, continuing without one.")

    # The iterable may have been passed by keyword; ``args[0]`` alone raised
    # IndexError in that case.
    if args:
        return args[0]
    return kwargs.get("iterable")
Ejemplo n.º 26
0
def single_epoch_train(model, data_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch supplies an ``'image'`` and a ``'label'`` entry (the label is
    an emotion distribution according to the original comment).  After every
    batch the optimizer takes one step.

    Returns
    -------
    The average loss tracked by an ``AverageMeter`` weighted by batch size.
    """
    loss_meter = AverageMeter()
    model.train()

    for batch in tqdm_notebook(data_loader):
        images = batch['image'].to(device)
        targets = batch['label'].to(device)

        # Forward pass and loss.
        loss = criterion(model(images), targets)

        # Back-propagation and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_meter.update(loss.item(), len(targets))

    return loss_meter.avg
Ejemplo n.º 27
0
def progress_bar(x: iter, verbose: bool = True, **kwargs) -> callable:
    """
    Wrap an iterable in a tqdm progress bar.

    ``tqdm_notebook`` is used when ``which_environment()`` reports
    ``'jupyter'``, plain ``tqdm`` otherwise.

    Parameters
    -----------
    x: iterable
        some iterable to pass to tqdm function
    verbose: bool, (default=True)
        Provide feedback (if False, ``x`` is returned unchanged and no
        progress bar is produced)
    kwargs:
        additional keyword arguments for tqdm
    :return: ``x``, tqdm or tqdm_notebook, depending on ``verbose`` and the
        environment
    """
    if verbose:
        in_jupyter = which_environment() == 'jupyter'
        bar_factory = tqdm_notebook if in_jupyter else tqdm
        return bar_factory(x, **kwargs)
    return x
Ejemplo n.º 28
0
 def _init_supervised_matrices(self):
     """Initialise the matrices for the supervised case and print the top topics.

     After ``_init_matrices``, every document not marked unlabeled is passed
     to ``suppress``.  Then, per word, ``log_B`` is filled from the
     documents that contain the word: the log of their entries in ``X`` is
     combined with their rows of ``log_A`` (normalised per document) via
     ``logsumexp``.  Words that occur in no document get a row of ones.
     Finally ``log_B`` is normalised over axis 0 and smoothed with
     ``self.beta``.
     """
     print('oi')
     self._init_matrices()

     for doc_idx in range(self.ndocs):
         if self.unlabeled[doc_idx]:
             continue
         self.suppress(doc_idx)

     for word_idx in tqdm_notebook(range(self.nwords),
                                   ascii=True,
                                   desc='initialing.[]:   '):
         docs = list(self.X[:, word_idx].nonzero()[0])
         # if word w_i not belong in train documents set X_train
         if not docs:
             self.log_B[word_idx] = np.ones(self.n_components)
             continue
         log_counts = np.log(self.X[docs, word_idx].toarray())
         log_A_docs = self.log_A[docs]
         log_A_docs = log_A_docs - logsumexp(
             log_A_docs, axis=1, keepdims=True)
         self.log_B[word_idx] = logsumexp(log_counts + log_A_docs, axis=0)

     normalised_log_B = self.log_B - logsumexp(self.log_B, axis=0)
     self.log_B = normalised_log_B
     self.log_B = np.log(self.beta + np.exp(self.log_B))
     self.print_top_topics()
Ejemplo n.º 29
0
def validation(model, dataloader, multi):
    """Return the mean PSNR of ``model`` over ``dataloader``.

    Parameters
    ----------
    model
        Network called with ``input_b1`` alone, or with the tuple
        ``(input_b1, input_b2, input_b3)`` when ``multi`` is true (the first
        of its three outputs is scored in that case).
    dataloader
        Iterable of dict batches with ``'input_b1'`` and ``'target_s1'``
        (plus ``'input_b2'`` and ``'input_b3'`` when ``multi`` is true).
        Tensors are moved to the GPU with ``.cuda()``.
    multi
        Whether the model takes the three inputs.

    Returns
    -------
    The sum of the per-batch PSNR values divided by the number of batches.

    Raises
    ------
    ValueError
        If ``dataloader`` yields no batch.
    """
    total_psnr = 0
    n_batches = 0
    # Iterate the loader directly so tqdm can show the total, and count the
    # batches explicitly: the old ``batch`` loop variable was unbound when
    # the loader was empty.
    for images in tqdm_notebook(dataloader):
        with torch.no_grad():
            input_b1 = Variable(images['input_b1'].cuda())
            target_s1 = Variable(images['target_s1'].cuda())

            if multi:
                input_b2 = Variable(images['input_b2'].cuda())
                input_b3 = Variable(images['input_b3'].cuda())
                output_l1, _, _ = model((input_b1, input_b2, input_b3))
            else:
                output_l1 = model(input_b1)

        output_l1 = tensor_to_rgb(output_l1)
        target_s1 = tensor_to_rgb(target_s1)

        # compute psnr using function from utils
        total_psnr += compute_psnr(target_s1, output_l1)
        n_batches += 1

    if n_batches == 0:
        raise ValueError('dataloader yielded no batches')
    return total_psnr / n_batches
Ejemplo n.º 30
0
    days_range[0], days_range[-1]))
# Time the download of all tweets matching ``tweetCriteria``.
start_time = time.time()

tweet = got.manager.TweetManager.getTweets(tweetCriteria)

print("Collecting data end.. {0:0.2f} Minutes".format(
    (time.time() - start_time)/60))
print("=== Total num of tweets is {} ===".format(len(tweet)))

# Pick the fields of interest and store them


# initialize
tweet_list = []

for index in tqdm_notebook(tweet):
    # Metadata fields (``index`` is one tweet object, not a position)
    username = index.username
    link = index.permalink
    content = index.text
    # print(content)
    tweet_date = index.date.strftime("%Y-%m-%d")
    tweet_time = index.date.strftime("%H:%M:%S")
    retweets = index.retweets
    favorites = index.favorites

    # Combine the fields into one row
    info_list = [tweet_date, tweet_time, username,
                 content, link, retweets, favorites]
    tweet_list.append(info_list)